summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorAdrian Thurston <thurston@complang.org>2012-07-01 12:48:22 -0400
committerAdrian Thurston <thurston@complang.org>2012-07-01 12:48:22 -0400
commit247904a84430b8c9151fa6afb68f01b60afb92c9 (patch)
tree58d498f783a935b02255120c814c387745dc6e41 /src
parentd8cdec468bb7efad768d25872147533312cffe91 (diff)
downloadcolm-247904a84430b8c9151fa6afb68f01b60afb92c9.tar.gz
moved 'colm' dir to 'src'
Diffstat (limited to 'src')
-rw-r--r--src/.gitignore27
-rw-r--r--src/Makefile.am183
-rw-r--r--src/buffer.h55
-rw-r--r--src/bytecode.c3579
-rw-r--r--src/bytecode.h487
-rw-r--r--src/closure.cc458
-rw-r--r--src/codegen.cc50
-rw-r--r--src/codevect.c183
-rw-r--r--src/colm.h55
-rw-r--r--src/compiler.cc1496
-rw-r--r--src/ctinput.cc439
-rw-r--r--src/debug.c78
-rw-r--r--src/debug.h58
-rw-r--r--src/declare.cc383
-rw-r--r--src/defs.h.in49
-rw-r--r--src/dotgen.cc113
-rw-r--r--src/dotgen.h51
-rw-r--r--src/exports.cc285
-rw-r--r--src/fsmap.cc856
-rw-r--r--src/fsmattach.cc425
-rw-r--r--src/fsmbase.cc602
-rw-r--r--src/fsmcodegen.cc1098
-rw-r--r--src/fsmcodegen.h212
-rw-r--r--src/fsmexec.cc208
-rw-r--r--src/fsmgraph.cc1408
-rw-r--r--src/fsmgraph.h1388
-rw-r--r--src/fsmmin.cc732
-rw-r--r--src/fsmrun.h36
-rw-r--r--src/fsmstate.cc467
-rw-r--r--src/global.h90
-rw-r--r--src/input.c847
-rw-r--r--src/input.h214
-rw-r--r--src/keyops.h283
-rw-r--r--src/list.c105
-rw-r--r--src/lmparse.kh120
-rw-r--r--src/lmparse.kl2677
-rw-r--r--src/lmscan.h118
-rw-r--r--src/lmscan.rl636
-rw-r--r--src/main.cc623
-rw-r--r--src/map.c763
-rw-r--r--src/map.cc26
-rw-r--r--src/map.h108
-rw-r--r--src/parsedata.h1063
-rw-r--r--src/parsetree.cc1776
-rw-r--r--src/parsetree.h2253
-rw-r--r--src/pcheck.cc154
-rw-r--r--src/pcheck.h48
-rw-r--r--src/pdabuild.cc2091
-rw-r--r--src/pdacodegen.cc653
-rw-r--r--src/pdacodegen.h106
-rw-r--r--src/pdagraph.cc533
-rw-r--r--src/pdagraph.h515
-rw-r--r--src/pdarun.c2272
-rw-r--r--src/pdarun.h473
-rw-r--r--src/pool.c330
-rw-r--r--src/pool.h86
-rw-r--r--src/program.c254
-rw-r--r--src/program.h128
-rw-r--r--src/redbuild.cc650
-rw-r--r--src/redbuild.h161
-rw-r--r--src/redfsm.cc1112
-rw-r--r--src/redfsm.h524
-rw-r--r--src/resolve.cc805
-rw-r--r--src/rtvector.h34
-rw-r--r--src/string.c240
-rw-r--r--src/synthesis.cc3277
-rw-r--r--src/tree.c2484
-rw-r--r--src/tree.h355
68 files changed, 44448 insertions, 0 deletions
diff --git a/src/.gitignore b/src/.gitignore
new file mode 100644
index 00000000..c2e96f8e
--- /dev/null
+++ b/src/.gitignore
@@ -0,0 +1,27 @@
+# Common testing files.
+/tmp.lm
+/tmp.c
+/tmp.bin
+/input
+/out
+
+/*.o
+/Makefile.in
+/Makefile
+/.*.d
+/colm
+/lmparse.h
+/lmparse.cc
+/lmscan.cc
+/config.h.in
+/config.h.in~
+/config.h
+/defs.h
+/version.h
+/tags
+/.deps
+/libcolmd.a
+/libcolmp.a
+/.libs
+/stamp-h1
+/stamp-h2
diff --git a/src/Makefile.am b/src/Makefile.am
new file mode 100644
index 00000000..c4a3504a
--- /dev/null
+++ b/src/Makefile.am
@@ -0,0 +1,183 @@
+#
+# Copyright 2007-2012 Adrian Thurston <thurston@complang.org>
+#
+
+# This file is part of Colm.
+#
+# Colm is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# Colm is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Colm; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+INCLUDES = -I$(top_srcdir)/aapl
+
+bin_PROGRAMS = colm
+
+RUNTIME_SRC = \
+ map.c pdarun.c list.c input.c debug.c \
+ codevect.c pool.c string.c tree.c bytecode.c program.c
+
+RUNTIME_HDR = \
+ bytecode.h config.h defs.h debug.h pool.h input.h \
+ fsmrun.h pdarun.h map.h tree.h program.h colm.h
+
+lib_LIBRARIES = libcolmp.a libcolmd.a
+
+libcolmp_a_SOURCES = $(RUNTIME_SRC)
+libcolmp_a_CFLAGS = -I..
+
+libcolmd_a_SOURCES = $(RUNTIME_SRC)
+libcolmd_a_CFLAGS = -I..
+
+colm_CXXFLAGS = \
+ -Wall \
+ -DCOLM_LOG \
+ -DPREFIX='"$(prefix)"' \
+ -I..
+
+colm_LDADD = libcolmp.a
+
+# ADT DEFS_COLM += -DCOLM_LOG -DPREFIX='"$(prefix)"'
+
+# Headers and sources of the colm compiler. Every file is named exactly once.
+colm_SOURCES = \
+	buffer.h bytecode.h colm.h debug.h dotgen.h fsmcodegen.h fsmgraph.h \
+	fsmrun.h input.h keyops.h lmparse.h lmscan.h map.h parsedata.h \
+	parsetree.h pcheck.h pdacodegen.h pdagraph.h pdarun.h pool.h redbuild.h \
+	redfsm.h rtvector.h tree.h version.h global.h \
+	\
+	resolve.cc synthesis.cc lmparse.cc lmscan.cc parsetree.cc \
+	fsmstate.cc fsmbase.cc fsmattach.cc fsmmin.cc \
+	fsmgraph.cc pdagraph.cc pdabuild.cc pdacodegen.cc fsmcodegen.cc \
+	redfsm.cc fsmexec.cc main.cc redbuild.cc closure.cc fsmap.cc \
+	dotgen.cc pcheck.cc ctinput.cc declare.cc codegen.cc \
+	exports.cc compiler.cc
+
+colmincdir = $(includedir)/colm
+
+colminc_HEADERS = $(RUNTIME_HDR)
+
+BUILT_SOURCES = \
+ version.h lmscan.cc lmparse.h lmparse.cc
+
+version.h: Makefile
+ echo '#define VERSION "$(PACKAGE_VERSION)"' > version.h
+ echo '#define PUBDATE "$(PUBDATE)"' >> version.h
+
+if BUILD_PARSERS
+
+lmparse.h: lmparse.kh
+ $(KELBT) -o $@ $<
+
+lmparse.cc: lmparse.kl lmparse.kh
+ $(KELBT) -o $@ $<
+
+lmscan.cc: lmparse.h
+
+lmscan.cc: lmscan.rl
+ $(RAGEL) -G2 -o $@ $<
+
+endif
+
+# ADT
+# ADT # Logging:
+# ADT # colm: rt on/off
+# ADT # rt_prd: off
+# ADT # rt_db: on
+# ADT # rt_clm: rt on/off
+# ADT
+# ADT INCS += -I../aapl
+# ADT
+# ADT DEFS_COLM += -DCOLM_LOG -DPREFIX='"$(prefix)"'
+# ADT DEFS_RT_P +=
+# ADT DEFS_RT_D += -DCOLM_LOG
+# ADT
+# ADT CFLAGS += -g -Wall -Wwrite-strings
+# ADT LDFLAGS +=
+# ADT
+# ADT # Files in ALL_SRC that are generated.
+# ADT GEN_SRC = version.h lmscan.cc lmparse.h lmparse.cc
+# ADT
+# ADT RUNTIME_P = libcolmp.a
+# ADT RUNTIME_D = libcolmd.a
+# ADT
+# ADT LIBS =
+# ADT
+# ADT #*************************************
+# ADT
+# ADT # Get the version info.
+# ADT include ../version.mk
+# ADT
+# ADT prefix = @prefix@
+# ADT
+# ADT BUILD_PARSERS = @BUILD_PARSERS@
+# ADT
+# ADT # Programs
+# ADT CXX = @CXX@
+# ADT CC = @CC@
+# ADT
+# ADT # Get objects and dependencies from sources.
+# ADT COLM_OBJ = $(COLM_SRC:%.cc=%.o)
+# ADT RUNTIME_OBJ_P = $(RUNTIME_SRC:%.c=%_p.o)
+# ADT RUNTIME_OBJ_D = $(RUNTIME_SRC:%.c=%_d.o)
+# ADT
+# ADT DEPS = $(COLM_SRC:%.cc=.%.d) $(RUNTIME_SRC:%.c=.%_p.d) $(RUNTIME_SRC:%.c=.%_d.d)
+# ADT
+# ADT # Rules.
+# ADT all: colm $(RUNTIME_P) $(RUNTIME_D)
+# ADT
+# ADT colm: $(GEN_SRC) $(COLM_OBJ) $(RUNTIME_OBJ_D) $(RUNTIME_OBJ_C_D) $(LIBS)
+# ADT $(CXX) $(LDFLAGS) -o $@ $(COLM_OBJ) $(RUNTIME_OBJ_D) $(RUNTIME_OBJ_C_D) $(LIBS)
+# ADT
+# ADT $(RUNTIME_P): $(RUNTIME_OBJ_P) $(RUNTIME_OBJ_C_P)
+# ADT ar -cr $@ $^
+# ADT
+# ADT $(RUNTIME_D): $(RUNTIME_OBJ_D) $(RUNTIME_OBJ_C_D)
+# ADT ar -cr $@ $^
+# ADT
+# ADT version.h: ../version.mk
+# ADT echo '#define VERSION "$(VERSION)"' > version.h
+# ADT echo '#define PUBDATE "$(PUBDATE)"' >> version.h
+# ADT
+# ADT
+# ADT $(COLM_OBJ): %.o: %.cc
+# ADT @$(CXX) -M $(DEFS_COLM) $(INCS) $< > .$*.d
+# ADT $(CXX) -c $(CFLAGS) $(DEFS_COLM) $(INCS) -o $@ $<
+# ADT
+# ADT $(RUNTIME_OBJ_P): %_p.o: %.c
+# ADT @$(CC) -M -MT $@ $(DEFS_RT_P) $< > .$*_p.d
+# ADT $(CC) -c $(CFLAGS) $(DEFS_RT_P) -o $@ $<
+# ADT
+# ADT $(RUNTIME_OBJ_D): %_d.o: %.c
+# ADT @$(CC) -M -MT $@ $(DEFS_RT_D) $< > .$*_d.d
+# ADT $(CC) -c $(CFLAGS) $(DEFS_RT_D) -o $@ $<
+# ADT
+# ADT distclean: clean
+# ADT rm -f Makefile config.h
+# ADT
+# ADT ifeq ($(BUILD_PARSERS),true)
+# ADT EXTRA_CLEAN = $(GEN_SRC)
+# ADT endif
+# ADT
+# ADT clean:
+# ADT rm -f tags .*.d *.o colm $(EXTRA_CLEAN) $(RUNTIME_P) $(RUNTIME_D)
+# ADT
+# ADT install: all
+# ADT install -d $(prefix)/bin
+# ADT install -d $(prefix)/include
+# ADT install -d $(prefix)/include/colm
+# ADT install -d $(prefix)/lib
+# ADT install -s colm $(prefix)/bin/colm
+# ADT install libcolmp.a libcolmd.a $(prefix)/lib
+# ADT install $(RUNTIME_HDR) $(prefix)/include/colm
+# ADT
+# ADT -include $(DEPS)
+
diff --git a/src/buffer.h b/src/buffer.h
new file mode 100644
index 00000000..9039ad4b
--- /dev/null
+++ b/src/buffer.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright 2003 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _BUFFER_H
+#define _BUFFER_H
+
+#define BUFFER_INITIAL_SIZE 4096
+
+/* An automatically grown buffer for collecting tokens. Always reuses space;
+ * never down resizes.
+ *
+ * The storage doubles each time it fills up. If memory cannot be obtained the
+ * process is aborted, so data is never null while the object is alive. The
+ * includer must provide <stdlib.h> (malloc, realloc, free, abort). */
+struct Buffer
+{
+	/* Start out empty with BUFFER_INITIAL_SIZE bytes of storage. */
+	Buffer()
+	{
+		data = (char*) malloc( BUFFER_INITIAL_SIZE );
+		if ( data == 0 )
+			abort();
+		allocated = BUFFER_INITIAL_SIZE;
+		length = 0;
+	}
+	~Buffer() { free(data); }
+
+	/* Add one character to the end, growing the storage when it is full. */
+	void append( char p )
+	{
+		if ( length == allocated ) {
+			/* Grow through a temporary: on failure realloc returns null and
+			 * leaves the old block alone, so the old pointer must not be
+			 * overwritten before the result has been checked. */
+			char *grown = (char*) realloc( data, allocated * 2 );
+			if ( grown == 0 )
+				abort();
+			data = grown;
+			allocated *= 2;
+		}
+		data[length++] = p;
+	}
+
+	/* Forget the contents but keep the storage for reuse. */
+	void clear() { length = 0; }
+
+	char *data;      /* Storage; the first length bytes are valid. */
+	int allocated;   /* Size of the storage in bytes. */
+	int length;      /* Number of bytes currently held. */
+};
+
+#endif /* _BUFFER_H */
diff --git a/src/bytecode.c b/src/bytecode.c
new file mode 100644
index 00000000..2cfa598c
--- /dev/null
+++ b/src/bytecode.c
@@ -0,0 +1,3579 @@
+/*
+ * Copyright 2006-2012 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+//#define COLM_LOG
+
+#include <pdarun.h>
+#include <fsmrun.h>
+#include <tree.h>
+#include <bytecode.h>
+#include <pool.h>
+#include <debug.h>
+#include <config.h>
+
+#include <alloca.h>
+#include <sys/mman.h>
+#include <string.h>
+#include <assert.h>
+#include <stdlib.h>
+
+#if SIZEOF_LONG != 4 && SIZEOF_LONG != 8
+ #error "SIZEOF_LONG contained an unexpected value"
+#endif
+
+#define true 1
+#define false 0
+
+/* More common macros are in bytecode.h.
+ *
+ * The stack macros below operate on a variable named sp that must be in
+ * scope at the point of use: popping adds to sp and pushing subtracts from
+ * it. The frame macros index exec->framePtr and exec->iframePtr, so a
+ * variable named exec must be in scope for those. */
+#define vm_top_off(n) (sp[n])
+#define vm_popn(n) (sp += (n))
+#define vm_pushn(n) (sp -= (n))
+#define vm_local(o) (exec->framePtr[o])
+#define vm_plocal(o) (&exec->framePtr[o])
+#define vm_local_iframe(o) (exec->iframePtr[o])
+#define vm_plocal_iframe(o) (&exec->iframePtr[o])
+
+/* Store the byte at instr, converted to uchar, into i and advance instr by
+ * one. Requires a variable named instr in scope. */
+#define read_byte( i ) do { \
+ i = ((uchar) *instr++); \
+} while(0)
+
+/* Advance instr by one byte without reading it. */
+#define consume_byte( ) do { \
+ instr += 1; \
+} while(0)
+
+
+/* Assemble i from the four bytes p[0]..p[3], least significant byte first.
+ * p itself is not advanced.
+ * NOTE(review): only four bytes are read even when SIZEOF_LONG is 8, whereas
+ * read_word() reads eight in that configuration. The callers in this file
+ * apply it to a SIZEOF_WORD-sized length slot, so this presumably relies on
+ * stored lengths fitting in 32 bits -- confirm against appendWord(). */
+#define read_word_p( i, p ) do { \
+ i = ((Word) p[0]); \
+ i |= ((Word) p[1]) << 8; \
+ i |= ((Word) p[2]) << 16; \
+ i |= ((Word) p[3]) << 24; \
+} while(0)
+
+/* Read one Word operand from the instruction stream into i, consuming
+ * SIZEOF_LONG bytes (4 or 8) from instr, least significant byte first. The
+ * argument i is assigned once and then or-ed into once per remaining byte,
+ * so it must be a plain lvalue. There are better ways. */
+#if SIZEOF_LONG == 4
+ #define read_word( i ) do { \
+ i = ((Word) *instr++); \
+ i |= ((Word) *instr++) << 8; \
+ i |= ((Word) *instr++) << 16; \
+ i |= ((Word) *instr++) << 24; \
+ } while(0)
+#else
+ #define read_word( i ) do { \
+ i = ((Word) *instr++); \
+ i |= ((Word) *instr++) << 8; \
+ i |= ((Word) *instr++) << 16; \
+ i |= ((Word) *instr++) << 24; \
+ i |= ((Word) *instr++) << 32; \
+ i |= ((Word) *instr++) << 40; \
+ i |= ((Word) *instr++) << 48; \
+ i |= ((Word) *instr++) << 56; \
+ } while(0)
+#endif
+
+/* Word-sized operand readers that decode into a temporary Word and then
+ * cast: read_tree() yields a Tree*, read_word_type() yields the given Type.
+ * Both consume SIZEOF_LONG bytes from instr, least significant byte first.
+ * consume_word() skips the same number of bytes without decoding them.
+ * There are better ways. */
+#if SIZEOF_LONG == 4
+ #define read_tree( i ) do { \
+ Word w; \
+ w = ((Word) *instr++); \
+ w |= ((Word) *instr++) << 8; \
+ w |= ((Word) *instr++) << 16; \
+ w |= ((Word) *instr++) << 24; \
+ i = (Tree*) w; \
+ } while(0)
+
+ #define read_word_type( Type, i ) do { \
+ Word w; \
+ w = ((Word) *instr++); \
+ w |= ((Word) *instr++) << 8; \
+ w |= ((Word) *instr++) << 16; \
+ w |= ((Word) *instr++) << 24; \
+ i = (Type) w; \
+ } while(0)
+
+ #define consume_word( ) do { \
+ instr += 4; \
+ } while(0)
+#else
+ #define read_tree( i ) do { \
+ Word w; \
+ w = ((Word) *instr++); \
+ w |= ((Word) *instr++) << 8; \
+ w |= ((Word) *instr++) << 16; \
+ w |= ((Word) *instr++) << 24; \
+ w |= ((Word) *instr++) << 32; \
+ w |= ((Word) *instr++) << 40; \
+ w |= ((Word) *instr++) << 48; \
+ w |= ((Word) *instr++) << 56; \
+ i = (Tree*) w; \
+ } while(0)
+
+ #define read_word_type( Type, i ) do { \
+ Word w; \
+ w = ((Word) *instr++); \
+ w |= ((Word) *instr++) << 8; \
+ w |= ((Word) *instr++) << 16; \
+ w |= ((Word) *instr++) << 24; \
+ w |= ((Word) *instr++) << 32; \
+ w |= ((Word) *instr++) << 40; \
+ w |= ((Word) *instr++) << 48; \
+ w |= ((Word) *instr++) << 56; \
+ i = (Type) w; \
+ } while(0)
+
+ #define consume_word( ) do { \
+ instr += 8; \
+ } while(0)
+#endif
+
+/* Read a two-byte operand from instr into i, least significant byte first,
+ * advancing instr by two. */
+#define read_half( i ) do { \
+ i = ((Word) *instr++); \
+ i |= ((Word) *instr++) << 8; \
+} while(0)
+
+int colm_log_bytecode = 0;
+int colm_log_parse = 0;
+int colm_log_match = 0;
+int colm_log_compile = 0;
+int colm_log_conds = 0;
+
+/* Hook for growing the VM stack. As written it only emits a debug message;
+ * nothing is resized and prg is not touched. */
+void vm_grow( Program *prg )
+{
+ debug( REALM_BYTECODE, "growing stack\n" );
+}
+
+/* Install val as the parser's context. The value is first passed through
+ * splitTree() and the tree that call returns is what gets stored in
+ * parser->pdaRun->context. sp is unused. */
+void parserSetContext( Program *prg, Tree **sp, Parser *parser, Tree *val )
+{
+	Tree *split = splitTree( prg, val );
+	parser->pdaRun->context = split;
+}
+
+/* Print a tree into a temporary collector and return the text as a string
+ * head allocated with stringAllocFull(). trim is forwarded unchanged to
+ * printTreeCollect(). */
+Head *treeToStr( Program *prg, Tree **sp, Tree *tree, int trim )
+{
+	StrCollect text;
+	Head *result;
+
+	/* Gather the printed form of the tree. */
+	initStrCollect( &text );
+	printTreeCollect( prg, sp, &text, tree, trim );
+
+	/* Build the string head while the collected bytes are still alive,
+	 * then release the collector. */
+	result = stringAllocFull( prg, text.data, text.length );
+	strCollectDestroy( &text );
+
+	return result;
+}
+
+/* Append a tree to an input stream, choosing the method by the tree's id.
+ *
+ *  - LEL_ID_STR: the tree is printed to text and the bytes are appended with
+ *    appendData(); the number of bytes appended is returned.
+ *  - LEL_ID_STREAM: the tree is upref'd and handed to appendStream().
+ *  - anything else: the tree is upref'd and handed to appendTree().
+ *
+ * The last two cases return 0. */
+Word streamAppend( Program *prg, Tree **sp, Tree *input, InputStream *inputStream )
+{
+	if ( input->id == LEL_ID_STR ) {
+		StrCollect text;
+		long appended;
+
+		/* Print the string tree, load the text into the input, then drop
+		 * the temporary collector. */
+		initStrCollect( &text );
+		printTreeCollect( prg, sp, &text, input, true );
+		appendData( inputStream, text.data, text.length );
+		appended = text.length;
+		strCollectDestroy( &text );
+
+		return appended;
+	}
+
+	/* Streams and ordinary trees are appended by reference. */
+	treeUpref( input );
+	if ( input->id == LEL_ID_STREAM )
+		appendStream( inputStream, input );
+	else
+		appendTree( inputStream, input );
+
+	return 0;
+}
+
+/* Drive parseLoop() for a parser, in a resumable fashion.
+ *
+ * entry selects where execution starts. With PcrStart the function checks
+ * parseError, stores stopId in pdaRun->stopTarget and makes the first call to
+ * parseLoop(). Whenever parseLoop() returns something other than PcrDone
+ * that value is returned to the caller straight from inside the while loop.
+ * Calling again with entry set to PcrReduction, PcrGeneration, PcrPreEof or
+ * PcrReverse jumps to the case labels inside the loop body and calls
+ * parseLoop() again with that entry value. With PcrDone nothing is done.
+ *
+ * Returns the pending parseLoop() code while work remains, PcrDone
+ * otherwise. */
+long parseFrag( Program *prg, Tree **sp, Parser *parser, long stopId, long entry )
+{
+switch ( entry ) {
+case PcrStart:
+
+ if ( ! parser->pdaRun->parseError ) {
+ parser->pdaRun->stopTarget = stopId;
+
+ long pcr = parseLoop( prg, sp, parser->pdaRun, parser->fsmRun, parser->input->in, entry );
+
+ while ( pcr != PcrDone ) {
+
+ /* Hand the request to the caller; the labels below are the re-entry
+  * points. On re-entry the initialisation of pcr above is skipped and
+  * pcr is assigned by the call that follows the labels. */
+return pcr;
+case PcrReduction:
+case PcrGeneration:
+case PcrPreEof:
+case PcrReverse:
+
+ pcr = parseLoop( prg, sp, parser->pdaRun, parser->fsmRun, parser->input->in, entry );
+ }
+ }
+
+case PcrDone:
+break; }
+
+ return PcrDone;
+}
+
+/* Finish a parse and produce the resulting tree, in a resumable fashion.
+ *
+ * With entry == PcrStart and no positive stopTarget, EOF is set on the input
+ * and, unless parseError is set, parseLoop() is run. As in parseFrag, any
+ * parseLoop() result other than PcrDone is returned from inside the loop, and
+ * a later call with entry PcrReduction, PcrGeneration, PcrPreEof or
+ * PcrReverse resumes at the labels in the loop body.
+ *
+ * Once the loop is done (or was skipped) the parse is committed with
+ * commitFull() unless revertOn is set, and the root obtained from
+ * getParsedRoot() is upref'd and stored in *result. With entry == PcrDone
+ * nothing is done and *result is left untouched.
+ *
+ * Returns the pending parseLoop() code while work remains, PcrDone
+ * otherwise. */
+long parseFinish( Tree **result, Program *prg, Tree **sp,
+ Parser *parser, int revertOn, long entry )
+{
+switch ( entry ) {
+case PcrStart:
+
+ if ( parser->pdaRun->stopTarget <= 0 ) {
+ setEof( parser->input->in );
+
+ if ( ! parser->pdaRun->parseError ) {
+ long pcr = parseLoop( prg, sp, parser->pdaRun, parser->fsmRun, parser->input->in, entry );
+
+ while ( pcr != PcrDone ) {
+
+ /* Hand the request to the caller; the labels below are the re-entry
+  * points. */
+return pcr;
+case PcrReduction:
+case PcrGeneration:
+case PcrPreEof:
+case PcrReverse:
+
+ pcr = parseLoop( prg, sp, parser->pdaRun, parser->fsmRun, parser->input->in, entry );
+ }
+ }
+ }
+
+ /* FIXME: need something here to check that we aren't stopped waiting for
+ * more data when we are actually expected to finish. This check doesn't
+ * work (at time of writing). */
+ //assert( (parser->pdaRun->stopTarget > 0 && parser->pdaRun->stopParsing) || parser->input->in->eofSent );
+
+ if ( !revertOn )
+ commitFull( prg, sp, parser->pdaRun, 0 );
+
+ Tree *tree = getParsedRoot( parser->pdaRun, parser->pdaRun->stopTarget > 0 );
+ treeUpref( tree );
+
+ *result = tree;
+
+case PcrDone:
+break; }
+
+ return PcrDone;
+}
+
+/* Run the parser backwards until pdaRun->steps has been reduced to steps, in
+ * a resumable fashion.
+ *
+ * The debug message and resetToken() sit before the switch, so they run on
+ * every call, including resumes. With entry == PcrStart and
+ * steps < pdaRun->steps the undo is armed (numRetry incremented, targetSteps
+ * set, triggerUndo raised) and parseLoop() is called. Any parseLoop() result
+ * other than PcrDone is returned from inside the loop; a later call with
+ * entry PcrReduction, PcrGeneration, PcrPreEof or PcrReverse resumes at the
+ * labels in the loop body. When the loop finishes the undo settings are
+ * reset. With entry == PcrDone nothing more is done.
+ *
+ * Returns the pending parseLoop() code while work remains, PcrDone
+ * otherwise. */
+long undoParseFrag( Program *prg, Tree **sp, Parser *parser, long steps, long entry )
+{
+ InputStream *inputStream = parser->input->in;
+ FsmRun *fsmRun = parser->fsmRun;
+ PdaRun *pdaRun = parser->pdaRun;
+
+ debug( REALM_PARSE, "undo parse frag, target steps: %ld, pdarun steps: %ld\n", steps, pdaRun->steps );
+
+ resetToken( fsmRun );
+
+switch ( entry ) {
+case PcrStart:
+
+ if ( steps < pdaRun->steps ) {
+ /* Setup environment for going backwards until we reduced steps to
+ * what we want. */
+ pdaRun->numRetry += 1;
+ pdaRun->targetSteps = steps;
+ pdaRun->triggerUndo = 1;
+
+ /* The parse loop will recognise the situation. */
+ long pcr = parseLoop( prg, sp, pdaRun, fsmRun, inputStream, entry );
+ while ( pcr != PcrDone ) {
+
+ /* Hand the request to the caller; the labels below are the re-entry
+  * points. */
+return pcr;
+case PcrReduction:
+case PcrGeneration:
+case PcrPreEof:
+case PcrReverse:
+
+ pcr = parseLoop( prg, sp, pdaRun, fsmRun, inputStream, entry );
+ }
+
+ /* Reset environment. */
+ pdaRun->triggerUndo = 0;
+ pdaRun->targetSteps = -1;
+ pdaRun->numRetry -= 1;
+ }
+
+case PcrDone:
+break; }
+
+ return PcrDone;
+}
+
+/* Bytecode-level wrapper around streamPull(): length is an Int tree whose
+ * value gives the amount to pull, and the pulled data is returned wrapped in
+ * a string tree built with constructString(). */
+Tree *streamPullBc( Program *prg, FsmRun *fsmRun, InputStream *in, Tree *length )
+{
+	long count = ((Int*)length)->value;
+	Head *pulled = streamPull( prg, fsmRun, in, count );
+
+	return constructString( prg, pulled );
+}
+
+/* Reverse a stream pull: str is the Str tree holding the pulled data, and
+ * its bytes and length are passed to undoStreamPull(). prg is unused. */
+void undoPull( Program *prg, FsmRun *fsmRun, InputStream *in, Tree *str )
+{
+	Str *pulled = (Str*)str;
+	const char *bytes = stringData( pulled->value );
+	long count = stringLength( pulled->value );
+
+	undoStreamPull( fsmRun, in, bytes, count );
+}
+
+/* Push a tree onto the front of an input stream.
+ *
+ * A tree that is not a string is upref'd and pushed with streamPushTree();
+ * -1 is returned. A string tree is printed to text and pushed with
+ * streamPushText(); the length of that text is returned. */
+long streamPush( Program *prg, Tree **sp, FsmRun *fsmRun, InputStream *in, Tree *tree, int ignore )
+{
+	StrCollect text;
+	long pushed;
+
+	if ( tree->id != LEL_ID_STR ) {
+		treeUpref( tree );
+		streamPushTree( fsmRun, in, tree, ignore );
+		return -1;
+	}
+
+	/* This should become a compile error. If it's text, it's up to the
+	 * scanner to decide. Want to force it then send a token. */
+	assert( !ignore );
+
+	/* Render the string tree and push the resulting bytes. */
+	initStrCollect( &text );
+	printTreeCollect( prg, sp, &text, tree, true );
+	streamPushText( fsmRun, in, text.data, text.length );
+
+	pushed = text.length;
+	strCollectDestroy( &text );
+
+	return pushed;
+}
+
+/* Store tree in slot field of frame. A non-null tree is expected to carry at
+ * least one reference already; this is only checked by assertion. */
+void setLocal( Tree **frame, long field, Tree *tree )
+{
+	assert( tree == 0 || tree->refs >= 1 );
+	frame[field] = tree;
+}
+
+/* Pass the tree in slot field of frame through splitTree(), write the result
+ * back into the slot and return it. */
+Tree *getLocalSplit( Program *prg, Tree **frame, long field )
+{
+	Tree *split = splitTree( prg, frame[field] );
+
+	frame[field] = split;
+	return split;
+}
+
+/* Drop one reference on each frame slot named in trees. trees holds
+ * treesLen frame offsets, one per byte.
+ * NOTE(review): trees is plain char, so where char is signed an offset above
+ * 127 would turn into a negative index -- presumably offsets stay small;
+ * confirm. */
+void downrefLocalTrees( Program *prg, Tree **sp, Tree **frame, char *trees, long treesLen )
+{
+	long i = 0;
+
+	while ( i < treesLen ) {
+		long slot = (long)trees[i];
+
+		debug( REALM_BYTECODE, "local tree downref: %ld\n", slot );
+		treeDownref( prg, sp, frame[slot] );
+
+		i += 1;
+	}
+}
+
+/* Carve a UserIter out of the VM stack and initialise it.
+ *
+ * Room for the structure is pushed onto the stack, the new stack top is used
+ * as the UserIter and is also handed to initUserIter() together with
+ * fi->argSize and searchId. The caller's stack pointer is updated through
+ * psp. prg is unused. */
+UserIter *uiterCreate( Program *prg, Tree ***psp, FunctionInfo *fi, long searchId )
+{
+	Tree **sp = *psp;
+	UserIter *uiter;
+
+	/* Reserve the space, measured in stack words. */
+	vm_pushn( sizeof(UserIter) / sizeof(Word) );
+	uiter = (UserIter*) vm_ptop();
+
+	initUserIter( uiter, vm_ptop(), fi->argSize, searchId );
+
+	*psp = sp;
+	return uiter;
+}
+
+/* Prepare a user iterator for its first resume.
+ *
+ * The reference is cleared, the stack size is recorded as the distance from
+ * the iterator's stack root to the current stack top, and the frame is placed
+ * IFR_AA slots below the stack root. The resume address is the start of the
+ * function's code: the codeWV variant when revertOn is set, codeWC
+ * otherwise. */
+void uiterInit( Program *prg, Tree **sp, UserIter *uiter,
+		FunctionInfo *fi, int revertOn )
+{
+	/* Nothing has been yielded yet, so resuming starts from the top. */
+	uiter->ref.kid = 0;
+	uiter->stackSize = uiter->stackRoot - vm_ptop();
+	uiter->frame = &uiter->stackRoot[-IFR_AA];
+
+	uiter->resume = revertOn
+		? prg->rtd->frameInfo[fi->frameId].codeWV
+		: prg->rtd->frameInfo[fi->frameId].codeWC;
+}
+
+/* Release the stack space held by a tree iterator. The stack must be at the
+ * depth recorded in the iterator (checked by assertion); that many slots are
+ * popped and the caller's stack pointer is updated through psp. */
+void treeIterDestroy( Tree ***psp, TreeIter *iter )
+{
+	Tree **sp = *psp;
+	long depth = iter->stackRoot - vm_ptop();
+
+	assert( iter->stackSize == depth );
+	vm_popn( iter->stackSize );
+
+	*psp = sp;
+}
+
+/* Release everything a user iterator occupies on the VM stack: the slots
+ * above its stack root, the UserIter structure itself, and its arguments.
+ * The caller's stack pointer is updated through psp. */
+void userIterDestroy( Tree ***psp, UserIter *uiter )
+{
+	Tree **sp = *psp;
+
+	/* We should always be coming from a yield, so the stack depth recorded
+	 * in the iterator matches the current one. */
+	long depth = uiter->stackRoot - vm_ptop();
+	assert( uiter->stackSize == depth );
+
+	/* Read the argument count before the structure's slots are popped. */
+	long args = uiter->argSize;
+
+	vm_popn( depth );
+	vm_popn( sizeof(UserIter) / sizeof(Word) );
+	vm_popn( args );
+
+	*psp = sp;
+}
+
+/* Build the argv list tree: a generic of type prg->rtd->argvGenericId with
+ * one string tree appended per argument. The strings are created with
+ * stringAllocPointer() from the argv pointers. The list and every element
+ * are upref'd once. */
+Tree *constructArgv( Program *prg, int argc, const char **argv )
+{
+	int i;
+	Tree *list = createGeneric( prg, prg->rtd->argvGenericId );
+
+	treeUpref( list );
+
+	for ( i = 0; i < argc; i++ ) {
+		const char *text = argv[i];
+		Head *head = stringAllocPointer( prg, text, strlen(text) );
+		Tree *element = constructString( prg, head );
+
+		treeUpref( element );
+		listAppend2( prg, (List*)list, element );
+	}
+
+	return list;
+}
+
+/*
+ * Execution environment
+ */
+
+/* Fill in an Execution: record the parser, pda run, fsm run, input stream
+ * and frame id it runs against, and start with null frame pointers and an
+ * empty reverse-code unit. */
+void initExecution( Execution *exec, Parser *parser, PdaRun *pdaRun, FsmRun *fsmRun, InputStream *inputStream, int frameId )
+{
+	/* What the code runs against. */
+	exec->parser = parser;
+	exec->pdaRun = pdaRun;
+	exec->fsmRun = fsmRun;
+	exec->inputStream = inputStream;
+	exec->frameId = frameId;
+
+	/* Nothing is executing yet. */
+	exec->framePtr = 0;
+	exec->iframePtr = 0;
+	exec->rcodeUnitLen = 0;
+}
+
+/* Release the references held by every block of reverse code in rev,
+ * working from the end of the vector to the start. Each block is followed by
+ * a SIZEOF_WORD-sized slot holding its length; the block is passed to
+ * rcodeDownref() and then removed from the vector along with that slot. */
+void rcodeDownrefAll( Program *prg, Tree **sp, RtCodeVect *rev )
+{
+	while ( rev->tabLen > 0 ) {
+		Word blockLen;
+		long blockStart;
+		Code *trailer = rev->data + rev->tabLen - SIZEOF_WORD;
+
+		/* The length sits in the last word of the vector. */
+		read_word_p( blockLen, trailer );
+
+		/* The block starts that many bytes before the length slot. */
+		blockStart = rev->tabLen - blockLen - SIZEOF_WORD;
+		rcodeDownref( prg, sp, rev->data + blockStart );
+
+		/* Drop the block and its length slot. */
+		rev->tabLen -= blockLen + SIZEOF_WORD;
+	}
+}
+
+/* Walk one block of reverse code and release what its operands hold on to,
+ * without undoing anything.
+ *
+ * Each instruction's operands are decoded in the order they appear in the
+ * stream. Tree operands are passed to treeDownref(), and the saved head of
+ * IN_SET_TOKEN_DATA_BKT is passed to stringFree(). Other operands are read
+ * only to step over them. The walk ends at IN_PCR_RET, IN_PCR_END_DECK or
+ * IN_STOP. An unrecognised opcode is reported with fatal(). */
+void rcodeDownref( Program *prg, Tree **sp, Code *instr )
+{
+again:
+ switch ( *instr++ ) {
+ case IN_PARSE_LOAD_START: {
+ debug( REALM_BYTECODE, "IN_PARSE_LOAD_START\n" );
+ break;
+ }
+ case IN_PARSE_SAVE_STEPS: {
+ debug( REALM_BYTECODE, "IN_PARSE_SAVE_STEPS\n" );
+ break;
+ }
+ case IN_LOAD_TREE: {
+ Word w;
+ read_word( w );
+ debug( REALM_BYTECODE, "IN_LOAD_TREE %p\n", (Tree*)w );
+ treeDownref( prg, sp, (Tree*)w );
+ break;
+ }
+ case IN_LOAD_WORD: {
+ Word w;
+ read_word( w );
+ debug( REALM_BYTECODE, "IN_LOAD_WORD\n" );
+ break;
+ }
+ case IN_RESTORE_LHS: {
+ Tree *restore;
+ read_tree( restore );
+ debug( REALM_BYTECODE, "IN_RESTORE_LHS\n" );
+ treeDownref( prg, sp, restore );
+ break;
+ }
+
+ case IN_PARSE_FRAG_BKT: {
+ Half stopId;
+ read_half( stopId );
+ debug( REALM_BYTECODE, "IN_PARSE_FRAG_BKT\n" );
+ break;
+ }
+ case IN_PARSE_FRAG_BKT3: {
+ debug( REALM_BYTECODE, "IN_PARSE_FRAG_BKT3\n" );
+ break;
+ }
+ case IN_PARSE_FINISH_BKT: {
+ Half stopId;
+ read_half( stopId );
+ debug( REALM_BYTECODE, "IN_PARSE_FINISH_BKT\n" );
+ break;
+ }
+ case IN_PARSE_FINISH_BKT3: {
+ debug( REALM_BYTECODE, "IN_PARSE_FINISH_BKT3\n" );
+ break;
+ }
+ case IN_PCR_CALL: {
+ debug( REALM_BYTECODE, "IN_PCR_CALL\n" );
+ break;
+ }
+ case IN_PCR_RET: {
+ debug( REALM_BYTECODE, "IN_PCR_RET\n" );
+ return;
+ }
+ case IN_PCR_END_DECK: {
+ debug( REALM_BYTECODE, "IN_PCR_END_DECK\n" );
+ return;
+ }
+ case IN_INPUT_APPEND_BKT: {
+ Tree *parser;
+ Tree *input;
+ Word len;
+ read_tree( parser );
+ read_tree( input );
+ read_word( len );
+
+ debug( REALM_BYTECODE, "IN_INPUT_APPEND_BKT\n" );
+
+ treeDownref( prg, sp, parser );
+ treeDownref( prg, sp, input );
+ break;
+ }
+ case IN_INPUT_PULL_BKT: {
+ Word f;
+ Tree *string;
+ read_tree( string );
+ read_word( f );
+
+ debug( REALM_BYTECODE, "IN_INPUT_PULL_BKT\n" );
+
+ treeDownref( prg, sp, string );
+ break;
+ }
+ case IN_INPUT_PUSH_BKT: {
+ Word len;
+ read_word( len );
+
+ debug( REALM_BYTECODE, "IN_INPUT_PUSH_BKT\n" );
+ break;
+ }
+ case IN_LOAD_GLOBAL_BKT: {
+ debug( REALM_BYTECODE, "IN_LOAD_GLOBAL_BKT\n" );
+ break;
+ }
+ case IN_LOAD_CONTEXT_BKT: {
+ debug( REALM_BYTECODE, "IN_LOAD_CONTEXT_BKT\n" );
+ break;
+ }
+ case IN_LOAD_ACCUM_BKT: {
+ /* Tree *parser; */
+ consume_word();
+ debug( REALM_BYTECODE, "IN_LOAD_ACCUM_BKT\n" );
+ break;
+ }
+ case IN_LOAD_INPUT_BKT: {
+ /* Tree *input; */
+ consume_word();
+ debug( REALM_BYTECODE, "IN_LOAD_INPUT_BKT\n" );
+ break;
+ }
+ case IN_GET_FIELD_BKT: {
+ short field;
+ read_half( field );
+
+ debug( REALM_BYTECODE, "IN_GET_FIELD_BKT %hd\n", field );
+ break;
+ }
+ case IN_SET_FIELD_BKT: {
+ short field;
+ Tree *val;
+ read_half( field );
+ read_tree( val );
+
+ debug( REALM_BYTECODE, "IN_SET_FIELD_BKT %hd\n", field );
+
+ treeDownref( prg, sp, val );
+ break;
+ }
+ case IN_PTR_DEREF_BKT: {
+ Tree *ptr;
+ read_tree( ptr );
+
+ debug( REALM_BYTECODE, "IN_PTR_DEREF_BKT\n" );
+
+ treeDownref( prg, sp, ptr );
+ break;
+ }
+ case IN_SET_TOKEN_DATA_BKT: {
+ Word oldval;
+ read_word( oldval );
+
+ debug( REALM_BYTECODE, "IN_SET_TOKEN_DATA_BKT\n" );
+
+ Head *head = (Head*)oldval;
+ stringFree( prg, head );
+ break;
+ }
+ case IN_LIST_APPEND_BKT: {
+ debug( REALM_BYTECODE, "IN_LIST_APPEND_BKT\n" );
+ break;
+ }
+ case IN_LIST_REMOVE_END_BKT: {
+ Tree *val;
+ read_tree( val );
+
+ debug( REALM_BYTECODE, "IN_LIST_REMOVE_END_BKT\n" );
+
+ treeDownref( prg, sp, val );
+ break;
+ }
+ case IN_GET_LIST_MEM_BKT: {
+ short field;
+ read_half( field );
+
+ debug( REALM_BYTECODE, "IN_GET_LIST_MEM_BKT %hd\n", field );
+ break;
+ }
+ case IN_SET_LIST_MEM_BKT: {
+ Half field;
+ Tree *val;
+ read_half( field );
+ read_tree( val );
+
+ debug( REALM_BYTECODE, "IN_SET_LIST_MEM_BKT %hd\n", field );
+
+ treeDownref( prg, sp, val );
+ break;
+ }
+ case IN_MAP_INSERT_BKT: {
+ /* uchar inserted; */
+ Tree *key;
+ consume_byte();
+ read_tree( key );
+
+ debug( REALM_BYTECODE, "IN_MAP_INSERT_BKT\n" );
+
+ treeDownref( prg, sp, key );
+ break;
+ }
+ case IN_MAP_STORE_BKT: {
+ Tree *key, *val;
+ read_tree( key );
+ read_tree( val );
+
+ debug( REALM_BYTECODE,"IN_MAP_STORE_BKT\n" );
+
+ treeDownref( prg, sp, key );
+ treeDownref( prg, sp, val );
+ break;
+ }
+ case IN_MAP_REMOVE_BKT: {
+ Tree *key, *val;
+ read_tree( key );
+ read_tree( val );
+
+ debug( REALM_BYTECODE, "IN_MAP_REMOVE_BKT\n" );
+
+ treeDownref( prg, sp, key );
+ treeDownref( prg, sp, val );
+ break;
+ }
+ case IN_STOP: {
+ return;
+ }
+ default: {
+ fatal( "UNKNOWN INSTRUCTION 0x%2x: -- reverse code downref\n", *(instr-1));
+ assert(false);
+ break;
+ }
+ }
+ goto again;
+}
+
+/* Execute a block of bytecode on the program's VM stack, starting at
+ * prg->vm_root, and store the value popped afterwards in prg->returnVal. */
+void mainExecution( Program *prg, Execution *exec, Code *code )
+{
+ Tree **sp = prg->vm_root;
+
+ /* Set up the stack as if we have called. We allow a return value. */
+ vm_push( 0 );
+ vm_push( 0 );
+ vm_push( 0 );
+ vm_push( 0 );
+
+ /* Execution loop. */
+ executeCode( prg, exec, sp, code );
+
+ /* NOTE(review): four slots are pushed above but only three are popped
+  * here, and the stack pointer returned by executeCode() is ignored, so the
+  * local sp still has its pre-call value -- presumably the executed code
+  * accounts for the remaining slot; confirm. */
+ vm_pop_ignore();
+ vm_pop_ignore();
+ prg->returnVal = vm_pop();
+}
+
+/* Package the reverse code collected in pdaRun->rcodeCollect into one block
+ * on pdaRun->reverseCode.
+ *
+ * rcodeCollect holds a sequence of units, each followed by a one-byte
+ * length. The units are copied to reverseCode last-to-first, followed by
+ * IN_PCR_RET and a word holding the total length of the block. When this is
+ * the first block since the count was last cleared, a separate terminator
+ * block (IN_PCR_END_DECK, IN_PCR_RET, length 2) is emitted before it. Each
+ * emitted block bumps rcBlockCount and calls incrementSteps().
+ *
+ * Returns false when nothing had been collected, true otherwise. */
+int makeReverseCode( PdaRun *pdaRun )
+{
+	RtCodeVect *allRev = &pdaRun->reverseCode;
+	RtCodeVect *collected = &pdaRun->rcodeCollect;
+	long blockStart, blockLen;
+	Code *cursor;
+
+	/* Do we need to revert the left hand side? */
+
+	/* Nothing was generated, nothing to package. */
+	if ( collected->tabLen == 0 )
+		return false;
+
+	if ( pdaRun->rcBlockCount == 0 ) {
+		/* One reverse code run for the DECK terminator. */
+		append( allRev, IN_PCR_END_DECK );
+		append( allRev, IN_PCR_RET );
+		appendWord( allRev, 2 );
+		pdaRun->rcBlockCount += 1;
+		incrementSteps( pdaRun );
+	}
+
+	blockStart = allRev->tabLen;
+
+	/* Step backwards over the collected units, one at a time, copying each
+	 * to the global reverse code stack. */
+	cursor = collected->data + collected->tabLen;
+	while ( cursor != collected->data ) {
+		long unitLen;
+
+		cursor -= 1;
+		unitLen = *cursor;
+		cursor -= unitLen;
+		append2( allRev, cursor, unitLen );
+	}
+
+	/* Terminate the block, then record its total length after it. */
+	append( allRev, IN_PCR_RET );
+	blockLen = allRev->tabLen - blockStart;
+	appendWord( allRev, blockLen );
+
+	/* Clear the reverse code buffer. */
+	collected->tabLen = 0;
+
+	pdaRun->rcBlockCount += 1;
+	incrementSteps( pdaRun );
+
+	return true;
+}
+
+/* If any reverse code blocks have been emitted since the count was last
+ * cleared, flag parseTree with PF_HAS_RCODE and clear the count. Otherwise
+ * do nothing. */
+void transferReverseCode( PdaRun *pdaRun, ParseTree *parseTree )
+{
+	if ( !( pdaRun->rcBlockCount > 0 ) )
+		return;
+
+	debug( REALM_PARSE, "attaching reverse code to token\n" );
+	parseTree->flags |= PF_HAS_RCODE;
+	pdaRun->rcBlockCount = 0;
+}
+
+/* Remove the last block of reverse code from allRev and return a pointer to
+ * its first instruction. The block's length is read from the
+ * SIZEOF_WORD-sized slot at the end of the vector. Only tabLen is reduced;
+ * the returned pointer still refers to the vector's storage. */
+Code *popReverseCode( RtCodeVect *allRev )
+{
+	Word blockLen;
+	long blockStart;
+	Code *trailer = allRev->data + allRev->tabLen - SIZEOF_WORD;
+
+	/* The length sits in the last word of the vector. */
+	read_word_p( blockLen, trailer );
+
+	/* The block starts that many bytes before the length slot. */
+	blockStart = allRev->tabLen - blockLen - SIZEOF_WORD;
+
+	/* Drop the block and its length slot from the vector. */
+	allRev->tabLen -= blockLen + SIZEOF_WORD;
+
+	return allRev->data + blockStart;
+}
+
+Tree **executeCode( Program *prg, Execution *exec, Tree **sp, Code *instr )
+{
+ /* When we exit we are going to verify that we did not eat up any stack
+ * space. */
+ Tree **root = sp;
+ Code c;
+
+again:
+ c = *instr++;
+ //debug( REALM_BYTECODE, "--in 0x%x\n", c );
+
+ switch ( c ) {
+ case IN_RESTORE_LHS: {
+ Tree *restore;
+ read_tree( restore );
+
+ debug( REALM_BYTECODE, "IN_RESTORE_LHS\n" );
+ treeDownref( prg, sp, exec->pdaRun->parseInput->shadow->tree );
+ exec->pdaRun->parseInput->shadow->tree = restore;
+ break;
+ }
+ case IN_LOAD_NIL: {
+ debug( REALM_BYTECODE, "IN_LOAD_NIL\n" );
+ vm_push( 0 );
+ break;
+ }
+ case IN_LOAD_TREE: {
+ debug( REALM_BYTECODE, "IN_LOAD_TREE\n" );
+ Tree *tree;
+ read_tree( tree );
+ vm_push( tree );
+ break;
+ }
+ case IN_LOAD_WORD: {
+ debug( REALM_BYTECODE, "IN_LOAD_WORD\n" );
+ Word w;
+ read_word( w );
+ vm_push( (SW)w );
+ break;
+ }
+ case IN_LOAD_TRUE: {
+ debug( REALM_BYTECODE, "IN_LOAD_TRUE\n" );
+ treeUpref( prg->trueVal );
+ vm_push( prg->trueVal );
+ break;
+ }
+ case IN_LOAD_FALSE: {
+ debug( REALM_BYTECODE, "IN_LOAD_FALSE\n" );
+ treeUpref( prg->falseVal );
+ vm_push( prg->falseVal );
+ break;
+ }
+ case IN_LOAD_INT: {
+ Word i;
+ read_word( i );
+
+ debug( REALM_BYTECODE, "IN_LOAD_INT %d\n", i );
+
+ Tree *tree = constructInteger( prg, i );
+ treeUpref( tree );
+ vm_push( tree );
+ break;
+ }
+ case IN_LOAD_STR: {
+ Word offset;
+ read_word( offset );
+
+ debug( REALM_BYTECODE, "IN_LOAD_STR %d\n", offset );
+
+ Head *lit = makeLiteral( prg, offset );
+ Tree *tree = constructString( prg, lit );
+ treeUpref( tree );
+ vm_push( tree );
+ break;
+ }
+ case IN_PRINT: {
+ int n;
+ read_byte( n );
+ debug( REALM_BYTECODE, "IN_PRINT %d\n", n );
+
+ while ( n-- > 0 ) {
+ Tree *tree = vm_pop();
+ printTreeFile( prg, sp, stdout, tree, true );
+ treeDownref( prg, sp, tree );
+ }
+ break;
+ }
+ case IN_PRINT_XML_AC: {
+ int n;
+ read_byte( n );
+
+ debug( REALM_BYTECODE, "IN_PRINT_XML_AC %d\n", n );
+
+ while ( n-- > 0 ) {
+ Tree *tree = vm_pop();
+ printXmlStdout( prg, sp, tree, true, true );
+ treeDownref( prg, sp, tree );
+ }
+ break;
+ }
+ case IN_PRINT_XML: {
+ int n;
+ read_byte( n );
+ debug( REALM_BYTECODE, "IN_PRINT_XML %d", n );
+
+ while ( n-- > 0 ) {
+ Tree *tree = vm_pop();
+ printXmlStdout( prg, sp, tree, false, true );
+ treeDownref( prg, sp, tree );
+ }
+ break;
+ }
+ case IN_PRINT_STREAM: {
+ int n;
+ read_byte( n );
+ debug( REALM_BYTECODE, "IN_PRINT_STREAM\n" );
+
+ Stream *stream = (Stream*)vm_pop();
+ while ( n-- > 0 ) {
+ Tree *tree = vm_pop();
+ printTreeFile( prg, sp, stream->file, tree, true );
+ treeDownref( prg, sp, tree );
+ }
+ treeDownref( prg, sp, (Tree*)stream );
+ break;
+ }
+ case IN_LOAD_CONTEXT_R: {
+ debug( REALM_BYTECODE, "IN_LOAD_CONTEXT_R\n" );
+
+ treeUpref( exec->pdaRun->context );
+ vm_push( exec->pdaRun->context );
+ break;
+ }
+ case IN_LOAD_CONTEXT_WV: {
+ debug( REALM_BYTECODE, "IN_LOAD_CONTEXT_WV\n" );
+
+ treeUpref( exec->pdaRun->context );
+ vm_push( exec->pdaRun->context );
+
+ /* Set up the reverse instruction. */
+ append( &exec->pdaRun->rcodeCollect, IN_LOAD_CONTEXT_BKT );
+ exec->rcodeUnitLen = SIZEOF_CODE;
+ break;
+ }
+ case IN_LOAD_CONTEXT_WC: {
+ debug( REALM_BYTECODE, "IN_LOAD_CONTEXT_WC\n" );
+
+ /* This is identical to the _R version, but using it for writing
+ * would be confusing. */
+ treeUpref( exec->pdaRun->context );
+ vm_push( exec->pdaRun->context );
+ break;
+ }
+ case IN_LOAD_CONTEXT_BKT: {
+ debug( REALM_BYTECODE, "IN_LOAD_CONTEXT_BKT\n" );
+
+ treeUpref( exec->pdaRun->context );
+ vm_push( exec->pdaRun->context );
+ break;
+ }
+ case IN_LOAD_GLOBAL_R: {
+ debug( REALM_BYTECODE, "IN_LOAD_GLOBAL_R\n" );
+
+ treeUpref( prg->global );
+ vm_push( prg->global );
+ break;
+ }
+ case IN_LOAD_GLOBAL_WV: {
+ debug( REALM_BYTECODE, "IN_LOAD_GLOBAL_WV\n" );
+
+ treeUpref( prg->global );
+ vm_push( prg->global );
+
+ /* Set up the reverse instruction. */
+ append( &exec->pdaRun->rcodeCollect, IN_LOAD_GLOBAL_BKT );
+ exec->rcodeUnitLen = SIZEOF_CODE;
+ break;
+ }
+ case IN_LOAD_GLOBAL_WC: {
+ debug( REALM_BYTECODE, "IN_LOAD_GLOBAL_WC\n" );
+
+ /* This is identical to the _R version, but using it for writing
+ * would be confusing. */
+ treeUpref( prg->global );
+ vm_push( prg->global );
+ break;
+ }
+ case IN_LOAD_GLOBAL_BKT: {
+ debug( REALM_BYTECODE, "IN_LOAD_GLOBAL_BKT\n" );
+
+ treeUpref( prg->global );
+ vm_push( prg->global );
+ break;
+ }
+ case IN_LOAD_ACCUM_R: {
+ debug( REALM_BYTECODE, "IN_LOAD_ACCUM_R\n" );
+
+ /* Verify there is a parser before it is upref'd and pushed, in the
+ * same order the IN_LOAD_INPUT_* instructions use. */
+ assert( exec->parser != 0 );
+ treeUpref( (Tree*)exec->parser );
+ vm_push( (Tree*)exec->parser );
+ break;
+ }
+ case IN_LOAD_ACCUM_WV: {
+ debug( REALM_BYTECODE, "IN_LOAD_ACCUM_WV\n" );
+
+ /* Verify there is a parser before it is upref'd and pushed, in the
+ * same order the IN_LOAD_INPUT_* instructions use. */
+ assert( exec->parser != 0 );
+ treeUpref( (Tree*)exec->parser );
+ vm_push( (Tree*)exec->parser );
+
+ /* Set up the reverse instruction. It carries the parser pointer as a
+ * word operand, which IN_LOAD_ACCUM_BKT reads back and pushes. */
+ append( &exec->pdaRun->rcodeCollect, IN_LOAD_ACCUM_BKT );
+ appendWord( &exec->pdaRun->rcodeCollect, (Word)exec->parser );
+ exec->rcodeUnitLen = SIZEOF_CODE + SIZEOF_WORD;
+ break;
+ }
+ case IN_LOAD_ACCUM_WC: {
+ debug( REALM_BYTECODE, "IN_LOAD_ACCUM_WC\n" );
+
+ /* This is identical to the _R version, but using it for writing
+ * would be confusing. The parser is checked before it is upref'd and
+ * pushed, in the same order the IN_LOAD_INPUT_* instructions use. */
+ assert( exec->parser != 0 );
+ treeUpref( (Tree*)exec->parser );
+ vm_push( (Tree*)exec->parser );
+ break;
+ }
+ case IN_LOAD_ACCUM_BKT: {
+ Tree *parser;
+ read_tree( parser );
+
+ debug( REALM_BYTECODE, "IN_LOAD_ACCUM_BKT\n" );
+
+ treeUpref( parser );
+ vm_push( parser );
+ break;
+ }
+ case IN_LOAD_INPUT_R: {
+ debug( REALM_BYTECODE, "IN_LOAD_INPUT_R\n" );
+
+ assert( exec->parser != 0 );
+ treeUpref( (Tree*)exec->parser->input );
+ vm_push( (Tree*)exec->parser->input );
+ break;
+ }
+ case IN_LOAD_INPUT_WV: {
+ debug( REALM_BYTECODE, "IN_LOAD_INPUT_WV\n" );
+
+ assert( exec->parser != 0 );
+ treeUpref( (Tree*)exec->parser->input );
+ vm_push( (Tree*)exec->parser->input );
+
+ /* Set up the reverse instruction. */
+ append( &exec->pdaRun->rcodeCollect, IN_LOAD_INPUT_BKT );
+ appendWord( &exec->pdaRun->rcodeCollect, (Word)exec->parser->input );
+ exec->rcodeUnitLen = SIZEOF_CODE + SIZEOF_WORD;
+ break;
+ }
+ case IN_LOAD_INPUT_WC: {
+ debug( REALM_BYTECODE, "IN_LOAD_INPUT_WC\n" );
+
+ /* This is identical to the _R version, but using it for writing
+ * would be confusing. */
+ assert( exec->parser != 0 );
+ treeUpref( (Tree*)exec->parser->input );
+ vm_push( (Tree*)exec->parser->input );
+ break;
+ }
+ case IN_LOAD_INPUT_BKT: {
+ Tree *accumStream;
+ read_tree( accumStream );
+
+ debug( REALM_BYTECODE, "IN_LOAD_INPUT_BKT\n" );
+
+ treeUpref( accumStream );
+ vm_push( accumStream );
+ break;
+ }
+ case IN_LOAD_CTX_R: {
+ debug( REALM_BYTECODE, "IN_LOAD_CTX_R\n" );
+
+ treeUpref( exec->pdaRun->context );
+ vm_push( exec->pdaRun->context );
+ break;
+ }
+ case IN_LOAD_CTX_WV: {
+ debug( REALM_BYTECODE, "IN_LOAD_CTX_WV\n" );
+
+ treeUpref( exec->pdaRun->context );
+ vm_push( exec->pdaRun->context );
+
+ /* Set up the reverse instruction. The forward instruction pushes the
+ * context, so the reverse must be IN_LOAD_CTX_BKT, which takes no
+ * operand and pushes the context again. Emitting IN_LOAD_ACCUM_BKT
+ * with the parser pointer would push the parser when backtracking. */
+ append( &exec->pdaRun->rcodeCollect, IN_LOAD_CTX_BKT );
+ exec->rcodeUnitLen = SIZEOF_CODE;
+ break;
+ }
+ case IN_LOAD_CTX_WC: {
+ debug( REALM_BYTECODE, "IN_LOAD_CTX_WC\n" );
+
+ /* This is identical to the _R version, but using it for writing
+ * would be confusing. */
+ treeUpref( exec->pdaRun->context );
+ vm_push( exec->pdaRun->context );
+ break;
+ }
+ case IN_LOAD_CTX_BKT: {
+ debug( REALM_BYTECODE, "IN_LOAD_CTX_BKT\n" );
+
+ treeUpref( exec->pdaRun->context );
+ vm_push( exec->pdaRun->context );
+ break;
+ }
+ case IN_INIT_CAPTURES: {
+ /* uchar ncaps; */
+ consume_byte();
+
+ debug( REALM_BYTECODE, "IN_INIT_CAPTURES\n" );
+
+ /* If there are captures (this is a translate block) then copy them into
+ * the local frame now. */
+ LangElInfo *lelInfo = prg->rtd->lelInfo;
+ char **mark = exec->fsmRun->mark;
+
+ int i;
+ for ( i = 0; i < lelInfo[exec->pdaRun->tokenId].numCaptureAttr; i++ ) {
+ CaptureAttr *ca = &prg->rtd->captureAttr[lelInfo[exec->pdaRun->tokenId].captureAttr + i];
+ Head *data = stringAllocFull( prg,
+ mark[ca->mark_enter], mark[ca->mark_leave] - mark[ca->mark_enter] );
+ Tree *string = constructString( prg, data );
+ treeUpref( string );
+ setLocal( exec->framePtr, -1 - i, string );
+ }
+ break;
+ }
+ case IN_INIT_RHS_EL: {
+ Half position;
+ short field;
+ read_half( position );
+ read_half( field );
+
+ debug( REALM_BYTECODE, "IN_INIT_RHS_EL %hd\n", field );
+
+ Tree *val = getRhsEl( prg, exec->pdaRun->redLel->shadow->tree, position );
+ treeUpref( val );
+ vm_local(field) = val;
+ break;
+ }
+
+ case IN_INIT_LHS_EL: {
+ short field;
+ read_half( field );
+
+ debug( REALM_BYTECODE, "IN_INIT_LHS_EL %hd\n", field );
+
+ /* We transfer it to the local field. Possibly take a copy. */
+ Tree *val = exec->pdaRun->redLel->shadow->tree;
+
+ /* Save it. */
+ treeUpref( val );
+ exec->pdaRun->parsed = val;
+
+ exec->pdaRun->redLel->shadow->tree = 0;
+ vm_local(field) = val;
+ break;
+ }
+ case IN_STORE_LHS_EL: {
+ short field;
+ read_half( field );
+
+ debug( REALM_BYTECODE, "IN_STORE_LHS_EL %hd\n", field );
+
+ Tree *val = vm_local(field);
+ vm_local(field) = 0;
+ exec->pdaRun->redLel->shadow->tree = val;
+ break;
+ }
+ case IN_UITER_ADVANCE: {
+ short field;
+ read_half( field );
+
+ debug( REALM_BYTECODE, "IN_UITER_ADVANCE\n" );
+
+ /* Get the iterator. */
+ UserIter *uiter = (UserIter*) vm_local(field);
+
+ long stackSize = uiter->stackRoot - vm_ptop();
+ assert( uiter->stackSize == stackSize );
+
+ /* Fix the return instruction pointer. */
+ uiter->stackRoot[-IFR_AA + IFR_RIN] = (SW)instr;
+
+ instr = uiter->resume;
+ exec->framePtr = uiter->frame;
+ exec->iframePtr = &uiter->stackRoot[-IFR_AA];
+ break;
+ }
+ case IN_UITER_GET_CUR_R: {
+ short field;
+ read_half( field );
+
+ debug( REALM_BYTECODE, "IN_UITER_GET_CUR_R\n" );
+
+ UserIter *uiter = (UserIter*) vm_local(field);
+ Tree *val = uiter->ref.kid->tree;
+ treeUpref( val );
+ vm_push( val );
+ break;
+ }
+ case IN_UITER_GET_CUR_WC: {
+ short field;
+ read_half( field );
+
+ debug( REALM_BYTECODE, "IN_UITER_GET_CUR_WC\n" );
+
+ UserIter *uiter = (UserIter*) vm_local(field);
+ splitRef( prg, &sp, &uiter->ref );
+ Tree *split = uiter->ref.kid->tree;
+ treeUpref( split );
+ vm_push( split );
+ break;
+ }
+ case IN_UITER_SET_CUR_WC: {
+ short field;
+ read_half( field );
+
+ debug( REALM_BYTECODE, "IN_UITER_SET_CUR_WC\n" );
+
+ Tree *t = vm_pop();
+ UserIter *uiter = (UserIter*) vm_local(field);
+ splitRef( prg, &sp, &uiter->ref );
+ Tree *old = uiter->ref.kid->tree;
+ setUiterCur( prg, uiter, t );
+ treeDownref( prg, sp, old );
+ break;
+ }
+ case IN_GET_LOCAL_R: {
+ short field;
+ read_half( field );
+
+ debug( REALM_BYTECODE, "IN_GET_LOCAL_R\n" );
+
+ Tree *val = vm_local(field);
+ treeUpref( val );
+ vm_push( val );
+ break;
+ }
+ case IN_GET_LOCAL_WC: {
+ short field;
+ read_half( field );
+
+ debug( REALM_BYTECODE, "IN_GET_LOCAL_WC\n" );
+
+ Tree *split = getLocalSplit( prg, exec->framePtr, field );
+ treeUpref( split );
+ vm_push( split );
+ break;
+ }
+ case IN_SET_LOCAL_WC: {
+ short field;
+ read_half( field );
+
+ debug( REALM_BYTECODE, "IN_SET_LOCAL_WC %d\n", field );
+
+ Tree *val = vm_pop();
+ treeDownref( prg, sp, vm_local(field) );
+ setLocal( exec->framePtr, field, val );
+ break;
+ }
+ case IN_SAVE_RET: {
+ debug( REALM_BYTECODE, "IN_SAVE_RET\n" );
+
+ Tree *val = vm_pop();
+ vm_local(FR_RV) = val;
+ break;
+ }
+ case IN_GET_LOCAL_REF_R: {
+ short field;
+ read_half( field );
+
+ debug( REALM_BYTECODE, "IN_GET_LOCAL_REF_R\n" );
+
+ Ref *ref = (Ref*) vm_plocal(field);
+ Tree *val = ref->kid->tree;
+ treeUpref( val );
+ vm_push( val );
+ break;
+ }
+ case IN_GET_LOCAL_REF_WC: {
+ short field;
+ read_half( field );
+
+ debug( REALM_BYTECODE, "IN_GET_LOCAL_REF_WC\n" );
+
+ Ref *ref = (Ref*) vm_plocal(field);
+ splitRef( prg, &sp, ref );
+ Tree *val = ref->kid->tree;
+ treeUpref( val );
+ vm_push( val );
+ break;
+ }
+ case IN_SET_LOCAL_REF_WC: {
+ short field;
+ read_half( field );
+
+ debug( REALM_BYTECODE, "IN_SET_LOCAL_REF_WC\n" );
+
+ Tree *val = vm_pop();
+ Ref *ref = (Ref*) vm_plocal(field);
+ splitRef( prg, &sp, ref );
+ refSetValue( ref, val );
+ break;
+ }
+ case IN_GET_FIELD_R: {
+ /* Operand: half-word field index. Stack: pops the object, pushes the
+ * value of the field. */
+ short field;
+ read_half( field );
+
+ debug( REALM_BYTECODE, "IN_GET_FIELD_R %d\n", field );
+
+ Tree *obj = vm_pop();
+
+ Tree *val = getField( obj, field );
+ treeUpref( val );
+ vm_push( val );
+
+ /* Release the popped reference only after the field has been read
+ * and upref'd. If the stack held the last reference, obj must not be
+ * touched after this point. */
+ treeDownref( prg, sp, obj );
+ break;
+ }
+ case IN_GET_FIELD_WC: {
+ /* Operand: half-word field index. Stack: pops the object, pushes the
+ * result of getFieldSplit on that field. */
+ short field;
+ read_half( field );
+
+ debug( REALM_BYTECODE, "IN_GET_FIELD_WC %d\n", field );
+
+ Tree *obj = vm_pop();
+
+ Tree *split = getFieldSplit( prg, obj, field );
+ treeUpref( split );
+ vm_push( split );
+
+ /* Release the popped reference only after obj has been used. If the
+ * stack held the last reference, obj must not be touched after this
+ * point. */
+ treeDownref( prg, sp, obj );
+ break;
+ }
+ case IN_GET_FIELD_WV: {
+ /* Operand: half-word field index. Stack: pops the object, pushes the
+ * result of getFieldSplit on that field, and records a reverse
+ * instruction. */
+ short field;
+ read_half( field );
+
+ debug( REALM_BYTECODE, "IN_GET_FIELD_WV\n" );
+
+ Tree *obj = vm_pop();
+
+ Tree *split = getFieldSplit( prg, obj, field );
+ treeUpref( split );
+ vm_push( split );
+
+ /* Release the popped reference only after obj has been used. If the
+ * stack held the last reference, obj must not be touched after this
+ * point. */
+ treeDownref( prg, sp, obj );
+
+ /* Set up the reverse instruction. */
+ append( &exec->pdaRun->rcodeCollect, IN_GET_FIELD_BKT );
+ appendHalf( &exec->pdaRun->rcodeCollect, field );
+ exec->rcodeUnitLen += SIZEOF_CODE + SIZEOF_HALF;
+ break;
+ }
+ case IN_GET_FIELD_BKT: {
+ /* Reverse of IN_GET_FIELD_WV. Operand: half-word field index. Stack:
+ * pops the object, pushes the result of getFieldSplit on that field. */
+ short field;
+ read_half( field );
+
+ debug( REALM_BYTECODE, "IN_GET_FIELD_BKT\n" );
+
+ Tree *obj = vm_pop();
+
+ Tree *split = getFieldSplit( prg, obj, field );
+ treeUpref( split );
+ vm_push( split );
+
+ /* Release the popped reference only after obj has been used. If the
+ * stack held the last reference, obj must not be touched after this
+ * point. */
+ treeDownref( prg, sp, obj );
+ break;
+ }
+ case IN_SET_FIELD_WC: {
+ /* Operand: half-word field index. Stack: pops the object, then the
+ * new value, and stores the value in the field. */
+ short field;
+ read_half( field );
+
+ debug( REALM_BYTECODE, "IN_SET_FIELD_WC %d\n", field );
+
+ Tree *obj = vm_pop();
+ Tree *val = vm_pop();
+
+ /* Downref the old value. */
+ Tree *prev = getField( obj, field );
+ treeDownref( prg, sp, prev );
+
+ setField( prg, obj, field, val );
+
+ /* Release the popped object reference last. If the stack held the
+ * last reference, obj must not be read or written after this point. */
+ treeDownref( prg, sp, obj );
+ break;
+ }
+ case IN_SET_FIELD_WV: {
+ /* Operand: half-word field index. Stack: pops the object, then the
+ * new value, stores the value in the field and records a reverse
+ * instruction that carries the previous value. */
+ short field;
+ read_half( field );
+
+ debug( REALM_BYTECODE, "IN_SET_FIELD_WV %d\n", field );
+
+ Tree *obj = vm_pop();
+ Tree *val = vm_pop();
+
+ /* Save the old value, then set the field. The old value is not
+ * downref'd here; its pointer is written into the reverse code. */
+ Tree *prev = getField( obj, field );
+ setField( prg, obj, field, val );
+
+ /* Release the popped object reference only after obj has been used.
+ * If the stack held the last reference, obj must not be touched after
+ * this point. */
+ treeDownref( prg, sp, obj );
+
+ /* Set up the reverse instruction. */
+ append( &exec->pdaRun->rcodeCollect, IN_SET_FIELD_BKT );
+ appendHalf( &exec->pdaRun->rcodeCollect, field );
+ appendWord( &exec->pdaRun->rcodeCollect, (Word)prev );
+ exec->rcodeUnitLen += SIZEOF_CODE + SIZEOF_HALF + SIZEOF_WORD;
+ append( &exec->pdaRun->rcodeCollect, exec->rcodeUnitLen );
+ /* FLUSH */
+ break;
+ }
+ case IN_SET_FIELD_BKT: {
+ /* Reverse of IN_SET_FIELD_WV. Operands: half-word field index, then
+ * the tree pointer to restore. Stack: pops the object. */
+ short field;
+ Tree *val;
+ read_half( field );
+ read_tree( val );
+
+ debug( REALM_BYTECODE, "IN_SET_FIELD_BKT\n" );
+
+ Tree *obj = vm_pop();
+
+ /* Downref the old value. */
+ Tree *prev = getField( obj, field );
+ treeDownref( prg, sp, prev );
+
+ setField( prg, obj, field, val );
+
+ /* Release the popped object reference last. If the stack held the
+ * last reference, obj must not be read or written after this point. */
+ treeDownref( prg, sp, obj );
+ break;
+ }
+ case IN_SET_FIELD_LEAVE_WC: {
+ short field;
+ read_half( field );
+
+ debug( REALM_BYTECODE, "IN_SET_FIELD_LEAVE_WC\n" );
+
+ /* Note that we don't downref the object here because we are
+ * leaving it on the stack. */
+ Tree *obj = vm_pop();
+ Tree *val = vm_pop();
+
+ /* Downref the old value. */
+ Tree *prev = getField( obj, field );
+ treeDownref( prg, sp, prev );
+
+ /* Set the field. */
+ setField( prg, obj, field, val );
+
+ /* Leave the object on the top of the stack. */
+ vm_push( obj );
+ break;
+ }
+ case IN_GET_RHS_VAL_R: {
+ debug( REALM_BYTECODE, "IN_GET_RHS_VAL_R\n" );
+ int i, done = 0;
+ uchar len;
+
+ Tree *obj = vm_pop(), *val = 0;
+
+ /* Operands: a byte count followed by that many (prodNum, childNum)
+ * byte pairs. Every pair is read so the instruction pointer moves
+ * past all of them, but only the first pair whose prodNum matches
+ * the object's production selects the child. val stays 0 if none
+ * match. */
+ read_byte( len );
+ for ( i = 0; i < len; i++ ) {
+ uchar prodNum, childNum;
+ read_byte( prodNum );
+ read_byte( childNum );
+ if ( !done && obj->prodNum == prodNum ) {
+ val = getRhsEl( prg, obj, childNum );
+ done = 1;
+ }
+ }
+
+ treeUpref( val );
+ vm_push( val );
+
+ /* Release the popped reference only after obj has been inspected. If
+ * the stack held the last reference, obj must not be touched after
+ * this point. */
+ treeDownref( prg, sp, obj );
+ break;
+ }
+ case IN_POP: {
+ debug( REALM_BYTECODE, "IN_POP\n" );
+
+ Tree *val = vm_pop();
+ treeDownref( prg, sp, val );
+ break;
+ }
+ case IN_POP_N_WORDS: {
+ short n;
+ read_half( n );
+
+ debug( REALM_BYTECODE, "IN_POP_N_WORDS\n" );
+
+ vm_popn( n );
+ break;
+ }
+ case IN_SPRINTF: {
+ debug( REALM_BYTECODE, "IN_SPRINTF\n" );
+
+ /* The top stack entry is not used by this instruction. Discard it
+ * without binding it to a dummy variable. As before, it is not
+ * downref'd. */
+ vm_pop_ignore();
+
+ /* Stack order below the discarded entry: integer, then format. */
+ Tree *integer = vm_pop();
+ Tree *format = vm_pop();
+ Head *res = stringSprintf( prg, (Str*)format, (Int*)integer );
+ Tree *str = constructString( prg, res );
+ treeUpref( str );
+ vm_push( str );
+ treeDownref( prg, sp, integer );
+ treeDownref( prg, sp, format );
+ break;
+ }
+ case IN_STR_ATOI: {
+ debug( REALM_BYTECODE, "IN_STR_ATOI\n" );
+
+ Str *str = (Str*)vm_pop();
+ Word res = strAtoi( str->value );
+ Tree *integer = constructInteger( prg, res );
+ treeUpref( integer );
+ vm_push( integer );
+ treeDownref( prg, sp, (Tree*)str );
+ break;
+ }
+ case IN_INT_TO_STR: {
+ debug( REALM_BYTECODE, "IN_INT_TO_STR\n" );
+
+ Int *i = (Int*)vm_pop();
+ Head *res = intToStr( prg, i->value );
+ Tree *str = constructString( prg, res );
+ treeUpref( str );
+ vm_push( str );
+ treeDownref( prg, sp, (Tree*) i );
+ break;
+ }
+ case IN_TREE_TO_STR: {
+ debug( REALM_BYTECODE, "IN_TREE_TO_STR\n" );
+
+ Tree *tree = vm_pop();
+ Head *res = treeToStr( prg, sp, tree, true );
+ Tree *str = constructString( prg, res );
+ treeUpref( str );
+ vm_push( str );
+ treeDownref( prg, sp, tree );
+ break;
+ }
+ case IN_TREE_TO_STR_NOTRIM: {
+ debug( REALM_BYTECODE, "IN_TREE_TO_STR_NOTRIM\n" );
+
+ Tree *tree = vm_pop();
+ Head *res = treeToStr( prg, sp, tree, false );
+ Tree *str = constructString( prg, res );
+ treeUpref( str );
+ vm_push( str );
+ treeDownref( prg, sp, tree );
+ break;
+ }
+ case IN_TREE_TRIM: {
+ debug( REALM_BYTECODE, "IN_TREE_TRIM\n" );
+
+ Tree *tree = vm_pop();
+ Tree *trimmed = treeTrim( prg, sp, tree );
+ vm_push( trimmed );
+ break;
+ }
+ case IN_CONCAT_STR: {
+ debug( REALM_BYTECODE, "IN_CONCAT_STR\n" );
+
+ Str *s2 = (Str*)vm_pop();
+ Str *s1 = (Str*)vm_pop();
+ Head *res = concatStr( s1->value, s2->value );
+ Tree *str = constructString( prg, res );
+ treeUpref( str );
+ treeDownref( prg, sp, (Tree*)s1 );
+ treeDownref( prg, sp, (Tree*)s2 );
+ vm_push( str );
+ break;
+ }
+ case IN_STR_UORD8: {
+ debug( REALM_BYTECODE, "IN_STR_UORD8\n" );
+
+ Str *str = (Str*)vm_pop();
+ Word res = strUord8( str->value );
+ Tree *tree = constructInteger( prg, res );
+ treeUpref( tree );
+ vm_push( tree );
+ treeDownref( prg, sp, (Tree*)str );
+ break;
+ }
+ case IN_STR_UORD16: {
+ debug( REALM_BYTECODE, "IN_STR_UORD16\n" );
+
+ Str *str = (Str*)vm_pop();
+ Word res = strUord16( str->value );
+ Tree *tree = constructInteger( prg, res );
+ treeUpref( tree );
+ vm_push( tree );
+ treeDownref( prg, sp, (Tree*)str );
+ break;
+ }
+
+ case IN_STR_LENGTH: {
+ debug( REALM_BYTECODE, "IN_STR_LENGTH\n" );
+
+ Str *str = (Str*)vm_pop();
+ long len = stringLength( str->value );
+ Tree *res = constructInteger( prg, len );
+ treeUpref( res );
+ vm_push( res );
+ treeDownref( prg, sp, (Tree*)str );
+ break;
+ }
+ case IN_JMP_FALSE: {
+ short dist;
+ read_half( dist );
+
+ debug( REALM_BYTECODE, "IN_JMP_FALSE %d\n", dist );
+
+ Tree *tree = vm_pop();
+ if ( testFalse( prg, tree ) )
+ instr += dist;
+ treeDownref( prg, sp, tree );
+ break;
+ }
+ case IN_JMP_TRUE: {
+ short dist;
+ read_half( dist );
+
+ debug( REALM_BYTECODE, "IN_JMP_TRUE %d\n", dist );
+
+ Tree *tree = vm_pop();
+ if ( !testFalse( prg, tree ) )
+ instr += dist;
+ treeDownref( prg, sp, tree );
+ break;
+ }
+ case IN_JMP: {
+ short dist;
+ read_half( dist );
+
+ debug( REALM_BYTECODE, "IN_JMP\n" );
+
+ instr += dist;
+ break;
+ }
+ case IN_REJECT: {
+ debug( REALM_BYTECODE, "IN_REJECT\n" );
+ exec->pdaRun->reject = true;
+ break;
+ }
+
+ /*
+ * Binary comparison operators.
+ */
+ case IN_TST_EQL: {
+ debug( REALM_BYTECODE, "IN_TST_EQL\n" );
+
+ Tree *o2 = vm_pop();
+ Tree *o1 = vm_pop();
+ long r = cmpTree( prg, o1, o2 );
+ Tree *val = r ? prg->falseVal : prg->trueVal;
+ treeUpref( val );
+ vm_push( val );
+ treeDownref( prg, sp, o1 );
+ treeDownref( prg, sp, o2 );
+ break;
+ }
+ case IN_TST_NOT_EQL: {
+ debug( REALM_BYTECODE, "IN_TST_NOT_EQL\n" );
+
+ Tree *o2 = vm_pop();
+ Tree *o1 = vm_pop();
+ long r = cmpTree( prg, o1, o2 );
+ Tree *val = r ? prg->trueVal : prg->falseVal;
+ treeUpref( val );
+ vm_push( val );
+ treeDownref( prg, sp, o1 );
+ treeDownref( prg, sp, o2 );
+ break;
+ }
+ case IN_TST_LESS: {
+ debug( REALM_BYTECODE, "IN_TST_LESS\n" );
+
+ Tree *o2 = vm_pop();
+ Tree *o1 = vm_pop();
+ long r = cmpTree( prg, o1, o2 );
+ Tree *val = r < 0 ? prg->trueVal : prg->falseVal;
+ treeUpref( val );
+ vm_push( val );
+ treeDownref( prg, sp, o1 );
+ treeDownref( prg, sp, o2 );
+ break;
+ }
+ case IN_TST_LESS_EQL: {
+ debug( REALM_BYTECODE, "IN_TST_LESS_EQL\n" );
+
+ /* Stack: pops o2 then o1, pushes trueVal if cmpTree( o1, o2 ) <= 0,
+ * otherwise falseVal. */
+ Tree *o2 = vm_pop();
+ Tree *o1 = vm_pop();
+ long r = cmpTree( prg, o1, o2 );
+ Tree *val = r <= 0 ? prg->trueVal : prg->falseVal;
+ treeUpref( val );
+ vm_push( val );
+ treeDownref( prg, sp, o1 );
+ treeDownref( prg, sp, o2 );
+
+ /* Without this break, control fell through into IN_TST_GRTR, which
+ * popped two more operands and pushed a second result. */
+ break;
+ }
+ case IN_TST_GRTR: {
+ debug( REALM_BYTECODE, "IN_TST_GRTR\n" );
+
+ Tree *o2 = vm_pop();
+ Tree *o1 = vm_pop();
+ long r = cmpTree( prg, o1, o2 );
+ Tree *val = r > 0 ? prg->trueVal : prg->falseVal;
+ treeUpref( val );
+ vm_push( val );
+ treeDownref( prg, sp, o1 );
+ treeDownref( prg, sp, o2 );
+ break;
+ }
+ case IN_TST_GRTR_EQL: {
+ debug( REALM_BYTECODE, "IN_TST_GRTR_EQL\n" );
+
+ Tree *o2 = (Tree*)vm_pop();
+ Tree *o1 = (Tree*)vm_pop();
+ long r = cmpTree( prg, o1, o2 );
+ Tree *val = r >= 0 ? prg->trueVal : prg->falseVal;
+ treeUpref( val );
+ vm_push( val );
+ treeDownref( prg, sp, o1 );
+ treeDownref( prg, sp, o2 );
+ break;
+ }
+ case IN_TST_LOGICAL_AND: {
+ debug( REALM_BYTECODE, "IN_TST_LOGICAL_AND\n" );
+
+ Tree *o2 = vm_pop();
+ Tree *o1 = vm_pop();
+ long v2 = !testFalse( prg, o2 );
+ long v1 = !testFalse( prg, o1 );
+ Word r = v1 && v2;
+ Tree *val = r ? prg->trueVal : prg->falseVal;
+ treeUpref( val );
+ vm_push( val );
+ treeDownref( prg, sp, o1 );
+ treeDownref( prg, sp, o2 );
+ break;
+ }
+ case IN_TST_LOGICAL_OR: {
+ debug( REALM_BYTECODE, "IN_TST_LOGICAL_OR\n" );
+
+ Tree *o2 = vm_pop();
+ Tree *o1 = vm_pop();
+ long v2 = !testFalse( prg, o2 );
+ long v1 = !testFalse( prg, o1 );
+ Word r = v1 || v2;
+ Tree *val = r ? prg->trueVal : prg->falseVal;
+ treeUpref( val );
+ vm_push( val );
+ treeDownref( prg, sp, o1 );
+ treeDownref( prg, sp, o2 );
+ break;
+ }
+ case IN_NOT: {
+ debug( REALM_BYTECODE, "IN_NOT\n" );
+
+ Tree *tree = (Tree*)vm_pop();
+ long r = testFalse( prg, tree );
+ Tree *val = r ? prg->trueVal : prg->falseVal;
+ treeUpref( val );
+ vm_push( val );
+ treeDownref( prg, sp, tree );
+ break;
+ }
+
+ case IN_ADD_INT: {
+ debug( REALM_BYTECODE, "IN_ADD_INT\n" );
+
+ Int *o2 = (Int*)vm_pop();
+ Int *o1 = (Int*)vm_pop();
+ long r = o1->value + o2->value;
+ Tree *tree = constructInteger( prg, r );
+ treeUpref( tree );
+ vm_push( tree );
+ treeDownref( prg, sp, (Tree*)o1 );
+ treeDownref( prg, sp, (Tree*)o2 );
+ break;
+ }
+ case IN_MULT_INT: {
+ debug( REALM_BYTECODE, "IN_MULT_INT\n" );
+
+ Int *o2 = (Int*)vm_pop();
+ Int *o1 = (Int*)vm_pop();
+ long r = o1->value * o2->value;
+ Tree *tree = constructInteger( prg, r );
+ treeUpref( tree );
+ vm_push( tree );
+ treeDownref( prg, sp, (Tree*)o1 );
+ treeDownref( prg, sp, (Tree*)o2 );
+ break;
+ }
+ case IN_DIV_INT: {
+ debug( REALM_BYTECODE, "IN_DIV_INT\n" );
+
+ Int *o2 = (Int*)vm_pop();
+ Int *o1 = (Int*)vm_pop();
+ long r = o1->value / o2->value;
+ Tree *tree = constructInteger( prg, r );
+ treeUpref( tree );
+ vm_push( tree );
+ treeDownref( prg, sp, (Tree*)o1 );
+ treeDownref( prg, sp, (Tree*)o2 );
+ break;
+ }
+ case IN_SUB_INT: {
+ debug( REALM_BYTECODE, "IN_SUB_INT\n" );
+
+ Int *o2 = (Int*)vm_pop();
+ Int *o1 = (Int*)vm_pop();
+ long r = o1->value - o2->value;
+ Tree *tree = constructInteger( prg, r );
+ treeUpref( tree );
+ vm_push( tree );
+ treeDownref( prg, sp, (Tree*)o1 );
+ treeDownref( prg, sp, (Tree*)o2 );
+ break;
+ }
+ case IN_DUP_TOP_OFF: {
+ short off;
+ read_half( off );
+
+ debug( REALM_BYTECODE, "IN_DUP_TOP_OFF %hd\n", off );
+
+ Tree *val = vm_top_off(off);
+ treeUpref( val );
+ vm_push( val );
+ break;
+ }
+ case IN_DUP_TOP: {
+ debug( REALM_BYTECODE, "IN_DUP_TOP\n" );
+
+ Tree *val = vm_top();
+ treeUpref( val );
+ vm_push( val );
+ break;
+ }
+ case IN_TRITER_FROM_REF: {
+ short field;
+ Half searchTypeId;
+ read_half( field );
+ read_half( searchTypeId );
+
+ debug( REALM_BYTECODE, "IN_TRITER_FROM_REF\n" );
+
+ Ref rootRef;
+ rootRef.kid = (Kid*)vm_pop();
+ rootRef.next = (Ref*)vm_pop();
+ void *mem = vm_plocal(field);
+ initTreeIter( (TreeIter*)mem, &rootRef, searchTypeId, vm_ptop() );
+ break;
+ }
+ case IN_TRITER_DESTROY: {
+ short field;
+ read_half( field );
+
+ debug( REALM_BYTECODE, "IN_TRITER_DESTROY\n" );
+
+ TreeIter *iter = (TreeIter*) vm_plocal(field);
+ treeIterDestroy( &sp, iter );
+ break;
+ }
+ case IN_REV_TRITER_FROM_REF: {
+ short field;
+ Half searchTypeId;
+ read_half( field );
+ read_half( searchTypeId );
+
+ debug( REALM_BYTECODE, "IN_REV_TRITER_FROM_REF\n" );
+
+ Ref rootRef;
+ rootRef.kid = (Kid*)vm_pop();
+ rootRef.next = (Ref*)vm_pop();
+
+ Tree **stackRoot = vm_ptop();
+
+ int children = 0;
+ Kid *kid = treeChild( prg, rootRef.kid->tree );
+ while ( kid != 0 ) {
+ children++;
+ vm_push( (SW) kid );
+ kid = kid->next;
+ }
+
+ void *mem = vm_plocal(field);
+ initRevTreeIter( (RevTreeIter*)mem, &rootRef, searchTypeId, stackRoot, children );
+ break;
+ }
+ case IN_REV_TRITER_DESTROY: {
+ short field;
+ read_half( field );
+
+ debug( REALM_BYTECODE, "IN_REV_TRITER_DESTROY\n" );
+
+ RevTreeIter *iter = (RevTreeIter*) vm_plocal(field);
+ long curStackSize = iter->stackRoot - vm_ptop();
+ assert( iter->stackSize == curStackSize );
+ vm_popn( iter->stackSize );
+ break;
+ }
+ case IN_TREE_SEARCH: {
+ Word id;
+ read_word( id );
+
+ debug( REALM_BYTECODE, "IN_TREE_SEARCH\n" );
+
+ Tree *tree = vm_pop();
+ Tree *res = treeSearch2( prg, tree, id );
+ treeUpref( res );
+ vm_push( res );
+ treeDownref( prg, sp, tree );
+ break;
+ }
+ case IN_TRITER_ADVANCE: {
+ short field;
+ read_half( field );
+
+ debug( REALM_BYTECODE, "IN_TRITER_ADVANCE\n" );
+
+ TreeIter *iter = (TreeIter*) vm_plocal(field);
+ Tree *res = treeIterAdvance( prg, &sp, iter );
+ treeUpref( res );
+ vm_push( res );
+ break;
+ }
+ case IN_TRITER_NEXT_CHILD: {
+ short field;
+ read_half( field );
+
+ debug( REALM_BYTECODE, "IN_TRITER_NEXT_CHILD\n" );
+
+ TreeIter *iter = (TreeIter*) vm_plocal(field);
+ Tree *res = treeIterNextChild( prg, &sp, iter );
+ treeUpref( res );
+ vm_push( res );
+ break;
+ }
+ case IN_REV_TRITER_PREV_CHILD: {
+ short field;
+ read_half( field );
+
+ debug( REALM_BYTECODE, "IN_REV_TRITER_PREV_CHILD\n" );
+
+ RevTreeIter *iter = (RevTreeIter*) vm_plocal(field);
+ Tree *res = treeRevIterPrevChild( prg, &sp, iter );
+ treeUpref( res );
+ vm_push( res );
+ break;
+ }
+ case IN_TRITER_NEXT_REPEAT: {
+ short field;
+ read_half( field );
+
+ debug( REALM_BYTECODE, "IN_TRITER_NEXT_REPEAT\n" );
+
+ TreeIter *iter = (TreeIter*) vm_plocal(field);
+ Tree *res = treeIterNextRepeat( prg, &sp, iter );
+ treeUpref( res );
+ vm_push( res );
+ break;
+ }
+ case IN_TRITER_PREV_REPEAT: {
+ short field;
+ read_half( field );
+
+ debug( REALM_BYTECODE, "IN_TRITER_PREV_REPEAT\n" );
+
+ TreeIter *iter = (TreeIter*) vm_plocal(field);
+ Tree *res = treeIterPrevRepeat( prg, &sp, iter );
+ treeUpref( res );
+ vm_push( res );
+ break;
+ }
+ case IN_TRITER_GET_CUR_R: {
+ short field;
+ read_half( field );
+
+ debug( REALM_BYTECODE, "IN_TRITER_GET_CUR_R\n" );
+
+ TreeIter *iter = (TreeIter*) vm_plocal(field);
+ Tree *tree = treeIterDerefCur( iter );
+ treeUpref( tree );
+ vm_push( tree );
+ break;
+ }
+ case IN_TRITER_GET_CUR_WC: {
+ short field;
+ read_half( field );
+
+ debug( REALM_BYTECODE, "IN_TRITER_GET_CUR_WC\n" );
+
+ TreeIter *iter = (TreeIter*) vm_plocal(field);
+ splitIterCur( prg, &sp, iter );
+ Tree *tree = treeIterDerefCur( iter );
+ treeUpref( tree );
+ vm_push( tree );
+ break;
+ }
+ case IN_TRITER_SET_CUR_WC: {
+ short field;
+ read_half( field );
+
+ debug( REALM_BYTECODE, "IN_TRITER_SET_CUR_WC\n" );
+
+ Tree *tree = vm_pop();
+ TreeIter *iter = (TreeIter*) vm_plocal(field);
+ splitIterCur( prg, &sp, iter );
+ Tree *old = treeIterDerefCur( iter );
+ setTriterCur( prg, iter, tree );
+ treeDownref( prg, sp, old );
+ break;
+ }
+ case IN_MATCH: {
+ Half patternId;
+ read_half( patternId );
+
+ debug( REALM_BYTECODE, "IN_MATCH\n" );
+
+ Tree *tree = vm_pop();
+
+ /* Run the match, push the result. */
+ int rootNode = prg->rtd->patReplInfo[patternId].offset;
+
+ /* Bindings are indexed starting at 1. A bindId of zero represents no
+ * binding. We make space for slot zero here rather than do index math
+ * at each access. */
+ long numBindings = prg->rtd->patReplInfo[patternId].numBindings;
+ Tree *bindings[1+numBindings];
+ memset( bindings, 0, sizeof(Tree*)*(1+numBindings) );
+
+ Kid kid;
+ kid.tree = tree;
+ kid.next = 0;
+ int matched = matchPattern( bindings, prg, rootNode, &kid, false );
+
+ if ( !matched )
+ memset( bindings, 0, sizeof(Tree*)*(1+numBindings) );
+ else {
+ int b;
+ for ( b = 1; b <= numBindings; b++ )
+ assert( bindings[b] != 0 );
+ }
+
+ Tree *result = matched ? tree : 0;
+ treeUpref( result );
+ vm_push( result ? tree : 0 );
+ int b;
+ for ( b = 1; b <= numBindings; b++ ) {
+ treeUpref( bindings[b] );
+ vm_push( bindings[b] );
+ }
+
+ treeDownref( prg, sp, tree );
+ break;
+ }
+
+ case IN_GET_ACCUM_CTX_R: {
+ debug( REALM_BYTECODE, "IN_GET_ACCUM_CTX_R\n" );
+
+ Tree *obj = vm_pop();
+ Tree *ctx = ((Parser*)obj)->pdaRun->context;
+ treeUpref( ctx );
+ vm_push( ctx );
+ treeDownref( prg, sp, obj );
+ break;
+ }
+
+ case IN_SET_ACCUM_CTX_WC: {
+ debug( REALM_BYTECODE, "IN_SET_ACCUM_CTX_WC\n" );
+
+ Tree *parser = vm_pop();
+ Tree *val = vm_pop();
+ parserSetContext( prg, sp, (Parser*)parser, val );
+ treeDownref( prg, sp, parser );
+ break;
+ }
+
+// case IN_GET_ACCUM_CTX_WC:
+// case IN_GET_ACCUM_CTX_WV:
+// case IN_SET_ACCUM_CTX_WC:
+// case IN_SET_ACCUM_CTX_WV:
+// break;
+
+ case IN_INPUT_APPEND_WC: {
+ debug( REALM_BYTECODE, "IN_INPUT_APPEND_WC \n" );
+
+ Input *accumStream = (Input*)vm_pop();
+ Tree *input = vm_pop();
+ streamAppend( prg, sp, input, accumStream->in );
+
+ vm_push( (Tree*)accumStream );
+ treeDownref( prg, sp, input );
+ break;
+ }
+ case IN_INPUT_APPEND_WV: {
+ debug( REALM_BYTECODE, "IN_INPUT_APPEND_WV \n" );
+
+ Input *accumStream = (Input*)vm_pop();
+ Tree *input = vm_pop();
+ Word len = streamAppend( prg, sp, input, accumStream->in );
+
+ treeUpref( (Tree*)accumStream );
+ vm_push( (Tree*)accumStream );
+
+ append( &exec->pdaRun->rcodeCollect, IN_INPUT_APPEND_BKT );
+ appendWord( &exec->pdaRun->rcodeCollect, (Word) accumStream );
+ appendWord( &exec->pdaRun->rcodeCollect, (Word) input );
+ appendWord( &exec->pdaRun->rcodeCollect, (Word) len );
+ append( &exec->pdaRun->rcodeCollect, SIZEOF_CODE + 3 * SIZEOF_WORD );
+ break;
+ }
+
+ case IN_INPUT_APPEND_BKT: {
+ Tree *accumStream;
+ Tree *input;
+ Word len;
+ read_tree( accumStream );
+ read_tree( input );
+ read_word( len );
+
+ debug( REALM_BYTECODE, "IN_INPUT_APPEND_BKT\n" );
+
+ undoStreamAppend( prg, sp, 0, ((Input*)accumStream)->in, input, len );
+ treeDownref( prg, sp, accumStream );
+ treeDownref( prg, sp, input );
+ break;
+ }
+
+ case IN_PARSE_LOAD_START: {
+ debug( REALM_BYTECODE, "IN_PARSE_LOAD_START\n" );
+ vm_push( (SW) PcrStart );
+ break;
+ }
+
+ case IN_PARSE_SAVE_STEPS: {
+ debug( REALM_BYTECODE, "IN_PARSE_SAVE_STEPS\n" );
+
+ Parser *parser = (Parser*)vm_pop();
+ long steps = parser->pdaRun->steps;
+
+ vm_push( (SW)steps );
+ vm_push( (SW)parser );
+ break;
+ }
+
+ case IN_PCR_CALL: {
+ debug( REALM_BYTECODE, "IN_PCR_CALL\n" );
+
+ long pcr = (long)vm_pop();
+ Parser *parser = (Parser*)vm_pop();
+ long steps = (long)vm_pop();
+
+ vm_push( (SW)steps );
+ vm_push( (SW)parser );
+ vm_push( (SW)pcr );
+
+ vm_push( (SW)exec->parser );
+ vm_push( (SW)exec->pdaRun );
+ vm_push( (SW)exec->fsmRun );
+ vm_push( (SW)exec->inputStream );
+ vm_push( (SW)exec->framePtr );
+ vm_push( (SW)exec->iframePtr );
+ vm_push( (SW)exec->frameId );
+ vm_push( (SW)exec->rcodeUnitLen );
+
+ Code *returnTo = instr - ( SIZEOF_CODE + SIZEOF_CODE + SIZEOF_HALF );
+ vm_push( (SW)returnTo );
+
+ initExecution( exec, parser, parser->pdaRun, parser->fsmRun, parser->input->in, parser->pdaRun->frameId );
+ instr = parser->pdaRun->code;
+ break;
+ }
+
+ case IN_PCR_RET: {
+ debug( REALM_BYTECODE, "IN_PCR_RET\n" );
+
+ FrameInfo *fi = &prg->rtd->frameInfo[exec->frameId];
+ downrefLocalTrees( prg, sp, exec->framePtr, fi->trees, fi->treesLen );
+ vm_popn( fi->frameSize );
+
+ instr = (Code*) vm_pop();
+ exec->rcodeUnitLen = ( long ) vm_pop();
+ exec->frameId = ( long ) vm_pop();
+ exec->iframePtr = ( Tree ** ) vm_pop();
+ exec->framePtr = ( Tree ** ) vm_pop();
+ exec->inputStream = ( InputStream * ) vm_pop();
+ exec->fsmRun = ( FsmRun * ) vm_pop();
+ exec->pdaRun = ( PdaRun * ) vm_pop();
+ exec->parser = ( Parser * ) vm_pop();
+
+ if ( instr == 0 ) {
+ fflush( stdout );
+ goto out;
+ }
+ break;
+ }
+
+ case IN_PCR_END_DECK: {
+ debug( REALM_BYTECODE, "IN_PCR_END_DECK\n" );
+ exec->pdaRun->onDeck = false;
+ break;
+ }
+
+ case IN_PARSE_FRAG_WC: {
+ debug( REALM_BYTECODE, "IN_PARSE_FRAG_WC\n" );
+
+ Half stopId;
+ read_half( stopId );
+
+ long pcr = (long)vm_pop();
+ Parser *parser = (Parser*)vm_pop();
+ long steps = (long)vm_pop();
+
+ pcr = parseFrag( prg, sp, parser, stopId, pcr );
+
+ vm_push( (SW)steps );
+ vm_push( (SW)parser );
+ vm_push( (SW)pcr );
+
+ /* If done, jump to the terminating instruction, otherwise fall
+ * through to call some code, then jump back here. */
+ if ( pcr == PcrDone )
+ instr += SIZEOF_CODE;
+ break;
+ }
+
+ case IN_PARSE_FRAG_WC3: {
+ debug( REALM_BYTECODE, "IN_PARSE_FRAG_WC3\n" );
+
+ vm_pop_ignore();
+ Parser *parser = (Parser*)vm_pop();
+ vm_pop_ignore();
+
+ treeDownref( prg, sp, (Tree*)parser );
+
+ if ( prg->induceExit )
+ goto out;
+
+ break;
+ }
+
+ case IN_PARSE_FRAG_WV: {
+ Half stopId;
+ read_half( stopId );
+
+ debug( REALM_BYTECODE, "IN_PARSE_FRAG_WV\n" );
+
+ long pcr = (long)vm_pop();
+ Parser *parser = (Parser*)vm_pop();
+ long steps = (long)vm_pop();
+
+ pcr = parseFrag( prg, sp, parser, stopId, pcr );
+
+ vm_push( (SW)steps );
+ vm_push( (SW)parser );
+ vm_push( (SW)pcr );
+
+ /* If done, jump to the terminating instruction, otherwise fall
+ * through to call some code, then jump back here. */
+ if ( pcr == PcrDone )
+ instr += SIZEOF_CODE;
+ break;
+ }
+
+ case IN_PARSE_FRAG_WV3: {
+ debug( REALM_BYTECODE, "IN_PARSE_FRAG_WV3 \n" );
+
+ vm_pop_ignore();
+ Parser *parser = (Parser*)vm_pop();
+ long steps = (long)vm_pop();
+
+ append( &exec->pdaRun->rcodeCollect, IN_LOAD_WORD );
+ appendWord( &exec->pdaRun->rcodeCollect, steps );
+ append( &exec->pdaRun->rcodeCollect, IN_LOAD_TREE );
+ appendWord( &exec->pdaRun->rcodeCollect, (Word)parser );
+ append( &exec->pdaRun->rcodeCollect, IN_PARSE_LOAD_START );
+ append( &exec->pdaRun->rcodeCollect, IN_PARSE_FRAG_BKT );
+ appendHalf( &exec->pdaRun->rcodeCollect, 0 );
+ append( &exec->pdaRun->rcodeCollect, IN_PCR_CALL );
+ append( &exec->pdaRun->rcodeCollect, IN_PARSE_FRAG_BKT3 );
+ append( &exec->pdaRun->rcodeCollect, 6 * SIZEOF_CODE + 2 * SIZEOF_WORD + SIZEOF_HALF );
+
+ if ( prg->induceExit )
+ goto out;
+ break;
+ }
+
+ case IN_PARSE_FRAG_BKT: {
+ Half stopId;
+ read_half( stopId );
+
+ debug( REALM_BYTECODE, "IN_PARSE_FRAG_BKT\n" );
+
+ long pcr = (long)vm_pop();
+ Parser *parser = (Parser*)vm_pop();
+ long steps = (long)vm_pop();
+
+ pcr = undoParseFrag( prg, sp, parser, steps, pcr );
+
+ vm_push( (SW)steps );
+ vm_push( (SW)parser );
+ vm_push( (SW)pcr );
+
+ if ( pcr == PcrDone )
+ instr += SIZEOF_CODE;
+ break;
+ }
+
+ case IN_PARSE_FRAG_BKT3: {
+ debug( REALM_BYTECODE, "IN_PARSE_FRAG_BKT3\n" );
+
+ vm_pop_ignore();
+ Parser *parser = (Parser*)vm_pop();
+ vm_pop_ignore();
+
+ treeDownref( prg, sp, (Tree*)parser );
+ break;
+ }
+
+ case IN_PARSE_FINISH_WC: {
+ Half stopId;
+ read_half( stopId );
+
+ debug( REALM_BYTECODE, "IN_PARSE_FINISH_WC\n" );
+
+ long pcr = (long)vm_pop();
+ Parser *parser = (Parser*)vm_pop();
+ long steps = (long)vm_pop();
+
+ parser->result = 0;
+ pcr = parseFinish( &parser->result, prg, sp, parser, false, pcr );
+
+ vm_push( (SW)steps );
+ vm_push( (SW)parser );
+ vm_push( (SW)pcr );
+
+ /* If done, jump to the terminating instruction, otherwise fall
+ * through to call some code, then jump back here. */
+ if ( pcr == PcrDone )
+ instr += SIZEOF_CODE;
+ break;
+ }
+
+ case IN_PARSE_FINISH_WC3: {
+ debug( REALM_BYTECODE, "IN_PARSE_FINISH_WC3\n" );
+
+ vm_pop_ignore();
+ Parser *parser = (Parser*)vm_pop();
+ vm_pop_ignore();
+
+ vm_push( parser->result );
+ debug( REALM_BYTECODE, "parser refs: %d\n", parser->refs );
+ treeDownref( prg, sp, (Tree*)parser );
+ if ( prg->induceExit )
+ goto out;
+
+ break;
+ }
+
+ case IN_PARSE_FINISH_WV: {
+ Half stopId;
+ read_half( stopId );
+
+ debug( REALM_BYTECODE, "IN_PARSE_FINISH_WV\n" );
+
+ long pcr = (long)vm_pop();
+ Parser *parser = (Parser*)vm_pop();
+ long steps = (long)vm_pop();
+
+ parser->result = 0;
+ pcr = parseFinish( &parser->result, prg, sp, parser, true, pcr );
+
+ vm_push( (SW)steps );
+ vm_push( (SW)parser );
+ vm_push( (SW)pcr );
+
+ if ( pcr == PcrDone )
+ instr += SIZEOF_CODE;
+ break;
+ }
+
+ case IN_PARSE_FINISH_WV3: {
+ debug( REALM_BYTECODE, "IN_PARSE_FINISH_WV3\n" );
+
+ vm_pop_ignore();
+ Parser *parser = (Parser*)vm_pop();
+ long steps = (long)vm_pop();
+
+ vm_push( parser->result );
+
+ append( &exec->pdaRun->rcodeCollect, IN_LOAD_WORD );
+ appendWord( &exec->pdaRun->rcodeCollect, steps );
+ append( &exec->pdaRun->rcodeCollect, IN_LOAD_TREE );
+ appendWord( &exec->pdaRun->rcodeCollect, (Word)parser );
+ append( &exec->pdaRun->rcodeCollect, IN_PARSE_LOAD_START );
+ append( &exec->pdaRun->rcodeCollect, IN_PARSE_FINISH_BKT );
+ appendHalf( &exec->pdaRun->rcodeCollect, 0 );
+ append( &exec->pdaRun->rcodeCollect, IN_PCR_CALL );
+ append( &exec->pdaRun->rcodeCollect, IN_PARSE_FINISH_BKT3 );
+ append( &exec->pdaRun->rcodeCollect, 6 * SIZEOF_CODE + 2 * SIZEOF_WORD + SIZEOF_HALF );
+
+ if ( prg->induceExit )
+ goto out;
+
+ break;
+ }
+
+ case IN_PARSE_FINISH_BKT: {
+ Half stopId;
+ read_half( stopId );
+
+ debug( REALM_BYTECODE, "IN_PARSE_FINISH_BKT\n" );
+
+ long pcr = (long)vm_pop();
+ Parser *parser = (Parser*)vm_pop();
+ long steps = (long)vm_pop();
+
+ pcr = undoParseFrag( prg, sp, parser, steps, pcr );
+
+ vm_push( (SW)steps );
+ vm_push( (SW)parser );
+ vm_push( (SW)pcr );
+
+ if ( pcr == PcrDone )
+ instr += SIZEOF_CODE;
+ break;
+ }
+
+ case IN_PARSE_FINISH_BKT3: {
+ debug( REALM_BYTECODE, "IN_PARSE_FINISH_BKT3\n" );
+
+ vm_pop_ignore();
+ Parser *parser = (Parser*)vm_pop();
+ vm_pop_ignore();
+
+ unsetEof( parser->input->in );
+ treeDownref( prg, sp, (Tree*)parser );
+ break;
+ }
+
+ case IN_INPUT_PULL_WV: {
+ debug( REALM_BYTECODE, "IN_INPUT_PULL_WV\n" );
+
+ Input *accumStream = (Input*)vm_pop();
+ Tree *len = vm_pop();
+ Tree *string = streamPullBc( prg, exec->fsmRun, accumStream->in, len );
+ treeUpref( string );
+ vm_push( string );
+
+ /* Single unit. */
+ treeUpref( string );
+ append( &exec->pdaRun->rcodeCollect, IN_INPUT_PULL_BKT );
+ appendWord( &exec->pdaRun->rcodeCollect, (Word) string );
+ appendWord( &exec->pdaRun->rcodeCollect, (Word) exec->fsmRun );
+ exec->rcodeUnitLen += SIZEOF_CODE + 2 *SIZEOF_WORD;
+ append( &exec->pdaRun->rcodeCollect, exec->rcodeUnitLen );
+
+ treeDownref( prg, sp, (Tree*)accumStream );
+ treeDownref( prg, sp, len );
+ break;
+ }
+ case IN_INPUT_PULL_BKT: {
+ Word f;
+ Tree *string;
+ read_tree( string );
+ read_word( f );
+ FsmRun *fsmRun = (FsmRun*)f;
+
+ Tree *accumStream = vm_pop();
+
+ debug( REALM_BYTECODE, "IN_INPUT_PULL_BKT\n" );
+
+ undoPull( prg, fsmRun, ((Input*)accumStream)->in, string );
+ treeDownref( prg, sp, accumStream );
+ treeDownref( prg, sp, string );
+ break;
+ }
+ case IN_INPUT_PUSH_WV: {
+ debug( REALM_BYTECODE, "IN_INPUT_PUSH_WV\n" );
+
+ Input *input = (Input*)vm_pop();
+ Tree *tree = vm_pop();
+ long len = streamPush( prg, sp, 0, input->in, tree, false );
+ vm_push( 0 );
+
+ /* Single unit. */
+ append( &exec->pdaRun->rcodeCollect, IN_INPUT_PUSH_BKT );
+ appendWord( &exec->pdaRun->rcodeCollect, len );
+ exec->rcodeUnitLen += SIZEOF_CODE + SIZEOF_WORD;
+ append( &exec->pdaRun->rcodeCollect, exec->rcodeUnitLen );
+
+ treeDownref( prg, sp, (Tree*)input );
+ treeDownref( prg, sp, tree );
+ break;
+ }
+ case IN_INPUT_PUSH_IGNORE_WV: {
+ debug( REALM_BYTECODE, "IN_INPUT_PUSH_IGNORE_WV\n" );
+
+ Input *input = (Input*)vm_pop();
+ Tree *tree = vm_pop();
+ long len = streamPush( prg, sp, 0, input->in, tree, true );
+ vm_push( 0 );
+
+ /* Single unit. */
+ append( &exec->pdaRun->rcodeCollect, IN_INPUT_PUSH_BKT );
+ appendWord( &exec->pdaRun->rcodeCollect, len );
+ exec->rcodeUnitLen += SIZEOF_CODE + SIZEOF_WORD;
+ append( &exec->pdaRun->rcodeCollect, exec->rcodeUnitLen );
+
+ treeDownref( prg, sp, (Tree*)input );
+ treeDownref( prg, sp, tree );
+ break;
+ }
+ case IN_INPUT_PUSH_BKT: {
+ Word len;
+ read_word( len );
+
+ Input *input = (Input*)vm_pop();
+
+ debug( REALM_BYTECODE, "IN_INPUT_PUSH_BKT\n" );
+
+ undoStreamPush( prg, sp, 0, input->in, len );
+ treeDownref( prg, sp, (Tree*)input );
+ break;
+ }
+ case IN_CONSTRUCT: {
+ Half patternId;
+ read_half( patternId );
+
+ debug( REALM_BYTECODE, "IN_CONSTRUCT\n" );
+
+ int rootNode = prg->rtd->patReplInfo[patternId].offset;
+
+ /* Note that bindIds are indexed at one. Add one spot for them. */
+ int numBindings = prg->rtd->patReplInfo[patternId].numBindings;
+ Tree *bindings[1+numBindings];
+
+ int b;
+ for ( b = 1; b <= numBindings; b++ ) {
+ bindings[b] = vm_pop();
+ assert( bindings[b] != 0 );
+ }
+
+ Tree *replTree = 0;
+ PatReplNode *nodes = prg->rtd->patReplNodes;
+ LangElInfo *lelInfo = prg->rtd->lelInfo;
+ long genericId = lelInfo[nodes[rootNode].id].genericId;
+ if ( genericId > 0 ) {
+ replTree = createGeneric( prg, genericId );
+ treeUpref( replTree );
+ }
+ else {
+ replTree = constructReplacementTree( 0, bindings,
+ prg, rootNode );
+ }
+
+ vm_push( replTree );
+ break;
+ }
+ case IN_CONSTRUCT_INPUT: {
+ debug( REALM_BYTECODE, "IN_CONSTRUCT_INPUT\n" );
+
+ Tree *input = constructInput( prg );
+ treeUpref( input );
+ vm_push( input );
+ break;
+ }
+ case IN_GET_INPUT: {
+ debug( REALM_BYTECODE, "IN_GET_INPUT\n" );
+
+ Parser *parser = (Parser*)vm_pop();
+ treeUpref( (Tree*)parser->input );
+ vm_push( (Tree*)parser->input );
+ treeDownref( prg, sp, (Tree*)parser );
+ break;
+ }
+ case IN_SET_INPUT: {
+ debug( REALM_BYTECODE, "IN_SET_INPUT\n" );
+
+ Parser *parser = (Parser*)vm_pop();
+ Input *accumStream = (Input*)vm_pop();
+ parser->input = accumStream;
+ treeUpref( (Tree*)accumStream );
+ treeDownref( prg, sp, (Tree*)parser );
+ treeDownref( prg, sp, (Tree*)accumStream );
+ break;
+ }
+ case IN_CONSTRUCT_TERM: {
+ Half tokenId;
+ read_half( tokenId );
+
+ debug( REALM_BYTECODE, "IN_CONSTRUCT_TERM\n" );
+
+ /* Pop the string we are constructing the token from. */
+ Str *str = (Str*)vm_pop();
+ Tree *res = constructTerm( prg, tokenId, str->value );
+ treeUpref( res );
+ vm_push( res );
+ break;
+ }
+ case IN_MAKE_TOKEN: {
+ uchar nargs;
+ read_byte( nargs );
+
+ debug( REALM_BYTECODE, "IN_MAKE_TOKEN\n" );
+
+ Tree *result = constructToken( prg, sp, nargs );
+ long i;
+ for ( i = 0; i < nargs; i++ ) {
+ Tree *arg = vm_pop();
+ treeDownref( prg, sp, arg );
+ }
+ vm_push( result );
+ break;
+ }
+ case IN_MAKE_TREE: {
+ uchar nargs;
+ read_byte( nargs );
+
+ debug( REALM_BYTECODE, "IN_MAKE_TREE\n" );
+
+ Tree *result = makeTree( prg, sp, nargs );
+ long i;
+ for ( i = 0; i < nargs; i++ ) {
+ Tree *arg = vm_pop();
+ treeDownref( prg, sp, arg );
+ }
+ vm_push( result );
+ break;
+ }
+ case IN_TREE_NEW: {
+ debug( REALM_BYTECODE, "IN_TREE_NEW \n" );
+
+ Tree *tree = vm_pop();
+ Tree *res = constructPointer( prg, tree );
+ treeUpref( res );
+ vm_push( res );
+ break;
+ }
+ case IN_PTR_DEREF_R: {
+ debug( REALM_BYTECODE, "IN_PTR_DEREF_R\n" );
+
+ Pointer *ptr = (Pointer*)vm_pop();
+ treeDownref( prg, sp, (Tree*)ptr );
+
+ Tree *dval = getPtrVal( ptr );
+ treeUpref( dval );
+ vm_push( dval );
+ break;
+ }
+ case IN_PTR_DEREF_WC: {
+ debug( REALM_BYTECODE, "IN_PTR_DEREF_WC\n" );
+
+ Pointer *ptr = (Pointer*)vm_pop();
+ treeDownref( prg, sp, (Tree*)ptr );
+
+ Tree *dval = getPtrValSplit( prg, ptr );
+ treeUpref( dval );
+ vm_push( dval );
+ break;
+ }
+ case IN_PTR_DEREF_WV: {
+ debug( REALM_BYTECODE, "IN_PTR_DEREF_WV\n" );
+
+ Pointer *ptr = (Pointer*)vm_pop();
+ /* Don't downref the pointer since it is going into the reverse
+ * instruction. */
+
+ Tree *dval = getPtrValSplit( prg, ptr );
+ treeUpref( dval );
+ vm_push( dval );
+
+ /* This is an initial global load. Need to reverse execute it. */
+ append( &exec->pdaRun->rcodeCollect, IN_PTR_DEREF_BKT );
+ appendWord( &exec->pdaRun->rcodeCollect, (Word) ptr );
+ exec->rcodeUnitLen = SIZEOF_CODE + SIZEOF_WORD;
+ break;
+ }
+ case IN_PTR_DEREF_BKT: {
+ Word p;
+ read_word( p );
+
+ debug( REALM_BYTECODE, "IN_PTR_DEREF_BKT\n" );
+
+ Pointer *ptr = (Pointer*)p;
+
+ Tree *dval = getPtrValSplit( prg, ptr );
+ treeUpref( dval );
+ vm_push( dval );
+
+ treeDownref( prg, sp, (Tree*)ptr );
+ break;
+ }
+ case IN_REF_FROM_LOCAL: {
+ short int field;
+ read_half( field );
+
+ debug( REALM_BYTECODE, "IN_REF_FROM_LOCAL\n" );
+
+ /* First push the null next pointer, then the kid pointer. */
+ Tree **ptr = vm_plocal(field);
+ vm_push( 0 );
+ vm_push( (SW)ptr );
+ break;
+ }
+ case IN_REF_FROM_REF: {
+ short int field;
+ read_half( field );
+
+ debug( REALM_BYTECODE, "IN_REF_FROM_REF\n" );
+
+ Ref *ref = (Ref*)vm_plocal(field);
+ vm_push( (SW)ref );
+ vm_push( (SW)ref->kid );
+ break;
+ }
+ case IN_REF_FROM_QUAL_REF: {
+ short int back;
+ short int field;
+ read_half( back );
+ read_half( field );
+
+ debug( REALM_BYTECODE, "IN_REF_FROM_QUAL_REF\n" );
+
+ Ref *ref = (Ref*)(sp + back);
+
+ Tree *obj = ref->kid->tree;
+ Kid *attr_kid = getFieldKid( obj, field );
+
+ vm_push( (SW)ref );
+ vm_push( (SW)attr_kid );
+ break;
+ }
+ case IN_TRITER_REF_FROM_CUR: {
+ short int field;
+ read_half( field );
+
+ debug( REALM_BYTECODE, "IN_TRITER_REF_FROM_CUR\n" );
+
+ /* Push the next pointer first, then the kid. */
+ TreeIter *iter = (TreeIter*) vm_plocal(field);
+ Ref *ref = &iter->ref;
+ vm_push( (SW)ref );
+ vm_push( (SW)iter->ref.kid );
+ break;
+ }
+ case IN_UITER_REF_FROM_CUR: {
+ short int field;
+ read_half( field );
+
+ debug( REALM_BYTECODE, "IN_UITER_REF_FROM_CUR\n" );
+
+ /* Push the next pointer first, then the kid. */
+ UserIter *uiter = (UserIter*) vm_local(field);
+ vm_push( (SW)uiter->ref.next );
+ vm_push( (SW)uiter->ref.kid );
+ break;
+ }
+ case IN_GET_TOKEN_DATA_R: {
+ debug( REALM_BYTECODE, "IN_GET_TOKEN_DATA_R\n" );
+
+ Tree *tree = (Tree*) vm_pop();
+ Head *data = stringCopy( prg, tree->tokdata );
+ Tree *str = constructString( prg, data );
+ treeUpref( str );
+ vm_push( str );
+ treeDownref( prg, sp, tree );
+ break;
+ }
+ case IN_SET_TOKEN_DATA_WC: {
+ debug( REALM_BYTECODE, "IN_SET_TOKEN_DATA_WC\n" );
+
+ Tree *tree = vm_pop();
+ Tree *val = vm_pop();
+ Head *head = stringCopy( prg, ((Str*)val)->value );
+ stringFree( prg, tree->tokdata );
+ tree->tokdata = head;
+
+ treeDownref( prg, sp, tree );
+ treeDownref( prg, sp, val );
+ break;
+ }
+ case IN_SET_TOKEN_DATA_WV: {
+ debug( REALM_BYTECODE, "IN_SET_TOKEN_DATA_WV\n" );
+
+ Tree *tree = vm_pop();
+ Tree *val = vm_pop();
+
+ Head *oldval = tree->tokdata;
+ Head *head = stringCopy( prg, ((Str*)val)->value );
+ tree->tokdata = head;
+
+ /* Set up reverse code. It carries the old token data as a word operand so the backtrack instruction can restore it. */
+ append( &exec->pdaRun->rcodeCollect, IN_SET_TOKEN_DATA_BKT );
+ appendWord( &exec->pdaRun->rcodeCollect, (Word)oldval );
+ exec->rcodeUnitLen += SIZEOF_CODE + SIZEOF_WORD;
+ append( &exec->pdaRun->rcodeCollect, exec->rcodeUnitLen );
+
+ treeDownref( prg, sp, tree );
+ treeDownref( prg, sp, val );
+ break;
+ }
+ case IN_SET_TOKEN_DATA_BKT: {
+ debug( REALM_BYTECODE, "IN_SET_TOKEN_DATA_BKT \n" );
+
+ Word oldval;
+ read_word( oldval );
+
+ Tree *tree = vm_pop();
+ Head *head = (Head*)oldval;
+ stringFree( prg, tree->tokdata );
+ tree->tokdata = head;
+ treeDownref( prg, sp, tree );
+ break;
+ }
+ case IN_GET_TOKEN_POS_R: {
+ debug( REALM_BYTECODE, "IN_GET_TOKEN_POS_R\n" );
+
+ Tree *tree = (Tree*) vm_pop();
+ Tree *integer = 0;
+ if ( tree->tokdata->location ) {
+ integer = constructInteger( prg, tree->tokdata->location->byte );
+ treeUpref( integer );
+ }
+ vm_push( integer );
+ treeDownref( prg, sp, tree );
+ break;
+ }
+ case IN_GET_TOKEN_LINE_R: {
+ debug( REALM_BYTECODE, "IN_GET_TOKEN_LINE_R\n" );
+
+ Tree *tree = (Tree*) vm_pop();
+ Tree *integer = 0;
+ if ( tree->tokdata->location ) {
+ integer = constructInteger( prg, tree->tokdata->location->line );
+ treeUpref( integer );
+ }
+ vm_push( integer );
+ treeDownref( prg, sp, tree );
+ break;
+ }
+ case IN_GET_MATCH_LENGTH_R: {
+ debug( REALM_BYTECODE, "IN_GET_MATCH_LENGTH_R\n" );
+
+ Tree *integer = constructInteger( prg, stringLength(exec->pdaRun->tokdata) );
+ treeUpref( integer );
+ vm_push( integer );
+ break;
+ }
+ case IN_GET_MATCH_TEXT_R: {
+ debug( REALM_BYTECODE, "IN_GET_MATCH_TEXT_R\n" );
+
+ Head *s = stringCopy( prg, exec->pdaRun->tokdata );
+ Tree *tree = constructString( prg, s );
+ treeUpref( tree );
+ vm_push( tree );
+ break;
+ }
+ case IN_LIST_LENGTH: {
+ debug( REALM_BYTECODE, "IN_LIST_LENGTH\n" );
+
+ List *list = (List*) vm_pop();
+ long len = listLength( list );
+ Tree *res = constructInteger( prg, len );
+ treeDownref( prg, sp, (Tree*)list );
+ treeUpref( res );
+ vm_push( res );
+ break;
+ }
+ case IN_LIST_APPEND_WV: {
+ debug( REALM_BYTECODE, "IN_LIST_APPEND_WV\n" );
+
+ Tree *obj = vm_pop();
+ Tree *val = vm_pop();
+
+ treeDownref( prg, sp, obj );
+
+ listAppend2( prg, (List*)obj, val );
+ treeUpref( prg->trueVal );
+ vm_push( prg->trueVal );
+
+ /* Set up reverse code. Needs no args. */
+ append( &exec->pdaRun->rcodeCollect, IN_LIST_APPEND_BKT );
+ exec->rcodeUnitLen += SIZEOF_CODE;
+ append( &exec->pdaRun->rcodeCollect, exec->rcodeUnitLen );
+ /* FLUSH */
+ break;
+ }
+ case IN_LIST_APPEND_WC: {
+ debug( REALM_BYTECODE, "IN_LIST_APPEND_WC\n" );
+
+ Tree *obj = vm_pop();
+ Tree *val = vm_pop();
+
+ treeDownref( prg, sp, obj );
+
+ listAppend2( prg, (List*)obj, val );
+ treeUpref( prg->trueVal );
+ vm_push( prg->trueVal );
+ break;
+ }
+ case IN_LIST_APPEND_BKT: {
+ debug( REALM_BYTECODE, "IN_LIST_APPEND_BKT\n" );
+
+ Tree *obj = vm_pop();
+ treeDownref( prg, sp, obj );
+
+ Tree *tree = listRemoveEnd( prg, (List*)obj );
+ treeDownref( prg, sp, tree );
+ break;
+ }
+ case IN_LIST_REMOVE_END_WC: {
+ debug( REALM_BYTECODE, "IN_LIST_REMOVE_END_WC\n" );
+
+ Tree *obj = vm_pop();
+ treeDownref( prg, sp, obj );
+
+ Tree *end = listRemoveEnd( prg, (List*)obj );
+ vm_push( end );
+ break;
+ }
+ case IN_LIST_REMOVE_END_WV: {
+ debug( REALM_BYTECODE, "IN_LIST_REMOVE_END_WV\n" );
+
+ Tree *obj = vm_pop();
+ treeDownref( prg, sp, obj );
+
+ Tree *end = listRemoveEnd( prg, (List*)obj );
+ vm_push( end );
+
+ /* Set up reverse. The result comes off the list downrefed.
+ * Need it up referenced for the reverse code too. */
+ treeUpref( end );
+ append( &exec->pdaRun->rcodeCollect, IN_LIST_REMOVE_END_BKT );
+ appendWord( &exec->pdaRun->rcodeCollect, (Word)end );
+ exec->rcodeUnitLen += SIZEOF_CODE + SIZEOF_WORD;
+ append( &exec->pdaRun->rcodeCollect, exec->rcodeUnitLen );
+ /* FLUSH */
+ break;
+ }
+ case IN_LIST_REMOVE_END_BKT: {
+ debug( REALM_BYTECODE, "IN_LIST_REMOVE_END_BKT\n" );
+
+ Tree *val;
+ read_tree( val );
+
+ Tree *obj = vm_pop();
+ treeDownref( prg, sp, obj );
+
+ listAppend2( prg, (List*)obj, val );
+ break;
+ }
+ case IN_GET_LIST_MEM_R: {
+ short field;
+ read_half( field );
+
+ debug( REALM_BYTECODE, "IN_GET_LIST_MEM_R\n" );
+
+ Tree *obj = vm_pop();
+ treeDownref( prg, sp, obj );
+
+ Tree *val = getListMem( (List*)obj, field );
+ treeUpref( val );
+ vm_push( val );
+ break;
+ }
+ case IN_GET_LIST_MEM_WC: {
+ short field;
+ read_half( field );
+
+ debug( REALM_BYTECODE, "IN_GET_LIST_MEM_WC\n" );
+
+ Tree *obj = vm_pop();
+ treeDownref( prg, sp, obj );
+
+ Tree *val = getListMemSplit( prg, (List*)obj, field );
+ treeUpref( val );
+ vm_push( val );
+ break;
+ }
+ case IN_GET_LIST_MEM_WV: {
+ short field;
+ read_half( field );
+
+ debug( REALM_BYTECODE, "IN_GET_LIST_MEM_WV\n" );
+
+ Tree *obj = vm_pop();
+ treeDownref( prg, sp, obj );
+
+ Tree *val = getListMemSplit( prg, (List*)obj, field );
+ treeUpref( val );
+ vm_push( val );
+
+ /* Set up the reverse instruction. */
+ append( &exec->pdaRun->rcodeCollect, IN_GET_LIST_MEM_BKT );
+ appendHalf( &exec->pdaRun->rcodeCollect, field );
+ exec->rcodeUnitLen += SIZEOF_CODE + SIZEOF_HALF;
+ break;
+ }
+ case IN_GET_LIST_MEM_BKT: {
+ short field;
+ read_half( field );
+
+ debug( REALM_BYTECODE, "IN_GET_LIST_MEM_BKT\n" );
+
+ Tree *obj = vm_pop();
+ treeDownref( prg, sp, obj );
+
+ Tree *res = getListMemSplit( prg, (List*)obj, field );
+ treeUpref( res );
+ vm_push( res );
+ break;
+ }
+ case IN_SET_LIST_MEM_WC: {
+ Half field;
+ read_half( field );
+
+ debug( REALM_BYTECODE, "IN_SET_LIST_MEM_WC\n" );
+
+ Tree *obj = vm_pop();
+ treeDownref( prg, sp, obj );
+
+ Tree *val = vm_pop();
+ Tree *existing = setListMem( (List*)obj, field, val );
+ treeDownref( prg, sp, existing );
+ break;
+ }
+ case IN_SET_LIST_MEM_WV: {
+ Half field;
+ read_half( field );
+
+ debug( REALM_BYTECODE, "IN_SET_LIST_MEM_WV\n" );
+
+ Tree *obj = vm_pop();
+ treeDownref( prg, sp, obj );
+
+ Tree *val = vm_pop();
+ Tree *existing = setListMem( (List*)obj, field, val );
+
+ /* Set up the reverse instruction. */
+ append( &exec->pdaRun->rcodeCollect, IN_SET_LIST_MEM_BKT );
+ appendHalf( &exec->pdaRun->rcodeCollect, field );
+ appendWord( &exec->pdaRun->rcodeCollect, (Word)existing );
+ exec->rcodeUnitLen += SIZEOF_CODE + SIZEOF_HALF + SIZEOF_WORD;
+ append( &exec->pdaRun->rcodeCollect, exec->rcodeUnitLen );
+ /* FLUSH */
+ break;
+ }
+ case IN_SET_LIST_MEM_BKT: {
+ Half field;
+ Tree *val;
+ read_half( field );
+ read_tree( val );
+
+ debug( REALM_BYTECODE, "IN_SET_LIST_MEM_BKT\n" );
+
+ Tree *obj = vm_pop();
+ treeDownref( prg, sp, obj );
+
+ Tree *undid = setListMem( (List*)obj, field, val );
+ treeDownref( prg, sp, undid );
+ break;
+ }
+ case IN_MAP_INSERT_WV: {
+ debug( REALM_BYTECODE, "IN_MAP_INSERT_WV\n" );
+
+ Tree *obj = vm_pop();
+ Tree *val = vm_pop();
+ Tree *key = vm_pop();
+
+ treeDownref( prg, sp, obj );
+
+ int inserted = mapInsert( prg, (Map*)obj, key, val );
+ Tree *result = inserted ? prg->trueVal : prg->falseVal;
+ treeUpref( result );
+ vm_push( result );
+
+ /* Set up the reverse instruction. If the insert fails still need
+ * to pop the loaded map object. Just use the reverse instruction
+ * since it's nice to see it in the logs. */
+
+ /* Need to upref key for storage in reverse code. */
+ treeUpref( key );
+ append( &exec->pdaRun->rcodeCollect, IN_MAP_INSERT_BKT );
+ append( &exec->pdaRun->rcodeCollect, inserted );
+ appendWord( &exec->pdaRun->rcodeCollect, (Word)key );
+ exec->rcodeUnitLen += SIZEOF_CODE + SIZEOF_CODE + SIZEOF_WORD;
+ append( &exec->pdaRun->rcodeCollect, exec->rcodeUnitLen );
+
+ if ( ! inserted ) {
+ treeDownref( prg, sp, key );
+ treeDownref( prg, sp, val );
+ }
+ break;
+ }
+ case IN_MAP_INSERT_WC: {
+ debug( REALM_BYTECODE, "IN_MAP_INSERT_WC\n" );
+
+ Tree *obj = vm_pop();
+ Tree *val = vm_pop();
+ Tree *key = vm_pop();
+
+ treeDownref( prg, sp, obj );
+
+ int inserted = mapInsert( prg, (Map*)obj, key, val );
+ Tree *result = inserted ? prg->trueVal : prg->falseVal;
+ treeUpref( result );
+ vm_push( result );
+
+ if ( ! inserted ) {
+ treeDownref( prg, sp, key );
+ treeDownref( prg, sp, val );
+ }
+ break;
+ }
+ case IN_MAP_INSERT_BKT: {
+ uchar inserted;
+ Tree *key;
+ read_byte( inserted );
+ read_tree( key );
+
+ debug( REALM_BYTECODE, "IN_MAP_INSERT_BKT\n" );
+
+ Tree *obj = vm_pop();
+ if ( inserted ) {
+ Tree *val = mapUninsert( prg, (Map*)obj, key );
+ treeDownref( prg, sp, key );
+ treeDownref( prg, sp, val );
+ }
+
+ treeDownref( prg, sp, obj );
+ treeDownref( prg, sp, key );
+ break;
+ }
+ case IN_MAP_STORE_WC: {
+ debug( REALM_BYTECODE, "IN_MAP_STORE_WC\n" );
+
+ Tree *obj = vm_pop();
+ Tree *element = vm_pop();
+ Tree *key = vm_pop();
+
+ Tree *existing = mapStore( prg, (Map*)obj, key, element );
+ Tree *result = existing == 0 ? prg->trueVal : prg->falseVal;
+ treeUpref( result );
+ vm_push( result );
+
+ treeDownref( prg, sp, obj );
+ if ( existing != 0 ) {
+ treeDownref( prg, sp, key );
+ treeDownref( prg, sp, existing );
+ }
+ break;
+ }
+ case IN_MAP_STORE_WV: {
+ debug( REALM_BYTECODE, "IN_MAP_STORE_WV\n" );
+
+ Tree *obj = vm_pop();
+ Tree *element = vm_pop();
+ Tree *key = vm_pop();
+
+ Tree *existing = mapStore( prg, (Map*)obj, key, element );
+ Tree *result = existing == 0 ? prg->trueVal : prg->falseVal;
+ treeUpref( result );
+ vm_push( result );
+
+ /* Set up the reverse instruction. */
+ treeUpref( key );
+ treeUpref( existing );
+ append( &exec->pdaRun->rcodeCollect, IN_MAP_STORE_BKT );
+ appendWord( &exec->pdaRun->rcodeCollect, (Word)key );
+ appendWord( &exec->pdaRun->rcodeCollect, (Word)existing );
+ exec->rcodeUnitLen += SIZEOF_CODE + SIZEOF_WORD + SIZEOF_WORD;
+ append( &exec->pdaRun->rcodeCollect, exec->rcodeUnitLen );
+ /* FLUSH */
+
+ treeDownref( prg, sp, obj );
+ if ( existing != 0 ) {
+ treeDownref( prg, sp, key );
+ treeDownref( prg, sp, existing );
+ }
+ break;
+ }
+ case IN_MAP_STORE_BKT: {
+ Tree *key, *val;
+ read_tree( key );
+ read_tree( val );
+
+ debug( REALM_BYTECODE, "IN_MAP_STORE_BKT\n" );
+
+ Tree *obj = vm_pop();
+ Tree *stored = mapUnstore( prg, (Map*)obj, key, val );
+
+ treeDownref( prg, sp, stored );
+ if ( val == 0 )
+ treeDownref( prg, sp, key );
+
+ treeDownref( prg, sp, obj );
+ treeDownref( prg, sp, key );
+ break;
+ }
+ case IN_MAP_REMOVE_WC: {
+ debug( REALM_BYTECODE, "IN_MAP_REMOVE_WC\n" );
+
+ Tree *obj = vm_pop();
+ Tree *key = vm_pop();
+ TreePair pair = mapRemove( prg, (Map*)obj, key );
+
+ vm_push( pair.val );
+
+ treeDownref( prg, sp, obj );
+ treeDownref( prg, sp, key );
+ treeDownref( prg, sp, pair.key );
+ break;
+ }
+ case IN_MAP_REMOVE_WV: {
+ debug( REALM_BYTECODE, "IN_MAP_REMOVE_WV\n" );
+
+ Tree *obj = vm_pop();
+ Tree *key = vm_pop();
+ TreePair pair = mapRemove( prg, (Map*)obj, key );
+
+ treeUpref( pair.val );
+ vm_push( pair.val );
+
+ /* Reverse instruction. */
+ append( &exec->pdaRun->rcodeCollect, IN_MAP_REMOVE_BKT );
+ appendWord( &exec->pdaRun->rcodeCollect, (Word)pair.key );
+ appendWord( &exec->pdaRun->rcodeCollect, (Word)pair.val );
+ exec->rcodeUnitLen += SIZEOF_CODE + SIZEOF_WORD + SIZEOF_WORD;
+ append( &exec->pdaRun->rcodeCollect, exec->rcodeUnitLen );
+
+ treeDownref( prg, sp, obj );
+ treeDownref( prg, sp, key );
+ break;
+ }
+ case IN_MAP_REMOVE_BKT: {
+ Tree *key, *val;
+ read_tree( key );
+ read_tree( val );
+
+ debug( REALM_BYTECODE, "IN_MAP_REMOVE_BKT\n" );
+
+ /* Either both or neither. */
+ assert( ( key == 0 ) ^ ( val != 0 ) );
+
+ Tree *obj = vm_pop();
+ if ( key != 0 )
+ mapUnremove( prg, (Map*)obj, key, val );
+
+ treeDownref( prg, sp, obj );
+ break;
+ }
+ case IN_MAP_LENGTH: {
+ debug( REALM_BYTECODE, "IN_MAP_LENGTH\n" );
+
+ Tree *obj = vm_pop();
+ long len = mapLength( (Map*)obj );
+ Tree *res = constructInteger( prg, len );
+ treeUpref( res );
+ vm_push( res );
+
+ treeDownref( prg, sp, obj );
+ break;
+ }
+ case IN_MAP_FIND: {
+ debug( REALM_BYTECODE, "IN_MAP_FIND\n" );
+
+ Tree *obj = vm_pop();
+ Tree *key = vm_pop();
+ Tree *result = mapFind( prg, (Map*)obj, key );
+ treeUpref( result );
+ vm_push( result );
+
+ treeDownref( prg, sp, obj );
+ treeDownref( prg, sp, key );
+ break;
+ }
+ case IN_INIT_LOCALS: {
+ Half size;
+ read_half( size );
+
+ debug( REALM_BYTECODE, "IN_INIT_LOCALS\n" );
+
+ exec->framePtr = vm_ptop();
+ vm_pushn( size );
+ memset( vm_ptop(), 0, sizeof(Word) * size );
+ break;
+ }
+ case IN_CALL_WV: {
+ Half funcId;
+ read_half( funcId );
+
+ FunctionInfo *fi = &prg->rtd->functionInfo[funcId];
+
+ debug( REALM_BYTECODE, "IN_CALL_WV %ld\n", fi->name );
+
+ vm_push( 0 ); /* Return value. */
+ vm_push( (SW)instr );
+ vm_push( (SW)exec->framePtr );
+ vm_push( (SW)exec->frameId );
+
+ instr = prg->rtd->frameInfo[fi->frameId].codeWV;
+ exec->framePtr = vm_ptop();
+ exec->frameId = fi->frameId;
+ break;
+ }
+ case IN_CALL_WC: {
+ Half funcId;
+ read_half( funcId );
+
+ FunctionInfo *fi = &prg->rtd->functionInfo[funcId];
+
+ debug( REALM_BYTECODE, "IN_CALL_WC %ld\n", fi->name );
+
+ vm_push( 0 ); /* Return value. */
+ vm_push( (SW)instr );
+ vm_push( (SW)exec->framePtr );
+ vm_push( (SW)exec->frameId );
+
+ instr = prg->rtd->frameInfo[fi->frameId].codeWC;
+ exec->framePtr = vm_ptop();
+ exec->frameId = fi->frameId;
+ break;
+ }
+ case IN_YIELD: {
+ debug( REALM_BYTECODE, "IN_YIELD\n" );
+
+ Kid *kid = (Kid*)vm_pop();
+ Ref *next = (Ref*)vm_pop();
+ UserIter *uiter = (UserIter*) vm_plocal_iframe( IFR_AA );
+
+ if ( kid == 0 || kid->tree == 0 ||
+ kid->tree->id == uiter->searchId ||
+ uiter->searchId == prg->rtd->anyId )
+ {
+ /* Store the yielded value. */
+ uiter->ref.kid = kid;
+ uiter->ref.next = next;
+ uiter->stackSize = uiter->stackRoot - vm_ptop();
+ uiter->resume = instr;
+ uiter->frame = exec->framePtr;
+
+ /* Restore the instruction and frame pointer. */
+ instr = (Code*) vm_local_iframe(IFR_RIN);
+ exec->framePtr = (Tree**) vm_local_iframe(IFR_RFR);
+ exec->iframePtr = (Tree**) vm_local_iframe(IFR_RIF);
+
+ /* Return the yield result on the top of the stack. */
+ Tree *result = uiter->ref.kid != 0 ? prg->trueVal : prg->falseVal;
+ treeUpref( result );
+ vm_push( result );
+ }
+ break;
+ }
+ case IN_UITER_CREATE_WV: {
+ short field;
+ Half funcId, searchId;
+ read_half( field );
+ read_half( funcId );
+ read_half( searchId );
+
+ debug( REALM_BYTECODE, "IN_UITER_CREATE_WV\n" );
+
+ FunctionInfo *fi = prg->rtd->functionInfo + funcId;
+ UserIter *uiter = uiterCreate( prg, &sp, fi, searchId );
+ vm_local(field) = (SW) uiter;
+
+ /* This setup is similar to a call, only the frame structure
+ * is slightly different for user iterators. We aren't going to do
+ * the call. We don't need to set up the return ip because the
+ * uiter advance will set it. We do need to set up the frame because
+ * it is set once for the lifetime of the iterator. */
+ vm_push( 0 ); /* Return instruction pointer, */
+ vm_push( (SW)exec->iframePtr ); /* Return iframe. */
+ vm_push( (SW)exec->framePtr ); /* Return frame. */
+
+ uiterInit( prg, sp, uiter, fi, true );
+ break;
+ }
+ case IN_UITER_CREATE_WC: {
+ short field;
+ Half funcId, searchId;
+ read_half( field );
+ read_half( funcId );
+ read_half( searchId );
+
+ debug( REALM_BYTECODE, "IN_UITER_CREATE_WC\n" );
+
+ FunctionInfo *fi = prg->rtd->functionInfo + funcId;
+ UserIter *uiter = uiterCreate( prg, &sp, fi, searchId );
+ vm_local(field) = (SW) uiter;
+
+ /* This setup is similar to a call, only the frame structure
+ * is slightly different for user iterators. We aren't going to do
+ * the call. We don't need to set up the return ip because the
+ * uiter advance will set it. We do need to set up the frame because
+ * it is set once for the lifetime of the iterator. */
+ vm_push( 0 ); /* Return instruction pointer, */
+ vm_push( (SW)exec->iframePtr ); /* Return iframe. */
+ vm_push( (SW)exec->framePtr ); /* Return frame. */
+
+ uiterInit( prg, sp, uiter, fi, false );
+ break;
+ }
+ case IN_UITER_DESTROY: {
+ short field;
+ read_half( field );
+
+ debug( REALM_BYTECODE, "IN_UITER_DESTROY\n" );
+
+ UserIter *uiter = (UserIter*) vm_local(field);
+ userIterDestroy( &sp, uiter );
+ break;
+ }
+ case IN_RET: {
+ debug( REALM_BYTECODE, "IN_RET\n" );
+
+ FrameInfo *fi = &prg->rtd->frameInfo[exec->frameId];
+ downrefLocalTrees( prg, sp, exec->framePtr, fi->trees, fi->treesLen );
+ vm_popn( fi->frameSize );
+
+ exec->frameId = (long) vm_pop();
+ exec->framePtr = (Tree**) vm_pop();
+ instr = (Code*) vm_pop();
+ Tree *retVal = vm_pop();
+ vm_popn( fi->argSize );
+ vm_push( retVal );
+ break;
+ }
+ case IN_TO_UPPER: {
+ debug( REALM_BYTECODE, "IN_TO_UPPER\n" );
+
+ Tree *in = vm_pop();
+ Head *head = stringToUpper( in->tokdata );
+ Tree *upper = constructString( prg, head );
+ treeUpref( upper );
+ vm_push( upper );
+ treeDownref( prg, sp, in );
+ break;
+ }
+ case IN_TO_LOWER: {
+ debug( REALM_BYTECODE, "IN_TO_LOWER\n" );
+
+ Tree *in = vm_pop();
+ Head *head = stringToLower( in->tokdata );
+ Tree *lower = constructString( prg, head );
+ treeUpref( lower );
+ vm_push( lower );
+ treeDownref( prg, sp, in );
+ break;
+ }
+ case IN_ERROR: {
+ debug( REALM_BYTECODE, "IN_ERROR\n" );
+
+ /* Pop the global. */
+ Tree *global = vm_pop();
+ treeDownref( prg, sp, global );
+ treeUpref( prg->lastParseError );
+ vm_push( prg->lastParseError );
+ break;
+ }
+ case IN_OPEN_FILE: {
+ debug( REALM_BYTECODE, "IN_OPEN_FILE\n" );
+
+ Tree *mode = vm_pop();
+ Tree *name = vm_pop();
+ Tree *res = (Tree*)openFile( prg, name, mode );
+ treeUpref( res );
+ vm_push( res );
+ treeDownref( prg, sp, name );
+ treeDownref( prg, sp, mode );
+ break;
+ }
+ case IN_GET_STDIN: {
+ debug( REALM_BYTECODE, "IN_GET_STDIN\n" );
+
+ /* Pop the root object. */
+ Tree *obj = vm_pop();
+ treeDownref( prg, sp, obj );
+ if ( prg->stdinVal == 0 ) {
+ prg->stdinVal = openStreamFd( prg, 0 );
+ treeUpref( (Tree*)prg->stdinVal );
+ }
+
+ treeUpref( (Tree*)prg->stdinVal );
+ vm_push( (Tree*)prg->stdinVal );
+ break;
+ }
+ case IN_LOAD_ARGV: {
+ Half field;
+ read_half( field );
+ debug( REALM_BYTECODE, "IN_LOAD_ARGV %lu\n", field );
+
+ /* Tree comes back upreffed. */
+ Tree *tree = constructArgv( prg, prg->argc, prg->argv );
+ setField( prg, prg->global, field, tree );
+ break;
+ }
+
+ case IN_EXIT: {
+ debug( REALM_BYTECODE, "IN_EXIT\n" );
+
+ Tree *global = vm_pop();
+ Int *status = (Int*)vm_pop();
+ prg->exitStatus = status->value;
+ prg->induceExit = 1;
+ treeDownref( prg, sp, global );
+ treeDownref( prg, sp, (Tree*)status );
+
+ while ( true ) {
+ FrameInfo *fi = &prg->rtd->frameInfo[exec->frameId];
+ downrefLocalTrees( prg, sp, exec->framePtr, fi->trees, fi->treesLen );
+ vm_popn( fi->frameSize );
+
+ /* Call layout. */
+ exec->frameId = (long) vm_pop();
+ exec->framePtr = (Tree**) vm_pop();
+ instr = (Code*) vm_pop();
+ Tree *retVal = vm_pop();
+ vm_popn( fi->argSize );
+
+ treeDownref( prg, sp, retVal );
+
+ /* We stop on the root, which doesn't have the full function
+ * stack layout. */
+ if ( exec->frameId == prg->rtd->rootFrameId )
+ break;
+ }
+
+ goto out;
+ }
+
+ case IN_STOP: {
+ debug( REALM_BYTECODE, "IN_STOP\n" );
+
+ FrameInfo *fi = &prg->rtd->frameInfo[exec->frameId];
+ downrefLocalTrees( prg, sp, exec->framePtr, fi->trees, fi->treesLen );
+ vm_popn( fi->frameSize );
+
+ fflush( stdout );
+ goto out;
+ }
+
+ /* Halt is a default instruction given by the compiler when it is
+ * asked to generate an instruction it doesn't have. It is deliberate
+ * and can represent "not implemented" or "compiler error" because a
+ * variable holding instructions was not properly initialized. */
+ case IN_HALT: {
+ fatal( "IN_HALT -- compiler did something wrong\n" );
+ exit(1);
+ break;
+ }
+ default: {
+ fatal( "UNKNOWN INSTRUCTION: 0x%2x -- something is wrong\n", *(instr-1) );
+ assert(false);
+ break;
+ }
+ }
+ goto again;
+
+out:
+ if ( ! prg->induceExit )
+ assert( sp == root );
+ return sp;
+}
+
diff --git a/src/bytecode.h b/src/bytecode.h
new file mode 100644
index 00000000..8e626ef1
--- /dev/null
+++ b/src/bytecode.h
@@ -0,0 +1,487 @@
+/*
+ * Copyright 2007-2012 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _BYTECODE_H
+#define _BYTECODE_H
+
+#include <pdarun.h>
+#include <tree.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if SIZEOF_LONG != 4 && SIZEOF_LONG != 8
+ #error "SIZEOF_LONG contained an unexpected value"
+#endif
+
+typedef unsigned long ulong;
+typedef unsigned char uchar;
+
+#define IN_LOAD_INT 0x02
+#define IN_LOAD_STR 0x03
+#define IN_LOAD_NIL 0x04
+#define IN_LOAD_TRUE 0x05
+#define IN_LOAD_FALSE 0x06
+#define IN_LOAD_TREE 0xf4
+#define IN_LOAD_WORD 0xf5
+
+#define IN_ADD_INT 0x07
+#define IN_SUB_INT 0x08
+#define IN_MULT_INT 0x09
+#define IN_DIV_INT 0xd0
+
+#define IN_TST_EQL 0x0a
+#define IN_TST_NOT_EQL 0x0b
+#define IN_TST_LESS 0x0c
+#define IN_TST_GRTR 0x0d
+#define IN_TST_LESS_EQL 0x0e
+#define IN_TST_GRTR_EQL 0x0f
+#define IN_TST_LOGICAL_AND 0x10
+#define IN_TST_LOGICAL_OR 0x11
+
+#define IN_NOT 0x12
+
+#define IN_JMP 0x13
+#define IN_JMP_FALSE 0x14
+#define IN_JMP_TRUE 0x15
+
+#define IN_STR_ATOI 0x16
+#define IN_STR_LENGTH 0x17
+#define IN_CONCAT_STR 0x18
+#define IN_TREE_TRIM 0xfc
+
+#define IN_INIT_LOCALS 0x19
+#define IN_POP 0x1b
+#define IN_POP_N_WORDS 0x1c
+#define IN_DUP_TOP 0x1d
+#define IN_DUP_TOP_OFF 0xbc
+#define IN_REJECT 0x1e
+#define IN_MATCH 0x1f
+#define IN_CONSTRUCT 0x20
+#define IN_TREE_NEW 0x21
+
+#define IN_GET_LOCAL_R 0x22
+#define IN_GET_LOCAL_WC 0x23
+#define IN_SET_LOCAL_WC 0x24
+
+#define IN_GET_LOCAL_REF_R 0x25
+#define IN_GET_LOCAL_REF_WC 0x26
+#define IN_SET_LOCAL_REF_WC 0x27
+
+#define IN_SAVE_RET 0x28
+
+#define IN_GET_FIELD_R 0x29
+#define IN_GET_FIELD_WC 0x2a
+#define IN_GET_FIELD_WV 0x2b
+#define IN_GET_FIELD_BKT 0x2c
+
+#define IN_SET_FIELD_WV 0x2d
+#define IN_SET_FIELD_WC 0x2e
+#define IN_SET_FIELD_BKT 0x2f
+#define IN_SET_FIELD_LEAVE_WC 0x30
+
+#define IN_GET_MATCH_LENGTH_R 0x31
+#define IN_GET_MATCH_TEXT_R 0x32
+
+#define IN_GET_TOKEN_DATA_R 0x33
+#define IN_SET_TOKEN_DATA_WC 0x34
+#define IN_SET_TOKEN_DATA_WV 0x35
+#define IN_SET_TOKEN_DATA_BKT 0x36
+
+#define IN_GET_TOKEN_POS_R 0x37
+#define IN_GET_TOKEN_LINE_R 0xf6
+
+#define IN_INIT_RHS_EL 0x38
+#define IN_INIT_LHS_EL 0xef
+#define IN_INIT_CAPTURES 0x39
+#define IN_STORE_LHS_EL 0xf0
+#define IN_RESTORE_LHS 0x01
+
+#define IN_TRITER_FROM_REF 0x3a
+#define IN_TRITER_ADVANCE 0x3b
+#define IN_TRITER_NEXT_CHILD 0x3c
+#define IN_TRITER_GET_CUR_R 0x3d
+#define IN_TRITER_GET_CUR_WC 0x3e
+#define IN_TRITER_SET_CUR_WC 0x3f
+#define IN_TRITER_DESTROY 0x40
+#define IN_TRITER_NEXT_REPEAT 0x41
+#define IN_TRITER_PREV_REPEAT 0x42
+
+#define IN_REV_TRITER_FROM_REF 0x43
+#define IN_REV_TRITER_DESTROY 0x44
+#define IN_REV_TRITER_PREV_CHILD 0x45
+
+#define IN_UITER_DESTROY 0x46
+#define IN_UITER_CREATE_WV 0x47
+#define IN_UITER_CREATE_WC 0x48
+#define IN_UITER_ADVANCE 0x49
+#define IN_UITER_GET_CUR_R 0x4a
+#define IN_UITER_GET_CUR_WC 0x4b
+#define IN_UITER_SET_CUR_WC 0x4c
+
+#define IN_TREE_SEARCH 0x4d
+
+#define IN_LOAD_GLOBAL_R 0x4e
+#define IN_LOAD_GLOBAL_WV 0x4f
+#define IN_LOAD_GLOBAL_WC 0x50
+#define IN_LOAD_GLOBAL_BKT 0x51
+
+#define IN_PTR_DEREF_R 0x52
+#define IN_PTR_DEREF_WV 0x53
+#define IN_PTR_DEREF_WC 0x54
+#define IN_PTR_DEREF_BKT 0x55
+
+#define IN_REF_FROM_LOCAL 0x56
+#define IN_REF_FROM_REF 0x57
+#define IN_REF_FROM_QUAL_REF 0x58
+#define IN_TRITER_REF_FROM_CUR 0x59
+#define IN_UITER_REF_FROM_CUR 0x5a
+
+#define IN_MAP_LENGTH 0x5b
+#define IN_MAP_FIND 0x5c
+#define IN_MAP_INSERT_WV 0x5d
+#define IN_MAP_INSERT_WC 0x5e
+#define IN_MAP_INSERT_BKT 0x5f
+#define IN_MAP_STORE_WV 0x60
+#define IN_MAP_STORE_WC 0x61
+#define IN_MAP_STORE_BKT 0x62
+#define IN_MAP_REMOVE_WV 0x63
+#define IN_MAP_REMOVE_WC 0x64
+#define IN_MAP_REMOVE_BKT 0x65
+
+#define IN_LIST_LENGTH 0x66
+#define IN_LIST_APPEND_WV 0x67
+#define IN_LIST_APPEND_WC 0x68
+#define IN_LIST_APPEND_BKT 0x69
+#define IN_LIST_REMOVE_END_WV 0x6a
+#define IN_LIST_REMOVE_END_WC 0x6b
+#define IN_LIST_REMOVE_END_BKT 0x6c
+
+#define IN_GET_LIST_MEM_R 0x6d
+#define IN_GET_LIST_MEM_WC 0x6e
+#define IN_GET_LIST_MEM_WV 0x6f
+#define IN_GET_LIST_MEM_BKT 0x70
+#define IN_SET_LIST_MEM_WV 0x71
+#define IN_SET_LIST_MEM_WC 0x72
+#define IN_SET_LIST_MEM_BKT 0x73
+
+#define IN_VECTOR_LENGTH 0x74
+#define IN_VECTOR_APPEND_WV 0x75
+#define IN_VECTOR_APPEND_WC 0x76
+#define IN_VECTOR_APPEND_BKT 0x77
+#define IN_VECTOR_INSERT_WV 0x78
+#define IN_VECTOR_INSERT_WC 0x79
+#define IN_VECTOR_INSERT_BKT 0x7a
+
+#define IN_PRINT 0x7b
+#define IN_PRINT_XML_AC 0x7c
+#define IN_PRINT_XML 0x7d
+#define IN_PRINT_STREAM 0x7e
+
+#define IN_HALT 0x7f
+
+#define IN_CALL_WC 0x80
+#define IN_CALL_WV 0x81
+#define IN_RET 0x82
+#define IN_YIELD 0x83
+#define IN_STOP 0x84
+
+#define IN_STR_UORD8 0x85
+#define IN_STR_SORD8 0x86
+#define IN_STR_UORD16 0x87
+#define IN_STR_SORD16 0x88
+#define IN_STR_UORD32 0x89
+#define IN_STR_SORD32 0x8a
+
+#define IN_INT_TO_STR 0x8b
+#define IN_TREE_TO_STR 0x8c
+#define IN_TREE_TO_STR_NOTRIM 0xfd
+
+#define IN_CREATE_TOKEN 0x8d
+#define IN_MAKE_TOKEN 0x8e
+#define IN_MAKE_TREE 0x8f
+#define IN_CONSTRUCT_TERM 0x90
+
+#define IN_INPUT_PULL_WV 0xf7
+#define IN_INPUT_PULL_BKT 0xf8
+
+#define IN_PARSE_LOAD_START 0xf2
+#define IN_PARSE_SAVE_STEPS 0xf3
+#define IN_PARSE_FRAG_WC 0xc0
+#define IN_PARSE_FRAG_WC3 0xe1
+
+#define IN_PARSE_FRAG_WV 0xc1
+#define IN_PARSE_FRAG_WV3 0xe4
+
+#define IN_PARSE_FRAG_BKT 0xc2
+#define IN_PARSE_FRAG_BKT3 0xe6
+
+#define IN_INPUT_APPEND_WC 0x91
+#define IN_INPUT_APPEND_WV 0x92
+#define IN_INPUT_APPEND_BKT 0x93
+
+#define IN_PARSE_FINISH_WC 0x9d
+#define IN_PARSE_FINISH_WC3 0xea
+
+#define IN_PARSE_FINISH_WV 0xbd
+#define IN_PARSE_FINISH_WV3 0xeb
+
+#define IN_PARSE_FINISH_BKT 0xbf
+#define IN_PARSE_FINISH_BKT3 0xec
+
+#define IN_PCR_CALL 0xe0
+#define IN_PCR_RET 0xe3
+#define IN_PCR_END_DECK 0xed
+
+/* NOTE(review): unlike every other IN_* opcode in this table, this macro is
+ * defined without a byte value, so it expands to nothing. Presumably a
+ * leftover or placeholder -- confirm it is unused before giving it a value
+ * or removing it. */
+#define IN_PARSE_EXTRACT_INPUT
+
+#define IN_OPEN_FILE 0x9e
+#define IN_GET_STDIN 0x9f
+#define IN_GET_STDOUT 0xa0
+#define IN_GET_STDERR 0xa1
+#define IN_LOAD_ARGV 0xa2
+#define IN_TO_UPPER 0xa3
+#define IN_TO_LOWER 0xa4
+#define IN_EXIT 0xa5
+#define IN_ERROR 0xa6
+
+#define IN_LOAD_ACCUM_R 0xa8
+#define IN_LOAD_ACCUM_WV 0xa9
+#define IN_LOAD_ACCUM_WC 0xaa
+#define IN_LOAD_ACCUM_BKT 0xab
+
+#define IN_LOAD_INPUT_R 0x98
+#define IN_LOAD_INPUT_WV 0x99
+#define IN_LOAD_INPUT_WC 0x9a
+#define IN_LOAD_INPUT_BKT 0x9b
+
+#define IN_INPUT_PUSH_WV 0xf9
+#define IN_INPUT_PUSH_BKT 0xfa
+#define IN_INPUT_PUSH_IGNORE_WV 0xfb
+
+#define IN_LOAD_CONTEXT_R 0xac
+#define IN_LOAD_CONTEXT_WV 0xad
+#define IN_LOAD_CONTEXT_WC 0xae
+#define IN_LOAD_CONTEXT_BKT 0xaf
+
+#define IN_GET_ACCUM_CTX_R 0xb0
+#define IN_GET_ACCUM_CTX_WC 0xb1
+#define IN_GET_ACCUM_CTX_WV 0xb2
+#define IN_SET_ACCUM_CTX_WC 0xb3
+#define IN_SET_ACCUM_CTX_WV 0xb4
+
+#define IN_LOAD_CTX_R 0xb5
+#define IN_LOAD_CTX_WC 0xb6
+#define IN_LOAD_CTX_WV 0xb7
+#define IN_LOAD_CTX_BKT 0xb8
+
+#define IN_SPRINTF 0xcf
+
+#define IN_GET_RHS_VAL_R 0xd1
+#define IN_GET_RHS_VAL_WC 0xd2
+#define IN_GET_RHS_VAL_WV 0xd3
+#define IN_GET_RHS_VAL_BKT 0xd4
+#define IN_SET_RHS_VAL_WC 0xd5
+#define IN_SET_RHS_VAL_WV 0xd6
+#define IN_SET_RHS_VAL_BKT 0xd7
+
+#define IN_CONSTRUCT_INPUT 0x9c
+#define IN_SET_INPUT 0xa7
+#define IN_GET_INPUT 0xb9
+
+/* Types */
+#define TYPE_NIL 0x01
+#define TYPE_TREE 0x02
+#define TYPE_REF 0x03
+#define TYPE_PTR 0x04
+#define TYPE_ITER 0x05
+#define TYPE_IGNORE_LIST 0x06
+
+/* Types of Generics. */
+#define GEN_LIST 0x10
+#define GEN_MAP 0x11
+#define GEN_VECTOR 0x12
+#define GEN_PARSER 0x13
+
+/* Virtual machine stack size, number of pointers.
+ * This will be mmapped. */
+#define VM_STACK_SIZE (SIZEOF_WORD*1024ll*1024ll)
+
+/* Known language element ids. */
+#define LEL_ID_PTR 1
+#define LEL_ID_BOOL 2
+#define LEL_ID_INT 3
+#define LEL_ID_STR 4
+#define LEL_ID_STREAM 5
+#define LEL_ID_INPUT 6
+#define LEL_ID_IGNORE 7
+
+/*
+ * Flags
+ */
+
+/* A tree that has been generated by a termDup. */
+#define PF_TERM_DUP 0x0001
+
+/* Has been processed by the commit function. All children have also been
+ * processed. */
+#define PF_COMMITTED 0x0002
+
+/* Created by a token generation action, not made from the input. */
+#define PF_ARTIFICIAL 0x0004
+
+/* Named node from a pattern or constructor. */
+#define PF_NAMED 0x0008
+
+/* There is reverse code associated with this tree node. */
+#define PF_HAS_RCODE 0x0010
+
+#define PF_RIGHT_IGNORE 0x0020
+
+#define PF_LEFT_IL_ATTACHED 0x0400
+#define PF_RIGHT_IL_ATTACHED 0x0800
+
+#define AF_LEFT_IGNORE 0x0100
+#define AF_RIGHT_IGNORE 0x0200
+
+#define AF_SUPPRESS_LEFT 0x4000
+#define AF_SUPPRESS_RIGHT 0x8000
+
+/*
+ * Call stack.
+ */
+
+/* Number of spots in the frame, after the args. */
+#define FR_AA 4
+
+/* Positions relative to the frame pointer. */
+#define FR_RV 3 /* return value */
+#define FR_RI 2 /* return instruction */
+#define FR_RFP 1 /* return frame pointer */
+#define FR_RFD 0 /* return frame id. */
+
+/*
+ * Calling Convention:
+ * a1
+ * a2
+ * a3
+ * ...
+ * return value FR_RV
+ * return instr FR_RI
+ * return frame ptr FR_RFP
+ * return frame id FR_RFD
+ */
+
+/*
+ * User iterator call stack.
+ * Adds an iframe pointer, removes the return value.
+ */
+
+/* Number of spots in the frame, after the args. */
+#define IFR_AA 3
+
+/* Positions relative to the frame pointer. */
+#define IFR_RIN 2 /* return instruction */
+#define IFR_RIF 1 /* return iframe pointer */
+#define IFR_RFR 0 /* return frame pointer */
+
+/* Exported to modules other than bytecode.c */
+/* Stack accessors. Every macro here operates on a variable named sp that
+ * must be in scope where the macro is used. The stack grows toward lower
+ * addresses: vm_push pre-decrements sp before storing, vm_pop reads and then
+ * post-increments. The overflow check that would call vm_grow() is currently
+ * commented out inside vm_push, so no growth happens on push.
+ *   vm_top()        - the top element, left in place
+ *   vm_ptop()       - the stack pointer itself
+ *   vm_pop_ignore() - advance sp by one slot without reading the element */
+#define vm_push(i) /*if ( sp == prg->se ) vm_grow( prg ); */ (*(--sp) = (i))
+#define vm_pop() (*sp++)
+#define vm_top() (*sp)
+#define vm_ptop() (sp)
+#define vm_pop_ignore() (sp++)
+
+void vm_grow( struct ColmProgram * );
+
+typedef Tree *SW;
+typedef Tree **StackPtr;
+
+
+/* Can't use sizeof() because we have used types that are bigger than the
+ * serial representation. */
+#define SIZEOF_CODE 1
+#define SIZEOF_HALF 2
+#define SIZEOF_WORD sizeof(Word)
+
+/* State of one bytecode execution. It is filled in by initExecution() and
+ * passed to executeCode() and the *Execution() entry points declared below. */
+typedef struct _Execution
+{
+ /* The parsing objects this execution was initialized with (see
+ * initExecution). */
+ Parser *parser;
+ PdaRun *pdaRun;
+ FsmRun *fsmRun;
+ InputStream *inputStream;
+ /* Frame pointer of the function being executed. It is saved on and
+ * restored from the VM stack as part of the call layout (FR_RFP). */
+ Tree **framePtr;
+ /* Presumably the frame pointer used by user iterators (see the IFR_*
+ * layout above) -- TODO confirm. */
+ Tree **iframePtr;
+ /* Id of the frame being executed; used as an index into
+ * prg->rtd->frameInfo. */
+ long frameId;
+ /* NOTE(review): looks like the length of the reverse-code unit being
+ * built; not visible from this header -- confirm in bytecode.c. */
+ long rcodeUnitLen;
+} Execution;
+
+long stringLength( Head *str );
+const char *stringData( Head *str );
+Head *stringAllocFull( struct ColmProgram *prg, const char *data, long length );
+Head *stringCopy( struct ColmProgram *prg, Head *head );
+void stringFree( struct ColmProgram *prg, Head *head );
+void stringShorten( Head *tokdata, long newlen );
+Head *concatStr( Head *s1, Head *s2 );
+Word strAtoi( Head *str );
+Word strUord16( Head *head );
+Word strUord8( Head *head );
+Word cmpString( Head *s1, Head *s2 );
+Head *stringToUpper( Head *s );
+Head *stringToLower( Head *s );
+Head *stringSprintf( struct ColmProgram *prg, Str *format, Int *integer );
+
+Head *makeLiteral( struct ColmProgram *prg, long litoffset );
+Head *intToStr( struct ColmProgram *prg, Word i );
+
+Tree *constructString( struct ColmProgram *prg, Head *s );
+
+void initExecution( Execution *exec, Parser *parser, PdaRun *pdaRun, FsmRun *fsmRun, InputStream *inputStream, int frameId );
+
+void mainExecution( struct ColmProgram *prg, Execution *exec, Code *code );
+void reductionExecution( Execution *exec, Tree **sp );
+void generationExecution( Execution *exec, Tree **sp );
+void reverseExecution( Execution *exec, Tree **sp, RtCodeVect *allRev );
+
+Kid *allocAttrs( struct ColmProgram *prg, long length );
+void freeAttrs( struct ColmProgram *prg, Kid *attrs );
+void setAttr( Tree *tree, long pos, Tree *val );
+Kid *getAttrKid( Tree *tree, long pos );
+
+Tree *splitTree( struct ColmProgram *prg, Tree *t );
+void rcodeDownrefAll( struct ColmProgram *prg, Tree **sp, RtCodeVect *cv );
+void commitFull( struct ColmProgram *prg, Tree **sp, PdaRun *pdaRun, long commitReduce );
+Tree *getParsedRoot( PdaRun *pdaRun, int stop );
+void splitRef( struct ColmProgram *prg, Tree ***sp, Ref *fromRef );
+
+void allocGlobal( struct ColmProgram *prg );
+Tree **executeCode( struct ColmProgram *prg, Execution *exec, Tree **sp, Code *instr );
+void rcodeDownref( struct ColmProgram *prg, Tree **sp, Code *instr );
+Tree **stackAlloc();
+Code *popReverseCode( RtCodeVect *allRev );
+void sendBackBuffered( FsmRun *fsmRun, InputStream *inputStream );
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/closure.cc b/src/closure.cc
new file mode 100644
index 00000000..37b0e259
--- /dev/null
+++ b/src/closure.cc
@@ -0,0 +1,458 @@
+/*
+ * Copyright 2005-2012 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "global.h"
+#include "parsedata.h"
+
+#include "vector.h"
+#include <assert.h>
+#include <string.h>
+#include <iostream>
+
+using std::endl;
+using std::cerr;
+
+/* Merge one LR(0) item, represented by prodState, into dest, the state that
+ * is being closed.
+ *
+ * The dot set and the pending commits of prodState are unioned into dest. If
+ * prodState has a transition out (exactly one is asserted), dest either gets
+ * a new transition on the same key leading to a freshly made state, or the
+ * item is merged into the transition dest already has on that key. Commits
+ * carried by expandFrom are moved onto that transition; when prodState has no
+ * transition out they are recorded in dest->pendingCommits against
+ * prod->prodId instead.
+ *
+ * expandFrom and prod are both 0 when a core item is brought in (see
+ * lr0InvokeClosure); prod is only dereferenced when expandFrom is non-zero. */
+void Compiler::lr0BringInItem( PdaGraph *pdaGraph, PdaState *dest, PdaState *prodState,
+ PdaTrans *expandFrom, Definition *prod )
+{
+ /* We use dot sets for finding unique states. In the future, should merge
+ * dot sets with the stateSet pointer (only need one of these). */
+ assert( dest != prodState );
+ dest->dotSet.insert( prodState->dotSet );
+
+ /* Get the epsilons, context, out priorities. */
+ dest->pendingCommits.insert( prodState->pendingCommits );
+ //if ( prodState->pendingCommits.length() > 0 )
+ // cerr << "THERE ARE PENDING COMMITS DRAWN IN" << endl;
+
+ if ( prodState->transMap.length() > 0 ) {
+ /* A production state is expected to have a single way forward. */
+ assert( prodState->transMap.length() == 1 );
+ PdaTrans *srcTrans = prodState->transMap[0].value;
+
+ /* Look for the source in the destination. */
+ TransMapEl *destTel = dest->transMap.find( srcTrans->lowKey );
+ if ( destTel == 0 ) {
+ /* Make a new state and transition to it. */
+ PdaState *newState = pdaGraph->addState();
+ PdaTrans *newTrans = new PdaTrans();
+
+ /* Attach the new transition to the new state. */
+ newTrans->lowKey = srcTrans->lowKey;
+ pdaGraph->attachTrans( dest, newState, newTrans );
+ pdaGraph->addInTrans( newTrans, srcTrans );
+
+ /* The transitions we make during lr0 closure are all shifts. */
+ assert( newTrans->isShift );
+ assert( srcTrans->isShift );
+
+ /* The new state must have its state set setup. */
+ newState->stateSet = new PdaStateSet;
+ newState->stateSet->insert( srcTrans->toState );
+
+ /* Insert the transition into the map. Be sure to set destTel, it
+ * is needed below. */
+ dest->transMap.insert( srcTrans->lowKey, newTrans, &destTel );
+
+ /* If the item is a non-term, queue it for closure. */
+ LangEl *langEl = langElIndex[srcTrans->lowKey];
+ if ( langEl != 0 && langEl->type == LangEl::NonTerm ) {
+ pdaGraph->transClosureQueue.append( newTrans );
+ //cerr << "put to trans closure queue" << endl;
+ }
+ }
+ else {
+ /* dest already moves on this key: add the item's target to the
+ * existing target's state set and merge the transition data. */
+ //cerr << "merging transitions" << endl;
+ destTel->value->toState->stateSet->insert( srcTrans->toState );
+ pdaGraph->addInTrans( destTel->value, srcTrans );
+ }
+
+ /* If this is an expansion then we may need to bring in commits. */
+ if ( expandFrom != 0 && expandFrom->commits.length() > 0 ) {
+ //cerr << "SETTING COMMIT ON CLOSURE ROUND" << endl;
+ destTel->value->commits.insert( expandFrom->commits );
+
+ /* NOTE(review): presumably empty() clears the set (Aapl
+ * containers), moving the commits rather than copying them --
+ * confirm. */
+ expandFrom->commits.empty();
+ }
+ }
+ else {
+ /* ProdState does not have any transitions out. It is at the end of a
+ * production. */
+ if ( expandFrom != 0 && expandFrom->commits.length() > 0 ) {
+ //cerr << "SETTING COMMIT IN PENDING LOOKAHEAD" << endl;
+ for ( LongSet::Iter len = expandFrom->commits; len.lte(); len++ )
+ dest->pendingCommits.insert( ProdIdPair( prod->prodId, *len ) );
+
+ expandFrom->commits.empty();
+ }
+ }
+}
+
+/* Close a single LR(0) state.
+ *
+ * The core items in state->stateSet are brought in first, then the start
+ * state of every production of each non-terminal queued on
+ * transClosureQueue. The finished state is then offered to closedMap: if its
+ * dot set is new the state is kept and its targets are queued for closure,
+ * otherwise the state duplicates an existing one and is discarded after its
+ * in-transitions have been moved over. */
+void Compiler::lr0InvokeClosure( PdaGraph *pdaGraph, PdaState *state )
+{
+ /* State should not already be closed. */
+ assert( !state->inClosedMap );
+
+ /* This is used each time we invoke closure, it must be cleared. */
+ pdaGraph->transClosureQueue.abandon();
+
+ /* Drag in the core items. */
+ for ( PdaStateSet::Iter ssi = *state->stateSet; ssi.lte(); ssi++ )
+ lr0BringInItem( pdaGraph, state, *ssi, 0, 0 );
+
+ /* Now bring in the derived items. */
+ while ( pdaGraph->transClosureQueue.length() > 0 ) {
+ PdaTrans *toClose = pdaGraph->transClosureQueue.detachFirst();
+ //cerr << "have a transition to derive" << endl;
+
+ /* Get the langEl. */
+ LangEl *langEl = langElIndex[toClose->lowKey];
+
+ /* Make graphs for all of the productions that the non
+ * terminal goes to that are not already in the state's dotSet. */
+ for ( LelDefList::Iter prod = langEl->defList; prod.lte(); prod++ ) {
+ /* Bring in the start state of the production. */
+ lr0BringInItem( pdaGraph, state, prod->fsm->startState, toClose, prod );
+ }
+ }
+
+ /* Try and insert into the closed dict. */
+ DotSetMapEl *lastFound;
+ if ( pdaGraph->closedMap.insert( state, &lastFound ) ) {
+ /* Insertion into closed dict succeeded. There is no state with the
+ * same dot set. The state is now closed. It is guaranteed a spot in
+ * the closed dict and it will never go away (states never deleted
+ * during closure). */
+ pdaGraph->stateClosedList.append( state );
+ state->inClosedMap = true;
+
+ /* Add all of the states in the out transitions to the closure queue.
+ * The queue is appended to here and consumed from the front in
+ * lr0CloseAllStates, which gives a breadth first search of the
+ * graph. */
+ for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) {
+ /* Get the state the transEl goes to. */
+ PdaState *targ = trans->value->toState;
+
+ /* If the state on this transition has not already been slated
+ * for closure, then add it to the queue. */
+ if ( !targ->onClosureQueue && !targ->inClosedMap ) {
+ pdaGraph->stateClosureQueue.append( targ );
+ targ->onClosureQueue = true;
+ }
+ }
+ }
+ else {
+ /* Insertion into closed dict failed. There is an existing state
+ * with the same dot set, reported in lastFound. Hand this state's
+ * in-transitions to inTransMove along with lastFound, then throw away
+ * this state together with the transitions and target states that
+ * were made for it above. */
+ pdaGraph->inTransMove( lastFound, state );
+ for ( TransMap::Iter tel = state->transMap; tel.lte(); tel++ ) {
+ pdaGraph->stateList.detach( tel->value->toState );
+ delete tel->value->toState;
+ delete tel->value;
+ }
+ pdaGraph->stateList.detach( state );
+ delete state;
+ }
+}
+
+/* Invoke closure on the whole graph. States are taken from the front of the
+ * closure queue, which gives a breadth first search of the tree being built.
+ * The tree has back edges: they are the edges made when, upon closure, a dot
+ * set turns out to exist already. */
+void Compiler::lr0CloseAllStates( PdaGraph *pdaGraph )
+{
+	/* Closing a state may queue more states, so drain until nothing is left. */
+	while ( pdaGraph->stateClosureQueue.length() > 0 ) {
+		/* Take the oldest pending state and mark it as no longer queued. */
+		PdaState *pending = pdaGraph->stateClosureQueue.detachFirst();
+		pending->onClosureQueue = false;
+
+		lr0InvokeClosure( pdaGraph, pending );
+	}
+}
+
+/* Copy onto trans the commit lengths that are pending in state for the
+ * production prodId. Pending entries for other productions are left alone. */
+void Compiler::transferCommits( PdaGraph *pdaGraph, PdaTrans *trans,
+		PdaState *state, long prodId )
+{
+	for ( ProdIdPairSet::Iter pending = state->pendingCommits; pending.lte(); pending++ ) {
+		/* Only entries recorded against this production apply. */
+		if ( pending->onReduce != prodId )
+			continue;
+
+		trans->commits.insert( pending->length );
+	}
+}
+
+/* Add a set of follow keys to every place trans expands to.
+ *
+ * For each (production, state) pair in trans->expandTo and each key in
+ * followKeys, a reduction of that production with the key's priority is
+ * added on the key: into the transition the state already has on the key if
+ * there is one, otherwise through a newly made non-shift transition to
+ * actionDestState. New transitions are queued on transClosureQueue. */
+void Compiler::lalr1AddFollow2( PdaGraph *pdaGraph, PdaTrans *trans, FollowToAdd &followKeys )
+{
+ for ( ExpandToSet::Iter ets = trans->expandTo; ets.lte(); ets++ ) {
+ int prodId = ets->prodId;
+ PdaState *expandTo = ets->state;
+
+ for ( FollowToAdd::Iter fkey = followKeys; fkey.lte(); fkey++ ) {
+ TransMapEl *transEl = expandTo->transMap.find( fkey->key );
+
+ if ( transEl != 0 ) {
+ /* A transition on this key exists: add the reduction to it. */
+ PdaTrans *destTrans = transEl->value;
+
+ transferCommits( pdaGraph, destTrans, expandTo, prodId );
+
+ pdaGraph->addInReduction( destTrans, prodId, fkey->value );
+ }
+ else {
+ /* Set up the follow transition. */
+ PdaTrans *followTrans = new PdaTrans;
+ followTrans->lowKey = fkey->key;
+ followTrans->isShift = false;
+ followTrans->reductions.insert( prodId, fkey->value );
+
+ transferCommits( pdaGraph, followTrans, expandTo, prodId );
+
+ pdaGraph->attachTrans( expandTo, actionDestState, followTrans );
+ expandTo->transMap.insert( followTrans->lowKey, followTrans );
+ /* Queued so lalr1AddFollowSets processes the new transition. */
+ pdaGraph->transClosureQueue.append( followTrans );
+ }
+ }
+ }
+}
+
+/* The highest priority carried by this transition: the shift priority is
+ * considered when the transition is a shift, along with the priority of
+ * every reduction on it. LONG_MIN is returned when there is neither. */
+long PdaTrans::maxPrior()
+{
+	/* Nothing can compare lower than LONG_MIN, so a shift priority, when
+	 * there is one, is always the starting point. */
+	long best = isShift ? shiftPrior : LONG_MIN;
+
+	for ( ReductionMap::Iter red = reductions; red.lte(); red++ ) {
+		if ( best < red->value )
+			best = red->value;
+	}
+
+	return best;
+}
+
+/* Initial follow pass over one state. For every transition into the state
+ * that is on a non-terminal, collect the keys the state moves on that are
+ * terminals (or have no language element), each with the highest priority of
+ * its transition, and add them as follow keys of the incoming transition. */
+void Compiler::lalr1AddFollow1( PdaGraph *pdaGraph, PdaState *state )
+{
+	for ( PdaTransInList::Iter in = state->inRange; in.lte(); in++ ) {
+		/* Only transitions in on a non-terminal are of interest. */
+		long inKey = in->lowKey;
+		LangEl *inEl = langElIndex[inKey];
+		if ( inEl == 0 || inEl->type != LangEl::NonTerm )
+			continue;
+
+		/* Find the following transitions. */
+		FollowToAdd followKeys;
+		for ( TransMap::Iter fout = state->transMap; fout.lte(); fout++ ) {
+			int outKey = fout->key;
+			LangEl *outEl = langElIndex[outKey];
+			if ( outEl != 0 && outEl->type != LangEl::Term )
+				continue;
+
+			long prior = fout->value->maxPrior();
+			followKeys.insert( outKey, prior );
+		}
+
+		/* Nothing to hand on when no key was collected. */
+		if ( followKeys.length() > 0 )
+			lalr1AddFollow2( pdaGraph, in, followKeys );
+	}
+}
+
+/* Single-key form of lalr1AddFollow2. For each (production, state) pair in
+ * trans->expandTo, a reduction of that production with priority prior is
+ * added on followKey: into the state's existing transition on the key if
+ * there is one, otherwise through a new non-shift transition to
+ * actionDestState, which is also queued on transClosureQueue. */
+void Compiler::lalr1AddFollow2( PdaGraph *pdaGraph, PdaTrans *trans,
+ long followKey, long prior )
+{
+ for ( ExpandToSet::Iter ets = trans->expandTo; ets.lte(); ets++ ) {
+ int prodId = ets->prodId;
+ PdaState *expandTo = ets->state;
+
+ TransMapEl *transEl = expandTo->transMap.find( followKey );
+ if ( transEl != 0 ) {
+ /* Add in the reductions, or in the shift. */
+ PdaTrans *destTrans = transEl->value;
+
+ transferCommits( pdaGraph, destTrans, expandTo, prodId );
+
+ pdaGraph->addInReduction( destTrans, prodId, prior );
+ }
+ else {
+ /* Set up the follow transition. */
+ PdaTrans *followTrans = new PdaTrans;
+ followTrans->lowKey = followKey;
+ followTrans->isShift = false;
+ followTrans->reductions.insert( prodId, prior );
+
+ transferCommits( pdaGraph, followTrans, expandTo, prodId );
+
+ pdaGraph->attachTrans( expandTo, actionDestState, followTrans );
+ expandTo->transMap.insert( followTrans->lowKey, followTrans );
+ /* Queued so lalr1AddFollowSets processes the new transition. */
+ pdaGraph->transClosureQueue.append( followTrans );
+ }
+ }
+}
+
+/* Follow pass for one queued transition. When the transition is on a
+ * terminal (or on a key with no language element), its key and highest
+ * priority are added as follow information to every transition on a
+ * non-terminal that enters the state the transition leaves from. */
+void Compiler::lalr1AddFollow1( PdaGraph *pdaGraph, PdaTrans *trans )
+{
+	PdaState *from = trans->fromState;
+	int followKey = trans->lowKey;
+
+	/* Transitions on non-terminals carry no follow information. */
+	LangEl *followEl = langElIndex[followKey];
+	if ( followEl != 0 && followEl->type != LangEl::Term )
+		return;
+
+	/* Find the non-terminals into the state. */
+	for ( PdaTransInList::Iter in = from->inRange; in.lte(); in++ ) {
+		long inKey = in->lowKey;
+		LangEl *inEl = langElIndex[inKey];
+		if ( inEl == 0 || inEl->type != LangEl::NonTerm )
+			continue;
+
+		long prior = trans->maxPrior();
+		//cerr << "FOLLOW PRIOR TRANSFER 2: " << prior << endl;
+		lalr1AddFollow2( pdaGraph, in, followKey, prior );
+	}
+}
+
+/* Add follow sets to an LR(0) graph to make it LALR(1).
+ *
+ * Creates actionDestState, adds the eof shift after the start of each parser
+ * element in parserEls, runs the initial follow pass over every state and
+ * then keeps processing the transitions that pass queues on
+ * transClosureQueue until none remain. */
+void Compiler::lalr1AddFollowSets( PdaGraph *pdaGraph, LangElSet &parserEls )
+{
+ /* Make the state that all reduction actions go to. Since a reduction pops
+ * states off the stack and sets the new target state, this state is
+ * actually never reached. Just here to link the trans to. */
+ actionDestState = pdaGraph->addState();
+ pdaGraph->setFinState( actionDestState );
+
+ for ( LangElSet::Iter pe = parserEls; pe.lte(); pe++ ) {
+ /* Get the entry into the graph and traverse over start. */
+ PdaState *overStart = pdaGraph->followFsm( (*pe)->startState, (*pe)->rootDef->fsm );
+
+ /* Add _eof after the initial _start. */
+ PdaTrans *eofTrans = pdaGraph->insertNewTrans( overStart, actionDestState,
+ (*pe)->eofLel->id, (*pe)->eofLel->id );
+ eofTrans->isShift = true;
+ }
+
+ /* This was used during lr0 table construction. */
+ pdaGraph->transClosureQueue.abandon();
+
+ /* Need to pass over every state initially. */
+ for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ )
+ lalr1AddFollow1( pdaGraph, state );
+
+ /* While the closure queue has items, pop them off and add follow
+ * characters. Processing a transition may queue further transitions. */
+ while ( pdaGraph->transClosureQueue.length() > 0 ) {
+ /* Pop the first item off and add Follow for it. */
+ PdaTrans *trans = pdaGraph->transClosureQueue.detachFirst();
+ lalr1AddFollow1( pdaGraph, trans );
+ }
+}
+
+/* Link production expansions to the place they expand to. For every
+ * transition on a non-terminal, each production of that non-terminal is
+ * followed through the graph from the transition's source state, and the
+ * state reached is recorded, paired with the production id, in the
+ * transition's expandTo set. State numbers are assigned first. */
+void Compiler::linkExpansions( PdaGraph *pdaGraph )
+{
+ pdaGraph->setStateNumbers();
+ for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
+ /* Find transitions out on non terminals. */
+ for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) {
+ long key = trans->key;
+ LangEl *langEl = langElIndex[key];
+ if ( langEl != 0 && langEl->type == LangEl::NonTerm ) {
+ /* For each production that the non terminal expands to ... */
+ for ( LelDefList::Iter prod = langEl->defList; prod.lte(); prod++ ) {
+ /* Follow the production and add to the trans's expand to set. */
+ PdaState *followRes = pdaGraph->followFsm( state, prod->fsm );
+
+ /* NOTE(review): the disabled trace below refers to fto and
+ * fin, which are not declared in this function; it would
+ * need updating before being re-enabled. */
+ //LangEl *lel = langElIndex[key];
+ //cerr << state->stateNum << ", ";
+ //if ( lel != 0 )
+ // cerr << lel->data;
+ //else
+ // cerr << (char)key;
+ //cerr << " -> " << (*fto)->stateNum << " on " <<
+ // prod->data << " (fss = " << fin.pos() << ")" << endl;
+ trans->value->expandTo.insert( ExpandToEl( followRes, prod->prodId ) );
+ }
+ }
+ }
+ }
+}
+
+/* Add terminal versions of all nonterminal transitions. For every transition
+ * on a non-terminal, a shift transition on the id of that non-terminal's
+ * termDup is added to the same state, going to the same target state. */
+void Compiler::addDupTerms( PdaGraph *pdaGraph )
+{
+	for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
+		/* The duplicates are collected first and attached in a second loop,
+		 * presumably so the transition map is not changed while it is being
+		 * iterated. */
+		PdaTransList newTransitions;
+		for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) {
+			/* A key may have no language element. The other passes in this
+			 * file check for that before looking at the type; do the same
+			 * here instead of dereferencing a null pointer. */
+			LangEl *lel = langElIndex[trans->value->lowKey];
+			if ( lel == 0 || lel->type != LangEl::NonTerm )
+				continue;
+
+			PdaTrans *dupTrans = new PdaTrans;
+			dupTrans->lowKey = lel->termDup->id;
+			dupTrans->isShift = true;
+
+			/* Save the target state in toState. In the next loop when we
+			 * attach the transition we must clear this because the
+			 * attaching code requires the transition to be unattached. */
+			dupTrans->toState = trans->value->toState;
+			newTransitions.append( dupTrans );
+
+			/* Commit code used? */
+			//transferCommits( pdaGraph, followTrans, expandTo, prodId );
+		}
+
+		for ( PdaTrans *dup = newTransitions.head; dup != 0; ) {
+			/* Read the list link before attaching the transition. */
+			PdaTrans *next = dup->next;
+			PdaState *toState = dup->toState;
+			dup->toState = 0;
+			pdaGraph->attachTrans( state, toState, dup );
+			state->transMap.insert( dup->lowKey, dup );
+			dup = next;
+		}
+	}
+}
+
+/* Generate a LALR(1) graph.
+ *
+ * An entry state is made for every element of parserEls, seeded with the
+ * start state of the element's root definition and queued for closure. The
+ * graph is then built in four passes: LR(0) closure, duplication of
+ * non-terminal transitions as terminals, linking of production expansions,
+ * and the addition of follow sets. */
+void Compiler::lalr1GenerateParser( PdaGraph *pdaGraph, LangElSet &parserEls )
+{
+ /* Make the initial graph. */
+ pdaGraph->langElIndex = langElIndex;
+
+ for ( Vector<LangEl*>::Iter r = parserEls; r.lte(); r++ ) {
+ /* Create the entry point. */
+ PdaState *rs = pdaGraph->addState();
+ pdaGraph->entryStateSet.insert( rs );
+
+ /* State set of just one state. */
+ rs->stateSet = new PdaStateSet;
+ rs->stateSet->insert( (*r)->rootDef->fsm->startState );
+
+ /* Queue the start state for closure. */
+ rs->onClosureQueue = true;
+ pdaGraph->stateClosureQueue.append( rs );
+
+ /* Remembered on the element; lalr1AddFollowSets starts from it. */
+ (*r)->startState = rs;
+ }
+
+ /* Run the lr0 closure. */
+ lr0CloseAllStates( pdaGraph );
+
+ /* Add terminal versions of all nonterminal transitions. */
+ addDupTerms( pdaGraph );
+
+ /* Link production expansions to the place they expand to. */
+ linkExpansions( pdaGraph );
+
+ /* Walk the graph adding follow sets to the LR(0) graph. */
+ lalr1AddFollowSets( pdaGraph, parserEls );
+
+// /* Set the commit on the final eof shift. */
+// PdaTrans *overStart = pdaGraph->startState->findTrans( rootEl->id );
+// PdaTrans *eofTrans = overStart->toState->findTrans( eofLangEl->id );
+// eofTrans->afterShiftCommits.insert( 2 );
+}
diff --git a/src/codegen.cc b/src/codegen.cc
new file mode 100644
index 00000000..4403cf8f
--- /dev/null
+++ b/src/codegen.cc
@@ -0,0 +1,50 @@
+/*
+ * Copyright 2006-2012 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "parsedata.h"
+#include "fsmcodegen.h"
+#include "redfsm.h"
+#include "bstmap.h"
+#include "fsmrun.h"
+#include "debug.h"
+#include <sstream>
+#include <string>
+
+
+/* Write the main() function of the generated program to out. The emitted
+ * code initializes colm with the active realm, creates a program from
+ * main_runtimeData and the command line, runs it, and returns the status
+ * handed back by colmDeleteProgram. The stream is flushed afterwards. */
+void FsmCodeGen::writeMain()
+{
+	/* Signature and locals. */
+	out <<
+		"int main( int argc, const char **argv )\n"
+		"{\n"
+		" struct ColmProgram *prg;\n"
+		" int exitStatus;\n";
+
+	/* Runtime start up; the realm value is spliced into the call. */
+	out << " colmInit( " << colmActiveRealm << " );\n";
+
+	/* Create, run and tear down the program. */
+	out <<
+		" prg = colmNewProgram( &main_runtimeData, argc, argv );\n"
+		" colmRunProgram( prg );\n"
+		" exitStatus = colmDeleteProgram( prg );\n"
+		" return exitStatus;\n"
+		"}\n"
+		"\n";
+
+	out.flush();
+}
+
+
diff --git a/src/codevect.c b/src/codevect.c
new file mode 100644
index 00000000..f8997423
--- /dev/null
+++ b/src/codevect.c
@@ -0,0 +1,183 @@
+/*
+ * Copyright 2010-2012 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Aapl.
+ *
+ * Aapl is free software; you can redistribute it and/or modify it under the
+ * terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2.1 of the License, or (at your option)
+ * any later version.
+ *
+ * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Aapl; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <rtvector.h>
+#include <pdarun.h>
+
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+
+
+/* Put a code vector into the empty state: no buffer, zero elements in use and
+ * zero elements allocated. */
+void initRtCodeVect( RtCodeVect *vect )
+{
+    vect->allocLen = 0;
+    vect->tabLen = 0;
+    vect->data = 0;
+}
+
+/* Growth policy: keep the existing allocation when it already holds `needed`
+ * elements, otherwise ask for twice the needed amount. */
+static long newSizeUp( long existing, long needed )
+{
+    if ( needed <= existing )
+        return existing;
+    return needed << 1;
+}
+
+/* Shrink policy: only when fewer than a quarter of the allocated elements are
+ * needed does the allocation drop, and then to twice the needed amount. */
+static long newSizeDown( long existing, long needed )
+{
+    long quarter = existing >> 2;
+
+    if ( needed >= quarter )
+        return existing;
+    return needed << 1;
+}
+
+/* Grow the buffer so that it can hold at least len elements. The new capacity
+ * is chosen by newSizeUp(). Reads and writes allocLen and data; does not read
+ * or write tabLen.
+ *
+ * Running out of memory is fatal: the process is aborted rather than leaving
+ * the vector with a null buffer and a capacity it does not actually have.
+ * allocLen is only updated once the allocation has succeeded. */
+static void upResize( RtCodeVect *vect, long len )
+{
+    /* Ask the resize policy what the new capacity will be. */
+    long newLen = newSizeUp( vect->allocLen, len );
+
+    /* Did the data grow? */
+    if ( newLen > vect->allocLen ) {
+        /* realloc() with a null pointer behaves like malloc(), so the same
+         * call both creates the table and resizes an existing one. */
+        Code *grown = (Code*) realloc( vect->data, sizeof(Code) * newLen );
+        if ( grown == 0 )
+            abort();
+
+        vect->data = grown;
+        vect->allocLen = newLen;
+    }
+}
+
+/* Shrink the buffer for a vector that now needs only len elements. The new
+ * capacity is chosen by newSizeDown(). Reads and writes allocLen and data;
+ * does not read or write tabLen.
+ *
+ * If the shrinking realloc() fails, the original (larger) buffer is still
+ * valid, so it is kept and allocLen is left unchanged instead of overwriting
+ * data with a null pointer. */
+static void downResize( RtCodeVect *vect, long len )
+{
+    /* Ask the resize policy what the new capacity will be. */
+    long newLen = newSizeDown( vect->allocLen, len );
+
+    /* Did the data shrink? */
+    if ( newLen < vect->allocLen ) {
+        if ( newLen == 0 ) {
+            /* Nothing left to store, simply free the data. */
+            free( vect->data );
+            vect->data = 0;
+            vect->allocLen = 0;
+        }
+        else {
+            /* Not shrinking to size zero, realloc to the smaller size. */
+            Code *shrunk = (Code*) realloc( vect->data, sizeof(Code) * newLen );
+            if ( shrunk != 0 ) {
+                vect->data = shrunk;
+                vect->allocLen = newLen;
+            }
+        }
+    }
+}
+
+
+/* Release the buffer of a code vector and reset both lengths to zero. A
+ * vector without a buffer is left untouched. */
+void rtCodeVectEmpty( RtCodeVect *vect )
+{
+    if ( vect->data == 0 )
+        return;
+
+    free( vect->data );
+    vect->data = 0;
+    vect->allocLen = 0;
+    vect->tabLen = 0;
+}
+
+/* Overwrite len elements starting at pos with the elements at val, extending
+ * the vector when the written range runs past the current end. A negative pos
+ * is taken relative to the current length. */
+void rtCodeVectReplace( RtCodeVect *vect, long pos, const Code *val, long len )
+{
+    long start, stop, n;
+
+    /* Resolve a position given relative to the end. */
+    start = pos < 0 ? vect->tabLen + pos : pos;
+
+    /* One past the last element that will be written. */
+    stop = start + len;
+
+    /* Writing beyond the end: make room and extend the length. */
+    if ( stop > vect->tabLen ) {
+        upResize( vect, stop );
+        vect->tabLen = stop;
+    }
+
+    /* Copy the new elements in, front to back. */
+    for ( n = 0; n < len; n++ )
+        vect->data[start + n] = val[n];
+}
+
+/* Remove len elements starting at pos, sliding the following elements down
+ * over the gap and shrinking the buffer when the policy asks for it. A
+ * negative pos is taken relative to the current length. */
+void rtCodeVectRemove( RtCodeVect *vect, long pos, long len )
+{
+    /* Resolve a position given relative to the end. */
+    long start = pos < 0 ? vect->tabLen + pos : pos;
+
+    /* Number of elements that follow the removed range. */
+    long tail = vect->tabLen - ( start + len );
+
+    /* Length of the vector once the range is gone. */
+    long remaining = vect->tabLen - len;
+
+    /* Close the gap if there is both a gap and something after it. */
+    if ( len > 0 && tail > 0 ) {
+        Code *hole = vect->data + start;
+        memmove( hole, hole + len, sizeof(Code) * tail );
+    }
+
+    /* Give memory back if necessary, then record the new length. */
+    downResize( vect, remaining );
+    vect->tabLen = remaining;
+}
+
+
diff --git a/src/colm.h b/src/colm.h
new file mode 100644
index 00000000..4f169254
--- /dev/null
+++ b/src/colm.h
@@ -0,0 +1,55 @@
+#ifndef __COLM_COLM_H
+#define __COLM_COLM_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct ColmTree;
+struct ColmKid;
+struct ColmProgram;
+struct ColmRuntimeData;
+
+void colmInit( long debugRealm ); /* First call made by the main() that FsmCodeGen::writeMain() generates; it passes the active debug realm. */
+struct ColmProgram *colmNewProgram( struct ColmRuntimeData *rtd, int argc, const char **argv ); /* Creates a program from runtime data plus main()'s argc/argv. */
+void colmRunProgram( struct ColmProgram *prg ); /* Runs a program obtained from colmNewProgram(). */
+int colmDeleteProgram( struct ColmProgram *prg ); /* The generated main() returns this result as the process exit status. */
+
+struct ColmPrintArgs
+{ /* Options and callbacks accepted by the printing functions declared in this header (printNull, printTermTree, printTreeArgs). */
+ void *arg; /* Opaque pointer; presumably caller context for the callbacks -- TODO confirm against the runtime. */
+ int comm; /* NOTE(review): meaning not visible in this header -- confirm. */
+ int attr; /* NOTE(review): meaning not visible in this header -- confirm. */
+ int trim; /* NOTE(review): meaning not visible in this header -- confirm. */
+ void (*out)( struct ColmPrintArgs *args, const char *data, int length ); /* Output callback: receives this structure, a data pointer and a length. */
+ void (*openTree)( struct ColmProgram *prg, struct ColmTree **sp,
+ struct ColmPrintArgs *args, struct ColmKid *parent, struct ColmKid *kid ); /* Same signature as printNull(). */
+ void (*printTerm)( struct ColmProgram *prg, struct ColmTree **sp,
+ struct ColmPrintArgs *args, struct ColmKid *kid ); /* Same signature as printTermTree(). */
+ void (*closeTree)( struct ColmProgram *prg, struct ColmTree **sp,
+ struct ColmPrintArgs *args, struct ColmKid *parent, struct ColmKid *kid ); /* Same signature as printNull(). */
+};
+
+void printNull( struct ColmProgram *prg, struct ColmTree **sp,
+ struct ColmPrintArgs *args, struct ColmKid *parent, struct ColmKid *kid ); /* Signature matches the openTree/closeTree members of struct ColmPrintArgs; presumably a do-nothing callback -- confirm. */
+void printTermTree( struct ColmProgram *prg, struct ColmTree **sp,
+ struct ColmPrintArgs *printArgs, struct ColmKid *kid ); /* Signature matches the printTerm member of struct ColmPrintArgs. */
+
+struct ColmTree **vm_root( struct ColmProgram *prg ); /* Returns a tree-pointer pointer of the kind the printing functions take as their sp argument. */
+struct ColmTree *returnVal( struct ColmProgram *prg ); /* NOTE(review): presumably the program's return value tree -- confirm. */
+void printTreeArgs( struct ColmProgram *prg, struct ColmTree **sp,
+ struct ColmPrintArgs *printArgs, struct ColmTree *tree ); /* Prints a tree using the options and callbacks in printArgs. */
+
+int repeatEnd( struct ColmTree *tree ); /* NOTE(review): semantics of the accessors below are not visible in this header. */
+int listLast( struct ColmTree *tree );
+struct ColmTree *getRhsVal( struct ColmProgram *prg, struct ColmTree *tree, int *a );
+struct ColmTree *getAttr( struct ColmTree *tree, long pos );
+struct ColmTree *getGlobal( struct ColmProgram *prg, long pos );
+struct ColmTree *getRepeatNext( struct ColmTree *tree );
+struct ColmTree *getRepeatVal( struct ColmTree *tree );
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/compiler.cc b/src/compiler.cc
new file mode 100644
index 00000000..c1e775f2
--- /dev/null
+++ b/src/compiler.cc
@@ -0,0 +1,1496 @@
+/*
+ * Copyright 2006-2012 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <iostream>
+#include <iomanip>
+#include <errno.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <sstream>
+
+#include "global.h"
+#include "lmparse.h"
+#include "parsedata.h"
+#include "parsetree.h"
+#include "mergesort.h"
+#include "redbuild.h"
+#include "pdacodegen.h"
+#include "fsmcodegen.h"
+#include "fsmrun.h"
+#include "pdarun.h"
+#include "colm.h"
+#include "pool.h"
+
+using namespace std;
+using std::ostringstream;
+
+char machineMain[] = "main";
+exit_object endp;
+/* Streaming an exit_object (such as endp) finishes the current line, flushes
+ * the stream and terminates the process with exit status 1. */
+void operator<<( ostream &out, exit_object & )
+{
+    endl( out );
+    exit( 1 );
+}
+
+/* Clean up and minimize a graph after an operation, but only when that
+ * operation was the last one in its sequence. */
+void afterOpMinimize( FsmGraph *fsm, bool lastInSeq )
+{
+    if ( !lastInSeq )
+        return;
+
+    /* Graph operations may leave unreachable states lying around. Dead end
+     * states are not expected: only the subtract and intersection operators
+     * can create them and they clean up after themselves. */
+    fsm->removeUnreachableStates();
+    fsm->minimizePartition2();
+}
+
+/* Sum the lengths of the out-transition lists of every state in the fsm. */
+int countTransitions( FsmGraph *fsm )
+{
+    int total = 0;
+    for ( FsmState *st = fsm->stateList.head; st != 0; st = st->next )
+        total += st->outList.length();
+    return total;
+}
+
+/* Convert a hexadecimal literal into an fsm key.
+ *
+ * str: the literal text, parsed with strtoul() in base 16.
+ * loc: source location used when reporting an overflow.
+ * pd:  unused.
+ *
+ * A literal that does not fit the alphabet type is reported as an error. When
+ * the alphabet type is narrower than unsigned long the value is then replaced
+ * by 1 << (size*8); otherwise the saturated value returned by strtoul() is
+ * kept. For a signed alphabet type narrower than unsigned long, a value with
+ * the type's sign bit set is sign extended across the whole unsigned long so
+ * that the resulting long key is negative regardless of the width of long. */
+Key makeFsmKeyHex( char *str, const InputLoc &loc, Compiler *pd )
+{
+    /* Reset errno so we can check for overflow. */
+    errno = 0;
+    unsigned int size = keyOps->alphType->size;
+    bool unusedBits = size < sizeof(unsigned long);
+
+    unsigned long ul = strtoul( str, 0, 16 );
+
+    if ( errno == ERANGE || (unusedBits && ul >> (size * 8)) ) {
+        error(loc) << "literal " << str << " overflows the alphabet type" << endl;
+
+        /* Shift an unsigned long, and only by less than its width: shifting
+         * an int by 32 or more bits is undefined. */
+        if ( unusedBits )
+            ul = 1UL << (size * 8);
+    }
+
+    /* Sign extend with an all-ones mask of the full width of unsigned long
+     * instead of a fixed 32-bit constant. */
+    if ( unusedBits && keyOps->alphType->isSigned && ul >> (size * 8 - 1) )
+        ul |= ~0UL << (size * 8);
+
+    return Key( (long)ul );
+}
+
+/* Convert a decimal literal into an fsm key. The text is parsed with
+ * strtoll(); a value outside the alphabet type's [minVal, maxVal] range is
+ * reported as an underflow or overflow at loc and clamped to the violated
+ * bound. The key is built from a long for signed alphabets and from an
+ * unsigned long otherwise. */
+Key makeFsmKeyDec( char *str, const InputLoc &loc, Compiler *pd )
+{
+    const long long lowest = keyOps->alphType->minVal;
+    const long long highest = keyOps->alphType->maxVal;
+
+    /* Clear errno first so a range error from strtoll can be detected. */
+    errno = 0;
+    long long parsed = strtoll( str, 0, 10 );
+    const bool rangeErr = ( errno == ERANGE );
+
+    if ( ( rangeErr && parsed < 0 ) || parsed < lowest ) {
+        /* Underflow. */
+        error(loc) << "literal " << str << " underflows the alphabet type" << endl;
+        parsed = lowest;
+    }
+    else if ( ( rangeErr && parsed > 0 ) || parsed > highest ) {
+        /* Overflow. */
+        error(loc) << "literal " << str << " overflows the alphabet type" << endl;
+        parsed = highest;
+    }
+
+    return keyOps->alphType->isSigned ?
+            Key( (long)parsed ) : Key( (unsigned long)parsed );
+}
+
+/* Make an fsm key from an alphabet number as written in the source. Text
+ * starting with "0x" is handed to the hex converter, everything else to the
+ * decimal converter; both validate the value against the alphabet type. */
+Key makeFsmKeyNum( char *str, const InputLoc &loc, Compiler *pd )
+{
+    const bool isHex = ( str[0] == '0' && str[1] == 'x' );
+    return isHex ? makeFsmKeyHex( str, loc, pd ) : makeFsmKeyDec( str, loc, pd );
+}
+
+/* Make an fsm key from a single character. For a signed alphabet the char is
+ * used as is; otherwise it is first converted to an unsigned char. */
+Key makeFsmKeyChar( char c, Compiler *pd )
+{
+    if ( !keyOps->isSigned ) {
+        /* Treat the character as an unsigned byte. */
+        return Key( (unsigned char)c );
+    }
+
+    /* Treat the character as a plain char. */
+    return Key( c );
+}
+
+/* Fill result[0..len-1] with fsm keys made from the len characters at data.
+ * For a signed alphabet each char is used as is; otherwise each one is first
+ * converted to an unsigned char. */
+void makeFsmKeyArray( Key *result, char *data, int len, Compiler *pd )
+{
+    const bool asSigned = keyOps->isSigned;
+
+    for ( int i = 0; i < len; i++ ) {
+        if ( asSigned )
+            result[i] = Key( data[i] );
+        else
+            result[i] = Key( (unsigned char) data[i] );
+    }
+}
+
+/* Insert key into result and, when caseInsensitive is set, also insert its
+ * counterpart in the other letter case if the key is a lower or upper case
+ * letter. */
+static void insertKeyWithCase( KeySet &result, Key key, bool caseInsensitive )
+{
+    result.insert( key );
+    if ( !caseInsensitive )
+        return;
+
+    if ( key.isLower() )
+        result.insert( key.toUpper() );
+    else if ( key.isUpper() )
+        result.insert( key.toLower() );
+}
+
+/* Like makeFsmKeyArray except the keys are collected into a set, so the
+ * result holds only unique keys and their ordering will be changed. With
+ * caseInsensitive set, letters contribute both their lower and upper case
+ * forms. */
+void makeFsmUniqueKeyArray( KeySet &result, char *data, int len,
+        bool caseInsensitive, Compiler *pd )
+{
+    const bool asSigned = keyOps->isSigned;
+
+    for ( int si = 0; si < len; si++ ) {
+        /* Read the character as a char or as an unsigned byte, depending on
+         * the signedness of the alphabet. */
+        Key key( asSigned ? Key( data[si] ) : Key( (unsigned char) data[si] ) );
+        insertKeyWithCase( result, key, caseInsensitive );
+    }
+}
+
+/* Build a new graph with rangeFsm() over the whole alphabet, from
+ * keyOps->minKey to keyOps->maxKey. The caller receives the new graph. */
+FsmGraph *dotFsm( Compiler *pd )
+{
+    FsmGraph *anyChar = new FsmGraph();
+    anyChar->rangeFsm( keyOps->minKey, keyOps->maxKey );
+    return anyChar;
+}
+
+/* Build a new graph with rangeStarFsm() over the whole alphabet, from
+ * keyOps->minKey to keyOps->maxKey. The caller receives the new graph. */
+FsmGraph *dotStarFsm( Compiler *pd )
+{
+    FsmGraph *anyString = new FsmGraph();
+    anyString->rangeStarFsm( keyOps->minKey, keyOps->maxKey );
+    return anyString;
+}
+
+/* Allocate a graph and turn it into a range machine over [low, high]. */
+static FsmGraph *newRangeFsm( long low, long high )
+{
+    FsmGraph *fsm = new FsmGraph();
+    fsm->rangeFsm( low, high );
+    return fsm;
+}
+
+/* Allocate a graph and build it with concatFsm() from a single character. */
+static FsmGraph *newCharFsm( long ch )
+{
+    FsmGraph *fsm = new FsmGraph();
+    fsm->concatFsm( ch );
+    return fsm;
+}
+
+/* Make the machine for a builtin name. Only BT_Extend depends on the signed
+ * nature of the alphabet type. Returns a newly allocated graph, or 0 when the
+ * builtin is not one of the handled values. */
+FsmGraph *makeBuiltin( BuiltinMachine builtin, Compiler *pd )
+{
+    switch ( builtin ) {
+    case BT_Any:
+        /* All characters. */
+        return dotFsm( pd );
+
+    case BT_Ascii:
+        /* Ascii characters 0 to 127. */
+        return newRangeFsm( 0, 127 );
+
+    case BT_Extend:
+        /* Ascii extended characters: the full byte range, which depends on
+         * whether the alphabet is signed. */
+        return keyOps->isSigned ?
+                newRangeFsm( -128, 127 ) : newRangeFsm( 0, 255 );
+
+    case BT_Alpha: {
+        /* Alpha [A-Za-z]. */
+        FsmGraph *alpha = newRangeFsm( 'A', 'Z' );
+        alpha->unionOp( newRangeFsm( 'a', 'z' ) );
+        alpha->minimizePartition2();
+        return alpha;
+    }
+
+    case BT_Digit:
+        /* Digits [0-9]. */
+        return newRangeFsm( '0', '9' );
+
+    case BT_Alnum: {
+        /* Alpha numerics [0-9A-Za-z]. */
+        FsmGraph *alnum = newRangeFsm( '0', '9' );
+        alnum->unionOp( newRangeFsm( 'A', 'Z' ) );
+        alnum->unionOp( newRangeFsm( 'a', 'z' ) );
+        alnum->minimizePartition2();
+        return alnum;
+    }
+
+    case BT_Lower:
+        /* Lower case characters. */
+        return newRangeFsm( 'a', 'z' );
+
+    case BT_Upper:
+        /* Upper case characters. */
+        return newRangeFsm( 'A', 'Z' );
+
+    case BT_Cntrl: {
+        /* Control characters: 0 to 31 plus 127. */
+        FsmGraph *cntrl = newRangeFsm( 0, 31 );
+        cntrl->unionOp( newCharFsm( 127 ) );
+        cntrl->minimizePartition2();
+        return cntrl;
+    }
+
+    case BT_Graph:
+        /* Graphical ascii characters [!-~]. */
+        return newRangeFsm( '!', '~' );
+
+    case BT_Print:
+        /* Printable characters. Same as graph except includes space. */
+        return newRangeFsm( ' ', '~' );
+
+    case BT_Punct: {
+        /* Punctuation: four ranges between the alphanumerics. */
+        FsmGraph *punct = newRangeFsm( '!', '/' );
+        punct->unionOp( newRangeFsm( ':', '@' ) );
+        punct->unionOp( newRangeFsm( '[', '`' ) );
+        punct->unionOp( newRangeFsm( '{', '~' ) );
+        punct->minimizePartition2();
+        return punct;
+    }
+
+    case BT_Space: {
+        /* Whitespace: [\t\v\f\n\r ]. */
+        FsmGraph *space = newRangeFsm( '\t', '\r' );
+        space->unionOp( newCharFsm( ' ' ) );
+        space->minimizePartition2();
+        return space;
+    }
+
+    case BT_Xdigit: {
+        /* Hex digits [0-9A-Fa-f]. */
+        FsmGraph *xdigit = newRangeFsm( '0', '9' );
+        xdigit->unionOp( newRangeFsm( 'A', 'F' ) );
+        xdigit->unionOp( newRangeFsm( 'a', 'f' ) );
+        xdigit->minimizePartition2();
+        return xdigit;
+    }
+
+    case BT_Lambda: {
+        FsmGraph *lambda = new FsmGraph();
+        lambda->lambdaFsm();
+        return lambda;
+    }
+
+    case BT_Empty: {
+        FsmGraph *empty = new FsmGraph();
+        empty->emptyFsm();
+        return empty;
+    }}
+
+    /* Not a known builtin. */
+    return 0;
+}
+
+/* Report whether this name inst, or any name inst below it in the tree, has
+ * a non-zero reference count. */
+bool NameInst::anyRefsRec()
+{
+    if ( numRefs > 0 )
+        return true;
+
+    /* Search the children, stopping at the first subtree with a reference. */
+    bool found = false;
+    for ( NameVect::Iter ch = childVect; !found && ch.lte(); ch++ )
+        found = (*ch)->anyRefsRec();
+
+    return found;
+}
+
+/*
+ * Compiler
+ */
+
+/* Initialize the structure that will collect info during the parse of a
+ * machine. The file name, section name, section location and output stream
+ * are taken from the arguments, and parserName is also initialized from
+ * sectionName. revertOn starts out true. Every other member named in the
+ * initializer list starts at zero/false, except the counters that start at 1:
+ * nextLocalErrKey, nextTokenId, nextGenericId and nextObjectId. */
+Compiler::Compiler( const String &fileName, const String &sectionName,
+ const InputLoc &sectionLoc, ostream &out )
+:
+ nextPriorKey(0),
+ nextLocalErrKey(1), /* 0 is reserved for global error actions. */
+ nextNameId(0),
+ alphTypeSet(false),
+ getKeyExpr(0),
+ accessExpr(0),
+ curStateExpr(0),
+ lowerNum(0),
+ upperNum(0),
+ fileName(fileName),
+ sectionName(sectionName),
+ sectionLoc(sectionLoc),
+ errorCount(0),
+ curActionOrd(0),
+ curPriorOrd(0),
+ nextEpsilonResolvedLink(0),
+ nextTokenId(1),
+ rootCodeBlock(0),
+ mainReturnUT(0),
+ parserName(sectionName),
+ out(out),
+ access(0),
+ tokenStruct(0),
+ rootLangEl(0),
+ eofLangEl(0),
+ errorLangEl(0),
+ defaultCharLangEl(0),
+ rootRegion(0),
+ defaultRegion(0),
+ firstNonTermId(0),
+ prodIdIndex(0),
+ nextPatReplId(0),
+ nextGenericId(1),
+ nextFuncId(0),
+ loopCleanup(0),
+ nextObjectId(1), /* 0 is reserved for no object. */
+ nextFrameId(0),
+ nextParserId(0),
+ nextLabelId(0),
+ revertOn(true),
+ predValue(0),
+ nextMatchEndNum(0),
+ argvTypeRef(0),
+ context(0)
+{
+}
+
+/* Clean up the data collected during a parse. Only the action list is emptied
+ * explicitly here. */
+Compiler::~Compiler()
+{
+ /* Delete all the nodes in the action list. Will cause all the
+ * string data that represents the actions to be deallocated. */
+ actionList.empty();
+}
+
+/* Create a name instantiation as a child of the current name scope, giving it
+ * the next free name id. The new node is always appended to the scope's child
+ * vector; it is entered into the scope's name map only when it has a name. */
+NameInst *Compiler::addNameInst( const InputLoc &loc, char *data, bool isLabel )
+{
+    NameInst *scope = curNameInst;
+    NameInst *created = new NameInst( loc, scope, data, nextNameId++, isLabel );
+
+    scope->childVect.append( created );
+    if ( data != 0 )
+        scope->children.insertMulti( data, created );
+
+    return created;
+}
+
+/* Start a walk of the name tree: the current scope becomes rootName and the
+ * current child index is reset to the first child. */
+void Compiler::initNameWalk( NameInst *rootName )
+{
+    curNameChild = 0;
+    curNameInst = rootName;
+}
+
+/* Descend numScopes levels into the name tree, each time going into the child
+ * selected by the current child index and resetting that index to zero. The
+ * child index is what keeps the synchronous name tree and parse tree walks in
+ * step: it is reset on entry into a scope and advanced when a scope is
+ * popped. When isLocal is set the scope arrived at also becomes the local
+ * name scope. Returns the state from before the descent; hand it to the
+ * matching popNameScope call. */
+NameFrame Compiler::enterNameScope( bool isLocal, int numScopes )
+{
+    /* Remember where we came from. */
+    NameFrame saved;
+    saved.prevNameInst = curNameInst;
+    saved.prevNameChild = curNameChild;
+    saved.prevLocalScope = localNameScope;
+
+    /* Step down one level at a time. */
+    int remaining = numScopes;
+    while ( remaining-- > 0 ) {
+        curNameInst = curNameInst->childVect[curNameChild];
+        curNameChild = 0;
+    }
+
+    if ( isLocal )
+        localNameScope = curNameInst;
+
+    return saved;
+}
+
+/* Return from a child scope to its parent. The frame is the one obtained from
+ * the corresponding enterNameScope call. The scope and local scope are
+ * restored and the child index moves on to the child after the one that was
+ * entered. */
+void Compiler::popNameScope( const NameFrame &frame )
+{
+    localNameScope = frame.prevLocalScope;
+    curNameChild = frame.prevNameChild + 1;
+    curNameInst = frame.prevNameInst;
+}
+
+/* Restore the scope, child index and local scope exactly as saved in frame.
+ * Unlike popNameScope, the child index is not advanced. */
+void Compiler::resetNameScope( const NameFrame &frame )
+{
+    localNameScope = frame.prevLocalScope;
+    curNameChild = frame.prevNameChild;
+    curNameInst = frame.prevNameInst;
+}
+
+
+/* Count one more use of every name referenced from the current scope. A name
+ * whose use count has reached its reference count is no longer needed, so its
+ * entry point is unset in the graph. */
+void Compiler::unsetObsoleteEntries( FsmGraph *graph )
+{
+    for ( NameVect::Iter ref = curNameInst->referencedNames; ref.lte(); ref++ ) {
+        NameInst *name = *ref;
+
+        /* Record the use; names that still have uses pending are kept. */
+        if ( ++name->numUses != name->numRefs )
+            continue;
+
+        /* Last use seen: the entry must exist and can now go. */
+        assert( graph->entryPoints.find( name->id ) != 0 );
+        graph->unsetEntry( name->id );
+    }
+}
+
+/* Collect every name inst called data found at or below refFrom, using a
+ * breadth-first walk of the name tree. All children are descended into,
+ * unless recLabelsOnly is set, in which case only children that are labels
+ * are. The result is empty when the name is not found. */
+NameSet Compiler::resolvePart( NameInst *refFrom, const char *data, bool recLabelsOnly )
+{
+    /* Work queue for the breadth-first search, seeded with the start node. */
+    NameInstList pending;
+    pending.append( refFrom );
+
+    NameSet found;
+    while ( pending.length() > 0 ) {
+        NameInst *scope = pending.detachFirst();
+
+        /* Record all instances of the name directly under this scope. */
+        NameMapEl *first, *last;
+        if ( scope->children.findMulti( data, first, last ) ) {
+            for ( NameMapEl *el = first; el <= last; el++ )
+                found.insert( el->value );
+        }
+
+        /* Queue the children that qualify for further searching. */
+        for ( NameVect::Iter child = scope->childVect; child.lte(); child++ ) {
+            if ( recLabelsOnly && !(*child)->isLabel )
+                continue;
+            pending.append( *child );
+        }
+    }
+
+    return found;
+}
+
+/* Resolve the components of nameRef from position namePos onwards, starting
+ * the search at refFrom, and add every name inst matching the complete
+ * reference to result. */
+void Compiler::resolveFrom( NameSet &result, NameInst *refFrom,
+        const NameRef &nameRef, int namePos )
+{
+    /* Everything that matches the current component. */
+    NameSet partResult = resolvePart( refFrom, nameRef[namePos], false );
+
+    const int nextPos = namePos + 1;
+    if ( nextPos >= nameRef.length() ) {
+        /* That was the last component: these are final results. */
+        result.insert( partResult );
+        return;
+    }
+
+    /* More components follow: search on from each partial match. */
+    for ( NameSet::Iter name = partResult; name.lte(); name++ )
+        resolveFrom( result, *name, nameRef, nextPos );
+}
+
+/* Write the data of a token to the stream. */
+ostream &operator<<( ostream &out, const Token &token )
+{
+    return out << token.data;
+}
+
+/* Write out a name reference with its components joined by "::". A null
+ * first component is written as a leading "::". */
+ostream &operator<<( ostream &out, const NameRef &nameRef )
+{
+    int pos = 0;
+
+    /* A null first component produces the leading separator. */
+    if ( nameRef[0] == 0 ) {
+        out << "::";
+        pos = 1;
+    }
+
+    /* First real component, then the rest with separators in front. */
+    out << nameRef[pos];
+    for ( pos += 1; pos < nameRef.length(); pos++ )
+        out << "::" << nameRef[pos];
+
+    return out;
+}
+
+/* Write the qualified name of a name inst: every ancestor except the root,
+ * outermost first, then the name itself, each preceded by "::". Nodes
+ * without a name are written as "<ANON>". */
+ostream &operator<<( ostream &out, const NameInst &nameInst )
+{
+    /* How many ancestors are there? */
+    int depth = 0;
+    for ( NameInst *anc = nameInst.parent; anc != 0; anc = anc->parent )
+        depth += 1;
+
+    /* Lay the ancestors out root first. */
+    NameInst **chain = new NameInst*[depth];
+    NameInst *walk = nameInst.parent;
+    for ( int slot = depth; slot-- > 0; ) {
+        chain[slot] = walk;
+        walk = walk->parent;
+    }
+
+    /* Write the ancestors, skipping the root in slot 0. */
+    for ( int i = 1; i < depth; i++ ) {
+        const char *label = chain[i]->name;
+        out << "::" << ( label != 0 ? label : "<ANON>" );
+    }
+
+    /* Write the name itself and clean up. */
+    out << "::" << ( nameInst.name != 0 ? nameInst.name : "<ANON>" );
+    delete[] chain;
+    return out;
+}
+
+/* Orders name insts by source location: by line first, then by column. */
+struct CmpNameInstLoc
+{
+    /* Returns -1, 0 or 1 when ni1 is located before, at or after ni2. */
+    static int compare( const NameInst *ni1, const NameInst *ni2 )
+    {
+        if ( ni1->loc.line != ni2->loc.line )
+            return ni1->loc.line < ni2->loc.line ? -1 : 1;
+        if ( ni1->loc.col != ni2->loc.col )
+            return ni1->loc.col < ni2->loc.col ? -1 : 1;
+        return 0;
+    }
+};
+
+void errorStateLabels( const NameSet &resolved )
+{
+ MergeSort<NameInst*, CmpNameInstLoc> mergeSort;
+ mergeSort.sort( resolved.data, resolved.length() );
+ for ( NameSet::Iter res = resolved; res.lte(); res++ )
+ error((*res)->loc) << " -> " << **res << endl;
+}
+
+
+/* Add one reference to every direct child of rootName. The extra reference
+ * makes the child's entry point survive to the end of the graph generating
+ * walk. */
+void Compiler::referenceRegions( NameInst *rootName )
+{
+    for ( NameVect::Iter inst = rootName->childVect; inst.lte(); inst++ ) {
+        NameInst *region = *inst;
+        region->numRefs += 1;
+    }
+}
+
+/* Enter `from` and everything reachable from it into the name index, which
+ * is addressed by name id. */
+void Compiler::fillNameIndex( NameInst **nameIndex, NameInst *from )
+{
+    /* This node first. */
+    nameIndex[from->id] = from;
+
+    /* Then the implicit final state, when there is one. */
+    if ( from->final != 0 )
+        fillNameIndex( nameIndex, from->final );
+
+    /* Then each child subtree. */
+    for ( NameVect::Iter child = from->childVect; child.lte(); child++ )
+        fillNameIndex( nameIndex, *child );
+}
+
+/* Allocate and fill an index from name id to name inst for the tree under
+ * rootName. nextNameId gives the number of ids handed out; one extra slot is
+ * allocated so a null pointer terminates the list. The caller receives the
+ * new array. */
+NameInst **Compiler::makeNameIndex( NameInst *rootName )
+{
+    /* The trailing () value-initializes the array, so every slot starts out
+     * as a null pointer. */
+    NameInst **nameIndex = new NameInst*[nextNameId+1]();
+    fillNameIndex( nameIndex, rootName );
+    return nameIndex;
+}
+
+/* Register a builtin machine under the given name in the root namespace: the
+ * builtin is wrapped in an expression, a join and a variable definition, and
+ * the resulting graph dictionary element is inserted into rlMap. */
+void Compiler::createBuiltin( const char *name, BuiltinMachine builtin )
+{
+    VarDef *definition = new VarDef( name,
+            new Join( new Expression( builtin ) ) );
+    rootNamespace->rlMap.insert( new GraphDictEl( name, definition ) );
+}
+
+/* Initialize the graph dict with the builtin machines. "null" and "zlen" are
+ * two names for the same lambda machine. */
+void Compiler::initGraphDict( )
+{
+    static const struct {
+        const char *name;
+        BuiltinMachine machine;
+    } builtins[] = {
+        { "any", BT_Any },
+        { "ascii", BT_Ascii },
+        { "extend", BT_Extend },
+        { "alpha", BT_Alpha },
+        { "digit", BT_Digit },
+        { "alnum", BT_Alnum },
+        { "lower", BT_Lower },
+        { "upper", BT_Upper },
+        { "cntrl", BT_Cntrl },
+        { "graph", BT_Graph },
+        { "print", BT_Print },
+        { "punct", BT_Punct },
+        { "space", BT_Space },
+        { "xdigit", BT_Xdigit },
+        { "null", BT_Lambda },
+        { "zlen", BT_Lambda },
+        { "empty", BT_Empty },
+    };
+    const int count = sizeof(builtins) / sizeof(builtins[0]);
+
+    /* Register them in table order. */
+    for ( int i = 0; i < count; i++ )
+        createBuiltin( builtins[i].name, builtins[i].machine );
+}
+
+/* Initialize the key operators object that will be referenced by all fsms
+ * created: pick the alphabet type, apply an explicit key range when one was
+ * given, and start condition keys right after the maximum key. */
+void Compiler::initKeyOps( )
+{
+    /* Signedness and bounds come from the user's alphabet type when one was
+     * set, otherwise from the host language default. */
+    HostType *alphType;
+    if ( alphTypeSet )
+        alphType = userAlphType;
+    else
+        alphType = hostLang->defaultAlphType;
+    thisKeyOps.setAlphType( alphType );
+
+    /* An explicit range overrides the bounds of the alphabet type. */
+    if ( lowerNum != 0 ) {
+        thisKeyOps.minKey = makeFsmKeyNum( lowerNum, rangeLowLoc, this );
+        thisKeyOps.maxKey = makeFsmKeyNum( upperNum, rangeHighLoc, this );
+    }
+
+    /* The first condition key is one past the maximum key. */
+    thisCondData.nextCondKey = thisKeyOps.maxKey;
+    thisCondData.nextCondKey.increment();
+}
+
+/* Print a name inst and its subtree to cerr, one node per line, with the
+ * indent string written once per level. Each line shows the name (or
+ * "<ANON>"), the id and the reference count. */
+void Compiler::printNameInst( NameInst *nameInst, int level )
+{
+    for ( int pad = level; pad > 0; pad-- )
+        cerr << " ";
+
+    const char *label = nameInst->name != 0 ? nameInst->name : "<ANON>";
+    cerr << label << " id: " << nameInst->id <<
+            " refs: " << nameInst->numRefs << endl;
+
+    /* Children go one level deeper. */
+    for ( NameVect::Iter child = nameInst->childVect; child.lte(); child++ )
+        printNameInst( *child, level + 1 );
+}
+
+/* Remove repeated actions from an action table, keeping the first occurrence
+ * of each value and the relative order of the survivors. */
+void Compiler::removeDups( ActionTable &table )
+{
+    for ( int keep = 0; keep < table.length(); keep++ ) {
+        /* Drop every later entry with the same value as entry `keep`. After
+         * a removal the next candidate has moved into position `scan`, so
+         * the position only advances when nothing was removed. */
+        int scan = keep + 1;
+        while ( scan < table.length() ) {
+            if ( table[scan].value == table[keep].value )
+                table.vremove( scan );
+            else
+                scan += 1;
+        }
+    }
+}
+
+/* Remove duplicate actions from the action tables of a graph: the tables on
+ * every transition and each state's to-state, from-state and eof tables.
+ * Should be called once all actions have been transferred to their final
+ * resting place. */
+void Compiler::removeActionDups( FsmGraph *graph )
+{
+    for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
+        /* Tables on the outgoing transitions. */
+        for ( TransList::Iter tr = st->outList; tr.lte(); tr++ )
+            removeDups( tr->actionTable );
+
+        /* Tables on the state itself. */
+        removeDups( st->toStateActionTable );
+        removeDups( st->fromStateActionTable );
+        removeDups( st->eofActionTable );
+    }
+}
+
+/* Create an action with the given name and inline list, located at line 1,
+ * column 1 with no file name, and append it to the action list. Returns the
+ * new action. */
+Action *Compiler::newAction( const String &name, InlineList *inlineList )
+{
+    InputLoc origin;
+    origin.fileName = 0;
+    origin.line = 1;
+    origin.col = 1;
+
+    Action *created = new Action( origin, name, inlineList );
+    actionList.append( created );
+    return created;
+}
+
+/* Create the three actions used by longest-match scanning (initact, tokstart
+ * and tokend) and give each an action ordering. Nothing is done when there
+ * are no regions. */
+void Compiler::initLongestMatchData()
+{
+    if ( regionList.length() <= 0 )
+        return;
+
+    /* The initActId action gives act a default value. */
+    InlineList *initActCode = new InlineList;
+    initActCode->append( new InlineItem( InputLoc(), InlineItem::LmInitAct ) );
+    initActId = newAction( "initact", initActCode );
+    initActId->isLmAction = true;
+
+    /* The setTokStart action sets tokstart. */
+    InlineList *tokStartCode = new InlineList;
+    tokStartCode->append( new InlineItem( InputLoc(), InlineItem::LmSetTokStart ) );
+    setTokStart = newAction( "tokstart", tokStartCode );
+    setTokStart->isLmAction = true;
+
+    /* The setTokEnd action sets tokend. */
+    InlineList *tokEndCode = new InlineList;
+    tokEndCode->append( new InlineItem( InputLoc(), InlineItem::LmSetTokEnd ) );
+    setTokEnd = newAction( "tokend", tokEndCode );
+    setTokEnd->isLmAction = true;
+
+    /* The actions also need orderings: ahead of all user action
+     * embeddings. */
+    initActIdOrd = curActionOrd++;
+    setTokStartOrd = curActionOrd++;
+    setTokEndOrd = curActionOrd++;
+}
+
+void Compiler::finishGraphBuild( FsmGraph *graph )
+{
+ /* Wrap up the construction of a graph. The steps below are applied to the
+ * graph in place, in this order.
+ *
+ * Resolve any labels that point to multiple states. Any labels that are
+ * still around are referenced only by gotos and calls and they need to be
+ * made into deterministic entry points. */
+ graph->deterministicEntry();
+
+ /*
+ * All state construction is now complete.
+ */
+
+ /* Transfer global error actions (error key 0) for every state. */
+ for ( StateList::Iter state = graph->stateList; state.lte(); state++ )
+ graph->transferErrorActions( state, 0 );
+
+ /* Duplicate actions are removed from the transition and state tables. */ removeActionDups( graph );
+
+ /* Remove unreachable states. There should be no dead end states. The
+ * subtract and intersection operators are the only places where they may
+ * be created and those operators clean them up. */
+ graph->removeUnreachableStates();
+
+ /* No more fsm operations are to be done. Action ordering numbers are
+ * no longer of use and will just hinder minimization. Clear them. */
+ graph->nullActionKeys();
+
+ /* Transition priorities are no longer of use either. They would also
+ * just hinder minimization, so clear them. */
+ graph->clearAllPriorities();
+
+ /* Minimize here even if we minimized at every op. Now that function
+ * keys have been cleared we may get a more minimal fsm. */
+ graph->minimizePartition2();
+ graph->compressTransitions();
+}
+
+/* Print the name tree under rootName to cerr. The root itself is not
+ * printed; its children start at indent level 0. */
+void Compiler::printNameTree( NameInst *rootName )
+{
+    cerr << "name tree:" << endl;
+    for ( NameVect::Iter child = rootName->childVect; child.lte(); child++ )
+        printNameInst( *child, 0 );
+}
+
+/* Print the name index to cerr as "<position>: <name>" lines, stopping at
+ * the first null entry. Entries without a name are shown as "<ANON>". */
+void Compiler::printNameIndex( NameInst **nameIndex )
+{
+    cerr << "name index:" << endl;
+    for ( int ni = 0; nameIndex[ni] != 0; ni++ ) {
+        const char *label = nameIndex[ni]->name;
+        cerr << ni << ": ";
+        cerr << ( label != 0 ? label : "<ANON>" ) << endl;
+    }
+}
+
+
+/* Build the name tree: create a fresh root, then recurse into every region
+ * instance. Returns the root name. */
+NameInst *Compiler::makeNameTree()
+{
+	/* Name ids restart at zero and the root takes the first one. */
+	nextNameId = 0;
+	NameInst *root = new NameInst( InputLoc(), 0, 0, nextNameId++, false );
+
+	/* Walk the instances with the name walk positioned at the root. */
+	initNameWalk( root );
+	RegionGraphList::Iter inst = instanceList;
+	while ( inst.lte() ) {
+		inst->value->makeNameTree( inst->loc, this );
+		inst++;
+	}
+
+	return root;
+}
+
+/* Build one graph per region instance, finish each, and combine them all
+ * into the first graph, which is returned carrying the name tree and name
+ * index. */
+FsmGraph *Compiler::makeAllRegions()
+{
+	/* Name tree and the supporting index. */
+	NameInst *rootName = makeNameTree();
+	NameInst **nameIndex = makeNameIndex( rootName );
+
+	/* Resolve the implicit name references to the nfa instantiations. */
+	referenceRegions( rootName );
+
+	FsmGraph **graphs = new FsmGraph*[instanceList.length()];
+	int numGraphs = 0;
+
+	/* Instantiate everything. The list is known to contain main. */
+	initNameWalk( rootName );
+	for ( RegionGraphList::Iter inst = instanceList; inst.lte(); inst++ ) {
+		/* Walk the parse tree to get the graph, then wrap it up. */
+		FsmGraph *built = inst->value->walk( this );
+		finishGraphBuild( built );
+
+		graphs[numGraphs] = built;
+		numGraphs += 1;
+	}
+
+	/* NOTE: If putting in minimization here we need to include eofTarget
+	 * into the minimization algorithm. It is currently set by the longest
+	 * match operator and not considered anywhere else. */
+
+	/* Fold graphs 1..n-1 into graph 0. */
+	FsmGraph *all = graphs[0];
+	all->globOp( graphs+1, numGraphs-1 );
+	delete[] graphs;
+
+	/* Any token region whose longest-match switch handles errors makes the
+	 * combined graph require an error state. */
+	for ( RegionList::Iter region = regionList; region.lte(); region++ ) {
+		if ( region->lmSwitchHandlesError )
+			all->lmRequiresErrorState = true;
+	}
+
+	all->rootName = rootName;
+	all->nameIndex = nameIndex;
+
+	return all;
+}
+
+/* Visit every inline item of an action, recursing into the actions of
+ * longest-match items and into child lists. The same `action` is passed
+ * down through every level of the recursion. */
+void Compiler::analyzeAction( Action *action, InlineList *inlineList )
+{
+	/* FIXME: Actions used as conditions should be very constrained. */
+	InlineList::Iter item = *inlineList;
+	while ( item.lte() ) {
+		if ( item->type == InlineItem::LmSwitch ) {
+			/* A longest-match switch: look at each token definition of
+			 * the region that has an action attached. */
+			TokenRegion *region = item->tokenRegion;
+			TokenDefListReg::Iter def = region->tokenDefList;
+			for ( ; def.lte(); def++ ) {
+				if ( def->action != 0 )
+					analyzeAction( action, def->action->inlineList );
+			}
+		}
+		else if ( item->type == InlineItem::LmOnLast ||
+				item->type == InlineItem::LmOnNext ||
+				item->type == InlineItem::LmOnLagBehind )
+		{
+			/* A single longest-match part. */
+			TokenDef *part = item->longestMatchPart;
+			if ( part->action != 0 )
+				analyzeAction( action, part->action->inlineList );
+		}
+
+		if ( item->children != 0 )
+			analyzeAction( action, item->children );
+
+		item++;
+	}
+}
+
+/* Analyze every action in actionList, then count how often each action is
+ * referenced from the graph: on transitions, as to-state, from-state and
+ * eof actions, and as a condition. */
+void Compiler::analyzeGraph( FsmGraph *graph )
+{
+	ActionList::Iter action = actionList;
+	while ( action.lte() ) {
+		analyzeAction( action, action->inlineList );
+		action++;
+	}
+
+	for ( StateList::Iter state = graph->stateList; state.lte(); state++ ) {
+		/* References from the out transitions. */
+		for ( TransList::Iter tr = state->outList; tr.lte(); tr++ ) {
+			for ( ActionTable::Iter ref = tr->actionTable; ref.lte(); ref++ )
+				ref->value->numTransRefs += 1;
+		}
+
+		/* References from the state-level action tables. */
+		for ( ActionTable::Iter ref = state->toStateActionTable; ref.lte(); ref++ )
+			ref->value->numToStateRefs += 1;
+
+		for ( ActionTable::Iter ref = state->fromStateActionTable; ref.lte(); ref++ )
+			ref->value->numFromStateRefs += 1;
+
+		for ( ActionTable::Iter ref = state->eofActionTable; ref.lte(); ref++ )
+			ref->value->numEofRefs += 1;
+
+		/* References as conditions. */
+		for ( StateCondList::Iter cond = state->stateCondList; cond.lte(); cond++ ) {
+			for ( CondSet::Iter member = cond->condSpace->condSet; member.lte(); member++ )
+				(*member)->numCondRefs += 1;
+		}
+	}
+}
+
+FsmGraph *Compiler::makeScanner()
+{
+ /* Make the graph, do minimization. */
+ FsmGraph *fsmGraph = makeAllRegions();
+
+ /* If any errors have occured in the input file then don't write anything. */
+ if ( gblErrorCount > 0 )
+ return 0;
+
+ analyzeGraph( fsmGraph );
+
+ /* Decide if an error state is necessary.
+ * 1. There is an error transition
+ * 2. There is a gap in the transitions
+ * 3. The longest match operator requires it. */
+ if ( fsmGraph->lmRequiresErrorState || fsmGraph->hasErrorTrans() )
+ fsmGraph->errState = fsmGraph->addState();
+
+ /* State numbers need to be assigned such that all final states have a
+ * larger state id number than all non-final states. This enables the
+ * first_final mechanism to function correctly. We also want states to be
+ * ordered in a predictable fashion. So we first apply a depth-first
+ * search, then do a stable sort by final state status, then assign
+ * numbers. */
+
+ fsmGraph->depthFirstOrdering();
+ fsmGraph->sortStatesByFinal();
+ fsmGraph->setStateNumbers( 0 );
+
+ return fsmGraph;
+}
+
+/* Create the fallback scanner used when no other scanner can be figured
+ * out: a namespace, a token region and a single token that matches any one
+ * character. */
+void Compiler::createDefaultScanner()
+{
+	const char *scannerName = "___DEFAULT_SCANNER";
+	const char *chrName = "___DEFAULT_SCANNER_CHR";
+	InputLoc loc = { 0, 0, 0 };
+
+	/* The default namespace. */
+	defaultNamespace = new Namespace( InputLoc(), scannerName,
+			namespaceList.length(), 0 );
+	namespaceList.append( defaultNamespace );
+
+	/* The region itself. It returns single characters. */
+	defaultRegion = new TokenRegion( InputLoc(), scannerName,
+			regionList.length(), 0 );
+	regionList.append( defaultRegion );
+
+	/* Enter the machine definition into the graph dictionary of the root
+	 * namespace and register it as an instance. */
+	RegionGraphDictEl *dictEl = rootNamespace->graphDict.insert( scannerName );
+	assert( dictEl != 0 );
+	dictEl->value = new RegionDef( scannerName, defaultRegion );
+	dictEl->isInstance = true;
+	instanceList.append( dictEl );
+
+	/* The pattern: any character. */
+	Join *anyChar = new Join( new Expression( BT_Any ) );
+
+	TokenDef *tokenDef = new TokenDef( scannerName, String(), false, false,
+			anyChar, 0, loc, nextTokenId++,
+			rootNamespace, defaultRegion, 0, 0, 0 );
+
+	defaultRegion->tokenDefList.append( tokenDef );
+
+	/* Now create the one and only token -> "<chr>" / any / */
+	defaultCharLangEl = addLangEl( this, defaultNamespace, chrName, LangEl::Term );
+
+	/* Link the token definition and its language element both ways. */
+	tokenDef->tdLangEl = defaultCharLangEl;
+	defaultCharLangEl->tokenDef = tokenDef;
+}
+
+/* Create the non-terminal for a repeat of `name`, with two productions:
+ *   repeat -> name repeat
+ *   repeat -> (empty)
+ * Returns the new language element. */
+LangEl *Compiler::makeRepeatProd( Namespace *nspace, const String &repeatName,
+		NamespaceQual *nspaceQual, const String &name )
+{
+	LangEl *repeatEl = addLangEl( this, nspace, repeatName, LangEl::NonTerm );
+	repeatEl->isRepeat = true;
+
+	/* First production: the item followed by the repeat itself. */
+	ProdElList *recurseEls = new ProdElList;
+
+	TypeRef *itemRef = new TypeRef( InputLoc(), nspaceQual, name );
+	ProdEl *itemEl = new ProdEl( ProdEl::ReferenceType, InputLoc(), 0, false, itemRef, 0 );
+
+	UniqueType *repeatUT = findUniqueType( TYPE_TREE, repeatEl );
+	TypeRef *selfRef = new TypeRef( InputLoc(), repeatUT );
+	ProdEl *selfEl = new ProdEl( ProdEl::ReferenceType, InputLoc(), 0, false, selfRef, 0 );
+
+	recurseEls->append( itemEl );
+	recurseEls->append( selfEl );
+
+	Definition *recurseDef = new Definition( InputLoc(),
+			repeatEl, recurseEls, false, 0,
+			prodList.length(), repeatEl->defList.length(),
+			Definition::Production );
+
+	repeatEl->defList.append( recurseDef );
+	prodList.append( recurseDef );
+
+	/* Second production: empty. */
+	ProdElList *emptyEls = new ProdElList;
+
+	Definition *emptyDef = new Definition( InputLoc(),
+			repeatEl, emptyEls, false, 0,
+			prodList.length(), repeatEl->defList.length(),
+			Definition::Production );
+
+	repeatEl->defList.append( emptyDef );
+	prodList.append( emptyDef );
+
+	return repeatEl;
+}
+
+/* Create the non-terminal for a list of `name`, with two productions:
+ *   list -> name list
+ *   list -> name
+ * Returns the new language element. */
+LangEl *Compiler::makeListProd( Namespace *nspace, const String &listName, NamespaceQual *nspaceQual, const String &name )
+{
+	LangEl *listEl = addLangEl( this, nspace, listName, LangEl::NonTerm );
+	listEl->isList = true;
+
+	/* First production: the item followed by the list itself. */
+	TypeRef *itemRef = new TypeRef( InputLoc(), nspaceQual, name );
+	ProdEl *itemEl = new ProdEl( ProdEl::ReferenceType, InputLoc(), 0, false, itemRef, 0 );
+
+	UniqueType *listUT = findUniqueType( TYPE_TREE, listEl );
+	TypeRef *selfRef = new TypeRef( InputLoc(), listUT );
+	ProdEl *selfEl = new ProdEl( ProdEl::ReferenceType, InputLoc(), 0, false, selfRef, 0 );
+
+	ProdElList *recurseEls = new ProdElList;
+	recurseEls->append( itemEl );
+	recurseEls->append( selfEl );
+
+	Definition *recurseDef = new Definition( InputLoc(),
+			listEl, recurseEls, false, 0,
+			prodList.length(), listEl->defList.length(),
+			Definition::Production );
+
+	listEl->defList.append( recurseDef );
+	prodList.append( recurseDef );
+
+	/* Second production: a single item. */
+	TypeRef *lastRef = new TypeRef( InputLoc(), nspaceQual, name );
+	ProdEl *lastEl = new ProdEl( ProdEl::ReferenceType, InputLoc(), 0, false, lastRef, 0 );
+
+	ProdElList *singleEls = new ProdElList;
+	singleEls->append( lastEl );
+
+	Definition *singleDef = new Definition( InputLoc(),
+			listEl, singleEls, false, 0,
+			prodList.length(), listEl->defList.length(),
+			Definition::Production );
+
+	listEl->defList.append( singleDef );
+	prodList.append( singleDef );
+
+	return listEl;
+}
+
+/* Create the non-terminal for an optional `name`, with two productions:
+ *   opt -> name
+ *   opt -> (empty)
+ * Returns the new language element. */
+LangEl *Compiler::makeOptProd( Namespace *nspace, const String &optName, NamespaceQual *nspaceQual, const String &name )
+{
+	LangEl *optEl = addLangEl( this, nspace, optName, LangEl::NonTerm );
+	optEl->isOpt = true;
+
+	/* First production: the item is present. */
+	ProdElList *presentEls = new ProdElList;
+
+	TypeRef *itemRef = new TypeRef( InputLoc(), nspaceQual, name );
+	ProdEl *itemEl = new ProdEl( ProdEl::ReferenceType, InputLoc(), 0, false, itemRef, 0 );
+	presentEls->append( itemEl );
+
+	Definition *presentDef = new Definition( InputLoc(),
+			optEl, presentEls, false, 0,
+			prodList.length(), optEl->defList.length(),
+			Definition::Production );
+
+	optEl->defList.append( presentDef );
+	prodList.append( presentDef );
+
+	/* Second production: empty. */
+	ProdElList *emptyEls = new ProdElList;
+
+	Definition *emptyDef = new Definition( InputLoc(),
+			optEl, emptyEls, false, 0,
+			prodList.length(), optEl->defList.length(),
+			Definition::Production );
+
+	optEl->defList.append( emptyDef );
+	prodList.append( emptyDef );
+
+	return optEl;
+}
+
+/* Return the direct child namespace called `name`, or 0 if there is none.
+ * Only childNamespaces is searched; there is no recursion. */
+Namespace *Namespace::findNamespace( const String &name )
+{
+	NamespaceVect::Iter child = childNamespaces;
+	while ( child.lte() ) {
+		if ( strcmp( name, (*child)->name ) == 0 )
+			return *child;
+		child++;
+	}
+	return 0;
+}
+
+/* Resolve the remaining parts of a qualification downward from `from`
+ * (names 1+ of a qual list). Each part must name a child of the namespace
+ * found for the previous part. Returns the final namespace, or 0 as soon as
+ * a part cannot be found. `qualPart` is advanced past every matched part. */
+Namespace *NamespaceQual::searchFrom( Namespace *from, StringVect::Iter &qualPart )
+{
+	for ( ; qualPart.lte(); qualPart.increment() ) {
+		Namespace *next = from->findNamespace( *qualPart );
+		if ( next == 0 )
+			return 0;
+
+		from = next;
+	}
+
+	return from;
+}
+
+/* Resolve this qualification to a namespace and remember the result in
+ * cachedNspaceQual. A non-null cached result is returned without searching
+ * again; a failed lookup yields 0 and is therefore retried on the next
+ * call. */
+Namespace *NamespaceQual::getQual( Compiler *pd )
+{
+	/* Do the search only once. */
+	if ( cachedNspaceQual != 0 )
+		return cachedNspaceQual;
+
+	/* No qualification: use the namespace the qualification was declared
+	 * in. */
+	if ( qualNames.length() == 0 ) {
+		cachedNspaceQual = declInNspace;
+		return cachedNspaceQual;
+	}
+
+	/* A leading "root" anchors the search at the root namespace; the
+	 * remaining parts are resolved downward from there. */
+	if ( strcmp( qualNames[0], "root" ) == 0 ) {
+		StringVect::Iter qualPart = qualNames;
+		qualPart.increment();
+		cachedNspaceQual = searchFrom( pd->rootNamespace, qualPart );
+		return cachedNspaceQual;
+	}
+
+	/* Otherwise climb from the declaring namespace through its parents
+	 * until one of them has a child matching the first part. */
+	StringVect::Iter qualPart = qualNames;
+	for ( Namespace *scope = declInNspace; scope != 0;
+			scope = scope->parentNamespace )
+	{
+		Namespace *first = scope->findNamespace( *qualPart );
+		if ( first != 0 ) {
+			/* Found the first part. Resolve the rest below it. */
+			qualPart.increment();
+			cachedNspaceQual = searchFrom( first, qualPart );
+			return cachedNspaceQual;
+		}
+	}
+
+	/* Failed to find the place to start from. */
+	cachedNspaceQual = 0;
+	return cachedNspaceQual;
+}
+
+/* Give every token region that has no token definitions a single generated
+ * token matching any character, and mark the region as having been empty. */
+void Compiler::initEmptyScanners()
+{
+	for ( RegionList::Iter region = regionList; region.lte(); region++ ) {
+		/* Regions that already have tokens are left alone. */
+		if ( region->tokenDefList.length() != 0 )
+			continue;
+
+		region->wasEmpty = true;
+
+		/* Counter shared by all calls; it numbers the generated names. */
+		static int def = 1;
+		InputLoc loc = { 0, 0, 0 };
+		String name( region->name.length() + 16, "__%s_DEF_PAT_%d", region->name.data, def++ );
+
+		Join *anyChar = new Join( new Expression( BT_Any ) );
+
+		TokenDef *tokenDef = new TokenDef( name, String(), false, false, anyChar,
+				0, loc, nextTokenId++, rootNamespace, region, 0, 0, 0 );
+		region->tokenDefList.append( tokenDef );
+
+		/* These do not go in the namespace, so they cannot get declared
+		 * in the declare pass. */
+		LangEl *langEl = addLangEl( this, rootNamespace, name, LangEl::Term );
+
+		/* Link the token definition and its language element both ways. */
+		tokenDef->tdLangEl = langEl;
+		langEl->tokenDef = tokenDef;
+	}
+}
+
+
+/* Parse every replacement and then every pattern with a freshly created
+ * program. Each item gets its own input stream, FsmRun and PdaRun; the
+ * PdaRun is stored on the item. Once everything is parsed, fillInPatterns is
+ * called with the program. */
+void Compiler::parsePatterns()
+{
+	Program *prg = colmNewProgram( runtimeData, 0, 0 );
+
+	/* Turn off context-dependent parsing. */
+	prg->ctxDepParsing = 0;
+
+	/* The root handed to the parse loop is the slot at index VM_STACK_SIZE
+	 * of the allocated stack. */
+	Tree **stack = stackAlloc();
+	Tree **root = &stack[VM_STACK_SIZE];
+
+	/* Replacements. */
+	for ( ReplList::Iter repl = replList; repl.lte(); repl++ ) {
+		if ( colm_log_compile ) {
+			cerr << "parsing replacement at " <<
+					repl->loc.line << ' ' << repl->loc.col << endl;
+		}
+
+		InputStream *input = new InputStream;
+		FsmRun *scanRun = new FsmRun;
+		repl->pdaRun = new PdaRun;
+
+		initInputStream( input );
+		initPdaRun( repl->pdaRun, prg, pdaTables, scanRun, repl->langEl->parserId, 0, false, 0 );
+		initFsmRun( scanRun, prg );
+
+		/* Feed the replacement through a source stream, then mark eof. */
+		Stream *stream = streamAllocate( prg );
+		stream->id = LEL_ID_STREAM;
+		stream->in = newSourceStreamRepl( repl );
+		appendStream( input, (Tree*)stream );
+		setEof( input );
+
+		newToken( prg, repl->pdaRun, scanRun );
+		long pcr = parseLoop( prg, root, repl->pdaRun, scanRun, input, PcrStart );
+		assert( pcr == PcrDone );
+		if ( repl->pdaRun->parseError )
+			cout << "parse error" << endp;
+	}
+
+	/* Patterns: the same procedure with a pattern source stream. */
+	for ( PatternList::Iter pat = patternList; pat.lte(); pat++ ) {
+		if ( colm_log_compile ) {
+			cerr << "parsing pattern at " <<
+					pat->loc.line << ' ' << pat->loc.col << endl;
+		}
+
+		InputStream *input = new InputStream;
+		FsmRun *scanRun = new FsmRun;
+		pat->pdaRun = new PdaRun;
+
+		initInputStream( input );
+		initPdaRun( pat->pdaRun, prg, pdaTables, scanRun, pat->langEl->parserId, 0, false, 0 );
+		initFsmRun( scanRun, prg );
+
+		Stream *stream = streamAllocate( prg );
+		stream->id = LEL_ID_STREAM;
+		stream->in = newSourceStreamPattern( pat );
+		appendStream( input, (Tree*)stream );
+		setEof( input );
+
+		newToken( prg, pat->pdaRun, scanRun );
+		long pcr = parseLoop( prg, root, pat->pdaRun, scanRun, input, PcrStart );
+		assert( pcr == PcrDone );
+		if ( pat->pdaRun->parseError )
+			cout << "parse error" << endp;
+	}
+
+	fillInPatterns( prg );
+}
+
+/* Collect into `parserEls` every language element that needs a parser. The
+ * target types of patterns and replacements are assigned a parser id if they
+ * lack one; afterwards every language element with a parser id is added. */
+void Compiler::collectParserEls( BstSet<LangEl*> &parserEls )
+{
+	PatternList::Iter pat = patternList;
+	while ( pat.lte() ) {
+		/* The reduction action compilation phase is assumed to have run
+		 * before pattern parsing and to have decorated the pattern with
+		 * its target type. */
+		assert( pat->langEl != 0 );
+		if ( pat->langEl->type != LangEl::NonTerm )
+			error(pat->loc) << "pattern type is not a non-terminal" << endp;
+
+		if ( pat->langEl->parserId < 0 ) {
+			/* Make a parser for the language element. */
+			parserEls.insert( pat->langEl );
+			pat->langEl->parserId = nextParserId++;
+		}
+		pat++;
+	}
+
+	ReplList::Iter repl = replList;
+	while ( repl.lte() ) {
+		/* Likewise, the replacement is assumed to be decorated with its
+		 * target type already. */
+		assert( repl->langEl != 0 );
+
+		if ( repl->langEl->parserId < 0 ) {
+			/* Make a parser for the language element. */
+			parserEls.insert( repl->langEl );
+			repl->langEl->parserId = nextParserId++;
+		}
+		repl++;
+	}
+
+	/* Add everything that has a parser id. */
+	for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
+		if ( lel->parserId >= 0 )
+			parserEls.insert( lel );
+	}
+}
+
+
+/* Write the generated program to outStream: includes, runtime definition,
+ * scanner code, parser data, runtime data and, unless a library is being
+ * built (gblLibrary), the main function. The stream is flushed at the end. */
+void Compiler::generateOutput()
+{
+	FsmCodeGen *scannerGen = new FsmCodeGen("<INPUT>", sectionName,
+			*outStream, redFsm, fsmTables );
+
+	PdaCodeGen *parserGen = new PdaCodeGen( outputFileName, "parser", this, *outStream );
+
+	scannerGen->writeIncludes();
+	parserGen->defineRuntime();
+	scannerGen->writeCode();
+
+	/* Parser data first, then the runtime data. */
+	parserGen->writeParserData( 0, pdaTables );
+	parserGen->writeRuntimeData( runtimeData, pdaTables );
+
+	if ( !gblLibrary )
+		scannerGen->writeMain();
+
+	outStream->flush();
+}
+
+
+/* Prepare the grammar for parser construction: wrap non-terminals, assign
+ * ids and names, check for undefined elements, index the language elements
+ * by id, build the production fsms and allocate the runtime data. */
+void Compiler::prepGrammar()
+{
+	/* This will create language elements. */
+	wrapNonTerminals();
+
+	makeLangElIds();
+	makeLangElNames();
+	makeDefinitionNames();
+	noUndefindLangEls();
+
+	/* Index the language elements by id. The array has nextSymbolId+1
+	 * slots and starts out with every slot null. */
+	langElIndex = new LangEl*[nextSymbolId+1]();
+	LelList::Iter lel = langEls;
+	while ( lel.lte() ) {
+		langElIndex[lel->id] = lel;
+		lel++;
+	}
+
+	makeProdFsms();
+
+	/* Allocate the runtime data now. Every PdaTable that gets made will
+	 * reference it, but it is filled in only after all the tables are
+	 * built. */
+	runtimeData = new RuntimeData;
+}
+
+/* Run the whole compilation: type declaration and resolution, scanner
+ * construction, grammar preparation, bytecode compilation, fsm reduction,
+ * parser and table generation, runtime data, and finally the parsing of
+ * patterns and replacements.
+ *
+ * If makeScanner() yields no graph, which it does when errors were counted
+ * in gblErrorCount, compilation stops right there and the later stages are
+ * not run. */
+void Compiler::compile()
+{
+	beginProcessing();
+	initKeyOps();
+
+	/* Type declaration. */
+	typeDeclaration();
+
+	/* Type resolving. */
+	typeResolve();
+
+	makeTerminalWrappers();
+	makeEofElements();
+
+	/*
+	 * Parsers
+	 */
+
+	/* Init the longest match data */
+	initLongestMatchData();
+	FsmGraph *fsmGraph = makeScanner();
+
+	/* makeScanner returns null once errors have been reported. Every
+	 * remaining stage dereferences the graph, so bail out instead of
+	 * crashing on it. */
+	if ( fsmGraph == 0 )
+		return;
+
+	if ( colm_log_compile ) {
+		printNameTree( fsmGraph->rootName );
+		printNameIndex( fsmGraph->nameIndex );
+	}
+
+	prepGrammar();
+
+	/* Compile bytecode. */
+	compileByteCode();
+
+	/* Make the reduced fsm. */
+	RedFsmBuild reduce( sectionName, this, fsmGraph );
+	redFsm = reduce.reduceMachine();
+
+	BstSet<LangEl*> parserEls;
+	collectParserEls( parserEls );
+
+	makeParser( parserEls );
+
+	/* Make the scanner tables. */
+	fsmTables = redFsm->makeFsmTables();
+
+	/* Now that all parsers are built, make the global runtimeData. */
+	makeRuntimeData();
+
+	/*
+	 * All compilation is now complete.
+	 */
+
+	/* Parse patterns and replacements. */
+	parsePatterns();
+}
+
diff --git a/src/ctinput.cc b/src/ctinput.cc
new file mode 100644
index 00000000..b5086268
--- /dev/null
+++ b/src/ctinput.cc
@@ -0,0 +1,439 @@
+/*
+ * Copyright 2007-2012 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "parsedata.h"
+#include "parsetree.h"
+#include "input.h"
+#include "fsmrun.h"
+#include "debug.h"
+#include "pool.h"
+
+#include <iostream>
+
+using std::cerr;
+using std::endl;
+
+SourceFuncs patternFuncs; /* Callbacks for pattern source streams; filled in by initPatternFuncs(). */
+SourceFuncs replFuncs; /* Callbacks for replacement source streams; filled in by initReplFuncs(). */
+
+/*
+ * Pattern
+ */
+
+/* Allocate a zero-filled source stream that reads from `pattern`, starting
+ * at the head of its item list and using the pattern callbacks. Aborts via
+ * fatal() if the allocation fails. The stream is obtained with malloc. */
+SourceStream *newSourceStreamPattern( Pattern *pattern )
+{
+	SourceStream *is = (SourceStream*)malloc(sizeof(SourceStream));
+	if ( is == 0 )
+		fatal( "out of memory allocating a pattern source stream\n" );
+
+	memset( is, 0, sizeof(SourceStream) );
+	is->handlesLine = true;
+	is->pattern = pattern;
+	is->patItem = pattern->list->head;
+	is->funcs = &patternFuncs;
+	return is;
+}
+
+/* Consume the current pattern item as a language element. Reports the
+ * item's bind id, sets *data and *length to zero, records the item's line on
+ * the stream and advances to the next item. Returns the language element of
+ * the item's factor. The current item must not be null. */
+LangEl *inputStreamPatternGetLangEl( SourceStream *is, long *bindId, char **data, long *length )
+{
+	PatternItem *item = is->patItem;
+	LangEl *klangEl = item->factor->langEl;
+
+	*bindId = item->bindId;
+	*data = 0;
+	*length = 0;
+	is->line = item->loc.line;
+
+	/* Move on to the start of the following item. */
+	is->patItem = item->next;
+	is->offset = 0;
+	return klangEl;
+}
+
+/* Copy text from the pattern into `dest` without consuming it.
+ *
+ * Starting at the stream's current item and offset, `skip` bytes are passed
+ * over and then up to `length` bytes are copied from the first text item
+ * that still has data. *copied receives the number of bytes copied.
+ *
+ * Returns INPUT_DATA when bytes were copied, INPUT_LANG_EL when a factor
+ * item is reached first and INPUT_EOD when the items run out.
+ *
+ * The skip is measured against the bytes actually available in each item.
+ * Measuring it against the amount clamped to `length` would abandon the
+ * rest of an item whenever skip >= length and the item holds more than
+ * `length` bytes. */
+int inputStreamPatternGetData( SourceStream *is, int skip, char *dest, int length, int *copied )
+{
+	*copied = 0;
+
+	PatternItem *buf = is->patItem;
+	int offset = is->offset;
+
+	while ( true ) {
+		if ( buf == 0 )
+			return INPUT_EOD;
+
+		if ( buf->type == PatternItem::FactorType )
+			return INPUT_LANG_EL;
+
+		/* Entering an item at its start: take over its line number. */
+		if ( offset == 0 )
+			is->line = buf->loc.line;
+
+		assert ( buf->type == PatternItem::InputText );
+		int avail = buf->data.length() - offset;
+
+		if ( avail > 0 ) {
+			if ( skip >= avail ) {
+				/* The whole remainder of this item is skipped. */
+				skip -= avail;
+			}
+			else {
+				/* Skip is zero or ends inside this item. Step over the
+				 * skipped part and copy what fits. */
+				char *src = &buf->data[offset];
+				src += skip;
+				avail -= skip;
+				skip = 0;
+
+				int slen = avail <= length ? avail : length;
+				memcpy( dest, src, slen ) ;
+				*copied += slen;
+				break;
+			}
+		}
+
+		buf = buf->next;
+		offset = 0;
+	}
+
+	return INPUT_DATA;
+}
+
+/* Move the stream back by one pattern item. From past-the-end (null) this
+ * lands on the tail of the pattern's item list. */
+void inputStreamPatternBackup( SourceStream *is )
+{
+	is->patItem = ( is->patItem == 0 ) ?
+			is->pattern->list->tail : is->patItem->prev;
+}
+
+/* Move the stream position backwards over the bytes described by `runBuf`,
+ * stepping to earlier pattern items as needed. The asserts compare the
+ * pushed-back bytes with the item data. */
+void inputStreamPatternPushBackBuf( SourceStream *is, RunBuf *runBuf )
+{
+	long length = runBuf->length;
+	if ( length == 0 )
+		return;
+
+	char *data = runBuf->data + runBuf->offset;
+
+	/* While pushing back past the current pattern item start. */
+	while ( length > is->offset ) {
+		length -= is->offset;
+		if ( is->offset > 0 )
+			assert( memcmp( is->patItem->data, data-length, is->offset ) == 0 );
+
+		/* Continue from the end of the previous item. */
+		inputStreamPatternBackup( is );
+		is->offset = is->patItem->data.length();
+	}
+
+	/* What is left lies within the current item. */
+	is->offset -= length;
+	assert( memcmp( &is->patItem->data[is->offset], data, length ) == 0 );
+}
+
+/* Undo the consumption of a language element: step back one item and set
+ * the offset to the length of that item's data. */
+void inputStreamPatternUndoConsumeLangEl( SourceStream *is )
+{
+	inputStreamPatternBackup( is );
+
+	PatternItem *item = is->patItem;
+	is->offset = item->data.length();
+}
+
+/* Advance the stream position by up to `length` bytes, moving on to the
+ * next pattern item whenever the current one is used up. Stops early when
+ * the items run out. Returns the number of bytes consumed. */
+int inputStreamPatternConsumeData( SourceStream *is, int length )
+{
+	/* length is an int, so it must be printed with %d. */
+	debug( REALM_INPUT, "consuming %d bytes\n", length );
+
+	int consumed = 0;
+
+	while ( true ) {
+		if ( is->patItem == 0 )
+			break;
+
+		int avail = is->patItem->data.length() - is->offset;
+
+		if ( length >= avail ) {
+			/* Read up to the end of the data. Advance the
+			 * pattern item. */
+			is->patItem = is->patItem->next;
+			is->offset = 0;
+
+			length -= avail;
+			consumed += avail;
+
+			if ( length == 0 )
+				break;
+		}
+		else {
+			/* The request ends inside the current item. */
+			is->offset += length;
+			consumed += length;
+			break;
+		}
+	}
+
+	return consumed;
+}
+
+/* Undo a data consume by moving the offset back `length` bytes. `data` is
+ * not looked at and item boundaries are not crossed. Returns `length`. */
+int inputStreamPatternUndoConsumeData( SourceStream *is, const char *data, int length )
+{
+	is->offset = is->offset - length;
+	return length;
+}
+
+/* Fill in the pattern callback table. Every entry not assigned below is
+ * left zeroed. */
+extern "C" void initPatternFuncs()
+{
+	memset( &patternFuncs, 0, sizeof(SourceFuncs) );
+
+	/* Language elements. */
+	patternFuncs.consumeLangEl = inputStreamPatternGetLangEl;
+	patternFuncs.undoConsumeLangEl = inputStreamPatternUndoConsumeLangEl;
+
+	/* Text data. */
+	patternFuncs.getData = inputStreamPatternGetData;
+	patternFuncs.consumeData = inputStreamPatternConsumeData;
+	patternFuncs.undoConsumeData = inputStreamPatternUndoConsumeData;
+}
+
+
+/*
+ * Replacement
+ */
+
+/* Allocate a zero-filled source stream that reads from `replacement`,
+ * starting at the head of its item list and using the replacement
+ * callbacks. Aborts via fatal() if the allocation fails. The stream is
+ * obtained with malloc. */
+SourceStream *newSourceStreamRepl( Replacement *replacement )
+{
+	SourceStream *is = (SourceStream*)malloc(sizeof(SourceStream));
+	if ( is == 0 )
+		fatal( "out of memory allocating a replacement source stream\n" );
+
+	memset( is, 0, sizeof(SourceStream) );
+	is->handlesLine = true;
+	is->replacement = replacement;
+	is->replItem = replacement->list->head;
+	is->funcs = &replFuncs;
+	return is;
+}
+
+/* Consume the current replacement item as a language element. Expression
+ * items carry their language element directly; otherwise it is taken from
+ * the item's factor. For a factor whose type reference has a pda literal,
+ * the literal's text is prepared into the item's data and reported through
+ * *data and *length; in every other case both are zero. The stream then
+ * advances to the next item. The current item must not be null. */
+LangEl *inputStreamReplGetLangEl( SourceStream *is, long *bindId, char **data, long *length )
+{
+	ReplItem *item = is->replItem;
+
+	LangEl *klangEl = item->type == ReplItem::ExprType ?
+			item->langEl : item->factor->langEl;
+	*bindId = item->bindId;
+
+	*data = 0;
+	*length = 0;
+	is->line = item->loc.line;
+
+	if ( item->type == ReplItem::FactorType &&
+			item->factor->typeRef->pdaLiteral != 0 )
+	{
+		bool unusedCI;
+		prepareLitString( item->data, unusedCI,
+				item->factor->typeRef->pdaLiteral->token.data,
+				item->factor->typeRef->pdaLiteral->token.loc );
+
+		*data = item->data;
+		*length = item->data.length();
+	}
+
+	/* Move on to the start of the following item. */
+	is->replItem = item->next;
+	is->offset = 0;
+	return klangEl;
+}
+
+/* Copy text from the replacement into `dest` without consuming it.
+ *
+ * Starting at the stream's current item and offset, `skip` bytes are passed
+ * over and then up to `length` bytes are copied from the first text item
+ * that still has data. *copied receives the number of bytes copied.
+ *
+ * Returns INPUT_DATA when bytes were copied, INPUT_LANG_EL when an
+ * expression or factor item is reached first and INPUT_EOD when the items
+ * run out.
+ *
+ * The skip is measured against the bytes actually available in each item.
+ * Measuring it against the amount clamped to `length` would abandon the
+ * rest of an item whenever skip >= length and the item holds more than
+ * `length` bytes. */
+int inputStreamReplGetData( SourceStream *is, int skip, char *dest, int length, int *copied )
+{
+	*copied = 0;
+
+	ReplItem *buf = is->replItem;
+	int offset = is->offset;
+
+	while ( true ) {
+		if ( buf == 0 )
+			return INPUT_EOD;
+
+		if ( buf->type == ReplItem::ExprType || buf->type == ReplItem::FactorType )
+			return INPUT_LANG_EL;
+
+		/* Entering an item at its start: take over its line number. */
+		if ( offset == 0 )
+			is->line = buf->loc.line;
+
+		assert ( buf->type == ReplItem::InputText );
+		int avail = buf->data.length() - offset;
+
+		if ( avail > 0 ) {
+			if ( skip >= avail ) {
+				/* The whole remainder of this item is skipped. */
+				skip -= avail;
+			}
+			else {
+				/* Skip is zero or ends inside this item. Step over the
+				 * skipped part and copy what fits. */
+				char *src = &buf->data[offset];
+				src += skip;
+				avail -= skip;
+				skip = 0;
+
+				int slen = avail <= length ? avail : length;
+				memcpy( dest, src, slen ) ;
+				*copied += slen;
+				break;
+			}
+		}
+
+		buf = buf->next;
+		offset = 0;
+	}
+
+	return INPUT_DATA;
+}
+
+/* Move the stream back by one replacement item. From past-the-end (null)
+ * this lands on the tail of the replacement's item list. */
+void inputStreamReplBackup( SourceStream *is )
+{
+	is->replItem = ( is->replItem == 0 ) ?
+			is->replacement->list->tail : is->replItem->prev;
+}
+
+/* Move the stream position backwards over the bytes described by `runBuf`,
+ * stepping to earlier replacement items as needed. The bytes are logged to
+ * cerr when colm_log_parse is set. The asserts compare the pushed-back
+ * bytes with the item data. */
+void inputStreamReplPushBackBuf( SourceStream *is, RunBuf *runBuf )
+{
+	char *data = runBuf->data + runBuf->offset;
+	long length = runBuf->length;
+
+	if ( colm_log_parse ) {
+		cerr << "push back data: ";
+		cerr.write( data, length );
+		cerr << endl;
+	}
+
+	if ( length == 0 )
+		return;
+
+	/* While pushing back past the current item start. */
+	while ( length > is->offset ) {
+		length -= is->offset;
+		if ( is->offset > 0 )
+			assert( memcmp( is->replItem->data, data-length, is->offset ) == 0 );
+
+		/* Continue from the end of the previous item. */
+		inputStreamReplBackup( is );
+		is->offset = is->replItem->data.length();
+	}
+
+	/* What is left lies within the current item. */
+	is->offset -= length;
+	assert( memcmp( &is->replItem->data[is->offset], data, length ) == 0 );
+}
+
+/* Undo the consumption of a language element: step back one item and set
+ * the offset to the length of that item's data. */
+void inputStreamReplUndoConsumeLangEl( SourceStream *is )
+{
+	inputStreamReplBackup( is );
+
+	ReplItem *item = is->replItem;
+	is->offset = item->data.length();
+}
+
+/* Advance the stream position by up to `length` bytes, moving on to the
+ * next replacement item whenever the current one is used up. Stops early
+ * when the items run out. Returns the number of bytes consumed. */
+int inputStreamReplConsumeData( SourceStream *is, int length )
+{
+	int consumed = 0;
+
+	while ( is->replItem != 0 ) {
+		int avail = is->replItem->data.length() - is->offset;
+
+		if ( length < avail ) {
+			/* The request ends inside the current item. */
+			is->offset += length;
+			consumed += length;
+			break;
+		}
+
+		/* Take the rest of this item and advance to the next one. */
+		is->replItem = is->replItem->next;
+		is->offset = 0;
+
+		length -= avail;
+		consumed += avail;
+
+		if ( length == 0 )
+			break;
+	}
+
+	return consumed;
+}
+
+/* Undo a data consume by moving the offset back `length` bytes. `data` is
+ * not looked at and item boundaries are not crossed. Returns `length`. */
+int inputStreamReplUndoConsumeData( SourceStream *is, const char *data, int length )
+{
+	is->offset = is->offset - length;
+	return length;
+}
+
+/* Fill in the replacement callback table. Every entry not assigned below
+ * is left zeroed. */
+extern "C" void initReplFuncs()
+{
+	memset( &replFuncs, 0, sizeof(SourceFuncs) );
+
+	/* Language elements. */
+	replFuncs.consumeLangEl = inputStreamReplGetLangEl;
+	replFuncs.undoConsumeLangEl = inputStreamReplUndoConsumeLangEl;
+
+	/* Text data. */
+	replFuncs.getData = inputStreamReplGetData;
+	replFuncs.consumeData = inputStreamReplConsumeData;
+	replFuncs.undoConsumeData = inputStreamReplUndoConsumeData;
+}
+
+/* Consume a language element from the input stream and make it the parse
+ * input of `pdaRun`. A token is built for the element, with a copy of the
+ * element's data when it has any, and wrapped in a parse tree flagged
+ * PF_NAMED. The parse tree is pushed as a binding when the element's bind
+ * id is positive. */
+void sendNamedLangEl( Program *prg, Tree **sp, PdaRun *pdaRun, FsmRun *fsmRun, InputStream *inputStream )
+{
+	/* Outputs of consumeLangEl. */
+	long bindId;
+	char *data;
+	long length;
+
+	LangEl *klangEl = consumeLangEl( inputStream, &bindId, &data, &length );
+
+	#ifdef COLM_LOG_PARSE
+	if ( colm_log_parse ) {
+		cerr << "named langEl: " << prg->rtd->lelInfo[klangEl->id].name << endl;
+	}
+	#endif
+
+	/* Copy the token data, if there is any. */
+	Head *tokdata = data != 0 ? stringAllocFull( prg, data, length ) : 0;
+
+	Kid *input = makeTokenWithData( prg, pdaRun, fsmRun, inputStream, klangEl->id, tokdata );
+
+	incrementSteps( pdaRun );
+
+	/* The parse tree shadows the token and takes over its id. */
+	ParseTree *named = parseTreeAllocate( prg );
+	named->id = input->tree->id;
+	named->flags |= PF_NAMED;
+	named->shadow = input;
+
+	if ( bindId > 0 )
+		pushBinding( pdaRun, named );
+
+	pdaRun->parseInput = named;
+}
+
+/* Create the bindings container of a PdaRun. Bindings are indexed from 1,
+ * so a zero entry is pushed first to stand for "no binding". */
+void initBindings( PdaRun *pdaRun )
+{
+	Bindings *bindings = new Bindings;
+	pdaRun->bindings = bindings;
+	pdaRun->bindings->push(0);
+}
+
+/* Record a bound parse tree by pushing it onto the bindings of `pdaRun`.
+ * Deciding whether the item is bound at all is left to the caller. */
+void pushBinding( PdaRun *pdaRun, ParseTree *parseTree )
+{
+	Bindings *bindings = pdaRun->bindings;
+	bindings->push( parseTree );
+}
+
+/* Remove the most recent binding, but only if it is `parseTree`. Otherwise
+ * the bindings are left untouched. */
+void popBinding( PdaRun *pdaRun, ParseTree *parseTree )
+{
+	if ( pdaRun->bindings->top() != parseTree )
+		return;
+
+	pdaRun->bindings->pop();
+}
diff --git a/src/debug.c b/src/debug.c
new file mode 100644
index 00000000..8efaf510
--- /dev/null
+++ b/src/debug.c
@@ -0,0 +1,78 @@
+/*
+ * Copyright 2010-2012 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <debug.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+long colmActiveRealm = 0; /* Bit mask of enabled realms; _debug() prints only when (colmActiveRealm & realm) is non-zero. */
+const char *colmRealmNames[REALMS] = /* Realm labels indexed by bit position; only the first eight entries are given, the rest stay null. */
+ {
+ "BYTECODE",
+ "PARSE",
+ "MATCH",
+ "COMPILE",
+ "POOL",
+ "PRINT",
+ "INPUT",
+ "SCAN",
+ };
+
+/* Print a debug message to stderr, prefixed with the name of its realm, if
+ * the realm is enabled in colmActiveRealm.
+ *
+ * realm: realm bit; the lowest set bit selects the name.
+ * fmt:   printf-style format, followed by its arguments.
+ *
+ * Returns what vfprintf returned for the message, or 0 when the realm is
+ * not active. Realms without an entry in colmRealmNames are labelled
+ * "UNKNOWN", so neither a null entry nor an index past the table is ever
+ * handed to %s. */
+int _debug( long realm, const char *fmt, ... )
+{
+	int result = 0;
+	if ( colmActiveRealm & realm ) {
+		/* Compute the index by shifting. The shift is done on an unsigned
+		 * copy; at least one bit is set here, so the loop terminates. */
+		unsigned long bits = (unsigned long)realm;
+		int ind = 0;
+		while ( (bits & 0x1) != 0x1 ) {
+			bits >>= 1;
+			ind += 1;
+		}
+
+		const char *name = "UNKNOWN";
+		if ( ind < REALMS && colmRealmNames[ind] != 0 )
+			name = colmRealmNames[ind];
+
+		fprintf( stderr, "%s: ", name );
+		va_list args;
+		va_start( args, fmt );
+		result = vfprintf( stderr, fmt, args );
+		va_end( args );
+	}
+
+	return result;
+}
+
+/* Print "fatal: " followed by the printf-style message to stderr, then
+ * terminate the process with exit status 1. Does not return. */
+void fatal( const char *fmt, ... )
+{
+	va_list ap;
+
+	fputs( "fatal: ", stderr );
+
+	va_start( ap, fmt );
+	vfprintf( stderr, fmt, ap );
+	va_end( ap );
+
+	exit(1);
+}
+
+/* Print "message: " followed by the printf-style message to stderr. */
+void message( const char *fmt, ... )
+{
+	va_list ap;
+
+	fputs( "message: ", stderr );
+
+	va_start( ap, fmt );
+	vfprintf( stderr, fmt, ap );
+	va_end( ap );
+}
diff --git a/src/debug.h b/src/debug.h
new file mode 100644
index 00000000..3fd9bb8e
--- /dev/null
+++ b/src/debug.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright 2010-2012 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/* Debug and diagnostic output interface. The declarations have C linkage so
+ * that they can be used from both C and C++ translation units. */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "config.h"
+
+/* Print "fatal: " plus a printf-style message to stderr and exit with
+ * status 1. */
+void fatal( const char *fmt, ... );
+
+/* debug( realm, fmt, ... ) forwards to _debug() when DEBUG is defined and
+ * expands to nothing otherwise, so its arguments are not evaluated in
+ * non-debug builds.
+ * NOTE(review): _check_realm is not declared in this header; confirm it is
+ * provided elsewhere before using check_realm() in a DEBUG build. */
+#ifdef DEBUG
+#define debug( realm, ... ) _debug( realm, __VA_ARGS__ )
+#define check_realm( realm ) _check_realm( realm )
+#else
+#define debug( realm, ... )
+#define check_realm( realm )
+#endif
+
+/* Print a realm-prefixed printf-style message to stderr if the realm is
+ * enabled in colmActiveRealm. Returns the vfprintf result, or 0 if nothing
+ * was printed. */
+int _debug( long realm, const char *fmt, ... );
+
+/* Print "message: " plus a printf-style message to stderr. */
+void message( const char *fmt, ... );
+
+/* Realm masks, one bit each. The bit position of a mask is the index of its
+ * name in colmRealmNames. */
+#define REALM_BYTECODE 0x00000001
+#define REALM_PARSE 0x00000002
+#define REALM_MATCH 0x00000004
+#define REALM_COMPILE 0x00000008
+#define REALM_POOL 0x00000010
+#define REALM_PRINT 0x00000020
+#define REALM_INPUT 0x00000040
+#define REALM_SCAN 0x00000080
+
+/* Number of slots in the realm name table. */
+#define REALMS 32
+
+/* Bitmask of currently enabled realms. */
+extern long colmActiveRealm;
+/* Realm names indexed by bit position. */
+extern const char *colmRealmNames[REALMS];
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/declare.cc b/src/declare.cc
new file mode 100644
index 00000000..167fe050
--- /dev/null
+++ b/src/declare.cc
@@ -0,0 +1,383 @@
+/*
+ * Copyright 2012 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "bytecode.h"
+#include "parsedata.h"
+#include "fsmrun.h"
+#include <iostream>
+#include <assert.h>
+
+/* Create a language element called 'data' of the given type, map the name to
+ * it in the namespace's type map and append it to the compiler's list of
+ * language elements. An error is reported when the name is already present
+ * in the type map. Returns the new element. */
+LangEl *declareLangEl( Compiler *pd, Namespace *nspace, const String &data, LangEl::Type type )
+{
+	/* The name must not already be taken in this namespace. */
+	if ( nspace->typeMap.find( data ) != 0 )
+		error() << "'" << data << "' already defined as something else" << endp;
+
+	/* Build the element, then the map entry that names it. */
+	LangEl *created = new LangEl( nspace, data, type );
+	TypeMapEl *entry = new TypeMapEl( data, created );
+
+	nspace->typeMap.insert( entry );
+	pd->langEls.append( created );
+	return created;
+}
+
+/* Create a language element and record it with the compiler only. Unlike
+ * declareLangEl, the name is not entered into the namespace's type map. */
+LangEl *addLangEl( Compiler *pd, Namespace *nspace, const String &data, LangEl::Type type )
+{
+	LangEl *unmapped = new LangEl( nspace, data, type );
+
+	pd->langEls.append( unmapped );
+
+	return unmapped;
+}
+
+/* Map the name 'data' to the type reference 'typeRef' in the namespace's
+ * type map. An error is reported when the name is already present. */
+void declareTypeAlias( Compiler *pd, Namespace *nspace, const String &data, TypeRef *typeRef )
+{
+	/* Aliases share the type map with language elements, so any existing
+	 * entry under this name is a conflict. */
+	if ( nspace->typeMap.find( data ) != 0 )
+		error() << "'" << data << "' already defined as something else" << endp;
+
+	TypeMapEl *aliasEntry = new TypeMapEl( data, typeRef );
+	nspace->typeMap.insert( aliasEntry );
+}
+
+/* Look up 'data' in the namespace's type map and return the value stored in
+ * its entry. An error is reported when the name has no entry.
+ * NOTE(review): the entry is dereferenced after the error report, so this
+ * relies on the report not returning -- confirm. */
+LangEl *findType( Compiler *pd, Namespace *nspace, const String &data )
+{
+	TypeMapEl *found = nspace->typeMap.find( data );
+
+	if ( found == 0 )
+		error() << "'" << data << "' not declared as anything" << endp;
+
+	return found->value;
+}
+
+
+/* Declare the built-in language elements in the root namespace and store
+ * them in the corresponding Compiler members. */
+void Compiler::declareBaseLangEls()
+{
+ /* Order here is important because we make assumptions about the built-in
+ * language elements in the runtime. Note that tokens have their
+ * identifiers set in an initial pass. */
+
+ /* Make a "_notoken" language element. This element is used when a
+ * generation action fails to generate anything, but there is reverse code
+ * that needs to be associated with a language element. This allows us to
+ * always associate reverse code with the first language element produced
+ * after a generation action. */
+ noTokenLangEl = declareLangEl( this, rootNamespace, "_notoken", LangEl::Term );
+ noTokenLangEl->ignore = true;
+
+ /* Make the built-in terminal language elements: "ptr", "bool", "int",
+ * "str", "stream", "accum_stream" and "il". */
+ ptrLangEl = declareLangEl( this, rootNamespace, "ptr", LangEl::Term );
+ boolLangEl = declareLangEl( this, rootNamespace, "bool", LangEl::Term );
+ intLangEl = declareLangEl( this, rootNamespace, "int", LangEl::Term );
+ strLangEl = declareLangEl( this, rootNamespace, "str", LangEl::Term );
+ streamLangEl = declareLangEl( this, rootNamespace, "stream", LangEl::Term );
+ inputLangEl = declareLangEl( this, rootNamespace, "accum_stream", LangEl::Term );
+ ignoreLangEl = declareLangEl( this, rootNamespace, "il", LangEl::Term );
+
+ /* The EOF language element is not created here; it is only cleared. */
+ eofLangEl = 0;
+
+ /* Make the "any" language element. It is the only non-terminal declared
+ * here. */
+ anyLangEl = declareLangEl( this, rootNamespace, "any", LangEl::NonTerm );
+}
+
+
+/* Add the "lhs" field to a reduction block's local frame. The field is
+ * typed as a tree of the given non-terminal and flagged as the left-hand
+ * side element. */
+void Compiler::addProdRedObjectVar( ObjectDef *localFrame, LangEl *nonTerm )
+{
+	/* The type of the field is the tree type of the non-terminal. */
+	UniqueType *lhsUT = findUniqueType( TYPE_TREE, nonTerm );
+	TypeRef *lhsTypeRef = new TypeRef( InputLoc(), lhsUT );
+
+	ObjField *lhsField = new ObjField( InputLoc(), lhsTypeRef, "lhs" );
+	lhsField->isLhsEl = true;
+	initLocalInstructions( lhsField );
+
+	localFrame->insertField( lhsField->name, lhsField );
+}
+
+/* Insert the instructions that initialize the "lhs" local into 'code' at
+ * 'insertPos', then advance 'insertPos' past what was inserted. Nothing is
+ * generated when the reduction block never references "lhs". */
+void Compiler::addProdLHSLoad( Definition *prod, CodeVect &code, long &insertPos )
+{
+	ObjField *lhs = prod->redBlock->localFrame->findField("lhs");
+	assert( lhs != 0 );
+
+	/* Build the sequence separately so its length is known for the
+	 * position update. It stays empty for an unreferenced lhs. */
+	CodeVect initCode;
+	if ( lhs->beenReferenced ) {
+		initCode.append( IN_INIT_LHS_EL );
+		initCode.appendHalf( lhs->offset );
+	}
+
+	code.insert( insertPos, initCode );
+	insertPos += initCode.length();
+}
+
+/* Append the instruction that stores the "lhs" local to the end of 'code'.
+ * Nothing is generated when the reduction block never references "lhs".
+ * The 'insertPos' argument is not used. */
+void Compiler::addPushBackLHS( Definition *prod, CodeVect &code, long &insertPos )
+{
+	/* If the lhs tree is dirty the old lhs has to be saved before it is
+	 * modified. This should be avoided for attribute modifications; the
+	 * computation of dirtyTree is expected to take care of that. */
+	ObjField *lhs = prod->redBlock->localFrame->findField("lhs");
+	assert( lhs != 0 );
+
+	if ( !lhs->beenReferenced )
+		return;
+
+	code.append( IN_STORE_LHS_EL );
+	code.appendHalf( lhs->offset );
+}
+
+/* Add one constant, read-only local field per reference-typed right-hand
+ * side element of a production. Fields are named "r<N>", where N is the
+ * 1-based position of the element in the production; elements of other
+ * kinds still consume a position. Each created field is also stored on its
+ * production element. */
+void Compiler::addProdRHSVars( ObjectDef *localFrame, ProdElList *prodElList )
+{
+	long position = 1;
+	for ( ProdElList::Iter rhsEl = *prodElList; rhsEl.lte(); rhsEl++, position++ ) {
+		if ( rhsEl->type != ProdEl::ReferenceType )
+			continue;
+
+		/* Use an offset of zero. For frame objects we compute the offset on
+		 * demand. The position is a long, so it must be formatted with %ld:
+		 * passing a long for %d is a varargs type mismatch. (String's
+		 * formatting constructor is assumed to be printf-style.) */
+		String name( 8, "r%ld", position );
+		ObjField *el = new ObjField( InputLoc(), rhsEl->typeRef, name );
+		rhsEl->objField = el;
+
+		/* Right hand side elements are constant. */
+		el->isConst = true;
+		el->isRhsEl = true;
+
+		/* Only ever fetch for reading since they are constant. */
+		el->inGetR = IN_GET_LOCAL_R;
+
+		localFrame->insertField( el->name, el );
+	}
+}
+
+/* Insert, at 'insertPos' in 'code', the instructions that initialize the
+ * locals of every referenced reference-typed right-hand side element, then
+ * advance 'insertPos' past what was inserted. Each instruction carries the
+ * 0-based position of the element and the offset of its field. */
+void Compiler::addProdRHSLoads( Definition *prod, CodeVect &code, long &insertPos )
+{
+	CodeVect initCode;
+
+	long rhsIndex = 0;
+	for ( ProdElList::Iter el = *prod->prodElList; el.lte(); el++, rhsIndex++ ) {
+		/* Only reference elements have a field to initialize. */
+		if ( el->type != ProdEl::ReferenceType )
+			continue;
+
+		/* Skip fields the reduction code never touches. */
+		if ( !el->objField->beenReferenced )
+			continue;
+
+		initCode.append ( IN_INIT_RHS_EL );
+		initCode.appendHalf( rhsIndex );
+		initCode.appendHalf( el->objField->offset );
+	}
+
+	/* Splice the sequence in and move the insert position past it. */
+	code.insert( insertPos, initCode );
+	insertPos += initCode.length();
+}
+
+/* Declare the non-terminal language element for this generic type in
+ * 'nspace', give it a single empty production, and link the element and the
+ * generic type to each other. */
+void GenericType::declare( Compiler *pd, Namespace *nspace )
+{
+	LangEl *genericEl = declareLangEl( pd, nspace, name, LangEl::NonTerm );
+
+	/* The one production of a generic has an empty right-hand side. */
+	ProdElList *noElements = new ProdElList;
+
+	assert( genericEl->type == LangEl::NonTerm );
+
+	/* The production is numbered by its position in the global production
+	 * list and in the element's own definition list. */
+	Definition *emptyProd = new Definition( InputLoc(), genericEl,
+			noElements, false, 0,
+			pd->prodList.length(), genericEl->defList.length(),
+			Definition::Production );
+
+	genericEl->defList.append( emptyProd );
+	pd->prodList.append( emptyProd );
+	emptyProd->predOf = 0;
+
+	/* Cross-link the language element and the generic. */
+	genericEl->generic = this;
+	this->langEl = genericEl;
+}
+
+/* Declare the language elements for everything defined in this namespace
+ * (generics, literals, contexts, tokens, non-terminals and type aliases, in
+ * that order) and then recurse into the child namespaces. */
+void Namespace::declare( Compiler *pd )
+{
+ /* Generics. */
+ for ( GenericList::Iter g = genericList; g.lte(); g++ )
+ g->declare( pd, this );
+
+ /* Literals. */
+ for ( LiteralDict::Iter l = literalDict; l.lte(); l++ ) {
+ if ( l->value->dupOf != 0 ) {
+ /* Duplicate of another. Use the lang el of that token. */
+ assert( l->value->dupOf->tdLangEl != 0 );
+ l->value->tdLangEl = l->value->dupOf->tdLangEl;
+ }
+ else {
+ if ( l->value->isZero ) {
+ /* Zero literal: reuse the ciLel of the token region instead of
+ * declaring a new element. */
+ l->value->tdLangEl = l->value->tokenRegion->ciLel;
+ assert( l->value->tokenRegion->ciLel != 0 );
+ }
+ else {
+ /* Original. Create a token for the literal. */
+ LangEl *newLangEl = declareLangEl( pd, this, l->value->name, LangEl::Term );
+
+ newLangEl->lit = l->value->literal;
+ newLangEl->isLiteral = true;
+ newLangEl->tokenDef = l->value;
+
+ l->value->tdLangEl = newLangEl;
+
+ if ( l->value->noPreIgnore )
+ newLangEl->noPreIgnore = true;
+ if ( l->value->noPostIgnore )
+ newLangEl->noPostIgnore = true;
+ }
+ }
+ }
+
+ /* Contexts. Each one becomes a non-terminal with a single empty
+ * production. */
+ for ( ContextDefList::Iter c = contextDefList; c.lte(); c++ ) {
+ LangEl *lel = declareLangEl( pd, this, c->name, LangEl::NonTerm );
+ ProdElList *emptyList = new ProdElList;
+ //addProduction( c->context->loc, c->name, emptyList, false, 0, 0 );
+
+ {
+ LangEl *prodName = lel;
+ assert( prodName->type == LangEl::NonTerm );
+
+ Definition *newDef = new Definition( loc, prodName,
+ emptyList, false, 0,
+ pd->prodList.length(), prodName->defList.length(),
+ Definition::Production );
+
+ prodName->defList.append( newDef );
+ pd->prodList.append( newDef );
+ newDef->predOf = 0;
+
+ /* If the context has the same name as the namespace it is in, then
+ * also insert it into the type map of the parent namespace.
+ * NOTE(review): parentNamespace is dereferenced without a check;
+ * confirm it cannot be null when the names match. */
+ if ( strcmp( c->name, this->name ) == 0 ) {
+ /* Map the name to the same language element in the parent. */
+ TypeMapEl *typeMapEl = new TypeMapEl( c->name, prodName );
+ this->parentNamespace->typeMap.insert( typeMapEl );
+ }
+ }
+
+ /* Link the context and its language element to each other. */
+ c->context->lel = lel;
+ lel->contextDef = c->context;
+ lel->objectDef = c->context->contextObjDef;
+ }
+
+ /* Tokens. */
+ for ( TokenDefListNs::Iter t = tokenDefList; t.lte(); t++ ) {
+ /* Literals already taken care of. */
+ if ( ! t->isLiteral ) {
+ if ( t->dupOf != 0 ) {
+ /* Duplicate of another. Use the lang el of that token. */
+ assert( t->dupOf->tdLangEl != 0 );
+ t->tdLangEl = t->dupOf->tdLangEl;
+ }
+ else {
+ /* Create the token. */
+ LangEl *tokEl = declareLangEl( pd, this, t->name, LangEl::Term );
+ tokEl->ignore = t->ignore;
+ tokEl->transBlock = t->codeBlock;
+ tokEl->objectDef = t->objectDef;
+ tokEl->contextIn = t->contextIn;
+ tokEl->tokenDef = t;
+
+ if ( t->noPreIgnore )
+ tokEl->noPreIgnore = true;
+ if ( t->noPostIgnore )
+ tokEl->noPostIgnore = true;
+
+ t->tdLangEl = tokEl;
+ }
+ }
+ }
+
+ /* Non-terminals. */
+ for ( NtDefList::Iter n = ntDefList; n.lte(); n++ ) {
+ /* Get the language element. */
+ LangEl *langEl = declareLangEl( pd, this, n->name, LangEl::NonTerm );
+ //$$->langEl = langEl;
+
+ /* Copy the definition's properties and move its productions over to
+ * the language element. */
+ langEl->objectDef = n->objectDef;
+ langEl->reduceFirst = n->reduceFirst;
+ langEl->contextIn = n->contextIn;
+ langEl->defList.transfer( *n->defList );
+
+ for ( LelDefList::Iter d = langEl->defList; d.lte(); d++ ) {
+ d->prodName = langEl;
+
+ /* Productions with a reduction block get the "lhs" local and one
+ * local per reference-typed right-hand side element. */
+ if ( d->redBlock != 0 ) {
+ pd->addProdRedObjectVar( d->redBlock->localFrame, langEl );
+ pd->addProdRHSVars( d->redBlock->localFrame, d->prodElList );
+ }
+
+ /* References to the reduce item. */
+ }
+ }
+
+ /* Type aliases. */
+ for ( TypeAliasList::Iter ta = typeAliasList; ta.lte(); ta++ )
+ declareTypeAlias( pd, this, ta->name, ta->typeRef );
+
+ /* Go into child namespaces. */
+ for ( NamespaceVect::Iter c = childNamespaces; c.lte(); c++ )
+ (*c)->declare( pd );
+}
+
+/* Apply every precedence declaration: resolve the declared type and copy the
+ * declaration's precedence type and value onto the language element of that
+ * type. */
+void Compiler::setPrecedence()
+{
+	for ( PredDeclList::Iter decl = predDeclList; decl != 0; decl++ ) {
+		/* Resolve the reference first so that uniqueType is available. */
+		decl->typeRef->lookupType( this );
+
+		LangEl *target = decl->typeRef->uniqueType->langEl;
+
+		target->predType = decl->predType;
+		target->predValue = decl->predValue;
+	}
+}
+
+/*
+ * Type Declaration Root.
+ */
+/* Run the type declaration pass. The steps below are executed in a fixed
+ * order. */
+void Compiler::typeDeclaration()
+{
+ /* These must be declared first, since the runtime assumes their identifiers. */
+ declareBaseLangEls();
+
+ makeIgnoreCollectors();
+
+ /* Declare everything in the root namespace; the declaration recurses into
+ * the child namespaces. */
+ rootNamespace->declare( this );
+
+ /* Fill any empty scanners with a default token. */
+ initEmptyScanners();
+
+ /* Create the default scanner which will return single characters for us
+ * when we have no other scanner */
+ createDefaultScanner();
+
+ initUniqueTypes();
+
+ /* Apply the precedence declarations to the language elements. */
+ setPrecedence();
+}
diff --git a/src/defs.h.in b/src/defs.h.in
new file mode 100644
index 00000000..06a3f9df
--- /dev/null
+++ b/src/defs.h.in
@@ -0,0 +1,49 @@
+/*
+ * Copyright 2001 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+
+/* Configuration */
+/* NOTE(review): this is a ".in" template; the #undef lines below are
+ * presumably rewritten by the configure step -- confirm. */
+#undef COLM_LOG
+#undef COLM_LOG_BYTECODE
+#undef COLM_LOG_PARSE
+#undef COLM_LOG_MATCH
+#undef COLM_LOG_COMPILE
+
+/* If COLM_LOG is defined then turn on all logging options. */
+#ifdef COLM_LOG
+#define COLM_LOG_BYTECODE 1
+#define COLM_LOG_PARSE 1
+#define COLM_LOG_MATCH 1
+#define COLM_LOG_COMPILE 1
+#endif
+
+/* Logging switches; they are only declared here, not defined. */
+extern int colm_log_bytecode;
+extern int colm_log_parse;
+extern int colm_log_match;
+extern int colm_log_compile;
+extern int colm_log_conds;
+
+/* The size of `long', as computed by sizeof. */
+#undef SIZEOF_LONG
+
+#endif /* _CONFIG_H */
diff --git a/src/dotgen.cc b/src/dotgen.cc
new file mode 100644
index 00000000..e4474958
--- /dev/null
+++ b/src/dotgen.cc
@@ -0,0 +1,113 @@
+/*
+ * Copyright 2006-2012 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+
+#include "global.h"
+#include "parsedata.h"
+
+using namespace std;
+
+
+/* Write one Graphviz edge to 'out' for every transition leaving 'state'.
+ * The edge label is the name of the language element for the transition key
+ * (or the key as a raw character when there is none), followed, after
+ * " / ", by the transition's actions. */
+void Compiler::writeTransList( PdaState *state )
+{
+	for ( TransMap::Iter tr = state->transMap; tr.lte(); tr++ ) {
+		/* Edge endpoints: source and destination state numbers. */
+		out << "\t" << state->stateNum << " -> " << tr->value->toState->stateNum;
+
+		/* Open the label attribute. */
+		out << " [ label = \"";
+
+		long key = tr->key;
+		LangEl *keyEl = langElIndex[key];
+		if ( keyEl == 0 )
+			out << (char)key;
+		else
+			out << keyEl->name;
+
+		if ( tr->value->actions.length() > 0 ) {
+			out << " / ";
+			for ( ActDataList::Iter act = tr->value->actions; act.lte(); act++ ) {
+				/* The low two bits select the kind of action; the remaining
+				 * bits index the production table. A kind of zero prints
+				 * nothing. */
+				int kind = *act & 0x3;
+				if ( kind == 1 ) {
+					out << "S(" << tr->value->actOrds[act.pos()] << ")";
+				}
+				else if ( kind == 2 ) {
+					out << "R(" << prodIdIndex[(*act >> 2)]->data;
+					out << ", " << tr->value->actOrds[act.pos()] << ")";
+				}
+				else if ( kind == 3 ) {
+					out << "SR(" << prodIdIndex[(*act >> 2)]->data;
+					out << ", " << tr->value->actOrds[act.pos()] << ")";
+				}
+
+				/* Separate the actions with commas. */
+				if ( ! act.last() )
+					out << ", ";
+			}
+		}
+
+		/* Close the label and the edge statement. */
+		out << "\" ];\n";
+	}
+}
+
+/* Write 'graph' to 'out' as a Graphviz digraph named after the parser: a
+ * point-shaped pseudo node per entry state, a circle per state, every
+ * transition, and finally an edge from each pseudo node to its entry
+ * state. */
+void Compiler::writeDotFile( PdaGraph *graph )
+{
+	/* Graph header and layout attributes. */
+	out << "digraph " << parserName << " {\n";
+	out << " rankdir=LR;\n";
+	out << " ranksep=\"0\"\n";
+	out << " nodesep=\"0.25\"\n";
+	out << "\n";
+
+	/* Define the pseudo states. Transitions are written once all the
+	 * states have been defined. */
+	out << " node [ shape = point ];\n";
+
+	int entryNum = 0;
+	while ( entryNum < graph->entryStateSet.length() ) {
+		out << "\tENTRY" << entryNum << " [ label = \"\" ];\n";
+		entryNum++;
+	}
+
+	out << "\n";
+	out << " node [ shape = circle, fixedsize = true, height = 0.6 ];\n";
+
+	/* First pass over the states: declare the nodes. */
+	for ( PdaStateList::Iter node = graph->stateList; node.lte(); node++ )
+		out << " " << node->stateNum << " [ label = \"" << node->stateNum << "\" ];\n";
+
+	out << "\n";
+
+	/* Second pass over the states: write the outgoing edges. */
+	for ( PdaStateList::Iter from = graph->stateList; from.lte(); from++ )
+		writeTransList( from );
+
+	/* Start state and other entry points. */
+	for ( PdaStateSet::Iter entry = graph->entryStateSet; entry.lte(); entry++ ) {
+		out << "\tENTRY" << entry.pos() << " -> " << (*entry)->stateNum;
+		out << " [ label = \"\" ];\n";
+	}
+
+	out << "}\n";
+}
+
+/* Write the compiler's own pdaGraph as a Graphviz dot file. */
+void Compiler::writeDotFile()
+{
+ writeDotFile( pdaGraph );
+}
+
diff --git a/src/dotgen.h b/src/dotgen.h
new file mode 100644
index 00000000..d05a2410
--- /dev/null
+++ b/src/dotgen.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright 2001-2012 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _GVDOTGEN_H
+#define _GVDOTGEN_H
+
+#include <iostream>
+
+/* The class below is compiled out by the surrounding #if 0, so this header
+ * currently declares nothing beyond including <iostream>. */
+#if 0
+
+class GraphvizDotGen : public CodeGenData
+{
+public:
+ GraphvizDotGen( ostream &out ) : CodeGenData(out) { }
+
+ /* Print an fsm to out stream. */
+ void writeTransList( RedState *state );
+ void writeDotFile( );
+
+ virtual void finishRagelDef();
+
+private:
+ /* Writing labels and actions. */
+ std::ostream &ONCHAR( Key lowKey, Key highKey );
+ std::ostream &TRANS_ACTION( RedState *fromState, RedTrans *trans );
+ std::ostream &ACTION( RedAction *action );
+ std::ostream &KEY( Key key );
+};
+
+#endif
+
+
+#endif /* _GVDOTGEN_H */
diff --git a/src/exports.cc b/src/exports.cc
new file mode 100644
index 00000000..f5153330
--- /dev/null
+++ b/src/exports.cc
@@ -0,0 +1,285 @@
+/*
+ * Copyright 2006-2012 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "parsedata.h"
+#include "fsmcodegen.h"
+#include "redfsm.h"
+#include "bstmap.h"
+#include "fsmrun.h"
+#include "debug.h"
+#include <sstream>
+#include <string>
+
+using std::ostream;
+using std::ostringstream;
+using std::string;
+using std::cerr;
+using std::endl;
+
+/* Write the opening "namespace X { " for 'nspace' and all of its ancestors,
+ * outermost first. Nothing is written for the default and the root
+ * namespace, which also end the recursion. */
+void Compiler::openNameSpace( ostream &out, Namespace *nspace )
+{
+	bool topLevel = ( nspace == defaultNamespace ) || ( nspace == rootNamespace );
+
+	if ( !topLevel ) {
+		/* Ancestors go first so that the nesting reads outside-in. */
+		openNameSpace( out, nspace->parentNamespace );
+		out << "namespace " << nspace->name << " { ";
+	}
+}
+
+/* Write the closing " }" for 'nspace' and all of its ancestors, matching
+ * what openNameSpace wrote for the same namespace. Nothing is written for
+ * the default and the root namespace, which also end the recursion. */
+void Compiler::closeNameSpace( ostream &out, Namespace *nspace )
+{
+	if ( nspace == defaultNamespace || nspace == rootNamespace )
+		return;
+
+	/* Close this level, then the enclosing ones. The recursion has to go
+	 * through closeNameSpace: recursing into openNameSpace would re-open the
+	 * ancestors and leave the braces of a nested namespace unbalanced. */
+	out << " }";
+	closeNameSpace( out, nspace->parentNamespace );
+}
+
+/* Decide whether a language element is left out of the generated exports.
+ * Returns the marker comment to write in its place, or null when the element
+ * is to be exported. The checks run in a fixed order; the first match
+ * wins. */
+static const char *exportSkipNote( LangEl *lel )
+{
+	if ( lel->isEOF )
+		return "// isEOF\n";
+
+	if ( lel->tokenDef != 0 && lel->tokenDef->tokenRegion != 0 ) {
+		if ( lel->tokenDef->tokenRegion->isTokenOnly )
+			return "// isTokenOnly\n";
+		if ( lel->tokenDef->tokenRegion->isIgnoreOnly )
+			return "// isIgnoreOnly\n";
+		if ( lel->tokenDef->tokenRegion->isCiOnly )
+			return "// isCiOnly\n";
+	}
+
+	if ( lel->ciRegion != 0 )
+		return "// ciRegion != 0\n";
+
+	return 0;
+}
+
+/* Write the exports header to outStream: the include guard, the
+ * printTreeStr helper, a forward declaration and a wrapper struct for every
+ * exported language element, and a prototype for every exported global of
+ * tree type. Elements that are not exported are replaced by a one-line
+ * marker comment naming the reason. */
+void Compiler::generateExports()
+{
+	ostream &out = *outStream;
+
+	out <<
+		"#ifndef _EXPORTS_H\n"
+		"#define _EXPORTS_H\n"
+		"\n"
+		"#include <colm/colm.h>\n"
+		"#include <string>\n"
+		"\n";
+
+	out <<
+		"inline void appendString( ColmPrintArgs *args, const char *data, int length )\n"
+		"{\n"
+		" std::string *str = (std::string*)args->arg;\n"
+		" *str += std::string( data, length );\n"
+		"}\n"
+		"\n";
+
+	out <<
+		"inline std::string printTreeStr( ColmProgram *prg, ColmTree *tree, bool trim )\n"
+		"{\n"
+		" std::string str;\n"
+		" ColmPrintArgs printArgs = { &str, 1, 0, trim, &appendString, \n"
+		" &printNull, &printTermTree, &printNull };\n"
+		" printTreeArgs( prg, vm_root(prg), &printArgs, tree );\n"
+		" return str;\n"
+		"}\n"
+		"\n";
+
+	/* Declare. */
+	for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
+		const char *skipNote = exportSkipNote( lel );
+		if ( skipNote != 0 ) {
+			out << skipNote;
+			continue;
+		}
+
+		openNameSpace( out, lel->nspace );
+		out << "struct " << lel->fullName << ";";
+		closeNameSpace( out, lel->nspace );
+		out << "\n";
+	}
+
+	/* Class definitions. The same elements are skipped as above. */
+	for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
+		const char *skipNote = exportSkipNote( lel );
+		if ( skipNote != 0 ) {
+			out << skipNote;
+			continue;
+		}
+
+		openNameSpace( out, lel->nspace );
+		out << "struct " << lel->fullName << "\n";
+		out << "{\n";
+		out << " std::string text() { return printTreeStr( prg, tree, true ); }\n";
+		out << " std::string text_notrim() { return printTreeStr( prg, tree, false ); }\n";
+		out << " operator ColmTree *() { return tree; }\n";
+		out << " ColmProgram *prg;\n";
+		out << " ColmTree *tree;\n";
+
+		/* Only the type returned by main gets the constructor that takes
+		 * its tree from the program's return value. */
+		if ( mainReturnUT != 0 && mainReturnUT->langEl == lel ) {
+			out << " " << lel->fullName << "( ColmProgram *prg ) : prg(prg), tree(returnVal(prg)) {}\n";
+		}
+		out << " " << lel->fullName << "( ColmProgram *prg, ColmTree *tree ) : prg(prg), tree(tree) {}\n";
+
+		/* Accessor prototypes for the tree-typed fields of the element. */
+		if ( lel->objectDef != 0 && lel->objectDef->objFieldList != 0 ) {
+			ObjFieldList *objFieldList = lel->objectDef->objFieldList;
+			for ( ObjFieldList::Iter ofi = *objFieldList; ofi.lte(); ofi++ ) {
+				ObjField *field = ofi->value;
+				if ( field->useOffset && field->typeRef != 0 ) {
+					UniqueType *ut = field->typeRef->lookupType( this );
+
+					if ( ut != 0 && ut->typeId == TYPE_TREE ) {
+						out << " " << ut->langEl->refName << " " << field->name << "();\n";
+					}
+				}
+
+				if ( field->isRhsGet ) {
+					UniqueType *ut = field->typeRef->lookupType( this );
+
+					if ( ut != 0 && ut->typeId == TYPE_TREE ) {
+						out << " " << ut->langEl->refName << " " << field->name << "();\n";
+					}
+				}
+			}
+		}
+
+		if ( lel->isRepeat ) {
+			out << " " << "int end() { return repeatEnd( tree ); }\n";
+			out << " " << lel->refName << " next();\n";
+			out << " " << lel->repeatOf->refName << " value();\n";
+		}
+
+		if ( lel->isList ) {
+			out << " " << "int last() { return listLast( tree ); }\n";
+			out << " " << lel->refName << " next();\n";
+			out << " " << lel->repeatOf->refName << " value();\n";
+		}
+		out << "};";
+		closeNameSpace( out, lel->nspace );
+		out << "\n";
+	}
+
+	/* Prototypes for the exported globals of tree type. */
+	for ( ObjFieldList::Iter of = *globalObjectDef->objFieldList; of.lte(); of++ ) {
+		ObjField *field = of->value;
+		if ( field->isExport ) {
+			UniqueType *ut = field->typeRef->lookupType(this);
+			if ( ut != 0 && ut->typeId == TYPE_TREE ) {
+				out << ut->langEl->refName << " " << field->name << "( ColmProgram *prg );\n";
+			}
+		}
+	}
+
+	out << "#endif\n";
+}
+
+/* Write the implementation that goes with the exports header to outStream:
+ * an include of the exports file when one was named, the field accessors and
+ * the next()/value() members of every language element, and the accessor
+ * functions for the exported globals of tree type. */
+void Compiler::generateExportsImpl()
+{
+	ostream &out = *outStream;
+
+	if ( gblExportTo != 0 )
+		out << "#include \"" << gblExportTo << "\"\n";
+
+	/* Member function implementations, one language element at a time. */
+	for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
+		if ( lel->objectDef != 0 && lel->objectDef->objFieldList != 0 ) {
+			ObjFieldList *fields = lel->objectDef->objFieldList;
+			for ( ObjFieldList::Iter fieldIt = *fields; fieldIt.lte(); fieldIt++ ) {
+				ObjField *objField = fieldIt->value;
+
+				/* Fields stored at an offset are read as attributes. */
+				if ( objField->useOffset && objField->typeRef != 0 ) {
+					UniqueType *attrUT = objField->typeRef->lookupType( this );
+
+					if ( attrUT != 0 && attrUT->typeId == TYPE_TREE ) {
+						out << attrUT->langEl->refName << " " << lel->declName << "::" << objField->name;
+						out << "() { return " << attrUT->langEl->refName;
+						out << "( prg, getAttr( tree, " << objField->offset << ") ); }\n";
+					}
+				}
+
+				/* Right-hand side getters pass a static table to getRhsVal:
+				 * the number of entries, then a (production, child) pair
+				 * for each entry. */
+				if ( objField->isRhsGet ) {
+					UniqueType *rhsUT = objField->typeRef->lookupType( this );
+
+					if ( rhsUT != 0 && rhsUT->typeId == TYPE_TREE ) {
+						out << rhsUT->langEl->refName << " " << lel->declName << "::" << objField->name;
+						out << "() { static int a[] = {";
+
+						out << objField->rhsVal.length();
+						for ( Vector<RhsVal>::Iter rhs = objField->rhsVal; rhs.lte(); rhs++ )
+							out << ", " << rhs->prodNum << ", " << rhs->childNum;
+
+						out << "}; return " << rhsUT->langEl->refName;
+						out << "( prg, getRhsVal( prg, tree, a ) ); }\n";
+					}
+				}
+			}
+		}
+
+		if ( lel->isRepeat ) {
+			out << lel->refName << " " << lel->declName << "::";
+			out << " next() { return " << lel->refName;
+			out << "( prg, getRepeatNext( tree ) ); }\n";
+
+			out << lel->repeatOf->refName << " " << lel->declName << "::";
+			out << " value() { return " << lel->repeatOf->refName;
+			out << "( prg, getRepeatVal( tree ) ); }\n";
+		}
+
+		/* Lists are written with the same repeat accessors as repeats. */
+		if ( lel->isList ) {
+			out << lel->refName << " " << lel->declName << "::";
+			out << " next() { return " << lel->refName;
+			out << "( prg, getRepeatNext( tree ) ); }\n";
+
+			out << lel->repeatOf->refName << " " << lel->declName << "::";
+			out << " value() { return " << lel->repeatOf->refName;
+			out << "( prg, getRepeatVal( tree ) ); }\n";
+		}
+	}
+
+	out << "\n";
+
+	/* Accessor functions for the exported globals of tree type. */
+	for ( ObjFieldList::Iter glob = *globalObjectDef->objFieldList; glob.lte(); glob++ ) {
+		ObjField *objField = glob->value;
+		if ( !objField->isExport )
+			continue;
+
+		UniqueType *globUT = objField->typeRef->lookupType(this);
+		if ( globUT != 0 && globUT->typeId == TYPE_TREE ) {
+			out << globUT->langEl->refName << " " << objField->name << "(ColmProgram *prg)\n";
+			out << "{ return " << globUT->langEl->refName << "( prg, getGlobal( prg, ";
+			out << objField->offset << ") ); }\n";
+		}
+	}
+}
+
+
diff --git a/src/fsmap.cc b/src/fsmap.cc
new file mode 100644
index 00000000..a4c072b6
--- /dev/null
+++ b/src/fsmap.cc
@@ -0,0 +1,856 @@
+/*
+ * Copyright 2006-2012 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "config.h"
+#include "defs.h"
+#include "fsmgraph.h"
+#include <iostream>
+
+using std::cerr;
+using std::endl;
+
+CondData *condData = 0; /* global condition data; addCondSpace() dereferences it for condSpaceMap and nextCondKey, so it has to be set before that call */
+KeyOps *keyOps = 0; /* global key operations; this file reads minKey, maxKey and alphSize() from it */
+
+/* Add one (ordering, action) entry to this action table. */
+void ActionTable::setAction( int ordering, Action *action )
+{
+	/* A multi-insert is used on purpose: the same action can appear in a
+	 * transition more than once and every instance has to be kept. */
+	this->insertMulti( ordering, action );
+}
+
+/* Copy every entry of another action table into this one, keeping
+ * duplicates (each entry is multi-inserted under its own key). */
+void ActionTable::setActions( const ActionTable &other )
+{
+	ActionTable::Iter src = other;
+	while ( src.lte() ) {
+		insertMulti( src->key, src->value );
+		src++;
+	}
+}
+
+/* Multi-insert nActs entries taken pairwise from two parallel arrays:
+ * orderings[i] is the key and actions[i] the value of the i-th entry. */
+void ActionTable::setActions( int *orderings, Action **actions, int nActs )
+{
+	int idx = 0;
+	while ( idx < nActs ) {
+		insertMulti( orderings[idx], actions[idx] );
+		idx += 1;
+	}
+}
+
+/* Linear search of the table: true if any entry's value is the given action
+ * pointer, false otherwise (including for an empty table). */
+bool ActionTable::hasAction( Action *action )
+{
+	bool found = false;
+	for ( int pos = 0; !found && pos < length(); pos++ )
+		found = ( data[pos].value == action );
+	return found;
+}
+
+/* Add one (ordering, token definition) entry to this longest-match action
+ * table. */
+void LmActionTable::setAction( int ordering, TokenDef *action )
+{
+	/* Multi-insert so that repeated instances of the same action within a
+	 * transition are all recorded. */
+	this->insertMulti( ordering, action );
+}
+
+/* Copy every entry of another longest-match action table into this one,
+ * keeping duplicates. */
+void LmActionTable::setActions( const LmActionTable &other )
+{
+	LmActionTable::Iter src = other;
+	while ( src.lte() ) {
+		insertMulti( src->key, src->value );
+		src++;
+	}
+}
+
+void ErrActionTable::setAction( int ordering, Action *action, int transferPoint )
+{
+ insertMulti( ErrActionTableEl( action, ordering, transferPoint ) );
+}
+
+void ErrActionTable::setActions( const ErrActionTable &other )
+{
+ for ( ErrActionTable::Iter act = other; act.lte(); act++ )
+ insertMulti( ErrActionTableEl( act->action, act->ordering, act->transferPoint ) );
+}
+
+/* Record a priority in this table. If the insert is refused because an
+ * element already occupies the same key, that element is replaced only when
+ * the new ordering is at least as large as the stored one (i.e. the new
+ * priority was set at the same time or later). */
+void PriorTable::setPrior( int ordering, PriorDesc *desc )
+{
+	PriorEl candidate( ordering, desc );
+	PriorEl *existing = 0;
+
+	/* A non-null result means the element went in and there is no conflict. */
+	if ( insert( candidate, &existing ) != 0 )
+		return;
+
+	/* Conflict: insert() left a pointer to the element already present. */
+	if ( existing->ordering <= ordering )
+		*existing = candidate;
+}
+
+/* Merge all priorities of another table into this one. Every element goes
+ * through setPrior(), so clashes on the same key are resolved by ordering. */
+void PriorTable::setPriors( const PriorTable &other )
+{
+	PriorTable::Iter src = other;
+	while ( src.lte() ) {
+		setPrior( src->ordering, src->desc );
+		src++;
+	}
+}
+
+/* Set the priority of starting transitions. The start state is isolated
+ * first so it has no other entry points, then the priority is set on every
+ * transition out of the start state that has a target. Transitions without
+ * a target (toState == 0) are left alone. Note that this function does not
+ * touch the start state's out priority table, whether or not the start
+ * state is final. */
+void FsmGraph::startFsmPrior( int ordering, PriorDesc *prior )
+{
+	isolateStartState();
+
+	for ( TransList::Iter out = startState->outList; out.lte(); out++ ) {
+		/* Skip transitions that lead nowhere. */
+		if ( out->toState == 0 )
+			continue;
+		out->priorTable.setPrior( ordering, prior );
+	}
+}
+
+/* Set the priority on every transition in the graph that has a target.
+ * Visits the out list of each state in the state list. */
+void FsmGraph::allTransPrior( int ordering, PriorDesc *prior )
+{
+	StateList::Iter st = stateList;
+	while ( st.lte() ) {
+		TransList::Iter out = st->outList;
+		while ( out.lte() ) {
+			/* Only transitions that go somewhere receive the priority. */
+			if ( out->toState != 0 )
+				out->priorTable.setPrior( ordering, prior );
+			out++;
+		}
+		st++;
+	}
+}
+
+/* Set the priority on all transitions that currently enter a final state.
+ * Transitions that may be drawn into final entry states later are not
+ * covered: the graph has no notion of pending in transitions the way it has
+ * pending out transitions. */
+void FsmGraph::finishFsmPrior( int ordering, PriorDesc *prior )
+{
+	StateSet::Iter fin = finStateSet;
+	while ( fin.lte() ) {
+		/* Every transition in the in list of this final state. */
+		TransInList::Iter in = (*fin)->inList;
+		while ( in.lte() ) {
+			in->priorTable.setPrior( ordering, prior );
+			in++;
+		}
+		fin++;
+	}
+}
+
+/* Set the priority for out transitions that may be created later when
+ * leaving this machine: stored in the out priority table of each final
+ * state. */
+void FsmGraph::leaveFsmPrior( int ordering, PriorDesc *prior )
+{
+	StateSet::Iter fin = finStateSet;
+	while ( fin.lte() ) {
+		(*fin)->outPriorTable.setPrior( ordering, prior );
+		fin++;
+	}
+}
+
+
+/* Set an action to execute on starting transitions. The start state is
+ * isolated first so it has no other entry points, then the action is added
+ * to every transition out of the start state that has a target. Transitions
+ * without a target (toState == 0) are skipped. Note that this function does
+ * not add anything to the start state's out action table, whether or not
+ * the start state is final. */
+void FsmGraph::startFsmAction( int ordering, Action *action )
+{
+	isolateStartState();
+
+	for ( TransList::Iter out = startState->outList; out.lte(); out++ ) {
+		/* Skip transitions that lead nowhere. */
+		if ( out->toState == 0 )
+			continue;
+		out->actionTable.setAction( ordering, action );
+	}
+}
+
+/* Add an action to every transition in the graph that has a target. Visits
+ * the out list of each state in the state list. */
+void FsmGraph::allTransAction( int ordering, Action *action )
+{
+	StateList::Iter st = stateList;
+	while ( st.lte() ) {
+		TransList::Iter out = st->outList;
+		while ( out.lte() ) {
+			/* Only transitions that go somewhere receive the action. */
+			if ( out->toState != 0 )
+				out->actionTable.setAction( ordering, action );
+			out++;
+		}
+		st++;
+	}
+}
+
+/* Add an action to all transitions that enter a final state. When the start
+ * state is itself final there is no transition to hang the action on for
+ * the very first entry, so in practice the action runs whenever a final
+ * state is entered from within the same machine. */
+void FsmGraph::finishFsmAction( int ordering, Action *action )
+{
+	StateSet::Iter fin = finStateSet;
+	while ( fin.lte() ) {
+		/* Every transition in the in list of this final state. */
+		TransInList::Iter in = (*fin)->inList;
+		while ( in.lte() ) {
+			in->actionTable.setAction( ordering, action );
+			in++;
+		}
+		fin++;
+	}
+}
+
+/* Add an action for out transitions that may be created later when leaving
+ * this machine: stored in the out action table of each final state. */
+void FsmGraph::leaveFsmAction( int ordering, Action *action )
+{
+	StateSet::Iter fin = finStateSet;
+	while ( fin.lte() ) {
+		(*fin)->outActionTable.setAction( ordering, action );
+		fin++;
+	}
+}
+
+/* Add a token definition to the longest-match action table of every
+ * transition that enters a final state. Used when constructing scanners. */
+void FsmGraph::longMatchAction( int ordering, TokenDef *lmPart )
+{
+	StateSet::Iter fin = finStateSet;
+	while ( fin.lte() ) {
+		/* Every transition in the in list of this final state. */
+		TransInList::Iter in = (*fin)->inList;
+		while ( in.lte() ) {
+			in->lmActionTable.setAction( ordering, lmPart );
+			in++;
+		}
+		fin++;
+	}
+}
+
+void FsmGraph::fillGaps( FsmState *state )
+{ /* Make the out list of state cover every key from keyOps->minKey to
+ * keyOps->maxKey. Each key range not covered by an existing transition
+ * gets a new transition via attachNewTrans() with 0 as its second
+ * argument; the callers in this file treat the resulting toState == 0
+ * transitions as error transitions. Existing transitions are re-appended
+ * to state->outList in their original order. */
+ if ( state->outList.length() == 0 ) {
+ /* No transitions at all: a single new one spans minKey..maxKey. */
+ attachNewTrans( state, 0, keyOps->minKey, keyOps->maxKey );
+ }
+ else {
+ TransList srcList;
+ srcList.transfer( state->outList );
+
+ /* Check for a gap below the first existing transition. */
+ TransList::Iter trans = srcList, next;
+ if ( keyOps->minKey < trans->lowKey ) {
+ /* The gap ends one key below the first transition's low key. */
+ Key highKey = trans->lowKey;
+ highKey.decrement();
+
+ attachNewTrans( state, 0, keyOps->minKey, highKey );
+ }
+
+ /* Put the first transition back into the out list. The successor is
+ * fetched before the append because the append is done while trans is
+ * still being walked through srcList. */
+ next = trans.next();
+ state->outList.append( trans );
+
+ /* Keep the last high end. */
+ Key lastHigh = trans->highKey;
+
+ /* Loop over the remaining source transitions. */
+ for ( trans = next; trans.lte(); trans = next ) {
+ /* Make the next key following the last range. */
+ Key nextKey = lastHigh;
+ nextKey.increment();
+
+ /* Check for a gap from last up to here. */
+ if ( nextKey < trans->lowKey ) {
+ /* Make the high end of the range that fills the gap. */
+ Key highKey = trans->lowKey;
+ highKey.decrement();
+
+ attachNewTrans( state, 0, nextKey, highKey );
+ }
+
+ /* Put the transition back into the out list, again fetching the
+ * successor before the append. */
+ next = trans.next();
+ state->outList.append( trans );
+
+ /* Keep the last high end. */
+ lastHigh = trans->highKey;
+ }
+
+ /* Now check for a gap on the end to fill. */
+ if ( lastHigh < keyOps->maxKey ) {
+ /* The gap starts one key above the last covered key. The increment
+ * is only done here, after the comparison against maxKey. */
+ lastHigh.increment();
+
+ attachNewTrans( state, 0, lastHigh, keyOps->maxKey );
+ }
+ }
+}
+
+void FsmGraph::setErrorAction( FsmState *state, int ordering, Action *action )
+{
+ /* Fill any gaps in the out list with an error transition. */
+ fillGaps( state );
+
+ /* Set error transitions in the transitions that go to error. */
+ for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) {
+ if ( trans->toState == 0 )
+ trans->actionTable.setAction( ordering, action );
+ }
+}
+
+void FsmGraph::setErrorActions( FsmState *state, const ActionTable &other )
+{
+ /* Fill any gaps in the out list with an error transition. */
+ fillGaps( state );
+
+ /* Set error transitions in the transitions that go to error. */
+ for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) {
+ if ( trans->toState == 0 )
+ trans->actionTable.setActions( other );
+ }
+}
+
+
+/* Give a target state for error transitions. */
+void FsmGraph::setErrorTarget( FsmState *state, FsmState *target, int *orderings,
+ Action **actions, int nActs )
+{
+ /* Fill any gaps in the out list with an error transition. */
+ fillGaps( state );
+
+ /* Set error target in the transitions that go to error. */
+ for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) {
+ if ( trans->toState == 0 ) {
+ /* The trans goes to error, redirect it. */
+ redirectErrorTrans( trans->fromState, target, trans );
+ trans->actionTable.setActions( orderings, actions, nActs );
+ }
+ }
+}
+
+void FsmGraph::transferErrorActions( FsmState *state, int transferPoint )
+{
+ for ( int i = 0; i < state->errActionTable.length(); ) {
+ ErrActionTableEl *act = state->errActionTable.data + i;
+ if ( act->transferPoint == transferPoint ) {
+ /* Transfer the error action and remove it. */
+ setErrorAction( state, act->ordering, act->action );
+ state->errActionTable.vremove( i );
+ }
+ else {
+ /* Not transfering and deleting, skip over the item. */
+ i += 1;
+ }
+ }
+}
+
+/* Set error actions in the start state. */
+void FsmGraph::startErrorAction( int ordering, Action *action, int transferPoint )
+{
+ /* Make sure the start state has no other entry points. */
+ isolateStartState();
+
+ /* Add the actions. */
+ startState->errActionTable.setAction( ordering, action, transferPoint );
+}
+
+/* Set error actions in all states where there is a transition out. */
+void FsmGraph::allErrorAction( int ordering, Action *action, int transferPoint )
+{
+ /* Insert actions in the error action table of all states. */
+ for ( StateList::Iter state = stateList; state.lte(); state++ )
+ state->errActionTable.setAction( ordering, action, transferPoint );
+}
+
+/* Set error actions in final states. */
+void FsmGraph::finalErrorAction( int ordering, Action *action, int transferPoint )
+{
+ /* Add the action to the error table of final states. */
+ for ( StateSet::Iter state = finStateSet; state.lte(); state++ )
+ (*state)->errActionTable.setAction( ordering, action, transferPoint );
+}
+
+void FsmGraph::notStartErrorAction( int ordering, Action *action, int transferPoint )
+{
+ for ( StateList::Iter state = stateList; state.lte(); state++ ) {
+ if ( state != startState )
+ state->errActionTable.setAction( ordering, action, transferPoint );
+ }
+}
+
+void FsmGraph::notFinalErrorAction( int ordering, Action *action, int transferPoint )
+{
+ for ( StateList::Iter state = stateList; state.lte(); state++ ) {
+ if ( ! state->isFinState() )
+ state->errActionTable.setAction( ordering, action, transferPoint );
+ }
+}
+
+/* Set error actions in the states that have transitions into a final state. */
+void FsmGraph::middleErrorAction( int ordering, Action *action, int transferPoint )
+{
+ /* Isolate the start state in case it is reachable from in inside the
+ * machine, in which case we don't want it set. */
+ for ( StateList::Iter state = stateList; state.lte(); state++ ) {
+ if ( state != startState && ! state->isFinState() )
+ state->errActionTable.setAction( ordering, action, transferPoint );
+ }
+}
+
+/* Set an EOF action in the start state. The start state is isolated first
+ * so that it has no other entry points. */
+void FsmGraph::startEOFAction( int ordering, Action *action )
+{
+	isolateStartState();
+	startState->eofActionTable.setAction( ordering, action );
+}
+
+/* Set an EOF action in the EOF action table of every state in the state
+ * list. */
+void FsmGraph::allEOFAction( int ordering, Action *action )
+{
+	StateList::Iter st = stateList;
+	while ( st.lte() ) {
+		st->eofActionTable.setAction( ordering, action );
+		st++;
+	}
+}
+
+/* Set an EOF action in the EOF action table of every final state. */
+void FsmGraph::finalEOFAction( int ordering, Action *action )
+{
+	StateSet::Iter fin = finStateSet;
+	while ( fin.lte() ) {
+		(*fin)->eofActionTable.setAction( ordering, action );
+		fin++;
+	}
+}
+
+/* Set an EOF action in every state except the start state. */
+void FsmGraph::notStartEOFAction( int ordering, Action *action )
+{
+	for ( StateList::Iter st = stateList; st.lte(); st++ ) {
+		if ( st == startState )
+			continue;
+		st->eofActionTable.setAction( ordering, action );
+	}
+}
+
+/* Set an EOF action in every state that is not a final state. */
+void FsmGraph::notFinalEOFAction( int ordering, Action *action )
+{
+	for ( StateList::Iter st = stateList; st.lte(); st++ ) {
+		if ( st->isFinState() )
+			continue;
+		st->eofActionTable.setAction( ordering, action );
+	}
+}
+
+/* Set an EOF action in the "middle" states: every state that is neither the
+ * start state nor a final state. */
+void FsmGraph::middleEOFAction( int ordering, Action *action )
+{
+	for ( StateList::Iter st = stateList; st.lte(); st++ ) {
+		if ( st == startState || st->isFinState() )
+			continue;
+		st->eofActionTable.setAction( ordering, action );
+	}
+}
+
+/*
+ * Set To State Actions.
+ */
+
+/* Set a to-state action in the start state. The start state is isolated
+ * first so that it has no other entry points. */
+void FsmGraph::startToStateAction( int ordering, Action *action )
+{
+	isolateStartState();
+
+	/* Add the action. */
+	startState->toStateActionTable.setAction( ordering, action );
+}
+
+/* Set a to-state action in every state in the state list. */
+void FsmGraph::allToStateAction( int ordering, Action *action )
+{
+	StateList::Iter st = stateList;
+	while ( st.lte() ) {
+		st->toStateActionTable.setAction( ordering, action );
+		st++;
+	}
+}
+
+/* Set a to-state action in the to-state action table of every final
+ * state. */
+void FsmGraph::finalToStateAction( int ordering, Action *action )
+{
+	StateSet::Iter fin = finStateSet;
+	while ( fin.lte() ) {
+		(*fin)->toStateActionTable.setAction( ordering, action );
+		fin++;
+	}
+}
+
+/* Set a to-state action in every state except the start state. */
+void FsmGraph::notStartToStateAction( int ordering, Action *action )
+{
+	for ( StateList::Iter st = stateList; st.lte(); st++ ) {
+		if ( st == startState )
+			continue;
+		st->toStateActionTable.setAction( ordering, action );
+	}
+}
+
+/* Set a to-state action in every state that is not a final state. */
+void FsmGraph::notFinalToStateAction( int ordering, Action *action )
+{
+	for ( StateList::Iter st = stateList; st.lte(); st++ ) {
+		if ( st->isFinState() )
+			continue;
+		st->toStateActionTable.setAction( ordering, action );
+	}
+}
+
+/* Set a to-state action in the "middle" states: every state that is neither
+ * the start state nor a final state. */
+void FsmGraph::middleToStateAction( int ordering, Action *action )
+{
+	for ( StateList::Iter st = stateList; st.lte(); st++ ) {
+		if ( st == startState || st->isFinState() )
+			continue;
+		st->toStateActionTable.setAction( ordering, action );
+	}
+}
+
+/*
+ * Set From State Actions.
+ */
+
+/* Set a from-state action in the start state. The start state is isolated
+ * first so that it has no other entry points. */
+void FsmGraph::startFromStateAction( int ordering, Action *action )
+{
+	isolateStartState();
+
+	/* Add the action. */
+	startState->fromStateActionTable.setAction( ordering, action );
+}
+
+/* Set a from-state action in every state in the state list. */
+void FsmGraph::allFromStateAction( int ordering, Action *action )
+{
+	StateList::Iter st = stateList;
+	while ( st.lte() ) {
+		st->fromStateActionTable.setAction( ordering, action );
+		st++;
+	}
+}
+
+/* Set a from-state action in the from-state action table of every final
+ * state. */
+void FsmGraph::finalFromStateAction( int ordering, Action *action )
+{
+	StateSet::Iter fin = finStateSet;
+	while ( fin.lte() ) {
+		(*fin)->fromStateActionTable.setAction( ordering, action );
+		fin++;
+	}
+}
+
+/* Set a from-state action in every state except the start state. */
+void FsmGraph::notStartFromStateAction( int ordering, Action *action )
+{
+	for ( StateList::Iter st = stateList; st.lte(); st++ ) {
+		if ( st == startState )
+			continue;
+		st->fromStateActionTable.setAction( ordering, action );
+	}
+}
+
+/* Set a from-state action in every state that is not a final state. */
+void FsmGraph::notFinalFromStateAction( int ordering, Action *action )
+{
+	for ( StateList::Iter st = stateList; st.lte(); st++ ) {
+		if ( st->isFinState() )
+			continue;
+		st->fromStateActionTable.setAction( ordering, action );
+	}
+}
+
+/* Set a from-state action in the "middle" states: every state that is
+ * neither the start state nor a final state. */
+void FsmGraph::middleFromStateAction( int ordering, Action *action )
+{
+	for ( StateList::Iter st = stateList; st.lte(); st++ ) {
+		if ( st == startState || st->isFinState() )
+			continue;
+		st->fromStateActionTable.setAction( ordering, action );
+	}
+}
+
+/* Renumber the action ordering keys on the transitions out of the start
+ * state. On each transition the actions get the keys fromOrder,
+ * fromOrder+1, ... in table order. Returns the largest number of keys used
+ * on any single transition (0 if no transition carries actions). The
+ * original note says this is useful before "staring" (presumably applying
+ * the star operator - confirm against callers). */
+int FsmGraph::shiftStartActionOrder( int fromOrder )
+{
+	int widest = 0;
+
+	TransList::Iter out = startState->outList;
+	while ( out.lte() ) {
+		/* Number the actions of this transition, counting as we go. */
+		int used = 0;
+		ActionTable::Iter act = out->actionTable;
+		while ( act.lte() ) {
+			act->key = fromOrder + used;
+			used += 1;
+			act++;
+		}
+
+		/* Remember the longest run seen so far. */
+		if ( used > widest )
+			widest = used;
+
+		out++;
+	}
+
+	return widest;
+}
+
+/* Remove all priorities from the graph: the out priority table of every
+ * state and the priority table of every transition in each out list. */
+void FsmGraph::clearAllPriorities()
+{
+	StateList::Iter st = stateList;
+	while ( st.lte() ) {
+		/* Priorities pending on the state itself. */
+		st->outPriorTable.empty();
+
+		/* Priorities on the state's out transitions. */
+		TransList::Iter out = st->outList;
+		while ( out.lte() ) {
+			out->priorTable.empty();
+			out++;
+		}
+
+		st++;
+	}
+}
+
+/* Set the ordering key of every entry in one action table to zero. */
+static void zeroActionTableKeys( ActionTable &table )
+{
+	ActionTable::Iter act = table;
+	while ( act.lte() ) {
+		act->key = 0;
+		act++;
+	}
+}
+
+/* Zero all action ordering keys in the graph. Intended to be called before
+ * minimization, once it is known that no further fsm operations will be
+ * done: with the orderings nulled, states are no longer kept apart merely
+ * because their action orderings differ, so more reduction is achieved. */
+void FsmGraph::nullActionKeys( )
+{
+	for ( StateList::Iter st = stateList; st.lte(); st++ ) {
+		/* Tables hanging off the out transitions. */
+		for ( TransList::Iter out = st->outList; out.lte(); out++ ) {
+			zeroActionTableKeys( out->actionTable );
+
+			/* The longest-match table has its own element type. */
+			LmActionTable::Iter lm = out->lmActionTable;
+			while ( lm.lte() ) {
+				lm->key = 0;
+				lm++;
+			}
+		}
+
+		/* To-state, from-state and out (leaving) action tables. */
+		zeroActionTableKeys( st->toStateActionTable );
+		zeroActionTableKeys( st->fromStateActionTable );
+		zeroActionTableKeys( st->outActionTable );
+
+		/* Error action elements store the ordering in a field of its own. */
+		ErrActionTable::Iter err = st->errActionTable;
+		while ( err.lte() ) {
+			err->ordering = 0;
+			err++;
+		}
+
+		/* EOF action table. */
+		zeroActionTableKeys( st->eofActionTable );
+	}
+}
+
+/* Sanity check over the state list, done entirely with assert(). Checks
+ * that states without the SB_ISFINAL bit carry no leaving data (out
+ * actions, out conditions, out priorities), that the SB_BOTH bits are clear
+ * on every state, and that every state has at least one foreign in
+ * transition. */
+void FsmGraph::verifyStates()
+{
+	StateList::Iter state = stateList;
+	while ( state.lte() ) {
+		const bool markedFinal = ( state->stateBits & SB_ISFINAL ) != 0;
+
+		/* Leaving data only makes sense on final states. */
+		if ( !markedFinal ) {
+			assert( state->outActionTable.length() == 0 );
+			assert( state->outCondSet.length() == 0 );
+			assert( state->outPriorTable.length() == 0 );
+		}
+
+		/* Scratch bits used by algorithms must have been cleared. */
+		assert( (state->stateBits & SB_BOTH) == 0 );
+
+		/* States in the main list must be entered from somewhere else. */
+		assert( state->foreignInTrans > 0 );
+
+		state++;
+	}
+}
+
+/* Compare two priority tables. Both tables are scanned together; only
+ * descriptors with the same key are compared. Returns -1 or 1 at the first
+ * shared key whose priorities differ (-1 when the first table's priority is
+ * the lower one) and 0 when no shared key has differing priorities, which
+ * includes the case of no shared keys at all. */
+int FsmGraph::comparePrior( const PriorTable &priorTable1, const PriorTable &priorTable2 )
+{
+	PriorTable::Iter lhs = priorTable1;
+	PriorTable::Iter rhs = priorTable2;
+
+	for ( ; lhs.lte() && rhs.lte(); ) {
+		/* Keys differ: advance whichever side is behind. */
+		if ( lhs->desc->key < rhs->desc->key ) {
+			lhs.increment();
+			continue;
+		}
+		if ( lhs->desc->key > rhs->desc->key ) {
+			rhs.increment();
+			continue;
+		}
+
+		/* Same key: a priority difference decides the comparison. */
+		if ( lhs->desc->priority < rhs->desc->priority )
+			return -1;
+		if ( lhs->desc->priority > rhs->desc->priority )
+			return 1;
+
+		/* Same key, same priority: move both sides on. */
+		lhs.increment();
+		rhs.increment();
+	}
+
+	/* Ran out of one table without finding a difference. */
+	return 0;
+}
+
+/* Compare the user data embedded in two transitions. Neither pointer may be
+ * null. The to and from states are not considered. The tables are compared
+ * in a fixed sequence - priorities, longest-match actions, actions - and the
+ * first non-zero comparison result is returned; 0 means all three are
+ * equal. */
+int FsmGraph::compareTransData( FsmTrans *trans1, FsmTrans *trans2 )
+{
+	/* Priority tables first. */
+	int result = CmpPriorTable::compare( trans1->priorTable,
+			trans2->priorTable );
+
+	/* Longest-match action tables, only if still undecided. */
+	if ( result == 0 ) {
+		result = CmpLmActionTable::compare(trans1->lmActionTable,
+				trans2->lmActionTable);
+	}
+
+	/* Action tables, only if still undecided. */
+	if ( result == 0 ) {
+		result = CmpActionTable::compare(trans1->actionTable,
+				trans2->actionTable);
+	}
+
+	return result;
+}
+
+/* Merge callback: draw the properties of srcTrans into destTrans. Called
+ * once for every transition that a newly made transition is to represent,
+ * whether the new transition duplicates one transition or combines several.
+ * srcTrans may be destTrans itself. */
+void FsmGraph::addInTrans( FsmTrans *destTrans, FsmTrans *srcTrans )
+{
+	if ( srcTrans != destTrans ) {
+		/* The common case: take over the actions and the priorities. */
+		destTrans->lmActionTable.setActions( srcTrans->lmActionTable );
+		destTrans->actionTable.setActions( srcTrans->actionTable );
+		destTrans->priorTable.setPriors( srcTrans->priorTable );
+		return;
+	}
+
+	/* Adding a transition into itself. The tables would be read while they
+	 * are being inserted into, so each one is copied first and the copy is
+	 * used as the source. Priorities are skipped since setting them again
+	 * would have no effect. */
+	LmActionTable lmCopy( srcTrans->lmActionTable );
+	destTrans->lmActionTable.setActions( lmCopy );
+
+	ActionTable actCopy( srcTrans->actionTable );
+	destTrans->actionTable.setActions( actCopy );
+}
+
+/* Compare the user-embedded properties of two states. The tables are
+ * compared in a fixed sequence - out priorities, to-state actions,
+ * from-state actions, out actions, out conditions, error actions, EOF
+ * actions - and the first non-zero comparison result is returned; 0 means
+ * all of them are equal. */
+int FsmGraph::compareStateData( const FsmState *state1, const FsmState *state2 )
+{
+	/* Out priority tables. */
+	int result = CmpPriorTable::
+			compare( state1->outPriorTable, state2->outPriorTable );
+
+	/* To-state action tables. */
+	if ( result == 0 ) {
+		result = CmpActionTable::compare( state1->toStateActionTable,
+				state2->toStateActionTable );
+	}
+
+	/* From-state action tables. */
+	if ( result == 0 ) {
+		result = CmpActionTable::compare( state1->fromStateActionTable,
+				state2->fromStateActionTable );
+	}
+
+	/* Out action tables. */
+	if ( result == 0 ) {
+		result = CmpActionTable::compare( state1->outActionTable,
+				state2->outActionTable );
+	}
+
+	/* Out condition sets. */
+	if ( result == 0 ) {
+		result = CmpActionSet::compare( state1->outCondSet,
+				state2->outCondSet );
+	}
+
+	/* Error action tables. */
+	if ( result == 0 ) {
+		result = CmpErrActionTable::compare( state1->errActionTable,
+				state2->errActionTable );
+	}
+
+	/* EOF action tables. */
+	if ( result == 0 ) {
+		result = CmpActionTable::compare( state1->eofActionTable,
+				state2->eofActionTable );
+	}
+
+	return result;
+}
+
+
+/* Discard the leaving-transition embedding data of a state. Invoked when a
+ * state loses its final state status. Empties the out action table, the out
+ * condition set and the out priority table. */
+void FsmGraph::clearOutData( FsmState *state )
+{
+	FsmState *target = state;
+
+	target->outActionTable.empty();
+	target->outCondSet.empty();
+	target->outPriorTable.empty();
+}
+
+/* True if the state carries any leaving data: at least one out action, out
+ * condition or out priority. */
+bool FsmGraph::hasOutData( FsmState *state )
+{
+	if ( state->outActionTable.length() > 0 )
+		return true;
+	if ( state->outCondSet.length() > 0 )
+		return true;
+	return state->outPriorTable.length() > 0;
+}
+
+/*
+ * Setting Conditions.
+ */
+
+void logNewExpansion( Expansion *exp ); /* declaration only; no definition and no call appears in this file */
+void logCondSpace( CondSpace *condSpace ); /* declaration only; called from addCondSpace() inside the COLM_LOG_CONDS block */
+
+/* Return the condition space for a condition set, creating it on first
+ * use. Lookup goes through the global condData->condSpaceMap. A new space
+ * takes the current condData->nextCondKey as its base key, and nextCondKey
+ * is advanced by (2 to the number of conditions) times the alphabet size.
+ * The new space is inserted into the map before being returned. */
+CondSpace *FsmGraph::addCondSpace( const CondSet &condSet )
+{
+	/* Already known: hand back the existing space. */
+	CondSpace *found = condData->condSpaceMap.find( condSet );
+	if ( found != 0 )
+		return found;
+
+	/* Reserve the key range for the new space. */
+	Key baseKey = condData->nextCondKey;
+	condData->nextCondKey += (1 << condSet.length() ) * keyOps->alphSize();
+
+	/* Create and register it. */
+	CondSpace *created = new CondSpace( condSet );
+	created->baseKey = baseKey;
+	condData->condSpaceMap.insert( created );
+
+	#ifdef COLM_LOG_CONDS
+	if ( colm_log_conds ) {
+		cerr << "adding new condition space" << endl;
+		cerr << " condition set: ";
+		logCondSpace( created );
+		cerr << endl;
+		cerr << " baseKey: " << baseKey.getVal() << endl;
+	}
+	#endif
+
+	return created;
+}
+
+/* Embed a condition in the start state. The start state is isolated first
+ * so that it has no other entry points. */
+void FsmGraph::startFsmCondition( Action *condAction )
+{
+	isolateStartState();
+
+	/* Embed into the now isolated start state. */
+	embedCondition( startState, condAction );
+}
+
+/* Embed a condition in every state in the state list. */
+void FsmGraph::allTransCondition( Action *condAction )
+{
+	StateList::Iter state = stateList;
+	while ( state.lte() ) {
+		embedCondition( state, condAction );
+		state++;
+	}
+}
+
+/* Record a leaving condition: the action is inserted into the out
+ * condition set of every final state. */
+void FsmGraph::leaveFsmCondition( Action *condAction )
+{
+	StateSet::Iter fin = finStateSet;
+	while ( fin.lte() ) {
+		(*fin)->outCondSet.insert( condAction );
+		fin++;
+	}
+}
diff --git a/src/fsmattach.cc b/src/fsmattach.cc
new file mode 100644
index 00000000..a58ed9a4
--- /dev/null
+++ b/src/fsmattach.cc
@@ -0,0 +1,425 @@
+/*
+ * Copyright 2006-2012 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <string.h>
+#include <assert.h>
+#include "fsmgraph.h"
+
+#include <iostream>
+using namespace std;
+
+/* Insert a transition at the front of an in-list. The head pointer of the
+ * list must be supplied and is updated to point at trans.
+ *
+ * When from and to are different states the transition counts as a foreign
+ * in-transition of to: its foreignInTrans counter is incremented and, with
+ * misfit accounting on, a target whose counter was zero is first moved from
+ * the misfit list to the main state list. */
+void FsmGraph::attachToInList( FsmState *from, FsmState *to,
+        FsmTrans *&head, FsmTrans *trans )
+{
+    /* Link trans in ahead of the current head. */
+    trans->ilprev = 0;
+    trans->ilnext = head;
+    if ( head != 0 )
+        head->ilprev = trans;
+    head = trans;
+
+    /* Self loops are not foreign, there is nothing more to track. */
+    if ( from == to )
+        return;
+
+    /* The count is about to go up to 1: the state leaves the misfit list. */
+    if ( misfitAccounting && to->foreignInTrans == 0 )
+        stateList.append( misfitList.detach( to ) );
+
+    to->foreignInTrans += 1;
+}
+
+/* Unlink a transition from an in-list. The head pointer of the list must be
+ * supplied and is updated when trans was the first element.
+ *
+ * When from and to are different states the foreignInTrans counter of to is
+ * decremented and, with misfit accounting on, a target whose counter drops
+ * to zero is moved from the main state list to the misfit list. */
+void FsmGraph::detachFromInList( FsmState *from, FsmState *to,
+        FsmTrans *&head, FsmTrans *trans )
+{
+    FsmTrans *before = trans->ilprev;
+    FsmTrans *after = trans->ilnext;
+
+    /* Bridge over trans in the forward direction. */
+    if ( before != 0 )
+        before->ilnext = after;
+    else
+        head = after;
+
+    /* And in the backward direction. */
+    if ( after != 0 )
+        after->ilprev = before;
+
+    /* Self loops were never counted as foreign. */
+    if ( from == to )
+        return;
+
+    to->foreignInTrans -= 1;
+
+    /* No foreign way in any more: the state becomes a misfit. */
+    if ( misfitAccounting && to->foreignInTrans == 0 )
+        misfitList.append( stateList.detach( to ) );
+}
+
+/* Allocate a new transition from one state to another over the key range
+ * lowKey..highKey. The transition is appended to the out list of from and,
+ * when to is non-null, linked into the in-list of to. Returns the new
+ * transition. */
+FsmTrans *FsmGraph::attachNewTrans( FsmState *from, FsmState *to, Key lowKey, Key highKey )
+{
+    FsmTrans *newTrans = new FsmTrans();
+
+    /* Record both end points on the transition. */
+    newTrans->fromState = from;
+    newTrans->toState = to;
+
+    /* It lives in the out list of the source state. */
+    from->outList.append( newTrans );
+
+    /* The key range covered by the transition. */
+    newTrans->lowKey = lowKey;
+    newTrans->highKey = highKey;
+
+    /* A null target has no in-list to join. */
+    if ( to == 0 )
+        return newTrans;
+
+    attachToInList( from, to, to->inList.head, newTrans );
+    return newTrans;
+}
+
+/* Attach an already allocated, currently unattached transition between two
+ * states. Both fromState and toState of the transition must be null on
+ * entry. The transition is linked into the in-list of to when to is
+ * non-null. Placing the transition in an out list is left to the caller. */
+void FsmGraph::attachTrans( FsmState *from, FsmState *to, FsmTrans *trans )
+{
+    assert( trans->fromState == 0 && trans->toState == 0 );
+
+    trans->toState = to;
+    trans->fromState = from;
+
+    /* A null target means there is no in-list to join. */
+    if ( to == 0 )
+        return;
+
+    /* The in-list head of the target anchors the insertion. */
+    attachToInList( from, to, to->inList.head, trans );
+}
+
+/* Redirect a transition away from error and towards some state. This is just
+ * like attachTrans except it requires fromState to be set and does not touch
+ * it. */
+void FsmGraph::redirectErrorTrans( FsmState *from, FsmState *to, FsmTrans *trans )
+{
+ assert( trans->fromState != 0 && trans->toState == 0 );
+ trans->toState = to;
+
+ if ( to != 0 ) {
+ /* Attach using the inList pointer as the head pointer. */
+ attachToInList( from, to, to->inList.head, trans );
+ }
+}
+
+/* Detach a transition from its end points. from and to must be the states
+ * currently recorded on the transition. Both fields are cleared and, when
+ * to is non-null, the transition is unlinked from the in-list of to. The
+ * transition is neither deleted nor removed from any out list here. */
+void FsmGraph::detachTrans( FsmState *from, FsmState *to, FsmTrans *trans )
+{
+    assert( trans->fromState == from && trans->toState == to );
+
+    trans->toState = 0;
+    trans->fromState = 0;
+
+    /* A transition to error was never in an in-list. */
+    if ( to == 0 )
+        return;
+
+    detachFromInList( from, to, to->inList.head, trans );
+}
+
+
+/* Detach a state from the graph. Detaches and deletes the transitions going
+ * into and out of the state, leaving its inList and outList empty. Removes
+ * every entry point on the state and removes the state from the final state
+ * set (the SB_ISFINAL bit itself is not cleared here). The state is not
+ * deleted and is not taken off the state list; a detached state is useless
+ * and should be deleted by the caller. */
+void FsmGraph::detachState( FsmState *state )
+{
+ /* Drain the in-list. Each pass removes the current head, so the loop ends
+ * when no transition goes into the state any more. */
+ while ( state->inList.head != 0 ) {
+ /* Get pointers to the trans and the state it comes from. */
+ FsmTrans *trans = state->inList.head;
+ FsmState *fromState = trans->fromState;
+
+ /* Detach the transition from its end points. This unlinks it from
+ * state's in-list. */
+ detachTrans( fromState, state, trans );
+
+ /* Take it out of the source's out list too, then it is ok to delete. */
+ fromState->outList.detach( trans );
+ delete trans;
+ }
+
+ /* Remove the entry points on the state. unsetEntry removes the id from
+ * entryIds, so always taking element 0 drains the set. */
+ while ( state->entryIds.length() > 0 )
+ unsetEntry( state->entryIds[0], state );
+
+ /* Detach and delete the out transitions. The successor is fetched before
+ * the current transition is deleted. */
+ for ( TransList::Iter trans = state->outList; trans.lte(); ) {
+ TransList::Iter next = trans.next();
+ detachTrans( state, trans->toState, trans );
+ delete trans;
+ trans = next;
+ }
+
+ /* The elements were deleted above, so only forget the list's pointers. */
+ state->outList.abandon();
+
+ /* Take the state out of the final state set if it is final. */
+ if ( state->stateBits & SB_ISFINAL )
+ finStateSet.remove( state );
+}
+
+
+/* Duplicate a transition. Allocates a new transition leaving from and going
+ * to the same destination as srcTrans, then passes it with srcTrans to
+ * addInTrans. Used when a transition can simply be dropped into a free spot,
+ * with no existing transition to cross it with. This function does not set
+ * the key range and does not place the transition in an out list. Returns
+ * the new transition. */
+FsmTrans *FsmGraph::dupTrans( FsmState *from, FsmTrans *srcTrans )
+{
+    FsmTrans *copy = new FsmTrans();
+
+    /* A fresh transition is unattached, so a plain attach is fine. */
+    attachTrans( from, srcTrans->toState, copy );
+
+    /* Bring in the data of the source transition. */
+    addInTrans( copy, srcTrans );
+
+    return copy;
+}
+
+/* In crossing, src trans and dest trans both go to existing (non-null)
+ * states. If they go to the same state, srcTrans is simply added into
+ * destTrans. Otherwise destTrans is redirected to a state that stands for
+ * the combined set of states the two transitions go to; that state is looked
+ * up in md.stateDict and created (and queued on the fill list) if it does
+ * not exist yet. Always returns destTrans. */
+FsmTrans *FsmGraph::fsmAttachStates( MergeData &md, FsmState *from,
+ FsmTrans *destTrans, FsmTrans *srcTrans )
+{
+ /* The priorities are equal. We must merge the transitions. Does the
+ * existing trans go to the state we are to attach to? ie, are we to
+ * simply double up the transition? */
+ FsmState *toState = srcTrans->toState;
+ FsmState *existingState = destTrans->toState;
+
+ if ( existingState == toState ) {
+ /* The transition is a double up to the same state. Copy the src
+ * trans into itself. We don't need to merge in the from out trans
+ * data, that was done already. */
+ addInTrans( destTrans, srcTrans );
+ }
+ else {
+ /* The trans is not a double up. Dest trans cannot be the same as src
+ * trans. Set up the state set. */
+ StateSet stateSet;
+
+ /* We go to all the states the existing trans goes to, plus... A state
+ * with a stateDictEl already stands for a set of states, so that set
+ * is used in place of the state itself. */
+ if ( existingState->stateDictEl == 0 )
+ stateSet.insert( existingState );
+ else
+ stateSet.insert( existingState->stateDictEl->stateSet );
+
+ /* ... all the states that we have been told to go to. */
+ if ( toState->stateDictEl == 0 )
+ stateSet.insert( toState );
+ else
+ stateSet.insert( toState->stateDictEl->stateSet );
+
+ /* Look for the state. If it is not there already, make it. Either way
+ * lastFound is used below as the dictionary element for stateSet. */
+ StateDictEl *lastFound;
+ if ( md.stateDict.insert( stateSet, &lastFound ) ) {
+ /* Make a new state representing the combination of states in
+ * stateSet. It gets added to the fill list. This means that we
+ * need to fill in its transitions sometime in the future. We
+ * don't do that now (ie, do not recurse). */
+ FsmState *combinState = addState();
+
+ /* Link up the dict element and the state. */
+ lastFound->targState = combinState;
+ combinState->stateDictEl = lastFound;
+
+ /* Add to the fill list. */
+ md.fillListAppend( combinState );
+ }
+
+ /* Get the state that was inserted or found. */
+ FsmState *targ = lastFound->targState;
+
+ /* Detach the transition from the existing state. */
+ detachTrans( from, existingState, destTrans );
+
+ /* Re-attach to the new target. */
+ attachTrans( from, targ, destTrans );
+
+ /* Add in src trans to the existing transition that we redirected to
+ * the new state. We don't need to merge in the from out trans data,
+ * that was done already. */
+ addInTrans( destTrans, srcTrans );
+ }
+
+ return destTrans;
+}
+
+/* Merge srcTrans into destTrans, handling the possibility of either one
+ * going to the error state (null toState). When both go to real states the
+ * work is handed to fsmAttachStates. In every other case srcTrans is added
+ * into destTrans, after destTrans has been redirected to src's target if
+ * only dest went to error. Returns the resulting transition. */
+FsmTrans *FsmGraph::mergeTrans( MergeData &md, FsmState *from,
+        FsmTrans *destTrans, FsmTrans *srcTrans )
+{
+    const bool destIsError = ( destTrans->toState == 0 );
+    const bool srcIsError = ( srcTrans->toState == 0 );
+
+    /* Both go somewhere, run the actual cross. */
+    if ( !destIsError && !srcIsError )
+        return fsmAttachStates( md, from, destTrans, srcTrans );
+
+    /* Non error added into error: dest has to be detached and reattached
+     * to the place src goes to. */
+    if ( destIsError && !srcIsError ) {
+        detachTrans( from, destTrans->toState, destTrans );
+        attachTrans( from, srcTrans->toState, destTrans );
+    }
+
+    /* Error into error, non error into error, or error into non error: in
+     * all of these the src data is just added in. */
+    addInTrans( destTrans, srcTrans );
+    return destTrans;
+}
+
+/* Cross two transitions according to priority. If dest has the higher
+ * priority, src is ignored and dest is returned. If the priorities are
+ * equal, the two are merged. If src has the higher priority, dest is
+ * detached and a duplicate of src is returned in its place. */
+FsmTrans *FsmGraph::crossTransitions( MergeData &md, FsmState *from,
+        FsmTrans *destTrans, FsmTrans *srcTrans )
+{
+    int order = comparePrior( destTrans->priorTable, srcTrans->priorTable );
+
+    /* Dest wins, keep it as it is. */
+    if ( order > 0 )
+        return destTrans;
+
+    /* A tie, the transitions must be merged. */
+    if ( order == 0 )
+        return mergeTrans( md, from, destTrans, srcTrans );
+
+    /* Src wins and overwrites dest.
+     * NOTE(review): destTrans is detached here but not deleted; confirm
+     * that the caller releases it. */
+    detachTrans( from, destTrans->toState, destTrans );
+    return dupTrans( from, srcTrans );
+}
+
+/* Copy the transitions in srcList into the out list of dest. The two lists
+ * are walked together with a PairIter; the resulting transitions are
+ * collected in a fresh list which replaces dest's out list at the end. The
+ * srcList should not be the outList of dest, otherwise you would be copying
+ * the contents of srcList into itself as it's iterated: bad news. */
+void FsmGraph::outTransCopy( MergeData &md, FsmState *dest, FsmTrans *srcList )
+{
+ /* The destination list. */
+ TransList destList;
+
+ /* Set up an iterator to stop at breaks. s1 is dest's existing out list,
+ * s2 is the source list. */
+ PairIter<FsmTrans> outPair( dest->outList.head, srcList );
+ for ( ; !outPair.end(); outPair++ ) {
+ switch ( outPair.userState ) {
+ case RangeInS1: {
+ /* Range present only in dest. The pair iter is the authority on
+ * the keys. It may have needed to break the dest range. */
+ FsmTrans *destTrans = outPair.s1Tel.trans;
+ destTrans->lowKey = outPair.s1Tel.lowKey;
+ destTrans->highKey = outPair.s1Tel.highKey;
+ destList.append( destTrans );
+ break;
+ }
+ case RangeInS2: {
+ /* Range present only in src. Duplicate the src transition so that
+ * it leaves dest. */
+ FsmTrans *newTrans = dupTrans( dest, outPair.s2Tel.trans );
+
+ /* Set up the transition's keys and append to the dest list. */
+ newTrans->lowKey = outPair.s2Tel.lowKey;
+ newTrans->highKey = outPair.s2Tel.highKey;
+ destList.append( newTrans );
+ break;
+ }
+ case RangeOverlap: {
+ /* Exact overlap, cross them. */
+ FsmTrans *newTrans = crossTransitions( md, dest,
+ outPair.s1Tel.trans, outPair.s2Tel.trans );
+
+ /* Set up the transition's keys and append to the dest list. */
+ newTrans->lowKey = outPair.s1Tel.lowKey;
+ newTrans->highKey = outPair.s1Tel.highKey;
+ destList.append( newTrans );
+ break;
+ }
+ case BreakS1: {
+ /* Since we are always writing to the dest trans, the dest needs
+ * to be copied when it is broken. The copy goes into the first
+ * half of the break to "break it off". */
+ outPair.s1Tel.trans = dupTrans( dest, outPair.s1Tel.trans );
+ break;
+ }
+ case BreakS2:
+ /* A break in the src range needs no action; src transitions are
+ * only read here. */
+ break;
+ }
+ }
+
+ /* Abandon the old outList and transfer destList into it. */
+ dest->outList.transfer( destList );
+}
+
+
+/* Move everything that leads into src so that it leads into dest instead:
+ * the start state designation (if src is the start state), every entry
+ * point on src, and every transition in src's in-list. dest and src must be
+ * different states. */
+void FsmGraph::inTransMove( FsmState *dest, FsmState *src )
+{
+    /* Do not try to move in trans to and from the same state. */
+    assert( dest != src );
+
+    /* If src is the start state, dest becomes the start state. */
+    if ( src == startState ) {
+        unsetStartState();
+        setStartState( dest );
+    }
+
+    /* Move the entry points over to dest. changeEntry removes the id from
+     * src->entryIds, so the set must not be walked with an iterator while
+     * this happens. Instead keep taking the first element until the set is
+     * empty, the same way detachState drains entry points. */
+    while ( src->entryIds.length() > 0 )
+        changeEntry( src->entryIds[0], dest, src );
+
+    /* Move the transitions in inList. Each pass detaches the current head,
+     * so the loop ends when src has no in-transitions left. */
+    while ( src->inList.head != 0 ) {
+        /* Get trans and from state. */
+        FsmTrans *trans = src->inList.head;
+        FsmState *fromState = trans->fromState;
+
+        /* Detach from src, reattach to dest. */
+        detachTrans( fromState, src, trans );
+        attachTrans( fromState, dest, trans );
+    }
+}
diff --git a/src/fsmbase.cc b/src/fsmbase.cc
new file mode 100644
index 00000000..90341039
--- /dev/null
+++ b/src/fsmbase.cc
@@ -0,0 +1,602 @@
+/*
+ * Copyright 2006-2012 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <string.h>
+#include <assert.h>
+#include "fsmgraph.h"
+
+/* Append a state to the end of the singly linked fill list, which is
+ * threaded through alg.next and tracked by stfillHead and stfillTail. */
+void MergeData::fillListAppend( FsmState *state )
+{
+    /* The new element is always the last one. */
+    state->alg.next = 0;
+
+    if ( stfillHead != 0 ) {
+        /* Non-empty list, hook on after the current tail. */
+        stfillTail->alg.next = state;
+    }
+    else {
+        /* Empty list, the state is also the head. */
+        stfillHead = state;
+    }
+
+    stfillTail = state;
+}
+
+/* Graph constructor. */
+FsmGraph::FsmGraph()
+:
+ /* No start state. */
+ startState(0),
+ errState(0),
+
+ /* Misfit accounting is a switch, turned on only at specific times. It
+ * controls what happens when states have no way in from the outside
+ * world.. */
+ misfitAccounting(false),
+
+ lmRequiresErrorState(false)
+{
+}
+
+/* Copy constructor. Duplicates every state on graph's state list, re-targets
+ * the copied transitions, entry points, start state and final state set at
+ * the duplicates, and rebuilds the foreign in-transition counts. errState is
+ * not carried over (it starts null) and graph's misfit list is not walked.
+ * As a side effect the alg.stateMap field of each original state is
+ * overwritten with a pointer to its duplicate. */
+FsmGraph::FsmGraph( const FsmGraph &graph )
+:
+ /* Lists start empty. Will be filled by copy. */
+ stateList(),
+ misfitList(),
+
+ /* Copy in the entry points and the start state. They still point into
+ * the original graph here; the pointers are resolved in the body. */
+ entryPoints(graph.entryPoints),
+ startState(graph.startState),
+ errState(0),
+
+ /* Will be filled by copy. */
+ finStateSet(),
+
+ /* Misfit accounting is only on during merging. */
+ misfitAccounting(false),
+
+ lmRequiresErrorState(graph.lmRequiresErrorState)
+{
+ /* Create the states and record their map in the original state. */
+ StateList::Iter origState = graph.stateList;
+ for ( ; origState.lte(); origState++ ) {
+ /* Make the new state. */
+ FsmState *newState = new FsmState( *origState );
+
+ /* Add the state to the list. */
+ stateList.append( newState );
+
+ /* Set the stateMap item of the old state. */
+ origState->alg.stateMap = newState;
+ }
+
+ /* Dereference all the state maps. */
+ for ( StateList::Iter state = stateList; state.lte(); state++ ) {
+ for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) {
+ /* toState still points to the original in the src machine. The
+ * target's duplicate is in the state map. Transitions to error
+ * (null) stay null. */
+ FsmState *toState = trans->toState != 0 ? trans->toState->alg.stateMap : 0;
+
+ /* Attach the transition to the duplicate. attachTrans requires
+ * toState to be clear. NOTE(review): it also asserts that fromState
+ * is null; presumably the FsmState copy leaves it unset - confirm. */
+ trans->toState = 0;
+ attachTrans( state, toState, trans );
+ }
+ }
+
+ /* Fix the state pointers in the entry points array. */
+ EntryMapEl *eel = entryPoints.data;
+ for ( int e = 0; e < entryPoints.length(); e++, eel++ ) {
+ /* Get the duplicate of the state. */
+ eel->value = eel->value->alg.stateMap;
+
+ /* Foreign in transitions must be built up when duping machines so
+ * increment it here. */
+ eel->value->foreignInTrans += 1;
+ }
+
+ /* Fix the start state pointer and the new start state's count of in
+ * transitions. NOTE(review): startState is dereferenced unconditionally,
+ * so the source graph is assumed to have a start state - confirm. */
+ startState = startState->alg.stateMap;
+ startState->foreignInTrans += 1;
+
+ /* Build the final state set from the duplicates of graph's final states. */
+ StateSet::Iter st = graph.finStateSet;
+ for ( ; st.lte(); st++ )
+ finStateSet.insert((*st)->alg.stateMap);
+}
+
+/* Destructor. Empties the out list of every state on the state list, then
+ * empties the state list itself. */
+FsmGraph::~FsmGraph()
+{
+    /* Transitions first, while the states that own them still exist. */
+    StateList::Iter st = stateList;
+    while ( st.lte() ) {
+        st->outList.empty();
+        st++;
+    }
+
+    /* Now the states. */
+    stateList.empty();
+}
+
+/* Make a state final: set its SB_ISFINAL bit and add it to finStateSet.
+ * Does nothing if the state is already final. */
+void FsmGraph::setFinState( FsmState *state )
+{
+    const bool alreadyFinal = ( state->stateBits & SB_ISFINAL ) != 0;
+    if ( !alreadyFinal ) {
+        state->stateBits |= SB_ISFINAL;
+        finStateSet.insert( state );
+    }
+}
+
+/* Make a state non-final: clear its out data, clear its SB_ISFINAL bit and
+ * remove it from finStateSet. Does nothing if the state is not final. */
+void FsmGraph::unsetFinState( FsmState *state )
+{
+    const bool isFinal = ( state->stateBits & SB_ISFINAL ) != 0;
+    if ( isFinal ) {
+        /* A state that loses its final status must give up the properties
+         * that only final states are allowed to have. */
+        clearOutData( state );
+
+        state->stateBits &= ~ SB_ISFINAL;
+        finStateSet.remove( state );
+    }
+}
+
+/* Make a state the start state. There must be no start state set at the
+ * time of the call. Being the start state counts as one foreign
+ * in-transition on the state. */
+void FsmGraph::setStartState( FsmState *state )
+{
+    /* Should change from unset to set. */
+    assert( startState == 0 );
+    startState = state;
+
+    /* The count is about to go up to 1: with misfit accounting on, the
+     * state moves from the misfit list to the main list. */
+    if ( misfitAccounting && state->foreignInTrans == 0 )
+        stateList.append( misfitList.detach( state ) );
+
+    state->foreignInTrans += 1;
+}
+
+/* Clear the start state. A start state must be set at the time of the
+ * call. The foreign in-transition that the designation counted for is
+ * taken off the state. */
+void FsmGraph::unsetStartState()
+{
+    /* Should change from set to unset. */
+    assert( startState != 0 );
+
+    FsmState *oldStart = startState;
+    oldStart->foreignInTrans -= 1;
+
+    /* The count just went down to 0: with misfit accounting on, the state
+     * moves from the main list to the misfit list. */
+    if ( misfitAccounting && oldStart->foreignInTrans == 0 )
+        misfitList.append( stateList.detach( oldStart ) );
+
+    startState = 0;
+}
+
+/* Associate an id with a state, making the state a named entry point. Has
+ * no effect if the state is already labelled with the id. A new entry
+ * point counts as one foreign in-transition on the state. */
+void FsmGraph::setEntry( int id, FsmState *state )
+{
+    /* Label the state. A failed insert means the label was already there. */
+    if ( ! state->entryIds.insert( id ) )
+        return;
+
+    /* Ids are not unique across states, so use a multi insert. */
+    entryPoints.insertMulti( id, state );
+
+    /* The count is about to go up to 1: with misfit accounting on, the
+     * state moves from the misfit list to the main list. */
+    if ( misfitAccounting && state->foreignInTrans == 0 )
+        stateList.append( misfitList.detach( state ) );
+
+    state->foreignInTrans += 1;
+}
+
+/* Remove the association of an id with one state. The state loses its
+ * entry point status for that id. Assumes that the id is indeed mapped to
+ * the state: the search below does not stop until it finds it. */
+void FsmGraph::unsetEntry( int id, FsmState *state )
+{
+    /* Locate the records for id, then the one that refers to state. */
+    EntryMapEl *first = 0, *last = 0;
+    entryPoints.findMulti( id, first, last );
+
+    EntryMapEl *record = first;
+    while ( record->value != state )
+        record += 1;
+
+    /* Drop the record from the map. */
+    entryPoints.remove( record );
+
+    /* Drop the state's own sense of the link. */
+    state->entryIds.remove( id );
+    state->foreignInTrans -= 1;
+
+    /* The count just went down to 0: with misfit accounting on, the state
+     * moves from the main list to the misfit list. */
+    if ( misfitAccounting && state->foreignInTrans == 0 )
+        misfitList.append( stateList.detach( state ) );
+}
+
+/* Remove the association of an id with every state it is mapped to.
+ * Assumes that the id is indeed mapped to at least one state. */
+void FsmGraph::unsetEntry( int id )
+{
+    /* Locate the run of records for id. */
+    EntryMapEl *first = 0, *last = 0;
+    entryPoints.findMulti( id, first, last );
+
+    EntryMapEl *mel = first;
+    while ( mel <= last ) {
+        FsmState *linked = mel->value;
+
+        /* Drop the state's own sense of the link. */
+        linked->entryIds.remove( id );
+        linked->foreignInTrans -= 1;
+
+        /* The count just went down to 0: with misfit accounting on, the
+         * state moves from the main list to the misfit list. */
+        if ( misfitAccounting && linked->foreignInTrans == 0 )
+            misfitList.append( stateList.detach( linked ) );
+
+        mel++;
+    }
+
+    /* Drop the whole run of records from the entry points map. */
+    entryPoints.removeMulti( first, last );
+}
+
+
+/* Retarget the entry point id from one state to another. The map record
+ * for (id, from) is rewritten in place to refer to to, the id is removed
+ * from from's entry ids and added to to's. Assumes that id is indeed mapped
+ * to from. The foreign in-transition counts of both states are kept up to
+ * date; to's count only goes up if it did not carry the id already. */
+void FsmGraph::changeEntry( int id, FsmState *to, FsmState *from )
+{
+    /* Locate the records for id, then the one that refers to from. */
+    EntryMapEl *first = 0, *last = 0;
+    entryPoints.findMulti( id, first, last );
+
+    EntryMapEl *record = first;
+    while ( record->value != from )
+        record += 1;
+
+    /* Point the record at the new target. */
+    record->value = to;
+
+    /* Drop from's sense of the link. */
+    from->entryIds.remove( id );
+    from->foreignInTrans -= 1;
+
+    /* The count just went down to 0: with misfit accounting on, from moves
+     * from the main list to the misfit list. */
+    if ( misfitAccounting && from->foreignInTrans == 0 )
+        misfitList.append( stateList.detach( from ) );
+
+    /* Add to's sense of the link. Nothing to count if it was there. */
+    if ( to->entryIds.insert( id ) != 0 ) {
+        /* The count is about to go up to 1: with misfit accounting on, to
+         * moves from the misfit list to the main list. */
+        if ( misfitAccounting && to->foreignInTrans == 0 )
+            stateList.append( misfitList.detach( to ) );
+
+        to->foreignInTrans += 1;
+    }
+}
+
+
+/* Clear every entry point from the machine. Each state referenced by the
+ * entry map loses all of its entry ids at once, with its foreign
+ * in-transition count reduced accordingly, and the map is then emptied. */
+void FsmGraph::unsetAllEntryPoints()
+{
+    for ( EntryMap::Iter en = entryPoints; en.lte(); en++ ) {
+        FsmState *target = en->value;
+
+        /* A state reached through several records is handled on the first
+         * visit; afterwards its id set is empty. */
+        if ( ! ( target->entryIds.length() > 0 ) )
+            continue;
+
+        /* Every id counted for one foreign in-transition. */
+        target->foreignInTrans -= target->entryIds.length();
+
+        /* The count just went down to 0: with misfit accounting on, the
+         * state moves from the main list to the misfit list. */
+        if ( misfitAccounting && target->foreignInTrans == 0 )
+            misfitList.append( stateList.detach( target ) );
+
+        /* Clear the set of ids all at once. */
+        target->entryIds.empty();
+    }
+
+    /* Now clear out the entry map all at once. */
+    entryPoints.empty();
+}
+
+/* Append the epsilon transition id to the epsilonTrans list of every final
+ * state. */
+void FsmGraph::epsilonTrans( int id )
+{
+    for ( StateSet::Iter fs = finStateSet; fs.lte(); fs++ ) {
+        FsmState *finState = *fs;
+        finState->epsilonTrans.append( id );
+    }
+}
+
+/* Set SB_ISMARKED on state and, recursively, on every state that can be
+ * reached from it by following out transitions. States already marked are
+ * not visited again. Used for removing states that have no path into
+ * them. */
+void FsmGraph::markReachableFromHere( FsmState *state )
+{
+    /* Already processed, this ends the recursion. */
+    if ( ( state->stateBits & SB_ISMARKED ) != 0 )
+        return;
+
+    /* Mark before recursing so that cycles terminate. */
+    state->stateBits |= SB_ISMARKED;
+
+    for ( TransList::Iter out = state->outList; out.lte(); out++ ) {
+        FsmState *target = out->toState;
+
+        /* Transitions to error lead nowhere. */
+        if ( target == 0 )
+            continue;
+
+        markReachableFromHere( target );
+    }
+}
+
+/* Like markReachableFromHere, but the walk does not continue into final
+ * states: a final target is neither marked nor followed. The state passed
+ * in is marked whether or not it is final. */
+void FsmGraph::markReachableFromHereStopFinal( FsmState *state )
+{
+    /* Already processed, this ends the recursion. */
+    if ( ( state->stateBits & SB_ISMARKED ) != 0 )
+        return;
+
+    /* Mark before recursing so that cycles terminate. */
+    state->stateBits |= SB_ISMARKED;
+
+    for ( TransList::Iter out = state->outList; out.lte(); out++ ) {
+        FsmState *target = out->toState;
+
+        /* Skip transitions to error and transitions into final states. */
+        if ( target == 0 || target->isFinState() )
+            continue;
+
+        markReachableFromHereStopFinal( target );
+    }
+}
+
+/* Set SB_ISMARKED on state and, recursively, on every state that has a
+ * path into it, by following in transitions backwards. States already
+ * marked are not visited again. Used for removing dead end paths. */
+void FsmGraph::markReachableFromHereReverse( FsmState *state )
+{
+    /* Already processed, this ends the recursion. */
+    if ( ( state->stateBits & SB_ISMARKED ) != 0 )
+        return;
+
+    /* Mark before recursing so that cycles terminate. */
+    state->stateBits |= SB_ISMARKED;
+
+    /* Visit the source of every transition coming in. */
+    TransInList::Iter in = state->inList;
+    while ( in.lte() ) {
+        markReachableFromHereReverse( in->fromState );
+        in++;
+    }
+}
+
+/* Determine whether the start state is isolated, meaning that nothing
+ * leads into it: it has no in transitions and no entry points. Setting
+ * starting transitions requires that the start state be isolated. In most
+ * cases a start state will already be isolated. */
+bool FsmGraph::isStartStateIsolated()
+{
+    /* Any in transition means the state is not isolated. */
+    const bool noInTrans = ( startState->inList.head == 0 );
+
+    /* Any entry point means the state is not isolated either. */
+    return noInTrans && ! ( startState->entryIds.length() > 0 );
+}
+
+/* Add every record of other's entry point map to this machine's map. The
+ * records keep pointing at other's states, so this assumes those states
+ * are going to be brought into this machine. */
+void FsmGraph::copyInEntryPoints( FsmGraph *other )
+{
+    /* Names are not unique, hence the multi insert. */
+    EntryMap::Iter en = other->entryPoints;
+    while ( en.lte() ) {
+        entryPoints.insertMulti( en->key, en->value );
+        en++;
+    }
+}
+
+
+/* Clear the SB_ISFINAL bit on every final state and empty finStateSet.
+ * Unlike unsetFinState this does not clear the states' out data. */
+void FsmGraph::unsetAllFinStates()
+{
+    for ( StateSet::Iter st = finStateSet; st.lte(); st++ ) {
+        FsmState *finState = *st;
+        finState->stateBits &= ~ SB_ISFINAL;
+    }
+
+    finStateSet.empty();
+}
+
+/* Or finStateBits into the stateBits of every final state. */
+void FsmGraph::setFinBits( int finStateBits )
+{
+    for ( StateSet::Iter st = finStateSet; st.lte(); st++ )
+        (*st)->stateBits |= finStateBits;
+}
+
+
+/* Assert that the transition lists are consistent: for every state on the
+ * state list, each out transition names the state as its fromState and
+ * each in transition names it as its toState. */
+void FsmGraph::verifyIntegrity()
+{
+    for ( StateList::Iter state = stateList; state.lte(); state++ ) {
+        /* Out list against fromState. */
+        for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) {
+            assert( trans->fromState == state );
+        }
+
+        /* In list against toState. */
+        for ( TransInList::Iter trans = state->inList; trans.lte(); trans++ ) {
+            assert( trans->toState == state );
+        }
+    }
+}
+
+/* Assert that every state on the state list can be reached from the start
+ * state or from one of the entry points. The SB_ISMARKED bits used for the
+ * check are cleared again on the way out. */
+void FsmGraph::verifyReachability()
+{
+    /* Mark from the start state ... */
+    markReachableFromHere( startState );
+
+    /* ... and from every entry point. */
+    EntryMap::Iter en = entryPoints;
+    while ( en.lte() ) {
+        markReachableFromHere( en->value );
+        en++;
+    }
+
+    /* Everything must have been marked. Unmark while checking. */
+    for ( StateList::Iter st = stateList; st.lte(); st++ ) {
+        assert( st->stateBits & SB_ISMARKED );
+        st->stateBits &= ~ SB_ISMARKED;
+    }
+}
+
+/* Assert that every state on the state list, other than possibly the start
+ * state, has a path to a final state. The SB_ISMARKED bits used for the
+ * check are cleared again on the way out. */
+void FsmGraph::verifyNoDeadEndStates()
+{
+    /* Walk backwards from each final state, marking what leads to it. */
+    StateSet::Iter pst = finStateSet;
+    while ( pst.lte() ) {
+        markReachableFromHereReverse( *pst );
+        pst++;
+    }
+
+    /* The start state is marked regardless. This must come AFTER the
+     * recursive calls, which stop at states that are already marked. */
+    startState->stateBits |= SB_ISMARKED;
+
+    /* Everything must have been marked. Unmark while checking. */
+    for ( StateList::Iter st = stateList; st.lte(); st++ ) {
+        assert( st->stateBits & SB_ISMARKED );
+        st->stateBits &= ~ SB_ISMARKED;
+    }
+}
+
+/* Recursive step of the depth first ordering. Appends state to the state
+ * list unless its SB_ONLIST bit says it is already there, then does the
+ * same for the target of each of its out transitions, in out list order. */
+void FsmGraph::depthFirstOrdering( FsmState *state )
+{
+    /* Already placed, nothing to do. */
+    if ( ( state->stateBits & SB_ONLIST ) != 0 )
+        return;
+
+    /* Place the state before its successors. */
+    state->stateBits |= SB_ONLIST;
+    stateList.append( state );
+
+    for ( TransList::Iter tel = state->outList; tel.lte(); tel++ ) {
+        FsmState *target = tel->toState;
+
+        /* Transitions to error lead nowhere. */
+        if ( target == 0 )
+            continue;
+
+        depthFirstOrdering( target );
+    }
+}
+
+/* Reorder the state list by transition connections. The list is rebuilt
+ * depth first, starting from the error state (if there is one), then the
+ * start state, then each entry point. Asserts that the rebuilt list has as
+ * many states as the list had before. */
+void FsmGraph::depthFirstOrdering()
+{
+    /* No state is on the new list yet. */
+    StateList::Iter st = stateList;
+    while ( st.lte() ) {
+        st->stateBits &= ~SB_ONLIST;
+        st++;
+    }
+
+    /* Forget the current ordering without touching the states. Remember
+     * how many there were for the check at the end. */
+    int stateListLen = stateList.length();
+    stateList.abandon();
+
+    /* Rebuild from the roots. */
+    if ( errState != 0 )
+        depthFirstOrdering( errState );
+
+    depthFirstOrdering( startState );
+
+    for ( EntryMap::Iter en = entryPoints; en.lte(); en++ )
+        depthFirstOrdering( en->value );
+
+    /* Make sure we put everything back on. */
+    assert( stateListLen == stateList.length() );
+}
+
+/* Stable sort the states by final state status: non-final states keep their
+ * relative order at the front of the state list and final states keep
+ * theirs at the back. Done in one pass over the list. */
+void FsmGraph::sortStatesByFinal()
+{
+ /* Move forward through the list and throw final states onto the end.
+ * The original tail is remembered in last so that the walk stops there
+ * and does not revisit states that were moved behind it. With an empty
+ * list both state and last are null and the loop does not run. */
+ FsmState *state = 0;
+ FsmState *next = stateList.head;
+ FsmState *last = stateList.tail;
+ while ( state != last ) {
+ /* Move forward and load up the next. The successor is read before the
+ * state is possibly relocated. */
+ state = next;
+ next = state->next;
+
+ /* Throw to the end? */
+ if ( state->isFinState() ) {
+ stateList.detach( state );
+ stateList.append( state );
+ }
+ }
+}
+
+/* Number the states in state list order: the first state gets base, the
+ * next base + 1, and so on. The number is stored in alg.stateNum. */
+void FsmGraph::setStateNumbers( int base )
+{
+    StateList::Iter st = stateList;
+    while ( st.lte() ) {
+        st->alg.stateNum = base;
+        base += 1;
+        st++;
+    }
+}
+
+
+bool FsmGraph::checkErrTrans( FsmState *state, FsmTrans *trans )
+{
+ /* Might go directly to error state. */
+ if ( trans->toState == 0 )
+ return true;
+
+ if ( trans->prev == 0 ) {
+ /* If this is the first transition. */
+ if ( keyOps->minKey < trans->lowKey )
+ return true;
+ }
+ else {
+ /* Not the first transition. Compare against the prev. */
+ FsmTrans *prev = trans->prev;
+ Key nextKey = prev->highKey;
+ nextKey.increment();
+ if ( nextKey < trans->lowKey )
+ return true;
+ }
+ return false;
+}
+
+bool FsmGraph::checkErrTransFinish( FsmState *state )
+{
+ /* Check if there are any ranges already. */
+ if ( state->outList.length() == 0 )
+ return true;
+ else {
+ /* Get the last and check for a gap on the end. */
+ FsmTrans *last = state->outList.tail;
+ if ( last->highKey < keyOps->maxKey )
+ return true;
+ }
+ return 0;
+}
+
+bool FsmGraph::hasErrorTrans()
+{
+ bool result;
+ for ( StateList::Iter st = stateList; st.lte(); st++ ) {
+ for ( TransList::Iter tr = st->outList; tr.lte(); tr++ ) {
+ result = checkErrTrans( st, tr );
+ if ( result )
+ return true;
+ }
+ result = checkErrTransFinish( st );
+ if ( result )
+ return true;
+ }
+ return false;
+}
diff --git a/src/fsmcodegen.cc b/src/fsmcodegen.cc
new file mode 100644
index 00000000..8f3ab597
--- /dev/null
+++ b/src/fsmcodegen.cc
@@ -0,0 +1,1098 @@
+/*
+ * Copyright 2006-2012 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "parsedata.h"
+#include "fsmcodegen.h"
+#include "redfsm.h"
+#include "bstmap.h"
+#include "fsmrun.h"
+#include <sstream>
+#include <string>
+#include <assert.h>
+
+
+using std::ostream;
+using std::ostringstream;
+using std::string;
+using std::cerr;
+using std::endl;
+
+
+/* Init code gen with in parameters. The source file name, machine name,
+ * output stream, reduced machine and tables are stored as given. The error
+ * count starts at zero and the dataPrefix, writeFirstFinal and writeErr flags
+ * all start out true. */
+FsmCodeGen::FsmCodeGen( const char *sourceFileName, const char *fsmName, ostream &out,
+ RedFsm *redFsm, FsmTables *fsmTables )
+:
+ sourceFileName(sourceFileName),
+ fsmName(fsmName),
+ out(out),
+ redFsm(redFsm),
+ fsmTables(fsmTables),
+ codeGenErrCount(0),
+ dataPrefix(true),
+ writeFirstFinal(true),
+ writeErr(true)
+{
+}
+
+/* Size of the host type that keyOps->typeSubsumes() selects for maxVal. */
+unsigned int FsmCodeGen::arrayTypeSize( unsigned long maxVal )
+{
+	HostType *hostType = keyOps->typeSubsumes( (long long) maxVal );
+	assert( hostType != 0 );
+	return hostType->size;
+}
+
+/* Name of the host type that keyOps->typeSubsumes() selects for maxVal. The
+ * name is data1, followed by a space and data2 when data2 is set. */
+string FsmCodeGen::ARRAY_TYPE( unsigned long maxVal )
+{
+	HostType *hostType = keyOps->typeSubsumes( (long long) maxVal );
+	assert( hostType != 0 );
+
+	string name( hostType->data1 );
+	if ( hostType->data2 != 0 ) {
+		name += " ";
+		name += hostType->data2;
+	}
+	return name;
+}
+
+
+/* The fsm name given at construction, as a string. */
+string FsmCodeGen::FSM_NAME()
+{
+	return string( fsmName );
+}
+
+/* Format the id of the start state as a decimal integer string. */
+string FsmCodeGen::START_STATE_ID()
+{
+	ostringstream ret;
+	ret << redFsm->startState->id;
+	return ret.str();
+}
+
+/* Write out the array of actions: a leading 0, then for every action table
+ * its length followed by the ids of its actions. */
+std::ostream &FsmCodeGen::ACTIONS_ARRAY()
+{
+	out << "\t0, ";
+
+	/* Counts the values written so far; drives the line breaks. */
+	int written = 1;
+	for ( GenActionTableMap::Iter act = redFsm->actionMap; act.lte(); act++ ) {
+		/* The length is never the final value, so it always gets a comma. */
+		out << act->key.length() << ", ";
+
+		/* Put in a line break every 8 */
+		if ( written % 8 == 7 )
+			out << "\n\t";
+		written += 1;
+
+		for ( GenActionTable::Iter item = act->key; item.lte(); item++ ) {
+			out << item->value->actionId;
+
+			/* No comma after the very last id of the very last table. */
+			bool veryLast = act.last() && item.last();
+			if ( !veryLast )
+				out << ", ";
+
+			/* Put in a line break every 8 */
+			if ( written % 8 == 7 )
+				out << "\n\t";
+			written += 1;
+		}
+	}
+	out << "\n";
+	return out;
+}
+
+
+/* Expression for the "cs" variable: the ACCESS() prefix followed by "cs". */
+string FsmCodeGen::CS()
+{
+	ostringstream expr;
+	expr << ACCESS();
+	expr << "cs";
+	return expr.str();
+}
+
+/* Key expression to test: "_widec" when the machine has any conditions,
+ * otherwise the plain key expression. */
+string FsmCodeGen::GET_WIDE_KEY()
+{
+	if ( !redFsm->anyConditions() )
+		return GET_KEY();
+	return "_widec";
+}
+
+/* Key expression to test in the given state: "_widec" when the state has a
+ * non-empty condition list, otherwise the plain key expression. */
+string FsmCodeGen::GET_WIDE_KEY( RedState *state )
+{
+	if ( !( state->stateCondList.length() > 0 ) )
+		return GET_KEY();
+	return "_widec";
+}
+
+/* Expression for retrieving the key: a parenthesized dereference of P(). */
+string FsmCodeGen::GET_KEY()
+{
+	ostringstream expr;
+	expr << "(*";
+	expr << P();
+	expr << ")";
+	return expr.str();
+}
+
+/* Build a string of level tab characters (empty when level is not positive).
+ * Makes the nested binary search nice looking. */
+string FsmCodeGen::TABS( int level )
+{
+	if ( level <= 0 )
+		return string();
+	return string( (string::size_type) level, '\t' );
+}
+
+/* Write out a key from the fsm code gen. Depends on whether or not the key is
+ * signed: unsigned keys in a host language with explicit unsigned literals
+ * are printed as unsigned long with a 'u' suffix. */
+string FsmCodeGen::KEY( Key key )
+{
+	ostringstream text;
+	bool plain = keyOps->isSigned || !hostLang->explicitUnsigned;
+	if ( plain ) {
+		text << key.getVal();
+	}
+	else {
+		text << (unsigned long) key.getVal();
+		text << 'u';
+	}
+	return text.str();
+}
+
+/* Emit an assignment of the item's longest-match id to ACT(). */
+void FsmCodeGen::SET_ACT( ostream &ret, InlineItem *item )
+{
+	ret << ACT() << " = ";
+	ret << item->longestMatchPart->longestMatchId << ";";
+}
+
+/* Emit the tokend action: TOKEND() is set to P() plus one. */
+void FsmCodeGen::SET_TOKEND( ostream &ret, InlineItem *item )
+{
+	ret << TOKEND() << " = ";
+	ret << P() << "+1;";
+}
+/* Emit an assignment that resets TOKSTART() to zero. */
+void FsmCodeGen::INIT_TOKSTART( ostream &ret, InlineItem *item )
+{
+	ret << TOKSTART();
+	ret << " = 0;";
+}
+
+/* Emit an assignment that resets ACT() to zero. */
+void FsmCodeGen::INIT_ACT( ostream &ret, InlineItem *item )
+{
+	ret << ACT();
+	ret << " = 0;";
+}
+
+/* Emit an assignment of P() to TOKSTART(). */
+void FsmCodeGen::SET_TOKSTART( ostream &ret, InlineItem *item )
+{
+	ret << TOKSTART() << " = ";
+	ret << P() << ";";
+}
+
+/* Emit an assignment of the token's id to MATCHED_TOKEN(). */
+void FsmCodeGen::EMIT_TOKEN( ostream &ret, LangEl *token )
+{
+	ret << " " << MATCHED_TOKEN();
+	ret << " = " << token->id << ";\n";
+}
+
+/* Emit the longest-match switch for the item's token region: P() is restored
+ * from TOKEND(), then a switch on ACT() sets the matched token for every
+ * token def flagged inLmSelect, followed by a return. The targState and
+ * inFinish arguments are not used in this body. */
+void FsmCodeGen::LM_SWITCH( ostream &ret, InlineItem *item,
+ int targState, int inFinish )
+{
+ ret <<
+ " " << P() << " = " << TOKEND() << ";\n"
+ " switch( " << ACT() << " ) {\n";
+
+ /* If the switch handles error then we also forced the error state. It
+ * will exist. */
+ if ( item->tokenRegion->lmSwitchHandlesError ) {
+ /* Case 0: rewind P() to TOKSTART() and jump to the error state. */
+ ret << " case 0: " << P() << " = " << TOKSTART() <<
+ "; goto st" << redFsm->errState->id << ";\n";
+ }
+
+ /* One case per token def that takes part in the select. */
+ for ( TokenDefListReg::Iter lmi = item->tokenRegion->tokenDefList; lmi.lte(); lmi++ ) {
+ if ( lmi->inLmSelect ) {
+ assert( lmi->tdLangEl != 0 );
+ ret << " case " << lmi->longestMatchId << ":\n";
+ EMIT_TOKEN( ret, lmi->tdLangEl );
+ ret << " break;\n";
+ }
+ }
+
+ ret <<
+ " }\n"
+ "\t"
+ " return;\n";
+}
+
+/* Emit: advance P() by one, set the matched token for the item's
+ * longest-match part, then return. */
+void FsmCodeGen::LM_ON_LAST( ostream &ret, InlineItem *item )
+{
+	LangEl *matched = item->longestMatchPart->tdLangEl;
+	assert( matched != 0 );
+
+	ret << " " << P();
+	ret << " += 1;\n";
+	EMIT_TOKEN( ret, matched );
+	ret << " return;\n";
+}
+
+/* Emit: set the matched token for the item's longest-match part, then
+ * return. P() is left untouched. */
+void FsmCodeGen::LM_ON_NEXT( ostream &ret, InlineItem *item )
+{
+	LangEl *matched = item->longestMatchPart->tdLangEl;
+	assert( matched != 0 );
+
+	EMIT_TOKEN( ret, matched );
+	ret << " return;\n";
+}
+
+/* Emit: restore P() from TOKEND(), set the matched token for the item's
+ * longest-match part, then return. */
+void FsmCodeGen::LM_ON_LAG_BEHIND( ostream &ret, InlineItem *item )
+{
+	LangEl *matched = item->longestMatchPart->tdLangEl;
+	assert( matched != 0 );
+
+	ret << " " << P();
+	ret << " = " << TOKEND() << ";\n";
+	EMIT_TOKEN( ret, matched );
+	ret << " return;\n";
+}
+
+
+/* Write out an inline tree structure. Walks the list and dispatches on each
+ * item's type to the function that handles it. Text and LmInitTokStart items
+ * are not expected here and trip an assertion. targState and inFinish are
+ * only passed through to LM_SWITCH. */
+void FsmCodeGen::INLINE_LIST( ostream &ret, InlineList *inlineList,
+ int targState, bool inFinish )
+{
+ for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
+ switch ( item->type ) {
+ case InlineItem::Text:
+ assert( false );
+ break;
+ case InlineItem::LmSetActId:
+ SET_ACT( ret, item );
+ break;
+ case InlineItem::LmSetTokEnd:
+ SET_TOKEND( ret, item );
+ break;
+ case InlineItem::LmInitTokStart:
+ /* NOTE(review): INIT_TOKSTART exists but is not called here;
+ * confirm that this item type is never generated. */
+ assert( false );
+ break;
+ case InlineItem::LmInitAct:
+ INIT_ACT( ret, item );
+ break;
+ case InlineItem::LmSetTokStart:
+ SET_TOKSTART( ret, item );
+ break;
+ case InlineItem::LmSwitch:
+ LM_SWITCH( ret, item, targState, inFinish );
+ break;
+ case InlineItem::LmOnLast:
+ LM_ON_LAST( ret, item );
+ break;
+ case InlineItem::LmOnNext:
+ LM_ON_NEXT( ret, item );
+ break;
+ case InlineItem::LmOnLagBehind:
+ LM_ON_LAG_BEHIND( ret, item );
+ break;
+ }
+ }
+}
+
+/* Write out paths in line directives. Every backslash in the path is doubled;
+ * all other characters are copied unchanged. */
+string FsmCodeGen::LDIR_PATH( char *path )
+{
+	string escaped;
+	for ( const char *pc = path; *pc != 0; pc++ ) {
+		if ( *pc == '\\' )
+			escaped += "\\\\";
+		else
+			escaped += *pc;
+	}
+	return escaped;
+}
+
+/* Emit one action as a brace-enclosed block: the action's inline list and,
+ * when the action has a positive markId, an assignment of P() to the mark
+ * slot markId-1. */
+void FsmCodeGen::ACTION( ostream &ret, GenAction *action, int targState, bool inFinish )
+{
+	/* Open the block. */
+	ret << "\t{";
+	INLINE_LIST( ret, action->inlineList, targState, inFinish );
+
+	if ( action->markId > 0 ) {
+		ret << "mark[" << action->markId-1 << "] = ";
+		ret << P() << ";\n";
+	}
+
+	/* Close it off. */
+	ret << "}\n";
+}
+
+/* Emit a newline followed by the condition's inline list. The list is
+ * written with a target state of 0 and inFinish false. */
+void FsmCodeGen::CONDITION( ostream &ret, GenAction *condition )
+{
+	InlineList *condCode = condition->inlineList;
+	ret << "\n";
+	INLINE_LIST( ret, condCode, 0, false );
+}
+
+string FsmCodeGen::ERROR_STATE()
+{
+ ostringstream ret;
+ if ( redFsm->errState != 0 )
+ ret << redFsm->errState->id;
+ else
+ ret << "-1";
+ return ret.str();
+}
+
+/* The id of the first final state as a string. When there is no first final
+ * state, nextStateId is used instead. */
+string FsmCodeGen::FIRST_FINAL_STATE()
+{
+	ostringstream text;
+	if ( redFsm->firstFinState == 0 )
+		text << redFsm->nextStateId;
+	else
+		text << redFsm->firstFinState->id;
+	return text.str();
+}
+
+/* Prefix for emitted data names: the fsm name plus an underscore when
+ * dataPrefix is set, otherwise the empty string. */
+string FsmCodeGen::DATA_PREFIX()
+{
+	if ( !dataPrefix )
+		return "";
+	return FSM_NAME() + "_";
+}
+
+/* Emit the alphabet data type: data1 of the alphabet host type, followed by a
+ * space and data2 when data2 is set. */
+string FsmCodeGen::ALPH_TYPE()
+{
+	string name( keyOps->alphType->data1 );
+	if ( keyOps->alphType->data2 != 0 ) {
+		name += " ";
+		name += keyOps->alphType->data2;
+	}
+	return name;
+}
+
+/* Emit the wide alphabet data type. When the machine's maximum key fits in
+ * the alphabet this is just the alphabet type; otherwise it is the host type
+ * that keyOps->typeSubsumes() selects for the machine's maximum key. */
+string FsmCodeGen::WIDE_ALPH_TYPE()
+{
+	if ( redFsm->maxKey <= keyOps->maxKey )
+		return ALPH_TYPE();
+
+	long long maxKeyVal = redFsm->maxKey.getLongLong();
+	HostType *wideType = keyOps->typeSubsumes( keyOps->isSigned, maxKeyVal );
+	assert( wideType != 0 );
+
+	string name( wideType->data1 );
+	if ( wideType->data2 != 0 ) {
+		name += " ";
+		name += wideType->data2;
+	}
+	return name;
+}
+
+
+/* The const qualifier, with a trailing space, as emitted text. */
+string FsmCodeGen::PTR_CONST()
+{
+	return string( "const " );
+}
+
+/* Emit the opening of a static const array definition with the given element
+ * type and name, up to and including the opening brace. */
+std::ostream &FsmCodeGen::OPEN_ARRAY( string type, string name )
+{
+	return out << "static const " << type << " " << name << "[] = {\n";
+}
+
+/* Emit the closing brace and semicolon of an array definition. */
+std::ostream &FsmCodeGen::CLOSE_ARRAY()
+{
+	out << "};\n";
+	return out;
+}
+
+/* Emit the start of a static const variable declaration: qualifiers, type and
+ * name, with nothing after the name. */
+std::ostream &FsmCodeGen::STATIC_VAR( string type, string name )
+{
+	return out << "static const " << type << " " << name;
+}
+
+/* The unsigned int type name as emitted text. */
+string FsmCodeGen::UINT( )
+{
+	return string( "unsigned int" );
+}
+
+/* Expression text adding an offset to a pointer: "ptr + offset". */
+string FsmCodeGen::ARR_OFF( string ptr, string offset )
+{
+	string expr( ptr );
+	expr += " + ";
+	expr += offset;
+	return expr;
+}
+
+/* Cast expression text: the type wrapped in parentheses. */
+string FsmCodeGen::CAST( string type )
+{
+	string expr( "(" );
+	expr += type;
+	expr += ")";
+	return expr;
+}
+
+/* Emit one switch case per action that has to-state references. */
+std::ostream &FsmCodeGen::TO_STATE_ACTION_SWITCH()
+{
+	for ( GenActionList::Iter act = redFsm->genActionList; act.lte(); act++ ) {
+		/* Skip actions that are never referenced as to-state actions. */
+		if ( !( act->numToStateRefs > 0 ) )
+			continue;
+
+		/* The case label, the action and the case break. */
+		out << "\tcase " << act->actionId << ":\n";
+		ACTION( out, act, 0, false );
+		out << "\tbreak;\n";
+	}
+
+	return out;
+}
+
+/* Emit one switch case per action that has from-state references. */
+std::ostream &FsmCodeGen::FROM_STATE_ACTION_SWITCH()
+{
+	for ( GenActionList::Iter act = redFsm->genActionList; act.lte(); act++ ) {
+		/* Skip actions that are never referenced as from-state actions. */
+		if ( !( act->numFromStateRefs > 0 ) )
+			continue;
+
+		/* The case label, the action and the case break. */
+		out << "\tcase " << act->actionId << ":\n";
+		ACTION( out, act, 0, false );
+		out << "\tbreak;\n";
+	}
+
+	return out;
+}
+
+/* Emit one switch case per action that has transition references. */
+std::ostream &FsmCodeGen::ACTION_SWITCH()
+{
+	for ( GenActionList::Iter act = redFsm->genActionList; act.lte(); act++ ) {
+		/* Skip actions that are never referenced by a transition. */
+		if ( !( act->numTransRefs > 0 ) )
+			continue;
+
+		/* The case label, the action and the case break. */
+		out << "\tcase " << act->actionId << ":\n";
+		ACTION( out, act, 0, false );
+		out << "\tbreak;\n";
+	}
+
+	return out;
+}
+
+/* Emit the tests for the state's single-key transitions: nothing when there
+ * are none, an if when there is exactly one, a switch otherwise. */
+void FsmCodeGen::emitSingleSwitch( RedState *state )
+{
+	/* Load up the singles. */
+	int numSingles = state->outSingle.length();
+	RedTransEl *singles = state->outSingle.data;
+
+	if ( numSingles < 1 )
+		return;
+
+	if ( numSingles == 1 ) {
+		/* A lone single key is written out as an if. */
+		out << "\tif ( " << GET_WIDE_KEY(state) << " == " <<
+				KEY(singles[0].lowKey) << " )\n\t\t";
+
+		/* Write the target of the transition. */
+		TRANS_GOTO(singles[0].value, 0) << "\n";
+		return;
+	}
+
+	/* More than one single key: write them out in a switch. */
+	out << "\tswitch( " << GET_WIDE_KEY(state) << " ) {\n";
+
+	/* Write out the single indices. */
+	for ( int idx = 0; idx < numSingles; idx++ ) {
+		out << "\t\tcase " << KEY(singles[idx].lowKey) << ": ";
+		TRANS_GOTO(singles[idx].value, 0) << "\n";
+	}
+
+	/* Close off the transition switch. */
+	out << "\t}\n";
+}
+
+/* Emit a nested if/else binary search over the state's range transitions,
+ * entries low..high of outRange. The branch that matches the key jumps via
+ * TRANS_GOTO. level is the tab depth used for indenting the emitted code.
+ * Comparisons against the alphabet limits are left out since they would
+ * always hold. */
+void FsmCodeGen::emitRangeBSearch( RedState *state, int level, int low, int high )
+{
+ /* Get the mid position, staying on the lower end of the range. */
+ int mid = (low + high) >> 1;
+ RedTransEl *data = state->outRange.data;
+
+ /* Determine if we need to look higher or lower. */
+ bool anyLower = mid > low;
+ bool anyHigher = mid < high;
+
+ /* Determine if the keys at mid are the limits of the alphabet. */
+ bool limitLow = data[mid].lowKey == keyOps->minKey;
+ bool limitHigh = data[mid].highKey == keyOps->maxKey;
+
+ if ( anyLower && anyHigher ) {
+ /* Can go lower and higher than mid. */
+ out << TABS(level) << "if ( " << GET_WIDE_KEY(state) << " < " <<
+ KEY(data[mid].lowKey) << " ) {\n";
+ emitRangeBSearch( state, level+1, low, mid-1 );
+ out << TABS(level) << "} else if ( " << GET_WIDE_KEY(state) << " > " <<
+ KEY(data[mid].highKey) << " ) {\n";
+ emitRangeBSearch( state, level+1, mid+1, high );
+ out << TABS(level) << "} else\n";
+ TRANS_GOTO(data[mid].value, level+1) << "\n";
+ }
+ else if ( anyLower && !anyHigher ) {
+ /* Can go lower than mid but not higher. */
+ out << TABS(level) << "if ( " << GET_WIDE_KEY(state) << " < " <<
+ KEY(data[mid].lowKey) << " ) {\n";
+ emitRangeBSearch( state, level+1, low, mid-1 );
+
+ /* if the higher is the highest in the alphabet then there is no
+ * sense testing it. */
+ if ( limitHigh ) {
+ out << TABS(level) << "} else\n";
+ TRANS_GOTO(data[mid].value, level+1) << "\n";
+ }
+ else {
+ out << TABS(level) << "} else if ( " << GET_WIDE_KEY(state) << " <= " <<
+ KEY(data[mid].highKey) << " )\n";
+ TRANS_GOTO(data[mid].value, level+1) << "\n";
+ }
+ }
+ else if ( !anyLower && anyHigher ) {
+ /* Can go higher than mid but not lower. */
+ out << TABS(level) << "if ( " << GET_WIDE_KEY(state) << " > " <<
+ KEY(data[mid].highKey) << " ) {\n";
+ emitRangeBSearch( state, level+1, mid+1, high );
+
+ /* If the lower end is the lowest in the alphabet then there is no
+ * sense testing it. */
+ if ( limitLow ) {
+ out << TABS(level) << "} else\n";
+ TRANS_GOTO(data[mid].value, level+1) << "\n";
+ }
+ else {
+ out << TABS(level) << "} else if ( " << GET_WIDE_KEY(state) << " >= " <<
+ KEY(data[mid].lowKey) << " )\n";
+ TRANS_GOTO(data[mid].value, level+1) << "\n";
+ }
+ }
+ else {
+ /* Cannot go higher or lower than mid. It's mid or bust. What
+ * tests to do depends on limits of alphabet. */
+ if ( !limitLow && !limitHigh ) {
+ out << TABS(level) << "if ( " << KEY(data[mid].lowKey) << " <= " <<
+ GET_WIDE_KEY(state) << " && " << GET_WIDE_KEY(state) << " <= " <<
+ KEY(data[mid].highKey) << " )\n";
+ TRANS_GOTO(data[mid].value, level+1) << "\n";
+ }
+ else if ( limitLow && !limitHigh ) {
+ out << TABS(level) << "if ( " << GET_WIDE_KEY(state) << " <= " <<
+ KEY(data[mid].highKey) << " )\n";
+ TRANS_GOTO(data[mid].value, level+1) << "\n";
+ }
+ else if ( !limitLow && limitHigh ) {
+ out << TABS(level) << "if ( " << KEY(data[mid].lowKey) << " <= " <<
+ GET_WIDE_KEY(state) << " )\n";
+ TRANS_GOTO(data[mid].value, level+1) << "\n";
+ }
+ else {
+ /* Both high and low are at the limit. No tests to do. */
+ TRANS_GOTO(data[mid].value, level+1) << "\n";
+ }
+ }
+}
+
+/* Emit the computation of _widec for one condition range: the key is rebased
+ * onto the condition space's base key, then for every condition in the set an
+ * if is emitted that adds that condition's offset (alphabet size shifted left
+ * by the condition's position in the set). */
+void FsmCodeGen::COND_TRANSLATE( GenStateCond *stateCond, int level )
+{
+	GenCondSpace *space = stateCond->condSpace;
+
+	/* Rebase the key into the condition space. */
+	out << TABS(level) << "_widec = " << CAST(WIDE_ALPH_TYPE()) << "(";
+	out << KEY(space->baseKey) << " + (" << GET_KEY();
+	out << " - " << KEY(keyOps->minKey) << "));\n";
+
+	for ( GenCondSet::Iter csi = space->condSet; csi.lte(); csi++ ) {
+		out << TABS(level) << "if ( ";
+		CONDITION( out, *csi );
+
+		Size condValOffset = ((1 << csi.pos()) * keyOps->alphSize());
+		out << " ) _widec += " << condValOffset << ";\n";
+	}
+}
+
+/* Emit a nested if/else binary search over the state's condition ranges,
+ * entries low..high of stateCondVect. The branch that matches the key runs
+ * COND_TRANSLATE for that range. level is the tab depth used for indenting
+ * the emitted code. Comparisons against the alphabet limits are left out
+ * since they would always hold. */
+void FsmCodeGen::emitCondBSearch( RedState *state, int level, int low, int high )
+{
+	/* Get the mid position, staying on the lower end of the range. */
+	int mid = (low + high) >> 1;
+	GenStateCond **data = state->stateCondVect.data;
+
+	/* Determine if we need to look higher or lower. */
+	bool anyLower = mid > low;
+	bool anyHigher = mid < high;
+
+	/* Determine if the keys at mid are the limits of the alphabet. */
+	bool limitLow = data[mid]->lowKey == keyOps->minKey;
+	bool limitHigh = data[mid]->highKey == keyOps->maxKey;
+
+	if ( anyLower && anyHigher ) {
+		/* Can go lower and higher than mid. */
+		out << TABS(level) << "if ( " << GET_KEY() << " < " <<
+				KEY(data[mid]->lowKey) << " ) {\n";
+		emitCondBSearch( state, level+1, low, mid-1 );
+		out << TABS(level) << "} else if ( " << GET_KEY() << " > " <<
+				KEY(data[mid]->highKey) << " ) {\n";
+		emitCondBSearch( state, level+1, mid+1, high );
+		out << TABS(level) << "} else {\n";
+		COND_TRANSLATE(data[mid], level+1);
+		out << TABS(level) << "}\n";
+	}
+	else if ( anyLower && !anyHigher ) {
+		/* Can go lower than mid but not higher. */
+		out << TABS(level) << "if ( " << GET_KEY() << " < " <<
+				KEY(data[mid]->lowKey) << " ) {\n";
+		emitCondBSearch( state, level+1, low, mid-1 );
+
+		/* if the higher is the highest in the alphabet then there is no
+		 * sense testing it. */
+		if ( limitHigh ) {
+			out << TABS(level) << "} else {\n";
+			COND_TRANSLATE(data[mid], level+1);
+			out << TABS(level) << "}\n";
+		}
+		else {
+			out << TABS(level) << "} else if ( " << GET_KEY() << " <= " <<
+					KEY(data[mid]->highKey) << " ) {\n";
+			COND_TRANSLATE(data[mid], level+1);
+			out << TABS(level) << "}\n";
+		}
+	}
+	else if ( !anyLower && anyHigher ) {
+		/* Can go higher than mid but not lower. */
+		out << TABS(level) << "if ( " << GET_KEY() << " > " <<
+				KEY(data[mid]->highKey) << " ) {\n";
+		emitCondBSearch( state, level+1, mid+1, high );
+
+		/* If the lower end is the lowest in the alphabet then there is no
+		 * sense testing it. */
+		if ( limitLow ) {
+			out << TABS(level) << "} else {\n";
+			COND_TRANSLATE(data[mid], level+1);
+			out << TABS(level) << "}\n";
+		}
+		else {
+			out << TABS(level) << "} else if ( " << GET_KEY() << " >= " <<
+					KEY(data[mid]->lowKey) << " ) {\n";
+			COND_TRANSLATE(data[mid], level+1);
+			out << TABS(level) << "}\n";
+		}
+	}
+	else {
+		/* Cannot go higher or lower than mid. It's mid or bust. What
+		 * tests to do depends on limits of alphabet. */
+		if ( !limitLow && !limitHigh ) {
+			out << TABS(level) << "if ( " << KEY(data[mid]->lowKey) << " <= " <<
+					GET_KEY() << " && " << GET_KEY() << " <= " <<
+					KEY(data[mid]->highKey) << " ) {\n";
+			COND_TRANSLATE(data[mid], level+1);
+			out << TABS(level) << "}\n";
+		}
+		else if ( limitLow && !limitHigh ) {
+			out << TABS(level) << "if ( " << GET_KEY() << " <= " <<
+					KEY(data[mid]->highKey) << " ) {\n";
+			COND_TRANSLATE(data[mid], level+1);
+			out << TABS(level) << "}\n";
+		}
+		else if ( !limitLow && limitHigh ) {
+			/* Open the block the same way as every other branch: brace on
+			 * the if line, body starting on the next line. */
+			out << TABS(level) << "if ( " << KEY(data[mid]->lowKey) << " <= " <<
+					GET_KEY() << " ) {\n";
+			COND_TRANSLATE(data[mid], level+1);
+			out << TABS(level) << "}\n";
+		}
+		else {
+			/* Both high and low are at the limit. No tests to do. */
+			COND_TRANSLATE(data[mid], level);
+		}
+	}
+}
+
+std::ostream &FsmCodeGen::STATE_GOTOS()
+{
+ for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+ if ( st == redFsm->errState )
+ STATE_GOTO_ERROR();
+ else {
+ /* Writing code above state gotos. */
+ GOTO_HEADER( st );
+
+ if ( st->stateCondVect.length() > 0 ) {
+ out << " _widec = " << GET_KEY() << ";\n";
+ emitCondBSearch( st, 1, 0, st->stateCondVect.length() - 1 );
+ }
+
+ /* Try singles. */
+ if ( st->outSingle.length() > 0 )
+ emitSingleSwitch( st );
+
+ /* Default case is to binary search for the ranges, if that fails then */
+ if ( st->outRange.length() > 0 )
+ emitRangeBSearch( st, 1, 0, st->outRange.length() - 1 );
+
+ /* Write the default transition. */
+ TRANS_GOTO( st->defTrans, 1 ) << "\n";
+ }
+ }
+ return out;
+}
+
+/* Table value for the state's to-state action: the action's location plus
+ * one, or zero when the state has no to-state action. */
+unsigned int FsmCodeGen::TO_STATE_ACTION( RedState *state )
+{
+	if ( state->toStateAction == 0 )
+		return 0;
+
+	int act = state->toStateAction->location+1;
+	return act;
+}
+
+/* Table value for the state's from-state action: the action's location plus
+ * one, or zero when the state has no from-state action. */
+unsigned int FsmCodeGen::FROM_STATE_ACTION( RedState *state )
+{
+	if ( state->fromStateAction == 0 )
+		return 0;
+
+	int act = state->fromStateAction->location+1;
+	return act;
+}
+
+/* Write the to-state action value of every state, ordered by state id, as a
+ * comma-separated list broken into lines of IALL entries. Ids with no state
+ * in the list are written as zero. */
+std::ostream &FsmCodeGen::TO_STATE_ACTIONS()
+{
+	/* The table is filled by state id and printed up to nextStateId, so it
+	 * has to be large enough for both, not only for the list length. */
+	int numStates = redFsm->stateList.length();
+	int numVals = numStates;
+	if ( redFsm->nextStateId > numVals )
+		numVals = redFsm->nextStateId;
+
+	/* The trailing () zero-initializes every element. */
+	unsigned int *vals = new unsigned int[numVals]();
+
+	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ )
+		vals[st->id] = TO_STATE_ACTION(st);
+
+	out << "\t";
+	for ( int st = 0; st < redFsm->nextStateId; st++ ) {
+		out << vals[st];
+
+		/* Separate every entry except the last one actually written. */
+		if ( st < redFsm->nextStateId-1 ) {
+			out << ", ";
+			if ( (st+1) % IALL == 0 )
+				out << "\n\t";
+		}
+	}
+	out << "\n";
+	delete[] vals;
+	return out;
+}
+
+/* Write the from-state action value of every state, ordered by state id, as a
+ * comma-separated list broken into lines of IALL entries. Ids with no state
+ * in the list are written as zero. */
+std::ostream &FsmCodeGen::FROM_STATE_ACTIONS()
+{
+	/* The table is filled by state id and printed up to nextStateId, so it
+	 * has to be large enough for both, not only for the list length. */
+	int numStates = redFsm->stateList.length();
+	int numVals = numStates;
+	if ( redFsm->nextStateId > numVals )
+		numVals = redFsm->nextStateId;
+
+	/* The trailing () zero-initializes every element. */
+	unsigned int *vals = new unsigned int[numVals]();
+
+	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ )
+		vals[st->id] = FROM_STATE_ACTION(st);
+
+	out << "\t";
+	for ( int st = 0; st < redFsm->nextStateId; st++ ) {
+		out << vals[st];
+
+		/* Separate every entry except the last one actually written. */
+		if ( st < redFsm->nextStateId-1 ) {
+			out << ", ";
+			if ( (st+1) % IALL == 0 )
+				out << "\n\t";
+		}
+	}
+	out << "\n";
+	delete[] vals;
+	return out;
+}
+
+/* Emit a labelled block for every transition into this state that has an
+ * action and needs a label: the label, the action code, then a goto to the
+ * target state. Always returns false. */
+bool FsmCodeGen::IN_TRANS_ACTIONS( RedState *state )
+{
+	for ( int idx = 0; idx < state->numInTrans; idx++ ) {
+		RedTrans *inTr = state->inTrans[idx];
+
+		/* Only transitions with an action and a needed label get a block. */
+		if ( inTr->action == 0 || !inTr->labelNeeded )
+			continue;
+
+		/* Write the label for the transition so it can be jumped to. */
+		out << "tr" << inTr->id << ":\n";
+
+		/* If the action contains a next, then we must preload the current
+		 * state since the action may or may not set it. */
+		if ( inTr->action->anyNextStmt() )
+			out << " " << CS() << " = " << inTr->targ->id << ";\n";
+
+		/* Write each action in the list. */
+		for ( GenActionTable::Iter item = inTr->action->key; item.lte(); item++ )
+			ACTION( out, item->value, inTr->targ->id, false );
+
+		out << "\tgoto st" << inTr->targ->id << ";\n";
+	}
+
+	return false;
+}
+
+/* Called from FsmCodeGen::STATE_GOTOS just before writing the gotos for each
+ * state. Emits, in order: the incoming transition actions, the state label
+ * (when needed), the to-state actions, the case label, the pointer advance
+ * with its end test, the from-state actions, and the _ps assignment (when the
+ * state references the current state). */
+void FsmCodeGen::GOTO_HEADER( RedState *state )
+{
+	IN_TRANS_ACTIONS( state );
+
+	if ( state->labelNeeded )
+		out << "st" << state->id << ":\n";
+
+	/* To-state actions are written ahead of the case label. */
+	if ( state->toStateAction != 0 ) {
+		GenActionTable::Iter toAct = state->toStateAction->key;
+		while ( toAct.lte() ) {
+			ACTION( out, toAct->value, state->id, false );
+			toAct++;
+		}
+	}
+
+	/* Give the state a switch case. */
+	out << "case " << state->id << ":\n";
+
+	/* Advance and test buffer pos. */
+	out << " if ( ++" << P() << " == " << PE() << " )\n";
+	out << " goto out" << state->id << ";\n";
+
+	/* From-state actions are written after the advance. */
+	if ( state->fromStateAction != 0 ) {
+		GenActionTable::Iter fromAct = state->fromStateAction->key;
+		while ( fromAct.lte() ) {
+			ACTION( out, fromAct->value, state->id, false );
+			fromAct++;
+		}
+	}
+
+	/* Record the prev state if necessary. */
+	if ( state->anyRegCurStateRef() )
+		out << " _ps = " << state->id << ";\n";
+}
+
+void FsmCodeGen::STATE_GOTO_ERROR()
+{
+ /* In the error state we need to emit some stuff that usually goes into
+ * the header. */
+ RedState *state = redFsm->errState;
+ IN_TRANS_ACTIONS( state );
+
+ if ( state->labelNeeded )
+ out << "st" << state->id << ":\n";
+
+ /* We do not need a case label here because the the error state is checked
+ * at the head of the loop. */
+
+ /* Break out here. */
+ out << " goto out" << state->id << ";\n";
+}
+
+
+/* Emit the goto to take for a given transition, indented by level tabs. A
+ * transition with an action jumps to the transition's own label; one without
+ * jumps straight to the target state's label. */
+std::ostream &FsmCodeGen::TRANS_GOTO( RedTrans *trans, int level )
+{
+	out << TABS(level);
+
+	if ( trans->action == 0 ) {
+		/* Go directly to the target state. */
+		out << "goto st" << trans->targ->id << ";";
+	}
+	else {
+		/* Go to the transition which will go to the state. */
+		out << "goto tr" << trans->id << ";";
+	}
+	return out;
+}
+
+/* Emit the exit code of every state: a case and out label, an eof transition
+ * test when the state has an eof transition, then the store of the state id
+ * into CS() and the jump to the common out label. */
+std::ostream &FsmCodeGen::EXIT_STATES()
+{
+	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+		out << " case " << st->id << ": out" << st->id << ": ";
+
+		/* Take the eof transition when the end of the buffer is eof. */
+		if ( st->eofTrans != 0 ) {
+			out << "if ( " << PE() << " == " << PEOF() << " ) {";
+			TRANS_GOTO( st->eofTrans, 0 );
+			out << "\n}";
+		}
+
+		/* Exit. */
+		out << CS() << " = " << st->id;
+		out << "; goto out; \n";
+	}
+	return out;
+}
+
+/* Set up labelNeeded flag for each state, then copy it into outNeeded. */
+void FsmCodeGen::setLabelsNeeded()
+{
+	/* Start with no labels needed at all. */
+	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ )
+		st->labelNeeded = false;
+
+	/* An error state needs its label when anyLmSwitchError() reports true. */
+	if ( redFsm->errState != 0 && redFsm->anyLmSwitchError() )
+		redFsm->errState->labelNeeded = true;
+
+	/* Walk all transitions. A target needs its label unless the transition
+	 * has an action containing a next statement. */
+	for ( RedTransSet::Iter trans = redFsm->transSet; trans.lte(); trans++ ) {
+		if ( trans->action != 0 && trans->action->anyNextStmt() )
+			continue;
+		trans->targ->labelNeeded = true;
+	}
+
+	/* The out label is needed exactly where the state label is. */
+	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ )
+		st->outNeeded = st->labelNeeded;
+}
+
+/* Emit the static data of the machine: the #defines for the start, first
+ * final and error states, #defines for false and true, the entry-by-region
+ * array, and the fsmTables_start initializer. Apart from the region entry
+ * array, the region count and the three state ids, the table fields are
+ * written as zero. */
+void FsmCodeGen::writeData()
+{
+ out << "#define " << START() << " " << START_STATE_ID() << "\n";
+ out << "#define " << FIRST_FINAL() << " " << FIRST_FINAL_STATE() << "\n";
+ out << "#define " << ERROR() << " " << ERROR_STATE() << "\n";
+ out << "#define false 0\n";
+ out << "#define true 1\n";
+ out << "\n";
+
+ /* The region entry points, eight to a line. */
+ out << "long " << ENTRY_BY_REGION() << "[] = {\n\t";
+ for ( int i = 0; i < fsmTables->numRegions; i++ ) {
+ out << fsmTables->entryByRegion[i];
+
+ if ( i < fsmTables->numRegions-1 ) {
+ out << ", ";
+ if ( (i+1) % 8 == 0 )
+ out << "\n\t";
+ }
+ }
+ out << "\n};\n\n";
+
+ /* NOTE(review): the array above is sized by fsmTables->numRegions while
+ * the count written below comes from redFsm->regionToEntry.length();
+ * confirm the two always agree. */
+ out <<
+ "FsmTables fsmTables_start =\n"
+ "{\n"
+ " 0, " /* actions */
+ " 0, " /* keyOffsets */
+ " 0, " /* transKeys */
+ " 0, " /* singleLengths */
+ " 0, " /* rangeLengths */
+ " 0, " /* indexOffsets */
+ " 0, " /* transTargsWI */
+ " 0, " /* transActionsWI */
+ " 0, " /* toStateActions */
+ " 0, " /* fromStateActions */
+ " 0, " /* eofActions */
+ " 0,\n" /* eofTargs */
+ " " << ENTRY_BY_REGION() << ",\n"
+
+ "\n"
+ " 0, " /* numStates */
+ " 0, " /* numActions */
+ " 0, " /* numTransKeys */
+ " 0, " /* numSingleLengths */
+ " 0, " /* numRangeLengths */
+ " 0, " /* numIndexOffsets */
+ " 0, " /* numTransTargsWI */
+ " 0,\n" /* numTransActionsWI */
+ " " << redFsm->regionToEntry.length() << ",\n"
+ "\n"
+ " " << START() << ",\n"
+ " " << FIRST_FINAL() << ",\n"
+ " " << ERROR() << ",\n"
+ "\n"
+ " 0,\n" /* actionSwitch */
+ " 0\n" /* numActionSwitch */
+ "};\n"
+ "\n";
+}
+
+/* Emit the initialization statements: CS() is set to the start state, TOP()
+ * is zeroed when the machine has any action calls or returns, and
+ * TOKSTART(), TOKEND() and ACT() are zeroed. */
+void FsmCodeGen::writeInit()
+{
+	out << " " << CS() << " = " << START() << ";\n";
+
+	/* If there are any calls, then the stack top needs initialization. */
+	bool usesStack = redFsm->anyActionCalls() || redFsm->anyActionRets();
+	if ( usesStack )
+		out << "\t" << TOP() << " = 0;\n";
+
+	out << " " << TOKSTART() << " = 0;\n";
+	out << " " << TOKEND() << " = 0;\n";
+	out << " " << ACT() << " = 0;\n";
+
+	out << "\n";
+}
+
+/* Emit the fsmExecute function. After the label flags are computed, the
+ * emitted function consists of: an early exit when CS() is already the error
+ * state (only if the machine has one), an early jump to the exit switch when
+ * P() equals PE(), the main switch on CS() holding the state code, the exit
+ * switch on CS() holding the per-state exit code, and the common out label. */
+void FsmCodeGen::writeExec()
+{
+ setLabelsNeeded();
+
+ out <<
+ "void fsmExecute( FsmRun *fsmRun, InputStream *inputStream )\n"
+ "{\n"
+ "/*_resume:*/\n";
+
+ if ( redFsm->errState != 0 ) {
+ out <<
+ " if ( " << CS() << " == " << redFsm->errState->id << " )\n"
+ " goto out;\n";
+ }
+
+ /* P() is decremented before the switch because every state header
+ * emitted by GOTO_HEADER pre-increments it. */
+ out <<
+ " if ( " << P() << " == " << PE() << " )\n"
+ " goto out_switch;\n"
+ " --" << P() << ";\n"
+ "\n"
+ " switch ( " << CS() << " )\n {\n";
+ STATE_GOTOS() <<
+ " }\n";
+
+ out <<
+ "out_switch:\n"
+ " switch ( " << CS() << " )\n {\n";
+ EXIT_STATES() <<
+ " }\n";
+
+ out <<
+ " out: {}\n"
+ "}\n"
+ "\n";
+}
+
+/* Emit the fixed list of #include lines for the generated file, followed by
+ * two blank lines. */
+void FsmCodeGen::writeIncludes()
+{
+ out <<
+ "#include <pdarun.h>\n"
+ "#include <fsmrun.h>\n"
+ "#include <debug.h>\n"
+ "#include <bytecode.h>\n"
+ "#include <config.h>\n"
+ "#include <defs.h>\n"
+ "#include <input.h>\n"
+ "#include <tree.h>\n"
+ "#include <program.h>\n"
+ "#include <colm.h>\n"
+ "#include <stdio.h>\n"
+ "#include <stdlib.h>\n"
+ "#include <string.h>\n"
+ "#include <assert.h>\n"
+ "\n"
+ "\n";
+}
+
+/* Top-level driver: orders the states depth first, writes the data and the
+ * execute function, then writes a fixed set of stub functions. */
+void FsmCodeGen::writeCode()
+{
+ redFsm->depthFirstOrdering();
+
+
+ writeData();
+ writeExec();
+
+ /* Referenced in the runtime lib, but used only in the compiler. Probably
+ * should use the preprocessor to make these go away. */
+ /* NOTE(review): initInputFuncs is emitted as a declaration only, unlike
+ * the empty definitions beside it; presumably it is defined elsewhere --
+ * confirm. */
+ out <<
+ "void sendNamedLangEl( Program *prg, Tree **tree, PdaRun *pdaRun,\n"
+ " FsmRun *fsmRun, InputStream *inputStream ) { }\n"
+ "void initBindings( PdaRun *pdaRun ) {}\n"
+ "void pushBinding( PdaRun *pdaRun, ParseTree *parseTree ) {}\n"
+ "void popBinding( PdaRun *pdaRun, ParseTree *tree ) {}\n"
+ "void initStaticFuncs() {}\n"
+ "void initPatternFuncs() {}\n"
+ "void initReplFuncs() {}\n"
+ "void initInputFuncs();\n"
+ "\n"
+ "\n";
+}
+
+/* Start a warning message on cerr, prefixed with the source file name and the
+ * line and column of loc. The caller appends the message text to the returned
+ * stream. */
+ostream &FsmCodeGen::source_warning( const InputLoc &loc )
+{
+	cerr << sourceFileName << ":" << loc.line << ":" << loc.col;
+	cerr << ": warning: ";
+	return cerr;
+}
+
+ostream &FsmCodeGen::source_error( const InputLoc &loc )
+{
+ codeGenErrCount += 1;
+ assert( sourceFileName != 0 );
+ cerr << sourceFileName << ":" << loc.line << ":" << loc.col << ": ";
+ return cerr;
+}
+
+
diff --git a/src/fsmcodegen.h b/src/fsmcodegen.h
new file mode 100644
index 00000000..41cd88ec
--- /dev/null
+++ b/src/fsmcodegen.h
@@ -0,0 +1,212 @@
+/*
+ * Copyright 2001-2012 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _FSMCODEGEN_H
+#define _FSMCODEGEN_H
+
+#include <iostream>
+#include <string>
+#include <stdio.h>
+#include "keyops.h"
+#include "parsedata.h"
+#include "redfsm.h"
+#include "fsmrun.h"
+
+using std::string;
+using std::ostream;
+
+/* Integer array line length. */
+#define IALL 8
+
+/* Forwards. */
+struct RedFsm;
+struct RedState;
+struct GenAction;
+struct NameInst;
+struct RedAction;
+struct LongestMatch;
+struct TokenDef;
+struct InlineList;
+struct InlineItem;
+struct NameInst;
+struct FsmCodeGen;
+
+typedef unsigned long ulong;
+typedef unsigned char uchar;
+
+
+/*
+ * The interface to the parser
+ */
+
+std::ostream *openOutput( char *inputFile );
+
+/* Format an int as its decimal string representation. */
+inline string itoa( int i )
+{
+	/* snprintf bounds the write to the buffer, so the result is always
+	 * NUL-terminated and can never run past buf, whatever the width of
+	 * int on the target. */
+	char buf[16];
+	snprintf( buf, sizeof(buf), "%i", i );
+	return buf;
+}
+
+/*
+ * class FsmCodeGen
+ *
+ * Code generator for a state machine. Output is written to the ostream out;
+ * sourceFileName, fsmName, redFsm and fsmTables are supplied at
+ * construction. The one-line string helpers below (P(), PE(), TOKSTART(),
+ * ...) return the text of an expression built on ACCESS(), i.e. on the
+ * "fsmRun->" prefix.
+ */
+class FsmCodeGen
+{
+public:
+ FsmCodeGen( const char *sourceFileName, const char *fsmName, ostream &out,
+ RedFsm *redFsm, FsmTables *fsmTables );
+
+protected:
+ string FSM_NAME();
+ string START_STATE_ID();
+ ostream &ACTIONS_ARRAY();
+ string GET_WIDE_KEY();
+ string GET_WIDE_KEY( RedState *state );
+ string TABS( int level );
+ string KEY( Key key );
+ string LDIR_PATH( char *path );
+ void ACTION( ostream &ret, GenAction *action, int targState, bool inFinish );
+ void CONDITION( ostream &ret, GenAction *condition );
+ string ALPH_TYPE();
+ string WIDE_ALPH_TYPE();
+ string ARRAY_TYPE( unsigned long maxVal );
+
+ string ARR_OFF( string ptr, string offset );
+ string CAST( string type );
+ string UINT();
+ string GET_KEY();
+ /* Prefix placed in front of every field name returned below. */
+ string ACCESS() { return "fsmRun->"; }
+
+ string P() { return ACCESS() + "p"; }
+ string PE() { return ACCESS() + "pe"; }
+ string PEOF() { return ACCESS() + "peof"; }
+
+ string CS();
+ string TOP() { return ACCESS() + "top"; }
+ string TOKSTART() { return ACCESS() + "tokstart"; }
+ string TOKEND() { return ACCESS() + "tokend"; }
+ string ACT() { return ACCESS() + "act"; }
+ string MATCHED_TOKEN() { return ACCESS() + "matchedToken"; }
+
+ string DATA_PREFIX();
+ /* Names built by appending a fixed suffix to DATA_PREFIX(). */
+ string START() { return DATA_PREFIX() + "start"; }
+ string ERROR() { return DATA_PREFIX() + "error"; }
+ string FIRST_FINAL() { return DATA_PREFIX() + "first_final"; }
+
+ string ENTRY_BY_REGION() { return DATA_PREFIX() + "entryByRegion"; }
+
+
+ void INLINE_LIST( ostream &ret, InlineList *inlineList,
+ int targState, bool inFinish );
+ void EXEC_TOKEND( ostream &ret, InlineItem *item, int targState, int inFinish );
+ void EXECTE( ostream &ret, InlineItem *item, int targState, int inFinish );
+ void LM_SWITCH( ostream &ret, InlineItem *item, int targState, int inFinish ); /* NOTE(review): inFinish is int in these three but bool in ACTION, INLINE_LIST and SUB_ACTION. */
+ void SET_ACT( ostream &ret, InlineItem *item );
+ void INIT_TOKSTART( ostream &ret, InlineItem *item );
+ void INIT_ACT( ostream &ret, InlineItem *item );
+ void SET_TOKSTART( ostream &ret, InlineItem *item );
+ void SET_TOKEND( ostream &ret, InlineItem *item );
+ void GET_TOKEND( ostream &ret, InlineItem *item );
+ void SUB_ACTION( ostream &ret, InlineItem *item, int targState, bool inFinish );
+ void LM_ON_LAST( ostream &ret, InlineItem *item );
+ void LM_ON_NEXT( ostream &ret, InlineItem *item );
+ void LM_ON_LAG_BEHIND( ostream &ret, InlineItem *item );
+ void EXEC_TOKEND( ostream &ret );
+ void EMIT_TOKEN( ostream &ret, LangEl *token );
+
+ string ERROR_STATE();
+ string FIRST_FINAL_STATE();
+
+ string PTR_CONST();
+ ostream &OPEN_ARRAY( string type, string name );
+ ostream &CLOSE_ARRAY();
+ ostream &STATIC_VAR( string type, string name );
+
+ string CTRL_FLOW();
+ /* Start a diagnostic on cerr; the caller appends the message text. */
+ ostream &source_warning(const InputLoc &loc);
+ ostream &source_error(const InputLoc &loc);
+
+ unsigned int arrayTypeSize( unsigned long maxVal );
+
+/* subclass */
+
+public:
+ const char *sourceFileName;
+ const char *fsmName;
+ ostream &out;
+ RedFsm *redFsm;
+ FsmTables *fsmTables;
+ int codeGenErrCount; /* Incremented by source_error(). */
+
+ /* Write options. */
+ bool dataPrefix;
+ bool writeFirstFinal;
+ bool writeErr;
+
+ std::ostream &TO_STATE_ACTION_SWITCH();
+ std::ostream &FROM_STATE_ACTION_SWITCH();
+ std::ostream &ACTION_SWITCH();
+ std::ostream &STATE_GOTOS();
+ std::ostream &TRANSITIONS();
+ std::ostream &EXEC_FUNCS();
+
+ unsigned int TO_STATE_ACTION( RedState *state );
+ unsigned int FROM_STATE_ACTION( RedState *state );
+
+ std::ostream &TO_STATE_ACTIONS();
+ std::ostream &FROM_STATE_ACTIONS();
+
+ void COND_TRANSLATE( GenStateCond *stateCond, int level );
+ void emitCondBSearch( RedState *state, int level, int low, int high );
+ void STATE_CONDS( RedState *state, bool genDefault );
+
+ void emitSingleSwitch( RedState *state );
+ void emitRangeBSearch( RedState *state, int level, int low, int high );
+
+ std::ostream &EXIT_STATES();
+ std::ostream &TRANS_GOTO( RedTrans *trans, int level );
+ std::ostream &FINISH_CASES();
+ /* Top-level writers. writeCode() calls writeData() and writeExec(). */
+ void writeIncludes();
+ void writeData();
+ void writeInit();
+ void writeExec();
+ void writeCode();
+ void writeMain();
+
+protected:
+ bool useAgainLabel();
+
+ /* Called from GotoCodeGen::STATE_GOTOS just before writing the gotos for
+ * each state.
+ * NOTE(review): no GotoCodeGen class is declared in this header and
+ * STATE_GOTOS is a member of FsmCodeGen, so the class name here looks
+ * stale -- confirm. */
+ bool IN_TRANS_ACTIONS( RedState *state );
+ void GOTO_HEADER( RedState *state );
+ void STATE_GOTO_ERROR();
+
+ /* Set up labelNeeded flag for each state. */
+ void setLabelsNeeded();
+};
+
+#endif /* _FSMCODEGEN_H */
diff --git a/src/fsmexec.cc b/src/fsmexec.cc
new file mode 100644
index 00000000..f922c7a4
--- /dev/null
+++ b/src/fsmexec.cc
@@ -0,0 +1,208 @@
+/*
+ * Copyright 2007-2012 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <string.h>
+#include <iostream>
+
+#include "config.h"
+#include "defs.h"
+#include "fsmrun.h"
+#include "redfsm.h"
+#include "parsedata.h"
+#include "parsetree.h"
+#include "pdarun.h"
+#include "global.h"
+/* Interpret one action against fsmRun. Each item of the action's inline
+ * list is applied in order; afterwards, if the action's markType is
+ * MarkMark, the current position is stored in fsmRun->mark. The items
+ * LmSwitch, LmOnLast, LmOnNext and LmOnLagBehind set fsmRun->returnResult,
+ * which fsmExecute checks after running transition and EOF actions.
+ */
+void execAction( FsmRun *fsmRun, GenAction *genAction )
+{
+ for ( InlineList::Iter item = *genAction->inlineList; item.lte(); item++ ) {
+ switch ( item->type ) {
+ case InlineItem::Text:
+ assert(false); /* Text items are not expected here. */
+ break;
+ case InlineItem::LmSetActId:
+ fsmRun->act = item->longestMatchPart->longestMatchId;
+ break;
+ case InlineItem::LmSetTokEnd:
+ fsmRun->tokend = fsmRun->p + 1; /* One past the current position. */
+ break;
+ case InlineItem::LmInitTokStart:
+ assert(false); /* Not expected here either. */
+ break;
+ case InlineItem::LmInitAct:
+ fsmRun->act = 0;
+ break;
+ case InlineItem::LmSetTokStart:
+ fsmRun->tokstart = fsmRun->p;
+ break;
+ case InlineItem::LmSwitch:
+ /* If the switch handles error then we also forced the error state. It
+ * will exist. */
+ fsmRun->p = fsmRun->tokend;
+ if ( item->tokenRegion->lmSwitchHandlesError && fsmRun->act == 0 ) {
+ fsmRun->p = fsmRun->tokstart; /* Rewind to the token start and fail. */
+ fsmRun->cs = fsmRun->tables->errorState;
+ }
+ else {
+ for ( TokenDefListReg::Iter lmi = item->tokenRegion->tokenDefList;
+ lmi.lte(); lmi++ )
+ {
+ if ( lmi->inLmSelect && fsmRun->act == lmi->longestMatchId )
+ fsmRun->matchedToken = lmi->tdLangEl->id;
+ }
+ }
+ fsmRun->returnResult = true;
+ break;
+ case InlineItem::LmOnLast:
+ fsmRun->p += 1;
+ fsmRun->matchedToken = item->longestMatchPart->tdLangEl->id;
+ fsmRun->returnResult = true;
+ break;
+ case InlineItem::LmOnNext:
+ fsmRun->matchedToken = item->longestMatchPart->tdLangEl->id;
+ fsmRun->returnResult = true;
+ break;
+ case InlineItem::LmOnLagBehind:
+ fsmRun->p = fsmRun->tokend; /* Back up to the recorded token end. */
+ fsmRun->matchedToken = item->longestMatchPart->tdLangEl->id;
+ fsmRun->returnResult = true;
+ break;
+ }
+ }
+
+ if ( genAction->markType == MarkMark )
+ fsmRun->mark[genAction->markId-1] = fsmRun->p; /* Slot markId-1 of the mark array. */
+}
+/* Run the table-driven state machine in fsmRun->tables over the input from
+ * fsmRun->p up to fsmRun->pe, starting in state fsmRun->cs. Returns at once
+ * when a transition action or an EOF action sets fsmRun->returnResult.
+ * Otherwise the loop stops in the error state or when p reaches pe, and the
+ * EOF actions are run if p equals peof. The inputStream parameter is not
+ * referenced in this function.
+ */
+void fsmExecute( FsmRun *fsmRun, InputStream *inputStream )
+{
+ int _klen;
+ unsigned int _trans;
+ const long *_acts;
+ unsigned int _nacts;
+ const char *_keys;
+
+ /* Init the token match to nothing (the sentinel). */
+ fsmRun->matchedToken = 0;
+
+/*_resume:*/
+ if ( fsmRun->cs == fsmRun->tables->errorState )
+ goto out;
+
+ if ( fsmRun->p == fsmRun->pe )
+ goto out;
+
+_loop_head: /* Run the from-state actions; the first entry of a list is its length. */
+ _acts = fsmRun->tables->actions + fsmRun->tables->fromStateActions[fsmRun->cs];
+ _nacts = (unsigned int) *_acts++;
+ while ( _nacts-- > 0 )
+ execAction( fsmRun, fsmRun->tables->actionSwitch[*_acts++] );
+ /* Locate the current state's keys and its first transition index. */
+ _keys = fsmRun->tables->transKeys + fsmRun->tables->keyOffsets[fsmRun->cs];
+ _trans = fsmRun->tables->indexOffsets[fsmRun->cs];
+
+ _klen = fsmRun->tables->singleLengths[fsmRun->cs];
+ if ( _klen > 0 ) { /* Binary search the single keys. */
+ const char *_lower = _keys;
+ const char *_mid;
+ const char *_upper = _keys + _klen - 1;
+ while (1) {
+ if ( _upper < _lower )
+ break;
+
+ _mid = _lower + ((_upper-_lower) >> 1);
+ if ( (*fsmRun->p) < *_mid )
+ _upper = _mid - 1;
+ else if ( (*fsmRun->p) > *_mid )
+ _lower = _mid + 1;
+ else {
+ _trans += (_mid - _keys);
+ goto _match;
+ }
+ }
+ _keys += _klen; /* No single key matched: step past the singles. */
+ _trans += _klen;
+ }
+
+ _klen = fsmRun->tables->rangeLengths[fsmRun->cs];
+ if ( _klen > 0 ) { /* Binary search the (low, high) key pairs. */
+ const char *_lower = _keys;
+ const char *_mid;
+ const char *_upper = _keys + (_klen<<1) - 2;
+ while (1) {
+ if ( _upper < _lower )
+ break;
+
+ _mid = _lower + (((_upper-_lower) >> 1) & ~1); /* Keep _mid on a pair boundary. */
+ if ( (*fsmRun->p) < _mid[0] )
+ _upper = _mid - 2;
+ else if ( (*fsmRun->p) > _mid[1] )
+ _lower = _mid + 2;
+ else {
+ _trans += ((_mid - _keys)>>1);
+ goto _match;
+ }
+ }
+ _trans += _klen; /* No range matched: _trans indexes the entry after the ranges. */
+ }
+
+_match: /* Also reached by falling through when no key matched. */
+ fsmRun->cs = fsmRun->tables->transTargsWI[_trans];
+
+ if ( fsmRun->tables->transActionsWI[_trans] == 0 )
+ goto _again; /* No actions on this transition. */
+
+ fsmRun->returnResult = false;
+ _acts = fsmRun->tables->actions + fsmRun->tables->transActionsWI[_trans];
+ _nacts = (unsigned int) *_acts++;
+ while ( _nacts-- > 0 )
+ execAction( fsmRun, fsmRun->tables->actionSwitch[*_acts++] );
+ if ( fsmRun->returnResult )
+ return;
+
+_again: /* Run the to-state actions of the state just entered. */
+ _acts = fsmRun->tables->actions + fsmRun->tables->toStateActions[fsmRun->cs];
+ _nacts = (unsigned int) *_acts++;
+ while ( _nacts-- > 0 )
+ execAction( fsmRun, fsmRun->tables->actionSwitch[*_acts++] );
+
+ if ( fsmRun->cs == fsmRun->tables->errorState )
+ goto out;
+
+ if ( ++fsmRun->p != fsmRun->pe )
+ goto _loop_head;
+out:
+ if ( fsmRun->p == fsmRun->peof ) {
+ fsmRun->returnResult = false;
+ _acts = fsmRun->tables->actions + fsmRun->tables->eofActions[fsmRun->cs];
+ _nacts = (unsigned int) *_acts++;
+ /* The action list above was looked up before cs is moved to the EOF target. */
+ if ( fsmRun->tables->eofTargs[fsmRun->cs] >= 0 )
+ fsmRun->cs = fsmRun->tables->eofTargs[fsmRun->cs];
+
+ while ( _nacts-- > 0 )
+ execAction( fsmRun, fsmRun->tables->actionSwitch[*_acts++] );
+ if ( fsmRun->returnResult )
+ return;
+ }
+}
+
+
diff --git a/src/fsmgraph.cc b/src/fsmgraph.cc
new file mode 100644
index 00000000..590d7902
--- /dev/null
+++ b/src/fsmgraph.cc
@@ -0,0 +1,1408 @@
+/*
+ * Copyright 2006-2012 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <assert.h>
+#include <iostream>
+
+#include "config.h"
+#include "defs.h"
+#include "fsmgraph.h"
+#include "mergesort.h"
+
+using std::cerr;
+using std::endl;
+
+/* Allocate a new state, register it with the graph and return it. The state
+ * goes on the misfit list while misfit accounting is on and on the regular
+ * state list otherwise. */
+FsmState *FsmGraph::addState()
+{
+	FsmState *fresh = new FsmState();
+
+	if ( !misfitAccounting ) {
+		/* Normal case: straight onto the main state list. */
+		stateList.append( fresh );
+		return fresh;
+	}
+
+	/* States are created with no foreign in transitions, so under misfit
+	 * accounting they begin life on the misfit list. */
+	misfitList.append( fresh );
+	return fresh;
+}
+
+/* Build a machine that matches the key sequence str[0..len-1] exactly. The
+ * result is a chain of len+1 states joined by one single-key transition per
+ * element of str. The first state is the start state and the last state is
+ * made final. */
+void FsmGraph::concatFsm( Key *str, int len )
+{
+	/* The chain begins at the start state. */
+	FsmState *tail = addState();
+	setStartState( tail );
+
+	/* Grow the chain by one state per key. */
+	int pos = 0;
+	while ( pos < len ) {
+		FsmState *next = addState();
+		attachNewTrans( tail, next, str[pos], str[pos] );
+		tail = next;
+		pos += 1;
+	}
+
+	/* Whatever state the chain ends on accepts. */
+	setFinState( tail );
+}
+
+/* Case insensitive version of concatFsm. Each position of str is matched by
+ * the key itself plus, when the key is lower or upper case, its counterpart
+ * in the other case. */
+void FsmGraph::concatFsmCI( Key *str, int len )
+{
+	/* Make the first state and set it as the start state. */
+	FsmState *last = addState();
+	setStartState( last );
+
+	/* Attach subsequent states. */
+	for ( int i = 0; i < len; i++ ) {
+		FsmState *newState = addState();
+
+		/* Collect the key together with its case variants. */
+		KeySet keySet;
+		if ( str[i].isLower() )
+			keySet.insert( str[i].toUpper() );
+		if ( str[i].isUpper() )
+			keySet.insert( str[i].toLower() );
+		keySet.insert( str[i] );
+
+		/* One transition per collected key. The index has its own name so
+		 * that it does not shadow the position index i of the outer loop. */
+		for ( int k = 0; k < keySet.length(); k++ )
+			attachNewTrans( last, newState, keySet[k], keySet[k] );
+
+		last = newState;
+	}
+
+	/* Make the last state the final state. */
+	setFinState( last );
+}
+
+/* Build a machine that matches the single key chr: a start state, a final
+ * state and one transition on chr between them. */
+void FsmGraph::concatFsm( Key chr )
+{
+	/* First state created is the start state. */
+	FsmState *begin = addState();
+	setStartState( begin );
+
+	/* Second state created is the accepting one. */
+	FsmState *finish = addState();
+	setFinState( finish );
+
+	/* Join the two on the character. */
+	attachNewTrans( startState, finish, chr, chr );
+}
+
+/* Build a machine that matches any one key out of set[0..len-1]: a start
+ * state, a final state and len single-key transitions between them. The set
+ * must be in strictly ascending order, which also rules out duplicates;
+ * this is asserted before any transition is attached. */
+void FsmGraph::orFsm( Key *set, int len )
+{
+	/* First state created is the start state, second one accepts. */
+	FsmState *begin = addState();
+	setStartState( begin );
+
+	FsmState *finish = addState();
+	setFinState( finish );
+
+	/* Every key must be greater than its predecessor. */
+	int pos = 1;
+	while ( pos < len ) {
+		assert( set[pos-1] < set[pos] );
+		pos += 1;
+	}
+
+	/* One transition from start to final for each key. */
+	for ( pos = 0; pos < len; pos++ )
+		attachNewTrans( startState, finish, set[pos], set[pos] );
+}
+
+/* Build a machine that matches any single key in the inclusive range low to
+ * high: a start state, a final state and one range transition between them.
+ * low should not be greater than high; behaviour is undefined otherwise. */
+void FsmGraph::rangeFsm( Key low, Key high )
+{
+	/* First state created is the start state. */
+	FsmState *begin = addState();
+	setStartState( begin );
+
+	/* Second state created is the accepting one. */
+	FsmState *finish = addState();
+	setFinState( finish );
+
+	/* A single transition covers the whole range. */
+	attachNewTrans( startState, finish, low, high );
+}
+
+/* Build a machine that matches any number of keys, including none, from the
+ * inclusive range low to high. It has a single state that is both start and
+ * final and loops to itself on the range. */
+void FsmGraph::rangeStarFsm( Key low, Key high)
+{
+	/* The only state is the start state and is also accepting. */
+	FsmState *only = addState();
+	setStartState( only );
+	setFinState( startState );
+
+	/* Self loop on the range. */
+	attachNewTrans( startState, startState, low, high );
+}
+
+/* Build a machine that matches only the empty string: a single state with no
+ * transitions that is both the start state and a final state. */
+void FsmGraph::lambdaFsm( )
+{
+	/* The lone state is the entry point ... */
+	FsmState *only = addState();
+	setStartState( only );
+
+	/* ... and it accepts immediately. */
+	setFinState( startState );
+}
+
+/* Build a machine that matches nothing at all: a single state with no
+ * transitions that is the start state but is not final. */
+void FsmGraph::emptyFsm( )
+{
+	/* The lone state is the entry point; it is deliberately not made final. */
+	FsmState *only = addState();
+	setStartState( only );
+}
+
+/* Stamp srcState's out action table and out priority table onto every
+ * transition leaving destState that has a target state. */
+void FsmGraph::transferOutData( FsmState *destState, FsmState *srcState )
+{
+	for ( TransList::Iter trans = destState->outList; trans.lte(); trans++ ) {
+		/* Transitions that go nowhere are left untouched. */
+		if ( trans->toState == 0 )
+			continue;
+
+		/* Actions first, then priorities, both from the source state's
+		 * out tables. */
+		trans->actionTable.setActions( srcState->outActionTable );
+		trans->priorTable.setPriors( srcState->outPriorTable );
+	}
+}
+
+/* Kleene star operator. Makes this machine the kleene star of itself. Any
+ * transitions made going out of the machine and back into itself will be
+ * notified that they are leaving transitions by having the leavingFromState
+ * callback invoked.
+ *
+ * The whole operation runs with misfit accounting turned on; the misfits are
+ * removed and accounting is turned off again before returning. */
+void FsmGraph::starOp( )
+{
+ /* For the merging process. */
+ MergeData md;
+
+ /* Turn on misfit accounting to possibly catch the old start state. */
+ setMisfitAccounting( true );
+
+ /* Create the new start state. It will be set final after the merging
+ * of the final states with the start state is complete. */
+ FsmState *prevStartState = startState;
+ unsetStartState();
+ setStartState( addState() );
+
+ /* Merge the new start state with the old one to isolate it. */
+ mergeStates( md, startState, prevStartState );
+
+ /* Merge the start state into all final states. Except the start state on
+ * the first pass. If the start state is set final we will be doubling up
+ * its transitions, which will get transferred to any final states that
+ * follow it in the final state set. This will be determined by the order
+ * of items in the final state set. To prevent this we just merge with the
+ * start on a second pass. */
+ for ( StateSet::Iter st = finStateSet; st.lte(); st++ ) {
+ if ( *st != startState )
+ mergeStatesLeaving( md, *st, startState );
+ }
+
+ /* Now it is safe to merge the start state with itself (provided it
+ * is set final). */
+ if ( startState->isFinState() )
+ mergeStatesLeaving( md, startState, startState );
+
+ /* Now ensure the new start state is a final state. */
+ setFinState( startState );
+
+ /* Fill in any states that were newed up as combinations of others. */
+ fillInStates( md );
+
+ /* Remove the misfits and turn off misfit accounting. */
+ removeMisfits();
+ setMisfitAccounting( false );
+}
+
+/* Turn this machine into `times` concatenated copies of itself. times must
+ * be at least one. */
+void FsmGraph::repeatOp( int times )
+{
+	/* Zero would produce the null machine, which requires deleting this. */
+	assert( times > 0 );
+
+	/* A single repetition leaves the machine as it is. */
+	if ( times > 1 ) {
+		/* Keep a pristine copy, since this machine grows with each concat. */
+		FsmGraph *pristine = new FsmGraph( *this );
+
+		/* Append times-2 duplicates of the pristine copy. */
+		int remaining = times - 2;
+		while ( remaining-- > 0 ) {
+			FsmGraph *dup = new FsmGraph( *pristine );
+			doConcat( dup, 0, false );
+		}
+
+		/* The pristine copy itself supplies the last repetition. */
+		doConcat( pristine, 0, false );
+	}
+}
+
+/* Optional repeat. Like repeatOp, but the start state is made final and the
+ * copies are concatenated in optional mode, which leaves the earlier final
+ * states final. times must be at least one. */
+void FsmGraph::optionalRepeatOp( int times )
+{
+	/* Must be 1 and up. 0 produces null machine and requires deleting this. */
+	assert( times > 0 );
+
+	/* A repeat of one optional merely allows zero string. */
+	if ( times == 1 ) {
+		setFinState( startState );
+		return;
+	}
+
+	/* Make a machine to make copies from. */
+	FsmGraph *copyFrom = new FsmGraph( *this );
+
+	/* The state set used in the from end of the concatenation. Starts with
+	 * the initial final state set, then after each concatenation, gets set to
+	 * the final states that come from the duplicate. */
+	StateSet lastFinSet( finStateSet );
+
+	/* Set the start state final to allow zero copies. */
+	setFinState( startState );
+
+	/* Concatenate duplicates onto the end up until before the last. */
+	for ( int i = 1; i < times-1; i++ ) {
+		/* Make a duplicate for concating and set the fin bits to graph 2 so we
+		 * can pick out its final states after the optional style concat. */
+		FsmGraph *dup = new FsmGraph( *copyFrom );
+		dup->setFinBits( SB_GRAPH2 );
+		doConcat( dup, &lastFinSet, true );
+
+		/* Clear the last final state set and make the new one by taking only
+		 * the final states that come from graph 2. The index has its own
+		 * name so that it does not shadow the repetition counter i. */
+		lastFinSet.empty();
+		for ( int f = 0; f < finStateSet.length(); f++ ) {
+			/* If the state came from graph 2, add it to the last set and clear
+			 * the bits. */
+			FsmState *fs = finStateSet[f];
+			if ( fs->stateBits & SB_GRAPH2 ) {
+				lastFinSet.insert( fs );
+				fs->stateBits &= ~SB_GRAPH2;
+			}
+		}
+	}
+
+	/* Now use the copyFrom on the end, no bits set, no bits to clear. */
+	doConcat( copyFrom, &lastFinSet, true );
+}
+
+
+/* Fsm concatenation worker. Supports treating the concatenation as optional,
+ * which essentially leaves the final states of machine one as final.
+ *
+ * other is the machine to append; it is deleted here. fromStates is the set
+ * of states that other's start state is merged into; pass 0 to use this
+ * machine's current final states. When optional is false, this machine's
+ * final states are unset before other's final states are taken over. */
+void FsmGraph::doConcat( FsmGraph *other, StateSet *fromStates, bool optional )
+{
+	/* For the merging process. */
+	StateSet finStateSetCopy;
+	MergeData md;
+
+	/* Turn on misfit accounting for both graphs. */
+	setMisfitAccounting( true );
+	other->setMisfitAccounting( true );
+
+	/* Get the other's start state. */
+	FsmState *otherStartState = other->startState;
+
+	/* Unset other's start state before bringing in the entry points. */
+	other->unsetStartState();
+
+	/* Bring in the rest of other's entry points. */
+	copyInEntryPoints( other );
+	other->entryPoints.empty();
+
+	/* Bring in other's states into our state lists. */
+	stateList.append( other->stateList );
+	misfitList.append( other->misfitList );
+
+	/* If from states is not set, then get a copy of our final state set before
+	 * we clobber it and use it instead. */
+	if ( fromStates == 0 ) {
+		finStateSetCopy = finStateSet;
+		fromStates = &finStateSetCopy;
+	}
+
+	/* Unset all of our final states and get the final states from other. */
+	if ( !optional )
+		unsetAllFinStates();
+	finStateSet.insert( other->finStateSet );
+
+	/* Since other's lists are empty, we can delete the fsm without
+	 * affecting any states. */
+	delete other;
+
+	/* Merge our former final states with the start state of other. */
+	for ( int i = 0; i < fromStates->length(); i++ ) {
+		FsmState *state = fromStates->data[i];
+
+		/* Merge the former final state with other's start state. */
+		mergeStatesLeaving( md, state, otherStartState );
+
+		/* If the former final state was not reset final then we must clear
+		 * the state's out trans data. If it got reset final then it gets to
+		 * keep its out trans data. This must be done before fillInStates gets
+		 * called to prevent the data from being sourced. */
+		if ( ! state->isFinState() )
+			clearOutData( state );
+	}
+
+	/* Fill in any new states made from merging. */
+	fillInStates( md );
+
+	/* Remove the misfits and turn off misfit accounting. */
+	removeMisfits();
+	setMisfitAccounting( false );
+}
+
+/* Concatenates other to the end of this machine. Other is deleted. Any
+ * transitions made leaving this machine and entering into other are notified
+ * that they are leaving transitions by having the leavingFromState callback
+ * invoked. */
+void FsmGraph::concatOp( FsmGraph *other )
+{
+ /* Non-optional concatenation from this machine's own final states (no
+ * from-state set is passed). No signedness check is made here. */
+ doConcat( other, 0, false );
+}
+
+/* Worker called by unionOp, intersectOp and subtractOp. Moves other's entry
+ * points, states and final states into this graph, deletes other, then makes
+ * a fresh start state and merges both original start states into it. The
+ * callers turn misfit accounting on beforehand and remove the misfits
+ * afterwards.
+ */
+void FsmGraph::doOr( FsmGraph *other )
+{
+ /* For the merging process. */
+ MergeData md;
+
+ /* Build a state set consisting of both start states */
+ StateSet startStateSet;
+ startStateSet.insert( startState );
+ startStateSet.insert( other->startState );
+
+ /* Both of the original start states lose their start state status. */
+ unsetStartState();
+ other->unsetStartState();
+
+ /* Bring in the rest of other's entry points. */
+ copyInEntryPoints( other );
+ other->entryPoints.empty();
+
+ /* Merge the lists. This will move all the states from other
+ * into this. No states will be deleted. */
+ stateList.append( other->stateList );
+ misfitList.append( other->misfitList );
+
+ /* Move the final set data from other into this. */
+ finStateSet.insert(other->finStateSet);
+ other->finStateSet.empty();
+
+ /* Since other's list is empty, we can delete the fsm without
+ * affecting any states. */
+ delete other;
+
+ /* Create a new start state. */
+ setStartState( addState() );
+
+ /* Merge the start states. */
+ mergeStates( md, startState, startStateSet.data, startStateSet.length() );
+
+ /* Fill in any new states made from merging. */
+ fillInStates( md );
+}
+
+/* Unions other with this machine. Other is deleted (by doOr). The or worker
+ * runs with misfit accounting enabled on both graphs, and the misfits are
+ * removed afterwards. */
+void FsmGraph::unionOp( FsmGraph *other )
+{
+ /* Turn on misfit accounting for both graphs. */
+ setMisfitAccounting( true );
+ other->setMisfitAccounting( true );
+
+ /* Call Worker routine. */
+ doOr( other );
+
+ /* Remove the misfits and turn off misfit accounting. */
+ removeMisfits();
+ setMisfitAccounting( false );
+}
+
+/* Intersects other with this machine. Other is deleted (by doOr). The fin
+ * bits are set to SB_GRAPH1 on this machine and SB_GRAPH2 on other before
+ * the two are or'ed together; unsetIncompleteFinals() then prunes the final
+ * states and dead end states are removed last. */
+void FsmGraph::intersectOp( FsmGraph *other )
+{
+ /* Turn on misfit accounting for both graphs. */
+ setMisfitAccounting( true );
+ other->setMisfitAccounting( true );
+
+ /* Set the fin bits on this and other to want each other. */
+ setFinBits( SB_GRAPH1 );
+ other->setFinBits( SB_GRAPH2 );
+
+ /* Call worker Or routine. */
+ doOr( other );
+
+ /* Unset any final states that are no longer to
+ * be final due to final bits. */
+ unsetIncompleteFinals();
+
+ /* Remove the misfits and turn off misfit accounting. */
+ removeMisfits();
+ setMisfitAccounting( false );
+
+ /* Remove states that have no path to a final state. */
+ removeDeadEndStates();
+}
+
+/* Set subtracts other machine from this machine. Other is deleted (by
+ * doOr). Only other's fin bits are set (to SB_GRAPH1) before the two are
+ * or'ed together; unsetKilledFinals() then prunes the final states and dead
+ * end states are removed last. */
+void FsmGraph::subtractOp( FsmGraph *other )
+{
+ /* Turn on misfit accounting for both graphs. */
+ setMisfitAccounting( true );
+ other->setMisfitAccounting( true );
+
+ /* Set the fin bits of other to be killers. */
+ other->setFinBits( SB_GRAPH1 );
+
+ /* Call worker Or routine. */
+ doOr( other );
+
+ /* Unset any final states that are no longer to
+ * be final due to final bits. */
+ unsetKilledFinals();
+
+ /* Remove the misfits and turn off misfit accounting. */
+ removeMisfits();
+ setMisfitAccounting( false );
+
+ /* Remove states that have no path to a final state. */
+ removeDeadEndStates();
+}
+
+/* Report whether state is the target of some element of eptVect. A null
+ * eptVect contains nothing. */
+bool FsmGraph::inEptVect( EptVect *eptVect, FsmState *state )
+{
+	/* No vector, no members. */
+	if ( eptVect == 0 )
+		return false;
+
+	/* Linear scan for an element that targets the state. */
+	bool found = false;
+	for ( int pos = 0; !found && pos < eptVect->length(); pos++ )
+		found = ( eptVect->data[pos].targ == state );
+
+	return found;
+}
+
+/* Fill epsilon vectors in a root state from a given starting point. Employs
+ * a depth first search through the graph of epsilon transitions.
+ *
+ * root is the state whose eptVect is being filled. from is the state whose
+ * epsilon transitions are followed in this call. parentLeaving is true when
+ * an earlier step on the path from root was already marked as leaving. */
+void FsmGraph::epsilonFillEptVectFrom( FsmState *root, FsmState *from, bool parentLeaving )
+{
+ /* Walk the epsilon transitions out of the state. */
+ for ( EpsilonTrans::Iter ep = from->epsilonTrans; ep.lte(); ep++ ) {
+ /* Find the entry point, if it does not resolve, ignore it. */
+ EntryMapEl *enLow, *enHigh;
+ if ( entryPoints.findMulti( *ep, enLow, enHigh ) ) {
+ /* Loop the targets. */
+ for ( EntryMapEl *en = enLow; en <= enHigh; en++ ) {
+ /* Do not add the root or states already in eptVect.
+ * NOTE(review): the test below compares targ against from,
+ * not root; the two are the same only in the outermost call.
+ * Confirm which one is intended. */
+ FsmState *targ = en->value;
+ if ( targ != from && !inEptVect(root->eptVect, targ) ) {
+ /* Maybe need to create the eptVect. */
+ if ( root->eptVect == 0 )
+ root->eptVect = new EptVect();
+
+ /* If moving to a different graph or if any parent is
+ * leaving then we are leaving. */
+ bool leaving = parentLeaving ||
+ root->owningGraph != targ->owningGraph;
+
+ /* All ok, add the target epsilon and recurse. */
+ root->eptVect->append( EptVectEl(targ, leaving) );
+ epsilonFillEptVectFrom( root, targ, leaving );
+ }
+ }
+ }
+ }
+}
+/* Give every state that is both an epsilon target (read from) and has an
+ * eptVect of its own (written to) a shadow state, and redirect the epsilon
+ * vector entries to read from the shadow. The shadow is a newly added state
+ * that the target is merged into; one shadow is made per target and cached
+ * in isolatedShadow.
+ */
+void FsmGraph::shadowReadWriteStates( MergeData &md )
+{
+ /* Init isolatedShadow algorithm data. */
+ for ( StateList::Iter st = stateList; st.lte(); st++ )
+ st->isolatedShadow = 0;
+
+ /* Any states that may be both read from and written to must
+ * be shadowed. */
+ for ( StateList::Iter st = stateList; st.lte(); st++ ) {
+ /* Find such states by looping through the eptVect lists, which give us
+ * the states that will be read from. May cause us to visit the states
+ * that we are interested in more than once. */
+ if ( st->eptVect != 0 ) {
+ /* For all states that will be read from. */
+ for ( EptVect::Iter ept = *st->eptVect; ept.lte(); ept++ ) {
+ /* Check for read and write to the same state. */
+ FsmState *targ = ept->targ;
+ if ( targ->eptVect != 0 ) {
+ /* State is to be written to, if the shadow is not already
+ * there, create it. */
+ if ( targ->isolatedShadow == 0 ) {
+ FsmState *shadow = addState();
+ mergeStates( md, shadow, targ );
+ targ->isolatedShadow = shadow;
+ }
+
+ /* Write shadow into the state vector so that it is the
+ * state that the epsilon transition will read from. */
+ ept->targ = targ->isolatedShadow;
+ }
+ }
+ }
+ }
+}
+/* Resolve all epsilon transitions of the states in stateList. First each
+ * state's eptVect is computed, then states that are both read and written
+ * are shadowed, and finally every epsilon target is merged into its source
+ * state (as a leaving merge when the entry is marked leaving). The eptVect
+ * and the epsilon transitions of each state are discarded along the way.
+ */
+void FsmGraph::resolveEpsilonTrans( MergeData &md )
+{
+ /* Walk the state list and invoke recursive worker on each state. */
+ for ( StateList::Iter st = stateList; st.lte(); st++ )
+ epsilonFillEptVectFrom( st, st, false );
+
+ /* Prevent reading from and writing to of the same state. */
+ shadowReadWriteStates( md );
+
+ /* For all states that have epsilon transitions out, draw the transitions,
+ * clear the epsilon transitions. */
+ for ( StateList::Iter st = stateList; st.lte(); st++ ) {
+ /* If there is a state vector, then create the pre-merge state. */
+ if ( st->eptVect != 0 ) {
+ /* Merge all the epsilon targets into the state. */
+ for ( EptVect::Iter ept = *st->eptVect; ept.lte(); ept++ ) {
+ if ( ept->leaving )
+ mergeStatesLeaving( md, st, ept->targ );
+ else
+ mergeStates( md, st, ept->targ );
+ }
+
+ /* Clean up the target list. */
+ delete st->eptVect;
+ st->eptVect = 0;
+ }
+
+ /* Clear the epsilon transitions vector. */
+ st->epsilonTrans.empty();
+ }
+}
+/* Resolve the epsilon transitions of this graph. Sets owningGraph to zero on
+ * every state in stateList, performs the epsilon merges, fills in the states
+ * created by merging and removes the misfits. Runs with misfit accounting
+ * turned on.
+ */
+void FsmGraph::epsilonOp()
+{
+ /* For merging process. */
+ MergeData md;
+
+ setMisfitAccounting( true );
+
+ for ( StateList::Iter st = stateList; st.lte(); st++ )
+ st->owningGraph = 0;
+
+ /* Perform merges. */
+ resolveEpsilonTrans( md );
+
+ /* Epsilons can cause merges which leave behind unreachable states. */
+ fillInStates( md );
+
+ /* Remove the misfits and turn off misfit accounting. */
+ removeMisfits();
+ setMisfitAccounting( false );
+}
+
+/* Make a new machine by joining together a bunch of machines without making
+ * any transitions between them. A negative finalId results in there being no
+ * final id. */
+void FsmGraph::joinOp( int startId, int finalId, FsmGraph **others, int numOthers )
+{
+ /* For the merging process. */
+ MergeData md;
+
+ /* Set the owning machines. Start at one. Zero is reserved for the start
+ * and final states. */
+ for ( StateList::Iter st = stateList; st.lte(); st++ )
+ st->owningGraph = 1;
+ for ( int m = 0; m < numOthers; m++ ) {
+ for ( StateList::Iter st = others[m]->stateList; st.lte(); st++ )
+ st->owningGraph = 2+m;
+ }
+
+ /* All machines loose start state status. */
+ unsetStartState();
+ for ( int m = 0; m < numOthers; m++ )
+ others[m]->unsetStartState();
+
+ /* Bring the other machines into this. */
+ for ( int m = 0; m < numOthers; m++ ) {
+ /* Bring in the rest of other's entry points. */
+ copyInEntryPoints( others[m] );
+ others[m]->entryPoints.empty();
+
+ /* Merge the lists. This will move all the states from other into
+ * this. No states will be deleted. */
+ stateList.append( others[m]->stateList );
+ assert( others[m]->misfitList.length() == 0 );
+
+ /* Move the final set data from other into this. */
+ finStateSet.insert( others[m]->finStateSet );
+ others[m]->finStateSet.empty();
+
+ /* Since other's list is empty, we can delete the fsm without
+ * affecting any states. */
+ delete others[m];
+ }
+
+ /* Look up the start entry point. */
+ EntryMapEl *enLow = 0, *enHigh = 0;
+ bool findRes = entryPoints.findMulti( startId, enLow, enHigh );
+ if ( ! findRes ) {
+ /* No start state. Set a default one and proceed with the join. Note
+ * that the result of the join will be a very uninteresting machine. */
+ setStartState( addState() );
+ }
+ else {
+ /* There is at least one start state, create a state that will become
+ * the new start state. */
+ FsmState *newStart = addState();
+ setStartState( newStart );
+
+ /* The start state is in an owning machine class all it's own. */
+ newStart->owningGraph = 0;
+
+ /* Create the set of states to merge from. */
+ StateSet stateSet;
+ for ( EntryMapEl *en = enLow; en <= enHigh; en++ )
+ stateSet.insert( en->value );
+
+ /* Merge in the set of start states into the new start state. */
+ mergeStates( md, newStart, stateSet.data, stateSet.length() );
+ }
+
+ /* Take a copy of the final state set, before unsetting them all. This
+ * will allow us to call clearOutData on the states that don't get
+ * final state status back back. */
+ StateSet finStateSetCopy = finStateSet;
+
+ /* Now all final states are unset. */
+ unsetAllFinStates();
+
+ if ( finalId >= 0 ) {
+ /* Create the implicit final state. */
+ FsmState *finState = addState();
+ setFinState( finState );
+
+ /* Assign an entry into the final state on the final state entry id. Note
+ * that there may already be an entry on this id. That's ok. Also set the
+ * final state owning machine id. It's in a class all it's own. */
+ setEntry( finalId, finState );
+ finState->owningGraph = 0;
+ }
+
+ /* Hand over to workers for resolving epsilon trans. This will merge states
+ * with the targets of their epsilon transitions. */
+ resolveEpsilonTrans( md );
+
+ /* Invoke the relinquish final callback on any states that did not get
+ * final state status back. */
+ for ( StateSet::Iter st = finStateSetCopy; st.lte(); st++ ) {
+ if ( !((*st)->stateBits & SB_ISFINAL) )
+ clearOutData( *st );
+ }
+
+ /* Fill in any new states made from merging. */
+ fillInStates( md );
+
+ /* Joining can be messy. Instead of having misfit accounting on (which is
+ * tricky here) do a full cleaning. */
+ removeUnreachableStates();
+}
+
+ /* Absorb a set of other machines into this one: their entry points, states
+  * and final state sets are moved over as they are. Every machine in others
+  * is deleted.
+  *
+  * others, numOthers: the machines to absorb. */
+ void FsmGraph::globOp( FsmGraph **others, int numOthers )
+ {
+     /* The incoming machines all give up start state status first. */
+     for ( int i = 0; i < numOthers; i++ )
+         others[i]->unsetStartState();
+
+     /* Now pull the contents of each machine into this one. */
+     for ( int i = 0; i < numOthers; i++ ) {
+         FsmGraph *other = others[i];
+
+         /* Take over the entry points. */
+         copyInEntryPoints( other );
+         other->entryPoints.empty();
+
+         /* Appending moves the states over; none are deleted. The other
+          * machine must not be carrying misfits. */
+         stateList.append( other->stateList );
+         assert( other->misfitList.length() == 0 );
+
+         /* Take over the final state set. */
+         finStateSet.insert( other->finStateSet );
+         other->finStateSet.empty();
+
+         /* Its state list is now empty, so deleting the machine does not
+          * touch any states. */
+         delete other;
+     }
+ }
+
+ /* Make every entry key lead to exactly one state. Where several states share
+  * an entry key a new state is created, the sharing states are merged into it
+  * and it becomes the sole entry point for that key. */
+ void FsmGraph::deterministicEntry()
+ {
+     /* Scratch data for the merges. */
+     MergeData md;
+
+     /* States can lose their entry points here, so account for misfits. */
+     setMisfitAccounting( true );
+
+     /* Work from a copy of the entry map. The live map is cleared and then
+      * rebuilt as the copy is walked. */
+     EntryMap prevEntry = entryPoints;
+     unsetAllEntryPoints();
+
+     for ( int first = 0; first < prevEntry.length(); ) {
+         /* Find the index one past the last entry sharing this key. */
+         int past = first;
+         while ( past < prevEntry.length() &&
+                 prevEntry[first].key == prevEntry[past].key )
+         {
+             past += 1;
+         }
+
+         int numIds = past - first;
+         if ( numIds != 1 ) {
+             /* Several states on this key: merge all of them into a fresh
+              * state and make that the single entry point. */
+             FsmState *combined = addState();
+             for ( int idx = first; idx < past; idx++ )
+                 mergeStates( md, combined, prevEntry[idx].value );
+
+             setEntry( prevEntry[first].key, combined );
+         }
+         else {
+             /* A lone entry point is restored unchanged. */
+             setEntry( prevEntry[first].key, prevEntry[first].value );
+         }
+
+         first += numIds;
+     }
+
+     /* The old start state may now be unreachable. Drop the misfits and stop
+      * accounting for them. */
+     removeMisfits();
+     setMisfitAccounting( false );
+ }
+
+ /* Take final state status away from every final state that carries the
+  * SB_GRAPH1 killing bit, and clear that bit on all states that were final
+  * on entry. */
+ void FsmGraph::unsetKilledFinals()
+ {
+     /* Walk a snapshot, since the real final state set is modified below. */
+     StateSet snapshot( finStateSet );
+
+     for ( StateSet::Iter st = snapshot; st.lte(); st++ ) {
+         FsmState *state = *st;
+
+         /* A set killing bit makes the state non-final. */
+         if ( state->stateBits & SB_GRAPH1 )
+             unsetFinState( state );
+
+         /* The killing bit is cleared either way. Non final states should
+          * never have had it set in the first place. */
+         state->stateBits &= ~SB_GRAPH1;
+     }
+ }
+
+ /* Take final state status away from every final state that has some, but
+  * not all, of the SB_BOTH bits set, and clear those bits on all states that
+  * were final on entry. */
+ void FsmGraph::unsetIncompleteFinals()
+ {
+     /* Walk a snapshot, since the real final state set is modified below. */
+     StateSet snapshot( finStateSet );
+
+     for ( StateSet::Iter st = snapshot; st.lte(); st++ ) {
+         FsmState *state = *st;
+
+         /* At least one of the two bits is set, but not both of them: one
+          * side wants the other and it is not there. */
+         if ( (state->stateBits & SB_BOTH) != 0 &&
+                 (state->stateBits & SB_BOTH) != SB_BOTH )
+         {
+             unsetFinState( state );
+         }
+
+         /* The wanting bits are cleared either way. Non final states should
+          * never have had them set in the first place. */
+         state->stateBits &= ~SB_BOTH;
+     }
+ }
+
+ /* Give the machine a start state that has no entry points other than being
+  * the start state. If the current start state is not already isolated, a new
+  * state is created, made the start state, and the old start state is merged
+  * into it. This is useful before modifying start transitions: when the start
+  * state has other entry points, changing its transitions changes more than
+  * just the start transitions. */
+ void FsmGraph::isolateStartState( )
+ {
+     /* Scratch data for the merge. */
+     MergeData md;
+
+     /* Nothing to do when the start state is isolated already. */
+     if ( isStartStateIsolated() )
+         return;
+
+     /* Misfit accounting may catch the old start state. */
+     setMisfitAccounting( true );
+
+     /* Swap in a fresh start state, remembering the old one. */
+     FsmState *oldStart = startState;
+     unsetStartState();
+     FsmState *freshStart = addState();
+     setStartState( freshStart );
+
+     /* Merging the old start state into the new one isolates it. */
+     mergeStates( md, startState, oldStart );
+
+     /* Merging into an empty state cannot produce conflicting transitions,
+      * so neither the state dict nor the stfill list may have gained
+      * anything. */
+     assert( md.stateDict.treeSize == 0 );
+     assert( md.stfillHead == 0 );
+
+     /* The old start state may now be unreachable. Drop the misfits and stop
+      * accounting for them. */
+     removeMisfits();
+     setMisfitAccounting( false );
+ }
+
+ /* Debug logging helpers for condition expansions. They are guarded with
+  * #ifdef, the same test used at every call site in this file, so that the
+  * definitions and the calls are always compiled in or out together. */
+ #ifdef COLM_LOG_CONDS
+ /* Write the action names of a condition space to cerr, separated by commas
+  * and walking the condition set from its last element to its first. A null
+  * condition space is written as "<empty>". */
+ void logCondSpace( CondSpace *condSpace )
+ {
+     if ( condSpace == 0 )
+         cerr << "<empty>";
+     else {
+         for ( CondSet::Iter csi = condSpace->condSet.last(); csi.gtb(); csi-- ) {
+             if ( ! csi.last() )
+                 cerr << ',';
+             (*csi)->actionName( cerr );
+         }
+     }
+ }
+
+ /* Write a description of a newly created expansion to cerr: its key range,
+  * the condition space and values it expands from, and the condition space
+  * and list of values it expands to. */
+ void logNewExpansion( Expansion *exp )
+ {
+     cerr << "created expansion:" << endl;
+     cerr << " range: " << exp->lowKey.getVal() << " .. " <<
+             exp->highKey.getVal() << endl;
+
+     cerr << " fromCondSpace: ";
+     logCondSpace( exp->fromCondSpace );
+     cerr << endl;
+     cerr << " fromVals: " << exp->fromVals << endl;
+
+     cerr << " toCondSpace: ";
+     logCondSpace( exp->toCondSpace );
+     cerr << endl;
+     cerr << " toValsList: ";
+     for ( LongVect::Iter to = exp->toValsList; to.lte(); to++ )
+         cerr << " " << *to;
+     cerr << endl;
+ }
+ #endif
+
+
+ /* Record an expansion for every key range where a transition of destState
+  * overlaps a state condition of srcState. Each expansion goes from no
+  * condition space to the source's condition space and targets every value
+  * combination of that space.
+  *
+  * expansionList: receives the new expansions.
+  * destState: state whose out transitions are examined.
+  * srcState: state whose condition list is examined. */
+ void FsmGraph::findTransExpansions( ExpansionList &expansionList,
+         FsmState *destState, FsmState *srcState )
+ {
+     PairIter<FsmTrans, StateCond> pair( destState->outList.head,
+             srcState->stateCondList.head );
+     for ( ; !pair.end(); pair++ ) {
+         /* Only ranges present on both sides need expanding. */
+         if ( pair.userState != RangeOverlap )
+             continue;
+
+         Expansion *created = new Expansion( pair.s1Tel.lowKey,
+                 pair.s1Tel.highKey );
+
+         /* Work on a detached copy of the transition that keeps the
+          * original's target state. */
+         created->fromTrans = new FsmTrans(*pair.s1Tel.trans);
+         created->fromTrans->fromState = 0;
+         created->fromTrans->toState = pair.s1Tel.trans->toState;
+
+         /* The transition had no conditions before. */
+         created->fromCondSpace = 0;
+         created->fromVals = 0;
+
+         /* It is expanded into the source's condition space. */
+         CondSpace *srcSpace = pair.s2Tel.trans->condSpace;
+         created->toCondSpace = srcSpace;
+
+         /* Target every combination of the conditions in that space. */
+         long numTargVals = (1 << srcSpace->condSet.length());
+         for ( long vals = 0; vals < numTargVals; vals++ )
+             created->toValsList.append( vals );
+
+         #ifdef COLM_LOG_CONDS
+         if ( colm_log_conds ) {
+             logNewExpansion( created );
+         }
+         #endif
+         expansionList.append( created );
+     }
+ }
+
+ /* Record an expansion for every out transition of state that overlaps the
+  * key range [lowKey, highKey] as located inside fromCondSpace at the value
+  * combination fromVals.
+  *
+  * expansionList: receives the new expansions.
+  * state: state whose out transitions are searched.
+  * lowKey, highKey: the key range, in plain alphabet terms.
+  * fromCondSpace, fromVals: where the transitions currently live.
+  * toCondSpace, toValsList: where the expansions are to be placed. */
+ void FsmGraph::findCondExpInTrans( ExpansionList &expansionList, FsmState *state,
+         Key lowKey, Key highKey, CondSpace *fromCondSpace, CondSpace *toCondSpace,
+         long fromVals, LongVect &toValsList )
+ {
+     /* A one-element list holding the range to search for, with the keys
+      * translated into the from condition space. */
+     FsmTrans probe;
+     probe.lowKey = fromCondSpace->baseKey + fromVals * keyOps->alphSize() +
+             (lowKey - keyOps->minKey);
+     probe.highKey = fromCondSpace->baseKey + fromVals * keyOps->alphSize() +
+             (highKey - keyOps->minKey);
+     probe.next = 0;
+     probe.prev = 0;
+
+     PairIter<FsmTrans> pair( state->outList.head, &probe );
+     for ( ; !pair.end(); pair++ ) {
+         /* Only transitions that overlap the probe are of interest. */
+         if ( pair.userState != RangeOverlap )
+             continue;
+
+         Expansion *created = new Expansion( lowKey, highKey );
+
+         /* Work on a detached copy of the transition that keeps the
+          * original's target state. */
+         created->fromTrans = new FsmTrans(*pair.s1Tel.trans);
+         created->fromTrans->fromState = 0;
+         created->fromTrans->toState = pair.s1Tel.trans->toState;
+
+         created->fromCondSpace = fromCondSpace;
+         created->fromVals = fromVals;
+         created->toCondSpace = toCondSpace;
+         created->toValsList = toValsList;
+
+         expansionList.append( created );
+         #ifdef COLM_LOG_CONDS
+         if ( colm_log_conds ) {
+             logNewExpansion( created );
+         }
+         #endif
+     }
+ }
+
+ /* Find the expansions needed where the state conditions of destState and
+ * srcState cover the same keys but srcState has conditions that destState
+ * lacks. For each such range and each value combination of the dest condition
+ * set, the transitions of destState at that combination are looked up with
+ * findCondExpInTrans and expanded to every combination of the source-only
+ * conditions.
+ *
+ * expansionList: receives the new expansions.
+ * destState: state whose transitions are to be expanded.
+ * srcState: state supplying the extra conditions. */
+ void FsmGraph::findCondExpansions( ExpansionList &expansionList,
+ FsmState *destState, FsmState *srcState )
+ {
+ PairIter<StateCond, StateCond> condCond( destState->stateCondList.head,
+ srcState->stateCondList.head );
+ for ( ; !condCond.end(); condCond++ ) {
+ if ( condCond.userState == RangeOverlap ) {
+ /* Loop over all existing condVals . */
+ CondSet &destCS = condCond.s1Tel.trans->condSpace->condSet;
+ long destLen = destCS.length();
+
+ /* Find the items in src cond set that are not in dest
+ * cond set. These are the items that we must expand. */
+ CondSet srcOnlyCS = condCond.s2Tel.trans->condSpace->condSet;
+ for ( CondSet::Iter dcsi = destCS; dcsi.lte(); dcsi++ )
+ srcOnlyCS.remove( *dcsi );
+ long srcOnlyLen = srcOnlyCS.length();
+
+ if ( srcOnlyCS.length() > 0 ) {
+ #ifdef COLM_LOG_CONDS
+ if ( colm_log_conds ) {
+ cerr << "there are " << srcOnlyCS.length() << " item(s) that are "
+ "only in the srcCS" << endl;
+ }
+ #endif
+
+ /* The merged set holds the conditions of both sides. */
+ CondSet mergedCS = destCS;
+ mergedCS.insert( condCond.s2Tel.trans->condSpace->condSet );
+
+ CondSpace *fromCondSpace = addCondSpace( destCS );
+ CondSpace *toCondSpace = addCondSpace( mergedCS );
+
+ /* Loop all values in the dest space. */
+ for ( long destVals = 0; destVals < (1 << destLen); destVals++ ) {
+ /* Translate the bits of destVals, which are positions in destCS,
+ * into the positions the same conditions have in mergedCS. */
+ long basicVals = 0;
+ for ( CondSet::Iter csi = destCS; csi.lte(); csi++ ) {
+ if ( destVals & (1 << csi.pos()) ) {
+ Action **cim = mergedCS.find( *csi );
+ long bitPos = (cim - mergedCS.data);
+ basicVals |= 1 << bitPos;
+ }
+ }
+
+ /* Loop all new values. Each combination of the source-only
+ * conditions is added on top of basicVals. */
+ LongVect expandToVals;
+ for ( long soVals = 0; soVals < (1 << srcOnlyLen); soVals++ ) {
+ long targVals = basicVals;
+ for ( CondSet::Iter csi = srcOnlyCS; csi.lte(); csi++ ) {
+ if ( soVals & (1 << csi.pos()) ) {
+ Action **cim = mergedCS.find( *csi );
+ long bitPos = (cim - mergedCS.data);
+ targVals |= 1 << bitPos;
+ }
+ }
+ expandToVals.append( targVals );
+ }
+
+ findCondExpInTrans( expansionList, destState,
+ condCond.s1Tel.lowKey, condCond.s1Tel.highKey,
+ fromCondSpace, toCondSpace, destVals, expandToVals );
+ }
+ }
+ }
+ }
+ }
+
+ /* Carry out a list of expansions on destState. For every target value of
+  * every expansion, the expansion's transition copy is re-keyed into the
+  * target condition space and copied into destState's out transitions.
+  *
+  * md: merge data handed on to outTransCopy.
+  * destState: state receiving the expanded transitions.
+  * expList1: the expansions to perform. */
+ void FsmGraph::doExpand( MergeData &md, FsmState *destState, ExpansionList &expList1 )
+ {
+     for ( ExpansionList::Iter expansion = expList1; expansion.lte(); expansion++ ) {
+         for ( LongVect::Iter val = expansion->toValsList; val.lte(); val++ ) {
+             long targVals = *val;
+
+             /* The transition copy made when the expansion was created is
+              * reused for every target value. Only its keys change from one
+              * use to the next, everything else is already prepared. */
+             FsmTrans *reused = expansion->fromTrans;
+
+             reused->lowKey = expansion->toCondSpace->baseKey +
+                     targVals * keyOps->alphSize() + (expansion->lowKey - keyOps->minKey);
+             reused->highKey = expansion->toCondSpace->baseKey +
+                     targVals * keyOps->alphSize() + (expansion->highKey - keyOps->minKey);
+
+             /* Copy it in through a temporary one-element list. The list is
+              * abandoned afterwards so it does not hold on to the element. */
+             TransList single;
+             single.append( reused );
+             outTransCopy( md, destState, single.head );
+             single.abandon();
+         }
+     }
+ }
+
+
+ /* Remove from destState the transitions that the expansions in expList1
+ * were made from. For each expansion the original key range is computed
+ * (plain keys when there was no from condition space, otherwise keys inside
+ * the from condition space) and every part of destState's out list that
+ * overlaps it is detached and deleted. The md parameter is not used here. */
+ void FsmGraph::doRemove( MergeData &md, FsmState *destState, ExpansionList &expList1 )
+ {
+ for ( ExpansionList::Iter exp = expList1; exp.lte(); exp++ ) {
+ /* Build a one-element list describing the range to remove. */
+ Removal removal;
+ if ( exp->fromCondSpace == 0 ) {
+ removal.lowKey = exp->lowKey;
+ removal.highKey = exp->highKey;
+ }
+ else {
+ removal.lowKey = exp->fromCondSpace->baseKey +
+ exp->fromVals * keyOps->alphSize() + (exp->lowKey - keyOps->minKey);
+ removal.highKey = exp->fromCondSpace->baseKey +
+ exp->fromVals * keyOps->alphSize() + (exp->highKey - keyOps->minKey);
+ }
+ removal.next = 0;
+
+ /* The surviving transitions are collected here and then transferred
+ * back into the state's out list. */
+ TransList destList;
+ PairIter<FsmTrans, Removal> pairIter( destState->outList.head, &removal );
+ for ( ; !pairIter.end(); pairIter++ ) {
+ switch ( pairIter.userState ) {
+ case RangeInS1: {
+ /* Outside the removal range: keep the transition, with the keys
+ * of the piece the iterator is reporting. */
+ FsmTrans *destTrans = pairIter.s1Tel.trans;
+ destTrans->lowKey = pairIter.s1Tel.lowKey;
+ destTrans->highKey = pairIter.s1Tel.highKey;
+ destList.append( destTrans );
+ break;
+ }
+ case RangeInS2:
+ break;
+ case RangeOverlap: {
+ /* Inside the removal range: detach and delete. */
+ FsmTrans *trans = pairIter.s1Tel.trans;
+ detachTrans( trans->fromState, trans->toState, trans );
+ delete trans;
+ break;
+ }
+ case BreakS1: {
+ /* A transition is being split: the iterator continues with a
+ * duplicate of it. */
+ pairIter.s1Tel.trans = dupTrans( destState,
+ pairIter.s1Tel.trans );
+ break;
+ }
+ case BreakS2:
+ break;
+ }
+ }
+ destState->outList.transfer( destList );
+ }
+ }
+
+ /* Merge the state condition list of srcState into that of destState. Ranges
+ * only in dest are kept, ranges only in src are copied in, and on overlapping
+ * ranges the dest element gets the condition space of the union of the two
+ * condition sets. srcState's list is only read. */
+ void FsmGraph::mergeStateConds( FsmState *destState, FsmState *srcState )
+ {
+ /* The merged list is built here, then transferred into destState. */
+ StateCondList destList;
+ PairIter<StateCond> pairIter( destState->stateCondList.head,
+ srcState->stateCondList.head );
+ for ( ; !pairIter.end(); pairIter++ ) {
+ switch ( pairIter.userState ) {
+ case RangeInS1: {
+ /* Only in dest: keep the element, with the keys of the piece the
+ * iterator is reporting. */
+ StateCond *destCond = pairIter.s1Tel.trans;
+ destCond->lowKey = pairIter.s1Tel.lowKey;
+ destCond->highKey = pairIter.s1Tel.highKey;
+ destList.append( destCond );
+ break;
+ }
+ case RangeInS2: {
+ /* Only in src: add a copy so that the source is left untouched. */
+ StateCond *newCond = new StateCond( *pairIter.s2Tel.trans );
+ newCond->lowKey = pairIter.s2Tel.lowKey;
+ newCond->highKey = pairIter.s2Tel.highKey;
+ destList.append( newCond );
+ break;
+ }
+ case RangeOverlap: {
+ /* In both: the dest element moves to the condition space of the
+ * combined condition set. */
+ StateCond *destCond = pairIter.s1Tel.trans;
+ StateCond *srcCond = pairIter.s2Tel.trans;
+ CondSet mergedCondSet;
+ mergedCondSet.insert( destCond->condSpace->condSet );
+ mergedCondSet.insert( srcCond->condSpace->condSet );
+ destCond->condSpace = addCondSpace( mergedCondSet );
+
+ destCond->lowKey = pairIter.s1Tel.lowKey;
+ destCond->highKey = pairIter.s1Tel.highKey;
+ destList.append( destCond );
+ break;
+ }
+ case BreakS1:
+ /* A dest element is being split: the iterator continues with a
+ * copy of it. */
+ pairIter.s1Tel.trans = new StateCond( *pairIter.s1Tel.trans );
+ break;
+
+ case BreakS2:
+ break;
+ }
+ }
+ destState->stateCondList.transfer( destList );
+ }
+
+ /* A state merge that represents the drawing in of leaving transitions. When
+  * destState has no out data this is an ordinary merge. Otherwise the source
+  * state is first duplicated into a new state, destState's out data is
+  * transferred onto the duplicate and its out conditions are embedded in it,
+  * and the duplicate is what gets merged in. The duplicate will be reaped
+  * because it is never given any in transitions. */
+ void FsmGraph::mergeStatesLeaving( MergeData &md, FsmState *destState, FsmState *srcState )
+ {
+     /* Without out data there is nothing to transfer. */
+     if ( !hasOutData( destState ) ) {
+         mergeStates( md, destState, srcState );
+         return;
+     }
+
+     /* Make a copy of the source that is safe to modify. */
+     FsmState *duplicate = addState();
+     mergeStates( md, duplicate, srcState );
+     transferOutData( duplicate, destState );
+
+     /* Embed each of the destination's out conditions in the copy. */
+     for ( ActionSet::Iter outCond = destState->outCondSet; outCond.lte(); outCond++ )
+         embedCondition( md, duplicate, *outCond );
+
+     mergeStates( md, destState, duplicate );
+ }
+
+ /* Merge an array of source states into destState, one after the other and
+  * in array order.
+  *
+  * srcStates, numSrc: the states to merge in and how many there are. */
+ void FsmGraph::mergeStates( MergeData &md, FsmState *destState,
+         FsmState **srcStates, int numSrc )
+ {
+     FsmState **src = srcStates;
+     for ( int left = numSrc; left > 0; left-- )
+         mergeStates( md, destState, *src++ );
+ }
+
+ /* Merge srcState into destState. The condition expansions needed in both
+ * directions are found first, then the state conditions are merged, the
+ * source's out transitions are copied in, the expansions are carried out and
+ * the transitions they replace are removed. Finally the state bits, final
+ * state status, epsilon transitions, priorities and action tables of the
+ * source are drawn into the destination. srcState may be the same state as
+ * destState. */
+ void FsmGraph::mergeStates( MergeData &md, FsmState *destState, FsmState *srcState )
+ {
+ ExpansionList expList1;
+ ExpansionList expList2;
+
+ /* List 1 holds the expansions of dest against src, list 2 those of src
+ * against dest. Both are found before either state is modified. */
+ findTransExpansions( expList1, destState, srcState );
+ findCondExpansions( expList1, destState, srcState );
+ findTransExpansions( expList2, srcState, destState );
+ findCondExpansions( expList2, srcState, destState );
+
+ mergeStateConds( destState, srcState );
+
+ outTransCopy( md, destState, srcState->outList.head );
+
+ /* Both lists are applied to destState: first the expanded transitions
+ * are added, then the ones they were made from are removed. */
+ doExpand( md, destState, expList1 );
+ doExpand( md, destState, expList2 );
+
+ doRemove( md, destState, expList1 );
+ doRemove( md, destState, expList2 );
+
+ expList1.empty();
+ expList2.empty();
+
+ /* Get its bits and final state status. The final bit is masked out of the
+ * copy and set through setFinState instead. */
+ destState->stateBits |= ( srcState->stateBits & ~SB_ISFINAL );
+ if ( srcState->isFinState() )
+ setFinState( destState );
+
+ /* Draw in any properties of srcState into destState. */
+ if ( srcState == destState ) {
+ /* Duplicate the list to protect against write to source. The
+ * priorities sets are not copied in because that would have no
+ * effect. */
+ destState->epsilonTrans.append( EpsilonTrans( srcState->epsilonTrans ) );
+
+ /* Get all actions, duplicating to protect against write to source. */
+ destState->toStateActionTable.setActions(
+ ActionTable( srcState->toStateActionTable ) );
+ destState->fromStateActionTable.setActions(
+ ActionTable( srcState->fromStateActionTable ) );
+ destState->outActionTable.setActions( ActionTable( srcState->outActionTable ) );
+ destState->outCondSet.insert( ActionSet( srcState->outCondSet ) );
+ destState->errActionTable.setActions( ErrActionTable( srcState->errActionTable ) );
+ destState->eofActionTable.setActions( ActionTable( srcState->eofActionTable ) );
+ }
+ else {
+ /* Get the epsilons, out priorities. */
+ destState->epsilonTrans.append( srcState->epsilonTrans );
+ destState->outPriorTable.setPriors( srcState->outPriorTable );
+
+ /* Get all actions. */
+ destState->toStateActionTable.setActions( srcState->toStateActionTable );
+ destState->fromStateActionTable.setActions( srcState->fromStateActionTable );
+ destState->outActionTable.setActions( srcState->outActionTable );
+ destState->outCondSet.insert( srcState->outCondSet );
+ destState->errActionTable.setActions( srcState->errActionTable );
+ destState->eofActionTable.setActions( srcState->eofActionTable );
+ }
+ }
+
+ /* Fill in every state on the stfill list of md by merging into it the states
+  * of the state set in its state dict element. Afterwards the state dict
+  * elements of all states on the list are deleted. */
+ void FsmGraph::fillInStates( MergeData &md )
+ {
+     /* Merge each waiting state from its set of states. The list is read one
+      * step at a time because merging is likely to add further states to it. */
+     for ( FsmState *state = md.stfillHead; state != 0; state = state->alg.next ) {
+         StateSet &members = state->stateDictEl->stateSet;
+         mergeStates( md, state, members.data, members.length() );
+     }
+
+     /* Delete and reset the state dict element of everything on the list. */
+     for ( FsmState *state = md.stfillHead; state != 0; state = state->alg.next ) {
+         delete state->stateDictEl;
+         state->stateDictEl = 0;
+     }
+
+     /* The state dict still has its pointers and size set, but all of its
+      * elements have been deleted, so it needs no further clean up. */
+ }
+
+ /* Find the expansions needed to embed condAction in the out transitions of
+ * destState, and rebuild the state's condition list to include it. The
+ * state's transitions are paired with its own condition list: ranges that
+ * have no condition yet get a new state condition holding only condAction,
+ * ranges that already have conditions get condAction added to their set.
+ * Every expansion targets values in which the bit of condAction is set.
+ *
+ * expansionList: receives the new expansions.
+ * destState: state whose transitions get the condition.
+ * condAction: the condition to embed. */
+ void FsmGraph::findEmbedExpansions( ExpansionList &expansionList,
+ FsmState *destState, Action *condAction )
+ {
+ /* The new condition list is built here, then transferred into destState. */
+ StateCondList destList;
+ PairIter<FsmTrans, StateCond> transCond( destState->outList.head,
+ destState->stateCondList.head );
+ for ( ; !transCond.end(); transCond++ ) {
+ switch ( transCond.userState ) {
+ case RangeInS1: {
+ /* Only ranges that start at or below maxKey are handled here. */
+ if ( transCond.s1Tel.lowKey <= keyOps->maxKey ) {
+ assert( transCond.s1Tel.highKey <= keyOps->maxKey );
+
+ /* Make a new state cond. */
+ StateCond *newStateCond = new StateCond( transCond.s1Tel.lowKey,
+ transCond.s1Tel.highKey );
+ newStateCond->condSpace = addCondSpace( CondSet( condAction ) );
+ destList.append( newStateCond );
+
+ /* Create the expansion. Its single target value is 1. */
+ Expansion *expansion = new Expansion( transCond.s1Tel.lowKey,
+ transCond.s1Tel.highKey );
+ expansion->fromTrans = new FsmTrans(*transCond.s1Tel.trans);
+ expansion->fromTrans->fromState = 0;
+ expansion->fromTrans->toState = transCond.s1Tel.trans->toState;
+ expansion->fromCondSpace = 0;
+ expansion->fromVals = 0;
+ expansion->toCondSpace = newStateCond->condSpace;
+ expansion->toValsList.append( 1 );
+ #ifdef COLM_LOG_CONDS
+ if ( colm_log_conds ) {
+ logNewExpansion( expansion );
+ }
+ #endif
+ expansionList.append( expansion );
+ }
+ break;
+ }
+ case RangeInS2: {
+ /* Enhance state cond and find the expansion. */
+ StateCond *stateCond = transCond.s2Tel.trans;
+ stateCond->lowKey = transCond.s2Tel.lowKey;
+ stateCond->highKey = transCond.s2Tel.highKey;
+
+ CondSet &destCS = stateCond->condSpace->condSet;
+ long destLen = destCS.length();
+ CondSpace *fromCondSpace = stateCond->condSpace;
+
+ /* The state cond moves to the space that includes condAction. */
+ CondSet mergedCS = destCS;
+ mergedCS.insert( condAction );
+ CondSpace *toCondSpace = addCondSpace( mergedCS );
+ stateCond->condSpace = toCondSpace;
+ destList.append( stateCond );
+
+ /* Loop all values in the dest space. */
+ for ( long destVals = 0; destVals < (1 << destLen); destVals++ ) {
+ /* Translate the bits of destVals, which are positions in destCS,
+ * into the positions the same conditions have in mergedCS. */
+ long basicVals = 0;
+ for ( CondSet::Iter csi = destCS; csi.lte(); csi++ ) {
+ if ( destVals & (1 << csi.pos()) ) {
+ Action **cim = mergedCS.find( *csi );
+ long bitPos = (cim - mergedCS.data);
+ basicVals |= 1 << bitPos;
+ }
+ }
+
+ /* Add the bit of the embedded condition. */
+ long targVals = basicVals;
+ Action **cim = mergedCS.find( condAction );
+ long bitPos = (cim - mergedCS.data);
+ targVals |= 1 << bitPos;
+
+ LongVect expandToVals( targVals );
+ findCondExpInTrans( expansionList, destState,
+ transCond.s2Tel.lowKey, transCond.s2Tel.highKey,
+ fromCondSpace, toCondSpace, destVals, expandToVals );
+ }
+ break;
+ }
+
+
+ /* These cases are not expected for this pairing and are asserted
+ * against. */
+ case RangeOverlap:
+ case BreakS1:
+ case BreakS2:
+ assert( false );
+ break;
+ }
+ }
+
+ destState->stateCondList.transfer( destList );
+ }
+
+ /* Embed a condition in the out transitions of a state. This is the
+  * stand-alone form of the operation: it sets up its own merge data, runs the
+  * worker overload, fills in any combination states the worker queued up and
+  * removes the misfits left behind.
+  *
+  * state: state whose transitions get the condition.
+  * condAction: the condition to embed. */
+ void FsmGraph::embedCondition( FsmState *state, Action *condAction )
+ {
+     /* For the merging process. */
+     MergeData md;
+
+     /* Turn on misfit accounting so that removeMisfits() below can clean up
+      * after the embedding. */
+     setMisfitAccounting( true );
+
+     /* Worker. */
+     embedCondition( md, state, condAction );
+
+     /* Fill in any states that were newed up as combinations of others. */
+     fillInStates( md );
+
+     /* Remove the misfits and turn off misfit accounting. */
+     removeMisfits();
+     setMisfitAccounting( false );
+ }
+
+ /* Worker for embedding a condition in the out transitions of a state: the
+  * required expansions are found, carried out, and the transitions they
+  * replace are removed. Filling in states and misfit handling are left to the
+  * caller.
+  *
+  * md: merge data of the enclosing operation. */
+ void FsmGraph::embedCondition( MergeData &md, FsmState *state, Action *condAction )
+ {
+     ExpansionList expansions;
+
+     /* Find what has to be expanded. */
+     findEmbedExpansions( expansions, state, condAction );
+
+     /* Add the expanded transitions, then take out the originals. */
+     doExpand( md, state, expansions );
+     doRemove( md, state, expansions );
+
+     expansions.empty();
+ }
diff --git a/src/fsmgraph.h b/src/fsmgraph.h
new file mode 100644
index 00000000..fca23cc1
--- /dev/null
+++ b/src/fsmgraph.h
@@ -0,0 +1,1388 @@
+/*
+ * Copyright 2001-2012 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _FSMGRAPH_H
+#define _FSMGRAPH_H
+
+#include <assert.h>
+#include "keyops.h"
+#include "vector.h"
+#include "bstset.h"
+#include "compare.h"
+#include "avltree.h"
+#include "dlist.h"
+#include "bstmap.h"
+#include "sbstmap.h"
+#include "sbstset.h"
+#include "sbsttable.h"
+#include "avlset.h"
+#include "avlmap.h"
+
+ /* Flags that control merging. They are bits in a state's stateBits field.
+ * SB_BOTH is the combination of SB_GRAPH1 and SB_GRAPH2. SB_ISFINAL marks a
+ * state as final. */
+ #define SB_GRAPH1 0x01
+ #define SB_GRAPH2 0x02
+ #define SB_BOTH 0x03
+ #define SB_ISFINAL 0x04
+ #define SB_ISMARKED 0x08
+ #define SB_ONLIST 0x10
+
+struct FsmTrans;
+struct FsmState;
+struct FsmGraph;
+struct Action;
+struct TokenDef;
+struct NameInst;
+
+ /* Link pointers for a state's membership in a state list. They are kept in
+  * a struct of their own so the list element can be accessed unambiguously. */
+ struct FsmListEl
+ {
+     FsmState *prev;
+     FsmState *next;
+ };
+
+ /* This is the marked index for a state pair. Used in minimization. It keeps
+ * track of whether or not the state pair is marked. */
+ struct MarkIndex
+ {
+ /* states: presumably the number of states the index must cover -- see the
+ * constructor definition to confirm. */
+ MarkIndex(int states);
+ ~MarkIndex();
+
+ /* Mark the pair made up of the two given states. */
+ void markPair(int state1, int state2);
+ /* Report whether the pair made up of the two given states is marked. */
+ bool isPairMarked(int state1, int state2);
+
+ private:
+ int numStates;
+ bool *array;
+ };
+
+extern KeyOps *keyOps;
+
+ /* Transition Action Element. The key is the ordering of the action. */
+ typedef SBstMapEl< int, Action* > ActionTableEl;
+
+ /* Transition Action Table. Maps orderings to actions. */
+ struct ActionTable
+ : public SBstMap< int, Action*, CmpOrd<int> >
+ {
+ /* Set a single action on the given ordering. */
+ void setAction( int ordering, Action *action );
+ /* Set nActs actions from parallel arrays of orderings and actions. */
+ void setActions( int *orderings, Action **actions, int nActs );
+ /* Set all the actions of another table. */
+ void setActions( const ActionTable &other );
+
+ /* Report whether the action is in the table. */
+ bool hasAction( Action *action );
+ };
+
+ /* Set of actions, ordered by pointer value, and the compare for it. */
+ typedef SBstSet< Action*, CmpOrd<Action*> > ActionSet;
+ typedef CmpSTable< Action*, CmpOrd<Action*> > CmpActionSet;
+
+ /* Longest-match Action Element. The key is the ordering, the value is a
+ * token definition. */
+ typedef SBstMapEl< int, TokenDef* > LmActionTableEl;
+
+ /* Longest-match Action Table. Maps orderings to token definitions. */
+ struct LmActionTable
+ : public SBstMap< int, TokenDef*, CmpOrd<int> >
+ {
+ /* Set a single action on the given ordering. */
+ void setAction( int ordering, TokenDef *action );
+ /* Set all the actions of another table. */
+ void setActions( const LmActionTable &other );
+ };
+
+ /* Three-way compare of whole action table elements: by key first, then by
+  * value. Returns -1, 0 or 1. */
+ struct CmpActionTableEl
+ {
+     static int compare( const ActionTableEl &action1,
+             const ActionTableEl &action2 )
+     {
+         /* The key decides when it differs. */
+         if ( action1.key < action2.key )
+             return -1;
+         if ( action1.key > action2.key )
+             return 1;
+
+         /* Equal keys: the value breaks the tie. */
+         if ( action1.value < action2.value )
+             return -1;
+         if ( action1.value > action2.value )
+             return 1;
+
+         return 0;
+     }
+ };
+
+ /* Compare for ActionTable. */
+ typedef CmpSTable< ActionTableEl, CmpActionTableEl > CmpActionTable;
+
+ /* Three-way compare of whole lm action table elements: by key first, then
+  * by value. Returns -1, 0 or 1. */
+ struct CmpLmActionTableEl
+ {
+     static int compare( const LmActionTableEl &lmAction1,
+             const LmActionTableEl &lmAction2 )
+     {
+         /* The key decides when it differs. */
+         if ( lmAction1.key < lmAction2.key )
+             return -1;
+         if ( lmAction1.key > lmAction2.key )
+             return 1;
+
+         /* Equal keys: the value breaks the tie. */
+         if ( lmAction1.value < lmAction2.value )
+             return -1;
+         if ( lmAction1.value > lmAction2.value )
+             return 1;
+
+         return 0;
+     }
+ };
+
+ /* Compare for LmActionTable. */
+ typedef CmpSTable< LmActionTableEl, CmpLmActionTableEl > CmpLmActionTable;
+
+ /* Action table element for error action tables. Adds the encoding of transfer
+ * point. Note that the constructor takes the action before the ordering. */
+ struct ErrActionTableEl
+ {
+ ErrActionTableEl( Action *action, int ordering, int transferPoint )
+ : ordering(ordering), action(action), transferPoint(transferPoint) { }
+
+ /* Ordering and id of the action embedding. */
+ int ordering;
+ Action *action;
+
+ /* Id of point of transfer from Error action table to transitions and
+ * eofActionTable. */
+ int transferPoint;
+
+ /* The table key of an element is its ordering. */
+ int getKey() const { return ordering; }
+ };
+
+ /* Error action table. Holds ErrActionTableEl elements keyed on ordering. */
+ struct ErrActionTable
+ : public SBstTable< ErrActionTableEl, int, CmpOrd<int> >
+ {
+ /* Set a single action, with its transfer point, on the given ordering. */
+ void setAction( int ordering, Action *action, int transferPoint );
+ /* Set all the actions of another table. */
+ void setActions( const ErrActionTable &other );
+ };
+
+/* Compare of an error action table element (key & value). */
+struct CmpErrActionTableEl
+{
+ static int compare( const ErrActionTableEl &action1,
+ const ErrActionTableEl &action2 )
+ {
+ if ( action1.ordering < action2.ordering )
+ return -1;
+ else if ( action1.ordering > action2.ordering )
+ return 1;
+ else if ( action1.action < action2.action )
+ return -1;
+ else if ( action1.action > action2.action )
+ return 1;
+ else if ( action1.transferPoint < action2.transferPoint )
+ return -1;
+ else if ( action1.transferPoint > action2.transferPoint )
+ return 1;
+ return 0;
+ }
+};
+
+/* Compare for ErrActionTable. */
+typedef CmpSTable< ErrActionTableEl, CmpErrActionTableEl > CmpErrActionTable;
+
+
+ /* Describe a priority, shared among PriorEls.
+ * Holds the priority key and the priority value. */
+ struct PriorDesc
+ {
+ int key;
+ int priority;
+ };
+
+ /* Element in the arrays of priorities for transitions and arrays. Ordering is
+ * unique among instantiations of machines, desc is shared. */
+ struct PriorEl
+ {
+ PriorEl( int ordering, PriorDesc *desc )
+ : ordering(ordering), desc(desc) { }
+
+ /* Ordering of this priority embedding. */
+ int ordering;
+ /* The shared descriptor giving the key and priority. */
+ PriorDesc *desc;
+ };
+
+ /* Three-way compare of priority elements on the key of their priority
+  * descriptor alone. Returns -1, 0 or 1. */
+ struct PriorElCmp
+ {
+     static inline int compare( const PriorEl &pel1, const PriorEl &pel2 )
+     {
+         if ( pel1.desc->key < pel2.desc->key )
+             return -1;
+         if ( pel1.desc->key > pel2.desc->key )
+             return 1;
+         return 0;
+     }
+ };
+
+
+ /* Priority Table. A set of priority elements compared with PriorElCmp, that
+ * is, on descriptor key. */
+ struct PriorTable
+ : public SBstSet< PriorEl, PriorElCmp >
+ {
+ /* Set a single priority, given its ordering and descriptor. */
+ void setPrior( int ordering, PriorDesc *desc );
+ /* Set all the priorities of another table. */
+ void setPriors( const PriorTable &other );
+ };
+
+ /* Three-way compare of prior table elements for distinguishing state data:
+  * by descriptor pointer first, then by ordering. Returns -1, 0 or 1. */
+ struct CmpPriorEl
+ {
+     static inline int compare( const PriorEl &pel1, const PriorEl &pel2 )
+     {
+         /* The descriptor pointer decides when it differs. */
+         if ( pel1.desc < pel2.desc )
+             return -1;
+         if ( pel1.desc > pel2.desc )
+             return 1;
+
+         /* Same descriptor: the ordering breaks the tie. */
+         if ( pel1.ordering < pel2.ordering )
+             return -1;
+         if ( pel1.ordering > pel2.ordering )
+             return 1;
+
+         return 0;
+     }
+ };
+
+ /* Compare of PriorTable distinguishing state data. Using a compare of the
+ * pointers is a little more strict than it needs be. It requires that
+ * priority tables have the exact same set of priority assignment operators
+ * (from the input lang) to be considered equal.
+ *
+ * Really only key-value pairs need be tested and ordering be merged. However
+ * this would require that in the fusing of states, priority descriptors be
+ * chosen for the new fused state based on priority. Since the out transition
+ * lists and ranges aren't necessarily going to line up, this is more work for
+ * little gain. Final compression resets all priorities first, so this would
+ * only be useful for compression at every operator, which is only an
+ * undocumented test feature.
+ */
+ typedef CmpSTable<PriorEl, CmpPriorEl> CmpPriorTable;
+
+/* Plain action list that imposes no ordering. Elements are ints. */
+typedef Vector<int> TransFuncList;
+
+/* Comparison for TransFuncList, comparing elements with CmpOrd<int>. */
+typedef CmpTable< int, CmpOrd<int> > TransFuncListCompare;
+
+/* Transition class that implements actions and priorities. */
+struct FsmTrans
+{
+ FsmTrans() : fromState(0), toState(0) {}
+ FsmTrans( const FsmTrans &other ) : /* Copies the key range, actions and priorities only. */
+ lowKey(other.lowKey),
+ highKey(other.highKey),
+ fromState(0), toState(0), /* fromState and toState start out null in the copy. */
+ actionTable(other.actionTable),
+ priorTable(other.priorTable)
+ {
+ assert( lmActionTable.length() == 0 && other.lmActionTable.length() == 0 ); /* lmActionTable is not copied; the source's must be empty. */
+ }
+
+ Key lowKey, highKey;
+ FsmState *fromState;
+ FsmState *toState;
+
+ /* Pointers for outlist. Not set by either constructor. */
+ FsmTrans *prev, *next;
+
+ /* Pointers for in-list. Not set by either constructor. */
+ FsmTrans *ilprev, *ilnext;
+
+ /* The function table and priority for the transition. */
+ ActionTable actionTable;
+ PriorTable priorTable;
+
+ LmActionTable lmActionTable;
+};
+
+/* In transition list. Like DList except it only has a head pointer, which is
+ * all that is required. Insertion and deletion is handled by the graph. This
+ * class provides the iterator of a single list. */
+struct TransInList
+{
+ TransInList() : head(0) { }
+
+ FsmTrans *head;
+
+ struct Iter
+ {
+ /* Default construct. */
+ Iter() : ptr(0) { }
+
+ /* Construct, assign from a list. */
+ Iter( const TransInList &il ) : ptr(il.head) { }
+ Iter &operator=( const TransInList &dl ) { ptr = dl.head; return *this; }
+
+ /* lte: still on an element. end: walked off the list (ptr is null). */
+ bool lte() const { return ptr != 0; }
+ bool end() const { return ptr == 0; }
+
+ /* At the first, last element. Both are false when ptr is null. */
+ bool first() const { return ptr && ptr->ilprev == 0; }
+ bool last() const { return ptr && ptr->ilnext == 0; }
+
+ /* Cast, dereference, arrow ops. */
+ operator FsmTrans*() const { return ptr; }
+ FsmTrans &operator *() const { return *ptr; }
+ FsmTrans *operator->() const { return ptr; }
+
+ /* Increment, decrement. Postfix forms only; they follow ilnext/ilprev and return nothing. */
+ inline void operator++(int) { ptr = ptr->ilnext; }
+ inline void operator--(int) { ptr = ptr->ilprev; }
+
+ /* The iterator is simply a pointer. */
+ FsmTrans *ptr;
+ };
+};
+
+typedef DList<FsmTrans> TransList; /* Type of FsmState::outList. */
+
+/* Set of states, list of states. */
+typedef BstSet<FsmState*> StateSet;
+typedef DList<FsmState> StateList;
+
+/* An element in a state dict, keyed by its state set. */
+struct StateDictEl
+:
+ public AvlTreeEl<StateDictEl>
+{
+ StateDictEl(const StateSet &stateSet)
+ : stateSet(stateSet) { } /* targState is left uninitialized here. */
+
+ const StateSet &getKey() { return stateSet; }
+ StateSet stateSet;
+ FsmState *targState;
+};
+
+/* Dictionary mapping a set of states to a target state. */
+typedef AvlTree< StateDictEl, StateSet, CmpTable<FsmState*> > StateDict;
+
+/* Data needed for a merge operation. */
+struct MergeData
+{
+ MergeData()
+ : stfillHead(0), stfillTail(0) { }
+
+ StateDict stateDict;
+
+ FsmState *stfillHead; /* Head and tail of the fill list; presumably linked via FsmState::alg.next (confirm). */
+ FsmState *stfillTail;
+
+ void fillListAppend( FsmState *state );
+};
+
+struct TransEl /* A key range paired with a transition pointer. */
+{
+ /* Constructors. Only the three-argument form initializes value. */
+ TransEl() { }
+ TransEl( Key lowKey, Key highKey )
+ : lowKey(lowKey), highKey(highKey) { }
+ TransEl( Key lowKey, Key highKey, FsmTrans *value )
+ : lowKey(lowKey), highKey(highKey), value(value) { }
+
+ Key lowKey, highKey;
+ FsmTrans *value;
+};
+
+struct CmpKey /* Three-way compare of Keys using Key's own < and > operators. */
+{
+ static int compare( const Key key1, const Key key2 )
+ {
+ if ( key1 < key2 )
+ return -1;
+ else if ( key1 > key2 )
+ return 1;
+ else
+ return 0;
+ }
+};
+
+/* Vector based set of key items, ordered with CmpKey. */
+typedef BstSet<Key, CmpKey> KeySet;
+
+struct MinPartition /* A partition of states; FsmState::alg.partition points at one of these during minimization. */
+{
+ MinPartition() : active(false) { } /* prev and next are not initialized here. */
+
+ StateList list;
+ bool active;
+
+ MinPartition *prev, *next;
+};
+
+/* Epsilon transitions stored in a state. Each int entry specifies a target. */
+typedef Vector<int> EpsilonTrans;
+
+/* Element of the list of states that are to be drawn into this one. */
+struct EptVectEl
+{
+ EptVectEl( FsmState *targ, bool leaving )
+ : targ(targ), leaving(leaving) { }
+
+ FsmState *targ;
+ bool leaving;
+};
+typedef Vector<EptVectEl> EptVect;
+
+/* Set of entry ids that go into this state. */
+typedef BstSet<int> EntryIdSet;
+
+/* Set of longest match items that may be active in a given state. */
+typedef BstSet<TokenDef*> LmItemSet;
+
+/* Conditions: a set of Action pointers and the comparison used to key a map on such sets. */
+typedef BstSet< Action*, CmpOrd<Action*> > CondSet;
+typedef CmpTable< Action*, CmpOrd<Action*> > CmpCondSet;
+
+struct CondSpace /* Tree element keyed by its condition set. */
+ : public AvlTreeEl<CondSpace>
+{
+ CondSpace( const CondSet &condSet )
+ : condSet(condSet) {} /* baseKey and condSpaceId are not initialized here. */
+
+ const CondSet &getKey() { return condSet; }
+
+ CondSet condSet;
+ Key baseKey;
+ long condSpaceId;
+};
+
+typedef Vector<CondSpace*> CondSpaceVect;
+
+typedef AvlTree<CondSpace, CondSet, CmpCondSet> CondSpaceMap; /* CondSet -> CondSpace lookup. */
+
+struct StateCond /* A key range associated with a condition space; element of StateCondList. */
+{
+ StateCond( Key lowKey, Key highKey ) :
+ lowKey(lowKey), highKey(highKey) {} /* condSpace, prev and next are not initialized here. */
+
+ Key lowKey;
+ Key highKey;
+ CondSpace *condSpace;
+
+ StateCond *prev, *next;
+};
+
+typedef DList<StateCond> StateCondList;
+typedef Vector<long> LongVect;
+
+struct Expansion /* Element of ExpansionList: a key range with from/to condition spaces and values. */
+{
+ Expansion( Key lowKey, Key highKey ) :
+ lowKey(lowKey), highKey(highKey),
+ fromTrans(0), fromCondSpace(0),
+ toCondSpace(0) {} /* fromVals, prev and next are not initialized here. */
+
+ ~Expansion()
+ {
+ if ( fromTrans != 0 ) /* The expansion owns fromTrans and deletes it. */
+ delete fromTrans;
+ }
+
+ Key lowKey;
+ Key highKey;
+
+ FsmTrans *fromTrans;
+ CondSpace *fromCondSpace;
+ long fromVals;
+
+ CondSpace *toCondSpace;
+ LongVect toValsList;
+
+ Expansion *prev, *next;
+};
+
+typedef DList<Expansion> ExpansionList;
+
+struct Removal /* A key range linked into a singly linked list; has no constructor, so nothing is initialized. */
+{
+ Key lowKey;
+ Key highKey;
+
+ Removal *next;
+};
+
+struct CondData /* Condition bookkeeping: the next condition key and the map of condition spaces. */
+{
+ CondData() : nextCondKey(0) {}
+
+ /* Condition info. */
+ Key nextCondKey;
+
+ CondSpaceMap condSpaceMap;
+};
+
+extern CondData *condData; /* Declared here only; the definition is in another translation unit. */
+
+/* State class that implements actions and priorities. */
+struct FsmState
+{
+ FsmState();
+ FsmState(const FsmState &other);
+ ~FsmState();
+
+ /* Is the state final? Tests the SB_ISFINAL bit of stateBits. */
+ bool isFinState() { return stateBits & SB_ISFINAL; }
+
+ /* Out transition list. */
+ TransList outList;
+
+ /* In transition list. */
+ TransInList inList;
+
+ /* Entry points into the state. */
+ EntryIdSet entryIds;
+
+ /* Epsilon transitions. */
+ EpsilonTrans epsilonTrans;
+
+ /* Condition info. */
+ StateCondList stateCondList;
+
+ /* Number of in transitions from states other than ourselves. */
+ int foreignInTrans;
+
+ /* Temporary data for various algorithms. The members share storage. */
+ union {
+ /* When duplicating the fsm we need to map each
+ * state to the new state representing it. */
+ FsmState *stateMap;
+
+ /* When minimizing machines by partitioning, this maps to the group
+ * the state is in. */
+ MinPartition *partition;
+
+ /* When merging states (state machine operations) this next pointer is
+ * used for the list of states that need to be filled in. */
+ FsmState *next;
+
+ /* Identification for printing and stable minimization. */
+ int stateNum;
+
+ } alg;
+
+ /* Data used in epsilon operation, maybe fit into alg? */
+ FsmState *isolatedShadow;
+ int owningGraph;
+
+ /* A pointer to a dict element that contains the set of states this state
+ * represents. This cannot go into alg, because alg.next is used during
+ * the merging process. */
+ StateDictEl *stateDictEl;
+
+ /* When drawing epsilon transitions, holds the list of states to merge
+ * with. */
+ EptVect *eptVect;
+
+ /* Bits controlling the behaviour of the state during collapsing to dfa. */
+ int stateBits;
+
+ /* State list elements. */
+ FsmState *next, *prev;
+
+ /*
+ * Priority and Action data.
+ */
+
+ /* Out priorities transferred to out transitions. */
+ PriorTable outPriorTable;
+
+ /* The following two action tables are distinguished as follows: the
+ * toState actions are executed immediately after transition actions of
+ * incoming transitions and the current character will be the same as the
+ * one available then. The fromState actions are executed immediately
+ * before the transition actions of outgoing transitions and the current
+ * character is same as the one available then. */
+
+ /* Actions to execute upon entering into a state. */
+ ActionTable toStateActionTable;
+
+ /* Actions to execute when going from the state to the transition. */
+ ActionTable fromStateActionTable;
+
+ /* Actions to add to any future transitions that leave via this state. */
+ ActionTable outActionTable;
+
+ /* Conditions to add to any future transitions that leave via this state. */
+ ActionSet outCondSet;
+
+ /* Error action tables. */
+ ErrActionTable errActionTable;
+
+ /* Actions to execute on eof. */
+ ActionTable eofActionTable;
+
+ /* Set of longest match items that may be active in this state. */
+ LmItemSet lmItemSet;
+
+ FsmState *eofTarget;
+};
+
+template <class ListItem> struct NextTrans /* Cursor over a list: caches the current item's key range and its successor. */
+{
+ Key lowKey, highKey;
+ ListItem *trans;
+ ListItem *next;
+
+ void load() { /* Refresh next and the cached keys from trans. */
+ if ( trans == 0 )
+ next = 0; /* At the end: lowKey and highKey keep their previous values. */
+ else {
+ next = trans->next;
+ lowKey = trans->lowKey;
+ highKey = trans->highKey;
+ }
+ }
+
+ void set( ListItem *t ) { /* Position the cursor on t. */
+ trans = t;
+ load();
+ }
+
+ void increment() { /* Step to the cached successor. */
+ trans = next;
+ load();
+ }
+};
+
+
+/* Encodes the different states that are meaningful to the user of the iterator. */
+enum PairIterUserState
+{
+ RangeInS1, RangeInS2,
+ RangeOverlap,
+ BreakS1, BreakS2
+};
+
+template <class ListItem1, class ListItem2 = ListItem1> struct PairIter /* Walks two lists of key ranges in parallel. */
+{
+ /* Encodes the different states that an fsm iterator can be in. Each value
+ * is also a resume point inside findNext. */
+ enum IterState {
+ Begin,
+ ConsumeS1Range, ConsumeS2Range,
+ OnlyInS1Range, OnlyInS2Range,
+ S1SticksOut, S1SticksOutBreak,
+ S2SticksOut, S2SticksOutBreak,
+ S1DragsBehind, S1DragsBehindBreak,
+ S2DragsBehind, S2DragsBehindBreak,
+ ExactOverlap, End
+ };
+
+ PairIter( ListItem1 *list1, ListItem2 *list2 );
+
+ /* Query iterator. Both increment forms advance and return nothing. */
+ bool lte() { return itState != End; }
+ bool end() { return itState == End; }
+ void operator++(int) { findNext(); }
+ void operator++() { findNext(); }
+
+ /* Iterator state. */
+ ListItem1 *list1;
+ ListItem2 *list2;
+ IterState itState;
+ PairIterUserState userState;
+
+ NextTrans<ListItem1> s1Tel;
+ NextTrans<ListItem2> s2Tel;
+ Key bottomLow, bottomHigh; /* Remainder of a broken range, saved across the returns in findNext. */
+ ListItem1 *bottomTrans1;
+ ListItem2 *bottomTrans2;
+
+private:
+ void findNext();
+};
+
+/* Init the iterator in the Begin state and advance it to the first item. */
+template <class ListItem1, class ListItem2> PairIter<ListItem1, ListItem2>::PairIter(
+ ListItem1 *list1, ListItem2 *list2 )
+:
+ list1(list1),
+ list2(list2),
+ itState(Begin)
+{
+ findNext();
+}
+
+/* Return and re-entry for the co-routine iterators: records label in itState, returns, and defines
+ * the entry##label resume point. This should ALWAYS be used inside of a block. */
+#define CO_RETURN(label) \
+ itState = label; \
+ return; \
+ entry##label: {}
+
+/* Same as CO_RETURN, but also stores uState in userState before returning.
+ * This should ALWAYS be used inside of a block. */
+#define CO_RETURN2(label, uState) \
+ itState = label; \
+ userState = uState; \
+ return; \
+ entry##label: {}
+
+/* Advance to the next transition. On return, userState describes the item
+ * found, unless there are no more, in which case end() returns true. */
+template <class ListItem1, class ListItem2> void PairIter<ListItem1, ListItem2>::findNext()
+{
+ /* Jump into the iterator routine based on the iterator state. */
+ switch ( itState ) {
+ case Begin: goto entryBegin;
+ case ConsumeS1Range: goto entryConsumeS1Range;
+ case ConsumeS2Range: goto entryConsumeS2Range;
+ case OnlyInS1Range: goto entryOnlyInS1Range;
+ case OnlyInS2Range: goto entryOnlyInS2Range;
+ case S1SticksOut: goto entryS1SticksOut;
+ case S1SticksOutBreak: goto entryS1SticksOutBreak;
+ case S2SticksOut: goto entryS2SticksOut;
+ case S2SticksOutBreak: goto entryS2SticksOutBreak;
+ case S1DragsBehind: goto entryS1DragsBehind;
+ case S1DragsBehindBreak: goto entryS1DragsBehindBreak;
+ case S2DragsBehind: goto entryS2DragsBehind;
+ case S2DragsBehindBreak: goto entryS2DragsBehindBreak;
+ case ExactOverlap: goto entryExactOverlap;
+ case End: goto entryEnd;
+ }
+
+entryBegin:
+ /* Set up the next structs at the head of the transition lists. */
+ s1Tel.set( list1 );
+ s2Tel.set( list2 );
+
+ /* Concurrently scan both out ranges. */
+ while ( true ) {
+ if ( s1Tel.trans == 0 ) {
+ /* We are at the end of state1's ranges. Process the rest of
+ * state2's ranges. */
+ while ( s2Tel.trans != 0 ) {
+ /* Range is only in s2. */
+ CO_RETURN2( ConsumeS2Range, RangeInS2 );
+ s2Tel.increment();
+ }
+ break;
+ }
+ else if ( s2Tel.trans == 0 ) {
+ /* We are at the end of state2's ranges. Process the rest of
+ * state1's ranges. */
+ while ( s1Tel.trans != 0 ) {
+ /* Range is only in s1. */
+ CO_RETURN2( ConsumeS1Range, RangeInS1 );
+ s1Tel.increment();
+ }
+ break;
+ }
+ /* Both state1's and state2's transition elements are good.
+ * The signature of no overlap is a back key being in front of a
+ * front key. */
+ else if ( s1Tel.highKey < s2Tel.lowKey ) {
+ /* A range exists in state1 that does not overlap with state2. */
+ CO_RETURN2( OnlyInS1Range, RangeInS1 );
+ s1Tel.increment();
+ }
+ else if ( s2Tel.highKey < s1Tel.lowKey ) {
+ /* A range exists in state2 that does not overlap with state1. */
+ CO_RETURN2( OnlyInS2Range, RangeInS2 );
+ s2Tel.increment();
+ }
+ /* There is overlap, must mix the ranges in some way. */
+ else if ( s1Tel.lowKey < s2Tel.lowKey ) {
+ /* Range from state1 sticks out front. Must break it into
+ * non-overlapping and overlapping segments. */
+ bottomLow = s2Tel.lowKey;
+ bottomHigh = s1Tel.highKey;
+ s1Tel.highKey = s2Tel.lowKey;
+ s1Tel.highKey.decrement();
+ bottomTrans1 = s1Tel.trans;
+
+ /* Notify the caller that we are breaking s1. This gives them a
+ * chance to duplicate s1Tel[0,1].value. */
+ CO_RETURN2( S1SticksOutBreak, BreakS1 );
+
+ /* Broken off range is only in s1. */
+ CO_RETURN2( S1SticksOut, RangeInS1 );
+
+ /* Advance over the part sticking out front. */
+ s1Tel.lowKey = bottomLow;
+ s1Tel.highKey = bottomHigh;
+ s1Tel.trans = bottomTrans1;
+ }
+ else if ( s2Tel.lowKey < s1Tel.lowKey ) {
+ /* Range from state2 sticks out front. Must break it into
+ * non-overlapping and overlapping segments. */
+ bottomLow = s1Tel.lowKey;
+ bottomHigh = s2Tel.highKey;
+ s2Tel.highKey = s1Tel.lowKey;
+ s2Tel.highKey.decrement();
+ bottomTrans2 = s2Tel.trans;
+
+ /* Notify the caller that we are breaking s2. This gives them a
+ * chance to duplicate s2Tel[0,1].value. */
+ CO_RETURN2( S2SticksOutBreak, BreakS2 );
+
+ /* Broken off range is only in s2. */
+ CO_RETURN2( S2SticksOut, RangeInS2 );
+
+ /* Advance over the part sticking out front. */
+ s2Tel.lowKey = bottomLow;
+ s2Tel.highKey = bottomHigh;
+ s2Tel.trans = bottomTrans2;
+ }
+ /* Low ends are even. Are the high ends even? */
+ else if ( s1Tel.highKey < s2Tel.highKey ) {
+ /* Range from state2 goes longer than the range from state1. We
+ * must break the range from state2 into an evenly overlapping
+ * segment. */
+ bottomLow = s1Tel.highKey;
+ bottomLow.increment();
+ bottomHigh = s2Tel.highKey;
+ s2Tel.highKey = s1Tel.highKey;
+ bottomTrans2 = s2Tel.trans;
+
+ /* Notify the caller that we are breaking s2. This gives them a
+ * chance to duplicate s2Tel[0,1].value. */
+ CO_RETURN2( S2DragsBehindBreak, BreakS2 );
+
+ /* Breaking s2 produces exact overlap. */
+ CO_RETURN2( S2DragsBehind, RangeOverlap );
+
+ /* Advance over the front we just broke off of range 2. */
+ s2Tel.lowKey = bottomLow;
+ s2Tel.highKey = bottomHigh;
+ s2Tel.trans = bottomTrans2;
+
+ /* Advance over the entire s1Tel. We have consumed it. */
+ s1Tel.increment();
+ }
+ else if ( s2Tel.highKey < s1Tel.highKey ) {
+ /* Range from state1 goes longer than the range from state2. We
+ * must break the range from state1 into an evenly overlapping
+ * segment. */
+ bottomLow = s2Tel.highKey;
+ bottomLow.increment();
+ bottomHigh = s1Tel.highKey;
+ s1Tel.highKey = s2Tel.highKey;
+ bottomTrans1 = s1Tel.trans;
+
+ /* Notify the caller that we are breaking s1. This gives them a
+ * chance to duplicate s1Tel[0,1].value. */
+ CO_RETURN2( S1DragsBehindBreak, BreakS1 );
+
+ /* Breaking s1 produces exact overlap. */
+ CO_RETURN2( S1DragsBehind, RangeOverlap );
+
+ /* Advance over the front we just broke off of range 1. */
+ s1Tel.lowKey = bottomLow;
+ s1Tel.highKey = bottomHigh;
+ s1Tel.trans = bottomTrans1;
+
+ /* Advance over the entire s2Tel. We have consumed it. */
+ s2Tel.increment();
+ }
+ else {
+ /* There is an exact overlap. */
+ CO_RETURN2( ExactOverlap, RangeOverlap );
+
+ s1Tel.increment();
+ s2Tel.increment();
+ }
+ }
+
+ /* Done, go into end state. */
+ CO_RETURN( End );
+}
+
+
+/* Compare lists of epsilon transitions. Entries are the name ids of targets. */
+typedef CmpTable< int, CmpOrd<int> > CmpEpsilonTrans;
+
+/* Compare class for the approximate minimization. compare is only declared here. */
+class ApproxCompare
+{
+public:
+ ApproxCompare() { }
+ int compare( const FsmState *pState1, const FsmState *pState2 );
+};
+
+/* Compare class for the initial partitioning of a partition minimization. compare is only declared here. */
+class InitPartitionCompare
+{
+public:
+ InitPartitionCompare() { }
+ int compare( const FsmState *pState1, const FsmState *pState2 );
+};
+
+/* Compare class for the regular partitioning of a partition minimization. compare is only declared here. */
+class PartitionCompare
+{
+public:
+ PartitionCompare() { }
+ int compare( const FsmState *pState1, const FsmState *pState2 );
+};
+
+/* Compare class for a minimization that marks pairs. Provides the shouldMark
+ * routine, which is only declared here. */
+class MarkCompare
+{
+public:
+ MarkCompare() { }
+ bool shouldMark( MarkIndex &markIndex, const FsmState *pState1,
+ const FsmState *pState2 );
+};
+
+/* List of partitions. */
+typedef DList< MinPartition > PartitionList;
+
+/* List of transitions out of a state. */
+typedef Vector<TransEl> TransListVect;
+
+/* Entry point map used for keeping track of entry points in a machine. */
+typedef BstSet< int > EntryIdSet; /* Repeats the identical typedef made earlier in this header. */
+typedef BstMapEl< int, FsmState* > EntryMapEl;
+typedef BstMap< int, FsmState* > EntryMap;
+typedef Vector<EntryMapEl> EntryMapBase;
+
+/* Graph class that implements actions and priorities. */
+struct FsmGraph
+{
+ /* Constructors/Destructors. */
+ FsmGraph( );
+ FsmGraph( const FsmGraph &graph );
+ ~FsmGraph();
+
+ /* The list of states. */
+ StateList stateList;
+ StateList misfitList;
+
+ /* The map of entry points. */
+ EntryMap entryPoints;
+
+ /* The start state. */
+ FsmState *startState;
+
+ /* Error state, possibly created only when the final machine has been
+ * created and the XML machine is about to be written. No transitions
+ * point to this state. */
+ FsmState *errState;
+
+ /* The set of final states. */
+ StateSet finStateSet;
+
+ /* Misfit accounting: whether misfits are put on a separate list. */
+ bool misfitAccounting;
+
+ bool lmRequiresErrorState;
+ NameInst *rootName;
+ NameInst **nameIndex;
+
+ /*
+ * Transition actions and priorities.
+ */
+
+ /* Set priorities on transitions. */
+ void startFsmPrior( int ordering, PriorDesc *prior );
+ void allTransPrior( int ordering, PriorDesc *prior );
+ void finishFsmPrior( int ordering, PriorDesc *prior );
+ void leaveFsmPrior( int ordering, PriorDesc *prior );
+
+ /* Action setting support. */
+ void transferErrorActions( FsmState *state, int transferPoint );
+ void setErrorAction( FsmState *state, int ordering, Action *action );
+ void setErrorActions( FsmState *state, const ActionTable &other );
+
+ /* Fill all spaces in a transition list with an error transition. */
+ void fillGaps( FsmState *state );
+
+ /* Similar to setErrorAction, instead gives a state to go to on error. */
+ void setErrorTarget( FsmState *state, FsmState *target, int *orderings,
+ Action **actions, int nActs );
+
+ /* Set actions to execute. */
+ void startFsmAction( int ordering, Action *action );
+ void allTransAction( int ordering, Action *action );
+ void finishFsmAction( int ordering, Action *action );
+ void leaveFsmAction( int ordering, Action *action );
+ void longMatchAction( int ordering, TokenDef *lmPart );
+
+ /* Set conditions. */
+ CondSpace *addCondSpace( const CondSet &condSet );
+
+ void findEmbedExpansions( ExpansionList &expansionList,
+ FsmState *destState, Action *condAction );
+ void embedCondition( MergeData &md, FsmState *state, Action *condAction );
+ void embedCondition( FsmState *state, Action *condAction );
+
+ void startFsmCondition( Action *condAction );
+ void allTransCondition( Action *condAction );
+ void leaveFsmCondition( Action *condAction );
+
+ /* Set error actions to execute. */
+ void startErrorAction( int ordering, Action *action, int transferPoint );
+ void allErrorAction( int ordering, Action *action, int transferPoint );
+ void finalErrorAction( int ordering, Action *action, int transferPoint );
+ void notStartErrorAction( int ordering, Action *action, int transferPoint );
+ void notFinalErrorAction( int ordering, Action *action, int transferPoint );
+ void middleErrorAction( int ordering, Action *action, int transferPoint );
+
+ /* Set EOF actions. */
+ void startEOFAction( int ordering, Action *action );
+ void allEOFAction( int ordering, Action *action );
+ void finalEOFAction( int ordering, Action *action );
+ void notStartEOFAction( int ordering, Action *action );
+ void notFinalEOFAction( int ordering, Action *action );
+ void middleEOFAction( int ordering, Action *action );
+
+ /* Set To State actions. */
+ void startToStateAction( int ordering, Action *action );
+ void allToStateAction( int ordering, Action *action );
+ void finalToStateAction( int ordering, Action *action );
+ void notStartToStateAction( int ordering, Action *action );
+ void notFinalToStateAction( int ordering, Action *action );
+ void middleToStateAction( int ordering, Action *action );
+
+ /* Set From State actions. */
+ void startFromStateAction( int ordering, Action *action );
+ void allFromStateAction( int ordering, Action *action );
+ void finalFromStateAction( int ordering, Action *action );
+ void notStartFromStateAction( int ordering, Action *action );
+ void notFinalFromStateAction( int ordering, Action *action );
+ void middleFromStateAction( int ordering, Action *action );
+
+ /* Shift the action ordering of the start transitions to start at
+ * fromOrder and increase in units of 1. Useful before kleene star
+ * operation. */
+ int shiftStartActionOrder( int fromOrder );
+
+ /* Clear all priorities from the fsm so they won't affect minimization
+ * of the final fsm. */
+ void clearAllPriorities();
+
+ /* Zero out all the function keys. */
+ void nullActionKeys();
+
+ /* Walk the list of states and verify state properties. */
+ void verifyStates();
+
+ /* Misfit accounting: set whether misfits are put on a separate list. */
+ void setMisfitAccounting( bool val )
+ { misfitAccounting = val; }
+
+ /* Set and Unset a state as final. */
+ void setFinState( FsmState *state );
+ void unsetFinState( FsmState *state );
+
+ void setStartState( FsmState *state );
+ void unsetStartState( );
+
+ /* Set and unset a state as an entry point. */
+ void setEntry( int id, FsmState *state );
+ void changeEntry( int id, FsmState *to, FsmState *from );
+ void unsetEntry( int id, FsmState *state );
+ void unsetEntry( int id );
+ void unsetAllEntryPoints();
+
+ /* Epsilon transitions. */
+ void epsilonTrans( int id );
+ void shadowReadWriteStates( MergeData &md );
+
+ /*
+ * Basic attaching and detaching.
+ */
+
+ /* Common to attaching/detaching list and default. */
+ void attachToInList( FsmState *from, FsmState *to, FsmTrans *&head, FsmTrans *trans );
+ void detachFromInList( FsmState *from, FsmState *to, FsmTrans *&head, FsmTrans *trans );
+
+ /* Attach with a new transition. */
+ FsmTrans *attachNewTrans( FsmState *from, FsmState *to,
+ Key onChar1, Key onChar2 );
+
+ /* Attach with an existing transition that is already in an out list. */
+ void attachTrans( FsmState *from, FsmState *to, FsmTrans *trans );
+
+ /* Redirect a transition away from error and towards some state. */
+ void redirectErrorTrans( FsmState *from, FsmState *to, FsmTrans *trans );
+
+ /* Detach a transition from a target state. */
+ void detachTrans( FsmState *from, FsmState *to, FsmTrans *trans );
+
+ /* Detach a state from the graph. */
+ void detachState( FsmState *state );
+
+ /*
+ * NFA to DFA conversion routines.
+ */
+
+ /* Duplicate a transition that will drop in to a free spot. */
+ FsmTrans *dupTrans( FsmState *from, FsmTrans *srcTrans );
+
+ /* In crossing, two transitions both go to real states. */
+ FsmTrans *fsmAttachStates( MergeData &md, FsmState *from,
+ FsmTrans *destTrans, FsmTrans *srcTrans );
+
+ /* Two transitions are to be crossed, handle the possibility of either
+ * going to the error state. */
+ FsmTrans *mergeTrans( MergeData &md, FsmState *from,
+ FsmTrans *destTrans, FsmTrans *srcTrans );
+
+ /* Compare to determine the relative priorities of two priority tables. */
+ int comparePrior( const PriorTable &priorTable1, const PriorTable &priorTable2 );
+
+ /* Cross a src transition with one that is already occupying a spot. */
+ FsmTrans *crossTransitions( MergeData &md, FsmState *from,
+ FsmTrans *destTrans, FsmTrans *srcTrans );
+
+ void outTransCopy( MergeData &md, FsmState *dest, FsmTrans *srcList );
+
+ void doRemove( MergeData &md, FsmState *destState, ExpansionList &expList1 );
+ void doExpand( MergeData &md, FsmState *destState, ExpansionList &expList1 );
+ void findCondExpInTrans( ExpansionList &expansionList, FsmState *state,
+ Key lowKey, Key highKey, CondSpace *fromCondSpace, CondSpace *toCondSpace,
+ long destVals, LongVect &toValsList );
+ void findTransExpansions( ExpansionList &expansionList,
+ FsmState *destState, FsmState *srcState );
+ void findCondExpansions( ExpansionList &expansionList,
+ FsmState *destState, FsmState *srcState );
+ void mergeStateConds( FsmState *destState, FsmState *srcState );
+
+ /* Merge a set of states into destState. */
+ void mergeStates( MergeData &md, FsmState *destState,
+ FsmState **srcStates, int numSrc );
+ void mergeStatesLeaving( MergeData &md, FsmState *destState, FsmState *srcState );
+ void mergeStates( MergeData &md, FsmState *destState, FsmState *srcState );
+
+ /* Make all states that are combinations of other states and that
+ * have not yet had their out transitions filled in. This will
+ * empty out stateDict and the stfill list of md. */
+ void fillInStates( MergeData &md );
+
+ /*
+ * Transition Comparison.
+ */
+
+ /* Compare transition data. Either of the pointers may be null. */
+ static inline int compareDataPtr( FsmTrans *trans1, FsmTrans *trans2 );
+
+ /* Compare target state and transition data. Either pointer may be null. */
+ static inline int compareFullPtr( FsmTrans *trans1, FsmTrans *trans2 );
+
+ /* Compare target partitions. Either pointer may be null. */
+ static inline int comparePartPtr( FsmTrans *trans1, FsmTrans *trans2 );
+
+ /* Check marked status of target states. Either pointer may be null. */
+ static inline bool shouldMarkPtr( MarkIndex &markIndex,
+ FsmTrans *trans1, FsmTrans *trans2 );
+
+ /*
+ * Callbacks.
+ */
+
+ /* Compare priority and function table of transitions. */
+ static int compareTransData( FsmTrans *trans1, FsmTrans *trans2 );
+
+ /* Add in the properties of srcTrans into destTrans. */
+ void addInTrans( FsmTrans *destTrans, FsmTrans *srcTrans );
+
+ /* Compare states on data stored in the states. */
+ static int compareStateData( const FsmState *state1, const FsmState *state2 );
+
+ /* Out transition data. */
+ void clearOutData( FsmState *state );
+ bool hasOutData( FsmState *state );
+ void transferOutData( FsmState *destState, FsmState *srcState );
+
+ /*
+ * Allocation.
+ */
+
+ /* New up a state and add it to the graph. */
+ FsmState *addState();
+
+ /*
+ * Building basic machines
+ */
+
+ void concatFsm( Key c );
+ void concatFsm( Key *str, int len );
+ void concatFsmCI( Key *str, int len );
+ void orFsm( Key *set, int len );
+ void rangeFsm( Key low, Key high );
+ void rangeStarFsm( Key low, Key high );
+ void emptyFsm( );
+ void lambdaFsm( );
+
+ /*
+ * Fsm operators.
+ */
+
+ void starOp( );
+ void repeatOp( int times );
+ void optionalRepeatOp( int times );
+ void concatOp( FsmGraph *other );
+ void unionOp( FsmGraph *other );
+ void intersectOp( FsmGraph *other );
+ void subtractOp( FsmGraph *other );
+ void epsilonOp();
+ void joinOp( int startId, int finalId, FsmGraph **others, int numOthers );
+ void globOp( FsmGraph **others, int numOthers );
+ void deterministicEntry();
+
+ /*
+ * Operator workers
+ */
+
+ /* Determine if there are any entry points into a start state other than
+ * the start state. */
+ bool isStartStateIsolated();
+
+ /* Make a new start state that has no entry points. Will not change the
+ * identity of the fsm. */
+ void isolateStartState();
+
+ /* Workers for resolving epsilon transitions. */
+ bool inEptVect( EptVect *eptVect, FsmState *targ );
+ void epsilonFillEptVectFrom( FsmState *root, FsmState *from, bool parentLeaving );
+ void resolveEpsilonTrans( MergeData &md );
+
+ /* Workers for concatenation and union. */
+ void doConcat( FsmGraph *other, StateSet *fromStates, bool optional );
+ void doOr( FsmGraph *other );
+
+ /*
+ * Final states
+ */
+
+ /* Unset any final states that are no longer to be final
+ * due to final bits. */
+ void unsetIncompleteFinals();
+ void unsetKilledFinals();
+
+ /* Bring in other's entry points. Assumes other's states are going to be
+ * copied into this machine. */
+ void copyInEntryPoints( FsmGraph *other );
+
+ /* Ordering states. */
+ void depthFirstOrdering( FsmState *state );
+ void depthFirstOrdering();
+ void sortStatesByFinal();
+
+ /* Set sequential state numbers (base presumably gives the first number; confirm). */
+ void setStateNumbers( int base );
+
+ /* Unset all final states. */
+ void unsetAllFinStates();
+
+ /* Set the bits of final states and clear the bits of non final states. */
+ void setFinBits( int finStateBits );
+
+ /*
+ * Self-consistency checks.
+ */
+
+ /* Run a sanity check on the machine. */
+ void verifyIntegrity();
+
+ /* Verify that there are no unreachable states, or dead end states. */
+ void verifyReachability();
+ void verifyNoDeadEndStates();
+
+ /*
+ * Path pruning
+ */
+
+ /* Mark states reachable from state in reverse (presumably along in-transitions; confirm). */
+ void markReachableFromHereReverse( FsmState *state );
+
+ /* Mark all states reachable from state. */
+ void markReachableFromHere( FsmState *state );
+ void markReachableFromHereStopFinal( FsmState *state );
+
+ /* Removes dead end states (presumably states from which no final state
+ * can be reached; confirm against the implementation). */
+ void removeDeadEndStates();
+
+ /* Removes states that cannot be reached by any path in the fsm and are
+ * thus wasted silicon. */
+ void removeUnreachableStates();
+
+ /* Remove error actions from states on which the error transition will
+ * never be taken. */
+ bool outListCovers( FsmState *state );
+ bool anyErrorRange( FsmState *state );
+
+ /* Remove states that are on the misfit list. */
+ void removeMisfits();
+
+ /*
+ * FSM Minimization
+ */
+
+ /* Minimization by partitioning. */
+ void minimizePartition1();
+ void minimizePartition2();
+
+ /* Minimize the final state Machine. The result is the minimal fsm. Slow
+ * but stable, correct minimization. Uses n^2 space (lookout) and average
+ * n^2 time. Worst case n^3 time, but that is a very rare case. */
+ void minimizeStable();
+
+ /* Minimize the final state machine. Does not find the minimal fsm, but a
+ * pretty good approximation. Does not use any extra space. Average n^2
+ * time. Worst case n^3 time, but that is a very rare case. */
+ void minimizeApproximate();
+
+ /* This is the worker for the minimize approximate solution. It merges
+ * states that have identical out transitions. */
+ bool minimizeRound( );
+
+ /* Given an initial partitioning of states, split partitions that have out trans
+ * to differing partitions. */
+ int partitionRound( FsmState **statePtrs, MinPartition *parts, int numParts );
+
+ /* Split partitions that have a transition to a previously split partition, until
+ * there are no more partitions to split. */
+ int splitCandidates( FsmState **statePtrs, MinPartition *parts, int numParts );
+
+ /* Fuse together states in the same partition. */
+ void fusePartitions( MinPartition *parts, int numParts );
+
+ /* Mark pairs where out final stateness differs, out trans data differs,
+ * trans pairs go to a marked pair or trans data differs. Should get
+ * a lot of pairs. */
+ void initialMarkRound( MarkIndex &markIndex );
+
+ /* One marking round on all state pairs. Considers if trans pairs go
+ * to a marked state only. Returns whether or not a pair was marked. */
+ bool markRound( MarkIndex &markIndex );
+
+ /* Move the in trans into src into dest. */
+ void inTransMove(FsmState *dest, FsmState *src);
+
+ /* Make state src and dest the same state. */
+ void fuseEquivStates(FsmState *dest, FsmState *src);
+
+ /* Find any states that didn't get marked by the marking algorithm and
+ * merge them into the primary states of their equivalence class. */
+ void fuseUnmarkedPairs( MarkIndex &markIndex );
+
+ /* Merge neighboring transitions that go to the same state and have the
+ * same transition data. */
+ void compressTransitions();
+
+ /* Returns true if there is a transition (either explicit or by a gap) to
+ * the error state. */
+ bool checkErrTrans( FsmState *state, FsmTrans *trans );
+ bool checkErrTransFinish( FsmState *state );
+ bool hasErrorTrans();
+};
+
+
+#endif /* _FSMGRAPH_H */
diff --git a/src/fsmmin.cc b/src/fsmmin.cc
new file mode 100644
index 00000000..cbb2b99f
--- /dev/null
+++ b/src/fsmmin.cc
@@ -0,0 +1,732 @@
+/*
+ * Copyright 2006-2012 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "fsmgraph.h"
+#include "mergesort.h"
+
+/* Run one splitting pass over the partitions. The states of each partition
+ * are sorted with PartitionCompare; the first run of equal states stays where
+ * it is and every further run is moved to a fresh partition taken from the
+ * end of parts. statePtrs is scratch space for the sort. Returns the partition
+ * count after the pass, which equals the numParts passed in when nothing was
+ * split. The outer loop is bounded by the running count, so partitions
+ * created during the pass are themselves examined before it ends. */
+int FsmGraph::partitionRound( FsmState **statePtrs, MinPartition *parts, int numParts )
+{
+ /* Need a mergesort object and a single partition compare. */
+ MergeSort<FsmState*, PartitionCompare> mergeSort;
+ PartitionCompare partCompare;
+
+ /* For each partition. */
+ for ( int p = 0; p < numParts; p++ ) {
+ /* Fill the pointer array with the states in the partition. */
+ StateList::Iter state = parts[p].list;
+ for ( int s = 0; state.lte(); state++, s++ )
+ statePtrs[s] = state;
+
+ /* Sort the states using the partitioning compare. */
+ int numStates = parts[p].list.length();
+ mergeSort.sort( statePtrs, numStates );
+
+ /* Assign the states into partitions based on the results of the sort.
+ * firstNewPart remembers where the partitions created from this one
+ * begin, for the pointer fix-up below. */
+ int destPart = p, firstNewPart = numParts;
+ for ( int s = 1; s < numStates; s++ ) {
+ /* If this state differs from the last then move to the next partition. */
+ if ( partCompare.compare( statePtrs[s-1], statePtrs[s] ) < 0 ) {
+ /* The new partition is the next avail spot. */
+ destPart = numParts;
+ numParts += 1;
+ }
+
+ /* If the state is not staying in the first partition, then
+ * transfer it to its destination partition. */
+ if ( destPart != p ) {
+ FsmState *state = parts[p].list.detach( statePtrs[s] );
+ parts[destPart].list.append( state );
+ }
+ }
+
+ /* Fix the partition pointer for all the states that got moved to a new
+ * partition. This must be done after the states are transferred so the
+ * result of the sort is not altered. */
+ for ( int newPart = firstNewPart; newPart < numParts; newPart++ ) {
+ StateList::Iter state = parts[newPart].list;
+ for ( ; state.lte(); state++ )
+ state->alg.partition = &parts[newPart];
+ }
+ }
+
+ return numParts;
+}
+
+/**
+ * \brief Minimize by partitioning version 1.
+ *
+ * Builds an initial partitioning of the states and then runs whole
+ * partitionRound() passes until a pass splits nothing. The states of each
+ * remaining partition are fused into one. Produces the most minimal FSM
+ * possible.
+ */
+void FsmGraph::minimizePartition1()
+{
+	/* Sorter and comparator used for the initial partitioning. */
+	MergeSort<FsmState*, InitPartitionCompare> mergeSort;
+	InitPartitionCompare initPartCompare;
+
+	/* A graph without states has nothing to minimize. */
+	const int numStates = stateList.length();
+	if ( numStates == 0 )
+		return;
+
+	/* Collect the states into an array so they can be sorted. */
+	FsmState **statePtrs = new FsmState*[numStates];
+	int filled = 0;
+	for ( StateList::Iter st = stateList; st.lte(); st++ )
+		statePtrs[filled++] = st;
+
+	/* Sort with the initial partition compare so that states it cannot
+	 * tell apart end up next to each other. */
+	mergeSort.sort( statePtrs, numStates );
+
+	/* One partition slot per state, since there can never be more
+	 * partitions than states. */
+	MinPartition *parts = new MinPartition[numStates];
+
+	/* Walk the sorted array, opening a new partition whenever a state
+	 * differs from the one before it. */
+	int lastPart = 0;
+	for ( int s = 0; s < numStates; s++ ) {
+		FsmState *cur = statePtrs[s];
+		if ( s > 0 && initPartCompare.compare( statePtrs[s-1], cur ) < 0 )
+			lastPart += 1;
+
+		MinPartition *dest = &parts[lastPart];
+		cur->alg.partition = dest;
+		dest->list.append( cur );
+	}
+
+	/* The states were appended to partition lists without being taken off
+	 * the main list first, so make the main list forget them. */
+	stateList.abandon();
+
+	/* Run rounds until one leaves the partition count unchanged. */
+	int numParts = lastPart + 1;
+	int before;
+	do {
+		before = numParts;
+		numParts = partitionRound( statePtrs, parts, before );
+	} while ( numParts != before );
+
+	/* Collapse every partition to a single state. This puts the surviving
+	 * states back on the main list. */
+	fusePartitions( parts, numParts );
+
+	delete[] statePtrs;
+	delete[] parts;
+}
+
+/* Split partitions that need splitting, decide which partitions might need
+ * to be split as a result, continue until there are no more that might need
+ * to be split. statePtrs is scratch space for sorting one partition at a
+ * time and parts is the partition array, of which the first numParts entries
+ * are in use on entry. Returns the number of partitions in use afterwards. */
+int FsmGraph::splitCandidates( FsmState **statePtrs, MinPartition *parts, int numParts )
+{
+ /* Need a mergesort and a partition compare. */
+ MergeSort<FsmState*, PartitionCompare> mergeSort;
+ PartitionCompare partCompare;
+
+ /* The lists of unsplittable (partList) and splittable partitions.
+ * Only partitions in the splittable list are checked for needing splitting.
+ * A partition's active flag is true exactly while it is on splittable. */
+ PartitionList partList, splittable;
+
+ /* Initially, all partitions are born from a split (the initial
+ * partitioning) and can cause other partitions to be split. So any
+ * partition with a state with a transition out to another partition is a
+ * candidate for splitting. This will make every partition except possibly
+ * partitions of final states split candidates. */
+ for ( int p = 0; p < numParts; p++ ) {
+ /* Assume not active. */
+ parts[p].active = false;
+
+ /* Look for a trans out of any state in the partition. */
+ for ( StateList::Iter state = parts[p].list; state.lte(); state++ ) {
+ /* If there is at least one transition out to another state then
+ * the partition becomes splittable. */
+ if ( state->outList.length() > 0 ) {
+ parts[p].active = true;
+ break;
+ }
+ }
+
+ /* If it was found active then it goes on the splittable list. */
+ if ( parts[p].active )
+ splittable.append( &parts[p] );
+ else
+ partList.append( &parts[p] );
+ }
+
+ /* While there are partitions that are splittable, pull one off and try
+ * to split it. If it splits, determine which partitions may now be split
+ * as a result of the newly split partition. */
+ while ( splittable.length() > 0 ) {
+ MinPartition *partition = splittable.detachFirst();
+
+ /* Fill the pointer array with the states in the partition. */
+ StateList::Iter state = partition->list;
+ for ( int s = 0; state.lte(); state++, s++ )
+ statePtrs[s] = state;
+
+ /* Sort the states using the partitioning compare. */
+ int numStates = partition->list.length();
+ mergeSort.sort( statePtrs, numStates );
+
+ /* Assign the states into partitions based on the results of the sort. */
+ MinPartition *destPart = partition;
+ int firstNewPart = numParts;
+ for ( int s = 1; s < numStates; s++ ) {
+ /* If this state differs from the last then move to the next partition. */
+ if ( partCompare.compare( statePtrs[s-1], statePtrs[s] ) < 0 ) {
+ /* The new partition is the next avail spot. */
+ destPart = &parts[numParts];
+ numParts += 1;
+ }
+
+ /* If the state is not staying in the first partition, then
+ * transfer it to its destination partition. */
+ if ( destPart != partition ) {
+ FsmState *state = partition->list.detach( statePtrs[s] );
+ destPart->list.append( state );
+ }
+ }
+
+ /* Fix the partition pointer for all the states that got moved to a new
+ * partition. This must be done after the states are transferred so the
+ * result of the sort is not altered. */
+ int newPart;
+ for ( newPart = firstNewPart; newPart < numParts; newPart++ ) {
+ StateList::Iter state = parts[newPart].list;
+ for ( ; state.lte(); state++ )
+ state->alg.partition = &parts[newPart];
+ }
+
+ /* Put the partition we just split and any new partitions that came out
+ * of the split onto the inactive list. */
+ partition->active = false;
+ partList.append( partition );
+ for ( newPart = firstNewPart; newPart < numParts; newPart++ ) {
+ parts[newPart].active = false;
+ partList.append( &parts[newPart] );
+ }
+
+ /* destPart only moves off partition when a new partition was created,
+ * so equality here means nothing was split and no other partition can
+ * have been affected. */
+ if ( destPart == partition )
+ continue;
+
+ /* Now determine which partitions are splittable as a result of
+ * splitting partition by walking the in lists of the states in
+ * partitions that got split. Partition is the faked first item in the
+ * loop: newPart starts one below firstNewPart so the first iteration
+ * handles partition and the following ones handle the new partitions. */
+ MinPartition *causalPart = partition;
+ newPart = firstNewPart - 1;
+ while ( newPart < numParts ) {
+ /* Loop all states in the causal partition. */
+ StateList::Iter state = causalPart->list;
+ for ( ; state.lte(); state++ ) {
+ /* Walk all transition into the state and put the partition
+ * that the from state is in onto the splittable list. */
+ for ( TransInList::Iter trans = state->inList; trans.lte(); trans++ ) {
+ MinPartition *fromPart = trans->fromState->alg.partition;
+ if ( ! fromPart->active ) {
+ fromPart->active = true;
+ partList.detach( fromPart );
+ splittable.append( fromPart );
+ }
+ }
+ }
+
+ /* On the last pass this forms the address of parts[numParts], which is
+ * never dereferenced because the loop condition fails first. */
+ newPart += 1;
+ causalPart = &parts[newPart];
+ }
+ }
+ return numParts;
+}
+
+
+/**
+ * \brief Minimize by partitioning version 2 (best alg).
+ *
+ * Builds the same initial partitioning as version 1, but then hands it to
+ * splitCandidates(), which only revisits partitions that might be
+ * splittable, until none are left. Runs faster than version 1. Produces
+ * the most minimal fsm possible.
+ */
+void FsmGraph::minimizePartition2()
+{
+	/* Sorter and comparator used for the initial partitioning. */
+	MergeSort<FsmState*, InitPartitionCompare> mergeSort;
+	InitPartitionCompare initPartCompare;
+
+	/* A graph without states has nothing to minimize. */
+	const int numStates = stateList.length();
+	if ( numStates == 0 )
+		return;
+
+	/* Gather pointers to all the states into a sortable array. */
+	FsmState **statePtrs = new FsmState*[numStates];
+	int filled = 0;
+	for ( StateList::Iter st = stateList; st.lte(); st++ )
+		statePtrs[filled++] = st;
+
+	/* Bring states that the initial compare considers equal together. */
+	mergeSort.sort( statePtrs, numStates );
+
+	/* Room for as many partitions as there are states. */
+	MinPartition *parts = new MinPartition[numStates];
+
+	/* Hand out partitions in sorted order. A state that differs from its
+	 * predecessor starts the next partition. */
+	int lastPart = 0;
+	for ( int s = 0; s < numStates; s++ ) {
+		FsmState *cur = statePtrs[s];
+		if ( s > 0 && initPartCompare.compare( statePtrs[s-1], cur ) < 0 )
+			lastPart += 1;
+
+		MinPartition *dest = &parts[lastPart];
+		cur->alg.partition = dest;
+		dest->list.append( cur );
+	}
+
+	/* The states went onto partition lists without leaving the main list,
+	 * so make the main list forget them. */
+	stateList.abandon();
+
+	/* Refine the partitioning until nothing more can be split. */
+	const int numParts = splitCandidates( statePtrs, parts, lastPart + 1 );
+
+	/* Collapse every partition to a single state. This puts the surviving
+	 * states back on the main list. */
+	fusePartitions( parts, numParts );
+
+	delete[] statePtrs;
+	delete[] parts;
+}
+
+/* First marking pass: mark every pair of states that the initial partition
+ * compare reports as different. */
+void FsmGraph::initialMarkRound( MarkIndex &markIndex )
+{
+	InitPartitionCompare initPartCompare;
+
+	/* Visit each unordered pair once. The inner walk restarts at the list
+	 * head and stops when it reaches p, so q always precedes p. */
+	for ( FsmState *p = stateList.head; p != 0; p = p->next ) {
+		for ( FsmState *q = stateList.head; q != p; q = q->next ) {
+			/* States the initial compare tells apart can never be merged. */
+			if ( initPartCompare.compare( p, q ) != 0 )
+				markIndex.markPair( p->alg.stateNum, q->alg.stateNum );
+		}
+	}
+}
+
+/* One marking pass over all state pairs. A pair not yet marked becomes
+ * marked when MarkCompare::shouldMark() says so. Returns true if at least
+ * one new pair was marked during the pass. */
+bool FsmGraph::markRound( MarkIndex &markIndex )
+{
+	MarkCompare markCompare;
+	bool markedAny = false;
+
+	/* Visit each unordered pair once. The inner walk restarts at the list
+	 * head and stops when it reaches p, so q always precedes p. */
+	for ( FsmState *p = stateList.head; p != 0; p = p->next ) {
+		for ( FsmState *q = stateList.head; q != p; q = q->next ) {
+			/* Pairs that are already marked need no further work. */
+			if ( markIndex.isPairMarked( p->alg.stateNum, q->alg.stateNum ) )
+				continue;
+
+			if ( markCompare.shouldMark( markIndex, p, q ) ) {
+				markIndex.markPair( p->alg.stateNum, q->alg.stateNum );
+				markedAny = true;
+			}
+		}
+	}
+
+	return markedAny;
+}
+
+
+/**
+ * \brief Minimize by pair marking.
+ *
+ * Decides for each pair of states whether the two are distinct. Uses O(n^2)
+ * memory and should only be used on small graphs. Produces the most minimal
+ * FSM possible.
+ */
+void FsmGraph::minimizeStable()
+{
+	/* Number the states from zero. The mark index is addressed by these
+	 * numbers. */
+	setStateNumbers( 0 );
+
+	/* Records which pairs have been found to be distinct. */
+	MarkIndex markIndex( stateList.length() );
+
+	/* Seed the index with the pairs the initial compare tells apart. */
+	initialMarkRound( markIndex );
+
+	/* Always run one marking round, then keep going for as long as the
+	 * previous round marked something new. */
+	while ( markRound( markIndex ) ) {
+	}
+
+	/* Whatever is still unmarked is equivalent and gets merged. */
+	fuseUnmarkedPairs( markIndex );
+}
+
+/* One round of approximate minimization. Sorts the states with
+ * ApproxCompare and fuses every state that compares equal to the most recent
+ * surviving state before it in sorted order. Returns true if any states were
+ * fused. */
+bool FsmGraph::minimizeRound()
+{
+	/* Capture the state count once. fuseEquivStates() removes states from
+	 * stateList, so re-reading stateList.length() in the walk below would
+	 * shrink the loop bound as states are fused and leave the tail of the
+	 * sorted array unexamined until a later round. */
+	const int numStates = stateList.length();
+
+	/* Nothing to do if there are no states. */
+	if ( numStates == 0 )
+		return false;
+
+	/* Need a mergesort on approx compare and an approx compare. */
+	MergeSort<FsmState*, ApproxCompare> mergeSort;
+	ApproxCompare approxCompare;
+
+	/* Fill up an array of pointers to the states. */
+	FsmState **statePtrs = new FsmState*[numStates];
+	StateList::Iter state = stateList;
+	for ( int s = 0; state.lte(); state++, s++ )
+		statePtrs[s] = state;
+
+	bool modified = false;
+
+	/* Sort the list. */
+	mergeSort.sort( statePtrs, numStates );
+
+	/* Walk the sorted array looking for duplicates next to each other and
+	 * merge in any duplicates. Every array entry is visited exactly once as
+	 * pState, so an entry deleted by a fuse is never looked at again. pLast
+	 * is only ever the destination of a fuse and therefore stays valid. */
+	FsmState **pLast = statePtrs;
+	for ( int i = 1; i < numStates; i++ ) {
+		FsmState **pState = statePtrs + i;
+		if ( approxCompare.compare( *pLast, *pState ) == 0 ) {
+			/* Last and pState are the same, so fuse together. Move forward
+			 * with pState but not with pLast, so that any further identical
+			 * states are fused into the same survivor. */
+			fuseEquivStates( *pLast, *pState );
+			modified = true;
+		}
+		else {
+			/* Last and this are different, so do not merge them. Move pLast
+			 * to the current state (it may be well behind after merging
+			 * many states). */
+			pLast = pState;
+		}
+	}
+
+	delete[] statePtrs;
+	return modified;
+}
+
+/**
+ * \brief Minimize by an approximation.
+ *
+ * Repeatedly merges states that have transitions out to the same set of
+ * states on the same set of keys, until a round finds nothing left to merge.
+ * Does not produce the most minimal FSM possible.
+ */
+void FsmGraph::minimizeApproximate()
+{
+	/* Each successful round can expose new identical states, so keep going
+	 * until a round reports that it changed nothing. */
+	while ( minimizeRound() ) {
+	}
+}
+
+
+/* Delete every state that cannot be reached from the start state or from one
+ * of the entry points. Reachable states are marked first, then the state
+ * list is swept: unmarked states are removed and marked ones are unmarked
+ * again. */
+void FsmGraph::removeUnreachableStates()
+{
+	/* Requires misfit accounting to be off and the misfit list empty. */
+	assert( !misfitAccounting && misfitList.length() == 0 );
+
+	/* Mark whatever the start state and the entry points lead to. */
+	markReachableFromHere( startState );
+	for ( EntryMap::Iter en = entryPoints; en.lte(); en++ )
+		markReachableFromHere( en->value );
+
+	/* Sweep the list. The successor is fetched before the current state
+	 * may be deleted. */
+	FsmState *following = 0;
+	for ( FsmState *cur = stateList.head; cur; cur = following ) {
+		following = cur->next;
+
+		if ( !( cur->stateBits & SB_ISMARKED ) ) {
+			/* Never reached: take it out of the graph and free it. */
+			detachState( cur );
+			stateList.detach( cur );
+			delete cur;
+		}
+		else {
+			/* Reached: keep it and clear the mark for the next user. */
+			cur->stateBits &= ~ SB_ISMARKED;
+		}
+	}
+}
+
+/* Returns true when the out transitions of state leave no gap between
+ * keyOps->minKey and keyOps->maxKey: the first range starts at the minimum
+ * key, each later range starts right after the previous one ends, and the
+ * last range reaches the maximum key. */
+bool FsmGraph::outListCovers( FsmState *state )
+{
+	/* With no ranges at all nothing is covered. */
+	if ( state->outList.length() == 0 )
+		return false;
+
+	/* A first range starting above the lower bound leaves a gap. */
+	TransList::Iter cur = state->outList.first();
+	if ( keyOps->minKey < cur->lowKey )
+		return false;
+
+	/* From the second range on, check each low key against the high key of
+	 * the range before it. */
+	for ( cur.increment(); cur.lte(); cur++ ) {
+		/* One below this range's low key is where the previous range has
+		 * to end at the latest. */
+		Key belowLow = cur->lowKey;
+		belowLow.decrement();
+		if ( cur->prev->highKey < belowLow )
+			return false;
+	}
+
+	/* Covered only if the final range extends to the upper bound. */
+	TransList::Iter lastTrans = state->outList.last();
+	return !( lastTrans->highKey < keyOps->maxKey );
+}
+
+/* Delete every state that has no path to a final state. The graph is
+ * traversed in reverse from each final state, marking what is seen, and the
+ * states left unmarked are then removed. */
+void FsmGraph::removeDeadEndStates()
+{
+	/* Requires misfit accounting to be off and the misfit list empty. */
+	assert( !misfitAccounting && misfitList.length() == 0 );
+
+	/* Mark backwards from every final state. */
+	FsmState **finals = finStateSet.data;
+	const int numFinals = finStateSet.length();
+	for ( int i = 0; i < numFinals; i++ )
+		markReachableFromHereReverse( finals[i] );
+
+	/* The start state is kept even when the machine accepts nothing. It is
+	 * marked only after the traversals above so that an early mark cannot
+	 * make a traversal skip the start state's in transitions. */
+	startState->stateBits |= SB_ISMARKED;
+
+	/* Sweep the list. The successor is fetched before the current state
+	 * may be deleted. */
+	FsmState *following = 0;
+	for ( FsmState *cur = stateList.head; cur != 0; cur = following ) {
+		following = cur->next;
+
+		if ( !( cur->stateBits & SB_ISMARKED ) ) {
+			/* No route to a final state: remove it from the graph. */
+			detachState( cur );
+			stateList.detach( cur );
+			delete cur;
+		}
+		else {
+			/* Useful state: keep it and clear the mark. */
+			cur->stateBits &= ~ SB_ISMARKED;
+		}
+	}
+}
+
+/* Empty the misfit list, deleting every state on it. Misfit accounting
+ * should be on when this is called: detaching one state will likely put
+ * further states on the misfit list, and the loop then removes those too. */
+void FsmGraph::removeMisfits()
+{
+	while ( misfitList.length() > 0 ) {
+		/* Always work on whatever is currently first on the list. */
+		FsmState *misfit = misfitList.head;
+
+		/* Cut the state out of the graph. */
+		detachState( misfit );
+
+		/* Detaching can only remove in transitions, so a state that was on
+		 * the misfit list before the detach is still on it now. */
+		misfitList.detach( misfit );
+		delete misfit;
+	}
+}
+
+/* Merge src into dest, the two having been found equivalent. The in
+ * transitions of src are moved over to dest, after which src is detached
+ * from the graph, taken off the main state list and deleted. The caller
+ * must not use src afterwards. */
+void FsmGraph::fuseEquivStates( FsmState *dest, FsmState *src )
+{
+	/* Fusing a state into itself would end with deleting the survivor. */
+	assert( dest != src );
+
+	/* Everything that used to lead to src now leads to dest. */
+	inTransMove( dest, src );
+
+	/* src is no longer needed, so remove and free it. */
+	detachState( src );
+	stateList.detach( src );
+	delete src;
+}
+
+/* Merge every state into the primary state of its equivalence class, using
+ * the pairs that the marking algorithm left unmarked.
+ *
+ * The primary state of an equivalence class is the first state on the list
+ * that belongs to the class. Every class has one, including classes with a
+ * single member.
+ *
+ * For a given p the inner walk looks at the states q that come before p and
+ * fuses p into the first q it is not marked against. That q is always a
+ * primary state: if it were not, it would already have been fused into an
+ * earlier state and deleted, and p would be equivalent to that earlier
+ * state, contradicting q being the first match. */
+void FsmGraph::fuseUnmarkedPairs( MarkIndex &markIndex )
+{
+	/* Visit each unordered pair once. The inner walk restarts at the list
+	 * head and stops when it reaches p. The successor of p is saved up
+	 * front because p may be deleted by the fuse. */
+	FsmState *nextP = 0;
+	for ( FsmState *p = stateList.head; p != 0; p = nextP ) {
+		nextP = p->next;
+
+		for ( FsmState *q = stateList.head; q != p; q = q->next ) {
+			/* A marked pair is known to be distinct, so keep looking. */
+			if ( markIndex.isPairMarked( p->alg.stateNum, q->alg.stateNum ) )
+				continue;
+
+			/* Unmarked means equivalent: p disappears into q. */
+			fuseEquivStates( q, p );
+			break;
+		}
+	}
+}
+
+/* Reduce each of the first numParts partitions to its first state by fusing
+ * all of its other states into that one. The surviving states end up on the
+ * main state list and the partition lists are left abandoned. */
+void FsmGraph::fusePartitions( MinPartition *parts, int numParts )
+{
+	for ( int p = 0; p < numParts; p++ ) {
+		MinPartition &part = parts[p];
+
+		/* Every partition is assumed to hold at least one state. Both
+		 * pointers are read before the append below rewrites the first
+		 * state's list links. */
+		FsmState *survivor = part.list.head;
+		FsmState *toFuse = survivor->next;
+
+		/* The survivor goes back on the main list. It is not taken off
+		 * the partition list first. */
+		stateList.append( survivor );
+
+		/* Fuse each remaining state of the partition into the survivor. */
+		FsmState *following = 0;
+		for ( ; toFuse != 0; toFuse = following ) {
+			/* Remember the successor now, because the append below
+			 * overwrites the link. */
+			following = toFuse->next;
+
+			/* Detaching a state from the graph only works while it is on
+			 * the main list, so put it there before fusing. The partition
+			 * list is not maintained. */
+			stateList.append( toFuse );
+			fuseEquivStates( survivor, toFuse );
+		}
+
+		/* The partition list still refers to states that were moved or
+		 * deleted behind its back, so make it forget them. */
+		part.list.abandon();
+	}
+}
+
+
+/* Merge neighboring transitions that go to the same state and have the same
+ * action table. Two neighbors are merged only when the second one's low key
+ * directly follows the first one's high key; the first transition is then
+ * widened to cover both ranges and the second one is deleted. */
+void FsmGraph::compressTransitions()
+{
+ for ( StateList::Iter st = stateList; st.lte(); st++ ) {
+ /* A state needs at least two out transitions to have anything to merge. */
+ if ( st->outList.length() > 1 ) {
+ /* trans is the transition being grown and next is its right hand
+ * neighbor. The loop advances only in the else branch below. */
+ for ( TransList::Iter trans = st->outList, next = trans.next(); next.lte(); ) {
+ /* One below next's low key, which is where trans has to end for
+ * the two ranges to be adjacent. */
+ Key nextLow = next->lowKey;
+ nextLow.decrement();
+ if ( trans->highKey == nextLow && trans->toState == next->toState &&
+ CmpActionTable::compare( trans->actionTable, next->actionTable ) == 0 )
+ {
+ /* Widen trans over next's range, then remove next from the out
+ * list and the graph before freeing it. */
+ trans->highKey = next->highKey;
+ st->outList.detach( next );
+ detachTrans( next->fromState, next->toState, next );
+ delete next;
+ /* next was just deleted, so fetch the new neighbor of trans. trans
+ * stays put and may absorb further neighbors. */
+ next = trans.next();
+ }
+ else {
+ trans.increment();
+ next.increment();
+ }
+ }
+ }
+ }
+}
diff --git a/src/fsmrun.h b/src/fsmrun.h
new file mode 100644
index 00000000..f92b5e5e
--- /dev/null
+++ b/src/fsmrun.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2007-2012 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/* Include guard. NOTE(review): the guard name says FSMRUN2 while the file is
+ * fsmrun.h, and identifiers beginning with an underscore followed by an
+ * uppercase letter are reserved for the implementation. Renaming it would
+ * need a check for other files that test this macro. */
+#ifndef _FSMRUN2_H
+#define _FSMRUN2_H
+
+#include <input.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* No declarations at present. The header only pulls in input.h. */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _FSMRUN2_H */
diff --git a/src/fsmstate.cc b/src/fsmstate.cc
new file mode 100644
index 00000000..dae1479b
--- /dev/null
+++ b/src/fsmstate.cc
@@ -0,0 +1,467 @@
+/*
+ * Copyright 2006-2012 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <string.h>
+#include <assert.h>
+#include "fsmgraph.h"
+
+#include <iostream>
+using namespace std;
+
+/* Construct a mark index for a specified number of states. Allocates an
+ * array of states^2 flags and clears them, so every pair starts out
+ * unmarked. */
+MarkIndex::MarkIndex( int states ) : numStates(states)
+{
+	/* Total pairs is states^2. Only half of these are actually used, but
+	 * they are all allocated to make indexing into the array easier. The
+	 * product is computed in size_t because states * states in int
+	 * overflows, which is undefined behaviour, once states exceeds 46340
+	 * with a 32-bit int. */
+	size_t total = (size_t)states * (size_t)states;
+
+	/* Plain new[] leaves the flags uninitialized, so zero them manually. */
+	array = new bool[total];
+	memset( array, 0, sizeof(bool) * total );
+}
+
+/* Free the array of flags that the constructor allocated with new[]. */
+MarkIndex::~MarkIndex()
+{
+ delete[] array;
+}
+
+/* Mark a pair of states. States are specified by their number and the
+ * order in which the two are given does not matter. */
+void MarkIndex::markPair(int state1, int state2)
+{
+	/* The larger number selects the row and the smaller one the column, so
+	 * both orderings of a pair land on the same flag. The index is computed
+	 * in size_t so that row * numStates cannot overflow int. */
+	size_t row = ( state1 >= state2 ) ? state1 : state2;
+	size_t col = ( state1 >= state2 ) ? state2 : state1;
+
+	array[row * (size_t)numStates + col] = true;
+}
+
+/* Returns true if the pair of states is marked and false otherwise. The
+ * order in which the two states are given does not matter. */
+bool MarkIndex::isPairMarked(int state1, int state2)
+{
+	/* The larger number selects the row and the smaller one the column, so
+	 * both orderings of a pair land on the same flag. The index is computed
+	 * in size_t so that row * numStates cannot overflow int. */
+	size_t row = ( state1 >= state2 ) ? state1 : state2;
+	size_t col = ( state1 >= state2 ) ? state2 : state1;
+
+	return array[row * (size_t)numStates + col];
+}
+
+/* Create a new fsm state. The state starts with no out transitions, no in
+ * transitions, no priority or action data, no state bits and a null eof
+ * target. Every container member is default constructed and every pointer
+ * or counter member is set to zero. */
+FsmState::FsmState()
+:
+ /* No out or in transitions. */
+ outList(),
+ inList(),
+
+ /* No entry points, or epsilon trans. */
+ entryIds(),
+ epsilonTrans(),
+
+ /* Conditions. */
+ stateCondList(),
+
+ /* No transitions in from other states. */
+ foreignInTrans(0),
+
+ /* Only used during merging. Normally null. */
+ stateDictEl(0),
+ eptVect(0),
+
+ /* No state identification bits. */
+ stateBits(0),
+
+ /* No Priority data. */
+ outPriorTable(),
+
+ /* No Action data. */
+ toStateActionTable(),
+ fromStateActionTable(),
+ outActionTable(),
+ outCondSet(),
+ errActionTable(),
+ eofActionTable(),
+
+ /* No eof target. */
+ eofTarget(0)
+{
+}
+
+/* Copy a state. The state data (entry ids, epsilon transitions, conditions,
+ * state bits, priority and action tables) is copied from other. The out
+ * transitions are duplicated, but each duplicate still names the target
+ * state of the original, so the targets have to be corrected once all the
+ * states have been created. That correction is left up to the FsmGraph copy
+ * constructor. The in list starts empty, and foreignInTrans, stateDictEl,
+ * eptVect and eofTarget are reset to zero rather than copied. */
+FsmState::FsmState(const FsmState &other)
+:
+ /* Both lists start out empty. The out list is filled by the loop in the
+ * constructor body. */
+ outList(),
+ inList(),
+
+ /* Duplicate the entry id set and epsilon transitions. These
+ * are sets of integers and as such need no fixing. */
+ entryIds(other.entryIds),
+ epsilonTrans(other.epsilonTrans),
+
+ /* Copy in the elements of the conditions. */
+ stateCondList( other.stateCondList ),
+
+ /* No transitions in from other states. */
+ foreignInTrans(0),
+
+ /* This is only used during merging. Normally null. */
+ stateDictEl(0),
+ eptVect(0),
+
+ /* Fsm state data. */
+ stateBits(other.stateBits),
+
+ /* Copy in priority data. */
+ outPriorTable(other.outPriorTable),
+
+ /* Copy in action data. */
+ toStateActionTable(other.toStateActionTable),
+ fromStateActionTable(other.fromStateActionTable),
+ outActionTable(other.outActionTable),
+ outCondSet(other.outCondSet),
+ errActionTable(other.errActionTable),
+ eofActionTable(other.eofActionTable),
+
+ /* The eof target is not copied from other. */
+ eofTarget(0)
+{
+ /* Duplicate all the transitions. */
+ for ( TransList::Iter trans = other.outList; trans.lte(); trans++ ) {
+ /* Duplicate and store the original target in the transition. This will
+ * be corrected once all the states have been created. */
+ FsmTrans *newTrans = new FsmTrans(*trans);
+ newTrans->toState = trans->toState;
+ outList.append( newTrans );
+ }
+}
+
+/* Release the state dict element, if there is one. Everything else is left
+ * up to the FsmGraph destructor. */
+FsmState::~FsmState()
+{
+	/* Deleting a null pointer does nothing, so no test is needed. */
+	delete stateDictEl;
+}
+
+/* Ordering on states used by the approximate minimization. The idea of the
+ * approximate compare is that two states it finds to be the same (result
+ * zero) can be merged immediately. Returns a negative, zero or positive
+ * value. */
+int ApproxCompare::compare( const FsmState *state1 , const FsmState *state2 )
+{
+	/* Final state status comes first. A final state orders before a
+	 * state that is not final. */
+	const bool final1 = ( state1->stateBits & SB_ISFINAL ) != 0;
+	const bool final2 = ( state2->stateBits & SB_ISFINAL ) != 0;
+	if ( final1 != final2 )
+		return final1 ? -1 : 1;
+
+	/* Next the epsilon transition sets. */
+	int cmp = CmpEpsilonTrans::compare( state1->epsilonTrans,
+			state2->epsilonTrans );
+	if ( cmp != 0 )
+		return cmp;
+
+	/* Then whatever compareStateData() looks at. */
+	cmp = FsmGraph::compareStateData( state1, state2 );
+	if ( cmp != 0 )
+		return cmp;
+
+	/* Finally walk the out transitions of both states in step and do a
+	 * full compare on every range. A range present in only one of the
+	 * states is compared against a null transition. */
+	PairIter<FsmTrans> outPair( state1->outList.head, state2->outList.head );
+	for ( ; !outPair.end(); outPair++ ) {
+		int rangeCmp = 0;
+
+		switch ( outPair.userState ) {
+		case RangeInS1:
+			rangeCmp = FsmGraph::compareFullPtr( outPair.s1Tel.trans, 0 );
+			break;
+		case RangeInS2:
+			rangeCmp = FsmGraph::compareFullPtr( 0, outPair.s2Tel.trans );
+			break;
+		case RangeOverlap:
+			rangeCmp = FsmGraph::compareFullPtr(
+					outPair.s1Tel.trans, outPair.s2Tel.trans );
+			break;
+		case BreakS1:
+		case BreakS2:
+			/* Nothing to compare at a break. */
+			break;
+		}
+
+		if ( rangeCmp != 0 )
+			return rangeCmp;
+	}
+
+	/* No difference was found anywhere, so the states are deemed equal. */
+	return 0;
+}
+
+/* Compare for the sort that does the initial partition of compaction.
+ * Orders states by final state status, epsilon transition sets, state data,
+ * state conditions and finally the data on their out transitions, in that
+ * order. Returns a negative, zero or positive value. Zero means the states
+ * belong in the same initial partition. */
+int InitPartitionCompare::compare( const FsmState *state1 , const FsmState *state2 )
+{
+ int compareRes;
+
+ /* Test final state status. A final state orders before a non-final one. */
+ if ( (state1->stateBits & SB_ISFINAL) && !(state2->stateBits & SB_ISFINAL) )
+ return -1;
+ else if ( !(state1->stateBits & SB_ISFINAL) && (state2->stateBits & SB_ISFINAL) )
+ return 1;
+
+ /* Test epsilon transition sets. */
+ compareRes = CmpEpsilonTrans::compare( state1->epsilonTrans,
+ state2->epsilonTrans );
+ if ( compareRes != 0 )
+ return compareRes;
+
+ /* Compare the state data. */
+ compareRes = FsmGraph::compareStateData( state1, state2 );
+ if ( compareRes != 0 )
+ return compareRes;
+
+ /* Use a pair iterator to test the condition pairs. */
+ PairIter<StateCond> condPair( state1->stateCondList.head, state2->stateCondList.head );
+ for ( ; !condPair.end(); condPair++ ) {
+ switch ( condPair.userState ) {
+ /* A condition range present in only one of the states decides the
+ * order straight away. */
+ case RangeInS1:
+ return 1;
+ case RangeInS2:
+ return -1;
+
+ case RangeOverlap: {
+ /* The condition spaces are ordered by pointer value, so this order is
+ * only meaningful as an equal or not equal test between runs. */
+ CondSpace *condSpace1 = condPair.s1Tel.trans->condSpace;
+ CondSpace *condSpace2 = condPair.s2Tel.trans->condSpace;
+ if ( condSpace1 < condSpace2 )
+ return -1;
+ else if ( condSpace1 > condSpace2 )
+ return 1;
+ break;
+ }
+ case BreakS1:
+ case BreakS2:
+ break;
+ }
+ }
+
+ /* Use a pair iterator to test the transition pairs. A range present in
+ * only one of the states is compared against a null transition. */
+ PairIter<FsmTrans> outPair( state1->outList.head, state2->outList.head );
+ for ( ; !outPair.end(); outPair++ ) {
+ switch ( outPair.userState ) {
+
+ case RangeInS1:
+ compareRes = FsmGraph::compareDataPtr( outPair.s1Tel.trans, 0 );
+ if ( compareRes != 0 )
+ return compareRes;
+ break;
+
+ case RangeInS2:
+ compareRes = FsmGraph::compareDataPtr( 0, outPair.s2Tel.trans );
+ if ( compareRes != 0 )
+ return compareRes;
+ break;
+
+ case RangeOverlap:
+ compareRes = FsmGraph::compareDataPtr(
+ outPair.s1Tel.trans, outPair.s2Tel.trans );
+ if ( compareRes != 0 )
+ return compareRes;
+ break;
+
+ case BreakS1:
+ case BreakS2:
+ break;
+ }
+ }
+
+ return 0;
+}
+
+/* Ordering used by the partitioning sort. Walks the out transitions of both
+ * states range by range and returns the first non-zero result produced by
+ * FsmGraph::comparePartPtr, or zero if every range compares equal. */
+int PartitionCompare::compare( const FsmState *state1, const FsmState *state2 )
+{
+	PairIter<FsmTrans> pair( state1->outList.head, state2->outList.head );
+	while ( !pair.end() ) {
+		int res = 0;
+
+		switch ( pair.userState ) {
+		case RangeInS1:
+			/* Range present in state1 only. */
+			res = FsmGraph::comparePartPtr( pair.s1Tel.trans, 0 );
+			break;
+		case RangeInS2:
+			/* Range present in state2 only. */
+			res = FsmGraph::comparePartPtr( 0, pair.s2Tel.trans );
+			break;
+		case RangeOverlap:
+			/* Range present in both states. */
+			res = FsmGraph::comparePartPtr( pair.s1Tel.trans, pair.s2Tel.trans );
+			break;
+		case BreakS1:
+		case BreakS2:
+			break;
+		}
+
+		if ( res != 0 )
+			return res;
+
+		pair++;
+	}
+
+	return 0;
+}
+
+/* Decide whether a pair of states should be marked. Walks the out
+ * transitions of both states range by range and returns true as soon as
+ * FsmGraph::shouldMarkPtr reports true for a range; returns false when no
+ * range does. */
+bool MarkCompare::shouldMark( MarkIndex &markIndex, const FsmState *state1,
+		const FsmState *state2 )
+{
+	PairIter<FsmTrans> pair( state1->outList.head, state2->outList.head );
+	while ( !pair.end() ) {
+		bool mark = false;
+
+		switch ( pair.userState ) {
+		case RangeInS1:
+			mark = FsmGraph::shouldMarkPtr( markIndex, pair.s1Tel.trans, 0 );
+			break;
+		case RangeInS2:
+			mark = FsmGraph::shouldMarkPtr( markIndex, 0, pair.s2Tel.trans );
+			break;
+		case RangeOverlap:
+			mark = FsmGraph::shouldMarkPtr( markIndex,
+					pair.s1Tel.trans, pair.s2Tel.trans );
+			break;
+		case BreakS1:
+		case BreakS2:
+			break;
+		}
+
+		if ( mark )
+			return true;
+
+		pair++;
+	}
+
+	return false;
+}
+
+/*
+ * Transition Comparison.
+ */
+
+/* Compare the partitions that two transitions lead to.
+ *
+ * trans1 may be null, in which case the result is 0. When trans1 is set,
+ * trans2 must be set as well: the code dereferences it, and the initial
+ * partitioning is expected to guarantee it. The assertion makes a violation
+ * of that invariant fail loudly instead of dereferencing a null pointer.
+ *
+ * A transition with no target orders before one with a target; two
+ * transitions with targets are ordered by their targets' partition pointers. */
+int FsmGraph::comparePartPtr( FsmTrans *trans1, FsmTrans *trans2 )
+{
+	if ( trans1 != 0 ) {
+		/* If trans1 is set then so should trans2. The initial partitioning
+		 * guarantees this for us. */
+		assert( trans2 != 0 );
+
+		if ( trans1->toState == 0 && trans2->toState != 0 )
+			return -1;
+		else if ( trans1->toState != 0 && trans2->toState == 0 )
+			return 1;
+		else if ( trans1->toState != 0 ) {
+			/* Both of targets are set. */
+			return CmpOrd< MinPartition* >::compare(
+				trans1->toState->alg.partition, trans2->toState->alg.partition );
+		}
+	}
+	return 0;
+}
+
+
+/* Order two transition pointers by their transition data only; the source
+ * and target states are not looked at. Either pointer may be null: a null
+ * pointer orders before a set one, and two nulls are equal. */
+int FsmGraph::compareDataPtr( FsmTrans *trans1, FsmTrans *trans2 )
+{
+	/* Neither set: nothing to compare. */
+	if ( trans1 == 0 && trans2 == 0 )
+		return 0;
+
+	/* Exactly one set. */
+	if ( trans1 == 0 )
+		return -1;
+	if ( trans2 == 0 )
+		return 1;
+
+	/* Both set: defer to the data comparison. */
+	return compareTransData( trans1, trans2 );
+}
+
+/* Order two transition pointers by target state and then by transition
+ * data. The source state is not considered. Either pointer may be null: here
+ * a set pointer orders before a null one, and two nulls are equal. When both
+ * transitions have no target, the transition data is not compared. */
+int FsmGraph::compareFullPtr( FsmTrans *trans1, FsmTrans *trans2 )
+{
+	const bool have1 = trans1 != 0;
+	const bool have2 = trans2 != 0;
+
+	/* Exactly one of the transitions is set. */
+	if ( have1 != have2 )
+		return have1 ? -1 : 1;
+
+	/* Neither is set. */
+	if ( !have1 )
+		return 0;
+
+	/* Both are set: order by target state pointer first. */
+	if ( trans1->toState < trans2->toState )
+		return -1;
+	if ( trans1->toState > trans2->toState )
+		return 1;
+
+	/* Same target. Only compare the data when there is a target. */
+	if ( trans1->toState == 0 )
+		return 0;
+
+	return compareTransData( trans1, trans2 );
+}
+
+
+/* Report whether the pair of states reached by two transitions is already
+ * marked in markIndex. Returns false when neither transition is set. Having
+ * exactly one transition set is asserted against; with assertions compiled
+ * out that case also returns false. */
+bool FsmGraph::shouldMarkPtr( MarkIndex &markIndex, FsmTrans *trans1,
+		FsmTrans *trans2 )
+{
+	/* Neither of the transitions are set. */
+	if ( trans1 == 0 && trans2 == 0 )
+		return false;
+
+	if ( trans1 == 0 || trans2 == 0 ) {
+		/* Exactly one of the transitions is set. The initial mark round
+		 * should rule out this case. */
+		assert( false );
+		return false;
+	}
+
+	/* Both of the transitions are set. If the target pair is marked, then
+	 * the pair we are considering gets marked. */
+	return markIndex.isPairMarked( trans1->toState->alg.stateNum,
+			trans2->toState->alg.stateNum );
+}
+
+
diff --git a/src/global.h b/src/global.h
new file mode 100644
index 00000000..d67c55e4
--- /dev/null
+++ b/src/global.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright 2006-2012 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef __COLM_GLOBAL_H
+#define __COLM_GLOBAL_H
+
+#include <stdio.h>
+#include <iostream>
+#include <fstream>
+#include <fstream>
+#include <string>
+
+#include "config.h"
+#include "defs.h"
+#include "avltree.h"
+#include "keyops.h"
+
+#define PROGNAME "colm"
+
+/* Graphviz option flag and the running error count. */
+extern bool genGraphviz;
+extern int gblErrorCount;
+
+std::ostream &error();
+
+/* IO filenames and stream. */
+extern const char *outputFileName;
+extern std::ostream *outStream;
+extern bool generateGraphviz;
+extern bool branchPointInfo;
+extern bool verbose, logging;
+extern bool addUniqueEmptyProductions;
+
+extern int gblErrorCount;
+extern char startDefName[];
+
+/* Error reporting. */
+std::ostream &error();
+std::ostream &error( int first_line, int first_column );
+std::ostream &warning( );
+std::ostream &warning( int first_line, int first_column );
+
+extern std::ostream *outStream;
+extern bool printStatistics;
+
+extern int gblErrorCount;
+extern char machineMain[];
+extern bool gblLibrary;
+extern const char *gblExportTo;
+
+/* Location in an input file. Passed by reference to the error() and
+ * warning() overloads declared in this header. */
+struct InputLoc
+{
+ /* Name of the file the location refers to. */
+ const char *fileName;
+ /* Line and column within that file. */
+ int line;
+ int col;
+};
+
+/* Error reporting. */
+std::ostream &error();
+std::ostream &error( const InputLoc &loc );
+std::ostream &warning( const InputLoc &loc );
+
+void scan( char *fileName, std::istream &input, std::ostream &output );
+void terminateAllParsers( );
+void checkMachines( );
+
+void xmlEscapeHost( std::ostream &out, char *data, int len );
+void openOutput();
+void escapeLiteralString( std::ostream &out, const char *data );
+
+#endif
diff --git a/src/input.c b/src/input.c
new file mode 100644
index 00000000..d181b556
--- /dev/null
+++ b/src/input.c
@@ -0,0 +1,847 @@
+/*
+ * Copyright 2007-2012 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <input.h>
+#include <fsmrun.h>
+#include <pdarun.h>
+#include <debug.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <unistd.h>
+
+#define true 1
+#define false 0
+
+/* Allocate a zero-filled RunBuf. A zeroed buffer has no data, a zero
+ * offset, and no list links. Callers use the result without checking it, so
+ * an allocation failure terminates the program here instead of letting a
+ * null pointer be dereferenced. */
+RunBuf *newRunBuf()
+{
+	RunBuf *rb = (RunBuf*)calloc( 1, sizeof(RunBuf) );
+	if ( rb == 0 ) {
+		fprintf( stderr, "out of memory allocating a run buffer\n" );
+		abort();
+	}
+	return rb;
+}
+
+void initFdFuncs();
+void initFileFuncs();
+void initPatternFuncs();
+void initReplFuncs();
+
+struct SourceFuncs dynamicFuncs;
+struct SourceFuncs fileFuncs;
+struct SourceFuncs fdFuncs;
+
+/* Reset the position counters of a source stream: byte offset zero, first
+ * line, first column. No other field is touched. */
+void initSourceStream( SourceStream *inputStream )
+{
+	/* FIXME: correct values here. */
+	inputStream->byte = 0;
+	inputStream->column = 1;
+	inputStream->line = 1;
+}
+
+/* Release every buffer queued on a source stream. Buffers that carry a tree
+ * (token, ignore and source types) drop their reference with treeDownref
+ * before being freed. Both ends of the queue are reset afterwards so that
+ * queueTail does not keep pointing at freed memory. */
+void clearSourceStream( struct ColmProgram *prg, Tree **sp, SourceStream *sourceStream )
+{
+	RunBuf *buf = sourceStream->queue;
+	while ( buf != 0 ) {
+		RunBuf *next = buf->next;
+
+		switch ( buf->type ) {
+			case RunBufDataType:
+				break;
+
+			case RunBufTokenType:
+			case RunBufIgnoreType:
+			case RunBufSourceType:
+				treeDownref( prg, sp, buf->tree );
+				break;
+		}
+
+		free( buf );
+		buf = next;
+	}
+
+	sourceStream->queue = 0;
+	sourceStream->queueTail = 0;
+}
+
+/* Create a source stream that reads from a stdio FILE. The stream starts
+ * zeroed, positioned at line 1, column 1, and dispatches through fileFuncs.
+ * An allocation failure terminates the program rather than writing through
+ * a null pointer. */
+SourceStream *newSourceStreamFile( FILE *file )
+{
+	SourceStream *is = (SourceStream*)calloc( 1, sizeof(SourceStream) );
+	if ( is == 0 ) {
+		fprintf( stderr, "out of memory allocating a source stream\n" );
+		abort();
+	}
+	is->line = 1;
+	is->column = 1;
+	is->file = file;
+	is->funcs = &fileFuncs;
+	return is;
+}
+
+/* Create a source stream that reads from a file descriptor. The stream
+ * starts zeroed, positioned at line 1, column 1, and dispatches through
+ * fdFuncs. An allocation failure terminates the program rather than writing
+ * through a null pointer. */
+SourceStream *newSourceStreamFd( long fd )
+{
+	SourceStream *is = (SourceStream*)calloc( 1, sizeof(SourceStream) );
+	if ( is == 0 ) {
+		fprintf( stderr, "out of memory allocating a source stream\n" );
+		abort();
+	}
+	is->line = 1;
+	is->column = 1;
+	is->fd = fd;
+	is->funcs = &fdFuncs;
+	return is;
+}
+
+/* Unlink and return the first buffer of the stream's queue. The queue must
+ * not be empty. The caller takes ownership of the returned buffer. */
+static RunBuf *sourceStreamPopHead( SourceStream *is )
+{
+	RunBuf *head = is->queue;
+	RunBuf *rest = head->next;
+
+	is->queue = rest;
+	if ( rest != 0 )
+		rest->prev = 0;
+	else
+		is->queueTail = 0;
+
+	return head;
+}
+
+/* Link runBuf in as the last buffer of the stream's queue. */
+static void sourceStreamAppend( SourceStream *is, RunBuf *runBuf )
+{
+	runBuf->next = 0;
+
+	if ( is->queue != 0 ) {
+		/* Hook onto the current tail. */
+		RunBuf *oldTail = is->queueTail;
+		oldTail->next = runBuf;
+		runBuf->prev = oldTail;
+	}
+	else {
+		/* Empty queue: the new buffer is also the head. */
+		runBuf->prev = 0;
+		is->queue = runBuf;
+	}
+
+	is->queueTail = runBuf;
+}
+
+/* Link runBuf in as the first buffer of the stream's queue. */
+static void sourceStreamPrepend( SourceStream *is, RunBuf *runBuf )
+{
+	RunBuf *oldHead = is->queue;
+
+	runBuf->prev = 0;
+	runBuf->next = oldHead;
+
+	if ( oldHead != 0 )
+		oldHead->prev = runBuf;
+	else
+		is->queueTail = runBuf;	/* Empty queue: also the tail. */
+
+	is->queue = runBuf;
+}
+
+/* Initialise the function tables of the source stream kinds by calling each
+ * of the four per-kind initialisers in turn. */
+void initInputFuncs()
+{
+ initFdFuncs();
+ initFileFuncs();
+ initPatternFuncs();
+ initReplFuncs();
+}
+
+/*
+ * Base run-time input streams.
+ */
+
+/* Shared getData implementation for the file- and fd-backed source streams.
+ *
+ * Skips `skip` bytes of the data already queued on the stream, then copies
+ * up to `length` bytes from the first buffer that has data left into `dest`.
+ * When the queued buffers are used up, one more buffer is read from the
+ * underlying source through funcs->getDataImpl and queued.
+ *
+ * Returns INPUT_DATA with the byte count stored in *copied, or INPUT_EOD
+ * with *copied == 0 when the underlying source produced nothing. A negative
+ * result from getDataImpl (read() reports errors as -1) is treated the same
+ * as "no data" so that it can never reach memcpy as a length. Nothing is
+ * consumed here; the buffer offsets are left untouched. */
+int fdGetData( SourceStream *is, int skip, char *dest, int length, int *copied )
+{
+	int ret = 0;
+	*copied = 0;
+
+	/* Move over skip bytes. */
+	RunBuf *buf = is->queue;
+	while ( true ) {
+		if ( buf == 0 ) {
+			/* Got through the in-mem buffers without copying anything. */
+			RunBuf *runBuf = newRunBuf();
+			int received = is->funcs->getDataImpl( is, runBuf->data, FSM_BUFSIZE );
+			if ( received <= 0 ) {
+				/* Nothing read: do not leave an empty buffer queued. */
+				free( runBuf );
+				ret = INPUT_EOD;
+				break;
+			}
+			runBuf->length = received;
+			sourceStreamAppend( is, runBuf );
+
+			/* NOTE(review): any skip count still outstanding at this point is
+			 * not applied to the freshly read data — confirm callers never
+			 * skip past what is queued. */
+			int slen = received < length ? received : length;
+			memcpy( dest, runBuf->data, slen );
+			*copied = slen;
+			ret = INPUT_DATA;
+			break;
+		}
+
+		int avail = buf->length - buf->offset;
+
+		/* Anything available in the current buffer. */
+		if ( avail > 0 ) {
+			/* The source data from the current buffer. */
+			char *src = &buf->data[buf->offset];
+
+			/* Need to skip? */
+			if ( skip > 0 && skip >= avail ) {
+				/* Skipping the whole buffer. */
+				skip -= avail;
+			}
+			else {
+				/* Either skip is zero, or less than avail. Skip goes to zero.
+				 * Some data left over, copy it. */
+				src += skip;
+				avail -= skip;
+				skip = 0;
+
+				int slen = avail < length ? avail : length;
+				memcpy( dest, src, slen );
+				*copied += slen;
+				ret = INPUT_DATA;
+				break;
+			}
+		}
+
+		buf = buf->next;
+	}
+
+	return ret;
+}
+
+/* Shared consumeData implementation for the file- and fd-backed source
+ * streams. Advances the stream past up to `length` queued bytes, freeing
+ * each buffer that is passed over, and returns the number of bytes actually
+ * consumed. Stops early at a token or ignore buffer, or when the queue runs
+ * out. The debug format now matches the int argument (%d, previously %ld). */
+int fdConsumeData( SourceStream *is, int length )
+{
+	debug( REALM_INPUT, "source consuming %d bytes\n", length );
+
+	int consumed = 0;
+
+	/* Move over skip bytes. */
+	while ( true ) {
+		RunBuf *buf = is->queue;
+
+		if ( buf == 0 )
+			break;
+
+		if ( buf->type == RunBufTokenType )
+			break;
+		else if ( buf->type == RunBufIgnoreType )
+			break;
+		else {
+			/* Anything available in the current buffer. */
+			int avail = buf->length - buf->offset;
+			if ( avail > 0 ) {
+				/* The source data from the current buffer. */
+				int slen = avail <= length ? avail : length;
+				debug( REALM_INPUT, "consumed: %.*s\n", slen, buf->data + buf->offset );
+				consumed += slen;
+				length -= slen;
+				buf->offset += slen;
+			}
+		}
+
+		/* Done. The head buffer stays queued, possibly fully consumed. */
+		if ( length == 0 )
+			break;
+
+		/* More to consume: the head buffer is used up, drop it. */
+		RunBuf *runBuf = sourceStreamPopHead( is );
+		free( runBuf );
+	}
+
+	return consumed;
+}
+
+/* Shared undoConsumeData implementation for the file- and fd-backed source
+ * streams. Pushes `length` bytes from `data` back onto the front of the
+ * stream in a new buffer and returns `length`.
+ *
+ * A run buffer holds at most FSM_BUFSIZE bytes, so the length is asserted to
+ * fit before copying. The debug format now matches the int argument (%d,
+ * previously %ld). */
+int fdUndoConsumeData( SourceStream *is, const char *data, int length )
+{
+	debug( REALM_INPUT, "undoing consume of %d bytes\n", length );
+
+	assert( length >= 0 && length <= FSM_BUFSIZE );
+
+	RunBuf *newBuf = newRunBuf();
+	newBuf->length = length;
+	memcpy( newBuf->data, data, length );
+	sourceStreamPrepend( is, newBuf );
+
+	return length;
+}
+
+/*
+ * File
+ */
+
+/* Read up to `length` bytes from the stream's FILE into `dest` and return
+ * the number of bytes fread delivered (zero at end of file or on error).
+ * The debug format now matches the int argument (%d, previously %ld). */
+int fileGetDataImpl( SourceStream *is, char *dest, int length )
+{
+	debug( REALM_INPUT, "inputStreamFileGetDataImpl length = %d\n", length );
+	size_t res = fread( dest, 1, length, is->file );
+	return res;
+}
+
+/* Fill in the function table used by FILE-backed source streams. The
+ * language-element hooks are left null. */
+void initFileFuncs()
+{
+	fileFuncs = (struct SourceFuncs) {
+		.getData = &fdGetData,
+		.consumeData = &fdConsumeData,
+		.undoConsumeData = &fdUndoConsumeData,
+		.getDataImpl = &fileGetDataImpl,
+	};
+}
+
+/*
+ * FD
+ */
+
+/* Read up to `length` bytes from the stream's file descriptor into `dest`.
+ * Returns whatever read() returned, narrowed to int: the byte count, zero at
+ * end of input, or -1 on error. */
+int fdGetDataImpl( SourceStream *is, char *dest, int length )
+{
+	return (int)(long)read( is->fd, dest, length );
+}
+
+/* Fill in the function table used by descriptor-backed source streams. The
+ * language-element hooks are left null. */
+void initFdFuncs()
+{
+	fdFuncs = (struct SourceFuncs) {
+		.getData = &fdGetData,
+		.consumeData = &fdConsumeData,
+		.undoConsumeData = &fdUndoConsumeData,
+		.getDataImpl = &fdGetDataImpl,
+	};
+}
+
+/*
+ * InputStream struct, this wraps the list of input streams.
+ */
+
+/* Put an input stream into its initial state: every field zeroed, then the
+ * position set to line 1, column 1, byte 0. */
+void initInputStream( InputStream *inputStream )
+{
+	memset( inputStream, 0, sizeof(*inputStream) );
+
+	/* FIXME: correct values here. */
+	inputStream->byte = 0;
+	inputStream->column = 1;
+	inputStream->line = 1;
+}
+
+/* Release every buffer queued on an input stream. Buffers that carry a tree
+ * (token, ignore and source types) drop their reference with treeDownref
+ * before being freed. Both ends of the queue are reset afterwards; the tail
+ * pointer is dereferenced by the pop-tail path, so it must not be left
+ * pointing at freed memory. */
+void clearInputStream( struct ColmProgram *prg, Tree **sp, InputStream *inputStream )
+{
+	RunBuf *buf = inputStream->queue;
+	while ( buf != 0 ) {
+		RunBuf *next = buf->next;
+
+		switch ( buf->type ) {
+			case RunBufDataType:
+				break;
+
+			case RunBufTokenType:
+			case RunBufIgnoreType:
+			case RunBufSourceType:
+				treeDownref( prg, sp, buf->tree );
+				break;
+		}
+
+		free( buf );
+		buf = next;
+	}
+
+	inputStream->queue = 0;
+	inputStream->queueTail = 0;
+}
+
+/* Link runBuf in as the first buffer of the input stream's queue. */
+static void inputStreamPrepend( InputStream *is, RunBuf *runBuf )
+{
+	RunBuf *oldHead = is->queue;
+
+	runBuf->prev = 0;
+	runBuf->next = oldHead;
+
+	if ( oldHead != 0 )
+		oldHead->prev = runBuf;
+	else
+		is->queueTail = runBuf;	/* Empty queue: also the tail. */
+
+	is->queue = runBuf;
+}
+
+/* Unlink and return the first buffer of the input stream's queue. The queue
+ * must not be empty. The caller takes ownership of the returned buffer. */
+static RunBuf *inputStreamPopHead( InputStream *is )
+{
+	RunBuf *head = is->queue;
+	RunBuf *rest = head->next;
+
+	is->queue = rest;
+	if ( rest != 0 )
+		rest->prev = 0;
+	else
+		is->queueTail = 0;
+
+	return head;
+}
+
+/* Link runBuf in as the last buffer of the input stream's queue. */
+static void inputStreamAppend( InputStream *is, RunBuf *runBuf )
+{
+	runBuf->next = 0;
+
+	if ( is->queue != 0 ) {
+		/* Hook onto the current tail. */
+		RunBuf *oldTail = is->queueTail;
+		oldTail->next = runBuf;
+		runBuf->prev = oldTail;
+	}
+	else {
+		/* Empty queue: the new buffer is also the head. */
+		runBuf->prev = 0;
+		is->queue = runBuf;
+	}
+
+	is->queueTail = runBuf;
+}
+
+/* Unlink and return the last buffer of the input stream's queue. The queue
+ * must not be empty. The caller takes ownership of the returned buffer. */
+static RunBuf *inputStreamPopTail( InputStream *is )
+{
+	RunBuf *tail = is->queueTail;
+	RunBuf *before = tail->prev;
+
+	is->queueTail = before;
+	if ( before != 0 )
+		before->next = 0;
+	else
+		is->queue = 0;
+
+	return tail;
+}
+
+/* Non-zero when the buffer at the head of the queue wraps a source stream. */
+static int isSourceStream( InputStream *is )
+{
+	RunBuf *head = is->queue;
+	return head != 0 && head->type == RunBufSourceType;
+}
+
+/* Flag the input stream as having reached end of file. getData reports
+ * INPUT_EOF instead of INPUT_EOD once this is set. */
+void setEof( InputStream *is )
+{
+	is->eof = true;
+	debug( REALM_INPUT, "setting EOF in input stream\n" );
+}
+
+/* Clear the end-of-file flag. When the head of the queue wraps a source
+ * stream, the flag is cleared on that stream's input; otherwise it is
+ * cleared on `is` itself. */
+void unsetEof( InputStream *is )
+{
+	if ( !isSourceStream( is ) ) {
+		is->eof = false;
+		return;
+	}
+
+	Stream *stream = (Stream*)is->queue->tree;
+	stream->in->eof = false;
+}
+
+/* Fetch data from an input stream without consuming it.
+ *
+ * Walks the queued buffers from the head. Up to `skip` bytes of plain data
+ * are passed over first, then at most `length` bytes from the first data
+ * buffer with something left are copied to `dest` and counted in *copied.
+ *
+ * Returns one of:
+ *   INPUT_DATA    bytes were copied;
+ *   INPUT_TREE    a token buffer was reached;
+ *   INPUT_IGNORE  an ignore buffer was reached;
+ *   INPUT_EOD     the queue ran out and the eof flag is not set;
+ *   INPUT_EOF     the queue (or a wrapped source) ran out and eof is set;
+ * or, when a source buffer is reached, whatever that source's getData
+ * returned (with INPUT_EOD turned into INPUT_EOF if eof is set).
+ *
+ * attachInput is called for `is` before returning on every path, and
+ * attachSource is called for a wrapped source whenever one is consulted. */
+int getData( FsmRun *fsmRun, InputStream *is, int skip, char *dest, int length, int *copied )
+{
+ int ret = 0;
+ *copied = 0;
+
+ /* Move over skip bytes. */
+ RunBuf *buf = is->queue;
+ while ( true ) {
+ if ( buf == 0 ) {
+ /* Got through the in-mem buffers without copying anything. */
+ ret = is->eof ? INPUT_EOF : INPUT_EOD;
+ break;
+ }
+
+ if ( buf->type == RunBufSourceType ) {
+ /* Delegate to the wrapped source stream, passing on the remaining skip. */
+ Stream *stream = (Stream*)buf->tree;
+ int type = stream->in->funcs->getData( stream->in, skip, dest, length, copied );
+
+ attachSource( fsmRun, stream->in );
+
+ if ( type == INPUT_EOD && is->eof ) {
+ ret = INPUT_EOF;
+ break;
+ }
+
+ ret = type;
+ break;
+ }
+
+ if ( buf->type == RunBufTokenType ) {
+ ret = INPUT_TREE;
+ break;
+ }
+
+ if ( buf->type == RunBufIgnoreType ) {
+ ret = INPUT_IGNORE;
+ break;
+ }
+
+ int avail = buf->length - buf->offset;
+
+ /* Anything available in the current buffer. */
+ if ( avail > 0 ) {
+ /* The source data from the current buffer. */
+ char *src = &buf->data[buf->offset];
+
+ /* Need to skip? */
+ if ( skip > 0 && skip >= avail ) {
+ /* Skipping the whole buffer. */
+ skip -= avail;
+ }
+ else {
+ /* Either skip is zero, or less than avail. Skip goes to zero.
+ * Some data left over, copy it. */
+ src += skip;
+ avail -= skip;
+ skip = 0;
+
+ int slen = avail <= length ? avail : length;
+ memcpy( dest, src, slen ) ;
+ *copied += slen;
+ ret = INPUT_DATA;
+ break;
+ }
+ }
+
+ buf = buf->next;
+ }
+
+ attachInput( fsmRun, is );
+
+#if DEBUG
+ /* Trace the outcome. */
+ switch ( ret ) {
+ case INPUT_DATA:
+ debug( REALM_INPUT, "get data: DATA copied: %d: %.*s\n", *copied, (int)*copied, dest );
+ break;
+ case INPUT_EOD:
+ debug( REALM_INPUT, "get data: EOD\n" );
+ break;
+ case INPUT_EOF:
+ debug( REALM_INPUT, "get data: EOF\n" );
+ break;
+ case INPUT_TREE:
+ debug( REALM_INPUT, "get data: TREE\n" );
+ break;
+ case INPUT_IGNORE:
+ debug( REALM_INPUT, "get data: IGNORE\n" );
+ break;
+ case INPUT_LANG_EL:
+ debug( REALM_INPUT, "get data: LANG_EL\n" );
+ break;
+ }
+#endif
+
+ return ret;
+}
+
+/* Advance the input stream past up to `length` bytes and return the number
+ * of bytes actually consumed.
+ *
+ * Works on the head of the queue repeatedly: a source buffer forwards the
+ * request to the wrapped stream's consumeData, a data buffer has its offset
+ * advanced, and a token or ignore buffer stops the walk. Whenever bytes are
+ * still owed after handling the head buffer, that buffer is popped and
+ * freed and the next one is tried. */
+int consumeData( InputStream *is, int length )
+{
+ debug( REALM_INPUT, "consuming %d bytes\n", length );
+
+ int consumed = 0;
+
+ /* Move over skip bytes. */
+ while ( true ) {
+ RunBuf *buf = is->queue;
+
+ if ( buf == 0 )
+ break;
+
+ if ( buf->type == RunBufSourceType ) {
+ Stream *stream = (Stream*)buf->tree;
+ int slen = stream->in->funcs->consumeData( stream->in, length );
+
+ consumed += slen;
+ length -= slen;
+ }
+ else if ( buf->type == RunBufTokenType )
+ break;
+ else if ( buf->type == RunBufIgnoreType )
+ break;
+ else {
+ /* Anything available in the current buffer. */
+ int avail = buf->length - buf->offset;
+ if ( avail > 0 ) {
+ /* The source data from the current buffer. */
+ int slen = avail <= length ? avail : length;
+ consumed += slen;
+ length -= slen;
+ buf->offset += slen;
+ }
+ }
+
+ /* Done. The head buffer stays queued, possibly fully consumed. */
+ if ( length == 0 )
+ break;
+
+ /* More to consume: drop the head buffer and carry on with the next.
+ * NOTE(review): a source buffer dropped here is freed without a
+ * treeDownref on its tree — confirm the reference is released elsewhere. */
+ RunBuf *runBuf = inputStreamPopHead( is );
+ free( runBuf );
+ }
+
+ return consumed;
+}
+
+/* Push `length` bytes from `data` back onto the front of the input stream.
+ *
+ * When the head of the queue wraps a source stream the bytes are handed to
+ * that stream's undoConsumeData and the source is detached from the FsmRun
+ * it is attached to, if any. Otherwise the bytes go into a new data buffer
+ * at the head of this stream's queue and this stream is detached likewise.
+ * Returns the number of bytes pushed back.
+ *
+ * A run buffer holds at most FSM_BUFSIZE bytes, so the local path asserts
+ * that the length fits before copying. The debug format now matches the int
+ * argument (%d, previously %ld). */
+int undoConsumeData( FsmRun *fsmRun, InputStream *is, const char *data, int length )
+{
+	debug( REALM_INPUT, "undoing consume of %d bytes\n", length );
+
+	if ( isSourceStream( is ) ) {
+		Stream *stream = (Stream*)is->queue->tree;
+		int len = stream->in->funcs->undoConsumeData( stream->in, data, length );
+
+		if ( stream->in->attached != 0 )
+			detachSource( stream->in->attached, stream->in );
+
+		return len;
+	}
+	else {
+		assert( length >= 0 && length <= FSM_BUFSIZE );
+
+		RunBuf *newBuf = newRunBuf();
+		newBuf->length = length;
+		memcpy( newBuf->data, data, length );
+		inputStreamPrepend( is, newBuf );
+
+		if ( is->attached != 0 )
+			detachInput( is->attached, is );
+
+		return length;
+	}
+}
+
+/* Take the tree at the front of the input stream. Fully consumed data
+ * buffers at the head are discarded first. If the head is then a token or
+ * ignore buffer it is removed and its tree returned; otherwise the result
+ * is null and the queue is left as is. */
+Tree *consumeTree( InputStream *is )
+{
+	RunBuf *head;
+
+	/* Drop spent data buffers. */
+	for ( head = is->queue; head != 0; head = is->queue ) {
+		if ( head->type != RunBufDataType || head->offset != head->length )
+			break;
+		free( inputStreamPopHead( is ) );
+	}
+
+	if ( head == 0 )
+		return 0;
+	if ( head->type != RunBufTokenType && head->type != RunBufIgnoreType )
+		return 0;
+
+	/* FIXME: using runbufs here for this is a poor use of memory. */
+	inputStreamPopHead( is );
+	Tree *tree = head->tree;
+	free( head );
+	return tree;
+}
+
+/* Put a tree back at the front of the input stream. The stream is detached
+ * from its FsmRun first, if attached. The tree is wrapped in a new buffer
+ * typed as ignore when `ignore` is non-zero and as token otherwise. */
+void undoConsumeTree( InputStream *is, Tree *tree, int ignore )
+{
+	RunBuf *holder;
+
+	if ( is->attached != 0 )
+		detachInput( is->attached, is );
+
+	holder = newRunBuf();
+	holder->tree = tree;
+	if ( ignore )
+		holder->type = RunBufIgnoreType;
+	else
+		holder->type = RunBufTokenType;
+
+	inputStreamPrepend( is, holder );
+}
+
+/* Take the next language element from the source stream wrapped at the head
+ * of the queue, forwarding bindId, data and length to that stream's
+ * consumeLangEl hook.
+ *
+ * Calling this when the head is not a source stream is a programming error
+ * and is asserted against. With assertions compiled out the function now
+ * returns null instead of falling off the end of a non-void function. */
+struct LangEl *consumeLangEl( InputStream *is, long *bindId, char **data, long *length )
+{
+	if ( isSourceStream( is ) ) {
+		Stream *stream = (Stream*)is->queue->tree;
+		return stream->in->funcs->consumeLangEl( stream->in, bindId, data, length );
+	}
+
+	assert( false );
+	return 0;
+}
+
+/* Undo the last consumeLangEl on the source stream wrapped at the head of
+ * the queue. Calling this when the head is not a source stream is asserted
+ * against. The hook is called as a plain statement: a `return` with an
+ * expression is not permitted in a function returning void. */
+void undoConsumeLangEl( InputStream *is )
+{
+	if ( isSourceStream( is ) ) {
+		Stream *stream = (Stream*)is->queue->tree;
+		stream->in->funcs->undoConsumeLangEl( stream->in );
+	}
+	else {
+		assert( false );
+	}
+}
+
+/* Insert `length` bytes from `data` at the front of the input stream. The
+ * stream is detached from its FsmRun first, if attached. */
+void prependData( InputStream *is, const char *data, long length )
+{
+	RunBuf *front;
+
+	if ( is->attached != 0 )
+		detachInput( is->attached, is );
+
+	/* Everything goes into one new buffer. This is the easy implementation
+	 * and something better is needed: it caps how much data can be pushed
+	 * back to the inputStream in one call. */
+	assert( length < FSM_BUFSIZE );
+
+	front = newRunBuf();
+	memcpy( front->data, data, length );
+	front->length = length;
+
+	inputStreamPrepend( is, front );
+}
+
+/* Take back up to `length` bytes from the front of the input stream and
+ * return how many were removed. The stream is detached from its FsmRun
+ * first, if attached. The removal itself is exactly what consumeData does,
+ * so it is delegated. */
+int undoPrependData( InputStream *is, int length )
+{
+	if ( is->attached != 0 )
+		detachInput( is->attached, is );
+
+	return consumeData( is, length );
+}
+
+/* Insert a tree at the front of the input stream. The stream is detached
+ * from its FsmRun first, if attached. The tree is wrapped in a new buffer
+ * typed as ignore when `ignore` is non-zero and as token otherwise. */
+void prependTree( InputStream *is, Tree *tree, int ignore )
+{
+	RunBuf *holder;
+
+	if ( is->attached != 0 )
+		detachInput( is->attached, is );
+
+	holder = newRunBuf();
+	holder->tree = tree;
+	if ( ignore )
+		holder->type = RunBufIgnoreType;
+	else
+		holder->type = RunBufTokenType;
+
+	inputStreamPrepend( is, holder );
+}
+
+/* Take back the tree at the front of the input stream. The stream is
+ * detached from its FsmRun first, if attached. The removal itself (dropping
+ * spent data buffers, then popping a token or ignore buffer) is exactly
+ * what consumeTree does, so it is delegated. Returns null when the head is
+ * not a tree buffer. */
+Tree *undoPrependTree( InputStream *is )
+{
+	if ( is->attached != 0 )
+		detachInput( is->attached, is );
+
+	return consumeTree( is );
+}
+
+/* Add `len` bytes from `data` to the end of the input stream, splitting
+ * them over as many run buffers as needed. Nothing is queued when len is
+ * zero or negative. */
+void appendData( InputStream *is, const char *data, long len )
+{
+	const char *src = data;
+	long remaining = len;
+
+	while ( remaining > 0 ) {
+		RunBuf *chunk = newRunBuf();
+		const long capacity = (long)sizeof(chunk->data);
+		const long take = remaining > capacity ? capacity : remaining;
+
+		inputStreamAppend( is, chunk );
+
+		memcpy( chunk->data, src, take );
+		chunk->length = take;
+
+		src += take;
+		remaining -= take;
+	}
+}
+
+/* Remove up to `length` bytes from the end of the input stream. The stream
+ * is detached from its FsmRun first, if attached.
+ *
+ * Works on the tail of the queue repeatedly: the tail buffer is shortened,
+ * and if bytes are still owed it is popped and freed and the previous
+ * buffer becomes the tail. Stops at a token or ignore buffer or when the
+ * queue runs out. Always returns null. The byte counter the original kept
+ * was never read and has been removed. */
+Tree *undoAppendData( InputStream *is, int length )
+{
+	if ( is->attached != 0 )
+		detachInput( is->attached, is );
+
+	while ( true ) {
+		RunBuf *buf = is->queueTail;
+
+		if ( buf == 0 )
+			break;
+
+		if ( buf->type == RunBufTokenType )
+			break;
+		else if ( buf->type == RunBufIgnoreType )
+			break;
+		else {
+			/* Anything available in the current buffer. */
+			int avail = buf->length - buf->offset;
+			if ( avail > 0 ) {
+				/* Trim from the end of the buffer. */
+				int slen = avail <= length ? avail : length;
+				length -= slen;
+				buf->length -= slen;
+			}
+		}
+
+		if ( length == 0 )
+			break;
+
+		/* More to remove: this buffer is used up, drop it. */
+		RunBuf *runBuf = inputStreamPopTail( is );
+		free( runBuf );
+	}
+
+	return 0;
+}
+
+/* Add a tree to the end of the input stream, wrapped in a token buffer. */
+void appendTree( InputStream *is, Tree *tree )
+{
+	RunBuf *holder = newRunBuf();
+
+	holder->type = RunBufTokenType;
+	holder->tree = tree;
+	holder->length = 0;
+
+	inputStreamAppend( is, holder );
+}
+
+/* Add a stream tree to the end of the input stream, wrapped in a source
+ * buffer. */
+void appendStream( InputStream *in, struct ColmTree *tree )
+{
+	RunBuf *holder = newRunBuf();
+
+	holder->type = RunBufSourceType;
+	holder->tree = tree;
+	holder->length = 0;
+
+	inputStreamAppend( in, holder );
+}
+
+/* Remove the last buffer of the input stream and return the tree it
+ * carried. The stream is detached from its FsmRun first, if attached. The
+ * queue must not be empty. */
+Tree *undoAppendStream( InputStream *is )
+{
+	RunBuf *tail;
+	Tree *tree;
+
+	if ( is->attached != 0 )
+		detachInput( is->attached, is );
+
+	tail = inputStreamPopTail( is );
+	tree = tail->tree;
+	free( tail );
+
+	return tree;
+}
+
+/* Remove the last buffer of the input stream and return the tree it
+ * carried. The stream is detached from its FsmRun first, if attached. The
+ * queue must not be empty. */
+Tree *undoAppendTree( InputStream *is )
+{
+	RunBuf *tail;
+	Tree *tree;
+
+	if ( is->attached != 0 )
+		detachInput( is->attached, is );
+
+	tail = inputStreamPopTail( is );
+	tree = tail->tree;
+	free( tail );
+
+	return tree;
+}
diff --git a/src/input.h b/src/input.h
new file mode 100644
index 00000000..882c6b31
--- /dev/null
+++ b/src/input.h
@@ -0,0 +1,214 @@
+/*
+ * Copyright 2007-2012 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _INPUT_H
+#define _INPUT_H
+
+#include <stdio.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define FSM_BUFSIZE 8192
+//#define FSM_BUFSIZE 8
+
+#define INPUT_DATA 1
+/* This is for data sources to return, not for the wrapper. */
+#define INPUT_EOD 2
+#define INPUT_EOF 3
+#define INPUT_LANG_EL 4
+#define INPUT_TREE 5
+#define INPUT_IGNORE 6
+
+/*
+ * pdaRun <- fsmRun <- stream
+ *
+ * Activities we need to support:
+ *
+ * 1. Stuff data into an input stream each time we <<
+ * 2. Detach an input stream, and attach another when we include
+ * 3. Send data back to an input stream when the parser backtracks
+ * 4. Temporarily stop parsing due to a lack of input.
+ *
+ * At any given time, the fsmRun struct may have a prefix of the stream's
+ * input. If getting data we first get what we can out of the fsmRun, then
+ * consult the stream. If sending data back, we first shift pointers in the
+ * fsmRun, then ship to the stream. If changing streams the old stream needs to
+ * take back unprocessed data from the fsmRun.
+ */
+
+struct LangEl;
+struct Pattern;
+struct PatternItem;
+struct Replacement;
+struct ReplItem;
+struct _FsmRun;
+struct ColmTree;
+
+/* What a RunBuf holds. The data type is zero, so a zero-filled buffer is a
+ * data buffer. */
+enum RunBufType {
+ /* Raw bytes in the buffer's data array. */
+ RunBufDataType = 0,
+ /* A tree queued as a token. */
+ RunBufTokenType,
+ /* A tree queued as ignored input. */
+ RunBufIgnoreType,
+ /* A tree that wraps a source stream. */
+ RunBufSourceType
+};
+
+/* One node of a stream's doubly linked buffer queue. */
+typedef struct _RunBuf
+{
+ /* Selects between the byte payload and the tree payload. */
+ enum RunBufType type;
+ /* Byte payload; bytes from offset up to length are unconsumed. */
+ char data[FSM_BUFSIZE];
+ long length;
+ /* Tree payload for the token, ignore and source types. */
+ struct ColmTree *tree;
+ long offset;
+ /* Queue links. */
+ struct _RunBuf *next, *prev;
+} RunBuf;
+
+RunBuf *newRunBuf();
+
+typedef struct _SourceStream SourceStream;
+
+/* Table of operations implemented by one kind of source stream. */
+struct SourceFuncs
+{
+ /* Data. */
+ int (*getData)( SourceStream *is, int offset, char *dest, int length, int *copied );
+ int (*consumeData)( SourceStream *is, int length );
+ int (*undoConsumeData)( SourceStream *is, const char *data, int length );
+
+ /* Language elements (compile-time). */
+ struct LangEl *(*consumeLangEl)( SourceStream *is, long *bindId, char **data, long *length );
+ void (*undoConsumeLangEl)( SourceStream *is );
+
+ /* Private implementation for some shared get data functions. */
+ int (*getDataImpl)( SourceStream *is, char *dest, int length );
+};
+
+/* A single source of input together with the operations used to read it. */
+struct _SourceStream
+{
+ /* Operations for this kind of source. */
+ struct SourceFuncs *funcs;
+
+ struct _FsmRun *hasData;
+
+ char eofSent;
+ char eof;
+
+ /* Current position. */
+ long line;
+ long column;
+ long byte;
+
+ /* This is set true for input streams that do their own line counting.
+ * Causes FsmRun to ignore NLs. */
+ int handlesLine;
+
+ /* Head and tail of the doubly linked queue of buffered input. */
+ RunBuf *queue;
+ RunBuf *queueTail;
+
+ const char *data;
+ long dlen;
+ int offset;
+
+ /* Backing handle: a stdio stream or a file descriptor. */
+ FILE *file;
+ long fd;
+
+ struct Pattern *pattern;
+ struct PatternItem *patItem;
+ struct Replacement *replacement;
+ struct ReplItem *replItem;
+
+ /* The FsmRun this stream is currently attached to, or null. */
+ struct _FsmRun *attached;
+};
+
+SourceStream *newSourceStreamPattern( struct Pattern *pattern );
+SourceStream *newSourceStreamRepl( struct Replacement *replacement );
+SourceStream *newSourceStreamFile( FILE *file );
+SourceStream *newSourceStreamFd( long fd );
+
+void initInputFuncs();
+void initStaticFuncs();
+void initPatternFuncs();
+void initReplFuncs();
+
+/* List of input streams. Enables streams to be pushed/popped. The queue
+ * mixes plain data buffers, tree buffers and buffers wrapping source
+ * streams. */
+struct _InputStream
+{
+ char eofSent;
+ /* Set and cleared through setEof and unsetEof. */
+ char eof;
+
+ /* Current position. */
+ long line;
+ long column;
+ long byte;
+
+ /* This is set true for input streams that do their own line counting.
+ * Causes FsmRun to ignore NLs. */
+ int handlesLine;
+
+ /* Head and tail of the doubly linked buffer queue. */
+ RunBuf *queue;
+ RunBuf *queueTail;
+
+ const char *data;
+ long dlen;
+ int offset;
+
+ FILE *file;
+ long fd;
+
+ struct Pattern *pattern;
+ struct PatternItem *patItem;
+ struct Replacement *replacement;
+ struct ReplItem *replItem;
+
+ /* The FsmRun this stream is currently attached to, or null. */
+ struct _FsmRun *attached;
+};
+
+typedef struct _InputStream InputStream;
+
+/* The input stream interface. */
+
+int getData( struct _FsmRun *fsmRun, InputStream *in, int offset, char *dest, int length, int *copied );
+int consumeData( InputStream *in, int length );
+int undoConsumeData( struct _FsmRun *fsmRun, InputStream *is, const char *data, int length );
+
+struct ColmTree *consumeTree( InputStream *in );
+void undoConsumeTree( InputStream *in, struct ColmTree *tree, int ignore );
+
+struct LangEl *consumeLangEl( InputStream *in, long *bindId, char **data, long *length );
+void undoConsumeLangEl( InputStream *in );
+
+void setEof( InputStream *is );
+void unsetEof( InputStream *is );
+
+void prependData( InputStream *in, const char *data, long len );
+int undoPrependData( InputStream *is, int length );
+
+void prependTree( InputStream *is, struct ColmTree *tree, int ignore );
+struct ColmTree *undoPrependTree( InputStream *is );
+
+void appendData( InputStream *in, const char *data, long len );
+void appendTree( InputStream *in, struct ColmTree *tree );
+void appendStream( InputStream *in, struct ColmTree *tree );
+struct ColmTree *undoAppendData( InputStream *in, int length );
+struct ColmTree *undoAppendStream( InputStream *in );
+struct ColmTree *undoAppendTree( InputStream *in );
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _INPUT_H */
diff --git a/src/keyops.h b/src/keyops.h
new file mode 100644
index 00000000..1808c6a6
--- /dev/null
+++ b/src/keyops.h
@@ -0,0 +1,283 @@
+/*
+ * Copyright 2001-2012 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _KEYOPS_H
+#define _KEYOPS_H
+
+#include <fstream>
+#include <climits>
+
+/* Mark kinds. NOTE(review): no use of MarkType is visible in this header;
+ * confirm its meaning against the users of the enum. */
+enum MarkType
+{
+ MarkNone = 0,
+ MarkMark
+};
+
+/* Unsigned count type returned by KeyOps::span() and KeyOps::alphSize(). */
+typedef unsigned long long Size;
+
+/* A single alphabet key. The value is held in a long; whether it is read as
+ * signed or unsigned is decided by the global keyOps->isSigned, which the
+ * out-of-class operators and getLongLong() consult. */
+struct Key
+{
+private:
+ long key;
+
+public:
+ friend inline Key operator+(const Key key1, const Key key2);
+ friend inline Key operator-(const Key key1, const Key key2);
+ friend inline Key operator/(const Key key1, const Key key2);
+ friend inline long operator&(const Key key1, const Key key2);
+
+ friend inline bool operator<( const Key key1, const Key key2 );
+ friend inline bool operator<=( const Key key1, const Key key2 );
+ friend inline bool operator>( const Key key1, const Key key2 );
+ friend inline bool operator>=( const Key key1, const Key key2 );
+ friend inline bool operator==( const Key key1, const Key key2 );
+ friend inline bool operator!=( const Key key1, const Key key2 );
+
+ friend struct KeyOps;
+
+ /* The default constructor leaves the value uninitialized. */
+ Key( ) {}
+ Key( const Key &key ) : key(key.key) {}
+ Key( long key ) : key(key) {}
+
+ /* Returns the value used to represent the key. This value must be
+ * interpreted based on signedness. */
+ long getVal() const { return key; }
+
+ /* Returns the key cast to a long long. This form of the key does not
+ * require any signedness interpretation. */
+ long long getLongLong() const;
+
+ /* Classification of the raw value against the ASCII ranges. */
+ bool isUpper() const { return ( 'A' <= key && key <= 'Z' ); }
+ bool isLower() const { return ( 'a' <= key && key <= 'z' ); }
+ bool isPrintable() const { return ( 32 <= key && key < 127 ); }
+
+ /* Case conversion by offsetting. No range check is made: the caller is
+ * expected to test isLower() / isUpper() first. */
+ Key toUpper() const
+ { return Key( 'A' + ( key - 'a' ) ); }
+ Key toLower() const
+ { return Key( 'a' + ( key - 'A' ) ); }
+
+ void operator+=( const Key other )
+ {
+ /* FIXME: must be made aware of isSigned. */
+ key += other.key;
+ }
+
+ void operator-=( const Key other )
+ {
+ /* FIXME: must be made aware of isSigned. */
+ key -= other.key;
+ }
+
+ void operator|=( const Key other )
+ {
+ /* FIXME: must be made aware of isSigned. */
+ key |= other.key;
+ }
+
+ /* Decrement and increment. Needed only for ranges. */
+ inline void decrement();
+ inline void increment();
+};
+
+/* Description of one integral type of the host language. KeyOps reads
+ * isSigned, minVal and maxVal from it.
+ * NOTE(review): data1/data2 are presumably the words of the type's name and
+ * size its width in bytes -- confirm against the host type tables. */
+struct HostType
+{
+ const char *data1;
+ const char *data2;
+ bool isSigned;
+ long long minVal;
+ long long maxVal;
+ unsigned int size;
+};
+
+/* A host language: its table of types (numHostTypes entries) and the type
+ * used for the alphabet by default. */
+struct HostLang
+{
+ HostType *hostTypes;
+ int numHostTypes;
+ HostType *defaultAlphType;
+ bool explicitUnsigned;
+};
+
+
+/* Target language. */
+enum HostLangType
+{
+ CCode,
+ DCode,
+ JavaCode,
+ RubyCode
+};
+
+/* The currently selected host language; KeyOps::typeSubsumes() searches its
+ * type table. */
+extern HostLang *hostLang;
+extern HostLangType hostLangType;
+
+/* The available host language descriptions, defined elsewhere. */
+extern HostLang hostLangC;
+extern HostLang hostLangD;
+extern HostLang hostLangJava;
+extern HostLang hostLangRuby;
+
+/* An abstraction of the key operators that manages key operations such as
+ * comparison and increment according to the signedness of the key. */
+struct KeyOps
+{
+ /* Default to signed alphabet. */
+ KeyOps() :
+ isSigned(true),
+ alphType(0)
+ {}
+
+ /* Explicit signedness. The alphabet type starts out unset (null), the same
+ * as with the default constructor, until setAlphType() is called. */
+ KeyOps( bool isSigned )
+ :isSigned(isSigned),
+ alphType(0)
+ {}
+
+ bool isSigned;
+ Key minKey, maxKey;
+ HostType *alphType;
+
+ /* Select the alphabet type. Takes the signedness and the key range from
+ * the host type. alphType must not be null. */
+ void setAlphType( HostType *alphType )
+ {
+ this->alphType = alphType;
+ isSigned = alphType->isSigned;
+ if ( isSigned ) {
+ minKey = (long) alphType->minVal;
+ maxKey = (long) alphType->maxVal;
+ }
+ else {
+ minKey = (long) (unsigned long) alphType->minVal;
+ maxKey = (long) (unsigned long) alphType->maxVal;
+ }
+ }
+
+ /* Compute the distance between two keys: the number of keys in the
+ * inclusive range key1..key2, using the current signedness. */
+ Size span( Key key1, Key key2 )
+ {
+ return isSigned ?
+ (unsigned long long)(
+ (long long)key2.key -
+ (long long)key1.key + 1) :
+ (unsigned long long)(
+ (unsigned long)key2.key) -
+ (unsigned long long)((unsigned long)key1.key) + 1;
+ }
+
+ /* Number of keys in the whole alphabet. */
+ Size alphSize()
+ { return span( minKey, maxKey ); }
+
+ /* Return the first host type in table order whose maxVal is at least
+ * maxVal, or null if there is none. */
+ HostType *typeSubsumes( long long maxVal )
+ {
+ for ( int i = 0; i < hostLang->numHostTypes; i++ ) {
+ if ( maxVal <= hostLang->hostTypes[i].maxVal )
+ return hostLang->hostTypes + i;
+ }
+ return 0;
+ }
+
+ /* As above, but when isSigned is true only signed host types qualify.
+ * When isSigned is false any host type qualifies. */
+ HostType *typeSubsumes( bool isSigned, long long maxVal )
+ {
+ for ( int i = 0; i < hostLang->numHostTypes; i++ ) {
+ if ( ( (isSigned && hostLang->hostTypes[i].isSigned) || !isSigned ) &&
+ maxVal <= hostLang->hostTypes[i].maxVal )
+ return hostLang->hostTypes + i;
+ }
+ return 0;
+ }
+};
+
+extern KeyOps *keyOps;
+
+/* Ordering comparisons. With a signed alphabet the raw long values are
+ * compared; otherwise both are compared as unsigned long. */
+inline bool operator<( const Key key1, const Key key2 )
+{
+ if ( keyOps->isSigned )
+ return key1.key < key2.key;
+ return (unsigned long)key1.key < (unsigned long)key2.key;
+}
+
+inline bool operator<=( const Key key1, const Key key2 )
+{
+ if ( keyOps->isSigned )
+ return key1.key <= key2.key;
+ return (unsigned long)key1.key <= (unsigned long)key2.key;
+}
+
+inline bool operator>( const Key key1, const Key key2 )
+{
+ if ( keyOps->isSigned )
+ return key1.key > key2.key;
+ return (unsigned long)key1.key > (unsigned long)key2.key;
+}
+
+inline bool operator>=( const Key key1, const Key key2 )
+{
+ if ( keyOps->isSigned )
+ return key1.key >= key2.key;
+ return (unsigned long)key1.key >= (unsigned long)key2.key;
+}
+
+/* Equality does not depend on signedness: the raw values are compared. */
+inline bool operator==( const Key key1, const Key key2 )
+{
+ const bool same = ( key1.key == key2.key );
+ return same;
+}
+
+inline bool operator!=( const Key key1, const Key key2 )
+{
+ const bool differ = ( key1.key != key2.key );
+ return differ;
+}
+
+/* Step the key down by one. Needed only for ranges. The unsigned case does
+ * the arithmetic in unsigned long and stores the result back into the long. */
+inline void Key::decrement()
+{
+ if ( keyOps->isSigned )
+ key = key - 1;
+ else
+ key = ((unsigned long)key) - 1;
+}
+
+/* Step the key up by one. Needed only for ranges. */
+inline void Key::increment()
+{
+ if ( keyOps->isSigned )
+ key = key + 1;
+ else
+ key = ((unsigned long)key) + 1;
+}
+
+/* Widen the key to long long, going through unsigned long first when the
+ * alphabet is unsigned. */
+inline long long Key::getLongLong() const
+{
+ if ( keyOps->isSigned )
+ return (long long)key;
+ return (long long)(unsigned long)key;
+}
+
+/* Arithmetic on the raw long values. None of these consult isSigned yet. */
+inline Key operator+(const Key key1, const Key key2)
+{
+ /* FIXME: must be made aware of isSigned. */
+ const long sum = key1.key + key2.key;
+ return Key( sum );
+}
+
+inline Key operator-(const Key key1, const Key key2)
+{
+ /* FIXME: must be made aware of isSigned. */
+ const long difference = key1.key - key2.key;
+ return Key( difference );
+}
+
+/* Bitwise and; returns the raw long rather than a Key. */
+inline long operator&(const Key key1, const Key key2)
+{
+ /* FIXME: must be made aware of isSigned. */
+ const long masked = key1.key & key2.key;
+ return masked;
+}
+
+inline Key operator/(const Key key1, const Key key2)
+{
+ /* FIXME: must be made aware of isSigned. */
+ const long quotient = key1.key / key2.key;
+ return Key( quotient );
+}
+
+const char *findFileExtension( const char *stemFile );
+char *fileNameFromStem( const char *stemFile, const char *suffix );
+
+#endif /* _KEYOPS_H */
diff --git a/src/list.c b/src/list.c
new file mode 100644
index 00000000..f57109e1
--- /dev/null
+++ b/src/list.c
@@ -0,0 +1,105 @@
+/*
+ * Copyright 2007-2012 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <pdarun.h>
+
+/* Link new_el into list immediately after prev_el. A null prev_el means
+ * insert at the head of the list. */
+void listAddAfter( List *list, ListEl *prev_el, ListEl *new_el )
+{
+ /* The element that will follow new_el: the old head when inserting at
+ * the front, otherwise whatever used to follow prev_el. */
+ ListEl *following = ( prev_el == 0 ) ? list->head : prev_el->next;
+
+ new_el->prev = prev_el;
+ new_el->next = following;
+
+ /* Forward link into new_el. */
+ if ( prev_el == 0 )
+ list->head = new_el;
+ else
+ prev_el->next = new_el;
+
+ /* Backward link into new_el; with nothing after it, it is the tail. */
+ if ( following == 0 )
+ list->tail = new_el;
+ else
+ following->prev = new_el;
+
+ list->listLen++;
+}
+
+/* Link new_el into list immediately before next_el. A null next_el means
+ * insert at the tail of the list. */
+void listAddBefore( List *list, ListEl *next_el, ListEl *new_el)
+{
+ /* The element that will precede new_el: the old tail when inserting at
+ * the end, otherwise whatever used to precede next_el. */
+ ListEl *preceding = ( next_el == 0 ) ? list->tail : next_el->prev;
+
+ new_el->next = next_el;
+ new_el->prev = preceding;
+
+ /* Backward link into new_el. */
+ if ( next_el == 0 )
+ list->tail = new_el;
+ else
+ next_el->prev = new_el;
+
+ /* Forward link into new_el; with nothing before it, it is the head. */
+ if ( preceding == 0 )
+ list->head = new_el;
+ else
+ preceding->next = new_el;
+
+ list->listLen++;
+}
+
+/* Unlink el from list and return it. The prev/next pointers of el itself
+ * are left untouched. */
+ListEl *listDetach( List *list, ListEl *el )
+{
+ ListEl *before = el->prev;
+ ListEl *after = el->next;
+
+ /* Make the forward chain skip over el. */
+ if ( before != 0 )
+ before->next = after;
+ else
+ list->head = after;
+
+ /* Make the reverse chain skip over el. */
+ if ( after != 0 )
+ after->prev = before;
+ else
+ list->tail = before;
+
+ list->listLen--;
+ return el;
+}
+
diff --git a/src/lmparse.kh b/src/lmparse.kh
new file mode 100644
index 00000000..86b70b6f
--- /dev/null
+++ b/src/lmparse.kh
@@ -0,0 +1,120 @@
+/*
+ * Copyright 2001-2007 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef RLPARSE_H
+#define RLPARSE_H
+
+#include <iostream>
+#include "avltree.h"
+#include "parsedata.h"
+
+#define PROPERTY_REDUCE_FIRST 0x1
+
+/* Parser state for a Colm source file. The grammar actions in lmparse.kl
+ * run as members of this struct and use the fields below as scratch state. */
+struct ColmParser
+{
+ /* fileName and sectionLoc are accepted but not stored. The list pointers
+ * used as scratch state by the grammar actions start out null so that a
+ * use before assignment is detectable rather than reading garbage. */
+ ColmParser( Compiler *pd, const char *fileName, const char *sectionName, const InputLoc &sectionLoc )
+ : pd(pd), sectionName(sectionName),
+ patternItemList(0), replItemList(0),
+ curDefList(0), curProdElList(0),
+ enterRl(false)
+ {}
+
+ %%{
+ parser ColmParser;
+
+ # Use a class for tokens.
+ token uses class Token;
+
+ # Atoms.
+ token TK_Word, TK_Literal, TK_SingleLit, TK_DoubleLit, TK_Number, TK_UInt,
+ TK_Hex, KW_Nil, KW_True, KW_False;
+
+ # General tokens.
+ token TK_DotDot, TK_ColonGt, TK_ColonGtGt, TK_LtColon,
+ TK_DoubleArrow, TK_StarStar, TK_NameSep, TK_DashDash, TK_DoubleEql,
+ TK_NotEql, TK_DoubleColon, TK_LessEql, TK_GrtrEql, TK_RightArrow,
+ TK_LitPat, TK_AmpAmp, TK_BarBar, TK_SqOpen, TK_SqOpenNeg, TK_SqClose,
+ TK_Dash, TK_ReChar, TK_LtLt;
+
+ # Defining things.
+ token KW_Rl, KW_Def, KW_Lex, KW_Context, KW_Ignore, KW_Token, KW_Commit, KW_Namespace,
+ KW_Literal, KW_ReduceFirst, KW_Map, KW_List, KW_Vector, KW_Accum, KW_Global, KW_Export,
+ KW_Iter, KW_Reject, KW_Ptr, KW_Ref, KW_Deref;
+
+ # Language.
+ token KW_If, KW_While, KW_Else, KW_Elsif, KW_For, KW_Return, KW_Yield, KW_In,
+ KW_Break, KW_PrintXMLAC, KW_PrintXML, KW_Print, KW_PrintStream, KW_Require;
+
+ # Patterns.
+ token KW_Match, KW_Construct, KW_Parse, KW_ParseStop, KW_New, KW_MakeToken,
+ KW_MakeTree, KW_TypeId, KW_Alias, KW_Send, KW_Ni, KW_Ci;
+
+ token KW_Include, KW_Preeof;
+
+ token KW_Left, KW_Right, KW_Nonassoc, KW_Prec;
+
+ }%%
+
+ %% write instance_data;
+
+
+ void init();
+ int parseLangEl( int type, const Token *token );
+
+ int token( InputLoc &loc, int tokId, char *tokstart, int toklen );
+ void addRegularDef( const InputLoc &loc, Namespace *nspace,
+ const String &name, Join *join );
+ TokenRegion *createRegion( String &name );
+ void addRegionDef( const InputLoc &loc, Namespace *nspace,
+ const String &name, TokenRegion *join );
+ void addProduction( const InputLoc &loc, const String &name,
+ ProdElList *prodElList, bool commit, CodeBlock *redBlock, LangEl *predOf );
+ void addArgvList();
+
+ /* Report an error encountered by the parser. */
+ ostream &parse_error( int tokId, Token &token );
+
+ Compiler *pd;
+
+ /* The name of the root section, this does not change during an include. */
+ const char *sectionName;
+
+ NameRef nameRef;
+ NameRefList nameRefList;
+
+ LangElVect langElVect;
+
+ /* Lists being built by the pattern and replacement productions, plus the
+ * stacks of enclosing regions, namespaces and contexts. */
+ PatternItemList *patternItemList;
+ ReplItemList *replItemList;
+ RegionVect regionStack;
+ NamespaceVect namespaceStack;
+ ContextVect contextStack;
+
+ /* State of the 'def' currently being parsed. */
+ String curDefineId;
+ LelDefList *curDefList;
+ ProdElList *curProdElList;
+
+ PredType predType;
+ ReCaptureVect reCaptureVect;
+
+ bool enterRl;
+};
+
+%% write token_defs;
+
+#endif
diff --git a/src/lmparse.kl b/src/lmparse.kl
new file mode 100644
index 00000000..3ead7c98
--- /dev/null
+++ b/src/lmparse.kl
@@ -0,0 +1,2677 @@
+/*
+ * Copyright 2006-2012 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <iostream>
+#include <errno.h>
+
+#include "config.h"
+#include "lmparse.h"
+#include "global.h"
+#include "input.h"
+#include "fsmrun.h"
+
+using std::cout;
+using std::cerr;
+using std::endl;
+
+ParserDict parserDict;
+
+%%{
+
+parser ColmParser;
+
+include "lmparse.kh";
+
+# Entry production. The statements collected from the root items become the
+# root code block of the program.
+start: root_item_list
+ final {
+ if ( colm_log_compile ) {
+ cerr << "parsing complete" << endl;
+ }
+
+ pd->rootCodeBlock = new CodeBlock( $1->stmtList );
+ };
+
+# A possibly empty sequence of root items. The statement list is created by
+# the empty production and extended in place by each item that yields a
+# statement.
+nonterm root_item_list uses lang_stmt_list;
+
+root_item_list: root_item_list root_item
+ final {
+ $$->stmtList = $1->stmtList;
+
+ /* Maybe a statement. */
+ if ( $2->stmt != 0 )
+ $$->stmtList->append( $2->stmt );
+ };
+
+root_item_list:
+ final {
+ $$->stmtList = new StmtList;
+ };
+
+# One top-level item. Only global_def and statement pass a statement up to
+# the enclosing list; every other kind of item yields a null statement.
+nonterm root_item uses statement;
+
+root_item: literal_def commit final { $$->stmt = 0; };
+root_item: rl_def commit final { $$->stmt = 0; };
+root_item: token_def commit final { $$->stmt = 0; };
+root_item: cfl_def commit final { $$->stmt = 0; };
+root_item: region_def commit final { $$->stmt = 0; };
+root_item: context_def commit final { $$->stmt = 0; };
+root_item: namespace_def commit final { $$->stmt = 0; };
+root_item: function_def commit final { $$->stmt = 0; };
+root_item: iter_def commit final { $$->stmt = 0; };
+root_item: global_def commit final { $$->stmt = $1->stmt; };
+root_item: statement commit final { $$->stmt = $1->stmt; };
+root_item: pre_eof commit final { $$->stmt = 0; };
+root_item: precedence commit final { $$->stmt = 0; };
+root_item: typedef commit final { $$->stmt = 0; };
+
+# Opening brace of a code block. Creates a fresh local frame, makes it the
+# current one and seeds it with one field per entry of reCaptureVect.
+nonterm block_open
+{
+ ObjectDef *localFrame;
+};
+
+block_open: '{'
+ final {
+ /* Init the object representing the local frame. */
+ $$->localFrame = new ObjectDef( ObjectDef::FrameType,
+ "local", pd->nextObjectId++ );
+
+ pd->curLocalFrame = $$->localFrame;
+
+ /* Add captures to the local frame. We Depend on these becoming the
+ * first local variables so we can compute their location. */
+
+ /* Make local variables corresponding to the local capture vector. */
+ for ( ReCaptureVect::Iter c = reCaptureVect; c.lte(); c++ )
+ {
+ ObjField *objField = new ObjField( c->objField->loc,
+ c->objField->typeRef, c->objField->name );
+
+ /* Insert it into the field map. */
+ pd->curLocalFrame->insertField( objField->name, objField );
+ }
+ };
+
+# Closing brace of a code block. Note that the current frame is reset to the
+# root frame rather than to whatever frame was current before block_open.
+block_close: '}'
+ final {
+ /* Pop the cur local frame, back to the root. */
+ pd->curLocalFrame = pd->rootLocalFrame;
+ };
+
+
+# Iterator definition. Registered as a Function with a null return type and
+# the final constructor argument set to true.
+# NOTE(review): unlike function_def, inContext is not set here even when a
+# context is open -- confirm that this is intended.
+iter_def:
+ KW_Iter TK_Word '(' opt_param_list ')' block_open lang_stmt_list block_close
+ final {
+ CodeBlock *codeBlock = new CodeBlock( $7->stmtList );
+ codeBlock->localFrame = $6->localFrame;
+ Function *newFunction = new Function( 0, $2->data,
+ $4->paramList, codeBlock, pd->nextFuncId++, true );
+ pd->functionList.append( newFunction );
+ };
+
+# Function definition. The body's local frame comes from block_open. When
+# the definition appears inside a context, the function records the
+# innermost one in inContext.
+function_def:
+ type_ref TK_Word '(' opt_param_list ')' block_open lang_stmt_list block_close
+ final {
+ CodeBlock *codeBlock = new CodeBlock( $7->stmtList );
+ codeBlock->localFrame = $6->localFrame;
+ Function *newFunction = new Function( $1->typeRef, $2->data,
+ $4->paramList, codeBlock, pd->nextFuncId++, false );
+ pd->functionList.append( newFunction );
+
+ if ( contextStack.length() > 0 )
+ newFunction->inContext = contextStack.top();
+ };
+
+# Optional parameter list. An absent list yields an empty ParameterList, so
+# paramList is never null.
+nonterm opt_param_list uses param_list;
+
+opt_param_list: param_list
+ final {
+ $$->paramList = $1->paramList;
+ };
+
+opt_param_list:
+ final {
+ $$->paramList = new ParameterList;
+ };
+
+# One or more parameter definitions, appended in source order. No separator
+# token appears between them in this grammar.
+nonterm param_list
+{
+ ParameterList *paramList;
+};
+
+param_list: param_list param_var_def
+ final {
+ $$->paramList = $1->paramList;
+ $$->paramList->append( $2->objField );
+ };
+
+param_list: param_var_def
+ final {
+ /* Create the list and insert the first item. */
+ $$->paramList = new ParameterList;
+ $$->paramList->append( $1->objField );
+ };
+
+# A single parameter, 'name : type' or 'name : ref type'. The resulting
+# field is flagged as a parameter.
+nonterm param_var_def uses var_def;
+
+param_var_def: TK_Word ':' type_ref
+ final {
+ $$->objField = new ObjField( $1->loc, $3->typeRef, $1->data );
+ $$->objField->isParam = true;
+ };
+param_var_def: TK_Word ':' reference_type_ref
+ final {
+ $$->objField = new ObjField( $1->loc, $3->typeRef, $1->data );
+ $$->objField->isParam = true;
+ };
+
+# 'ref' type: wraps the inner type reference in a TypeRef::Ref located at
+# the 'ref' keyword.
+nonterm reference_type_ref uses type_ref;
+
+reference_type_ref: KW_Ref type_ref
+ final {
+ $$->typeRef = new TypeRef( TypeRef::Ref, $1->loc, $2->typeRef );
+ };
+
+# Global and exported variable definitions. The field is inserted into the
+# owning object. If an initializer is present, the production yields an
+# assignment statement for it, otherwise a null statement.
+nonterm global_def uses statement;
+
+# 'export': always goes into the global object and is rejected inside a
+# context.
+global_def: KW_Export var_def opt_def_init
+ final {
+ $$->stmt = 0;
+
+ if ( contextStack.length() != 0 )
+ error($2->objField->loc) << "cannot export parser context variables" << endp;
+
+ ObjectDef *object = pd->globalObjectDef;
+
+ if ( object->checkRedecl( $2->objField->name ) != 0 )
+ error($2->objField->loc) << "object field renamed" << endp;
+
+ object->insertField( $2->objField->name, $2->objField );
+ $2->objField->isExport = true;
+
+ if ( $3->expr != 0 ) {
+ LangVarRef *varRef = new LangVarRef( $2->objField->loc,
+ new QualItemVect, $2->objField->name );
+
+ $$->stmt = new LangStmt( $2->objField->loc,
+ $3->assignType, varRef, $3->expr );
+ }
+ };
+
+# 'global': goes into the global object at the top level, or into the object
+# of the innermost context when one is open.
+global_def: KW_Global var_def opt_def_init
+ final {
+ $$->stmt = 0;
+
+ ObjectDef *object;
+ if ( contextStack.length() == 0 )
+ object = pd->globalObjectDef;
+ else {
+ Context *context = contextStack.top();
+ $2->objField->context = context;
+ object = context->contextObjDef;
+ }
+
+ if ( object->checkRedecl( $2->objField->name ) != 0 )
+ error($2->objField->loc) << "object field renamed" << endp;
+
+ object->insertField( $2->objField->name, $2->objField );
+
+ if ( $3->expr != 0 ) {
+ LangVarRef *varRef = new LangVarRef( $2->objField->loc,
+ new QualItemVect, $2->objField->name );
+
+ $$->stmt = new LangStmt( $2->objField->loc,
+ $3->assignType, varRef, $3->expr );
+ }
+ };
+
+# Precedence declaration. All tokens of one declaration share the current
+# pd->predValue, which is bumped once the whole declaration is reduced.
+precedence: pred_type pred_token_list final { pd->predValue++; };
+
+# The associativity is kept in the parser member predType for the tokens
+# that follow.
+pred_type: KW_Left final { predType = PredLeft; };
+pred_type: KW_Right final { predType = PredRight; };
+pred_type: KW_Nonassoc final { predType = PredNonassoc; };
+
+pred_token_list: pred_token_list ',' pred_token
+ final {
+ };
+
+pred_token_list: pred_token;
+
+# A token named in a precedence declaration, by word or by literal. Each one
+# appends a PredDecl to pd->predDeclList.
+# NOTE(review): neither production below assigns $$->factor or $$->typeRef;
+# the TypeRef is kept in a local only.
+nonterm pred_token
+{
+ ProdEl *factor;
+ TypeRef *typeRef;
+};
+
+pred_token:
+ region_qual TK_Word
+ final {
+ TypeRef *typeRef = new TypeRef( $2->loc, $1->nspaceQual, $2->data );
+
+ PredDecl *predDecl = new PredDecl( typeRef, predType, pd->predValue );
+ pd->predDeclList.append( predDecl );
+ };
+
+pred_token:
+ region_qual TK_Literal
+ final {
+ PdaLiteral *literal = new PdaLiteral( $2->loc, *$2 );
+ TypeRef *typeRef = new TypeRef( $2->loc, $1->nspaceQual, literal );
+
+ PredDecl *predDecl = new PredDecl( typeRef, predType, pd->predValue );
+ pd->predDeclList.append( predDecl );
+ };
+
+# Type alias: 'alias' name type. Recorded in the innermost open namespace.
+typedef:
+ KW_Alias TK_Word type_ref
+ final {
+ Namespace *nspace = namespaceStack.top();
+ TypeAlias *typeAlias = new TypeAlias(
+ $1->loc, nspace, $2->data, $3->typeRef );
+ nspace->typeAliasList.append( typeAlias );
+ };
+
+# Nonterminal definition ('def'). Builds an NtDef from the state gathered by
+# the sub-productions: the name (curDefineId), the production list, the
+# attribute object (pd->objectDef), the enclosing context if any, and the
+# reduce-first property bit.
+cfl_def: cfl_def_head obj_var_list properties_list cfl_prod_list
+ final {
+ Namespace *nspace = namespaceStack.top();
+ NtDef *ntDef = new NtDef(
+ curDefineId,
+ nspace,
+ $4->defList,
+ pd->objectDef,
+ contextStack.length() > 0 ? contextStack.top() : 0,
+ $3->property & PROPERTY_REDUCE_FIRST );
+
+ nspace->ntDefList.append( ntDef );
+ };
+
+# Remembers the name being defined and starts a fresh production list.
+cfl_def_head: KW_Def TK_Word
+ final {
+ curDefineId = $2->data;
+ curDefList = new LelDefList;
+ };
+
+# The '|'-separated productions of a 'def'. Each production gets prodNum set
+# to its index within the list, taken from the list length before appending.
+nonterm cfl_prod_list
+{
+ LelDefList *defList;
+};
+
+cfl_prod_list: cfl_prod_list '|' define_prod
+ final {
+ $$->defList = $1->defList;
+ $3->definition->prodNum = $$->defList->length();
+ $$->defList->append( $3->definition );
+ };
+cfl_prod_list: define_prod
+ final {
+ /* The first production starts from the list made in cfl_def_head. */
+ $$->defList = curDefList;
+ $1->definition->prodNum = $$->defList->length();
+ $$->defList->append( $1->definition );
+ };
+
+# Properties of a 'def', accumulated as a bit set. The only property parsed
+# here is PROPERTY_REDUCE_FIRST.
+nonterm property
+{
+ long property;
+};
+
+nonterm properties_list uses property;
+
+properties_list: properties_list property
+ final {
+ $$->property = $1->property | $2->property;
+ };
+properties_list:
+ final {
+ $$->property = 0;
+ };
+
+property:
+ KW_ReduceFirst
+ final {
+ $$->property = PROPERTY_REDUCE_FIRST;
+ };
+
+# Optional 'prec' clause on a production. Yields the language element whose
+# precedence the production takes, or null when the clause is absent.
+nonterm opt_prec
+{
+ LangEl *predOf;
+};
+
+opt_prec:
+ final {
+ $$->predOf = 0;
+ };
+
+opt_prec:
+ KW_Prec pred_token
+ final {
+ /* NOTE(review): neither pred_token production in this file assigns
+ * $$->factor, so $2->factor appears to be read uninitialized here --
+ * confirm before relying on the 'prec' clause. */
+ $$->predOf = $2->factor->langEl;
+ };
+
+# A single production of a 'def'. Creates the Definition, numbers it by its
+# position in pd->prodList and appends it there. Adding it to the 'def' list
+# is left to cfl_prod_list.
+# NOTE(review): the element list is taken from curProdElList, presumably
+# filled in by prod_el_list (not shown here) -- confirm.
+nonterm define_prod
+{
+ Definition *definition;
+};
+
+define_prod: '[' prod_el_list ']' opt_commit opt_reduce_code opt_prec
+ final {
+ const InputLoc &loc = $1->loc;
+ //const String &name = curDefineId;
+ ProdElList *prodElList = curProdElList;
+ bool commit = $4->commit;
+ CodeBlock *redBlock = $5->codeBlock;
+ LangEl *predOf = $6->predOf;
+
+ //Namespace *nspace = namespaceStack.top();
+
+ Definition *newDef = new Definition( loc, 0/*prodName*/,
+ prodElList, commit, redBlock,
+ pd->prodList.length(), 0, Definition::Production );
+ newDef->predOf = predOf;
+
+ pd->prodList.append( newDef );
+
+ $$->definition = newDef;
+ };
+
+# Attribute declarations of a 'def'. The empty production runs first and
+# creates the object; each var_def that follows is inserted into it.
+obj_var_list: obj_var_list var_def
+ final {
+ /* Report the redeclaration at the field's own location, the same as
+ * the global_def productions do. */
+ if ( pd->objectDef->checkRedecl( $2->objField->name ) != 0 )
+ error($2->objField->loc) << "object field renamed" << endp;
+
+ pd->objectDef->insertField( $2->objField->name, $2->objField );
+ };
+
+obj_var_list:
+ final {
+ pd->objectDef = new ObjectDef( ObjectDef::UserType,
+ curDefineId, pd->nextObjectId++ );
+ };
+
+
+# A type reference: a basic named type or one of the generic container
+# forms. The container forms are qualified with the innermost namespace and
+# region and carry a default-constructed InputLoc.
+nonterm type_ref
+{
+ TypeRef *typeRef;
+};
+
+type_ref: basic_type_ref
+ final {
+ $$->typeRef = $1->typeRef;
+ };
+
+# map<key value>: the two type arguments are not separated by a token.
+type_ref: KW_Map '<' type_ref type_ref '>'
+ final {
+ NamespaceQual *nspaceQual = new NamespaceQual(
+ namespaceStack.top(), regionStack.top() );
+ $$->typeRef = new TypeRef( TypeRef::Map, InputLoc(), nspaceQual,
+ $3->typeRef, $4->typeRef );
+ };
+
+type_ref: KW_List '<' type_ref '>'
+ final {
+ NamespaceQual *nspaceQual = new NamespaceQual(
+ namespaceStack.top(), regionStack.top() );
+ $$->typeRef = new TypeRef( TypeRef::List, InputLoc(), nspaceQual, $3->typeRef, 0 );
+ };
+type_ref: KW_Vector '<' type_ref '>'
+ final {
+ NamespaceQual *nspaceQual = new NamespaceQual(
+ namespaceStack.top(), regionStack.top() );
+ $$->typeRef = new TypeRef( TypeRef::Vector, InputLoc(), nspaceQual, $3->typeRef, 0 );
+ };
+# The accum keyword produces a TypeRef::Parser type.
+type_ref: KW_Accum '<' type_ref '>'
+ final {
+ NamespaceQual *nspaceQual = new NamespaceQual(
+ namespaceStack.top(), regionStack.top() );
+ $$->typeRef = new TypeRef( TypeRef::Parser, InputLoc(), nspaceQual, $3->typeRef, 0 );
+ };
+
+# A named type with an optional region qualification and repeat operator.
+nonterm basic_type_ref uses type_ref;
+
+basic_type_ref: region_qual TK_Word opt_repeat
+ final {
+ $$->typeRef = new TypeRef( $2->loc, $1->nspaceQual, $2->data );
+ $$->typeRef->repeatType = $3->repeatType;
+ };
+
+# 'ptr' form: the named type is built first, then wrapped in a TypeRef::Ptr.
+# Both type references are located at the 'ptr' keyword.
+basic_type_ref: KW_Ptr region_qual TK_Word opt_repeat
+ final {
+ $$->typeRef = new TypeRef( $1->loc, $2->nspaceQual, $3->data );
+ $$->typeRef->repeatType = $4->repeatType;
+ $$->typeRef = new TypeRef( TypeRef::Ptr, $1->loc, $$->typeRef );
+ };
+
+
+# Variable definition 'name : type'.
+# NOTE(review): the action below assigns only objField; the loc member of
+# this nonterminal is never set here, so users should read the location from
+# objField->loc instead.
+nonterm var_def
+{
+ InputLoc loc;
+ ObjField *objField;
+};
+
+var_def: TK_Word ':' type_ref
+ final {
+ /* Return an object field object. The user of this nonterminal must
+ * load it into the appropriate map and do error checking. */
+ $$->objField = new ObjField( $1->loc, $3->typeRef, $1->data );
+ };
+
+# Lexical region ('lex'). The head pushes the new region and the closing
+# brace pops it again.
+region_def:
+ region_head '{' root_item_list '}'
+ final {
+ /* Pop the top of the stack. */
+ regionStack.pop();
+ };
+
+# Creates four regions per 'lex' block: the full region "<name>" plus the
+# derived ignore-only, collect-ignore-only and token-only variants. Only the
+# full region goes on the region stack; the other three are linked from it
+# and point back at it through derivedFrom.
+# NOTE(review): every name is built with a first String argument of
+# length() + 2, although three of the formats add more than two characters.
+# Presumably the constructor treats it as a length guess and grows as needed
+# -- confirm against the String implementation.
+region_head:
+ KW_Lex TK_Word
+ final {
+ /* Just for ignores. */
+ String scannerNameIgn( $2->data.length() + 2, "<%s>-ign", $2->data.data );
+ TokenRegion *tokenRegionIgn = createRegion( scannerNameIgn );
+
+ /* Just for collect ignores. Will use the ignore-only start state. */
+ String scannerNameCi( $2->data.length() + 2, "<%s>-ci", $2->data.data );
+ TokenRegion *tokenRegionCi = createRegion( scannerNameCi );
+
+ /* Just for tokens. */
+ String scannerNameTok( $2->data.length() + 2, "<%s>-tok", $2->data.data );
+ TokenRegion *tokenRegionTok = createRegion( scannerNameTok );
+
+ /* Make the new token region. */
+ String scannerName( $2->data.length() + 2, "<%s>", $2->data.data );
+ TokenRegion *tokenRegion = createRegion( scannerName );
+
+ regionStack.push( tokenRegion );
+
+ tokenRegion->ignoreOnlyRegion = tokenRegionIgn;
+ tokenRegion->tokenOnlyRegion = tokenRegionTok;
+ tokenRegion->ciRegion = tokenRegionCi;
+
+ tokenRegion->isFullRegion = true;
+ tokenRegionIgn->isIgnoreOnly = true;
+ tokenRegionCi->isCiOnly = true;
+ tokenRegionTok->isTokenOnly = true;
+
+ tokenRegionIgn->derivedFrom = tokenRegion;
+ tokenRegionCi->derivedFrom = tokenRegion;
+ tokenRegionTok->derivedFrom = tokenRegion;
+ };
+
+# Namespace block. The head pushes the new namespace and the closing brace
+# pops it again.
+namespace_def:
+ namespace_head '{' root_item_list '}'
+ final {
+ namespaceStack.pop();
+ };
+
+# The new namespace is a child of the innermost open one. Its id is its
+# index in pd->namespaceList.
+namespace_head:
+ KW_Namespace TK_Word
+ final {
+ /* Make the new namespace. */
+ Namespace *nspace = new Namespace( InputLoc(), $2->data,
+ pd->namespaceList.length(), namespaceStack.top() );
+ namespaceStack.top()->childNamespaces.append( nspace );
+ pd->namespaceList.append( nspace );
+ namespaceStack.push( nspace );
+ };
+
+# A variable declared directly inside a context block. The field is bound to
+# the innermost context and inserted into that context's object.
+context_var_def:
+ var_def
+ final {
+ ObjectDef *object;
+ /* var_def assigns only objField, so take the location from the field
+ * rather than from the unset $1->loc. */
+ if ( contextStack.length() == 0 )
+ error($1->objField->loc) << "internal error: no context stack items found" << endp;
+
+ Context *context = contextStack.top();
+ $1->objField->context = context;
+ object = context->contextObjDef;
+
+ if ( object->checkRedecl( $1->objField->name ) != 0 )
+ error($1->objField->loc) << "object field renamed" << endp;
+
+ object->insertField( $1->objField->name, $1->objField );
+ };
+
+
+# Items allowed inside a context block. Compared with root_item there is no
+# namespace_def, global_def, statement or typedef, and plain variable
+# definitions are accepted through context_var_def.
+context_item: context_var_def commit;
+context_item: literal_def commit;
+context_item: rl_def commit;
+context_item: token_def commit;
+context_item: cfl_def commit;
+context_item: region_def commit;
+context_item: context_def commit;
+context_item: function_def commit;
+context_item: iter_def commit;
+context_item: pre_eof commit;
+context_item: precedence commit;
+
+context_item_list:
+ context_item_list context_item;
+context_item_list:
+ ;
+
+# Context block. The head pushes both a namespace and a context, and both
+# are popped at the closing brace.
+context_def:
+ context_head '{' context_item_list '}'
+ final {
+ contextStack.pop();
+ namespaceStack.pop();
+ };
+
+# Opens a context: a namespace of the same name is created and pushed, then
+# the Context with its own object definition.
+context_head:
+ KW_Context TK_Word
+ final {
+ /* Make the new namespace. */
+ Namespace *nspace = new Namespace( InputLoc(), $2->data,
+ pd->namespaceList.length(), namespaceStack.top() );
+ namespaceStack.top()->childNamespaces.append( nspace );
+ pd->namespaceList.append( nspace );
+ namespaceStack.push( nspace );
+
+ Context *context = new Context( $1->loc, 0 );
+ contextStack.push( context );
+
+ ContextDef *contextDef = new ContextDef( $2->data, context, nspace );
+ nspace->contextDefList.append( contextDef );
+
+ context->contextObjDef = new ObjectDef( ObjectDef::UserType,
+ $2->data, pd->nextObjectId++ );
+ };
+
+# One or more patterns. The empty init_pattern_list production runs before
+# the first pattern and allocates the list that the element productions
+# append to.
+pattern_list: pattern_list pattern;
+pattern_list: init_pattern_list pattern;
+
+init_pattern_list:
+ final {
+ patternItemList = new PatternItemList;
+ };
+
+# A pattern is either a quoted literal form or a bracketed element form.
+# Each may nest the other.
+pattern: '"' litpat_el_list '"';
+pattern: '[' pattern_el_list ']';
+
+litpat_el_list: litpat_el_list litpat_el;
+litpat_el_list: ;
+
+# Literal text inside a quoted pattern becomes an InputText item.
+litpat_el: TK_LitPat
+ final {
+ PatternItem *patternItem = new PatternItem( $1->loc, $1->data,
+ PatternItem::InputText );
+ patternItemList->append( patternItem );
+ };
+
+litpat_el: '[' pattern_el_list ']';
+
+pattern_el_list: pattern_el_list pattern_el;
+pattern_el_list: ;
+
+# A typed pattern element with an optional 'label:' prefix. A label declares
+# a new local variable of the element's type in the current local frame.
+pattern_el: opt_label pattern_el_type_or_lit
+ final {
+ /* Store the variable reference in the pattern item. */
+ $2->patternItem->varRef = $1->varRef;
+
+ if ( $1->varRef != 0 ) {
+ if ( pd->curLocalFrame->checkRedecl( $1->varRef->name ) != 0 ) {
+ error( $1->varRef->loc ) << "variable " << $1->varRef->name <<
+ " redeclared" << endp;
+ }
+
+ TypeRef *typeRef = $2->patternItem->factor->typeRef;
+ ObjField *objField = new ObjField( InputLoc(), typeRef, $1->varRef->name );
+
+ /* Insert it into the field map. */
+ pd->curLocalFrame->insertField( $1->varRef->name, objField );
+ }
+ };
+
+pattern_el: '"' litpat_el_list '"';
+# The '?' name form is accepted by the grammar but not implemented: the
+# action asserts.
+pattern_el: '?' TK_Word
+ final {
+ /* FIXME: Implement */
+ assert(false);
+ };
+
+# The type part of a pattern element, named by word or by literal. Builds a
+# FactorType pattern item, appends it to the current pattern list and passes
+# it up so that pattern_el can attach a label. Both forms create the ProdEl
+# as ProdEl::ReferenceType.
+nonterm pattern_el_type_or_lit
+{
+ PatternItem *patternItem;
+};
+
+pattern_el_type_or_lit: region_qual TK_Word opt_repeat
+ final {
+ TypeRef *typeRef = new TypeRef( $2->loc, $1->nspaceQual, $2->data );
+ typeRef->repeatType = $3->repeatType;
+ ProdEl *factor = new ProdEl( ProdEl::ReferenceType, $2->loc, 0, false, typeRef, 0 );
+ $$->patternItem = new PatternItem( $2->loc, factor, PatternItem::FactorType );
+ patternItemList->append( $$->patternItem );
+ };
+
+pattern_el_type_or_lit: region_qual TK_Literal opt_repeat
+ final {
+ PdaLiteral *literal = new PdaLiteral( $2->loc, *$2 );
+ TypeRef *typeRef = new TypeRef( $2->loc, $1->nspaceQual, literal );
+ typeRef->repeatType = $3->repeatType;
+
+ ProdEl *factor = new ProdEl( ProdEl::ReferenceType, $2->loc, 0, false, typeRef, 0 );
+ $$->patternItem = new PatternItem( $2->loc, factor, PatternItem::FactorType );
+ patternItemList->append( $$->patternItem );
+ };
+
+# Optional 'name:' label. Yields a variable reference with an empty
+# qualification, or null when there is no label.
+nonterm opt_label
+{
+ /* Variable reference. */
+ LangVarRef *varRef;
+};
+
+opt_label: TK_Word ':'
+ final {
+ $$->varRef = new LangVarRef( $1->loc, new QualItemVect, $1->data );
+ };
+opt_label:
+ final {
+ $$->varRef = 0;
+ };
+
+#
+# Replacement
+#
+
+# One or more replacements. The empty init_repl_list production runs before
+# the first one and allocates the list that the element productions append
+# to.
+repl_list: repl_list replacement;
+repl_list: init_repl_list replacement;
+
+init_repl_list:
+ final {
+ replItemList = new ReplItemList;
+ };
+
+# A replacement is either a quoted literal form or a bracketed element form.
+# Each may nest the other.
+replacement: '"' lit_repl_el_list '"';
+replacement: '[' repl_el_list ']';
+
+lit_repl_el_list: lit_repl_el_list lit_repl_el;
+lit_repl_el_list: ;
+
+# Literal text inside a quoted replacement becomes an InputText item.
+lit_repl_el: TK_LitPat
+ final {
+ ReplItem *replItem = new ReplItem( $1->loc, ReplItem::InputText, $1->data );
+ replItemList->append( replItem );
+ };
+
+lit_repl_el: '[' repl_el_list ']';
+
+repl_el_list: repl_el_list repl_el;
+repl_el_list: ;
+
+# A literal token in a bracketed replacement becomes a FactorType item. The
+# ProdEl is created as ProdEl::LiteralType with no repeat.
+repl_el: region_qual TK_Literal
+ final {
+ PdaLiteral *literal = new PdaLiteral( $2->loc, *$2 );
+ TypeRef *typeRef = new TypeRef( $2->loc, $1->nspaceQual, literal );
+ typeRef->repeatType = RepeatNone;
+ ProdEl *factor = new ProdEl( ProdEl::LiteralType, $2->loc, 0, false, typeRef, 0 );
+ ReplItem *replItem = new ReplItem( $2->loc, ReplItem::FactorType, factor );
+ replItemList->append( replItem );
+ };
+repl_el: '"' lit_repl_el_list '"';
+
+# Any other element is an expression, located at the expression itself.
+repl_el: code_expr
+ final {
+ ReplItem *replItem = new ReplItem( $1->expr->loc, ReplItem::ExprType, $1->expr );
+ replItemList->append( replItem );
+ };
+
+#
+# Accum
+#
+# The accumulate body is either a list of quoted or bracketed sections, or a
+# single expression. Both forms start a fresh item list via init_repl_list.
+accumulate: init_repl_list accum_list;
+accumulate: init_repl_list code_expr
+	final {
+		ReplItem *exprItem = new ReplItem( $2->expr->loc, ReplItem::ExprType, $2->expr );
+		replItemList->append( exprItem );
+	};
+
+accum_list: accum_list accum;
+accum_list: accum;
+
+# NOTE(review): no production visible in this part of the grammar refers to
+# init_accum_list -- confirm whether it is still needed.
+init_accum_list:
+	final {
+		replItemList = new ReplItemList;
+	};
+
+accum: '"' lit_accum_el_list '"';
+accum: '[' accum_el_list ']';
+
+lit_accum_el_list: lit_accum_el_list lit_accum_el;
+lit_accum_el_list: ;
+
+# Literal text inside a quoted section.
+lit_accum_el: TK_LitPat
+	final {
+		ReplItem *textItem = new ReplItem( $1->loc, ReplItem::InputText, $1->data );
+		replItemList->append( textItem );
+	};
+
+lit_accum_el: '[' accum_el_list ']';
+
+accum_el_list: accum_el_list accum_el;
+accum_el_list: ;
+
+#accum_el: region_qual TK_Literal
+# final {
+# PdaLiteral *literal = new PdaLiteral( $2->loc, *$2 );
+# ProdEl *factor = new ProdEl( $2->loc, false, $1->nspaceQual,
+# literal, 0 );
+# ReplItem *replItem = new ReplItem( $2->loc, ReplItem::FactorType, factor );
+# replItemList->append( replItem );
+# };
+# An expression inside a bracketed section.
+accum_el: code_expr
+	final {
+		ReplItem *exprItem = new ReplItem( $1->expr->loc, ReplItem::ExprType, $1->expr );
+		replItemList->append( exprItem );
+	};
+
+accum_el: '"' lit_accum_el_list '"';
+
+
+#
+# String
+#
+
+# A string is a sequence of quoted or bracketed sections. The item list is
+# created by init_string_list ahead of the first section.
+string_list: string_list string;
+string_list: init_string_list string;
+
+init_string_list:
+	final {
+		/* Start a fresh item list. */
+		replItemList = new ReplItemList;
+	};
+
+string: '"' lit_string_el_list '"';
+string: '[' string_el_list ']';
+
+lit_string_el_list: lit_string_el_list lit_string_el;
+lit_string_el_list: ;
+
+# Literal text inside a quoted section.
+lit_string_el: TK_LitPat
+	final {
+		ReplItem *textItem = new ReplItem( $1->loc, ReplItem::InputText, $1->data );
+		replItemList->append( textItem );
+	};
+
+lit_string_el: '[' string_el_list ']';
+
+string_el_list: string_el_list string_el;
+string_el_list: ;
+
+#accum_el: region_qual TK_Literal
+# final {
+# PdaLiteral *literal = new PdaLiteral( $2->loc, *$2 );
+# ProdEl *factor = new ProdEl( $2->loc, false, $1->nspaceQual,
+# literal, 0 );
+# ReplItem *replItem = new ReplItem( $2->loc, ReplItem::FactorType, factor );
+# replItemList->append( replItem );
+# };
+# An expression inside a bracketed section.
+string_el: code_expr
+	final {
+		ReplItem *exprItem = new ReplItem( $1->expr->loc, ReplItem::ExprType, $1->expr );
+		replItemList->append( exprItem );
+	};
+
+string_el: '"' lit_string_el_list '"';
+
+# Each parsed element is appended to the element list under construction.
+prod_el_list:
+	prod_el_list prod_el
+	final {
+		ProdEl *parsed = $2->factor;
+		curProdElList->append( parsed );
+	};
+
+# The empty case starts a new element list.
+prod_el_list:
+	final {
+		curProdElList = new ProdElList;
+	};
+
+# Optional KW_Ni marker. The value is true when the marker is present and
+# false otherwise.
+nonterm opt_no_ignore { bool value; };
+
+opt_no_ignore: KW_Ni final { $$->value = true; };
+opt_no_ignore: final { $$->value = false; };
+
+nonterm prod_el
+{
+	/* The production element built for this item. */
+	ProdEl *factor;
+};
+
+# A reference to a named type, optionally captured into an object field and
+# optionally followed by a repeat suffix.
+prod_el:
+	opt_capture opt_commit region_qual TK_Word opt_repeat
+	final {
+		TypeRef *typeRef = new TypeRef( $4->loc, $3->nspaceQual, $4->data );
+		typeRef->repeatType = $5->repeatType;
+		$$->factor = new ProdEl( ProdEl::ReferenceType, $4->loc, $1->objField, $2->commit, typeRef, 0 );
+
+		/* If there is a capture, create the field. */
+		if ( $1->objField != 0 ) {
+			/* Might already exist. */
+			ObjField *objField = pd->objectDef->checkRedecl( $1->objField->name );
+			if ( objField == 0 ) {
+				objField = $1->objField;
+				objField->typeRef = typeRef;
+				pd->objectDef->insertField( objField->name, objField );
+			}
+			else {
+				/* FIXME: check the types are the same. */
+				//error() << "object field renamed" << endp;
+			}
+
+			/* Record the current definition and element positions on the field.
+			 * The value is built once and appended. */
+			objField->isRhsGet = true;
+			RhsVal rhsVal( curDefList->length(), curProdElList->length() );
+			objField->rhsVal.append( rhsVal );
+		}
+	};
+
+# A literal, optionally captured into an object field and optionally followed
+# by a repeat suffix.
+prod_el:
+	opt_capture opt_commit region_qual TK_Literal opt_repeat
+	final {
+		/* Create a new factor node going to a concat literal. */
+		PdaLiteral *literal = new PdaLiteral( $4->loc, *$4 );
+		TypeRef *typeRef = new TypeRef( $4->loc, $3->nspaceQual, literal );
+		typeRef->repeatType = $5->repeatType;
+		$$->factor = new ProdEl( ProdEl::LiteralType, $4->loc, $1->objField, $2->commit, typeRef, 0 );
+
+		/* If there is a capture, create the field. */
+		if ( $1->objField != 0 ) {
+			$1->objField->typeRef = typeRef;
+
+			/* Report the clash at the location of the capture name. */
+			if ( pd->objectDef->checkRedecl( $1->objField->name ) != 0 )
+				error( $1->objField->loc ) << "object field renamed" << endp;
+
+			pd->objectDef->insertField( $1->objField->name, $1->objField );
+		}
+	};
+
+# Optional repeat suffix on an element. The star, plus and question-mark
+# forms map to RepeatRepeat, RepeatList and RepeatOpt; no suffix gives
+# RepeatNone. The two flags are set alongside the repeat type.
+nonterm opt_repeat
+{
+ bool opt;
+ bool repeat;
+ RepeatType repeatType;
+};
+
+opt_repeat: '*' final { $$->opt = false; $$->repeat = true; $$->repeatType = RepeatRepeat; };
+opt_repeat: '+' final { $$->opt = false; $$->repeat = false; $$->repeatType = RepeatList; };
+opt_repeat: '?' final { $$->opt = true; $$->repeat = false; $$->repeatType = RepeatOpt; };
+opt_repeat: final { $$->opt = false; $$->repeat = false; $$->repeatType = RepeatNone; };
+
+nonterm region_qual
+{
+	/* Namespace qualification accumulated so far. */
+	NamespaceQual *nspaceQual;
+};
+
+# Each word followed by a double colon adds one name to the qualification.
+region_qual: region_qual TK_Word TK_DoubleColon
+	final {
+		NamespaceQual *qual = $1->nspaceQual;
+		qual->qualNames.append( $2->data );
+		$$->nspaceQual = qual;
+	};
+
+# The empty qualification starts from the current namespace and region.
+region_qual:
+	final {
+		$$->nspaceQual = new NamespaceQual( namespaceStack.top(), regionStack.top() );
+	};
+
+# A literal definition is the literal keyword followed by a comma-separated
+# list of literal items.
+literal_def: KW_Literal literal_list;
+
+literal_list: literal_list ',' literal_item;
+literal_list: literal_item;
+
+# Defines one literal token. When the top of the region stack is the root
+# region, a set of four regions is created just for this literal and the full
+# one is pushed for the duration of the action. The literal is then registered
+# in the literal dictionary of the current namespace.
+literal_item: opt_no_ignore TK_Literal opt_no_ignore
+ final {
+ /* Create a name for the literal. */
+ String name( 32, "_literal_%.4x", pd->nextTokenId );
+
+ /* Inside a region when the top of the region stack is not the root. */
+ bool insideRegion = regionStack.top() != pd->rootRegion;
+ if ( !insideRegion ) {
+ /* NOTE(review): the first String argument below is smaller than the
+ * formatted text -- presumably it is only a length guess; confirm. */
+
+ /* Just for ignores. */
+ String scannerNameIgn( name.length() + 2, "<%s>-ign", name.data );
+ TokenRegion *tokenRegionIgn = createRegion( scannerNameIgn );
+
+ /* Just for collect ignores. Will use the ignore-only start state. */
+ String scannerNameCi( name.length() + 2, "<%s>-ci", name.data );
+ TokenRegion *tokenRegionCi = createRegion( scannerNameCi );
+
+ /* Just for tokens. */
+ String scannerNameTok( name.length() + 2, "<%s>-tok", name.data );
+ TokenRegion *tokenRegionTok = createRegion( scannerNameTok );
+
+ /* Make a new token region just for the token. */
+ String scannerName( name.length() + 2, "<%s>", name.data );
+ TokenRegion *tokenRegion = createRegion( scannerName );
+
+ regionStack.push( tokenRegion );
+
+ /* Link the three derived regions to the full region and flag each. */
+ tokenRegion->ignoreOnlyRegion = tokenRegionIgn;
+ tokenRegion->tokenOnlyRegion = tokenRegionTok;
+ tokenRegion->ciRegion = tokenRegionCi;
+
+ tokenRegion->isFullRegion = true;
+ tokenRegionIgn->isIgnoreOnly = true;
+ tokenRegionCi->isCiOnly = true;
+ tokenRegionTok->isTokenOnly = true;
+
+ tokenRegionIgn->derivedFrom = tokenRegion;
+ tokenRegionCi->derivedFrom = tokenRegion;
+ tokenRegionTok->derivedFrom = tokenRegion;
+ }
+
+ /* The interpreted form of the literal text is the dictionary key. */
+ bool unusedCI;
+ String interp;
+ prepareLitString( interp, unusedCI, $2->data, $2->loc );
+
+ /* Look for the production's associated region. */
+ Namespace *nspace = namespaceStack.top();
+ TokenRegion *region = regionStack.top();
+
+
+ LiteralDictEl *ldel = nspace->literalDict.find( interp );
+ if ( ldel != 0 )
+ error( $2->loc ) << "literal already defined in this namespace" << endp;
+ else {
+ /* Build the expression tree for a single string literal. */
+ Join *join = new Join( new Expression( new Term( new FactorWithAug(
+ new FactorWithRep( $2->loc, new FactorWithNeg( $2->loc, new Factor(
+ new Literal( $2->loc, $2->data,
+ Literal::LitString ) ) ) ) ) ) ) );
+
+ if ( strcmp( interp.data, "" ) == 0 ) {
+ /* Empty literal: recorded in the namespace and flagged isZero. It is
+ * not added to the region token list. */
+ TokenDef *tokenDef = new TokenDef( name, $2->data, true, false, join,
+ 0, $2->loc, pd->nextTokenId++, nspace, region, 0, 0, 0 );
+
+ //region->tokenDefList.append( tokenDef );
+
+ ldel = nspace->literalDict.insert( interp, tokenDef );
+ nspace->tokenDefList.append( tokenDef );
+
+ tokenDef->isZero = true;
+ }
+ else {
+ /* Non-empty literal: added to both the region and the namespace. */
+ TokenDef *tokenDef = new TokenDef( name, $2->data, true, false, join,
+ 0, $2->loc, pd->nextTokenId++, nspace, region, 0, 0, 0 );
+ region->tokenDefList.append( tokenDef );
+ ldel = nspace->literalDict.insert( interp, tokenDef );
+ nspace->tokenDefList.append( tokenDef );
+
+ /* The markers before and after the literal suppress pre and post ignore. */
+ if ( $1->value )
+ tokenDef->noPreIgnore = true;
+ if ( $3->value )
+ tokenDef->noPostIgnore = true;
+
+ /* A duplicate with its own token id goes in the token-only region. It
+ * is entered in the dictionary under a separate, decorated key. */
+ TokenDef *tokenDefTok = new TokenDef( name + "_tok", $2->data, true, false, join,
+ 0, $2->loc, pd->nextTokenId++, nspace, region->tokenOnlyRegion, 0, 0, 0 );
+ tokenDefTok->dupOf = tokenDef;
+ region->tokenOnlyRegion->tokenDefList.append( tokenDefTok );
+ ldel = nspace->literalDict.insert( "|" + interp + "_tok", tokenDefTok );
+ nspace->tokenDefList.append( tokenDefTok );
+ }
+ }
+
+ if ( !insideRegion ) {
+ /* Leave the region just for this token. */
+ regionStack.pop();
+ }
+ };
+
+
+# These two productions are responsible for setting and unsetting the Regular
+# language scanning context.
+# Each try block sets the enterRl flag and the matching undo block sets the
+# opposite value.
+# NOTE(review): presumably the undo block runs when the parser backtracks over
+# the production -- confirm against the kelbt documentation.
+enter_rl:
+ try {
+ enterRl = true;
+ }
+ undo {
+ enterRl = false;
+ };
+leave_rl:
+ try {
+ enterRl = false;
+ }
+ undo {
+ enterRl = true;
+ };
+
+# A token or ignore definition. For a definition outside of a scanner, the
+# region is created and pushed by token_def_name; it is popped here. Each
+# definition also gets a duplicate in the ignore-only or token-only region.
+token_def:
+ token_or_ignore token_def_name obj_var_list
+ enter_rl opt_no_ignore '/' opt_rl_join leave_rl '/' opt_no_ignore
+ opt_translate
+ final {
+ bool ignore = $1->ignore;
+ String name = $2->name;
+ Join *join = $7->join;
+ CodeBlock *transBlock = $11->transBlock;
+
+ /* Check the region if this is for an ignore. */
+ if ( ignore && !pd->insideRegion )
+ error($1->loc) << "ignore tokens can only appear inside scanners" << endp;
+
+ /* Check the name if this is a token. */
+ if ( !ignore && name == 0 )
+ error($1->loc) << "tokens must have a name" << endp;
+
+ /* Give a default name to ignores. */
+ if ( name == 0 )
+ name.setAs( 32, "_ignore_%.4x", pd->nextTokenId );
+
+ Namespace *nspace = namespaceStack.top();
+ TokenRegion *region = regionStack.top();
+
+ /* The primary definition, carrying the translation block. */
+ TokenDef *tokenDef = new TokenDef( name, String(), false, ignore, join,
+ transBlock, $1->loc, pd->nextTokenId++, nspace, region,
+ &reCaptureVect, pd->objectDef,
+ contextStack.length() > 0 ? contextStack.top() : 0 );
+
+ region->tokenDefList.append( tokenDef );
+ nspace->tokenDefList.append( tokenDef );
+
+ /* The markers on either side of the pattern suppress pre and post ignore. */
+ if ( $5->value )
+ tokenDef->noPreIgnore = true;
+ if ( $10->value )
+ tokenDef->noPostIgnore = true;
+
+ /* All again for the ignore. */
+ if ( ignore ) {
+ /* Duplicate with no translation block, placed in the ignore-only region. */
+ TokenDef *tokenDefIgn = new TokenDef( name + "_ign", String(), false, ignore, join,
+ 0, $1->loc, pd->nextTokenId++, nspace, region->ignoreOnlyRegion,
+ &reCaptureVect, pd->objectDef,
+ contextStack.length() > 0 ? contextStack.top() : 0 );
+
+ tokenDefIgn->dupOf = tokenDef;
+
+ region->ignoreOnlyRegion->tokenDefList.append( tokenDefIgn );
+ nspace->tokenDefList.append( tokenDefIgn );
+ }
+ else {
+ /* Duplicate with no translation block, placed in the token-only region. */
+ TokenDef *tokenDefTok = new TokenDef( name + "_tok", String(), false, ignore, join,
+ 0, $1->loc, pd->nextTokenId++, nspace, region->tokenOnlyRegion,
+ &reCaptureVect, pd->objectDef,
+ contextStack.length() > 0 ? contextStack.top() : 0 );
+
+ tokenDefTok->dupOf = tokenDef;
+
+ region->tokenOnlyRegion->tokenDefList.append( tokenDefTok );
+ nspace->tokenDefList.append( tokenDefTok );
+ }
+
+ /* This is created and pushed in the name. */
+ if ( !pd->insideRegion ) {
+ /* Leave the region that we made just for this token. */
+ regionStack.pop();
+ }
+
+ if ( join != 0 ) {
+ /* Create a regular language definition so the token can be used to
+ * make other tokens */
+ addRegularDef( $1->loc, namespaceStack.top(), name, join );
+ }
+
+
+ /* NOTE(review): presumably this clears the capture vector ready for the
+ * next definition -- confirm. */
+ reCaptureVect.empty();
+ };
+
+# Leading keyword of a token definition. Records the keyword location and
+# whether the definition is an ignore.
+nonterm token_or_ignore
+{
+ InputLoc loc;
+ bool ignore;
+};
+
+token_or_ignore: KW_Token
+ final { $$->loc = $1->loc; $$->ignore = false; };
+
+token_or_ignore: KW_Ignore
+ final { $$->loc = $1->loc; $$->ignore = true; };
+
+nonterm class token_def_name
+{
+ String name;
+};
+
+# The optional name of a token definition. Besides passing the name up, the
+# action records whether the definition is inside a region, and when it is
+# not, creates a set of four regions for it and pushes the full one.
+token_def_name:
+ opt_name
+ final {
+ String name = $1->name;
+
+ $$->name = name;
+ /* Inside a region when the top of the region stack is not the root. */
+ pd->insideRegion = regionStack.top() != pd->rootRegion;
+ curDefineId = name;
+
+ if ( !pd->insideRegion ) {
+ /* NOTE(review): name may be unset here for an unnamed definition, and
+ * name.data is passed to the format strings below -- confirm this is safe. */
+
+ /* For just ignores. */
+ String scannerNameIgn( name.length() + 2, "<%s>-ign", name.data );
+ TokenRegion *tokenRegionIgn = createRegion( scannerNameIgn );
+
+ /* Just for explicitly collecting ignores. */
+ String scannerNameCi( name.length() + 2, "<%s>-ci", name.data );
+ TokenRegion *tokenRegionCi = createRegion( scannerNameCi );
+
+ /* Just for tokens. */
+ String scannerNameTok( name.length() + 2, "<%s>-tok", name.data );
+ TokenRegion *tokenRegionTok = createRegion( scannerNameTok );
+
+ /* If not inside a region, make one for the token. */
+ String scannerName( name.length() + 2, "<%s>", name.data );
+ TokenRegion *tokenRegion = createRegion( scannerName );
+
+ regionStack.push( tokenRegion );
+
+ /* Link the three derived regions to the full region and flag each. */
+ tokenRegion->ignoreOnlyRegion = tokenRegionIgn;
+ tokenRegion->tokenOnlyRegion = tokenRegionTok;
+ tokenRegion->ciRegion = tokenRegionCi;
+
+ tokenRegion->isFullRegion = true;
+ tokenRegionIgn->isIgnoreOnly = true;
+ tokenRegionCi->isCiOnly = true;
+ tokenRegionTok->isTokenOnly = true;
+
+ tokenRegionIgn->derivedFrom = tokenRegion;
+ tokenRegionCi->derivedFrom = tokenRegion;
+ tokenRegionTok->derivedFrom = tokenRegion;
+ }
+
+ /* Reset the label id counter. */
+ pd->nextLabelId = 0;
+ };
+
+# Optional name. The empty production has no action, so the name member is
+# left as constructed.
+nonterm class opt_name
+{
+ String name;
+};
+
+opt_name: TK_Word final { $$->name = $1->data; };
+opt_name: ;
+
+nonterm opt_translate
+{
+	/* Translation block, or null when none was given. */
+	CodeBlock *transBlock;
+};
+
+opt_translate:
+	block_open lang_stmt_list block_close
+	final {
+		CodeBlock *block = new CodeBlock( $2->stmtList );
+		block->localFrame = $1->localFrame;
+
+		/* Attach the enclosing context when there is one. */
+		if ( contextStack.length() == 0 )
+			block->context = 0;
+		else
+			block->context = contextStack.top();
+
+		$$->transBlock = block;
+	};
+
+# No translation block.
+opt_translate:
+	final {
+		$$->transBlock = 0;
+	};
+
+# A preeof block. It is attached to the region on top of the region stack,
+# and an error is reported when that region is the root region.
+pre_eof:
+	KW_Preeof block_open lang_stmt_list block_close
+	final {
+		TokenRegion *region = regionStack.top();
+		if ( region == pd->rootRegion )
+			error($1->loc) << "preeof must be used inside an existing region" << endl;
+
+		CodeBlock *preEof = new CodeBlock( $3->stmtList );
+		preEof->localFrame = $2->localFrame;
+		if ( contextStack.length() == 0 )
+			preEof->context = 0;
+		else
+			preEof->context = contextStack.top();
+
+		region->preEofBlock = preEof;
+	};
+
+# A named regular language definition. Captures are not permitted here.
+rl_def:
+	KW_Rl machine_name enter_rl '/' rl_join leave_rl '/'
+	final {
+		/* Generic creation of machine for instantiation and assignment. */
+		Namespace *nspace = namespaceStack.top();
+		addRegularDef( $2->loc, nspace, $2->data, $5->join );
+
+		/* Any capture collected while parsing the expression is an error. */
+		if ( reCaptureVect.length() > 0 ) {
+			error($1->loc) << "rl definitions cannot capture vars" << endl;
+		}
+	};
+
+# Location and text of a token, shared by nonterminals that pass one up.
+type class token_data
+{
+	InputLoc loc;
+	String data;
+};
+
+nonterm machine_name uses token_data;
+
+# The name of a regular language definition. Sets up the priority key and the
+# local error key for the definition, then passes the name up.
+machine_name:
+	TK_Word
+	final {
+		/* Make/get the priority key. The name may have already been referenced
+		 * and therefore exist. */
+		PriorDictEl *priorEl;
+		if ( pd->priorDict.insert( $1->data, pd->nextPriorKey, &priorEl ) ) {
+			pd->nextPriorKey += 1;
+		}
+		pd->curDefPriorKey = priorEl->value;
+
+		/* Make/get the local error key. */
+		LocalErrDictEl *errEl;
+		if ( pd->localErrDict.insert( $1->data, pd->nextLocalErrKey, &errEl ) ) {
+			pd->nextLocalErrKey += 1;
+		}
+		pd->curDefLocalErrKey = errEl->value;
+
+		$$->data = $1->data;
+		$$->loc = $1->loc;
+	};
+
+#
+# Reduce statements
+#
+
+nonterm opt_reduce_code
+{
+	/* Reduction code block, or null when none was given. */
+	CodeBlock *codeBlock;
+};
+
+# No reduction code.
+opt_reduce_code:
+	final {
+		$$->codeBlock = 0;
+	};
+
+opt_reduce_code:
+	start_reduce lang_stmt_list block_close
+	final {
+		CodeBlock *block = new CodeBlock( $2->stmtList );
+		block->localFrame = $1->localFrame;
+
+		/* Attach the enclosing context when there is one. */
+		if ( contextStack.length() == 0 )
+			block->context = 0;
+		else
+			block->context = contextStack.top();
+
+		$$->codeBlock = block;
+	};
+
+nonterm start_reduce uses block_open;
+
+# Passes the local frame of the opened block up.
+start_reduce:
+	block_open
+	final {
+		$$->localFrame = $1->localFrame;
+	};
+
+nonterm lang_stmt_list
+{
+	/* The statements collected so far. */
+	StmtList *stmtList;
+};
+
+# A statement list with an optional trailing require statement.
+lang_stmt_list: rec_stmt_list opt_require_stmt
+	final {
+		StmtList *stmts = $1->stmtList;
+		if ( $2->stmt != 0 )
+			stmts->append( $2->stmt );
+		$$->stmtList = stmts;
+	};
+
+nonterm rec_stmt_list uses lang_stmt_list;
+
+rec_stmt_list: rec_stmt_list statement
+	final {
+		StmtList *stmts = $1->stmtList;
+
+		/* Maybe a statement was generated. */
+		if ( $2->stmt != 0 )
+			stmts->append( $2->stmt );
+
+		$$->stmtList = stmts;
+	};
+
+# The empty case starts a new statement list.
+rec_stmt_list:
+	final {
+		$$->stmtList = new StmtList;
+	};
+
+nonterm opt_def_init
+{
+	/* Initializer expression, or null when there is none. */
+	LangExpr *expr;
+	/* Statement type to use for the initializing assignment. */
+	LangStmt::Type assignType;
+};
+
+# An initializer following a variable definition.
+opt_def_init: '=' code_expr
+	final {
+		$$->expr = $2->expr;
+		$$->assignType = LangStmt::AssignType;
+	};
+# No initializer.
+opt_def_init:
+	final {
+		$$->expr = 0;
+
+		/* The statement action reads assignType only when expr is set, but give
+		 * it a defined value so that the member is never left unset. */
+		$$->assignType = LangStmt::AssignType;
+	};
+
+# Empty productions placed around a construct to push and pop a scope on the
+# current local frame.
+scope_push:
+ final {
+ pd->curLocalFrame->pushScope();
+ //cout << "push scope" << endl;
+ };
+
+scope_pop:
+ final {
+ pd->curLocalFrame->popScope();
+ //cout << "pop scope" << endl;
+ };
+
+nonterm statement
+{
+	/* The statement produced, or null when nothing was generated. */
+	LangStmt *stmt;
+};
+nonterm for_scope uses statement;
+
+# A variable definition with an optional initializer. The variable is entered
+# in the current local frame; a statement results only from an initializer.
+statement: var_def opt_def_init
+	final {
+		/* By default no statement here. Maybe will add an initialization. */
+		$$->stmt = 0;
+
+		ObjField *field = $1->objField;
+
+		/* Check for redeclaration. */
+		if ( pd->curLocalFrame->checkRedecl( field->name ) != 0 ) {
+			error( field->loc ) << "variable " << field->name <<
+					" redeclared" << endp;
+		}
+
+		/* Insert it into the field map. */
+		pd->curLocalFrame->insertField( field->name, field );
+
+		//cout << "var def " << $1->objField->name << endl;
+
+		/* With an initializer, assign to a reference to the new variable. */
+		if ( $2->expr != 0 ) {
+			QualItemVect *emptyQual = new QualItemVect;
+			LangVarRef *target = new LangVarRef( field->loc, emptyQual, field->name );
+			$$->stmt = new LangStmt( field->loc, $2->assignType, target, $2->expr );
+		}
+	};
+# Assignment to a variable reference.
+statement: var_ref '=' code_expr
+ final {
+ $$->stmt = new LangStmt( $2->loc, LangStmt::AssignType, $1->varRef, $3->expr );
+ };
+# The four print forms differ only in the statement type they produce.
+statement: KW_Print '(' code_expr_list ')'
+ final {
+ $$->stmt = new LangStmt( $1->loc, LangStmt::PrintType, $3->exprVect );
+ };
+statement: KW_PrintXMLAC '(' code_expr_list ')'
+ final {
+ $$->stmt = new LangStmt( $1->loc, LangStmt::PrintXMLACType, $3->exprVect );
+ };
+statement: KW_PrintXML '(' code_expr_list ')'
+ final {
+ $$->stmt = new LangStmt( $1->loc, LangStmt::PrintXMLType, $3->exprVect );
+ };
+statement: KW_PrintStream '(' code_expr_list ')'
+ final {
+ $$->stmt = new LangStmt( $1->loc, LangStmt::PrintStreamType, $3->exprVect );
+ };
+# An expression used as a statement. It is given a default-constructed
+# location.
+statement: code_expr
+ final {
+ $$->stmt = new LangStmt( InputLoc(), LangStmt::ExprType, $1->expr );
+ };
+statement: if_stmt
+ final {
+ $$->stmt = $1->stmt;
+ };
+statement: KW_Reject
+ final {
+ $$->stmt = new LangStmt( $1->loc, LangStmt::RejectType );
+ };
+# A while loop. The condition and body are parsed between a scope push and a
+# scope pop.
+statement: KW_While scope_push code_expr block_or_single scope_pop
+ final {
+ $$->stmt = new LangStmt( LangStmt::WhileType, $3->expr, $4->stmtList );
+ };
+
+# The part of a for statement that sits inside the pushed scope: the iterator
+# variable, its type, the iterator call and the body.
+for_scope: TK_Word ':' type_ref KW_In iter_call block_or_single
+	final {
+		/* Check for redeclaration. */
+		if ( pd->curLocalFrame->checkRedecl( $1->data ) != 0 ) {
+			error( $1->loc ) << "variable " << $1->data << " redeclared" << endp;
+		}
+
+		/* The iterator variable is created with a null type reference. Its type
+		 * depends on the result of the iter_call lookup, since it must contain a
+		 * reference to the iterator that is called. That lookup is done at
+		 * compile time. */
+		TypeRef *noType = 0;
+		ObjField *iterVar = new ObjField( $1->loc, noType, $1->data );
+		pd->curLocalFrame->insertField( $1->data, iterVar );
+
+		$$->stmt = new LangStmt( $1->loc, LangStmt::ForIterType,
+				iterVar, $3->typeRef, $5->langTerm, $6->stmtList );
+	};
+
+# A for statement wraps for_scope in a scope push and a scope pop.
+statement: KW_For scope_push for_scope scope_pop
+	final {
+		$$->stmt = $3->stmt;
+	};
+
+statement: KW_Return code_expr
+	final {
+		LangExpr *result = $2->expr;
+		$$->stmt = new LangStmt( $1->loc, LangStmt::ReturnType, result );
+	};
+statement: KW_Break
+	final {
+		$$->stmt = new LangStmt( LangStmt::BreakType );
+	};
+statement: KW_Yield var_ref
+	final {
+		LangVarRef *yielded = $2->varRef;
+		$$->stmt = new LangStmt( LangStmt::YieldType, yielded );
+	};
+# Send accumulated text to a parser, written with the shift operator.
+statement: var_ref TK_LtLt accumulate
+	final {
+		/* Bundle the accumulated items with the current namespace and region. */
+		Namespace *curNspace = namespaceStack.top();
+		TokenRegion *curRegion = regionStack.top();
+		ParserText *text = new ParserText( $2->loc, curNspace, curRegion, replItemList );
+		pd->parserTextList.append( text );
+
+		$$->stmt = new LangStmt( LangStmt::ParserType, $1->varRef, text );
+	};
+# The same statement written with the send keyword.
+statement: KW_Send var_ref accumulate
+	final {
+		Namespace *curNspace = namespaceStack.top();
+		TokenRegion *curRegion = regionStack.top();
+		ParserText *text = new ParserText( $1->loc, curNspace, curRegion, replItemList );
+		pd->parserTextList.append( text );
+
+		$$->stmt = new LangStmt( LangStmt::ParserType, $2->varRef, text );
+	};
+
+nonterm opt_require_stmt uses statement;
+
+# A require pattern guards the statements that follow it: the result is an if
+# statement over the match with no else part.
+opt_require_stmt:
+	scope_push require_pattern lang_stmt_list scope_pop
+	final {
+		LangExpr *cond = $2->expr;
+		StmtList *body = $3->stmtList;
+		$$->stmt = new LangStmt( LangStmt::IfType, cond, body, 0 );
+	};
+# No require statement.
+opt_require_stmt:
+	final {
+		$$->stmt = 0;
+	};
+
+nonterm require_pattern uses code_expr;
+
+# Builds a match expression of the variable against the pattern list.
+require_pattern:
+	KW_Require var_ref pattern_list
+	final {
+		Namespace *curNspace = namespaceStack.top();
+		TokenRegion *curRegion = regionStack.top();
+
+		/* Register the pattern under the next pattern and replacement id. */
+		Pattern *pat = new Pattern( $1->loc, curNspace, curRegion,
+				patternItemList, pd->nextPatReplId++ );
+		pd->patternList.append( pat );
+
+		LangTerm *matchTerm = new LangTerm( LangTerm::MatchType, $2->varRef, pat );
+		$$->expr = new LangExpr( matchTerm );
+	};
+
+nonterm block_or_single uses lang_stmt_list;
+
+# A braced statement list.
+block_or_single: '{' lang_stmt_list '}'
+	final {
+		$$->stmtList = $2->stmtList;
+	};
+# A single statement, wrapped in a new list.
+block_or_single: statement
+	final {
+		$$->stmtList = new StmtList;
+
+		/* A statement production may generate nothing and leave a null statement
+		 * (a variable definition with no initializer does this). Skip it, as
+		 * rec_stmt_list does, rather than appending a null to the list. */
+		if ( $1->stmt != 0 )
+			$$->stmtList->append( $1->stmt );
+	};
+
+nonterm iter_call
+{
+	/* The term naming the iterator, with or without arguments. */
+	LangTerm *langTerm;
+};
+
+# Iterator call with an argument list.
+iter_call: var_ref '(' opt_code_expr_list ')'
+	final {
+		LangVarRef *callee = $1->varRef;
+		$$->langTerm = new LangTerm( callee, $3->exprVect );
+	};
+# A bare word, turned into a plain variable reference term.
+iter_call: TK_Word
+	final {
+		QualItemVect *emptyQual = new QualItemVect;
+		LangVarRef *ref = new LangVarRef( $1->loc, emptyQual, $1->data );
+		$$->langTerm = new LangTerm( LangTerm::VarRefType, ref );
+	};
+
+#
+# If Statements
+#
+
+nonterm if_stmt uses statement;
+
+# An if statement. The condition and body sit inside their own scope; the
+# elsif and else parts follow after that scope is popped.
+if_stmt: KW_If scope_push code_expr block_or_single scope_pop elsif_list
+	final {
+		LangStmt *elsePart = $6->stmt;
+		$$->stmt = new LangStmt( LangStmt::IfType, $3->expr, $4->stmtList, elsePart );
+	};
+
+nonterm elsif_list
+{
+	/* Head of the chain of elsif and else parts, or null. */
+	LangStmt *stmt;
+};
+
+elsif_list:
+	elsif_clause elsif_list
+	final {
+		/* Put any of the following elsif part, an else, or null into the elsePart. */
+		LangStmt *clause = $1->stmt;
+		clause->elsePart = $2->stmt;
+		$$->stmt = clause;
+	};
+elsif_list:
+	optional_else
+	final {
+		$$->stmt = $1->stmt;
+	};
+
+nonterm elsif_clause
+{
+	LangStmt *stmt;
+};
+
+# One elsif clause. It is built as an if statement with no else part; the
+# else part is filled in by elsif_list.
+elsif_clause:
+	KW_Elsif scope_push code_expr block_or_single scope_pop
+	final {
+		LangExpr *cond = $3->expr;
+		StmtList *body = $4->stmtList;
+		$$->stmt = new LangStmt( LangStmt::IfType, cond, body, 0 );
+	};
+
+nonterm optional_else
+{
+	/* The else statement, or null when there is no else. */
+	LangStmt *stmt;
+};
+
+optional_else:
+	KW_Else scope_push block_or_single scope_pop
+	final {
+		StmtList *body = $3->stmtList;
+		$$->stmt = new LangStmt( LangStmt::ElseType, body );
+	};
+
+optional_else:
+	final {
+		$$->stmt = 0;
+	};
+
+#
+# Code Expression Lists.
+#
+nonterm code_expr_list
+{
+	/* The expressions collected so far. */
+	ExprVect *exprVect;
+};
+
+# Expressions are listed one after another, with no separator token.
+code_expr_list: code_expr_list code_expr
+	final {
+		ExprVect *exprs = $1->exprVect;
+		exprs->append( $2->expr );
+		$$->exprVect = exprs;
+	};
+code_expr_list: code_expr
+	final {
+		ExprVect *exprs = new ExprVect;
+		exprs->append( $1->expr );
+		$$->exprVect = exprs;
+	};
+
+nonterm opt_code_expr_list uses code_expr_list;
+
+opt_code_expr_list: code_expr_list
+	final {
+		$$->exprVect = $1->exprVect;
+	};
+
+# An absent list is represented by a null vector, not an empty one.
+opt_code_expr_list:
+	final {
+		$$->exprVect = 0;
+	};
+
+#
+# Type list
+#
+
+nonterm type_list
+{
+	/* The type references collected so far. */
+	TypeRefVect *typeRefVect;
+};
+
+# Type references separated by commas.
+type_list: type_list ',' type_ref
+	final {
+		TypeRefVect *types = $1->typeRefVect;
+		types->append( $3->typeRef );
+		$$->typeRefVect = types;
+	};
+type_list: type_ref
+	final {
+		TypeRefVect *types = new TypeRefVect;
+		types->append( $1->typeRef );
+		$$->typeRefVect = types;
+	};
+
+nonterm opt_type_list uses type_list;
+
+opt_type_list: type_list
+	final {
+		$$->typeRefVect = $1->typeRefVect;
+	};
+
+# An absent list is represented by a null vector, not an empty one.
+opt_type_list:
+	final {
+		$$->typeRefVect = 0;
+	};
+
+
+#
+# Variable reference
+#
+
+nonterm var_ref
+{
+	/* The variable reference that was parsed. */
+	LangVarRef *varRef;
+};
+
+# A possibly qualified variable name.
+var_ref: qual TK_Word
+	final {
+		QualItemVect *path = $1->qual;
+		$$->varRef = new LangVarRef( $2->loc, path, $2->data );
+	};
+
+nonterm qual
+{
+	/* The qualification items collected so far. */
+	QualItemVect *qual;
+};
+
+# A qualifying word followed by a dot.
+qual: qual TK_Word '.'
+	final {
+		QualItemVect *path = $1->qual;
+		path->append( QualItem( $2->loc, $2->data, QualItem::Dot ) );
+		$$->qual = path;
+	};
+# A qualifying word followed by a right arrow.
+qual: qual TK_Word TK_RightArrow
+	final {
+		QualItemVect *path = $1->qual;
+		path->append( QualItem( $2->loc, $2->data, QualItem::Arrow ) );
+		$$->qual = path;
+	};
+# The empty qualification starts a new vector.
+qual:
+	final {
+		$$->qual = new QualItemVect;
+	};
+
+#
+# Code expression
+#
+
+nonterm code_expr
+{
+ /* The expression tree built for this nonterminal. */
+ LangExpr *expr;
+};
+
+# Top level of the expression grammar: logical and, logical or. The
+# productions are left-recursive and take a relational expression on the
+# right. Each binary node is located at its operator token.
+code_expr: code_expr TK_AmpAmp code_relational
+ final {
+ $$->expr = new LangExpr( $2->loc, $1->expr, OP_LogicalAnd, $3->expr );
+ };
+
+code_expr: code_expr TK_BarBar code_relational
+ final {
+ $$->expr = new LangExpr( $2->loc, $1->expr, OP_LogicalOr, $3->expr );
+ };
+
+code_expr: code_relational
+ final {
+ $$->expr = $1->expr;
+ };
+
+nonterm code_relational uses code_expr;
+
+# Comparison operators. Left-recursive, with an additive expression on the
+# right.
+code_relational: code_relational TK_DoubleEql code_additive
+ final {
+ $$->expr = new LangExpr( $2->loc, $1->expr, OP_DoubleEql, $3->expr );
+ };
+
+code_relational: code_relational TK_NotEql code_additive
+ final {
+ $$->expr = new LangExpr( $2->loc, $1->expr, OP_NotEql, $3->expr );
+ };
+
+code_relational: code_relational '<' code_additive
+ final {
+ $$->expr = new LangExpr( $2->loc, $1->expr, '<', $3->expr );
+ };
+
+code_relational: code_relational '>' code_additive
+ final {
+ $$->expr = new LangExpr( $2->loc, $1->expr, '>', $3->expr );
+ };
+
+code_relational: code_relational TK_LessEql code_additive
+ final {
+ $$->expr = new LangExpr( $2->loc, $1->expr, OP_LessEql, $3->expr );
+ };
+
+code_relational: code_relational TK_GrtrEql code_additive
+ final {
+ $$->expr = new LangExpr( $2->loc, $1->expr, OP_GrtrEql, $3->expr );
+ };
+
+
+code_relational: code_additive
+ final {
+ $$->expr = $1->expr;
+ };
+
+nonterm code_additive uses code_expr;
+
+# Addition and subtraction. Left-recursive, with a multiplicative expression
+# on the right.
+code_additive: code_additive '+' code_multiplicitive
+ final {
+ $$->expr = new LangExpr( $2->loc, $1->expr, '+', $3->expr );
+ };
+
+code_additive: code_additive '-' code_multiplicitive
+ final {
+ $$->expr = new LangExpr( $2->loc, $1->expr, '-', $3->expr );
+ };
+
+code_additive: code_multiplicitive
+ final {
+ $$->expr = $1->expr;
+ };
+
+nonterm code_multiplicitive uses code_expr;
+
+# Multiplication and division. Left-recursive, with a unary expression on the
+# right.
+code_multiplicitive: code_multiplicitive '*' code_unary
+ final {
+ $$->expr = new LangExpr( $2->loc, $1->expr, '*', $3->expr );
+ };
+
+code_multiplicitive: code_multiplicitive '/' code_unary
+ final {
+ $$->expr = new LangExpr( $2->loc, $1->expr, '/', $3->expr );
+ };
+
+code_multiplicitive: code_unary
+ final {
+ $$->expr = $1->expr;
+ };
+
+# Prefix operators. Each takes a single factor, so they do not nest directly.
+nonterm code_unary uses code_expr;
+code_unary: '!' code_factor
+ final {
+ $$->expr = new LangExpr( $1->loc, '!', $2->expr );
+ };
+code_unary: '$' code_factor
+ final {
+ $$->expr = new LangExpr( $1->loc, '$', $2->expr );
+ };
+code_unary: '^' code_factor
+ final {
+ $$->expr = new LangExpr( $1->loc, '^', $2->expr );
+ };
+code_unary: '%' code_factor
+ final {
+ $$->expr = new LangExpr( $1->loc, '%', $2->expr );
+ };
+code_unary: code_factor
+ final {
+ $$->expr = $1->expr;
+ };
+
+# Optional capture name: a word followed by a colon. The field is created
+# with a null type reference; the productions that use the capture assign
+# the type reference afterwards. No capture yields a null field.
+nonterm opt_capture uses var_def;
+
+opt_capture: TK_Word ':'
+ final {
+ $$->objField = new ObjField( $1->loc, 0, $1->data );
+ };
+opt_capture:
+ final {
+ $$->objField = 0;
+ };
+
+nonterm code_factor uses code_expr;
+
+# Number and string literals carry the token text into the term.
+code_factor: TK_Number
+ final {
+ $$->expr = new LangExpr( new LangTerm( LangTerm::NumberType, $1->data ) );
+ };
+code_factor: TK_Literal
+ final {
+ $$->expr = new LangExpr( new LangTerm( LangTerm::StringType, $1->data ) );
+ };
+# A variable reference followed by a parenthesized argument list. The
+# argument vector is null when the list is absent.
+code_factor: var_ref '(' opt_code_expr_list ')'
+ final {
+ $$->expr = new LangExpr( new LangTerm( $1->varRef, $3->exprVect ) );
+ };
+# A plain variable reference.
+code_factor: var_ref
+ final {
+ $$->expr = new LangExpr( new LangTerm( LangTerm::VarRefType, $1->varRef ) );
+ };
+# Match expression: builds a match of the variable against the pattern list.
+code_factor: KW_Match var_ref pattern_list
+	final {
+		Namespace *curNspace = namespaceStack.top();
+		TokenRegion *curRegion = regionStack.top();
+
+		/* Register the pattern under the next pattern and replacement id. */
+		Pattern *pat = new Pattern( $1->loc, curNspace, curRegion,
+				patternItemList, pd->nextPatReplId++ );
+		pd->patternList.append( pat );
+
+		LangTerm *matchTerm = new LangTerm( LangTerm::MatchType, $2->varRef, pat );
+		$$->expr = new LangExpr( matchTerm );
+	};
+# New expression over a factor.
+code_factor: KW_New code_factor
+	final {
+		LangTerm *newTerm = new LangTerm( LangTerm::NewType, $2->expr );
+		$$->expr = new LangExpr( newTerm );
+	};
+# Construct expression: builds a replacement from the collected items,
+# optionally capturing the result into a new local variable.
+code_factor:
+	KW_Construct opt_capture type_ref opt_field_init repl_list
+	final {
+		ObjField *capture = $2->objField;
+
+		Namespace *curNspace = namespaceStack.top();
+		TokenRegion *curRegion = regionStack.top();
+		Replacement *repl = new Replacement( $1->loc, curNspace, curRegion,
+				replItemList, pd->nextPatReplId++ );
+		pd->replList.append( repl );
+
+		/* A variable reference is made only when the result is captured. */
+		LangVarRef *captureRef = 0;
+		if ( capture != 0 )
+			captureRef = new LangVarRef( capture->loc, new QualItemVect, capture->name );
+
+		$$->expr = new LangExpr( new LangTerm( $1->loc, LangTerm::ConstructType,
+				captureRef, capture, $3->typeRef, $4->fieldInitVect, repl ) );
+
+		if ( capture != 0 ) {
+			/* Check for redeclaration. */
+			if ( pd->curLocalFrame->checkRedecl( capture->name ) != 0 ) {
+				error( capture->loc ) << "variable " << capture->name <<
+						" redeclared" << endp;
+			}
+
+			/* Insert it into the field map. */
+			capture->typeRef = $3->typeRef;
+			pd->curLocalFrame->insertField( capture->name, capture );
+		}
+	};
+# Parse expression: builds a ParseType term for the given type with the
+# argument list attached, optionally capturing the result into a new local
+# variable.
+code_factor: KW_Parse opt_capture type_ref '(' opt_code_expr_list ')'
+	final {
+		/* Get the language element. */
+		Namespace *nspace = namespaceStack.top();
+
+		/* No generic type is supplied here. */
+		GenericType *generic = 0;
+
+		/* The parser type wraps the parsed type, qualified by the current
+		 * namespace and region. */
+		NamespaceQual *nspaceQual = new NamespaceQual(
+				namespaceStack.top(), regionStack.top() );
+		TypeRef *parserTypeRef = new TypeRef( TypeRef::Parser,
+				InputLoc(), nspaceQual, $3->typeRef, 0 );
+
+		/* The replacement has an empty item list and uses the root region. */
+		Replacement *replacement = new Replacement( $1->loc, nspace, pd->rootRegion,
+				new ReplItemList, pd->nextPatReplId++ );
+		pd->replList.append( replacement );
+
+		/* A variable reference is made only when the result is captured. */
+		LangVarRef *varRef = 0;
+		if ( $2->objField != 0 )
+			varRef = new LangVarRef( $2->objField->loc, new QualItemVect, $2->objField->name );
+
+		$$->expr = new LangExpr( new LangTerm( $1->loc, LangTerm::ParseType,
+				varRef, $2->objField, $3->typeRef, generic, parserTypeRef, replacement ) );
+		$$->expr->term->args = $5->exprVect;
+
+		/* Check for redeclaration. */
+		if ( $2->objField != 0 ) {
+			if ( pd->curLocalFrame->checkRedecl( $2->objField->name ) != 0 ) {
+				error( $2->objField->loc ) << "variable " << $2->objField->name <<
+						" redeclared" << endp;
+			}
+
+			/* Insert it into the field map. */
+			$2->objField->typeRef = $3->typeRef;
+			pd->curLocalFrame->insertField( $2->objField->name, $2->objField );
+		}
+	};
+# Parse-stop expression: identical to the parse expression except that it
+# builds a ParseStopType term.
+code_factor: KW_ParseStop opt_capture type_ref '(' opt_code_expr_list ')'
+	final {
+		/* This is a silly clone. To be fixed later. */
+
+		/* Get the language element. */
+		Namespace *nspace = namespaceStack.top();
+
+		/* No generic type is supplied here. */
+		GenericType *generic = 0;
+
+		/* The parser type wraps the parsed type, qualified by the current
+		 * namespace and region. */
+		NamespaceQual *nspaceQual = new NamespaceQual(
+				namespaceStack.top(), regionStack.top() );
+		TypeRef *parserTypeRef = new TypeRef( TypeRef::Parser,
+				InputLoc(), nspaceQual, $3->typeRef, 0 );
+
+		/* The replacement has an empty item list and uses the root region. */
+		Replacement *replacement = new Replacement( $1->loc, nspace, pd->rootRegion,
+				new ReplItemList, pd->nextPatReplId++ );
+		pd->replList.append( replacement );
+
+		/* A variable reference is made only when the result is captured. */
+		LangVarRef *varRef = 0;
+		if ( $2->objField != 0 )
+			varRef = new LangVarRef( $2->objField->loc, new QualItemVect, $2->objField->name );
+
+		$$->expr = new LangExpr( new LangTerm( $1->loc, LangTerm::ParseStopType,
+				varRef, $2->objField, $3->typeRef, generic, parserTypeRef, replacement ) );
+		$$->expr->term->args = $5->exprVect;
+
+		/* Check for redeclaration. */
+		if ( $2->objField != 0 ) {
+			if ( pd->curLocalFrame->checkRedecl( $2->objField->name ) != 0 ) {
+				error( $2->objField->loc ) << "variable " << $2->objField->name <<
+						" redeclared" << endp;
+			}
+
+			/* Insert it into the field map. */
+			$2->objField->typeRef = $3->typeRef;
+			pd->curLocalFrame->insertField( $2->objField->name, $2->objField );
+		}
+	};
+# typeid<type>: a TypeIdType term carrying the referenced type.
+code_factor: KW_TypeId '<' type_ref '>'
+ final {
+ $$->expr = new LangExpr( new LangTerm( $1->loc,
+ LangTerm::TypeIdType, $3->typeRef ) );
+ };
+# type 'in' var: a SearchType term built from the type and the variable
+# reference; the location is taken from the 'in' keyword.
+code_factor: type_ref KW_In var_ref
+ final {
+ $$->expr = new LangExpr( new LangTerm( $2->loc,
+ LangTerm::SearchType, $1->typeRef, $3->varRef ) );
+ };
+# The literal keywords nil, true and false each map to their own term type.
+code_factor: KW_Nil
+ final {
+ $$->expr = new LangExpr( new LangTerm( $1->loc,
+ LangTerm::NilType ) );
+ };
+code_factor: KW_True
+ final {
+ $$->expr = new LangExpr( new LangTerm( $1->loc,
+ LangTerm::TrueType ) );
+ };
+code_factor: KW_False
+ final {
+ $$->expr = new LangExpr( new LangTerm( $1->loc,
+ LangTerm::FalseType ) );
+ };
+# Parenthesized expression: passes the inner expression through unchanged.
+code_factor: '(' code_expr ')'
+ final {
+ $$->expr = $2->expr;
+ };
+# make_tree( args ) and make_token( args ): terms carrying the argument vector.
+code_factor: KW_MakeTree '(' opt_code_expr_list ')'
+ final {
+ $$->expr = new LangExpr( new LangTerm( $1->loc,
+ LangTerm::MakeTreeType, $3->exprVect ) );
+ };
+code_factor: KW_MakeToken '(' opt_code_expr_list ')'
+ final {
+ $$->expr = new LangExpr( new LangTerm( $1->loc,
+ LangTerm::MakeTokenType, $3->exprVect ) );
+ };
+# deref expr: a unary OP_Deref expression around the operand.
+code_factor: KW_Deref code_expr
+ final {
+ $$->expr = new LangExpr( $1->loc, OP_Deref, $2->expr );
+ };
+# String list: the term is built from the parser-level replItemList rather than
+# from $1 (presumably filled in by the string_list productions -- confirm).
+code_factor: string_list
+ final {
+ $$->expr = new LangExpr( new LangTerm( replItemList ) );
+ };
+
+# Optional parenthesized field initializer list. Shares the attribute block of
+# field_init_list; fieldInitVect is null when the parentheses are absent.
+nonterm opt_field_init uses field_init_list;
+
+opt_field_init: '(' opt_field_init_list ')'
+ final {
+ $$->fieldInitVect = $2->fieldInitVect;
+ };
+opt_field_init:
+ final {
+ $$->fieldInitVect = 0;
+ };
+
+# Possibly-empty initializer list; an empty list is represented by a null
+# vector, not by an empty FieldInitVect.
+nonterm opt_field_init_list uses field_init_list;
+
+opt_field_init_list: field_init_list
+ final {
+ $$->fieldInitVect = $1->fieldInitVect;
+ };
+opt_field_init_list:
+ final {
+ $$->fieldInitVect = 0;
+ };
+
+# One or more field initializers collected into a FieldInitVect.
+nonterm field_init_list
+{
+ FieldInitVect *fieldInitVect;
+};
+
+# Left-recursive case: reuse the vector built so far and append the new item.
+field_init_list: field_init_list field_init
+ final {
+ $$->fieldInitVect = $1->fieldInitVect;
+ $$->fieldInitVect->append( $2->fieldInit );
+ };
+# Base case: allocate the vector and store the first item.
+field_init_list: field_init
+ final {
+ $$->fieldInitVect = new FieldInitVect;
+ $$->fieldInitVect->append( $1->fieldInit );
+ };
+
+# A single field initializer.
+nonterm field_init
+{
+ FieldInit *fieldInit;
+};
+
+# The initializer is an expression only; it is created with a default InputLoc
+# and the fixed name "_name".
+field_init: code_expr
+ final {
+ $$->fieldInit = new FieldInit( InputLoc(), "_name", $1->expr );
+ };
+
+#
+# Regular Expressions
+#
+
+# Optional regular-expression join with an optional trailing context. Shares
+# the attribute block of rl_join (join, context).
+nonterm opt_rl_join uses rl_join;
+
+opt_rl_join: rl_join opt_context
+ final {
+ $$->join = $1->join;
+ $$->context = $2->context;
+
+ if ( $2->context != 0 ) {
+ /* Create the mark action that goes with the trailing context and
+ * register it in the action list. It takes the next match-end number. */
+ Action *mark = new Action( MarkMark, pd->nextMatchEndNum++ );
+ pd->actionList.append( mark );
+
+ /* Record both the context and its mark on the join itself. */
+ $$->join->context = $2->context;
+ $$->join->mark = mark;
+ }
+ };
+
+# Empty alternative: no join and no context.
+opt_rl_join:
+ final {
+ $$->join = 0;
+ $$->context = 0;
+ };
+
+# A comma-separated list of regular expressions. The context attribute is only
+# assigned by the productions that reuse this attribute block (opt_rl_join,
+# opt_context); the rl_join productions themselves set only join.
+nonterm rl_join
+{
+ Join *join;
+ Join *context;
+};
+
+rl_join:
+ rl_join ',' rl_expr
+ final {
+ /* Append the expression to the list and return it. */
+ $1->join->exprList.append( $3->expression );
+ $$->join = $1->join;
+ };
+rl_join:
+ rl_expr
+ final {
+ /* First expression: start a new join. */
+ $$->join = new Join( $1->expression );
+ };
+
+# Context at the end of a pattern that is not included in the match
+nonterm opt_context uses rl_join;
+
+# '@' join gives the context; the empty alternative yields a null context.
+opt_context: '@' rl_join final { $$->context = $2->join; };
+opt_context: final { $$->context = 0; };
+
+# Lowest-precedence level of a regular expression: binary set operators
+# applied left to right over rl_term_short operands.
+nonterm rl_expr
+{
+ Expression *expression;
+};
+
+# Union.
+rl_expr:
+ rl_expr '|' rl_term_short final {
+ $$->expression = new Expression( $1->expression,
+ $3->term, Expression::OrType );
+ };
+# Intersection.
+rl_expr:
+ rl_expr '&' rl_term_short final {
+ $$->expression = new Expression( $1->expression,
+ $3->term, Expression::IntersectType );
+ };
+# This priority specification overrides the innermost parsing strategy, which
+# results in an ordered-choice interpretation of the grammar.
+# Subtraction.
+rl_expr:
+ rl_expr '-' rl_term_short final {
+ $$->expression = new Expression( $1->expression,
+ $3->term, Expression::SubtractType );
+ };
+# Strong subtraction ('--').
+rl_expr:
+ rl_expr TK_DashDash rl_term_short final {
+ $$->expression = new Expression( $1->expression,
+ $3->term, Expression::StrongSubtractType );
+ };
+# A single term with no operator.
+rl_expr:
+ rl_term_short final {
+ $$->expression = new Expression( $1->term );
+ };
+
+# Wrapper around rl_term that is declared 'shortest' below; it simply passes
+# the term through.
+nonterm rl_term_short
+{
+ Term *term;
+};
+
+shortest rl_term_short;
+
+rl_term_short: rl_term
+ final { $$->term = $1->term; };
+
+# Concatenation level: a sequence of labelled factors joined by juxtaposition
+# or by one of the explicit concatenation operators.
+nonterm rl_term
+{
+ Term *term;
+};
+
+# Juxtaposition and '.' both build a Term with no explicit type argument.
+rl_term:
+ rl_term factor_with_label final {
+ $$->term = new Term( $1->term, $2->factorWithAug );
+ };
+rl_term:
+ rl_term '.' factor_with_label final {
+ $$->term = new Term( $1->term, $3->factorWithAug );
+ };
+# ':>' -> Term::RightStartType.
+rl_term:
+ rl_term TK_ColonGt factor_with_label final {
+ $$->term = new Term( $1->term, $3->factorWithAug, Term::RightStartType );
+ };
+# ':>>' -> Term::RightFinishType.
+rl_term:
+ rl_term TK_ColonGtGt factor_with_label final {
+ $$->term = new Term( $1->term, $3->factorWithAug, Term::RightFinishType );
+ };
+# '<:' -> Term::LeftType.
+rl_term:
+ rl_term TK_LtColon factor_with_label final {
+ $$->term = new Term( $1->term,
+ $3->factorWithAug, Term::LeftType );
+ };
+# A single factor starts the term.
+rl_term:
+ factor_with_label final {
+ $$->term = new Term( $1->factorWithAug );
+ };
+
+# A factor optionally prefixed by one or more 'name:' labels. Each label
+# declares a "str" field on the current object definition and records a
+# capture for the labelled factor.
+nonterm factor_with_label
+{
+ FactorWithAug *factorWithAug;
+};
+
+# No label: pass the factor through.
+factor_with_label:
+ factor_with_ep final {
+ $$->factorWithAug = $1->factorWithAug;
+ };
+
+factor_with_label:
+ TK_Word ':' factor_with_label final {
+ $$->factorWithAug = $3->factorWithAug;
+
+ /* The label name must not collide with an existing field. */
+ if ( pd->objectDef->checkRedecl( $1->data ) != 0 )
+ error($1->loc) << "label name \"" << $1->data << "\" already in use" << endp;
+
+ /* Create the object field. */
+ NamespaceQual *qual = new NamespaceQual( namespaceStack.top(), regionStack.top() );
+ TypeRef *typeRef = new TypeRef( $1->loc, qual, "str" );
+ ObjField *objField = new ObjField( $1->loc, typeRef, $1->data );
+
+ /* Insert it into the map. */
+ pd->objectDef->insertField( $1->data, objField );
+
+ /* Create the enter and leaving actions that will mark the substring. */
+ Action *enter = new Action( MarkMark, pd->nextMatchEndNum++ );
+ Action *leave = new Action( MarkMark, pd->nextMatchEndNum++ );
+ pd->actionList.append( enter );
+ pd->actionList.append( leave );
+
+ /* Add entering and leaving actions. */
+ $$->factorWithAug->actions.append( ParserAction( $1->loc, at_start, 0, enter ) );
+ $$->factorWithAug->actions.append( ParserAction( $1->loc, at_leave, 0, leave ) );
+
+ /* Remember which pair of actions belongs to which field. */
+ reCaptureVect.append( ReCapture( enter, leave, objField ) );
+ };
+
+# Pass-through level: forwards the augmented factor unchanged.
+nonterm factor_with_ep
+{
+ FactorWithAug *factorWithAug;
+};
+
+factor_with_ep:
+ factor_with_aug final {
+ $$->factorWithAug = $1->factorWithAug;
+ };
+
+# Wraps a repetition-level factor in a FactorWithAug node; this is the only
+# place in this part of the grammar where a FactorWithAug is allocated.
+nonterm factor_with_aug
+{
+ FactorWithAug *factorWithAug;
+};
+
+factor_with_aug:
+ factor_with_rep final {
+ $$->factorWithAug = new FactorWithAug( $1->factorWithRep );
+ };
+
+
+# The fourth level of precedence. These are the trailing unary operators that
+# allow for repetition.
+#
+# Every operator production wraps the operand in a new FactorWithRep located at
+# the operator token ($2). The two integer arguments are the lower and upper
+# repetition counts; forms that do not use one of them pass 0 for it.
+
+nonterm factor_with_rep
+{
+ FactorWithRep *factorWithRep;
+};
+
+# '*'
+factor_with_rep:
+ factor_with_rep '*' final {
+ $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep,
+ 0, 0, FactorWithRep::StarType );
+ };
+# '**'
+factor_with_rep:
+ factor_with_rep TK_StarStar final {
+ $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep,
+ 0, 0, FactorWithRep::StarStarType );
+ };
+# '?'
+factor_with_rep:
+ factor_with_rep '?' final {
+ $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep,
+ 0, 0, FactorWithRep::OptionalType );
+ };
+# '+'
+factor_with_rep:
+ factor_with_rep '+' final {
+ $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep,
+ 0, 0, FactorWithRep::PlusType );
+ };
+# '{n}': exact count, passed as the first number.
+factor_with_rep:
+ factor_with_rep '{' factor_rep_num '}' final {
+ $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep,
+ $3->rep, 0, FactorWithRep::ExactType );
+ };
+# '{,n}': maximum only, passed as the second number.
+factor_with_rep:
+ factor_with_rep '{' ',' factor_rep_num '}' final {
+ $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep,
+ 0, $4->rep, FactorWithRep::MaxType );
+ };
+# '{n,}': minimum only, passed as the first number.
+factor_with_rep:
+ factor_with_rep '{' factor_rep_num ',' '}' final {
+ $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep,
+ $3->rep, 0, FactorWithRep::MinType );
+ };
+# '{n,m}': both bounds.
+factor_with_rep:
+ factor_with_rep '{' factor_rep_num ',' factor_rep_num '}' final {
+ $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep,
+ $3->rep, $5->rep, FactorWithRep::RangeType );
+ };
+# No repetition operator: wrap the negation-level factor, reusing its location.
+factor_with_rep:
+ factor_with_neg final {
+ $$->factorWithRep = new FactorWithRep(
+ $1->factorWithNeg->loc, $1->factorWithNeg );
+ };
+
+# A repetition count used inside '{...}'. The value is stored as an int.
+nonterm factor_rep_num
+{
+	int rep;
+};
+
+factor_rep_num:
+	TK_UInt final {
+		/* Convert the repetition count. The result is kept in a long so that
+		 * the strtol overflow sentinel (LONG_MAX) can be recognized, and it
+		 * is range-checked before being narrowed to the int attribute. */
+		errno = 0;
+		long rep = strtol( $1->data, 0, 10 );
+		if ( ( errno == ERANGE && rep == LONG_MAX ) || rep > INT_MAX ) {
+			/* Repetition too large. Recover by returning repetition 1. */
+			error($1->loc) << "repetition number " << $1->data << " overflows" << endl;
+			$$->rep = 1;
+		}
+		else {
+			/* The token is an unsigned integer, so the value cannot be
+			 * negative and no lower-bound check is needed. */
+			$$->rep = (int)rep;
+		}
+	};
+
+
+#
+# The fifth level up in precedence. Negation.
+#
+# The prefix operators are right-recursive, so they may be stacked.
+#
+
+nonterm factor_with_neg
+{
+ FactorWithNeg *factorWithNeg;
+};
+
+# '!' -> FactorWithNeg::NegateType.
+factor_with_neg:
+ '!' factor_with_neg final {
+ $$->factorWithNeg = new FactorWithNeg( $1->loc,
+ $2->factorWithNeg, FactorWithNeg::NegateType );
+ };
+# '^' -> FactorWithNeg::CharNegateType.
+factor_with_neg:
+ '^' factor_with_neg final {
+ $$->factorWithNeg = new FactorWithNeg( $1->loc,
+ $2->factorWithNeg, FactorWithNeg::CharNegateType );
+ };
+# No operator: wrap the basic factor, reusing its location. Note that this
+# dereferences $1->factor, which the TK_Word production of rl_factor may leave
+# null after a failed lookup (NOTE(review): confirm error() stops the parse
+# before this point).
+factor_with_neg:
+ rl_factor final {
+ $$->factorWithNeg = new FactorWithNeg( $1->factor->loc, $1->factor );
+ };
+
+# The basic operands of a regular expression: literals, numbers, named
+# definitions, character classes, ranges and parenthesized joins.
+nonterm rl_factor
+{
+ Factor *factor;
+};
+
+rl_factor:
+ TK_Literal final {
+ /* Create a new factor node going to a concat literal. */
+ $$->factor = new Factor( new Literal( $1->loc, $1->data, Literal::LitString ) );
+ };
+rl_factor:
+ alphabet_num final {
+ /* Create a new factor node going to a literal number. */
+ $$->factor = new Factor( new Literal( $1->loc,
+ $1->data, Literal::Number ) );
+ };
+rl_factor:
+ TK_Word final {
+ /* Find the named graph. The search starts in the current namespace
+ * and walks outwards through the parent namespaces. */
+ Namespace *nspace = namespaceStack.top();
+
+ while ( nspace != 0 ) {
+ GraphDictEl *gdNode = nspace->rlMap.find( $1->data );
+ if ( gdNode != 0 ) {
+ if ( gdNode->isInstance ) {
+ /* Recover by returning null as the factor node. */
+ error($1->loc) << "references to graph instantiations not allowed "
+ "in expressions" << endl;
+ $$->factor = 0;
+ }
+ else {
+ /* Create a factor node that is a lookup of an expression. */
+ $$->factor = new Factor( $1->loc, gdNode->value );
+ }
+ break;
+ }
+
+ nspace = nspace->parentNamespace;
+ }
+
+ /* The loop only ends with a null namespace when no namespace in the
+ * chain had the name. */
+ if ( nspace == 0 ) {
+ /* Recover by returning null as the factor node. */
+ error($1->loc) << "graph lookup of \"" << $1->data << "\" failed" << endl;
+ $$->factor = 0;
+ }
+ };
+rl_factor:
+ TK_SqOpen regular_expr_or_data TK_SqClose final {
+ /* Create a new factor node going to an OR expression. */
+ $$->factor = new Factor( new ReItem( $1->loc, $2->reOrBlock, ReItem::OrBlock ) );
+ };
+rl_factor:
+ TK_SqOpenNeg regular_expr_or_data TK_SqClose final {
+ /* Create a new factor node going to a negated OR expression. */
+ $$->factor = new Factor( new ReItem( $1->loc, $2->reOrBlock, ReItem::NegOrBlock ) );
+ };
+rl_factor:
+ range_lit TK_DotDot range_lit final {
+ /* Create a new factor node going to a range. */
+ $$->factor = new Factor( new Range( $1->literal, $3->literal ) );
+ };
+rl_factor:
+ '(' rl_join ')' final {
+ /* Create a new factor going to a parenthesized join. */
+ $$->factor = new Factor( $2->join );
+ };
+
+# One end point of a 'lo .. hi' range.
+nonterm range_lit
+{
+ Literal *literal;
+};
+
+# Literals which can be the end points of ranges.
+range_lit:
+ TK_Literal final {
+ /* Range literals must have only one char. We restrict this in the parse tree. */
+ $$->literal = new Literal( $1->loc, $1->data, Literal::LitString );
+ };
+range_lit:
+ alphabet_num final {
+ /* Create a new literal number. */
+ $$->literal = new Literal( $1->loc, $1->data, Literal::Number );
+ };
+
+# A number usable as a basic machine. Reuses the token_data attribute block;
+# the productions fill in loc and data.
+nonterm alphabet_num uses token_data;
+
+# Any form of a number that can be used as a basic machine.
+alphabet_num:
+	TK_UInt final {
+		$$->loc = $1->loc;
+		$$->data = $1->data;
+	};
+alphabet_num:
+	'-' TK_UInt final {
+		/* Negative number: keep the minus sign in front of the digits so the
+		 * text still denotes a negative value. */
+		$$->loc = $1->loc;
+		$$->data = '-';
+		$$->data += $2->data;
+	};
+alphabet_num:
+	TK_Hex final {
+		$$->loc = $1->loc;
+		$$->data = $1->data;
+	};
+
+#
+# Regular Expressions.
+#
+
+
+# The data inside of a [] expression in a regular expression. Accepts any
+# number of characters or ranges.
+nonterm regular_expr_or_data
+{
+ ReOrBlock *reOrBlock;
+};
+
+regular_expr_or_data:
+ regular_expr_or_data regular_expr_or_char final {
+ /* An optimization to lessen the tree size. If an or char is directly
+ * under the left side on the right and the right side is another or
+ * char then paste them together and return the left side. Otherwise
+ * just put the two under a new or data node. */
+ if ( $2->reOrItem->type == ReOrItem::Data &&
+ $1->reOrBlock->type == ReOrBlock::RecurseItem &&
+ $1->reOrBlock->item->type == ReOrItem::Data )
+ {
+ /* Append the right side to right side of the left and toss the
+ * right side. */
+ $1->reOrBlock->item->data += $2->reOrItem->data;
+ delete $2->reOrItem;
+ $$->reOrBlock = $1->reOrBlock;
+ }
+ else {
+ /* Can't optimize, put the left and right under a new node. */
+ $$->reOrBlock = new ReOrBlock( $1->reOrBlock, $2->reOrItem );
+ }
+ };
+# Empty alternative: start with a default-constructed block.
+regular_expr_or_data:
+ final {
+ $$->reOrBlock = new ReOrBlock();
+ };
+
+# A single character inside of an or expression. Can either be a character or a
+# set of characters.
+nonterm regular_expr_or_char
+{
+ ReOrItem *reOrItem;
+};
+
+# Plain character data.
+regular_expr_or_char:
+ TK_ReChar final {
+ $$->reOrItem = new ReOrItem( $1->loc, $1->data );
+ };
+# Range 'a-b': uses the first character of each end point and takes its
+# location from the dash token.
+regular_expr_or_char:
+ TK_ReChar TK_Dash TK_ReChar final {
+ $$->reOrItem = new ReOrItem( $2->loc, $1->data[0], $3->data[0] );
+ };
+
+# A local state reference. Cannot have :: prefix.
+# These productions carry no attributes; the result accumulates in the
+# parser-level nameRef.
+local_state_ref:
+ no_name_sep state_ref_names;
+
+# Clear the name ref structure.
+no_name_sep:
+ final {
+ nameRef.empty();
+ };
+
+# A qualified state reference.
+state_ref: opt_name_sep state_ref_names;
+
+# Optional leading name separator.
+opt_name_sep:
+ TK_NameSep
+ final {
+ /* Insert an initial null pointer val to indicate the existence of the
+ * initial name separator. */
+ nameRef.setAs( 0 );
+ };
+opt_name_sep:
+ final {
+ /* No leading separator: start from an empty reference. */
+ nameRef.empty();
+ };
+
+# List of names separated by ::
+state_ref_names:
+ state_ref_names TK_NameSep TK_Word
+ final {
+ nameRef.append( $3->data );
+ };
+state_ref_names:
+ TK_Word
+ final {
+ nameRef.append( $1->data );
+ };
+
+# Optional 'commit' keyword; commit is true only when the keyword is present.
+nonterm opt_commit
+{
+ bool commit;
+};
+
+opt_commit: final { $$->commit = false; };
+opt_commit: KW_Commit final { $$->commit = true; };
+
+#
+# Grammar Finished
+#
+
+ write types;
+ write data;
+}%%
+
+/* Prepare the parser for a run: create the root namespace, the root token
+ * region, the global object and the root local frame, then initialize the
+ * generated parser state and the argv type. */
+void ColmParser::init()
+{
+	/* Root namespace. Its id is the current length of the namespace list and
+	 * it has no parent. It becomes the bottom of the namespace stack. */
+	Namespace *rootNs = new Namespace( InputLoc(),
+			"___ROOT_NAMESPACE", pd->namespaceList.length(), 0 );
+	pd->namespaceList.append( rootNs );
+	namespaceStack.push( rootNs );
+	pd->rootNamespace = rootNs;
+
+	/* Root token region, registered under its name in the root namespace and
+	 * pushed as the bottom of the region stack. */
+	const char *regionName = "___ROOT_REGION";
+	TokenRegion *rootRgn = new TokenRegion( InputLoc(), regionName,
+			pd->regionList.length(), 0 );
+	pd->regionList.append( rootRgn );
+	addRegionDef( InputLoc(), namespaceStack.top(), regionName, rootRgn );
+	regionStack.push( rootRgn );
+	pd->rootRegion = rootRgn;
+
+	/* The global object takes the next object id. */
+	pd->globalObjectDef = new ObjectDef( ObjectDef::UserType,
+			String( "global" ), pd->nextObjectId++ );
+
+	/* The eofTokenRegion defaults to the root region. */
+	pd->eofTokenRegion = rootRgn;
+
+	/* The graph dictionary is the symbol table. It has to be populated up
+	 * front so that later definitions can reference the builtins. */
+	pd->initGraphDict();
+
+	/* The root local frame is also the initial current frame. */
+	pd->rootLocalFrame = new ObjectDef( ObjectDef::FrameType,
+			"local", pd->nextObjectId++ );
+	pd->curLocalFrame = pd->rootLocalFrame;
+
+	%% write init;
+
+	addArgvList();
+}
+
+/* Build the type reference used for argv: a list whose element type is "str",
+ * both qualified by the current namespace and region. The result is stored in
+ * pd->argvTypeRef. */
+void ColmParser::addArgvList()
+{
+	/* Element type: str. */
+	NamespaceQual *elemQual = new NamespaceQual(
+			namespaceStack.top(), regionStack.top() );
+	TypeRef *strRef = new TypeRef( InputLoc(), elemQual, "str" );
+
+	/* The list type gets its own qualifier object. */
+	NamespaceQual *listQual = new NamespaceQual(
+			namespaceStack.top(), regionStack.top() );
+	TypeRef *listRef = new TypeRef( TypeRef::List, InputLoc(),
+			listQual, strRef, 0 );
+
+	pd->argvTypeRef = listRef;
+}
+
+/* Feed one language element to the generated parser. Returns 0 when no errors
+ * have been counted, -1 otherwise. The parameters are consumed by the
+ * generated code. */
+int ColmParser::parseLangEl( int type, const Token *token )
+{
+	%% write exec;
+	if ( errCount != 0 )
+		return -1;
+	return 0;
+}
+
+/* Register a named regular definition in the namespace's rlMap. A duplicate
+ * name is reported as an error and the new definition is dropped. */
+void ColmParser::addRegularDef( const InputLoc &loc, Namespace *nspace,
+		const String &name, Join *join )
+{
+	GraphDictEl *entry = nspace->rlMap.insert( name );
+	if ( entry == 0 ) {
+		/* The insert was refused: recover by ignoring the duplicate. */
+		error(loc) << "regular definition \"" << name << "\" already exists" << endl;
+		return;
+	}
+
+	/* Fresh entry: fill it in as a plain (non-instance) definition. */
+	entry->value = new VarDef( name, join );
+	entry->isInstance = false;
+	entry->loc = loc;
+}
+
+/* Create a token region nested in the region currently on top of the region
+ * stack, record it in the parent's child list and the global region list, and
+ * register it by name in the current namespace. Returns the new region. */
+TokenRegion *ColmParser::createRegion( String &scannerName )
+{
+	/* The id is the list length before the region is appended. */
+	TokenRegion *created = new TokenRegion( InputLoc(), scannerName,
+			pd->regionList.length(), regionStack.top() );
+
+	regionStack.top()->childRegions.append( created );
+	pd->regionList.append( created );
+
+	addRegionDef( InputLoc(), namespaceStack.top(), scannerName, created );
+	return created;
+}
+
+
+/* Register a token region under a name in the namespace's graphDict. New
+ * entries are flagged as instances and queued on pd->instanceList. A duplicate
+ * name is reported as an error and ignored. */
+void ColmParser::addRegionDef( const InputLoc &loc, Namespace *nspace,
+		const String &name, TokenRegion *tokenRegion )
+{
+	RegionGraphDictEl *entry = nspace->graphDict.insert( name );
+	if ( entry == 0 ) {
+		/* The insert was refused: recover by ignoring the duplicate. */
+		error(loc) << "regular definition \"" << name << "\" already exists" << endl;
+		return;
+	}
+
+	/* Fresh entry: fill it in. */
+	entry->value = new RegionDef( name, tokenRegion );
+	entry->isInstance = true;
+	entry->loc = loc;
+
+	/* Since it is an instance, put it on the instance list. */
+	pd->instanceList.append( entry );
+}
+
+ostream &ColmParser::parse_error( int tokId, Token &token )
+{
+ /* Maintain the error count. */
+ gblErrorCount += 1;
+
+ cerr << token.loc.fileName << ":" << token.loc.line << ":" << token.loc.col << ": ";
+ cerr << "at token ";
+ if ( tokId < 128 )
+ cerr << "\"" << ColmParser_lelNames[tokId] << "\"";
+ else
+ cerr << ColmParser_lelNames[tokId];
+ if ( token.data != 0 )
+ cerr << " with data \"" << token.data << "\"";
+ cerr << ": ";
+
+ return cerr;
+}
+
+/* Wrap the raw token text and location in a Token and hand it to the parser.
+ * A negative parser result is reported as a parse error and terminates the
+ * process with exit status 1; otherwise the parser result is returned. */
+int ColmParser::token( InputLoc &loc, int tokId, char *tokstart, int toklen )
+{
+	Token tok;
+	tok.loc = loc;
+
+	/* Tokens without text keep the default (empty) data. */
+	if ( toklen > 0 )
+		tok.data.setAs( tokstart, toklen );
+
+	const int status = parseLangEl( tokId, &tok );
+	if ( status < 0 ) {
+		parse_error(tokId, tok) << "parse error" << endl;
+		exit(1);
+	}
+
+	return status;
+}
diff --git a/src/lmscan.h b/src/lmscan.h
new file mode 100644
index 00000000..5badaed5
--- /dev/null
+++ b/src/lmscan.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright 2007-2012 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _RLSCAN_H
+#define _RLSCAN_H
+
+#include <iostream>
+#include <fstream>
+#include <string.h>
+
+#include "global.h"
+#include "lmparse.h"
+#include "parsedata.h"
+#include "avltree.h"
+#include "vector.h"
+#include "buffer.h"
+
+using std::ifstream;
+using std::istream;
+using std::ostream;
+using std::cout;
+using std::cerr;
+using std::endl;
+
+extern char *Parser_lelNames[];
+
+/* One entry in the stack of files currently being included. The stack is
+ * used to detect a recursive include structure. Only the pointer is stored;
+ * the name string is not copied. */
+struct IncludeStackItem
+{
+ IncludeStackItem( const char *fileName )
+ : fileName(fileName) {}
+
+ const char *fileName;
+};
+
+typedef Vector<IncludeStackItem> IncludeStack;
+typedef Vector<const char *> ArgsVector;
+
+extern ArgsVector includePaths;
+
+/* Scanner for one input file. Tokens are passed through the section_parse
+ * machine, which handles include statements and forwards everything else to
+ * the ColmParser. */
+struct ColmScanner
+{
+	/* All scalar and pointer members are given a defined starting value, in
+	 * declaration order, so no member is ever read uninitialized. */
+	ColmScanner( const char *fileName, istream &input,
+			ostream &output, ColmParser *parser, int includeDepth )
+	:
+		fileName(fileName), input(input), output(output),
+		includeDepth(includeDepth),
+		cs(0),
+		line(1),
+		word(0), lit(0),
+		word_len(0), lit_len(0),
+		ts(0), te(0),
+		column(1), lastnl(0),
+		parser(parser),
+		parserExistsError(false),
+		whitespaceOn(true)
+	{
+	}
+
+	/* Include file lookup. */
+	ifstream *tryOpenInclude( char **pathChecks, long &found );
+	char **makeIncludePathChecks( const char *thisFileName, const char *fileName );
+	bool recursiveInclude( const char *inclFileName );
+
+	/* Token delivery and scanning. */
+	void sectionParseInit();
+	void token( int type, char *start, char *end );
+	void token( int type, char c );
+	void token( int type );
+	void updateCol();
+	void endSection();
+	void scan();
+	void eof();
+	ostream &scan_error();
+
+	/* Name of the file being scanned and the streams it works with. */
+	const char *fileName;
+	istream &input;
+	ostream &output;
+	int includeDepth;
+
+	/* State of the section_parse machine. */
+	int cs;
+	int line;
+
+	/* Text saved by the section_parse actions. */
+	char *word, *lit;
+	int word_len, lit_len;
+	InputLoc sectionLoc;
+
+	/* Start and end of the token currently being matched. */
+	char *ts, *te;
+	int column;
+
+	/* Position of the last newline seen since the previous column update, or
+	 * null if there was none. */
+	char *lastnl;
+
+	/* Set by machine statements, these persist from section to section
+	 * allowing for unnamed sections. */
+	ColmParser *parser;
+	IncludeStack includeStack;
+
+	/* This is set if an error has already been emitted stating that
+	 * no section name has been seen and thus no parser exists. */
+	bool parserExistsError;
+
+	/* This is for inline code. By default it is on. It goes off for
+	 * statements and values in inline blocks which are parsed. */
+	bool whitespaceOn;
+
+	/* Accumulates the text of a literal pattern between tokens. */
+	Buffer litBuf;
+};
+
+#endif /* _RLSCAN_H */
diff --git a/src/lmscan.rl b/src/lmscan.rl
new file mode 100644
index 00000000..070a1e66
--- /dev/null
+++ b/src/lmscan.rl
@@ -0,0 +1,636 @@
+/*
+ * Copyright 2006-2012 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <iostream>
+#include <fstream>
+#include <string.h>
+
+#include "global.h"
+#include "lmscan.h"
+#include "lmparse.h"
+#include "parsedata.h"
+#include "avltree.h"
+#include "vector.h"
+
+//#define PRINT_TOKENS
+
+using std::ifstream;
+using std::istream;
+using std::ostream;
+using std::cout;
+using std::cerr;
+using std::endl;
+
+%%{
+ machine section_parse;
+ alphtype int;
+ write data;
+}%%
+
+/* Initialize the section_parse machine. The body is produced by Ragel's
+ * 'write init' directive. */
+void ColmScanner::sectionParseInit()
+{
+ %% write init;
+}
+
+ostream &ColmScanner::scan_error()
+{
+ /* Maintain the error count. */
+ gblErrorCount += 1;
+ cerr << fileName << ":" << line << ":" << column << ": ";
+ return cerr;
+}
+
+/* Report whether the given file name is already on this scanner's include
+ * stack. Names are compared as strings. */
+bool ColmScanner::recursiveInclude( const char *inclFileName )
+{
+	IncludeStack::Iter entry = includeStack;
+	while ( entry.lte() ) {
+		if ( strcmp( entry->fileName, inclFileName ) == 0 )
+			return true;
+		entry++;
+	}
+	return false;
+}
+
+/* Advance the column counter past the current token. If a newline was seen
+ * since the last update, counting starts at that newline rather than at the
+ * token start. The newline marker is cleared afterwards. */
+void ColmScanner::updateCol()
+{
+	char *origin = ( lastnl != 0 ) ? lastnl : ts;
+	column += te - origin;
+	lastnl = 0;
+}
+
+/* Send a token whose text is the single character c. */
+void ColmScanner::token( int type, char c )
+{
+	char *first = &c;
+	token( type, first, first + 1 );
+}
+
+/* Send a token that carries no text. */
+void ColmScanner::token( int type )
+{
+	char *none = 0;
+	token( type, none, none );
+}
+
+/* A path is absolute when its first character is '/'. */
+bool isAbsolutePath( const char *path )
+{
+	return *path == '/';
+}
+
+/* Try each candidate path in the null-terminated pathChecks array in order.
+ * On success returns a newly allocated open stream, owned by the caller, and
+ * sets found to the index of the path that opened. On failure returns null
+ * and sets found to -1. */
+ifstream *ColmScanner::tryOpenInclude( char **pathChecks, long &found )
+{
+	ifstream *inFile = new ifstream;
+
+	for ( char **check = pathChecks; *check != 0; check++ ) {
+		/* A failed open leaves failbit set on the stream, and older library
+		 * versions do not reset it on a later successful open. Clear it
+		 * explicitly so the returned stream is always readable. */
+		inFile->clear();
+		inFile->open( *check );
+		if ( inFile->is_open() ) {
+			found = check - pathChecks;
+			return inFile;
+		}
+	}
+
+	found = -1;
+	delete inFile;
+	return 0;
+}
+
+/* Build the null-terminated list of candidate paths for an include.
+ *
+ * An absolute fileName yields just that name. Otherwise the first candidate
+ * is fileName relative to the directory of thisFileName (or fileName itself
+ * when thisFileName has no directory part), followed by one candidate per
+ * entry in includePaths.
+ *
+ * The array and every string in it are newly allocated and owned by the
+ * caller. */
+char **ColmScanner::makeIncludePathChecks( const char *thisFileName, const char *fileName )
+{
+	char **checks = 0;
+	long nextCheck = 0;
+	long length = strlen(fileName);
+
+	/* fileName is duplicated only where the copy is stored in the result, so
+	 * no copy is left behind unreferenced. */
+
+	/* Absolute path? */
+	if ( isAbsolutePath( fileName ) ) {
+		checks = new char*[2];
+		checks[nextCheck++] = strdup(fileName);
+	}
+	else {
+		/* One slot for the file-relative candidate, one per include path and
+		 * one for the terminator. */
+		checks = new char *[2 + includePaths.length()];
+
+		/* Search from the location of the current file. */
+		const char *lastSlash = strrchr( thisFileName, '/' );
+		if ( lastSlash == 0 )
+			checks[nextCheck++] = strdup(fileName);
+		else {
+			/* Directory part of thisFileName, including the slash. */
+			long givenPathLen = (lastSlash - thisFileName) + 1;
+			long checklen = givenPathLen + length;
+			char *check = new char[checklen+1];
+			memcpy( check, thisFileName, givenPathLen );
+			memcpy( check+givenPathLen, fileName, length );
+			check[checklen] = 0;
+			checks[nextCheck++] = check;
+		}
+
+		/* Search from the include paths given on the command line. */
+		for ( ArgsVector::Iter incp = includePaths; incp.lte(); incp++ ) {
+			long pathLen = strlen( *incp );
+			long checkLen = pathLen + 1 + length;
+			char *check = new char[checkLen+1];
+			memcpy( check, *incp, pathLen );
+			check[pathLen] = '/';
+			memcpy( check+pathLen+1, fileName, length );
+			check[checkLen] = 0;
+			checks[nextCheck++] = check;
+		}
+	}
+
+	checks[nextCheck] = 0;
+	return checks;
+}
+
+
+%%{
+ machine section_parse;
+ import "lmparse.h";
+
+ # Reset the saved word and literal text.
+ action clear_words { word = lit = 0; word_len = lit_len = 0; }
+ # Remember the text of the current token as the literal. tokdata and toklen
+ # are locals of the function that executes this machine.
+ action store_lit { lit = tokdata; lit_len = toklen; }
+
+ # Error reporting actions.
+ action mach_err { scan_error() << "bad machine statement" << endl; }
+ action incl_err { scan_error() << "bad include statement" << endl; }
+ action write_err { scan_error() << "bad write statement" << endl; }
+
+	# Process an include statement: decode the literal into a file name, locate
+	# and open the file, then scan it with a nested scanner that feeds the same
+	# parser.
+	action handle_include
+	{
+		String src( lit, lit_len );
+		String fileName;
+		bool unused;
+
+		/* Need a location. The local fileName declared above is still empty
+		 * and shadows the member, so the name of the file being scanned has
+		 * to be taken from this->fileName. */
+		InputLoc here;
+		here.fileName = this->fileName;
+		here.line = line;
+		here.col = column;
+
+		/* Decode the literal text into the name of the file to include. */
+		prepareLitString( fileName, unused, src, here );
+		char **checks = makeIncludePathChecks( this->fileName, fileName );
+
+		/* Open the input file for reading. */
+		long found = 0;
+		ifstream *inFile = tryOpenInclude( checks, found );
+		if ( inFile == 0 ) {
+			scan_error() << "include: could not open " <<
+					fileName << " for reading" << endl;
+		}
+		else {
+			/* The file was found. A recursive include is reported, but the
+			 * include is still processed. */
+			if ( recursiveInclude( checks[found] ) )
+				scan_error() << "include: this is a recursive include operation" << endl;
+
+			/* Put the file on the include stack for the duration of the
+			 * nested scan. */
+			includeStack.append( IncludeStackItem( checks[found] ) );
+
+			ColmScanner *scanner = new ColmScanner( fileName, *inFile, output, parser, includeDepth+1 );
+			scanner->scan();
+			delete inFile;
+
+			/* Remove the last element (len-1) */
+			includeStack.remove( -1 );
+
+			delete scanner;
+		}
+	}
+
+ # The include target is a literal token. Saved text is cleared on entry and
+ # the literal is stored when the token is consumed.
+ include_target =
+ TK_Literal >clear_words @store_lit;
+
+ # 'include' followed by its target. handle_include runs when the statement
+ # completes; any error or end of input inside it reports incl_err.
+ include_stmt =
+ ( KW_Include include_target ) @handle_include
+ <>err incl_err <>eof incl_err;
+
+ # Forward the current token to the parser together with its source location.
+ action handle_token
+ {
+// cout << Parser_lelNames[type] << " ";
+// if ( start != 0 ) {
+// cout.write( start, end-start );
+// }
+// cout << endl;
+
+ InputLoc loc;
+
+ /* Optional trace of every token sent to the parser. */
+ #ifdef PRINT_TOKENS
+ cerr << "scanner:" << line << ":" << column <<
+ ": sending token to the parser " << Parser_lelNames[*p];
+ cerr << " " << toklen;
+ if ( tokdata != 0 )
+ cerr << " " << tokdata;
+ cerr << endl;
+ #endif
+
+ loc.fileName = fileName;
+ loc.line = line;
+ loc.col = column;
+
+ /* A token whose text ends in a newline is reported one line earlier
+ * (presumably because the line counter has already moved past that
+ * newline -- confirm against the scanner's newline action). */
+ if ( tokdata != 0 && tokdata[toklen-1] == '\n' )
+ loc.line -= 1;
+
+ parser->token( loc, type, tokdata, toklen );
+ }
+
+ # Catch everything else: any token other than KW_Include goes to the parser.
+ everything_else = ^( KW_Include ) @handle_token;
+
+ # The machine is a repetition of include statements and ordinary tokens.
+ main := (
+ include_stmt |
+ everything_else
+ )*;
+}%%
+
+/* Run one token through the section_parse machine and then advance the column
+ * counter. The token text [start, end) is copied into a null-terminated
+ * buffer for the machine's actions; start may be null for tokens without
+ * text. */
+void ColmScanner::token( int type, char *start, char *end )
+{
+	char *tokdata = 0;
+	int toklen = 0;
+
+	/* The machine consumes a one-element input: the token type. */
+	int *p = &type;
+	int *pe = &type + 1;
+	int *eof = 0;
+
+	if ( start != 0 ) {
+		toklen = end-start;
+		tokdata = new char[toklen+1];
+		memcpy( tokdata, start, toklen );
+		tokdata[toklen] = 0;
+	}
+
+	%%{
+		machine section_parse;
+		write exec;
+	}%%
+
+	/* The actions run above copy the text they need (the parser stores it in
+	 * its own Token and the include handler builds a String from it), so the
+	 * temporary copy can be released here instead of leaking once per
+	 * token. */
+	delete[] tokdata;
+
+	updateCol();
+}
+
+/* Hook for the end of a section. Currently does nothing. */
+void ColmScanner::endSection( )
+{
+ /* Execute the eof actions for the section parser. */
+ /* Probably use: token( -1 ); */
+}
+
+%%{
+ machine rlscan;
+
+ # This is sent by the driver code.
+ EOF = 0;
+
+ action inc_nl {
+ lastnl = p;
+ column = 0;
+ line++;
+ }
+ NL = '\n' @inc_nl;
+
+ # Identifiers, numbers, comments, and other common things.
+ ident = ( alpha | '_' ) ( alpha |digit |'_' )*;
+ number = digit+;
+ hex_number = '0x' [0-9a-fA-F]+;
+
+ # These literal forms are common to C-like host code and ragel.
+ s_literal = "'" ([^'\\] | NL | '\\' (any | NL))* "'";
+ d_literal = '"' ([^"\\] | NL | '\\' (any | NL))* '"';
+
+ whitespace = [ \t] | NL;
+ pound_comment = '#' [^\n]* NL;
+
+ or_literal := |*
+ # Escape sequences in OR expressions.
+ '\\0' => { token( TK_ReChar, '\0' ); };
+ '\\a' => { token( TK_ReChar, '\a' ); };
+ '\\b' => { token( TK_ReChar, '\b' ); };
+ '\\t' => { token( TK_ReChar, '\t' ); };
+ '\\n' => { token( TK_ReChar, '\n' ); };
+ '\\v' => { token( TK_ReChar, '\v' ); };
+ '\\f' => { token( TK_ReChar, '\f' ); };
+ '\\r' => { token( TK_ReChar, '\r' ); };
+ '\\\n' => { updateCol(); };
+ '\\' any => { token( TK_ReChar, ts+1, te ); };
+
+ # Range dash in an OR expression.
+ '-' => { token( TK_Dash, 0, 0 ); };
+
+ # Terminate an OR expression.
+ ']' => { token( TK_SqClose ); fret; };
+
+ EOF => {
+ scan_error() << "unterminated OR literal" << endl;
+ };
+
+ # Characters in an OR expression.
+ [^\]] => { token( TK_ReChar, ts, te ); };
+
+ *|;
+
+ regular_type := |*
+ # Identifiers.
+ ident => { token( TK_Word, ts, te ); } ;
+
+ # Numbers
+ number => { token( TK_UInt, ts, te ); };
+ hex_number => { token( TK_Hex, ts, te ); };
+
+ # Literals, with optionals.
+ ( s_literal | d_literal ) [i]?
+ => { token( TK_Literal, ts, te ); };
+
+ '[' => { token( TK_SqOpen ); fcall or_literal; };
+ '[^' => { token( TK_SqOpenNeg ); fcall or_literal; };
+
+ '/' => { token( '/'); fret; };
+
+ # Ignore.
+ pound_comment => { updateCol(); };
+
+ '..' => { token( TK_DotDot ); };
+ '**' => { token( TK_StarStar ); };
+ '--' => { token( TK_DashDash ); };
+
+ ':>' => { token( TK_ColonGt ); };
+ ':>>' => { token( TK_ColonGtGt ); };
+ '<:' => { token( TK_LtColon ); };
+
+ # Whitespace other than newline.
+ [ \t\r]+ => { updateCol(); };
+
+ # If we are in a single line machine then newline may end the spec.
+ NL => { updateCol(); };
+
+ # Consume eof.
+ EOF;
+
+ any => { token( *ts ); } ;
+ *|;
+
+ # Scanner for the inside of a double-quoted literal pattern. The main
+ # scanner clears litBuf and enters here with fcall after an opening double
+ # quote. Text is accumulated in litBuf and emitted as TK_LitPat tokens.
+ literal_pattern := |*
+ # Recognized escape sequences append the character they denote.
+ '\\' '0' { litBuf.append( '\0' ); };
+ '\\' 'a' { litBuf.append( '\a' ); };
+ '\\' 'b' { litBuf.append( '\b' ); };
+ '\\' 't' { litBuf.append( '\t' ); };
+ '\\' 'n' { litBuf.append( '\n' ); };
+ '\\' 'v' { litBuf.append( '\v' ); };
+ '\\' 'f' { litBuf.append( '\f' ); };
+ '\\' 'r' { litBuf.append( '\r' ); };
+
+ # Any other escaped character is appended as is, without the backslash.
+ '\\' any {
+ litBuf.append( ts[1] );
+ };
+ # Closing quote: flush pending text (if any), emit the quote token and
+ # return to the calling scanner.
+ '"' => {
+ if ( litBuf.length > 0 ) {
+ token( TK_LitPat, litBuf.data, litBuf.data+litBuf.length );
+ litBuf.clear();
+ }
+ token( '"' );
+ fret;
+ };
+ # A newline also ends the literal: the newline is kept as part of the
+ # text and a closing quote token is emitted on the input's behalf.
+ NL => {
+ litBuf.append( '\n' );
+ token( TK_LitPat, litBuf.data, litBuf.data+litBuf.length );
+ litBuf.clear();
+ token( '"' );
+ fret;
+ };
+ # Opening bracket: flush pending text, then scan the bracketed content
+ # with the main scanner, whose closing bracket rule returns here.
+ '[' => {
+ if ( litBuf.length > 0 ) {
+ token( TK_LitPat, litBuf.data, litBuf.data+litBuf.length );
+ litBuf.clear();
+ }
+ token( '[' );
+ fcall main;
+ };
+ # Ordinary characters accumulate until one of the rules above flushes.
+ any => {
+ litBuf.append( *ts );
+ };
+ *|;
+
+	# Parser definitions.
+	#
+	# Top-level scanner for colm source. Keywords are listed before the
+	# generic identifier rule so that, on a tie in match length, the keyword
+	# rule is the one chosen. Each keyword is listed exactly once; the second
+	# copies of 'lex' and 'for' that used to sit further down the list could
+	# never be selected and have been removed.
+	main := |*
+		'lex' => { token( KW_Lex ); };
+		'commit' => { token( KW_Commit ); };
+		'token' => { token( KW_Token ); };
+		'literal' => { token( KW_Literal ); };
+		'rl' => { token( KW_Rl ); };
+		'def' => { token( KW_Def ); };
+		'ignore' => { token( KW_Ignore ); };
+		# NOTE(review): 'cons' yields the same token as 'construct', and
+		# 'parser' (below) the same token as 'accum'; presumably these are
+		# intended aliases. Confirm against the grammar.
+		'construct' => { token( KW_Construct ); };
+		'cons' => { token( KW_Construct ); };
+		'new' => { token( KW_New ); };
+		'if' => { token( KW_If ); };
+		'reject' => { token( KW_Reject ); };
+		'while' => { token( KW_While ); };
+		'else' => { token( KW_Else ); };
+		'elsif' => { token( KW_Elsif ); };
+		'match' => { token( KW_Match ); };
+		'for' => { token( KW_For ); };
+		'iter' => { token( KW_Iter ); };
+		'prints' => { token( KW_PrintStream ); };
+		'print' => { token( KW_Print ); };
+		'print_xml_ac' => { token( KW_PrintXMLAC ); };
+		'print_xml' => { token( KW_PrintXML ); };
+		'namespace' => { token( KW_Namespace ); };
+		'map' => { token( KW_Map ); };
+		'list' => { token( KW_List ); };
+		'vector' => { token( KW_Vector ); };
+		'accum' => { token( KW_Accum ); };
+		'parser' => { token( KW_Accum ); };
+		'return' => { token( KW_Return ); };
+		'break' => { token( KW_Break ); };
+		'yield' => { token( KW_Yield ); };
+		'typeid' => { token( KW_TypeId ); };
+		'make_token' => { token( KW_MakeToken ); };
+		'make_tree' => { token( KW_MakeTree ); };
+		'reducefirst' => { token( KW_ReduceFirst ); };
+		'in' => { token( KW_In ); };
+		'nil' => { token( KW_Nil ); };
+		'true' => { token( KW_True ); };
+		'false' => { token( KW_False ); };
+		'parse' => { token( KW_Parse ); };
+		'parse_stop' => { token( KW_ParseStop ); };
+		'global' => { token( KW_Global ); };
+		'export' => { token( KW_Export ); };
+		'ptr' => { token( KW_Ptr ); };
+		'ref' => { token( KW_Ref ); };
+		'deref' => { token( KW_Deref ); };
+		'require' => { token( KW_Require ); };
+		'preeof' => { token( KW_Preeof ); };
+		'left' => { token( KW_Left ); };
+		'right' => { token( KW_Right ); };
+		'nonassoc' => { token( KW_Nonassoc ); };
+		'prec' => { token( KW_Prec ); };
+		'include' => { token( KW_Include ); };
+		'context' => { token( KW_Context ); };
+		'alias' => { token( KW_Alias ); };
+		'send' => { token( KW_Send ); };
+		'ni' => { token( KW_Ni ); };
+		'ci' => { token( KW_Ci ); };
+
+		# Identifiers.
+		ident => { token( TK_Word, ts, te ); } ;
+
+		number => { token( TK_Number, ts, te ); };
+
+		# A slash is always emitted as a token. When the parser has asked
+		# for it (enterRl), what follows is scanned as a regular expression
+		# until regular_type returns on the closing slash.
+		'/' => {
+			token( '/' );
+			if ( parser->enterRl )
+				fcall regular_type;
+		};
+
+		# Tilde literal: the rest of the line, newline included, becomes a
+		# literal pattern wrapped in synthesized quote tokens.
+		"~" [^\n]* NL => {
+			token( '"' );
+			token( TK_LitPat, ts+1, te );
+			token( '"' );
+		};
+
+		# Single-quoted literal, ended by a closing quote or by a newline.
+		"'" ([^'\\\n] | '\\' (any | NL))* ( "'" | NL ) => {
+			token( TK_Literal, ts, te );
+		};
+
+		# Double-quoted literal pattern: handled by its own scanner.
+		'"' => {
+			token( '"' );
+			litBuf.clear();
+			fcall literal_pattern;
+		};
+		# Brackets nest by calling this scanner recursively.
+		'[' => {
+			token( '[' );
+			fcall main;
+		};
+
+		# Only return when there is a caller to return to; a closing bracket
+		# at the outermost level is simply emitted.
+		']' => {
+			token( ']' );
+			if ( top > 0 )
+				fret;
+		};
+
+		# Ignore.
+		pound_comment => { updateCol(); };
+
+		'=>' => { token( TK_DoubleArrow ); };
+		'==' => { token( TK_DoubleEql ); };
+		'!=' => { token( TK_NotEql ); };
+		'::' => { token( TK_DoubleColon ); };
+		'<=' => { token( TK_LessEql ); };
+		'>=' => { token( TK_GrtrEql ); };
+		'->' => { token( TK_RightArrow ); };
+		'&&' => { token( TK_AmpAmp ); };
+		'||' => { token( TK_BarBar ); };
+		'<<' => { token( TK_LtLt ); };
+
+		('+' | '-' | '*' | '/' | '(' | ')' | '@' | '$' | '^' ) => { token( *ts ); };
+
+
+		# Whitespace other than newline.
+		[ \t\r]+ => { updateCol(); };
+		NL => { updateCol(); };
+
+		# Consume eof.
+		EOF;
+
+		# Anything else is passed through as a single-character token.
+		any => { token( *ts ); } ;
+	*|;
+}%%
+
+%% write data;
+
+/* Run the Ragel-generated scanner over everything readable from `input`.
+ *
+ * Input is read in chunks into a heap buffer that starts at 8 bytes and is
+ * doubled whenever a partially matched token has filled it. After each chunk
+ * the unfinished token (the bytes from ts onward) is moved to the front of
+ * the buffer so that the next read appends to it. When a read returns no
+ * bytes, one zero byte is fed to the machine and the loop ends. Exits the
+ * process with status 1 if the machine lands in its error state.
+ *
+ * The locals cs, act, top, stack, p, pe and eof are named as they are because
+ * the code emitted by the "write init" and "write exec" directives refers to
+ * them; ts and te are not declared here (presumably ColmScanner members --
+ * confirm in lmscan.h). */
+void ColmScanner::scan()
+{
+ int bufsize = 8;
+ char *buf = new char[bufsize];
+ const char last_char = 0;
+ int cs, act, have = 0;
+ /* NOTE(review): the scanners nest with fcall and nothing visible here
+ * guards against more than 32 levels -- confirm the machine definition
+ * handles that. */
+ int top, stack[32];
+ bool execute = true;
+
+ sectionParseInit();
+ %% write init;
+
+ while ( execute ) {
+ /* `have` bytes of an unfinished token sit at the front of buf; new
+ * input goes right after them. */
+ char *p = buf + have;
+ int space = bufsize - have;
+
+ if ( space == 0 ) {
+ /* We filled up the buffer trying to scan a token. Grow it. */
+ bufsize = bufsize * 2;
+ char *newbuf = new char[bufsize];
+
+ /* Recompute p and space. */
+ p = newbuf + have;
+ space = bufsize - have;
+
+ /* Patch up pointers possibly in use. Only the ts assignment is
+ * guarded by the test; te is rebased unconditionally. */
+ if ( ts != 0 )
+ ts = newbuf + ( ts - buf );
+ te = newbuf + ( te - buf );
+
+ /* Copy the new buffer in. */
+ memcpy( newbuf, buf, have );
+ delete[] buf;
+ buf = newbuf;
+ }
+
+ input.read( p, space );
+ int len = input.gcount();
+
+ /* If we see eof then append the EOF char. */
+ if ( len == 0 ) {
+ p[0] = last_char, len = 1;
+ execute = false;
+ }
+
+ char *pe = p + len;
+ char *eof = 0;
+ %% write exec;
+
+ /* Check if we failed. */
+ if ( cs == rlscan_error ) {
+ /* Machine failed before finding a token. I'm not yet sure if this
+ * is reachable. */
+ scan_error() << "colm scanner error (metalanguage)" << endl;
+ exit(1);
+ }
+
+ /* Decide if we need to preserve anything. */
+ char *preserve = ts;
+
+ /* Now set up the prefix. */
+ if ( preserve == 0 )
+ have = 0;
+ else {
+ /* There is data that needs to be shifted over. */
+ have = pe - preserve;
+ /* The regions can overlap, hence memmove rather than memcpy. */
+ memmove( buf, preserve, have );
+ unsigned int shiftback = preserve - buf;
+ if ( ts != 0 )
+ ts -= shiftback;
+ te -= shiftback;
+
+ preserve = buf;
+ }
+ }
+ delete[] buf;
+}
+
+/* Hand the parser its end-of-input token. The token's location is reported
+ * against the pseudo file name "<EOF>", at column 1 of the current line. */
+void ColmScanner::eof()
+{
+	InputLoc eofLoc;
+	eofLoc.col = 1;
+	eofLoc.line = line;
+	eofLoc.fileName = "<EOF>";
+
+	parser->token( eofLoc, ColmParser_tk_eof, 0, 0 );
+}
diff --git a/src/main.cc b/src/main.cc
new file mode 100644
index 00000000..6856da27
--- /dev/null
+++ b/src/main.cc
@@ -0,0 +1,623 @@
+/*
+ * Copyright 2006-2012 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <iostream>
+#include <fstream>
+#include <unistd.h>
+#include <sstream>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "global.h"
+#include "debug.h"
+#include "lmscan.h"
+#include "pcheck.h"
+#include "vector.h"
+#include "version.h"
+#include "keyops.h"
+#include "parsedata.h"
+#include "vector.h"
+#include "version.h"
+#include "fsmcodegen.h"
+
+/* Names pulled in from std. (This list used to appear twice in the file; it
+ * is now stated once, including ofstream which only the second copy had.) */
+using std::istream;
+using std::ifstream;
+using std::ofstream;
+using std::ostream;
+using std::ios;
+using std::cin;
+using std::cout;
+using std::cerr;
+using std::endl;
+
+/* Graphviz dot file generation. */
+bool genGraphviz = false;
+
+/* Io globals. */
+istream *inStream = 0;
+ostream *outStream = 0;
+const char *inputFileName = 0;
+const char *outputFileName = 0;
+const char *gblExportTo = 0;
+const char *gblExpImplTo = 0;
+bool exportCode = false;
+
+/* Flags set from the command line (see processArgs). */
+bool generateGraphviz = false;
+bool verbose = false;
+bool logging = false;
+bool branchPointInfo = false;
+bool addUniqueEmptyProductions = false;
+bool gblLibrary = false;
+
+/* Directories given with -I. */
+ArgsVector includePaths;
+
+/* Print version information. */
+void version();
+
+/* Total error count. */
+int gblErrorCount = 0;
+
+/* The host types available for the alphabet; only plain char is offered. */
+HostType hostTypesC[] =
+{
+	{ "char", 0, true, CHAR_MIN, CHAR_MAX, sizeof(char) },
+};
+
+/* NOTE(review): the second field is presumably the number of entries in
+ * hostTypesC (confirm against the HostLang declaration in keyops.h). It used
+ * to be hard-coded as 8 although the table has a single entry, which would
+ * let any loop over the table read past its end. It is now derived from the
+ * table itself. */
+HostLang hostLangC = {
+	hostTypesC,
+	(int)( sizeof(hostTypesC) / sizeof(hostTypesC[0]) ),
+	hostTypesC+0,
+	true
+};
+
+HostLang *hostLang = &hostLangC;
+HostLangType hostLangType = CCode;
+
+/* Print the opening to an error in the input, then return the error ostream. */
+ostream &error( const InputLoc &loc )
+{
+ /* Keep the error count. */
+ gblErrorCount += 1;
+
+ cerr << "error: " << inputFileName << ":" <<
+ loc.line << ":" << loc.col << ": ";
+ return cerr;
+}
+
+/* Print the opening to a program error, then return the error stream. */
+ostream &error()
+{
+ gblErrorCount += 1;
+ cerr << "error: " PROGNAME ": ";
+ return cerr;
+}
+
+
+/* Start a warning about the input file as a whole. Writes the prefix to cerr
+ * and returns cerr; the error count is not touched. */
+ostream &warning( )
+{
+	ostream &out = cerr;
+	out << "warning: " << inputFileName << ": ";
+	return out;
+}
+
+/* Start a warning for a location in the input file. Writes the
+ * "warning: file:line:col: " prefix to cerr and returns cerr; the error
+ * count is not touched. The input file name must already be known. */
+ostream &warning( const InputLoc &loc )
+{
+	assert( inputFileName != 0 );
+
+	ostream &out = cerr;
+	out << "warning: " << inputFileName << ":";
+	out << loc.line << ":" << loc.col << ": ";
+	return out;
+}
+
+/* Write `path` to `out` with every backslash doubled, so the result can be
+ * placed inside a quoted string. All other characters pass through as is. */
+void escapeLineDirectivePath( std::ostream &out, char *path )
+{
+	char *cur = path;
+	while ( *cur != 0 ) {
+		switch ( *cur ) {
+		case '\\':
+			out << "\\\\";
+			break;
+		default:
+			out << *cur;
+			break;
+		}
+		cur += 1;
+	}
+}
+
+void escapeLineDirectivePath( std::ostream &out, char *path );
+void scan( char *fileName, istream &input );
+
+bool printStatistics = false;
+
+/* Print a summary of the options. Every option that processArgs acts on is
+ * listed here; keep the two in step. */
+void usage()
+{
+	cout <<
+"usage: colm [options] file\n"
+"general:\n"
+"   -h, -H, -?, --help   print this usage and exit\n"
+"   -v --version         print version information and exit\n"
+"   -o <file>            write output to <file>\n"
+"   -i                   show conflict information\n"
+"   -d                   make colm verbose\n"
+"   -l                   compile logging into the output executable\n"
+"   -I <dir>             add <dir> to the include paths\n"
+"   -L                   library mode: do not compile the generated output\n"
+"   -e <file>            write exports to <file>\n"
+"   -c <file>            write the exports implementation to <file>\n"
+"   -s                   print statistics\n"
+"   -V                   write a graphviz dot file to standard output\n"
+"   -D <realm>           enable debug messages for <realm> (debug builds)\n"
+	;
+}
+
+/* Print the program version, publication date and copyright line to
+ * standard output. */
+void version()
+{
+	cout << "Colm version " VERSION << " " PUBDATE << endl;
+	cout << "Copyright (c) 2007-2012 by Adrian D. Thurston" << endl;
+}
+
+/* Find the file extension of a path.
+ *
+ * Returns a pointer to the last '.' in the final path component (so the
+ * result points inside the string passed in), or null when that component
+ * has no extension. A component that merely starts with a dot, such as
+ * ".hidden" or "dir/.hidden", is treated as having no extension; previously
+ * that was only true when there was no directory part. An empty string is
+ * handled and yields null; previously it read before the start of the
+ * buffer. */
+const char *findFileExtension( const char *stemFile )
+{
+	/* Only the part after the last '/' can carry the extension. */
+	const char *slash = strrchr( stemFile, '/' );
+	const char *base = slash != 0 ? slash + 1 : stemFile;
+
+	const char *dot = strrchr( base, '.' );
+	if ( dot == 0 || dot == base )
+		return 0;
+
+	return dot;
+}
+
+/* Build a file name from `stemFile` by dropping its extension (if it has
+ * one) and appending `suffix`. The result is allocated with new[] and is
+ * owned by the caller. `stemFile` must not be empty. */
+char *fileNameFromStem( const char *stemFile, const char *suffix )
+{
+	int stemLen = strlen( stemFile );
+	assert( stemLen > 0 );
+
+	/* Keep only what comes before the extension. */
+	const char *ext = findFileExtension( stemFile );
+	if ( ext != 0 )
+		stemLen = ext - stemFile;
+
+	int suffixLen = strlen( suffix );
+	char *result = new char[ stemLen + suffixLen + 1 ];
+
+	/* Stem first, then the suffix together with its terminating null. */
+	memcpy( result, stemFile, stemLen );
+	memcpy( result + stemLen, suffix, suffixLen + 1 );
+
+	return result;
+}
+
+
+/* Open the main output file and point outStream at it.
+ *
+ * When no output name was given, one is derived from the input name: ".h"
+ * for a ".rh" input and ".c" otherwise. Exits with status 1 if the output
+ * would be the input file itself or if the file cannot be opened. */
+void openOutput( )
+{
+	/* If no output file name is given, then make a default. */
+	if ( outputFileName == 0 ) {
+		const char *ext = findFileExtension( inputFileName );
+		if ( ext != 0 && strcmp( ext, ".rh" ) == 0 )
+			outputFileName = fileNameFromStem( inputFileName, ".h" );
+		else
+			outputFileName = fileNameFromStem( inputFileName, ".c" );
+	}
+
+	if ( colm_log_compile ) {
+		cerr << "opening output file: " << outputFileName << endl;
+	}
+
+	/* Make sure we are not writing to the same file as the input file. This
+	 * must stop here: carrying on would open the file for writing and wipe
+	 * out the input. (An input that already ends in the default extension
+	 * reaches this point, since main only checks an explicit -o name.) */
+	if ( strcmp( inputFileName, outputFileName ) == 0 ) {
+		error() << "output file \"" << outputFileName <<
+				"\" is the same as the input file" << endl;
+		exit(1);
+	}
+
+	/* An output name always exists by now, so the output is always a file;
+	 * the old fall-back to standard output could not be reached. */
+	ofstream *outFStream = new ofstream( outputFileName );
+
+	if ( !outFStream->is_open() ) {
+		error() << "error opening " << outputFileName << " for writing" << endl;
+		exit(1);
+	}
+
+	outStream = outFStream;
+}
+
+/* Point outStream at the exports file named by gblExportTo, or at standard
+ * output when no name was given. Exits with status 1 if the exports file
+ * would be the input file itself or if it cannot be opened. */
+void openExports( )
+{
+	if ( gblExportTo == 0 ) {
+		/* Writing out to std out. */
+		outStream = &cout;
+		return;
+	}
+
+	/* Make sure we are not writing to the same file as the input file.
+	 * Carrying on would open the file for writing and wipe out the input. */
+	if ( strcmp( inputFileName, gblExportTo ) == 0 ) {
+		error() << "output file \"" << gblExportTo <<
+				"\" is the same as the input file" << endl;
+		exit(1);
+	}
+
+	ofstream *outFStream = new ofstream( gblExportTo );
+
+	if ( !outFStream->is_open() ) {
+		/* Name the file that actually failed (this used to print
+		 * outputFileName). */
+		error() << "error opening " << gblExportTo << " for writing" << endl;
+		exit(1);
+	}
+
+	outStream = outFStream;
+}
+
+/* Point outStream at the exports implementation file named by gblExpImplTo,
+ * or at standard output when no name was given. Exits with status 1 if that
+ * file would be the input file itself or if it cannot be opened. */
+void openExportsImpl( )
+{
+	if ( gblExpImplTo == 0 ) {
+		/* Writing out to std out. */
+		outStream = &cout;
+		return;
+	}
+
+	/* Make sure we are not writing to the same file as the input file.
+	 * Carrying on would open the file for writing and wipe out the input. */
+	if ( strcmp( inputFileName, gblExpImplTo ) == 0 ) {
+		error() << "output file \"" << gblExpImplTo <<
+				"\" is the same as the input file" << endl;
+		exit(1);
+	}
+
+	ofstream *outFStream = new ofstream( gblExpImplTo );
+
+	if ( !outFStream->is_open() ) {
+		/* Name the file that actually failed (this used to print
+		 * outputFileName). */
+		error() << "error opening " << gblExpImplTo << " for writing" << endl;
+		exit(1);
+	}
+
+	outStream = outFStream;
+}
+
+/* Run `command` through system(). The command is echoed first when compile
+ * logging is on, and a notice is printed to standard output if it returns a
+ * non-zero status. */
+void compileOutputCommand( const char *command )
+{
+	if ( colm_log_compile ) {
+		cout << "compiling with: " << command << endl;
+	}
+
+	int status = system( command );
+	if ( status == 0 )
+		return;
+
+	cout << "there was a problem compiling the output" << endl;
+}
+
+/* Compile the generated C file against the installed colm headers and
+ * library (under PREFIX), producing an executable named after the output
+ * file with a ".bin" suffix.
+ *
+ * The resulting gcc command line is the same as before. The directory of
+ * argv0 used to be computed here but never appeared in the command; that
+ * dead work (and its leaked copy of argv0) is gone, the executable name is
+ * released after use, and the command is built in a string stream rather
+ * than a variable-length array filled with sprintf. */
+void compileOutputInstalled( const char *argv0 )
+{
+	/* Kept for interface symmetry with compileOutputInSource. */
+	(void) argv0;
+
+	char *exec = fileNameFromStem( outputFileName, ".bin" );
+
+	/* NOTE(review): file names are placed in the shell command unquoted. */
+	std::ostringstream command;
+	command <<
+		"gcc -Wall -Wwrite-strings"
+		" -I" PREFIX "/include/colm"
+		" -g"
+		" -o " << exec <<
+		" " << outputFileName <<
+		" -L" PREFIX "/lib"
+		" -lcolm" << ( logging ? 'd' : 'p' );
+
+	delete[] exec;
+
+	compileOutputCommand( command.str().c_str() );
+}
+
+/* Compile the generated C file against the headers and library found next
+ * to the running colm binary (a build inside the source tree), producing an
+ * executable named after the output file with a ".bin" suffix.
+ *
+ * argv0 must contain a '/'; callers establish this with inSourceTree(). The
+ * resulting gcc command line is the same as before, but the directory
+ * string and the executable name are no longer leaked and the command is
+ * built in a string stream rather than a variable-length array filled with
+ * sprintf. */
+void compileOutputInSource( const char *argv0 )
+{
+	/* Directory of the running program, including the trailing slash. */
+	const char *lastSlash = strrchr( argv0, '/' );
+	assert( lastSlash != 0 );
+	std::string location( argv0, lastSlash - argv0 + 1 );
+
+	char *exec = fileNameFromStem( outputFileName, ".bin" );
+
+	/* NOTE(review): file names are placed in the shell command unquoted. */
+	std::ostringstream command;
+	command <<
+		"gcc -Wall -Wwrite-strings"
+		" -I" << location << "."
+		" -I" << location << "../aapl"
+		" -g"
+		" -o " << exec <<
+		" " << outputFileName <<
+		" -L" << location <<
+		" -lcolm" << ( logging ? 'd' : 'p' );
+
+	delete[] exec;
+
+	compileOutputCommand( command.str().c_str() );
+}
+
+/* Decide whether colm is being run from its own source directory: true when
+ * argv0 has a directory part and a regular file named "main.cc" exists in
+ * that directory. */
+bool inSourceTree( const char *argv0 )
+{
+	const char *slash = strrchr( argv0, '/' );
+	if ( slash == 0 )
+		return false;
+
+	/* Directory part, trailing slash included, followed by the probe name. */
+	int dirLen = slash - argv0 + 1;
+	char *probe = new char[dirLen + 16];
+	memcpy( probe, argv0, dirLen );
+	strcpy( probe + dirLen, "main.cc" );
+
+	struct stat info;
+	int rc = stat( probe, &info );
+	delete[] probe;
+
+	return rc == 0 && S_ISREG( info.st_mode );
+}
+
+/* Parse the command line into the option globals.
+ *
+ * Problems are reported through error(), which counts them; the caller is
+ * expected to check gblErrorCount afterwards. -v, -h and their long forms
+ * print and exit immediately. An argument that is not an option is taken as
+ * the input file name.
+ *
+ * NOTE(review): the option string accepts -S <arg> and -M <arg> but no case
+ * below handles them, so they are silently ignored. 'v' also appears twice
+ * in the string. */
+void processArgs( int argc, const char **argv )
+{
+	ParamCheck pc( "D:e:c:LI:vdlio:S:M:vHh?-:sV", argc, argv );
+
+	while ( pc.check() ) {
+		switch ( pc.state ) {
+		case ParamCheck::match:
+			switch ( pc.parameter ) {
+			case 'I':
+				includePaths.append( pc.parameterArg );
+				break;
+			case 'v':
+				version();
+				exit(0);
+				break;
+			case 'd':
+				verbose = true;
+				break;
+			case 'l':
+				logging = true;
+				break;
+			case 'i':
+				branchPointInfo = true;
+				break;
+			/* Output. */
+			case 'o':
+				if ( *pc.parameterArg == 0 )
+					error() << "a zero length output file name was given" << endl;
+				else if ( outputFileName != 0 )
+					error() << "more than one output file name was given" << endl;
+				else {
+					/* Ok, remember the output file name. */
+					outputFileName = pc.parameterArg;
+				}
+				break;
+
+			case 'H': case 'h': case '?':
+				usage();
+				exit(0);
+				break;
+			case 's':
+				printStatistics = true;
+				break;
+			case 'V':
+				generateGraphviz = true;
+				break;
+			case '-':
+				if ( strcasecmp(pc.parameterArg, "help") == 0 ) {
+					usage();
+					exit(0);
+				}
+				else if ( strcasecmp(pc.parameterArg, "version") == 0 ) {
+					version();
+					exit(0);
+				}
+				else {
+					error() << "--" << pc.parameterArg <<
+							" is an invalid argument" << endl;
+				}
+				break;
+			case 'L':
+				gblLibrary = true;
+				break;
+			case 'e':
+				gblExportTo = pc.parameterArg;
+				break;
+			case 'c':
+				gblExpImplTo = pc.parameterArg;
+				break;
+			case 'D':
+#if DEBUG
+				if ( strcmp( pc.parameterArg, "BYTECODE" ) == 0 )
+					colmActiveRealm |= REALM_BYTECODE;
+				else if ( strcmp( pc.parameterArg, "PARSE" ) == 0 )
+					colmActiveRealm |= REALM_PARSE;
+				else if ( strcmp( pc.parameterArg, "MATCH" ) == 0 )
+					colmActiveRealm |= REALM_MATCH;
+				else if ( strcmp( pc.parameterArg, "COMPILE" ) == 0 )
+					colmActiveRealm |= REALM_COMPILE;
+				else if ( strcmp( pc.parameterArg, "POOL" ) == 0 )
+					colmActiveRealm |= REALM_POOL;
+				else if ( strcmp( pc.parameterArg, "PRINT" ) == 0 )
+					colmActiveRealm |= REALM_PRINT;
+				else if ( strcmp( pc.parameterArg, "INPUT" ) == 0 )
+					colmActiveRealm |= REALM_INPUT;
+				else if ( strcmp( pc.parameterArg, "SCAN" ) == 0 )
+					colmActiveRealm |= REALM_SCAN;
+				else
+					fatal( "unknown argument to -D %s\n", pc.parameterArg );
+#else
+				fatal("-D option specified but debugging messages not compiled in");
+#endif
+				/* Explicit break so a case added after this one cannot be
+				 * fallen into by accident. */
+				break;
+			}
+			break;
+
+		case ParamCheck::invalid:
+			error() << "-" << pc.parameter << " is an invalid argument" << endl;
+			break;
+
+		case ParamCheck::noparam:
+			/* It is interpreted as an input file. */
+			if ( *pc.curArg == 0 )
+				error() << "a zero length input file name was given" << endl;
+			else if ( inputFileName != 0 )
+				error() << "more than one input file name was given" << endl;
+			else {
+				/* OK, Remember the filename. */
+				inputFileName = pc.curArg;
+			}
+			break;
+		}
+	}
+}
+
+/* Main: process the arguments, scan and parse the input, compile it, then
+ * write the output and (unless -L was given) build it with gcc.
+ *
+ * Returns 0 on success and 1 when scanning or parsing reported errors;
+ * argument and file-open errors exit with status 1. */
+int main(int argc, const char **argv)
+{
+	processArgs( argc, argv );
+
+	if ( verbose ) {
+		colm_log_bytecode = 1;
+		colm_log_parse = 1;
+		colm_log_match = 1;
+		colm_log_compile = 1;
+		colm_log_conds = 1;
+		colmActiveRealm = 0xffffffff;
+	}
+	initInputFuncs();
+
+	/* Bail on above errors. */
+	if ( gblErrorCount > 0 )
+		exit(1);
+
+	/* Make sure we are not writing to the same file as the input file. */
+	if ( inputFileName != 0 && outputFileName != 0 &&
+			strcmp( inputFileName, outputFileName ) == 0 )
+	{
+		error() << "output file \"" << outputFileName <<
+				"\" is the same as the input file" << endl;
+	}
+
+	/* Open the input file for reading. The stream is held in locals with
+	 * their own names; the previous local was called inStream and hid the
+	 * global of that name. As before, the global is left untouched. */
+	istream *input;
+	ifstream *inFile = 0;
+	if ( inputFileName != 0 ) {
+		inFile = new ifstream( inputFileName );
+		input = inFile;
+		if ( ! inFile->is_open() )
+			error() << "could not open " << inputFileName << " for reading" << endl;
+	}
+	else {
+		inputFileName = "<stdin>";
+		input = &cin;
+	}
+
+	/* Bail on above errors. */
+	if ( gblErrorCount > 0 )
+		exit(1);
+
+	Compiler *pd = new Compiler( inputFileName, "machine", InputLoc(), std::cout );
+	ColmParser *parser = new ColmParser( pd, inputFileName, "machine", InputLoc() );
+	ColmScanner *scanner = new ColmScanner( inputFileName, *input, cout, parser, 0 );
+
+	parser->init();
+	scanner->scan();
+	scanner->eof();
+
+	/* Parsing complete, check for errors.. */
+	if ( gblErrorCount > 0 )
+		return 1;
+
+	/* Initiate a compile following a parse. */
+	pd->compile();
+
+	/*
+	 * Write output.
+	 */
+	if ( generateGraphviz ) {
+		outStream = &cout;
+		pd->writeDotFile();
+	}
+	else {
+		openOutput();
+		pd->generateOutput();
+
+		/* Close the output before gcc reads it. Never delete cout, and do
+		 * not leave the global pointing at a destroyed stream. */
+		if ( outStream != 0 && outStream != &cout )
+			delete outStream;
+		outStream = 0;
+
+		if ( !gblLibrary ) {
+			if ( inSourceTree( argv[0] ) )
+				compileOutputInSource( argv[0] );
+			else
+				compileOutputInstalled( argv[0] );
+		}
+
+		if ( gblExportTo != 0 ) {
+			openExports();
+			pd->generateExports();
+			if ( outStream != &cout )
+				delete outStream;
+			outStream = 0;
+		}
+		if ( gblExpImplTo != 0 ) {
+			openExportsImpl();
+			scanner->parser->pd->generateExportsImpl();
+			if ( outStream != &cout )
+				delete outStream;
+			outStream = 0;
+		}
+	}
+
+	delete scanner;
+	delete parser;
+	delete pd;
+
+	/* The scanner held a reference to the input stream, so the file (if
+	 * one was opened) is released only after the scanner is gone. */
+	delete inFile;
+
+	return 0;
+}
diff --git a/src/map.c b/src/map.c
new file mode 100644
index 00000000..72f4a18c
--- /dev/null
+++ b/src/map.c
@@ -0,0 +1,763 @@
+/*
+ * Copyright 2010-2012 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <assert.h>
+#include <pdarun.h>
+#include <map.h>
+#include <pool.h>
+
+#define true 1
+#define false 0
+
+/* Forget the map's element list by clearing both ends. The elements
+ * themselves are not touched here. */
+void mapListAbandon( Map *map )
+{
+	map->tail = 0;
+	map->head = 0;
+}
+
+/* Link new_el into the map's element list immediately before next_el. A
+ * null next_el means "append at the tail". */
+void mapListAddBefore( Map *map, MapEl *next_el, MapEl *new_el )
+{
+	/* The element that will precede new_el: next_el's current predecessor,
+	 * or the current tail when appending. */
+	MapEl *before = ( next_el != 0 ) ? next_el->prev : map->tail;
+
+	new_el->next = next_el;
+	new_el->prev = before;
+
+	/* Fix the link coming back from the following side. */
+	if ( next_el != 0 )
+		next_el->prev = new_el;
+	else
+		map->tail = new_el;
+
+	/* Fix the link coming forward from the preceding side. */
+	if ( before != 0 )
+		before->next = new_el;
+	else
+		map->head = new_el;
+}
+
+/* Link new_el into the map's element list immediately after prev_el. A null
+ * prev_el means "insert at the head". */
+void mapListAddAfter( Map *map, MapEl *prev_el, MapEl *new_el )
+{
+	/* The element that will follow new_el: prev_el's current successor, or
+	 * the current head when inserting at the front. */
+	MapEl *after = ( prev_el != 0 ) ? prev_el->next : map->head;
+
+	new_el->prev = prev_el;
+	new_el->next = after;
+
+	/* Fix the link coming forward from the preceding side. */
+	if ( prev_el != 0 )
+		prev_el->next = new_el;
+	else
+		map->head = new_el;
+
+	/* Fix the link coming back from the following side. */
+	if ( after != 0 )
+		after->prev = new_el;
+	else
+		map->tail = new_el;
+}
+
+
+/* Unlink el from the map's element list and return it. Only the
+ * neighbours (or the map's head and tail) are rewired; el's own prev and
+ * next pointers are left as they were. */
+MapEl *mapListDetach( Map *map, MapEl *el )
+{
+	MapEl *before = el->prev;
+	MapEl *after = el->next;
+
+	/* Make the preceding side skip over el. */
+	if ( before != 0 )
+		before->next = after;
+	else
+		map->head = after;
+
+	/* Make the following side skip over el. */
+	if ( after != 0 )
+		after->prev = before;
+	else
+		map->tail = before;
+
+	return el;
+}
+
+
+/* Attach `element` to the tree at an insertion point that has already been
+ * found, then restore the balance.
+ *
+ * parentEl is the node the element hangs under (null when the tree is
+ * empty). lastLess is the last node at which the search went left; when it
+ * is the parent itself the element becomes the left child, otherwise the
+ * right child. The element is also linked into the ordered element list
+ * next to its parent. */
+void mapAttachRebal( Map *map, MapEl *element, MapEl *parentEl, MapEl *lastLess )
+{
+	MapEl *unbal;
+
+	/* One more element in the tree. */
+	map->treeSize += 1;
+
+	/* A new element always starts as a leaf with height 1. */
+	element->parent = parentEl;
+	element->left = 0;
+	element->right = 0;
+	element->height = 1;
+
+	if ( parentEl == 0 ) {
+		/* No parent element so we are inserting the root. */
+		map->root = element;
+		mapListAddAfter( map, map->tail, element );
+	}
+	else if ( lastLess == parentEl ) {
+		/* The last step of the search went left. */
+		parentEl->left = element;
+		mapListAddBefore( map, parentEl, element );
+	}
+	else {
+		/* The last step of the search went right. */
+		parentEl->right = element;
+		mapListAddAfter( map, parentEl, element );
+	}
+
+	/* Heights above the new leaf may have grown. */
+	mapRecalcHeights( map, parentEl );
+
+	/* Rebalance at the first place the tree went out of balance, if any. */
+	unbal = mapFindFirstUnbalGP( map, element );
+	if ( unbal != 0 )
+		mapRebalance( map, unbal );
+}
+
+#if 0
+/* Disabled code, kept for reference. It still uses the C++ `delete`
+ * operator, so it cannot be enabled in this C file as it stands. */
+
+/* Recursively delete all the children of a element. */
+void mapDeleteChildrenOf( Map *map, MapEl *element )
+{
+	/* Recurse left. */
+	if ( element->left ) {
+		mapDeleteChildrenOf( map, element->left );
+
+		/* Delete left element. */
+		delete element->left;
+		element->left = 0;
+	}
+
+	/* Recurse right. */
+	if ( element->right ) {
+		mapDeleteChildrenOf( map, element->right );
+
+		/* Delete right element. (This used to clear element->left a second
+		 * time, leaving element->right pointing at freed memory.) */
+		delete element->right;
+		element->right = 0;
+	}
+}
+
+/* Delete every element of the map and reset it to empty. */
+void mapEmpty( Map *map )
+{
+	if ( map->root ) {
+		/* Recursively delete from the tree structure. */
+		mapDeleteChildrenOf( map, map->root );
+		delete map->root;
+		map->root = 0;
+		map->treeSize = 0;
+
+		mapListAbandon( map );
+	}
+}
+#endif
+
+/* Rebalance from an element whose grandparent is unbalanced. Only
+ * call on an element that has a grandparent.
+ *
+ * The three nodes n, its parent p and its grandparent gp are relabelled a, b
+ * and c according to which of the four left/right arrangements they are in,
+ * and their four outer subtrees are relabelled t1..t4. The nodes are then
+ * re-linked so that b takes gp's place with a as its left child and c as its
+ * right child, with t1 and t2 under a and t3 and t4 under c. Heights of a, c
+ * and b are recomputed and the change is propagated upward from the great
+ * grandparent. Returns the great grandparent (null when b became the root). */
+MapEl *mapRebalance( Map *map, MapEl *n )
+{
+ long lheight, rheight;
+ MapEl *a, *b, *c;
+ MapEl *t1, *t2, *t3, *t4;
+
+ MapEl *p = n->parent; /* Parent (non-NULL). */
+ MapEl *gp = p->parent; /* Grand-parent (Non-NULL). */
+ MapEl *ggp = gp->parent; /* Great grand-parent (may be NULL). */
+
+ if (gp->right == p)
+ {
+ /* p is the right child of gp. */
+ if (p->right == n)
+ {
+ /* Right-right: p is the right child of gp and n is the right
+ * child of p. */
+ a = gp;
+ b = p;
+ c = n;
+ t1 = gp->left;
+ t2 = p->left;
+ t3 = n->left;
+ t4 = n->right;
+ }
+ else
+ {
+ /* Right-left: p is the right child of gp and n is the left
+ * child of p. */
+ a = gp;
+ b = n;
+ c = p;
+ t1 = gp->left;
+ t2 = n->left;
+ t3 = n->right;
+ t4 = p->right;
+ }
+ }
+ else
+ {
+ /* p is the left child of gp. */
+ if (p->right == n)
+ {
+ /* Left-right: p is the left child of gp and n is the right
+ * child of p. */
+ a = p;
+ b = n;
+ c = gp;
+ t1 = p->left;
+ t2 = n->left;
+ t3 = n->right;
+ t4 = gp->right;
+ }
+ else
+ {
+ /* Left-left: p is the left child of gp and n is the left
+ * child of p. */
+ a = n;
+ b = p;
+ c = gp;
+ t1 = n->left;
+ t2 = n->right;
+ t3 = p->right;
+ t4 = gp->right;
+ }
+ }
+
+ /* Perform rotation.
+ */
+
+ /* Tie b to the great grandparent. */
+ if ( ggp == 0 )
+ map->root = b;
+ else if ( ggp->left == gp )
+ ggp->left = b;
+ else
+ ggp->right = b;
+ b->parent = ggp;
+
+ /* Tie a as a leftchild of b. */
+ b->left = a;
+ a->parent = b;
+
+ /* Tie c as a rightchild of b. */
+ b->right = c;
+ c->parent = b;
+
+ /* Tie t1 as a leftchild of a. */
+ a->left = t1;
+ if ( t1 != 0 ) t1->parent = a;
+
+ /* Tie t2 as a rightchild of a. */
+ a->right = t2;
+ if ( t2 != 0 ) t2->parent = a;
+
+ /* Tie t3 as a leftchild of c. */
+ c->left = t3;
+ if ( t3 != 0 ) t3->parent = c;
+
+ /* Tie t4 as a rightchild of c. */
+ c->right = t4;
+ if ( t4 != 0 ) t4->parent = c;
+
+ /* The heights are all recalculated manually and the great
+ * grand-parent is passed to mapRecalcHeights() to ensure
+ * the heights are correct up the tree.
+ *
+ * Note that mapRecalcHeights() cuts out when it comes across
+ * a height that hasn't changed.
+ */
+
+ /* Fix height of a. */
+ lheight = a->left ? a->left->height : 0;
+ rheight = a->right ? a->right->height : 0;
+ a->height = (lheight > rheight ? lheight : rheight) + 1;
+
+ /* Fix height of c. */
+ lheight = c->left ? c->left->height : 0;
+ rheight = c->right ? c->right->height : 0;
+ c->height = (lheight > rheight ? lheight : rheight) + 1;
+
+ /* Fix height of b. Its children are now a and c. */
+ lheight = a->height;
+ rheight = c->height;
+ b->height = (lheight > rheight ? lheight : rheight) + 1;
+
+ /* Fix height of b's parents. */
+ mapRecalcHeights( map, ggp );
+ return ggp;
+}
+
+/* Recompute heights starting at `element` and walking up through its
+ * ancestors. The walk stops at the first node whose height turns out to be
+ * unchanged, since nothing above it can change either. */
+void mapRecalcHeights( Map *map, MapEl *element )
+{
+	MapEl *cur;
+
+	for ( cur = element; cur != 0; cur = cur->parent ) {
+		long leftHeight = cur->left ? cur->left->height : 0;
+		long rightHeight = cur->right ? cur->right->height : 0;
+		long computed = 1 + ( leftHeight > rightHeight ? leftHeight : rightHeight );
+
+		/* No change here means no change further up. */
+		if ( computed == cur->height )
+			break;
+
+		cur->height = computed;
+	}
+}
+
+/* Walk up from `element` and return the first element whose grandparent is
+ * out of balance (child heights differing by more than one). Returns null
+ * when the starting element has no grandparent or no such element is found
+ * on the way to the root. */
+MapEl *mapFindFirstUnbalGP( Map *map, MapEl *element )
+{
+	MapEl *gp;
+
+	/* Nothing to examine unless there is a grandparent. */
+	if ( !( element != 0 && element->parent != 0 &&
+			element->parent->parent != 0 ) )
+		return 0;
+
+	/* Move the element and its grandparent up the tree in lock step. */
+	for ( gp = element->parent->parent; gp != 0;
+			gp = gp->parent, element = element->parent )
+	{
+		long leftHeight = gp->left ? gp->left->height : 0;
+		long rightHeight = gp->right ? gp->right->height : 0;
+		long diff = leftHeight - rightHeight;
+
+		if ( diff < -1 || diff > 1 )
+			return element;
+	}
+
+	return 0;
+}
+
+
+
+/* Walk up from `element` (inclusive) and return the first node that is out
+ * of balance, meaning its child heights differ by more than one. Returns
+ * null when every node up to the root is balanced or `element` is null. */
+MapEl *mapFindFirstUnbalEl( Map *map, MapEl *element )
+{
+	MapEl *cur;
+
+	for ( cur = element; cur != 0; cur = cur->parent ) {
+		long leftHeight = cur->left ? cur->left->height : 0;
+		long rightHeight = cur->right ? cur->right->height : 0;
+		long diff = leftHeight - rightHeight;
+
+		if ( diff < -1 || diff > 1 )
+			return cur;
+	}
+
+	return 0;
+}
+
+/* Put `replacement` (which is not in the tree) in the exact tree position
+ * held by `element`: it takes over the children, the parent link (or the
+ * root slot) and the height. The element list is not touched here. */
+void mapReplaceEl( Map *map, MapEl *element, MapEl *replacement )
+{
+	MapEl *above = element->parent;
+	MapEl *lchild = element->left;
+	MapEl *rchild = element->right;
+
+	/* Adopt the children. */
+	replacement->left = lchild;
+	if ( lchild != 0 )
+		lchild->parent = replacement;
+
+	replacement->right = rchild;
+	if ( rchild != 0 )
+		rchild->parent = replacement;
+
+	/* Hook in underneath the parent, or become the root. */
+	replacement->parent = above;
+	if ( above == 0 )
+		map->root = replacement;
+	else if ( above->left == element )
+		above->left = replacement;
+	else
+		above->right = replacement;
+
+	replacement->height = element->height;
+}
+
+
+/* Take `element` out of the tree and put `filler` in its place. Filler
+ * should be null or a child of element. Only the link from the parent (or
+ * the root slot) and the filler's parent pointer are updated. */
+void mapRemoveEl( Map *map, MapEl *element, MapEl *filler )
+{
+	MapEl *above = element->parent;
+
+	if ( above == 0 )
+		map->root = filler;
+	else if ( above->left == element )
+		above->left = filler;
+	else
+		above->right = filler;
+
+	if ( filler != 0 )
+		filler->parent = above;
+}
+
+/* Recursive worker for tree copying.
+ *
+ * Copies the subtree rooted at `el` into freshly allocated elements and
+ * returns the new subtree's root. The copies are appended to `map`'s element
+ * list in order (left subtree, node, right subtree), so the caller is
+ * expected to start with that list empty. If `el` is the element that
+ * oldNextDown points at, *newNextDown is set to its copy.
+ *
+ * Previously the children were looked up on the newly allocated element
+ * instead of on `el`, so the walk never followed the source tree, and the
+ * key, value and height were never carried over.
+ *
+ * NOTE(review): key and tree are copied as pointers and are therefore
+ * shared with the source element. Whether reference counts must be raised
+ * here or are raised by the caller cannot be seen from this file --
+ * confirm before relying on this. */
+MapEl *mapCopyBranch( Program *prg, Map *map, MapEl *el, Kid *oldNextDown, Kid **newNextDown )
+{
+	MapEl *newEl = mapElAllocate( prg );
+
+	/* Carry over the payload and the balance information. */
+	newEl->key = el->key;
+	newEl->tree = el->tree;
+	newEl->height = el->height;
+
+	/* Tree links start out empty; the parent is filled in by the caller
+	 * one level up, children are filled in below. */
+	newEl->parent = 0;
+	newEl->left = 0;
+	newEl->right = 0;
+
+	if ( (Kid*)el == oldNextDown )
+		*newNextDown = (Kid*)newEl;
+
+	/* If the left tree is there, copy it. */
+	if ( el->left ) {
+		newEl->left = mapCopyBranch( prg, map, el->left, oldNextDown, newNextDown );
+		newEl->left->parent = newEl;
+	}
+
+	/* Between the two subtrees, so the list comes out in order. */
+	mapListAddAfter( map, map->tail, newEl );
+
+	/* If the right tree is there, copy it. */
+	if ( el->right ) {
+		newEl->right = mapCopyBranch( prg, map, el->right, oldNextDown, newNextDown );
+		newEl->right->parent = newEl;
+	}
+
+	return newEl;
+}
+
+/* Insert an already allocated element, keyed by element->key.
+ *
+ * Descends from the root comparing keys with cmpTree. If no element with an
+ * equal key exists, element is attached with mapAttachRebal and returned. If
+ * an equal key is found, nothing is inserted and null is returned. When
+ * lastFound is non-null it receives the inserted element or, on a key
+ * collision, the element already holding that key. */
+MapEl *mapInsertEl( Program *prg, Map *map, MapEl *element, MapEl **lastFound )
+{
+	MapEl *node = map->root;
+	MapEl *attachTo = 0;
+	MapEl *leftTurn = 0;
+
+	while ( node != 0 ) {
+		long rel = cmpTree( prg, element->key, node->key );
+
+		if ( rel == 0 ) {
+			/* The key is already present. */
+			if ( lastFound != 0 )
+				*lastFound = node;
+			return 0;
+		}
+
+		attachTo = node;
+		if ( rel < 0 ) {
+			/* Going left: remember the last node we turned left at. */
+			leftTurn = node;
+			node = node->left;
+		}
+		else {
+			node = node->right;
+		}
+	}
+
+	/* Fell off the tree without finding the key. Attach underneath the last
+	 * node visited and rebalance. */
+	mapAttachRebal( map, element, attachTo, leftTurn );
+
+	if ( lastFound != 0 )
+		*lastFound = element;
+	return element;
+}
+
+/* Insert a new element for key if the key is not yet in the map.
+ *
+ * Descends from the root comparing keys with cmpTree. On a miss, a new
+ * element is obtained from mapElAllocate, given the key and a null tree,
+ * attached with mapAttachRebal and returned. If the key is already present
+ * nothing is allocated and null is returned. When lastFound is non-null it
+ * receives the new element or, on a key collision, the existing one. */
+MapEl *mapInsertKey( Program *prg, Map *map, Tree *key, MapEl **lastFound )
+{
+	MapEl *node = map->root;
+	MapEl *attachTo = 0;
+	MapEl *leftTurn = 0;
+	MapEl *fresh;
+
+	while ( node != 0 ) {
+		long rel = cmpTree( prg, key, node->key );
+
+		if ( rel == 0 ) {
+			/* The key is already present. */
+			if ( lastFound != 0 )
+				*lastFound = node;
+			return 0;
+		}
+
+		attachTo = node;
+		if ( rel < 0 ) {
+			/* Going left: remember the last node we turned left at. */
+			leftTurn = node;
+			node = node->left;
+		}
+		else {
+			node = node->right;
+		}
+	}
+
+	/* Fell off the tree without finding the key. Create the new element,
+	 * attach it underneath the last node visited and rebalance. */
+	fresh = mapElAllocate( prg );
+	fresh->key = key;
+	fresh->tree = 0;
+	mapAttachRebal( map, fresh, attachTo, leftTurn );
+
+	if ( lastFound != 0 )
+		*lastFound = fresh;
+	return fresh;
+}
+
+
+/**
+ * \brief Look up the element holding the given key.
+ *
+ * Keys are compared with cmpTree while descending from the root.
+ *
+ * \returns The element if the key exists, null if the key does not exist.
+ */
+MapEl *mapImplFind( Program *prg, Map *map, Tree *key )
+{
+	MapEl *node;
+
+	for ( node = map->root; node != 0; ) {
+		long rel = cmpTree( prg, key, node->key );
+
+		if ( rel == 0 )
+			break;
+
+		/* Smaller keys are to the left, larger keys to the right. */
+		node = rel < 0 ? node->left : node->right;
+	}
+
+	return node;
+}
+
+
+/**
+ * \brief Find the element with the given key, then detach it from the tree.
+ *
+ * The element is not deleted.
+ *
+ * \returns The element detached if the key is found, otherwise returns null.
+ */
+MapEl *mapDetachByKey( Program *prg, Map *map, Tree *key )
+{
+	MapEl *found = mapImplFind( prg, map, key );
+
+	if ( found == 0 )
+		return 0;
+
+	mapDetach( prg, map, found );
+	return found;
+}
+
+/**
+ * \brief Detach an element from the tree.
+ *
+ * The element is removed from the ordered list, treeSize is decremented and
+ * the element is unlinked from the tree. If it has a right subtree its place
+ * is taken by the leftmost element of that subtree; otherwise, if it has a
+ * left subtree, by the rightmost element of that subtree. Heights are then
+ * recalculated and every unbalanced element on the way up is rebalanced.
+ * The element is not freed and its own link fields are not cleared.
+ *
+ * If the element is not in the tree then undefined behaviour results.
+ *
+ * \returns The element given.
+ */
+MapEl *mapDetach( Program *prg, Map *map, MapEl *element )
+{
+ MapEl *replacement, *fixfrom;
+ long lheight, rheight;
+
+ /* Remove the element from the ordered list. */
+ mapListDetach( map, element );
+
+ /* Update treeSize. */
+ map->treeSize--;
+
+ /* Find a replacement element. */
+ if (element->right)
+ {
+ /* Find the leftmost element of the right subtree. */
+ replacement = element->right;
+ while (replacement->left)
+ replacement = replacement->left;
+
+ /* If replacing the element with its child then we need to start
+ * fixing at the replacement, otherwise we start fixing at the
+ * parent of the replacement. This must be decided before the
+ * replacement is unlinked, since that changes its parent. */
+ if (replacement->parent == element)
+ fixfrom = replacement;
+ else
+ fixfrom = replacement->parent;
+
+ mapRemoveEl( map, replacement, replacement->right );
+ mapReplaceEl( map, element, replacement );
+ }
+ else if (element->left)
+ {
+ /* Find the rightmost element of the left subtree. */
+ replacement = element->left;
+ while (replacement->right)
+ replacement = replacement->right;
+
+ /* If replacing the element with its child then we need to start
+ * fixing at the replacement, otherwise we start fixing at the
+ * parent of the replacement. */
+ if (replacement->parent == element)
+ fixfrom = replacement;
+ else
+ fixfrom = replacement->parent;
+
+ mapRemoveEl( map, replacement, replacement->left );
+ mapReplaceEl( map, element, replacement );
+ }
+ else
+ {
+ /* We need to start fixing at the parent of the element. */
+ fixfrom = element->parent;
+
+ /* The element we are deleting is a leaf element. */
+ mapRemoveEl( map, element, 0 );
+ }
+
+ /* If fixfrom is null it means we just deleted
+ * the root of the tree. */
+ if ( fixfrom == 0 )
+ return element;
+
+ /* Fix the heights after the deletion. */
+ mapRecalcHeights( map, fixfrom );
+
+ /* Fix every unbalanced element going up in the tree. */
+ MapEl *ub = mapFindFirstUnbalEl( map, fixfrom );
+ while ( ub )
+ {
+ /* Find the element to rebalance by moving down from the first unbalanced
+ * element 2 levels in the direction of the greatest heights. On the
+ * second move down, the heights may be equal ( but not on the first ).
+ * In which case go in the direction of the first move. */
+ lheight = ub->left ? ub->left->height : 0;
+ rheight = ub->right ? ub->right->height : 0;
+ assert( lheight != rheight );
+ if (rheight > lheight)
+ {
+ ub = ub->right;
+ lheight = ub->left ?
+ ub->left->height : 0;
+ rheight = ub->right ?
+ ub->right->height : 0;
+ if (rheight > lheight)
+ ub = ub->right;
+ else if (rheight < lheight)
+ ub = ub->left;
+ else
+ ub = ub->right;
+ }
+ else
+ {
+ ub = ub->left;
+ lheight = ub->left ?
+ ub->left->height : 0;
+ rheight = ub->right ?
+ ub->right->height : 0;
+ if (rheight > lheight)
+ ub = ub->right;
+ else if (rheight < lheight)
+ ub = ub->left;
+ else
+ ub = ub->left;
+ }
+
+
+ /* rebalance returns the grandparent of the subtree formed
+ * by the elements that were rebalanced.
+ * We must continue upward from there rebalancing. */
+ fixfrom = mapRebalance( map, ub );
+
+ /* Find the next unbalanced element. */
+ ub = mapFindFirstUnbalEl( map, fixfrom );
+ }
+
+ return element;
+}
+
+
+
diff --git a/src/map.cc b/src/map.cc
new file mode 100644
index 00000000..52dd2697
--- /dev/null
+++ b/src/map.cc
@@ -0,0 +1,26 @@
+/*
+ * Copyright 2008-2012 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "pdarun.h"
+#include <assert.h>
+
+
+
diff --git a/src/map.h b/src/map.h
new file mode 100644
index 00000000..acb415b9
--- /dev/null
+++ b/src/map.h
@@ -0,0 +1,108 @@
+/*
+ * Copyright 2010-2012 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _MAP_H
+#define _MAP_H
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#include <program.h>
+
+/* One element of a Map. It is a member of two structures at once: the
+ * ordered list reached through the map's head and tail, and the balanced
+ * search tree reached through the map's root. */
+typedef struct _MapEl
+{
+ /* Must overlay Kid. The first three fields are the part that map.c casts
+ * to and from Kid*, so their order must not change. */
+ Tree *tree;
+ struct _MapEl *next;
+ struct _MapEl *prev;
+
+ /* Search tree links. A null child counts as height zero in the balance
+ * calculations. */
+ struct _MapEl *left, *right, *parent;
+ long height;
+ /* The key the element is ordered by (compared with cmpTree). */
+ Tree *key;
+} MapEl;
+
+/* A map object: a search tree of MapEl keyed by Tree pointers, with the
+ * elements additionally chained into a list from head to tail. */
+typedef struct _Map
+{
+ /* Must overlay Tree. The order of the leading fields must not change. */
+ short id;
+ unsigned short flags;
+ long refs;
+ /* First element of the ordered list. */
+ MapEl *head;
+
+ /* Last element of the ordered list. */
+ MapEl *tail;
+ /* Root of the search tree, null when the tree is empty. */
+ MapEl *root;
+ /* Decremented by mapDetach for every element taken out of the tree. */
+ long treeSize;
+ GenericInfo *genericInfo;
+} Map;
+
+/*
+ * Ordered list maintenance.
+ */
+
+void mapListAbandon( Map *map );
+
+void mapListAddBefore( Map *map, MapEl *next_el, MapEl *new_el );
+void mapListAddAfter( Map *map, MapEl *prev_el, MapEl *new_el );
+MapEl *mapListDetach( Map *map, MapEl *el );
+
+/*
+ * Search tree maintenance. The functions taking a Program compare keys
+ * with cmpTree.
+ */
+
+void mapAttachRebal( Map *map, MapEl *element, MapEl *parentEl, MapEl *lastLess );
+void mapDeleteChildrenOf( Map *map, MapEl *element );
+void mapEmpty( Map *map );
+MapEl *mapRebalance( Map *map, MapEl *n );
+void mapRecalcHeights( Map *map, MapEl *element );
+MapEl *mapFindFirstUnbalGP( Map *map, MapEl *element );
+MapEl *mapFindFirstUnbalEl( Map *map, MapEl *element );
+void mapRemoveEl( Map *map, MapEl *element, MapEl *filler );
+void mapReplaceEl( Map *map, MapEl *element, MapEl *replacement );
+/* Both insert functions return null when the key is already present and
+ * report the element found or inserted through lastFound if it is non-null. */
+MapEl *mapInsertEl( Program *prg, Map *map, MapEl *element, MapEl **lastFound );
+MapEl *mapInsertKey( Program *prg, Map *map, Tree *key, MapEl **lastFound );
+MapEl *mapImplFind( Program *prg, Map *map, Tree *key );
+/* The detach functions unlink the element but do not free it. */
+MapEl *mapDetachByKey( Program *prg, Map *map, Tree *key );
+MapEl *mapDetach( Program *prg, Map *map, MapEl *element );
+MapEl *mapCopyBranch( Program *prg, Map *map, MapEl *el, Kid *oldNextDown, Kid **newNextDown );
+
+long cmpTree( Program *prg, const Tree *tree1, const Tree *tree2 );
+
+void mapImplRemoveEl( Program *prg, Map *map, MapEl *element );
+int mapImplRemoveKey( Program *prg, Map *map, Tree *key );
+
+/*
+ * Iterators.
+ */
+
+void initTreeIter( TreeIter *treeIter, const Ref *rootRef, int searchId, Tree **stackRoot );
+void initRevTreeIter( RevTreeIter *revTriter, const Ref *rootRef,
+ int searchId, Tree **stackRoot, int children );
+
+
+void initUserIter( UserIter *userIter, Tree **stackRoot, long argSize, long searchId );
+
+Tree *mapFind( Program *prg, Map *map, Tree *key );
+long mapLength( Map *map );
+Tree *mapUnstore( Program *prg, Map *map, Tree *key, Tree *existing );
+int mapInsert( Program *prg, Map *map, Tree *key, Tree *element );
+void mapUnremove( Program *prg, Map *map, Tree *key, Tree *element );
+Tree *mapUninsert( Program *prg, Map *map, Tree *key );
+Tree *mapStore( Program *prg, Map *map, Tree *key, Tree *element );
+
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif
+
diff --git a/src/parsedata.h b/src/parsedata.h
new file mode 100644
index 00000000..79ba08c1
--- /dev/null
+++ b/src/parsedata.h
@@ -0,0 +1,1063 @@
+/*
+ * Copyright 2001-2012 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _PARSEDATA_H
+#define _PARSEDATA_H
+
+#include <iostream>
+#include <limits.h>
+#include "bstset.h"
+#include "global.h"
+#include "avlmap.h"
+#include "avlset.h"
+#include "bstmap.h"
+#include "vector.h"
+#include "dlist.h"
+#include "dlistmel.h"
+#include "fsmgraph.h"
+#include "compare.h"
+#include "vector.h"
+#include "keyops.h"
+#include "parsetree.h"
+#include "astring.h"
+#include "pdagraph.h"
+#include "compare.h"
+#include "pdarun.h"
+#include "bytecode.h"
+#include "program.h"
+
+using std::ostream;
+
+/* Empty tag type with a single global instance, endp, and a stream insertion
+ * operator declared for it. The operator returns void, so endp can only be
+ * the last item in a chain of insertions.
+ * NOTE(review): presumably inserting endp ends the program -- confirm at the
+ * operator's definition. */
+struct exit_object { };
+extern exit_object endp;
+void operator<<( std::ostream &out, exit_object & );
+
+/* Forwards. */
+struct RedFsm;
+struct LangEl;
+struct Compiler;
+struct PdaCodeGen;
+struct FsmCodeGen;
+
+#define SHIFT_CODE 0x1
+#define REDUCE_CODE 0x2
+#define SHIFT_REDUCE_CODE 0x3
+
+/* Pack a reduction number and an action kind into a single action code.
+ * The low two bits hold REDUCE_CODE or, when isShiftReduce is set,
+ * SHIFT_REDUCE_CODE. The reduction number sits above them. */
+inline long makeReduceCode( long reduction, bool isShiftReduce )
+{
+	long kind = REDUCE_CODE;
+	if ( isShiftReduce )
+		kind = SHIFT_REDUCE_CODE;
+
+	return ( reduction << 2 ) | kind;
+}
+
+struct ProdEl;
+struct ProdElList;
+struct PdaLiteral;
+struct Definition;
+
+/* A pointer to this is in PdaRun, but its specification is not known by the
+ * runtime code. The runtime functions that access it are defined in
+ * ctinput.cpp and stubbed in fsmcodegen.cpp. The type itself is a plain
+ * vector of ParseTree pointers with nothing added. */
+struct Bindings
+ : public Vector<ParseTree*>
+{};
+
+/* Two separate sets of prev/next links. Definition inherits from both, and
+ * DefList and LelDefList each name one of them as their link base. */
+struct DefListEl { Definition *prev, *next; };
+struct LelDefListEl { Definition *prev, *next; };
+typedef Vector< LangEl* > LangElVect;
+typedef Vector< ProdEl* > FactorVect;
+
+/* Map from a string to a long and its element type. */
+typedef AvlMap<String, long, CmpStr> StringMap;
+typedef AvlMapEl<String, long> StringMapEl;
+
+/* Kind of precedence declared for a language element (see PredDecl and
+ * LangEl::predType). PredNone is the value for "nothing declared".
+ * NOTE(review): Left/Right/Nonassoc presumably name operator associativity --
+ * confirm where the values are consumed. */
+enum PredType {
+ PredLeft,
+ PredRight,
+ PredNonassoc,
+ PredNone
+};
+
+/* A precedence declaration: the type it applies to, the kind of precedence
+ * and its numeric value. Instances are kept in a PredDeclList. */
+struct PredDecl
+{
+ PredDecl( TypeRef *typeRef, PredType predType, long predValue )
+ : typeRef(typeRef), predType(predType), predValue(predValue)
+ {}
+
+ TypeRef *typeRef;
+ PredType predType;
+ long predValue;
+
+ /* List links. Not initialized by the constructor. */
+ PredDecl *prev, *next;
+};
+
+typedef DList<PredDecl> PredDeclList;
+
+/* A production definition. It inherits two independent sets of list links
+ * (DefListEl and LelDefListEl) so that it can be linked into a DefList and
+ * into a LelDefList. */
+struct Definition
+:
+ public DefListEl, public LelDefListEl
+{
+ enum Type { Production };
+
+ /* Stores the arguments and nulls/zeroes the remaining pointer, integer
+ * and bool members. The members with class type (data, reducesTo,
+ * nonTermFirstSet, firstSet, copy) are default constructed. */
+ Definition( const InputLoc &loc, LangEl *prodName, ProdElList *prodElList,
+ bool prodCommit, CodeBlock *redBlock, int prodId, int prodNum, Type type )
+ :
+ loc(loc), prodName(prodName), prodElList(prodElList),
+ prodCommit(prodCommit), redBlock(redBlock), prodId(prodId), prodNum(prodNum),
+ type(type), fsm(0), fsmLength(0), uniqueEmptyLeader(0),
+ isLeftRec(false), localFrame(0), lhsField(0), predOf(0),
+ collectIgnoreRegion(0) {}
+
+ InputLoc loc;
+ LangEl *prodName;
+ ProdElList *prodElList;
+ bool prodCommit;
+
+ CodeBlock *redBlock;
+
+ /* prodId is the value CmpDefById orders definitions by. */
+ int prodId;
+ int prodNum;
+ Type type;
+
+ PdaGraph *fsm;
+ int fsmLength;
+ String data;
+ LongSet reducesTo;
+
+ LangEl *uniqueEmptyLeader;
+
+ ProdIdSet nonTermFirstSet;
+ AlphSet firstSet;
+ bool isLeftRec;
+
+ ObjectDef *localFrame;
+ ObjField *lhsField;
+
+ LangEl *predOf;
+
+ UnsignedCharVect copy;
+
+ TokenRegion *collectIgnoreRegion;
+};
+
+/* Comparison policy ordering definitions by their production id. */
+struct CmpDefById
+{
+	/* Returns -1, 0 or 1 as d1's prodId is less than, equal to or greater
+	 * than d2's prodId. */
+	static int compare( Definition *d1, Definition *d2 )
+	{
+		const int lhs = d1->prodId;
+		const int rhs = d2->prodId;
+		return ( lhs > rhs ) - ( lhs < rhs );
+	}
+};
+
+
+/* Map dotItems to productions. */
+typedef BstMap< int, Definition*, CmpOrd<int> > DotItemIndex;
+typedef BstMapEl< int, Definition*> DotItemIndexEl;
+
+/* List of definitions linked through their DefListEl base. */
+struct DefList
+:
+ public DListMel<Definition, DefListEl>
+{};
+
+/* List of definitions linked through their LelDefListEl base. This is the
+ * type of LangEl::defList: each non terminal can have many productions. */
+struct LelDefList
+:
+ public DListMel<Definition, LelDefListEl>
+{};
+
+/* A set of machines made during a closure round. */
+typedef Vector< PdaGraph* > Machines;
+
+/* List of language elements. */
+typedef DList<LangEl> LelList;
+
+typedef Vector< TokenDef* > TokenDefVect;
+
+struct UniqueType;
+
+typedef Vector<LangEl*> LangElVect;
+typedef BstSet<LangEl*> LangElSet;
+
+/* A language element class. Can be a nonTerm or a term. Elements are list
+ * members (DListEl<LangEl>); the constructor and destructor are defined out
+ * of line, so member initialization is not visible in this header. */
+struct LangEl : public DListEl<LangEl>
+{
+ enum Type { Unknown, Term, NonTerm };
+
+ LangEl( Namespace *nspace, const String &name, Type type );
+ ~LangEl();
+
+ /* The region the language element was defined in. */
+ Namespace *nspace;
+
+ String name;
+ String lit;
+
+ String fullName;
+ String fullLit;
+
+ /* For referencing the type. */
+ String refName;
+
+ /* For declaring things inside the type. */
+ String declName;
+
+ String xmlTag;
+
+ Type type;
+ long id;
+ bool isUserTerm;
+ bool isContext;
+ String displayString;
+ long numAppearances;
+ bool commit;
+ bool ignore;
+ bool reduceFirst;
+ bool isLiteral;
+ bool isRepeat;
+ bool isList;
+ bool isOpt;
+ bool parseStop;
+ bool isEOF;
+
+ LangEl *repeatOf;
+
+ /* Productions from the language element if it is a non-terminal. */
+ LelDefList defList;
+
+ TokenDef *tokenDef;
+ Definition *rootDef;
+ LangEl *termDup;
+ LangEl *eofLel;
+
+ PdaGraph *pdaGraph;
+ PdaTables *pdaTables;
+
+ PdaState *startState;
+
+ CodeBlock *transBlock;
+
+ ObjectDef *objectDef;
+ NamespaceQual *objectDefUsesQual;
+ String objectDefUses;
+
+ long thisSize;
+ long ofiOffset;
+
+ GenericType *generic;
+
+ long parserId;
+
+ /* Precedence kind and value for this element (see PredDecl). */
+ PredType predType;
+ long predValue;
+
+ Context *contextDef;
+ Context *contextIn;
+ bool noPreIgnore;
+ bool noPostIgnore;
+ bool isCI;
+ TokenRegion *ciRegion;
+};
+
+/* One element on the right hand side of a production. Kept in a ProdElList.
+ * Both constructors accept a source location but there is no member to hold
+ * it, so the location is not stored. The prev/next links are not initialized
+ * by either constructor. */
+struct ProdEl
+{
+ /* Language elements a factor node can be. */
+ enum Type {
+ LiteralType,
+ ReferenceType
+ };
+
+ /* Construct with a reference to a var def. langEl and objField start
+ * out null. */
+ ProdEl( Type type, const InputLoc &loc, ObjField *captureField, bool commit, TypeRef *typeRef, int priorVal )
+ :
+ captureField(captureField),
+ commit(commit),
+ typeRef(typeRef),
+ langEl(0),
+ priorVal(priorVal),
+ type(type),
+ objField(0)
+ {}
+
+ /* Construct a plain reference element: ReferenceType, no capture field,
+ * no commit, priority zero. */
+ ProdEl( const InputLoc &loc, TypeRef *typeRef )
+ :
+ captureField(0),
+ commit(false),
+ typeRef(typeRef),
+ langEl(0),
+ priorVal(0),
+ type(ReferenceType),
+ objField(0)
+ {}
+
+ ObjField *captureField;
+ bool commit;
+
+ TypeRef *typeRef;
+
+ LangEl *langEl;
+ int priorVal;
+ Type type;
+ ObjField *objField;
+ ProdEl *prev, *next;
+};
+
+/* The list of elements making up the right hand side of a production. */
+struct ProdElList : public DList<ProdEl>
+{
+ /* Defined out of line; returns a PdaGraph for the given production.
+ * NOTE(review): presumably builds the graph by walking the elements of
+ * this list -- confirm at the definition. */
+ PdaGraph *walk( Compiler *pd, Definition *prod );
+};
+
+/* This should be renamed. It is a literal string in a type reference. */
+struct PdaLiteral
+{
+ /* Stores the location and token; value starts at zero. */
+ PdaLiteral( const InputLoc &loc, const Token &token )
+ : loc(loc), token(token), value(0) { }
+
+ InputLoc loc;
+ Token token;
+ long value;
+};
+
+/* Nodes in the tree that use this action. */
+typedef Vector<NameInst*> ActionRefs;
+
+/* Element in list of actions. Contains the string for the code to execute.
+ * An action is both a list element (ActionList) and a dictionary element
+ * keyed by its name (ActionDict). */
+struct Action
+:
+	public DListEl<Action>,
+	public AvlTreeEl<Action>
+{
+public:
+
+	/* Construct a named action from parsed code. It carries no mark
+	 * (MarkNone, markId -1) and all reference counters start at zero. */
+	Action( const InputLoc &loc, const String &name, InlineList *inlineList )
+	:
+		loc(loc),
+		name(name),
+		markType(MarkNone),
+		objField(0),
+		markId(-1),
+		inlineList(inlineList),
+		actionId(-1),
+		numTransRefs(0),
+		numToStateRefs(0),
+		numFromStateRefs(0),
+		numEofRefs(0),
+		numCondRefs(0),
+		anyCall(false),
+		isLmAction(false)
+	{
+	}
+
+	/* Construct a mark action. It is named "mark" and gets a new, empty
+	 * inline list. The location is value-initialized so that actionName()
+	 * never reads an indeterminate line or column. */
+	Action( MarkType markType, long markId )
+	:
+		loc(),
+		name("mark"),
+		markType(markType),
+		objField(0),
+		markId(markId),
+		inlineList(new InlineList),
+		actionId(-1),
+		numTransRefs(0),
+		numToStateRefs(0),
+		numFromStateRefs(0),
+		numEofRefs(0),
+		numCondRefs(0),
+		anyCall(false),
+		isLmAction(false)
+	{
+	}
+
+	/* Key for action dictionary. */
+	const String &getKey() const { return name; }
+
+	/* Data collected during parse. */
+	InputLoc loc;
+	String name;
+
+	MarkType markType;
+	ObjField *objField;
+	long markId;
+
+	InlineList *inlineList;
+	int actionId;
+
+	/* Write the action's name, or its line:col location if it has no
+	 * name. */
+	void actionName( ostream &out )
+	{
+		if ( name != 0 )
+			out << name;
+		else
+			out << loc.line << ":" << loc.col;
+	}
+
+	/* Places in the input text that reference the action. */
+	ActionRefs actionRefs;
+
+	/* Number of references in the final machine: the sum of the transition,
+	 * to-state, from-state and EOF counters (numCondRefs is not included).
+	 * Returns the count as an int; it still converts to true/false exactly
+	 * as before when used as a condition. */
+	int numRefs()
+		{ return numTransRefs + numToStateRefs + numFromStateRefs + numEofRefs; }
+	int numTransRefs;
+	int numToStateRefs;
+	int numFromStateRefs;
+	int numEofRefs;
+	int numCondRefs;
+	bool anyCall;
+
+	bool isLmAction;
+};
+
+/* A list of actions. */
+typedef DList<Action> ActionList;
+typedef AvlTree<Action, String, CmpStr> ActionDict;
+
+struct VarDef;
+struct Join;
+struct Expression;
+struct Term;
+struct FactorWithAug;
+struct FactorWithLabel;
+struct FactorWithRep;
+struct FactorWithNeg;
+struct Factor;
+struct Literal;
+struct Range;
+struct RegExpr;
+struct ReItem;
+struct ReOrBlock;
+struct ReOrItem;
+struct TokenRegion;
+
+/* Priority name dictionary. */
+typedef AvlMapEl<String, int> PriorDictEl;
+typedef AvlMap<String, int, CmpStr> PriorDict;
+
+/* Local error name dictionary. */
+typedef AvlMapEl<String, int> LocalErrDictEl;
+typedef AvlMap<String, int, CmpStr> LocalErrDict;
+
+/* Tree of instantiated names. */
+typedef BstMapEl<String, NameInst*> NameMapEl;
+typedef BstMap<String, NameInst*, CmpStr> NameMap;
+typedef Vector<NameInst*> NameVect;
+typedef BstSet<NameInst*> NameSet;
+
+/* Node in the tree of instantiated names. */
+struct NameInst
+{
+ /* Stores the arguments; isLongestMatch starts false, the reference and
+ * use counts start at zero and start/final start null. The prev/next
+ * queue links are not initialized. */
+ NameInst( const InputLoc &loc, NameInst *parent, const String &name,
+ int id, bool isLabel ) :
+ loc(loc), parent(parent), name(name), id(id), isLabel(isLabel),
+ isLongestMatch(false), numRefs(0), numUses(0), start(0), final(0) {}
+
+ InputLoc loc;
+
+ /* Keep parent pointers in the name tree to retrieve
+ * fully qualified names. */
+ NameInst *parent;
+
+ String name;
+ int id;
+ bool isLabel;
+ bool isLongestMatch;
+
+ int numRefs;
+ int numUses;
+
+ /* Names underneath us, excludes anonymous names. */
+ NameMap children;
+
+ /* All names underneath us in order of appearance. */
+ NameVect childVect;
+
+ /* Join scopes need an implicit "final" target. */
+ NameInst *start, *final;
+
+ /* During a fsm generation walk, lists the names that are referenced by
+ * epsilon operations in the current scope. After the link is made by the
+ * epsilon reference and the join operation is complete, the label can
+ * have its refcount decremented. Once there are no more references the
+ * entry point can be removed from the fsm returned. */
+ NameVect referencedNames;
+
+ /* Pointers for the name search queue. */
+ NameInst *prev, *next;
+
+ /* Check if this name inst or any name inst below is referenced. */
+ bool anyRefsRec();
+};
+
+typedef DList<NameInst> NameInstList;
+
+/* Stack frame used in walking the name tree. Compiler::enterNameScope
+ * returns one and Compiler::popNameScope / resetNameScope take one back.
+ * NOTE(review): the fields presumably hold the previous values of the
+ * Compiler's curNameInst, curNameChild and localNameScope -- confirm at
+ * the definitions of those functions. */
+struct NameFrame
+{
+ NameInst *prevNameInst;
+ int prevNameChild;
+ NameInst *prevLocalScope;
+};
+
+/* Class to collect information about the machine during the
+ * parse of input. */
+struct Compiler
+{
+ /* Create a new parse data object. This is done at the beginning of every
+ * fsm specification. */
+ Compiler( const String &fileName, const String &sectionName,
+ const InputLoc &sectionLoc, ostream &out );
+ ~Compiler();
+
+ /*
+ * Setting up the graph dict.
+ */
+
+ void compileLiteralTokens();
+ void initEmptyScanners();
+ void initUniqueTypes();
+
+ /* Initialize a graph dict with the basic fsms. */
+ void initGraphDict();
+ void createBuiltin( const char *name, BuiltinMachine builtin );
+
+ /* Make a name id in the current name instantiation scope if it is not
+ * already there. */
+ NameInst *addNameInst( const InputLoc &loc, char *data, bool isLabel );
+ NameInst *makeJoinNameTree( Join *join );
+ NameInst *makeNameTree( );
+ void fillNameIndex( NameInst **nameIndex, NameInst *from );
+ NameInst **makeNameIndex( NameInst *rootName );
+
+
+ void printNameTree( NameInst *rootName );
+ void printNameIndex( NameInst **nameIndex );
+
+ /* Increments the usage count on entry names. Names that are no longer
+ * needed will have their entry points unset. */
+ void unsetObsoleteEntries( FsmGraph *graph );
+
+ /* Resolve name references in action code and epsilon transitions. */
+ NameSet resolvePart( NameInst *refFrom, const char *data, bool recLabelsOnly );
+ void resolveFrom( NameSet &result, NameInst *refFrom,
+ const NameRef &nameRef, int namePos );
+ void referenceRegions( NameInst *root );
+
+ /* Set the alphabet type. If the types are not valid returns false. */
+ bool setAlphType( char *s1, char *s2 );
+ bool setAlphType( char *s1 );
+
+ /* Unique actions. */
+ void removeDups( ActionTable &actionTable );
+ void removeActionDups( FsmGraph *graph );
+
+ /* Dumping the name instantiation tree. */
+ void printNameInst( NameInst *nameInst, int level );
+
+ /* Make the graph from a graph dict node. Does minimization. */
+ void finishGraphBuild( FsmGraph *graph );
+ FsmGraph *makeAllRegions();
+ FsmGraph *makeScanner();
+
+ void analyzeAction( Action *action, InlineList *inlineList );
+ void analyzeGraph( FsmGraph *graph );
+ void resolvePrecedence( PdaGraph *pdaGraph );
+ LangEl *predOf( PdaTrans *trans, long action );
+ bool precedenceSwap( long action1, long action2, LangEl *l1, LangEl *l2 );
+ bool precedenceRemoveBoth( LangEl *l1, LangEl *l2 );
+
+ void initKeyOps();
+
+ /*
+ * Data collected during the parse.
+ */
+
+ /* The list of instances. */
+ RegionGraphList instanceList;
+
+ /* Dictionary of actions. Lets actions be defined and then referenced. */
+ ActionDict actionDict;
+
+ /* Dictionary of named priorities. */
+ PriorDict priorDict;
+
+ /* Dictionary of named local errors. */
+ LocalErrDict localErrDict;
+
+ /* List of actions. Will be pasted into a switch statement. */
+ ActionList actionList;
+
+ /* The id of the next priority name and label. */
+ int nextPriorKey, nextLocalErrKey, nextNameId;
+
+ /* The default priority number key for a machine. This is active during
+ * the parse of the rhs of a machine assignment. */
+ int curDefPriorKey;
+
+ int curDefLocalErrKey;
+
+ /* Alphabet type. */
+ HostType *userAlphType;
+ bool alphTypeSet;
+
+ /* Element type and get key expression. */
+ InlineList *getKeyExpr;
+ InlineList *accessExpr;
+ InlineList *curStateExpr;
+
+ /* The alphabet range. */
+ char *lowerNum, *upperNum;
+ Key lowKey, highKey;
+ InputLoc rangeLowLoc, rangeHighLoc;
+
+ /* The name of the file the fsm is from, and the spec name. */
+ String fileName;
+ String sectionName;
+ InputLoc sectionLoc;
+
+ /* Number of errors encountered parsing the fsm spec. */
+ int errorCount;
+
+ /* Counting the action and priority ordering. */
+ int curActionOrd;
+ int curPriorOrd;
+
+ /* Root of the name tree. */
+ NameInst *curNameInst;
+ int curNameChild;
+
+ /* The place where resolved epsilon transitions go. These cannot go into
+ * the parse tree because a single epsilon op can resolve more than once
+ * to different nameInsts if the machine it's in is used more than once. */
+ NameVect epsilonResolvedLinks;
+ int nextEpsilonResolvedLink;
+
+ /* Root of the name tree used for doing local name searches. */
+ NameInst *localNameScope;
+
+ void setLmInRetLoc( InlineList *inlineList );
+ void initLongestMatchData();
+ void initNameWalk( NameInst *rootName );
+ NameInst *nextNameScope() { return curNameInst->childVect[curNameChild]; }
+ NameFrame enterNameScope( bool isLocal, int numScopes );
+ void popNameScope( const NameFrame &frame );
+ void resetNameScope( const NameFrame &frame );
+
+ /* Counter for assigning ids to longest match items. */
+ int nextTokenId;
+
+ /* List of all longest match parse tree items. */
+ RegionList regionList;
+
+ NamespaceList namespaceList;
+
+ Action *newAction( const String &name, InlineList *inlineList );
+
+ Action *setTokStart;
+ int setTokStartOrd;
+
+ Action *initActId;
+ int initActIdOrd;
+
+ Action *setTokEnd;
+ int setTokEndOrd;
+
+ CodeBlock *rootCodeBlock;
+
+ void beginProcessing()
+ {
+ ::condData = &thisCondData;
+ ::keyOps = &thisKeyOps;
+ }
+
+ CondData thisCondData;
+ KeyOps thisKeyOps;
+
+ UniqueType *mainReturnUT;
+
+ /* CONTEXT FREE */
+ ProdElList *makeProdElList( LangEl *langEl );
+ void wrapNonTerminals();
+ void makeDefinitionNames();
+ void noUndefindLangEls();
+ void declareBaseLangEls();
+ void makeLangElIds();
+ void makeLangElNames();
+ void makeTerminalWrappers();
+ void makeEofElements();
+ void makeIgnoreCollectors();
+ void setPrecedence();
+
+ void typeDeclaration();
+ void typeResolve();
+
+ /* Parser generation. */
+ void advanceReductions( PdaGraph *pdaGraph );
+ void sortActions( PdaGraph *pdaGraph );
+ void addDupTerms( PdaGraph *pdaGraph );
+ void linkExpansions( PdaGraph *pdaGraph );
+ void lalr1FollowEpsilonOp( PdaGraph *pdaGraph );
+
+ void transferCommits( PdaGraph *pdaGraph, PdaTrans *trans, PdaState *state, long prodId );
+
+ void lalr1AddFollow2( PdaGraph *pdaGraph, PdaTrans *trans, FollowToAdd &followKeys );
+ void lalr1AddFollow1( PdaGraph *pdaGraph, PdaState *state );
+
+ void lalr1AddFollow2( PdaGraph *pdaGraph, PdaTrans *trans, long followKey, long prior );
+ void lalr1AddFollow1( PdaGraph *pdaGraph, PdaTrans *trans );
+
+ void lalr1AddFollowSets( PdaGraph *pdaGraph, LangElSet &parserEls );
+
+ void lr0BringInItem( PdaGraph *pdaGraph, PdaState *dest, PdaState *prodState,
+ PdaTrans *expandFrom, Definition *prod );
+ void lr0InvokeClosure( PdaGraph *pdaGraph, PdaState *state );
+ void lr0CloseAllStates( PdaGraph *pdaGraph );
+
+ void lalr1GenerateParser( PdaGraph *pdaGraph, LangElSet &parserEls );
+
+ void reduceActions( PdaGraph *pdaGraph );
+
+ bool makeNonTermFirstSetProd( Definition *prod, PdaState *state );
+ void makeNonTermFirstSets();
+
+ bool makeFirstSetProd( Definition *prod, PdaState *state );
+ void makeFirstSets();
+
+ int findIndexOff( PdaTables *pdaTables, PdaGraph *pdaGraph, PdaState *state, int &currLen );
+ void trySetTime( PdaTrans *trans, long code, long &time );
+ void addRegion( PdaState *tabState, PdaTrans *pdaTrans, long pdaKey,
+ bool noPreIgnore, bool noPostIgnore );
+ PdaState *followProd( PdaState *tabState, PdaState *prodState );
+ void findFollow( AlphSet &result, PdaState *overTab,
+ PdaState *overSrc, Definition *parentDef );
+ void pdaActionOrder( PdaGraph *pdaGraph, LangElSet &parserEls );
+ void pdaOrderFollow( LangEl *rootEl, PdaState *tabState,
+ PdaTrans *tabTrans, PdaTrans *srcTrans,
+ Definition *parentDef, Definition *definition, long &time );
+ void pdaOrderProd( LangEl *rootEl, PdaState *tabState,
+ PdaState *srcState, Definition *parentDef, long &time );
+ void analyzeMachine( PdaGraph *pdaGraph, LangElSet &parserEls );
+
+ void makeProdFsms();
+ void insertUniqueEmptyProductions();
+ void printNonTermFirstSets();
+ void printFirstSets();
+
+ LangEl *makeRepeatProd( Namespace *nspace, const String &repeatName,
+ NamespaceQual *nspaceQual, const String &name );
+ LangEl *makeListProd( Namespace *nspace, const String &listName,
+ NamespaceQual *nspaceQual, const String &name );
+ LangEl *makeOptProd( Namespace *nspace, const String &optName,
+ NamespaceQual *nspaceQual, const String &name );
+ void resolveFactor( ProdEl *fact );
+ void resolveProductionEls();
+ void resolvePatternEls();
+ void resolveReplacementEls();
+ void resolveParserEls();
+
+ void addMatchText( ObjectDef *frame, LangEl *lel );
+ void addMatchLength( ObjectDef *frame, LangEl *lel );
+ void addInput( ObjectDef *frame );
+ void addCtx( ObjectDef *frame );
+ void addTransTokVar( ObjectDef *frame, LangEl *lel );
+ void addProdRHSVars( ObjectDef *localFrame, ProdElList *prodElList );
+ void addProdRedObjectVar( ObjectDef *localFrame, LangEl *langEl );
+ void addProdObjects();
+
+ void addProdRHSLoads( Definition *prod, CodeVect &code, long &insertPos );
+ void addProdLHSLoad( Definition *prod, CodeVect &code, long &insertPos );
+ void addPushBackLHS( Definition *prod, CodeVect &code, long &insertPos );
+
+ void prepGrammar();
+ void parsePatterns();
+
+ void collectParserEls( LangElSet &parserEls );
+ void makeParser( LangElSet &parserEls );
+ PdaGraph *makePdaGraph( BstSet<LangEl*> &parserEls );
+ PdaTables *makePdaTables( PdaGraph *pdaGraph );
+
+ void fillInPatterns( Program *prg );
+ void makeRuntimeData();
+
+ /* Generate and write out the fsm. */
+ void generateGraphviz();
+
+ void verifyParseStopGrammar( LangEl *langEl, PdaGraph *pdaGraph );
+ void computeAdvanceReductions( LangEl *langEl, PdaGraph *pdaGraph );
+
+ void initFieldInstructions( ObjField *el );
+ void initLocalInstructions( ObjField *el );
+ void initLocalRefInstructions( ObjField *el );
+
+ void initMapFunctions( GenericType *gen );
+ void initListField( GenericType *gen, const char *name, int offset );
+ void initListFields( GenericType *gen );
+ void initListFunctions( GenericType *gen );
+ void initVectorFunctions( GenericType *gen );
+ void initParserFunctions( GenericType *gen );
+ void initParserFields( GenericType *gen );
+ void initCtxField( GenericType *gen );
+
+ void addStdin();
+ void addStdout();
+ void addStderr();
+ void addArgv();
+ int argvOffset();
+ void initGlobalFunctions();
+ void makeDefaultIterators();
+ void addLengthField( ObjectDef *objDef, Code getLength );
+ ObjectDef *findObject( const String &name );
+ void initAllLanguageObjects();
+ void resolveListElementOf( ObjectDef *container, ObjectDef *obj, ElementOf *elof );
+ void resolveMapElementOf( ObjectDef *container, ObjectDef *obj, ElementOf *elof );
+ void resolveElementOf( ObjectDef *obj );
+ void makeFuncVisible( Function *func, bool isUserIter );
+
+ void resolveFunction( Function *func );
+ void resolveUserIter( Function *func );
+ void resolvePreEof( TokenRegion *region );
+ void resolveRootBlock();
+ void resolveTranslateBlock( LangEl *langEl );
+ void resolveReductionCode( Definition *prod );
+ void resolveParseTree();
+ void resolveGenericTypes();
+
+ void compileFunction( Function *func, CodeVect &code );
+ void compileFunction( Function *func );
+ void compileUserIter( Function *func, CodeVect &code );
+ void compileUserIter( Function *func );
+ void compilePreEof( TokenRegion *region );
+ void compileRootBlock();
+ void compileTranslateBlock( LangEl *langEl );
+ void findLocalTrees( CharSet &trees );
+ void makeProdCopies( Definition *prod );
+ void compileReductionCode( Definition *prod );
+ void initGenericTypes();
+ void removeNonUnparsableRepls();
+ void compileByteCode();
+
+ void resolveUses();
+ void createDefaultScanner();
+ void generateOutput();
+ void compile();
+
+ void openNameSpace( ostream &out, Namespace *nspace );
+ void closeNameSpace( ostream &out, Namespace *nspace );
+ void refNameSpace( LangEl *lel, Namespace *nspace );
+ void generateExports();
+ void generateExportsImpl();
+
+ /*
+ * Graphviz Generation
+ */
+ void writeTransList( PdaState *state );
+ void writeDotFile( PdaGraph *graph );
+ void writeDotFile( );
+
+
+ /*
+ * Data collected during the parse.
+ */
+
+ LelList langEls;
+ DefList prodList;
+
+ /* Dumping. */
+ DotItemIndex dotItemIndex;
+
+ PredDeclList predDeclList;
+
+ /* The name of the file the fsm is from, and the spec name. */
+ // EXISTS IN RL: char *fileName;
+ String parserName;
+ ostream &out;
+ // EXISTS IN RL: InputLoc sectionLoc;
+
+ /* How to access the instance data. */
+ String access;
+
+ /* The name of the token structure. */
+ String tokenStruct;
+
+ GenericType *anyList;
+ GenericType *anyMap;
+ GenericType *anyVector;
+
+ LangEl *ptrLangEl;
+ LangEl *boolLangEl;
+ LangEl *intLangEl;
+ LangEl *strLangEl;
+ LangEl *streamLangEl;
+ LangEl *inputLangEl;
+ LangEl *anyLangEl;
+ LangEl *rootLangEl;
+ LangEl *noTokenLangEl;
+ LangEl *eofLangEl;
+ LangEl *errorLangEl;
+ LangEl *defaultCharLangEl;
+ LangEl *ignoreLangEl;
+
+ TokenRegion *rootRegion;
+ TokenRegion *defaultRegion;
+ TokenRegion *eofTokenRegion;
+
+ Namespace *defaultNamespace;
+ Namespace *rootNamespace;
+
+ int nextSymbolId;
+ int firstNonTermId;
+
+ LangEl **langElIndex;
+ PdaState *actionDestState;
+ DefSetSet prodSetSet;
+
+ Definition **prodIdIndex;
+ AlphSet literalSet;
+
+ PatternList patternList;
+ ReplList replList;
+ ParserTextList parserTextList;
+
+ ObjectDef *globalObjectDef;
+
+ VectorTypeIdMap vectorTypeIdMap;
+ ObjectDef *curLocalFrame;
+
+ UniqueType *findUniqueType( int typeId );
+ UniqueType *findUniqueType( int typeId, LangEl *langEl );
+ UniqueType *findUniqueType( int typeId, IterDef *iterDef );
+
+ UniqueType *uniqueTypeNil;
+ UniqueType *uniqueTypePtr;
+ UniqueType *uniqueTypeBool;
+ UniqueType *uniqueTypeInt;
+ UniqueType *uniqueTypeStr;
+ UniqueType *uniqueTypeStream;
+ UniqueType *uniqueTypeInput;
+ UniqueType *uniqueTypeIgnore;
+ UniqueType *uniqueTypeAny;
+
+ UniqueTypeMap uniqeTypeMap;
+ UniqueRepeatMap uniqeRepeatMap;
+ UniqueMapMap uniqueMapMap;
+ UniqueListMap uniqueListMap;
+ UniqueVectorMap uniqueVectorMap;
+ UniqueParserMap uniqueParserMap;
+
+ void initStrObject();
+ void initStreamObject();
+ void initInputObject();
+ void initIntObject();
+ void initTokenObjects();
+
+ ObjectDef *intObj;
+ ObjectDef *strObj;
+ ObjectDef *streamObj;
+ ObjectDef *inputObj;
+ ObjectDef *tokenObj;
+
+ FsmTables *fsmTables;
+ RuntimeData *runtimeData;
+
+ int nextPatReplId;
+ int nextGenericId;
+
+ FunctionList functionList;
+ int nextFuncId;
+
+ enum CompileContext {
+ CompileTranslation,
+ CompileReduction,
+ CompileFunction,
+ CompileRoot
+ };
+
+ CompileContext compileContext;
+ LongVect returnJumps;
+ LongVect breakJumps;
+ Function *curFunction;
+
+ /* Loops fill this in for return statements to use. */
+ CodeVect *loopCleanup;
+
+ ObjField *makeDataEl();
+ ObjField *makePosEl();
+ ObjField *makeLineEl();
+
+ IterDef *findIterDef( IterDef::Type type, GenericType *generic );
+ IterDef *findIterDef( IterDef::Type type, Function *func );
+ IterDef *findIterDef( IterDef::Type type );
+ IterDefSet iterDefSet;
+
+ enum GeneratesType { GenToken, GenIgnore, GenCfl };
+
+ int nextObjectId;
+ GeneratesType generatesType;
+ bool generatesIgnore;
+ bool insideRegion;
+
+ StringMap literalStrings;
+
+ long nextFrameId;
+ long nextParserId;
+
+ ObjectDef *rootLocalFrame;
+
+ long nextLabelId;
+ ObjectDef *objectDef;
+
+ bool revertOn;
+
+ RedFsm *redFsm;
+
+ PdaGraph *pdaGraph;
+ PdaTables *pdaTables;
+
+ long predValue;
+ long nextMatchEndNum;
+
+ TypeRef *argvTypeRef;
+
+ Context *context;
+};
+
+/* Free functions used while building scanner state machines. These are
+ * declarations only; the definitions are not in this header.
+ * lastInSeq defaults to true when the caller does not pass it. */
+void afterOpMinimize( FsmGraph *fsm, bool lastInSeq = true );
+
+/* Key construction helpers. The string variants take the source location
+ * (NOTE(review): presumably for error reporting -- confirm at the
+ * definitions) and all take the Compiler instance. */
+Key makeFsmKeyHex( char *str, const InputLoc &loc, Compiler *pd );
+Key makeFsmKeyDec( char *str, const InputLoc &loc, Compiler *pd );
+Key makeFsmKeyNum( char *str, const InputLoc &loc, Compiler *pd );
+Key makeFsmKeyChar( char c, Compiler *pd );
+/* Array variants: data/len describe the input characters, result receives
+ * the keys. */
+void makeFsmKeyArray( Key *result, char *data, int len, Compiler *pd );
+void makeFsmUniqueKeyArray( KeySet &result, char *data, int len,
+ bool caseInsensitive, Compiler *pd );
+
+/* Machine constructors; each returns a pointer to an FsmGraph. */
+FsmGraph *makeBuiltin( BuiltinMachine builtin, Compiler *pd );
+FsmGraph *dotFsm( Compiler *pd );
+FsmGraph *dotStarFsm( Compiler *pd );
+
+void errorStateLabels( const NameSet &locations );
+
+/* Forward declaration; only pointers to ColmParser are needed below. */
+struct ColmParser;
+
+/* Map from a String key to a ColmParser pointer, ordered with CmpStr. */
+typedef AvlMap<String, ColmParser *, CmpStr> ParserDict;
+typedef AvlMapEl<String, ColmParser *> ParserDictEl;
+
+/* Language element and type helpers operating on a namespace of the given
+ * Compiler instance. */
+LangEl *declareLangEl( Compiler *pd, Namespace *nspace, const String &data, LangEl::Type type );
+LangEl *addLangEl( Compiler *pd, Namespace *nspace, const String &data, LangEl::Type type );
+void declareTypeAlias( Compiler *pd, Namespace *nspace, const String &data, TypeRef *typeRef );
+LangEl *findType( Compiler *pd, Namespace *nspace, const String &data );
+
+#endif /* _PARSEDATA_H */
diff --git a/src/parsetree.cc b/src/parsetree.cc
new file mode 100644
index 00000000..084ffbb8
--- /dev/null
+++ b/src/parsetree.cc
@@ -0,0 +1,1776 @@
+/*
+ * Copyright 2006-2012 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "lmparse.h"
+#include "parsetree.h"
+#include "input.h"
+#include "fsmrun.h"
+
+#include <iostream>
+#include <iomanip>
+#include <errno.h>
+#include <limits.h>
+#include <stdlib.h>
+
+
+using namespace std;
+ostream &operator<<( ostream &out, const NameRef &nameRef );
+ostream &operator<<( ostream &out, const NameInst &nameInst );
+ostream &operator<<( ostream &out, const Token &token );
+
+/* Convert the literal string which comes in from the scanner into an array of
+ * characters with escapes and options interpreted. Also null terminates the
+ * string. Though this null termination should not be relied on for
+ * interpreting literals in the parser because the string may contain a
+ * literal string with \0
+ *
+ * result:          receives the decoded characters. It is sized to the source
+ *                  length up front and chopped to the decoded length.
+ * caseInsensitive: set to true if an 'i' option follows the closing quote,
+ *                  false otherwise.
+ * srcString:       the literal as scanned. The first character is skipped as
+ *                  the opening delimiter.
+ * loc:             location used when reporting an unsupported option. */
+void prepareLitString( String &result, bool &caseInsensitive,
+		const String &srcString, const InputLoc &loc )
+{
+	result.setAs( String::Fresh(), srcString.length() );
+	caseInsensitive = false;
+
+	char *src = srcString.data + 1;
+	char *end = srcString.data + srcString.length() - 1;
+
+	/* Walk backwards over the option characters until the terminator (a
+	 * quote or a newline) is found. */
+	while ( *end != '\'' && *end != '\"' && *end != '\n' ) {
+		if ( *end == 'i' )
+			caseInsensitive = true;
+		else {
+			error( loc ) << "literal string '" << *end <<
+					"' option not supported" << endl;
+		}
+		end -= 1;
+	}
+
+	/* A newline terminator is part of the literal's content. */
+	if ( *end == '\n' )
+		end++;
+
+	char *dest = result.data;
+	int len = 0;
+
+	/* Escapes advance src by two, so an equality test against end could be
+	 * stepped over (for example a backslash directly before the terminator,
+	 * or a backward scan that stopped on the opening quote). The ordered
+	 * comparison guarantees the loop stops at or past end. */
+	while ( src < end ) {
+		if ( *src == '\\' ) {
+			switch ( src[1] ) {
+			case '0': dest[len++] = '\0'; break;
+			case 'a': dest[len++] = '\a'; break;
+			case 'b': dest[len++] = '\b'; break;
+			case 't': dest[len++] = '\t'; break;
+			case 'n': dest[len++] = '\n'; break;
+			case 'v': dest[len++] = '\v'; break;
+			case 'f': dest[len++] = '\f'; break;
+			case 'r': dest[len++] = '\r'; break;
+			/* Backslash-newline is a line continuation: emit nothing. */
+			case '\n': break;
+			/* Any other escaped character stands for itself. */
+			default: dest[len++] = src[1]; break;
+			}
+			src += 2;
+		}
+		else {
+			dest[len++] = *src++;
+		}
+	}
+
+	result.chop( len );
+}
+
+/* Three-way comparison of unique types. The type id is the primary key.
+ * TREE, PTR and REF types are then ordered by langEl, ITER types by iterDef.
+ * Any other type id with equal ids trips the assertion. Returns -1, 0 or 1. */
+int CmpUniqueType::compare( const UniqueType &ut1, const UniqueType &ut2 )
+{
+	if ( ut1.typeId < ut2.typeId )
+		return -1;
+	if ( ut1.typeId > ut2.typeId )
+		return 1;
+
+	/* Type ids are equal from here on. */
+	const bool keyedByLangEl =
+			ut1.typeId == TYPE_TREE ||
+			ut1.typeId == TYPE_PTR ||
+			ut1.typeId == TYPE_REF;
+
+	if ( keyedByLangEl ) {
+		if ( ut1.langEl < ut2.langEl )
+			return -1;
+		if ( ut1.langEl > ut2.langEl )
+			return 1;
+		return 0;
+	}
+
+	if ( ut1.typeId == TYPE_ITER ) {
+		if ( ut1.iterDef < ut2.iterDef )
+			return -1;
+		if ( ut1.iterDef > ut2.iterDef )
+			return 1;
+		return 0;
+	}
+
+	/* Fail on anything unimplemented. */
+	assert( false );
+	return 0;
+}
+
+/* Three-way comparison of unique repeat types: ordered by repeatType first,
+ * then by langEl. Returns -1, 0 or 1. */
+int CmpUniqueRepeat::compare( const UniqueRepeat &ut1, const UniqueRepeat &ut2 )
+{
+	if ( ut1.repeatType < ut2.repeatType )
+		return -1;
+	if ( ut1.repeatType > ut2.repeatType )
+		return 1;
+
+	/* Same repeat type: fall back to the language element. */
+	if ( ut1.langEl < ut2.langEl )
+		return -1;
+	if ( ut1.langEl > ut2.langEl )
+		return 1;
+
+	return 0;
+}
+
+/* Three-way comparison of unique map types: ordered by key first, then by
+ * value. Returns -1, 0 or 1. */
+int CmpUniqueMap::compare( const UniqueMap &ut1, const UniqueMap &ut2 )
+{
+	if ( ut1.key < ut2.key )
+		return -1;
+	if ( ut1.key > ut2.key )
+		return 1;
+
+	/* Keys are equal: the value decides. */
+	if ( ut1.value < ut2.value )
+		return -1;
+	if ( ut1.value > ut2.value )
+		return 1;
+
+	return 0;
+}
+
+/* Three-way comparison of unique list types by their value member.
+ * Returns -1, 0 or 1. */
+int CmpUniqueList::compare( const UniqueList &ut1, const UniqueList &ut2 )
+{
+	int order = 0;
+	if ( ut1.value < ut2.value )
+		order = -1;
+	else if ( ut1.value > ut2.value )
+		order = 1;
+	return order;
+}
+
+/* Three-way comparison of unique vector types by their value member.
+ * Returns -1, 0 or 1. */
+int CmpUniqueVector::compare( const UniqueVector &ut1, const UniqueVector &ut2 )
+{
+	int order = 0;
+	if ( ut1.value < ut2.value )
+		order = -1;
+	else if ( ut1.value > ut2.value )
+		order = 1;
+	return order;
+}
+
+/* Three-way comparison of unique parser types by their parseType member.
+ * Returns -1, 0 or 1. */
+int CmpUniqueParser::compare( const UniqueParser &ut1, const UniqueParser &ut2 )
+{
+	int order = 0;
+	if ( ut1.parseType < ut2.parseType )
+		order = -1;
+	else if ( ut1.parseType > ut2.parseType )
+		order = 1;
+	return order;
+}
+
+/* Build the machine for this variable definition: walk the join, transfer
+ * any local error actions registered under the variable's name, resolve
+ * epsilon transitions for single-expression joins and unset obsolete entry
+ * points. Returns the resulting graph. */
+FsmGraph *VarDef::walk( Compiler *pd )
+{
+	/* Start from the machine of the defining expression. */
+	FsmGraph *graph = join->walk( pd );
+
+	/* Transfer local error actions recorded under this name, if any, to
+	 * every state of the machine. */
+	LocalErrDictEl *errEntry = pd->localErrDict.find( name );
+	if ( errEntry != 0 ) {
+		StateList::Iter st = graph->stateList;
+		while ( st.lte() ) {
+			graph->transferErrorActions( st, errEntry->value );
+			st++;
+		}
+	}
+
+	/* A join of several expressions has already had its epsilon transitions
+	 * resolved. A join of exactly one expression has not, so do it here. */
+	const bool singleExpr = join->exprList.length() == 1;
+	if ( singleExpr )
+		graph->epsilonOp();
+
+	/* Entry points that are no longer used can be dropped now. */
+	pd->unsetObsoleteEntries( graph );
+
+	return graph;
+}
+
+
+/* Build the machine for a token region inside its own name scope: walk the
+ * region, transfer local error actions registered under the region's name,
+ * unset obsolete entries and, if the region's name is referenced, add an
+ * entry point at the start state. Returns the resulting graph. */
+FsmGraph *RegionDef::walk( Compiler *pd )
+{
+	/* Open the name scope for the region. */
+	NameFrame savedFrame = pd->enterNameScope( true, 1 );
+
+	/* Build the scanner machine. */
+	FsmGraph *graph = tokenRegion->walk( pd );
+
+	/* Transfer local error actions recorded under this name, if any, to
+	 * every state of the machine. */
+	LocalErrDictEl *errEntry = pd->localErrDict.find( name );
+	if ( errEntry != 0 ) {
+		StateList::Iter st = graph->stateList;
+		while ( st.lte() ) {
+			graph->transferErrorActions( st, errEntry->value );
+			st++;
+		}
+	}
+
+	/* Entry points that are no longer used can be dropped now. */
+	pd->unsetObsoleteEntries( graph );
+
+	/* A referenced name needs an entry point at the start state. */
+	const bool referenced = pd->curNameInst->numRefs > 0;
+	if ( referenced )
+		graph->setEntry( pd->curNameInst->id, graph->startState );
+
+	/* Close the name scope again. */
+	pd->popNameScope( savedFrame );
+	return graph;
+}
+
+/* Add a name instantiation for this region, create the region's actions
+ * while that instantiation is current, and record the instantiation in the
+ * token region. The previous current name instantiation is restored before
+ * returning. */
+void RegionDef::makeNameTree( const InputLoc &loc, Compiler *pd )
+{
+	/* Remember the enclosing scope and make the region's scope current. */
+	NameInst *enclosing = pd->curNameInst;
+	NameInst *regionInst = pd->addNameInst( loc, name, false );
+	pd->curNameInst = regionInst;
+
+	/* The actions are created while the region's scope is current. */
+	tokenRegion->makeActions( pd );
+
+	/* Store the name inst in the token region. This is only legal for token
+	 * regions because they are referenced exactly once (near the root of
+	 * the name tree), so there can be only one corresponding name inst. */
+	assert( tokenRegion->regionNameInst == 0 );
+	tokenRegion->regionNameInst = pd->curNameInst;
+
+	/* Leave the region's scope. */
+	pd->curNameInst = enclosing;
+}
+
+/* Location of this token definition: the action's location when an action
+ * is attached, otherwise semiLoc. */
+InputLoc TokenDef::getLoc()
+{
+	if ( action != 0 )
+		return action->loc;
+	return semiLoc;
+}
+
+/*
+ * If there are any LMs then all of the following entry points must reset
+ * tokstart:
+ *
+ * 1. fentry(StateRef)
+ * 2. fgoto(StateRef), fcall(StateRef), fnext(StateRef)
+ * 3. target of any transition that has an fcall (the return loc).
+ * 4. start state of all longest match routines.
+ */
+
+/* Allocate an action, append it to the compiler's action list and flag it
+ * as a longest-match action. Returns the new action. */
+Action *TokenRegion::newAction( Compiler *pd, const InputLoc &loc,
+		const String &name, InlineList *inlineList )
+{
+	Action *created = new Action( loc, name, inlineList );
+
+	/* Register with the compiler, then mark as belonging to a scanner. */
+	pd->actionList.append( created );
+	created->isLmAction = true;
+
+	return created;
+}
+
+/* Create the scanner actions for this region and register them through
+ * newAction. For every token definition four actions are made (store<id>,
+ * imm<id>, lagh<id>, lag<id>, where <id> is the token's longestMatchId),
+ * followed by one region-wide select action named "lagsel". Each kind is
+ * created in its own pass over the token list so the order in which actions
+ * are appended to the compiler's action list is: all store actions, all imm
+ * actions, all lagh actions, all lag actions, then lagsel. */
+void TokenRegion::makeActions( Compiler *pd )
+{
+	/* Scratch buffer for action names. newAction receives the name as a
+	 * const String&, so the String built from this buffer is what the
+	 * action sees. The original code heap-allocated a fresh buffer for
+	 * every name and never released it.
+	 * NOTE(review): relies on String copying the characters it is
+	 * constructed from -- confirm against the String implementation. */
+	char actName[50];
+
+	/* Make actions that set the action id. */
+	for ( TokenDefListReg::Iter lmi = tokenDefList; lmi.lte(); lmi++ ) {
+		/* For each part create actions for setting the match type. We need
+		 * to do this so that the actions will go into the actionIndex. */
+		InlineList *inlineList = new InlineList;
+		inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
+				InlineItem::LmSetActId ) );
+		snprintf( actName, sizeof(actName), "store%i", lmi->longestMatchId );
+		lmi->setActId = newAction( pd, lmi->getLoc(), actName, inlineList );
+	}
+
+	/* Make actions that execute the user action and restart on the last
+	 * character. */
+	for ( TokenDefListReg::Iter lmi = tokenDefList; lmi.lte(); lmi++ ) {
+		InlineList *inlineList = new InlineList;
+		inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
+				InlineItem::LmOnLast ) );
+		snprintf( actName, sizeof(actName), "imm%i", lmi->longestMatchId );
+		lmi->actOnLast = newAction( pd, lmi->getLoc(), actName, inlineList );
+	}
+
+	/* Make actions that execute the user action and restart on the next
+	 * character. These actions will set tokend themselves (it is the current
+	 * char). */
+	for ( TokenDefListReg::Iter lmi = tokenDefList; lmi.lte(); lmi++ ) {
+		InlineList *inlineList = new InlineList;
+		inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
+				InlineItem::LmOnNext ) );
+		snprintf( actName, sizeof(actName), "lagh%i", lmi->longestMatchId );
+		lmi->actOnNext = newAction( pd, lmi->getLoc(), actName, inlineList );
+	}
+
+	/* Make actions that execute the user action and restart at tokend. These
+	 * actions execute some time after matching the last char. */
+	for ( TokenDefListReg::Iter lmi = tokenDefList; lmi.lte(); lmi++ ) {
+		InlineList *inlineList = new InlineList;
+		inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
+				InlineItem::LmOnLagBehind ) );
+		snprintf( actName, sizeof(actName), "lag%i", lmi->longestMatchId );
+		lmi->actLagBehind = newAction( pd, lmi->getLoc(), actName, inlineList );
+	}
+
+	/* The select action is not tied to a token, so give it a fixed
+	 * location of line 1, column 1. */
+	InputLoc loc;
+	loc.line = 1;
+	loc.col = 1;
+
+	/* Create the action that switches on the stored action id. */
+	InlineList *il6 = new InlineList;
+	il6->append( new InlineItem( loc, this, 0, InlineItem::LmSwitch ) );
+	lmActSelect = newAction( pd, loc, "lagsel", il6 );
+}
+
+/* Redirect a transition so that it leads to the graph's start state. The
+ * source state is read before detaching and reused for the re-attach. */
+void TokenRegion::restart( FsmGraph *graph, FsmTrans *trans )
+{
+	FsmState *origin = trans->fromState;
+
+	/* Unhook from the current target, then hook up to the start state. */
+	graph->detachTrans( origin, trans->toState, trans );
+	graph->attachTrans( origin, graph->startState, trans );
+}
+
+/* Turn the unioned token machine into a longest-match scanner.
+ *
+ * Steps, in the order performed below:
+ *  1. States reachable from the start state (search stops at final states)
+ *     get the null item (0) added to their lmItemSet.
+ *  2. For every transition carrying a longest-match action whose target has
+ *     transitions out, the first item of the action table is added to the
+ *     item sets of the states reachable from that target.
+ *  3. The start state is isolated and given the setTokStart from-state
+ *     action. If some state reachable from the start has more than one
+ *     item, the initActId to-state action is added as well and
+ *     lmSwitchHandlesError is set.
+ *  4. Transitions with a longest-match action are given actOnLast (target
+ *     has no transitions out; the transition is later redirected to the
+ *     start state) or, otherwise, setTokEnd and/or setActId as needed.
+ *  5. Every state with a usable item set gets an error target and eof target
+ *     of the start state, with actOnNext, actLagBehind or the select action.
+ *  6. The start state is made final.
+ *
+ * pd:    supplies the shared actions and ordering values.
+ * graph: the machine to modify in place. */
+void TokenRegion::runLongestMatch( Compiler *pd, FsmGraph *graph )
+{
+ graph->markReachableFromHereStopFinal( graph->startState );
+ for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
+ if ( ms->stateBits & SB_ISMARKED ) {
+ ms->lmItemSet.insert( 0 );
+ ms->stateBits &= ~ SB_ISMARKED;
+ }
+ }
+
+ /* Transfer the first item of non-empty lmAction tables to the item sets
+ * of the states that follow. Exclude states that have no transitions out.
+ * This must happen on a separate pass so that on each iteration of the
+ * next pass we have the item set entries from all lmAction tables. */
+ for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
+ for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
+ if ( trans->lmActionTable.length() > 0 ) {
+ LmActionTableEl *lmAct = trans->lmActionTable.data;
+ FsmState *toState = trans->toState;
+ assert( toState );
+
+ /* Check if there are transitions out, this may be a very
+ * close approximation? Out transitions going nowhere?
+ * FIXME: Check. */
+ if ( toState->outList.length() > 0 ) {
+ /* Fill the item sets. */
+ graph->markReachableFromHereStopFinal( toState );
+ for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
+ if ( ms->stateBits & SB_ISMARKED ) {
+ ms->lmItemSet.insert( lmAct->value );
+ ms->stateBits &= ~ SB_ISMARKED;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ /* The lmItem sets are now filled, telling us which longest match rules
+ * can succeed in which states. First determine if we need to make sure
+ * act is defaulted to zero. */
+ int maxItemSetLength = 0;
+ graph->markReachableFromHereStopFinal( graph->startState );
+ for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
+ if ( ms->stateBits & SB_ISMARKED ) {
+ if ( ms->lmItemSet.length() > maxItemSetLength )
+ maxItemSetLength = ms->lmItemSet.length();
+ ms->stateBits &= ~ SB_ISMARKED;
+ }
+ }
+
+ /* The actions executed on starting to match a token. */
+ graph->isolateStartState();
+ graph->startState->fromStateActionTable.setAction( pd->setTokStartOrd, pd->setTokStart );
+ if ( maxItemSetLength > 1 ) {
+ /* The longest match action switch may be called when tokens are
+ * matched, in which case act must be initialized, there must be a
+ * case to handle the error, and the generated machine will require an
+ * error state. */
+ lmSwitchHandlesError = true;
+ graph->startState->toStateActionTable.setAction( pd->initActIdOrd, pd->initActId );
+ }
+
+ /* The place to store transitions to restart. It may be possible for the
+ * restarting to affect the searching through the graph that follows. For
+ * now take the safe route and save the list of transitions to restart
+ * until after all searching is done. */
+ Vector<FsmTrans*> restartTrans;
+
+ /* Set actions that do immediate token recognition, set the longest match part
+ * id and set the token ending. */
+ for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
+ for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
+ if ( trans->lmActionTable.length() > 0 ) {
+ LmActionTableEl *lmAct = trans->lmActionTable.data;
+ FsmState *toState = trans->toState;
+ assert( toState );
+
+ /* Check if there are transitions out, this may be a very
+ * close approximation? Out transitions going nowhere?
+ * FIXME: Check. */
+ if ( toState->outList.length() == 0 ) {
+ /* Can execute the immediate action for the longest match
+ * part. Redirect the action to the start state. */
+ trans->actionTable.setAction( lmAct->key,
+ lmAct->value->actOnLast );
+ restartTrans.append( trans );
+ }
+ else {
+ /* Look for non final states that have a non-empty item
+ * set. If these are present then we need to record the
+ * end of the token. Also find the highest item set
+ * length reachable from here (excluding at transitions to
+ * final states). Note that maxItemSetLength is reused
+ * here and reset for every transition examined. */
+ bool nonFinalNonEmptyItemSet = false;
+ maxItemSetLength = 0;
+ graph->markReachableFromHereStopFinal( toState );
+ for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
+ if ( ms->stateBits & SB_ISMARKED ) {
+ if ( ms->lmItemSet.length() > 0 && !ms->isFinState() )
+ nonFinalNonEmptyItemSet = true;
+ if ( ms->lmItemSet.length() > maxItemSetLength )
+ maxItemSetLength = ms->lmItemSet.length();
+ ms->stateBits &= ~ SB_ISMARKED;
+ }
+ }
+
+ /* If there are reachable states that are not final and
+ * have non empty item sets or that have an item set
+ * length greater than one then we need to set tokend
+ * because the error action that matches the token will
+ * require it. */
+ if ( nonFinalNonEmptyItemSet || maxItemSetLength > 1 )
+ trans->actionTable.setAction( pd->setTokEndOrd, pd->setTokEnd );
+
+ /* Some states may not know which longest match item to
+ * execute, must set it. */
+ if ( maxItemSetLength > 1 ) {
+ /* There are transitions out, another match may come. */
+ trans->actionTable.setAction( lmAct->key,
+ lmAct->value->setActId );
+ }
+ }
+ }
+ }
+ }
+
+ /* Now that all graph searching is done it is certainly safe to set the
+ * restarting. It may be safe above, however this must be verified. */
+ for ( Vector<FsmTrans*>::Iter rs = restartTrans; rs.lte(); rs++ )
+ restart( graph, *rs );
+
+ /* One ordering value is shared by all error/eof actions embedded below. */
+ int lmErrActionOrd = pd->curActionOrd++;
+
+ /* Embed the error for recognizing a char. */
+ for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
+ if ( st->lmItemSet.length() == 1 && st->lmItemSet[0] != 0 ) {
+ if ( st->isFinState() ) {
+ /* On error execute the actOnNext action, which knows that
+ * the last character of the token was one back and restart. */
+ graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
+ &st->lmItemSet[0]->actOnNext, 1 );
+ st->eofActionTable.setAction( lmErrActionOrd,
+ st->lmItemSet[0]->actOnNext );
+ st->eofTarget = graph->startState;
+ }
+ else {
+ /* Not a final state: use the lag-behind action instead. */
+ graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
+ &st->lmItemSet[0]->actLagBehind, 1 );
+ st->eofActionTable.setAction( lmErrActionOrd,
+ st->lmItemSet[0]->actLagBehind );
+ st->eofTarget = graph->startState;
+ }
+ }
+ else if ( st->lmItemSet.length() > 1 ) {
+ /* Need to use the select. Take note of which items the select
+ * is needed for so only the necessary actions are included. */
+ for ( LmItemSet::Iter plmi = st->lmItemSet; plmi.lte(); plmi++ ) {
+ if ( *plmi != 0 )
+ (*plmi)->inLmSelect = true;
+ }
+ /* On error, execute the action select and go to the start state. */
+ graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
+ &lmActSelect, 1 );
+ st->eofActionTable.setAction( lmErrActionOrd, lmActSelect );
+ st->eofTarget = graph->startState;
+ }
+ }
+
+ /* Finally, the start state should be made final. */
+ graph->setFinState( graph->startState );
+}
+
+/* For every state that carries out (leaving) actions, install those actions
+ * as the state's error actions. */
+void TokenRegion::transferScannerLeavingActions( FsmGraph *graph )
+{
+	StateList::Iter st = graph->stateList;
+	while ( st.lte() ) {
+		/* Only states with a non-empty out action table are touched. */
+		const bool hasLeaving = st->outActionTable.length() > 0;
+		if ( hasLeaving )
+			graph->setErrorActions( st, st->outActionTable );
+		st++;
+	}
+}
+
+/* Build the scanner machine for this token region.
+ *
+ * Each token definition that has a pattern is walked into its own machine
+ * and tagged with its longest-match action. The first token found to accept
+ * the zero-length word becomes defaultTokenDef (unless one is already set).
+ * The machines are then unioned and handed to runLongestMatch. A region
+ * with no patterns yields a freshly allocated lambda machine.
+ *
+ * Returns the resulting graph. */
+FsmGraph *TokenRegion::walk( Compiler *pd )
+{
+	/* Make each part of the longest match. */
+	int numParts = 0;
+	FsmGraph **parts = new FsmGraph*[tokenDefList.length()];
+	for ( TokenDefListReg::Iter lmi = tokenDefList; lmi.lte(); lmi++ ) {
+		/* Watch out for patternless tokens. */
+		if ( lmi->join != 0 ) {
+			/* Create the machine and embed the setting of the longest
+			 * match id. */
+			parts[numParts] = lmi->join->walk( pd );
+			parts[numParts]->longMatchAction( pd->curActionOrd++, lmi );
+
+			/* Look for tokens that accept the zero length-word. The first
+			 * one found will be used as the default token. */
+			if ( defaultTokenDef == 0 && parts[numParts]->startState->isFinState() )
+				defaultTokenDef = lmi;
+
+			numParts += 1;
+		}
+	}
+
+	if ( defaultTokenDef != 0 && defaultTokenDef->tdLangEl->ignore )
+		error() << "ignore token cannot be a scanner's zero-length token" << endp;
+
+	/* The result is assigned in exactly one of the branches below. parts[0]
+	 * must not be read before numParts is known to be non-zero: the slot is
+	 * uninitialised (or does not exist at all) when the region has no
+	 * patterns. */
+	FsmGraph *retFsm = 0;
+
+	if ( numParts == 0 ) {
+		/* The region is empty. Return the empty set. */
+		retFsm = new FsmGraph();
+		retFsm->lambdaFsm();
+	}
+	else {
+		/* Before we union the patterns we need to deal with leaving
+		 * actions. They are transferred to error transitions out of the
+		 * final states (like local error actions) and to eof actions. In
+		 * the scanner we need to forbid on_last for any final state that
+		 * has a leaving action. */
+		for ( int i = 0; i < numParts; i++ )
+			transferScannerLeavingActions( parts[i] );
+
+		/* Union machines one and up with machine zero. */
+		retFsm = parts[0];
+		for ( int i = 1; i < numParts; i++ ) {
+			retFsm->unionOp( parts[i] );
+			afterOpMinimize( retFsm );
+		}
+
+		runLongestMatch( pd, retFsm );
+	}
+
+	/* Release the pointer array on both paths. Only the array is freed, not
+	 * the machines it pointed to. */
+	delete[] parts;
+
+	return retFsm;
+}
+
+/* Construct a join holding the given expression as its only list entry.
+ * context and mark both start out as 0. */
+Join::Join( Expression *expr )
+:
+ context(0),
+ mark(0)
+{
+ exprList.append( expr );
+}
+
+/* Build the machine for this join. The join must hold exactly one
+ * expression. When a trailing context is present, the mark action is
+ * embedded as a leaving action of the expression's machine and the context
+ * machine is concatenated onto it. */
+FsmGraph *Join::walk( Compiler *pd )
+{
+	assert( exprList.length() == 1 );
+
+	FsmGraph *machine = exprList.head->walk( pd );
+
+	/* Without a context the expression's machine is the result. */
+	if ( context == 0 )
+		return machine;
+
+	/* Mark the point where the expression ends, then append the context. */
+	machine->leaveFsmAction( pd->curActionOrd++, mark );
+	FsmGraph *trailing = context->walk( pd );
+	machine->concatOp( trailing );
+
+	return machine;
+}
+
+/* Release the child nodes owned by this expression. The binary operators own
+ * both an expression and a term, a term-type node owns only its term, and a
+ * builtin owns nothing. */
+Expression::~Expression()
+{
+	const bool binaryOp =
+			type == OrType ||
+			type == IntersectType ||
+			type == SubtractType ||
+			type == StrongSubtractType;
+
+	if ( binaryOp ) {
+		delete expression;
+		delete term;
+	}
+	else if ( type == TermType ) {
+		delete term;
+	}
+	/* BuiltinType: nothing to release. */
+}
+
+/* Evaluate a single expression node and return its machine.
+ *
+ * For the binary operators the left-hand expression is walked first, then
+ * the right-hand term, the operator is applied to the left machine in place
+ * and afterOpMinimize is called with lastInSeq. Term-type nodes return the
+ * term's machine; builtin nodes return a machine from makeBuiltin.
+ *
+ * pd:        the compiler instance, passed through to the child walks.
+ * lastInSeq: forwarded to afterOpMinimize. Only the union case passes an
+ *            explicit false down to the left-hand walk; the other operators
+ *            call the left-hand walk without the argument. */
+FsmGraph *Expression::walk( Compiler *pd, bool lastInSeq )
+{
+ FsmGraph *rtnVal = 0;
+ switch ( type ) {
+ case OrType: {
+ /* Evaluate the expression. */
+ rtnVal = expression->walk( pd, false );
+ /* Evaluate the term. */
+ FsmGraph *rhs = term->walk( pd );
+ /* Perform union. */
+ rtnVal->unionOp( rhs );
+ afterOpMinimize( rtnVal, lastInSeq );
+ break;
+ }
+ case IntersectType: {
+ /* Evaluate the expression. */
+ rtnVal = expression->walk( pd );
+ /* Evaluate the term. */
+ FsmGraph *rhs = term->walk( pd );
+ /* Perform intersection. */
+ rtnVal->intersectOp( rhs );
+ afterOpMinimize( rtnVal, lastInSeq );
+ break;
+ }
+ case SubtractType: {
+ /* Evaluate the expression. */
+ rtnVal = expression->walk( pd );
+ /* Evaluate the term. */
+ FsmGraph *rhs = term->walk( pd );
+ /* Perform subtraction. */
+ rtnVal->subtractOp( rhs );
+ afterOpMinimize( rtnVal, lastInSeq );
+ break;
+ }
+ case StrongSubtractType: {
+ /* Evaluate the expression. */
+ rtnVal = expression->walk( pd );
+
+ /* Evaluate the term and pad it with any* machines on both sides, so
+ * the subtracted machine is any* . term . any*. */
+ FsmGraph *rhs = dotStarFsm( pd );
+ FsmGraph *termFsm = term->walk( pd );
+ FsmGraph *trailAnyStar = dotStarFsm( pd );
+ rhs->concatOp( termFsm );
+ rhs->concatOp( trailAnyStar );
+
+ /* Perform subtraction. */
+ rtnVal->subtractOp( rhs );
+ afterOpMinimize( rtnVal, lastInSeq );
+ break;
+ }
+ case TermType: {
+ /* Return result of the term. */
+ rtnVal = term->walk( pd );
+ break;
+ }
+ case BuiltinType: {
+ /* Duplicate the builtin. */
+ rtnVal = makeBuiltin( builtin, pd );
+ break;
+ }
+ }
+
+ return rtnVal;
+}
+
+/* Release the child nodes owned by this term. The concatenation variants
+ * own both a term and a factor-with-augmentation; a plain node owns only the
+ * factor-with-augmentation. */
+Term::~Term()
+{
+	const bool concatVariant =
+			type == ConcatType ||
+			type == RightStartType ||
+			type == RightFinishType ||
+			type == LeftType;
+
+	if ( concatVariant ) {
+		delete term;
+		delete factorWithAug;
+	}
+	else if ( type == FactorWithAugType ) {
+		delete factorWithAug;
+	}
+}
+
+/* Evaluate a term node and return its machine.
+ *
+ * The concatenation variants walk the left-hand term first, then the
+ * right-hand factorWithAug, concatenate the right machine onto the left in
+ * place and call afterOpMinimize with lastInSeq. The three guarded variants
+ * additionally embed priorities under one shared key, taken from
+ * pd->nextPriorKey, before concatenating:
+ *   RightStartType:  left all-transitions 0, right start transitions 1.
+ *   RightFinishType: left all-transitions 0, right finishing transitions 1.
+ *   LeftType:        left all-transitions 1, right all-transitions 0.
+ * A FactorWithAugType node returns the factor's machine unchanged.
+ *
+ * pd:        the compiler instance; supplies the priority key and ordering
+ *            counters, which are advanced as a side effect.
+ * lastInSeq: forwarded to afterOpMinimize. Only plain concatenation passes
+ *            an explicit false down to the left-hand walk. */
+FsmGraph *Term::walk( Compiler *pd, bool lastInSeq )
+{
+ FsmGraph *rtnVal = 0;
+ switch ( type ) {
+ case ConcatType: {
+ /* Evaluate the Term. */
+ rtnVal = term->walk( pd, false );
+ /* Evaluate the FactorWithAug. */
+ FsmGraph *rhs = factorWithAug->walk( pd );
+ /* Perform concatenation. */
+ rtnVal->concatOp( rhs );
+ afterOpMinimize( rtnVal, lastInSeq );
+ break;
+ }
+ case RightStartType: {
+ /* Evaluate the Term. */
+ rtnVal = term->walk( pd );
+
+ /* Evaluate the FactorWithAug. */
+ FsmGraph *rhs = factorWithAug->walk( pd );
+
+ /* Set up the priority descriptors. The left machine gets the
+ * lower priority whereas the right gets the higher start priority. */
+ priorDescs[0].key = pd->nextPriorKey++;
+ priorDescs[0].priority = 0;
+ rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
+
+ /* The start transitions of the right machine get the higher
+ * priority. Use the same unique key. */
+ priorDescs[1].key = priorDescs[0].key;
+ priorDescs[1].priority = 1;
+ rhs->startFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
+
+ /* Perform concatenation. */
+ rtnVal->concatOp( rhs );
+ afterOpMinimize( rtnVal, lastInSeq );
+ break;
+ }
+ case RightFinishType: {
+ /* Evaluate the Term. */
+ rtnVal = term->walk( pd );
+
+ /* Evaluate the FactorWithAug. */
+ FsmGraph *rhs = factorWithAug->walk( pd );
+
+ /* Set up the priority descriptors. The left machine gets the
+ * lower priority whereas the finishing transitions to the right
+ * get the higher priority. */
+ priorDescs[0].key = pd->nextPriorKey++;
+ priorDescs[0].priority = 0;
+ rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
+
+ /* The finishing transitions of the right machine get the higher
+ * priority. Use the same unique key. */
+ priorDescs[1].key = priorDescs[0].key;
+ priorDescs[1].priority = 1;
+ rhs->finishFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
+
+ /* Perform concatenation. */
+ rtnVal->concatOp( rhs );
+ afterOpMinimize( rtnVal, lastInSeq );
+ break;
+ }
+ case LeftType: {
+ /* Evaluate the Term. */
+ rtnVal = term->walk( pd );
+
+ /* Evaluate the FactorWithAug. */
+ FsmGraph *rhs = factorWithAug->walk( pd );
+
+ /* Set up the priority descriptors. The left machine gets the
+ * higher priority. */
+ priorDescs[0].key = pd->nextPriorKey++;
+ priorDescs[0].priority = 1;
+ rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
+
+ /* The right machine gets the lower priority. Since
+ * startTransPrior might unnecessarily increase the number of
+ * states during the state machine construction process (due to
+ * isolation), we use allTransPrior instead, which has the same
+ * effect. */
+ priorDescs[1].key = priorDescs[0].key;
+ priorDescs[1].priority = 0;
+ rhs->allTransPrior( pd->curPriorOrd++, &priorDescs[1] );
+
+ /* Perform concatenation. */
+ rtnVal->concatOp( rhs );
+ afterOpMinimize( rtnVal, lastInSeq );
+ break;
+ }
+ case FactorWithAugType: {
+ /* No operator at this level: the factor's machine is the result. */
+ rtnVal = factorWithAug->walk( pd );
+ break;
+ }
+ }
+ return rtnVal;
+}
+
+/* Destructor: release what a factor-with-augmentation node owns. */
+FactorWithAug::~FactorWithAug()
+{
+    /* The wrapped factor-with-repetition subtree. */
+    delete factorWithRep;
+
+    /* The priority descriptor array, which walk() allocates on demand.
+     * delete[] on a null pointer does nothing, so no guard is needed. */
+    delete[] priorDescs;
+}
+
+/* Embed every parser action attached to this node into graph. Entry i of
+ * actionOrd is the ordering handed over together with action i. Each of the
+ * at_start* embeddings is followed by afterOpMinimize(); an augmentation type
+ * that is not listed trips the assert. The pd argument is not used here. */
+void FactorWithAug::assignActions( Compiler *pd, FsmGraph *graph, int *actionOrd )
+{
+    for ( int i = 0; i < actions.length(); i++ ) {
+        const ParserAction &pa = actions[i];
+        const int ord = actionOrd[i];
+
+        switch ( pa.type ) {
+        /* Transition actions. */
+        case at_start:
+            graph->startFsmAction( ord, pa.action );
+            afterOpMinimize( graph );
+            break;
+        case at_all:
+            graph->allTransAction( ord, pa.action );
+            break;
+        case at_finish:
+            graph->finishFsmAction( ord, pa.action );
+            break;
+        case at_leave:
+            graph->leaveFsmAction( ord, pa.action );
+            break;
+
+        /* Global error actions: the error key is always zero. */
+        case at_start_gbl_error:
+            graph->startErrorAction( ord, pa.action, 0 );
+            afterOpMinimize( graph );
+            break;
+        case at_all_gbl_error:
+            graph->allErrorAction( ord, pa.action, 0 );
+            break;
+        case at_final_gbl_error:
+            graph->finalErrorAction( ord, pa.action, 0 );
+            break;
+        case at_not_start_gbl_error:
+            graph->notStartErrorAction( ord, pa.action, 0 );
+            break;
+        case at_not_final_gbl_error:
+            graph->notFinalErrorAction( ord, pa.action, 0 );
+            break;
+        case at_middle_gbl_error:
+            graph->middleErrorAction( ord, pa.action, 0 );
+            break;
+
+        /* Local error actions: the action's own error key is passed. */
+        case at_start_local_error:
+            graph->startErrorAction( ord, pa.action, pa.localErrKey );
+            afterOpMinimize( graph );
+            break;
+        case at_all_local_error:
+            graph->allErrorAction( ord, pa.action, pa.localErrKey );
+            break;
+        case at_final_local_error:
+            graph->finalErrorAction( ord, pa.action, pa.localErrKey );
+            break;
+        case at_not_start_local_error:
+            graph->notStartErrorAction( ord, pa.action, pa.localErrKey );
+            break;
+        case at_not_final_local_error:
+            graph->notFinalErrorAction( ord, pa.action, pa.localErrKey );
+            break;
+        case at_middle_local_error:
+            graph->middleErrorAction( ord, pa.action, pa.localErrKey );
+            break;
+
+        /* EOF actions. */
+        case at_start_eof:
+            graph->startEOFAction( ord, pa.action );
+            afterOpMinimize( graph );
+            break;
+        case at_all_eof:
+            graph->allEOFAction( ord, pa.action );
+            break;
+        case at_final_eof:
+            graph->finalEOFAction( ord, pa.action );
+            break;
+        case at_not_start_eof:
+            graph->notStartEOFAction( ord, pa.action );
+            break;
+        case at_not_final_eof:
+            graph->notFinalEOFAction( ord, pa.action );
+            break;
+        case at_middle_eof:
+            graph->middleEOFAction( ord, pa.action );
+            break;
+
+        /* To-state actions. */
+        case at_start_to_state:
+            graph->startToStateAction( ord, pa.action );
+            afterOpMinimize( graph );
+            break;
+        case at_all_to_state:
+            graph->allToStateAction( ord, pa.action );
+            break;
+        case at_final_to_state:
+            graph->finalToStateAction( ord, pa.action );
+            break;
+        case at_not_start_to_state:
+            graph->notStartToStateAction( ord, pa.action );
+            break;
+        case at_not_final_to_state:
+            graph->notFinalToStateAction( ord, pa.action );
+            break;
+        case at_middle_to_state:
+            graph->middleToStateAction( ord, pa.action );
+            break;
+
+        /* From-state actions. */
+        case at_start_from_state:
+            graph->startFromStateAction( ord, pa.action );
+            afterOpMinimize( graph );
+            break;
+        case at_all_from_state:
+            graph->allFromStateAction( ord, pa.action );
+            break;
+        case at_final_from_state:
+            graph->finalFromStateAction( ord, pa.action );
+            break;
+        case at_not_start_from_state:
+            graph->notStartFromStateAction( ord, pa.action );
+            break;
+        case at_not_final_from_state:
+            graph->notFinalFromStateAction( ord, pa.action );
+            break;
+        case at_middle_from_state:
+            graph->middleFromStateAction( ord, pa.action );
+            break;
+
+        /* Anything else is prevented by the parser. */
+        default:
+            assert( false );
+            break;
+        }
+    }
+}
+
+/* Embed every priority augmentation of this node into graph. Entry i of
+ * priorOrd is the ordering for augmentation i and priorDescs[i] is the
+ * descriptor handed to the machine for it. */
+void FactorWithAug::assignPriorities( FsmGraph *graph, int *priorOrd )
+{
+    for ( int i = 0; i < priorityAugs.length(); i++ ) {
+        PriorDesc *desc = &priorDescs[i];
+        const int ord = priorOrd[i];
+
+        switch ( priorityAugs[i].type ) {
+        case at_start:
+            /* Start priorities are the one case that may need the
+             * machine minimized afterwards. */
+            graph->startFsmPrior( ord, desc );
+            afterOpMinimize( graph );
+            break;
+        case at_all:
+            graph->allTransPrior( ord, desc );
+            break;
+        case at_finish:
+            graph->finishFsmPrior( ord, desc );
+            break;
+        case at_leave:
+            graph->leaveFsmPrior( ord, desc );
+            break;
+
+        default:
+            /* Any other augmentation type is ruled out by the parser. */
+            break;
+        }
+    }
+}
+
+/* Embed every condition attached to this node into graph. Only the
+ * at_start, at_all and at_leave positions are acted on; a condition with any
+ * other type is skipped. */
+void FactorWithAug::assignConditions( FsmGraph *graph )
+{
+    for ( int c = 0; c < conditions.length(); c++ ) {
+        if ( conditions[c].type == at_start ) {
+            graph->startFsmCondition( conditions[c].action );
+            afterOpMinimize( graph );
+        }
+        else if ( conditions[c].type == at_all ) {
+            graph->allTransCondition( conditions[c].action );
+        }
+        else if ( conditions[c].type == at_leave ) {
+            graph->leaveFsmCondition( conditions[c].action );
+        }
+    }
+}
+
+
+/* True for the augmentation types that FactorWithAug::walk orders before
+ * the operand is evaluated (the "starting" action types). */
+static bool isStartActionType( AugType type )
+{
+    return type == at_start ||
+            type == at_start_gbl_error ||
+            type == at_start_local_error ||
+            type == at_start_to_state ||
+            type == at_start_from_state ||
+            type == at_start_eof;
+}
+
+/* Evaluate a factor with augmentation node: walk the operand, then embed
+ * this node's conditions, actions, priorities and epsilon transitions into
+ * the resulting machine and return it. */
+FsmGraph *FactorWithAug::walk( Compiler *pd )
+{
+    const int numActions = actions.length();
+    const int numPriors = priorityAugs.length();
+
+    /* One ordering slot per action; no array at all when there are none. */
+    int *actionOrd = numActions > 0 ? new int[numActions] : 0;
+
+    /* Starting actions take their ordering before the operand is walked. */
+    for ( int a = 0; a < numActions; a++ ) {
+        if ( isStartActionType( actions[a].type ) )
+            actionOrd[a] = pd->curActionOrd++;
+    }
+
+    /* Evaluate the factor with repetition. */
+    FsmGraph *rtnVal = factorWithRep->walk( pd );
+
+    /* Every other action takes its ordering afterwards. */
+    for ( int a = 0; a < numActions; a++ ) {
+        if ( !isStartActionType( actions[a].type ) )
+            actionOrd[a] = pd->curActionOrd++;
+    }
+
+    assignConditions( rtnVal );
+    assignActions( pd, rtnVal, actionOrd );
+
+    /* Priority orderings are local to this walk of the node. */
+    int *priorOrd = numPriors > 0 ? new int[numPriors] : 0;
+    for ( int p = 0; p < numPriors; p++ )
+        priorOrd[p] = pd->curPriorOrd++;
+
+    /* The priority descriptors are built once, on the first walk that needs
+     * them, and carry the key and value of each priority assignment that is
+     * passed to the fsm. */
+    if ( priorDescs == 0 && numPriors > 0 ) {
+        priorDescs = new PriorDesc[numPriors];
+        for ( int p = 0; p < numPriors; p++ ) {
+            priorDescs[p].key = priorityAugs[p].priorKey;
+            priorDescs[p].priority = priorityAugs[p].priorValue;
+        }
+    }
+
+    /* Assign priorities into the machine. */
+    assignPriorities( rtnVal, priorOrd );
+
+    /* Assign epsilon transitions. One resolved link is consumed per epsilon
+     * link of this node. */
+    for ( int e = 0; e < epsilonLinks.length(); e++ ) {
+        /* The target may be missing. If so it is silently skipped, because
+         * an error has already been reported for it. */
+        NameInst *epTarg = pd->epsilonResolvedLinks[pd->nextEpsilonResolvedLink++];
+        if ( epTarg == 0 )
+            continue;
+
+        /* Make the epsilon transition and record that the name is used. */
+        rtnVal->epsilonTrans( epTarg->id );
+        pd->localNameScope->referencedNames.append( epTarg );
+    }
+
+    /* delete[] accepts null, so neither array needs a guard. */
+    delete[] priorOrd;
+    delete[] actionOrd;
+    return rtnVal;
+}
+
+
+/* Destructor for a factor-with-repetition node: frees the child that the
+ * node's type says is in use. */
+FactorWithRep::~FactorWithRep()
+{
+    switch ( type ) {
+    case FactorWithNegType:
+        /* Pass-through node: the child is a factor with negation. */
+        delete factorWithNeg;
+        break;
+
+    case StarType:
+    case StarStarType:
+    case OptionalType:
+    case PlusType:
+    case ExactType:
+    case MaxType:
+    case MinType:
+    case RangeType:
+        /* Every repetition form holds a nested factor with repetition. */
+        delete factorWithRep;
+        break;
+    }
+}
+
+/* Evaluate a factor with repetition node.
+ *
+ * Walks the operand and applies the repetition operator selected by type,
+ * calling afterOpMinimize() after each machine operation. A
+ * FactorWithNegType node just passes its child's machine up. Zero-count
+ * repetitions, and an inverted range (reported as an error), produce the
+ * lambda machine without walking the operand. The star, plus, exact, max,
+ * min and range forms warn when the operand's start state is final. */
+FsmGraph *FactorWithRep::walk( Compiler *pd )
+{
+    FsmGraph *retFsm = 0;
+
+    switch ( type ) {
+    case StarType: {
+        /* Evaluate the FactorWithRep. */
+        retFsm = factorWithRep->walk( pd );
+        if ( retFsm->startState->isFinState() ) {
+            warning(loc) << "applying kleene star to a machine that "
+                    "accepts zero length word" << endl;
+        }
+
+        /* Shift over the start action orders then do the kleene star. */
+        pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
+        retFsm->starOp( );
+        afterOpMinimize( retFsm );
+        break;
+    }
+    case StarStarType: {
+        /* Evaluate the FactorWithRep. */
+        retFsm = factorWithRep->walk( pd );
+        if ( retFsm->startState->isFinState() ) {
+            warning(loc) << "applying kleene star to a machine that "
+                    "accepts zero length word" << endl;
+        }
+
+        /* Set up the prior descs. All gets priority one, whereas leaving gets
+         * priority zero. Make a unique key so that these priorities don't
+         * interfere with any priorities set by the user. */
+        priorDescs[0].key = pd->nextPriorKey++;
+        priorDescs[0].priority = 1;
+        retFsm->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
+
+        /* Leaving gets priority 0. Use same unique key. */
+        priorDescs[1].key = priorDescs[0].key;
+        priorDescs[1].priority = 0;
+        retFsm->leaveFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
+
+        /* Shift over the start action orders then do the kleene star. */
+        pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
+        retFsm->starOp( );
+        afterOpMinimize( retFsm );
+        break;
+    }
+    case OptionalType: {
+        /* Make the null fsm. */
+        FsmGraph *nu = new FsmGraph();
+        nu->lambdaFsm( );
+
+        /* Evaluate the FactorWithRep. */
+        retFsm = factorWithRep->walk( pd );
+
+        /* Perform the question operator. */
+        retFsm->unionOp( nu );
+        afterOpMinimize( retFsm );
+        break;
+    }
+    case PlusType: {
+        /* Evaluate the FactorWithRep. */
+        retFsm = factorWithRep->walk( pd );
+        if ( retFsm->startState->isFinState() ) {
+            warning(loc) << "applying plus operator to a machine that "
+                    "accepts zero length word" << endl;
+        }
+
+        /* Need a duplicate for the star end. */
+        FsmGraph *dup = new FsmGraph( *retFsm );
+
+        /* The start func orders need to be shifted before doing the star. */
+        pd->curActionOrd += dup->shiftStartActionOrder( pd->curActionOrd );
+
+        /* Star the duplicate. */
+        dup->starOp( );
+        afterOpMinimize( dup );
+
+        retFsm->concatOp( dup );
+        afterOpMinimize( retFsm );
+        break;
+    }
+    case ExactType: {
+        /* Get an int from the repetition amount. */
+        if ( lowerRep == 0 ) {
+            /* No copies. Don't need to evaluate the factorWithRep.
+             * This defeats the purpose so give a warning. */
+            warning(loc) << "exactly zero repetitions results "
+                    "in the null machine" << endl;
+
+            retFsm = new FsmGraph();
+            retFsm->lambdaFsm();
+        }
+        else {
+            /* Evaluate the first FactorWithRep. */
+            retFsm = factorWithRep->walk( pd );
+            if ( retFsm->startState->isFinState() ) {
+                warning(loc) << "applying repetition to a machine that "
+                        "accepts zero length word" << endl;
+            }
+
+            /* The start func orders need to be shifted before doing the
+             * repetition. */
+            pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
+
+            /* Do the repetition on the machine. Already guarded against n == 0 */
+            retFsm->repeatOp( lowerRep );
+            afterOpMinimize( retFsm );
+        }
+        break;
+    }
+    case MaxType: {
+        /* Get an int from the repetition amount. */
+        if ( upperRep == 0 ) {
+            /* No copies. Don't need to evaluate the factorWithRep.
+             * This defeats the purpose so give a warning. */
+            warning(loc) << "max zero repetitions results "
+                    "in the null machine" << endl;
+
+            retFsm = new FsmGraph();
+            retFsm->lambdaFsm();
+        }
+        else {
+            /* Evaluate the first FactorWithRep. */
+            retFsm = factorWithRep->walk( pd );
+            if ( retFsm->startState->isFinState() ) {
+                warning(loc) << "applying max repetition to a machine that "
+                        "accepts zero length word" << endl;
+            }
+
+            /* The start func orders need to be shifted before doing the
+             * repetition. */
+            pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
+
+            /* Do the repetition on the machine. Already guarded against n == 0 */
+            retFsm->optionalRepeatOp( upperRep );
+            afterOpMinimize( retFsm );
+        }
+        break;
+    }
+    case MinType: {
+        /* Evaluate the repeated machine. */
+        retFsm = factorWithRep->walk( pd );
+        if ( retFsm->startState->isFinState() ) {
+            warning(loc) << "applying min repetition to a machine that "
+                    "accepts zero length word" << endl;
+        }
+
+        /* The start func orders need to be shifted before doing the repetition
+         * and the kleene star. */
+        pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
+
+        if ( lowerRep == 0 ) {
+            /* Acts just like a star op on the machine to return. */
+            retFsm->starOp( );
+            afterOpMinimize( retFsm );
+        }
+        else {
+            /* Take a duplicate for the plus. */
+            FsmGraph *dup = new FsmGraph( *retFsm );
+
+            /* Do repetition on the first half. */
+            retFsm->repeatOp( lowerRep );
+            afterOpMinimize( retFsm );
+
+            /* Star the duplicate. */
+            dup->starOp( );
+            afterOpMinimize( dup );
+
+            /* Tack on the kleene star. */
+            retFsm->concatOp( dup );
+            afterOpMinimize( retFsm );
+        }
+        break;
+    }
+    case RangeType: {
+        /* Check for bogus range. Compare the bounds directly: a subtraction
+         * could overflow, and would never be negative for unsigned counts. */
+        if ( upperRep < lowerRep ) {
+            error(loc) << "invalid range repetition" << endl;
+
+            /* Return null machine as recovery. */
+            retFsm = new FsmGraph();
+            retFsm->lambdaFsm();
+        }
+        else if ( lowerRep == 0 && upperRep == 0 ) {
+            /* No copies. Don't need to evaluate the factorWithRep. This
+             * defeats the purpose so give a warning. */
+            warning(loc) << "zero to zero repetitions results "
+                    "in the null machine" << endl;
+
+            retFsm = new FsmGraph();
+            retFsm->lambdaFsm();
+        }
+        else {
+            /* Now need to evaluate the repeated machine. */
+            retFsm = factorWithRep->walk( pd );
+            if ( retFsm->startState->isFinState() ) {
+                warning(loc) << "applying range repetition to a machine that "
+                        "accepts zero length word" << endl;
+            }
+
+            /* The start func orders need to be shifted before doing both kinds
+             * of repetition. */
+            pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
+
+            if ( lowerRep == 0 ) {
+                /* Just doing max repetition. Already guarded against n == 0. */
+                retFsm->optionalRepeatOp( upperRep );
+                afterOpMinimize( retFsm );
+            }
+            else if ( lowerRep == upperRep ) {
+                /* Just doing exact repetition. Already guarded against n == 0. */
+                retFsm->repeatOp( lowerRep );
+                afterOpMinimize( retFsm );
+            }
+            else {
+                /* This is the case that 0 < lowerRep < upperRep. Take a
+                 * duplicate for the optional repeat. */
+                FsmGraph *dup = new FsmGraph( *retFsm );
+
+                /* Do repetition on the first half. */
+                retFsm->repeatOp( lowerRep );
+                afterOpMinimize( retFsm );
+
+                /* Do optional repetition on the second half. */
+                dup->optionalRepeatOp( upperRep - lowerRep );
+                afterOpMinimize( dup );
+
+                /* Tack on the duplicate machine. */
+                retFsm->concatOp( dup );
+                afterOpMinimize( retFsm );
+            }
+        }
+        break;
+    }
+    case FactorWithNegType: {
+        /* Evaluate the Factor. Pass it up. */
+        retFsm = factorWithNeg->walk( pd );
+        break;
+    }}
+    return retFsm;
+}
+
+
+/* Destructor for a factor-with-negation node: frees the child selected by
+ * the node's type. */
+FactorWithNeg::~FactorWithNeg()
+{
+    switch ( type ) {
+    case FactorType:
+        /* Pass-through node holding a plain factor. */
+        delete factor;
+        break;
+
+    case NegateType:
+    case CharNegateType:
+        /* Both negation forms hold a nested factor with negation. */
+        delete factorWithNeg;
+        break;
+    }
+}
+
+/* Evaluate a factor with negation node. The two negation forms subtract the
+ * operand's machine from a base machine (dot-star for NegateType, dot for
+ * CharNegateType); a FactorType node passes its factor's machine up. */
+FsmGraph *FactorWithNeg::walk( Compiler *pd )
+{
+    FsmGraph *retFsm = 0;
+
+    switch ( type ) {
+    case NegateType:
+    case CharNegateType: {
+        /* The operand is evaluated first, then the base machine is made. */
+        FsmGraph *toNegate = factorWithNeg->walk( pd );
+
+        if ( type == NegateType )
+            retFsm = dotStarFsm( pd );
+        else
+            retFsm = dotFsm( pd );
+
+        /* Negation is subtraction from the base machine. */
+        retFsm->subtractOp( toNegate );
+        afterOpMinimize( retFsm );
+        break;
+    }
+    case FactorType: {
+        /* Nothing to negate. */
+        retFsm = factor->walk( pd );
+        break;
+    }}
+    return retFsm;
+}
+
+/* Destructor for a factor node: frees the child selected by the node's type.
+ * A ReferenceType factor frees nothing here. */
+Factor::~Factor()
+{
+    switch ( type ) {
+    case ReferenceType:
+        /* The referenced definition is not deleted by the factor. */
+        break;
+    case LiteralType:
+        delete literal;
+        break;
+    case RangeType:
+        delete range;
+        break;
+    case OrExprType:
+        delete reItem;
+        break;
+    case RegExprType:
+        delete regExp;
+        break;
+    case ParenType:
+        delete join;
+        break;
+    }
+}
+
+/* Evaluate a factor node by delegating to the child selected by the node's
+ * type. The or-expression and regular expression children are walked with a
+ * null root regex. Returns null for a type that is not handled. */
+FsmGraph *Factor::walk( Compiler *pd )
+{
+    switch ( type ) {
+    case LiteralType:
+        return literal->walk( pd );
+    case RangeType:
+        return range->walk( pd );
+    case OrExprType:
+        return reItem->walk( pd, 0 );
+    case RegExprType:
+        return regExp->walk( pd, 0 );
+    case ReferenceType:
+        return varDef->walk( pd );
+    case ParenType:
+        return join->walk( pd );
+    }
+
+    return 0;
+}
+
+
+/* Clean up a range object. Must delete the two literals: lowerLit and
+ * upperLit are the nodes that Range::walk evaluates to obtain the two ends
+ * of the range. */
+Range::~Range()
+{
+ delete lowerLit;
+ delete upperLit;
+}
+
+/* Check that rangeEnd has the shape required of a range end: two states, a
+ * non-final start state, one final state with no transitions out, and one
+ * transition out of the start state that covers a single key. */
+bool Range::verifyRangeFsm( FsmGraph *rangeEnd )
+{
+    /* The checks run in this order and stop at the first that fails. */
+    if ( rangeEnd->stateList.length() != 2 ||
+            rangeEnd->startState->isFinState() ||
+            rangeEnd->finStateSet.length() != 1 ||
+            rangeEnd->finStateSet[0]->outList.length() != 0 ||
+            rangeEnd->startState->outList.length() != 1 )
+    {
+        return false;
+    }
+
+    /* The single transition out of the start state must not be a range. */
+    FsmTrans *onlyTrans = rangeEnd->startState->outList.head;
+    return !( onlyTrans->lowKey != onlyTrans->highKey );
+}
+
+/* Evaluate a range. Gets the lower and upper key and makes an fsm range.
+ *
+ * Each end is a literal that must evaluate to a machine accepted by
+ * verifyRangeFsm(). If either end fails that check, the error is reported
+ * and the lambda machine is returned as recovery. The keys are not read from
+ * a rejected machine, because it may have no transition out of its start
+ * state. If the lower key is greater than the upper key, an error is
+ * reported and the upper key is set to the lower key. */
+FsmGraph *Range::walk( Compiler *pd )
+{
+    /* Construct and verify the suitability of the lower end of the range. */
+    FsmGraph *lowerFsm = lowerLit->walk( pd );
+    const bool lowerOk = verifyRangeFsm( lowerFsm );
+    if ( !lowerOk ) {
+        error(lowerLit->loc) <<
+            "bad range lower end, must be a single character" << endl;
+    }
+
+    /* Construct and verify the upper end. */
+    FsmGraph *upperFsm = upperLit->walk( pd );
+    const bool upperOk = verifyRangeFsm( upperFsm );
+    if ( !upperOk ) {
+        error(upperLit->loc) <<
+            "bad range upper end, must be a single character" << endl;
+    }
+
+    /* An end that failed verification has no usable key. Return the null
+     * machine as recovery; the error has already been reported. */
+    if ( !lowerOk || !upperOk ) {
+        delete lowerFsm;
+        delete upperFsm;
+
+        FsmGraph *recovery = new FsmGraph();
+        recovery->lambdaFsm();
+        return recovery;
+    }
+
+    /* Grab the keys from the machines, then delete them. */
+    Key lowKey = lowerFsm->startState->outList.head->lowKey;
+    Key highKey = upperFsm->startState->outList.head->lowKey;
+    delete lowerFsm;
+    delete upperFsm;
+
+    /* Validate the range. */
+    if ( lowKey > highKey ) {
+        /* Recover by setting upper to lower. */
+        error(lowerLit->loc) << "lower end of range is greater then upper end" << endl;
+        highKey = lowKey;
+    }
+
+    /* Return the range now that it is validated. */
+    FsmGraph *retFsm = new FsmGraph();
+    retFsm->rangeFsm( lowKey, highKey );
+    return retFsm;
+}
+
+/* Evaluate a literal object. A Number becomes a machine over the single key
+ * made from it; a LitString becomes a concatenation over the keys of the
+ * prepared string, case-insensitive when prepareLitString() says so. Returns
+ * null for any other type. */
+FsmGraph *Literal::walk( Compiler *pd )
+{
+    if ( type == Number ) {
+        /* Make the fsm key in int format, then the machine for it. */
+        Key fsmKey = makeFsmKeyNum( literal.data, loc, pd );
+        FsmGraph *numFsm = new FsmGraph();
+        numFsm->concatFsm( fsmKey );
+        return numFsm;
+    }
+
+    if ( type == LitString ) {
+        /* Interpret the literal text and turn it into an array of keys. */
+        String interp;
+        bool caseInsensitive;
+        prepareLitString( interp, caseInsensitive, literal, loc );
+        Key *keys = new Key[interp.length()];
+        makeFsmKeyArray( keys, interp.data, interp.length(), pd );
+
+        /* Build the machine, then drop the temporary key array. */
+        FsmGraph *strFsm = new FsmGraph();
+        if ( caseInsensitive )
+            strFsm->concatFsmCI( keys, interp.length() );
+        else
+            strFsm->concatFsm( keys, interp.length() );
+        delete[] keys;
+        return strFsm;
+    }
+
+    return 0;
+}
+
+/* Destructor for a regular expression node. Only a RecurseItem node has
+ * children to free: the rest of the expression and its item. */
+RegExpr::~RegExpr()
+{
+    if ( type == RecurseItem ) {
+        delete regExp;
+        delete item;
+    }
+}
+
+/* Evaluate a regular expression object.
+ *
+ * Callers pass a null rootRegex for the outermost expression; that node then
+ * passes itself down as the root to the nested nodes and items. A
+ * RecurseItem node concatenates its item onto the machine for the rest of
+ * the expression. An Empty node yields null to its parent, which the
+ * RecurseItem case handles. The outermost call never returns null: an empty
+ * regular expression evaluates to the lambda machine, so the callers that
+ * dereference the result get a real machine. */
+FsmGraph *RegExpr::walk( Compiler *pd, RegExpr *rootRegex )
+{
+    /* This is the root regex, pass down a pointer to this. */
+    const bool isRoot = ( rootRegex == 0 );
+    if ( isRoot )
+        rootRegex = this;
+
+    FsmGraph *rtnVal = 0;
+    switch ( type ) {
+    case RecurseItem: {
+        /* Walk both items. */
+        FsmGraph *fsm1 = regExp->walk( pd, rootRegex );
+        FsmGraph *fsm2 = item->walk( pd, rootRegex );
+        if ( fsm1 == 0 )
+            rtnVal = fsm2;
+        else {
+            fsm1->concatOp( fsm2 );
+            rtnVal = fsm1;
+        }
+        break;
+    }
+    case Empty: {
+        /* Nothing to contribute at this level. */
+        rtnVal = 0;
+        break;
+    }
+    }
+
+    /* An entirely empty expression gets the machine for the zero length
+     * word rather than a null pointer. */
+    if ( isRoot && rtnVal == 0 ) {
+        rtnVal = new FsmGraph();
+        rtnVal->lambdaFsm();
+    }
+    return rtnVal;
+}
+
+/* Destructor for a regular expression item. Only the two or-block forms own
+ * a child; Data and Dot items have nothing to free. */
+ReItem::~ReItem()
+{
+    if ( type == OrBlock || type == NegOrBlock )
+        delete orBlock;
+}
+
+/* Evaluate a regular expression item.
+ *
+ * Data becomes a concatenation over its keys (case-insensitive when the
+ * root regex asks for it), Dot becomes the dot machine, OrBlock becomes the
+ * minimized or-block machine and NegOrBlock becomes dot minus the or-block.
+ * If the item is starred, the star operator is applied to the result.
+ *
+ * NOTE(review): ReOrBlock::walk returns null for an Empty block, which the
+ * two or-block cases here would dereference. Confirm the parser never
+ * produces an empty or-block. */
+FsmGraph *ReItem::walk( Compiler *pd, RegExpr *rootRegex )
+{
+    /* The fsm to return, is the alphabet signed? */
+    FsmGraph *rtnVal = 0;
+
+    switch ( type ) {
+    case Data: {
+        /* Move the data into an integer array and make a concat fsm. */
+        Key *arr = new Key[data.length()];
+        makeFsmKeyArray( arr, data.data, data.length(), pd );
+
+        /* Make the concat fsm. */
+        rtnVal = new FsmGraph();
+        if ( rootRegex != 0 && rootRegex->caseInsensitive )
+            rtnVal->concatFsmCI( arr, data.length() );
+        else
+            rtnVal->concatFsm( arr, data.length() );
+        delete[] arr;
+        break;
+    }
+    case Dot: {
+        /* Make the dot fsm. */
+        rtnVal = dotFsm( pd );
+        break;
+    }
+    case OrBlock: {
+        /* Get the or block and minimize it. */
+        rtnVal = orBlock->walk( pd, rootRegex );
+        rtnVal->minimizePartition2();
+        break;
+    }
+    case NegOrBlock: {
+        /* Get the or block and minimize it. */
+        FsmGraph *fsm = orBlock->walk( pd, rootRegex );
+        fsm->minimizePartition2();
+
+        /* Make a dot fsm and subtract from it. */
+        rtnVal = dotFsm( pd );
+        rtnVal->subtractOp( fsm );
+        rtnVal->minimizePartition2();
+        break;
+    }
+    }
+
+    /* If the item is followed by a star, then apply the star op. */
+    if ( star ) {
+        if ( rtnVal->startState->isFinState() ) {
+            warning(loc) << "applying kleene star to a machine that "
+                    "accepts zero length word" << endl;
+        }
+
+        rtnVal->starOp();
+        rtnVal->minimizePartition2();
+    }
+    return rtnVal;
+}
+
+/* Destructor for an or-block node. Only a RecurseItem node has children to
+ * free: the rest of the block and its item. */
+ReOrBlock::~ReOrBlock()
+{
+    if ( type == RecurseItem ) {
+        delete orBlock;
+        delete item;
+    }
+}
+
+
+/* Evaluate an or block of a regular expression. A RecurseItem node unions
+ * its item into the machine for the rest of the block; any other node
+ * (Empty) yields null, which the parent node handles. */
+FsmGraph *ReOrBlock::walk( Compiler *pd, RegExpr *rootRegex )
+{
+    if ( type != RecurseItem )
+        return 0;
+
+    /* Evaluate the rest of the block first, then this node's item. */
+    FsmGraph *restFsm = orBlock->walk( pd, rootRegex );
+    FsmGraph *itemFsm = item->walk( pd, rootRegex );
+
+    /* With nothing before it, the item's machine is the whole result. */
+    if ( restFsm == 0 )
+        return itemFsm;
+
+    restFsm->unionOp( itemFsm );
+    return restFsm;
+}
+
+/* Evaluate an or block item of a regular expression.
+ *
+ * A Data item becomes an or-machine over the unique keys of its characters.
+ * A Range item becomes a range machine. When the root regex is
+ * case-insensitive, the range is also unioned with the opposite-case mirror
+ * of whatever part of it overlaps 'A'-'Z' and of whatever part overlaps
+ * 'a'-'z'. Both overlaps are checked independently, so a range that spans
+ * both (such as X-c) gets both mirrors. */
+FsmGraph *ReOrItem::walk( Compiler *pd, RegExpr *rootRegex )
+{
+    /* The return value, is the alphabet signed? */
+    FsmGraph *rtnVal = 0;
+    switch ( type ) {
+    case Data: {
+        /* Make the or machine. */
+        rtnVal = new FsmGraph();
+
+        /* Put the or data into an array of ints. Note that we find unique
+         * keys. Duplicates are silently ignored. The alternative would be to
+         * issue warning or an error but since we can't with [a0-9a] or 'a' |
+         * 'a' don't bother here. */
+        KeySet keySet;
+        makeFsmUniqueKeyArray( keySet, data.data, data.length(),
+            rootRegex != 0 ? rootRegex->caseInsensitive : false, pd );
+
+        /* Run the or operator. */
+        rtnVal->orFsm( keySet.data, keySet.length() );
+        break;
+    }
+    case Range: {
+        /* Make the upper and lower keys. */
+        Key lowKey = makeFsmKeyChar( lower, pd );
+        Key highKey = makeFsmKeyChar( upper, pd );
+
+        /* Validate the range. */
+        if ( lowKey > highKey ) {
+            /* Recover by setting upper to lower. */
+            error(loc) << "lower end of range is greater then upper end" << endl;
+            highKey = lowKey;
+        }
+
+        /* Make the range machine. */
+        rtnVal = new FsmGraph();
+        rtnVal->rangeFsm( lowKey, highKey );
+
+        if ( rootRegex != 0 && rootRegex->caseInsensitive ) {
+            /* Part of the range is uppercase: add its lowercase mirror. */
+            if ( lowKey <= 'Z' && 'A' <= highKey ) {
+                Key otherLow = lowKey < 'A' ? Key('A') : lowKey;
+                Key otherHigh = 'Z' < highKey ? Key('Z') : highKey;
+
+                otherLow = 'a' + ( otherLow - 'A' );
+                otherHigh = 'a' + ( otherHigh - 'A' );
+
+                FsmGraph *otherRange = new FsmGraph();
+                otherRange->rangeFsm( otherLow, otherHigh );
+                rtnVal->unionOp( otherRange );
+                rtnVal->minimizePartition2();
+            }
+
+            /* Part of the range is lowercase: add its uppercase mirror.
+             * This is not an else branch, since one range can overlap both
+             * letter cases. */
+            if ( lowKey <= 'z' && 'a' <= highKey ) {
+                Key otherLow = lowKey < 'a' ? Key('a') : lowKey;
+                Key otherHigh = 'z' < highKey ? Key('z') : highKey;
+
+                otherLow = 'A' + ( otherLow - 'a' );
+                otherHigh = 'A' + ( otherHigh - 'a' );
+
+                FsmGraph *otherRange = new FsmGraph();
+                otherRange->rangeFsm( otherLow, otherHigh );
+                rtnVal->unionOp( otherRange );
+                rtnVal->minimizePartition2();
+            }
+        }
+
+        break;
+    }}
+    return rtnVal;
+}
diff --git a/src/parsetree.h b/src/parsetree.h
new file mode 100644
index 00000000..c3a75df5
--- /dev/null
+++ b/src/parsetree.h
@@ -0,0 +1,2253 @@
+/*
+ * Copyright 2006-2012 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _PARSETREE_H
+#define _PARSETREE_H
+
+#include <iostream>
+#include <string.h>
+#include "global.h"
+#include "avlmap.h"
+#include "bstmap.h"
+#include "bstset.h"
+#include "vector.h"
+#include "dlist.h"
+#include "dlistval.h"
+#include "dlistmel.h"
+#include "astring.h"
+#include "bytecode.h"
+#include "avlbasic.h"
+#include "fsmrun.h"
+
+/* Operators that are represented with single symbol characters. */
+#define OP_DoubleEql 'e'
+#define OP_NotEql 'q'
+#define OP_LessEql 'l'
+#define OP_GrtrEql 'g'
+#define OP_LogicalAnd 'a'
+#define OP_LogicalOr 'o'
+#define OP_Deref 'd'
+
+#if SIZEOF_LONG != 4 && SIZEOF_LONG != 8
+ #error "SIZEOF_LONG contained an unexpected value"
+#endif
+
+struct NameInst;
+struct FsmGraph;
+struct RedFsm;
+struct _FsmRun;
+struct ObjectDef;
+struct ElementOf;
+struct UniqueType;
+struct ObjField;
+struct TransBlock;
+struct CodeBlock;
+struct PdaLiteral;
+struct TypeAlias;
+typedef struct _PdaRun PdaRun;
+
+/*
+ * Code Vector: a Vector of Code with helpers that write a Half or a Word one
+ * element at a time, lowest-order 8 bits first. A Word takes SIZEOF_LONG
+ * elements, a Half takes two.
+ */
+struct CodeVect : public Vector<Code>
+{
+    /* Append the two pieces of half. Not optimal. */
+    void appendHalf( Half half )
+    {
+        for ( int b = 0; b < 2; b++ )
+            append( (half >> (8*b)) & 0xff );
+    }
+
+    /* Append the SIZEOF_LONG pieces of word. Not optimal. */
+    void appendWord( Word word )
+    {
+        for ( int b = 0; b < SIZEOF_LONG; b++ )
+            append( (word >> (8*b)) & 0xff );
+    }
+
+    /* Overwrite the two elements at pos and pos+1 with half. Not optimal. */
+    void setHalf( long pos, Half half )
+    {
+        for ( int b = 0; b < 2; b++ )
+            data[pos+b] = (half >> (8*b)) & 0xff;
+    }
+
+    /* Insert the two pieces of half starting at pos. Not optimal. */
+    void insertHalf( long pos, Half half )
+    {
+        for ( int b = 0; b < 2; b++ )
+            insert( pos+b, (half >> (8*b)) & 0xff );
+    }
+
+    /* Insert the SIZEOF_LONG pieces of word starting at pos. Not at all
+     * optimal. */
+    void insertWord( long pos, Word word )
+    {
+        for ( int b = 0; b < SIZEOF_LONG; b++ )
+            insert( pos+b, (word >> (8*b)) & 0xff );
+    }
+
+    /* Insert a tree pointer, cast to a Word, starting at pos. */
+    void insertTree( long pos, Tree *tree )
+    {
+        insertWord( pos, (Word) tree );
+    }
+};
+
+
+
+/* Types of builtin machines. No enumerator is given an explicit value, so
+ * they number consecutively from BT_Any (0) through BT_Empty. */
+enum BuiltinMachine
+{
+ BT_Any,
+ BT_Ascii,
+ BT_Extend,
+ BT_Alpha,
+ BT_Digit,
+ BT_Alnum,
+ BT_Lower,
+ BT_Upper,
+ BT_Cntrl,
+ BT_Graph,
+ BT_Print,
+ BT_Punct,
+ BT_Space,
+ BT_Xdigit,
+ BT_Lambda,
+ BT_Empty
+};
+
+typedef BstSet<char> CharSet;
+typedef Vector<unsigned char> UnsignedCharVect;
+
+
+struct Compiler;
+struct TypeRef;
+
+/* Leaf type. */
+struct Literal;
+
+/* Tree nodes. */
+
+struct Term;
+struct FactorWithAug;
+struct FactorWithRep;
+struct FactorWithNeg;
+struct Factor;
+struct Expression;
+struct Join;
+struct JoinOrLm;
+struct RegionJoinOrLm;
+struct TokenRegion;
+struct Namespace;
+struct Context;
+struct TokenDef;
+struct TokenDefListReg;
+struct TokenDefListNs;
+struct Range;
+struct LangEl;
+
+/* Type of augmentation. Describes locations in the machine. The first group
+ * holds the four transition positions. Each later group repeats the six
+ * state positions (start, all, final, not_start, not_final, middle) in the
+ * order that StateAugType lists them, because StateAugType values are used
+ * to compose AugType values. Keep the two enums in step when editing. */
+enum AugType
+{
+ /* Transition actions/priorities. */
+ at_start,
+ at_all,
+ at_finish,
+ at_leave,
+
+ /* Global error actions. */
+ at_start_gbl_error,
+ at_all_gbl_error,
+ at_final_gbl_error,
+ at_not_start_gbl_error,
+ at_not_final_gbl_error,
+ at_middle_gbl_error,
+
+ /* Local error actions. */
+ at_start_local_error,
+ at_all_local_error,
+ at_final_local_error,
+ at_not_start_local_error,
+ at_not_final_local_error,
+ at_middle_local_error,
+
+ /* To State Action embedding. */
+ at_start_to_state,
+ at_all_to_state,
+ at_final_to_state,
+ at_not_start_to_state,
+ at_not_final_to_state,
+ at_middle_to_state,
+
+ /* From State Action embedding. */
+ at_start_from_state,
+ at_all_from_state,
+ at_final_from_state,
+ at_not_start_from_state,
+ at_not_final_from_state,
+ at_middle_from_state,
+
+ /* EOF Action embedding. */
+ at_start_eof,
+ at_all_eof,
+ at_final_eof,
+ at_not_start_eof,
+ at_not_final_eof,
+ at_middle_eof
+};
+
+/* IMPORTANT: These must follow the same order as the state augs in AugType
+ * since we will be using this to compose AugType. sat_start is pinned to 0,
+ * so the remaining enumerators take the values 1 through 5. */
+enum StateAugType
+{
+ sat_start = 0,
+ sat_all,
+ sat_final,
+ sat_not_start,
+ sat_not_final,
+ sat_middle
+};
+
+struct Action;
+struct PriorDesc;
+struct RegExpr;
+struct ReItem;
+struct ReOrBlock;
+struct ReOrItem;
+struct ExplicitMachine;
+struct InlineItem;
+struct InlineList;
+
+/* Reference to a named state. */
+typedef Vector<String> NameRef;
+typedef Vector<NameRef*> NameRefList;
+typedef Vector<NameInst*> NameTargList;
+
+/* Structure for storing location of epsilon transitions. The constructor
+ * copies both of its arguments into the members: the location and the name
+ * reference of the target. */
+struct EpsilonLink
+{
+ EpsilonLink( const InputLoc &loc, NameRef &target )
+ : loc(loc), target(target) { }
+
+ InputLoc loc;
+ NameRef target;
+};
+
+struct Label
+{
+ Label( const InputLoc &loc, const String &data, ObjField *objField )
+ : loc(loc), data(data), objField(objField) { }
+
+ InputLoc loc; /* Copy of the location given to the constructor. */
+ String data; /* Copy of the string given to the constructor; presumably the label text -- confirm. */
+ ObjField *objField; /* Stored as given; this struct declares no destructor, so it is not freed here. */
+};
+
+/* An action attached to a FactorWithAug node, which keeps an array of these.
+ * Records where the embedding was written, which kind of augmentation it is,
+ * the local error key and the action itself. */
+struct ParserAction
+{
+	ParserAction( const InputLoc &loc, AugType type, int localErrKey, Action *action )
+	:
+		loc(loc),
+		type(type),
+		localErrKey(localErrKey),
+		action(action)
+	{}
+
+	InputLoc loc;
+	AugType type;
+	int localErrKey;
+	Action *action;
+};
+
+/* A piece of token text paired with its source location. No constructor is
+ * declared, so both members are default-constructed. */
+struct Token
+{
+ String data;
+ InputLoc loc;
+};
+
+void prepareLitString( String &result, bool &caseInsensitive,
+ const String &srcString, const InputLoc &loc );
+
+std::ostream &operator<<(std::ostream &out, const Token &token );
+
+typedef AvlMap< String, TokenDef*, CmpStr > LiteralDict;
+typedef AvlMapEl< String, TokenDef* > LiteralDictEl;
+
+/* Holds one priority augmentation: the kind of augmentation plus the
+ * priority key and value. */
+struct PriorityAug
+{
+	PriorityAug( AugType type, int priorKey, int priorValue )
+	:
+		type(type),
+		priorKey(priorKey),
+		priorValue(priorValue)
+	{}
+
+	AugType type;
+	int priorKey;
+	int priorValue;
+};
+
+/*
+ * A Variable Definition: a name bound to a Join. The pointer is stored as
+ * given; nothing in this header deletes it.
+ */
+struct VarDef
+{
+ VarDef( const String &name, Join *join )
+ : name(name), join(join) { }
+
+ /* Parse tree traversal. Both functions are defined elsewhere. */
+ FsmGraph *walk( Compiler *pd );
+ void makeNameTree( const InputLoc &loc, Compiler *pd );
+
+ String name;
+ Join *join;
+};
+
+/*
+ * A Region Definition: a name bound to a TokenRegion. Mirrors VarDef, with a
+ * token region in place of the join.
+ */
+struct RegionDef
+{
+ RegionDef( const String &name, TokenRegion *tokenRegion )
+ : name(name), tokenRegion(tokenRegion) { }
+
+ /* Parse tree traversal. Both functions are defined elsewhere. */
+ FsmGraph *walk( Compiler *pd );
+ void makeNameTree( const InputLoc &loc, Compiler *pd );
+
+ String name;
+ TokenRegion *tokenRegion;
+};
+
+typedef Vector<String> StringVect;
+typedef CmpTable<String, CmpStr> CmpStrVect;
+
+/* A namespace qualification: the list of qualifying names plus the namespace
+ * in which the qualified reference was declared. */
+struct NamespaceQual
+{
+ /* NOTE(review): declInRegion is accepted but never stored or used by this
+  * constructor; confirm whether callers still need to pass it. */
+ NamespaceQual( Namespace *declInNspace, TokenRegion *declInRegion ) :
+ cachedNspaceQual(0), declInNspace(declInNspace) {}
+
+ /* Starts out null; presumably filled in by getQual() as a cache -- confirm
+  * against the definition. */
+ Namespace *cachedNspaceQual;
+ Namespace *declInNspace;
+
+ StringVect qualNames;
+
+ Namespace *searchFrom( Namespace *from, StringVect::Iter &qualPart );
+ Namespace *getQual( Compiler *pd );
+};
+
+/* A capture: the pair of actions marking where it is entered and left, and
+ * the object field associated with it. */
+struct ReCapture
+{
+	ReCapture( Action *markEnter, Action *markLeave, ObjField *objField )
+	:
+		markEnter(markEnter),
+		markLeave(markLeave),
+		objField(objField)
+	{}
+
+	Action *markEnter;
+	Action *markLeave;
+	ObjField *objField;
+};
+
+typedef Vector<Context*> ContextVect;
+
+/* A context: the location where it was declared, the language element that
+ * represents it and the object definition describing its data. */
+struct Context
+{
+	/* The location is taken by const reference, like the other constructors
+	 * in this file, so temporaries and const locations are accepted too.
+	 * contextObjDef starts out null instead of holding an indeterminate
+	 * value until it is assigned. */
+	Context( const InputLoc &loc, LangEl *lel )
+	:
+		loc(loc),
+		lel(lel),
+		contextObjDef(0)
+	{}
+
+	InputLoc loc;
+	LangEl *lel;
+
+	ObjectDef *contextObjDef;
+};
+
+typedef Vector<ReCapture> ReCaptureVect;
+
+/* Two identically shaped sets of list links. TokenDef inherits from both so
+ * that it can be a member of two lists at once: TokenDefListReg links through
+ * TokenDefPtr1 and TokenDefListNs links through TokenDefPtr2. */
+struct TokenDefPtr1
+{
+ TokenDef *prev, *next;
+};
+
+struct TokenDefPtr2
+{
+ TokenDef *prev, *next;
+};
+
+/* Definition of a single token (or ignore pattern). Inherits two sets of
+ * list links so it can be a member of both a region's and a namespace's
+ * token list. */
+struct TokenDef
+:
+	public TokenDefPtr1,
+	public TokenDefPtr2
+{
+	/* Every pointer and flag the caller does not supply starts out null or
+	 * false, including the four longest-match action pointers, which the
+	 * original left indeterminate. When pReCaptureVect is non-null its
+	 * contents are copied; the pointer itself is not kept. */
+	TokenDef( const String &name, const String &literal, bool isLiteral, bool ignore,
+			Join *join, CodeBlock *codeBlock, const InputLoc &semiLoc,
+			int longestMatchId, Namespace *nspace, TokenRegion *tokenRegion,
+			ReCaptureVect *pReCaptureVect, ObjectDef *objectDef, Context *contextIn )
+	:
+		name(name), literal(literal), isLiteral(isLiteral), ignore(ignore), join(join), action(0),
+		codeBlock(codeBlock), tdLangEl(0), semiLoc(semiLoc),
+		setActId(0), actOnLast(0), actOnNext(0), actLagBehind(0),
+		longestMatchId(longestMatchId), inLmSelect(false),
+		nspace(nspace), tokenRegion(tokenRegion), objectDef(objectDef),
+		contextIn(contextIn),
+		dupOf(0), noPostIgnore(false), noPreIgnore(false), isZero(false)
+	{
+		if ( pReCaptureVect != 0 )
+			reCaptureVect = *pReCaptureVect;
+	}
+
+	InputLoc getLoc();
+
+	String name;
+	String literal;
+	bool isLiteral;
+	bool ignore;
+	Join *join;
+	Action *action;
+	CodeBlock *codeBlock;
+	LangEl *tdLangEl;
+	InputLoc semiLoc;
+
+	/* Longest-match actions for this token. */
+	Action *setActId;
+	Action *actOnLast;
+	Action *actOnNext;
+	Action *actLagBehind;
+	int longestMatchId;
+	bool inLmSelect;
+	Namespace *nspace;
+	TokenRegion *tokenRegion;
+	ReCaptureVect reCaptureVect;
+	ObjectDef *objectDef;
+	Context *contextIn;
+
+	TokenDef *dupOf;
+	bool noPostIgnore;
+	bool noPreIgnore;
+	bool isZero;
+};
+
+struct LelDefList;
+
+/* Definition of a nonterminal: its name, the namespace it lives in, its
+ * list of definitions, its object definition, the context it was declared
+ * in and the reduceFirst flag. Instances are kept in an NtDefList. */
+struct NtDef
+{
+ NtDef( const String &name, Namespace *nspace,
+ LelDefList *defList, ObjectDef *objectDef,
+ Context *contextIn, bool reduceFirst )
+ :
+ name(name),
+ nspace(nspace),
+ defList(defList),
+ objectDef(objectDef),
+ contextIn(contextIn),
+ reduceFirst(reduceFirst)
+ {}
+
+ String name;
+ Namespace *nspace;
+ LelDefList *defList;
+ ObjectDef *objectDef;
+ Context *contextIn;
+ bool reduceFirst;
+
+ /* List links; not set by the constructor. */
+ NtDef *prev, *next;
+};
+
+struct NtDefList : DList<NtDef> {};
+
+/* Declare a new type so that ptreetypes.h need not include dlist.h. */
+struct TokenDefListReg : DListMel<TokenDef, TokenDefPtr1> {};
+struct TokenDefListNs : DListMel<TokenDef, TokenDefPtr2> {};
+
+/* A named context definition and the namespace it was declared in.
+ * Instances are kept in a ContextDefList. */
+struct ContextDef
+{
+	ContextDef( const String &name, Context *context, Namespace *nspace )
+	:
+		name(name),
+		context(context),
+		nspace(nspace)
+	{}
+
+	String name;
+	Context *context;
+	Namespace *nspace;
+
+	/* List links; not set by the constructor. */
+	ContextDef *prev, *next;
+};
+
+struct ContextDefList : DList<ContextDef> {};
+
+/* Element of a TypeMap, keyed by name. An entry is either a type alias, in
+ * which case typeRef is set and value is null, or a language element, in
+ * which case value is set and typeRef is null. The type member records
+ * which of the two it is. */
+struct TypeMapEl
+ : public AvlTreeEl<TypeMapEl>
+{
+ enum Type
+ {
+ TypeAliasType = 1,
+ LangElType
+ };
+
+ const String &getKey() { return key; }
+
+ /* Construct a type-alias entry. */
+ TypeMapEl( const String &key, TypeRef *typeRef )
+ : type(TypeAliasType), key(key), value(0), typeRef(typeRef) {}
+
+ /* Construct a language-element entry. */
+ TypeMapEl( const String &key, LangEl *value )
+ : type(LangElType), key(key), value(value), typeRef(0) {}
+
+
+ Type type;
+ String key;
+ LangEl *value;
+ TypeRef *typeRef;
+
+ /* Not set by either constructor. */
+ TypeMapEl *prev, *next;
+};
+
+/* Symbol Map. */
+typedef AvlTree< TypeMapEl, String, CmpStr > TypeMap;
+
+typedef Vector<TokenRegion*> RegionVect;
+
+/* A token region: a named, numbered group of token definitions, with links
+ * to its parent and child regions and to the regions derived from it. */
+struct TokenRegion
+{
+	/* Construct a region from its location, name, id and parent. Every
+	 * other pointer member starts out null and every flag false. This now
+	 * includes lmActSelect, which the original left indeterminate. */
+	TokenRegion( const InputLoc &loc, const String &name, int id,
+			TokenRegion *parentRegion ) :
+		loc(loc), name(name), id(id),
+		lmActSelect(0),
+		lmSwitchHandlesError(false), regionNameInst(0),
+		parentRegion(parentRegion), defaultTokenDef(0),
+		preEofBlock(0),
+		ignoreOnlyRegion(0), tokenOnlyRegion(0), ciRegion(0),
+		wasEmpty(false),
+		isFullRegion(false),
+		isIgnoreOnly(false),
+		isTokenOnly(false),
+		isCiOnly(false),
+		ciLel(0),
+		derivedFrom(0)
+	{ }
+
+	/* Tree traversal. */
+	FsmGraph *walk( Compiler *pd );
+	void makeNameTree( Compiler *pd );
+	void runLongestMatch( Compiler *pd, FsmGraph *graph );
+	void transferScannerLeavingActions( FsmGraph *graph );
+	Action *newAction( Compiler *pd, const InputLoc &loc, const String &name,
+			InlineList *inlineList );
+	void makeActions( Compiler *pd );
+	void findName( Compiler *pd );
+	void restart( FsmGraph *graph, FsmTrans *trans );
+
+	InputLoc loc;
+	TokenDefListReg tokenDefList;
+	String name;
+	int id;
+
+	Action *lmActSelect;
+	bool lmSwitchHandlesError;
+
+	/* This gets saved off during the name walk. Can save it off because token
+	 * regions are referenced once only. */
+	NameInst *regionNameInst;
+
+	TokenRegion *parentRegion;
+	RegionVect childRegions;
+
+	TokenDef *defaultTokenDef;
+
+	CodeBlock *preEofBlock;
+
+	/* Dupe of the region, containing only the ignore tokens. */
+	TokenRegion *ignoreOnlyRegion;
+	TokenRegion *tokenOnlyRegion;
+	TokenRegion *ciRegion;
+
+	/* We always init empty scanners with a single token. If we had to do this
+	 * then wasEmpty is true. */
+	bool wasEmpty;
+
+	bool isFullRegion;
+	bool isIgnoreOnly;
+	bool isTokenOnly;
+	bool isCiOnly;
+
+	LangEl *ciLel;
+	TokenRegion *derivedFrom;
+
+	/* List links; not set by the constructor. */
+	TokenRegion *next, *prev;
+};
+
+typedef DList<TokenRegion> RegionList;
+typedef BstSet< TokenRegion*, CmpOrd<TokenRegion*> > RegionSet;
+
+typedef Vector<Namespace*> NamespaceVect;
+
+/* A generic type, identified by name, together with its type id, its own
+ * id, the language element representing it and its type argument. The key
+ * type argument, both unique-type pointers and the object definition start
+ * out null. */
+struct GenericType
+	: public DListEl<GenericType>
+{
+	GenericType( const String &name, long typeId, long id,
+			LangEl *langEl, TypeRef *typeArg )
+	:
+		name(name),
+		typeId(typeId),
+		id(id),
+		langEl(langEl),
+		typeArg(typeArg),
+		keyTypeArg(0),
+		utArg(0),
+		keyUT(0),
+		objDef(0)
+	{}
+
+	/* The name is the lookup key. */
+	const String &getKey() const
+	{
+		return name;
+	}
+
+	void declare( Compiler *pd, Namespace *nspace );
+
+	String name;
+	long typeId;
+	long id;
+	LangEl *langEl;
+	TypeRef *typeArg;
+	TypeRef *keyTypeArg;
+	UniqueType *utArg;
+	UniqueType *keyUT;
+
+	ObjectDef *objDef;
+};
+
+typedef DList<GenericType> GenericList;
+
+typedef struct _UserIter UserIter;
+typedef AvlMap<String, UserIter*, CmpStr> UserIterMap;
+typedef AvlMapEl<String, UserIter*> UserIterMapEl;
+
+/* Graph dictionary element: maps a name to a VarDef. It is both an AVL tree
+ * element (GraphDict) and a list element (GraphList). */
+struct GraphDictEl
+:
+	public AvlTreeEl<GraphDictEl>,
+	public DListEl<GraphDictEl>
+{
+	/* Key only; the value is null. */
+	GraphDictEl( const String &key )
+	:
+		key(key),
+		value(0),
+		isInstance(false)
+	{}
+
+	/* Key and value. */
+	GraphDictEl( const String &key, VarDef *value )
+	:
+		key(key),
+		value(value),
+		isInstance(false)
+	{}
+
+	const String &getKey()
+		{ return key; }
+
+	String key;
+	VarDef *value;
+	bool isInstance;
+
+	/* Location info of graph definition. Points to variable name of assignment. */
+	InputLoc loc;
+};
+
+typedef AvlTree<GraphDictEl, String, CmpStr> GraphDict;
+typedef DList<GraphDictEl> GraphList;
+
+/* Region graph dictionary element: maps a name to a RegionDef. It is both
+ * an AVL tree element (RegionGraphDict) and a list element
+ * (RegionGraphList). */
+struct RegionGraphDictEl
+:
+	public AvlTreeEl<RegionGraphDictEl>,
+	public DListEl<RegionGraphDictEl>
+{
+	/* Key only; the value is null. */
+	RegionGraphDictEl( const String &key )
+	:
+		key(key),
+		value(0),
+		isInstance(false)
+	{}
+
+	/* Key and value. */
+	RegionGraphDictEl( const String &key, RegionDef *value )
+	:
+		key(key),
+		value(value),
+		isInstance(false)
+	{}
+
+	const String &getKey()
+		{ return key; }
+
+	String key;
+	RegionDef *value;
+	bool isInstance;
+
+	/* Location info of graph definition. Points to variable name of assignment. */
+	InputLoc loc;
+};
+
+typedef AvlTree<RegionGraphDictEl, String, CmpStr> RegionGraphDict;
+typedef DList<RegionGraphDictEl> RegionGraphList;
+
+/* A type alias: the name being introduced, the type it refers to, the
+ * namespace it was declared in and where it was written. Instances are kept
+ * in a TypeAliasList. */
+struct TypeAlias
+{
+ TypeAlias( const InputLoc &loc, Namespace *nspace,
+ const String &name, TypeRef *typeRef )
+ :
+ loc(loc),
+ nspace(nspace),
+ name(name),
+ typeRef(typeRef)
+ {}
+
+ InputLoc loc;
+ Namespace *nspace;
+ String name;
+ TypeRef *typeRef;
+
+ /* List links; not set by the constructor. */
+ TypeAlias *prev, *next;
+};
+
+typedef DList<TypeAlias> TypeAliasList;
+
+/* A namespace: holds the literals, token and nonterminal definitions,
+ * context definitions, types, generics, graphs and type aliases declared in
+ * it, plus links to its parent and child namespaces. */
+struct Namespace
+{
+ /* Construct from location, name, id and parent namespace. All containers
+  * are default-constructed; next and prev are not set here. */
+ Namespace( const InputLoc &loc, const String &name, int id,
+ Namespace *parentNamespace ) :
+ loc(loc), name(name), id(id),
+ parentNamespace(parentNamespace) { }
+
+ /* Tree traversal. */
+ Namespace *findNamespace( const String &name );
+
+ InputLoc loc;
+ String name;
+ int id;
+
+ /* Literal patterns and the dictionary mapping literals to the underlying
+ * tokens. */
+ LiteralDict literalDict;
+
+ /* List of tokens defs in the namespace. */
+ TokenDefListNs tokenDefList;
+
+ /* List of nonterminal defs in the namespace. */
+ NtDefList ntDefList;
+
+ /* List of context definitions for encapsulating the data of a parser. */
+ ContextDefList contextDefList;
+
+ /* Dictionary of symbols within the region. */
+ TypeMap typeMap;
+ GenericList genericList;
+
+ /* Dictionary of graphs. Both instances and non-instances go here. */
+ RegionGraphDict graphDict;
+
+ /* regular language definitions. */
+ GraphDict rlMap;
+
+ TypeAliasList typeAliasList;
+
+ Namespace *parentNamespace;
+ NamespaceVect childNamespaces;
+
+ /* List links. */
+ Namespace *next, *prev;
+
+ void declare( Compiler *pd );
+};
+
+typedef DList<Namespace> NamespaceList;
+typedef BstSet< Namespace*, CmpOrd<Namespace*> > NamespaceSet;
+
+/* List of Expressions. */
+typedef DList<Expression> ExprList;
+
+/* Thin wrapper around a Join that exposes the parse tree traversal
+ * interface. */
+struct JoinOrLm
+{
+	JoinOrLm( Join *join )
+		: join(join)
+	{}
+
+	FsmGraph *walk( Compiler *pd );
+	void makeNameTree( Compiler *pd );
+
+	Join *join;
+};
+
+/* Thin wrapper around a TokenRegion that exposes the parse tree traversal
+ * interface. */
+struct RegionJoinOrLm
+{
+	/* Only one kind exists; no member of this type is stored. */
+	enum Type { LongestMatchType };
+
+	RegionJoinOrLm( TokenRegion *tokenRegion )
+		: tokenRegion(tokenRegion)
+	{}
+
+	FsmGraph *walk( Compiler *pd );
+	void makeNameTree( Compiler *pd );
+
+	TokenRegion *tokenRegion;
+};
+
+/*
+ * Join: holds a list of expressions, plus an optional context join and mark
+ * action.
+ */
+struct Join
+{
+ /* Construct with the first expression. Defined elsewhere, so whether
+  * context and mark are initialised cannot be seen from this header. */
+ Join( Expression *expr );
+
+ /* Tree traversal. */
+ FsmGraph *walk( Compiler *pd );
+ void makeNameTree( Compiler *pd );
+
+ /* Data. */
+ ExprList exprList;
+
+ Join *context;
+ Action *mark;
+};
+
+/*
+ * Expression: a node that combines a left-hand expression with a term using
+ * one of the binary operators, or holds a lone term, or a builtin machine.
+ */
+struct Expression
+{
+	enum Type {
+		OrType,
+		IntersectType,
+		SubtractType,
+		StrongSubtractType,
+		TermType,
+		BuiltinType
+	};
+
+	/* Construct with an expression on the left and a term on the right.
+	 * There is no builtin here, so the member is value-initialised. The
+	 * original wrote builtin(builtin), which initialised the member from
+	 * its own indeterminate value. */
+	Expression( Expression *expression, Term *term, Type type ) :
+		expression(expression), term(term),
+		builtin(), type(type), prev(this), next(this) { }
+
+	/* Construct with only a term. As above, builtin is value-initialised
+	 * rather than read from itself. */
+	Expression( Term *term ) :
+		expression(0), term(term), builtin(),
+		type(TermType) , prev(this), next(this) { }
+
+	/* Construct with a builtin type. */
+	Expression( BuiltinMachine builtin ) :
+		expression(0), term(0), builtin(builtin),
+		type(BuiltinType), prev(this), next(this) { }
+
+	~Expression();
+
+	/* Tree traversal. */
+	FsmGraph *walk( Compiler *pd, bool lastInSeq = true );
+	void makeNameTree( Compiler *pd );
+
+	/* Node data. */
+	Expression *expression;
+	Term *term;
+	BuiltinMachine builtin;
+	Type type;
+
+	/* List links. Every constructor points both at the node itself. */
+	Expression *prev, *next;
+};
+
+/*
+ * Term: either a lone factor-with-augmentation or a left-hand term combined
+ * with one, using the operator recorded in type.
+ */
+struct Term
+{
+ enum Type {
+ ConcatType,
+ RightStartType,
+ RightFinishType,
+ LeftType,
+ FactorWithAugType
+ };
+
+ /* Term combined with a factor; the operator defaults to concatenation. */
+ Term( Term *term, FactorWithAug *factorWithAug ) :
+ term(term), factorWithAug(factorWithAug), type(ConcatType) { }
+
+ /* Term combined with a factor using an explicit operator. */
+ Term( Term *term, FactorWithAug *factorWithAug, Type type ) :
+ term(term), factorWithAug(factorWithAug), type(type) { }
+
+ /* A lone factor; there is no left-hand term. */
+ Term( FactorWithAug *factorWithAug ) :
+ term(0), factorWithAug(factorWithAug), type(FactorWithAugType) { }
+
+ ~Term();
+
+ FsmGraph *walk( Compiler *pd, bool lastInSeq = true );
+ void makeNameTree( Compiler *pd );
+
+ Term *term;
+ FactorWithAug *factorWithAug;
+ Type type;
+
+ /* Priority descriptor for RightFinish type. */
+ PriorDesc priorDescs[2];
+};
+
+
+/* Third level of precedence. Augmenting nodes with actions and priorities.
+ * Wraps a FactorWithRep and collects the actions, priorities, epsilon links
+ * and conditions attached to it. */
+struct FactorWithAug
+{
+ /* The augmentation vectors start out default-constructed and priorDescs
+  * starts out null. */
+ FactorWithAug( FactorWithRep *factorWithRep ) :
+ priorDescs(0), factorWithRep(factorWithRep) { }
+ ~FactorWithAug();
+
+ /* Tree traversal. */
+ FsmGraph *walk( Compiler *pd );
+ void makeNameTree( Compiler *pd );
+
+ void assignActions( Compiler *pd, FsmGraph *graph, int *actionOrd );
+ void assignPriorities( FsmGraph *graph, int *priorOrd );
+
+ void assignConditions( FsmGraph *graph );
+
+ /* Actions and priorities assigned to the factor node. */
+ Vector<ParserAction> actions;
+ Vector<PriorityAug> priorityAugs;
+ PriorDesc *priorDescs;
+ Vector<EpsilonLink> epsilonLinks;
+ Vector<ParserAction> conditions;
+
+ FactorWithRep *factorWithRep;
+};
+
+/* Fourth level of precedence. Trailing unary operators. Provides Kleene
+ * star, optional and plus, along with the counted repetition forms. */
+struct FactorWithRep
+{
+	enum Type {
+		StarType,
+		StarStarType,
+		OptionalType,
+		PlusType,
+		ExactType,
+		MaxType,
+		MinType,
+		RangeType,
+		FactorWithNegType
+	};
+
+	/* A repetition applied to another FactorWithRep. */
+	FactorWithRep( const InputLoc &loc, FactorWithRep *factorWithRep,
+			int lowerRep, int upperRep, Type type ) :
+		loc(loc), factorWithRep(factorWithRep),
+		factorWithNeg(0), lowerRep(lowerRep),
+		upperRep(upperRep), type(type) { }
+
+	/* No repetition: wraps a FactorWithNeg. The members belonging to the
+	 * repetition form are nulled/zeroed instead of being left
+	 * indeterminate. */
+	FactorWithRep( const InputLoc &loc, FactorWithNeg *factorWithNeg )
+		: loc(loc), factorWithRep(0), factorWithNeg(factorWithNeg),
+		lowerRep(0), upperRep(0), type(FactorWithNegType) { }
+
+	~FactorWithRep();
+
+	/* Tree traversal. */
+	FsmGraph *walk( Compiler *pd );
+	void makeNameTree( Compiler *pd );
+
+	InputLoc loc;
+	FactorWithRep *factorWithRep;
+	FactorWithNeg *factorWithNeg;
+	int lowerRep, upperRep;
+	Type type;
+
+	/* Priority descriptor for StarStar type. */
+	PriorDesc priorDescs[2];
+};
+
+/* Fifth level of precedence. Provides Negation. A node either negates
+ * another FactorWithNeg or wraps a plain Factor; the pointer that does not
+ * apply is null. */
+struct FactorWithNeg
+{
+	enum Type {
+		NegateType,
+		CharNegateType,
+		FactorType
+	};
+
+	/* A negation of the given kind applied to another FactorWithNeg. */
+	FactorWithNeg( const InputLoc &loc, FactorWithNeg *factorWithNeg, Type type)
+	:
+		loc(loc),
+		factorWithNeg(factorWithNeg),
+		factor(0),
+		type(type)
+	{}
+
+	/* No negation: wraps a Factor. */
+	FactorWithNeg( const InputLoc &loc, Factor *factor )
+	:
+		loc(loc),
+		factorWithNeg(0),
+		factor(factor),
+		type(FactorType)
+	{}
+
+	~FactorWithNeg();
+
+	/* Tree traversal. */
+	FsmGraph *walk( Compiler *pd );
+	void makeNameTree( Compiler *pd );
+
+	InputLoc loc;
+	FactorWithNeg *factorWithNeg;
+	Factor *factor;
+	Type type;
+};
+
+/*
+ * Factor: the leaf level of the expression tree. Exactly one of the payload
+ * pointers is meaningful, selected by type. Each constructor nulls the
+ * payloads it does not set and zeroes lower/upper, so no member is left
+ * indeterminate.
+ */
+struct Factor
+{
+	/* Language elements a factor node can be. */
+	enum Type {
+		LiteralType,
+		RangeType,
+		OrExprType,
+		RegExprType,
+		ReferenceType,
+		ParenType
+	};
+
+	/* Construct with a literal fsm. */
+	Factor( Literal *literal ) :
+		literal(literal), range(0), reItem(0), regExp(0),
+		varDef(0), join(0), lower(0), upper(0),
+		type(LiteralType) { }
+
+	/* Construct with a range. */
+	Factor( Range *range ) :
+		literal(0), range(range), reItem(0), regExp(0),
+		varDef(0), join(0), lower(0), upper(0),
+		type(RangeType) { }
+
+	/* Construct with the or part of a regular expression. */
+	Factor( ReItem *reItem ) :
+		literal(0), range(0), reItem(reItem), regExp(0),
+		varDef(0), join(0), lower(0), upper(0),
+		type(OrExprType) { }
+
+	/* Construct with a regular expression. */
+	Factor( RegExpr *regExp ) :
+		literal(0), range(0), reItem(0), regExp(regExp),
+		varDef(0), join(0), lower(0), upper(0),
+		type(RegExprType) { }
+
+	/* Construct with a reference to a var def. */
+	Factor( const InputLoc &loc, VarDef *varDef ) :
+		loc(loc), literal(0), range(0), reItem(0), regExp(0),
+		varDef(varDef), join(0), lower(0), upper(0),
+		type(ReferenceType) {}
+
+	/* Construct with a parenthesized join. */
+	Factor( Join *join ) :
+		literal(0), range(0), reItem(0), regExp(0),
+		varDef(0), join(join), lower(0), upper(0),
+		type(ParenType) {}
+
+	/* Cleanup. */
+	~Factor();
+
+	/* Tree traversal. */
+	FsmGraph *walk( Compiler *pd );
+	void makeNameTree( Compiler *pd );
+
+	InputLoc loc;
+	Literal *literal;
+	Range *range;
+	ReItem *reItem;
+	RegExpr *regExp;
+	VarDef *varDef;
+	Join *join;
+	int lower, upper;
+	Type type;
+};
+
+/* A range machine. Only ever composed of two literals: the lower and the
+ * upper bound. */
+struct Range
+{
+	Range( Literal *lowerLit, Literal *upperLit )
+	:
+		lowerLit(lowerLit),
+		upperLit(upperLit)
+	{}
+
+	~Range();
+	FsmGraph *walk( Compiler *pd );
+	bool verifyRangeFsm( FsmGraph *rangeEnd );
+
+	Literal *lowerLit;
+	Literal *upperLit;
+};
+
+/* Some literal machine. Can be a number or literal string; the text is kept
+ * verbatim together with where it was written. */
+struct Literal
+{
+	enum LiteralType { Number, LitString };
+
+	Literal( const InputLoc &loc, const String &literal, LiteralType type )
+	:
+		loc(loc),
+		literal(literal),
+		type(type)
+	{}
+
+	FsmGraph *walk( Compiler *pd );
+
+	InputLoc loc;
+	String literal;
+	LiteralType type;
+};
+
+/* Regular expression. Built as a left-recursive chain: a RecurseItem node
+ * holds the expression so far plus one more item, and an Empty node ends
+ * the chain. */
+struct RegExpr
+{
+	enum RegExpType { RecurseItem, Empty };
+
+	/* Constructors. */
+
+	/* The empty expression. Both links are nulled instead of being left
+	 * indeterminate. */
+	RegExpr() :
+		regExp(0), item(0),
+		type(Empty), caseInsensitive(false) { }
+
+	/* An existing expression followed by one more item. */
+	RegExpr(RegExpr *regExp, ReItem *item) :
+		regExp(regExp), item(item),
+		type(RecurseItem), caseInsensitive(false) { }
+
+	~RegExpr();
+	FsmGraph *walk( Compiler *pd, RegExpr *rootRegex );
+
+	RegExpr *regExp;
+	ReItem *item;
+	RegExpType type;
+	bool caseInsensitive;
+};
+
+/* An item in a regular expression: literal data, a dot, or a (possibly
+ * negated) or-block. The star flag starts out false. */
+struct ReItem
+{
+	enum ReItemType { Data, Dot, OrBlock, NegOrBlock };
+
+	/* Literal data. There is no or-block, so the pointer is nulled. */
+	ReItem( const InputLoc &loc, const String &data )
+		: loc(loc), data(data), orBlock(0), star(false), type(Data) { }
+
+	/* An item carrying neither data nor an or-block. */
+	ReItem( const InputLoc &loc, ReItemType type )
+		: loc(loc), orBlock(0), star(false), type(type) { }
+
+	/* An or-block of the given kind. */
+	ReItem( const InputLoc &loc, ReOrBlock *orBlock, ReItemType type )
+		: loc(loc), orBlock(orBlock), star(false), type(type) { }
+
+	~ReItem();
+	FsmGraph *walk( Compiler *pd, RegExpr *rootRegex );
+
+	InputLoc loc;
+	String data;
+	ReOrBlock *orBlock;
+	bool star;
+	ReItemType type;
+};
+
+/* An or block item. Built as a left-recursive chain: a RecurseItem node
+ * holds the block so far plus one more item, and an Empty node ends the
+ * chain. */
+struct ReOrBlock
+{
+	enum ReOrBlockType { RecurseItem, Empty };
+
+	/* Constructors. */
+
+	/* The empty block. Both links are nulled instead of being left
+	 * indeterminate. */
+	ReOrBlock()
+		: orBlock(0), item(0), type(Empty) { }
+
+	/* An existing block followed by one more item. */
+	ReOrBlock(ReOrBlock *orBlock, ReOrItem *item)
+		: orBlock(orBlock), item(item), type(RecurseItem) { }
+
+	~ReOrBlock();
+	FsmGraph *walk( Compiler *pd, RegExpr *rootRegex );
+
+	ReOrBlock *orBlock;
+	ReOrItem *item;
+	ReOrBlockType type;
+};
+
+/* An item in an or block: either a run of literal data or a character
+ * range given by its lower and upper bounds. */
+struct ReOrItem
+{
+	enum ReOrItemType { Data, Range };
+
+	/* Literal data. The range bounds do not apply and are zeroed instead of
+	 * being left indeterminate. */
+	ReOrItem( const InputLoc &loc, const String &data )
+		: loc(loc), data(data), lower(0), upper(0), type(Data) {}
+
+	/* A character range. */
+	ReOrItem( const InputLoc &loc, char lower, char upper )
+		: loc(loc), lower(lower), upper(upper), type(Range) { }
+
+	FsmGraph *walk( Compiler *pd, RegExpr *rootRegex );
+
+	InputLoc loc;
+	String data;
+	char lower;
+	char upper;
+	ReOrItemType type;
+};
+
+
+/*
+ * Inline code tree
+ */
+struct InlineList;
+/* One item of inline code: plain text or one of the longest-match
+ * directives. Which payload members are meaningful depends on the
+ * constructor used; every constructor nulls the pointers it does not set,
+ * so none is left indeterminate. */
+struct InlineItem
+{
+	enum Type
+	{
+		Text,
+		LmSwitch,
+		LmSetActId,
+		LmSetTokEnd,
+		LmOnLast,
+		LmOnNext,
+		LmOnLagBehind,
+		LmInitAct,
+		LmInitTokStart,
+		LmSetTokStart
+	};
+
+	/* Item carrying text. */
+	InlineItem( const InputLoc &loc, const String &data, Type type ) :
+		loc(loc), data(data), nameRef(0), nameTarg(0), children(0),
+		tokenRegion(0), longestMatchPart(0), type(type) { }
+
+	/* Item carrying an unresolved name reference. */
+	InlineItem( const InputLoc &loc, NameRef *nameRef, Type type ) :
+		loc(loc), nameRef(nameRef), nameTarg(0), children(0),
+		tokenRegion(0), longestMatchPart(0), type(type) { }
+
+	/* Item referring to a token region and one of its token definitions. */
+	InlineItem( const InputLoc &loc, TokenRegion *tokenRegion,
+			TokenDef *longestMatchPart, Type type ) :
+		loc(loc), nameRef(0), nameTarg(0), children(0),
+		tokenRegion(tokenRegion), longestMatchPart(longestMatchPart),
+		type(type) { }
+
+	/* Item carrying a name target. */
+	InlineItem( const InputLoc &loc, NameInst *nameTarg, Type type ) :
+		loc(loc), nameRef(0), nameTarg(nameTarg), children(0),
+		tokenRegion(0), longestMatchPart(0), type(type) { }
+
+	/* Item with no payload beyond its type. */
+	InlineItem( const InputLoc &loc, Type type ) :
+		loc(loc), nameRef(0), nameTarg(0), children(0),
+		tokenRegion(0), longestMatchPart(0), type(type) { }
+
+	InputLoc loc;
+	String data;
+	NameRef *nameRef;
+	NameInst *nameTarg;
+	InlineList *children;
+	TokenRegion *tokenRegion;
+	TokenDef *longestMatchPart;
+	Type type;
+
+	/* List links; not set by the constructors. */
+	InlineItem *prev, *next;
+};
+
+/* Normally this would be a typedef, but that would entail including DList
+ * from ptreetypes, which should contain just forward typedefs. */
+struct InlineList : public DList<InlineItem> { };
+
+struct ProdEl;
+struct LangVarRef;
+struct ObjField;
+
+/* One item of a pattern: either a production element (factor) or a run of
+ * input text. The region, variable reference and bind id start out
+ * null/zero. */
+struct PatternItem
+{
+	enum Type {
+		FactorType,
+		InputText
+	};
+
+	/* Item carrying text; there is no factor. */
+	PatternItem( const InputLoc &loc, const String &data, Type type )
+	:
+		loc(loc),
+		factor(0),
+		data(data),
+		type(type),
+		region(0),
+		varRef(0),
+		bindId(0)
+	{}
+
+	/* Item carrying a factor; the text is left empty. */
+	PatternItem( const InputLoc &loc, ProdEl *factor, Type type )
+	:
+		loc(loc),
+		factor(factor),
+		type(type),
+		region(0),
+		varRef(0),
+		bindId(0)
+	{}
+
+	InputLoc loc;
+	ProdEl *factor;
+	String data;
+	Type type;
+	TokenRegion *region;
+	LangVarRef *varRef;
+	long bindId;
+
+	/* List links; not set by the constructors. */
+	PatternItem *prev, *next;
+};
+
+struct LangExpr;
+typedef DList<PatternItem> PatternItemList;
+
+/* One item of a replacement: input text, an expression or a production
+ * element (factor). Every constructor nulls the payload pointers it does
+ * not set, so none is left indeterminate. */
+struct ReplItem
+{
+	enum Type {
+		InputText,
+		ExprType,
+		FactorType
+	};
+
+	/* Item carrying text. */
+	ReplItem( const InputLoc &loc, Type type, const String &data ) :
+		loc(loc), type(type), data(data), expr(0), langEl(0),
+		factor(0), bindId(0) {}
+
+	/* Item carrying an expression. */
+	ReplItem( const InputLoc &loc, Type type, LangExpr *expr ) :
+		loc(loc), type(type), expr(expr), langEl(0),
+		factor(0), bindId(0) {}
+
+	/* Item carrying a factor. There is no expression here: the original
+	 * wrote expr(expr), which initialised the member from its own
+	 * indeterminate value. */
+	ReplItem( const InputLoc &loc, Type type, ProdEl *factor ) :
+		loc(loc), type(type), expr(0), langEl(0),
+		factor(factor), bindId(0) {}
+
+	InputLoc loc;
+	Type type;
+	String data;
+	LangExpr *expr;
+	LangEl *langEl;
+	ProdEl *factor;
+	long bindId;
+
+	/* List links; not set by the constructors. */
+	ReplItem *prev, *next;
+};
+
+typedef DList<ReplItem> ReplItemList;
+
+
+/* A pattern: its item list plus the namespace and region it appears in.
+ * langEl and pdaRun start out null and bind ids are handed out from 1. */
+struct Pattern
+{
+	Pattern( const InputLoc &loc, Namespace *nspace, TokenRegion *region,
+			PatternItemList *list, int patRepId )
+	:
+		loc(loc),
+		nspace(nspace),
+		region(region),
+		list(list),
+		patRepId(patRepId),
+		langEl(0),
+		pdaRun(0),
+		nextBindId(1)
+	{}
+
+	InputLoc loc;
+	Namespace *nspace;
+	TokenRegion *region;
+	PatternItemList *list;
+	long patRepId;
+	LangEl *langEl;
+	PdaRun *pdaRun;
+	long nextBindId;
+
+	/* List links; not set by the constructor. */
+	Pattern *prev, *next;
+};
+
+typedef DList<Pattern> PatternList;
+
+/* A replacement: its item list plus the namespace and region it appears in.
+ * langEl and pdaRun start out null, bind ids are handed out from 1 and the
+ * parse flag defaults to true. */
+struct Replacement
+{
+	Replacement( const InputLoc &loc, Namespace *nspace,
+			TokenRegion *region, ReplItemList *list, int patRepId )
+	:
+		loc(loc),
+		nspace(nspace),
+		region(region),
+		list(list),
+		patRepId(patRepId),
+		langEl(0),
+		pdaRun(0),
+		nextBindId(1),
+		parse(true)
+	{}
+
+	InputLoc loc;
+	Namespace *nspace;
+	TokenRegion *region;
+	ReplItemList *list;
+	int patRepId;
+	LangEl *langEl;
+	PdaRun *pdaRun;
+	long nextBindId;
+	bool parse;
+
+	/* List links; not set by the constructor. */
+	Replacement *prev, *next;
+};
+
+typedef DList<Replacement> ReplList;
+
+/* Text handed to a parser: an item list plus the namespace and region it
+ * appears in. langEl and pdaRun start out null, bind ids are handed out
+ * from 1 and the parse flag defaults to true. */
+struct ParserText
+{
+	ParserText( const InputLoc &loc, Namespace *nspace,
+			TokenRegion *region, ReplItemList *list )
+	:
+		loc(loc),
+		nspace(nspace),
+		region(region),
+		list(list),
+		langEl(0),
+		pdaRun(0),
+		nextBindId(1),
+		parse(true)
+	{}
+
+	InputLoc loc;
+	Namespace *nspace;
+	TokenRegion *region;
+	ReplItemList *list;
+	LangEl *langEl;
+	PdaRun *pdaRun;
+	long nextBindId;
+	bool parse;
+
+	/* List links; not set by the constructor. */
+	ParserText *prev, *next;
+};
+
+typedef DList<ParserText> ParserTextList;
+
+struct Function;
+
+/* Definition of an iterator kind. For user iterators it refers to the
+ * function implementing the iterator. The Code members hold one value per
+ * iterator operation; NOTE(review): presumably bytecode opcodes, with the
+ * R/WC/WV suffixes naming access variants -- confirm against the
+ * constructors, which are defined elsewhere. */
+struct IterDef
+{
+ enum Type { Tree, Child, RevChild, Repeat, RevRepeat, User };
+
+ IterDef( Type type, Function *func );
+ IterDef( Type type );
+
+ Type type;
+
+ Function *func;
+ bool useFuncId;
+ bool useSearchUT;
+
+ Code inCreateWV;
+ Code inCreateWC;
+ Code inDestroy;
+ Code inAdvance;
+
+ Code inGetCurR;
+ Code inGetCurWC;
+ Code inSetCurWC;
+
+ Code inRefFromCur;
+};
+
+/* Ordering policy for sets of IterDef. Definitions are ordered by type
+ * first. Two User iterators are further ordered by the address of the
+ * function they refer to; any other pair of the same type compares equal. */
+struct CmpIterDef
+{
+	static int compare( const IterDef &id1, const IterDef &id2 )
+	{
+		/* Differing types decide the ordering outright. */
+		if ( id1.type != id2.type )
+			return id1.type < id2.type ? -1 : 1;
+
+		/* Only user iterators are distinguished beyond their type. */
+		if ( id1.type != IterDef::User || id1.func == id2.func )
+			return 0;
+
+		return id1.func < id2.func ? -1 : 1;
+	}
+};
+
+typedef AvlSet<IterDef, CmpIterDef> IterDefSet;
+typedef AvlSetEl<IterDef> IterDefSetEl;
+
+
+/*
+ * Unique Types.
+ */
+
+/*
+ * type_ref -> qualified_name
+ * type_ref -> '*' type_ref
+ * type_ref -> '&' type_ref
+ * type_ref -> list type_ref type_ref
+ * type_ref -> map type_ref type_ref
+ * type_ref -> vector type_ref
+ * type_ref -> parser type_ref
+ * type_ref -> iter_tree type_ref
+ * type_ref -> iter_child type_ref
+ * type_ref -> iter_revchild type_ref
+ * type_ref -> iter_repeat type_ref
+ * type_ref -> iter_revrepeat type_ref
+ * type_ref -> iter_user type_ref
+ *
+ * type -> nil
+ * type -> def term
+ * type -> def nonterm
+ * type -> '*' type
+ * type -> '&' type
+ * type -> list type
+ * type -> map type type
+ * type -> vector type
+ * type -> parser type
+ * type -> iter_tree type
+ * type -> iter_child type
+ * type -> iter_revchild type
+ * type -> iter_repeat type
+ * type -> iter_revrepeat type
+ * type -> iter_user type
+ */
+
+/* A unique type: a type id, optionally qualified by a language element or
+ * by an iterator definition. Whichever qualifier does not apply is null. */
+struct UniqueType : public AvlTreeEl<UniqueType>
+{
+	/* Type id only. */
+	UniqueType( int typeId ) :
+		typeId(typeId),
+		langEl(0),
+		iterDef(0) {}
+
+	/* Type id qualified by a language element. */
+	UniqueType( int typeId, LangEl *langEl ) :
+		typeId(typeId),
+		langEl(langEl),
+		iterDef(0) {}
+
+	/* Type id qualified by an iterator definition. There is no language
+	 * element here: the original wrote langEl(langEl), which initialised
+	 * the member from its own indeterminate value. */
+	UniqueType( int typeId, IterDef *iterDef ) :
+		typeId(typeId),
+		langEl(0),
+		iterDef(iterDef) {}
+
+	int typeId;
+	LangEl *langEl;
+	IterDef *iterDef;
+};
+
+/* Comparison policy used by UniqueTypeMap. compare() is only declared here.
+ * NOTE(review): presumably returns <0, 0 or >0 like CmpIterDef -- confirm
+ * against the definition. */
+struct CmpUniqueType
+{
+ static int compare( const UniqueType &ut1, const UniqueType &ut2 );
+};
+
+typedef AvlBasic< UniqueType, CmpUniqueType > UniqueTypeMap;
+
+/* Kind of repetition applied to a type reference. Values start at 1;
+ * RepeatNone is the value every TypeRef constructor starts with. */
+enum RepeatType {
+ RepeatNone = 1,
+ RepeatRepeat,
+ RepeatList,
+ RepeatOpt,
+};
+
+/*
+ * Repeat types.
+ */
+
+/* A repeat kind paired with the language element it is applied to.
+ * declLangEl starts out null. */
+struct UniqueRepeat
+	: public AvlTreeEl<UniqueRepeat>
+{
+	UniqueRepeat( RepeatType repeatType, LangEl *langEl )
+	:
+		repeatType(repeatType),
+		langEl(langEl),
+		declLangEl(0)
+	{}
+
+	RepeatType repeatType;
+	LangEl *langEl;
+	LangEl *declLangEl;
+};
+
+/* Comparison policy used by UniqueRepeatMap. compare() is only declared
+ * here; its ordering is defined elsewhere. */
+struct CmpUniqueRepeat
+{
+ static int compare( const UniqueRepeat &ut1, const UniqueRepeat &ut2 );
+};
+
+typedef AvlBasic< UniqueRepeat, CmpUniqueRepeat > UniqueRepeatMap;
+
+/*
+ * Unique Map Types
+ */
+
+/* A map type identified by its key and value types. The generic pointer
+ * starts out null. */
+struct UniqueMap
+	: public AvlTreeEl<UniqueMap>
+{
+	UniqueMap( UniqueType *key, UniqueType *value )
+	:
+		key(key),
+		value(value),
+		generic(0)
+	{}
+
+	UniqueType *key;
+	UniqueType *value;
+
+	GenericType *generic;
+};
+
+/* Comparison policy used by UniqueMapMap. compare() is only declared here;
+ * its ordering is defined elsewhere. */
+struct CmpUniqueMap
+{
+ static int compare( const UniqueMap &ut1, const UniqueMap &ut2 );
+};
+
+typedef AvlBasic< UniqueMap, CmpUniqueMap > UniqueMapMap;
+
+/*
+ * Unique List Types
+ */
+
+/* A list type identified by its value type. The generic pointer starts out
+ * null. */
+struct UniqueList
+	: public AvlTreeEl<UniqueList>
+{
+	UniqueList( UniqueType *value )
+	:
+		value(value),
+		generic(0)
+	{}
+
+	UniqueType *value;
+	GenericType *generic;
+};
+
+/* Comparison policy used by UniqueListMap. compare() is only declared here;
+ * its ordering is defined elsewhere. */
+struct CmpUniqueList
+{
+ static int compare( const UniqueList &ut1, const UniqueList &ut2 );
+};
+
+typedef AvlBasic< UniqueList, CmpUniqueList > UniqueListMap;
+
+/*
+ * Unique Vector Types
+ */
+
+/* A vector type identified by its value type. The generic pointer starts
+ * out null. */
+struct UniqueVector
+	: public AvlTreeEl<UniqueVector>
+{
+	UniqueVector( UniqueType *value )
+	:
+		value(value),
+		generic(0)
+	{}
+
+	UniqueType *value;
+	GenericType *generic;
+};
+
+/* Comparison policy used by UniqueVectorMap. compare() is only declared
+ * here; its ordering is defined elsewhere. */
+struct CmpUniqueVector
+{
+ static int compare( const UniqueVector &ut1, const UniqueVector &ut2 );
+};
+
+typedef AvlBasic< UniqueVector, CmpUniqueVector > UniqueVectorMap;
+
+/*
+ * Unique Parser Types
+ */
+
+/* A parser type identified by the type it parses. The generic pointer
+ * starts out null. */
+struct UniqueParser
+	: public AvlTreeEl<UniqueParser>
+{
+	UniqueParser( UniqueType *parseType )
+	:
+		parseType(parseType),
+		generic(0)
+	{}
+
+	UniqueType *parseType;
+	GenericType *generic;
+};
+
+/* Comparison policy used by UniqueParserMap. compare() is only declared
+ * here; its ordering is defined elsewhere. */
+struct CmpUniqueParser
+{
+ static int compare( const UniqueParser &ut1, const UniqueParser &ut2 );
+};
+
+typedef AvlBasic< UniqueParser, CmpUniqueParser > UniqueParserMap;
+
+/*
+ *
+ */
+
+typedef AvlMap< StringVect, int, CmpStrVect > VectorTypeIdMap;
+typedef AvlMapEl< StringVect, int > VectorTypeIdMapEl;
+
+typedef Vector<TypeRef*> TypeRefVect;
+
+/* A reference to a type as written in the source. The type member records
+ * which form the reference takes and therefore which of the remaining
+ * members are meaningful. Every constructor starts with repeatType set to
+ * RepeatNone and with nspace and generic null; uniqueType and
+ * searchUniqueType are null unless supplied directly. */
+struct TypeRef
+{
+ enum Type
+ {
+ Unspecified,
+ Name,
+ Literal,
+ Iterator,
+ Map,
+ List,
+ Vector,
+ Parser,
+ Ref,
+ Ptr,
+ };
+
+ /* Qualification and a type name. These require lookup. */
+ TypeRef( const InputLoc &loc, NamespaceQual *nspaceQual, String typeName ) :
+ type(Name), loc(loc), nspaceQual(nspaceQual), typeName(typeName), pdaLiteral(0), iterDef(0),
+ typeRef1(0), typeRef2(0),
+ repeatType(RepeatNone),
+ nspace(0), uniqueType(0), searchUniqueType(0), generic(0) {}
+
+ /* Qualification and a literal. These require lookup. */
+ TypeRef( const InputLoc &loc, NamespaceQual *nspaceQual, PdaLiteral *pdaLiteral ) :
+ type(Literal), loc(loc), nspaceQual(nspaceQual), pdaLiteral(pdaLiteral), iterDef(0),
+ typeRef1(0), typeRef2(0),
+ repeatType(RepeatNone),
+ nspace(0), uniqueType(0), searchUniqueType(0), generic(0) {}
+
+ /* Generics. The caller chooses the form and supplies up to two type
+  * arguments. */
+ TypeRef( Type type, const InputLoc &loc, NamespaceQual *nspaceQual, TypeRef *typeRef1, TypeRef *typeRef2 ) :
+ type(type), loc(loc), nspaceQual(nspaceQual), pdaLiteral(0), iterDef(0),
+ typeRef1(typeRef1), typeRef2(typeRef2),
+ repeatType(RepeatNone),
+ nspace(0), uniqueType(0), searchUniqueType(0), generic(0) {}
+
+ /* Pointers and Refs. A single type argument and no qualification. */
+ TypeRef( Type type, const InputLoc &loc, TypeRef *typeRef1 ) :
+ type(type), loc(loc), nspaceQual(0), pdaLiteral(0), iterDef(0),
+ typeRef1(typeRef1), typeRef2(0),
+ repeatType(RepeatNone),
+ nspace(0), uniqueType(0), searchUniqueType(0), generic(0) {}
+
+ /* Resolution not needed. */
+
+ /* Iterator definition. */
+ TypeRef( const InputLoc &loc, IterDef *iterDef, UniqueType *uniqueType,
+ UniqueType *searchUniqueType ) :
+ type(Iterator), loc(loc), nspaceQual(0), pdaLiteral(0), iterDef(iterDef),
+ typeRef1(0), typeRef2(0),
+ repeatType(RepeatNone),
+ nspace(0), uniqueType(uniqueType), searchUniqueType(searchUniqueType), generic(0) {}
+
+ /* Unique type is given directly. */
+ TypeRef( const InputLoc &loc, UniqueType *uniqueType ) :
+ type(Unspecified), loc(loc), nspaceQual(0), pdaLiteral(0), iterDef(0),
+ typeRef1(0), typeRef2(0),
+ repeatType(RepeatNone),
+ nspace(0), uniqueType(uniqueType), searchUniqueType(0), generic(0) {}
+
+ void resolveRepeat( Compiler *pd );
+
+ /* Lookup functions, all defined elsewhere. */
+ UniqueType *lookupTypeName( Compiler *pd );
+ UniqueType *lookupTypeLiteral( Compiler *pd );
+ UniqueType *lookupTypeMap( Compiler *pd );
+ UniqueType *lookupTypeList( Compiler *pd );
+ UniqueType *lookupTypeVector( Compiler *pd );
+ UniqueType *lookupTypeParser( Compiler *pd );
+ UniqueType *lookupType( Compiler *pd );
+ UniqueType *lookupTypePtr( Compiler *pd );
+ UniqueType *lookupTypeRef( Compiler *pd );
+
+ Type type;
+ InputLoc loc;
+ NamespaceQual *nspaceQual;
+ String typeName;
+ PdaLiteral *pdaLiteral;
+ IterDef *iterDef;
+ TypeRef *typeRef1;
+ TypeRef *typeRef2;
+ RepeatType repeatType;
+
+ /* Resolved. */
+ Namespace *nspace;
+ UniqueType *uniqueType;
+ UniqueType *searchUniqueType;
+ GenericType *generic;
+};
+
+typedef DList<ObjField> ParameterList;
+
+/* Describes a method of an object: its return type, name, the two opcode
+ * values given for it, and its parameter types and parameter list. The
+ * function id, function pointer and iterator definition start out zero/null;
+ * useCallObj starts out true and the other flags false. */
+struct ObjMethod
+{
+	/* types must point to numParams entries when numParams is positive. The
+	 * entries are copied into a freshly allocated array owned by this
+	 * object; nothing in this header frees it. */
+	ObjMethod( UniqueType *returnUT, String name,
+			int opcodeWV, int opcodeWC, int numParams,
+			UniqueType **types, ParameterList *paramList, bool isConst )
+	:
+		returnUT(returnUT),
+		returnTypeId(0),
+		name(name),
+		opcodeWV(opcodeWV),
+		opcodeWC(opcodeWC),
+		numParams(numParams),
+		paramList(paramList),
+		isConst(isConst),
+		funcId(0),
+		useFuncId(false),
+		useCallObj(true),
+		isCustom(false),
+		func(0),
+		iterDef(0)
+	{
+		this->paramUTs = new UniqueType*[numParams];
+
+		/* Skip the copy for an empty parameter list: passing a null source
+		 * pointer to memcpy is undefined even when the length is zero. */
+		if ( numParams > 0 )
+			memcpy( this->paramUTs, types, sizeof(UniqueType*)*numParams );
+	}
+
+	UniqueType *returnUT;
+	long returnTypeId;
+	String name;
+	long opcodeWV;
+	long opcodeWC;
+	long numParams;
+	UniqueType **paramUTs;
+	ParameterList *paramList;
+	bool isConst;
+	long funcId;
+	bool useFuncId;
+	bool useCallObj;
+	bool isCustom;
+	Function *func;
+	IterDef *iterDef;
+};
+
+typedef AvlMap<String, ObjMethod*, CmpStr> ObjMethodMap;
+typedef AvlMapEl<String, ObjMethod*> ObjMethodMapEl;
+
+/* A (production number, child number) pair. ObjField keeps a vector of these
+ * in its rhsVal member. */
+struct RhsVal
+{
+	RhsVal( int prodNum, int childNum )
+		: prodNum(prodNum), childNum(childNum)
+	{}
+
+	int prodNum;
+	int childNum;
+};
+
+/* A named field of an object, with its type reference and the bookkeeping kept
+ * for it: position and offset, a set of usage flags (all false initially
+ * except useOffset), and the instruction codes used to read and write it (all
+ * start out as IN_HALT). Carries its own prev/next links so that it can be
+ * stored in a DList such as ParameterList. */
+struct ObjField
+{
+ /* context, pos and offset start at zero; prev and next are not set here. */
+ ObjField( const InputLoc &loc, TypeRef *typeRef, const String &name ) :
+ loc(loc), typeRef(typeRef), name(name),
+ context(0),
+ pos(0), offset(0),
+ beenReferenced(false),
+ beenInitialized(false),
+ useOffset(true),
+ isConst(false),
+ isLhsEl(false), isRhsEl(false),
+ refActive(false),
+ isArgv(false),
+ isCustom(false),
+ isParam(false),
+ isRhsGet(false),
+ isExport(false),
+ dirtyTree(false),
+ inGetR( IN_HALT ),
+ inGetWC( IN_HALT ),
+ inGetWV( IN_HALT ),
+ inSetWC( IN_HALT ),
+ inSetWV( IN_HALT )
+ {}
+
+ InputLoc loc;
+ TypeRef *typeRef;
+ String name;
+ Context *context;
+ long pos;
+ long offset;
+ bool beenReferenced;
+ bool beenInitialized;
+ bool useOffset;
+ bool isConst;
+ bool isLhsEl;
+ bool isRhsEl;
+ bool refActive;
+ bool isArgv;
+ bool isCustom;
+ bool isParam;
+ bool isRhsGet;
+ bool isExport;
+
+ /* True if some aspect of the tree has possibly been written to. This does
+ * not include attributes. This is here so we can optimize the storage of
+ * old lhs vars. If only a lhs attribute changes we don't need to preserve
+ * the original for backtracking. */
+ bool dirtyTree;
+
+ Vector<RhsVal> rhsVal;
+
+ /* Instruction codes for getting (R, WC, WV) and setting (WC, WV) the field. */
+ Code inGetR;
+ Code inGetWC;
+ Code inGetWV;
+ Code inSetWC;
+ Code inSetWV;
+
+ ObjField *prev, *next;
+};
+
+/* Fields keyed by a String (ObjectDef::insertField takes a name and a field). */
+typedef AvlMap<String, ObjField*, CmpStr> ObjFieldMap;
+typedef AvlMapEl<String, ObjField*> ObjFieldMapEl;
+
+/* List of pointers to fields, as opposed to ParameterList which links the
+ * ObjField nodes themselves. */
+typedef DListVal<ObjField*> ObjFieldList;
+
+/* Repeats the ParameterList typedef that appears before ObjMethod. */
+typedef DList<ObjField> ParameterList;
+
+struct TemplateType;
+
+/* Tree of name scopes for an object def. All of the object fields inside this
+ * tree live in one object def. This is used for scoping names in functions. */
+struct ObjNameScope
+{
+	/* Every pointer starts out null. The field map is not allocated here;
+	 * whoever creates the scope assigns objFieldMap afterwards (ObjectDef's
+	 * constructor does so for its root scope). */
+	ObjNameScope()
+		: objFieldMap(0), parentScope(0), childIter(0),
+		prev(0), next(0)
+	{}
+
+	/* Fields declared directly in this scope. */
+	ObjFieldMap *objFieldMap;
+
+	ObjNameScope *parentScope;
+	DList<ObjNameScope> children;
+
+	/* For iteration after declaration. */
+	ObjNameScope *childIter;
+
+	/* List links used when this scope is stored in a parent's children list. */
+	ObjNameScope *prev, *next;
+};
+
+/* Definition of an object: its kind, name and id, its fields (as a list and as
+ * a tree of name scopes), its methods, and the offsets reported by size() and
+ * sizeTrees(). */
+struct ObjectDef
+{
+ enum Type {
+ UserType,
+ FrameType,
+ IterType,
+ BuiltinType
+ };
+
+ /* Allocates the root name scope with an empty field map, plus an empty
+ * field list and method map. Both offsets start at zero. */
+ ObjectDef( Type type, String name, int id )
+ :
+ type(type), name(name), id(id),
+ nextOffset(0), firstNonTree(0)
+ {
+ scope = new ObjNameScope;
+ scope->objFieldMap = new ObjFieldMap;
+
+ objFieldList = new ObjFieldList;
+ objMethodMap = new ObjMethodMap();
+ }
+
+ Type type;
+ String name;
+ ObjFieldList *objFieldList;
+ ObjMethodMap *objMethodMap;
+
+ /* Head of stack of name scopes. */
+ ObjNameScope *scope;
+
+ void pushScope();
+ void popScope();
+ void iterPushScope();
+ void iterPopScope();
+
+ long id;
+ long nextOffset;
+ long firstNonTree;
+
+ void referenceField( Compiler *pd, ObjField *field );
+ void initField( Compiler *pd, ObjField *field );
+ void createCode( Compiler *pd, CodeVect &code );
+ ObjField *checkRedecl( const String &name );
+ ObjMethod *findMethod( const String &name );
+ ObjField *findFieldInScope( const String &name, ObjNameScope *inScope );
+ ObjField *findField( const String &name );
+ void insertField( const String &name, ObjField *value );
+ void resolve( Compiler *pd );
+ ObjField *findFieldNum( long offset );
+
+ /* size() reports nextOffset and sizeTrees() reports firstNonTree. */
+ long size() { return nextOffset; }
+ long sizeTrees() { return firstNonTree; }
+};
+
+/* Vectors of expression pointers and of strings. */
+typedef Vector<LangExpr*> ExprVect;
+typedef Vector<String> StringVect;
+
+/* A field initializer: the location, the field name and the expression that
+ * supplies its value. */
+struct FieldInit
+{
+	/* exprUT is not known at construction time, so it starts out null rather
+	 * than indeterminate. */
+	FieldInit( const InputLoc &loc, String name, LangExpr *expr )
+		: loc(loc), name(name), expr(expr), exprUT(0) {}
+
+	InputLoc loc;
+	String name;
+	LangExpr *expr;
+
+	/* Type associated with expr; filled in after construction. */
+	UniqueType *exprUT;
+};
+
+typedef Vector<FieldInit*> FieldInitVect;
+
+/* Result record returned by the LangVarRef lookup functions (lookupMethod,
+ * lookupField, lookupQualification, lookupObj). */
+struct VarRefLookup
+{
+ /* Stores the three arguments; objField, objMethod, uniqueType and
+ * iterSearchUT all start at 0. */
+ VarRefLookup( int lastPtrInQual, int firstConstPart, ObjectDef *inObject ) :
+ lastPtrInQual(lastPtrInQual),
+ firstConstPart(firstConstPart),
+ inObject(inObject),
+ objField(0),
+ objMethod(0),
+ uniqueType(0),
+ iterSearchUT(0)
+ {}
+
+ int lastPtrInQual;
+ int firstConstPart;
+ ObjectDef *inObject;
+ ObjField *objField;
+ ObjMethod *objMethod;
+ UniqueType *uniqueType;
+ UniqueType *iterSearchUT;
+};
+
+/* One item of a variable reference's qualification (see LangVarRef::qual): its
+ * location, its text, and whether it is a Dot or an Arrow qualifier. */
+struct QualItem
+{
+ enum Type { Dot, Arrow };
+
+ QualItem( const InputLoc &loc, const String &data, Type type )
+ : loc(loc), data(data), type(type) {}
+
+ InputLoc loc;
+ String data;
+ Type type;
+};
+
+/* Vector of qualification items, held by value. */
+typedef Vector<QualItem> QualItemVect;
+
+/* A reference to a variable: a location, an optional vector of qualification
+ * items and a name. The member functions are only declared here; they cover
+ * lookup of the referenced field or method, loading of the qualifying object,
+ * and code generation for reading, assigning and calling. */
+struct LangVarRef
+{
+ LangVarRef( const InputLoc &loc, QualItemVect *qual, String name )
+ : loc(loc), qual(qual), name(name) {}
+
+ void resolve( Compiler *pd ) const;
+
+ UniqueType *loadFieldInstr( Compiler *pd, CodeVect &code, ObjectDef *inObject,
+ ObjField *el, bool forWriting, bool revert ) const;
+ void setFieldInstr( Compiler *pd, CodeVect &code, ObjectDef *inObject,
+ ObjField *el, UniqueType *exprUT, bool revert ) const;
+
+ /* Note that lookupMethod, unlike the other lookups, is not const. */
+ VarRefLookup lookupMethod( Compiler *pd ) ;
+ VarRefLookup lookupField( Compiler *pd ) const;
+
+ VarRefLookup lookupQualification( Compiler *pd, ObjectDef *rootDef ) const;
+ VarRefLookup lookupObj( Compiler *pd ) const;
+
+ bool isCustom( Compiler *pd ) const;
+ bool isLocalRef( Compiler *pd ) const;
+ bool isContextRef( Compiler *pd ) const;
+ void loadQualification( Compiler *pd, CodeVect &code, ObjectDef *rootObj,
+ int lastPtrInQual, bool forWriting, bool revert ) const;
+ void loadCustom( Compiler *pd, CodeVect &code,
+ int lastPtrInQual, bool forWriting ) const;
+ void loadLocalObj( Compiler *pd, CodeVect &code,
+ int lastPtrInQual, bool forWriting ) const;
+ void loadContextObj( Compiler *pd, CodeVect &code, int lastPtrInQual, bool forWriting ) const;
+ void loadGlobalObj( Compiler *pd, CodeVect &code,
+ int lastPtrInQual, bool forWriting ) const;
+ void loadObj( Compiler *pd, CodeVect &code, int lastPtrInQual, bool forWriting ) const;
+ void canTakeRef( Compiler *pd, VarRefLookup &lookup ) const;
+
+ void setFieldIter( Compiler *pd, CodeVect &code,
+ ObjectDef *inObject, UniqueType *objUT, UniqueType *exprType, bool revert ) const;
+ void setFieldSearch( Compiler *pd, CodeVect &code,
+ ObjectDef *inObject, UniqueType *exprType ) const;
+ void setField( Compiler *pd, CodeVect &code,
+ ObjectDef *inObject, UniqueType *type, bool revert ) const;
+
+ void assignValue( Compiler *pd, CodeVect &code, UniqueType *exprUT ) const;
+ ObjField **evaluateArgs( Compiler *pd, CodeVect &code,
+ VarRefLookup &lookup, ExprVect *args ) const;
+ void callOperation( Compiler *pd, CodeVect &code, VarRefLookup &lookup ) const;
+ /* evaluateCall is also non-const. */
+ UniqueType *evaluateCall( Compiler *pd, CodeVect &code, ExprVect *args );
+ UniqueType *evaluate( Compiler *pd, CodeVect &code, bool forWriting = false ) const;
+ ObjField *evaluateRef( Compiler *pd, CodeVect &code, long pushCount ) const;
+ ObjField *preEvaluateRef( Compiler *pd, CodeVect &code ) const;
+ void resetActiveRefs( Compiler *pd, VarRefLookup &lookup, ObjField **paramRefs ) const;
+ long loadQualificationRefs( Compiler *pd, CodeVect &code ) const;
+ void popRefQuals( Compiler *pd, CodeVect &code,
+ VarRefLookup &lookup, ExprVect *args ) const;
+
+ InputLoc loc;
+ QualItemVect *qual;
+ String name;
+};
+
+/* A term of an expression. The Type tag says which kind of term this is, and
+ * each constructor fills in the members supplied for that kind. Every pointer
+ * member a constructor is not given is set to 0, so unused members can be
+ * tested rather than holding indeterminate values. */
+struct LangTerm
+{
+	enum Type {
+		VarRefType,
+		MethodCallType,
+		NumberType,
+		StringType,
+		MatchType,
+		NewType,
+		ConstructType,
+		TypeIdType,
+		SearchType,
+		NilType,
+		TrueType,
+		FalseType,
+		ParseType,
+		ParseStopType,
+		MakeTreeType,
+		MakeTokenType,
+		EmbedStringType
+	};
+
+	LangTerm( Type type, LangVarRef *varRef )
+		: type(type)
+	{
+		clearPointers();
+		this->varRef = varRef;
+	}
+
+	/* Method call: the type is fixed to MethodCallType. */
+	LangTerm( LangVarRef *varRef, ExprVect *args )
+		: type(MethodCallType)
+	{
+		clearPointers();
+		this->varRef = varRef;
+		this->args = args;
+	}
+
+	LangTerm( const InputLoc &loc, Type type, ExprVect *args )
+		: loc(loc), type(type)
+	{
+		clearPointers();
+		this->args = args;
+	}
+
+	LangTerm( Type type, String data )
+		: type(type), data(data)
+	{
+		clearPointers();
+	}
+
+	LangTerm( Type type, NamespaceQual *nspaceQual, const String &data )
+		: type(type), data(data)
+	{
+		clearPointers();
+		this->nspaceQual = nspaceQual;
+	}
+
+	LangTerm( const InputLoc &loc, Type type )
+		: loc(loc), type(type)
+	{
+		clearPointers();
+	}
+
+	LangTerm( const InputLoc &loc, Type type, TypeRef *typeRef )
+		: loc(loc), type(type)
+	{
+		clearPointers();
+		this->typeRef = typeRef;
+	}
+
+	LangTerm( const InputLoc &loc, Type type, LangVarRef *varRef )
+		: loc(loc), type(type)
+	{
+		clearPointers();
+		this->varRef = varRef;
+	}
+
+	LangTerm( Type type, LangVarRef *varRef, Pattern *pattern )
+		: type(type)
+	{
+		clearPointers();
+		this->varRef = varRef;
+		this->pattern = pattern;
+	}
+
+	LangTerm( const InputLoc &loc, Type type, TypeRef *typeRef, LangVarRef *varRef )
+		: loc(loc), type(type)
+	{
+		clearPointers();
+		this->varRef = varRef;
+		this->typeRef = typeRef;
+	}
+
+	LangTerm( const InputLoc &loc, Type type, TypeRef *typeRef, FieldInitVect *fieldInitArgs,
+			Replacement *replacement )
+		: loc(loc), type(type)
+	{
+		clearPointers();
+		this->typeRef = typeRef;
+		this->fieldInitArgs = fieldInitArgs;
+		this->replacement = replacement;
+	}
+
+	LangTerm( const InputLoc &loc, Type type, LangVarRef *varRef, ObjField *objField,
+			TypeRef *typeRef, FieldInitVect *fieldInitArgs, Replacement *replacement )
+		: loc(loc), type(type)
+	{
+		clearPointers();
+		this->varRef = varRef;
+		this->objField = objField;
+		this->typeRef = typeRef;
+		this->fieldInitArgs = fieldInitArgs;
+		this->replacement = replacement;
+	}
+
+	LangTerm( Type type, LangExpr *expr )
+		: type(type)
+	{
+		clearPointers();
+		this->expr = expr;
+	}
+
+	/* Embedded string: the type is fixed to EmbedStringType. */
+	LangTerm( ReplItemList *replItemList )
+		: type(EmbedStringType)
+	{
+		clearPointers();
+		this->replItemList = replItemList;
+	}
+
+	LangTerm( const InputLoc &loc, Type type, LangVarRef *varRef,
+			ObjField *objField, TypeRef *typeRef, GenericType *generic, TypeRef *parserTypeRef,
+			Replacement *replacement )
+		: loc(loc), type(type)
+	{
+		clearPointers();
+		this->varRef = varRef;
+		this->objField = objField;
+		this->typeRef = typeRef;
+		this->generic = generic;
+		this->parserTypeRef = parserTypeRef;
+		this->replacement = replacement;
+	}
+
+	void resolve( Compiler *pd );
+
+	UniqueType *evaluateParse( Compiler *pd, CodeVect &code, bool stop ) const;
+	UniqueType *evaluateNew( Compiler *pd, CodeVect &code ) const;
+	UniqueType *evaluateConstruct( Compiler *pd, CodeVect &code ) const;
+	UniqueType *evaluateMatch( Compiler *pd, CodeVect &code ) const;
+	UniqueType *evaluate( Compiler *pd, CodeVect &code ) const;
+	void assignFieldArgs( Compiler *pd, CodeVect &code, UniqueType *replUT ) const;
+	UniqueType *evaluateMakeToken( Compiler *pd, CodeVect &code ) const;
+	UniqueType *evaluateMakeTree( Compiler *pd, CodeVect &code ) const;
+	UniqueType *evaluateEmbedString( Compiler *pd, CodeVect &code ) const;
+
+	InputLoc loc;
+	Type type;
+	LangVarRef *varRef;
+	ExprVect *args;
+	NamespaceQual *nspaceQual;
+	String data;
+	ObjField *objField;
+	TypeRef *typeRef;
+	Pattern *pattern;
+	FieldInitVect *fieldInitArgs;
+	GenericType *generic;
+	TypeRef *parserTypeRef;
+	Replacement *replacement;
+	LangExpr *expr;
+	ReplItemList *replItemList;
+
+private:
+	/* Null every pointer member. Called first in each constructor body, before
+	 * the members that constructor was actually given are assigned. */
+	void clearPointers()
+	{
+		varRef = 0;
+		args = 0;
+		nspaceQual = 0;
+		objField = 0;
+		typeRef = 0;
+		pattern = 0;
+		fieldInitArgs = 0;
+		generic = 0;
+		parserTypeRef = 0;
+		replacement = 0;
+		expr = 0;
+		replItemList = 0;
+	}
+};
+
+/* An expression node: a binary operation (left op right), a unary operation
+ * (op right) or a single term. Members that do not apply to the node's type
+ * are set to 0. */
+struct LangExpr
+{
+	enum Type {
+		BinaryType,
+		UnaryType,
+		TermType
+	};
+
+	/* Binary expression. */
+	LangExpr( const InputLoc &loc, LangExpr *left, char op, LangExpr *right )
+		: loc(loc), type(BinaryType), left(left), op(op), right(right), term(0) {}
+
+	/* Unary expression; there is no left operand. */
+	LangExpr( const InputLoc &loc, char op, LangExpr *right )
+		: loc(loc), type(UnaryType), left(0), op(op), right(right), term(0) {}
+
+	/* Term expression; there are no operands and no operator. */
+	LangExpr( LangTerm *term )
+		: type(TermType), left(0), op(0), right(0), term(term) {}
+
+	void resolve( Compiler *pd ) const;
+
+	UniqueType *evaluate( Compiler *pd, CodeVect &code ) const;
+
+	InputLoc loc;
+	Type type;
+	LangExpr *left;
+	char op;
+	LangExpr *right;
+	LangTerm *term;
+};
+
+struct LangStmt;
+typedef DList<LangStmt> StmtList;
+
+/* A statement. The Type tag says which kind of statement this is, and each
+ * constructor fills in the members supplied for that kind. Every pointer
+ * member a constructor is not given is set to 0, so unused members can be
+ * tested rather than holding indeterminate values. */
+struct LangStmt
+{
+	enum Type {
+		AssignType,
+		PrintType,
+		PrintXMLACType,
+		PrintXMLType,
+		PrintStreamType,
+		ExprType,
+		IfType,
+		ElseType,
+		RejectType,
+		WhileType,
+		ReturnType,
+		YieldType,
+		ForIterType,
+		BreakType,
+		ParserType
+	};
+
+	LangStmt( const InputLoc &loc, Type type, FieldInitVect *fieldInitVect ) :
+		loc(loc), type(type)
+	{
+		clearPointers();
+		this->fieldInitVect = fieldInitVect;
+	}
+
+	LangStmt( const InputLoc &loc, Type type, ExprVect *exprPtrVect ) :
+		loc(loc), type(type)
+	{
+		clearPointers();
+		this->exprPtrVect = exprPtrVect;
+	}
+
+	LangStmt( const InputLoc &loc, Type type, LangExpr *expr ) :
+		loc(loc), type(type)
+	{
+		clearPointers();
+		this->expr = expr;
+	}
+
+	LangStmt( Type type, LangVarRef *varRef ) :
+		type(type)
+	{
+		clearPointers();
+		this->varRef = varRef;
+	}
+
+	LangStmt( const InputLoc &loc, Type type, ObjField *objField ) :
+		loc(loc), type(type)
+	{
+		clearPointers();
+		this->objField = objField;
+	}
+
+	LangStmt( const InputLoc &loc, Type type, LangVarRef *varRef, LangExpr *expr ) :
+		loc(loc), type(type)
+	{
+		clearPointers();
+		this->varRef = varRef;
+		this->expr = expr;
+	}
+
+	LangStmt( Type type, LangExpr *expr, StmtList *stmtList ) :
+		type(type)
+	{
+		clearPointers();
+		this->expr = expr;
+		this->stmtList = stmtList;
+	}
+
+	LangStmt( Type type, StmtList *stmtList ) :
+		type(type)
+	{
+		clearPointers();
+		this->stmtList = stmtList;
+	}
+
+	LangStmt( Type type, LangExpr *expr, StmtList *stmtList, LangStmt *elsePart ) :
+		type(type)
+	{
+		clearPointers();
+		this->expr = expr;
+		this->stmtList = stmtList;
+		this->elsePart = elsePart;
+	}
+
+	LangStmt( const InputLoc &loc, Type type ) :
+		loc(loc), type(type)
+	{
+		clearPointers();
+	}
+
+	LangStmt( Type type, LangVarRef *varRef, Replacement *replacement ) :
+		type(type)
+	{
+		clearPointers();
+		this->varRef = varRef;
+		this->replacement = replacement;
+	}
+
+	LangStmt( Type type, LangVarRef *varRef, ParserText *parserText ) :
+		type(type)
+	{
+		clearPointers();
+		this->varRef = varRef;
+		this->parserText = parserText;
+	}
+
+	/* ForIterType */
+	LangStmt( const InputLoc &loc, Type type, ObjField *objField,
+			TypeRef *typeRef, LangTerm *langTerm, StmtList *stmtList ) :
+		loc(loc), type(type)
+	{
+		clearPointers();
+		this->langTerm = langTerm;
+		this->objField = objField;
+		this->typeRef = typeRef;
+		this->stmtList = stmtList;
+	}
+
+	LangStmt( Type type ) :
+		type(type)
+	{
+		clearPointers();
+	}
+
+	void resolve( Compiler *pd ) const;
+	void resolveParserItems( Compiler *pd ) const;
+
+	void evaluateParserItems( Compiler *pd, CodeVect &code ) const;
+	LangTerm *chooseDefaultIter( Compiler *pd, LangTerm *fromVarRef ) const;
+	void compileWhile( Compiler *pd, CodeVect &code ) const;
+	void compileForIterBody( Compiler *pd, CodeVect &code, UniqueType *iterUT ) const;
+	void compileForIter( Compiler *pd, CodeVect &code ) const;
+	void compile( Compiler *pd, CodeVect &code ) const;
+
+	InputLoc loc;
+	Type type;
+	LangVarRef *varRef;
+	LangTerm *langTerm;
+	ObjField *objField;
+	TypeRef *typeRef;
+	LangExpr *expr;
+	Replacement *replacement;
+	ParserText *parserText;
+	ExprVect *exprPtrVect;
+	FieldInitVect *fieldInitVect;
+	StmtList *stmtList;
+	/* Either another if, or an else. */
+	LangStmt *elsePart;
+	String name;
+
+	/* Normally you don't need to initialize double list pointers, however, we
+	 * make use of the next pointer for returning a pair of statements using
+	 * one pointer to a LangStmt, so every constructor nulls it (along with
+	 * prev) through clearPointers(). */
+	LangStmt *prev, *next;
+
+private:
+	/* Null every pointer member. Called first in each constructor body, before
+	 * the members that constructor was actually given are assigned. */
+	void clearPointers()
+	{
+		varRef = 0;
+		langTerm = 0;
+		objField = 0;
+		typeRef = 0;
+		expr = 0;
+		replacement = 0;
+		parserText = 0;
+		exprPtrVect = 0;
+		fieldInitVect = 0;
+		stmtList = 0;
+		elsePart = 0;
+		prev = 0;
+		next = 0;
+	}
+};
+
+/* A block of statements together with what is needed to compile it: a frame
+ * id, the local frame object, a context, and the two generated code vectors. */
+struct CodeBlock
+{
+ /* frameId starts at -1; localFrame and context start at 0. */
+ CodeBlock( StmtList *stmtList )
+ :
+ frameId(-1),
+ stmtList(stmtList),
+ localFrame(0),
+ context(0) {}
+
+ void compile( Compiler *pd, CodeVect &code ) const;
+ void resolve( Compiler *pd ) const;
+
+ long frameId;
+ StmtList *stmtList;
+ ObjectDef *localFrame;
+ CharSet trees;
+ Context *context;
+
+ /* Each frame has two versions of
+ * the code: revert and commit. */
+ CodeVect codeWV, codeWC;
+};
+
+/* A function or user iterator: its return type reference, name, parameter
+ * list, body and id, plus data filled in after construction. Carries its own
+ * prev/next links for storage in a FunctionList. */
+struct Function
+{
+	/* Stores the arguments. Everything not passed in (transBlock, localFrame,
+	 * paramListSize, paramUTs, inContext) starts at zero. */
+	Function( TypeRef *typeRef, const String &name,
+			ParameterList *paramList, CodeBlock *codeBlock,
+			int funcId, bool isUserIter )
+	:
+		transBlock(0),
+		typeRef(typeRef),
+		name(name),
+		paramList(paramList),
+		codeBlock(codeBlock),
+		localFrame(0),
+		funcId(funcId),
+		isUserIter(isUserIter),
+		paramListSize(0),
+		paramUTs(0),
+		inContext(0)
+	{}
+
+	TransBlock *transBlock;
+	TypeRef *typeRef;
+	String name;
+	ParameterList *paramList;
+	CodeBlock *codeBlock;
+	ObjectDef *localFrame;
+	long funcId;
+	bool isUserIter;
+	long paramListSize;
+	UniqueType **paramUTs;
+	Context *inContext;
+
+	Function *prev, *next;
+};
+
+typedef DList<Function> FunctionList;
+
+#endif /* _PARSETREE_H */
diff --git a/src/pcheck.cc b/src/pcheck.cc
new file mode 100644
index 00000000..d5401f7d
--- /dev/null
+++ b/src/pcheck.cc
@@ -0,0 +1,154 @@
+/*
+ * Copyright 2006-2012 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "pcheck.h"
+#include <assert.h>
+
+/* Construct a new parameter checker for paramSpec. Scanning starts at argv[1]
+ * (iCurArg is initialized to 1). The result fields parameter and parameterArg
+ * are zeroed so they hold defined values before the first call to check(). */
+ParamCheck::ParamCheck( const char *paramSpec, int argc, const char **argv )
+:
+	parameterArg(0),
+	parameter(0),
+	state(noparam),
+	argOffset(0),
+	curArg(0),
+	iCurArg(1),
+	paramSpec(paramSpec),
+	argc(argc),
+	argv(argv)
+{
+}
+
+/* Examine the next option on the command line and advance the scan position.
+ * Returns false once iCurArg has run off the end of the arg list, true
+ * otherwise. On a true return the outcome is left in state:
+ *   noparam - the current arg is null, does not begin with '-', or is only a
+ *             dash. parameter and parameterArg are zero and curArg is the arg.
+ *   match   - parameter holds the matched option character and parameterArg
+ *             its argument, or 0 if the option takes none.
+ *   invalid - parameter holds an option character that is not in paramSpec,
+ *             or one that requires an argument when it is the last arg. */
+bool ParamCheck::check()
+{
+ bool requiresParam;
+
+ if ( iCurArg >= argc ) { /* Off the end of the arg list. */
+ state = noparam;
+ return false;
+ }
+
+ if ( argOffset != 0 && *argOffset == 0 ) {
+ /* We are at the end of an arg string. */
+ iCurArg += 1;
+ if ( iCurArg >= argc ) {
+ state = noparam;
+ return false;
+ }
+ argOffset = 0;
+ }
+
+ if ( argOffset == 0 ) {
+ /* Set the current arg. */
+ curArg = argv[iCurArg];
+
+ /* We are at the beginning of an arg string. */
+ if ( argv[iCurArg] == 0 || /* Argv[iCurArg] is null. */
+ argv[iCurArg][0] != '-' || /* Not a param. */
+ argv[iCurArg][1] == 0 ) { /* Only a dash. */
+ parameter = 0;
+ parameterArg = 0;
+
+ iCurArg += 1;
+ state = noparam;
+ return true;
+ }
+ /* Skip the leading dash. */
+ argOffset = argv[iCurArg] + 1;
+ }
+
+ /* Get the arg char. */
+ char argChar = *argOffset;
+
+ /* Loop over all the parms and look for a match. */
+ const char *pSpec = paramSpec;
+ while ( *pSpec != 0 ) {
+ char pSpecChar = *pSpec;
+
+ /* If there is a ':' following the char then
+ * it requires a parm. If a parm is required
+ * then move ahead two in the parmspec. Otherwise
+ * move ahead one in the parm spec. */
+ if ( pSpec[1] == ':' ) {
+ requiresParam = true;
+ pSpec += 2;
+ }
+ else {
+ requiresParam = false;
+ pSpec += 1;
+ }
+
+ /* Do we have a match. */
+ if ( argChar == pSpecChar ) {
+ if ( requiresParam ) {
+ if ( argOffset[1] == 0 ) {
+ /* The param must follow. */
+ if ( iCurArg + 1 == argc ) {
+ /* We are the last arg so there
+ * cannot be a parameter to it. */
+ parameter = argChar;
+ parameterArg = 0;
+ iCurArg += 1;
+ argOffset = 0;
+ state = invalid;
+ return true;
+ }
+ else {
+ /* the parameter to the arg is the next arg. */
+ parameter = pSpecChar;
+ parameterArg = argv[iCurArg + 1];
+ iCurArg += 2;
+ argOffset = 0;
+ state = match;
+ return true;
+ }
+ }
+ else {
+ /* The param for the arg is built in. */
+ parameter = pSpecChar;
+ parameterArg = argOffset + 1;
+ iCurArg += 1;
+ argOffset = 0;
+ state = match;
+ return true;
+ }
+ }
+ else {
+ /* Good, we matched the parm and no
+ * arg is required. Stay inside this arg string so that
+ * the next call looks at the following character. */
+ parameter = pSpecChar;
+ parameterArg = 0;
+ argOffset += 1;
+ state = match;
+ return true;
+ }
+ }
+ }
+
+ /* We did not find a match. Bad Argument. */
+ parameter = argChar;
+ parameterArg = 0;
+ argOffset += 1;
+ state = invalid;
+ return true;
+}
+
+
diff --git a/src/pcheck.h b/src/pcheck.h
new file mode 100644
index 00000000..5be60426
--- /dev/null
+++ b/src/pcheck.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright 2001, 2002 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _PCHECK_H
+#define _PCHECK_H
+
+/* Command line option scanner. Construct it with a parameter spec and the
+ * program's argc/argv, then call check() repeatedly; after each call that
+ * returns true the public members below describe what was found. */
+class ParamCheck
+{
+public:
+ ParamCheck( const char *paramSpec, int argc, const char **argv );
+
+ bool check();
+
+ const char *parameterArg; /* The argument to the parameter. */
+ char parameter; /* The parameter matched. */
+ enum { match, invalid, noparam } state;
+
+ const char *argOffset; /* If we are reading params inside an
+ * arg this points to the offset. */
+
+ const char *curArg; /* Pointer to the current arg. */
+ int iCurArg; /* Index to the current arg. */
+
+private:
+ const char *paramSpec; /* Parameter spec supplied by the coder. */
+ int argc; /* Argument data from the command line. */
+ const char **argv;
+};
+
+#endif /* _PCHECK_H */
diff --git a/src/pdabuild.cc b/src/pdabuild.cc
new file mode 100644
index 00000000..4bce96ce
--- /dev/null
+++ b/src/pdabuild.cc
@@ -0,0 +1,2091 @@
+/*
+ * Copyright 2006-2012 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <iostream>
+#include <iomanip>
+#include <errno.h>
+#include <stdlib.h>
+
+/* Parsing. */
+#include "global.h"
+#include "parsedata.h"
+#include "pdacodegen.h"
+#include "pdarun.h"
+#include "redfsm.h"
+#include "fsmcodegen.h"
+#include "redbuild.h"
+#include "fsmrun.h"
+
+/* Dumping the fsm. */
+#include "mergesort.h"
+
+using namespace std;
+
+/* NOTE(review): presumably the name given to the grammar's start definition;
+ * its uses are not visible in this part of the file -- confirm. */
+char startDefName[] = "start";
+
+/* Total up the transition map lengths of every state on the fsm's state
+ * list. */
+int countTransitions( PdaGraph *fsm )
+{
+	int total = 0;
+	for ( PdaState *st = fsm->stateList.head; st != 0; st = st->next )
+		total += st->transMap.length();
+	return total;
+}
+
+/* Construct a language element in the given namespace. The literal form (lit)
+ * starts out equal to name. id and parserId start at -1 (makeLangElIds later
+ * assigns ids to elements whose id is still negative), predType starts as
+ * PredNone, and every other flag, pointer and counter starts false or zero. */
+LangEl::LangEl( Namespace *nspace, const String &name, Type type )
+:
+ nspace(nspace),
+ name(name),
+ lit(name),
+ type(type),
+ id(-1),
+ isUserTerm(false),
+ isContext(false),
+ displayString(0),
+ numAppearances(0),
+ commit(false),
+ ignore(false),
+ reduceFirst(false),
+ isLiteral(false),
+ isRepeat(false),
+ isList(false),
+ isOpt(false),
+ parseStop(false),
+ isEOF(false),
+ repeatOf(0),
+ tokenDef(0),
+ rootDef(0),
+ termDup(0),
+ eofLel(0),
+ pdaGraph(0),
+ pdaTables(0),
+ transBlock(0),
+ objectDef(0),
+ thisSize(0),
+ ofiOffset(0),
+ generic(0),
+ parserId(-1),
+ predType(PredNone),
+ predValue(0),
+ contextDef(0),
+ contextIn(0),
+ noPreIgnore(false),
+ noPostIgnore(false),
+ isCI(false),
+ ciRegion(0)
+{
+}
+
+/* Build the FSM for one production as a straight chain of states. Starting
+ * from a fresh start state, an optional first transition is added for the
+ * production's collect-ignore region, then one shift transition per
+ * production element, keyed on the element's language element id. The state
+ * reached after the last element is made the final state. The caller receives
+ * the newly allocated graph. */
+PdaGraph *ProdElList::walk( Compiler *pd, Definition *prod )
+{
+ PdaGraph *prodFsm = new PdaGraph();
+ PdaState *last = prodFsm->addState();
+ prodFsm->setStartState( last );
+
+ if ( prod->collectIgnoreRegion != 0 ) {
+// cerr << "production " << prod->data << " has collect ignore region " <<
+// prod->collectIgnoreRegion->name << endl;
+
+ /* Use the IGNORE TOKEN lang el for the region. */
+ long value = prod->collectIgnoreRegion->ciLel->id;
+
+ PdaState *newState = prodFsm->addState();
+ PdaTrans *newTrans = prodFsm->appendNewTrans( last, newState, value, value );
+
+ newTrans->isShift = true;
+ newTrans->shiftPrior = 0; // WAT
+ last = newState;
+ }
+
+ /* prodLength counts the elements processed so far; the collect-ignore
+ * transition above is not counted. */
+ int prodLength = 0;
+ for ( Iter prodEl = first(); prodEl.lte(); prodEl++, prodLength++ ) {
+ //PdaGraph *itemFsm = prodEl->walk( pd );
+ long value = prodEl->langEl->id;
+
+ PdaState *newState = prodFsm->addState();
+ PdaTrans *newTrans = prodFsm->appendNewTrans( last, newState, value, value );
+
+ newTrans->isShift = true;
+ newTrans->shiftPrior = prodEl->priorVal;
+ //cerr << "PRIOR VAL: " << newTrans->shiftPrior << endl;
+
+ if ( prodEl->commit ) {
+ //cout << "COMMIT: inserting commit of length: " << pd->prodLength << endl;
+ /* Insert the commit into transitions out of last */
+ for ( TransMap::Iter trans = last->transMap; trans.lte(); trans++ )
+ trans->value->commits.insert( prodLength );
+ }
+
+ last = newState;
+ }
+
+ /* Make the last state the final state. */
+ prodFsm->setFinState( last );
+ return prodFsm;
+}
+
+
+/* Allocate a production element list holding a single element that refers to
+ * langEl through its TYPE_TREE unique type. */
+ProdElList *Compiler::makeProdElList( LangEl *langEl )
+{
+	ProdElList *list = new ProdElList();
+
+	/* The one element's type reference is given the unique type directly. */
+	TypeRef *treeTypeRef = new TypeRef( InputLoc(),
+			findUniqueType( TYPE_TREE, langEl ) );
+	list->append( new ProdEl( InputLoc(), treeTypeRef ) );
+
+	/* Bind the element just appended to the language element. */
+	list->tail->langEl = langEl;
+	return list;
+}
+
+/* Name every definition "<language element name>-<n>", where n numbers the
+ * definitions of each language element starting from 1. */
+void Compiler::makeDefinitionNames()
+{
+	for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
+		int numbered = 0;
+		for ( LelDefList::Iter def = lel->defList; def.lte(); def++ ) {
+			numbered += 1;
+			def->data.setAs( lel->name.length() + 32, "%s-%i",
+					lel->name.data, numbered );
+		}
+	}
+}
+
+/* Make sure there there are no language elements whose type is unkonwn. This
+ * can happen when an id is used on the rhs of a definition but is not defined
+ * as anything. */
+void Compiler::noUndefindLangEls()
+{
+ for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
+ if ( lel->type == LangEl::Unknown )
+ error() << "'" << lel->name << "' was not defined as anything" << endp;
+ }
+}
+
+/* Assign a symbol id to every language element. Ids are handed out in this
+ * order: user terminals, EOF elements, the notoken element, the error element
+ * (if there is one), then everything that is still unassigned. The first id
+ * given out in the last group is recorded in firstNonTermId. */
+void Compiler::makeLangElIds()
+{
+ /* The first id 0 is reserved for the stack sentinel. A negative id means
+ * error to the parsing function, inducing backtracking. */
+ nextSymbolId = 1;
+
+ /* First pass assigns to the user terminals. */
+ for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
+ /* Must be a term, and not any of the special reserved terminals.
+ * Remember that the terminal is a user terminal. */
+ if ( lel->type == LangEl::Term &&
+ !lel->isEOF &&
+ lel != errorLangEl &&
+ lel != noTokenLangEl )
+ {
+ lel->isUserTerm = true;
+ lel->id = nextSymbolId++;
+ }
+ }
+
+ //eofLangEl->id = nextSymbolId++;
+ for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
+ /* Second pass assigns to the EOF elements, which were skipped
+ * above. */
+ if ( lel->isEOF )
+ lel->id = nextSymbolId++;
+ }
+
+ /* Next assign to the notoken element, which we always create. */
+ noTokenLangEl->id = nextSymbolId++;
+
+ /* Possibly assign to the error language element. */
+ if ( errorLangEl != 0 )
+ errorLangEl->id = nextSymbolId++;
+
+ /* Save this for the code generation. */
+ firstNonTermId = nextSymbolId;
+
+ /* A final pass assigns to everything else. */
+ for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
+ /* Anything else not yet assigned gets assigned now. */
+ if ( lel->id < 0 )
+ lel->id = nextSymbolId++;
+ }
+
+ /* The built-in elements must have ended up on their fixed LEL_ID_*
+ * values. */
+ assert( ptrLangEl->id == LEL_ID_PTR );
+ assert( boolLangEl->id == LEL_ID_BOOL );
+ assert( intLangEl->id == LEL_ID_INT );
+ assert( strLangEl->id == LEL_ID_STR );
+ assert( streamLangEl->id == LEL_ID_STREAM );
+ assert( inputLangEl->id == LEL_ID_INPUT );
+ assert( ignoreLangEl->id == LEL_ID_IGNORE );
+}
+
+/* Qualify the names of lel with its enclosing namespaces. Walking outwards
+ * from nspace, each namespace's name is prepended (followed by "::") to
+ * refName, declName and xmlTag. On reaching the default or root namespace
+ * only refName gets a leading "::". */
+void Compiler::refNameSpace( LangEl *lel, Namespace *nspace )
+{
+	while ( nspace != defaultNamespace && nspace != rootNamespace ) {
+		lel->refName = nspace->name + "::" + lel->refName;
+		lel->declName = nspace->name + "::" + lel->declName;
+		lel->xmlTag = nspace->name + "::" + lel->xmlTag;
+
+		/* Step out one level. */
+		nspace = nspace->parentNamespace;
+	}
+
+	lel->refName = "::" + lel->refName;
+}
+
+/* Fill in the derived names (fullName, fullLit, refName, declName, xmlTag) of
+ * every language element, then qualify them with the element's namespaces. */
+void Compiler::makeLangElNames()
+{
+	for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
+		const bool isInt = ( lel->id == LEL_ID_INT );
+		const bool isBool = !isInt && ( lel->id == LEL_ID_BOOL );
+
+		if ( isInt || isBool ) {
+			/* The int and bool elements get fixed, underscore-prefixed
+			 * names and a plain xml tag. */
+			const char *internalName = isInt ? "_int" : "_bool";
+			lel->fullName = internalName;
+			lel->fullLit = internalName;
+			lel->refName = internalName;
+			lel->declName = internalName;
+			lel->xmlTag = isInt ? "int" : "bool";
+		}
+		else {
+			/* Everything else is named after its own name and literal. */
+			lel->fullName = lel->name;
+			lel->fullLit = lel->lit;
+			lel->refName = lel->lit;
+			lel->declName = lel->lit;
+			lel->xmlTag = lel->name;
+		}
+
+		/* If there is also a namespace next to the type, we add a prefix to
+		 * the type. It's not convenient to name C++ classes the same as a
+		 * namespace in the same scope. We don't want to restrict colm, so we
+		 * add a workaround for the least-common case. The type gets t_
+		 * prefix. */
+		if ( lel->nspace->findNamespace( lel->name ) != 0 ) {
+			lel->refName = "t_" + lel->refName;
+			lel->fullName = "t_" + lel->fullName;
+			lel->declName = "t_" + lel->declName;
+			lel->xmlTag = "t_" + lel->xmlTag;
+		}
+
+		refNameSpace( lel, lel->nspace );
+	}
+}
+
+/* Set up dot sets, shift info, and prod sets. Builds one FSM per production,
+ * computes the first sets, then for each production records its length, its
+ * low/high entries in dotItemIndex, a dot set number for each of its states
+ * and, for committing productions, a pending commit on each final state.
+ * Finally builds prodIdIndex, which maps a production id to its Definition. */
+void Compiler::makeProdFsms()
+{
+ /* There are two items in the index for each production (high and low). */
+ int indexLen = prodList.length() * 2;
+ dotItemIndex.setAsNew( indexLen );
+ int dsiLow = 0, indexPos = 0;
+
+ /* Build FSMs for all production language elements. */
+ for ( DefList::Iter prod = prodList; prod.lte(); prod++ )
+ prod->fsm = prod->prodElList->walk( this, prod );
+
+ makeNonTermFirstSets();
+ makeFirstSets();
+
+ /* Fill in the length, the dot item index and the dot sets for each
+ * production. */
+ for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) {
+ if ( addUniqueEmptyProductions ) {
+ /* This must be re-implemented. */
+ assert( false );
+ //if ( !prod->isLeftRec && prod->uniqueEmptyLeader != 0 ) {
+ // PdaGraph *emptyLeader = prod->uniqueEmptyLeader->walk( this );
+ // emptyLeader->concatOp( prod->fsm );
+ // prod->fsm = emptyLeader;
+ //}
+ }
+
+ /* Compute the machine's length. */
+ prod->fsmLength = prod->fsm->fsmLength( );
+
+ /* Productions have a unique production id for each final state.
+ * This lets us use a production length specific to each final state.
+ * Start states are always isolated therefore if the start state is
+ * final then reductions from it will always have a fixed production
+ * length. This is a simple method for determining the length
+ * of zero-length derivations when reducing. */
+
+ /* Number of dot items needed for the production is elements + 1
+ * because the dot can be before the first and after the last element.
+ * NOTE(review): the value is computed from the state count, and the
+ * FSMs built by ProdElList::walk already have one state more than
+ * they have transitions -- confirm the extra slot is intended. */
+ int numForProd = prod->fsm->stateList.length() + 1;
+
+ /* Set up the low and high values in the index for this production. */
+ dotItemIndex.data[indexPos].key = dsiLow;
+ dotItemIndex.data[indexPos].value = prod;
+ dotItemIndex.data[indexPos+1].key = dsiLow + numForProd - 1;
+ dotItemIndex.data[indexPos+1].value = prod;
+
+ /* Number the states consecutively, starting from the low value. */
+ int dsi = dsiLow;
+ for ( PdaStateList::Iter state = prod->fsm->stateList; state.lte(); state++, dsi++ ) {
+ /* All transitions are shifts. */
+ for ( TransMap::Iter out = state->transMap; out.lte(); out++ )
+ assert( out->value->isShift );
+
+ state->dotSet.insert( dsi );
+ }
+
+ /* Move over the production. */
+ dsiLow += numForProd;
+ indexPos += 2;
+
+ if ( prod->prodCommit ) {
+ for ( PdaStateSet::Iter fin = prod->fsm->finStateSet; fin.lte(); fin++ ) {
+ int length = prod->fsmLength;
+ //cerr << "PENDING COMMIT IN FINAL STATE of " << prod->prodId <<
+ // " with len: " << length << endl;
+ (*fin)->pendingCommits.insert( ProdIdPair( prod->prodId, length ) );
+ }
+ }
+ }
+
+ /* Make the final state specific prod id to prod id mapping. */
+ prodIdIndex = new Definition*[prodList.length()];
+ for ( DefList::Iter prod = prodList; prod.lte(); prod++ )
+ prodIdIndex[prod->prodId] = prod;
+}
+
+/* Accumulate into result the symbols that may follow the production position
+ * overSrc (a state in the production's own fsm), where overTab is the
+ * corresponding state in the parse table.
+ *
+ * - At the end of the production (overSrc is final) the follow is read from
+ *   the table: every key of overTab whose action list holds the reduce code
+ *   of parentDef is added.
+ * - Otherwise the single outgoing symbol is examined. A non-nonterminal key is
+ *   added directly. For a nonterminal, the first sets of all its definitions
+ *   are added; if any of them holds -1 (epsilon) the walk steps over the
+ *   symbol in both machines and recurses. The nonterminal's termDup id, if
+ *   any, is added as well.
+ */
+void Compiler::findFollow( AlphSet &result, PdaState *overTab,
+		PdaState *overSrc, Definition *parentDef )
+{
+	if ( overSrc->isFinState() ) {
+		assert( overSrc->transMap.length() == 0 );
+
+		/* End of the production: collect the table keys on which the parent
+		 * production is reduced. */
+		long reduceCode = makeReduceCode( parentDef->prodId, false );
+		for ( TransMap::Iter tt = overTab->transMap; tt.lte(); tt++ ) {
+			for ( ActDataList::Iter act = tt->value->actions; act.lte(); act++ ) {
+				if ( *act == reduceCode )
+					result.insert( tt->key );
+			}
+		}
+		return;
+	}
+
+	/* A non-final production state has exactly one way forward. */
+	assert( overSrc->transMap.length() == 1 );
+	TransMap::Iter next = overSrc->transMap;
+	LangEl *nextEl = langElIndex[next->key];
+
+	if ( nextEl == 0 || nextEl->type != LangEl::NonTerm ) {
+		/* Not a nonterminal: the key itself is the follow. */
+		result.insert( next->key );
+		return;
+	}
+
+	/* Nonterminal: union in the first sets of all its definitions and note
+	 * whether any of them can derive epsilon. */
+	bool nullable = false;
+	for ( LelDefList::Iter def = nextEl->defList; def.lte(); def++ ) {
+		result.insert( def->firstSet );
+		if ( def->firstSet.find( -1 ) )
+			nullable = true;
+	}
+
+	if ( nullable ) {
+		/* Step over the nonterminal in both the table and the production
+		 * and keep looking. */
+		PdaTrans *tabTrans = overTab->findTrans( next->key );
+		findFollow( result, tabTrans->toState,
+				next->value->toState, parentDef );
+	}
+
+	/* The duplicate terminal of the nonterminal, when there is one. */
+	if ( nextEl->termDup != 0 )
+		result.insert( nextEl->termDup->id );
+}
+
+/* Walk a production fsm from prodState for as long as the current production
+ * state has exactly one outgoing transition, taking the same-keyed
+ * transition in the parse table each step. Returns the table state reached
+ * when the walk stops. */
+PdaState *Compiler::followProd( PdaState *tabState, PdaState *prodState )
+{
+	for ( ;; ) {
+		if ( prodState->transMap.length() != 1 )
+			break;
+
+		TransMap::Iter step = prodState->transMap;
+		PdaTrans *tabStep = tabState->findTrans( step->key );
+
+		/* Advance both machines in lock step. */
+		tabState = tabStep->toState;
+		prodState = step->value->toState;
+	}
+	return tabState;
+}
+
+/* Locate the first occurrence of code in trans->actions. If its ordering
+ * slot in trans->actOrds is still zero (unset), stamp it with the current
+ * time and advance time. Does nothing when the code is not on the
+ * transition or the slot is already set. */
+void Compiler::trySetTime( PdaTrans *trans, long code, long &time )
+{
+	ActDataList::Iter act = trans->actions;
+	while ( act.lte() && *act != code )
+		act++;
+
+	/* Action not present on this transition. */
+	if ( !act.lte() )
+		return;
+
+	/* Only the first visit assigns the ordering. */
+	if ( trans->actOrds[act.pos()] == 0 )
+		trans->actOrds[act.pos()] = time++;
+}
+
+/* Order the reduce actions of one definition.
+ *
+ * The follow set is taken past tabTrans/srcTrans (the transitions over the
+ * nonterminal being expanded, in the table and in the parent production).
+ * When parentDef is the root definition of rootEl the follow is just the eof
+ * element of rootEl. The definition is then walked from tabState to the
+ * table state where it ends, and every transition there whose key is in the
+ * follow set gets its reduce action timed and its token region recorded. */
+void Compiler::pdaOrderFollow( LangEl *rootEl, PdaState *tabState,
+		PdaTrans *tabTrans, PdaTrans *srcTrans, Definition *parentDef,
+		Definition *definition, long &time )
+{
+	/* States reached after stepping over the definition being processed. */
+	PdaState *pastInTab = tabTrans->toState;
+	PdaState *pastInSrc = srcTrans->toState;
+
+	AlphSet follow;
+	if ( parentDef == rootEl->rootDef )
+		follow.insert( rootEl->eofLel->id );
+	else
+		findFollow( follow, pastInTab, pastInSrc, parentDef );
+
+	/* Table state in which the definition has been fully consumed. */
+	PdaState *reduceState = followProd( tabState, definition->fsm->startState );
+
+	/* The reduce action we are looking for. */
+	long reduceCode = makeReduceCode( definition->prodId, false );
+
+	for ( TransMap::Iter out = reduceState->transMap; out.lte(); out++ ) {
+		if ( !follow.find( out->key ) )
+			continue;
+
+		trySetTime( out->value, reduceCode, time );
+
+		/* Record the token region of the lookahead in the state. */
+		addRegion( reduceState, out->value, out->key,
+				out->value->noPreIgnore, out->value->noPostIgnore );
+	}
+}
+
+/* True if region (compared by pointer) is already an element of regVect. */
+bool regionVectHas( RegionVect &regVect, TokenRegion *region )
+{
+	RegionVect::Iter it = regVect;
+	while ( it.lte() ) {
+		if ( *it == region )
+			return true;
+		it++;
+	}
+	return false;
+}
+
+/* Record, for the terminal identified by pdaKey, the token region that must
+ * be scanned in tabState and the ignore-only region that precedes the target
+ * state of tabTrans. Nothing happens when pdaKey is not a terminal or no
+ * region can be determined for it. Regions are appended at most once.
+ *
+ * NOTE(review): the noPreIgnore/noPostIgnore parameters are not read here;
+ * the flags on the language element are used instead -- confirm intended. */
+void Compiler::addRegion( PdaState *tabState, PdaTrans *tabTrans,
+		long pdaKey, bool noPreIgnore, bool noPostIgnore )
+{
+	LangEl *langEl = langElIndex[pdaKey];
+	if ( langEl == 0 || langEl->type != LangEl::Term )
+		return;
+
+	/* Pick the region: the ci region for ci elements, otherwise the region
+	 * of the token definition unless this is eof or has no definition. */
+	TokenRegion *region = 0;
+	if ( langEl->isCI )
+		region = langEl->ciRegion->ciRegion;
+	else if ( !langEl->isEOF && langEl->tokenDef != 0 )
+		region = langEl->tokenDef->tokenRegion;
+
+	if ( region == 0 )
+		return;
+
+	/* Region to scan in this state. */
+	TokenRegion *scanRegion = langEl->noPreIgnore ?
+			region->tokenOnlyRegion : region;
+	if ( !regionVectHas( tabState->regions, scanRegion ) )
+		tabState->regions.append( scanRegion );
+
+	/* Pre-region of the state the transition leads to. */
+	PdaState *toState = tabTrans->toState;
+	bool wantIgnore = !langEl->noPostIgnore && region->ignoreOnlyRegion != 0;
+	if ( wantIgnore &&
+			!regionVectHas( toState->preRegions, region->ignoreOnlyRegion ) )
+	{
+		toState->preRegions.append( region->ignoreOnlyRegion );
+	}
+}
+
+#if 0
+ orderState( tabState, prodState, time ):
+ if not tabState.dotSet.find( prodState.dotID )
+ tabState.dotSet.insert( prodState.dotID )
+ tabTrans = tabState.findMatchingTransition( prodState.getTransition() )
+
+ if tabTrans is NonTerminal:
+ for production in tabTrans.nonTerm.prodList:
+ orderState( tabState, production.startState, time )
+
+ for all expandToState in tabTrans.expandToStates:
+ for all followTrans in expandToState.transList
+ reduceAction = findAction( production.reduction )
+ if reduceAction.time is unset:
+ reduceAction.time = time++
+ end
+ end
+ end
+ end
+ end
+
+ shiftAction = tabTrans.findAction( shift )
+ if shiftAction.time is unset:
+ shiftAction.time = time++
+ end
+
+ orderState( tabTrans.toState, prodTrans.toState, time )
+ end
+ end
+
+ orderState( parseTable.startState, startProduction.startState, 1 )
+#endif
+
+/* Assign action orderings by walking a production (srcState, in the
+ * production's own fsm) in parallel with the parse table (tabState).
+ *
+ * Each (table state, dot item) pair is visited once; dotSet2 on the table
+ * state is the visited set. For the symbol leaving srcState the definitions
+ * of that symbol are expanded first, then the shift over the symbol (and
+ * over its termDup, if any) is timed, its token region recorded, and the
+ * walk moves one step along the production. */
+void Compiler::pdaOrderProd( LangEl *rootEl, PdaState *tabState,
+		PdaState *srcState, Definition *parentDef, long &time )
+{
+	assert( srcState->dotSet.length() == 1 );
+	if ( tabState->dotSet2.find( srcState->dotSet[0] ) )
+		return;
+	tabState->dotSet2.insert( srcState->dotSet[0] );
+
+	assert( srcState->transMap.length() == 0 || srcState->transMap.length() == 1 );
+
+	/* Nothing further to do at the end of the production. */
+	if ( srcState->transMap.length() != 1 )
+		return;
+
+	TransMap::Iter prodStep = srcState->transMap;
+
+	/* The matching transition in the parser. */
+	PdaTrans *tabStep = tabState->findTrans( prodStep->key );
+
+	LangEl *stepEl = langElIndex[prodStep->key];
+	if ( stepEl != 0 && stepEl->reduceFirst ) {
+		/* Shortest-match ordering: the follows of every definition are
+		 * handled first, then the definitions themselves are descended. */
+		for ( LelDefList::Iter def = stepEl->defList; def.lte(); def++ ) {
+			pdaOrderFollow( rootEl, tabState, tabStep, prodStep->value,
+					parentDef, def, time );
+		}
+		for ( LelDefList::Iter def = stepEl->defList; def.lte(); def++ )
+			pdaOrderProd( rootEl, tabState, def->fsm->startState, def, time );
+	}
+	else if ( stepEl != 0 ) {
+		/* Default ordering: descend a definition, then handle its follow,
+		 * before moving to the next definition. */
+		for ( LelDefList::Iter def = stepEl->defList; def.lte(); def++ ) {
+			pdaOrderProd( rootEl, tabState, def->fsm->startState, def, time );
+
+			pdaOrderFollow( rootEl, tabState, tabStep, prodStep->value,
+					parentDef, def, time );
+		}
+	}
+
+	/* The shift over the symbol itself. */
+	trySetTime( tabStep, SHIFT_CODE, time );
+
+	/* And over its duplicate terminal, when present. */
+	if ( stepEl != 0 && stepEl->termDup != 0 ) {
+		PdaTrans *dupStep = tabState->findTrans( stepEl->termDup->id );
+		trySetTime( dupStep, SHIFT_CODE, time );
+	}
+
+	/* Make sure the token region of the symbol is recorded in the state. */
+	addRegion( tabState, tabStep, prodStep->key,
+			prodStep->value->noPreIgnore, prodStep->value->noPostIgnore );
+
+	/* Advance one position in the production. */
+	pdaOrderProd( rootEl, tabStep->toState,
+			prodStep->value->toState, parentDef, time );
+}
+
+/* Compute an ordering number for every action in the parse table.
+ *
+ * Steps: sanity-check the graph (no marked states, transition keys strictly
+ * increasing), walk each parser root with pdaOrderProd and time the eof
+ * shift that completes its parse, give region-less states that have an eof
+ * transition the eof token region, optionally warn about states still
+ * without regions, and finally hand out times to any action left unordered. */
+void Compiler::pdaActionOrder( PdaGraph *pdaGraph, LangElSet &parserEls )
+{
+	for ( PdaStateList::Iter st = pdaGraph->stateList; st.lte(); st++ ) {
+		assert( (st->stateBits & SB_ISMARKED) == 0 );
+
+		/* Keys out of a state must be strictly ascending. */
+		long prevKey = 0;
+		for ( TransMap::Iter tr = st->transMap; tr.lte(); tr++ ) {
+			if ( ! tr.first() )
+				assert( prevKey < tr->key );
+			prevKey = tr->key;
+		}
+	}
+
+	/* Order the actions reachable from each parser root. */
+	long time = 1;
+	for ( LangElSet::Iter pe = parserEls; pe.lte(); pe++ ) {
+		LangEl *root = *pe;
+		PdaState *prodStart = root->rootDef->fsm->startState;
+		pdaOrderProd( root, root->startState, prodStart, root->rootDef, time );
+
+		/* Step over the root element, then time the action on eof that
+		 * finishes the parse. */
+		PdaTrans *overRoot = root->startState->findTrans( root->id );
+		PdaTrans *onEof = overRoot->toState->findTrans( root->eofLel->id );
+		onEof->actOrds[0] = time++;
+	}
+
+	/* States with no regions from which eof leaves get the eof region.
+	 * NOTE(review): the region is appended once per eof transition, so a
+	 * state with several eof transitions would list it more than once --
+	 * confirm whether that can occur. */
+	for ( PdaStateList::Iter st = pdaGraph->stateList; st.lte(); st++ ) {
+		if ( st->regions.length() != 0 )
+			continue;
+
+		for ( TransMap::Iter tr = st->transMap; tr.lte(); tr++ ) {
+			PdaTrans *trans = tr->value;
+			LangEl *lel = langElIndex[trans->lowKey];
+			if ( lel != 0 && lel->isEOF )
+				st->regions.append( eofTokenRegion );
+		}
+	}
+
+	if ( colm_log_compile ) {
+		/* Report states whose token region list is still empty. */
+		for ( PdaStateList::Iter st = pdaGraph->stateList; st.lte(); st++ ) {
+			if ( st->regions.length() == 0 ) {
+				warning() << "state has an empty token region, state: " <<
+					st->stateNum << endl;
+			}
+		}
+	}
+
+	/* Some actions may not have an ordering. The original author believes
+	 * these are actions that result in a parse error, arising because the
+	 * state tables are LALR(1) while the ordering walk is LR(1): LALR(1)
+	 * introduces some reductions that lead nowhere. Give them the next
+	 * free times so that every action has one. */
+	for ( PdaStateList::Iter st = pdaGraph->stateList; st.lte(); st++ ) {
+		assert( CmpDotSet::compare( st->dotSet, st->dotSet2 ) == 0 );
+		for ( TransMap::Iter tr = st->transMap; tr.lte(); tr++ ) {
+			PdaTrans *trans = tr->value;
+			for ( ActDataList::Iter ord = trans->actOrds; ord.lte(); ord++ ) {
+				if ( *ord == 0 )
+					*ord = time++;
+			}
+		}
+	}
+}
+
+/* For states flagged advanceReductions, fold a lone reduction that leaves
+ * the state into the shifts that enter it.
+ *
+ * A state qualifies when nothing shifts out of it, exactly one reduction
+ * leaves it, at least one shift enters it and no reduction enters it. Each
+ * incoming shift is then rewritten into a reduce code built with the second
+ * makeReduceCode argument set to true, the outgoing commits are carried over
+ * as after-shift commits, the out transitions are removed and the incoming
+ * transitions are redirected to actionDestState. Unreachable states are
+ * pruned at the end. */
+void Compiler::advanceReductions( PdaGraph *pdaGraph )
+{
+	for ( PdaStateList::Iter st = pdaGraph->stateList; st.lte(); st++ ) {
+		if ( !st->advanceReductions )
+			continue;
+
+		/* Summarize the transitions leaving the state. */
+		bool shiftsOut = false;
+		ReductionMap redsOut;
+		LongSet commitsOut;
+		for ( TransMap::Iter out = st->transMap; out.lte(); out++ ) {
+			PdaTrans *leaving = out->value;
+			if ( leaving->isShift )
+				shiftsOut = true;
+			redsOut.insert( leaving->reductions );
+			commitsOut.insert( leaving->commits );
+		}
+
+		/* Summarize the transitions entering the state. */
+		bool shiftsIn = false;
+		ReductionMap redsIn;
+		for ( PdaTransInList::Iter in = st->inRange; in.lte(); in++ ) {
+			if ( in->isShift )
+				shiftsIn = true;
+			redsIn.insert( in->reductions );
+		}
+
+		/* Only a pure "shift in, single reduce out" state is rewritten. */
+		if ( shiftsOut || redsOut.length() != 1 ||
+				!shiftsIn || redsIn.length() != 0 )
+		{
+			continue;
+		}
+
+		/* Move the reduction onto every incoming transition. */
+		for ( PdaTransInList::Iter in = st->inRange; in.lte(); in++ ) {
+			assert( in->actions.length() == 1 );
+			assert( in->actions[0] == SHIFT_CODE );
+			in->actions[0] = makeReduceCode( redsOut[0].key, true );
+			in->afterShiftCommits.insert( commitsOut );
+		}
+
+		/* Detach and free every transition out of the state. */
+		for ( TransMap::Iter out = st->transMap; out.lte(); out++ ) {
+			pdaGraph->detachTrans( st, out->value->toState, out->value );
+			delete out->value;
+		}
+		st->transMap.empty();
+
+		/* Send everything that entered this state to actionDestState. */
+		pdaGraph->inTransMove( actionDestState, st );
+	}
+
+	pdaGraph->removeUnreachableStates();
+}
+
+/* Sort the action list of every transition: higher actPriors first, and
+ * among equal priorities the smaller actOrds first. The three parallel
+ * lists (actions, actOrds, actPriors) are reordered together.
+ *
+ * When branchPointInfo is set, transitions left with more than one action
+ * are reported on cerr. Transitions on nonterminals (lowKey >=
+ * firstNonTermId) are checked to carry exactly one plain shift and no
+ * commits. */
+void Compiler::sortActions( PdaGraph *pdaGraph )
+{
+	for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
+		assert( CmpDotSet::compare( state->dotSet, state->dotSet2 ) == 0 );
+		for ( TransMap::Iter tel = state->transMap; tel.lte(); tel++ ) {
+			PdaTrans *trans = tel->value;
+
+			/* Take copies to draw from and rebuild the lists in order. */
+			ActDataList pendActions( trans->actions );
+			ActDataList pendOrds( trans->actOrds );
+			ActDataList pendPriors( trans->actPriors );
+			trans->actions.empty();
+			trans->actOrds.empty();
+			trans->actPriors.empty();
+
+			/* Selection sort: repeatedly pull out the best remaining. */
+			while ( pendOrds.length() > 0 ) {
+				int pick = 0;
+				for ( int cand = 1; cand < pendOrds.length(); cand++ ) {
+					bool higherPrior = pendPriors[cand] > pendPriors[pick];
+					bool earlier = pendPriors[cand] == pendPriors[pick] &&
+							pendOrds[cand] < pendOrds[pick];
+					if ( higherPrior || earlier )
+						pick = cand;
+				}
+
+				trans->actions.append( pendActions[pick] );
+				trans->actOrds.append( pendOrds[pick] );
+				trans->actPriors.append( pendPriors[pick] );
+				pendActions.remove( pick );
+				pendOrds.remove( pick );
+				pendPriors.remove( pick );
+			}
+
+			if ( branchPointInfo && trans->actions.length() > 1 ) {
+				cerr << "info: branch point"
+						<< " state: " << state->stateNum
+						<< " trans: ";
+				LangEl *lel = langElIndex[trans->lowKey];
+				if ( lel == 0 )
+					cerr << (char)trans->lowKey << endl;
+				else
+					cerr << lel->lit << endl;
+
+				/* The low two bits of an action give its kind; the
+				 * remaining bits index prodIdIndex for a reduce. */
+				for ( ActDataList::Iter act = trans->actions; act.lte(); act++ ) {
+					long kind = *act & 0x3;
+					if ( kind == 1 )
+						cerr << " shift" << endl;
+					else if ( kind == 2 ) {
+						cerr << " reduce " <<
+								prodIdIndex[(*act >> 2)]->data << endl;
+					}
+					else if ( kind == 3 )
+						cerr << " shift-reduce" << endl;
+				}
+			}
+
+			/* Shifts of nonterminals must not have branch points or
+			 * commits. */
+			if ( trans->lowKey >= firstNonTermId ) {
+				bool singleShift = trans->actions.length() == 1 &&
+						(trans->actions[0] & 0x3) == 1;
+				if ( !singleShift ) {
+					error() << "TRANS ON NONTERMINAL is something "
+						"other than a shift" << endl;
+				}
+				if ( trans->commits.length() > 0 )
+					error() << "TRANS ON NONTERMINAL has a commit" << endl;
+			}
+
+			/* TODO: Shift-reduces are optimizations. Verify that
+			 * shift-reduces exist only if they don't entail a conflict. */
+		}
+	}
+}
+
+/* Collapse the per-transition action data into the graph's shared action
+ * set. For each transition an ActionData (target state number, action list,
+ * commit length) is inserted into pdaGraph->actionSet and the transition is
+ * pointed at the resulting set element.
+ *
+ * The commit length is derived from the last element of commits and of
+ * afterShiftCommits:
+ *   - no after-shift commits: the negated last commit (0 if none);
+ *   - otherwise: last commit + 1 when that is positive and exceeds the last
+ *     after-shift commit, else the last after-shift commit. */
+void Compiler::reduceActions( PdaGraph *pdaGraph )
+{
+	for ( PdaStateList::Iter st = pdaGraph->stateList; st.lte(); st++ ) {
+		for ( TransMap::Iter tel = st->transMap; tel.lte(); tel++ ) {
+			PdaTrans *trans = tel->value;
+
+			int lastCommit = 0;
+			if ( trans->commits.length() > 0 )
+				lastCommit = trans->commits[trans->commits.length()-1];
+
+			int commitLen;
+			if ( trans->afterShiftCommits.length() == 0 ) {
+				commitLen = lastCommit * -1;
+			}
+			else {
+				int afterShift = trans->afterShiftCommits[
+						trans->afterShiftCommits.length()-1];
+
+				if ( lastCommit > 0 && lastCommit+1 > afterShift )
+					commitLen = lastCommit + 1;
+				else
+					commitLen = afterShift;
+			}
+
+			PdaActionSetEl *inSet;
+			pdaGraph->actionSet.insert( ActionData( trans->toState->stateNum,
+					trans->actions, commitLen ), &inSet );
+			trans->actionSetEl = inSet;
+		}
+	}
+}
+
+/* Flag every state, other than the one reached by following the root
+ * definition of langEl from its start state, that has a transition on the
+ * eof element of langEl. The flag (advanceReductions) is what
+ * Compiler::advanceReductions later acts on. */
+void Compiler::computeAdvanceReductions( LangEl *langEl, PdaGraph *pdaGraph )
+{
+	/* The state past the root is the only place eof is expected. */
+	PdaState *pastRoot = pdaGraph->followFsm(
+			langEl->startState,
+			langEl->rootDef->fsm );
+
+	const long eofId = langEl->eofLel->id;
+
+	for ( PdaStateList::Iter st = pdaGraph->stateList; st.lte(); st++ ) {
+		if ( st == pastRoot )
+			continue;
+
+		/* Any other state that looks at eof must reduce without it. */
+		for ( TransMap::Iter tr = st->transMap; tr.lte(); tr++ ) {
+			if ( tr->value->lowKey == eofId )
+				st->advanceReductions = true;
+		}
+	}
+}
+
+/* Check that langEl can be parsed with parse_stop: apart from the state
+ * reached by following the root definition from the start state, no state
+ * may have a transition on the eof element of langEl. An error is raised
+ * for each offending transition. */
+void Compiler::verifyParseStopGrammar( LangEl *langEl, PdaGraph *pdaGraph )
+{
+	/* The state past the root is allowed to see eof. */
+	PdaState *pastRoot = pdaGraph->followFsm(
+			langEl->startState,
+			langEl->rootDef->fsm );
+
+	const long eofId = langEl->eofLel->id;
+
+	/* The graph has to reduce to the root on its own, without relying on
+	 * eof as lookahead. */
+	for ( PdaStateList::Iter st = pdaGraph->stateList; st.lte(); st++ ) {
+		if ( st == pastRoot )
+			continue;
+
+		for ( TransMap::Iter tr = st->transMap; tr.lte(); tr++ ) {
+			if ( tr->value->lowKey != eofId )
+				continue;
+
+			/* This needs a better error message. */
+			error() << "grammar is not usable with parse_stop" << endp;
+		}
+	}
+}
+
+/* Return the language element that supplies precedence for an action on
+ * trans: for a shift, the element of the transition's key; for any other
+ * action, the predOf of the production selected by action >> 2. */
+LangEl *Compiler::predOf( PdaTrans *trans, long action )
+{
+	if ( action == SHIFT_CODE )
+		return langElIndex[trans->lowKey];
+
+	return prodIdIndex[action >> 2]->predOf;
+}
+
+
+/* Decide whether action2 should take the place of action1.
+ *
+ * True when l2 has the strictly larger predValue. On equal predValue the
+ * predType of l1 decides: PredLeft swaps when action1 is the shift,
+ * PredRight swaps when action2 is the shift. False otherwise. */
+bool Compiler::precedenceSwap( long action1, long action2, LangEl *l1, LangEl *l2 )
+{
+	if ( l2->predValue > l1->predValue )
+		return true;
+
+	if ( l1->predValue != l2->predValue )
+		return false;
+
+	/* Same level: associativity breaks the tie. */
+	return ( l1->predType == PredLeft && action1 == SHIFT_CODE ) ||
+			( l1->predType == PredRight && action2 == SHIFT_CODE );
+}
+
+/* True when both actions must be dropped: equal predValue and l1 is
+ * PredNonassoc. */
+bool Compiler::precedenceRemoveBoth( LangEl *l1, LangEl *l2 )
+{
+	return l1->predValue == l2->predValue && l1->predType == PredNonassoc;
+}
+
+/* Resolve conflicts between actions that carry precedence.
+ *
+ * On each transition, the first pair of actions whose precedence elements
+ * (see predOf) both have a predType other than PredNone is settled: the two
+ * are swapped if precedenceSwap says so, the second is removed, and the
+ * first is removed too when precedenceRemoveBoth holds. The scan restarts
+ * after every removal. A transition left with no actions is removed from
+ * the state's transition map. */
+void Compiler::resolvePrecedence( PdaGraph *pdaGraph )
+{
+	for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
+		assert( CmpDotSet::compare( state->dotSet, state->dotSet2 ) == 0 );
+
+		/* The index only advances when the transition is kept. */
+		long idx = 0;
+		while ( idx < state->transMap.length() ) {
+			PdaTrans *trans = state->transMap[idx].value;
+
+rescan:
+			for ( int i = 0; i < trans->actions.length(); i++ ) {
+				LangEl *li = predOf( trans, trans->actions[i] );
+				if ( li == 0 || li->predType == PredNone )
+					continue;
+
+				/* Look for a later action that also has precedence. */
+				for ( int j = i+1; j < trans->actions.length(); j++ ) {
+					LangEl *lj = predOf( trans, trans->actions[j] );
+					if ( lj == 0 || lj->predType == PredNone )
+						continue;
+
+					/* Put the winner in slot i, the loser in slot j. */
+					if ( precedenceSwap( trans->actions[i],
+							trans->actions[j], li, lj ) )
+					{
+						long held = trans->actions[i];
+						trans->actions[i] = trans->actions[j];
+						trans->actions[j] = held;
+					}
+
+					trans->actions.remove( j );
+					if ( precedenceRemoveBoth( li, lj ) )
+						trans->actions.remove( i );
+
+					/* The list changed; start over on it. */
+					goto rescan;
+				}
+			}
+
+			/* Keep the transition if it still has actions; otherwise
+			 * (due to nonassoc) take it out of the map. */
+			if ( trans->actions.length() > 0 )
+				idx += 1;
+			else
+				state->transMap.vremove( idx );
+		}
+	}
+}
+
+/* Turn the raw parse graph into its final action form.
+ *
+ * Records graph limits, expands each transition's shift/reductions into the
+ * parallel actions/actPriors/actOrds lists, then runs ordering, sorting and
+ * precedence resolution. For parse_stop elements the states that need
+ * advanced reductions are flagged and advanceReductions is applied. States
+ * are numbered, actions are reduced into the shared action set and given
+ * ids, the maximum production length is computed, and a number of
+ * invariants are asserted before parse_stop grammars are verified. */
+void Compiler::analyzeMachine( PdaGraph *pdaGraph, LangElSet &parserEls )
+{
+	pdaGraph->maxState = pdaGraph->stateList.length() - 1;
+	pdaGraph->maxLelId = nextSymbolId - 1;
+	pdaGraph->maxOffset = pdaGraph->stateList.length() * pdaGraph->maxLelId;
+
+	/* Build the action lists: the shift (if any) first, then one reduce
+	 * code per reduction, each with its priority. Orderings start at 0. */
+	for ( PdaStateList::Iter st = pdaGraph->stateList; st.lte(); st++ ) {
+		for ( TransMap::Iter tel = st->transMap; tel.lte(); tel++ ) {
+			PdaTrans *pt = tel->value;
+
+			if ( pt->isShift ) {
+				pt->actions.append( SHIFT_CODE );
+				pt->actPriors.append( pt->shiftPrior );
+			}
+			for ( ReductionMap::Iter red = pt->reductions; red.lte(); red++ ) {
+				pt->actions.append( makeReduceCode( red->key, false ) );
+				pt->actPriors.append( red->value );
+			}
+			pt->actOrds.appendDup( 0, pt->actions.length() );
+		}
+	}
+
+	pdaActionOrder( pdaGraph, parserEls );
+	sortActions( pdaGraph );
+	resolvePrecedence( pdaGraph );
+
+	/* Flag the states that need advanced reductions for every element that
+	 * is used with parse_stop. */
+	for ( LangElSet::Iter pe = parserEls; pe.lte(); pe++ ) {
+		LangEl *root = *pe;
+		if ( root->parseStop )
+			computeAdvanceReductions( root, pdaGraph );
+	}
+
+	advanceReductions( pdaGraph );
+	pdaGraph->setStateNumbers();
+	reduceActions( pdaGraph );
+
+	/* Number the elements of the action set. */
+	int nextActionId = 0;
+	for ( PdaActionSet::Iter asi = pdaGraph->actionSet; asi.lte(); asi++ )
+		asi->key.id = nextActionId++;
+
+	/* The largest action set index. */
+	pdaGraph->maxIndex = nextActionId - 1;
+
+	/* The longest production. */
+	pdaGraph->maxProdLen = 0;
+	for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) {
+		if ( (unsigned)prod->fsmLength > pdaGraph->maxProdLen )
+			pdaGraph->maxProdLen = prod->fsmLength;
+	}
+
+	/* A transition on a nonterminal must have exactly one action, and it
+	 * must be a shift or a shift-reduce. */
+	for ( PdaStateList::Iter st = pdaGraph->stateList; st.lte(); st++ ) {
+		for ( TransMap::Iter tel = st->transMap; tel.lte(); tel++ ) {
+			PdaTrans *pt = tel->value;
+			LangEl *langEl = langElIndex[pt->lowKey];
+			if ( langEl == 0 || langEl->type != LangEl::NonTerm )
+				continue;
+
+			assert( pt->actions.length() == 1 );
+			assert( pt->actions[0] == SHIFT_CODE ||
+				(pt->actions[0] & 0x3) == SHIFT_REDUCE_CODE );
+		}
+	}
+
+	/* A shift-reduce must be the only action on its transition. */
+	for ( PdaStateList::Iter st = pdaGraph->stateList; st.lte(); st++ ) {
+		for ( TransMap::Iter tel = st->transMap; tel.lte(); tel++ ) {
+			PdaTrans *pt = tel->value;
+			for ( ActDataList::Iter act = pt->actions; act.lte(); act++ ) {
+				if ( (*act & 0x3) == SHIFT_REDUCE_CODE )
+					assert( pt->actions.length() == 1 );
+			}
+		}
+	}
+
+	/* Verify that any type we parse_stop can actually be parsed that way. */
+	for ( LangElSet::Iter pe = parserEls; pe.lte(); pe++ ) {
+		LangEl *root = *pe;
+		if ( root->parseStop )
+			verifyParseStopGrammar( root, pdaGraph );
+	}
+}
+
+/* Declare the "_root" nonterminal and give every language element a root
+ * definition under it. These root productions are what allow a parser to be
+ * rooted at any element, the start symbol included. Each new definition is
+ * appended to prodList and to the root element's defList, and the factors
+ * of its production element list are resolved. */
+void Compiler::wrapNonTerminals()
+{
+	rootLangEl = declareLangEl( this, rootNamespace, "_root", LangEl::NonTerm );
+
+	for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
+		/* The single production used when this element is the root. The
+		 * current list lengths are passed to the constructor before the
+		 * new definition is appended. */
+		ProdElList *rootProdEls = makeProdElList( lel );
+		Definition *rootDef = new Definition( InputLoc(), rootLangEl,
+				rootProdEls, false, 0,
+				prodList.length(), rootLangEl->defList.length(),
+				Definition::Production );
+
+		lel->rootDef = rootDef;
+		prodList.append( lel->rootDef );
+		rootLangEl->defList.append( lel->rootDef );
+
+		/* Resolve the factors of the new production. */
+		for ( ProdElList::Iter fact = *rootProdEls; fact.lte(); fact++ )
+			resolveFactor( fact );
+	}
+}
+
+/* One propagation step for the nonterminal first set of prod, looking at the
+ * transitions out of state (a state in prod's fsm).
+ *
+ * Every nonterminal key (>= firstNonTermId) is added, together with the
+ * non-epsilon members of the nonterminal first sets of its definitions. If
+ * one of those sets holds -1 (epsilon) the step continues from the target
+ * state, and -1 is added to prod's set when that target is final.
+ *
+ * Returns true if anything new was inserted. */
+bool Compiler::makeNonTermFirstSetProd( Definition *prod, PdaState *state )
+{
+	bool modified = false;
+	for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) {
+		/* Terminals play no part in this set. */
+		if ( trans->key < firstNonTermId )
+			continue;
+
+		if ( prod->nonTermFirstSet.insert( trans->key ) != 0 )
+			modified = true;
+
+		/* Pull in what the nonterminal itself can start with. */
+		bool nullable = false;
+		LangEl *lel = langElIndex[trans->key];
+		for ( LelDefList::Iter ldef = lel->defList; ldef.lte(); ldef++ ) {
+			for ( ProdIdSet::Iter pid = ldef->nonTermFirstSet;
+					pid.lte(); pid++ )
+			{
+				if ( *pid == -1 ) {
+					nullable = true;
+					continue;
+				}
+
+				if ( prod->nonTermFirstSet.insert( *pid ) != 0 )
+					modified = true;
+			}
+		}
+
+		if ( !nullable )
+			continue;
+
+		/* The nonterminal can vanish: what comes after it counts too. */
+		PdaState *after = trans->value->toState;
+		if ( after->isFinState() ) {
+			if ( prod->nonTermFirstSet.insert( -1 ) != 0 )
+				modified = true;
+		}
+
+		if ( makeNonTermFirstSetProd( prod, after ) )
+			modified = true;
+	}
+	return modified;
+}
+
+
+/* Compute the nonterminal first set of every production by iterating to a
+ * fixed point, then mark as left recursive each production whose set
+ * contains the id of its own left-hand side (prodName). */
+void Compiler::makeNonTermFirstSets()
+{
+	bool modified;
+	do {
+		modified = false;
+		for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) {
+			/* A production whose start state is final derives epsilon. */
+			if ( prod->fsm->startState->isFinState() ) {
+				if ( prod->nonTermFirstSet.insert( -1 ) != 0 )
+					modified = true;
+			}
+
+			if ( makeNonTermFirstSetProd( prod, prod->fsm->startState ) )
+				modified = true;
+		}
+	} while ( modified );
+
+	for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) {
+		if ( prod->nonTermFirstSet.find( prod->prodName->id ) )
+			prod->isLeftRec = true;
+	}
+}
+
+/* Debug dump to cerr: for each production, its nonterminal first set
+ * (negative entries shown as <EPSILON>, others by element name) followed by
+ * a note when the production is left recursive. */
+void Compiler::printNonTermFirstSets()
+{
+	for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) {
+		cerr << prod->data << ": ";
+
+		ProdIdSet::Iter pid = prod->nonTermFirstSet;
+		while ( pid.lte() ) {
+			if ( *pid < 0 ) {
+				cerr << " <EPSILON>";
+			}
+			else {
+				LangEl *member = langElIndex[*pid];
+				cerr << " " << member->name;
+			}
+			pid++;
+		}
+		cerr << endl;
+
+		if ( prod->isLeftRec )
+			cerr << "PROD IS LEFT REC: " << prod->data << endl;
+	}
+}
+
+/* One propagation step for the first set of prod, looking at the
+ * transitions out of state (a state in prod's fsm).
+ *
+ * The key of every transition is added. For nonterminal keys
+ * (>= firstNonTermId) the element's termDup id is added when present, along
+ * with the non-epsilon members of the first sets of its definitions. If one
+ * of those sets holds -1 (epsilon) the step continues from the target
+ * state, and -1 is added to prod's set when that target is final.
+ *
+ * Returns true if anything new was inserted. */
+bool Compiler::makeFirstSetProd( Definition *prod, PdaState *state )
+{
+	bool modified = false;
+	for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) {
+		/* The symbol itself always belongs to the set. */
+		if ( prod->firstSet.insert( trans->key ) != 0 )
+			modified = true;
+
+		/* Nothing more to do for a terminal. */
+		if ( trans->key < firstNonTermId )
+			continue;
+
+		LangEl *lel = langElIndex[trans->key];
+		if ( lel != 0 && lel->termDup != 0 ) {
+			if ( prod->firstSet.insert( lel->termDup->id ) != 0 )
+				modified = true;
+		}
+
+		/* Pull in what the nonterminal itself can start with. */
+		bool nullable = false;
+		for ( LelDefList::Iter ldef = lel->defList; ldef.lte(); ldef++ ) {
+			for ( ProdIdSet::Iter pid = ldef->firstSet;
+					pid.lte(); pid++ )
+			{
+				if ( *pid == -1 ) {
+					nullable = true;
+					continue;
+				}
+
+				if ( prod->firstSet.insert( *pid ) != 0 )
+					modified = true;
+			}
+		}
+
+		if ( !nullable )
+			continue;
+
+		/* The nonterminal can vanish: what comes after it counts too. */
+		PdaState *after = trans->value->toState;
+		if ( after->isFinState() ) {
+			if ( prod->firstSet.insert( -1 ) != 0 )
+				modified = true;
+		}
+
+		if ( makeFirstSetProd( prod, after ) )
+			modified = true;
+	}
+	return modified;
+}
+
+
+/* Compute the first set of every production by repeating the per-production
+ * propagation step until a full pass inserts nothing new. */
+void Compiler::makeFirstSets()
+{
+	bool modified;
+	do {
+		modified = false;
+		for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) {
+			/* A production whose start state is final derives epsilon. */
+			if ( prod->fsm->startState->isFinState() ) {
+				if ( prod->firstSet.insert( -1 ) != 0 )
+					modified = true;
+			}
+
+			if ( makeFirstSetProd( prod, prod->fsm->startState ) )
+				modified = true;
+		}
+	} while ( modified );
+}
+
+/* Debug dump to cerr: for each production, the members of its first set.
+ * Negative entries are shown as <EPSILON>; other entries go on their own
+ * line, by element name when the id maps to a language element and as the
+ * raw id otherwise. */
+void Compiler::printFirstSets()
+{
+	for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) {
+		cerr << prod->data << ": ";
+
+		ProdIdSet::Iter pid = prod->firstSet;
+		while ( pid.lte() ) {
+			if ( *pid < 0 ) {
+				cerr << " <EPSILON>";
+			}
+			else {
+				LangEl *member = langElIndex[*pid];
+				if ( member == 0 )
+					cerr << endl << " " << *pid;
+				else
+					cerr << endl << " " << member->name;
+			}
+			pid++;
+		}
+		cerr << endl;
+	}
+}
+
+/* Give every production that exists on entry its own fresh nonterminal,
+ * named "U<n>" where n is the length of prodList at the time of creation,
+ * with a single new definition. The new nonterminal is stored as the
+ * production's uniqueEmptyLeader.
+ *
+ * New definitions are appended to prodList while it is being iterated; the
+ * loop stops at the first production whose prodId equals the original list
+ * length, which keeps it from processing the productions it has just added
+ * (this relies on the prodList.length() constructor argument being what
+ * becomes prodId -- TODO confirm against Definition).
+ *
+ * The name is formatted with a bounded snprintf into a buffer large enough
+ * for any 64-bit value, instead of an unchecked sprintf into 20 bytes. */
+void Compiler::insertUniqueEmptyProductions()
+{
+	int limit = prodList.length();
+	for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) {
+		if ( prod->prodId == limit )
+			break;
+
+		/* Build the name of the new language element. "U", an optional
+		 * sign, up to 19 digits and the terminator fit in 32 bytes. */
+		char name[32];
+		snprintf( name, sizeof(name), "U%li", (long)prodList.length() );
+
+		LangEl *prodName = addLangEl( this, rootNamespace, name, LangEl::NonTerm );
+		Definition *newDef = new Definition( InputLoc(), prodName,
+				0 /* FIXME new VarDef( name, 0 )*/,
+				false, 0, prodList.length(), prodName->defList.length(),
+				Definition::Production );
+		prodName->defList.append( newDef );
+		prodList.append( newDef );
+
+		prod->uniqueEmptyLeader = prodName;
+	}
+}
+
+void Compiler::makeRuntimeData()
+{
+ long count = 0;
+
+ /*
+ * ProdLengths
+ * ProdLhsIs
+ * ProdNames
+ * ProdCodeBlocks
+ * ProdCodeBlockLens
+ */
+
+ runtimeData->frameInfo = new FrameInfo[nextFrameId];
+ runtimeData->numFrames = nextFrameId;
+ memset( runtimeData->frameInfo, 0, sizeof(FrameInfo) * nextFrameId );
+
+ /*
+ * Init code block.
+ */
+ if ( rootCodeBlock == 0 ) {
+ runtimeData->rootCode = 0;
+ runtimeData->rootCodeLen = 0;
+ runtimeData->rootFrameId = 0;
+ }
+ else {
+ runtimeData->rootCode = rootCodeBlock->codeWC.data;
+ runtimeData->rootCodeLen = rootCodeBlock->codeWC.length();
+ runtimeData->rootFrameId = rootCodeBlock->frameId;
+ }
+
+ runtimeData->frameInfo[rootCodeBlock->frameId].codeWV = 0;
+ runtimeData->frameInfo[rootCodeBlock->frameId].codeLenWV = 0;
+ runtimeData->frameInfo[rootCodeBlock->frameId].trees = rootCodeBlock->trees.data;
+ runtimeData->frameInfo[rootCodeBlock->frameId].treesLen = rootCodeBlock->trees.length();
+ runtimeData->frameInfo[rootCodeBlock->frameId].frameSize = rootLocalFrame->size();
+ runtimeData->frameInfo[rootCodeBlock->frameId].argSize = 0;
+
+ /*
+ * prodInfo
+ */
+ count = prodList.length();
+ runtimeData->prodInfo = new ProdInfo[count];
+ runtimeData->numProds = count;
+
+ count = 0;
+ for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) {
+ runtimeData->prodInfo[count].lhsId = prod->prodName->id;
+ runtimeData->prodInfo[count].prodNum = prod->prodNum;
+ runtimeData->prodInfo[count].length = prod->fsmLength;
+ runtimeData->prodInfo[count].name = prod->data;
+ runtimeData->prodInfo[count].frameId = -1;
+
+ CodeBlock *block = prod->redBlock;
+ if ( block != 0 ) {
+ runtimeData->prodInfo[count].frameId = block->frameId;
+ runtimeData->frameInfo[block->frameId].codeWV = block->codeWV.data;
+ runtimeData->frameInfo[block->frameId].codeLenWV = block->codeWV.length();
+
+ runtimeData->frameInfo[block->frameId].trees = block->trees.data;
+ runtimeData->frameInfo[block->frameId].treesLen = block->trees.length();
+
+ runtimeData->frameInfo[block->frameId].frameSize = block->localFrame->size();
+ runtimeData->frameInfo[block->frameId].argSize = 0;
+ }
+
+ runtimeData->prodInfo[count].lhsUpref = true;
+ runtimeData->prodInfo[count].copy = prod->copy.data;
+ runtimeData->prodInfo[count].copyLen = prod->copy.length() / 2;
+ count += 1;
+ }
+
+ /*
+ * regionInfo
+ */
+ runtimeData->numRegions = regionList.length()+1;
+ runtimeData->regionInfo = new RegionInfo[runtimeData->numRegions];
+ memset( runtimeData->regionInfo, 0, sizeof(RegionInfo) * runtimeData->numRegions );
+
+ runtimeData->regionInfo[0].name = "___EMPTY";
+ runtimeData->regionInfo[0].defaultToken = -1;
+ for ( RegionList::Iter reg = regionList; reg.lte(); reg++ ) {
+ long regId = reg->id+1;
+ runtimeData->regionInfo[regId].name = reg->name;
+ runtimeData->regionInfo[regId].defaultToken =
+ reg->defaultTokenDef == 0 ? -1 : reg->defaultTokenDef->tdLangEl->id;
+ runtimeData->regionInfo[regId].eofFrameId = -1;
+ runtimeData->regionInfo[regId].isIgnoreOnly = reg->isIgnoreOnly;
+ runtimeData->regionInfo[regId].isCiOnly = reg->isCiOnly;
+ runtimeData->regionInfo[regId].ciLelId = reg->isCiOnly ? reg->derivedFrom->ciLel->id : 0;
+
+ CodeBlock *block = reg->preEofBlock;
+ if ( block != 0 ) {
+ runtimeData->regionInfo[regId].eofFrameId = block->frameId;
+ runtimeData->frameInfo[block->frameId].codeWV = block->codeWV.data;
+ runtimeData->frameInfo[block->frameId].codeLenWV = block->codeWV.length();
+
+ runtimeData->frameInfo[block->frameId].trees = block->trees.data;
+ runtimeData->frameInfo[block->frameId].treesLen = block->trees.length();
+
+ runtimeData->frameInfo[block->frameId].frameSize = block->localFrame->size();
+ runtimeData->frameInfo[block->frameId].argSize = 0;
+ }
+ }
+
+ /*
+ * lelInfo
+ */
+
+ count = nextSymbolId;
+ runtimeData->lelInfo = new LangElInfo[count];
+ runtimeData->numLangEls = count;
+ memset( runtimeData->lelInfo, 0, sizeof(LangElInfo)*count );
+
+ for ( int i = 0; i < nextSymbolId; i++ ) {
+ LangEl *lel = langElIndex[i];
+ if ( lel != 0 ) {
+ runtimeData->lelInfo[i].name = lel->fullLit;
+ runtimeData->lelInfo[i].xmlTag = lel->xmlTag;
+ runtimeData->lelInfo[i].repeat = lel->isRepeat;
+ runtimeData->lelInfo[i].list = lel->isList;
+ runtimeData->lelInfo[i].literal = lel->isLiteral;
+ runtimeData->lelInfo[i].ignore = lel->ignore;
+ runtimeData->lelInfo[i].frameId = -1;
+
+ CodeBlock *block = lel->transBlock;
+ if ( block != 0 ) {
+ runtimeData->lelInfo[i].frameId = block->frameId;
+ runtimeData->frameInfo[block->frameId].codeWV = block->codeWV.data;
+ runtimeData->frameInfo[block->frameId].codeLenWV = block->codeWV.length();
+
+ runtimeData->frameInfo[block->frameId].trees = block->trees.data;
+ runtimeData->frameInfo[block->frameId].treesLen = block->trees.length();
+
+ runtimeData->frameInfo[block->frameId].frameSize = block->localFrame->size();
+ runtimeData->frameInfo[block->frameId].argSize = 0;
+ }
+
+
+ runtimeData->lelInfo[i].objectTypeId =
+ lel->objectDef == 0 ? 0 : lel->objectDef->id;
+ runtimeData->lelInfo[i].ofiOffset = lel->ofiOffset;
+ runtimeData->lelInfo[i].objectLength =
+ ( lel->objectDef == 0 || lel->objectDef == tokenObj ) ? 0 :
+ lel->objectDef->size();
+
+// runtimeData->lelInfo[i].contextTypeId = 0;
+// lel->context == 0 ? 0 : lel->context->contextObjDef->id;
+// runtimeData->lelInfo[i].contextLength = 0; //lel->context == 0 ? 0 :
+// lel->context->contextObjDef->size();
+// if ( lel->context != 0 ) {
+// cout << "type: " << runtimeData->lelInfo[i].contextTypeId << " length: " <<
+// runtimeData->lelInfo[i].contextLength << endl;
+// }
+
+ runtimeData->lelInfo[i].termDupId = lel->termDup == 0 ? 0 : lel->termDup->id;
+ runtimeData->lelInfo[i].genericId = lel->generic == 0 ? 0 : lel->generic->id;
+
+ if ( lel->tokenDef != 0 && lel->tokenDef->join != 0 &&
+ lel->tokenDef->join->context != 0 )
+ runtimeData->lelInfo[i].markId = lel->tokenDef->join->mark->markId;
+ else
+ runtimeData->lelInfo[i].markId = -1;
+
+ runtimeData->lelInfo[i].numCaptureAttr = 0;
+ }
+ else {
+ memset(&runtimeData->lelInfo[i], 0, sizeof(LangElInfo) );
+ runtimeData->lelInfo[i].name = "__UNUSED";
+ runtimeData->lelInfo[i].xmlTag = "__UNUSED";
+ runtimeData->lelInfo[i].frameId = -1;
+ }
+ }
+
+ /*
+ * FunctionInfo
+ */
+ count = functionList.length();
+
+ runtimeData->functionInfo = new FunctionInfo[count];
+ runtimeData->numFunctions = count;
+ memset( runtimeData->functionInfo, 0, sizeof(FunctionInfo)*count );
+ for ( FunctionList::Iter func = functionList; func.lte(); func++ ) {
+ runtimeData->functionInfo[func->funcId].name = func->name;
+ runtimeData->functionInfo[func->funcId].frameId = -1;
+
+ CodeBlock *block = func->codeBlock;
+ if ( block != 0 ) {
+ runtimeData->functionInfo[func->funcId].frameId = block->frameId;
+
+ runtimeData->frameInfo[block->frameId].codeWV = block->codeWV.data;
+ runtimeData->frameInfo[block->frameId].codeLenWV = block->codeWV.length();
+
+ runtimeData->frameInfo[block->frameId].codeWC = block->codeWC.data;
+ runtimeData->frameInfo[block->frameId].codeLenWC = block->codeWC.length();
+
+ runtimeData->frameInfo[block->frameId].trees = block->trees.data;
+ runtimeData->frameInfo[block->frameId].treesLen = block->trees.length();
+
+ runtimeData->frameInfo[block->frameId].frameSize = func->localFrame->size();
+ runtimeData->frameInfo[block->frameId].argSize = func->paramListSize;
+ }
+
+ runtimeData->functionInfo[func->funcId].frameSize = func->localFrame->size();
+ runtimeData->functionInfo[func->funcId].argSize = func->paramListSize;
+ }
+
+ /*
+ * PatReplInfo
+ */
+
+ /* Filled in later after patterns are parsed. */
+ runtimeData->patReplInfo = new PatReplInfo[nextPatReplId];
+ memset( runtimeData->patReplInfo, 0, sizeof(PatReplInfo) * nextPatReplId );
+ runtimeData->numPatterns = nextPatReplId;
+ runtimeData->patReplNodes = 0;
+ runtimeData->numPatternNodes = 0;
+
+
+ /*
+ * GenericInfo
+ */
+ count = 1;
+ for ( NamespaceList::Iter nspace = namespaceList; nspace.lte(); nspace++ )
+ count += nspace->genericList.length();
+ assert( count == nextGenericId );
+
+ runtimeData->genericInfo = new GenericInfo[count];
+ runtimeData->numGenerics = count;
+ memset( &runtimeData->genericInfo[0], 0, sizeof(GenericInfo) );
+ for ( NamespaceList::Iter nspace = namespaceList; nspace.lte(); nspace++ ) {
+ for ( GenericList::Iter gen = nspace->genericList; gen.lte(); gen++ ) {
+ runtimeData->genericInfo[gen->id].type = gen->typeId;
+ runtimeData->genericInfo[gen->id].typeArg = gen->utArg->typeId;
+ runtimeData->genericInfo[gen->id].keyType = gen->keyUT != 0 ?
+ gen->keyUT->typeId : 0;
+ runtimeData->genericInfo[gen->id].keyOffset = 0;
+ runtimeData->genericInfo[gen->id].langElId = gen->langEl->id;
+ runtimeData->genericInfo[gen->id].parserId = gen->utArg->langEl->parserId;
+ }
+ }
+
+ runtimeData->argvGenericId = argvTypeRef->generic->id;
+
+ /*
+ * Literals
+ */
+ runtimeData->numLiterals = literalStrings.length();
+ runtimeData->litdata = new const char *[literalStrings.length()];
+ runtimeData->litlen = new long [literalStrings.length()];
+ runtimeData->literals = 0;
+ for ( StringMap::Iter el = literalStrings; el.lte(); el++ ) {
+ /* Data. */
+ char *data = new char[el->key.length()+1];
+ memcpy( data, el->key.data, el->key.length() );
+ data[el->key.length()] = 0;
+ runtimeData->litdata[el->value] = data;
+
+ /* Length. */
+ runtimeData->litlen[el->value] = el->key.length();
+ }
+
+ /* Captured attributes. Loop over tokens and count first. */
+ long numCapturedAttr = 0;
+// for ( RegionList::Iter reg = regionList; reg.lte(); reg++ ) {
+// for ( TokenDefListReg::Iter td = reg->tokenDefList; td.lte(); td++ )
+// numCapturedAttr += td->reCaptureVect.length();
+// }
+ runtimeData->captureAttr = new CaptureAttr[numCapturedAttr];
+ runtimeData->numCapturedAttr = numCapturedAttr;
+ memset( runtimeData->captureAttr, 0, sizeof( CaptureAttr ) * numCapturedAttr );
+
+ count = 0;
+// for ( RegionList::Iter reg = regionList; reg.lte(); reg++ ) {
+// for ( TokenDefListReg::Iter td = reg->tokenDefList; td.lte(); td++ ) {
+// runtimeData->lelInfo[td->token->id].captureAttr = count;
+// runtimeData->lelInfo[td->token->id].numCaptureAttr = td->reCaptureVect.length();
+// for ( ReCaptureVect::Iter c = td->reCaptureVect; c.lte(); c++ ) {
+// runtimeData->captureAttr[count].mark_enter = c->markEnter->markId;
+// runtimeData->captureAttr[count].mark_leave = c->markLeave->markId;
+// runtimeData->captureAttr[count].offset = c->objField->offset;
+//
+// count += 1;
+// }
+// }
+// }
+
+ runtimeData->fsmTables = fsmTables;
+ runtimeData->pdaTables = pdaTables;
+
+ /* FIXME: need a parser descriptor. */
+ runtimeData->startStates = new int[nextParserId];
+ runtimeData->eofLelIds = new int[nextParserId];
+ runtimeData->parserLelIds = new int[nextParserId];
+ runtimeData->numParsers = nextParserId;
+ for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
+ if ( lel->parserId >= 0 ) {
+ runtimeData->startStates[lel->parserId] = lel->startState->stateNum;
+ runtimeData->eofLelIds[lel->parserId] = lel->eofLel->id;
+ runtimeData->parserLelIds[lel->parserId] = lel->id;
+ }
+ }
+
+ runtimeData->globalSize = globalObjectDef->size();
+
+ /*
+ * firstNonTermId
+ */
+ runtimeData->firstNonTermId = firstNonTermId;
+
+ /* Special trees. */
+ runtimeData->integerId = intLangEl->id;
+ runtimeData->stringId = strLangEl->id;
+ runtimeData->anyId = anyLangEl->id;
+ runtimeData->eofId = 0; //eofLangEl->id;
+ runtimeData->noTokenId = noTokenLangEl->id;
+}
+
+/* Add to count the number of pattern/replacement nodes needed for kid and
+ * for every sibling that follows it: one for each tree, one for each item on
+ * its left and right ignore lists, plus whatever its children need when the
+ * matching parse tree is descended into. parseTree is advanced in step with
+ * kid. */
+void countNodes( Program *prg, int &count, ParseTree *parseTree, Kid *kid )
+{
+    /* Siblings are handled by iteration, children by recursion. */
+    for ( ; kid != 0; parseTree = parseTree->next, kid = kid->next ) {
+        /* The tree itself. */
+        count += 1;
+
+        /* Shouldn't have to recurse here: one node per left ignore item. */
+        Tree *leftIgnore = treeLeftIgnore( prg, kid->tree );
+        if ( leftIgnore != 0 ) {
+            for ( Kid *item = leftIgnore->child; item != 0; item = item->next )
+                count += 1;
+        }
+
+        /* One node per right ignore item. */
+        Tree *rightIgnore = treeRightIgnore( prg, kid->tree );
+        if ( rightIgnore != 0 ) {
+            for ( Kid *item = rightIgnore->child; item != 0; item = item->next )
+                count += 1;
+        }
+
+        //count += prg->rtd->lelInfo[kid->tree->id].numCaptureAttr;
+
+        /* Parse trees flagged named or artificial are not descended into. */
+        const bool opaque = ( parseTree->flags & PF_NAMED ) ||
+                ( parseTree->flags & PF_ARTIFICIAL );
+        if ( !opaque && treeChild( prg, kid->tree ) != 0 )
+            countNodes( prg, count, parseTree->child, treeChild( prg, kid->tree ) );
+    }
+}
+
+/* Fill in the pattern/replacement node at nodes[ind] from kid and its matching
+ * parseTree, then recursively fill the nodes for kid's children and for the
+ * siblings that follow it. Does nothing when kid is null.
+ *
+ * nextAvail is the next unclaimed slot in nodes. Slots are claimed, in this
+ * order, for the left ignore items, the right ignore items, the first child
+ * and the next sibling. Links that have no target are stored as -1.
+ *
+ * bindId is the index of the next entry of bindings to be matched. A node
+ * whose parseTree is that entry records the index and advances bindId; any
+ * other node gets a bindId of zero. */
+void fillNodes( Program *prg, int &nextAvail, Bindings *bindings, long &bindId,
+ PatReplNode *nodes, ParseTree *parseTree, Kid *kid, int ind )
+{
+ if ( kid != 0 ) {
+ PatReplNode &node = nodes[ind];
+
+ /* The child to descend into: null when the parse tree is flagged named or
+ * artificial, or when the tree has no child. */
+ Kid *child =
+ !( parseTree->flags & PF_NAMED ) &&
+ !( parseTree->flags & PF_ARTIFICIAL ) &&
+ treeChild( prg, kid->tree ) != 0
+ ?
+ treeChild( prg, kid->tree ) : 0;
+
+ /* The parse tree child that goes with it, selected by the same condition. */
+ ParseTree *ptChild =
+ !( parseTree->flags & PF_NAMED ) &&
+ !( parseTree->flags & PF_ARTIFICIAL ) &&
+ treeChild( prg, kid->tree ) != 0
+ ?
+ parseTree->child : 0;
+
+ /* Set up the fields. */
+ node.id = kid->tree->id;
+ node.prodNum = kid->tree->prodNum;
+ node.length = stringLength( kid->tree->tokdata );
+ node.data = stringData( kid->tree->tokdata );
+
+ /* Left ignore items. They take consecutive slots starting at nextAvail and
+ * are chained through next. */
+ Tree *ignoreList = treeLeftIgnore( prg, kid->tree );
+ Kid *ignore = ignoreList == 0 ? 0 : ignoreList->child;
+ node.leftIgnore = ignore == 0 ? -1 : nextAvail;
+
+ while ( ignore != 0 ) {
+ /* Note: this 'node' shadows the outer one for the body of the loop. */
+ PatReplNode &node = nodes[nextAvail++];
+
+ memset( &node, 0, sizeof(PatReplNode) );
+ node.id = ignore->tree->id;
+ node.prodNum = ignore->tree->prodNum;
+ node.next = ignore->next == 0 ? -1 : nextAvail;
+
+ node.length = stringLength( ignore->tree->tokdata );
+ node.data = stringData( ignore->tree->tokdata );
+
+ ignore = ignore->next;
+ }
+
+ /* Right ignore items, laid out the same way as the left ones. */
+ ignoreList = treeRightIgnore( prg, kid->tree );
+ ignore = ignoreList == 0 ? 0 : ignoreList->child;
+ node.rightIgnore = ignore == 0 ? -1 : nextAvail;
+
+ while ( ignore != 0 ) {
+ /* Note: this 'node' shadows the outer one for the body of the loop. */
+ PatReplNode &node = nodes[nextAvail++];
+
+ memset( &node, 0, sizeof(PatReplNode) );
+ node.id = ignore->tree->id;
+ node.prodNum = ignore->tree->prodNum;
+ node.next = ignore->next == 0 ? -1 : nextAvail;
+
+ node.length = stringLength( ignore->tree->tokdata );
+ node.data = stringData( ignore->tree->tokdata );
+
+ ignore = ignore->next;
+ }
+
+ ///* The captured attributes. */
+ //for ( int i = 0; i < prg->rtd->lelInfo[kid->tree->id].numCaptureAttr; i++ ) {
+ // CaptureAttr *cap = prg->rtd->captureAttr +
+ // prg->rtd->lelInfo[kid->tree->id].captureAttr + i;
+ //
+ // Tree *attr = getAttr( kid->tree, cap->offset );
+ //
+ // PatReplNode &node = nodes[nextAvail++];
+ // memset( &node, 0, sizeof(PatReplNode) );
+ //
+ // node.id = attr->id;
+ // node.prodNum = attr->prodNum;
+ // node.length = stringLength( attr->tokdata );
+ // node.data = stringData( attr->tokdata );
+ //}
+
+ node.stop = parseTree->flags & PF_TERM_DUP;
+
+ /* Claim the slot for the first child before recursing into it. */
+ node.child = child == 0 ? -1 : nextAvail++;
+
+ /* Recurse. */
+ fillNodes( prg, nextAvail, bindings, bindId, nodes, ptChild, child, node.child );
+
+ /* Since the parser is bottom up the bindings are in a bottom up
+ * traversal order. Check after recursing. */
+ node.bindId = 0;
+ if ( bindId < bindings->length() && bindings->data[bindId] == parseTree ) {
+ /* Remember that binding ids are indexed from one. */
+ node.bindId = bindId++;
+
+ //cout << "binding match in " << __PRETTY_FUNCTION__ << endl;
+ //cout << "bindId: " << node.bindId << endl;
+ }
+
+ /* Claim the slot for the next sibling, if there is one. */
+ node.next = kid->next == 0 ? -1 : nextAvail++;
+
+ /* Move to the next child. */
+ fillNodes( prg, nextAvail, bindings, bindId, nodes, parseTree->next, kid->next, node.next );
+ }
+}
+
+/* Build runtimeData->patReplNodes from the parsed patterns and replacements.
+ * A first pass counts the nodes required, the array is allocated, and a
+ * second pass fills it in. For each pattern and replacement the offset of its
+ * root node and its number of bindings are recorded in
+ * runtimeData->patReplInfo, indexed by patRepId. */
+void Compiler::fillInPatterns( Program *prg )
+{
+ /*
+ * patReplNodes
+ */
+
+ /* Count is passed by reference to countNodes, which accumulates into it. */
+ int count = 0;
+ for ( PatternList::Iter pat = patternList; pat.lte(); pat++ ) {
+ countNodes( prg, count,
+ pat->pdaRun->stackTop->next,
+ pat->pdaRun->stackTop->next->shadow );
+ }
+
+ for ( ReplList::Iter repl = replList; repl.lte(); repl++ ) {
+ countNodes( prg, count,
+ repl->pdaRun->stackTop->next,
+ repl->pdaRun->stackTop->next->shadow );
+ }
+
+ runtimeData->patReplNodes = new PatReplNode[count];
+ runtimeData->numPatternNodes = count;
+
+ /* Next unclaimed slot in patReplNodes, shared across all fills. */
+ int nextAvail = 0;
+
+ for ( PatternList::Iter pat = patternList; pat.lte(); pat++ ) {
+ /* Claim the root slot for this pattern. */
+ int ind = nextAvail++;
+ runtimeData->patReplInfo[pat->patRepId].offset = ind;
+
+ /* BindIds are indexed base one. */
+ runtimeData->patReplInfo[pat->patRepId].numBindings =
+ pat->pdaRun->bindings->length() - 1;
+
+ /* Init the bind */
+ long bindId = 1;
+ fillNodes( prg, nextAvail, pat->pdaRun->bindings, bindId,
+ runtimeData->patReplNodes,
+ pat->pdaRun->stackTop->next,
+ pat->pdaRun->stackTop->next->shadow,
+ ind );
+ }
+
+ for ( ReplList::Iter repl = replList; repl.lte(); repl++ ) {
+ /* Claim the root slot for this replacement. */
+ int ind = nextAvail++;
+ runtimeData->patReplInfo[repl->patRepId].offset = ind;
+
+ /* BindIds are indexed base one. */
+ runtimeData->patReplInfo[repl->patRepId].numBindings =
+ repl->pdaRun->bindings->length() - 1;
+
+ long bindId = 1;
+ fillNodes( prg, nextAvail, repl->pdaRun->bindings, bindId,
+ runtimeData->patReplNodes,
+ repl->pdaRun->stackTop->next,
+ repl->pdaRun->stackTop->next->shadow,
+ ind );
+ }
+
+ /* The fill pass must use exactly the slots the count pass predicted. */
+ assert( nextAvail == count );
+}
+
+
+/* Find the lowest offset below curLen at which every transition of state
+ * lands on a slot of pdaTables->owners that is still unowned (-1). The slot
+ * for a transition is the offset plus the distance of its key from the
+ * state's first key. Returns curLen when no such offset exists. pdaGraph is
+ * not used and curLen is not modified. */
+int Compiler::findIndexOff( PdaTables *pdaTables, PdaGraph *pdaGraph, PdaState *state, int &curLen )
+{
+    for ( int base = 0; base < curLen; base++ ) {
+        bool fits = true;
+        int slot = base;
+
+        for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) {
+            /* Someone already owns this slot, try the next base. */
+            if ( pdaTables->owners[slot] != -1 ) {
+                fits = false;
+                break;
+            }
+
+            /* Step over this transition and the gap up to the next key. */
+            slot++;
+            if ( ! trans.last() ) {
+                TransMap::Iter following = trans.next();
+                slot += following->key - trans->key - 1;
+            }
+        }
+
+        /* Got through the whole list without a conflict. */
+        if ( fits )
+            return base;
+    }
+
+    return curLen;
+}
+
+/* Comparison of PDA states by the span of their transition keys, where the
+ * span is the last key minus the first key, or zero for a state with no
+ * transitions. */
+struct CmpSpan
+{
+    /* Span of the transition keys of a state. */
+    static int span( PdaState *state )
+    {
+        int dist = 0;
+        if ( state->transMap.length() > 0 ) {
+            TransMap::Iter lo = state->transMap.first();
+            TransMap::Iter hi = state->transMap.last();
+            dist = hi->key - lo->key;
+        }
+        return dist;
+    }
+
+    /* Returns 1 when state1 has the smaller span, -1 when it has the larger
+     * span and 0 when the spans are equal. */
+    static int compare( PdaState *state1, PdaState *state2 )
+    {
+        const int width1 = span( state1 );
+        const int width2 = span( state2 );
+
+        if ( width1 == width2 )
+            return 0;
+        return width1 < width2 ? 1 : -1;
+    }
+};
+
+/* Allocate a new PdaGraph, generate the LALR(1) parser for parserEls into it,
+ * assign the state numbers and run analyzeMachine over it. Returns the new
+ * graph; the visible code never frees it. */
+PdaGraph *Compiler::makePdaGraph( LangElSet &parserEls )
+{
+ //for ( DefList::Iter prod = prodList; prod.lte(); prod++ )
+ // cerr << prod->prodId << " " << prod->data << endl;
+
+ PdaGraph *pdaGraph = new PdaGraph();
+ lalr1GenerateParser( pdaGraph, parserEls );
+ pdaGraph->setStateNumbers();
+ analyzeMachine( pdaGraph, parserEls );
+
+ //cerr << "NUMBER OF STATES: " << pdaGraph->stateList.length() << endl;
+
+ return pdaGraph;
+}
+
+/* Flatten pdaGraph into a newly allocated PdaTables and return it.
+ *
+ * The tables built, in order, are: indicies/owners (transition slots, with
+ * offsets giving each state's base), keys (first and last key per state),
+ * targs, actInds, actions and commitLen (one entry or zero-terminated list
+ * per action set element), and tokenRegionInds, tokenRegions and
+ * tokenPreRegions (zero-terminated region lists per state). */
+PdaTables *Compiler::makePdaTables( PdaGraph *pdaGraph )
+{
+ int count, pos;
+ PdaTables *pdaTables = new PdaTables;
+
+ /*
+ * Counting max indices. Each state contributes one slot per key between
+ * its first and last transition key, inclusive.
+ */
+ count = 0;
+ for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
+ for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) {
+ count++;
+ if ( ! trans.last() ) {
+ TransMap::Iter next = trans.next();
+ count += next->key - trans->key - 1;
+ }
+ }
+ }
+
+
+ /* Allocate indicies and owners. A value of -1 marks an unused slot. */
+ pdaTables->numIndicies = count;
+ pdaTables->indicies = new int[count];
+ pdaTables->owners = new int[count];
+ for ( long i = 0; i < count; i++ ) {
+ pdaTables->indicies[i] = -1;
+ pdaTables->owners[i] = -1;
+ }
+
+ /* Allocate offsets. */
+ int numStates = pdaGraph->stateList.length();
+ pdaTables->offsets = new unsigned int[numStates];
+ pdaTables->numStates = numStates;
+
+ /* Place transitions into indicies/owners */
+ PdaState **states = new PdaState*[numStates];
+ long ds = 0;
+ for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ )
+ states[ds++] = state;
+
+ /* Sorting based on span length. Gives an improvement, but incurs a
+ * cost. Off for now. */
+ //MergeSort< PdaState*, CmpSpan > mergeSort;
+ //mergeSort.sort( states, numStates );
+
+ /* indLen tracks one past the highest slot handed out so far. */
+ int indLen = 0;
+ for ( int s = 0; s < numStates; s++ ) {
+ PdaState *state = states[s];
+
+ /* Base offset for this state, recorded under its state number. */
+ int indOff = findIndexOff( pdaTables, pdaGraph, state, indLen );
+ pdaTables->offsets[state->stateNum] = indOff;
+
+ for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) {
+ /* The slot holds the id of the transition's action set element and
+ * is marked as owned by this state. */
+ pdaTables->indicies[indOff] = trans->value->actionSetEl->key.id;
+ pdaTables->owners[indOff] = state->stateNum;
+ indOff++;
+
+ if ( ! trans.last() ) {
+ /* Skip the slots for the keys this state has no transition on. */
+ TransMap::Iter next = trans.next();
+ indOff += next->key - trans->key - 1;
+ }
+ }
+
+ if ( indOff > indLen )
+ indLen = indOff;
+ }
+
+ /* We allocated the max, but compression gives us less. */
+ pdaTables->numIndicies = indLen;
+ delete[] states;
+
+
+ /*
+ * Keys. Two entries per state: the first and the last transition key, or
+ * zero for both when the state has no transitions.
+ */
+ count = pdaGraph->stateList.length() * 2;;
+ pdaTables->keys = new int[count];
+ pdaTables->numKeys = count;
+
+ count = 0;
+ for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
+ if ( state->transMap.length() == 0 ) {
+ pdaTables->keys[count+0] = 0;
+ pdaTables->keys[count+1] = 0;
+ }
+ else {
+ TransMap::Iter first = state->transMap.first();
+ TransMap::Iter last = state->transMap.last();
+ pdaTables->keys[count+0] = first->key;
+ pdaTables->keys[count+1] = last->key;
+ }
+ count += 2;
+ }
+
+ /*
+ * Targs. One target per action set element.
+ */
+ count = pdaGraph->actionSet.length();
+ pdaTables->targs = new unsigned int[count];
+ pdaTables->numTargs = count;
+
+ count = 0;
+ for ( PdaActionSet::Iter asi = pdaGraph->actionSet; asi.lte(); asi++ )
+ pdaTables->targs[count++] = asi->key.targ;
+
+ /*
+ * ActInds. For each action set element, the position in the actions
+ * table where its list starts. Each list takes its length plus one for
+ * the terminator.
+ */
+ count = pdaGraph->actionSet.length();
+ pdaTables->actInds = new unsigned int[count];
+ pdaTables->numActInds = count;
+
+ count = pos = 0;
+ for ( PdaActionSet::Iter asi = pdaGraph->actionSet; asi.lte(); asi++ ) {
+ pdaTables->actInds[count++] = pos;
+ pos += asi->key.actions.length() + 1;
+ }
+
+ /*
+ * Actions. The action lists laid end to end, each followed by a zero.
+ */
+ count = 0;
+ for ( PdaActionSet::Iter asi = pdaGraph->actionSet; asi.lte(); asi++ )
+ count += asi->key.actions.length() + 1;
+
+ pdaTables->actions = new unsigned int[count];
+ pdaTables->numActions = count;
+
+ count = 0;
+ for ( PdaActionSet::Iter asi = pdaGraph->actionSet; asi.lte(); asi++ ) {
+ for ( ActDataList::Iter ali = asi->key.actions; ali.lte(); ali++ )
+ pdaTables->actions[count++] = *ali;
+
+ pdaTables->actions[count++] = 0;
+ }
+
+ /*
+ * CommitLen. One commit length per action set element.
+ */
+ count = pdaGraph->actionSet.length();
+ pdaTables->commitLen = new int[count];
+ pdaTables->numCommitLen = count;
+
+ count = 0;
+ for ( PdaActionSet::Iter asi = pdaGraph->actionSet; asi.lte(); asi++ )
+ pdaTables->commitLen[count++] = asi->key.commitLen;
+
+ /*
+ * tokenRegionInds. Start at one so region index 0 is null (unset).
+ */
+ count = 0;
+ pos = 1;
+ pdaTables->tokenRegionInds = new int[pdaTables->numStates];
+ for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
+ pdaTables->tokenRegionInds[count++] = pos;
+ pos += state->regions.length() + 1;
+ }
+
+
+ /*
+ * tokenRegions. Build in a null at the beginning. Region ids are stored
+ * plus one and each state's list is followed by a zero.
+ */
+
+ count = 1;
+ for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ )
+ count += state->regions.length() + 1;
+
+ pdaTables->numRegionItems = count;
+ pdaTables->tokenRegions = new int[pdaTables->numRegionItems];
+
+ count = 0;
+ pdaTables->tokenRegions[count++] = 0;
+ for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
+ for ( RegionVect::Iter reg = state->regions; reg.lte(); reg++ )
+ pdaTables->tokenRegions[count++] = (*reg)->id + 1;
+
+ pdaTables->tokenRegions[count++] = 0;
+ }
+
+ /*
+ * tokenPreRegions. Build in a null at the beginning. Sized and laid out
+ * from state->regions, so it has one entry per tokenRegions entry.
+ */
+
+ count = 1;
+ for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ )
+ count += state->regions.length() + 1;
+
+ pdaTables->numPreRegionItems = count;
+ pdaTables->tokenPreRegions = new int[pdaTables->numPreRegionItems];
+
+ count = 0;
+ pdaTables->tokenPreRegions[count++] = 0;
+ for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
+ for ( RegionVect::Iter reg = state->regions; reg.lte(); reg++ ) {
+ /* A state has at most one pre-region. The same value is written for
+ * every region of the state: -1 when there is no pre-region or it
+ * was empty, otherwise the pre-region id plus one. */
+ assert( state->preRegions.length() <= 1 );
+ if ( state->preRegions.length() == 0 || state->preRegions[0]->wasEmpty )
+ pdaTables->tokenPreRegions[count++] = -1;
+ else
+ pdaTables->tokenPreRegions[count++] = state->preRegions[0]->id + 1;
+ }
+
+ pdaTables->tokenPreRegions[count++] = 0;
+ }
+
+
+ return pdaTables;
+}
+
+/* Build the PDA graph for parserEls and the tables derived from it, storing
+ * them in the pdaGraph and pdaTables members. */
+void Compiler::makeParser( LangElSet &parserEls )
+{
+ pdaGraph = makePdaGraph( parserEls );
+ pdaTables = makePdaTables( pdaGraph );
+}
+
diff --git a/src/pdacodegen.cc b/src/pdacodegen.cc
new file mode 100644
index 00000000..9e3dca47
--- /dev/null
+++ b/src/pdacodegen.cc
@@ -0,0 +1,653 @@
+/*
+ * Copyright 2006-2012 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <iostream>
+#include <stdlib.h>
+#include <ctype.h>
+#include <limits.h>
+#include "global.h"
+#include "parsedata.h"
+#include "avlmap.h"
+#include "avlbasic.h"
+#include "avlset.h"
+#include "mergesort.h"
+#include "pdacodegen.h"
+
+using std::cerr;
+using std::endl;
+
+#define FRESH_BLOCK 8128
+#define act_sb "0x1"
+#define act_rb "0x2"
+#define lower "0x0000ffff"
+#define upper "0xffff0000"
+
+/* Write the first length bytes of path to out, escaped so that the result can
+ * be placed between double quotes in generated C source.
+ *
+ * Backslash and double quote are backslash-escaped and the common control
+ * characters use their mnemonic escapes. Any other control byte (below 0x20,
+ * or 0x7f), including an embedded NUL, is written as a three-digit octal
+ * escape instead of being copied raw into the source text. Three digits is
+ * the maximum for an octal escape, so a digit that follows in the data cannot
+ * be absorbed into it. All remaining bytes are copied through unchanged. */
+void escapeLiteralString( std::ostream &out, const char *path, int length )
+{
+    for ( const char *pc = path, *end = path+length; pc != end; pc++ ) {
+        switch ( *pc ) {
+            case '\\': out << "\\\\"; break;
+            case '"': out << "\\\""; break;
+            case '\a': out << "\\a"; break;
+            case '\b': out << "\\b"; break;
+            case '\t': out << "\\t"; break;
+            case '\n': out << "\\n"; break;
+            case '\v': out << "\\v"; break;
+            case '\f': out << "\\f"; break;
+            case '\r': out << "\\r"; break;
+            default: {
+                /* Go through unsigned char so the shifts below are well
+                 * defined whatever the signedness of plain char. */
+                const unsigned char uc = (unsigned char)*pc;
+                if ( uc < 0x20 || uc == 0x7f ) {
+                    out << '\\' <<
+                        (char)( '0' + ( ( uc >> 6 ) & 0x7 ) ) <<
+                        (char)( '0' + ( ( uc >> 3 ) & 0x7 ) ) <<
+                        (char)( '0' + ( uc & 0x7 ) );
+                }
+                else {
+                    out << *pc;
+                }
+                break;
+            }
+        }
+    }
+}
+
+/* Escape a NUL-terminated string. The length is taken with strlen, so path
+ * must not be null and anything after the first NUL byte is not written. */
+void escapeLiteralString( std::ostream &out, const char *path )
+{
+ escapeLiteralString( out, path, strlen(path) );
+}
+
+/* Write a C comment to out listing every language element in pd->langEls,
+ * one per line: the name, when there is one, followed by the id. */
+void PdaCodeGen::writeTokenIds()
+{
+    out << "/*\n";
+    for ( LelList::Iter el = pd->langEls; el.lte(); el++ ) {
+        out << " ";
+
+        /* Unnamed elements are listed by id alone. */
+        if ( el->name != 0 )
+            out << el->name << " ";
+
+        out << el->id << endl;
+    }
+    out << "*/\n\n";
+}
+
+/* Write the extern declaration of main_runtimeData to out. */
+void PdaCodeGen::defineRuntime()
+{
+ out <<
+ "extern RuntimeData main_runtimeData;\n"
+ "\n";
+}
+
+/* Write runtimeData to out as C source: first the arrays it points to (code
+ * and tree blocks per frame, copy blocks per production, the root code, the
+ * info tables, literals, per-parser arrays and capture attributes) and then
+ * the main_runtimeData initializer that refers to them. The fields of the
+ * initializer are written positionally, in the order used here.
+ *
+ * The pdaTables argument is not used by this function; the initializer
+ * refers to the tables by the fixed name pid_0_pdaTables. */
+void PdaCodeGen::writeRuntimeData( RuntimeData *runtimeData, PdaTables *pdaTables )
+{
+    /*
+     * Blocks of code in frames.
+     */
+    for ( int i = 0; i < runtimeData->numFrames; i++ ) {
+        /* FIXME: horrible code cloning going on here. */
+        if ( runtimeData->frameInfo[i].codeLenWV > 0 ) {
+            out << "Code code_" << i << "_wv[] = {\n\t";
+
+            Code *block = runtimeData->frameInfo[i].codeWV;
+            for ( int j = 0; j < runtimeData->frameInfo[i].codeLenWV; j++ ) {
+                out << (unsigned long) block[j];
+
+                /* Comma between items, line break after every eighth. */
+                if ( j < runtimeData->frameInfo[i].codeLenWV-1 ) {
+                    out << ", ";
+                    if ( (j+1) % 8 == 0 )
+                        out << "\n\t";
+                }
+            }
+            out << "\n};\n\n";
+        }
+
+        if ( runtimeData->frameInfo[i].codeLenWC > 0 ) {
+            out << "Code code_" << i << "_wc[] = {\n\t";
+
+            Code *block = runtimeData->frameInfo[i].codeWC;
+            for ( int j = 0; j < runtimeData->frameInfo[i].codeLenWC; j++ ) {
+                out << (unsigned long) block[j];
+
+                if ( j < runtimeData->frameInfo[i].codeLenWC-1 ) {
+                    out << ", ";
+                    if ( (j+1) % 8 == 0 )
+                        out << "\n\t";
+                }
+            }
+            out << "\n};\n\n";
+        }
+
+        if ( runtimeData->frameInfo[i].treesLen > 0 ) {
+            out << "char trees_" << i << "[] = {\n\t";
+
+            char *block = runtimeData->frameInfo[i].trees;
+            for ( int j = 0; j < runtimeData->frameInfo[i].treesLen; j++ ) {
+                out << (long) block[j];
+
+                if ( j < runtimeData->frameInfo[i].treesLen-1 ) {
+                    out << ", ";
+                    if ( (j+1) % 8 == 0 )
+                        out << "\n\t";
+                }
+            }
+            out << "\n};\n\n";
+        }
+    }
+
+    /*
+     * Blocks in production info.
+     */
+    for ( int i = 0; i < runtimeData->numProds; i++ ) {
+        if ( runtimeData->prodInfo[i].copyLen > 0 ) {
+            out << "unsigned char copy_" << i << "[] = {\n\t";
+
+            /* Each copy entry is a pair of bytes. */
+            unsigned char *block = runtimeData->prodInfo[i].copy;
+            for ( int j = 0; j < runtimeData->prodInfo[i].copyLen; j++ ) {
+                out << (long) block[j*2] << ", " << (long) block[j*2+1];
+
+                if ( j < runtimeData->prodInfo[i].copyLen-1 ) {
+                    out << ", ";
+                    if ( (j+1) % 8 == 0 )
+                        out << "\n\t";
+                }
+            }
+            out << "\n};\n\n";
+        }
+    }
+
+    /*
+     * Init code.
+     */
+    out << "Code " << rootCode() << "[] = {\n\t";
+    Code *block = runtimeData->rootCode ;
+    for ( int j = 0; j < runtimeData->rootCodeLen; j++ ) {
+        out << (unsigned int) block[j];
+
+        if ( j < runtimeData->rootCodeLen-1 ) {
+            out << ", ";
+            if ( (j+1) % 8 == 0 )
+                out << "\n\t";
+        }
+    }
+    out << "\n};\n\n";
+
+    /*
+     * lelInfo
+     */
+    out << "LangElInfo " << lelInfo() << "[] = {\n";
+    for ( int i = 0; i < runtimeData->numLangEls; i++ ) {
+        out << "\t{";
+
+        /* Name. */
+        out << " \"";
+        escapeLiteralString( out, runtimeData->lelInfo[i].name );
+        out << "\", ";
+
+        /* XML tag. */
+        out << " \"";
+        escapeLiteralString( out, runtimeData->lelInfo[i].xmlTag );
+        out << "\", ";
+
+        /* Repeat, list, literal, ignore flags. */
+        out << (int)runtimeData->lelInfo[i].repeat << ", " <<
+                (int)runtimeData->lelInfo[i].list << ", " <<
+                (int)runtimeData->lelInfo[i].literal << ", " <<
+                (int)runtimeData->lelInfo[i].ignore << ", ";
+
+        out << runtimeData->lelInfo[i].frameId << ", ";
+
+        out << runtimeData->lelInfo[i].objectTypeId << ", ";
+
+        out << runtimeData->lelInfo[i].ofiOffset << ", ";
+
+        out << runtimeData->lelInfo[i].objectLength << ", ";
+
+//        out << runtimeData->lelInfo[i].contextTypeId << ", ";
+//        out << runtimeData->lelInfo[i].contextLength << ", ";
+
+        out << runtimeData->lelInfo[i].termDupId << ", ";
+
+        out << runtimeData->lelInfo[i].genericId << ", ";
+
+        out << runtimeData->lelInfo[i].markId << ", ";
+
+        out << runtimeData->lelInfo[i].captureAttr << ", ";
+
+        out << runtimeData->lelInfo[i].numCaptureAttr;
+
+        out << " }";
+
+        if ( i < runtimeData->numLangEls-1 )
+            out << ",\n";
+    }
+    out << "\n};\n\n";
+
+    /*
+     * frameInfo
+     */
+    out << "FrameInfo " << frameInfo() << "[] = {\n";
+    for ( int i = 0; i < runtimeData->numFrames; i++ ) {
+        out << "\t{ ";
+
+        /* Refer to the arrays written above, or 0 where none was written. */
+        if ( runtimeData->frameInfo[i].codeLenWV > 0 )
+            out << "code_" << i << "_wv, ";
+        else
+            out << "0, ";
+        out << runtimeData->frameInfo[i].codeLenWV << ", ";
+
+        if ( runtimeData->frameInfo[i].codeLenWC > 0 )
+            out << "code_" << i << "_wc, ";
+        else
+            out << "0, ";
+        out << runtimeData->frameInfo[i].codeLenWC << ", ";
+
+        if ( runtimeData->frameInfo[i].treesLen > 0 )
+            out << "trees_" << i << ", ";
+        else
+            out << "0, ";
+
+        out <<
+            runtimeData->frameInfo[i].treesLen << ", " <<
+            runtimeData->frameInfo[i].argSize << ", " <<
+            runtimeData->frameInfo[i].frameSize;
+
+        out << " }";
+
+        if ( i < runtimeData->numFrames-1 )
+            out << ",\n";
+    }
+    out << "\n};\n\n";
+
+
+    /*
+     * prodInfo
+     */
+    out << "ProdInfo " << prodInfo() << "[] = {\n";
+    for ( int i = 0; i < runtimeData->numProds; i++ ) {
+        out << "\t{ ";
+
+        out << runtimeData->prodInfo[i].lhsId << ", ";
+        out << runtimeData->prodInfo[i].prodNum << ", ";
+        out << runtimeData->prodInfo[i].length << ", ";
+
+        out <<
+            '"' << runtimeData->prodInfo[i].name << "\", " <<
+            runtimeData->prodInfo[i].frameId << ", " <<
+            (int)runtimeData->prodInfo[i].lhsUpref << ", ";
+
+        if ( runtimeData->prodInfo[i].copyLen > 0 )
+            out << "copy_" << i << ", ";
+        else
+            out << "0, ";
+
+        out << runtimeData->prodInfo[i].copyLen << ", ";
+
+
+        out << " }";
+
+        if ( i < runtimeData->numProds-1 )
+            out << ",\n";
+    }
+    out << "\n};\n\n";
+
+    /*
+     * patReplInfo
+     */
+    out << "PatReplInfo " << patReplInfo() << "[] = {\n";
+    for ( int i = 0; i < runtimeData->numPatterns; i++ ) {
+        out << " { " << runtimeData->patReplInfo[i].offset << ", " <<
+                runtimeData->patReplInfo[i].numBindings << " },\n";
+    }
+    out << "};\n\n";
+
+    /*
+     * patReplNodes
+     */
+    out << "PatReplNode " << patReplNodes() << "[] = {\n";
+    for ( int i = 0; i < runtimeData->numPatternNodes; i++ ) {
+        PatReplNode &node = runtimeData->patReplNodes[i];
+        out << " { " << node.id << ", " <<
+                node.prodNum << ", " << node.next << ", " <<
+                node.child << ", " << node.bindId << ", ";
+        if ( node.data == 0 )
+            out << "0";
+        else {
+            out << '\"';
+            escapeLiteralString( out, node.data, node.length );
+            out << '\"';
+        }
+        out << ", " << node.length << ", ";
+
+        out << node.leftIgnore << ", ";
+        out << node.rightIgnore << ", ";
+
+        out << (int)node.stop << " },\n";
+    }
+    out << "};\n\n";
+
+    /*
+     * functionInfo
+     */
+    out << "FunctionInfo " << functionInfo() << "[] = {\n";
+    for ( int i = 0; i < runtimeData->numFunctions; i++ ) {
+        out << "\t{ " <<
+                "\"" << runtimeData->functionInfo[i].name << "\", " <<
+                runtimeData->functionInfo[i].frameId << ", " <<
+                runtimeData->functionInfo[i].argSize << ", " <<
+                runtimeData->functionInfo[i].frameSize;
+        out << " }";
+
+        if ( i < runtimeData->numFunctions-1 )
+            out << ",\n";
+    }
+    out << "\n};\n\n";
+
+    /*
+     * regionInfo
+     */
+    out << "RegionInfo " << regionInfo() << "[] = {\n";
+    for ( int i = 0; i < runtimeData->numRegions; i++ ) {
+        out << "\t{ \"";
+        /* Name. */
+        escapeLiteralString( out, runtimeData->regionInfo[i].name );
+        out << "\", " << runtimeData->regionInfo[i].defaultToken <<
+            ", " << runtimeData->regionInfo[i].eofFrameId <<
+            ", " << runtimeData->regionInfo[i].isIgnoreOnly <<
+            ", " << runtimeData->regionInfo[i].isCiOnly <<
+            ", " << runtimeData->regionInfo[i].ciLelId <<
+            " }";
+
+        if ( i < runtimeData->numRegions-1 )
+            out << ",\n";
+    }
+    out << "\n};\n\n";
+
+    /*
+     * genericInfo
+     */
+    out << "GenericInfo " << genericInfo() << "[] = {\n";
+    for ( int i = 0; i < runtimeData->numGenerics; i++ ) {
+        out << "\t{ " <<
+                runtimeData->genericInfo[i].type << ", " <<
+                runtimeData->genericInfo[i].typeArg << ", " <<
+                runtimeData->genericInfo[i].keyOffset << ", " <<
+                runtimeData->genericInfo[i].keyType << ", " <<
+                runtimeData->genericInfo[i].langElId << ", " <<
+                runtimeData->genericInfo[i].parserId << " },\n";
+    }
+    out << "};\n\n";
+
+    /*
+     * literals
+     */
+    out << "const char *" << litdata() << "[] = {\n";
+    for ( int i = 0; i < runtimeData->numLiterals; i++ ) {
+        out << "\t\"";
+        /* Use the recorded length rather than strlen so that the text
+         * written always covers the length emitted in the litlen table
+         * below, even if the literal contains a NUL byte. */
+        escapeLiteralString( out, runtimeData->litdata[i],
+                runtimeData->litlen[i] );
+        out << "\",\n";
+    }
+    out << "};\n\n";
+
+    out << "long " << litlen() << "[] = {\n\t";
+    for ( int i = 0; i < runtimeData->numLiterals; i++ )
+        out << runtimeData->litlen[i] << ", ";
+    out << "};\n\n";
+
+    /* The literal heads are all written as 0. */
+    out << "Head *" << literals() << "[] = {\n\t";
+    for ( int i = 0; i < runtimeData->numLiterals; i++ )
+        out << "0, ";
+    out << "};\n\n";
+
+    out << "int startStates[] = {\n\t";
+    for ( long i = 0; i < runtimeData->numParsers; i++ ) {
+        out << runtimeData->startStates[i] << ", ";
+    }
+    out << "};\n\n";
+
+    out << "int eofLelIds[] = {\n\t";
+    for ( long i = 0; i < runtimeData->numParsers; i++ ) {
+        out << runtimeData->eofLelIds[i] << ", ";
+    }
+    out << "};\n\n";
+
+    out << "int parserLelIds[] = {\n\t";
+    for ( long i = 0; i < runtimeData->numParsers; i++ ) {
+        out << runtimeData->parserLelIds[i] << ", ";
+    }
+    out << "};\n\n";
+
+    out << "CaptureAttr captureAttr[] = {\n";
+    for ( long i = 0; i < runtimeData->numCapturedAttr; i++ ) {
+        out << "\t{ " <<
+            runtimeData->captureAttr[i].mark_enter << ", " <<
+            runtimeData->captureAttr[i].mark_leave << ", " <<
+            runtimeData->captureAttr[i].offset << " },\n";
+    }
+
+    out << "};\n\n";
+
+    /* The initializer itself, referring to everything written above. */
+    out <<
+        "RuntimeData main_runtimeData = \n"
+        "{\n"
+        " " << lelInfo() << ",\n"
+        " " << runtimeData->numLangEls << ",\n"
+        "\n"
+        " " << prodInfo() << ",\n"
+        " " << runtimeData->numProds << ",\n"
+        "\n"
+        " " << regionInfo() << ",\n"
+        " " << runtimeData->numRegions << ",\n"
+        "\n"
+        " " << rootCode() << ",\n"
+        " " << runtimeData->rootCodeLen << ",\n"
+        " " << runtimeData->rootFrameId << ",\n"
+        "\n"
+        " " << frameInfo() << ",\n"
+        " " << runtimeData->numFrames << ",\n"
+        "\n"
+        " " << functionInfo() << ",\n"
+        " " << runtimeData->numFunctions << ",\n"
+        "\n"
+        " " << patReplInfo() << ",\n"
+        " " << runtimeData->numPatterns << ",\n"
+        "\n"
+        " " << patReplNodes() << ",\n"
+        " " << runtimeData->numPatternNodes << ",\n"
+        "\n"
+        " " << genericInfo() << ",\n"
+        " " << runtimeData->numGenerics << ",\n"
+        " " << runtimeData->argvGenericId << ",\n"
+        "\n"
+        " " << litdata() << ",\n"
+        " " << litlen() << ",\n"
+        " " << literals() << ",\n"
+        " " << runtimeData->numLiterals << ",\n"
+        "\n"
+        " captureAttr,\n"
+        " " << runtimeData->numCapturedAttr << ",\n"
+        "\n"
+        " &fsmTables_start,\n"
+        " &pid_0_pdaTables,\n"
+        " startStates, eofLelIds, parserLelIds, " << runtimeData->numParsers << ",\n"
+        "\n"
+        " " << runtimeData->globalSize << ",\n"
+        "\n"
+        " " << runtimeData->firstNonTermId << ",\n"
+        " " << runtimeData->integerId << ",\n"
+        " " << runtimeData->stringId << ",\n"
+        " " << runtimeData->anyId << ",\n"
+        " " << runtimeData->eofId << ",\n"
+        " " << runtimeData->noTokenId << "\n"
+        "};\n"
+        "\n";
+}
+
+/* Write the items of one table as the body of a C array initializer. Items
+ * are separated by ", " and a line break plus tab is emitted after every
+ * eighth item. Nothing follows the last item. */
+template <class Item> static void writeTableItems( ostream &out,
+		const Item *items, long numItems )
+{
+	for ( long i = 0; i < numItems; i++ ) {
+		out << items[i];
+
+		if ( i < numItems-1 ) {
+			out << ", ";
+			if ( (i+1) % 8 == 0 )
+				out << "\n\t";
+		}
+	}
+}
+
+/* Emit the parse tables of one parser as C source on 'out'.
+ *
+ * id:     parser id, used to build the "pid_<id>_" prefix that is put in
+ *         front of every emitted identifier.
+ * tables: the tables to emit. Each array is written as its own definition,
+ *         followed by a PdaTables initializer that references the arrays and
+ *         records their lengths. */
+void PdaCodeGen::writeParserData( long id, PdaTables *tables )
+{
+	String prefix = "pid_" + String(0, "%ld", id) + "_";
+
+	out << "int " << prefix << indicies() << "[] = {\n\t";
+	writeTableItems( out, tables->indicies, tables->numIndicies );
+	out << "\n};\n\n";
+
+	/* The owners table is emitted with the length of the indicies table. */
+	out << "int " << prefix << owners() << "[] = {\n\t";
+	writeTableItems( out, tables->owners, tables->numIndicies );
+	out << "\n};\n\n";
+
+	out << "int " << prefix << keys() << "[] = {\n\t";
+	writeTableItems( out, tables->keys, tables->numKeys );
+	out << "\n};\n\n";
+
+	/* One offset per state. */
+	out << "unsigned int " << prefix << offsets() << "[] = {\n\t";
+	writeTableItems( out, tables->offsets, tables->numStates );
+	out << "\n};\n\n";
+
+	out << "unsigned int " << prefix << targs() << "[] = {\n\t";
+	writeTableItems( out, tables->targs, tables->numTargs );
+	out << "\n};\n\n";
+
+	out << "unsigned int " << prefix << actInds() << "[] = {\n\t";
+	writeTableItems( out, tables->actInds, tables->numActInds );
+	out << "\n};\n\n";
+
+	out << "unsigned int " << prefix << actions() << "[] = {\n\t";
+	writeTableItems( out, tables->actions, tables->numActions );
+	out << "\n};\n\n";
+
+	out << "int " << prefix << commitLen() << "[] = {\n\t";
+	writeTableItems( out, tables->commitLen, tables->numCommitLen );
+	out << "\n};\n\n";
+
+	/* One token region index per state. */
+	out << "int " << prefix << tokenRegionInds() << "[] = {\n\t";
+	writeTableItems( out, tables->tokenRegionInds, tables->numStates );
+	out << "\n};\n\n";
+
+	out << "int " << prefix << tokenRegions() << "[] = {\n\t";
+	writeTableItems( out, tables->tokenRegions, tables->numRegionItems );
+	out << "\n};\n\n";
+
+	out << "int " << prefix << tokenPreRegions() << "[] = {\n\t";
+	writeTableItems( out, tables->tokenPreRegions, tables->numPreRegionItems );
+	out << "\n};\n\n";
+
+	/* The aggregate that ties the arrays together with their lengths. */
+	out <<
+		"PdaTables " << prefix << "pdaTables =\n"
+		"{\n"
+		"	" << prefix << indicies() << ",\n"
+		"	" << prefix << owners() << ",\n"
+		"	" << prefix << keys() << ",\n"
+		"	" << prefix << offsets() << ",\n"
+		"	" << prefix << targs() << ",\n"
+		"	" << prefix << actInds() << ",\n"
+		"	" << prefix << actions() << ",\n"
+		"	" << prefix << commitLen() << ",\n"
+
+		"	" << prefix << tokenRegionInds() << ",\n"
+		"	" << prefix << tokenRegions() << ",\n"
+		"	" << prefix << tokenPreRegions() << ",\n"
+		"\n"
+		"	" << tables->numIndicies << ",\n"
+		"	" << tables->numKeys << ",\n"
+		"	" << tables->numStates << ",\n"
+		"	" << tables->numTargs << ",\n"
+		"	" << tables->numActInds << ",\n"
+		"	" << tables->numActions << ",\n"
+		"	" << tables->numCommitLen << ",\n"
+		"	" << tables->numRegionItems << ",\n"
+		"	" << tables->numPreRegionItems << "\n"
+		"};\n"
+		"\n";
+}
+
+
diff --git a/src/pdacodegen.h b/src/pdacodegen.h
new file mode 100644
index 00000000..8e5e7a3a
--- /dev/null
+++ b/src/pdacodegen.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright 2007-2012 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+
+#ifndef _PDACODEGEN_H
+#define _PDACODEGEN_H
+
+struct Compiler;
+
+/* Code generator state for the parser (pda) tables. Holds the output stream
+ * the generated source is written to, plus the helpers that produce the
+ * identifiers used in that output. */
+struct PdaCodeGen
+{
+ /* Stores the arguments as given; nothing is copied or opened here. */
+ PdaCodeGen( const char *fileName, const char *parserName, Compiler *pd, ostream &out )
+ :
+ fileName(fileName),
+ parserName(parserName),
+ pd(pd),
+ out(out)
+ {}
+
+ /*
+ * Code Generation.
+ */
+ void startCodeGen();
+ void endCodeGen( int endLine );
+
+ void writeTokenIds();
+ void writeLangEls();
+
+ void writeReference( Definition *prod, char *data );
+ void writeUndoReference( Definition *prod, char *data );
+ void writeFinalReference( Definition *prod, char *data );
+ void writeFirstLocate( Definition *prod );
+ void writeRhsLocate( Definition *prod );
+
+ void defineRuntime();
+ /* Writes the runtime data tables and the RuntimeData initializer to out. */
+ void writeRuntimeData( RuntimeData *runtimeData, PdaTables *pdaTables );
+ /* Writes the tables of one parser to out, each name prefixed "pid_<id>_". */
+ void writeParserData( long id, PdaTables *tables );
+
+ /* Common prefix of every generated identifier below. */
+ String PARSER() { return "parser_"; }
+
+ /* Identifier helpers: each returns PARSER() followed by a fixed name. */
+ String startState() { return PARSER() + "startState"; }
+ String indicies() { return PARSER() + "indicies"; }
+ String owners() { return PARSER() + "owners"; }
+ String keys() { return PARSER() + "keys"; }
+ String offsets() { return PARSER() + "offsets"; }
+ String targs() { return PARSER() + "targs"; }
+ String actInds() { return PARSER() + "actInds"; }
+ String actions() { return PARSER() + "actions"; }
+ String commitLen() { return PARSER() + "commitLen"; }
+ String fssProdIdIndex() { return PARSER() + "fssProdIdIndex"; }
+ String prodLengths() { return PARSER() + "prodLengths"; }
+ String prodLhsIds() { return PARSER() + "prodLhsIds"; }
+ String prodNames() { return PARSER() + "prodNames"; }
+ String lelInfo() { return PARSER() + "lelInfo"; }
+ String prodInfo() { return PARSER() + "prodInfo"; }
+ String tokenRegionInds() { return PARSER() + "tokenRegionInds"; }
+ String tokenRegions() { return PARSER() + "tokenRegions"; }
+ String tokenPreRegions() { return PARSER() + "tokenPreRegions"; }
+ String prodCodeBlocks() { return PARSER() + "prodCodeBlocks"; }
+ String prodCodeBlockLens() { return PARSER() + "prodCodeBlockLens"; }
+ String rootCode() { return PARSER() + "rootCode"; }
+ String frameInfo() { return PARSER() + "frameInfo"; }
+ String functionInfo() { return PARSER() + "functionInfo"; }
+ String objFieldInfo() { return PARSER() + "objFieldInfo"; }
+ String patReplInfo() { return PARSER() + "patReplInfo"; }
+ String patReplNodes() { return PARSER() + "patReplNodes"; }
+ String regionInfo() { return PARSER() + "regionInfo"; }
+ String genericInfo() { return PARSER() + "genericInfo"; }
+ String litdata() { return PARSER() + "litdata"; }
+ String litlen() { return PARSER() + "litlen"; }
+ String literals() { return PARSER() + "literals"; }
+ String fsmTables() { return PARSER() + "fsmTables"; }
+
+ /*
+ * Graphviz Generation
+ */
+ void writeTransList( PdaState *state );
+ void writeDotFile( PdaGraph *graph );
+ void writeDotFile( );
+
+
+ /* Constructor arguments, stored as given. The two strings are not copied,
+ * so the caller must keep them alive. */
+ const char *fileName;
+ const char *parserName;
+ Compiler *pd;
+ /* Destination of all generated text. */
+ ostream &out;
+};
+
+#endif
diff --git a/src/pdagraph.cc b/src/pdagraph.cc
new file mode 100644
index 00000000..8f17b7a5
--- /dev/null
+++ b/src/pdagraph.cc
@@ -0,0 +1,533 @@
+/*
+ * Copyright 2006-2012 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <string.h>
+#include <iostream>
+#include <string.h>
+#include <assert.h>
+#include "global.h"
+#include "pdagraph.h"
+#include "mergesort.h"
+
+using std::cerr;
+using std::endl;
+
+/* Create a new, empty state. It has no in transitions, no pending commits,
+ * no state set or dict element, no state bits and all closure and follow
+ * flags are cleared. */
+PdaState::PdaState()
+:
+	inRange(),
+	pendingCommits(),
+	stateSet(0),
+	stateDictEl(0),   /* Only used during merging. Normally null. */
+	stateBits(0),
+	onClosureQueue(false),
+	inClosedMap(false),
+	followMarked(false),
+	advanceReductions(false)
+{
+}
+
+/* Copy a state. The in list, state set and dict element start out empty and
+ * the closure flags are cleared. Pending commits, state bits, the dot set and
+ * the advanceReductions flag are taken from other. Out transitions are
+ * duplicated but not attached: each copy keeps a pointer to the ORIGINAL
+ * target state, which the PdaGraph copy constructor later replaces with the
+ * corresponding new state. */
+PdaState::PdaState(const PdaState &other)
+:
+	inRange(),
+
+	/* A set of plain values, needs no fixing after the copy. */
+	pendingCommits(other.pendingCommits),
+
+	stateSet(0),
+
+	/* This is only used during merging. Normally null. */
+	stateDictEl(0),
+
+	/* Fsm state data. */
+	stateBits(other.stateBits),
+
+	dotSet(other.dotSet),
+	onClosureQueue(false),
+	inClosedMap(false),
+	followMarked(false),
+
+	/* Previously left uninitialized here, although addInTrans reads it. */
+	advanceReductions(other.advanceReductions),
+
+	transMap()
+{
+	/* Duplicate all the transitions. */
+	for ( TransMap::Iter trans = other.transMap; trans.lte(); trans++ ) {
+		/* Duplicate and store the original target in the transition. This
+		 * will be corrected once all the states have been created. */
+		PdaTrans *newTrans = new PdaTrans(*trans->value);
+		newTrans->toState = trans->value->toState;
+		transMap.append( TransMapEl( newTrans->lowKey, newTrans ) );
+	}
+}
+
+/* Release the state dict element, if any. Transitions are not touched here;
+ * the owning graph is responsible for them. */
+PdaState::~PdaState()
+{
+	/* Deleting a null pointer has no effect, so no test is needed. */
+	delete stateDictEl;
+}
+
+/* Construct an empty graph. There is no start state yet. */
+PdaGraph::PdaGraph()
+	: startState(0)
+{
+}
+
+/* Copy all graph data including transitions.
+ *
+ * Every state of graph is duplicated and the duplicate is recorded in the
+ * original's stateMap field (so the source graph's states are written to).
+ * The duplicated transitions, which still point at original states, are then
+ * redirected to the duplicates and attached to their in lists. Finally the
+ * start state and the final state set are mapped over. Copying a graph that
+ * has no start state yields a graph with no start state. */
+PdaGraph::PdaGraph( const PdaGraph &graph )
+:
+	/* Lists start empty. Will be filled by copy. */
+	stateList(),
+	misfitList(),
+
+	/* Copy in the entry points,
+	 * pointers will be resolved later. */
+	startState(graph.startState),
+
+	/* Will be filled by copy. */
+	finStateSet()
+{
+	/* Create the states and record their map in the original state. */
+	PdaStateList::Iter origState = graph.stateList;
+	for ( ; origState.lte(); origState++ ) {
+		/* Make the new state. */
+		PdaState *newState = new PdaState( *origState );
+
+		/* Add the state to the list. */
+		stateList.append( newState );
+
+		/* Set the mapsTo item of the old state. */
+		origState->stateMap = newState;
+	}
+
+	/* Dereference all the state maps. */
+	for ( PdaStateList::Iter state = stateList; state.lte(); state++ ) {
+		for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) {
+			/* toState points to the original in the src machine. The
+			 * target's duplicate is in the statemap. */
+			PdaState *toState = trans->value->toState != 0 ?
+					trans->value->toState->stateMap : 0;
+
+			/* Attach the transition to the duplicate. attachTrans
+			 * dereferences its target, so a transition without a target
+			 * only gets its source recorded. */
+			trans->value->toState = 0;
+			if ( toState != 0 )
+				attachTrans( state, toState, trans->value );
+			else
+				trans->value->fromState = state;
+		}
+	}
+
+	/* Fix the start state pointer. The source graph may not have one. */
+	if ( startState != 0 )
+		startState = startState->stateMap;
+
+	/* Build the final state set. */
+	PdaStateSet::Iter st = graph.finStateSet;
+	for ( ; st.lte(); st++ )
+		finStateSet.insert((*st)->stateMap);
+}
+
+/* Delete every transition held in a state's transMap, then empty the state
+ * list. */
+PdaGraph::~PdaGraph()
+{
+	for ( PdaStateList::Iter st = stateList; st.lte(); st++ ) {
+		TransMap::Iter tel = st->transMap;
+		while ( tel.lte() ) {
+			delete tel->value;
+			tel++;
+		}
+	}
+
+	/* Delete all the states. */
+	stateList.empty();
+}
+
+/* Make a state final: set its SB_ISFINAL bit and add it to finStateSet.
+ * Does nothing if the state is already final. */
+void PdaGraph::setFinState( PdaState *state )
+{
+	if ( ! ( state->stateBits & SB_ISFINAL ) ) {
+		state->stateBits |= SB_ISFINAL;
+		finStateSet.insert( state );
+	}
+}
+
+/* Clear the SB_ISFINAL bit of every state in finStateSet, then empty the
+ * set. */
+void PdaGraph::unsetAllFinStates( )
+{
+	PdaStateSet::Iter fin = finStateSet;
+	for ( ; fin.lte(); fin++ )
+		(*fin)->stateBits &= ~ SB_ISFINAL;
+
+	finStateSet.empty();
+}
+
+/* Set the start state of the graph. */
+void PdaGraph::setStartState( PdaState *state )
+{
+	/* The graph must not have a start state yet. */
+	assert( startState == 0 );
+
+	startState = state;
+}
+
+/* Set SB_ISMARKED on state and, recursively, on every state that can be
+ * reached from it by following out transitions. States that are already
+ * marked are not visited again. */
+void PdaGraph::markReachableFromHere( PdaState *state )
+{
+	if ( ! ( state->stateBits & SB_ISMARKED ) ) {
+		/* Mark before descending so that cycles terminate. */
+		state->stateBits |= SB_ISMARKED;
+
+		TransMap::Iter tel = state->transMap;
+		for ( ; tel.lte(); tel++ ) {
+			PdaState *target = tel->value->toState;
+			if ( target != 0 )
+				markReachableFromHere( target );
+		}
+	}
+}
+
+/* Number the states 0, 1, 2, ... in state list order. */
+void PdaGraph::setStateNumbers()
+{
+	int nextNum = 0;
+	for ( PdaStateList::Iter st = stateList; st.lte(); st++ ) {
+		st->stateNum = nextNum;
+		nextNum += 1;
+	}
+}
+
+/* Insert a transition at the front of an in list.
+ *
+ * head:  head pointer of the list, passed by reference and updated to trans.
+ * trans: the transition to link in via its ilprev/ilnext pointers.
+ * from and to are not used here. */
+void PdaGraph::attachToInList( PdaState *from, PdaState *to,
+		PdaTrans *&head, PdaTrans *trans )
+{
+	trans->ilnext = head;
+	trans->ilprev = 0;
+
+	/* If in trans list is not empty, set the head->prev to trans. */
+	if ( head != 0 )
+		head->ilprev = trans;
+
+	/* Now insert ourselves at the front of the list. */
+	head = trans;
+}
+
+/* Unlink a transition from an in list. head is the list's head pointer and
+ * is advanced when trans is the first element. The ilprev/ilnext pointers
+ * of trans itself are left as they were. from and to are not used here. */
+void PdaGraph::detachFromInList( PdaState *from, PdaState *to,
+		PdaTrans *&head, PdaTrans *trans )
+{
+	PdaTrans *before = trans->ilprev;
+	PdaTrans *after = trans->ilnext;
+
+	/* Bypass trans going forward. */
+	if ( before != 0 )
+		before->ilnext = after;
+	else
+		head = after;
+
+	/* Bypass trans going backward. */
+	if ( after != 0 )
+		after->ilprev = before;
+}
+
+/* Allocate a transition from 'from' to 'to' on lowKey. The transition is
+ * appended to from's transMap and put on the front of to's in list. The
+ * fourth (unnamed) parameter is ignored. Returns the new transition. */
+PdaTrans *PdaGraph::appendNewTrans( PdaState *from, PdaState *to, long lowKey, long )
+{
+	PdaTrans *newTrans = new PdaTrans();
+
+	/* Key and end points. */
+	newTrans->lowKey = lowKey;
+	newTrans->fromState = from;
+	newTrans->toState = to;
+
+	/* Out side: entry in the source state's transition map. */
+	from->transMap.append( TransMapEl( lowKey, newTrans ) );
+
+	/* In side: inRange of the target is the head pointer. */
+	attachToInList( from, to, to->inRange.head, newTrans );
+
+	return newTrans;
+}
+
+/* Allocate a transition from 'from' to 'to' on lowKey. The transition is
+ * inserted into from's transMap by key and put on the front of to's in
+ * list. The fourth (unnamed) parameter is ignored. Returns the new
+ * transition. */
+PdaTrans *PdaGraph::insertNewTrans( PdaState *from, PdaState *to, long lowKey, long )
+{
+	PdaTrans *newTrans = new PdaTrans();
+
+	/* Key and end points. */
+	newTrans->lowKey = lowKey;
+	newTrans->fromState = from;
+	newTrans->toState = to;
+
+	/* Out side: entry in the source state's transition map. */
+	from->transMap.insert( lowKey, newTrans );
+
+	/* In side: inRange of the target is the head pointer. */
+	attachToInList( from, to, to->inRange.head, newTrans );
+
+	return newTrans;
+}
+
+/* Attach an already allocated, currently unattached transition. Records the
+ * end points in the transition and puts it on the target's in list. The
+ * source state's transMap is not touched. */
+void PdaGraph::attachTrans( PdaState *from, PdaState *to, PdaTrans *trans )
+{
+	/* Must not be attached already. */
+	assert( trans->fromState == 0 && trans->toState == 0 );
+
+	/* Link into the in list headed by to's inRange. */
+	attachToInList( from, to, to->inRange.head, trans );
+
+	trans->toState = to;
+	trans->fromState = from;
+}
+
+/* Detach a transition from its end points. Clears fromState and toState in
+ * the transition and unlinks it from the target's in list. The source
+ * state's transMap is not touched. */
+void PdaGraph::detachTrans( PdaState *from, PdaState *to, PdaTrans *trans )
+{
+	/* The caller must name the actual end points. */
+	assert( trans->fromState == from && trans->toState == to );
+
+	/* Unlink from the in list headed by to's inRange. */
+	detachFromInList( from, to, to->inRange.head, trans );
+
+	trans->toState = 0;
+	trans->fromState = 0;
+}
+
+
+/* Detach a state from the graph. Every transition into the state is
+ * detached, removed from its source state's transMap and deleted. Every
+ * transition out of the state is detached from its target's in list and
+ * deleted, and the state's transMap is emptied. If the state is final it is
+ * removed from finStateSet. The state is neither removed from stateList nor
+ * deleted here; that is left to the caller. */
+void PdaGraph::detachState( PdaState *state )
+{
+ /* Detach the in transitions from the inRange list of transitions. Each
+ * pass removes the current head, so the loop ends when the list is empty. */
+ while ( state->inRange.head != 0 ) {
+ /* Get pointers to the trans and the state. */
+ PdaTrans *trans = state->inRange.head;
+ PdaState *fromState = trans->fromState;
+
+ /* Detach the transitions from the source state. */
+ detachTrans( fromState, state, trans );
+
+ /* Ok to delete the transition. The map entry must go first, since it
+ * holds a pointer to the transition. */
+ fromState->transMap.remove( trans->lowKey );
+ delete trans;
+ }
+
+ /* Detach out range transitions. The map entries are left in place until
+ * the map is emptied below. */
+ for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) {
+ detachTrans( state, trans->value->toState, trans->value );
+ delete trans->value;
+ }
+
+ /* Delete all of the out range pointers. */
+ state->transMap.empty();
+
+ /* Unset final stateness before detaching from graph. Only the set
+ * membership is removed; the SB_ISFINAL bit stays set in stateBits. */
+ if ( state->stateBits & SB_ISFINAL )
+ finStateSet.remove( state );
+}
+
+/* Redirect every transition that goes into src so that it goes into dest.
+ * src and dest must be different states and src must not be the start
+ * state. */
+void PdaGraph::inTransMove( PdaState *dest, PdaState *src )
+{
+	/* Do not try to move in trans to and from the same state. */
+	assert( dest != src );
+
+	/* Moving the in transitions of the start state is not supported. */
+	assert( src != startState );
+
+	/* Keep taking the head of src's in list until the list is empty. */
+	for ( PdaTrans *trans = src->inRange.head; trans != 0;
+			trans = src->inRange.head )
+	{
+		PdaState *fromState = trans->fromState;
+
+		/* Detach from src, reattach to dest. */
+		detachTrans( fromState, src, trans );
+		attachTrans( fromState, dest, trans );
+	}
+}
+
+/* Record a reduction by prodId on dest with priority prior. If the reduction
+ * is already present it keeps the larger of the two priorities. */
+void PdaGraph::addInReduction( PdaTrans *dest, long prodId, long prior )
+{
+	ReductionMapEl *existing = dest->reductions.find( prodId );
+
+	if ( existing != 0 ) {
+		/* Already there, raise the priority if the new one is higher. */
+		if ( existing->value < prior )
+			existing->value = prior;
+	}
+	else {
+		dest->reductions.insert( prodId, prior );
+	}
+}
+
+/* Merge the properties of srcTrans into destTrans. Called during merging
+ * for each transition that destTrans is to represent. Merged are: the shift
+ * flag and shift priority (max), the reductions (max priority per
+ * production), the commit points, the target's advanceReductions flag and
+ * the noPreIgnore/noPostIgnore flags. Merging a transition into itself does
+ * nothing. */
+void PdaGraph::addInTrans( PdaTrans *destTrans, PdaTrans *srcTrans )
+{
+	/* Protect against adding in from ourselves. */
+	if ( srcTrans == destTrans )
+		return;
+
+	/* If the source is a shift then dest becomes a shift. Its priority is
+	 * the source's when dest was not a shift, otherwise the max of the two. */
+	if ( srcTrans->isShift ) {
+		if ( !destTrans->isShift ||
+				srcTrans->shiftPrior > destTrans->shiftPrior )
+		{
+			destTrans->shiftPrior = srcTrans->shiftPrior;
+		}
+		destTrans->isShift = true;
+	}
+
+	/* Add in the reductions. */
+	ReductionMap::Iter red = srcTrans->reductions;
+	for ( ; red.lte(); red++ )
+		addInReduction( destTrans, red->key, red->value );
+
+	/* Add in the commit points. */
+	destTrans->commits.insert( srcTrans->commits );
+
+	/* Flags are sticky: once set on the source they are set on dest. */
+	if ( srcTrans->toState->advanceReductions )
+		destTrans->toState->advanceReductions = true;
+
+	destTrans->noPreIgnore = destTrans->noPreIgnore || srcTrans->noPreIgnore;
+	destTrans->noPostIgnore = destTrans->noPostIgnore || srcTrans->noPostIgnore;
+}
+
+/* NO LONGER USED. Merge the properties of srcState into destState: pending
+ * commits, the dot set, and closure queue membership. Merging a state into
+ * itself does nothing. */
+void PdaGraph::addInState( PdaState *destState, PdaState *srcState )
+{
+	if ( srcState == destState )
+		return;
+
+	/* Pending commits, with a diagnostic if any were drawn in. */
+	destState->pendingCommits.insert( srcState->pendingCommits );
+	if ( srcState->pendingCommits.length() > 0 )
+		cerr << "THERE ARE PENDING COMMITS DRAWN IN" << endl;
+
+	/* Parser generation data. */
+	destState->dotSet.insert( srcState->dotSet );
+
+	/* If the source was queued for closure then dest must be as well. */
+	if ( srcState->onClosureQueue && !destState->onClosureQueue ) {
+		stateClosureQueue.append( destState );
+		destState->onClosureQueue = true;
+	}
+}
+
+/* Allocate a new state, append it to the graph's state list and return it.
+ * The state is not final. */
+PdaState *PdaGraph::addState()
+{
+	PdaState *newState = new PdaState();
+	stateList.append( newState );
+	return newState;
+}
+
+
+/* Walk srcFsm from its start state to a final state, taking the matching
+ * transition out of 'from' on each step. Every non-final state of srcFsm on
+ * the path must have exactly one transition, and the states reached from
+ * 'from' must have a transition on the same key. Returns the state reached. */
+PdaState *PdaGraph::followFsm( PdaState *from, PdaGraph *srcFsm )
+{
+	for ( PdaState *followSrc = srcFsm->startState; ! followSrc->isFinState(); ) {
+		/* srcFsm is expected to be a straight line. */
+		assert( followSrc->transMap.length() == 1 );
+		PdaTrans *srcStep = followSrc->transMap[0].value;
+
+		/* Take the same key in this graph. */
+		PdaTrans *inTrans = from->findTrans( srcStep->lowKey );
+		assert( inTrans != 0 );
+
+		from = inTrans->toState;
+		followSrc = srcStep->toState;
+	}
+
+	return from;
+}
+
+/* Count the transitions taken from the start state to the first final state
+ * reached by always following the first transition in each state's
+ * transMap. The graph must have a start state and every non-final state on
+ * that path must have at least one transition. */
+int PdaGraph::fsmLength( )
+{
+	int length = 0;
+	PdaState *state = startState;
+
+	/* Nothing to walk without a start state. */
+	assert( state != 0 );
+
+	while ( ! state->isFinState() ) {
+		/* Indexing an empty map would read out of bounds. */
+		assert( state->transMap.length() > 0 );
+
+		length += 1;
+		state = state->transMap[0].value->toState;
+	}
+	return length;
+}
+
+/* Remove the states that cannot be reached from the start state or from a
+ * state in entryStateSet. Reachable states are marked first; then the state
+ * list is swept: marked states are unmarked, unmarked states are detached,
+ * taken off the list and deleted. */
+void PdaGraph::removeUnreachableStates()
+{
+	/* Mark phase. */
+	if ( startState != 0 )
+		markReachableFromHere( startState );
+
+	for ( PdaStateSet::Iter entry = entryStateSet; entry.lte(); entry++ )
+		markReachableFromHere( *entry );
+
+	/* Sweep phase. The successor is fetched before the current state can
+	 * be deleted. */
+	PdaState *next = 0;
+	for ( PdaState *state = stateList.head; state != 0; state = next ) {
+		next = state->next;
+
+		if ( ! ( state->stateBits & SB_ISMARKED ) ) {
+			detachState( state );
+			stateList.detach( state );
+			delete state;
+		}
+		else {
+			state->stateBits &= ~ SB_ISMARKED;
+		}
+	}
+}
diff --git a/src/pdagraph.h b/src/pdagraph.h
new file mode 100644
index 00000000..dc11b3e1
--- /dev/null
+++ b/src/pdagraph.h
@@ -0,0 +1,515 @@
+/*
+ * Copyright 2006-2012 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _PDAGRAPH_H
+#define _PDAGRAPH_H
+
+#include <assert.h>
+#include "vector.h"
+#include "bstset.h"
+#include "compare.h"
+#include "avltree.h"
+#include "dlist.h"
+#include "bstmap.h"
+#include "sbstmap.h"
+#include "sbstset.h"
+#include "sbsttable.h"
+#include "avlset.h"
+#include "dlistmel.h"
+#include "avltree.h"
+
+/* Flags for states. */
+#define SB_ISFINAL 0x04
+#define SB_ISMARKED 0x08
+#define SB_ISSTART 0x10
+
+/* Flags for transitions. */
+#define TB_ISMARKED 0x01
+
+struct PdaTrans;
+struct PdaState;
+struct PdaGraph;
+struct TokenDef;
+struct Definition;
+struct LangEl;
+struct TokenRegion;
+
+typedef Vector<TokenRegion*> RegionVect;
+
+typedef Vector<long> ActDataList;
+
+/* A target, a commit length, an id and a list of actions. The id always
+ * starts at zero; the action list is copied from the constructor argument. */
+struct ActionData
+{
+	int targ;
+	int commitLen;
+	int id;
+
+	ActDataList actions;
+
+	ActionData( int targ, ActDataList &actions, int commitLen )
+	:
+		targ(targ),
+		commitLen(commitLen),
+		id(0),
+		actions(actions)
+	{}
+};
+
+
+/* Three-way ordering of ActionData: by targ, then commitLen, then id, and
+ * finally by the action lists. */
+struct CmpActionData
+{
+	static int compare( const ActionData &ap1, const ActionData &ap2 )
+	{
+		if ( ap1.targ != ap2.targ )
+			return ap1.targ < ap2.targ ? -1 : 1;
+
+		if ( ap1.commitLen != ap2.commitLen )
+			return ap1.commitLen < ap2.commitLen ? -1 : 1;
+
+		if ( ap1.id != ap2.id )
+			return ap1.id < ap2.id ? -1 : 1;
+
+		/* All scalar fields are equal, the action lists decide. */
+		return CmpTable< long, CmpOrd<long> >::
+				compare( ap1.actions, ap2.actions );
+	}
+};
+
+typedef AvlSet<ActionData, CmpActionData> PdaActionSet;
+typedef AvlSetEl<ActionData> PdaActionSetEl;
+
+/* Prev/next links for the closure queue. PdaState derives from this. */
+struct ClosureQueueListEl
+{
+	PdaState *prev, *next;
+};
+
+/* Queue of states, transitions to be closed. */
+typedef DListMel< PdaState, ClosureQueueListEl > StateClosureQueue;
+typedef DList<PdaTrans> TransClosureQueue;
+
+typedef BstSet< Definition*, CmpOrd<Definition*> > DefSet;
+typedef CmpTable< Definition*, CmpOrd<Definition*> > CmpDefSet;
+typedef BstSet< DefSet, CmpDefSet > DefSetSet;
+
+typedef Vector< Definition* > DefVect;
+typedef BstSet< long, CmpOrd<long> > AlphSet;
+
+/* A (state, production id) pair. */
+struct ExpandToEl
+{
+	PdaState *state;
+	int prodId;
+
+	ExpandToEl( PdaState *state, int prodId )
+	:
+		state(state),
+		prodId(prodId)
+	{}
+};
+
+/* Three-way ordering of ExpandToEl: by state pointer, then by prodId. */
+struct CmpExpandToEl
+{
+	static inline int compare( const ExpandToEl &etel1, const ExpandToEl &etel2 )
+	{
+		if ( etel1.state != etel2.state )
+			return etel1.state < etel2.state ? -1 : 1;
+
+		if ( etel1.prodId != etel2.prodId )
+			return etel1.prodId < etel2.prodId ? -1 : 1;
+
+		return 0;
+	}
+};
+
+typedef BstSet<ExpandToEl, CmpExpandToEl> ExpandToSet;
+typedef BstSet< int, CmpOrd<int> > IntSet;
+typedef CmpTable< int, CmpOrd<int> > CmpIntSet;
+
+typedef BstSet< long, CmpOrd<long> > LongSet;
+typedef CmpTable< long, CmpOrd<long> > CmpLongSet;
+
+typedef BstMap< long, long, CmpOrd<long> > LongMap;
+typedef BstMapEl< long, long > LongMapEl;
+
+typedef LongSet ProdIdSet;
+typedef CmpLongSet CmpProdIdSet;
+
+/* Set of states, list of states. */
+typedef BstSet<PdaState*> PdaStateSet;
+typedef Vector<PdaState*> StateVect;
+typedef DList<PdaState> PdaStateList;
+
+typedef LongMap FollowToAdd;
+typedef LongMap ReductionMap;
+typedef LongMapEl ReductionMapEl;
+
+/* An (onReduce, length) pair of ints. */
+struct ProdIdPair
+{
+	int onReduce;
+	int length;
+
+	ProdIdPair( int onReduce, int length )
+	:
+		onReduce(onReduce),
+		length(length)
+	{}
+};
+
+/* Three-way ordering of ProdIdPair: by onReduce, then by length. */
+struct CmpProdIdPair
+{
+	static inline int compare( const ProdIdPair &pair1, const ProdIdPair &pair2 )
+	{
+		if ( pair1.onReduce != pair2.onReduce )
+			return pair1.onReduce < pair2.onReduce ? -1 : 1;
+
+		if ( pair1.length != pair2.length )
+			return pair1.length < pair2.length ? -1 : 1;
+
+		return 0;
+	}
+};
+
+typedef BstSet< ProdIdPair, CmpProdIdPair > ProdIdPairSet;
+
+/* Transition class that implements actions and priorities. A transition is
+ * keyed by lowKey, knows its source and target state, and is linked into
+ * the target's in list through ilprev/ilnext. */
+struct PdaTrans
+{
+	/* A new transition is unattached: every pointer is null, the key is
+	 * zero and no flags are set. */
+	PdaTrans() :
+		lowKey(0),
+		fromState(0),
+		toState(0),
+		prev(0), next(0),
+		ilprev(0), ilnext(0),
+		isShift(false),
+		isShiftReduce(false),
+		shiftPrior(0),
+		actionSetEl(0),
+		noPreIgnore(false),
+		noPostIgnore(false)
+	{ }
+
+	/* Copies the key, the shift data, the reductions and the commits. The
+	 * copy is unattached (null states and list pointers), has empty
+	 * actions/actOrds/actPriors/expandTo/afterShiftCommits and no action
+	 * set element.
+	 * NOTE(review): noPreIgnore and noPostIgnore are reset rather than
+	 * copied from other. Confirm that this is intended. */
+	PdaTrans( const PdaTrans &other ) :
+		lowKey(other.lowKey),
+		fromState(0), toState(0),
+		prev(0), next(0),
+		ilprev(0), ilnext(0),
+		isShift(other.isShift),
+		isShiftReduce(other.isShiftReduce),
+		shiftPrior(other.shiftPrior),
+		reductions(other.reductions),
+		actionSetEl(0),
+		commits(other.commits),
+		noPreIgnore(false),
+		noPostIgnore(false)
+	{ }
+
+	long lowKey;
+	PdaState *fromState;
+	PdaState *toState;
+
+	/* Pointers for outlist. */
+	PdaTrans *prev, *next;
+
+	/* Pointers for in-list. */
+	PdaTrans *ilprev, *ilnext;
+
+	long maxPrior();
+
+	/* Parse Table construction data. */
+	bool isShift, isShiftReduce;
+	int shiftPrior;
+	ReductionMap reductions;
+	ActDataList actions;
+	ActDataList actOrds;
+	ActDataList actPriors;
+
+	ExpandToSet expandTo;
+
+	PdaActionSetEl *actionSetEl;
+
+	LongSet commits;
+	LongSet afterShiftCommits;
+
+	bool noPreIgnore;
+	bool noPostIgnore;
+};
+
+/* Head of a list of in transitions. Only the head pointer is stored; the
+ * graph links and unlinks transitions through their ilprev/ilnext pointers.
+ * This class adds an iterator over one such list. */
+struct PdaTransInList
+{
+	PdaTransInList()
+		: head(0)
+	{}
+
+	PdaTrans *head;
+
+	struct Iter
+	{
+		/* An iterator that points at nothing. */
+		Iter()
+			: ptr(0)
+		{}
+
+		/* Start at the head of a list. */
+		Iter( const PdaTransInList &il )
+			: ptr(il.head)
+		{}
+
+		Iter &operator=( const PdaTransInList &dl )
+		{
+			ptr = dl.head;
+			return *this;
+		}
+
+		/* lte: still on an element. end: walked off the list. */
+		bool lte() const
+			{ return ptr != 0; }
+		bool end() const
+			{ return ptr == 0; }
+
+		/* On the first or last element of the list. */
+		bool first() const
+			{ return ptr != 0 && ptr->ilprev == 0; }
+		bool last() const
+			{ return ptr != 0 && ptr->ilnext == 0; }
+
+		/* Access to the transition pointed at. */
+		operator PdaTrans*() const
+			{ return ptr; }
+		PdaTrans &operator *() const
+			{ return *ptr; }
+		PdaTrans *operator->() const
+			{ return ptr; }
+
+		/* Step along the in-list links. */
+		inline void operator++(int)
+			{ ptr = ptr->ilnext; }
+		inline void operator--(int)
+			{ ptr = ptr->ilprev; }
+
+		/* The iterator is simply a pointer. */
+		PdaTrans *ptr;
+	};
+};
+
+typedef DList<PdaTrans> PdaTransList;
+
+/* An element in a state dict. The key is a set of states and the value is
+ * the target state associated with that set. */
+struct PdaStateDictEl
+:
+	public AvlTreeEl<PdaStateDictEl>
+{
+	/* The state set is copied. There is no target state until the caller
+	 * assigns one, so it starts out null instead of indeterminate. */
+	PdaStateDictEl(const PdaStateSet &stateSet)
+		: stateSet(stateSet), targState(0) { }
+
+	const PdaStateSet &getKey() { return stateSet; }
+	PdaStateSet stateSet;
+	PdaState *targState;
+};
+
+/* Dictionary mapping a set of states to a target state. Elements are
+ * PdaStateDictEl, keyed by PdaStateSet and ordered with CmpTable<PdaState*>. */
+typedef AvlTree< PdaStateDictEl, PdaStateSet, CmpTable<PdaState*> > PdaStateDict;
+
+/* What items does a particular state encompass. A DotSet is a set of long
+ * values; CmpDotSet is the comparison used when a DotSet acts as a key. */
+typedef BstSet< long, CmpOrd<long> > DotSet;
+typedef CmpTable< long, CmpOrd<long> > CmpDotSet;
+
+/* Map of dot sets to states. The states themselves are the tree elements, so
+ * DotSetMapEl is just another name for PdaState. */
+typedef AvlTree< PdaState, DotSet, CmpDotSet > DotSetMap;
+typedef PdaState DotSetMapEl;
+
+typedef BstMap< long, PdaTrans* > TransMap; /* Transitions looked up by a long key. */
+typedef BstMapEl< long, PdaTrans* > TransMapEl; /* Element type of TransMap. */
+
+/* State class that implements actions and priorities. A state is both a
+ * closure queue list element and an element of an AVL tree of PdaState (see
+ * the base classes), with its dot set serving as the tree key via getKey(). */
+struct PdaState
+:
+ public ClosureQueueListEl,
+ public AvlTreeEl< PdaState >
+{
+ PdaState();
+ PdaState(const PdaState &other);
+ ~PdaState();
+
+ /* Is the state final? True when the SB_ISFINAL bit is set in stateBits. */
+ bool isFinState() { return stateBits & SB_ISFINAL; }
+
+ PdaTrans *findTrans( long key ) /* Look up key in transMap; returns 0 when there is no entry. */
+ {
+ TransMapEl *transMapEl = transMap.find( key );
+ if ( transMapEl == 0 )
+ return 0;
+ return transMapEl->value;
+ }
+
+ /* In transition list. */
+ PdaTransInList inRange;
+
+ ProdIdPairSet pendingCommits;
+
+ /* When duplicating the fsm we need to map each
+ * state to the new state representing it. */
+ PdaState *stateMap;
+
+ /* When merging states (state machine operations) this next pointer is
+ * used for the list of states that need to be filled in. */
+ PdaState *alg_next;
+
+ PdaStateSet *stateSet;
+
+ /* Identification for printing and stable minimization. */
+ int stateNum;
+
+ /* A pointer to a dict element that contains the set of states this state
+ * represents. This cannot go into alg, because alg.next is used during
+ * the merging process. */
+ PdaStateDictEl *stateDictEl;
+
+ /* Bits controlling the behaviour of the state during collapsing to dfa.
+ * isFinState() tests SB_ISFINAL in this field. */
+ int stateBits;
+
+ /* State list elements. */
+ PdaState *next, *prev;
+
+ /* For dotset map. Returns the key under which this state is stored. */
+ DotSet &getKey() { return dotSet; }
+
+ /* Closure management. */
+ DotSet dotSet;
+ DotSet dotSet2;
+ bool onClosureQueue;
+ bool inClosedMap;
+ bool followMarked;
+ bool onStateList;
+
+ TransMap transMap; /* Transitions by key; this is what findTrans() searches. */
+
+ RegionVect regions;
+ RegionVect preRegions;
+
+ bool advanceReductions;
+};
+
+/* Compare lists of epsilon transitions. Entries are name ids of targets. */
+typedef CmpTable< int, CmpOrd<int> > CmpEpsilonTrans;
+
+/* Compare sets of context values. This is the same underlying type as
+ * CmpEpsilonTrans; only the name differs. */
+typedef CmpTable< int, CmpOrd<int> > CmpContextSets;
+
+/* Graph class that implements actions and priorities. It holds the state
+ * lists, the start/entry/final state bookkeeping, the closure queues and the
+ * collected size statistics of one pushdown automaton. */
+struct PdaGraph
+{
+ /* Constructors/Destructors. */
+ PdaGraph();
+ PdaGraph( const PdaGraph &graph );
+ ~PdaGraph();
+
+ /* The list of states. */
+ PdaStateList stateList;
+ PdaStateList misfitList;
+
+ /* The start state. */
+ PdaState *startState;
+ PdaStateSet entryStateSet;
+
+ /* The set of final states. */
+ PdaStateSet finStateSet;
+
+ /* Closure queues and maps. */
+ DotSetMap closedMap;
+ StateClosureQueue stateClosureQueue;
+ StateClosureQueue stateClosedList;
+
+ TransClosureQueue transClosureQueue;
+ PdaState *stateClosureHead;
+
+ LangEl **langElIndex;
+
+ void setStartState( PdaState *state );
+ void unsetStartState( );
+
+ /*
+ * Basic attaching and detaching.
+ */
+
+ /* Common to attaching/detaching list and default. The head of the in-list
+ * is passed by reference so that it can be updated. */
+ void attachToInList( PdaState *from, PdaState *to, PdaTrans *&head, PdaTrans *trans );
+ void detachFromInList( PdaState *from, PdaState *to, PdaTrans *&head, PdaTrans *trans );
+
+ /* Attach with a new transition. The last parameter of each is unnamed in
+ * this declaration. */
+ PdaTrans *appendNewTrans( PdaState *from, PdaState *to, long onChar1, long );
+ PdaTrans *insertNewTrans( PdaState *from, PdaState *to, long lowKey, long );
+
+ /* Attach with an existing transition that is already in an out list. */
+ void attachTrans( PdaState *from, PdaState *to, PdaTrans *trans );
+
+ /* Detach a transition from a target state. */
+ void detachTrans( PdaState *from, PdaState *to, PdaTrans *trans );
+
+ /* Detach a state from the graph. */
+ void detachState( PdaState *state );
+
+ /*
+ * Callbacks.
+ */
+
+ /* Add the properties of the source (a reduction, a transition or a
+ * state) into the destination. */
+ void addInReduction( PdaTrans *dest, long prodId, long prior );
+ void addInTrans( PdaTrans *destTrans, PdaTrans *srcTrans );
+ void addInState( PdaState *destState, PdaState *srcState );
+
+ /*
+ * Allocation.
+ */
+
+ /* New up a state and add it to the graph. */
+ PdaState *addState();
+
+ /*
+ * Fsm operators.
+ */
+
+ /* Follow to the fin state of src fsm. */
+ PdaState *followFsm( PdaState *from, PdaGraph *srcFsm );
+
+ /*
+ * Final states
+ */
+
+ /* Set and Unset a state as final. */
+ void setFinState( PdaState *state );
+ void unsetFinState( PdaState *state );
+ void unsetAllFinStates( );
+
+ /* Set State numbers starting at 0. */
+ void setStateNumbers();
+
+ /*
+ * Path pruning
+ */
+
+ /* Mark all states reachable from state. */
+ void markReachableFromHere( PdaState *state );
+
+ /* Removes states that cannot be reached by any path in the fsm and are
+ * thus wasted silicon. */
+ void removeUnreachableStates();
+
+ /* Remove error actions from states on which the error transition will
+ * never be taken.
+ * NOTE(review): this wording does not obviously describe a bool query
+ * named outListCovers -- confirm against the definition. */
+ bool outListCovers( PdaState *state );
+
+ /* Remove states that are on the misfit list. */
+ void removeMisfits();
+
+
+ /*
+ * Other
+ */
+
+ /* Move the in-transitions of src over to dest. */
+ void inTransMove(PdaState *dest, PdaState *src);
+
+ int fsmLength( );
+
+ /* Collected machine information. */
+ unsigned long long maxState;
+ unsigned long long maxAction;
+ unsigned long long maxLelId;
+ unsigned long long maxOffset;
+ unsigned long long maxIndex;
+ unsigned long long maxProdLen;
+
+ PdaActionSet actionSet;
+};
+
+
+#endif /* _FSMGRAPH_H */
diff --git a/src/pdarun.c b/src/pdarun.c
new file mode 100644
index 00000000..62ab107e
--- /dev/null
+++ b/src/pdarun.c
@@ -0,0 +1,2272 @@
+/*
+ * Copyright 2007-2012 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "config.h"
+#include "debug.h"
+#include "pdarun.h"
+#include "fsmrun.h"
+#include "bytecode.h"
+#include "tree.h"
+#include "pool.h"
+
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+
+/* Local boolean constants. */
+#define true 1
+#define false 0
+
+#define act_sb 0x1
+#define act_rb 0x2
+#define lower 0x0000ffff
+#define upper 0xffff0000
+
+/* Assemble a Word from the four bytes at p, lowest byte first, and store it
+ * in i. Both arguments are parenthesized so that expressions such as
+ * (buf + off) can be passed. i is written and p is evaluated more than once,
+ * so neither argument may have side effects. */
+#define read_word_p( i, p ) do { \
+	(i) = ((Word) (p)[0]); \
+	(i) |= ((Word) (p)[1]) << 8; \
+	(i) |= ((Word) (p)[2]) << 16; \
+	(i) |= ((Word) (p)[3]) << 24; \
+} while(0)
+
+/* Same four-byte assembly, with the resulting Word converted to a Tree
+ * pointer and stored in i. The temporary has a long name so that it cannot
+ * capture an identifier used in the caller's p expression. */
+#define read_tree_p( i, p ) do { \
+	Word read_tree_p_word; \
+	read_tree_p_word = ((Word) (p)[0]); \
+	read_tree_p_word |= ((Word) (p)[1]) << 8; \
+	read_tree_p_word |= ((Word) (p)[2]) << 16; \
+	read_tree_p_word |= ((Word) (p)[3]) << 24; \
+	(i) = (Tree*)read_tree_p_word; \
+} while(0)
+
+/* Prepare a scanner run for use with the FSM tables of prg. One run buffer is
+ * allocated up front, the scan pointers are placed at its start with no data
+ * available, and no input or source stream is attached. */
+void initFsmRun( FsmRun *fsmRun, Program *prg )
+{
+	fsmRun->tables = prg->rtd->fsmTables;
+
+	/* Run buffers need to stick around because token strings point into
+	 * them. The list starts out with a single fresh buffer. */
+	RunBuf *firstBuf = newRunBuf();
+	firstBuf->next = 0;
+	fsmRun->runBuf = firstBuf;
+
+	/* Nothing buffered and nothing scanned yet. */
+	fsmRun->pe = firstBuf->data;
+	fsmRun->p = fsmRun->pe;
+	fsmRun->peof = 0;
+
+	/* Not attached to anything, no pre-region selected. */
+	fsmRun->attachedInput = 0;
+	fsmRun->attachedSource = 0;
+	fsmRun->preRegion = -1;
+}
+
+/* Give up the run buffers of fsmRun by splicing its whole buffer list onto the
+ * front of the program's allocRunBuf list.
+ *
+ * After the transfer fsmRun no longer refers to the buffers. If the pointer
+ * were left in place, a second call on the same FsmRun would walk to the end
+ * of the program's list and link that end back to the head, producing a
+ * cycle. */
+void clearFsmRun( Program *prg, FsmRun *fsmRun )
+{
+	RunBuf *head = fsmRun->runBuf;
+	if ( head == 0 )
+		return;
+
+	/* Find the last buffer of the list. */
+	RunBuf *tail = head;
+	while ( tail->next != 0 )
+		tail = tail->next;
+
+	/* Transfer the run buf list to the program. */
+	tail->next = prg->allocRunBuf;
+	prg->allocRunBuf = head;
+
+	/* The program owns the buffers now. */
+	fsmRun->runBuf = 0;
+}
+
+/* Keep the position up to date after consuming text. Line and column are only
+ * tracked here when the stream does not handle line counting itself
+ * (handlesLine is false); the byte offset is always advanced by length. */
+void updatePosition( InputStream *inputStream, const char *data, long length )
+{
+	if ( !inputStream->handlesLine ) {
+		/* The index has the same type as length, so the comparison is not
+		 * mixed-width and the index cannot overflow before reaching it. */
+		long i;
+		for ( i = 0; i < length; i++ ) {
+			if ( data[i] == '\n' ) {
+				/* A newline starts the next line at column one. */
+				inputStream->line += 1;
+				inputStream->column = 1;
+			}
+			else {
+				inputStream->column += 1;
+			}
+		}
+	}
+
+	inputStream->byte += length;
+}
+
+/* Keep the position up to date after sending back text. The line count is
+ * reduced by the number of newlines in the text (unless the stream handles
+ * lines itself) and the byte offset is reduced by length. The column is not
+ * touched, see the FIXME. */
+void undoPosition( InputStream *inputStream, const char *data, long length )
+{
+	/* FIXME: this needs to fetch the position information from the parsed
+	 * token and restore based on that.. */
+	if ( !inputStream->handlesLine ) {
+		/* Same type as length, so the comparison is not mixed-width. */
+		long i;
+		for ( i = 0; i < length; i++ ) {
+			if ( data[i] == '\n' )
+				inputStream->line -= 1;
+		}
+	}
+
+	inputStream->byte -= length;
+}
+
+/* Count one more parse step and trace the new total. */
+void incrementSteps( PdaRun *pdaRun )
+{
+	pdaRun->steps++;
+	debug( REALM_PARSE, "steps up to %ld\n", pdaRun->steps );
+}
+
+/* Take one parse step back off the count and trace the new total. */
+void decrementSteps( PdaRun *pdaRun )
+{
+	pdaRun->steps--;
+	debug( REALM_PARSE, "steps down to %ld\n", pdaRun->steps );
+}
+
+/* Pull length bytes of raw text from the input stream into a fresh run buffer
+ * and return a string head that points into that buffer. The text is consumed
+ * from the stream and the stream position is advanced over it. Must not be
+ * called while a token is in progress (tokstart set).
+ *
+ * NOTE(review): the byte count reported back by getData is not examined and
+ * length is not checked against the capacity of the run buffer -- confirm
+ * that callers guarantee both. */
+Head *streamPull( Program *prg, FsmRun *fsmRun, InputStream *inputStream, long length )
+{
+	/* We should not be in the midst of getting a token. */
+	assert( fsmRun->tokstart == 0 );
+
+	/* The new buffer goes on the front of the run buffer list. */
+	RunBuf *pullBuf = newRunBuf();
+	pullBuf->next = fsmRun->runBuf;
+	fsmRun->runBuf = pullBuf;
+
+	int received = 0;
+	getData( fsmRun, inputStream, 0, pullBuf->data, length, &received );
+	consumeData( inputStream, length );
+
+	/* Leave the scanner right after the pulled text with nothing buffered. */
+	fsmRun->pe = pullBuf->data + length;
+	fsmRun->p = fsmRun->pe;
+
+	Head *pulled = stringAllocPointer( prg, pullBuf->data, length );
+	updatePosition( inputStream, pullBuf->data, length );
+
+	return pulled;
+}
+
+void undoStreamPull( FsmRun *fsmRun, InputStream *inputStream, const char *data, long length )
+{ /* Reverse a stream pull: the given text is prepended to the input stream
+ * again. fsmRun is not referenced in this body. */
+ debug( REALM_PARSE, "undoing stream pull\n" );
+
+ prependData( inputStream, data, length );
+}
+
+void streamPushText( FsmRun *fsmRun, InputStream *inputStream, const char *data, long length )
+{ /* Push text onto the front of the input stream by prepending it. fsmRun is
+ * not referenced in this body. */
+ prependData( inputStream, data, length );
+}
+
+void streamPushTree( FsmRun *fsmRun, InputStream *inputStream, Tree *tree, int ignore )
+{ /* Push a tree onto the front of the input stream by prepending it; the
+ * ignore flag is forwarded unchanged. fsmRun is not referenced in this body. */
+ prependTree( inputStream, tree, ignore );
+}
+
+/* Reverse a push onto the front of the input stream. A negative length stands
+ * for a pushed tree, which is taken back off the stream and released;
+ * otherwise length bytes of prepended text are removed. */
+void undoStreamPush( Program *prg, Tree **sp, FsmRun *fsmRun, InputStream *inputStream, long length )
+{
+	if ( length >= 0 ) {
+		undoPrependData( inputStream, length );
+		return;
+	}
+
+	/* It was a tree: take it back and drop the reference. */
+	Tree *pushed = undoPrependTree( inputStream );
+	treeDownref( prg, sp, pushed );
+}
+
+/* Reverse an append to the input stream. What is undone depends on the kind of
+ * item that was appended, as identified by the id of input: string data, a
+ * stream, or any other tree (which is also released). */
+void undoStreamAppend( Program *prg, Tree **sp, FsmRun *fsmRun, InputStream *inputStream, Tree *input, long length )
+{
+	if ( input->id == LEL_ID_STR ) {
+		undoAppendData( inputStream, length );
+		return;
+	}
+
+	if ( input->id == LEL_ID_STREAM ) {
+		undoAppendStream( inputStream );
+		return;
+	}
+
+	/* Anything else was appended as a tree. */
+	Tree *appended = undoAppendTree( inputStream );
+	treeDownref( prg, sp, appended );
+}
+
+/* Return consumed text to the input stream and rewind the stream position
+ * over it. Only whole tokens/ignores should be sent back, so a send back
+ * should never cross a buffer boundary: either data is slid back, or we move
+ * to a previous buffer and slide back data. Empty text is only traced. */
+static void sendBackText( FsmRun *fsmRun, InputStream *inputStream, const char *data, long length )
+{
+	debug( REALM_PARSE, "push back of %ld characters\n", length );
+
+	if ( length != 0 ) {
+		debug( REALM_PARSE, "sending back text: %.*s\n",
+				(int)length, data );
+
+		undoConsumeData( fsmRun, inputStream, data, length );
+		undoPosition( inputStream, data, length );
+	}
+}
+
+void sendBackTree( InputStream *inputStream, Tree *tree )
+{ /* Hand a tree back to the input stream through undoConsumeTree; the last
+ * argument is always passed as false. */
+ undoConsumeTree( inputStream, tree, false );
+}
+
+/*
+ * Send one ignore item back to the input. Its text is returned to the stream
+ * unless the item has no token data or is artificial, the step count is
+ * reduced, pending reverse code is put on deck, and the parse is told to stop
+ * once the step count reaches the target. The parse tree itself is not freed
+ * here.
+ *
+ * Stops on:
+ *  PcrRevIgnore
+ */
+static void sendBackIgnore( Program *prg, Tree **sp, PdaRun *pdaRun, FsmRun *fsmRun,
+		InputStream *inputStream, ParseTree *parseTree )
+{
+	#ifdef DEBUG
+	LangElInfo *lelInfo = prg->rtd->lelInfo;
+	debug( REALM_PARSE, "sending back: %s%s\n",
+		lelInfo[parseTree->shadow->tree->id].name,
+		parseTree->flags & PF_ARTIFICIAL ? " (artificial)" : "" );
+	#endif
+
+	Head *head = parseTree->shadow->tree->tokdata;
+	int artificial = parseTree->flags & PF_ARTIFICIAL;
+
+	/* Artificial items did not come from the stream text. */
+	if ( head != 0 && !artificial )
+		sendBackText( fsmRun, inputStream, stringData( head ), head->length );
+
+	decrementSteps( pdaRun );
+
+	/* Check for reverse code. */
+	if ( parseTree->flags & PF_HAS_RCODE ) {
+		pdaRun->onDeck = true;
+		parseTree->flags &= ~PF_HAS_RCODE;
+	}
+
+	if ( pdaRun->steps == pdaRun->targetSteps ) {
+		/* Step counts are printed as long, matching incrementSteps and
+		 * decrementSteps; the cast keeps the argument and the conversion
+		 * specifier in agreement. */
+		debug( REALM_PARSE, "trigger parse stop, steps = target = %ld\n",
+				(long)pdaRun->targetSteps );
+		pdaRun->stop = true;
+	}
+}
+
+/* Make fsmRun the scanner attached to the input stream. A different scanner
+ * that currently holds the stream is detached first; nothing happens if the
+ * stream is already attached to fsmRun. */
+void attachInput( FsmRun *fsmRun, InputStream *is )
+{
+	FsmRun *holder = is->attached;
+
+	/* Some other scanner holds the stream: release it first. */
+	if ( holder != 0 && holder != fsmRun )
+		detachInput( holder, is );
+
+	/* Already ours, nothing left to do. */
+	if ( is->attached == fsmRun )
+		return;
+
+	debug( REALM_INPUT, "attaching fsm run to input stream: %p %p\n", fsmRun, is );
+	fsmRun->attachedInput = is;
+	is->attached = fsmRun;
+}
+
+/* Make fsmRun the scanner attached to the source stream. A different scanner
+ * that currently holds the stream is detached first; nothing happens if the
+ * stream is already attached to fsmRun. */
+void attachSource( FsmRun *fsmRun, SourceStream *ss )
+{
+	FsmRun *holder = ss->attached;
+
+	/* Some other scanner holds the stream: release it first. */
+	if ( holder != 0 && holder != fsmRun )
+		detachSource( holder, ss );
+
+	/* Already ours, nothing left to do. */
+	if ( ss->attached == fsmRun )
+		return;
+
+	debug( REALM_INPUT, "attaching fsm run to source stream: %p %p\n", fsmRun, ss );
+	fsmRun->attachedSource = ss;
+	ss->attached = fsmRun;
+}
+
+/* Separate fsmRun from the input stream. The scanner's buffered data is
+ * discarded, and a source stream that is attached to the scanner is released
+ * as well. */
+void detachInput( FsmRun *fsmRun, InputStream *is )
+{
+	debug( REALM_INPUT, "detaching fsm run from input stream: %p %p\n", fsmRun, is );
+
+	/* Break the link in both directions. */
+	is->attached = 0;
+	fsmRun->attachedInput = 0;
+
+	clearBuffered( fsmRun );
+
+	/* Let go of the source stream too, if there is one. */
+	SourceStream *source = fsmRun->attachedSource;
+	if ( source != 0 ) {
+		source->attached = 0;
+		fsmRun->attachedSource = 0;
+	}
+}
+
+/* Separate fsmRun from the source stream. The scanner's buffered data is
+ * discarded, and an input stream that is attached to the scanner is released
+ * as well. */
+void detachSource( FsmRun *fsmRun, SourceStream *is )
+{
+	debug( REALM_INPUT, "detaching fsm run from source stream: %p %p\n", fsmRun, is );
+
+	/* Break the link in both directions. */
+	is->attached = 0;
+	fsmRun->attachedSource = 0;
+
+	clearBuffered( fsmRun );
+
+	/* Let go of the input stream too, if there is one. */
+	InputStream *input = fsmRun->attachedInput;
+	if ( input != 0 ) {
+		input->attached = 0;
+		fsmRun->attachedInput = 0;
+	}
+}
+
+/* Throw away whatever the scanner has buffered but not yet consumed. If a
+ * token is in progress the scan position is first rewound to the start of
+ * that token and the token is abandoned. In either case the end of the
+ * available data is pulled back to the scan position. */
+void clearBuffered( FsmRun *fsmRun )
+{
+	if ( fsmRun->tokstart != 0 ) {
+		/* Back up over the unfinished token. */
+		fsmRun->p = fsmRun->tokstart;
+		fsmRun->tokstart = 0;
+	}
+
+	fsmRun->pe = fsmRun->p;
+}
+
+/* Abandon a token that was started but never finished for a lack of data: the
+ * scan position goes back to where the token began. Does nothing when no
+ * token is in progress. */
+void resetToken( FsmRun *fsmRun )
+{
+	if ( fsmRun->tokstart == 0 )
+		return;
+
+	/* We must first back up over the partial token. */
+	fsmRun->p = fsmRun->tokstart;
+	fsmRun->tokstart = 0;
+}
+
+/* Send a token back to the input and dispose of its parse tree. A named item
+ * first has its lang el consumption undone. Artificial items go back to the
+ * stream as trees, parsed items go back as text. Pending reverse code is put
+ * on deck and the parse is told to stop once the step count reaches the
+ * target. The tree reference, the kid and the parse tree are released at the
+ * end.
+ *
+ * Stops on:
+ *  PcrRevToken
+ */
+
+static void sendBack( Program *prg, Tree **sp, PdaRun *pdaRun, FsmRun *fsmRun,
+		InputStream *inputStream, ParseTree *parseTree )
+{
+	debug( REALM_PARSE, "sending back: %s\n", prg->rtd->lelInfo[parseTree->id].name );
+
+	if ( parseTree->flags & PF_NAMED ) {
+		/* Send the named lang el back first, then send back any leading
+		 * whitespace. */
+		undoConsumeLangEl( inputStream );
+	}
+
+	decrementSteps( pdaRun );
+
+	/* Check for reverse code. This is needed for artificial and parsed
+	 * items alike, so it is done once ahead of the split. */
+	if ( parseTree->flags & PF_HAS_RCODE ) {
+		debug( REALM_PARSE, "tree has rcode, setting on deck\n" );
+		pdaRun->onDeck = true;
+		parseTree->flags &= ~PF_HAS_RCODE;
+	}
+
+	if ( parseTree->flags & PF_ARTIFICIAL ) {
+		/* Artificial items were not parsed, instead they were sent in as
+		 * trees. The stream gets its own reference to the tree. */
+		treeUpref( parseTree->shadow->tree );
+
+		sendBackTree( inputStream, parseTree->shadow->tree );
+	}
+	else {
+		/* Push back the token data. */
+		sendBackText( fsmRun, inputStream, stringData( parseTree->shadow->tree->tokdata ),
+				stringLength( parseTree->shadow->tree->tokdata ) );
+
+		/* If eof was just sent back remember that it needs to be sent again. */
+		if ( parseTree->id == prg->rtd->eofLelIds[pdaRun->parserId] )
+			inputStream->eofSent = false;
+
+		/* If the item is bound then remove it from the bindings array. */
+		popBinding( pdaRun, parseTree );
+	}
+
+	if ( pdaRun->steps == pdaRun->targetSteps ) {
+		/* Step counts are printed as long, matching incrementSteps and
+		 * decrementSteps; the cast keeps the argument and the conversion
+		 * specifier in agreement. */
+		debug( REALM_PARSE, "trigger parse stop, steps = target = %ld\n",
+				(long)pdaRun->targetSteps );
+		pdaRun->stop = true;
+	}
+
+	/* Downref the tree that was sent back and free the kid. */
+	treeDownref( prg, sp, parseTree->shadow->tree );
+	kidFree( prg, parseTree->shadow );
+	parseTreeFree( prg, parseTree );
+}
+
+/* Record the parser's next region index on tree, but only when the ignore
+ * list was empty at the time the item arrived (emptyIgnore non-zero). If the
+ * token region table holds a non-zero entry right after the recorded index,
+ * the retry counter goes up by one (presumably an alternate region is
+ * available -- confirm against the table layout). */
+void setRegion( PdaRun *pdaRun, int emptyIgnore, ParseTree *tree )
+{
+	if ( !emptyIgnore )
+		return;
+
+	/* Recording the next region. */
+	tree->region = pdaRun->nextRegionInd;
+
+	if ( pdaRun->tables->tokenRegions[tree->region+1] != 0 )
+		pdaRun->numRetry++;
+}
+
+/* Add a scanned ignore tree to the parser's accumulated ignore list. The tree
+ * is wrapped in a kid and a parse tree node, pushed on the front of the list,
+ * given any pending reverse code, and flagged as a right ignore when the
+ * scanner's pre-region is not negative. Counts as one parse step. */
+void ignoreTree( Program *prg, FsmRun *fsmRun, PdaRun *pdaRun, Tree *tree )
+{
+	/* Must be sampled before the new item goes on the list. */
+	int wasEmpty = ( pdaRun->accumIgnore == 0 );
+
+	incrementSteps( pdaRun );
+
+	/* Wrap the tree. */
+	ParseTree *item = parseTreeAllocate( prg );
+	Kid *shadow = kidAllocate( prg );
+	shadow->tree = tree;
+	item->shadow = shadow;
+
+	/* Push on the front of the accumulated ignore list. */
+	item->next = pdaRun->accumIgnore;
+	pdaRun->accumIgnore = item;
+
+	transferReverseCode( pdaRun, item );
+
+	if ( fsmRun->preRegion >= 0 )
+		item->flags |= PF_RIGHT_IGNORE;
+
+	setRegion( pdaRun, wasEmpty, pdaRun->accumIgnore );
+}
+
+/* Add an ignore tree that was sent in as a tree (not scanned) to the parser's
+ * accumulated ignore list. Works like ignoreTree, except that the item is
+ * marked artificial and is never flagged as a right ignore. Counts as one
+ * parse step. */
+void ignoreTree2( Program *prg, PdaRun *pdaRun, Tree *tree )
+{
+	/* Must be sampled before the new item goes on the list. */
+	int wasEmpty = ( pdaRun->accumIgnore == 0 );
+
+	incrementSteps( pdaRun );
+
+	/* Wrap the tree and mark the wrapper artificial. */
+	ParseTree *item = parseTreeAllocate( prg );
+	item->flags |= PF_ARTIFICIAL;
+	Kid *shadow = kidAllocate( prg );
+	shadow->tree = tree;
+	item->shadow = shadow;
+
+	/* Push on the front of the accumulated ignore list. */
+	item->next = pdaRun->accumIgnore;
+	pdaRun->accumIgnore = item;
+
+	transferReverseCode( pdaRun, item );
+
+	setRegion( pdaRun, wasEmpty, pdaRun->accumIgnore );
+}
+
+/* Build the tree for a token of the given id around tokdata and return it
+ * wrapped in a kid. The tree starts with one reference and its attribute
+ * slots as the only children. For every capture attribute of the language
+ * element, the text between the enter and leave marks of the scanner is
+ * copied into a string and stored in the attribute. */
+Kid *makeTokenWithData( Program *prg, PdaRun *pdaRun, FsmRun *fsmRun,
+		InputStream *inputStream, int id, Head *tokdata )
+{
+	LangElInfo *lelInfo = prg->rtd->lelInfo;
+
+	/* Make the token object. */
+	long objectLength = lelInfo[id].objectLength;
+	Kid *attrs = allocAttrs( prg, objectLength );
+
+	Kid *token = kidAllocate( prg );
+	token->tree = treeAllocate( prg );
+
+	debug( REALM_PARSE, "made token %p\n", token->tree );
+
+	token->tree->refs = 1;
+	token->tree->id = id;
+	token->tree->tokdata = tokdata;
+
+	/* No children and ignores get added later. */
+	token->tree->child = attrs;
+
+	/* One string attribute per capture. The loop does not run when the
+	 * element has no captures. */
+	int c;
+	for ( c = 0; c < lelInfo[id].numCaptureAttr; c++ ) {
+		CaptureAttr *ca = &prg->rtd->captureAttr[lelInfo[id].captureAttr + c];
+		Head *captured = stringAllocFull( prg,
+				fsmRun->mark[ca->mark_enter], fsmRun->mark[ca->mark_leave]
+				- fsmRun->mark[ca->mark_enter] );
+		Tree *capturedStr = constructString( prg, captured );
+		treeUpref( capturedStr );
+		setAttr( token->tree, ca->offset, capturedStr );
+	}
+
+	return token;
+}
+
+/* Release a whole list of kids: each tree loses one reference and each kid is
+ * freed. */
+void clearIgnoreList( Program *prg, Tree **sp, Kid *kid )
+{
+	Kid *following;
+	for ( ; kid != 0; kid = following ) {
+		/* Grab the link before the kid goes away. */
+		following = kid->next;
+		treeDownref( prg, sp, kid->tree );
+		kidFree( prg, kid );
+	}
+}
+
+static void reportParseError( Program *prg, Tree **sp, PdaRun *pdaRun )
+{
+ Kid *kid = pdaRun->btPoint;
+ Head *deepest = 0;
+ while ( kid != 0 ) {
+ Head *head = kid->tree->tokdata;
+ if ( head != 0 && head->location != 0 ) {
+ if ( deepest == 0 || head->location->byte > deepest->location->byte )
+ deepest = head;
+ }
+ kid = kid->next;
+ }
+
+ Head *errorHead = 0;
+
+ /* If there are no error points on record assume the error occurred at the beginning of the stream. */
+ if ( deepest == 0 )
+ errorHead = stringAllocFull( prg, "PARSE ERROR at 1:1", 18 );
+ else {
+ debug( REALM_PARSE, "deepest location byte: %d\n", deepest->location->byte );
+
+ long line = deepest->location->line;
+ long i, column = deepest->location->column;
+
+ for ( i = 0; i < deepest->length; i++ ) {
+ if ( deepest->data[i] != '\n' )
+ column += 1;
+ else {
+ line += 1;
+ column = 1;
+ }
+ }
+
+ char formatted[128];
+ sprintf( formatted, "PARSE ERROR at %ld:%ld", line, column );
+ errorHead = stringAllocFull( prg, formatted, strlen(formatted) );
+ }
+
+ Tree *tree = constructString( prg, errorHead );
+ treeDownref( prg, sp, prg->lastParseError );
+ prg->lastParseError = tree;
+ treeUpref( prg->lastParseError );
+}
+
+static void attachRightIgnore( Program *prg, Tree **sp, PdaRun *pdaRun, ParseTree *parseTree )
+{ /* Move items from the accumulated ignore list onto parseTree as its right
+ * ignore. Nothing happens when the list is empty, or when the id of the
+ * stack top is not positive and below firstNonTermId (presumably: not a
+ * terminal -- confirm). The items taken are those that follow the last list
+ * entry lacking PF_RIGHT_IGNORE; when every entry carries the flag the
+ * whole list is taken. */
+ if ( pdaRun->accumIgnore == 0 )
+ return;
+
+ if ( pdaRun->stackTop->id > 0 && pdaRun->stackTop->id < prg->rtd->firstNonTermId ) {
+ /* OK, do it */
+ debug( REALM_PARSE, "attaching right ignore\n" );
+
+ /* A right ignore list must not already be attached. */
+ assert( ! ( parseTree->flags & PF_RIGHT_IL_ATTACHED ) );
+
+ ParseTree *accum = pdaRun->accumIgnore;
+
+ ParseTree *stopAt = 0, *use = accum; /* stopAt ends up on the last entry that is not a right ignore. */
+ while ( use != 0 ) {
+ if ( ! (use->flags & PF_RIGHT_IGNORE) )
+ stopAt = use;
+ use = use->next;
+ }
+
+ if ( stopAt != 0 ) {
+ /* Stop at was set. Make it the last item in the ignore list. Take
+ * the rest. The rest may be empty, in which case nothing gets
+ * attached below. */
+ accum = stopAt->next;
+ stopAt->next = 0;
+ }
+ else {
+ /* Stop at was never set. All right ignore. Use it all. */
+ pdaRun->accumIgnore = 0;
+ }
+
+ /* The data list needs to be extracted and reversed. Note that the loop
+ * below relinks the parse tree list in reverse order as well. */
+ ParseTree *child = accum, *last = 0;
+ Kid *dataChild = 0, *dataLast = 0;
+
+ while ( child ) {
+ dataChild = child->shadow;
+ ParseTree *next = child->next;
+
+ /* Reverse the lists. */
+ dataChild->next = dataLast;
+ child->next = last;
+
+ /* Detach the parse tree from the data tree. */
+ child->shadow = 0;
+
+ /* Keep the last for reversal. */
+ dataLast = dataChild;
+ last = child;
+
+ child = next;
+ }
+
+ /* Last is now the first. */
+ parseTree->rightIgnore = last;
+
+ if ( dataChild != 0 ) { /* Only when at least one item was taken. */
+ debug( REALM_PARSE, "attaching ignore right\n" );
+
+ Kid *ignoreKid = dataLast;
+
+ /* Wrap the data list in a new LEL_ID_IGNORE tree and push it onto
+ * the tree of parseTree as a right ignore.
+ * NOTE(review): the earlier wording spoke of copying the ignore
+ * list first; no copy is made here. */
+ Tree *rightIgnore = 0;
+
+ rightIgnore = treeAllocate( prg );
+ rightIgnore->id = LEL_ID_IGNORE;
+ rightIgnore->child = ignoreKid;
+
+ Tree *pushTo = parseTree->shadow->tree;
+
+ pushTo = pushRightIgnore( prg, pushTo, rightIgnore );
+
+ parseTree->shadow->tree = pushTo; /* pushRightIgnore may hand back a different tree. */
+
+ parseTree->flags |= PF_RIGHT_IL_ATTACHED;
+ }
+ }
+}
+
+static void attachLeftIgnore( Program *prg, Tree **sp, PdaRun *pdaRun, ParseTree *parseTree )
+{
+ /* Move the entire accumulated ignore list onto parseTree as its left
+ * ignore, leaving the accumulated list empty. A left ignore list must not
+ * already be attached. */
+ assert( ! ( parseTree->flags & PF_LEFT_IL_ATTACHED ) );
+
+ ParseTree *accum = pdaRun->accumIgnore;
+ pdaRun->accumIgnore = 0;
+
+ /* The data list needs to be extracted and reversed. Note that the loop
+ * below relinks the parse tree list in reverse order as well. */
+ ParseTree *child = accum, *last = 0;
+ Kid *dataChild = 0, *dataLast = 0;
+
+ while ( child ) {
+ dataChild = child->shadow;
+ ParseTree *next = child->next;
+
+ /* Reverse the lists. */
+ dataChild->next = dataLast;
+ child->next = last;
+
+ /* Detach the parse tree from the data tree. */
+ child->shadow = 0;
+
+ /* Keep the last for reversal. */
+ dataLast = dataChild;
+ last = child;
+
+ child = next;
+ }
+
+ /* Last is now the first. */
+ parseTree->leftIgnore = last;
+
+ if ( dataChild != 0 ) { /* Only when the accumulated list was not empty. */
+ debug( REALM_PARSE, "attaching left ignore\n" );
+
+ Kid *ignoreKid = dataChild; /* Head of the reversed data list; same kid as dataLast after the loop. */
+
+ /* Make the ignore list for the left-ignore. */
+ Tree *leftIgnore = treeAllocate( prg );
+ leftIgnore->id = LEL_ID_IGNORE;
+ leftIgnore->child = ignoreKid;
+
+ Tree *pushTo = parseTree->shadow->tree;
+
+ pushTo = pushLeftIgnore( prg, pushTo, leftIgnore );
+
+ parseTree->shadow->tree = pushTo; /* pushLeftIgnore may hand back a different tree. */
+
+ parseTree->flags |= PF_LEFT_IL_ATTACHED;
+ }
+}
+
+/* Not currently used. Need to revive this. WARNING: untested changes here.
+ *
+ * Reverse of attachRightIgnore: pop the right ignore tree off the tree of
+ * parseTree (when one is attached) and move the items of
+ * parseTree->rightIgnore back to the accumulated ignore list, pairing each
+ * parse tree again with its data kid. Both lists are reversed on the way. */
+static void detachRightIgnore( Program *prg, Tree **sp, PdaRun *pdaRun, ParseTree *parseTree )
+{
+ /* Right ignore are immediately discarded since they are copies of
+ * left-ignores.
+ * NOTE(review): the code below does not discard them, it moves them back
+ * to accumIgnore -- this sentence looks stale. */
+ Tree *rightIgnore = 0;
+ if ( parseTree->flags & PF_RIGHT_IL_ATTACHED ) {
+ Tree *popFrom = parseTree->shadow->tree;
+
+ popFrom = popRightIgnore( prg, sp, popFrom, &rightIgnore );
+
+ parseTree->shadow->tree = popFrom;
+
+ parseTree->flags &= ~PF_RIGHT_IL_ATTACHED;
+ }
+
+ if ( parseTree->rightIgnore != 0 ) {
+ assert( rightIgnore != 0 );
+
+ /* Transfer the trees to accumIgnore. This overwrites whatever
+ * accumIgnore held before. */
+ ParseTree *ignore = parseTree->rightIgnore;
+ parseTree->rightIgnore = 0;
+
+ Kid *dataIgnore = rightIgnore->child;
+ rightIgnore->child = 0;
+
+ ParseTree *last = 0;
+ Kid *dataLast = 0;
+ while ( ignore != 0 ) { /* Walks both lists in step; the data list is assumed to be at least as long. */
+ ParseTree *next = ignore->next;
+ Kid *dataNext = dataIgnore->next;
+
+ /* Put the data trees underneath the parse trees. */
+ ignore->shadow = dataIgnore;
+
+ /* Reverse. */
+ ignore->next = last;
+ dataIgnore->next = dataLast;
+
+ /* Keep last for reversal. */
+ last = ignore;
+ dataLast = dataIgnore;
+
+ ignore = next;
+ dataIgnore = dataNext;
+ }
+
+ pdaRun->accumIgnore = last;
+
+ treeDownref( prg, sp, rightIgnore ); /* Unlike detachLeftIgnore, the downref happens only inside this branch. */
+ }
+}
+
+static void detachLeftIgnore( Program *prg, Tree **sp, PdaRun *pdaRun, FsmRun *fsmRun, ParseTree *parseTree )
+{
+ /* Detach left. Reverse of attachLeftIgnore: pop the left ignore tree off
+ * the tree of parseTree (when one is attached) and move the items of
+ * parseTree->leftIgnore back to the accumulated ignore list, pairing each
+ * parse tree again with its data kid. fsmRun is not referenced. */
+ Tree *leftIgnore = 0;
+ if ( parseTree->flags & PF_LEFT_IL_ATTACHED ) {
+ Tree *popFrom = parseTree->shadow->tree;
+
+ popFrom = popLeftIgnore( prg, sp, popFrom, &leftIgnore );
+
+ parseTree->shadow->tree = popFrom;
+
+ parseTree->flags &= ~PF_LEFT_IL_ATTACHED;
+ }
+
+ if ( parseTree->leftIgnore != 0 ) {
+ assert( leftIgnore != 0 );
+
+ /* Transfer the trees to accumIgnore. This overwrites whatever
+ * accumIgnore held before. */
+ ParseTree *ignore = parseTree->leftIgnore;
+ parseTree->leftIgnore = 0;
+
+ Kid *dataIgnore = leftIgnore->child;
+ leftIgnore->child = 0;
+
+ ParseTree *last = 0;
+ Kid *dataLast = 0;
+ while ( ignore != 0 ) { /* Walks both lists in step; the data list is assumed to be at least as long. */
+ ParseTree *next = ignore->next;
+ Kid *dataNext = dataIgnore->next;
+
+ /* Put the data trees underneath the parse trees. */
+ ignore->shadow = dataIgnore;
+
+ /* Reverse. */
+ ignore->next = last;
+ dataIgnore->next = dataLast;
+
+ /* Keep last for reversal. */
+ last = ignore;
+ dataLast = dataIgnore;
+
+ ignore = next;
+ dataIgnore = dataNext;
+ }
+
+ pdaRun->accumIgnore = last;
+ }
+
+ treeDownref( prg, sp, leftIgnore ); /* Reached with a null leftIgnore when nothing was attached; presumably treeDownref accepts null -- confirm. */
+}
+
+void handleError( Program *prg, Tree **sp, PdaRun *pdaRun )
+{
+ /* Check the result. */
+ if ( pdaRun->parseError ) {
+ /* Error occured in the top-level parser. */
+ reportParseError( prg, sp, pdaRun );
+ }
+ else {
+ if ( isParserStopFinished( pdaRun ) ) {
+ debug( REALM_PARSE, "stopping the parse\n" );
+ pdaRun->stopParsing = true;
+ }
+ }
+}
+
+/* Turn the text just matched by the scanner into an ignore tree of the given
+ * id and hand it to the parser's ignore list. The match is consumed from the
+ * input stream and the stream position is advanced over it. */
+void sendIgnore( Program *prg, Tree **sp, InputStream *inputStream, FsmRun *fsmRun, PdaRun *pdaRun, long id )
+{
+	debug( REALM_PARSE, "ignoring: %s\n", prg->rtd->lelInfo[id].name );
+
+	/* Make the ignore string. */
+	Head *ignoreStr = extractMatch( prg, fsmRun, inputStream );
+	updatePosition( inputStream, fsmRun->tokstart, ignoreStr->length );
+
+	/* The precision taken by %.*s must be an int, so the length is cast in
+	 * the same way sendBackText does it. */
+	debug( REALM_PARSE, "ignoring: %.*s\n", (int)ignoreStr->length, ignoreStr->data );
+
+	Tree *tree = treeAllocate( prg );
+	tree->refs = 1;
+	tree->id = id;
+	tree->tokdata = ignoreStr;
+
+	/* Send it to the pdaRun. */
+	ignoreTree( prg, fsmRun, pdaRun, tree );
+}
+
+
+/* Doesn't consume. Builds a string head for the text between tokstart and the
+ * scan position, pointing into the scanner's buffer, and stamps it with the
+ * stream's current line, column and byte offset. */
+Head *peekMatch( Program *prg, FsmRun *fsmRun, InputStream *inputStream )
+{
+	long length = fsmRun->p - fsmRun->tokstart;
+	Head *head = stringAllocPointer( prg, fsmRun->tokstart, length );
+	head->location = locationAllocate( prg );
+	head->location->line = inputStream->line;
+	head->location->column = inputStream->column;
+	head->location->byte = inputStream->byte;
+
+	/* The byte offset is advanced by long amounts, so it is printed as a
+	 * long; the cast keeps argument and conversion specifier in agreement. */
+	debug( REALM_PARSE, "location byte: %ld\n", (long)inputStream->byte );
+
+	return head;
+}
+
+/* Consumes. Builds a string head for the text between tokstart and the scan
+ * position, pointing into the scanner's buffer, stamps it with the stream's
+ * current line, column and byte offset, and then consumes that many bytes
+ * from the input stream. The stream position itself is not advanced here. */
+Head *extractMatch( Program *prg, FsmRun *fsmRun, InputStream *inputStream )
+{
+	long length = fsmRun->p - fsmRun->tokstart;
+	Head *head = stringAllocPointer( prg, fsmRun->tokstart, length );
+	head->location = locationAllocate( prg );
+	head->location->line = inputStream->line;
+	head->location->column = inputStream->column;
+	head->location->byte = inputStream->byte;
+
+	/* The byte offset is advanced by long amounts, so it is printed as a
+	 * long; the cast keeps argument and conversion specifier in agreement. */
+	debug( REALM_PARSE, "location byte: %ld\n", (long)inputStream->byte );
+
+	consumeData( inputStream, length );
+
+	return head;
+}
+
+/* Turn the text just matched by the scanner into a token of the given id and
+ * make it the parser's next input. The match is consumed from the input
+ * stream, the stream position is advanced over it, and one parse step is
+ * counted. */
+static void sendToken( Program *prg, Tree **sp, InputStream *inputStream, FsmRun *fsmRun, PdaRun *pdaRun, long id )
+{
+	/* Sampled up front, it is needed for the region bookkeeping below. */
+	int emptyIgnore = pdaRun->accumIgnore == 0;
+
+	/* Make the token data. */
+	Head *tokdata = extractMatch( prg, fsmRun, inputStream );
+
+	/* The precision taken by %.*s must be an int. */
+	debug( REALM_PARSE, "token: %s  text: %.*s\n",
+		prg->rtd->lelInfo[id].name,
+		(int)stringLength(tokdata), stringData(tokdata) );
+
+	updatePosition( inputStream, fsmRun->tokstart, tokdata->length );
+
+	Kid *input = makeTokenWithData( prg, pdaRun, fsmRun, inputStream, id, tokdata );
+
+	incrementSteps( pdaRun );
+
+	ParseTree *parseTree = parseTreeAllocate( prg );
+	parseTree->id = input->tree->id;
+	parseTree->shadow = input;
+
+	pdaRun->parseInput = parseTree;
+
+	/* Store any alternate scanning region. The token has already been
+	 * dereferenced above, so only the parser state needs testing. */
+	if ( pdaRun->cs >= 0 )
+		setRegion( pdaRun, emptyIgnore, parseTree );
+}
+
+/* Take the next tree off the input stream and make it the parser's next
+ * input. The parse tree node is marked artificial because the item was not
+ * scanned from text. Counts as one parse step. */
+static void sendTree( Program *prg, Tree **sp, PdaRun *pdaRun, FsmRun *fsmRun, InputStream *inputStream )
+{
+	Kid *treeKid = kidAllocate( prg );
+	treeKid->tree = consumeTree( inputStream );
+
+	incrementSteps( pdaRun );
+
+	ParseTree *item = parseTreeAllocate( prg );
+	item->id = treeKid->tree->id;
+	item->flags |= PF_ARTIFICIAL;
+	item->shadow = treeKid;
+
+	pdaRun->parseInput = item;
+}
+
+/* Take the next tree off the input stream and add it to the parser's ignore
+ * list as an artificial ignore item. */
+static void sendIgnoreTree( Program *prg, Tree **sp, PdaRun *pdaRun, FsmRun *fsmRun, InputStream *inputStream )
+{
+	ignoreTree2( prg, pdaRun, consumeTree( inputStream ) );
+}
+
+/* Send a CI token of the given id to the parser. The token carries freshly
+ * allocated token data that holds only the current stream location; nothing
+ * is extracted or consumed from the input. Counts as one parse step. */
+static void sendCi( Program *prg, Tree **sp, InputStream *inputStream, FsmRun *fsmRun, PdaRun *pdaRun, int id )
+{
+	debug( REALM_PARSE, "token: CI\n" );
+
+	/* Sampled up front, it is needed for the region bookkeeping below. */
+	int emptyIgnore = pdaRun->accumIgnore == 0;
+
+	/* Make the token data. */
+	Head *tokdata = headAllocate( prg );
+	tokdata->location = locationAllocate( prg );
+	tokdata->location->line = inputStream->line;
+	tokdata->location->column = inputStream->column;
+	tokdata->location->byte = inputStream->byte;
+
+	/* The precision taken by %.*s must be an int. */
+	debug( REALM_PARSE, "token: %s  text: %.*s\n",
+		prg->rtd->lelInfo[id].name,
+		(int)stringLength(tokdata), stringData(tokdata) );
+
+	updatePosition( inputStream, fsmRun->tokstart, tokdata->length );
+
+	Kid *input = makeTokenWithData( prg, pdaRun, fsmRun, inputStream, id, tokdata );
+
+	incrementSteps( pdaRun );
+
+	ParseTree *parseTree = parseTreeAllocate( prg );
+	parseTree->id = input->tree->id;
+	parseTree->shadow = input;
+
+	pdaRun->parseInput = parseTree;
+
+	/* Store any alternate scanning region. The token has already been
+	 * dereferenced above, so only the parser state needs testing. */
+	if ( pdaRun->cs >= 0 )
+		setRegion( pdaRun, emptyIgnore, parseTree );
+}
+
+
+/* Send the end-of-file token of this parser to the parser. The token data
+ * holds only the current stream location. The scanner is also positioned at
+ * the entry state of the parser's next region. Counts as one parse step. */
+static void sendEof( Program *prg, Tree **sp, InputStream *inputStream, FsmRun *fsmRun, PdaRun *pdaRun )
+{
+	debug( REALM_PARSE, "token: _EOF\n" );
+
+	incrementSteps( pdaRun );
+
+	/* Token data that only records where the input ended. */
+	Head *eofData = headAllocate( prg );
+	eofData->location = locationAllocate( prg );
+	eofData->location->line = inputStream->line;
+	eofData->location->column = inputStream->column;
+	eofData->location->byte = inputStream->byte;
+
+	/* The token tree, holding one reference. */
+	Kid *eofKid = kidAllocate( prg );
+	eofKid->tree = treeAllocate( prg );
+
+	eofKid->tree->refs = 1;
+	eofKid->tree->id = prg->rtd->eofLelIds[pdaRun->parserId];
+	eofKid->tree->tokdata = eofData;
+
+	/* Set the state using the state of the parser. */
+	fsmRun->region = pdaRunGetNextRegion( pdaRun, 0 );
+	fsmRun->preRegion = pdaRunGetNextPreRegion( pdaRun );
+	fsmRun->cs = fsmRun->tables->entryByRegion[fsmRun->region];
+
+	ParseTree *eofItem = parseTreeAllocate( prg );
+	eofItem->id = eofKid->tree->id;
+	eofItem->shadow = eofKid;
+
+	pdaRun->parseInput = eofItem;
+}
+
+/* Get the scanner ready for the next token. The per-token scanner variables
+ * are cleared, the region and pre-region are taken from the parser, the start
+ * state is chosen from them, and the mark array is zeroed. With a positive
+ * pre-region the scan starts in the pre-region and the entry of the token
+ * region is kept in ncs; otherwise the scan starts in the token region. */
+void newToken( Program *prg, PdaRun *pdaRun, FsmRun *fsmRun )
+{
+	/* Init the scanner vars. */
+	fsmRun->act = 0;
+	fsmRun->tokstart = 0;
+	fsmRun->tokend = 0;
+	fsmRun->matchedToken = 0;
+
+	/* Set the state using the state of the parser. */
+	fsmRun->region = pdaRunGetNextRegion( pdaRun, 0 );
+	fsmRun->preRegion = pdaRunGetNextPreRegion( pdaRun );
+
+	if ( fsmRun->preRegion <= 0 ) {
+		debug( REALM_PARSE, "scanning using token region: %s\n",
+				prg->rtd->regionInfo[fsmRun->region].name );
+
+		fsmRun->cs = fsmRun->tables->entryByRegion[fsmRun->region];
+	}
+	else {
+		debug( REALM_PARSE, "pre region for next token: %s\n",
+				prg->rtd->regionInfo[fsmRun->preRegion].name );
+
+		fsmRun->cs = fsmRun->tables->entryByRegion[fsmRun->preRegion];
+		fsmRun->ncs = fsmRun->tables->entryByRegion[fsmRun->region];
+	}
+
+	/* Clear the mark array. */
+	memset( fsmRun->mark, 0, sizeof(fsmRun->mark) );
+}
+
+/*
+ * Record a backtrack point. The tree used is the one under the head of the
+ * accumulated-ignore list if there is one, otherwise the one under the head
+ * of the token list. If neither list has an entry nothing is pushed.
+ */
+static void pushBtPoint( Program *prg, PdaRun *pdaRun )
+{
+	Tree *anchor = 0;
+	if ( pdaRun->accumIgnore != 0 )
+		anchor = pdaRun->accumIgnore->shadow->tree;
+	else if ( pdaRun->tokenList != 0 )
+		anchor = pdaRun->tokenList->kid->tree;
+
+	/* Nothing to anchor the backtrack point to. */
+	if ( anchor == 0 )
+		return;
+
+	debug( REALM_PARSE, "pushing bt point with location byte %d\n",
+			( anchor->tokdata != 0 && anchor->tokdata->location != 0 ) ?
+			anchor->tokdata->location->byte : 0 );
+
+	/* The new list cell takes its own reference on the tree. */
+	Kid *point = kidAllocate( prg );
+	point->tree = anchor;
+	treeUpref( anchor );
+
+	/* Push onto the front of the btPoint list. */
+	point->next = pdaRun->btPoint;
+	pdaRun->btPoint = point;
+}
+
+
+#define SCAN_UNDO -7 /* scanToken: pdaRun->triggerUndo was set, nothing was scanned. */
+#define SCAN_IGNORE -6 /* scanToken: getData reported INPUT_IGNORE with no token in progress. */
+#define SCAN_TREE -5 /* scanToken: getData reported INPUT_TREE with no token in progress. */
+#define SCAN_TRY_AGAIN_LATER -4 /* scanToken: getData reported INPUT_EOD. */
+#define SCAN_ERROR -3 /* scanToken: scanner in its error state and the region has no default token. */
+#define SCAN_LANG_EL -2 /* scanToken: getData reported INPUT_LANG_EL with no token in progress. */
+#define SCAN_EOF -1 /* scanToken: getData reported INPUT_EOF with no token in progress. */
+
+/*
+ * Run the scanner until it has something to report to the parser.
+ *
+ * Returns the id of the matched token, the current region's default token
+ * when the scanner hits its error state and the region has one, or one of
+ * the negative SCAN_* codes. When the run buffer fills up in the middle of a
+ * token, the token prefix is moved to a fresh buffer and tokstart, tokend
+ * and the marks are shifted to follow it.
+ */
+long scanToken( Program *prg, PdaRun *pdaRun, FsmRun *fsmRun, InputStream *inputStream )
+{
+	if ( pdaRun->triggerUndo )
+		return SCAN_UNDO;
+
+	while ( true ) {
+		fsmExecute( fsmRun, inputStream );
+
+		/* First check if scanning stopped because we have a token. */
+		if ( fsmRun->matchedToken > 0 ) {
+			/* If the token has a marker indicating the end (due to trailing
+			 * context) then adjust data now. */
+			LangElInfo *lelInfo = prg->rtd->lelInfo;
+			if ( lelInfo[fsmRun->matchedToken].markId >= 0 )
+				fsmRun->p = fsmRun->mark[lelInfo[fsmRun->matchedToken].markId];
+
+			return fsmRun->matchedToken;
+		}
+
+		/* Check for error. */
+		if ( fsmRun->cs == fsmRun->tables->errorState ) {
+			/* If a token was started, but not finished (tokstart != 0) then
+			 * restore data to the beginning of that token. */
+			if ( fsmRun->tokstart != 0 )
+				fsmRun->p = fsmRun->tokstart;
+
+			/* Check for a default token in the region. If one is there
+			 * then send it and continue with the processing loop. */
+			if ( prg->rtd->regionInfo[fsmRun->region].defaultToken >= 0 ) {
+				fsmRun->tokstart = fsmRun->tokend = fsmRun->p;
+				return prg->rtd->regionInfo[fsmRun->region].defaultToken;
+			}
+
+			return SCAN_ERROR;
+		}
+
+		/* Got here because the state machine didn't match a token or
+		 * encounter an error. Must be because we got to the end of the buffer
+		 * data. */
+		assert( fsmRun->p == fsmRun->pe );
+
+		/* There may be space left in the current buffer. If not then we need
+		 * to make some. */
+		long space = fsmRun->runBuf->data + FSM_BUFSIZE - fsmRun->pe;
+		if ( space == 0 ) {
+			/* Create a new run buf. */
+			RunBuf *newBuf = newRunBuf();
+
+			/* If partway through a token then preserve the prefix. */
+			long have = 0;
+
+			if ( fsmRun->tokstart == 0 ) {
+				/* No prefix. We filled the previous buffer. */
+				fsmRun->runBuf->length = FSM_BUFSIZE;
+			}
+			else {
+				int i;
+
+				debug( REALM_SCAN, "copying data over to new buffer\n" );
+				assert( fsmRun->runBuf->offset == 0 );
+
+				if ( fsmRun->tokstart == fsmRun->runBuf->data ) {
+					/* A token is started and it is already at the beginning
+					 * of the current buffer. This means buffer is full and it
+					 * must be grown. Probably need to do this sooner. */
+					fatal( "OUT OF BUFFER SPACE\n" );
+				}
+
+				/* There is data that needs to be shifted over. */
+				have = fsmRun->pe - fsmRun->tokstart;
+				memcpy( newBuf->data, fsmRun->tokstart, have );
+
+				/* Compute the length of the previous buffer. */
+				fsmRun->runBuf->length = FSM_BUFSIZE - have;
+
+				/* Compute tokstart and tokend. dist is the displacement from
+				 * the start of the new buffer to the old token start, so it
+				 * is a difference of pointers into two different buffers. */
+				long dist = fsmRun->tokstart - newBuf->data;
+
+				fsmRun->tokend -= dist;
+				fsmRun->tokstart = newBuf->data;
+
+				/* Shift any markers. */
+				for ( i = 0; i < MARK_SLOTS; i++ ) {
+					if ( fsmRun->mark[i] != 0 )
+						fsmRun->mark[i] -= dist;
+				}
+			}
+
+			fsmRun->p = fsmRun->pe = newBuf->data + have;
+			fsmRun->peof = 0;
+
+			/* The new buffer becomes the head of the run buffer list. */
+			newBuf->next = fsmRun->runBuf;
+			fsmRun->runBuf = newBuf;
+		}
+
+		/* We don't have any data. What is next in the input inputStream? */
+		space = fsmRun->runBuf->data + FSM_BUFSIZE - fsmRun->pe;
+		assert( space > 0 );
+
+		/* Get more data. */
+		int have = fsmRun->tokstart != 0 ? fsmRun->p - fsmRun->tokstart : 0;
+		int len = 0;
+		/* space is a long, so it must be printed with %ld. */
+		debug( REALM_SCAN, "fetching data: have: %d space: %ld\n", have, space );
+		int type = getData( fsmRun, inputStream, have, fsmRun->p, space, &len );
+
+		/* For every non-data input type: if a token is in progress, mark the
+		 * end of the available data via peof and loop so the scanner can
+		 * finish it; otherwise report the condition to the caller. */
+		switch ( type ) {
+			case INPUT_DATA:
+				fsmRun->pe = fsmRun->p + len;
+				break;
+
+			case INPUT_EOF:
+				if ( fsmRun->tokstart != 0 )
+					fsmRun->peof = fsmRun->pe;
+				else
+					return SCAN_EOF;
+				break;
+
+			case INPUT_EOD:
+				return SCAN_TRY_AGAIN_LATER;
+
+			case INPUT_LANG_EL:
+				if ( fsmRun->tokstart != 0 )
+					fsmRun->peof = fsmRun->pe;
+				else
+					return SCAN_LANG_EL;
+				break;
+
+			case INPUT_TREE:
+				if ( fsmRun->tokstart != 0 )
+					fsmRun->peof = fsmRun->pe;
+				else
+					return SCAN_TREE;
+				break;
+			case INPUT_IGNORE:
+				if ( fsmRun->tokstart != 0 )
+					fsmRun->peof = fsmRun->pe;
+				else
+					return SCAN_IGNORE;
+				break;
+		}
+	}
+
+	/* Should not be reached. */
+	return SCAN_ERROR;
+}
+
+/*
+ * Main scan-and-parse driver, written as a resumable routine: 'entry' selects
+ * the case label at which execution continues, and several of those labels
+ * sit inside the main loop.
+ *
+ * Stops on (returns to the caller with):
+ * PcrPreEof
+ * PcrGeneration
+ * any code other than PcrDone handed back by parseToken; the re-entry
+ * labels for those here are PcrReduction and PcrReverse
+ * PcrDone once the loop has ended
+ *
+ * Before returning PcrPreEof or PcrGeneration, pdaRun->fi, pdaRun->frameId and
+ * pdaRun->code are set. Presumably the caller runs that code and calls again
+ * with the returned value as 'entry' -- TODO confirm against the caller.
+ *
+ * NOTE(review): this list previously named PcrRevReduction, PcrRevIgnore and
+ * PcrRevToken; none of them appears in this function -- confirm.
+ */
+
+long parseLoop( Program *prg, Tree **sp, PdaRun *pdaRun,
+ FsmRun *fsmRun, InputStream *inputStream, long entry )
+{
+ LangElInfo *lelInfo = prg->rtd->lelInfo;
+ /* Dispatch to the re-entry point; lelInfo above is reloaded on every entry. */
+switch ( entry ) {
+case PcrStart:
+
+ pdaRun->stop = false;
+
+ while ( true ) {
+ debug( REALM_PARSE, "parse loop start %d:%d\n", inputStream->line, inputStream->column );
+
+ /* Pull the current scanner from the parser. This can change during
+ * parsing due to inputStream pushes, usually for the purpose of includes.
+ * */
+ pdaRun->tokenId = scanToken( prg, pdaRun, fsmRun, inputStream );
+ /* Scan error while a pre region is recorded: drop the pre region, continue
+ * from the state saved in ncs and scan again.
+ * NOTE(review): newToken() only sets ncs when preRegion > 0, while this
+ * test is >= 0 -- confirm 0 cannot be seen here. */
+ if ( pdaRun->tokenId == SCAN_ERROR ) {
+ if ( fsmRun->preRegion >= 0 ) {
+ fsmRun->preRegion = -1;
+ fsmRun->cs = fsmRun->ncs;
+ debug( REALM_PARSE, "moving from pre region to main region: %s\n",
+ prg->rtd->regionInfo[fsmRun->region].name );
+ continue;
+ }
+ }
+ /* Scan error in a region that has a collect-ignore element (ciLelId > 0):
+ * send that element and go straight to the parser. */
+ if ( pdaRun->tokenId == SCAN_ERROR &&
+ ( prg->rtd->regionInfo[fsmRun->region].ciLelId > 0 ) )
+ {
+ debug( REALM_PARSE, "sending a collect ignore\n" );
+ sendCi( prg, sp, inputStream, fsmRun, pdaRun, prg->rtd->regionInfo[fsmRun->region].ciLelId );
+ goto yes;
+ }
+
+ if ( pdaRun->tokenId == SCAN_TRY_AGAIN_LATER ) {
+ debug( REALM_PARSE, "scanner says try again later\n" );
+ break;
+ }
+ /* No input may be pending for the parser at this point. */
+ assert( pdaRun->parseInput == 0 );
+ pdaRun->parseInput = 0;
+
+ /* Check for EOF. */
+ if ( pdaRun->tokenId == SCAN_EOF ) {
+ inputStream->eofSent = true;
+ sendEof( prg, sp, inputStream, fsmRun, pdaRun );
+
+ pdaRun->frameId = prg->rtd->regionInfo[fsmRun->region].eofFrameId;
+
+ if ( prg->ctxDepParsing && pdaRun->frameId >= 0 ) {
+ debug( REALM_PARSE, "HAVE PRE_EOF BLOCK\n" );
+
+ pdaRun->fi = &prg->rtd->frameInfo[pdaRun->frameId];
+ pdaRun->code = pdaRun->fi->codeWV;
+
+return PcrPreEof; /* Suspend; execution resumes at the label on the next line. */
+case PcrPreEof:
+ makeReverseCode( pdaRun );
+ }
+ }
+ else if ( pdaRun->tokenId == SCAN_UNDO ) {
+ /* Fall through with parseInput = 0. FIXME: Do we need to send back ignore? */
+ debug( REALM_PARSE, "invoking undo from the scanner\n" );
+ }
+ else if ( pdaRun->tokenId == SCAN_ERROR ) {
+ /* Scanner error, maybe retry. */
+ if ( pdaRun->accumIgnore == 0 && pdaRunGetNextRegion( pdaRun, 1 ) != 0 ) {
+ debug( REALM_PARSE, "scanner failed, trying next region\n" );
+
+ pdaRun->nextRegionInd += 1;
+ goto skipSend;
+ }
+ else if ( pdaRun->numRetry > 0 ) {
+ debug( REALM_PARSE, "invoking parse error from the scanner\n" );
+
+ /* Fall through to send null (error). */
+ pushBtPoint( prg, pdaRun );
+ }
+ else {
+ debug( REALM_PARSE, "no alternate scanning regions\n" );
+
+ /* There are no alternative scanning regions to try, nor are
+ * there any alternatives stored in the current parse tree. No
+ * choice but to end the parse. */
+ pushBtPoint( prg, pdaRun );
+
+ reportParseError( prg, sp, pdaRun );
+ pdaRun->parseError = 1;
+ goto skipSend;
+ }
+ }
+ else if ( pdaRun->tokenId == SCAN_LANG_EL ) {
+ debug( REALM_PARSE, "sending an named lang el\n" );
+
+ /* A named language element (parsing colm program). */
+ sendNamedLangEl( prg, sp, pdaRun, fsmRun, inputStream );
+ }
+ else if ( pdaRun->tokenId == SCAN_TREE ) {
+ debug( REALM_PARSE, "sending a tree\n" );
+
+ /* A tree already built. */
+ sendTree( prg, sp, pdaRun, fsmRun, inputStream );
+ }
+ else if ( pdaRun->tokenId == SCAN_IGNORE ) {
+ debug( REALM_PARSE, "sending an ignore token\n" );
+
+ /* A tree to ignore. */
+ sendIgnoreTree( prg, sp, pdaRun, fsmRun, inputStream );
+ goto skipSend;
+ }
+ else if ( prg->ctxDepParsing && lelInfo[pdaRun->tokenId].frameId >= 0 ) {
+ /* Has a generation action. */
+ debug( REALM_PARSE, "token gen action: %s\n",
+ prg->rtd->lelInfo[pdaRun->tokenId].name );
+
+ /* Make the token data. */
+ pdaRun->tokdata = peekMatch( prg, fsmRun, inputStream );
+
+ /* Note that we don't update the position now. It is done when the token
+ * data is pulled from the inputStream. */
+
+ fsmRun->p = fsmRun->tokstart;
+ fsmRun->tokstart = 0;
+
+ pdaRun->fi = &prg->rtd->frameInfo[prg->rtd->lelInfo[pdaRun->tokenId].frameId];
+ pdaRun->frameId = prg->rtd->lelInfo[pdaRun->tokenId].frameId;
+ pdaRun->code = pdaRun->fi->codeWV;
+
+return PcrGeneration; /* Suspend; execution resumes at the label on the next line. */
+case PcrGeneration:
+
+ makeReverseCode( pdaRun );
+
+ /* Finished with the match text. */
+ stringFree( prg, pdaRun->tokdata );
+
+ goto skipSend;
+ }
+ else if ( lelInfo[pdaRun->tokenId].ignore ) {
+ debug( REALM_PARSE, "sending an ignore token: %s\n",
+ prg->rtd->lelInfo[pdaRun->tokenId].name );
+
+ /* Is an ignore token. */
+ sendIgnore( prg, sp, inputStream, fsmRun, pdaRun, pdaRun->tokenId );
+ goto skipSend;
+ }
+ else {
+ debug( REALM_PARSE, "sending an a plain old token: %s\n",
+ prg->rtd->lelInfo[pdaRun->tokenId].name );
+
+ /* Is a plain token. */
+ sendToken( prg, sp, inputStream, fsmRun, pdaRun, pdaRun->tokenId );
+ }
+yes:
+ /* Hand whatever was queued (possibly nothing) to the parser. */
+ if ( pdaRun->parseInput != 0 )
+ transferReverseCode( pdaRun, pdaRun->parseInput );
+
+ if ( pdaRun->parseInput != 0 ) {
+ /* If it's a nonterminal with a termdup then flip the parse tree to the terminal. */
+ if ( pdaRun->parseInput->id >= prg->rtd->firstNonTermId ) {
+ pdaRun->parseInput->id = prg->rtd->lelInfo[pdaRun->parseInput->id].termDupId;
+ pdaRun->parseInput->flags |= PF_TERM_DUP;
+ }
+ }
+
+ long pcr = parseToken( prg, sp, pdaRun, fsmRun, inputStream, PcrStart );
+ /* Relay every suspension of parseToken to our caller. On re-entry pcr is
+ * assigned again before the loop condition reads it. */
+ while ( pcr != PcrDone ) {
+
+return pcr;
+case PcrReduction:
+case PcrReverse:
+
+ pcr = parseToken( prg, sp, pdaRun, fsmRun, inputStream, entry );
+ }
+
+ assert( pcr == PcrDone );
+
+ handleError( prg, sp, pdaRun );
+
+skipSend:
+ newToken( prg, pdaRun, fsmRun );
+
+ /* Various stop conditions. This should all be covered by one test
+ * eventually. */
+
+ if ( pdaRun->triggerUndo ) {
+ debug( REALM_PARSE, "parsing stopped by triggerUndo\n" );
+ break;
+ }
+
+ if ( inputStream->eofSent ) {
+ debug( REALM_PARSE, "parsing stopped by EOF\n" );
+ break;
+ }
+
+ if ( pdaRun->stopParsing ) {
+ debug( REALM_PARSE, "scanner has been stopped\n" );
+ break;
+ }
+
+ if ( pdaRun->stop ) {
+ debug( REALM_PARSE, "parsing has been stopped by consumedCount\n" );
+ break;
+ }
+
+ if ( prg->induceExit ) {
+ debug( REALM_PARSE, "parsing has been stopped by a call to exit\n" );
+ break;
+ }
+
+ if ( pdaRun->parseError ) {
+ debug( REALM_PARSE, "parsing stopped by a parse error\n" );
+ break;
+ }
+ }
+ /* The loop falls through to here; entering with PcrDone does nothing. */
+case PcrDone:
+break; }
+
+ return PcrDone;
+}
+
+/*
+ * Look up a scanning region in the parser's tokenRegions table. An offset of
+ * zero reads the entry at nextRegionInd; a non-zero offset peeks that many
+ * entries further along.
+ */
+int pdaRunGetNextRegion( PdaRun *pdaRun, int offset )
+{
+	PdaTables *tables = pdaRun->tables;
+
+	return tables->tokenRegions[pdaRun->nextRegionInd+offset];
+}
+
+/* Read the tokenPreRegions table entry at the parser's nextRegionInd. */
+int pdaRunGetNextPreRegion( PdaRun *pdaRun )
+{
+	PdaTables *tables = pdaRun->tables;
+
+	return tables->tokenPreRegions[pdaRun->nextRegionInd];
+}
+
+/*
+ * Fetch the data tree produced by the parse. Returns 0 after a parse error.
+ * Otherwise the tree comes from the shadow of the top stack element when
+ * 'stop' is set, or of the element below it when it is not; 0 is returned if
+ * that element has no shadow.
+ */
+Tree *getParsedRoot( PdaRun *pdaRun, int stop )
+{
+	ParseTree *holder;
+
+	if ( pdaRun->parseError )
+		return 0;
+
+	holder = stop ? pdaRun->stackTop : pdaRun->stackTop->next;
+	if ( holder->shadow == 0 )
+		return 0;
+
+	return holder->shadow->tree;
+}
+
+/*
+ * Free a list of parse tree nodes linked through 'next'. For each node the
+ * shadow's tree is downreffed and the shadow kid freed, then the child,
+ * leftIgnore and rightIgnore lists are cleared recursively before the node
+ * itself is freed. A null list is a no-op.
+ */
+void clearParseTree( Program *prg, Tree **sp, ParseTree *parseTree )
+{
+	ParseTree *node, *following;
+
+	for ( node = parseTree; node != 0; node = following ) {
+		/* Grab the successor first; node is freed at the end of the pass. */
+		following = node->next;
+
+		if ( node->shadow != 0 ) {
+			treeDownref( prg, sp, node->shadow->tree );
+			kidFree( prg, node->shadow );
+		}
+
+		if ( node->child != 0 )
+			clearParseTree( prg, sp, node->child );
+
+		if ( node->leftIgnore != 0 )
+			clearParseTree( prg, sp, node->leftIgnore );
+
+		if ( node->rightIgnore != 0 )
+			clearParseTree( prg, sp, node->rightIgnore );
+
+		parseTreeFree( prg, node );
+	}
+}
+
+/*
+ * Release what a PdaRun still holds: the parse stack, the token list cells,
+ * the backtrack points, accumulated ignores, the context tree and the
+ * reverse code. Every pointer that is released is reset to 0 so the struct
+ * does not keep references it no longer owns.
+ */
+void clearPdaRun( Program *prg, Tree **sp, PdaRun *pdaRun )
+{
+	/* Remaining stack and parse trees underneath. */
+	clearParseTree( prg, sp, pdaRun->stackTop );
+	pdaRun->stackTop = 0;
+
+	/* Free the token list cells. Only the cells are released here; no tree
+	 * is downreffed through them. */
+	Ref *ref = pdaRun->tokenList;
+	while ( ref != 0 ) {
+		Ref *next = ref->next;
+		kidFree( prg, (Kid*)ref );
+		ref = next;
+	}
+	pdaRun->tokenList = 0;
+
+	/* Traverse the btPoint list, downreffing each tree and freeing its cell. */
+	Kid *btp = pdaRun->btPoint;
+	while ( btp != 0 ) {
+		Kid *next = btp->next;
+		treeDownref( prg, sp, btp->tree );
+		kidFree( prg, btp );
+		btp = next;
+	}
+	pdaRun->btPoint = 0;
+
+	/* Clear out any remaining ignores. */
+	clearParseTree( prg, sp, pdaRun->accumIgnore );
+	pdaRun->accumIgnore = 0;
+
+	/* Drop the context reference and forget the pointer, as is done for the
+	 * other fields above, so a second clear cannot downref it again. */
+	if ( pdaRun->context != 0 ) {
+		treeDownref( prg, sp, pdaRun->context );
+		pdaRun->context = 0;
+	}
+
+	rcodeDownrefAll( prg, sp, &pdaRun->reverseCode );
+	rtCodeVectEmpty( &pdaRun->reverseCode );
+	rtCodeVectEmpty( &pdaRun->rcodeCollect );
+}
+
+/*
+ * Returns 1 when exactly one element sits above the bottom of the stack and
+ * its id equals the parser's stopTarget, 0 otherwise.
+ */
+int isParserStopFinished( PdaRun *pdaRun )
+{
+	ParseTree *top = pdaRun->stackTop;
+
+	/* There must be something below the top ... */
+	if ( top->next == 0 )
+		return 0;
+
+	/* ... and nothing below that. */
+	if ( top->next->next != 0 )
+		return 0;
+
+	return top->id == pdaRun->stopTarget;
+}
+
+/*
+ * Put a PdaRun into its initial state for the given parser: zero the struct,
+ * record the tables and options, pick the start state, seed the stack with a
+ * sentinel element and set up the reverse code vectors. The context tree is
+ * stored after passing it through splitTree. The fsmRun argument is not used
+ * here.
+ */
+void initPdaRun( PdaRun *pdaRun, Program *prg, PdaTables *tables,
+		FsmRun *fsmRun, int parserId, long stopTarget, int revertOn, Tree *context )
+{
+	memset( pdaRun, 0, sizeof(PdaRun) );
+
+	/* Caller supplied configuration. */
+	pdaRun->targetSteps = -1;
+	pdaRun->revertOn = revertOn;
+	pdaRun->stopTarget = stopTarget;
+	pdaRun->parserId = parserId;
+	pdaRun->tables = tables;
+
+	debug( REALM_PARSE, "initializing PdaRun\n" );
+
+	/* FIXME: need the right one here. */
+	pdaRun->cs = prg->rtd->startStates[pdaRun->parserId];
+
+	/* The data side of the stack sentinel: an empty tree with one ref. */
+	Kid *sentinel = kidAllocate( prg );
+	sentinel->tree = treeAllocate( prg );
+	sentinel->tree->refs = 1;
+
+	/* The parse stack starts out holding only the sentinel. */
+	pdaRun->stackTop = parseTreeAllocate( prg );
+	pdaRun->stackTop->shadow = sentinel;
+	pdaRun->stackTop->state = -1;
+
+	/* Scanning and backtracking state. */
+	pdaRun->nextRegionInd = tables->tokenRegionInds[pdaRun->cs];
+	pdaRun->numRetry = 0;
+	pdaRun->stopParsing = false;
+	pdaRun->checkStop = false;
+	pdaRun->checkNext = false;
+	pdaRun->btPoint = 0;
+	pdaRun->accumIgnore = 0;
+
+	initBindings( pdaRun );
+
+	initRtCodeVect( &pdaRun->reverseCode );
+	initRtCodeVect( &pdaRun->rcodeCollect );
+
+	pdaRun->context = splitTree( prg, context );
+
+	/* Remaining per-parse flags and counters. */
+	pdaRun->rcBlockCount = 0;
+	pdaRun->reject = false;
+	pdaRun->parsed = 0;
+	pdaRun->onDeck = false;
+	pdaRun->tokenId = 0;
+	pdaRun->triggerUndo = 0;
+	pdaRun->parseInput = 0;
+	pdaRun->parseError = 0;
+}
+
+/*
+ * Compute the parser state reached from the element on top of the stack.
+ * A top element with a negative state yields the parser's start state.
+ * Otherwise the target is looked up in the tables using the top element's
+ * recorded state and id.
+ */
+long stackTopTarget( Program *prg, PdaRun *pdaRun )
+{
+	ParseTree *top = pdaRun->stackTop;
+	PdaTables *tables = pdaRun->tables;
+
+	if ( top->state < 0 )
+		return prg->rtd->startStates[pdaRun->parserId];
+
+	/* offsets[] gives the base of the state's index range; the id is taken
+	 * relative to the state's low key. */
+	return tables->targs[(int)tables->indicies[tables->offsets[top->state] +
+			(top->id - tables->keys[top->state<<1])]];
+}
+
+/*
+ * Local commit:
+ * -clears reparse flags underneath
+ * -must be possible to backtrack after
+ * Global commit (revertOn)
+ * -clears all reparse flags
+ * -must be possible to backtrack after
+ * Global commit (!revertOn)
+ * -clears all reparse flags
+ * -clears all 'parsed' reverse code
+ * -clears all reverse code
+ * -clears all alg structures
+ */
+
+/* Non-zero when the PF_COMMITTED flag is set on the parse tree node. */
+int beenCommitted( ParseTree *parseTree )
+{
+	int committed = parseTree->flags & PF_COMMITTED;
+	return committed;
+}
+
+/*
+ * Step a reverse-code pointer back over one block: move back over the
+ * trailing length word, read it, then move back by that many code units.
+ * Returns the resulting pointer.
+ */
+Code *backupOverRcode( Code *rcode )
+{
+	Word blockLen;
+
+	/* The length word is stored immediately before rcode. */
+	rcode -= SIZEOF_WORD;
+	read_word_p( blockLen, rcode );
+
+	rcode -= blockLen;
+	return rcode;
+}
+
+/* The top level of the stack is linked right-to-left. Trees underneath are
+ * linked left-to-right.
+ *
+ * Commit one stack element and everything beneath it. The traversal is
+ * iterative: pending nodes are kept on the stack addressed through 'root'
+ * using vm_push/vm_pop (presumably macros operating on the local 'sp' --
+ * TODO confirm). Children are visited from the last to the first.
+ *
+ * lel: the element to start from.
+ * rcode: in/out position in the reverse code; moved backwards with
+ * backupOverRcode as nodes carrying reverse code are committed.
+ * causeReduce: in/out count of reductions that must be passed before a
+ * delayed backup over reverse code is performed.
+ *
+ * On exit pdaRun->numRetry is reset to zero. */
+void commitKid( Program *prg, PdaRun *pdaRun, Tree **root, ParseTree *lel, Code **rcode, long *causeReduce )
+{
+ ParseTree *tree = 0;
+ Tree **sp = root;
+ //Tree *restore = 0;
+
+head:
+ /* Commit */
+ debug( REALM_PARSE, "commit: visiting %s\n",
+ prg->rtd->lelInfo[lel->id].name );
+
+ /* Load up the parsed tree. */
+ tree = lel;
+
+ /* Check for reverse code. */
+ //restore = 0;
+ if ( tree->flags & PF_HAS_RCODE ) {
+ /* If tree caused some reductions, now is not the right time to backup
+ * over the reverse code. We need to backup over the reductions first. Store
+ * the count of the reductions and do it when the count drops to zero. */
+ if ( tree->causeReduce > 0 ) {
+ /* The top reduce block does not correspond to this alg. */
+ debug( REALM_PARSE, "commit: causeReduce found, delaying backup: %ld\n",
+ (long)tree->causeReduce );
+ *causeReduce = tree->causeReduce;
+ }
+ else {
+ *rcode = backupOverRcode( *rcode );
+
+ //if ( **rcode == IN_RESTORE_LHS ) {
+ // debug( REALM_PARSE, "commit: has restore_lhs\n" );
+ // read_tree_p( restore, (*rcode+1) );
+ //}
+ }
+ }
+
+ //FIXME: what was this about?
+ //if ( restore != 0 )
+ // tree = restore;
+
+ /* All the parse algorithm data except for the RCODE flag is in the
+ * original. That is why we restore first, then we can clear the retry
+ * values. */
+
+ /* Check causeReduce, might be time to backup over the reverse code
+ * belonging to a nonterminal that caused previous reductions. */
+ if ( *causeReduce > 0 &&
+ tree->id >= prg->rtd->firstNonTermId &&
+ !(tree->flags & PF_TERM_DUP) )
+ {
+ *causeReduce -= 1;
+
+ if ( *causeReduce == 0 ) {
+ debug( REALM_PARSE, "commit: causeReduce dropped to zero, backing up over rcode\n" );
+
+ /* Cause reduce just dropped down to zero. */
+ *rcode = backupOverRcode( *rcode );
+ }
+ }
+
+ ///* FIXME: why was this here?
+ // * Reset retries. */
+ //if ( tree->flags & AF_PARSED ) {
+ // if ( tree->retryLower > 0 ) {
+ // pdaRun->numRetry -= 1;
+ // tree->retryLower = 0;
+ // }
+ // if ( tree->retryUpper > 0 ) {
+ // pdaRun->numRetry -= 1;
+ // tree->retryUpper = 0;
+ // }
+ //}
+
+ tree->flags |= PF_COMMITTED;
+
+ /* Do not recurse on trees that are terminal dups. Named and artificial
+ * trees, and trees without children, are not descended into either. */
+ if ( !(tree->flags & PF_TERM_DUP) &&
+ !(tree->flags & PF_NAMED) &&
+ !(tree->flags & PF_ARTIFICIAL) &&
+ tree->child != 0 )
+ {
+ vm_push( (Tree*)lel ); /* The parent goes underneath its children. */
+ lel = tree->child;
+
+ if ( lel != 0 ) {
+ while ( lel != 0 ) { /* Push left-to-right: the last child ends up on top and lel ends up 0. */
+ vm_push( (Tree*)lel );
+ lel = lel->next;
+ }
+ }
+ }
+
+backup:
+ if ( sp != root ) {
+ ParseTree *next = (ParseTree*)vm_pop();
+ if ( next->next == lel ) {
+ /* Moving backwards. */
+ lel = next;
+
+ if ( !beenCommitted( lel ) )
+ goto head;
+ }
+ else {
+ /* Moving upwards. */
+ lel = next;
+ }
+
+ goto backup;
+ }
+
+ pdaRun->numRetry = 0;
+ assert( sp == root );
+}
+
+/*
+ * Commit the whole parse stack. Walks the stack from the top until an
+ * element that has already been committed (or the bottom) is reached,
+ * committing each element with commitKid. The reverse-code position starts
+ * at the end of pdaRun->reverseCode and is moved backwards by commitKid.
+ *
+ * causeReduce: initial count of pending reductions, passed on to commitKid.
+ *
+ * When revertOn is not set the references held by the reverse code are
+ * released afterwards.
+ */
+void commitFull( Program *prg, Tree **sp, PdaRun *pdaRun, long causeReduce )
+{
+	/* Terminated with a newline like every other debug message. */
+	debug( REALM_PARSE, "running full commit\n" );
+
+	ParseTree *parseTree = pdaRun->stackTop;
+	Code *rcode = pdaRun->reverseCode.data + pdaRun->reverseCode.tabLen;
+
+	/* The top level of the stack is linked right to left. This is the
+	 * traversal order we need for committing. */
+	while ( parseTree != 0 && !beenCommitted( parseTree ) ) {
+		commitKid( prg, pdaRun, sp, parseTree, &rcode, &causeReduce );
+		parseTree = parseTree->next;
+	}
+
+	/* We cannot always clear all the rcode here. We may need to backup over
+	 * the parse statement. We depend on the context flag. */
+	if ( !pdaRun->revertOn )
+		rcodeDownrefAll( prg, sp, &pdaRun->reverseCode );
+}
+
+/*
+ * shift: retry goes into lower of shifted node.
+ * reduce: retry goes into upper of reduced node.
+ * shift-reduce: cannot be a retry
+ */
+
+/* Stops on:
+ * PcrReduction
+ * PcrRevToken
+ * PcrRevReduction
+ */
+long parseToken( Program *prg, Tree **sp, PdaRun *pdaRun,
+ FsmRun *fsmRun, InputStream *inputStream, long entry )
+{
+ int pos;
+ unsigned int *action;
+ int rhsLen;
+ int owner;
+ int induceReject;
+ int indPos;
+ //LangElInfo *lelInfo = prg->rtd->lelInfo;
+
+switch ( entry ) {
+case PcrStart:
+
+ /* The scanner will send a null token if it can't find a token. */
+ if ( pdaRun->parseInput == 0 )
+ goto parseError;
+
+ /* This will cause parseInput to be lost. This
+ * path should be traced. */
+ if ( pdaRun->cs < 0 )
+ return PcrDone;
+
+ /* Record the state in the parse tree. */
+ pdaRun->parseInput->state = pdaRun->cs;
+
+again:
+ if ( pdaRun->parseInput == 0 )
+ goto _out;
+
+ pdaRun->lel = pdaRun->parseInput;
+ pdaRun->curState = pdaRun->cs;
+
+ if ( pdaRun->lel->id < pdaRun->tables->keys[pdaRun->curState<<1] ||
+ pdaRun->lel->id > pdaRun->tables->keys[(pdaRun->curState<<1)+1] ) {
+ debug( REALM_PARSE, "parse error, no transition 1\n" );
+ pushBtPoint( prg, pdaRun );
+ goto parseError;
+ }
+
+ indPos = pdaRun->tables->offsets[pdaRun->curState] +
+ (pdaRun->lel->id - pdaRun->tables->keys[pdaRun->curState<<1]);
+
+ owner = pdaRun->tables->owners[indPos];
+ if ( owner != pdaRun->curState ) {
+ debug( REALM_PARSE, "parse error, no transition 2\n" );
+ pushBtPoint( prg, pdaRun );
+ goto parseError;
+ }
+
+ pos = pdaRun->tables->indicies[indPos];
+ if ( pos < 0 ) {
+ debug( REALM_PARSE, "parse error, no transition 3\n" );
+ pushBtPoint( prg, pdaRun );
+ goto parseError;
+ }
+
+ /* Checking complete. */
+
+ induceReject = false;
+ pdaRun->cs = pdaRun->tables->targs[pos];
+ action = pdaRun->tables->actions + pdaRun->tables->actInds[pos];
+ if ( pdaRun->lel->retryLower )
+ action += pdaRun->lel->retryLower;
+
+ /*
+ * Shift
+ */
+
+ if ( *action & act_sb ) {
+ debug( REALM_PARSE, "shifted: %s\n",
+ prg->rtd->lelInfo[pdaRun->lel->id].name );
+ /* Consume. */
+ pdaRun->parseInput = pdaRun->parseInput->next;
+
+ pdaRun->lel->state = pdaRun->curState;
+
+ /* If its a token then attach ignores and record it in the token list
+ * of the next ignore attachment to use. */
+ if ( pdaRun->lel->id < prg->rtd->firstNonTermId ) {
+ if ( pdaRun->lel->causeReduce == 0 )
+ attachRightIgnore( prg, sp, pdaRun, pdaRun->stackTop );
+ }
+
+ pdaRun->lel->next = pdaRun->stackTop;
+ pdaRun->stackTop = pdaRun->lel;
+
+ /* If its a token then attach ignores and record it in the token list
+ * of the next ignore attachment to use. */
+ if ( pdaRun->lel->id < prg->rtd->firstNonTermId ) {
+ attachLeftIgnore( prg, sp, pdaRun, pdaRun->lel );
+
+ Ref *ref = (Ref*)kidAllocate( prg );
+ ref->kid = pdaRun->lel->shadow;
+ //treeUpref( pdaRun->tree );
+ ref->next = pdaRun->tokenList;
+ pdaRun->tokenList = ref;
+ }
+
+ if ( action[1] == 0 )
+ pdaRun->lel->retryLower = 0;
+ else {
+ debug( REALM_PARSE, "retry: %p\n", pdaRun->stackTop );
+ pdaRun->lel->retryLower += 1;
+ assert( pdaRun->lel->retryUpper == 0 );
+ /* FIXME: Has the retry already been counted? */
+ pdaRun->numRetry += 1;
+ }
+ }
+
+ /*
+ * Commit
+ */
+
+ if ( pdaRun->tables->commitLen[pos] != 0 ) {
+ long causeReduce = 0;
+ if ( pdaRun->parseInput != 0 ) {
+ if ( pdaRun->parseInput->flags & PF_HAS_RCODE )
+ causeReduce = pdaRun->parseInput->causeReduce;
+ }
+ commitFull( prg, sp, pdaRun, causeReduce );
+ }
+
+ /*
+ * Reduce
+ */
+
+ if ( *action & act_rb ) {
+ int r, objectLength;
+ ParseTree *last, *child;
+ Kid *attrs;
+ Kid *dataLast, *dataChild;
+
+ /* If there was shift don't attach again. */
+ if ( !( *action & act_sb ) && pdaRun->lel->id < prg->rtd->firstNonTermId )
+ attachRightIgnore( prg, sp, pdaRun, pdaRun->stackTop );
+
+ pdaRun->reduction = *action >> 2;
+
+ if ( pdaRun->parseInput != 0 )
+ pdaRun->parseInput->causeReduce += 1;
+
+ Kid *value = kidAllocate( prg );
+ value->tree = treeAllocate( prg );
+ value->tree->refs = 1;
+ value->tree->id = prg->rtd->prodInfo[pdaRun->reduction].lhsId;
+ value->tree->prodNum = prg->rtd->prodInfo[pdaRun->reduction].prodNum;
+
+ pdaRun->redLel = parseTreeAllocate( prg );
+ pdaRun->redLel->id = prg->rtd->prodInfo[pdaRun->reduction].lhsId;
+ pdaRun->redLel->next = 0;
+ pdaRun->redLel->causeReduce = 0;
+ pdaRun->redLel->retryLower = 0;
+ pdaRun->redLel->shadow = value;
+
+ /* Transfer. */
+ pdaRun->redLel->retryUpper = pdaRun->lel->retryLower;
+ pdaRun->lel->retryLower = 0;
+
+ /* Allocate the attributes. */
+ objectLength = prg->rtd->lelInfo[pdaRun->redLel->id].objectLength;
+ attrs = allocAttrs( prg, objectLength );
+
+ /* Build the list of children. We will be giving up a reference when we
+ * detach parse tree and data tree, but gaining the reference when we
+ * put the children under the new data tree. No need to alter refcounts
+ * here. */
+ rhsLen = prg->rtd->prodInfo[pdaRun->reduction].length;
+ child = last = 0;
+ dataChild = dataLast = 0;
+ for ( r = 0; r < rhsLen; r++ ) {
+
+ /* The child. */
+ child = pdaRun->stackTop;
+ dataChild = child->shadow;
+
+ /* Pop. */
+ pdaRun->stackTop = pdaRun->stackTop->next;
+
+ /* Detach the parse tree from the data. */
+ child->shadow = 0;
+
+ /* Reverse list. */
+ child->next = last;
+ dataChild->next = dataLast;
+
+ /* Track last for reversal. */
+ last = child;
+ dataLast = dataChild;
+ }
+
+ pdaRun->redLel->child = child;
+ pdaRun->redLel->shadow->tree->child = kidListConcat( attrs, dataChild );
+
+ debug( REALM_PARSE, "reduced: %s rhsLen %d\n",
+ prg->rtd->prodInfo[pdaRun->reduction].name, rhsLen );
+ if ( action[1] == 0 )
+ pdaRun->redLel->retryUpper = 0;
+ else {
+ pdaRun->redLel->retryUpper += 1;
+ assert( pdaRun->lel->retryLower == 0 );
+ pdaRun->numRetry += 1;
+ debug( REALM_PARSE, "retry: %p\n", pdaRun->redLel );
+ }
+
+ /* When the production is of zero length we stay in the same state.
+ * Otherwise we use the state stored in the first child. */
+ pdaRun->cs = rhsLen == 0 ? pdaRun->curState : child->state;
+
+ if ( prg->ctxDepParsing && prg->rtd->prodInfo[pdaRun->reduction].frameId >= 0 ) {
+ /* Frame info for reduction. */
+ pdaRun->fi = &prg->rtd->frameInfo[prg->rtd->prodInfo[pdaRun->reduction].frameId];
+ pdaRun->frameId = prg->rtd->prodInfo[pdaRun->reduction].frameId;
+ pdaRun->reject = false;
+ pdaRun->parsed = 0;
+ pdaRun->code = pdaRun->fi->codeWV;
+
+return PcrReduction;
+case PcrReduction:
+
+ if ( prg->induceExit )
+ goto fail;
+
+ /* If the lhs was stored and it changed then we need to restore the
+ * original upon backtracking, otherwise downref since we took a
+ * copy above. */
+ if ( pdaRun->parsed != 0 ) {
+ if ( pdaRun->parsed != pdaRun->redLel->shadow->tree ) {
+ debug( REALM_PARSE, "lhs tree was modified, adding a restore instruction\n" );
+//
+// /* Make it into a parse tree. */
+// Tree *newPt = prepParseTree( prg, sp, pdaRun->redLel->tree );
+// treeDownref( prg, sp, pdaRun->redLel->tree );
+//
+// /* Copy it in. */
+// pdaRun->redLel->tree = newPt;
+// treeUpref( pdaRun->redLel->tree );
+
+ /* Add the restore instruct. */
+ append( &pdaRun->rcodeCollect, IN_RESTORE_LHS );
+ appendWord( &pdaRun->rcodeCollect, (Word)pdaRun->parsed );
+ append( &pdaRun->rcodeCollect, SIZEOF_CODE + SIZEOF_WORD );
+ }
+ else {
+ /* Not changed. Done with parsed. */
+ treeDownref( prg, sp, pdaRun->parsed );
+ }
+ pdaRun->parsed = 0;
+ }
+
+ /* Pull out the reverse code, if any. */
+ makeReverseCode( pdaRun );
+ transferReverseCode( pdaRun, pdaRun->redLel );
+
+ /* Perhaps the execution environment is telling us we need to
+ * reject the reduction. */
+ induceReject = pdaRun->reject;
+ }
+
+ /* If the left hand side was replaced then the only parse algorithm
+ * data that is contained in it will the PF_HAS_RCODE flag. Everthing
+ * else will be in the original. This requires that we restore first
+ * when going backwards and when doing a commit. */
+
+ if ( induceReject ) {
+ debug( REALM_PARSE, "error induced during reduction of %s\n",
+ prg->rtd->lelInfo[pdaRun->redLel->id].name );
+ pdaRun->redLel->state = pdaRun->curState;
+ pdaRun->redLel->next = pdaRun->stackTop;
+ pdaRun->stackTop = pdaRun->redLel;
+ /* FIXME: What is the right argument here? */
+ pushBtPoint( prg, pdaRun );
+ goto parseError;
+ }
+
+ pdaRun->redLel->next = pdaRun->parseInput;
+ pdaRun->parseInput = pdaRun->redLel;
+ }
+
+ goto again;
+
+parseError:
+ debug( REALM_PARSE, "hit error, backtracking\n" );
+
+ if ( pdaRun->numRetry == 0 ) {
+ debug( REALM_PARSE, "out of retries failing parse\n" );
+ goto fail;
+ }
+
+ while ( 1 ) {
+ if ( pdaRun->onDeck ) {
+ debug( REALM_BYTECODE, "dropping out for reverse code call\n" );
+
+ pdaRun->frameId = -1;
+ pdaRun->code = popReverseCode( &pdaRun->reverseCode );
+
+return PcrReverse;
+case PcrReverse:
+
+ decrementSteps( pdaRun );
+ }
+ else if ( pdaRun->checkNext ) {
+ pdaRun->checkNext = false;
+
+ if ( pdaRun->next > 0 && pdaRun->tables->tokenRegions[pdaRun->next] != 0 ) {
+ debug( REALM_PARSE, "found a new region\n" );
+ pdaRun->numRetry -= 1;
+ pdaRun->cs = stackTopTarget( prg, pdaRun );
+ pdaRun->nextRegionInd = pdaRun->next;
+ return PcrDone;
+ }
+ }
+ else if ( pdaRun->checkStop ) {
+ pdaRun->checkStop = false;
+
+ if ( pdaRun->stop ) {
+ debug( REALM_PARSE, "stopping the backtracking, steps is %d\n", pdaRun->steps );
+
+ pdaRun->cs = stackTopTarget( prg, pdaRun );
+ goto _out;
+ }
+ }
+ else if ( pdaRun->parseInput != 0 ) {
+ /* Either we are dealing with a terminal that was
+ * shifted or a nonterminal that was reduced. */
+ if ( pdaRun->parseInput->id < prg->rtd->firstNonTermId ) {
+ assert( pdaRun->parseInput->retryUpper == 0 );
+
+ if ( pdaRun->parseInput->retryLower != 0 ) {
+ debug( REALM_PARSE, "found retry targ: %p\n", pdaRun->parseInput );
+
+ pdaRun->numRetry -= 1;
+ pdaRun->cs = pdaRun->parseInput->state;
+ goto again;
+ }
+
+ if ( pdaRun->parseInput->causeReduce != 0 ) {
+ pdaRun->undoLel = pdaRun->stackTop;
+
+ /* Check if we've arrived at the stack sentinal. This guard
+ * is here to allow us to initially set numRetry to one to
+ * cause the parser to backup all the way to the beginning
+ * when an error occurs. */
+ if ( pdaRun->undoLel->next == 0 )
+ break;
+
+ /* Either we are dealing with a terminal that was
+ * shifted or a nonterminal that was reduced. */
+ assert( !(pdaRun->stackTop->id < prg->rtd->firstNonTermId) );
+
+ debug( REALM_PARSE, "backing up over non-terminal: %s\n",
+ prg->rtd->lelInfo[pdaRun->stackTop->id].name );
+
+ /* Pop the item from the stack. */
+ pdaRun->stackTop = pdaRun->stackTop->next;
+
+ /* Queue it as next parseInput item. */
+ pdaRun->undoLel->next = pdaRun->parseInput;
+ pdaRun->parseInput = pdaRun->undoLel;
+ }
+ else {
+ long region = pdaRun->parseInput->region;
+ pdaRun->next = region > 0 ? region + 1 : 0;
+ pdaRun->checkNext = true;
+ pdaRun->checkStop = true;
+
+ sendBack( prg, sp, pdaRun, fsmRun, inputStream, pdaRun->parseInput );
+
+ pdaRun->parseInput = 0;
+ }
+ }
+ else if ( pdaRun->parseInput->flags & PF_HAS_RCODE ) {
+ debug( REALM_PARSE, "tree has rcode, setting on deck\n" );
+ pdaRun->onDeck = true;
+ pdaRun->parsed = 0;
+
+ /* Only the RCODE flag was in the replaced lhs. All the rest is in
+ * the the original. We read it after restoring. */
+
+ pdaRun->parseInput->flags &= ~PF_HAS_RCODE;
+ }
+ else {
+ /* Remove it from the input queue. */
+ pdaRun->undoLel = pdaRun->parseInput;
+ pdaRun->parseInput = pdaRun->parseInput->next;
+
+ /* Extract children from the child list. */
+ ParseTree *first = pdaRun->undoLel->child;
+ pdaRun->undoLel->child = 0;
+
+ /* This will skip the ignores/attributes, etc. */
+ Kid *dataFirst = treeExtractChild( prg, pdaRun->undoLel->shadow->tree );
+
+ /* Walk the child list and and push the items onto the parsing
+ * stack one at a time. */
+ while ( first != 0 ) {
+ /* Get the next item ahead of time. */
+ ParseTree *next = first->next;
+ Kid *dataNext = dataFirst->next;
+
+ /* Push onto the stack. */
+ first->next = pdaRun->stackTop;
+ pdaRun->stackTop = first;
+
+ /* Reattach the data and the parse tree. */
+ first->shadow = dataFirst;
+
+ first = next;
+ dataFirst = dataNext;
+ }
+
+ /* If there is an parseInput queued, this is one less reduction it has
+ * caused. */
+ if ( pdaRun->parseInput != 0 )
+ pdaRun->parseInput->causeReduce -= 1;
+
+ if ( pdaRun->undoLel->retryUpper != 0 ) {
+ /* There is always an parseInput item here because reduce
+ * conflicts only happen on a lookahead character. */
+ assert( pdaRun->parseInput != pdaRun->undoLel );
+ assert( pdaRun->parseInput != 0 );
+ assert( pdaRun->undoLel->retryLower == 0 );
+ assert( pdaRun->parseInput->retryUpper == 0 );
+
+ /* Transfer the retry from undoLel to parseInput. */
+ pdaRun->parseInput->retryLower = pdaRun->undoLel->retryUpper;
+ pdaRun->parseInput->retryUpper = 0;
+ pdaRun->parseInput->state = stackTopTarget( prg, pdaRun );
+ }
+
+ /* Free the reduced item. */
+ treeDownref( prg, sp, pdaRun->undoLel->shadow->tree );
+ kidFree( prg, pdaRun->undoLel->shadow );
+ parseTreeFree( prg, pdaRun->undoLel );
+
+ /* If the stacktop had right ignore attached, detach now. */
+ if ( pdaRun->stackTop->flags & PF_RIGHT_IL_ATTACHED )
+ detachRightIgnore( prg, sp, pdaRun, pdaRun->stackTop );
+ }
+ }
+ else if ( pdaRun->accumIgnore != 0 ) {
+ debug( REALM_PARSE, "have accumulated ignore to undo\n" );
+
+ /* Send back any accumulated ignore tokens, then trigger error
+ * in the the parser. */
+ ParseTree *ignore = pdaRun->accumIgnore;
+ pdaRun->accumIgnore = pdaRun->accumIgnore->next;
+ ignore->next = 0;
+
+ long region = ignore->region;
+ pdaRun->next = region > 0 ? region + 1 : 0;
+ pdaRun->checkNext = true;
+ pdaRun->checkStop = true;
+
+ sendBackIgnore( prg, sp, pdaRun, fsmRun, inputStream, ignore );
+
+ treeDownref( prg, sp, ignore->shadow->tree );
+ kidFree( prg, ignore->shadow );
+ parseTreeFree( prg, ignore );
+ }
+ else {
+ /* Now it is time to undo something. Pick an element from the top of
+ * the stack. */
+ pdaRun->undoLel = pdaRun->stackTop;
+
+ /* Check if we've arrived at the stack sentinal. This guard is
+ * here to allow us to initially set numRetry to one to cause the
+ * parser to backup all the way to the beginning when an error
+ * occurs. */
+ if ( pdaRun->undoLel->next == 0 )
+ break;
+
+ /* Either we are dealing with a terminal that was
+ * shifted or a nonterminal that was reduced. */
+ if ( pdaRun->stackTop->id < prg->rtd->firstNonTermId ) {
+ debug( REALM_PARSE, "backing up over effective terminal: %s\n",
+ prg->rtd->lelInfo[pdaRun->stackTop->id].name );
+
+ /* Pop the item from the stack. */
+ pdaRun->stackTop = pdaRun->stackTop->next;
+
+ /* Queue it as next parseInput item. */
+ pdaRun->undoLel->next = pdaRun->parseInput;
+ pdaRun->parseInput = pdaRun->undoLel;
+
+ /* Pop from the token list. */
+ Ref *ref = pdaRun->tokenList;
+ pdaRun->tokenList = ref->next;
+ kidFree( prg, (Kid*)ref );
+
+ assert( pdaRun->accumIgnore == 0 );
+ detachLeftIgnore( prg, sp, pdaRun, fsmRun, pdaRun->parseInput );
+ }
+ else {
+ debug( REALM_PARSE, "backing up over non-terminal: %s\n",
+ prg->rtd->lelInfo[pdaRun->stackTop->id].name );
+
+ /* Pop the item from the stack. */
+ pdaRun->stackTop = pdaRun->stackTop->next;
+
+ /* Queue it as next parseInput item. */
+ pdaRun->undoLel->next = pdaRun->parseInput;
+ pdaRun->parseInput = pdaRun->undoLel;
+ }
+
+ /* Undo attach of right ignore. */
+ if ( pdaRun->stackTop->flags & PF_RIGHT_IL_ATTACHED )
+ detachRightIgnore( prg, sp, pdaRun, pdaRun->stackTop );
+ }
+ }
+
+fail:
+ pdaRun->cs = -1;
+ pdaRun->parseError = 1;
+
+ /* If we failed parsing on tree we must free it. The caller expected us to
+ * either consume it or send it back to the parseInput. */
+ if ( pdaRun->parseInput != 0 ) {
+ //treeDownref( prg, sp, (Tree*)pdaRun->parseInput->tree );
+ //ptKidFree( prg, pdaRun->parseInput );
+ pdaRun->parseInput = 0;
+ }
+
+ /* FIXME: do we still need to fall through here? A fail is permanent now,
+ * no longer called into again. */
+
+ return PcrDone;
+
+_out:
+ pdaRun->nextRegionInd = pdaRun->tables->tokenRegionInds[pdaRun->cs];
+
+case PcrDone:
+break; }
+
+ return PcrDone;
+}
diff --git a/src/pdarun.h b/src/pdarun.h
new file mode 100644
index 00000000..1bdf651c
--- /dev/null
+++ b/src/pdarun.h
@@ -0,0 +1,473 @@
+/*
+ * Copyright 2007-2012 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef __COLM_PDARUN_H
+#define __COLM_PDARUN_H
+
+#include <input.h>
+#include <fsmrun.h>
+#include <defs.h>
+#include <tree.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct ColmProgram;
+
+#define MARK_SLOTS 32
+
+/* Table data for one finite state machine. An FsmRun reaches its tables
+ * through FsmRun::tables. The members fall into three groups: pointers to
+ * the table arrays, element counts, and the start / first-final / error
+ * state numbers followed by the action switch.
+ * NOTE(review): which count bounds which array is inferred from the member
+ * names only -- confirm against the code that emits these tables. */
+typedef struct _FsmTables
+{
+ long *actions;
+ long *keyOffsets;
+ char *transKeys;
+ long *singleLengths;
+ long *rangeLengths;
+ long *indexOffsets;
+ long *transTargsWI;
+ long *transActionsWI;
+ long *toStateActions;
+ long *fromStateActions;
+ long *eofActions;
+ long *eofTargs;
+ long *entryByRegion;
+
+ long numStates;
+ long numActions;
+ long numTransKeys;
+ long numSingleLengths;
+ long numRangeLengths;
+ long numIndexOffsets;
+ long numTransTargsWI;
+ long numTransActionsWI;
+ long numRegions;
+
+ long startState;
+ long firstFinal;
+ long errorState;
+
+ struct GenAction **actionSwitch;
+ long numActionSwitch;
+} FsmTables;
+
+/* Run-time state of one FSM execution: the tables being run, the current
+ * run buffer, the machine registers (cs, act, p/pe/peof, token start/end),
+ * MARK_SLOTS saved mark pointers, and the input/source currently attached.
+ * NOTE(review): the attached* members are presumably maintained by
+ * attachInput()/detachInput() and attachSource()/detachSource() declared
+ * later in this header -- confirm in their definitions. */
+typedef struct _FsmRun
+{
+ FsmTables *tables;
+
+ RunBuf *runBuf;
+
+ /* FsmRun State. */
+ long region, preRegion;
+ long cs, ncs, act;
+ char *tokstart, *tokend;
+ char *p, *pe, *peof;
+ int returnResult;
+ char *mark[MARK_SLOTS];
+ long matchedToken;
+
+ InputStream *attachedInput;
+ SourceStream *attachedSource;
+} FsmRun;
+
+/* FsmRun set-up/tear-down and input position bookkeeping. These are
+ * declarations only; the definitions live outside this header. */
+void initFsmRun( FsmRun *fsmRun, struct ColmProgram *prg );
+void clearFsmRun( struct ColmProgram *prg, FsmRun *fsmRun );
+void updatePosition( InputStream *inputStream, const char *data, long length );
+void undoPosition( InputStream *inputStream, const char *data, long length );
+void sendBackRunBufHead( FsmRun *fsmRun, InputStream *inputStream );
+void undoStreamPull( FsmRun *fsmRun, InputStream *inputStream, const char *data, long length );
+
+
+#if SIZEOF_LONG != 4 && SIZEOF_LONG != 8
+ #error "SIZEOF_LONG contained an unexpected value"
+#endif
+
+struct _Execution;
+
+/* Growable vector of Code units. The append helpers in this header add data
+ * by calling rtCodeVectReplace() at position tabLen, so tabLen is the
+ * position at which new code is appended.
+ * NOTE(review): allocLen is presumably the allocated capacity -- confirm in
+ * the rtCodeVect implementation. */
+typedef struct _RtCodeVect
+{
+ Code *data;
+ long tabLen;
+ long allocLen;
+
+ /* FIXME: leak when freed. */
+} RtCodeVect;
+
+/* Operations on the runtime List type: insertion relative to an element,
+ * insertion at either end, detaching elements, and querying the length.
+ * Declarations only; the definitions are not part of this header. */
+void listAddAfter( List *list, ListEl *prev_el, ListEl *new_el );
+void listAddBefore( List *list, ListEl *next_el, ListEl *new_el );
+
+void listPrepend( List *list, ListEl *new_el );
+void listAppend( List *list, ListEl *new_el );
+
+ListEl *listDetach( List *list, ListEl *el );
+ListEl *listDetachFirst(List *list );
+ListEl *listDetachLast(List *list );
+
+long listLength(List *list);
+
+/* Static description of one function: its name, the id of its frame, and
+ * its argument and frame sizes.
+ * NOTE(review): the units of argSize/frameSize are not visible here. */
+typedef struct _FunctionInfo
+{
+ const char *name;
+ long frameId;
+ long argSize;
+ long frameSize;
+} FunctionInfo;
+
+/*
+ * Program Data.
+ */
+
+/* Header for one pattern/replacement: an offset and a binding count.
+ * NOTE(review): offset presumably indexes the PatReplNode array held in the
+ * runtime data -- confirm against the users of patReplNodes. */
+typedef struct _PatReplInfo
+{
+ long offset;
+ long numBindings;
+} PatReplInfo;
+
+/* One node of a pattern/replacement tree. next and child are stored as long
+ * values rather than pointers.
+ * NOTE(review): presumably they are indices into the node array -- verify. */
+typedef struct _PatReplNode
+{
+ long id;
+ long prodNum;
+ long next;
+ long child;
+ long bindId;
+ const char *data;
+ long length;
+ long leftIgnore;
+ long rightIgnore;
+
+ /* Just match nonterminal, don't go inside. */
+ unsigned char stop;
+} PatReplNode;
+
+/* FIXME: should have a descriptor for object types to give the length. */
+
+/* Static description of one language element. The parser's debug output
+ * looks entries up by element id and prints `name`
+ * (prg->rtd->lelInfo[id].name). The remaining members are flags and ids
+ * whose consumers are outside this header. */
+typedef struct _LangElInfo
+{
+ const char *name;
+ const char *xmlTag;
+ unsigned char repeat;
+ unsigned char list;
+ unsigned char literal;
+ unsigned char ignore;
+
+ long frameId;
+
+ long objectTypeId;
+ long ofiOffset;
+ long objectLength;
+
+// long contextTypeId;
+// long contextLength;
+
+ long termDupId;
+ long genericId;
+ long markId;
+ long captureAttr;
+ long numCaptureAttr;
+} LangElInfo;
+
+/* Type id of one object field. */
+typedef struct _ObjFieldInfo
+{
+ int typeId;
+} ObjFieldInfo;
+
+/* Static description of one production: the id of its left-hand side, its
+ * production number, length, name and frame id, plus a byte array `copy` of
+ * copyLen entries.
+ * NOTE(review): the meaning of lhsUpref and copy is not visible from this
+ * header -- confirm in the reduction code. */
+typedef struct _ProdInfo
+{
+ unsigned long lhsId;
+ short prodNum;
+ long length;
+ const char *name;
+ long frameId;
+ unsigned char lhsUpref;
+ unsigned char *copy;
+ long copyLen;
+} ProdInfo;
+
+/* Static description of one frame: two code arrays with their lengths
+ * (codeWV, codeWC), a `trees` array with its length, and the argument and
+ * frame sizes.
+ * NOTE(review): WV/WC look like two compiled variants of the same code --
+ * confirm what distinguishes them in the bytecode generator. */
+typedef struct _FrameInfo
+{
+ Code *codeWV;
+ long codeLenWV;
+ Code *codeWC;
+ long codeLenWC;
+ char *trees;
+ long treesLen;
+ long argSize;
+ long frameSize;
+} FrameInfo;
+
+/* Static description of one token region: its name, a default token, the
+ * frame id used at eof, and flags/ids for ignore-only and ci-only regions. */
+typedef struct _RegionInfo
+{
+ const char *name;
+ long defaultToken;
+ long eofFrameId;
+ int isIgnoreOnly;
+ int isCiOnly;
+ int ciLelId;
+} RegionInfo;
+
+/* One captured attribute: the enter and leave mark numbers and an offset.
+ * NOTE(review): the mark numbers presumably index FsmRun::mark -- verify. */
+typedef struct _CaptureAttr
+{
+ long mark_enter;
+ long mark_leave;
+ long offset;
+} CaptureAttr;
+
+/* Parser (PDA) tables: pointers to the table arrays followed by their
+ * counts. The parser reads tokenRegionInds[cs] with the current parser
+ * state to obtain PdaRun::nextRegionInd.
+ * NOTE(review): the pairing of the remaining counts with arrays is inferred
+ * from names -- confirm against the table generator. */
+typedef struct _PdaTables
+{
+ /* Parser table data. */
+ int *indicies;
+ int *owners;
+ int *keys;
+ unsigned int *offsets;
+ unsigned int *targs;
+ unsigned int *actInds;
+ unsigned int *actions;
+ int *commitLen;
+ int *tokenRegionInds;
+ int *tokenRegions;
+ int *tokenPreRegions;
+
+ int numIndicies;
+ int numKeys;
+ int numStates;
+ int numTargs;
+ int numActInds;
+ int numActions;
+ int numCommitLen;
+ int numRegionItems;
+ int numPreRegionItems;
+} PdaTables;
+
+/* One malloc'd slab of pool storage. `data` holds FRESH_BLOCK elements and
+ * `next` links to the previously allocated slab. */
+typedef struct _PoolBlock
+{
+ void *data;
+ struct _PoolBlock *next;
+} PoolBlock;
+
+/* Overlay written into a freed element so it can sit on the free list.
+ * Pooled element types therefore must be at least this large. */
+typedef struct _PoolItem
+{
+ struct _PoolItem *next;
+} PoolItem;
+
+/* Fixed-size element pool.
+ *  head    - most recently allocated slab, heading the list of all slabs.
+ *  nextel  - index of the next never-used element in the head slab;
+ *            FRESH_BLOCK means the head slab is exhausted (or absent).
+ *  pool    - free list of elements that were handed back.
+ *  sizeofT - size in bytes of one element. */
+typedef struct _PoolAlloc
+{
+ PoolBlock *head;
+ long nextel;
+ PoolItem *pool;
+ int sizeofT;
+} PoolAlloc;
+
+/* State of one parser (PDA) run. What the visible parsing code establishes:
+ *  stackTop    - top of the parse stack; elements are linked through ->next
+ *                and the bottom element (next == 0) acts as a sentinel.
+ *  tokenList   - list of Refs popped when the parser backs up over a
+ *                terminal.
+ *  cs          - current parser state; set to -1 on a parse failure and used
+ *                to index tables->tokenRegionInds.
+ *  accumIgnore - accumulated ignore tokens, sent back one at a time when
+ *                undoing.
+ *  parseInput  - queue of items waiting to be parsed; undone stack elements
+ *                are pushed onto its front.
+ *  undoLel     - the element currently being undone.
+ *  parseError  - set to 1 when parsing fails.
+ *  onDeck      - set when the input item carries reverse code (PF_HAS_RCODE).
+ *  next, checkNext, checkStop - set together before an item is sent back to
+ *                the input.
+ * The other members are used by code outside the visible range. */
+typedef struct _PdaRun
+{
+ int numRetry;
+ ParseTree *stackTop;
+ Ref *tokenList;
+ int cs;
+ int nextRegionInd;
+
+ PdaTables *tables;
+ int parserId;
+
+ /* Reused. */
+ RtCodeVect rcodeCollect;
+ RtCodeVect reverseCode;
+
+ int stopParsing;
+ long stopTarget;
+
+ ParseTree *accumIgnore;
+
+ Kid *btPoint;
+
+ struct Bindings *bindings;
+
+ int revertOn;
+
+ Tree *context;
+
+ int stop;
+ int parseError;
+
+ long steps;
+ long targetSteps;
+
+ int onDeck;
+
+ /*
+ * Data we added when refactoring the parsing engine into a coroutine.
+ */
+
+ ParseTree *parseInput;
+ FrameInfo *fi;
+ int reduction;
+ ParseTree *redLel;
+ int curState;
+ ParseTree *lel;
+ int triggerUndo;
+
+ int tokenId;
+ Head *tokdata;
+ int frameId;
+ int next;
+ ParseTree *undoLel;
+
+ int checkNext;
+ int checkStop;
+
+ /* The lhs is sometimes saved before reduction actions in case it is
+ * replaced and we need to restore it on backtracking */
+ Tree *parsed;
+
+ int reject;
+
+ /* Instruction pointer to use when we stop parsing and execute code. */
+ Code *code;
+
+ int rcBlockCount;
+} PdaRun;
+
+/* RtCodeVect primitives, defined outside this header. rtCodeVectReplace() is
+ * the one the inline append helpers below are built on. */
+void rtCodeVectReplace( RtCodeVect *vect, long pos, const Code *val, long len );
+void rtCodeVectEmpty( RtCodeVect *vect );
+void rtCodeVectRemove( RtCodeVect *vect, long pos, long len );
+
+void initRtCodeVect( RtCodeVect *codeVect );
+
+/* Forward declarations of the inline append helpers defined just below. */
+//inline static void remove( RtCodeVect *vect, long pos );
+inline static void append( RtCodeVect *vect, const Code val );
+inline static void append2( RtCodeVect *vect, const Code *val, long len );
+inline static void appendHalf( RtCodeVect *vect, Half half );
+inline static void appendWord( RtCodeVect *vect, Word word );
+
+/* Append len code units starting at val to the end of vect. */
+inline static void append2( RtCodeVect *vect, const Code *val, long len )
+{
+	const long end = vect->tabLen;
+	rtCodeVectReplace( vect, end, val, len );
+}
+
+/* Append the single code unit val to the end of vect. */
+inline static void append( RtCodeVect *vect, const Code val )
+{
+	const long end = vect->tabLen;
+	rtCodeVectReplace( vect, end, &val, 1 );
+}
+
+/* Append a Half as two code units, low byte first. */
+inline static void appendHalf( RtCodeVect *vect, Half half )
+{
+	/* not optimal. */
+	int shift;
+	for ( shift = 0; shift < 16; shift += 8 )
+		append( vect, (half>>shift) & 0xff );
+}
+
+/* Append a Word as SIZEOF_LONG code units, least significant byte first.
+ * SIZEOF_LONG is checked earlier in this header to be 4 or 8. */
+inline static void appendWord( RtCodeVect *vect, Word word )
+{
+	/* not optimal. */
+	int shift;
+	for ( shift = 0; shift < SIZEOF_LONG * 8; shift += 8 )
+		append( vect, (word>>shift) & 0xff );
+}
+
+/* PdaRun, input stream and source stream life-cycle plus assorted parser
+ * helpers. Declarations only; the definitions are not part of this header. */
+void incrementSteps( PdaRun *pdaRun );
+void decrementSteps( PdaRun *pdaRun );
+
+int makeReverseCode( PdaRun *pdaRun );
+void transferReverseCode( PdaRun *pdaRun, ParseTree *tree );
+
+void initPdaRun( PdaRun *pdaRun, struct ColmProgram *prg, PdaTables *tables,
+ FsmRun *fsmRun, int parserId, long stopTarget, int revertOn, Tree *context );
+void clearPdaRun( struct ColmProgram *prg, Tree **root, PdaRun *pdaRun );
+
+void initInputStream( InputStream *inputStream );
+void clearInputStream( struct ColmProgram *prg, Tree **sp, InputStream *inputStream );
+void initSourceStream( SourceStream *in );
+void clearSourceStream( struct ColmProgram *prg, Tree **sp, SourceStream *sourceStream );
+
+
+void clearContext( PdaRun *pdaRun, Tree **sp );
+Kid *extractIgnore( PdaRun *pdaRun );
+/* stackTopTarget() is used by the parser's undo path to recompute the state
+ * stored on the queued input item after a retry is transferred to it. */
+long stackTopTarget( struct ColmProgram *prg, PdaRun *pdaRun );
+void runCommit( PdaRun *pdaRun );
+int isParserStopFinished( PdaRun *pdaRun );
+void pdaRunMatch( PdaRun *pdaRun, Kid *tree, Kid *pattern );
+
+/* Offset can be used to look at the next nextRegionInd. */
+int pdaRunGetNextRegion( PdaRun *pdaRun, int offset );
+int pdaRunGetNextPreRegion( PdaRun *pdaRun );
+
+/* Parser coroutine result/entry codes. The parse function visible in
+ * pdarun.c returns PcrDone both on failure and on normal completion and has
+ * a `case PcrDone:` resume label; the `entry` argument of parseToken() and
+ * parseLoop() is presumably one of these values -- confirm at the callers. */
+#define PcrStart 1
+#define PcrDone 2
+#define PcrReduction 3
+#define PcrGeneration 4
+#define PcrPreEof 5
+#define PcrReverse 6
+
+/* Parsing entry points, stream manipulation and token construction.
+ * Declarations only; the definitions are not part of this header.
+ * The redundant second declarations of extractIgnore() (declared earlier in
+ * this header) and extractMatch() have been dropped. */
+long parseToken( struct ColmProgram *prg, Tree **sp, PdaRun *pdaRun,
+		FsmRun *fsmRun, InputStream *inputStream, long entry );
+
+long undoParse( Tree **sp, PdaRun *pdaRun, FsmRun *fsmRun, InputStream *inputStream, Tree *tree );
+
+Head *streamPull( struct ColmProgram *prg, FsmRun *fsmRun, InputStream *inputStream, long length );
+Head *stringAllocPointer( struct ColmProgram *prg, const char *data, long length );
+
+void streamPushText( FsmRun *fsmRun, InputStream *inputStream, const char *data, long length );
+void streamPushTree( FsmRun *fsmRun, InputStream *inputStream, Tree *tree, int ignore );
+void undoStreamPush( struct ColmProgram *prg, Tree **sp, FsmRun *fsmRun, InputStream *inputStream, long length );
+void undoStreamAppend( struct ColmProgram *prg, Tree **sp, FsmRun *fsmRun, InputStream *inputStream, struct ColmTree *tree, long length );
+Kid *makeTokenWithData( struct ColmProgram *prg, PdaRun *pdaRun, FsmRun *fsmRun,
+		InputStream *inputStream, int id, Head *tokdata );
+
+void pushBinding( PdaRun *pdaRun, ParseTree *parseTree );
+void popBinding( PdaRun *pdaRun, ParseTree *parseTree );
+
+void executeGenerationAction( struct ColmProgram *prg, Tree **sp, FsmRun *fsmRun, PdaRun *pdaRun,
+		InputStream *inputStream, int frameId, Code *code, long id, Head *tokdata );
+long sendBackQueuedIgnore( struct ColmProgram *prg, Tree **sp, InputStream *inputStream,
+		FsmRun *fsmRun, PdaRun *pdaRun, long entry );
+void clearIgnoreList( struct ColmProgram *prg, Tree **sp, Kid *kid );
+Head *extractMatch( struct ColmProgram *prg, FsmRun *fsmRun, InputStream *inputStream );
+void newToken( struct ColmProgram *prg, PdaRun *pdaRun, FsmRun *fsmRun );
+void fsmExecute( FsmRun *fsmRun, InputStream *inputStream );
+void sendNamedLangEl( struct ColmProgram *prg, Tree **sp, PdaRun *pdaRun, FsmRun *fsmRun, InputStream *inputStream );
+long parseLoop( struct ColmProgram *prg, Tree **sp, PdaRun *pdaRun,
+		FsmRun *fsmRun, InputStream *inputStream, long entry );
+void initBindings( PdaRun *pdaRun );
+Tree *getParsedRoot( PdaRun *pdaRun, int stop );
+void undoParseStream( struct ColmProgram *prg, Tree **sp, InputStream *inputStream, FsmRun *fsmRun,
+		PdaRun *pdaRun, long steps );
+
+void clearBuffered( FsmRun *fsmRun );
+void resetToken( FsmRun *fsmRun );
+
+void detachInput( FsmRun *fsmRun, InputStream *is );
+void attachInput( FsmRun *fsmRun, InputStream *is );
+void detachSource( FsmRun *fsmRun, SourceStream *ss );
+void attachSource( FsmRun *fsmRun, SourceStream *ss );
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/pool.c b/src/pool.c
new file mode 100644
index 00000000..e1c3c240
--- /dev/null
+++ b/src/pool.c
@@ -0,0 +1,330 @@
+/*
+ * Copyright 2010-2012 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <string.h>
+#include <stdlib.h>
+
+#include <pdarun.h>
+#include <pool.h>
+#include <debug.h>
+
+/* Put poolAlloc into the empty state for elements of sizeofT bytes: no
+ * slabs, no free list, and nextel at FRESH_BLOCK so that the first
+ * allocation creates a slab. */
+void initPoolAlloc( PoolAlloc *poolAlloc, int sizeofT )
+{
+	poolAlloc->sizeofT = sizeofT;
+	poolAlloc->pool = 0;
+	poolAlloc->head = 0;
+	poolAlloc->nextel = FRESH_BLOCK;
+}
+
+/* Return one zero-filled element of poolAlloc->sizeofT bytes.
+ *
+ * With POOL_MALLOC defined every element comes straight from the C heap.
+ * Otherwise a previously freed element is reused if there is one; failing
+ * that the next unused slot of the head slab is handed out, and a new slab
+ * of FRESH_BLOCK elements is created when the head slab is exhausted or
+ * missing.
+ *
+ * No caller checks the result, so running out of memory aborts the process
+ * here instead of writing through a null pointer. */
+void *poolAllocAllocate( PoolAlloc *poolAlloc )
+{
+	debug( REALM_POOL, "pool allocation\n" );
+
+#ifdef POOL_MALLOC
+	void *res = calloc( 1, poolAlloc->sizeofT );
+	if ( res == 0 )
+		abort();
+	return res;
+#else
+	void *newEl = 0;
+	if ( poolAlloc->pool == 0 ) {
+		/* A missing head slab is treated like an exhausted one, so a pool
+		 * that has been cleared can be allocated from again. */
+		if ( poolAlloc->head == 0 || poolAlloc->nextel >= FRESH_BLOCK ) {
+			PoolBlock *newBlock = (PoolBlock*)malloc( sizeof(PoolBlock) );
+			if ( newBlock == 0 )
+				abort();
+
+			/* Compute the slab size in size_t to avoid int overflow. */
+			newBlock->data = malloc( (size_t)poolAlloc->sizeofT * FRESH_BLOCK );
+			if ( newBlock->data == 0 )
+				abort();
+
+			newBlock->next = poolAlloc->head;
+			poolAlloc->head = newBlock;
+			poolAlloc->nextel = 0;
+		}
+
+		newEl = (char*)poolAlloc->head->data +
+				(size_t)poolAlloc->sizeofT * poolAlloc->nextel++;
+	}
+	else {
+		/* Reuse the most recently freed element. */
+		newEl = poolAlloc->pool;
+		poolAlloc->pool = poolAlloc->pool->next;
+	}
+	memset( newEl, 0, poolAlloc->sizeofT );
+	return newEl;
+#endif
+}
+
+/* Give el back to poolAlloc. With POOL_MALLOC the element is released to
+ * the C heap; otherwise it is pushed onto the pool's free list, reusing its
+ * first bytes as the PoolItem link. A null el is ignored in both
+ * configurations (free() already ignores it in the POOL_MALLOC one). */
+void poolAllocFree( PoolAlloc *poolAlloc, void *el )
+{
+	if ( el == 0 )
+		return;
+
+	#if 0
+	/* Some sanity checking. Best not to normally run with this on. Needs
+	 * <assert.h>. Bytes are compared as unsigned char so that the 0xcc
+	 * fill pattern can actually match. */
+	unsigned char *p = (unsigned char*)el + sizeof(PoolItem*);
+	unsigned char *pe = (unsigned char*)el + poolAlloc->sizeofT;
+	for ( ; p < pe; p++ )
+		assert( *p != 0xcc );
+	memset( el, 0xcc, poolAlloc->sizeofT );
+	#endif
+
+#ifdef POOL_MALLOC
+	free( el );
+#else
+	PoolItem *pi = (PoolItem*) el;
+	pi->next = poolAlloc->pool;
+	poolAlloc->pool = pi;
+#endif
+}
+
+/* Free every slab owned by poolAlloc and return it to the same empty state
+ * initPoolAlloc() establishes. All elements previously handed out become
+ * invalid. */
+void poolAllocClear( PoolAlloc *poolAlloc )
+{
+	PoolBlock *block = poolAlloc->head;
+	while ( block != 0 ) {
+		PoolBlock *next = block->next;
+		free( block->data );
+		free( block );
+		block = next;
+	}
+
+	poolAlloc->head = 0;
+	/* FRESH_BLOCK, not 0: with no head slab the next allocation must create
+	 * one rather than index into a null head. */
+	poolAlloc->nextel = FRESH_BLOCK;
+	poolAlloc->pool = 0;
+}
+
+/* Count the elements that were handed out and are not on the free list:
+ * nextel for the head slab plus FRESH_BLOCK for each older slab, minus the
+ * length of the free list. */
+long poolAllocNumLost( PoolAlloc *poolAlloc )
+{
+	long outstanding = 0;
+	PoolBlock *blk;
+	PoolItem *freed;
+
+	/* Only the head slab can be partially used. */
+	for ( blk = poolAlloc->head; blk != 0; blk = blk->next )
+		outstanding += ( blk == poolAlloc->head ) ? poolAlloc->nextel : FRESH_BLOCK;
+
+	/* Everything on the free list has been returned. */
+	for ( freed = poolAlloc->pool; freed != 0; freed = freed->next )
+		outstanding -= 1;
+
+	return outstanding;
+}
+
+/*
+ * Kid
+ */
+
+/* Take a zero-filled Kid from the program's kid pool. */
+Kid *kidAllocate( Program *prg )
+{
+	void *slot = poolAllocAllocate( &prg->kidPool );
+	return (Kid*) slot;
+}
+
+/* Hand el back to the kid pool. */
+void kidFree( Program *prg, Kid *el )
+{
+	PoolAlloc *pool = &prg->kidPool;
+	poolAllocFree( pool, el );
+}
+
+/* Release all storage held by the kid pool. */
+void kidClear( Program *prg )
+{
+	PoolAlloc *pool = &prg->kidPool;
+	poolAllocClear( pool );
+}
+
+/* Number of Kids handed out and not yet given back. */
+long kidNumLost( Program *prg )
+{
+	const long outstanding = poolAllocNumLost( &prg->kidPool );
+	return outstanding;
+}
+
+/*
+ * Tree
+ */
+
+/* Take a zero-filled Tree from the program's tree pool. */
+Tree *treeAllocate( Program *prg )
+{
+	void *slot = poolAllocAllocate( &prg->treePool );
+	return (Tree*) slot;
+}
+
+/* Hand el back to the tree pool. */
+void treeFree( Program *prg, Tree *el )
+{
+	PoolAlloc *pool = &prg->treePool;
+	poolAllocFree( pool, el );
+}
+
+/* Release all storage held by the tree pool. */
+void treeClear( Program *prg )
+{
+	PoolAlloc *pool = &prg->treePool;
+	poolAllocClear( pool );
+}
+
+/* Number of Trees handed out and not yet given back. */
+long treeNumLost( Program *prg )
+{
+	const long outstanding = poolAllocNumLost( &prg->treePool );
+	return outstanding;
+}
+
+/*
+ * ParseTree
+ */
+
+/* Take a zero-filled ParseTree from the program's parse tree pool. */
+ParseTree *parseTreeAllocate( Program *prg )
+{
+	void *slot = poolAllocAllocate( &prg->parseTreePool );
+	return (ParseTree*) slot;
+}
+
+/* Hand el back to the parse tree pool. */
+void parseTreeFree( Program *prg, ParseTree *el )
+{
+	PoolAlloc *pool = &prg->parseTreePool;
+	poolAllocFree( pool, el );
+}
+
+/* Release all storage held by the parse tree pool. */
+void parseTreeClear( Program *prg )
+{
+	PoolAlloc *pool = &prg->parseTreePool;
+	poolAllocClear( pool );
+}
+
+/* Number of ParseTrees handed out and not yet given back. */
+long parseTreeNumLost( Program *prg )
+{
+	const long outstanding = poolAllocNumLost( &prg->parseTreePool );
+	return outstanding;
+}
+
+/*
+ * ListEl
+ */
+
+/* Take a zero-filled ListEl from the program's list element pool. */
+ListEl *listElAllocate( Program *prg )
+{
+	void *slot = poolAllocAllocate( &prg->listElPool );
+	return (ListEl*) slot;
+}
+
+/* Hand el back to the list element pool. */
+void listElFree( Program *prg, ListEl *el )
+{
+	PoolAlloc *pool = &prg->listElPool;
+	poolAllocFree( pool, el );
+}
+
+/* Release all storage held by the list element pool. */
+void listElClear( Program *prg )
+{
+	PoolAlloc *pool = &prg->listElPool;
+	poolAllocClear( pool );
+}
+
+/* Number of ListEls handed out and not yet given back. */
+long listElNumLost( Program *prg )
+{
+	const long outstanding = poolAllocNumLost( &prg->listElPool );
+	return outstanding;
+}
+
+/*
+ * MapEl
+ */
+
+/* Take a zero-filled MapEl from the program's map element pool. */
+MapEl *mapElAllocate( Program *prg )
+{
+	void *slot = poolAllocAllocate( &prg->mapElPool );
+	return (MapEl*) slot;
+}
+
+/* Hand el back to the map element pool. */
+void mapElFree( Program *prg, MapEl *el )
+{
+	PoolAlloc *pool = &prg->mapElPool;
+	poolAllocFree( pool, el );
+}
+
+/* Release all storage held by the map element pool. */
+void mapElClear( Program *prg )
+{
+	PoolAlloc *pool = &prg->mapElPool;
+	poolAllocClear( pool );
+}
+
+/* Number of MapEls handed out and not yet given back. */
+long mapElNumLost( Program *prg )
+{
+	const long outstanding = poolAllocNumLost( &prg->mapElPool );
+	return outstanding;
+}
+
+/*
+ * Head
+ */
+
+/* Take a zero-filled Head from the program's head pool. */
+Head *headAllocate( Program *prg )
+{
+	void *slot = poolAllocAllocate( &prg->headPool );
+	return (Head*) slot;
+}
+
+/* Hand el back to the head pool. */
+void headFree( Program *prg, Head *el )
+{
+	PoolAlloc *pool = &prg->headPool;
+	poolAllocFree( pool, el );
+}
+
+/* Release all storage held by the head pool. */
+void headClear( Program *prg )
+{
+	PoolAlloc *pool = &prg->headPool;
+	poolAllocClear( pool );
+}
+
+/* Number of Heads handed out and not yet given back. */
+long headNumLost( Program *prg )
+{
+	const long outstanding = poolAllocNumLost( &prg->headPool );
+	return outstanding;
+}
+
+/*
+ * Location
+ */
+
+/* Take a zero-filled Location from the program's location pool. */
+Location *locationAllocate( Program *prg )
+{
+	void *slot = poolAllocAllocate( &prg->locationPool );
+	return (Location*) slot;
+}
+
+/* Hand el back to the location pool. */
+void locationFree( Program *prg, Location *el )
+{
+	PoolAlloc *pool = &prg->locationPool;
+	poolAllocFree( pool, el );
+}
+
+/* Release all storage held by the location pool. */
+void locationClear( Program *prg )
+{
+	PoolAlloc *pool = &prg->locationPool;
+	poolAllocClear( pool );
+}
+
+/* Number of Locations handed out and not yet given back. */
+long locationNumLost( Program *prg )
+{
+	const long outstanding = poolAllocNumLost( &prg->locationPool );
+	return outstanding;
+}
+
+/*
+ * Stream
+ */
+
+/* Streams have no pool of their own; they are carved out of MapEl-sized
+ * slots of the map element pool. */
+Stream *streamAllocate( Program *prg )
+{
+	MapEl *slot = mapElAllocate( prg );
+	return (Stream*)slot;
+}
+
+/* Return a Stream's slot to the map element pool. */
+void streamFree( Program *prg, Stream *stream )
+{
+	MapEl *slot = (MapEl*)stream;
+	mapElFree( prg, slot );
+}
+
+
+/*
+ * Input
+ */
+
+/* Inputs are carved out of MapEl-sized slots of the map element pool.
+ * NOTE(review): this relies on sizeof(Input) <= sizeof(MapEl); confirm that
+ * this is checked somewhere, as it is for the other types sharing the pool. */
+Input *inputAllocate( Program *prg )
+{
+	MapEl *slot = mapElAllocate( prg );
+	return (Input*)slot;
+}
+
+/* Return an Input's slot to the map element pool. */
+void inputFree( Program *prg, Input *accumStream )
+{
+	MapEl *slot = (MapEl*)accumStream;
+	mapElFree( prg, slot );
+}
diff --git a/src/pool.h b/src/pool.h
new file mode 100644
index 00000000..ae647abd
--- /dev/null
+++ b/src/pool.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright 2010-2012 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _POOL_H
+#define _POOL_H
+
+/* Allocation, number of items. */
+#define FRESH_BLOCK 8128
+
+#include <pdarun.h>
+#include <map.h>
+#include <tree.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Put a PoolAlloc into the empty state for elements of sizeofT bytes. */
+void initPoolAlloc( PoolAlloc *poolAlloc, int sizeofT );
+
+/* Per-type pool front ends. For each type: *Allocate returns a zero-filled
+ * element from the matching pool in Program, *Free hands an element back,
+ * *Clear releases all of the pool's storage, and *NumLost reports how many
+ * elements were handed out and not given back. */
+Kid *kidAllocate( Program *prg );
+void kidFree( Program *prg, Kid *el );
+void kidClear( Program *prg );
+long kidNumLost( Program *prg );
+
+Tree *treeAllocate( Program *prg );
+void treeFree( Program *prg, Tree *el );
+void treeClear( Program *prg );
+long treeNumLost( Program *prg );
+
+ParseTree *parseTreeAllocate( Program *prg );
+void parseTreeFree( Program *prg, ParseTree *el );
+void parseTreeClear( Program *prg );
+long parseTreeNumLost( Program *prg );
+
+ListEl *listElAllocate( Program *prg );
+void listElFree( Program *prg, ListEl *el );
+void listElClear( Program *prg );
+long listElNumLost( Program *prg );
+
+MapEl *mapElAllocate( Program *prg );
+void mapElFree( Program *prg, MapEl *el );
+void mapElClear( Program *prg );
+long mapElNumLost( Program *prg );
+
+Head *headAllocate( Program *prg );
+void headFree( Program *prg, Head *el );
+void headClear( Program *prg );
+long headNumLost( Program *prg );
+
+Location *locationAllocate( Program *prg );
+void locationFree( Program *prg, Location *el );
+void locationClear( Program *prg );
+long locationNumLost( Program *prg );
+
+/* Streams and Inputs are allocated from the MapEl pool. */
+Stream *streamAllocate( Program *prg );
+void streamFree( Program *prg, Stream *stream );
+
+Input *inputAllocate( Program *prg );
+void inputFree( Program *prg, Input *stream );
+
+/* Wrong place. */
+TreePair mapRemove( Program *prg, Map *map, Tree *key );
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/program.c b/src/program.c
new file mode 100644
index 00000000..c17b8bb4
--- /dev/null
+++ b/src/program.c
@@ -0,0 +1,254 @@
+/*
+ * Copyright 2006-2012 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <pdarun.h>
+#include <fsmrun.h>
+#include <tree.h>
+#include <bytecode.h>
+#include <pool.h>
+#include <debug.h>
+#include <config.h>
+
+#include <alloca.h>
+#include <sys/mman.h>
+#include <string.h>
+#include <assert.h>
+#include <stdlib.h>
+
+/* One-time runtime initialisation: switch every logging flag on, record the
+ * active debug realm and install the input functions. */
+void colmInit( long debugRealm )
+{
+	/* Always on because logging is controlled with ifdefs in the runtime
+	 * lib. */
+	colm_log_bytecode = 1;
+	colm_log_parse = 1;
+	colm_log_match = 1;
+	colm_log_compile = 1;
+	colm_log_conds = 1;
+
+	/* Realm used to filter debug output. */
+	colmActiveRealm = debugRealm;
+
+	initInputFuncs();
+}
+
+/* Run the program: allocate the global object and the VM stack, then
+ * execute the root code if there is any. */
+void colmRunProgram( Program *prg )
+{
+	/* These types are stored in Tree-sized pool slots. */
+	assert( sizeof(Int) <= sizeof(Tree) );
+	assert( sizeof(Str) <= sizeof(Tree) );
+	assert( sizeof(Pointer) <= sizeof(Tree) );
+
+	/* These types are stored in MapEl-sized pool slots. */
+	assert( sizeof(Map) <= sizeof(MapEl) );
+	assert( sizeof(List) <= sizeof(MapEl) );
+	assert( sizeof(Stream) <= sizeof(MapEl) );
+	assert( sizeof(Parser) <= sizeof(MapEl) );
+
+	/* Allocate the global variable. */
+	allocGlobal( prg );
+
+	/* Allocate the VM stack. The root sits one past its last slot. */
+	prg->vm_stack = stackAlloc();
+	prg->vm_root = &prg->vm_stack[VM_STACK_SIZE];
+
+	/* Nothing to execute without root code. */
+	if ( prg->rtd->rootCodeLen <= 0 )
+		return;
+
+	Execution execution;
+	initExecution( &execution, 0, 0, 0, 0, prg->rtd->rootFrameId );
+	mainExecution( prg, &execution, prg->rtd->rootCode );
+}
+
+/* Release the global object: downref each of its rtd->globalSize fields,
+ * free the attribute list and then the object itself. Does nothing when the
+ * global was never allocated, which is the case when the program is deleted
+ * without having been run. */
+void clearGlobal( Program *prg, Tree **sp )
+{
+	int g;
+
+	if ( prg->global == 0 )
+		return;
+
+	/* Downref all the fields in the global object. */
+	for ( g = 0; g < prg->rtd->globalSize; g++ ) {
+		//assert( getAttr( global, g )->refs == 1 );
+		treeDownref( prg, sp, getAttr( prg->global, g ) );
+	}
+
+	/* Free the global object. */
+	if ( prg->rtd->globalSize > 0 )
+		freeAttrs( prg, prg->global->child );
+	treeFree( prg, prg->global );
+}
+
+/* Create the global object: a Tree holding one reference and
+ * rtd->globalSize attribute slots, stored in prg->global. */
+void allocGlobal( Program *prg )
+{
+	Tree *global = treeAllocate( prg );
+	global->refs = 1;
+	global->child = allocAttrs( prg, prg->rtd->globalSize );
+	prg->global = global;
+}
+
+/* Map an anonymous, private, read/write region large enough for
+ * VM_STACK_SIZE Tree pointers and return it.
+ *
+ * Returns 0 if the mapping fails. mmap() reports failure as MAP_FAILED,
+ * which is not a null pointer, so it is translated here to let callers use
+ * an ordinary null check. */
+Tree **stackAlloc()
+{
+	void *mem = mmap( 0, sizeof(Tree*)*VM_STACK_SIZE,
+			PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0 );
+	if ( mem == MAP_FAILED )
+		return 0;
+	return (Tree**)mem;
+}
+
+/* Accessor: the root of the program's VM stack. */
+Tree **vm_root( struct ColmProgram *prg )
+{
+	Tree **root = prg->vm_root;
+	return root;
+}
+
+/* Accessor: the program's stored return value. */
+Tree *returnVal( struct ColmProgram *prg )
+{
+	Tree *val = prg->returnVal;
+	return val;
+}
+
+
+/* Create a program object for the runtime data rtd.
+ *
+ * The Program is zero-initialised, argc/argv/rtd are recorded, every
+ * element pool is initialised for its type, and the shared true/false
+ * values are created as LEL_ID_BOOL trees holding one reference each.
+ *
+ * Returns the new program, or 0 if the Program itself cannot be allocated.
+ * The result is released with colmDeleteProgram(). */
+Program *colmNewProgram( RuntimeData *rtd, int argc, const char **argv )
+{
+	/* calloc zero-fills, so members not set below start out as 0. */
+	Program *prg = calloc( 1, sizeof(Program) );
+	if ( prg == 0 )
+		return 0;
+
+	prg->argc = argc;
+	prg->argv = argv;
+	prg->rtd = rtd;
+	prg->ctxDepParsing = 1;
+	prg->global = 0;
+	prg->heap = 0;
+	prg->stdinVal = 0;
+	prg->stdoutVal = 0;
+	prg->stderrVal = 0;
+	prg->induceExit = 0;
+	prg->exitStatus = 0;
+
+	initPoolAlloc( &prg->kidPool, sizeof(Kid) );
+	initPoolAlloc( &prg->treePool, sizeof(Tree) );
+	initPoolAlloc( &prg->parseTreePool, sizeof(ParseTree) );
+	initPoolAlloc( &prg->listElPool, sizeof(ListEl) );
+	initPoolAlloc( &prg->mapElPool, sizeof(MapEl) );
+	initPoolAlloc( &prg->headPool, sizeof(Head) );
+	initPoolAlloc( &prg->locationPool, sizeof(Location) );
+
+	/* The boolean constants live in Tree-sized slots of the tree pool. */
+	Int *trueInt = (Int*) treeAllocate( prg );
+	trueInt->id = LEL_ID_BOOL;
+	trueInt->refs = 1;
+	trueInt->value = 1;
+
+	Int *falseInt = (Int*) treeAllocate( prg );
+	falseInt->id = LEL_ID_BOOL;
+	falseInt->refs = 1;
+	falseInt->value = 0;
+
+	prg->trueVal = (Tree*)trueInt;
+	prg->falseVal = (Tree*)falseInt;
+
+	prg->allocRunBuf = 0;
+	prg->returnVal = 0;
+	prg->lastParseError = 0;
+
+	return prg;
+}
+
+/* Tear the program down and return its exit status.
+ *
+ * Drops the references the program holds (return value, last parse error,
+ * global object, heap list, true/false values, standard streams), reports
+ * leaked pool elements in DEBUG builds, releases every pool, the allocated
+ * run buffers, the VM stack mapping and finally the Program itself. prg
+ * must not be used afterwards. */
+int colmDeleteProgram( Program *prg )
+{
+	Tree **sp = prg->vm_root;
+	int exitStatus = prg->exitStatus;
+
+	#ifdef COLM_LOG_BYTECODE
+	/* This used to write to the C++ stream cerr, which cannot compile in
+	 * this C file. */
+	if ( colm_log_bytecode )
+		message( "clearing the prg\n" );
+	#endif
+
+	treeDownref( prg, sp, prg->returnVal );
+	treeDownref( prg, sp, prg->lastParseError );
+	clearGlobal( prg, sp );
+
+	/* Clear the heap. */
+	Kid *a = prg->heap;
+	while ( a != 0 ) {
+		Kid *next = a->next;
+		treeDownref( prg, sp, a->tree );
+		kidFree( prg, a );
+		a = next;
+	}
+
+	//assert( trueVal->refs == 1 );
+	//assert( falseVal->refs == 1 );
+	treeDownref( prg, sp, prg->trueVal );
+	treeDownref( prg, sp, prg->falseVal );
+
+	treeDownref( prg, sp, (Tree*)prg->stdinVal );
+	treeDownref( prg, sp, (Tree*)prg->stdoutVal );
+	treeDownref( prg, sp, (Tree*)prg->stderrVal );
+
+#if DEBUG
+	/* Anything still outstanding in a pool at this point was leaked. */
+	long kidLost = kidNumLost( prg );
+	long treeLost = treeNumLost( prg );
+	long parseTreeLost = parseTreeNumLost( prg );
+	long listLost = listElNumLost( prg );
+	long mapLost = mapElNumLost( prg );
+	long headLost = headNumLost( prg );
+	long locationLost = locationNumLost( prg );
+
+	if ( kidLost )
+		message( "warning: lost kids: %ld\n", kidLost );
+
+	if ( treeLost )
+		message( "warning: lost trees: %ld\n", treeLost );
+
+	if ( parseTreeLost )
+		message( "warning: lost parse trees: %ld\n", parseTreeLost );
+
+	if ( listLost )
+		message( "warning: lost listEls: %ld\n", listLost );
+
+	if ( mapLost )
+		message( "warning: lost mapEls: %ld\n", mapLost );
+
+	if ( headLost )
+		message( "warning: lost heads: %ld\n", headLost );
+
+	if ( locationLost )
+		message( "warning: lost locations: %ld\n", locationLost );
+#endif
+
+	kidClear( prg );
+	treeClear( prg );
+	headClear( prg );
+	parseTreeClear( prg );
+	listElClear( prg );
+	mapElClear( prg );
+	locationClear( prg );
+
+	RunBuf *rb = prg->allocRunBuf;
+	while ( rb != 0 ) {
+		RunBuf *next = rb->next;
+		free( rb );
+		rb = next;
+	}
+
+	/* Unmap the VM stack, using the size it was mapped with in
+	 * stackAlloc(). It is absent if the program was never run. */
+	if ( prg->vm_stack != 0 && prg->vm_stack != (Tree**)MAP_FAILED )
+		munmap( prg->vm_stack, sizeof(Tree*)*VM_STACK_SIZE );
+
+	free( prg );
+
+	return exitStatus;
+}
+
+
diff --git a/src/program.h b/src/program.h
new file mode 100644
index 00000000..9a50274d
--- /dev/null
+++ b/src/program.h
@@ -0,0 +1,128 @@
+/*
+ * Copyright 2007-2012 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef __COLM_PROGRAM_H
+#define __COLM_PROGRAM_H
+
+#include <pdarun.h>
+
+/* Static, generated description of a compiled program. Most members come as
+ * a table pointer paired with an element count, followed by a set of ids.
+ * NOTE(review): the per-field remarks below are inferred from the member
+ * names and types only; confirm against the code that fills this struct. */
+typedef struct ColmRuntimeData
+{
+ LangElInfo *lelInfo;
+ long numLangEls;
+
+ ProdInfo *prodInfo;
+ long numProds;
+
+ RegionInfo *regionInfo;
+ long numRegions;
+
+ /* Presumably the bytecode of the program's main line and its length. */
+ Code *rootCode;
+ long rootCodeLen;
+ long rootFrameId;
+
+ FrameInfo *frameInfo;
+ long numFrames;
+
+ FunctionInfo *functionInfo;
+ long numFunctions;
+
+ PatReplInfo *patReplInfo;
+ long numPatterns;
+
+ PatReplNode *patReplNodes;
+ long numPatternNodes;
+
+ GenericInfo *genericInfo;
+ long numGenerics;
+
+ long argvGenericId;
+
+ /* Presumably parallel arrays of numLiterals entries: raw text, text
+ * length and the Head built for each literal -- TODO confirm. */
+ const char **litdata;
+ long *litlen;
+ Head **literals;
+ long numLiterals;
+
+ CaptureAttr *captureAttr;
+ long numCapturedAttr;
+
+ FsmTables *fsmTables;
+ PdaTables *pdaTables;
+ int *startStates;
+ int *eofLelIds;
+ int *parserLelIds;
+ long numParsers;
+
+ long globalSize;
+
+ long firstNonTermId;
+
+ long integerId;
+ long stringId;
+ long anyId;
+ long eofId;
+ long noTokenId;
+} RuntimeData;
+
+
+/* Run-time state of one executing program: its arguments, a pointer to the
+ * static RuntimeData, the allocation pools, a few well-known tree values,
+ * the standard streams and the VM stack pointers. */
+typedef struct ColmProgram
+{
+ int argc;
+ const char **argv;
+
+ unsigned char ctxDepParsing;
+ RuntimeData *rtd;
+ Tree *global;
+ int induceExit;
+ int exitStatus;
+
+ /* One pool per object kind. NOTE(review): presumably these are the pools
+ * reported by the *NumLost() checks and released by the *Clear() calls at
+ * program teardown -- confirm in pool.c. */
+ PoolAlloc kidPool;
+ PoolAlloc treePool;
+ PoolAlloc parseTreePool;
+ PoolAlloc listElPool;
+ PoolAlloc mapElPool;
+ PoolAlloc headPool;
+ PoolAlloc locationPool;
+
+ /* Reference counted; teardown drops one reference on each. */
+ Tree *trueVal;
+ Tree *falseVal;
+
+ /* Singly linked list (via Kid::next); teardown downrefs each element's
+ * tree and frees the Kid. */
+ Kid *heap;
+
+ Tree **se;
+
+ /* Teardown casts these to Tree* and downrefs them like any other tree. */
+ Stream *stdinVal;
+ Stream *stdoutVal;
+ Stream *stderrVal;
+
+ /* Singly linked list of run buffers; teardown walks it and free()s each. */
+ RunBuf *allocRunBuf;
+
+ Tree **vm_stack;
+ Tree **vm_root;
+
+ /* Returned from the main line. Should have exports instead. */
+ Tree *returnVal;
+
+ /* The most recent parse error. Should be returned from the parsing function. */
+ Tree *lastParseError;
+} Program;
+
+#endif
diff --git a/src/redbuild.cc b/src/redbuild.cc
new file mode 100644
index 00000000..ae5faf38
--- /dev/null
+++ b/src/redbuild.cc
@@ -0,0 +1,650 @@
+/*
+ * Copyright 2006-2012 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+
+#include "global.h"
+#include "redbuild.h"
+#include "fsmgraph.h"
+#include "redfsm.h"
+#include "fsmcodegen.h"
+#include <string.h>
+
+using namespace std;
+
+/* Construct a builder that will reduce `fsm` on behalf of compiler `pd`.
+ *
+ * Every member gets a defined value here: the start and error state numbers
+ * begin at -1 (unset), redFsm stays null until reduceMachine() allocates it,
+ * and the cur* cursors begin at zero. Previously redFsm and the cursors were
+ * left indeterminate until the corresponding make*() pass assigned them.
+ * The initializers follow the member declaration order in the class. */
+RedFsmBuild::RedFsmBuild( char *fsmName, Compiler *pd, FsmGraph *fsm )
+:
+	fsmName(fsmName),
+	pd(pd),
+	fsm(fsm),
+	nextActionTableId(0),
+	startState(-1),
+	errState(-1),
+	redFsm(0),
+	curAction(0),
+	curActionTable(0),
+	curTrans(0),
+	curState(0),
+	curCondSpace(0),
+	curStateCond(0)
+{
+}
+
+/* Allocate the array of `length` generated actions, zero-fill it and add
+ * every element to the machine's genActionList in index order. */
+void RedFsmBuild::initActionList( unsigned long length )
+{
+	GenAction *actions = new GenAction[length];
+	redFsm->allActions = actions;
+	memset( actions, 0, sizeof(GenAction) * length );
+
+	for ( unsigned long idx = 0; idx < length; idx++ )
+		redFsm->genActionList.append( &actions[idx] );
+}
+
+
+/* Number the actions that are referenced (directly or as conditions),
+ * allocate the generated-action array for them, then emit each numbered
+ * action in list order. */
+void RedFsmBuild::makeActionList()
+{
+	/* First pass: hand out ids to the actions that will be written. */
+	int numWritten = 0;
+	for ( ActionList::Iter act = pd->actionList; act.lte(); act++ ) {
+		bool referenced = act->numRefs() > 0 || act->numCondRefs > 0;
+		if ( referenced ) {
+			act->actionId = numWritten;
+			numWritten += 1;
+		}
+	}
+
+	initActionList( numWritten );
+	curAction = 0;
+
+	/* Second pass: build every action that holds a non-negative id. */
+	for ( ActionList::Iter act = pd->actionList; act.lte(); act++ ) {
+		if ( act->actionId < 0 )
+			continue;
+		makeAction( act );
+	}
+}
+
+/* Allocate storage for `length` reduced action tables. The elements are
+ * filled in later by makeActionTableList(). */
+void RedFsmBuild::initActionTableList( unsigned long length )
+{
+	RedAction *tables = new RedAction[length];
+	redFsm->allActionTables = tables;
+}
+
+/* Allocate `length` reduced states, list them on redFsm->stateList and turn
+ * the recorded start, error and entry point state numbers into pointers
+ * into the new array. Requires startState to have been set already. */
+void RedFsmBuild::initStateList( unsigned long length )
+{
+	RedState *states = new RedState[length];
+	redFsm->allStates = states;
+	for ( unsigned long idx = 0; idx < length; idx++ )
+		redFsm->stateList.append( &states[idx] );
+
+	/* The start state arrives as an offset; resolve the pointer now. */
+	assert( startState >= 0 );
+	redFsm->startState = &states[startState];
+
+	/* An error state is optional. */
+	if ( errState >= 0 )
+		redFsm->errState = &states[errState];
+
+	for ( EntryIdVect::Iter en = redFsm->entryPointIds; en.lte(); en++ )
+		redFsm->entryPoints.insert( &states[*en] );
+
+	/* The nextStateId is no longer used to assign state ids (they come in set
+	 * from the frontend now), however generation code still depends on it.
+	 * Should eventually remove this variable. */
+	redFsm->nextStateId = redFsm->stateList.length();
+}
+
+/* Record one entry point: append its state number and name to the parallel
+ * entryPointIds / entryPointNames vectors and map entryId to the state
+ * number in redEntryMap. */
+void RedFsmBuild::addEntryPoint( int entryId, char *name, unsigned long entryState )
+{
+ redFsm->entryPointIds.append( entryState );
+ redFsm->entryPointNames.append( name );
+ redFsm->redEntryMap.insert( entryId, entryState );
+}
+
+/* Append the entry id for the next region. Regions must be added in id
+ * order starting at zero: regionId has to equal the current vector length. */
+void RedFsmBuild::addRegionToEntry( int regionId, int entryId )
+{
+ assert( regionId == redFsm->regionToEntry.length() );
+ redFsm->regionToEntry.append( entryId );
+}
+
+/* Hook called before a state's transitions are added. Currently a no-op;
+ * both arguments are ignored. */
+void RedFsmBuild::initTransList( int snum, unsigned long length )
+{
+ /* Could preallocate the out range to save time growing it. For now do
+ * nothing. */
+}
+
+/* Append the transition covering [lowKey, highKey] to the out range of
+ * state `snum`.
+ *
+ *  snum    index of the source state in redFsm->allStates
+ *  tnum    index of the transition within the state (not used here)
+ *  targ    target state index, or negative when there is no target
+ *  action  index into redFsm->allActionTables, or negative for none
+ *
+ * When the machine is built complete, a missing target becomes the error
+ * state and any key gap in front of lowKey is first filled with the error
+ * transition. Nothing is ever added to the error state itself. */
+void RedFsmBuild::newTrans( int snum, int tnum, Key lowKey,
+		Key highKey, long targ, long action )
+{
+	/* Get the current state and range. */
+	RedState *curState = redFsm->allStates + snum;
+	RedTransList &destRange = curState->outRange;
+
+	if ( curState == redFsm->errState )
+		return;
+
+	/* Make the new transition. */
+	RedState *targState = targ >= 0 ? (redFsm->allStates + targ) :
+			redFsm->wantComplete ? redFsm->getErrorState() : 0;
+	RedAction *actionTable = action >= 0 ? (redFsm->allActionTables + action) : 0;
+	RedTrans *trans = redFsm->allocateTrans( targState, actionTable );
+	RedTransEl transEl( lowKey, highKey, trans );
+
+	if ( redFsm->wantComplete ) {
+		/* If the machine is to be complete then we need to fill any gaps with
+		 * the error transitions. */
+		if ( destRange.length() == 0 ) {
+			/* Range is currently empty. */
+			if ( keyOps->minKey < lowKey ) {
+				/* The first range doesn't start at the low end. */
+				Key fillHighKey = lowKey;
+				fillHighKey.decrement();
+
+				/* Create the filler with the state's error transition. */
+				RedTransEl newTel( keyOps->minKey, fillHighKey, redFsm->getErrorTrans() );
+				destRange.append( newTel );
+			}
+		}
+		else {
+			/* The range list is not empty, get the last range. */
+			RedTransEl *last = &destRange[destRange.length()-1];
+			Key nextKey = last->highKey;
+			nextKey.increment();
+			if ( nextKey < lowKey ) {
+				/* There is a gap to fill. Make the high key. */
+				Key fillHighKey = lowKey;
+				fillHighKey.decrement();
+
+				/* Create the filler with the state's error transition. */
+				RedTransEl newTel( nextKey, fillHighKey, redFsm->getErrorTrans() );
+				destRange.append( newTel );
+			}
+		}
+	}
+
+	/* Filler taken care of. Append the element built above; it used to be
+	 * left unused while an identical temporary was constructed here. */
+	destRange.append( transEl );
+}
+
+/* Close off the out range of state `snum`. For a complete machine this adds
+ * an error transition covering whatever keys remain above the last range,
+ * or the whole alphabet when the state has no ranges at all. The error
+ * state and incomplete machines are left untouched. */
+void RedFsmBuild::finishTransList( int snum )
+{
+	RedState *state = redFsm->allStates + snum;
+
+	if ( state == redFsm->errState )
+		return;
+
+	/* Filler is only needed when building a complete machine. */
+	if ( !redFsm->wantComplete )
+		return;
+
+	RedTransList &ranges = state->outRange;
+
+	if ( ranges.length() == 0 ) {
+		/* No ranges yet: cover the whole alphabet. */
+		RedTransEl wholeAlphabet( keyOps->minKey, keyOps->maxKey,
+				redFsm->getErrorTrans() );
+		ranges.append( wholeAlphabet );
+		return;
+	}
+
+	/* Look at the last range and check for a gap at the top end. */
+	RedTransEl &last = ranges[ranges.length()-1];
+	if ( last.highKey < keyOps->maxKey ) {
+		/* The filler starts one past the last covered key. */
+		Key fillLowKey = last.highKey;
+		fillLowKey.increment();
+
+		RedTransEl topFiller( fillLowKey, keyOps->maxKey,
+				redFsm->getErrorTrans() );
+		ranges.append( topFiller );
+	}
+}
+
+/* Store `id` as the id of the state at index `snum`. */
+void RedFsmBuild::setId( int snum, int id )
+{
+	redFsm->allStates[snum].id = id;
+}
+
+/* Give state `snum` an EOF transition that goes to state `eofTarget` and
+ * runs action table `actId`. Both indices are used unchecked. */
+void RedFsmBuild::setEofTrans( int snum, int eofTarget, int actId )
+{
+	RedState &state = redFsm->allStates[snum];
+	RedState *target = &redFsm->allStates[eofTarget];
+	RedAction *actions = &redFsm->allActionTables[actId];
+
+	state.eofTrans = redFsm->allocateTrans( target, actions );
+}
+
+/* Mark the state at index `snum` as final. */
+void RedFsmBuild::setFinal( int snum )
+{
+	redFsm->allStates[snum].isFinal = true;
+}
+
+
+/* Attach the to-state, from-state and EOF action tables to state `snum`.
+ * Each argument is an index into redFsm->allActionTables; a negative value
+ * leaves the corresponding pointer untouched. */
+void RedFsmBuild::setStateActions( int snum, long toStateAction,
+		long fromStateAction, long eofAction )
+{
+	RedState &state = redFsm->allStates[snum];
+	RedAction *tables = redFsm->allActionTables;
+
+	if ( toStateAction >= 0 )
+		state.toStateAction = tables + toStateAction;
+
+	if ( fromStateAction >= 0 )
+		state.fromStateAction = tables + fromStateAction;
+
+	if ( eofAction >= 0 )
+		state.eofAction = tables + eofAction;
+}
+
+/* Copy every state's condition list into that state's condition vector,
+ * preserving list order. */
+void RedFsmBuild::closeMachine()
+{
+	//for ( GenActionList::Iter a = redFsm->actionList; a.lte(); a++ )
+	//	resolveTargetStates( a->inlineList );
+
+	/* Note that even if we want a complete graph we do not give the error
+	 * state a default transition. All machines break out of the processing
+	 * loop when in the error state. */
+
+	for ( RedStateList::Iter state = redFsm->stateList; state.lte(); state++ ) {
+		GenStateCondList::Iter cond = state->stateCondList;
+		while ( cond.lte() ) {
+			state->stateCondVect.append( cond );
+			cond++;
+		}
+	}
+}
+
+/* Allocate `length` condition spaces and add each one to the machine's
+ * condSpaceList in index order. */
+void RedFsmBuild::initCondSpaceList( ulong length )
+{
+	GenCondSpace *spaces = new GenCondSpace[length];
+	redFsm->allCondSpaces = spaces;
+
+	for ( ulong idx = 0; idx < length; idx++ )
+		redFsm->condSpaceList.append( &spaces[idx] );
+}
+
+/* Fill in the id and base key of the condition space at index `cnum`. */
+void RedFsmBuild::newCondSpace( int cnum, int condSpaceId, Key baseKey )
+{
+	GenCondSpace &space = redFsm->allCondSpaces[cnum];
+	space.condSpaceId = condSpaceId;
+	space.baseKey = baseKey;
+}
+
+/* Add the generated action at index `condActionId` to the condition set of
+ * the condition space at index `cnum`. */
+void RedFsmBuild::condSpaceItem( int cnum, long condActionId )
+{
+	GenAction *condAction = &redFsm->allActions[condActionId];
+	redFsm->allCondSpaces[cnum].condSet.append( condAction );
+}
+
+/* Hook called before a state's conditions are added. Currently a no-op;
+ * both arguments are ignored. */
+void RedFsmBuild::initStateCondList( int snum, ulong length )
+{
+ /* Could preallocate these, as we could with transitions. */
+}
+
+/* Allocate a state condition covering [lowKey, highKey], bind it to the
+ * condition space at index `condNum` and append it to the condition list of
+ * state `snum`. */
+void RedFsmBuild::addStateCond( int snum, Key lowKey, Key highKey, long condNum )
+{
+	GenStateCond *cond = new GenStateCond;
+	cond->lowKey = lowKey;
+	cond->highKey = highKey;
+	cond->condSpace = &redFsm->allCondSpaces[condNum];
+
+	redFsm->allStates[snum].stateCondList.append( cond );
+}
+
+
+/* Set the machine's forcedErrorState flag. Called from makeEntryPoints()
+ * when the source graph reports lmRequiresErrorState. */
+void RedFsmBuild::setForcedErrorState()
+{
+ redFsm->forcedErrorState = true;
+}
+
+/* Return the largest high key found at the end of any state's out range,
+ * but never less than keyOps->maxKey, which is the starting value. Must run
+ * before transitions are distributed to singles and defaults; this is
+ * asserted for every state. */
+Key RedFsmBuild::findMaxKey()
+{
+	Key largest = keyOps->maxKey;
+
+	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+		assert( st->outSingle.length() == 0 );
+		assert( st->defTrans == 0 );
+
+		long numRanges = st->outRange.length();
+		if ( numRanges <= 0 )
+			continue;
+
+		/* Only the last range of the state needs inspecting. */
+		Key candidate = st->outRange[numRanges-1].highKey;
+		if ( candidate > largest )
+			largest = candidate;
+	}
+
+	return largest;
+}
+
+
+/* Build redFsm->allActionTables from the reduced action table set. Tables
+ * are emitted in id order, so the index of a table in the array equals the
+ * id it was given by reduceActionTables(). Each emitted table is also
+ * inserted into redFsm->actionMap. */
+void RedFsmBuild::makeActionTableList()
+{
+	/* Index the reduced tables by id. */
+	int numTables = nextActionTableId;
+	RedActionTable **byId = new RedActionTable*[numTables];
+	for ( ActionTableMap::Iter at = actionTableMap; at.lte(); at++ )
+		byId[at->id] = at;
+
+	initActionTableList( numTables );
+
+	for ( curActionTable = 0; curActionTable < numTables; curActionTable++ ) {
+		RedActionTable *src = byId[curActionTable];
+		long length = src->key.length();
+
+		/* Set up the destination table. */
+		RedAction *redAct = &redFsm->allActionTables[curActionTable];
+		redAct->actListId = curActionTable;
+		redAct->key.setAsNew( length );
+
+		/* Copy the elements: the ordering key is zeroed and the value points
+		 * at the generated action carrying the source action's id. */
+		int pos = 0;
+		for ( ActionTable::Iter atel = src->key; atel.lte(); atel++ ) {
+			redAct->key[pos].key = 0;
+			redAct->key[pos].value = &redFsm->allActions[atel->value->actionId];
+			pos += 1;
+		}
+
+		/* Insert into the action table map. */
+		redFsm->actionMap.insert( redAct );
+	}
+
+	delete[] byId;
+}
+
+/* Collect the distinct non-empty action tables used by the source graph
+ * (to-state, from-state, EOF and per-transition) into actionTableMap. A
+ * table receives the next id the first time it is inserted. */
+void RedFsmBuild::reduceActionTables()
+{
+	for ( StateList::Iter st = fsm->stateList; st.lte(); st++ ) {
+		RedActionTable *reduced = 0;
+
+		/* To-state actions. */
+		if ( st->toStateActionTable.length() > 0 &&
+				actionTableMap.insert( st->toStateActionTable, &reduced ) )
+			reduced->id = nextActionTableId++;
+
+		/* From-state actions. */
+		if ( st->fromStateActionTable.length() > 0 &&
+				actionTableMap.insert( st->fromStateActionTable, &reduced ) )
+			reduced->id = nextActionTableId++;
+
+		/* EOF actions. */
+		if ( st->eofActionTable.length() > 0 &&
+				actionTableMap.insert( st->eofActionTable, &reduced ) )
+			reduced->id = nextActionTableId++;
+
+		/* Actions on each outgoing transition. */
+		for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
+			if ( trans->actionTable.length() > 0 &&
+					actionTableMap.insert( trans->actionTable, &reduced ) )
+				reduced->id = nextActionTableId++;
+		}
+	}
+}
+
+/* Append [lowKey, highKey] -> trans to outList, unless the transition has
+ * neither a target state nor any actions. */
+void RedFsmBuild::appendTrans( TransListVect &outList, Key lowKey,
+		Key highKey, FsmTrans *trans )
+{
+	/* Nothing to keep when there is no target and no action. */
+	if ( trans->toState == 0 && !( trans->actionTable.length() > 0 ) )
+		return;
+
+	outList.append( TransEl( lowKey, highKey, trans ) );
+}
+
+/* Translate one source transition into target and action table indices
+ * (-1 when absent) and add it to the current state through newTrans(),
+ * advancing the transition cursor. */
+void RedFsmBuild::makeTrans( Key lowKey, Key highKey, FsmTrans *trans )
+{
+	/* Look up the reduced action table, if the transition has actions. */
+	long action = -1;
+	if ( trans->actionTable.length() > 0 ) {
+		RedActionTable *reduced = actionTableMap.find( trans->actionTable );
+		if ( reduced != 0 )
+			action = reduced->id;
+	}
+
+	long targ = -1;
+	if ( trans->toState != 0 )
+		targ = trans->toState->alg.stateNum;
+
+	newTrans( curState, curTrans++, lowKey, highKey, targ, action );
+}
+
+/* Emit the outgoing transitions of `state` into the current reduced state:
+ * gather the transitions worth keeping, add each through makeTrans(), then
+ * let finishTransList() append any trailing filler. */
+void RedFsmBuild::makeTransList( FsmState *state )
+{
+	/* Collect the source ranges that have a target or an action. An empty
+	 * out list simply yields an empty collection. */
+	TransListVect kept;
+	for ( TransList::Iter trans = state->outList; trans.lte(); trans++ )
+		appendTrans( kept, trans->lowKey, trans->highKey, trans );
+
+	initTransList( curState, kept.length() );
+	curTrans = 0;
+
+	TransListVect::Iter el = kept;
+	while ( el.lte() ) {
+		makeTrans( el->lowKey, el->highKey, el->value );
+		el++;
+	}
+
+	finishTransList( curState );
+}
+
+/* Fill in the generated action at index `anum` from the given name and
+ * location and from the source action's inline list, object field and mark
+ * data. The stored markId is the source markId plus one. */
+void RedFsmBuild::newAction( int anum, char *name, int line, int col, Action *action )
+{
+	GenAction &gen = redFsm->allActions[anum];
+
+	gen.actionId = anum;
+	gen.name = name;
+	gen.loc.line = line;
+	gen.loc.col = col;
+	gen.inlineList = action->inlineList;
+	gen.objField = action->objField;
+	gen.markType = action->markType;
+	gen.markId = action->markId + 1;
+}
+
+/* Emit `action` as the next generated action, passing along its name (null
+ * when it has none) and source location, and advance the action cursor. */
+void RedFsmBuild::makeAction( Action *action )
+{
+	char *name = 0;
+	if ( action->name != 0 )
+		name = action->name;
+
+	int anum = curAction++;
+	newAction( anum, name, action->loc.line, action->loc.col, action );
+}
+
+/* Write `len` characters starting at `data` to `out`, replacing '<', '>'
+ * and '&' with their XML entities. All other characters are written as is. */
+void xmlEscapeHost( std::ostream &out, char *data, int len )
+{
+	for ( char *p = data, *end = data + len; p != end; p++ ) {
+		const char c = *p;
+		if ( c == '<' )
+			out << "&lt;";
+		else if ( c == '>' )
+			out << "&gt;";
+		else if ( c == '&' )
+			out << "&amp;";
+		else
+			out << c;
+	}
+}
+
+/* Look up the reduced to-state, from-state and EOF action tables of `state`
+ * and, if at least one of them exists, attach their ids to the current
+ * reduced state. A missing table is passed on as -1. */
+void RedFsmBuild::makeStateActions( FsmState *state )
+{
+	long toStateAction = -1;
+	long fromStateAction = -1;
+	long eofAction = -1;
+	bool anyFound = false;
+
+	if ( state->toStateActionTable.length() > 0 ) {
+		RedActionTable *found = actionTableMap.find( state->toStateActionTable );
+		if ( found != 0 ) {
+			toStateAction = found->id;
+			anyFound = true;
+		}
+	}
+
+	if ( state->fromStateActionTable.length() > 0 ) {
+		RedActionTable *found = actionTableMap.find( state->fromStateActionTable );
+		if ( found != 0 ) {
+			fromStateAction = found->id;
+			anyFound = true;
+		}
+	}
+
+	if ( state->eofActionTable.length() > 0 ) {
+		RedActionTable *found = actionTableMap.find( state->eofActionTable );
+		if ( found != 0 ) {
+			eofAction = found->id;
+			anyFound = true;
+		}
+	}
+
+	if ( anyFound ) {
+		setStateActions( curState, toStateAction,
+				fromStateAction, eofAction );
+	}
+}
+
+/* Copy the conditions of `state` onto the current reduced state. Does
+ * nothing, not even resetting the condition cursor, when the state has no
+ * conditions. */
+void RedFsmBuild::makeStateConditions( FsmState *state )
+{
+	long numConds = state->stateCondList.length();
+	if ( !( numConds > 0 ) )
+		return;
+
+	initStateCondList( curState, numConds );
+	curStateCond = 0;
+
+	for ( StateCondList::Iter cond = state->stateCondList; cond.lte(); cond++ ) {
+		long condId = cond->condSpace->condSpaceId;
+		addStateCond( curState, cond->lowKey, cond->highKey, condId );
+	}
+}
+
+/* Allocate the reduced states and fill each one from the matching source
+ * state, in list order: actions, conditions, transitions, id, final flag
+ * and the EOF transition if the source state has an EOF target. */
+void RedFsmBuild::makeStateList()
+{
+	initStateList( fsm->stateList.length() );
+
+	curState = 0;
+	for ( StateList::Iter st = fsm->stateList; st.lte(); st++ ) {
+		/* An EOF target and EOF actions come together or not at all. */
+		assert( (st->eofTarget != 0) == (st->eofActionTable.length() > 0) );
+
+		makeStateActions( st );
+		makeStateConditions( st );
+		makeTransList( st );
+
+		setId( curState, st->alg.stateNum );
+
+		if ( st->isFinState() )
+			setFinal( curState );
+
+		/* With an EOF target present, build the EOF transition from the
+		 * reduced EOF action table. */
+		if ( st->eofTarget != 0 ) {
+			RedActionTable *eofActions = actionTableMap.find( st->eofActionTable );
+			setEofTrans( curState, st->eofTarget->alg.stateNum, eofActions->id );
+		}
+
+		curState += 1;
+	}
+}
+
+void RedFsmBuild::makeEntryPoints()
+{
+ if ( fsm->lmRequiresErrorState )
+ setForcedErrorState();
+
+ for ( EntryMap::Iter en = fsm->entryPoints; en.lte(); en++ ) {
+ /* Get the name instantiation from nameIndex. */
+ NameInst *nameInst = fsm->nameIndex[en->key];
+ FsmState *state = en->value;
+ char *name = nameInst->name;
+ long entry = state->alg.stateNum;
+ addEntryPoint( en->key, name, entry );
+ }
+
+ for ( RegionList::Iter reg = pd->regionList; reg.lte(); reg++ ) {
+ assert( reg->regionNameInst != 0 );
+
+ TokenRegion *use = reg;
+
+ if ( use->isCiOnly )
+ use = use->derivedFrom->ignoreOnlyRegion;
+
+ NameInst *regionName = use->regionNameInst;
+ addRegionToEntry( reg->id, regionName->id );
+ }
+}
+
+/* Build the whole reduced machine from the source graph. The order matters:
+ * action tables are reduced before actions and tables are emitted, and the
+ * start/error state numbers and entry points are recorded before
+ * makeStateList(), whose initStateList() step resolves them to pointers. */
+void RedFsmBuild::makeMachine()
+{
+ /* Action tables. */
+ reduceActionTables();
+
+ makeActionList();
+ makeActionTableList();
+ makeConditions();
+
+ /* Start state. */
+ startState = fsm->startState->alg.stateNum;
+
+ /* Error state. */
+ if ( fsm->errState != 0 )
+ errState = fsm->errState->alg.stateNum;
+
+ makeEntryPoints();
+ makeStateList();
+}
+
+/* Number the condition spaces in condData->condSpaceMap, allocate the
+ * reduced condition spaces and copy over each space's id, base key and the
+ * action ids of its condition set. Does nothing when the map is empty. */
+void RedFsmBuild::makeConditions()
+{
+	if ( !( condData->condSpaceMap.length() > 0 ) )
+		return;
+
+	/* Assign consecutive ids in map order. */
+	long nextCondSpaceId = 0;
+	for ( CondSpaceMap::Iter cs = condData->condSpaceMap; cs.lte(); cs++ ) {
+		cs->condSpaceId = nextCondSpaceId;
+		nextCondSpaceId += 1;
+	}
+
+	initCondSpaceList( condData->condSpaceMap.length() );
+
+	curCondSpace = 0;
+	for ( CondSpaceMap::Iter cs = condData->condSpaceMap; cs.lte(); cs++ ) {
+		newCondSpace( curCondSpace, cs->condSpaceId, cs->baseKey );
+
+		for ( CondSet::Iter csi = cs->condSet; csi.lte(); csi++ ) {
+			long actionOffset = (*csi)->actionId;
+			condSpaceItem( curCondSpace, actionOffset );
+		}
+
+		curCondSpace += 1;
+	}
+}
+
+/* Allocate a new RedFsm, build it complete (wantComplete is set) from the
+ * source graph and run the post-processing passes in order. Returns the new
+ * machine, which is also left in the public redFsm member. */
+RedFsm *RedFsmBuild::reduceMachine()
+{
+ redFsm = new RedFsm();
+ redFsm->wantComplete = true;
+
+ /* Open the definition. */
+ makeMachine();
+
+ /* Do this before distributing transitions out to singles and defaults
+ * makes life easier. */
+ redFsm->maxKey = findMaxKey();
+
+ redFsm->assignActionLocs();
+
+ /* Find the first final state (The final state with the lowest id). */
+ redFsm->findFirstFinState();
+
+ /* Choose default transitions and the single transition. */
+ redFsm->chooseDefaultSpan();
+
+ /* Maybe do flat expand, otherwise choose single. */
+ redFsm->chooseSingle();
+
+ /* Set up incoming transitions. */
+ redFsm->setInTrans();
+
+ /* Analyze Machine will find the final action reference counts, among
+ * other things. We will use these in reporting the usage
+ * of fsm directives in action code. */
+ redFsm->analyzeMachine();
+
+ return redFsm;
+}
+
diff --git a/src/redbuild.h b/src/redbuild.h
new file mode 100644
index 00000000..dbbb3e19
--- /dev/null
+++ b/src/redbuild.h
@@ -0,0 +1,161 @@
+/*
+ * Copyright 2006-2012 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _FSMREDUCE_H
+#define _FSMREDUCE_H
+
+#include <iostream>
+#include "avltree.h"
+#include "fsmgraph.h"
+#include "parsedata.h"
+#include "fsmrun.h"
+
+/* Forwards. */
+struct FsmTrans;
+struct FsmGraph;
+struct Compiler;
+struct FsmCodeGen;
+struct RedFsm;
+struct GenCondSpace;
+struct Condition;
+
+/* Element of the ActionTableMap AVL tree: one distinct action table (held by
+ * value as the tree key) together with the id assigned to it. The id is
+ * zero on construction and set by the code that inserts the element. */
+struct RedActionTable
+:
+ public AvlTreeEl<RedActionTable>
+{
+ RedActionTable( const ActionTable &key )
+ :
+ key(key),
+ id(0)
+ { }
+
+ /* Key accessor for the tree. */
+ const ActionTable &getKey()
+ { return key; }
+
+ ActionTable key;
+ int id;
+};
+
+typedef AvlTree<RedActionTable, ActionTable, CmpActionTable> ActionTableMap;
+
+/* Cursor over a chain of FsmTrans linked through their `next` pointers.
+ * While `trans` is non-null, lowKey/highKey mirror the current transition's
+ * range and `next` caches its successor, so the cursor can advance after the
+ * current transition has been consumed. */
+struct NextRedTrans
+{
+	Key lowKey, highKey;
+	FsmTrans *trans;
+	FsmTrans *next;
+
+	/* Refresh the cached fields from `trans`. At the end of the chain `next`
+	 * is reset to null; it was previously left indeterminate, so calling
+	 * increment() on an exhausted cursor copied garbage into `trans`. The
+	 * keys are left as they were. */
+	void load() {
+		if ( trans != 0 ) {
+			next = trans->next;
+			lowKey = trans->lowKey;
+			highKey = trans->highKey;
+		}
+		else {
+			next = 0;
+		}
+	}
+
+	/* Start the cursor at `t`, which may be null for an empty chain. */
+	NextRedTrans( FsmTrans *t ) {
+		trans = t;
+		load();
+	}
+
+	/* Step to the cached successor. */
+	void increment() {
+		trans = next;
+		load();
+	}
+};
+
+/* Builds a reduced machine (RedFsm) from an FsmGraph. Construct it with the
+ * machine name, the compiler and the source graph, then call
+ * reduceMachine() once; the result is returned and kept in `redFsm`. */
+class RedFsmBuild
+{
+public:
+ RedFsmBuild( char *fsmName, Compiler *pd, FsmGraph *fsm );
+ RedFsm *reduceMachine( );
+
+private:
+ void appendTrans( TransListVect &outList, Key lowKey, Key highKey, FsmTrans *trans );
+ void makeStateActions( FsmState *state );
+ void makeStateList();
+ void makeStateConditions( FsmState *state );
+
+ /* Low-level steps that allocate and fill the RedFsm arrays. The snum,
+ * cnum and anum arguments are indices into those arrays. */
+ void initActionList( unsigned long length );
+ void newAction( int anum, char *name, int line, int col, Action *action );
+ void initActionTableList( unsigned long length );
+ void initCondSpaceList( ulong length );
+ void condSpaceItem( int cnum, long condActionId );
+ void newCondSpace( int cnum, int condSpaceId, Key baseKey );
+ void initStateCondList( int snum, ulong length );
+ void addStateCond( int snum, Key lowKey, Key highKey, long condNum );
+ void initStateList( unsigned long length );
+ void addRegionToEntry( int regionId, int entryId );
+ void addEntryPoint( int entryId, char *name, unsigned long entryState );
+ void setId( int snum, int id );
+ void initTransList( int snum, unsigned long length );
+ void newTrans( int snum, int tnum, Key lowKey, Key highKey,
+ long targ, long act );
+ void finishTransList( int snum );
+ void setFinal( int snum );
+ void setEofTrans( int snum, int eofTarget, int actId );
+ void setStateActions( int snum, long toStateAction,
+ long fromStateAction, long eofAction );
+ void setForcedErrorState();
+ void closeMachine();
+ Key findMaxKey();
+
+
+ /* Passes that walk the source graph and drive the steps above.
+ * NOTE(review): makeGetKeyExpr, makeAccessExpr, makeCurStateExpr,
+ * makeInlineList, reduceTrans, makeLmSwitch, makeActionExec and
+ * makeActionExecTE have no definition in redbuild.cc -- possibly
+ * leftover declarations; confirm before relying on them. */
+ void makeEntryPoints();
+ void makeGetKeyExpr();
+ void makeAccessExpr();
+ void makeCurStateExpr();
+ void makeConditions();
+ void makeInlineList( InlineList *inlineList, InlineItem *context );
+ void makeActionList();
+ void makeActionTableList();
+ void reduceTrans( FsmTrans *trans );
+ void reduceActionTables();
+ void makeTransList( FsmState *state );
+ void makeTrans( Key lowKey, Key highKey, FsmTrans *defTrans );
+ void makeAction( Action *action );
+ void makeLmSwitch( InlineItem *item );
+ void makeMachine();
+ void makeActionExec( InlineItem *item );
+ void makeActionExecTE( InlineItem *item );
+
+ char *fsmName;
+ Compiler *pd;
+ FsmGraph *fsm;
+ /* Distinct action tables found in the graph and the next id to assign. */
+ ActionTableMap actionTableMap;
+ int nextActionTableId;
+
+ /* State numbers of the start and error states; -1 until set. */
+ int startState;
+ int errState;
+
+public:
+ /* The machine under construction; assigned by reduceMachine(). */
+ RedFsm *redFsm;
+
+private:
+ /* Cursors into the RedFsm arrays, reset by the individual make* passes. */
+ int curAction;
+ int curActionTable;
+ int curTrans;
+ int curState;
+ int curCondSpace;
+ int curStateCond;
+};
+
+
+#endif /* _FSMREDUCE_H */
diff --git a/src/redfsm.cc b/src/redfsm.cc
new file mode 100644
index 00000000..d3a65b7c
--- /dev/null
+++ b/src/redfsm.cc
@@ -0,0 +1,1112 @@
+/*
+ * Copyright 2006-2012 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <iostream>
+#include <sstream>
+#include "redfsm.h"
+#include "avlmap.h"
+#include "mergesort.h"
+#include "fsmgraph.h"
+#include "parsetree.h"
+#include "fsmrun.h"
+
+using std::ostringstream;
+
+/* Produce a printable label for an action: its name when it has one,
+ * otherwise its source position written as "line:col". */
+string nameOrLoc( GenAction *genAction )
+{
+    if ( genAction->name == 0 ) {
+        /* Unnamed action, fall back to where it was defined. */
+        ostringstream where;
+        where << genAction->loc.line << ":" << genAction->loc.col;
+        return where.str();
+    }
+
+    return string(genAction->name);
+}
+
+/* Build an empty reduced machine. Every member named in the initialiser
+ * list below starts out as zero or false.
+ *
+ * NOTE(review): nextStateId does not appear in this list; it is assigned in
+ * sequentialStateIds() and sortStateIdsByFinal(). Confirm nothing reads it
+ * before one of those has run. */
+RedFsm::RedFsm()
+:
+ wantComplete(false),
+ forcedErrorState(false),
+ nextActionId(0),
+ nextTransId(0),
+ errState(0),
+ errTrans(0),
+ firstFinState(0),
+ numFinStates(0),
+ allActions(0),
+ allActionTables(0),
+ allConditions(0),
+ allCondSpaces(0),
+ allStates(0),
+ bAnyToStateActions(false),
+ bAnyFromStateActions(false),
+ bAnyRegActions(false),
+ bAnyEofActions(false),
+ bAnyActionGotos(false),
+ bAnyActionCalls(false),
+ bAnyActionRets(false),
+ bAnyRegActionRets(false),
+ bAnyRegActionByValControl(false),
+ bAnyRegNextStmt(false),
+ bAnyRegCurStateRef(false),
+ bAnyRegBreak(false),
+ bAnyLmSwitchError(false),
+ bAnyConditions(false)
+{
+}
+
+/* Report whether the action table map holds at least one entry. */
+bool RedFsm::anyActions()
+{
+    const bool haveActions = actionMap.length() > 0;
+    return haveActions;
+}
+
+/* Append the given state, followed by everything reachable from it, to
+ * stateList in depth-first order. A state whose onStateList flag is already
+ * set is left alone, which also terminates the recursion on cycles. */
+void RedFsm::depthFirstOrdering( RedState *state )
+{
+    /* Already placed on the list, nothing more to do. */
+    if ( state->onStateList )
+        return;
+
+    /* Place this state before visiting any of its targets. */
+    state->onStateList = true;
+    stateList.append( state );
+
+    /* Follow the single-key transitions. */
+    for ( RedTransList::Iter single = state->outSingle; single.lte(); single++ ) {
+        RedState *targ = single->value->targ;
+        if ( targ != 0 )
+            depthFirstOrdering( targ );
+    }
+
+    /* Follow the range transitions. */
+    for ( RedTransList::Iter range = state->outRange; range.lte(); range++ ) {
+        RedState *targ = range->value->targ;
+        if ( targ != 0 )
+            depthFirstOrdering( targ );
+    }
+
+    /* Finally follow the default transition, when there is one. */
+    RedTrans *def = state->defTrans;
+    if ( def != 0 && def->targ != 0 )
+        depthFirstOrdering( def->targ );
+}
+
+/* Rebuild stateList so that the states appear in the order a depth-first
+ * walk reaches them, starting from the start state, then each entry point,
+ * then the error state when one has been forced. */
+void RedFsm::depthFirstOrdering()
+{
+    /* Remember how many states there are so the rebuild can be checked. */
+    int stateListLen = stateList.length();
+
+    /* Mark every state as not yet placed. */
+    for ( RedStateList::Iter s = stateList; s.lte(); s++ )
+        s->onStateList = false;
+
+    /* Drop the list's contents; the states are re-appended by the walks
+     * below. */
+    stateList.abandon();
+
+    /* Walk from the start state and from every other entry point. */
+    depthFirstOrdering( startState );
+    for ( RedStateSet::Iter entry = entryPoints; entry.lte(); entry++ )
+        depthFirstOrdering( *entry );
+
+    /* A forced error state may not be reachable through transitions. */
+    if ( forcedErrorState )
+        depthFirstOrdering( errState );
+
+    /* Every state must have made it back onto the list. */
+    assert( stateListLen == stateList.length() );
+}
+
+/* Number the states 0, 1, 2, ... in the order they sit in stateList. On
+ * return nextStateId is the number of ids handed out. */
+void RedFsm::sequentialStateIds()
+{
+    /* Ids must begin at zero, table based machines rely on it. */
+    nextStateId = 0;
+    for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
+        st->id = nextStateId;
+        nextStateId += 1;
+    }
+}
+
+/* Stable sort the states by final state status. Final states are moved to
+ * the back of stateList; within the final and the non-final group the
+ * states keep the relative order they had before the call. */
+void RedFsm::sortStatesByFinal()
+{
+ /* Move forward through the list and throw final states onto the end. */
+ RedState *state = 0;
+ RedState *next = stateList.head;
+ /* The original tail marks where the walk stops, so states re-appended
+ * behind it are never visited a second time. */
+ RedState *last = stateList.tail;
+ while ( state != last ) {
+ /* Move forward and load up the next. The successor is read before any
+ * detach so that moving the current state does not disturb the walk. */
+ state = next;
+ next = state->next;
+
+ /* Throw to the end? */
+ if ( state->isFinal ) {
+ stateList.detach( state );
+ stateList.append( state );
+ }
+ }
+}
+
+/* Hand out state ids so that every non-final state gets a lower id than
+ * every final state. The list order itself is not changed. */
+void RedFsm::sortStateIdsByFinal()
+{
+    /* Ids must begin at zero, table based machines rely on it. */
+    nextStateId = 0;
+
+    /* Non-final states take the low ids. */
+    for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
+        if ( st->isFinal )
+            continue;
+        st->id = nextStateId++;
+    }
+
+    /* Final states take whatever ids follow. */
+    for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
+        if ( ! st->isFinal )
+            continue;
+        st->id = nextStateId++;
+    }
+}
+
+/* Three-way comparison of two states by id: negative, zero or positive when
+ * the first id is lower than, equal to or higher than the second. Used as
+ * the compare policy of the MergeSort in sortByStateId(). */
+struct CmpStateById
+{
+    static int compare( RedState *st1, RedState *st2 )
+    {
+        if ( st1->id < st2->id )
+            return -1;
+        return st1->id > st2->id ? 1 : 0;
+    }
+};
+
+/* Reorder stateList so the states appear by ascending id. The states are
+ * copied into a temporary pointer array, sorted there and appended back. */
+void RedFsm::sortByStateId()
+{
+    /* Snapshot the state pointers into a flat array. */
+    RedState **sorted = new RedState*[stateList.length()];
+    int filled = 0;
+    for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
+        sorted[filled] = st;
+        filled += 1;
+    }
+
+    /* Order the snapshot by id. */
+    MergeSort<RedState*, CmpStateById> sorter;
+    sorter.sort( sorted, stateList.length() );
+
+    /* Rebuild the list from the sorted snapshot. */
+    stateList.abandon();
+    for ( int i = 0; i < filled; i++ )
+        stateList.append( sorted[i] );
+
+    delete[] sorted;
+}
+
+/* Point firstFinState at the final state with the lowest id. A value that
+ * firstFinState already holds on entry takes part in the comparison. */
+void RedFsm::findFirstFinState()
+{
+    for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
+        /* Only final states are candidates. */
+        if ( ! st->isFinal )
+            continue;
+
+        if ( firstFinState == 0 || st->id < firstFinState->id )
+            firstFinState = st;
+    }
+}
+
+/* Give every action table in actionMap its location. Locations start at
+ * zero and each table reserves its own length plus one extra slot. */
+void RedFsm::assignActionLocs()
+{
+    int offset = 0;
+    for ( GenActionTableMap::Iter table = actionMap; table.lte(); table++ ) {
+        /* This table begins where the previous one ended. */
+        table->location = offset;
+
+        /* Step over the table's items and one additional slot. */
+        offset += table->key.length() + 1;
+    }
+}
+
+/* Check if we can extend the current range by displacing any ranges
+ * ahead to the singles. Returns true when list[pos] is followed by a
+ * gap-free run of one-key ranges that ends at a range using the same
+ * transition as list[pos]; returns false otherwise. */
+bool RedFsm::canExtend( const RedTransList &list, int pos )
+{
+ /* Get the transition that we want to extend. */
+ RedTrans *extendTrans = list[pos].value;
+
+ /* Look ahead in the transition list. Note that pos (a by-value
+ * parameter) advances together with next, so list[pos] is always the
+ * element immediately before list[next]. */
+ for ( int next = pos + 1; next < list.length(); pos++, next++ ) {
+ /* If they are not continuous then cannot extend. */
+ Key nextKey = list[next].lowKey;
+ nextKey.decrement();
+ if ( list[pos].highKey != nextKey )
+ break;
+
+ /* Check for the extension property. */
+ if ( extendTrans == list[next].value )
+ return true;
+
+ /* If the span of the next element is more than one, then don't keep
+ * checking, it won't be moved to single. */
+ unsigned long long nextSpan = keyOps->span( list[next].lowKey, list[next].highKey );
+ if ( nextSpan > 1 )
+ break;
+ }
+ return false;
+}
+
+/* Move ranges to the singles list. Walks state->outRange and, for each
+ * position, either merges it with a later range that uses the same
+ * transition (moving the ranges in between to outSingle), moves a one-key
+ * range to outSingle, or leaves the range where it is. */
+void RedFsm::moveTransToSingle( RedState *state )
+{
+ RedTransList &range = state->outRange;
+ RedTransList &single = state->outSingle;
+ /* rpos only advances when the current range is kept; after a merge or a
+ * removal the same index is examined again. */
+ for ( int rpos = 0; rpos < range.length(); ) {
+ /* Check if this is a range we can extend. */
+ if ( canExtend( range, rpos ) ) {
+ /* Transfer singles over. canExtend() returned true, so a range with
+ * the same transition as range[rpos] lies ahead and ends this loop. */
+ while ( range[rpos].value != range[rpos+1].value ) {
+ /* Transfer the range to single. */
+ single.append( range[rpos+1] );
+ range.remove( rpos+1 );
+ }
+
+ /* Extend. The matching range is absorbed into range[rpos]. */
+ range[rpos].highKey = range[rpos+1].highKey;
+ range.remove( rpos+1 );
+ }
+ /* Maybe move it to the singles. */
+ else if ( keyOps->span( range[rpos].lowKey, range[rpos].highKey ) == 1 ) {
+ single.append( range[rpos] );
+ range.remove( rpos );
+ }
+ else {
+ /* Keeping it in the ranges. */
+ rpos += 1;
+ }
+ }
+}
+
+/* Run moveTransToSingle() over every state so that suitable one-key ranges
+ * end up in the single transition lists. */
+void RedFsm::chooseSingle()
+{
+    RedStateList::Iter st = stateList;
+    while ( st.lte() ) {
+        /* Rewrites this state's outRange and outSingle lists. */
+        moveTransToSingle( st );
+        st++;
+    }
+}
+
+/* Build the flat, directly indexed tables of every state. For a state with
+ * state conditions, condList gets one entry per key between condLowKey and
+ * condHighKey; for a state with range transitions, transList gets one entry
+ * per key between lowKey and highKey. Entries are indexed by the key's
+ * offset from the respective low key. */
+void RedFsm::makeFlat()
+{
+ for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
+ if ( st->stateCondList.length() == 0 ) {
+ /* No conditions: only the key bounds are reset, condList is not
+ * touched in this branch. */
+ st->condLowKey = 0;
+ st->condHighKey = 0;
+ }
+ else {
+ st->condLowKey = st->stateCondList.head->lowKey;
+ st->condHighKey = st->stateCondList.tail->highKey;
+
+ unsigned long long span = keyOps->span( st->condLowKey, st->condHighKey );
+ st->condList = new GenCondSpace*[ span ];
+ memset( st->condList, 0, sizeof(GenCondSpace*)*span );
+
+ for ( GenStateCondList::Iter sci = st->stateCondList; sci.lte(); sci++ ) {
+ unsigned long long base, trSpan;
+ /* span() of (low, key) minus one is used as the key's offset from
+ * the low key. */
+ base = keyOps->span( st->condLowKey, sci->lowKey )-1;
+ trSpan = keyOps->span( sci->lowKey, sci->highKey );
+ for ( unsigned long long pos = 0; pos < trSpan; pos++ )
+ st->condList[base+pos] = sci->condSpace;
+ }
+ }
+
+ if ( st->outRange.length() == 0 ) {
+ st->lowKey = st->highKey = 0;
+ st->transList = 0;
+ }
+ else {
+ st->lowKey = st->outRange[0].lowKey;
+ st->highKey = st->outRange[st->outRange.length()-1].highKey;
+ unsigned long long span = keyOps->span( st->lowKey, st->highKey );
+ st->transList = new RedTrans*[ span ];
+ memset( st->transList, 0, sizeof(RedTrans*)*span );
+
+ for ( RedTransList::Iter trans = st->outRange; trans.lte(); trans++ ) {
+ unsigned long long base, trSpan;
+ /* Same offset computation as for the condition table above. */
+ base = keyOps->span( st->lowKey, trans->lowKey )-1;
+ trSpan = keyOps->span( trans->lowKey, trans->highKey );
+ for ( unsigned long long pos = 0; pos < trSpan; pos++ )
+ st->transList[base+pos] = trans->value;
+ }
+
+ /* Fill in the gaps with the default transition. Slots still zero
+ * after the loop above were not covered by any range. */
+ for ( unsigned long long pos = 0; pos < span; pos++ ) {
+ if ( st->transList[pos] == 0 )
+ st->transList[pos] = st->defTrans;
+ }
+ }
+ }
+}
+
+
+/* Install defTrans as the state's default transition. Every range in the
+ * state's outRange that uses defTrans is dropped from the list; all other
+ * ranges are kept in their original order. */
+void RedFsm::moveToDefault( RedTrans *defTrans, RedState *state )
+{
+    /* Collect the ranges that do not go through the chosen default. */
+    RedTransList kept;
+    for ( RedTransList::Iter el = state->outRange; el.lte(); el++ ) {
+        if ( el->value == defTrans )
+            continue;
+        kept.append( *el );
+    }
+
+    /* Hand the filtered list over to the state. */
+    state->outRange.transfer( kept );
+
+    /* Record the default itself. */
+    state->defTrans = defTrans;
+}
+
+/* Return true when the ranges in outRange cover every key from
+ * keyOps->minKey to keyOps->maxKey without a gap. The check relies on the
+ * list being ordered by key. */
+bool RedFsm::alphabetCovered( RedTransList &outRange )
+{
+ /* Cannot cover without any out ranges. */
+ if ( outRange.length() == 0 )
+ return false;
+
+ /* If the first range doesn't start at the lower bound then the
+ * alphabet is not covered. */
+ RedTransList::Iter rtel = outRange;
+ if ( keyOps->minKey < rtel->lowKey )
+ return false;
+
+ /* Check that every range is next to the previous one. */
+ rtel.increment();
+ for ( ; rtel.lte(); rtel++ ) {
+ /* rtel[-1] is the element just before the current one. */
+ Key highKey = rtel[-1].highKey;
+ highKey.increment();
+ if ( highKey != rtel->lowKey )
+ return false;
+ }
+
+ /* The last must extend to the upper bound. */
+ RedTransEl *last = &outRange[outRange.length()-1];
+ if ( last->highKey < keyOps->maxKey )
+ return false;
+
+ return true;
+}
+
+/* Choose the transition that covers the most alphabet characters across
+ * the state's ranges. Returns null when the state has no ranges. On a tie
+ * the transition met first while iterating the set wins. */
+RedTrans *RedFsm::chooseDefaultSpan( RedState *state )
+{
+    /* Collect the distinct transitions used by the ranges. */
+    RedTransPtrSet distinct;
+    for ( RedTransList::Iter r = state->outRange; r.lte(); r++ )
+        distinct.insert( r->value );
+
+    /* One accumulator per distinct transition, all starting at zero. */
+    unsigned long long *covered = new unsigned long long[distinct.length()];
+    memset( covered, 0, sizeof(unsigned long long) * distinct.length() );
+
+    /* Add each range's span to the accumulator of its transition. The
+     * accumulator index is the transition's position within the set. */
+    for ( RedTransList::Iter r = state->outRange; r.lte(); r++ ) {
+        RedTrans **slot = distinct.find( r->value );
+        int index = slot - distinct.data;
+        covered[index] += keyOps->span( r->lowKey, r->highKey );
+    }
+
+    /* Pick the transition with the largest accumulated span. */
+    RedTrans *best = 0;
+    unsigned long long bestSpan = 0;
+    for ( RedTransPtrSet::Iter t = distinct; t.lte(); t++ ) {
+        const unsigned long long current = covered[t.pos()];
+        if ( current > bestSpan ) {
+            bestSpan = current;
+            best = *t;
+        }
+    }
+
+    delete[] covered;
+    return best;
+}
+
+/* For every state whose ranges cover the whole alphabet, make the
+ * transition with the largest span the default. States that do not cover
+ * the alphabet are left untouched, which avoids any transitions in the out
+ * range that go to error and avoids the need for an ERR state. */
+void RedFsm::chooseDefaultSpan()
+{
+    for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
+        /* Skip states with holes in their key coverage. */
+        if ( ! alphabetCovered( st->outRange ) )
+            continue;
+
+        /* Largest span wins. */
+        RedTrans *chosen = chooseDefaultSpan( st );
+
+        /* Pull the chosen transition out of the ranges and store it as
+         * the default. */
+        moveToDefault( chosen, st );
+    }
+}
+
+/* Pick as the default the first range transition whose target is the state
+ * that follows this one in the state list (state->next). Returns null when
+ * no range transition of the state goes there. */
+RedTrans *RedFsm::chooseDefaultGoto( RedState *state )
+{
+    for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) {
+        if ( rtel->value->targ == state->next )
+            return rtel->value;
+    }
+    return 0;
+}
+
+/* Choose a default transition for every state: preferably one that leads
+ * to the next state in the list, otherwise the one with the largest span.
+ * The choice (which may be null) is then installed with moveToDefault(). */
+void RedFsm::chooseDefaultGoto()
+{
+    for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
+        /* First preference: a transition to the following state. */
+        RedTrans *chosen = chooseDefaultGoto( st );
+
+        /* Otherwise fall back to the widest transition. */
+        if ( chosen == 0 )
+            chosen = chooseDefaultSpan( st );
+
+        /* Pull the chosen transition out of the ranges and store it as
+         * the default. */
+        moveToDefault( chosen, st );
+    }
+}
+
+/* Choose the transition that is used by the largest number of ranges in
+ * the state. Returns null when the state has no ranges. On a tie the
+ * transition met first while iterating the set wins. */
+RedTrans *RedFsm::chooseDefaultNumRanges( RedState *state )
+{
+    /* Collect the distinct transitions used by the ranges. */
+    RedTransPtrSet distinct;
+    for ( RedTransList::Iter r = state->outRange; r.lte(); r++ )
+        distinct.insert( r->value );
+
+    /* One counter per distinct transition, all starting at zero. */
+    int *uses = new int[distinct.length()];
+    memset( uses, 0, sizeof(int) * distinct.length() );
+
+    /* Count the ranges per transition. The counter index is the
+     * transition's position within the set. */
+    for ( RedTransList::Iter r = state->outRange; r.lte(); r++ ) {
+        RedTrans **slot = distinct.find( r->value );
+        uses[slot - distinct.data] += 1;
+    }
+
+    /* Pick the transition with the highest count. */
+    RedTrans *best = 0;
+    int bestUses = 0;
+    for ( RedTransPtrSet::Iter t = distinct; t.lte(); t++ ) {
+        const int current = uses[t.pos()];
+        if ( current > bestUses ) {
+            bestUses = current;
+            best = *t;
+        }
+    }
+
+    delete[] uses;
+    return best;
+}
+
+/* For every state, make the transition used by the most ranges the default
+ * transition. */
+void RedFsm::chooseDefaultNumRanges()
+{
+    RedStateList::Iter st = stateList;
+    while ( st.lte() ) {
+        /* Most frequently used transition of this state. */
+        RedTrans *chosen = chooseDefaultNumRanges( st );
+
+        /* Pull the chosen transition out of the ranges and store it as
+         * the default. */
+        moveToDefault( chosen, st );
+        st++;
+    }
+}
+
+RedTrans *RedFsm::getErrorTrans( )
+{
+ /* If the error trans has not been made aready, make it. */
+ if ( errTrans == 0 ) {
+ /* This insert should always succeed since no transition created by
+ * the user can point to the error state. */
+ errTrans = new RedTrans( getErrorState(), 0, nextTransId++ );
+ RedTrans *inRes = transSet.insert( errTrans );
+ assert( inRes != 0 );
+ }
+ return errTrans;
+}
+
+/* Return the error state. The state must already have been set; this
+ * function never creates one and asserts when errState is null. */
+RedState *RedFsm::getErrorState()
+{
+ /* Something went wrong. An error state is needed but one was not supplied
+ * by the frontend. */
+ assert( errState != 0 );
+ return errState;
+}
+
+
+/* Return the reduced transition for the given target and action. An
+ * existing entry of transSet is reused when one matches; otherwise a new
+ * transition with a fresh id is created and inserted. */
+RedTrans *RedFsm::allocateTrans( RedState *targ, RedAction *action )
+{
+    /* Look the transition up using a temporary as the search key. */
+    RedTrans probe( targ, action, 0 );
+    RedTrans *found = transSet.find( &probe );
+    if ( found != 0 )
+        return found;
+
+    /* Not in the set yet, make it and register it. */
+    RedTrans *created = new RedTrans( targ, action, nextTransId++ );
+    transSet.insert( created );
+    return created;
+}
+
+/* Split the states into nparts consecutive groups following the current
+ * list order and store the group number in each state's partition field.
+ * Group sizes differ by at most one; the earlier groups take the extra
+ * states when the count does not divide evenly. nparts must not be zero. */
+void RedFsm::partitionFsm( int nparts )
+{
+    this->nParts = nparts;
+
+    /* Base size of a group and how many groups need one state more. */
+    const int baseSize = stateList.length() / nparts;
+    int extra = stateList.length() % nparts;
+
+    /* Capacity left in the group currently being filled. */
+    int roomLeft = baseSize;
+    if ( extra-- > 0 )
+        roomLeft += 1;
+
+    int current = 0;
+    for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
+        st->partition = current;
+        roomLeft -= 1;
+
+        /* Keep filling the current group while it has room. */
+        if ( roomLeft != 0 )
+            continue;
+
+        /* Group is full, open the next one. */
+        current += 1;
+        roomLeft = baseSize;
+        if ( extra-- > 0 )
+            roomLeft += 1;
+    }
+}
+
+/* Fill in the inTrans array of every state with the transitions of
+ * transSet that target it. numInTrans is used as a counter: it is
+ * incremented in a first pass, reset after allocation and counted up again
+ * while the arrays are filled. */
+void RedFsm::setInTrans()
+{
+    /* Count the incoming transitions of each target state. */
+    for ( RedTransSet::Iter t = transSet; t.lte(); t++ )
+        t->targ->numInTrans += 1;
+
+    /* Allocate the arrays, then reset the counts so they can serve as the
+     * fill position in the next pass. */
+    for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
+        st->inTrans = new RedTrans*[st->numInTrans];
+        st->numInTrans = 0;
+    }
+
+    /* Store each transition in the array of its target. */
+    for ( RedTransSet::Iter t = transSet; t.lte(); t++ ) {
+        RedState *dest = t->targ;
+        dest->inTrans[dest->numInTrans] = t;
+        dest->numInTrans += 1;
+    }
+}
+
+/* Return the first condition space in condSpaceList whose key interval
+ * contains both lowKey and highKey, or null when there is none. The upper
+ * end of a space's interval is its base key plus the alphabet size times
+ * two to the power of the number of conditions in its set. */
+GenCondSpace *RedFsm::findCondSpace( Key lowKey, Key highKey )
+{
+    for ( CondSpaceList::Iter cs = condSpaceList; cs.lte(); cs++ ) {
+        /* Upper end of this space's interval. */
+        Key csHighKey = cs->baseKey;
+        csHighKey += keyOps->alphSize() * (1 << cs->condSet.length());
+
+        /* Both ends of the requested range must fall inside. */
+        if ( ! ( lowKey >= cs->baseKey ) )
+            continue;
+        if ( ! ( highKey <= csHighKey ) )
+            continue;
+
+        return cs;
+    }
+    return 0;
+}
+
+/* Return the first condition in conditionList whose key interval contains
+ * key, or null when there is none. The interval runs from the condition's
+ * base key up to and including the base key plus two to the power of the
+ * number of conditions in its set. */
+Condition *RedFsm::findCondition( Key key )
+{
+    for ( ConditionList::Iter cond = conditionList; cond.lte(); cond++ ) {
+        Key upperKey = cond->baseKey + (1 << cond->condSet.length());
+
+        /* The key must lie between the two bounds, both inclusive. */
+        if ( ! ( cond->baseKey <= key ) )
+            continue;
+        if ( ! ( key <= upperKey ) )
+            continue;
+
+        return cond;
+    }
+    return 0;
+}
+
+/* Recompute all the max* members from the current machine. Every limit is
+ * reset first and then derived from transSet, condSpaceList, stateList and
+ * actionMap. nextStateId, the flat tables (condList/transList) and the
+ * action locations are read here, so they need to be in place already. */
+void RedFsm::setValueLimits()
+{
+ maxSingleLen = 0;
+ maxRangeLen = 0;
+ maxKeyOffset = 0;
+ maxIndexOffset = 0;
+ maxActListId = 0;
+ maxActionLoc = 0;
+ maxActArrItem = 0;
+ maxSpan = 0;
+ maxCondSpan = 0;
+ maxFlatIndexOffset = 0;
+ maxCondOffset = 0;
+ maxCondLen = 0;
+ maxCondSpaceId = 0;
+ maxCondIndexOffset = 0;
+
+ /* In both of these cases the 0 index is reserved for no value, so the max
+ * is one more than it would be if they started at 0. */
+ maxIndex = transSet.length();
+ maxCond = condSpaceList.length();
+
+ /* The nextStateId - 1 is the last state id assigned. */
+ maxState = nextStateId - 1;
+
+ for ( CondSpaceList::Iter csi = condSpaceList; csi.lte(); csi++ ) {
+ if ( csi->condSpaceId > maxCondSpaceId )
+ maxCondSpaceId = csi->condSpaceId;
+ }
+
+ for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
+ /* Maximum cond length. */
+ if ( st->stateCondList.length() > maxCondLen )
+ maxCondLen = st->stateCondList.length();
+
+ /* Maximum single length. */
+ if ( st->outSingle.length() > maxSingleLen )
+ maxSingleLen = st->outSingle.length();
+
+ /* Maximum range length. */
+ if ( st->outRange.length() > maxRangeLen )
+ maxRangeLen = st->outRange.length();
+
+ /* The key offset and index offset for the state after the last one are
+ * not used, so the last state does not contribute to these sums. */
+ if ( ! st.last() ) {
+ maxCondOffset += st->stateCondList.length();
+ maxKeyOffset += st->outSingle.length() + st->outRange.length()*2;
+ maxIndexOffset += st->outSingle.length() + st->outRange.length() + 1;
+ }
+
+ /* Max cond span. */
+ if ( st->condList != 0 ) {
+ unsigned long long span = keyOps->span( st->condLowKey, st->condHighKey );
+ if ( span > maxCondSpan )
+ maxCondSpan = span;
+ }
+
+ /* Max key span. */
+ if ( st->transList != 0 ) {
+ unsigned long long span = keyOps->span( st->lowKey, st->highKey );
+ if ( span > maxSpan )
+ maxSpan = span;
+ }
+
+ /* Max cond index offset. */
+ if ( ! st.last() ) {
+ if ( st->condList != 0 )
+ maxCondIndexOffset += keyOps->span( st->condLowKey, st->condHighKey );
+ }
+
+ /* Max flat index offset. The extra one is added for every state but
+ * the last, whether or not it has a flat transition table. */
+ if ( ! st.last() ) {
+ if ( st->transList != 0 )
+ maxFlatIndexOffset += keyOps->span( st->lowKey, st->highKey );
+ maxFlatIndexOffset += 1;
+ }
+ }
+
+ for ( GenActionTableMap::Iter at = actionMap; at.lte(); at++ ) {
+ /* Maximum id of action lists. */
+ if ( at->actListId+1 > maxActListId )
+ maxActListId = at->actListId+1;
+
+ /* Maximum location of items in action array. */
+ if ( at->location+1 > maxActionLoc )
+ maxActionLoc = at->location+1;
+
+ /* Maximum values going into the action array. Both the table lengths
+ * and the action ids are candidates. */
+ if ( at->key.length() > maxActArrItem )
+ maxActArrItem = at->key.length();
+ for ( GenActionTable::Iter item = at->key; item.lte(); item++ ) {
+ if ( item->value->actionId > maxActArrItem )
+ maxActArrItem = item->value->actionId;
+ }
+ }
+}
+
+/* Count how often each action table, and each action inside it, is
+ * referenced by the machine. Transition references are counted per
+ * occurrence in a state's single, range and default transitions; to-state,
+ * from-state and EOF references are counted once per state. The counters
+ * are only incremented here, never reset. */
+void RedFsm::findFinalActionRefs()
+{
+ for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
+ /* Reference count out of single transitions. */
+ for ( RedTransList::Iter rtel = st->outSingle; rtel.lte(); rtel++ ) {
+ if ( rtel->value->action != 0 ) {
+ rtel->value->action->numTransRefs += 1;
+ for ( GenActionTable::Iter item = rtel->value->action->key; item.lte(); item++ )
+ item->value->numTransRefs += 1;
+ }
+ }
+
+ /* Reference count out of range transitions. */
+ for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) {
+ if ( rtel->value->action != 0 ) {
+ rtel->value->action->numTransRefs += 1;
+ for ( GenActionTable::Iter item = rtel->value->action->key; item.lte(); item++ )
+ item->value->numTransRefs += 1;
+ }
+ }
+
+ /* Reference count default transition. */
+ if ( st->defTrans != 0 && st->defTrans->action != 0 ) {
+ st->defTrans->action->numTransRefs += 1;
+ for ( GenActionTable::Iter item = st->defTrans->action->key; item.lte(); item++ )
+ item->value->numTransRefs += 1;
+ }
+
+ /* Reference count to state actions. */
+ if ( st->toStateAction != 0 ) {
+ st->toStateAction->numToStateRefs += 1;
+ for ( GenActionTable::Iter item = st->toStateAction->key; item.lte(); item++ )
+ item->value->numToStateRefs += 1;
+ }
+
+ /* Reference count from state actions. */
+ if ( st->fromStateAction != 0 ) {
+ st->fromStateAction->numFromStateRefs += 1;
+ for ( GenActionTable::Iter item = st->fromStateAction->key; item.lte(); item++ )
+ item->value->numFromStateRefs += 1;
+ }
+
+ /* Reference count EOF actions. */
+ if ( st->eofAction != 0 ) {
+ st->eofAction->numEofRefs += 1;
+ for ( GenActionTable::Iter item = st->eofAction->key; item.lte(); item++ )
+ item->value->numEofRefs += 1;
+ }
+ }
+}
+
+/* Walk an action's inline list, including nested children, and set
+ * bAnyLmSwitchError when a referenced action contains an LmSwitch item
+ * whose token region has lmSwitchHandlesError set. */
+void RedFsm::analyzeAction( GenAction *act, InlineList *inlineList )
+{
+    for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
+        /* Only actions that are referenced from somewhere count. */
+        const bool referenced =
+                act->numTransRefs > 0 || act->numToStateRefs > 0 ||
+                act->numFromStateRefs > 0 || act->numEofRefs > 0;
+
+        if ( referenced && item->type == InlineItem::LmSwitch &&
+                item->tokenRegion->lmSwitchHandlesError )
+        {
+            bAnyLmSwitchError = true;
+        }
+
+        /* Descend into nested inline lists. */
+        if ( item->children != 0 )
+            analyzeAction( act, item->children );
+    }
+}
+
+/* Walk an inline list and all of its nested children on behalf of a
+ * reduced action table. The traversal currently records nothing. */
+void RedFsm::analyzeActionList( RedAction *redAct, InlineList *inlineList )
+{
+    InlineList::Iter item = *inlineList;
+    while ( item.lte() ) {
+        /* Descend into nested inline lists. */
+        if ( item->children != 0 )
+            analyzeActionList( redAct, item->children );
+        item++;
+    }
+}
+
+/* Number the referenced actions of genActionList 0, 1, 2, ... in list
+ * order. Actions without references keep whatever actionId they had. The
+ * numbering uses a counter local to this function. */
+void RedFsm::assignActionIds()
+{
+    int idCounter = 0;
+    for ( GenActionList::Iter act = genActionList; act.lte(); act++ ) {
+        /* Only ever interested in referenced actions. */
+        if ( numRefs( act ) <= 0 )
+            continue;
+
+        act->actionId = idCounter;
+        idCounter += 1;
+    }
+}
+
+/* Gather various info on the machine: count the action references, set the
+ * bAny* summary flags, flag the states whose transition actions reference
+ * the current state, assign action ids and compute the value limits. */
+void RedFsm::analyzeMachine()
+{
+ /* Find the true count of action references. */
+ findFinalActionRefs();
+
+ /* Record which kinds of action references occur and analyze each
+ * action's inline code. */
+ for ( GenActionList::Iter act = genActionList; act.lte(); act++ ) {
+ /* Record the occurrence of various kinds of actions. */
+ if ( act->numToStateRefs > 0 )
+ bAnyToStateActions = true;
+ if ( act->numFromStateRefs > 0 )
+ bAnyFromStateActions = true;
+ if ( act->numEofRefs > 0 )
+ bAnyEofActions = true;
+ if ( act->numTransRefs > 0 )
+ bAnyRegActions = true;
+
+ /* Recurse through the action's parse tree looking for various things. */
+ analyzeAction( act, act->inlineList );
+ }
+
+ /* Analyze reduced action lists. */
+ for ( GenActionTableMap::Iter redAct = actionMap; redAct.lte(); redAct++ ) {
+ for ( GenActionTable::Iter act = redAct->key; act.lte(); act++ )
+ analyzeActionList( redAct, act->value->inlineList );
+ }
+
+ /* Find states that have transitions with actions that reference the
+ * current state, and note whether any state has conditions. */
+ for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
+ /* Check any actions out of outSingle. */
+ for ( RedTransList::Iter rtel = st->outSingle; rtel.lte(); rtel++ ) {
+ if ( rtel->value->action != 0 && rtel->value->action->anyCurStateRef() )
+ st->bAnyRegCurStateRef = true;
+ }
+
+ /* Check any actions out of outRange. */
+ for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) {
+ if ( rtel->value->action != 0 && rtel->value->action->anyCurStateRef() )
+ st->bAnyRegCurStateRef = true;
+ }
+
+ /* Check any action out of default. */
+ if ( st->defTrans != 0 && st->defTrans->action != 0 &&
+ st->defTrans->action->anyCurStateRef() )
+ st->bAnyRegCurStateRef = true;
+
+ if ( st->stateCondList.length() > 0 )
+ bAnyConditions = true;
+ }
+
+ /* Assign ids to actions that are referenced. */
+ assignActionIds();
+
+ /* Set the maximums of various values used for deciding types. */
+ setValueLimits();
+}
+
+/* Action reference of a transition: zero when the transition has no
+ * action, otherwise the location of its action table plus one. */
+int transAction( RedTrans *trans )
+{
+    if ( trans->action == 0 )
+        return 0;
+    return trans->action->location+1;
+}
+
+/* To-state action reference of a state: zero when the state has no
+ * to-state action, otherwise the location of that action table plus one. */
+int toStateAction( RedState *state )
+{
+    if ( state->toStateAction == 0 )
+        return 0;
+    return state->toStateAction->location+1;
+}
+
+/* From-state action reference of a state: zero when the state has no
+ * from-state action, otherwise the location of that action table plus
+ * one. */
+int fromStateAction( RedState *state )
+{
+    if ( state->fromStateAction == 0 )
+        return 0;
+    return state->fromStateAction->location+1;
+}
+
+/* EOF action reference of a state: zero when the state has no EOF action,
+ * otherwise the location of that action table plus one. */
+int eofAction( RedState *state )
+{
+    if ( state->eofAction == 0 )
+        return 0;
+    return state->eofAction->location+1;
+}
+
+
+/* Build the FsmTables structure for this machine. The state list is first
+ * sorted by id, then every table is filled by walking the states in that
+ * order, so a state's position in the per-state tables follows list order.
+ * The returned object and all of its arrays are allocated with new; nothing
+ * in this function frees them. */
+FsmTables *RedFsm::makeFsmTables()
+{
+ /* The fsm runtime needs states sorted by id. */
+ sortByStateId();
+
+ int pos, curKeyOffset, curIndOffset;
+ FsmTables *fsmTables = new FsmTables;
+ fsmTables->numStates = stateList.length();
+
+ /*
+ * actions
+ */
+
+ /* One leading slot plus, per action table, a length slot and one slot per
+ * item. The leading slot holds zero; transAction() and the state action
+ * helpers return zero for "no action" and location+1 otherwise. */
+ fsmTables->numActions = 1;
+ for ( GenActionTableMap::Iter act = actionMap; act.lte(); act++ )
+ fsmTables->numActions += 1 + act->key.length();
+
+ pos = 0;
+ fsmTables->actions = new long[fsmTables->numActions];
+ fsmTables->actions[pos++] = 0;
+ for ( GenActionTableMap::Iter act = actionMap; act.lte(); act++ ) {
+ fsmTables->actions[pos++] = act->key.length();
+ for ( GenActionTable::Iter item = act->key; item.lte(); item++ )
+ fsmTables->actions[pos++] = item->value->actionId;
+ }
+
+ /*
+ * keyOffset
+ */
+ pos = 0, curKeyOffset = 0;
+ fsmTables->keyOffsets = new long[fsmTables->numStates];
+ for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
+ /* Store the current offset. */
+ fsmTables->keyOffsets[pos++] = curKeyOffset;
+
+ /* Move the key offset ahead. Singles take one key, ranges take two. */
+ curKeyOffset += st->outSingle.length() + st->outRange.length()*2;
+ }
+
+ /*
+ * transKeys
+ */
+ fsmTables->numTransKeys = 0;
+ for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
+ fsmTables->numTransKeys += st->outSingle.length();
+ fsmTables->numTransKeys += 2 * st->outRange.length();
+ }
+
+ /* NOTE(review): the key values are stored into a char array here;
+ * confirm that getVal() results always fit in a char. */
+ pos = 0;
+ fsmTables->transKeys = new char[fsmTables->numTransKeys];
+ for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
+ for ( RedTransList::Iter stel = st->outSingle; stel.lte(); stel++ )
+ fsmTables->transKeys[pos++] = stel->lowKey.getVal();
+ for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) {
+ fsmTables->transKeys[pos++] = rtel->lowKey.getVal();
+ fsmTables->transKeys[pos++] = rtel->highKey.getVal();
+ }
+ }
+
+ /*
+ * singleLengths
+ */
+ pos = 0;
+ fsmTables->singleLengths = new long[fsmTables->numStates];
+ for ( RedStateList::Iter st = stateList; st.lte(); st++ )
+ fsmTables->singleLengths[pos++] = st->outSingle.length();
+
+ /*
+ * rangeLengths
+ */
+ pos = 0;
+ fsmTables->rangeLengths = new long[fsmTables->numStates];
+ for ( RedStateList::Iter st = stateList; st.lte(); st++ )
+ fsmTables->rangeLengths[pos++] = st->outRange.length();
+
+ /*
+ * indexOffsets
+ */
+ pos = 0, curIndOffset = 0;
+ fsmTables->indexOffsets = new long[fsmTables->numStates];
+ for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
+ fsmTables->indexOffsets[pos++] = curIndOffset;
+
+ /* One index per single, per range and, when present, for the default
+ * transition. */
+ curIndOffset += st->outSingle.length() + st->outRange.length();
+ if ( st->defTrans != 0 )
+ curIndOffset += 1;
+ }
+
+ /*
+ * transTargsWI
+ */
+ fsmTables->numTransTargsWI = 0;
+ for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
+ fsmTables->numTransTargsWI += st->outSingle.length();
+ fsmTables->numTransTargsWI += st->outRange.length();
+ if ( st->defTrans != 0 )
+ fsmTables->numTransTargsWI += 1;
+ }
+
+ pos = 0;
+ fsmTables->transTargsWI = new long[fsmTables->numTransTargsWI];
+ for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
+ for ( RedTransList::Iter stel = st->outSingle; stel.lte(); stel++ )
+ fsmTables->transTargsWI[pos++] = stel->value->targ->id;
+
+ for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ )
+ fsmTables->transTargsWI[pos++] = rtel->value->targ->id;
+
+ if ( st->defTrans != 0 )
+ fsmTables->transTargsWI[pos++] = st->defTrans->targ->id;
+ }
+
+ /*
+ * transActionsWI
+ */
+ fsmTables->numTransActionsWI = 0;
+ for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
+ fsmTables->numTransActionsWI += st->outSingle.length();
+ fsmTables->numTransActionsWI += st->outRange.length();
+ if ( st->defTrans != 0 )
+ fsmTables->numTransActionsWI += 1;
+ }
+
+ pos = 0;
+ fsmTables->transActionsWI = new long[fsmTables->numTransActionsWI];
+ for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
+ for ( RedTransList::Iter stel = st->outSingle; stel.lte(); stel++ )
+ fsmTables->transActionsWI[pos++] = transAction( stel->value );
+
+ for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ )
+ fsmTables->transActionsWI[pos++] = transAction( rtel->value );
+
+ if ( st->defTrans != 0 )
+ fsmTables->transActionsWI[pos++] = transAction( st->defTrans );
+ }
+
+ /*
+ * toStateActions
+ */
+ pos = 0;
+ fsmTables->toStateActions = new long[fsmTables->numStates];
+ for ( RedStateList::Iter st = stateList; st.lte(); st++ )
+ fsmTables->toStateActions[pos++] = toStateAction( st );
+
+ /*
+ * fromStateActions
+ */
+ pos = 0;
+ fsmTables->fromStateActions = new long[fsmTables->numStates];
+ for ( RedStateList::Iter st = stateList; st.lte(); st++ )
+ fsmTables->fromStateActions[pos++] = fromStateAction( st );
+
+ /*
+ * eofActions
+ */
+ pos = 0;
+ fsmTables->eofActions = new long[fsmTables->numStates];
+ for ( RedStateList::Iter st = stateList; st.lte(); st++ )
+ fsmTables->eofActions[pos++] = eofAction( st );
+
+ /*
+ * eofTargs
+ */
+ pos = 0;
+ fsmTables->eofTargs = new long[fsmTables->numStates];
+ for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
+ /* -1 marks a state without an EOF transition. */
+ int targ = -1;
+ if ( st->eofTrans != 0 )
+ targ = st->eofTrans->targ->id;
+ fsmTables->eofTargs[pos++] = targ;
+ }
+
+ /* Start state. */
+ fsmTables->startState = startState->id;
+
+ /* First final state. Without any final state, nextStateId is stored. */
+ fsmTables->firstFinal = ( firstFinState != 0 ) ?
+ firstFinState->id : nextStateId;
+
+ /* The error state, or -1 when the machine has none. */
+ fsmTables->errorState = ( errState != 0 ) ?
+ errState->id : -1;
+
+ /* The array pointing to actions. */
+ pos = 0;
+ fsmTables->numActionSwitch = genActionList.length();
+ fsmTables->actionSwitch = new GenAction*[fsmTables->numActionSwitch];
+ for ( GenActionList::Iter act = genActionList; act.lte(); act++ )
+ fsmTables->actionSwitch[pos++] = act;
+
+ /*
+ * entryByRegion
+ */
+
+ /* Slot zero always holds the error state; the regions follow from
+ * index one. */
+ fsmTables->numRegions = regionToEntry.length()+1;
+ fsmTables->entryByRegion = new long[fsmTables->numRegions];
+ fsmTables->entryByRegion[0] = fsmTables->errorState;
+
+ pos = 1;
+ for ( RegionToEntry::Iter en = regionToEntry; en.lte(); en++ ) {
+ /* Find the entry state from the entry id. */
+ RedEntryMapEl *entryMapEl = redEntryMap.find( *en );
+
+ /* Save it off. A region without a map entry gets the error state. */
+ fsmTables->entryByRegion[pos++] = entryMapEl != 0 ? entryMapEl->value
+ : fsmTables->errorState;
+ }
+
+ return fsmTables;
+}
+
+
diff --git a/src/redfsm.h b/src/redfsm.h
new file mode 100644
index 00000000..39b98d5f
--- /dev/null
+++ b/src/redfsm.h
@@ -0,0 +1,524 @@
+/*
+ * Copyright 2006-2012 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _REDFSM_H
+#define _REDFSM_H
+
+#include <assert.h>
+#include <string.h>
+#include <string>
+#include "keyops.h"
+#include "vector.h"
+#include "dlist.h"
+#include "compare.h"
+#include "bstmap.h"
+#include "bstset.h"
+#include "avlmap.h"
+#include "avltree.h"
+#include "avlbasic.h"
+#include "mergesort.h"
+#include "sbstmap.h"
+#include "sbstset.h"
+#include "sbsttable.h"
+#include "global.h"
+#include "pdarun.h"
+
+#define TRANS_ERR_TRANS 0
+#define STATE_ERR_STATE 0
+#define FUNC_NO_FUNC 0
+
+using std::string;
+
+struct RedState;
+struct InlineList;
+struct Compiler;
+struct ObjField;
+
+/* Element in list of actions. Contains the string for the code to execute.
+ * The prev/next pointers link it into a GenActionList (DList<GenAction>). */
+struct GenAction
+{
+ /* Data collected during parse. */
+ InputLoc loc;
+ char *name;
+ InlineList *inlineList;
+ int actionId;
+ MarkType markType;
+ ObjField *objField;
+ long markId;
+
+ int numTransRefs; /* The four reference counts are summed by numRefs( GenAction* ). */
+ int numToStateRefs;
+ int numFromStateRefs;
+ int numEofRefs;
+
+ GenAction *prev, *next;
+};
+
+typedef DList<GenAction> GenActionList;
+string nameOrLoc( GenAction *genAction );
+
+/* Total number of references to an action in the final machine: the sum of
+ * its transition, to-state, from-state and EOF reference counts. */
+inline int numRefs( GenAction *genAction )
+{
+	int total = genAction->numTransRefs;
+	total += genAction->numToStateRefs;
+	total += genAction->numFromStateRefs;
+	total += genAction->numEofRefs;
+	return total;
+}
+
+
+/* Forwards. */
+struct RedState;
+struct FsmState;
+
+/* Transition GenAction Element. */
+typedef SBstMapEl< int, GenAction* > GenActionTableEl;
+
+/* Transition GenAction Table. Maps an int ordering to the GenAction*
+ * registered under it (an SBstMap compared with CmpOrd<int>). */
+struct GenActionTable
+ : public SBstMap< int, GenAction*, CmpOrd<int> >
+{
+ void setAction( int ordering, GenAction *action );
+ void setActions( int *orderings, GenAction **actions, int nActs );
+ void setActions( const GenActionTable &other );
+};
+
+/* Orders whole action table elements: first by key, then by the action
+ * pointer stored as the value. */
+struct GenCmpActionTableEl
+{
+	static int compare( const GenActionTableEl &action1,
+			const GenActionTableEl &action2 )
+	{
+		/* The key decides unless both keys are equal. */
+		if ( action1.key != action2.key )
+			return ( action1.key < action2.key ) ? -1 : 1;
+
+		/* Tie on the key: fall back to the value. */
+		if ( action1.value != action2.value )
+			return ( action1.value < action2.value ) ? -1 : 1;
+
+		return 0;
+	}
+};
+
+/* Compare for GenActionTable. */
+typedef CmpSTable< GenActionTableEl, GenCmpActionTableEl > GenCmpActionTable;
+
+/* Set of states. */
+typedef BstSet<RedState*> RedStateSet;
+typedef BstSet<int> IntSet;
+
+/* Reduced action: one unique action table, referenced by pointer from the
+ * transitions and states that use it. Elements are kept in a
+ * GenActionTableMap, an AVL tree keyed by the action table. */
+struct RedAction
+:
+	public AvlTreeEl<RedAction>
+{
+	/* Every scalar member gets a defined starting value. actListId and
+	 * location previously had no initializer and were indeterminate until
+	 * assigned. */
+	RedAction( )
+	:
+		key(),
+		actListId(0),
+		location(0),
+		eofRefs(0),
+		numTransRefs(0),
+		numToStateRefs(0),
+		numFromStateRefs(0),
+		numEofRefs(0),
+		bAnyNextStmt(false),
+		bAnyCurStateRef(false),
+		bAnyBreakStmt(false)
+	{ }
+
+	/* Key accessor (presumably what the AVL tree uses to order elements). */
+	const GenActionTable &getKey()
+		{ return key; }
+
+	GenActionTable key;
+	int actListId;
+	int location;
+	IntSet *eofRefs;
+
+	/* Number of references in the final machine. Returns the real count,
+	 * like the free function numRefs( GenAction* ); a bool return type
+	 * collapsed every non-zero count to 1. Tests of the form numRefs() > 0
+	 * or plain truthiness behave as before. */
+	int numRefs()
+		{ return numTransRefs + numToStateRefs + numFromStateRefs + numEofRefs; }
+	int numTransRefs;
+	int numToStateRefs;
+	int numFromStateRefs;
+	int numEofRefs;
+
+	bool anyNextStmt() { return bAnyNextStmt; }
+	bool anyCurStateRef() { return bAnyCurStateRef; }
+	bool anyBreakStmt() { return bAnyBreakStmt; }
+
+	bool bAnyNextStmt;
+	bool bAnyCurStateRef;
+	bool bAnyBreakStmt;
+};
+typedef AvlTree<RedAction, GenActionTable, GenCmpActionTable> GenActionTableMap;
+
+/* Reduced transition: a target state paired with the (shared) action table to
+ * run when the transition is taken, plus an id for the transition. */
+struct RedTrans
+:
+	public AvlTreeEl<RedTrans>
+{
+	/* partitionBoundary previously had no initializer. It now starts out
+	 * false, the same starting value RedState gives its own
+	 * partitionBoundary flag. */
+	RedTrans( RedState *targ, RedAction *action, int id )
+	:
+		targ(targ),
+		action(action),
+		id(id),
+		partitionBoundary(false),
+		labelNeeded(true)
+	{ }
+
+	RedState *targ;
+	RedAction *action;
+	int id;
+	bool partitionBoundary;
+	bool labelNeeded;
+};
+
+/* Ordering used for the final reduction of transitions. Two transitions are
+ * compared by target state pointer and then by the pointer to their shared
+ * action table, so the action tables are expected to have been reduced
+ * before this compare is used. */
+struct CmpRedTrans
+{
+	static int compare( const RedTrans &t1, const RedTrans &t2 )
+	{
+		if ( t1.targ != t2.targ )
+			return ( t1.targ < t2.targ ) ? -1 : 1;
+
+		if ( t1.action != t2.action )
+			return ( t1.action < t2.action ) ? -1 : 1;
+
+		return 0;
+	}
+};
+
+typedef AvlBasic<RedTrans, CmpRedTrans> RedTransSet;
+
+/* Element in out range. Pairs a key range (lowKey, highKey) with the
+ * RedTrans stored as its value. */
+struct RedTransEl
+{
+ /* Constructors. */
+ RedTransEl( Key lowKey, Key highKey, RedTrans *value )
+ : lowKey(lowKey), highKey(highKey), value(value) { }
+
+ Key lowKey, highKey;
+ RedTrans *value;
+};
+
+typedef Vector<RedTransEl> RedTransList;
+typedef Vector<RedState*> RedStateVect;
+
+typedef BstMapEl<RedState*, unsigned long long> RedSpanMapEl;
+typedef BstMap<RedState*, unsigned long long> RedSpanMap;
+
+/* Compare for sorting the span map. The order is reversed on the span, so
+ * the element with the larger span sorts first. */
+struct CmpRedSpanMapEl
+{
+	static int compare( const RedSpanMapEl &smel1, const RedSpanMapEl &smel2 )
+	{
+		if ( smel1.value == smel2.value )
+			return 0;
+
+		/* Larger span first. */
+		return ( smel1.value > smel2.value ) ? -1 : 1;
+	}
+};
+
+/* Sorting state-span map entries by span. */
+typedef MergeSort<RedSpanMapEl, CmpRedSpanMapEl> RedSpanMapSort;
+
+/* Set of entry ids that go into this state. */
+typedef Vector<int> EntryIdVect;
+typedef Vector<char*> EntryNameVect;
+
+/* Maps entry ids (defined by the frontend) to reduced state ids. */
+typedef BstMap<int, int> RedEntryMap;
+typedef BstMapEl<int, int> RedEntryMapEl;
+
+typedef Vector<int> RegionToEntry;
+
+typedef Vector< GenAction* > GenCondSet;
+
+struct Condition /* A key, its base key, and the set of actions (condSet) stored with them. */
+{
+ Condition( )
+ : key(0), baseKey(0) {}
+
+ Key key;
+ Key baseKey;
+ GenCondSet condSet;
+
+ Condition *next, *prev; /* Links for ConditionList (DList<Condition>). */
+};
+typedef DList<Condition> ConditionList;
+
+struct GenCondSpace /* A base key, a set of actions (condSet) and the id of the condition space. */
+{
+ Key baseKey;
+ GenCondSet condSet;
+ int condSpaceId;
+
+ GenCondSpace *next, *prev; /* Links for CondSpaceList (DList<GenCondSpace>). */
+};
+typedef DList<GenCondSpace> CondSpaceList;
+
+struct GenStateCond /* A key range (lowKey, highKey) paired with a GenCondSpace. */
+{
+ Key lowKey;
+ Key highKey;
+
+ GenCondSpace *condSpace;
+
+ GenStateCond *prev, *next; /* Links for GenStateCondList (DList<GenStateCond>). */
+};
+typedef DList<GenStateCond> GenStateCondList;
+typedef Vector<GenStateCond*> StateCondVect;
+
+/* Reduced state. Holds the outgoing transitions (singles, ranges and the
+ * default), the flat condition and key tables, the state-level actions, and
+ * the partition and in-transition bookkeeping for the state. */
+struct RedState
+{
+	RedState()
+	:
+		defTrans(0),
+		condList(0),
+		transList(0),
+		isFinal(false),
+		labelNeeded(false),
+		outNeeded(false),
+		onStateList(false),
+		toStateAction(0),
+		fromStateAction(0),
+		eofAction(0),
+		eofTrans(0),
+		id(0),
+		bAnyRegCurStateRef(false),
+		/* partition previously had no initializer and was indeterminate
+		 * until assigned; partitionBoundary was already initialized. */
+		partition(0),
+		partitionBoundary(false),
+		inTrans(0),
+		numInTrans(0)
+	{ }
+
+	/* Transitions out. */
+	RedTransList outSingle;
+	RedTransList outRange;
+	RedTrans *defTrans;
+
+	/* For flat conditions. */
+	Key condLowKey, condHighKey;
+	GenCondSpace **condList;
+
+	/* For flat keys. */
+	Key lowKey, highKey;
+	RedTrans **transList;
+
+	/* The list of states that transitions from this state go to. */
+	RedStateVect targStates;
+
+	bool isFinal;
+	bool labelNeeded;
+	bool outNeeded;
+	bool onStateList;
+	RedAction *toStateAction;
+	RedAction *fromStateAction;
+	RedAction *eofAction;
+	RedTrans *eofTrans;
+	int id;
+	GenStateCondList stateCondList;
+	StateCondVect stateCondVect;
+
+	/* Pointers for the list of states. */
+	RedState *prev, *next;
+
+	bool anyRegCurStateRef() { return bAnyRegCurStateRef; }
+	bool bAnyRegCurStateRef;
+
+	int partition;
+	bool partitionBoundary;
+
+	/* Array of incoming transitions and its length. */
+	RedTrans **inTrans;
+	int numInTrans;
+};
+
+/* List of states. */
+typedef DList<RedState> RedStateList;
+
+/* Set of reduced transitions. Comparison is by pointer. */
+typedef BstSet< RedTrans*, CmpOrd<RedTrans*> > RedTransPtrSet;
+
+/* Next version of the fsm machine: the reduced machine. Holds the states,
+ * the transition set and the action tables, the flags and maximum values
+ * recorded about them, and the routines that reorganize the machine. */
+struct RedFsm
+{
+ RedFsm();
+
+ bool wantComplete;
+ bool forcedErrorState;
+
+ int nextActionId;
+ int nextTransId;
+
+ /* Next State Id doubles as the total number of state ids. */
+ int nextStateId;
+
+ RedTransSet transSet;
+ GenActionTableMap actionMap;
+ RedStateList stateList;
+ RedStateSet entryPoints;
+ RedState *startState;
+ RedState *errState;
+ RedTrans *errTrans;
+ RedTrans *errActionTrans;
+ RedState *firstFinState;
+ int numFinStates;
+ int nParts;
+
+ GenAction *allActions;
+ RedAction *allActionTables;
+ Condition *allConditions;
+ GenCondSpace *allCondSpaces;
+ RedState *allStates;
+ GenActionList genActionList;
+ ConditionList conditionList;
+ CondSpaceList condSpaceList;
+ EntryIdVect entryPointIds;
+ EntryNameVect entryPointNames;
+ RedEntryMap redEntryMap;
+ RegionToEntry regionToEntry;
+
+ bool bAnyToStateActions;
+ bool bAnyFromStateActions;
+ bool bAnyRegActions;
+ bool bAnyEofActions;
+ bool bAnyActionGotos;
+ bool bAnyActionCalls;
+ bool bAnyActionRets;
+ bool bAnyRegActionRets;
+ bool bAnyRegActionByValControl;
+ bool bAnyRegNextStmt;
+ bool bAnyRegCurStateRef;
+ bool bAnyRegBreak;
+ bool bAnyLmSwitchError;
+ bool bAnyConditions;
+
+ int maxState;
+ int maxSingleLen;
+ int maxRangeLen;
+ int maxKeyOffset;
+ int maxIndexOffset;
+ int maxIndex;
+ int maxActListId;
+ int maxActionLoc;
+ int maxActArrItem;
+ unsigned long long maxSpan;
+ unsigned long long maxCondSpan;
+ int maxFlatIndexOffset;
+ Key maxKey;
+ int maxCondOffset;
+ int maxCondLen;
+ int maxCondSpaceId;
+ int maxCondIndexOffset;
+ int maxCond;
+
+ bool anyActions();
+ bool anyToStateActions() { return bAnyToStateActions; }
+ bool anyFromStateActions() { return bAnyFromStateActions; }
+ bool anyRegActions() { return bAnyRegActions; }
+ bool anyEofActions() { return bAnyEofActions; }
+ bool anyActionGotos() { return bAnyActionGotos; }
+ bool anyActionCalls() { return bAnyActionCalls; }
+ bool anyActionRets() { return bAnyActionRets; }
+ bool anyRegActionRets() { return bAnyRegActionRets; }
+ bool anyRegActionByValControl() { return bAnyRegActionByValControl; }
+ bool anyRegNextStmt() { return bAnyRegNextStmt; }
+ bool anyRegCurStateRef() { return bAnyRegCurStateRef; }
+ bool anyRegBreak() { return bAnyRegBreak; }
+ bool anyLmSwitchError() { return bAnyLmSwitchError; }
+ bool anyConditions() { return bAnyConditions; }
+
+ GenCondSpace *findCondSpace( Key lowKey, Key highKey );
+ Condition *findCondition( Key key );
+
+ /* Is it possible to extend a range by bumping ranges that span only
+ * one character to the singles array. */
+ bool canExtend( const RedTransList &list, int pos );
+
+ /* Pick single transitions from the ranges. */
+ void moveTransToSingle( RedState *state );
+ void chooseSingle();
+
+ void makeFlat();
+
+ /* Move a selected transition from ranges to default. */
+ void moveToDefault( RedTrans *defTrans, RedState *state );
+
+ /* Pick a default transition by largest span. */
+ RedTrans *chooseDefaultSpan( RedState *state );
+ void chooseDefaultSpan();
+
+ /* Pick a default transition by most number of ranges. */
+ RedTrans *chooseDefaultNumRanges( RedState *state );
+ void chooseDefaultNumRanges();
+
+ /* Pick a default transition tailored towards goto driven machine. */
+ RedTrans *chooseDefaultGoto( RedState *state );
+ void chooseDefaultGoto();
+
+ /* Ordering states by transition connections. */
+ void optimizeStateOrdering( RedState *state );
+ void optimizeStateOrdering();
+
+ /* Ordering states depth first. NOTE(review): wording inferred from the
+ * function names; the definitions are not shown here. */
+ void depthFirstOrdering( RedState *state );
+ void depthFirstOrdering();
+
+ /* Set state ids. */
+ void sequentialStateIds();
+ void sortStateIdsByFinal();
+
+ /* Arrange states by final id. This is a stable sort. */
+ void sortStatesByFinal();
+
+ /* Sorting states by id. */
+ void sortByStateId();
+
+ /* Locating the first final state. This is the final state with the lowest
+ * id. */
+ void findFirstFinState();
+
+ void assignActionLocs();
+
+ RedTrans *getErrorTrans();
+ RedState *getErrorState();
+
+ /* Is every char in the alphabet covered? */
+ bool alphabetCovered( RedTransList &outRange );
+
+ RedTrans *allocateTrans( RedState *targState, RedAction *actionTable );
+
+ void partitionFsm( int nParts );
+
+ void setInTrans();
+ void setValueLimits();
+ void assignActionIds();
+ void analyzeActionList( RedAction *redAct, InlineList *inlineList );
+ void analyzeAction( GenAction *act, InlineList *inlineList );
+ void findFinalActionRefs();
+ void analyzeMachine();
+
+ FsmTables *makeFsmTables(); /* Builds and returns the FsmTables for this machine. */
+};
+
+
+#endif /* _REDFSM_H */
diff --git a/src/resolve.cc b/src/resolve.cc
new file mode 100644
index 00000000..a661e68e
--- /dev/null
+++ b/src/resolve.cc
@@ -0,0 +1,805 @@
+/*
+ * Copyright 2009-2012 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "bytecode.h"
+#include "parsedata.h"
+#include "fsmrun.h"
+#include <iostream>
+#include <assert.h>
+
+using std::cout;
+using std::cerr;
+using std::endl;
+
+/* Resolve a named type. The search starts in the namespace selected by the
+ * qualification and walks outward through the parent namespaces until the
+ * name turns up in a type map. The nspace member is left at the namespace
+ * where the search stopped. */
+UniqueType *TypeRef::lookupTypeName( Compiler *pd )
+{
+	/* The qualification gives the namespace where the search begins. */
+	nspace = nspaceQual->getQual( pd );
+	if ( nspace == 0 )
+		error(loc) << "do not have region for resolving reference" << endp;
+
+	for ( ; nspace != 0; nspace = nspace->parentNamespace ) {
+		TypeMapEl *found = nspace->typeMap.find( typeName );
+		if ( found == 0 )
+			continue;
+
+		/* An alias defers to the type ref it stands for. Alias loops are
+		 * not guarded against here. */
+		if ( found->type == TypeMapEl::TypeAliasType )
+			return found->typeRef->lookupType( pd );
+
+		if ( found->type == TypeMapEl::LangElType )
+			return pd->findUniqueType( TYPE_TREE, found->value );
+	}
+
+	error(loc) << "unknown type in typeof expression" << endp;
+	return 0;
+}
+
+/* Resolve a type given as a literal. The literal text is prepared with
+ * prepareLitString and then looked up in the literal dictionary of the
+ * qualified namespace and of each parent namespace in turn. */
+UniqueType *TypeRef::lookupTypeLiteral( Compiler *pd )
+{
+	/* The qualification gives the namespace where the search begins. */
+	nspace = nspaceQual->getQual( pd );
+	if ( nspace == 0 )
+		error(loc) << "do not have region for resolving reference" << endp;
+
+	/* Interpret escape sequences and drop the quotes. The case-independence
+	 * flag is produced by the helper but not needed here. */
+	bool ignoredCI;
+	String litText;
+	prepareLitString( litText, ignoredCI, pdaLiteral->token.data,
+			pdaLiteral->token.loc );
+
+	for ( ; nspace != 0; nspace = nspace->parentNamespace ) {
+		LiteralDictEl *found = nspace->literalDict.find( litText );
+		if ( found != 0 )
+			return pd->findUniqueType( TYPE_TREE, found->value->tdLangEl );
+	}
+
+	error(loc) << "unknown type in typeof expression" << endp;
+	return 0;
+}
+
+/* Resolve a map type. One UniqueMap exists per (key type, value type) pair.
+ * The first request for a pair creates the backing generic type and declares
+ * it in the qualified namespace. */
+UniqueType *TypeRef::lookupTypeMap( Compiler *pd )
+{
+	nspace = nspaceQual->getQual( pd );
+
+	/* Key type first, then value type. */
+	UniqueType *keyUT = typeRef1->lookupType( pd );
+	UniqueType *valueUT = typeRef2->lookupType( pd );
+
+	UniqueMap probe( keyUT, valueUT );
+	UniqueMap *uniqueMap = pd->uniqueMapMap.find( &probe );
+	if ( uniqueMap == 0 ) {
+		uniqueMap = new UniqueMap( keyUT, valueUT );
+		pd->uniqueMapMap.insert( uniqueMap );
+
+		/* FIXME: Need unique name allocator for types. */
+		static int mapId = 0;
+		String name( 36, "__map%d", mapId++ );
+
+		/* Named so it does not shadow the generic member. */
+		GenericType *mapGeneric = new GenericType( name, GEN_MAP,
+				pd->nextGenericId++, 0/*langEl*/, typeRef2 );
+		mapGeneric->keyTypeArg = typeRef1;
+
+		nspace->genericList.append( mapGeneric );
+		mapGeneric->declare( pd, nspace );
+
+		uniqueMap->generic = mapGeneric;
+	}
+
+	/* Record the generic on this type ref as well. */
+	generic = uniqueMap->generic;
+	return pd->findUniqueType( TYPE_TREE, uniqueMap->generic->langEl );
+}
+
+/* Resolve a list type. One UniqueList exists per element type. The first
+ * request for an element type creates the backing generic type and declares
+ * it in the qualified namespace. */
+UniqueType *TypeRef::lookupTypeList( Compiler *pd )
+{
+	nspace = nspaceQual->getQual( pd );
+
+	UniqueType *valueUT = typeRef1->lookupType( pd );
+
+	UniqueList probe( valueUT );
+	UniqueList *uniqueList = pd->uniqueListMap.find( &probe );
+	if ( uniqueList == 0 ) {
+		uniqueList = new UniqueList( valueUT );
+		pd->uniqueListMap.insert( uniqueList );
+
+		/* FIXME: Need unique name allocator for types. */
+		static int listId = 0;
+		String name( 36, "__list%d", listId++ );
+
+		/* Named so it does not shadow the generic member. */
+		GenericType *listGeneric = new GenericType( name, GEN_LIST,
+				pd->nextGenericId++, 0/*langEl*/, typeRef1 );
+
+		nspace->genericList.append( listGeneric );
+		listGeneric->declare( pd, nspace );
+
+		uniqueList->generic = listGeneric;
+	}
+
+	/* Record the generic on this type ref as well. */
+	generic = uniqueList->generic;
+	return pd->findUniqueType( TYPE_TREE, uniqueList->generic->langEl );
+}
+
+/* Resolve a vector type. One UniqueVector exists per element type. The first
+ * request for an element type creates the backing generic type and declares
+ * it in the qualified namespace. */
+UniqueType *TypeRef::lookupTypeVector( Compiler *pd )
+{
+	nspace = nspaceQual->getQual( pd );
+
+	UniqueType *valueUT = typeRef1->lookupType( pd );
+
+	UniqueVector probe( valueUT );
+	UniqueVector *uniqueVector = pd->uniqueVectorMap.find( &probe );
+	if ( uniqueVector == 0 ) {
+		uniqueVector = new UniqueVector( valueUT );
+		pd->uniqueVectorMap.insert( uniqueVector );
+
+		/* FIXME: Need unique name allocator for types. */
+		static int vectorId = 0;
+		String name( 36, "__vector%d", vectorId++ );
+
+		/* Named so it does not shadow the generic member. */
+		GenericType *vectorGeneric = new GenericType( name, GEN_VECTOR,
+				pd->nextGenericId++, 0/*langEl*/, typeRef1 );
+
+		nspace->genericList.append( vectorGeneric );
+		vectorGeneric->declare( pd, nspace );
+
+		uniqueVector->generic = vectorGeneric;
+	}
+
+	/* Record the generic on this type ref as well. */
+	generic = uniqueVector->generic;
+	return pd->findUniqueType( TYPE_TREE, uniqueVector->generic->langEl );
+}
+
+/* Resolve a parser type. One UniqueParser exists per parsed type. The first
+ * request for a parsed type creates the backing generic type and declares it
+ * in the qualified namespace. */
+UniqueType *TypeRef::lookupTypeParser( Compiler *pd )
+{
+	nspace = nspaceQual->getQual( pd );
+
+	UniqueType *parseUT = typeRef1->lookupType( pd );
+
+	UniqueParser probe( parseUT );
+	UniqueParser *uniqueParser = pd->uniqueParserMap.find( &probe );
+	if ( uniqueParser == 0 ) {
+		uniqueParser = new UniqueParser( parseUT );
+		pd->uniqueParserMap.insert( uniqueParser );
+
+		/* FIXME: Need unique name allocator for types. */
+		static int accumId = 0;
+		String name( 36, "__accum%d", accumId++ );
+
+		/* Named so it does not shadow the generic member. */
+		GenericType *parserGeneric = new GenericType( name, GEN_PARSER,
+				pd->nextGenericId++, 0/*langEl*/, typeRef1 );
+
+		nspace->genericList.append( parserGeneric );
+		parserGeneric->declare( pd, nspace );
+
+		uniqueParser->generic = parserGeneric;
+	}
+
+	/* Record the generic on this type ref as well. */
+	generic = uniqueParser->generic;
+	return pd->findUniqueType( TYPE_TREE, uniqueParser->generic->langEl );
+}
+
+/* Resolve a pointer type: resolve the pointed-to type ref, then find the
+ * TYPE_PTR unique type for its language element. */
+UniqueType *TypeRef::lookupTypePtr( Compiler *pd )
+{
+	/* lookupType returns the uniqueType it stores on typeRef1. */
+	UniqueType *targetUT = typeRef1->lookupType( pd );
+	return pd->findUniqueType( TYPE_PTR, targetUT->langEl );
+}
+
+/* Resolve a reference type: resolve the referred-to type ref, then find the
+ * TYPE_REF unique type for its language element. */
+UniqueType *TypeRef::lookupTypeRef( Compiler *pd )
+{
+	/* lookupType returns the uniqueType it stores on typeRef1. */
+	UniqueType *targetUT = typeRef1->lookupType( pd );
+	return pd->findUniqueType( TYPE_REF, targetUT->langEl );
+}
+
+/* Replace uniqueType with the type of the repeat, list or opt production
+ * built over it. One UniqueRepeat exists per (repeat kind, language element)
+ * pair; the production is made the first time the pair is seen. */
+void TypeRef::resolveRepeat( Compiler *pd )
+{
+	if ( uniqueType->typeId != TYPE_TREE )
+		error(loc) << "cannot repeat non-tree type" << endp;
+
+	UniqueRepeat probe( repeatType, uniqueType->langEl );
+	UniqueRepeat *entry = pd->uniqeRepeatMap.find( &probe );
+	if ( entry == 0 ) {
+		entry = new UniqueRepeat( repeatType, uniqueType->langEl );
+		pd->uniqeRepeatMap.insert( entry );
+
+		/* Make the production that matches the kind of repetition. */
+		LangEl *declLangEl = 0;
+		switch ( repeatType ) {
+			case RepeatRepeat: {
+				String prodName( 128, "_repeat_%s", typeName.data );
+				declLangEl = pd->makeRepeatProd( nspace, prodName, nspaceQual, typeName );
+				break;
+			}
+			case RepeatList: {
+				String prodName( 128, "_list_%s", typeName.data );
+				declLangEl = pd->makeListProd( nspace, prodName, nspaceQual, typeName );
+				break;
+			}
+			case RepeatOpt: {
+				String prodName( 128, "_opt_%s", typeName.data );
+				declLangEl = pd->makeOptProd( nspace, prodName, nspaceQual, typeName );
+				break;
+			}
+			case RepeatNone:
+				break;
+		}
+
+		/* Link the new element and the element it repeats. */
+		entry->declLangEl = declLangEl;
+		declLangEl->repeatOf = entry->langEl;
+	}
+
+	uniqueType = pd->findUniqueType( TYPE_TREE, entry->declLangEl );
+}
+
+
+/* Resolve this type ref to its unique type. The result is cached in
+ * uniqueType, so a type ref that is already resolved returns at once. */
+UniqueType *TypeRef::lookupType( Compiler *pd )
+{
+	if ( uniqueType == 0 ) {
+		/* Dispatch on the kind of type ref. */
+		switch ( type ) {
+			case Name:
+				uniqueType = lookupTypeName( pd );
+				break;
+			case Literal:
+				uniqueType = lookupTypeLiteral( pd );
+				break;
+			case Map:
+				uniqueType = lookupTypeMap( pd );
+				break;
+			case List:
+				uniqueType = lookupTypeList( pd );
+				break;
+			case Vector:
+				uniqueType = lookupTypeVector( pd );
+				break;
+			case Parser:
+				uniqueType = lookupTypeParser( pd );
+				break;
+			case Ptr:
+				uniqueType = lookupTypePtr( pd );
+				break;
+			case Ref:
+				uniqueType = lookupTypeRef( pd );
+				break;
+			case Iterator:
+			case Unspecified:
+				/* No lookup needed, unique type(s) set when constructed. */
+				break;
+		}
+
+		/* A repeat wraps the type just resolved. */
+		if ( repeatType != RepeatNone )
+			resolveRepeat( pd );
+	}
+
+	return uniqueType;
+}
+
+/* Resolve the type ref of a production element and record the language
+ * element of the resulting type on the element. */
+void Compiler::resolveFactor( ProdEl *fact )
+{
+	/* lookupType returns the uniqueType it stores on the type ref. */
+	UniqueType *factUT = fact->typeRef->lookupType( this );
+	fact->langEl = factUT->langEl;
+}
+
+/* Resolve the type refs used by this term and recurse into the expressions
+ * it contains. */
+void LangTerm::resolve( Compiler *pd )
+{
+	switch ( type ) {
+		case ConstructType:
+			typeRef->lookupType( pd );
+
+			/* Resolve the initialization expressions. */
+			if ( fieldInitArgs != 0 ) {
+				for ( FieldInitVect::Iter init = *fieldInitArgs; init.lte(); init++ )
+					(*init)->expr->resolve( pd );
+			}
+			break;
+
+		case MakeTreeType:
+		case MakeTokenType:
+		case MethodCallType:
+			/* Resolve each argument expression. */
+			if ( args != 0 ) {
+				for ( ExprVect::Iter arg = *args; arg.lte(); arg++ )
+					(*arg)->resolve( pd );
+			}
+			break;
+
+		case NewType:
+			expr->resolve( pd );
+			break;
+
+		case TypeIdType:
+		case SearchType:
+			typeRef->lookupType( pd );
+			break;
+
+		case ParseType:
+		case ParseStopType:
+			typeRef->lookupType( pd );
+			parserTypeRef->lookupType( pd );
+			generic = parserTypeRef->generic;
+			break;
+
+		/* Nothing to resolve for these terms. */
+		case VarRefType:
+		case NumberType:
+		case StringType:
+		case MatchType:
+		case NilType:
+		case TrueType:
+		case FalseType:
+		case EmbedStringType:
+			break;
+	}
+}
+
+/* Variable references have nothing to resolve. */
+void LangVarRef::resolve( Compiler *pd ) const
+{
+	/* Intentionally empty. */
+}
+
+/* Resolve the operands of this expression: both sides of a binary
+ * expression, the right side of a unary expression, or the term. */
+void LangExpr::resolve( Compiler *pd ) const
+{
+	switch ( type ) {
+		case BinaryType:
+			left->resolve( pd );
+			right->resolve( pd );
+			break;
+		case UnaryType:
+			right->resolve( pd );
+			break;
+		case TermType:
+			term->resolve( pd );
+			break;
+	}
+}
+
+/* Walk the items of the parser text. The variable reference is resolved once
+ * per item and expression items have their expression resolved; factor and
+ * input text items need nothing more. */
+void LangStmt::resolveParserItems( Compiler *pd ) const
+{
+	ReplItemList::Iter item = *parserText->list;
+	while ( item.lte() ) {
+		varRef->resolve( pd );
+
+		if ( item->type == ReplItem::ExprType )
+			item->expr->resolve( pd );
+
+		item++;
+	}
+}
+
+/* Resolve everything this statement refers to: its expression, its type ref
+ * and term, and the statements nested inside it, depending on the kind of
+ * statement. */
+void LangStmt::resolve( Compiler *pd ) const
+{
+	switch ( type ) {
+		case PrintType:
+		case PrintXMLACType:
+		case PrintXMLType:
+		case PrintStreamType:
+			/* The arguments are visited from last to first. */
+			for ( ExprVect::Iter arg = exprPtrVect->last(); arg.gtb(); arg-- )
+				(*arg)->resolve( pd );
+			break;
+
+		/* Statements that carry a single expression. */
+		case ExprType:
+		case AssignType:
+		case ReturnType:
+			expr->resolve( pd );
+			break;
+
+		case IfType:
+			/* The test, then the true branch, then any else part. */
+			expr->resolve( pd );
+			for ( StmtList::Iter inner = *stmtList; inner.lte(); inner++ )
+				inner->resolve( pd );
+			if ( elsePart != 0 )
+				elsePart->resolve( pd );
+			break;
+
+		case ElseType:
+			for ( StmtList::Iter inner = *stmtList; inner.lte(); inner++ )
+				inner->resolve( pd );
+			break;
+
+		case WhileType:
+			/* The test, then the loop body. */
+			expr->resolve( pd );
+			for ( StmtList::Iter inner = *stmtList; inner.lte(); inner++ )
+				inner->resolve( pd );
+			break;
+
+		case ForIterType:
+			/* The iterator type, the term, then the loop body. */
+			typeRef->lookupType( pd );
+			langTerm->resolve( pd );
+			for ( StmtList::Iter inner = *stmtList; inner.lte(); inner++ )
+				inner->resolve( pd );
+			break;
+
+		case YieldType:
+			varRef->resolve( pd );
+			break;
+
+		/* Nothing to resolve for these statements. */
+		case RejectType:
+		case BreakType:
+		case ParserType:
+			break;
+	}
+}
+
+/* Resolve the type ref of every field of the object. Fields that have no
+ * type ref are skipped. */
+void ObjectDef::resolve( Compiler *pd )
+{
+	for ( ObjFieldList::Iter fli = *objFieldList; fli.lte(); fli++ ) {
+		ObjField *field = fli->value;
+		if ( field->typeRef == 0 )
+			continue;
+
+		field->typeRef->lookupType( pd );
+	}
+}
+
+/* Resolve the local frame of the block, when there is one, and then each
+ * statement in order. */
+void CodeBlock::resolve( Compiler *pd ) const
+{
+	if ( localFrame != 0 )
+		localFrame->resolve( pd );
+
+	StmtList::Iter stmt = *stmtList;
+	while ( stmt.lte() ) {
+		stmt->resolve( pd );
+		stmt++;
+	}
+}
+
+/* Resolve the code block of a function. */
+void Compiler::resolveFunction( Function *func )
+{
+	func->codeBlock->resolve( this );
+}
+
+/* Resolve the code block of a user iterator. */
+void Compiler::resolveUserIter( Function *func )
+{
+	func->codeBlock->resolve( this );
+}
+
+/* Resolve the pre-eof block of a token region. */
+void Compiler::resolvePreEof( TokenRegion *region )
+{
+	region->preEofBlock->resolve( this );
+}
+
+/* Resolve the root local frame and then the root code block. */
+void Compiler::resolveRootBlock()
+{
+	rootLocalFrame->resolve( this );
+	rootCodeBlock->resolve( this );
+}
+
+/* Resolve the token translation block of a language element. */
+void Compiler::resolveTranslateBlock( LangEl *langEl )
+{
+	langEl->transBlock->resolve( this );
+}
+
+/* Resolve the reduction block of a production. */
+void Compiler::resolveReductionCode( Definition *prod )
+{
+	prod->redBlock->resolve( this );
+}
+
+/* Resolve the types used throughout the parse tree: functions and user
+ * iterators with their return and parameter types, reduction code, token
+ * translation code, pre-eof blocks, the root block, and the field types of
+ * every user object and of the global object. */
+void Compiler::resolveParseTree()
+{
+	/* Resolve functions. */
+	for ( FunctionList::Iter f = functionList; f.lte(); f++ ) {
+		if ( f->isUserIter )
+			resolveUserIter( f );
+		else
+			resolveFunction( f );
+
+		/* The return type is optional. */
+		if ( f->typeRef != 0 )
+			f->typeRef->lookupType( this );
+
+		for ( ParameterList::Iter param = *f->paramList; param.lte(); param++ )
+			param->typeRef->lookupType( this );
+	}
+
+	/* Resolve the reduction code. */
+	for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) {
+		if ( prod->redBlock != 0 )
+			resolveReductionCode( prod );
+	}
+
+	/* Resolve the token translation code. */
+	for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
+		if ( lel->transBlock != 0 )
+			resolveTranslateBlock( lel );
+	}
+
+	/* Resolve preeof blocks. */
+	for ( RegionList::Iter r = regionList; r.lte(); r++ ) {
+		if ( r->preEofBlock != 0 )
+			resolvePreEof( r );
+	}
+
+	/* Resolve the init code */
+	resolveRootBlock( );
+
+	/* Resolve the field types of all user objects (need consistent size).
+	 * This goes through ObjectDef::resolve rather than repeating its loop,
+	 * so a field without a type ref is skipped instead of dereferenced. */
+	for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
+		ObjectDef *objDef = lel->objectDef;
+		if ( objDef != 0 )
+			objDef->resolve( this );
+	}
+
+	/* Resolve the field types of the global object the same way. */
+	globalObjectDef->resolve( this );
+}
+
+
+/* For every language element that uses the object definition of another
+ * type, find that type through its qualification and share its object
+ * definition. */
+void Compiler::resolveUses()
+{
+	for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
+		if ( lel->objectDefUses == 0 )
+			continue;
+
+		/* The qualification names the namespace to search from. */
+		Namespace *usesNspace = lel->objectDefUsesQual->getQual( this );
+		if ( usesNspace == 0 )
+			error() << "do not have namespace for resolving reference" << endp;
+
+		/* Find the named type and take over its object definition. */
+		LangEl *usedLangEl = findType( this, usesNspace, lel->objectDefUses );
+		lel->objectDef = usedLangEl->objectDef;
+	}
+}
+
+/* Resolve the factor of every factor item in every pattern. Input text items
+ * need no resolving. */
+void Compiler::resolvePatternEls()
+{
+	for ( PatternList::Iter pat = patternList; pat.lte(); pat++ ) {
+		for ( PatternItemList::Iter item = *pat->list; item.lte(); item++ ) {
+			if ( item->type == PatternItem::FactorType )
+				resolveFactor( item->factor );
+		}
+	}
+}
+
+/* Resolve the factor of every factor item in every replacement. Input text
+ * and expression items are left alone here. */
+void Compiler::resolveReplacementEls()
+{
+	for ( ReplList::Iter repl = replList; repl.lte(); repl++ ) {
+		for ( ReplItemList::Iter item = *repl->list; item.lte(); item++ ) {
+			if ( item->type == ReplItem::FactorType )
+				resolveFactor( item->factor );
+		}
+	}
+}
+
+/* Resolve the factor of every factor item in every parser text. Input text
+ * and expression items are left alone here. */
+void Compiler::resolveParserEls()
+{
+	for ( ParserTextList::Iter accum = parserTextList; accum.lte(); accum++ ) {
+		for ( ReplItemList::Iter item = *accum->list; item.lte(); item++ ) {
+			if ( item->type == ReplItem::FactorType )
+				resolveFactor( item->factor );
+		}
+	}
+}
+
+/* Resolves the elements of every production and, for productions that have
+ * no explicit precedence, takes the precedence from the last element that
+ * has one. */
+void Compiler::resolveProductionEls()
+{
+	/* NOTE: the production list may grow while it is being walked. */
+	for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) {
+		/* Resolve every element first. */
+		for ( ProdElList::Iter fact = *prod->prodElList; fact.lte(); fact++ )
+			resolveFactor( fact );
+
+		/* An explicit precedence is left as it is. */
+		if ( prod->predOf != 0 )
+			continue;
+
+		/* Scan from the last element back and stop at the first one whose
+		 * language element carries a precedence. */
+		for ( ProdElList::Iter fact = prod->prodElList->last(); fact.gtb(); fact-- ) {
+			if ( fact->langEl->predType != PredNone ) {
+				prod->predOf = fact->langEl;
+				break;
+			}
+		}
+	}
+}
+
+/* Resolve the type argument of every generic in every namespace. Maps also
+ * have their key type resolved. */
+void Compiler::resolveGenericTypes()
+{
+	for ( NamespaceList::Iter ns = namespaceList; ns.lte(); ns++ ) {
+		GenericList::Iter gen = ns->genericList;
+		while ( gen.lte() ) {
+			gen->utArg = gen->typeArg->lookupType( this );
+
+			if ( gen->typeId == GEN_MAP )
+				gen->keyUT = gen->keyTypeArg->lookupType( this );
+
+			gen++;
+		}
+	}
+}
+
+/* Give every nonterminal in the grammar a terminal twin named "_T_<name>".
+ * The twins are appended to the list being walked; they are terminals, so
+ * they are passed over when the walk reaches them. */
+void Compiler::makeTerminalWrappers()
+{
+	for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
+		if ( lel->type != LangEl::NonTerm )
+			continue;
+
+		String name( lel->name.length() + 5, "_T_%s", lel->name.data );
+		LangEl *termDup = new LangEl( lel->nspace, name, LangEl::Term );
+
+		/* The twin shares the attributes of the nonterminal. This ensures
+		 * that the attributes are allocated when patterns and constructors
+		 * are parsed. */
+		termDup->objectDef = lel->objectDef;
+
+		langEls.append( termDup );
+
+		/* Link the pair in both directions. */
+		lel->termDup = termDup;
+		termDup->termDup = lel;
+	}
+}
+
+/* Make an "_eof_<name>" terminal for each language element that does not
+ * have one yet. This is more than is needed: only the elements we need
+ * parsers for require one, but that is not known at this point and would
+ * take another pass before this one. */
+void Compiler::makeEofElements()
+{
+	for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
+		/* Skip elements that already have an eof element, the built-in
+		 * eof, error and no-token elements, and tokens that duplicate
+		 * another token. */
+		bool wantsEof =
+				lel->eofLel == 0 &&
+				lel != eofLangEl &&
+				lel != errorLangEl &&
+				lel != noTokenLangEl &&
+				!( lel->tokenDef != 0 && lel->tokenDef->dupOf != 0 );
+		if ( !wantsEof )
+			continue;
+
+		String name( lel->name.length() + 5, "_eof_%s", lel->name.data );
+		LangEl *eofLel = new LangEl( lel->nspace, name, LangEl::Term );
+
+		langEls.append( eofLel );
+
+		/* Link the pair in both directions and mark the new element. */
+		lel->eofLel = eofLel;
+		eofLel->eofLel = lel;
+		eofLel->isEOF = true;
+	}
+}
+
+/* Make an "_ign_<name>" terminal in the root namespace for each full region
+ * and link the region and the terminal to each other. */
+void Compiler::makeIgnoreCollectors()
+{
+	for ( RegionList::Iter region = regionList; region.lte(); region++ ) {
+		if ( !region->isFullRegion )
+			continue;
+
+		String name( region->name.length() + 5, "_ign_%s", region->name.data );
+		LangEl *ignLel = new LangEl( rootNamespace, name, LangEl::Term );
+		langEls.append( ignLel );
+
+		ignLel->isCI = true;
+		ignLel->ciRegion = region;
+		region->ciLel = ignLel;
+	}
+}
+
+/* Run the type resolution passes in order. */
+void Compiler::typeResolve()
+{
+	/* Uses statements come first. */
+	resolveUses();
+
+	/* Then the elements of patterns, replacements and parser texts. */
+	resolvePatternEls();
+	resolveReplacementEls();
+	resolveParserEls();
+
+	/* Then the code blocks and object fields. */
+	resolveParseTree();
+
+	resolveGenericTypes();
+
+	argvTypeRef->lookupType( this );
+
+	/* The production elements must come last. Any type resolve can create
+	 * new language elements with associated productions, which get tacked
+	 * onto the end of the production list. Doing this step at the end means
+	 * it processes the list while it is still growing. */
+	resolveProductionEls();
+}
diff --git a/src/rtvector.h b/src/rtvector.h
new file mode 100644
index 00000000..e03a17f9
--- /dev/null
+++ b/src/rtvector.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2002, 2006, 2009 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Aapl.
+ *
+ * Aapl is free software; you can redistribute it and/or modify it under the
+ * terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2.1 of the License, or (at your option)
+ * any later version.
+ *
+ * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with Aapl; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _RT_VECTOR_H
+#define _RT_VECTOR_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+
diff --git a/src/string.c b/src/string.c
new file mode 100644
index 00000000..7508a39a
--- /dev/null
+++ b/src/string.c
@@ -0,0 +1,240 @@
+/*
+ * Copyright 2007-2012 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <pool.h>
+#include <pdarun.h>
+#include <bytecode.h>
+
+#include <assert.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+
+/*
+ * In this system strings are not null terminated. Often strings come from a
+ * parse, in which case the string is just a pointer into the data string.
+ * A block in a parsed stream can house many tokens and there is no room for
+ * nulls.
+ */
+
+/* Duplicate a string head; a null head yields a null result. When the data
+ * sits directly behind the head (a full allocation) the characters are copied
+ * into a new full allocation, otherwise a new head pointing at the same
+ * external data is made. */
+Head *stringCopy( Program *prg, Head *head )
+{
+	if ( head == 0 )
+		return 0;
+
+	/* Full allocations keep their characters immediately after the head. */
+	if ( head->data != (char*)(head+1) )
+		return stringAllocPointer( prg, head->data, head->length );
+
+	return stringAllocFull( prg, head->data, head->length );
+}
+
+/* Release a string head and its location, if it has one. A null head is
+ * ignored. */
+void stringFree( Program *prg, Head *head )
+{
+	if ( head == 0 )
+		return;
+
+	if ( head->location != 0 )
+		locationFree( prg, head->location );
+
+	if ( head->data != (char*)(head+1) ) {
+		/* Only a head: the characters live elsewhere. */
+		headFree( prg, head );
+	}
+	else {
+		/* Head and characters were obtained as one malloc'd block. */
+		free( head );
+	}
+}
+
+/* Return the character data of a string, or null for a null head. The data
+ * is not null terminated; pair it with stringLength(). */
+const char *stringData( Head *head )
+{
+	return head != 0 ? head->data : 0;
+}
+
+/* Return the length of a string; a null head has length zero. */
+long stringLength( Head *head )
+{
+	return head != 0 ? head->length : 0;
+}
+
+/* Truncate a string in place by lowering its recorded length. The new length
+ * must not exceed the current one (asserted). No storage is released. */
+void stringShorten( Head *head, long newlen )
+{
+	assert( head->length >= newlen );
+	head->length = newlen;
+}
+
+/* Allocate a head together with room for 'length' characters in a single
+ * malloc'd block. The returned head has data pointing just past the head,
+ * length set to 'length' and no location. The characters themselves are left
+ * uninitialized.
+ *
+ * Allocation failure aborts the program; previously the null result was
+ * dereferenced right away. */
+Head *initStrSpace( long length )
+{
+	/* One block: the head followed by the characters. */
+	Head *head = (Head*) malloc( sizeof(Head) + length );
+	if ( head == 0 )
+		abort();
+
+	/* Init the header. */
+	head->data = (char*)(head+1);
+	head->length = length;
+	head->location = 0;
+
+	return head;
+}
+
+/* Make a string that owns a private copy of 'length' characters taken from
+ * 'data'. Head and characters share one allocation. */
+Head *stringAllocFull( Program *prg, const char *data, long length )
+{
+	Head *head = initStrSpace( length );
+	char *dest = (char*)(head+1);
+
+	memcpy( dest, data, length );
+	return head;
+}
+
+/* Make a string head that refers to 'length' characters at 'data' without
+ * copying them. The head comes from headAllocate().
+ * NOTE(review): location is not assigned here; presumably headAllocate()
+ * hands back a zeroed head -- confirm. */
+Head *stringAllocPointer( Program *prg, const char *data, long length )
+{
+	Head *head = headAllocate( prg );
+
+	head->length = length;
+	head->data = data;
+
+	return head;
+}
+
+/* Return a new full-allocation string holding the characters of s1 followed
+ * by those of s2. Neither argument may be null. */
+Head *concatStr( Head *s1, Head *s2 )
+{
+	const long firstLen = s1->length;
+	const long secondLen = s2->length;
+
+	Head *joined = initStrSpace( firstLen + secondLen );
+	char *out = (char*)(joined+1);
+
+	memcpy( out, s1->data, firstLen );
+	memcpy( out + firstLen, s2->data, secondLen );
+
+	return joined;
+}
+
+/* Return a new full-allocation string with every character of 's' passed
+ * through toupper(). 's' must not be null. */
+Head *stringToUpper( Head *s )
+{
+	/* Init space for the data. */
+	long len = s->length;
+	Head *head = initStrSpace( len );
+
+	/* Convert while copying. The characters go through unsigned char because
+	 * handing toupper() a negative plain char is undefined. */
+	const char *src = s->data;
+	char *dst = (char*)(head+1);
+	long i;
+	for ( i = 0; i < len; i++ )
+		dst[i] = (char)toupper( (unsigned char)src[i] );
+
+	return head;
+}
+
+/* Return a new full-allocation string with every character of 's' passed
+ * through tolower(). 's' must not be null. */
+Head *stringToLower( Head *s )
+{
+	/* Init space for the data. */
+	long len = s->length;
+	Head *head = initStrSpace( len );
+
+	/* Convert while copying. The characters go through unsigned char because
+	 * handing tolower() a negative plain char is undefined. */
+	const char *src = s->data;
+	char *dst = (char*)(head+1);
+	long i;
+	for ( i = 0; i < len; i++ )
+		dst[i] = (char)tolower( (unsigned char)src[i] );
+
+	return head;
+}
+
+
+/* Order two strings. A shorter string sorts before a longer one (-1 / 1);
+ * strings of equal length are ordered by memcmp() over their characters.
+ * The result is zero only when the strings are identical. */
+Word cmpString( Head *s1, Head *s2 )
+{
+	if ( s1->length != s2->length )
+		return s1->length < s2->length ? -1 : 1;
+
+	return memcmp( s1->data, s2->data, s1->length );
+}
+
+/* Convert the leading decimal integer in a string to a number, following the
+ * atoi() conventions: leading whitespace is skipped, one optional sign is
+ * accepted and conversion stops at the first non-digit. A string with no
+ * digits yields zero.
+ *
+ * The string is parsed in place, because string data carries no null
+ * terminator. This replaces the earlier approach of copying into an
+ * unchecked malloc'd buffer just to call atoi(). Values outside the range of
+ * int wrap around rather than being reported. */
+Word strAtoi( Head *str )
+{
+	const char *cur = str->data;
+	const char *end = cur + str->length;
+	int negative = 0;
+	unsigned long magnitude = 0;
+
+	while ( cur < end && isspace( (unsigned char)*cur ) )
+		cur++;
+
+	if ( cur < end && ( *cur == '-' || *cur == '+' ) ) {
+		negative = ( *cur == '-' );
+		cur++;
+	}
+
+	/* Accumulate unsigned so that overlong digit runs wrap instead of
+	 * overflowing a signed type. */
+	while ( cur < end && isdigit( (unsigned char)*cur ) ) {
+		magnitude = magnitude * 10 + (unsigned long)( *cur - '0' );
+		cur++;
+	}
+
+	/* Narrow to int first, as the atoi() based version did, then widen. */
+	int res = (int)( negative ? 0UL - magnitude : magnitude );
+	return res;
+}
+
+/* Format an integer in decimal and return it as a new full-allocation
+ * string. */
+Head *intToStr( Program *prg, Word i )
+{
+	/* A 64-bit value needs up to 20 characters plus the terminator, which
+	 * the former 20-byte buffer could not hold. Bound the write as well. */
+	char data[32];
+	int len = snprintf( data, sizeof(data), "%ld", (long)i );
+	if ( len < 0 )
+		len = 0;
+
+	return stringAllocFull( prg, data, len );
+}
+
+/* Read the first two bytes of the string as an unsigned 16-bit value, first
+ * byte most significant. The string length is not checked here. */
+Word strUord16( Head *head )
+{
+	const uchar *bytes = (const uchar*)(head->data);
+	ulong high = (ulong)bytes[0];
+	ulong low = (ulong)bytes[1];
+
+	return ( high << 8 ) | low;
+}
+
+/* Read the first byte of the string as an unsigned value. The string length
+ * is not checked here. */
+Word strUord8( Head *head )
+{
+	const uchar *bytes = (const uchar*)(head->data);
+	return (ulong)bytes[0];
+}
+
+/* Make a string head referring to entry 'offset' of the program's literal
+ * tables (litdata / litlen). The literal characters are not copied. */
+Head *makeLiteral( Program *prg, long offset )
+{
+	const char *text = prg->rtd->litdata[offset];
+	long textLen = prg->rtd->litlen[offset];
+
+	return stringAllocPointer( prg, text, textLen );
+}
+
+/* Format a single integer with a printf-style format held in a string and
+ * return the result as a new full-allocation string.
+ *
+ * String data in this system is not null terminated, so the format is first
+ * copied into a terminated scratch buffer; it used to be handed to
+ * snprintf() as is. A formatting error yields an empty string.
+ *
+ * NOTE(review): the format comes from the running program and is used as a
+ * printf format taking exactly one integer argument. Conversions that do not
+ * match that are not detected here. */
+Head *stringSprintf( Program *prg, Str *format, Int *integer )
+{
+	Head *formatHead = format->value;
+	long formatLen = stringLength( formatHead );
+
+	/* Build a null terminated copy of the format. */
+	char *fmt = (char*) malloc( formatLen + 1 );
+	if ( fmt == 0 )
+		abort();
+	if ( formatLen > 0 )
+		memcpy( fmt, stringData(formatHead), formatLen );
+	fmt[formatLen] = 0;
+
+	/* First pass measures, second pass writes. */
+	long written = snprintf( 0, 0, fmt, integer->value );
+	if ( written < 0 )
+		written = 0;
+
+	Head *head = initStrSpace( written+1 );
+	snprintf( (char*)head->data, written+1, fmt, integer->value );
+
+	/* The terminator written by snprintf is not part of the string. */
+	head->length -= 1;
+
+	free( fmt );
+	return head;
+}
diff --git a/src/synthesis.cc b/src/synthesis.cc
new file mode 100644
index 00000000..794927ad
--- /dev/null
+++ b/src/synthesis.cc
@@ -0,0 +1,3277 @@
+/*
+ * Copyright 2007-2012 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "bytecode.h"
+#include "parsedata.h"
+#include "fsmrun.h"
+#include "pdarun.h"
+#include "input.h"
+#include <iostream>
+#include <assert.h>
+
+using std::cout;
+using std::cerr;
+using std::endl;
+
+/* Create the unique type objects for nil and for the ptr, bool, int, str,
+ * stream, input, ignore and any language elements, and register each one in
+ * uniqeTypeMap. */
+void Compiler::initUniqueTypes( )
+{
+	uniqueTypeNil = new UniqueType( TYPE_NIL );
+	uniqeTypeMap.insert( uniqueTypeNil );
+
+	uniqueTypePtr = new UniqueType( TYPE_TREE, ptrLangEl );
+	uniqeTypeMap.insert( uniqueTypePtr );
+
+	uniqueTypeBool = new UniqueType( TYPE_TREE, boolLangEl );
+	uniqeTypeMap.insert( uniqueTypeBool );
+
+	uniqueTypeInt = new UniqueType( TYPE_TREE, intLangEl );
+	uniqeTypeMap.insert( uniqueTypeInt );
+
+	uniqueTypeStr = new UniqueType( TYPE_TREE, strLangEl );
+	uniqeTypeMap.insert( uniqueTypeStr );
+
+	uniqueTypeStream = new UniqueType( TYPE_TREE, streamLangEl );
+	uniqeTypeMap.insert( uniqueTypeStream );
+
+	uniqueTypeInput = new UniqueType( TYPE_TREE, inputLangEl );
+	uniqeTypeMap.insert( uniqueTypeInput );
+
+	uniqueTypeIgnore = new UniqueType( TYPE_TREE, ignoreLangEl );
+	uniqeTypeMap.insert( uniqueTypeIgnore );
+
+	uniqueTypeAny = new UniqueType( TYPE_TREE, anyLangEl );
+	uniqeTypeMap.insert( uniqueTypeAny );
+}
+
+/* Set up a built-in iterator definition. The built-in kinds differ only in
+ * their advance instruction, except RevChild, which also has its own create
+ * and destroy instructions. User iterators must be made with the
+ * constructor that takes a function; asking for one here asserts. */
+IterDef::IterDef( Type type ) :
+	type(type),
+	func(0),
+	useFuncId(false),
+	useSearchUT(false)
+{
+	/* Pick the advance instruction and note whether this is a built-in. */
+	bool builtin = false;
+	switch ( type ) {
+		case Tree:
+			inAdvance = IN_TRITER_ADVANCE;
+			builtin = true;
+			break;
+		case Child:
+			inAdvance = IN_TRITER_NEXT_CHILD;
+			builtin = true;
+			break;
+		case RevChild:
+			inAdvance = IN_REV_TRITER_PREV_CHILD;
+			builtin = true;
+			break;
+		case Repeat:
+			inAdvance = IN_TRITER_NEXT_REPEAT;
+			builtin = true;
+			break;
+		case RevRepeat:
+			inAdvance = IN_TRITER_PREV_REPEAT;
+			builtin = true;
+			break;
+		case User:
+			assert(false);
+			break;
+	}
+
+	if ( !builtin )
+		return;
+
+	/* Creation and destruction. */
+	if ( type == RevChild ) {
+		inCreateWV = IN_REV_TRITER_FROM_REF;
+		inCreateWC = IN_REV_TRITER_FROM_REF;
+		inDestroy = IN_REV_TRITER_DESTROY;
+	}
+	else {
+		inCreateWV = IN_TRITER_FROM_REF;
+		inCreateWC = IN_TRITER_FROM_REF;
+		inDestroy = IN_TRITER_DESTROY;
+	}
+
+	/* Access to the current item is the same for every built-in. */
+	inGetCurR = IN_TRITER_GET_CUR_R;
+	inGetCurWC = IN_TRITER_GET_CUR_WC;
+	inSetCurWC = IN_TRITER_SET_CUR_WC;
+	inRefFromCur = IN_TRITER_REF_FROM_CUR;
+
+	useSearchUT = true;
+}
+
+/* Set up an iterator definition backed by a function. It uses the function
+ * id and the IN_UITER_* instruction set. */
+IterDef::IterDef( Type type, Function *func ) :
+	type(type),
+	func(func),
+	useFuncId(true),
+	useSearchUT(true)
+{
+	inCreateWV = IN_UITER_CREATE_WV;
+	inCreateWC = IN_UITER_CREATE_WC;
+	inDestroy = IN_UITER_DESTROY;
+	inAdvance = IN_UITER_ADVANCE;
+
+	inGetCurR = IN_UITER_GET_CUR_R;
+	inGetCurWC = IN_UITER_GET_CUR_WC;
+	inSetCurWC = IN_UITER_SET_CUR_WC;
+	inRefFromCur = IN_UITER_REF_FROM_CUR;
+}
+
+/* Register a method that takes no arguments in an object's method map and
+ * return it. */
+ObjMethod *initFunction( UniqueType *retType, ObjectDef *obj,
+		const String &name, int methIdWV, int methIdWC, bool isConst )
+{
+	ObjMethod *method = new ObjMethod( retType, name,
+			methIdWV, methIdWC, 0, 0, 0, isConst );
+
+	obj->objMethodMap->insert( name, method );
+	return method;
+}
+
+/* Register a method that takes one argument in an object's method map and
+ * return it. */
+ObjMethod *initFunction( UniqueType *retType, ObjectDef *obj,
+		const String &name, int methIdWV, int methIdWC, UniqueType *arg1, bool isConst )
+{
+	UniqueType *argTypes[1];
+	argTypes[0] = arg1;
+
+	ObjMethod *method = new ObjMethod( retType, name,
+			methIdWV, methIdWC, 1, argTypes, 0, isConst );
+
+	obj->objMethodMap->insert( name, method );
+	return method;
+}
+
+/* Register a method that takes two arguments in an object's method map and
+ * return it. */
+ObjMethod *initFunction( UniqueType *retType, ObjectDef *obj,
+		const String &name, int methIdWV, int methIdWC,
+		UniqueType *arg1, UniqueType *arg2, bool isConst )
+{
+	UniqueType *argTypes[2];
+	argTypes[0] = arg1;
+	argTypes[1] = arg2;
+
+	ObjMethod *method = new ObjMethod( retType, name,
+			methIdWV, methIdWC, 2, argTypes, 0, isConst );
+
+	obj->objMethodMap->insert( name, method );
+	return method;
+}
+
+/* Return the iterator definition for (type, func) stored in iterDefSet,
+ * inserting it first if it is not there yet. */
+IterDef *Compiler::findIterDef( IterDef::Type type, Function *func )
+{
+	IterDef wanted( type, func );
+
+	IterDefSetEl *entry = iterDefSet.find( wanted );
+	if ( entry == 0 )
+		entry = iterDefSet.insert( wanted );
+
+	return &entry->key;
+}
+
+/* Return the iterator definition for a built-in type stored in iterDefSet,
+ * inserting it first if it is not there yet. */
+IterDef *Compiler::findIterDef( IterDef::Type type )
+{
+	IterDef wanted( type );
+
+	IterDefSetEl *entry = iterDefSet.find( wanted );
+	if ( entry == 0 )
+		entry = iterDefSet.insert( wanted );
+
+	return &entry->key;
+}
+
+/* Return the unique type registered for a bare type id, creating and
+ * registering it on first use. */
+UniqueType *Compiler::findUniqueType( int typeId )
+{
+	UniqueType probe( typeId );
+
+	UniqueType *existing = uniqeTypeMap.find( &probe );
+	if ( existing != 0 )
+		return existing;
+
+	UniqueType *created = new UniqueType( typeId );
+	uniqeTypeMap.insert( created );
+	return created;
+}
+
+/* Return the unique type registered for (typeId, langEl), creating and
+ * registering it on first use. */
+UniqueType *Compiler::findUniqueType( int typeId, LangEl *langEl )
+{
+	UniqueType probe( typeId, langEl );
+
+	UniqueType *existing = uniqeTypeMap.find( &probe );
+	if ( existing != 0 )
+		return existing;
+
+	UniqueType *created = new UniqueType( typeId, langEl );
+	uniqeTypeMap.insert( created );
+	return created;
+}
+
+/* Return the unique type registered for (typeId, iterDef), creating and
+ * registering it on first use. */
+UniqueType *Compiler::findUniqueType( int typeId, IterDef *iterDef )
+{
+	UniqueType probe( typeId, iterDef );
+
+	UniqueType *existing = uniqeTypeMap.find( &probe );
+	if ( existing != 0 )
+		return existing;
+
+	UniqueType *created = new UniqueType( typeId, iterDef );
+	uniqeTypeMap.insert( created );
+	return created;
+}
+
+/* Step into the next child of the current scope. The current scope remembers
+ * the child it handed out last in childIter; the first call hands out the
+ * first child and running off the end wraps around to the first child. */
+void ObjectDef::iterPushScope()
+{
+	ObjNameScope *next = scope->childIter == 0 ?
+			scope->children.head : scope->childIter->next;
+
+	/* Resetting. */
+	if ( next == 0 )
+		next = scope->children.head;
+
+	scope->childIter = next;
+	scope = next;
+}
+
+/* Leave the current scope: make its parent scope the current one. */
+void ObjectDef::iterPopScope()
+{
+ //cout << "iter pop scope" << endl;
+ scope = scope->parentScope;
+}
+
+/* Open a new name scope with an empty field map as the last child of the
+ * current scope, and make it the current scope. */
+void ObjectDef::pushScope()
+{
+	ObjNameScope *child = new ObjNameScope;
+	child->objFieldMap = new ObjFieldMap;
+	child->parentScope = scope;
+
+	scope->children.append( child );
+	scope = child;
+}
+
+/* Close the current scope: make its parent scope the current one. */
+void ObjectDef::popScope()
+{
+ scope = scope->parentScope;
+}
+
+/* Add a field to the object: it is appended to the object's field list and
+ * entered under its name in the field map of the current scope. */
+void ObjectDef::insertField( const String &name, ObjField *value )
+{
+	objFieldList->append( value );
+	scope->objFieldMap->insert( name, value );
+}
+
+/* Look a field up by name in the given scope and then in each enclosing
+ * scope in turn. Returns the first match, or null when no scope up to the
+ * outermost one has it. */
+ObjField *ObjectDef::findFieldInScope( const String &name, ObjNameScope *inScope )
+{
+	ObjNameScope *cur = inScope;
+	do {
+		ObjFieldMapEl *found = cur->objFieldMap->find( name );
+		if ( found != 0 )
+			return found->value;
+
+		cur = cur->parentScope;
+	} while ( cur != 0 );
+
+	return 0;
+}
+
+/* Return the field already declared under this name in the current scope
+ * only (enclosing scopes are not consulted), or null if there is none. */
+ObjField *ObjectDef::checkRedecl( const String &name )
+{
+	ObjFieldMapEl *existing = scope->objFieldMap->find( name );
+	return existing != 0 ? existing->value : 0;
+}
+
+/* Return the field at the given 0-based position in the object's field
+ * list. The position is not checked against the length of the list. */
+ObjField *ObjectDef::findFieldNum( long offset )
+{
+	ObjFieldList::Iter field = *objFieldList;
+
+	for ( int step = 0; step < offset; step++ )
+		field++;
+
+	return field->value;
+}
+
+/* Look a field up by name, starting in the current scope and working
+ * outwards. Returns null when it is not found. */
+ObjField *ObjectDef::findField( const String &name )
+{
+	return findFieldInScope( name, scope );
+}
+
+/* Look a method up by name in the object's method map. Returns null when it
+ * is not found. */
+ObjMethod *ObjectDef::findMethod( const String &name )
+{
+	ObjMethodMapEl *entry = objMethodMap->find( name );
+	return entry != 0 ? entry->value : 0;
+}
+
+/* Return the number of Words a field of the given type occupies. */
+long sizeOfField( UniqueType *fieldUT )
+{
+	if ( fieldUT->typeId == TYPE_REF )
+		return 2;
+
+	if ( fieldUT->typeId != TYPE_ITER )
+		return 1;
+
+	/* Iterators: select on the iterator type. */
+	switch ( fieldUT->iterDef->type ) {
+		case IterDef::RevChild:
+			return sizeof(RevTreeIter) / sizeof(Word);
+
+		case IterDef::Tree:
+		case IterDef::Child:
+		case IterDef::Repeat:
+		case IterDef::RevRepeat:
+			return sizeof(TreeIter) / sizeof(Word);
+
+		case IterDef::User:
+			/* User iterators are just a pointer to the UserIter struct. The
+			 * struct needs to go right beneath the call to the user iterator
+			 * so it can be found by a yield. It is therefore allocated on
+			 * the stack right before the call. */
+			return 1;
+	}
+
+	return 0;
+}
+
+/* Mark a field as referenced and make sure it has been initialized (offset
+ * and instructions assigned) by way of initField(). */
+void ObjectDef::referenceField( Compiler *pd, ObjField *field )
+{
+ field->beenReferenced = true;
+ initField( pd, field );
+}
+
+/* Assign a field its offset and access instructions, once. Further calls
+ * for the same field do nothing.
+ *
+ * Fields of a frame object get negative offsets: the running nextOffset is
+ * bumped by the field size first and the field is placed at its negation.
+ * Fields flagged isRhsGet use no offset and the RHS_VAL instructions. All
+ * other fields are placed at the current nextOffset, which is then bumped. */
+void ObjectDef::initField( Compiler *pd, ObjField *field )
+{
+	if ( field->beenInitialized )
+		return;
+	field->beenInitialized = true;
+
+	UniqueType *fieldUT = field->typeRef->uniqueType;
+
+	if ( type == FrameType ) {
+		nextOffset += sizeOfField( fieldUT );
+		field->offset = -nextOffset;
+
+		pd->initLocalInstructions( field );
+		return;
+	}
+
+	if ( field->isRhsGet ) {
+		field->useOffset = false;
+		field->inGetR = IN_GET_RHS_VAL_R;
+		field->inGetWC = IN_GET_RHS_VAL_WC;
+		field->inGetWV = IN_GET_RHS_VAL_WV;
+		field->inSetWC = IN_SET_RHS_VAL_WC;
+		/* NOTE(review): the WV setter is given the WC opcode, unlike the
+		 * getters above. Confirm whether that is intended. */
+		field->inSetWV = IN_SET_RHS_VAL_WC;
+		return;
+	}
+
+	field->offset = nextOffset;
+	nextOffset += sizeOfField( fieldUT );
+
+	/* Initialize the instructions. */
+	pd->initFieldInstructions( field );
+}
+
+/* Emit the instruction (and operands) that loads field 'el' of 'inObject'
+ * and return the resulting type. For an iterator field the instruction is
+ * the iterator's get-current instruction and the returned type is the
+ * field's search type rather than the iterator type itself. */
+UniqueType *LangVarRef::loadFieldInstr( Compiler *pd, CodeVect &code,
+ ObjectDef *inObject, ObjField *el, bool forWriting, bool revert ) const
+{
+ /* Ensure that the field is referenced. */
+ inObject->referenceField( pd, el );
+
+ UniqueType *elUT = el->typeRef->uniqueType;
+
+ /* Pick the load instruction. */
+ if ( forWriting ) {
+ /* Loading something for writing. The instruction depends on whether or
+ * not we are reverting. */
+ if ( elUT->typeId == TYPE_ITER )
+ code.append( elUT->iterDef->inGetCurWC );
+ else if ( pd->revertOn && revert )
+ code.append( el->inGetWV );
+ else
+ code.append( el->inGetWC );
+ }
+ else {
+ /* Loading something for reading. */
+ if ( elUT->typeId == TYPE_ITER )
+ code.append( elUT->iterDef->inGetCurR );
+ else
+ code.append( el->inGetR );
+ }
+
+ if ( el->useOffset ) {
+ /* Gets of locals and fields require offsets. Fake vars like token
+ * data and lhs don't require it. */
+ code.appendHalf( el->offset );
+ }
+ else if ( el->isRhsGet ) {
+ /* Need to place the array computing the val: its length followed by
+ * a (prodNum, childNum) pair per entry. */
+ code.append( el->rhsVal.length() );
+ for ( Vector<RhsVal>::Iter rg = el->rhsVal; rg.lte(); rg++ ) {
+ code.append( rg->prodNum );
+ code.append( rg->childNum );
+ }
+ }
+
+ /* If we are dealing with an iterator then dereference it. */
+ if ( elUT->typeId == TYPE_ITER )
+ elUT = el->typeRef->searchUniqueType;
+
+ return elUT;
+}
+
+/* Return the object definition behind a tree or reference type. Any other
+ * type id asserts; with assertions disabled the result is null. */
+ObjectDef *objDefFromUT( Compiler *pd, UniqueType *ut )
+{
+	if ( ut->typeId != TYPE_TREE && ut->typeId != TYPE_REF ) {
+		/* This should have generated a compiler error. */
+		assert(false);
+		return 0;
+	}
+
+	return ut->langEl->objectDef;
+}
+
+/* Emit one reference-creating instruction per qualification item and return
+ * the number of items processed. The qualification must start at a local
+ * frame. There cannot be any pointer: every item is asserted to be a dot
+ * access. */
+long LangVarRef::loadQualificationRefs( Compiler *pd, CodeVect &code ) const
+{
+ long count = 0;
+ ObjectDef *rootObj = pd->curLocalFrame;
+
+ /* Start the search from the root object. */
+ ObjectDef *searchObjDef = rootObj;
+
+ for ( QualItemVect::Iter qi = *qual; qi.lte(); qi++ ) {
+ /* Lookup the field in the current qualification. */
+ ObjField *el = searchObjDef->findField( qi->data );
+ if ( el == 0 )
+ error(qi->loc) << "cannot resolve qualification " << qi->data << endp;
+
+ /* Only the first item is taken from the frame itself (from an iterator,
+ * an existing reference or a plain local). Every later item is a
+ * reference derived from the preceding one. */
+ if ( qi.pos() > 0 ) {
+ code.append( IN_REF_FROM_QUAL_REF );
+ code.appendHalf( 0 );
+ code.appendHalf( el->offset );
+ }
+ else if ( el->typeRef->iterDef != 0 ) {
+ code.append( el->typeRef->iterDef->inRefFromCur );
+ code.appendHalf( el->offset );
+ }
+ else if ( el->typeRef->type == TypeRef::Ref ) {
+ code.append( IN_REF_FROM_REF );
+ code.appendHalf( el->offset );
+ }
+ else {
+ code.append( IN_REF_FROM_LOCAL );
+ code.appendHalf( el->offset );
+ }
+
+ /* Continue the search in the object of the field's type, looking
+ * through iterators to the type they search for. */
+ UniqueType *elUT = el->typeRef->uniqueType;
+ if ( elUT->typeId == TYPE_ITER )
+ elUT = el->typeRef->searchUniqueType;
+
+ assert( qi->type == QualItem::Dot );
+
+ searchObjDef = objDefFromUT( pd, elUT );
+ count += 1;
+ }
+ return count;
+}
+
+/* Emit the loads for every item of the qualification, starting the field
+ * search in 'rootObj'. 'lastPtrInQual' is the position of the last pointer
+ * in the qualification, or negative when there is none; it decides for each
+ * item whether the load is for reading or writing and whether it reverts. */
+void LangVarRef::loadQualification( Compiler *pd, CodeVect &code,
+ ObjectDef *rootObj, int lastPtrInQual, bool forWriting, bool revert ) const
+{
+ /* Start the search from the root object. */
+ ObjectDef *searchObjDef = rootObj;
+
+ for ( QualItemVect::Iter qi = *qual; qi.lte(); qi++ ) {
+ /* Lookup the field in the current qualification. */
+ ObjField *el = searchObjDef->findField( qi->data );
+ if ( el == 0 )
+ error(qi->loc) << "cannot resolve qualification " << qi->data << endp;
+
+ if ( forWriting && el->refActive )
+ error(qi->loc) << "reference active, cannot write to object" << endp;
+
+ bool lfForWriting = forWriting;
+ bool lfRevert = revert;
+
+ /* If there is a pointer in the qualification, we need to compute
+ * forWriting and revert. */
+ if ( lastPtrInQual >= 0 ) {
+ if ( qi.pos() <= lastPtrInQual ) {
+ /* If we are before or at the pointer we are strictly read
+ * only, regardless of the origin. */
+ lfForWriting = false;
+ lfRevert = false;
+ }
+ else {
+ /* If we are past the pointer then we are always reverting
+ * because the object is global. Forwriting is as passed in.
+ * */
+ lfRevert = true;
+ }
+ }
+
+ UniqueType *qualUT = loadFieldInstr( pd, code, searchObjDef,
+ el, lfForWriting, lfRevert );
+
+ if ( qi->type == QualItem::Dot ) {
+ /* Cannot dot a pointer. Iterator yes (access of the iterator, not
+ * the current). */
+ if ( qualUT->typeId == TYPE_PTR )
+ error(loc) << "dot cannot be used to access a pointer" << endp;
+ }
+ else if ( qi->type == QualItem::Arrow ) {
+ if ( qualUT->typeId == TYPE_PTR ) {
+ /* Always dereference pointers when used for qualification. If
+ * this is the last one then we must start with the reverse
+ * execution business. */
+ if ( pd->revertOn && qi.pos() == lastPtrInQual && forWriting ) {
+ /* This is like a global load. */
+ code.append( IN_PTR_DEREF_WV );
+ }
+ else {
+ /* If reading or not yet the last in ref then we only need a
+ * reading deref. */
+ code.append( IN_PTR_DEREF_R );
+ }
+
+ /* Carry on with the tree type the pointer refers to. */
+ qualUT = pd->findUniqueType( TYPE_TREE, qualUT->langEl );
+ }
+ else {
+ error(loc) << "arrow operator cannot be used to access this type" << endp;
+ }
+ }
+
+ searchObjDef = objDefFromUT( pd, qualUT );
+ }
+}
+
+/* Emit the load of the context object followed by the loads for the
+ * qualification, searched from the context object definition. */
+void LangVarRef::loadContextObj( Compiler *pd, CodeVect &code,
+		int lastPtrInQual, bool forWriting ) const
+{
+	/* A write with no pointer in the qualification goes to the context
+	 * object itself. */
+	const bool directWrite = forWriting && lastPtrInQual < 0;
+
+	if ( !directWrite ) {
+		/* Either we are reading or we are loading a pointer that will be
+		 * dereferenced. */
+		code.append( IN_LOAD_CONTEXT_R );
+	}
+	else if ( pd->revertOn ) {
+		/* Writing with revert on. */
+		code.append( IN_LOAD_CONTEXT_WV );
+	}
+	else {
+		code.append( IN_LOAD_CONTEXT_WC );
+	}
+
+	/* The search for the qualification starts in the context object. */
+	loadQualification( pd, code, pd->context->contextObjDef,
+			lastPtrInQual, forWriting, true );
+}
+
+/* Emit the load of the global object followed by the loads for the
+ * qualification, searched from the global object definition. */
+void LangVarRef::loadGlobalObj( Compiler *pd, CodeVect &code,
+		int lastPtrInQual, bool forWriting ) const
+{
+	/* A write with no pointer in the qualification goes to the global
+	 * object itself. */
+	const bool directWrite = forWriting && lastPtrInQual < 0;
+
+	if ( !directWrite ) {
+		/* Either we are reading or we are loading a pointer that will be
+		 * dereferenced. */
+		code.append( IN_LOAD_GLOBAL_R );
+	}
+	else if ( pd->revertOn ) {
+		/* Writing with revert on. */
+		code.append( IN_LOAD_GLOBAL_WV );
+	}
+	else {
+		code.append( IN_LOAD_GLOBAL_WC );
+	}
+
+	/* The search for the qualification starts in the global object. */
+	loadQualification( pd, code, pd->globalObjectDef,
+			lastPtrInQual, forWriting, true );
+}
+
+/* Emit the qualification loads for a custom reference. Like loadLocalObj()
+ * the search starts in the local frame, but here revert is passed as true. */
+void LangVarRef::loadCustom( Compiler *pd, CodeVect &code,
+ int lastPtrInQual, bool forWriting ) const
+{
+ /* Start the search in the local frame. */
+ loadQualification( pd, code, pd->curLocalFrame, lastPtrInQual, forWriting, true );
+}
+
+/* Emit the qualification loads for a reference into the local frame. No
+ * object load is emitted first and revert is passed as false. */
+void LangVarRef::loadLocalObj( Compiler *pd, CodeVect &code,
+ int lastPtrInQual, bool forWriting ) const
+{
+ /* Start the search in the local frame. */
+ loadQualification( pd, code, pd->curLocalFrame, lastPtrInQual, forWriting, false );
+}
+
+/* Tell whether this reference starts in the current local frame. With a
+ * qualification only its first item is checked, as a field. Without one the
+ * name itself is checked, first as a field and then as a method. */
+bool LangVarRef::isLocalRef( Compiler *pd ) const
+{
+	ObjectDef *frame = pd->curLocalFrame;
+
+	if ( qual->length() > 0 )
+		return frame->findField( qual->data[0].data ) != 0;
+
+	return frame->findField( name ) != 0 ||
+			frame->findMethod( name ) != 0;
+}
+
+/* Tell whether this reference starts in the context object. Always false
+ * when there is no context. With a qualification only its first item is
+ * checked, as a field. Without one the name itself is checked, first as a
+ * field and then as a method. */
+bool LangVarRef::isContextRef( Compiler *pd ) const
+{
+	if ( pd->context == 0 )
+		return false;
+
+	ObjectDef *contextObj = pd->context->contextObjDef;
+
+	if ( qual->length() > 0 )
+		return contextObj->findField( qual->data[0].data ) != 0;
+
+	return contextObj->findField( name ) != 0 ||
+			contextObj->findMethod( name ) != 0;
+}
+
+/* Tell whether this reference starts at something in the local frame that
+ * is flagged isCustom. With a qualification that is its first item, looked
+ * up as a field. Without one it is the name itself: the field if there is
+ * one, otherwise the method. */
+bool LangVarRef::isCustom( Compiler *pd ) const
+{
+	ObjectDef *frame = pd->curLocalFrame;
+
+	if ( qual->length() > 0 ) {
+		ObjField *first = frame->findField( qual->data[0].data );
+		return first != 0 && first->isCustom;
+	}
+
+	/* A field of this name takes precedence over a method. */
+	ObjField *field = frame->findField( name );
+	if ( field != 0 )
+		return field->isCustom;
+
+	ObjMethod *method = frame->findMethod( name );
+	return method != 0 && method->isCustom;
+}
+
+/* Emit the loads for the object this reference is qualified by. The first
+ * matching origin wins: custom, local frame, context object, and finally the
+ * global object. */
+void LangVarRef::loadObj( Compiler *pd, CodeVect &code,
+		int lastPtrInQual, bool forWriting ) const
+{
+	if ( isCustom( pd ) ) {
+		loadCustom( pd, code, lastPtrInQual, forWriting );
+		return;
+	}
+
+	if ( isLocalRef( pd ) ) {
+		loadLocalObj( pd, code, lastPtrInQual, forWriting );
+		return;
+	}
+
+	if ( isContextRef( pd ) ) {
+		loadContextObj( pd, code, lastPtrInQual, forWriting );
+		return;
+	}
+
+	loadGlobalObj( pd, code, lastPtrInQual, forWriting );
+}
+
+/* Walk the qualification from 'rootDef' without emitting any code. Returns
+ * the object definition the qualification ends in, the position of the last
+ * pointer in it and the position of its first const part (each -1 when
+ * there is none). */
+VarRefLookup LangVarRef::lookupQualification( Compiler *pd, ObjectDef *rootDef ) const
+{
+ int lastPtrInQual = -1;
+ ObjectDef *searchObjDef = rootDef;
+ int firstConstPart = -1;
+
+ for ( QualItemVect::Iter qi = *qual; qi.lte(); qi++ ) {
+ /* Lookup the field in the current qualification. */
+ ObjField *el = searchObjDef->findField( qi->data );
+ if ( el == 0 )
+ error(qi->loc) << "cannot resolve qualification " << qi->data << endp;
+
+ /* Lookup the type of the field. */
+ UniqueType *qualUT = el->typeRef->uniqueType;
+
+ /* If we are dealing with an iterator then dereference it. */
+ if ( qualUT->typeId == TYPE_ITER )
+ qualUT = el->typeRef->searchUniqueType;
+
+ /* Is it const? */
+ if ( firstConstPart < 0 && el->isConst )
+ firstConstPart = qi.pos();
+
+ /* Check for pointers. When loop is done we will have the last one
+ * present, if any. */
+ if ( qualUT->typeId == TYPE_PTR )
+ lastPtrInQual = qi.pos();
+
+ if ( qi->type == QualItem::Dot ) {
+ /* Cannot dot a pointer. Iterator yes (access of the iterator
+ * not the current) */
+ if ( qualUT->typeId == TYPE_PTR )
+ error(loc) << "dot cannot be used to access a pointer" << endp;
+ }
+ else if ( qi->type == QualItem::Arrow ) {
+ /* NOTE(review): iterators were already replaced by their search
+ * type above, so the first test looks redundant -- confirm. */
+ if ( qualUT->typeId == TYPE_ITER )
+ qualUT = el->typeRef->searchUniqueType;
+ else if ( qualUT->typeId == TYPE_PTR )
+ qualUT = pd->findUniqueType( TYPE_TREE, qualUT->langEl );
+ }
+
+ searchObjDef = objDefFromUT( pd, qualUT );
+ }
+
+ return VarRefLookup( lastPtrInQual, firstConstPart, searchObjDef );
+}
+
+/* Resolve the qualification of this reference. The search starts in the
+ * local frame for local references, in the context object for context
+ * references and in the global object otherwise. */
+VarRefLookup LangVarRef::lookupObj( Compiler *pd ) const
+{
+	if ( isLocalRef( pd ) )
+		return lookupQualification( pd, pd->curLocalFrame );
+
+	if ( isContextRef( pd ) )
+		return lookupQualification( pd, pd->context->contextObjDef );
+
+	return lookupQualification( pd, pd->globalObjectDef );
+}
+
+/* Resolve this reference as a field: find the object it is qualified by,
+ * then the field of that name in it. The lookup returned carries the field,
+ * its type and, when the field's type reference has one, the search type. */
+VarRefLookup LangVarRef::lookupField( Compiler *pd ) const
+{
+	/* The object that the field is in. */
+	VarRefLookup result = lookupObj( pd );
+
+	/* The field itself. */
+	ObjField *found = result.inObject->findField( name );
+	if ( found == 0 )
+		error(loc) << "cannot find name " << name << " in object" << endp;
+
+	result.objField = found;
+	result.uniqueType = found->typeRef->uniqueType;
+
+	UniqueType *searchUT = found->typeRef->searchUniqueType;
+	if ( searchUT != 0 )
+		result.iterSearchUT = searchUT;
+
+	return result;
+}
+
+
+/* Resolve this reference as a method call: find the object it is qualified
+ * by, then the method of that name in it. The lookup returned carries the
+ * method and its return type.
+ *
+ * When no such method exists the name is taken to be an object instead: it
+ * is appended to the qualification, the name becomes "finish" and the lookup
+ * is repeated. Note that this modifies the reference (qual and name).
+ *
+ * The repeated lookup is now stored in the lookup that is returned. It used
+ * to go into a shadowing local, so the result described the object of the
+ * original qualification rather than the one the method was found in. */
+VarRefLookup LangVarRef::lookupMethod( Compiler *pd )
+{
+	/* Lookup the object that the method is in. */
+	VarRefLookup lookup = lookupObj( pd );
+
+	/* Find the method. */
+	assert( lookup.inObject->objMethodMap != 0 );
+	ObjMethod *method = lookup.inObject->findMethod( name );
+	if ( method == 0 ) {
+		/* Not found as a method, try it as an object on which we will call a
+		 * default function. */
+		qual->append( QualItem( InputLoc(), name, QualItem::Dot ) );
+		name = "finish";
+
+		/* Redo the object lookup with the extended qualification. */
+		lookup = lookupObj( pd );
+
+		/* Find the method. */
+		assert( lookup.inObject->objMethodMap != 0 );
+		method = lookup.inObject->findMethod( name );
+		if ( method == 0 )
+			error(loc) << "cannot find " << name << "(...) in object" << endp;
+	}
+
+	lookup.objMethod = method;
+	lookup.uniqueType = method->returnUT;
+
+	return lookup;
+}
+
+/* Emit the instruction that stores into field 'el' of 'inObject', followed
+ * by the field offset when the field uses one. The reverting form of the
+ * instruction is used when revert is on in the compiler and requested by
+ * the caller. */
+void LangVarRef::setFieldInstr( Compiler *pd, CodeVect &code,
+		ObjectDef *inObject, ObjField *el, UniqueType *exprUT, bool revert ) const
+{
+	/* Ensure that the field is referenced. */
+	inObject->referenceField( pd, el );
+
+	const bool reverting = pd->revertOn && revert;
+	if ( reverting )
+		code.append( el->inSetWV );
+	else
+		code.append( el->inSetWC );
+
+	if ( !el->useOffset )
+		return;
+
+	code.appendHalf( el->offset );
+}
+
+/* Tell whether a value of type srcUT may be assigned to a destination of
+ * type destUT. destSearchUT is consulted only when the destination is an
+ * iterator. Identical types are always accepted. No code is emitted. */
+bool castAssignment( Compiler *pd, CodeVect &code, UniqueType *destUT,
+		UniqueType *destSearchUT, UniqueType *srcUT )
+{
+	if ( destUT == srcUT )
+		return true;
+
+	switch ( destUT->typeId ) {
+		case TYPE_TREE:
+			/* Casting trees to any. */
+			if ( destUT->langEl == pd->anyLangEl && srcUT->typeId == TYPE_TREE )
+				return true;
+
+			/* Setting a tree from a reference. */
+			if ( srcUT->typeId == TYPE_REF && destUT->langEl == srcUT->langEl )
+				return true;
+
+			/* Assigning nil to a tree. */
+			return srcUT->typeId == TYPE_NIL;
+
+		case TYPE_REF:
+			/* Setting a reference from a tree. */
+			return srcUT->typeId == TYPE_TREE &&
+					destUT->langEl == srcUT->langEl;
+
+		case TYPE_ITER:
+			/* Setting an iterator from a tree. */
+			return srcUT->typeId == TYPE_TREE &&
+					destSearchUT->langEl == srcUT->langEl;
+
+		case TYPE_PTR:
+			/* Assigning nil to a pointer. */
+			return srcUT->typeId == TYPE_NIL;
+	}
+
+	return false;
+}
+
+/* Emit the store into the field of 'inObject' named by this reference. It is
+ * an error if the object has no such field. */
+void LangVarRef::setField( Compiler *pd, CodeVect &code,
+		ObjectDef *inObject, UniqueType *exprUT, bool revert ) const
+{
+	ObjField *target = inObject->findField( name );
+	if ( target == 0 )
+		error(loc) << "cannot find name " << name << " in object" << endp;
+
+	setFieldInstr( pd, code, inObject, target, exprUT, revert );
+}
+
+/* Emit the store through an iterator: the set-current instruction of the
+ * iterator type objUT followed by the offset of the field named by this
+ * reference. It is an error if the object has no such field. */
+void LangVarRef::setFieldIter( Compiler *pd, CodeVect &code,
+		ObjectDef *inObject, UniqueType *objUT, UniqueType *exprType, bool revert ) const
+{
+	ObjField *target = inObject->findField( name );
+	if ( target == 0 )
+		error(loc) << "cannot find name " << name << " in object" << endp;
+
+	code.append( objUT->iterDef->inSetCurWC );
+	code.appendHalf( target->offset );
+}
+
+/* Emit the code that loads the value of this reference and return its type:
+ * first the loads for the qualifying object, if any, then the load of the
+ * field itself. */
+UniqueType *LangVarRef::evaluate( Compiler *pd, CodeVect &code, bool forWriting ) const
+{
+	VarRefLookup found = lookupField( pd );
+
+	/* The object, if any. */
+	loadObj( pd, code, found.lastPtrInQual, forWriting );
+
+	/* The field. */
+	return loadFieldInstr( pd, code, found.inObject,
+			found.objField, forWriting, false );
+}
+
+/* Verify that a reference may be taken of the looked-up variable, raising an
+ * error at this reference's location otherwise. */
+void LangVarRef::canTakeRef( Compiler *pd, VarRefLookup &lookup ) const
+{
+	/* Either the field lives directly in a frame object, or this is a local
+	 * reference whose lookup reports no pointer in the qualification
+	 * (lastPtrInQual < 0) and whose type is not a pointer. */
+	const bool allowed =
+		lookup.inObject->type == ObjectDef::FrameType ||
+		( isLocalRef(pd) && lookup.lastPtrInQual < 0 &&
+			lookup.uniqueType->typeId != TYPE_PTR );
+
+	if ( !allowed ) {
+		error(loc) << "can only take references of locals or "
+				"attributes accessed via a local" << endp;
+	}
+
+	/* Only one reference to a field may be outstanding at a time. */
+	if ( lookup.objField->refActive )
+		error(loc) << "reference currently active, cannot take another" << endp;
+}
+
+/* Preparation step for taking a reference: check that the reference is
+ * permitted, then emit the loads for the qualification. Returns the field
+ * referenced. */
+ObjField *LangVarRef::preEvaluateRef( Compiler *pd, CodeVect &code ) const
+{
+	VarRefLookup target = lookupField( pd );
+	canTakeRef( pd, target );
+	loadQualificationRefs( pd, code );
+	return target.objField;
+}
+
+/* Emit the instruction that produces a reference to the variable named here
+ * and return the field referenced. pushCount is written as an operand only
+ * when the reference has a qualification. */
+ObjField *LangVarRef::evaluateRef( Compiler *pd, CodeVect &code, long pushCount ) const
+{
+	VarRefLookup lookup = lookupField( pd );
+	canTakeRef( pd, lookup );
+
+	ObjField *field = lookup.objField;
+	const long qualLen = qual->length();
+
+	/* Make sure the field counts as referenced. */
+	lookup.inObject->referenceField( pd, field );
+
+	/* An unqualified reference marks the field itself as having a live
+	 * reference. */
+	if ( qualLen == 0 )
+		field->refActive = true;
+
+	/* Taking a reference implies possible writing, so the tree is assumed
+	 * to be dirty. */
+	field->dirtyTree = true;
+
+	/* Pick the opcode by the kind of thing referenced. Every form ends with
+	 * the field offset; the qualified form carries pushCount before it. */
+	if ( qualLen > 0 ) {
+		code.append( IN_REF_FROM_QUAL_REF );
+		code.appendHalf( pushCount );
+	}
+	else if ( field->typeRef->iterDef != 0 )
+		code.append( field->typeRef->iterDef->inRefFromCur );
+	else if ( field->typeRef->type == TypeRef::Ref )
+		code.append( IN_REF_FROM_REF );
+	else
+		code.append( IN_REF_FROM_LOCAL );
+
+	code.appendHalf( field->offset );
+
+	return field;
+}
+
+/* Emit the code that evaluates and pushes the arguments of a call to
+ * lookup.objMethod.
+ *
+ * The argument count must equal the method's numParams. Arguments whose
+ * parameter type is TYPE_REF must be plain variable references; all others
+ * are evaluated as expressions and cast to the parameter type.
+ *
+ * Returns a new[]-allocated array with one slot per argument. A slot holds
+ * the field referenced by a TYPE_REF argument and is null otherwise. The
+ * callers visible in this file hand the array to resetActiveRefs and then
+ * delete[] it. */
+ObjField **LangVarRef::evaluateArgs( Compiler *pd, CodeVect &code,
+ VarRefLookup &lookup, ExprVect *args ) const
+{
+ /* Parameter list is given only for user defined methods. Otherwise it
+ * will be null. */
+ ParameterList *paramList = lookup.objMethod->paramList;
+
+ /* Match the number of arguments. */
+ int numArgs = args != 0 ? args->length() : 0;
+ if ( numArgs != lookup.objMethod->numParams )
+ error(loc) << "wrong number of arguments" << endp;
+
+ /* This is for storing the object fields used by references. */
+ ObjField **paramRefs = new ObjField*[numArgs];
+ memset( paramRefs, 0, sizeof(ObjField*) * numArgs );
+
+ /* Evaluate and push the args. */
+ if ( args != 0 ) {
+ /* We use this only if there is a parameter list. Note that within this
+ * function the iterator is only positioned and advanced, never read. */
+ ParameterList::Iter p;
+ /* Running count handed to evaluateRef for qualified references. */
+ long pushCount = 0;
+
+ /* First pass we need to push object loads for reference parameters. */
+ paramList != 0 && ( p = *paramList );
+ for ( ExprVect::Iter pe = *args; pe.lte(); pe++ ) {
+ /* Get the expression and the UT for the arg. */
+ LangExpr *expression = *pe;
+ UniqueType *paramUT = lookup.objMethod->paramUTs[pe.pos()];
+
+ if ( paramUT->typeId == TYPE_REF ) {
+ /* Make sure we are dealing with a variable reference. */
+ if ( expression->type != LangExpr::TermType )
+ error(loc) << "not a term: argument must be a local variable" << endp;
+ if ( expression->term->type != LangTerm::VarRefType )
+ error(loc) << "not a variable: argument must be a local variable" << endp;
+
+ /* Lookup the field. */
+ LangVarRef *varRef = expression->term->varRef;
+
+ ObjField *refOf = varRef->preEvaluateRef( pd, code );
+ paramRefs[pe.pos()] = refOf;
+
+ /* Two per qualification item -- presumably the number of words the
+ * qualification loads leave on the stack; popRefQuals uses the same
+ * factor. */
+ pushCount += varRef->qual->length() * 2;
+ }
+
+ /* Advance the parameter list iterator if we have it. */
+ paramList != 0 && p.increment();
+ }
+
+ /* Second pass: emit the reference instructions and evaluate the
+ * remaining arguments, in argument order. */
+ paramList != 0 && ( p = *paramList );
+ for ( ExprVect::Iter pe = *args; pe.lte(); pe++ ) {
+ /* Get the expression and the UT for the arg. */
+ LangExpr *expression = *pe;
+ UniqueType *paramUT = lookup.objMethod->paramUTs[pe.pos()];
+
+ if ( paramUT->typeId == TYPE_REF ) {
+
+ /* Make sure we are dealing with a variable reference. */
+ if ( expression->type != LangExpr::TermType )
+ error(loc) << "not a term: argument must be a local variable" << endp;
+ if ( expression->term->type != LangTerm::VarRefType )
+ error(loc) << "not a variable: argument must be a local variable" << endp;
+
+ /* Lookup the field. */
+ LangVarRef *varRef = expression->term->varRef;
+
+ /* Take this argument's own qualification back out of the count
+ * before it is passed to evaluateRef. */
+ pushCount -= varRef->qual->length() * 2;
+
+ ObjField *refOf = varRef->evaluateRef( pd, code, pushCount );
+ paramRefs[pe.pos()] = refOf;
+
+ /* A reference argument adds two to the count. */
+ pushCount += 2;
+ }
+ else {
+ UniqueType *exprUT = expression->evaluate( pd, code );
+
+ if ( !castAssignment( pd, code, paramUT, 0, exprUT ) )
+ error(loc) << "arg " << pe.pos()+1 << " is of the wrong type" << endp;
+
+ /* Any other argument adds one to the count. */
+ pushCount += 1;
+ }
+
+ /* Advance the parameter list iterator if we have it. */
+ paramList != 0 && p.increment();
+ }
+ }
+
+ return paramRefs;
+}
+
+/* Clear the refActive flag on every field recorded in paramRefs. The array
+ * is read for one slot per parameter of lookup.objMethod; null slots are
+ * skipped. pd is not used. */
+void LangVarRef::resetActiveRefs( Compiler *pd, VarRefLookup &lookup, ObjField **paramRefs ) const
+{
+	const long numParams = lookup.objMethod->numParams;
+	for ( long i = 0; i < numParams; i++ ) {
+		ObjField *field = paramRefs[i];
+		if ( field != 0 )
+			field->refActive = false;
+	}
+}
+
+
+/* Emit the call instruction(s) for lookup.objMethod. The object is loaded
+ * first when the method asks for it (useCallObj). The WV opcode is used when
+ * reverting is enabled and the call needs reverting, the WC opcode
+ * otherwise. The parse-finish opcodes expand to a fixed multi-instruction
+ * sequence. The function id follows when the method uses one. */
+void LangVarRef::callOperation( Compiler *pd, CodeVect &code, VarRefLookup &lookup ) const
+{
+	/* A builtin (no function attached) that is not const is treated as a
+	 * write to the object. */
+	const bool forWriting = !lookup.objMethod->isConst &&
+			lookup.objMethod->func == 0;
+
+	if ( lookup.objMethod->useCallObj )
+		loadObj( pd, code, lookup.lastPtrInQual, forWriting );
+
+	/* Reverting is needed when the qualification has a pointer in it
+	 * (lastPtrInQual >= 0) or when this is not a local reference. */
+	const bool revert = lookup.lastPtrInQual >= 0 || !isLocalRef(pd);
+	const bool emitWV = pd->revertOn && revert;
+
+	if ( emitWV ) {
+		if ( lookup.objMethod->opcodeWV != IN_PARSE_FINISH_WV )
+			code.append( lookup.objMethod->opcodeWV );
+		else {
+			code.append( IN_PARSE_SAVE_STEPS );
+			code.append( IN_PARSE_LOAD_START );
+			code.append( IN_PARSE_FINISH_WV );
+			code.appendHalf( 0 );
+			code.append( IN_PCR_CALL );
+			code.append( IN_PARSE_FINISH_WV3 );
+		}
+	}
+	else {
+		if ( lookup.objMethod->opcodeWC != IN_PARSE_FINISH_WC )
+			code.append( lookup.objMethod->opcodeWC );
+		else {
+			code.append( IN_PARSE_SAVE_STEPS );
+			code.append( IN_PARSE_LOAD_START );
+			code.append( IN_PARSE_FINISH_WC );
+			code.appendHalf( 0 );
+			code.append( IN_PCR_CALL );
+			code.append( IN_PARSE_FINISH_WC3 );
+		}
+	}
+
+	if ( lookup.objMethod->useFuncId )
+		code.appendHalf( lookup.objMethod->funcId );
+}
+
+/* Emit a single IN_POP_N_WORDS for the qualifications of reference
+ * arguments: two words per qualification item of every argument whose
+ * parameter type is TYPE_REF. Nothing is emitted when there are no arguments
+ * or the total is zero. pd is not used. */
+void LangVarRef::popRefQuals( Compiler *pd, CodeVect &code,
+		VarRefLookup &lookup, ExprVect *args ) const
+{
+	if ( args == 0 )
+		return;
+
+	long words = 0;
+	for ( ExprVect::Iter arg = *args; arg.lte(); arg++ ) {
+		UniqueType *paramUT = lookup.objMethod->paramUTs[arg.pos()];
+		if ( paramUT->typeId != TYPE_REF )
+			continue;
+
+		/* Reference arguments are variable references. */
+		LangVarRef *refArg = (*arg)->term->varRef;
+		words += refArg->qual->length() * 2;
+	}
+
+	if ( words > 0 ) {
+		code.append( IN_POP_N_WORDS );
+		code.appendHalf( (short)words );
+	}
+}
+
+/* Emit a complete method call: arguments, the call itself, the pop of any
+ * reference qualifications, and the release of the references taken for the
+ * arguments. Returns the type found by the method lookup. */
+UniqueType *LangVarRef::evaluateCall( Compiler *pd, CodeVect &code, ExprVect *args )
+{
+	VarRefLookup method = lookupMethod( pd );
+
+	/* Arguments first; the array records fields referenced by them. */
+	ObjField **refFields = evaluateArgs( pd, code, method, args );
+
+	callOperation( pd, code, method );
+	popRefQuals( pd, code, method, args );
+
+	/* The references taken for the call are no longer active. */
+	resetActiveRefs( pd, method, refFields );
+	delete[] refFields;
+
+	return method.uniqueType;
+}
+
+/* Emit the code for a pattern match against the tree named by varRef.
+ * Returns the type of the tree matched against, which must be a tree
+ * type. */
+UniqueType *LangTerm::evaluateMatch( Compiler *pd, CodeVect &code ) const
+{
+	/* Give a bind id to every pattern item that binds a variable. */
+	for ( PatternItemList::Iter pi = *pattern->list; pi.lte(); pi++ ) {
+		if ( pi->varRef == 0 )
+			continue;
+		pi->bindId = pattern->nextBindId++;
+	}
+
+	UniqueType *subjectUT = varRef->evaluate( pd, code );
+	if ( subjectUT->typeId != TYPE_TREE )
+		error(varRef->loc) << "expected match against a tree type" << endp;
+
+	/* The pattern records the language element it is matched against; the
+	 * pattern parser needs it. */
+	pattern->langEl = subjectUT->langEl;
+
+	code.append( IN_MATCH );
+	code.appendHalf( pattern->patRepId );
+
+	/* Store into the bound variables, walking the items last to first. */
+	for ( PatternItemList::Iter pi = pattern->list->last(); pi.gtb(); pi-- ) {
+		if ( pi->varRef == 0 )
+			continue;
+
+		/* The type of the value bound by this item. */
+		UniqueType *boundUT = pd->findUniqueType( TYPE_TREE, pi->factor->langEl );
+
+		/* Where it is to be stored. */
+		VarRefLookup dest = pi->varRef->lookupField( pd );
+
+		pi->varRef->loadObj( pd, code, dest.lastPtrInQual, false );
+		pi->varRef->setField( pd, code, dest.inObject, boundUT, false );
+	}
+
+	return subjectUT;
+}
+
+/* Emit the code for a 'new' expression: evaluate the operand, which must be
+ * a tree, then emit IN_TREE_NEW. Returns the pointer type for the operand's
+ * language element. */
+UniqueType *LangTerm::evaluateNew( Compiler *pd, CodeVect &code ) const
+{
+	/* Evaluate the expression. */
+	UniqueType *ut = expr->evaluate( pd, code );
+
+	/* Report the error at this term's location, as the other checks in
+	 * LangTerm do, so the user can find the offending expression. */
+	if ( ut->typeId != TYPE_TREE )
+		error(loc) << "new can only be applied to tree types" << endp;
+
+	code.append( IN_TREE_NEW );
+	return pd->findUniqueType( TYPE_PTR, ut->langEl );
+}
+
+/* Emit the stores for the field initializers of a construct. Each
+ * initializer is matched to an object field by its position in the list and
+ * its already-evaluated type (exprUT) is checked against the field's type.
+ * Does nothing when there are no initializers. */
+void LangTerm::assignFieldArgs( Compiler *pd, CodeVect &code, UniqueType *replUT ) const
+{
+	if ( fieldInitArgs == 0 || fieldInitArgs->length() <= 0 )
+		return;
+
+	ObjectDef *objDef = objDefFromUT( pd, replUT );
+
+	/* Walk backwards: the initializer evaluated last is the one on top of
+	 * the stack. */
+	for ( FieldInitVect::Iter init = fieldInitArgs->last(); init.gtb(); init-- ) {
+		FieldInit *fieldInit = *init;
+
+		ObjField *target = objDef->findFieldNum( init.pos() );
+		if ( target == 0 ) {
+			error(fieldInit->loc) << "failed to find init pos " <<
+					init.pos() << " in object" << endp;
+		}
+
+		/* The initializer's type must be assignable to the field's type. */
+		UniqueType *targetUT = target->typeRef->uniqueType;
+		if ( !castAssignment( pd, code, targetUT, 0, fieldInit->exprUT ) )
+			error(fieldInit->loc) << "type mismatch in initialization" << endp;
+
+		/* This form of set-field must leave the object on top of the stack
+		 * for the next store. */
+		code.append( IN_SET_FIELD_LEAVE_WC );
+		code.appendHalf( target->offset );
+	}
+}
+
+/* Emit the code for a construct expression.
+ *
+ * Order of emission: field initializer expressions, the expressions embedded
+ * in the replacement (last to first), IN_CONSTRUCT, extra input setup when
+ * the constructed type is a parser generic, the field initializer stores,
+ * and finally an optional store into varRef.
+ *
+ * Returns the type being constructed, which must be a tree type. */
+UniqueType *LangTerm::evaluateConstruct( Compiler *pd, CodeVect &code ) const
+{
+	/* Evaluate the initialization expressions, remembering each type for
+	 * assignFieldArgs. */
+	if ( fieldInitArgs != 0 && fieldInitArgs->length() > 0 ) {
+		for ( FieldInitVect::Iter pi = *fieldInitArgs; pi.lte(); pi++ ) {
+			FieldInit *fieldInit = *pi;
+			fieldInit->exprUT = fieldInit->expr->evaluate( pd, code );
+		}
+	}
+
+	/* Assign bind ids to the variables in the replacement. */
+	for ( ReplItemList::Iter item = *replacement->list; item.lte(); item++ ) {
+		if ( item->expr != 0 )
+			item->bindId = replacement->nextBindId++;
+	}
+
+	/* Evaluate variable references, last item first. */
+	for ( ReplItemList::Iter item = replacement->list->last(); item.gtb(); item-- ) {
+		if ( item->type == ReplItem::ExprType ) {
+			UniqueType *ut = item->expr->evaluate( pd, code );
+
+			/* Reported at this term's location, like the type check further
+			 * down, so the message points at the construct. */
+			if ( ut->typeId != TYPE_TREE )
+				error(loc) << "variables used in replacements must be trees" << endp;
+
+			item->langEl = ut->langEl;
+		}
+	}
+
+	/* Construct the tree using the tree information stored in the compiled
+	 * code. */
+	code.append( IN_CONSTRUCT );
+	code.appendHalf( replacement->patRepId );
+
+	/* Lookup the type of the replacement and store it in the replacement
+	 * object so that replacement parsing has a target. */
+	UniqueType *replUT = typeRef->uniqueType;
+	if ( replUT->typeId != TYPE_TREE )
+		error(loc) << "don't know how to construct this type" << endp;
+
+	/* A parser generic additionally gets an input constructed and set. */
+	if ( replUT->langEl->generic != 0 && replUT->langEl->generic->typeId == GEN_PARSER ) {
+		code.append( IN_CONSTRUCT_INPUT );
+		code.append( IN_DUP_TOP_OFF );
+		code.appendHalf( 1 );
+		code.append( IN_SET_INPUT );
+	}
+
+	replacement->langEl = replUT->langEl;
+	assignFieldArgs( pd, code, replUT );
+
+	/* Optionally store the result in a variable, keeping a copy on the stack
+	 * as the value of the expression. */
+	if ( varRef != 0 ) {
+		code.append( IN_DUP_TOP );
+
+		/* Get the type of the variable being assigned to. */
+		VarRefLookup lookup = varRef->lookupField( pd );
+
+		varRef->loadObj( pd, code, lookup.lastPtrInQual, false );
+		varRef->setField( pd, code, lookup.inObject, replUT, false );
+	}
+
+	return replUT;
+}
+
+/* Emit the code for a parse expression (parse_stop when stop is true).
+ *
+ * The type parsed must be a tree type. When its language element has a
+ * context (contextIn != 0) the arguments are (context, input); otherwise the
+ * single argument is the input. A parser is constructed, given its context
+ * and input, then run with a parse-frag sequence immediately followed by a
+ * parse-finish sequence. The result is optionally stored in varRef.
+ *
+ * Returns the type parsed. */
+UniqueType *LangTerm::evaluateParse( Compiler *pd, CodeVect &code, bool stop ) const
+{
+	UniqueType *ut = typeRef->uniqueType;
+	assert( ut != 0 );
+
+	/* This must end with endp like every other check here: ut->langEl is
+	 * dereferenced below, so carrying on after the message is not safe. */
+	if ( ut->typeId != TYPE_TREE )
+		error(loc) << "can only parse trees" << endp;
+
+	/* One argument (input) or two (context, input). */
+	if ( args == 0 || ( args->length() != 1 && args->length() != 2 ) )
+		error(loc) << "expecting one or two args" << endp;
+
+	/* Indices into args; context is -1 when the type takes no context. */
+	int context, input;
+	if ( ut->langEl->contextIn == 0 ) {
+		if ( args->length() != 1 )
+			error(loc) << "parse command requires just input" << endp;
+		context = -1;
+		input = 0;
+	}
+	else {
+		if ( args->length() != 2 )
+			error(loc) << "parse command requires context and input" << endp;
+		context = 0;
+		input = 1;
+	}
+
+	/*
+	 * Make the parser.
+	 */
+	code.append( IN_CONSTRUCT );
+	code.appendHalf( replacement->patRepId );
+
+	/* Duplicate the parser once. The context and input stores below each
+	 * emit IN_DUP_TOP_OFF with operand 1 before the store instruction. */
+	code.append( IN_DUP_TOP );
+
+	/*
+	 * First load the context into the parser.
+	 */
+	if ( context < 0 ) {
+		code.append( IN_LOAD_NIL );
+	}
+	else {
+		UniqueType *argUT = args->data[context]->evaluate( pd, code );
+		if ( argUT != pd->uniqueTypeStream && argUT->typeId != TYPE_TREE )
+			error(loc) << "context argument must be a stream or a tree" << endp;
+	}
+
+	/* FIXME: need to select right one here. */
+	code.append( IN_DUP_TOP_OFF );
+	code.appendHalf( 1 );
+	code.append( IN_SET_ACCUM_CTX_WC );
+
+	/*
+	 * Evaluate the input argument.
+	 */
+	UniqueType *argUT = args->data[input]->evaluate( pd, code );
+	if ( argUT != pd->uniqueTypeStream && argUT->typeId != TYPE_TREE )
+		error(loc) << "input argument must be a stream or a tree" << endp;
+
+	/* Allocate a parser id. This will cause a parser to be built for
+	 * the type. */
+	if ( ut->langEl->parserId < 0 )
+		ut->langEl->parserId = pd->nextParserId++;
+
+	/* Record that the type is used with parse stop. */
+	if ( stop )
+		ut->langEl->parseStop = true;
+
+	/* Anything other than an input object is wrapped: construct an input
+	 * and append the argument to it. */
+	if ( argUT != pd->uniqueTypeInput ) {
+		code.append( IN_CONSTRUCT_INPUT );
+		if ( pd->revertOn )
+			code.append( IN_INPUT_APPEND_WV );
+		else
+			code.append( IN_INPUT_APPEND_WC );
+	}
+
+	code.append( IN_DUP_TOP_OFF );
+	code.appendHalf( 1 );
+	code.append( IN_SET_INPUT );
+
+	/* Operand of the frag/finish instructions: the language element id for
+	 * parse_stop, zero otherwise. */
+	int stopId = stop ? ut->langEl->id : 0;
+
+	/* Parse instruction, dependent on whether or not we are producing revert
+	 * or commit code. */
+	if ( pd->revertOn ) {
+		code.append( IN_PARSE_SAVE_STEPS );
+		code.append( IN_PARSE_LOAD_START );
+		code.append( IN_PARSE_FRAG_WV );
+		code.appendHalf( stopId );
+		code.append( IN_PCR_CALL );
+		code.append( IN_PARSE_FRAG_WV3 );
+
+		/* Finish immediately. */
+		code.append( IN_PARSE_SAVE_STEPS );
+		code.append( IN_PARSE_LOAD_START );
+		code.append( IN_PARSE_FINISH_WV );
+		code.appendHalf( stopId );
+		code.append( IN_PCR_CALL );
+		code.append( IN_PARSE_FINISH_WV3 );
+	}
+	else {
+		code.append( IN_PARSE_SAVE_STEPS );
+		code.append( IN_PARSE_LOAD_START );
+		code.append( IN_PARSE_FRAG_WC );
+		code.appendHalf( stopId );
+		code.append( IN_PCR_CALL );
+		code.append( IN_PARSE_FRAG_WC3 );
+
+		/* Finish immediately. */
+		code.append( IN_PARSE_SAVE_STEPS );
+		code.append( IN_PARSE_LOAD_START );
+		code.append( IN_PARSE_FINISH_WC );
+		code.appendHalf( stopId );
+		code.append( IN_PCR_CALL );
+		code.append( IN_PARSE_FINISH_WC3 );
+	}
+
+	/* Store the generic's language element in the replacement object so
+	 * that replacement parsing has a target. */
+	replacement->langEl = generic->langEl;
+
+	/* Optionally store the result in a variable, keeping a copy on the stack
+	 * as the value of the expression. */
+	if ( varRef != 0 ) {
+		code.append( IN_DUP_TOP );
+
+		/* Get the type of the variable being assigned to. */
+		VarRefLookup lookup = varRef->lookupField( pd );
+
+		varRef->loadObj( pd, code, lookup.lastPtrInQual, false );
+		varRef->setField( pd, code, lookup.inObject, ut, false );
+	}
+
+	return ut;
+}
+
+/* Emit the code for a string with embedded items: one load (or expression
+ * evaluation) per item, followed by enough IN_CONCAT_STR instructions to
+ * join them all. Literal pieces are interned in pd->literalStrings. Returns
+ * the string type. */
+UniqueType *LangTerm::evaluateEmbedString( Compiler *pd, CodeVect &code ) const
+{
+	for ( ReplItemList::Iter piece = *replItemList; piece.lte(); piece++ ) {
+		switch ( piece->type ) {
+			case ReplItem::FactorType: {
+				/* A literal factor: interpret the literal's text first. */
+				String text;
+				bool ignoredCI;
+				prepareLitString( text, ignoredCI,
+						piece->factor->typeRef->pdaLiteral->token.data,
+						piece->factor->typeRef->pdaLiteral->token.loc );
+
+				/* Intern it; a new entry takes the next index. */
+				StringMapEl *entry = 0;
+				if ( pd->literalStrings.insert( text, &entry ) )
+					entry->value = pd->literalStrings.length()-1;
+
+				code.append( IN_LOAD_STR );
+				code.appendWord( entry->value );
+				break;
+			}
+			case ReplItem::InputText: {
+				/* Raw text: interned as is. */
+				StringMapEl *entry = 0;
+				if ( pd->literalStrings.insert( piece->data, &entry ) )
+					entry->value = pd->literalStrings.length()-1;
+
+				code.append( IN_LOAD_STR );
+				code.appendWord( entry->value );
+				break;
+			}
+			case ReplItem::ExprType:
+				piece->expr->evaluate( pd, code );
+				break;
+		}
+	}
+
+	/* N items need N-1 concatenations. */
+	for ( long remaining = replItemList->length() - 1; remaining > 0; remaining-- )
+		code.append( IN_CONCAT_STR );
+
+	return pd->uniqueTypeStr;
+}
+
+/* Emit the code for this term and return the type of the value it
+ * produces. Dispatches on the term's type; the larger cases are delegated to
+ * the evaluateXxx helpers. Returns 0 only if the type matches no case. */
+UniqueType *LangTerm::evaluate( Compiler *pd, CodeVect &code ) const
+{
+	switch ( type ) {
+		case VarRefType:
+			return varRef->evaluate( pd, code );
+		case MethodCallType:
+			return varRef->evaluateCall( pd, code, args );
+		case NilType:
+			code.append( IN_LOAD_NIL );
+			return pd->uniqueTypeNil;
+		case TrueType:
+			code.append( IN_LOAD_TRUE );
+			return pd->uniqueTypeBool;
+		case FalseType:
+			code.append( IN_LOAD_FALSE );
+			return pd->uniqueTypeBool;
+		case MakeTokenType:
+			return evaluateMakeToken( pd, code );
+		case MakeTreeType:
+			return evaluateMakeTree( pd, code );
+		case NumberType: {
+			/* The literal's text is converted with atoi and loaded as a
+			 * word. */
+			unsigned int n = atoi( data );
+			code.append( IN_LOAD_INT );
+			code.appendWord( n );
+			return pd->uniqueTypeInt;
+		}
+		case StringType: {
+			String interp;
+			bool unused;
+			prepareLitString( interp, unused, data, InputLoc() );
+
+			/* Make sure we have this string. A new entry takes the next
+			 * index. */
+			StringMapEl *mapEl = 0;
+			if ( pd->literalStrings.insert( interp, &mapEl ) )
+				mapEl->value = pd->literalStrings.length()-1;
+
+			code.append( IN_LOAD_STR );
+			code.appendWord( mapEl->value );
+			return pd->uniqueTypeStr;
+		}
+		case MatchType:
+			return evaluateMatch( pd, code );
+		case ParseType:
+			return evaluateParse( pd, code, false );
+		case ParseStopType:
+			return evaluateParse( pd, code, true );
+		case ConstructType:
+			return evaluateConstruct( pd, code );
+		case NewType:
+			return evaluateNew( pd, code );
+		case TypeIdType: {
+			/* The type must be a tree type; its language element id is
+			 * loaded as an int. The error carries this term's location. */
+			UniqueType *ut = typeRef->uniqueType;
+			if ( ut->typeId != TYPE_TREE )
+				error(loc) << "typeid can only be applied to tree types" << endp;
+
+			code.append( IN_LOAD_INT );
+			code.appendWord( ut->langEl->id );
+			return pd->uniqueTypeInt;
+		}
+		case SearchType: {
+			/* The type searched for must be a tree type. */
+			UniqueType *ut = typeRef->uniqueType;
+			if ( ut->typeId != TYPE_TREE )
+				error(loc) << "can only search for tree types" << endp;
+
+			/* So must the thing searched. This ends with endp so that no
+			 * search instruction is emitted for a non-tree operand. */
+			UniqueType *treeUT = varRef->evaluate( pd, code );
+			if ( treeUT->typeId != TYPE_TREE )
+				error(loc) << "search can be applied only to tree types" << endp;
+
+			code.append( IN_TREE_SEARCH );
+			code.appendWord( ut->langEl->id );
+			return ut;
+		}
+		case EmbedStringType: {
+			return evaluateEmbedString( pd, code );
+		}
+	}
+	return 0;
+}
+
+/* Emit the code for this expression and return the type of its value.
+ *
+ * Binary: operands are evaluated left then right. '+' accepts two ints or
+ * two strings; '-', '*' and '/' accept two ints; '==' and '!=' require both
+ * sides to have the same type; the ordering comparisons perform no type
+ * check here. The logical operators short-circuit with a jump over the right
+ * operand.
+ *
+ * Unary: the operand is 'right'.
+ *
+ * Term: delegated to the term. */
+UniqueType *LangExpr::evaluate( Compiler *pd, CodeVect &code ) const
+{
+	switch ( type ) {
+		case BinaryType: {
+			switch ( op ) {
+				case '+': {
+					UniqueType *lt = left->evaluate( pd, code );
+					UniqueType *rt = right->evaluate( pd, code );
+
+					if ( lt == pd->uniqueTypeInt && rt == pd->uniqueTypeInt ) {
+						code.append( IN_ADD_INT );
+						return pd->uniqueTypeInt;
+					}
+
+					if ( lt == pd->uniqueTypeStr && rt == pd->uniqueTypeStr ) {
+						code.append( IN_CONCAT_STR );
+						return pd->uniqueTypeStr;
+					}
+
+					error(loc) << "do not have an addition operator for these types" << endp;
+					break;
+				}
+				case '-': {
+					UniqueType *lt = left->evaluate( pd, code );
+					UniqueType *rt = right->evaluate( pd, code );
+
+					if ( lt == pd->uniqueTypeInt && rt == pd->uniqueTypeInt ) {
+						code.append( IN_SUB_INT );
+						return pd->uniqueTypeInt;
+					}
+
+					error(loc) << "do not have a subtraction operator for these types" << endp;
+					break;
+				}
+				case '*': {
+					UniqueType *lt = left->evaluate( pd, code );
+					UniqueType *rt = right->evaluate( pd, code );
+
+					if ( lt == pd->uniqueTypeInt && rt == pd->uniqueTypeInt ) {
+						code.append( IN_MULT_INT );
+						return pd->uniqueTypeInt;
+					}
+
+					error(loc) << "do not have a multiplication "
+							"operator for these types" << endp;
+					break;
+				}
+				case '/': {
+					UniqueType *lt = left->evaluate( pd, code );
+					UniqueType *rt = right->evaluate( pd, code );
+
+					if ( lt == pd->uniqueTypeInt && rt == pd->uniqueTypeInt ) {
+						code.append( IN_DIV_INT );
+						return pd->uniqueTypeInt;
+					}
+
+					error(loc) << "do not have a division "
+							"operator for these types" << endp;
+					break;
+				}
+				case OP_DoubleEql: {
+					UniqueType *lt = left->evaluate( pd, code );
+					UniqueType *rt = right->evaluate( pd, code );
+
+					if ( lt != rt )
+						error(loc) << "comparison of different types" << endp;
+
+					code.append( IN_TST_EQL );
+					return pd->uniqueTypeBool;
+				}
+				case OP_NotEql: {
+					UniqueType *lt = left->evaluate( pd, code );
+					UniqueType *rt = right->evaluate( pd, code );
+
+					if ( lt != rt )
+						error(loc) << "comparison of different types" << endp;
+
+					code.append( IN_TST_NOT_EQL );
+					return pd->uniqueTypeBool;
+				}
+				case '<': {
+					left->evaluate( pd, code );
+					right->evaluate( pd, code );
+
+					code.append( IN_TST_LESS );
+					return pd->uniqueTypeBool;
+				}
+				case '>': {
+					left->evaluate( pd, code );
+					right->evaluate( pd, code );
+
+					code.append( IN_TST_GRTR );
+					return pd->uniqueTypeBool;
+				}
+				case OP_LessEql: {
+					left->evaluate( pd, code );
+					right->evaluate( pd, code );
+
+					code.append( IN_TST_LESS_EQL );
+					return pd->uniqueTypeBool;
+				}
+				case OP_GrtrEql: {
+					left->evaluate( pd, code );
+					right->evaluate( pd, code );
+
+					code.append( IN_TST_GRTR_EQL );
+					return pd->uniqueTypeBool;
+				}
+				case OP_LogicalAnd: {
+					/* Evaluate the left and duplicate it. */
+					left->evaluate( pd, code );
+					code.append( IN_DUP_TOP );
+
+					/* Jump over the right if false, leaving the original left
+					 * result on the top of the stack. We don't know the
+					 * distance yet so record the position of the jump. */
+					long jump = code.length();
+					code.append( IN_JMP_FALSE );
+					code.appendHalf( 0 );
+
+					/* Evaluate the right, add the test. */
+					right->evaluate( pd, code );
+					code.append( IN_TST_LOGICAL_AND );
+
+					/* Back-patch the jump operand: the distance is measured
+					 * from the end of the three-byte jump instruction. */
+					long distance = code.length() - jump - 3;
+					code.setHalf( jump+1, distance );
+
+					return pd->uniqueTypeInt;
+				}
+				case OP_LogicalOr: {
+					/* Evaluate the left and duplicate it. */
+					left->evaluate( pd, code );
+					code.append( IN_DUP_TOP );
+
+					/* Jump over the right if true, leaving the original left
+					 * result on the top of the stack. We don't know the
+					 * distance yet so record the position of the jump. */
+					long jump = code.length();
+					code.append( IN_JMP_TRUE );
+					code.appendHalf( 0 );
+
+					/* Evaluate the right, add the test. */
+					right->evaluate( pd, code );
+					code.append( IN_TST_LOGICAL_OR );
+
+					/* Back-patch the jump operand: the distance is measured
+					 * from the end of the three-byte jump instruction. */
+					long distance = code.length() - jump - 3;
+					code.setHalf( jump+1, distance );
+
+					return pd->uniqueTypeInt;
+				}
+			}
+
+			/* Reached for an unknown operator, or after a failed arithmetic
+			 * type check should the error return. */
+			assert(false);
+			return 0;
+		}
+		case UnaryType: {
+			switch ( op ) {
+				case '!': {
+					/* Evaluate the operand and negate it. */
+					right->evaluate( pd, code );
+					code.append( IN_NOT );
+					return pd->uniqueTypeBool;
+				}
+				case '$': {
+					right->evaluate( pd, code );
+					code.append( IN_TREE_TO_STR );
+					return pd->uniqueTypeStr;
+				}
+				case '%': {
+					right->evaluate( pd, code );
+					code.append( IN_TREE_TO_STR_NOTRIM );
+					return pd->uniqueTypeStr;
+				}
+				case '^': {
+					UniqueType *rt = right->evaluate( pd, code );
+					code.append( IN_TREE_TRIM );
+					return rt;
+				}
+				case OP_Deref: {
+					/* Only pointers can be dereferenced. This ends with endp
+					 * so that ut->langEl below is never taken from a
+					 * non-pointer type. */
+					UniqueType *ut = right->evaluate( pd, code );
+					if ( ut->typeId != TYPE_PTR )
+						error(loc) << "can only dereference pointers" << endp;
+
+					code.append( IN_PTR_DEREF_R );
+					ut = pd->findUniqueType( TYPE_TREE, ut->langEl );
+					return ut;
+				}
+				default:
+					assert(false);
+			}
+			return 0;
+		}
+		case TermType: {
+			return term->evaluate( pd, code );
+		}
+	}
+	return 0;
+}
+
+/* Emit the code that assigns a value of type exprUT to the variable named by
+ * this reference. The target must not have an active reference, must not be
+ * reached through a const qualification and must not itself be const. The
+ * value is cast to the target's type when castAssignment allows it. */
+void LangVarRef::assignValue( Compiler *pd, CodeVect &code,
+		UniqueType *exprUT ) const
+{
+	/* Resolve the left hand side. */
+	VarRefLookup lhs = lookupField( pd );
+
+	if ( lhs.objField->refActive )
+		error(loc) << "reference active, cannot write to object" << endp;
+
+	if ( lhs.firstConstPart >= 0 ) {
+		error(loc) << "left hand side qualification \"" <<
+			qual->data[lhs.firstConstPart].data << "\" is const" << endp;
+	}
+
+	if ( lhs.objField->isConst )
+		error(loc) << "field \"" << name << "\" is const" << endp;
+
+	/* A write always dirties the tree. */
+	lhs.objField->dirtyTree = true;
+
+	/* Type check, with a possible cast emitted. */
+	UniqueType *targetUT = lhs.objField->typeRef->uniqueType;
+	assert( lhs.uniqueType == lhs.objField->typeRef->uniqueType );
+	if ( !castAssignment( pd, code, targetUT, lhs.iterSearchUT, exprUT ) )
+		error(loc) << "type mismatch in assignment" << endp;
+
+	/* The store is reverted when the qualification has a pointer in it
+	 * (lastPtrInQual >= 0) or when this is not a local reference. */
+	const bool revert = lhs.lastPtrInQual >= 0 || !isLocalRef(pd);
+
+	/* Load the object, then store: iterators have their own instruction. */
+	loadObj( pd, code, lhs.lastPtrInQual, true );
+
+	if ( lhs.uniqueType->typeId != TYPE_ITER )
+		setField( pd, code, lhs.inObject, exprUT, revert );
+	else
+		setFieldIter( pd, code, lhs.inObject, lhs.uniqueType, exprUT, false );
+}
+
+/* Emit the code for make_token: evaluate every argument in order, then emit
+ * IN_MAKE_TOKEN followed by the argument count. At least two arguments are
+ * required; the first must be an int and the second a string. No check on
+ * the compile context is made here (a former check is disabled). Returns the
+ * 'any' type. */
+UniqueType *LangTerm::evaluateMakeToken( Compiler *pd, CodeVect &code ) const
+{
+	/* A missing argument list counts as zero arguments. */
+	const int numArgs = ( args == 0 ) ? 0 : args->length();
+	if ( numArgs < 2 )
+		error(loc) << "need at least two arguments" << endp;
+
+	for ( ExprVect::Iter arg = *args; arg.lte(); arg++ ) {
+		UniqueType *argUT = (*arg)->evaluate( pd, code );
+
+		if ( arg.pos() == 0 && argUT != pd->uniqueTypeInt )
+			error(loc) << "first arg, id, must be an int" << endp;
+
+		if ( arg.pos() == 1 && argUT != pd->uniqueTypeStr )
+			error(loc) << "second arg, length, must be a string" << endp;
+	}
+
+	/* All arguments are on the stack; emit the instruction and its count
+	 * operand. */
+	code.append( IN_MAKE_TOKEN );
+	code.append( args->length() );
+
+	return pd->uniqueTypeAny;
+}
+
+/* Emit the code for make_tree: evaluate every argument in order, then emit
+ * IN_MAKE_TREE followed by the argument count. Only allowed while compiling
+ * a translation block. At least one argument is required and the first must
+ * be an int. Returns the 'any' type. */
+UniqueType *LangTerm::evaluateMakeTree( Compiler *pd, CodeVect &code ) const
+{
+	if ( pd->compileContext != Compiler::CompileTranslation )
+		error(loc) << "make_tree can be used only in a translation block" << endp;
+
+	/* A missing argument list counts as zero arguments. */
+	const int numArgs = ( args == 0 ) ? 0 : args->length();
+	if ( numArgs < 1 )
+		error(loc) << "need at least one argument" << endp;
+
+	for ( ExprVect::Iter arg = *args; arg.lte(); arg++ ) {
+		UniqueType *argUT = (*arg)->evaluate( pd, code );
+
+		if ( arg.pos() == 0 && argUT != pd->uniqueTypeInt )
+			error(loc) << "first arg, nonterm id, must be an int" << endp;
+	}
+
+	/* All arguments are on the stack; emit the instruction and its count
+	 * operand. */
+	code.append( IN_MAKE_TREE );
+	code.append( args->length() );
+
+	return pd->uniqueTypeAny;
+}
+
+/* Emit the loop of a for-iterator statement, given an iterator that has
+ * already been created in objField: advance, test, body, jump back, then
+ * destroy the iterator. Pending break jumps recorded in pd->breakJumps are
+ * patched to land just before the destroy instruction.
+ *
+ * iterUT supplies the iterator's advance and destroy opcodes. */
+void LangStmt::compileForIterBody( Compiler *pd,
+ CodeVect &code, UniqueType *iterUT ) const
+{
+ /* Remember the top of the loop. */
+ long top = code.length();
+
+ /* Advance */
+ code.append( iterUT->iterDef->inAdvance );
+ code.appendHalf( objField->offset );
+
+ /* Test: jump past the loop body if false. Note that we don't have the
+ * distance yet. */
+ long jumpFalse = code.length();
+ code.append( IN_JMP_FALSE );
+ code.appendHalf( 0 );
+
+ /*
+ * Set up the loop cleanup code.
+ */
+
+ /* Set up the current loop cleanup, starting from a copy of the enclosing
+ * loop's cleanup if there is one. */
+ CodeVect loopCleanup;
+ if ( pd->loopCleanup != 0 )
+ loopCleanup.setAs( *pd->loopCleanup );
+
+ /* Add the cleanup for the current loop. */
+ loopCleanup.append( iterUT->iterDef->inDestroy );
+ loopCleanup.appendHalf( objField->offset );
+
+ /* Push the loop cleanup. It is a local, so it is only installed for the
+ * duration of the body compile below. */
+ CodeVect *oldLoopCleanup = pd->loopCleanup;
+ pd->loopCleanup = &loopCleanup;
+
+ /* Compile the contents. */
+ for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ )
+ stmt->compile( pd, code );
+
+ pd->loopCleanup = oldLoopCleanup;
+
+ /* Jump back to the top to retest. The +3 presumably accounts for the size
+ * of the jump instruction itself (opcode plus a half operand). */
+ long retestDist = code.length() - top + 3;
+ code.append( IN_JMP );
+ code.appendHalf( -retestDist );
+
+ /* Set the jump false distance, measured from the end of that jump
+ * instruction. */
+ long falseDist = code.length() - jumpFalse - 3;
+ code.setHalf( jumpFalse+1, falseDist );
+
+ /* Compute the jump distance for the break jumps. */
+ for ( LongVect::Iter brk = pd->breakJumps; brk.lte(); brk++ ) {
+ long distance = code.length() - *brk - 3;
+ code.setHalf( *brk+1, distance );
+ }
+ pd->breakJumps.empty();
+
+ /* Destroy the iterator. */
+ code.append( iterUT->iterDef->inDestroy );
+ code.appendHalf( objField->offset );
+
+ /* NOTE(review): nothing is emitted here for pre-pushed args; the caller
+ * compileForIter calls popRefQuals after this function returns. */
+}
+
+/* Build the iterator call used when a for statement names a variable rather
+ * than an iterator call. The variable must be a local (frame) variable of
+ * tree, ref, iter or pointer type; the result is a call term for "triter"
+ * with that variable as its only argument. */
+LangTerm *LangStmt::chooseDefaultIter( Compiler *pd, LangTerm *fromVarRef ) const
+{
+	/* The choice is driven by the type of the variable. */
+	VarRefLookup root = fromVarRef->varRef->lookupField( pd );
+
+	if ( root.inObject->type != ObjectDef::FrameType )
+		error(loc) << "root of iteration must be a local" << endp;
+
+	const bool hasDefaultIter =
+		root.uniqueType->typeId == TYPE_TREE ||
+		root.uniqueType->typeId == TYPE_REF ||
+		root.uniqueType->typeId == TYPE_ITER ||
+		root.uniqueType->typeId == TYPE_PTR;
+
+	LangVarRef *iterName = 0;
+	if ( hasDefaultIter ) {
+		/* The iterator name, with an empty qualification. */
+		iterName = new LangVarRef( loc, new QualItemVect, "triter" );
+	}
+	else {
+		error(loc) << "there is no default iterator for a "
+				"root of that type" << endp;
+	}
+
+	/* The single argument: the variable iterated over. */
+	ExprVect *iterArgs = new ExprVect;
+	iterArgs->append( new LangExpr( new LangTerm(
+			LangTerm::VarRefType, fromVarRef->varRef ) ) );
+
+	return new LangTerm( iterName, iterArgs );
+}
+
+/* Emit the code for a for-iterator statement: resolve the iterator call
+ * (choosing a default one when the term is not a method call), type and
+ * initialize the loop variable's field, push the call arguments, emit the
+ * iterator create instruction with its operands, emit the loop via
+ * compileForIterBody, then pop reference qualifications and release the
+ * references taken for the arguments. */
+void LangStmt::compileForIter( Compiler *pd, CodeVect &code ) const
+{
+ pd->curLocalFrame->iterPushScope();
+
+ /* Anything that is not already a method call gets a default iterator. */
+ LangTerm *iterCallTerm = langTerm;
+ if ( iterCallTerm->type != LangTerm::MethodCallType )
+ iterCallTerm = chooseDefaultIter( pd, langTerm );
+
+ /* The type we are searching for. */
+ UniqueType *searchUT = typeRef->uniqueType;
+
+ /*
+ * Declare the iterator variable.
+ */
+ VarRefLookup lookup = iterCallTerm->varRef->lookupMethod( pd );
+ if ( lookup.objMethod->iterDef == 0 ) {
+ error(loc) << "attempt to iterate using something "
+ "that is not an iterator" << endp;
+ }
+
+ /* Now that we have done the iterator call lookup we can make the type
+ * reference for the object field. */
+ UniqueType *iterUniqueType = pd->findUniqueType( TYPE_ITER, lookup.objMethod->iterDef );
+ objField->typeRef = new TypeRef( loc, lookup.objMethod->iterDef, iterUniqueType, searchUT );
+
+ /* Also force the field to be initialized. */
+ pd->curLocalFrame->initField( pd, objField );
+
+ /*
+ * Create the iterator from the local var.
+ */
+
+ UniqueType *iterUT = objField->typeRef->uniqueType;
+
+ /* Evaluate and push the arguments. */
+ ObjField **paramRefs = iterCallTerm->varRef->evaluateArgs(
+ pd, code, lookup, iterCallTerm->args );
+
+ /* The create opcode depends on whether revert code is being produced. */
+ if ( pd->revertOn )
+ code.append( iterUT->iterDef->inCreateWV );
+ else
+ code.append( iterUT->iterDef->inCreateWC );
+
+ /* Operands: the field offset, then the function id when the method has a
+ * function attached. */
+ code.appendHalf( objField->offset );
+ if ( lookup.objMethod->func != 0 )
+ code.appendHalf( lookup.objMethod->func->funcId );
+
+ /* Iterators that use a search type get its language element id as a
+ * further operand; a pointer search type uses the id from
+ * pd->uniqueTypePtr instead. */
+ if ( iterUT->iterDef->useSearchUT ) {
+ if ( searchUT->typeId == TYPE_PTR )
+ code.appendHalf( pd->uniqueTypePtr->langEl->id );
+ else
+ code.appendHalf( searchUT->langEl->id );
+ }
+
+ compileForIterBody( pd, code, iterUT );
+
+ /* Pop the qualifications pushed for reference arguments. */
+ iterCallTerm->varRef->popRefQuals( pd, code, lookup, iterCallTerm->args );
+
+ /* The references taken for the arguments are no longer active. */
+ iterCallTerm->varRef->resetActiveRefs( pd, lookup, paramRefs );
+ delete[] paramRefs;
+
+ pd->curLocalFrame->iterPopScope();
+}
+
+/* Compile a while loop: test, conditional jump past the body, body, jump back
+ * to the test. Break statements compiled inside the body record their jump
+ * positions in pd->breakJumps; those are back-patched here to land just after
+ * the loop. */
+void LangStmt::compileWhile( Compiler *pd, CodeVect &code ) const
+{
+	pd->curLocalFrame->iterPushScope();
+
+	/* pd->breakJumps is shared by all loops. Set aside any jumps already
+	 * recorded (they belong to an enclosing loop) so that this loop patches
+	 * and clears only its own breaks. Without this, a break that precedes a
+	 * nested loop in an outer body would be pointed at the end of the nested
+	 * loop. */
+	LongVect outerBreaks( pd->breakJumps );
+	pd->breakJumps.empty();
+
+	/* Generate code for the while test. Remember the top. */
+	long top = code.length();
+	expr->evaluate( pd, code );
+
+	/* Jump past the while block if false. Note that we don't have the
+	 * distance yet. */
+	long jumpFalse = code.length();
+	code.append( IN_JMP_FALSE );
+	code.appendHalf( 0 );
+
+	/* Compile the while block. */
+	for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ )
+		stmt->compile( pd, code );
+
+	/* Jump back to the top to retest. The +3 accounts for the jump
+	 * instruction itself (opcode plus half-word operand). */
+	long retestDist = code.length() - top + 3;
+	code.append( IN_JMP );
+	code.appendHalf( -retestDist );
+
+	/* Set the jump false distance. */
+	long falseDist = code.length() - jumpFalse - 3;
+	code.setHalf( jumpFalse+1, falseDist );
+
+	/* Point this loop's break jumps at the code following the loop. */
+	for ( LongVect::Iter brk = pd->breakJumps; brk.lte(); brk++ ) {
+		long distance = code.length() - *brk - 3;
+		code.setHalf( *brk+1, distance );
+	}
+
+	/* Hand the enclosing loop's pending breaks back. */
+	pd->breakJumps = outerBreaks;
+
+	pd->curLocalFrame->iterPopScope();
+}
+
+/* Emit code that feeds each item of the parser text to the parser referenced
+ * by varRef: the item's value is pushed, appended to the parser's input, and
+ * a parse-fragment sequence is run. The revert (WV) or commit (WC) opcodes
+ * are chosen according to pd->revertOn. */
+void LangStmt::evaluateParserItems( Compiler *pd, CodeVect &code ) const
+{
+	varRef->evaluate( pd, code );
+
+	for ( ReplItemList::Iter item = *parserText->list; item.lte(); item++ ) {
+		/* Push the value of the item. Text items are entered in the
+		 * literal string table and loaded by index. */
+		switch ( item->type ) {
+			case ReplItem::ExprType:
+				item->expr->evaluate( pd, code );
+				break;
+			case ReplItem::InputText: {
+				StringMapEl *strEl = 0;
+				if ( pd->literalStrings.insert( item->data, &strEl ) )
+					strEl->value = pd->literalStrings.length()-1;
+
+				code.append( IN_LOAD_STR );
+				code.appendWord( strEl->value );
+				break;
+			}
+			case ReplItem::FactorType: {
+				String literal;
+				bool unusedCI;
+				prepareLitString( literal, unusedCI,
+						item->factor->typeRef->pdaLiteral->token.data,
+						item->factor->typeRef->pdaLiteral->token.loc );
+
+				StringMapEl *strEl = 0;
+				if ( pd->literalStrings.insert( literal, &strEl ) )
+					strEl->value = pd->literalStrings.length()-1;
+
+				code.append( IN_LOAD_STR );
+				code.appendWord( strEl->value );
+				break;
+			}
+		}
+
+		const bool revert = pd->revertOn;
+
+		code.append( IN_DUP_TOP_OFF );
+		code.appendHalf( 1 );
+
+		/* Get the input and append the pushed value to it. */
+		code.append( IN_GET_INPUT );
+		code.append( revert ? IN_INPUT_APPEND_WV : IN_INPUT_APPEND_WC );
+		code.append( IN_POP );
+
+		code.append( IN_DUP_TOP );
+
+		/* The parse sequence. Only the fragment opcodes differ between
+		 * revert and commit code. */
+		code.append( IN_PARSE_SAVE_STEPS );
+		code.append( IN_PARSE_LOAD_START );
+		code.append( revert ? IN_PARSE_FRAG_WV : IN_PARSE_FRAG_WC );
+		code.appendHalf( 0 );
+		code.append( IN_PCR_CALL );
+		code.append( revert ? IN_PARSE_FRAG_WV3 : IN_PARSE_FRAG_WC3 );
+	}
+	code.append( IN_POP );
+}
+
+/* Compile this statement into 'code', dispatching on the statement type.
+ * Jumps whose targets are not yet known (return, break) are recorded in
+ * pd->returnJumps / pd->breakJumps for later back-patching. */
+void LangStmt::compile( Compiler *pd, CodeVect &code ) const
+{
+	switch ( type ) {
+		case PrintType:
+		case PrintXMLACType:
+		case PrintXMLType:
+		case PrintStreamType: {
+			/* Push the args backwards. The resulting types are not
+			 * needed, so they are not collected. */
+			for ( ExprVect::Iter pex = exprPtrVect->last(); pex.gtb(); pex-- )
+				(*pex)->evaluate( pd, code );
+
+			/* Run the printing forwards. The opcode is followed by the
+			 * argument count. */
+			if ( type == PrintType ) {
+				code.append( IN_PRINT );
+				code.append( exprPtrVect->length() );
+			}
+			else if ( type == PrintXMLACType ) {
+				code.append( IN_PRINT_XML_AC );
+				code.append( exprPtrVect->length() );
+			}
+			else if ( type == PrintXMLType ) {
+				code.append( IN_PRINT_XML );
+				code.append( exprPtrVect->length() );
+			}
+			else if ( type == PrintStreamType ) {
+				/* Minus one because the first arg is the stream. */
+				code.append( IN_PRINT_STREAM );
+				code.append( exprPtrVect->length() - 1 );
+			}
+
+			break;
+		}
+		case ExprType: {
+			/* Evaluate the expression, then pop it immediately. */
+			expr->evaluate( pd, code );
+			code.append( IN_POP );
+			break;
+		}
+		case IfType: {
+			pd->curLocalFrame->iterPushScope();
+
+			long jumpFalse = 0, jumpPastElse = 0, distance = 0;
+
+			/* Evaluate the test. */
+			expr->evaluate( pd, code );
+
+			/* Jump past the if block if false. We don't know the distance
+			 * yet so store the location of the jump. */
+			jumpFalse = code.length();
+			code.append( IN_JMP_FALSE );
+			code.appendHalf( 0 );
+
+			/* Compile the if true branch. */
+			for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ )
+				stmt->compile( pd, code );
+
+			if ( elsePart != 0 ) {
+				/* Jump past the else code for the if true branch. */
+				jumpPastElse = code.length();
+				code.append( IN_JMP );
+				code.appendHalf( 0 );
+			}
+
+			/* Set the distance for the jump false case. */
+			distance = code.length() - jumpFalse - 3;
+			code.setHalf( jumpFalse+1, distance );
+
+			pd->curLocalFrame->iterPopScope();
+
+			if ( elsePart != 0 ) {
+				/* Compile the else branch. */
+				elsePart->compile( pd, code );
+
+				/* Set the distance for jump over the else part. */
+				distance = code.length() - jumpPastElse - 3;
+				code.setHalf( jumpPastElse+1, distance );
+			}
+
+			break;
+		}
+		case ElseType: {
+			pd->curLocalFrame->iterPushScope();
+
+			/* Compile the else branch. */
+			for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ )
+				stmt->compile( pd, code );
+
+			pd->curLocalFrame->iterPopScope();
+			break;
+		}
+		case RejectType: {
+			code.append( IN_REJECT );
+			break;
+		}
+		case WhileType: {
+			compileWhile( pd, code );
+			break;
+		}
+		case AssignType: {
+			/* Evaluate the expression. */
+			UniqueType *exprUT = expr->evaluate( pd, code );
+
+			/* Do the assignment. */
+			varRef->assignValue( pd, code, exprUT );
+			break;
+		}
+		case ForIterType: {
+			compileForIter( pd, code );
+			break;
+		}
+		case ReturnType: {
+			/* Evaluate the expression. */
+			UniqueType *exprUT = expr->evaluate( pd, code );
+
+			if ( pd->curFunction == 0 ) {
+				/* In the main function. */
+				pd->mainReturnUT = exprUT;
+			}
+			else {
+				UniqueType *resUT = pd->curFunction->typeRef->uniqueType;
+				if ( !castAssignment( pd, code, resUT, 0, exprUT ) )
+					error(loc) << "return value wrong type" << endp;
+			}
+
+			code.append( IN_SAVE_RET );
+
+			/* The loop cleanup code. */
+			if ( pd->loopCleanup != 0 )
+				code.append( *pd->loopCleanup );
+
+			/* Jump to the return label. The distance will be filled in
+			 * later. */
+			pd->returnJumps.append( code.length() );
+			code.append( IN_JMP );
+			code.appendHalf( 0 );
+			break;
+		}
+		case BreakType: {
+			/* Record the jump; the enclosing loop fills in the distance. */
+			pd->breakJumps.append( code.length() );
+			code.append( IN_JMP );
+			code.appendHalf( 0 );
+			break;
+		}
+		case YieldType: {
+			/* Take a reference and yield it. Immediately reset the
+			 * reference. */
+			varRef->preEvaluateRef( pd, code );
+			ObjField *refField = varRef->evaluateRef( pd, code, 0 );
+			code.append( IN_YIELD );
+
+			if ( varRef->qual->length() > 0 ) {
+				code.append( IN_POP_N_WORDS );
+				code.appendHalf( (short)(varRef->qual->length()*2) );
+			}
+
+			refField->refActive = false;
+			break;
+		}
+		case ParserType: {
+			evaluateParserItems( pd, code );
+			break;
+		}
+	}
+}
+
+/* Compile every statement of the block, in list order, into 'code'. */
+void CodeBlock::compile( Compiler *pd, CodeVect &code ) const
+{
+	StmtList::Iter stmt = *stmtList;
+	while ( stmt.lte() ) {
+		stmt->compile( pd, code );
+		stmt++;
+	}
+}
+
+/* Insert a constant int field named "match_length" into 'frame'. The field
+ * has no offset; it is read with IN_GET_MATCH_LENGTH_R. 'lel' is not used. */
+void Compiler::addMatchLength( ObjectDef *frame, LangEl *lel )
+{
+	TypeRef *intRef = new TypeRef( InputLoc(), uniqueTypeInt );
+	ObjField *field = new ObjField( InputLoc(), intRef, "match_length" );
+
+	/* Pre-marked referenced and initialized. */
+	field->beenInitialized = true;
+	field->beenReferenced = true;
+	field->useOffset = false;
+	field->isConst = true;
+	field->inGetR = IN_GET_MATCH_LENGTH_R;
+
+	frame->insertField( field->name, field );
+}
+
+/* Insert a constant str field named "match_text" into 'frame'. The field has
+ * no offset; it is read with IN_GET_MATCH_TEXT_R. 'lel' is not used. */
+void Compiler::addMatchText( ObjectDef *frame, LangEl *lel )
+{
+	TypeRef *strRef = new TypeRef( InputLoc(), uniqueTypeStr );
+	ObjField *field = new ObjField( InputLoc(), strRef, "match_text" );
+
+	/* Pre-marked referenced and initialized. */
+	field->beenInitialized = true;
+	field->beenReferenced = true;
+	field->useOffset = false;
+	field->isConst = true;
+	field->inGetR = IN_GET_MATCH_TEXT_R;
+
+	frame->insertField( field->name, field );
+}
+
+/* Insert a non-constant, custom field named "input" into 'frame'. It is
+ * typed with uniqueTypeInput and read with the IN_LOAD_INPUT_* opcodes. */
+void Compiler::addInput( ObjectDef *frame )
+{
+	TypeRef *inputRef = new TypeRef( InputLoc(), uniqueTypeInput );
+	ObjField *field = new ObjField( InputLoc(), inputRef, "input" );
+
+	/* Pre-marked referenced and initialized; no offset is used. */
+	field->beenInitialized = true;
+	field->beenReferenced = true;
+	field->useOffset = false;
+	field->isConst = false;
+	field->isCustom = true;
+
+	/* Load opcodes for the read, revert and commit variants. */
+	field->inGetR = IN_LOAD_INPUT_R;
+	field->inGetWV = IN_LOAD_INPUT_WV;
+	field->inGetWC = IN_LOAD_INPUT_WC;
+
+	frame->insertField( field->name, field );
+}
+
+/* Insert a non-constant, custom field named "ctx" into 'frame'. It is typed
+ * with uniqueTypeStream and read with the IN_LOAD_CTX_* opcodes. */
+void Compiler::addCtx( ObjectDef *frame )
+{
+	TypeRef *ctxRef = new TypeRef( InputLoc(), uniqueTypeStream );
+	ObjField *field = new ObjField( InputLoc(), ctxRef, "ctx" );
+
+	/* Pre-marked referenced and initialized; no offset is used. */
+	field->beenInitialized = true;
+	field->beenReferenced = true;
+	field->useOffset = false;
+	field->isConst = false;
+	field->isCustom = true;
+
+	/* Load opcodes for the read, revert and commit variants. */
+	field->inGetR = IN_LOAD_CTX_R;
+	field->inGetWV = IN_LOAD_CTX_WV;
+	field->inGetWC = IN_LOAD_CTX_WC;
+
+	frame->insertField( field->name, field );
+}
+
+/* Give 'el' the object-field get/set opcodes (IN_GET_FIELD_*,
+ * IN_SET_FIELD_*). */
+void Compiler::initFieldInstructions( ObjField *el )
+{
+	/* Getters. */
+	el->inGetR = IN_GET_FIELD_R;
+	el->inGetWV = IN_GET_FIELD_WV;
+	el->inGetWC = IN_GET_FIELD_WC;
+
+	/* Setters. */
+	el->inSetWV = IN_SET_FIELD_WV;
+	el->inSetWC = IN_SET_FIELD_WC;
+}
+
+/* Give 'el' the local-variable opcodes. Only the R and WC getters and the WC
+ * setter are assigned here. */
+void Compiler::initLocalInstructions( ObjField *el )
+{
+	el->inSetWC = IN_SET_LOCAL_WC;
+	el->inGetWC = IN_GET_LOCAL_WC;
+	el->inGetR = IN_GET_LOCAL_R;
+}
+
+/* Give 'el' the local-reference opcodes. Only the R and WC getters and the WC
+ * setter are assigned here. */
+void Compiler::initLocalRefInstructions( ObjField *el )
+{
+	el->inSetWC = IN_SET_LOCAL_REF_WC;
+	el->inGetWC = IN_GET_LOCAL_REF_WC;
+	el->inGetR = IN_GET_LOCAL_REF_R;
+}
+
+/* Create the builtin "int" object definition, attach it to the int language
+ * element and register its to_string method. */
+void Compiler::initIntObject( )
+{
+	ObjectDef *def = new ObjectDef( ObjectDef::BuiltinType, "int", nextObjectId++ );
+	intObj = def;
+	intLangEl->objectDef = def;
+
+	initFunction( uniqueTypeStr, intObj, "to_string",
+			IN_INT_TO_STR, IN_INT_TO_STR, true );
+}
+
+/* Insert a constant int field named "length" into 'objDef'. The caller
+ * supplies the opcode used to read it; the field has no offset. */
+void Compiler::addLengthField( ObjectDef *objDef, Code getLength )
+{
+	TypeRef *intRef = new TypeRef( InputLoc(), uniqueTypeInt );
+	ObjField *field = new ObjField( InputLoc(), intRef, "length" );
+
+	/* Pre-marked referenced and initialized. */
+	field->beenInitialized = true;
+	field->beenReferenced = true;
+	field->useOffset = false;
+	field->isConst = true;
+	field->inGetR = getLength;
+
+	objDef->insertField( field->name, field );
+}
+
+/* Create the builtin "str" object definition with its int-returning
+ * conversion methods and length field. Also registers the global sprintf
+ * function. */
+void Compiler::initStrObject( )
+{
+	ObjectDef *def = new ObjectDef( ObjectDef::BuiltinType, "str", nextObjectId++ );
+	strObj = def;
+	strLangEl->objectDef = def;
+
+	/* Methods returning int. The same opcode is passed for both opcode
+	 * arguments. */
+	initFunction( uniqueTypeInt, strObj, "atoi",
+			IN_STR_ATOI, IN_STR_ATOI, true );
+	initFunction( uniqueTypeInt, strObj, "uord8",
+			IN_STR_UORD8, IN_STR_UORD8, true );
+	initFunction( uniqueTypeInt, strObj, "sord8",
+			IN_STR_SORD8, IN_STR_SORD8, true );
+	initFunction( uniqueTypeInt, strObj, "uord16",
+			IN_STR_UORD16, IN_STR_UORD16, true );
+	initFunction( uniqueTypeInt, strObj, "sord16",
+			IN_STR_SORD16, IN_STR_SORD16, true );
+	initFunction( uniqueTypeInt, strObj, "uord32",
+			IN_STR_UORD32, IN_STR_UORD32, true );
+	initFunction( uniqueTypeInt, strObj, "sord32",
+			IN_STR_SORD32, IN_STR_SORD32, true );
+
+	addLengthField( strObj, IN_STR_LENGTH );
+
+	/* sprintf lives on the global object: (str, int) -> str. */
+	initFunction( uniqueTypeStr, globalObjectDef, "sprintf",
+			IN_SPRINTF, IN_SPRINTF, uniqueTypeStr, uniqueTypeInt, true );
+}
+
+/* Create the builtin "stream" object definition and attach it to the stream
+ * language element. */
+void Compiler::initStreamObject( )
+{
+	ObjectDef *def = new ObjectDef( ObjectDef::BuiltinType,
+			"stream", nextObjectId++ );
+	streamObj = def;
+	streamLangEl->objectDef = def;
+}
+
+/* Create the builtin "accum_stream" object definition, attach it to the
+ * input language element and register pull, push and push_ignore. Each
+ * method passes its WV opcode for both opcode arguments. */
+void Compiler::initInputObject( )
+{
+	ObjectDef *def = new ObjectDef( ObjectDef::BuiltinType,
+			"accum_stream", nextObjectId++ );
+	inputObj = def;
+	inputLangEl->objectDef = def;
+
+	/* pull takes an int. */
+	initFunction( uniqueTypeStr, inputObj, "pull",
+			IN_INPUT_PULL_WV, IN_INPUT_PULL_WV, uniqueTypeInt, false );
+
+	/* The push variants take any type. */
+	initFunction( uniqueTypeStr, inputObj, "push",
+			IN_INPUT_PUSH_WV, IN_INPUT_PUSH_WV, uniqueTypeAny, false );
+	initFunction( uniqueTypeStr, inputObj, "push_ignore",
+			IN_INPUT_PUSH_IGNORE_WV, IN_INPUT_PUSH_IGNORE_WV, uniqueTypeAny, false );
+}
+
+/* Build the str-typed "data" field of a token. The field is readable and
+ * settable and does not use an offset. The caller inserts the returned field
+ * into an object definition. */
+ObjField *Compiler::makeDataEl()
+{
+	TypeRef *strRef = new TypeRef( InputLoc(), uniqueTypeStr );
+	ObjField *field = new ObjField( InputLoc(), strRef, "data" );
+
+	/* Setting beenReferenced to true prevents us from assigning
+	 * instructions and an offset to the field. */
+	field->beenInitialized = true;
+	field->beenReferenced = true;
+	field->useOffset = false;
+
+	field->inGetR = IN_GET_TOKEN_DATA_R;
+	field->inSetWV = IN_SET_TOKEN_DATA_WV;
+	field->inSetWC = IN_SET_TOKEN_DATA_WC;
+	return field;
+}
+
+/* Build the constant int-typed "pos" field of a token. It is read with
+ * IN_GET_TOKEN_POS_R and does not use an offset. The caller inserts the
+ * returned field into an object definition. */
+ObjField *Compiler::makePosEl()
+{
+	TypeRef *intRef = new TypeRef( InputLoc(), uniqueTypeInt );
+	ObjField *field = new ObjField( InputLoc(), intRef, "pos" );
+
+	/* Setting beenReferenced to true prevents us from assigning
+	 * instructions and an offset to the field. */
+	field->beenInitialized = true;
+	field->beenReferenced = true;
+	field->useOffset = false;
+	field->isConst = true;
+
+	field->inGetR = IN_GET_TOKEN_POS_R;
+	return field;
+}
+
+/* Build the constant int-typed "line" field of a token. It is read with
+ * IN_GET_TOKEN_LINE_R and does not use an offset. The caller inserts the
+ * returned field into an object definition. */
+ObjField *Compiler::makeLineEl()
+{
+	TypeRef *intRef = new TypeRef( InputLoc(), uniqueTypeInt );
+	ObjField *field = new ObjField( InputLoc(), intRef, "line" );
+
+	/* Setting beenReferenced to true prevents us from assigning
+	 * instructions and an offset to the field. */
+	field->beenInitialized = true;
+	field->beenReferenced = true;
+	field->useOffset = false;
+	field->isConst = true;
+
+	field->inGetR = IN_GET_TOKEN_LINE_R;
+	return field;
+}
+
+/* Create the shared builtin "token" object definition with data, pos and
+ * line fields. Every user terminal without an object definition is then
+ * pointed at it; user terminals that already have one get their own copies
+ * of the three fields. */
+void Compiler::initTokenObjects( )
+{
+	/* The default token object. */
+	tokenObj = new ObjectDef( ObjectDef::BuiltinType, "token", nextObjectId++ );
+
+	ObjField *sharedData = makeDataEl();
+	tokenObj->insertField( sharedData->name, sharedData );
+
+	ObjField *sharedPos = makePosEl();
+	tokenObj->insertField( sharedPos->name, sharedPos );
+
+	ObjField *sharedLine = makeLineEl();
+	tokenObj->insertField( sharedLine->name, sharedLine );
+
+	for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
+		/* Only user terminals are affected. */
+		if ( !lel->isUserTerm )
+			continue;
+
+		if ( lel->objectDef == 0 ) {
+			lel->objectDef = tokenObj;
+			continue;
+		}
+
+		/* The terminal has its own object: add fresh data, pos and line
+		 * fields to it. */
+		ObjField *ownData = makeDataEl();
+		lel->objectDef->insertField( ownData->name, ownData );
+
+		ObjField *ownPos = makePosEl();
+		lel->objectDef->insertField( ownPos->name, ownPos );
+
+		ObjField *ownLine = makeLineEl();
+		lel->objectDef->insertField( ownLine->name, ownLine );
+	}
+}
+
+/* Collect into 'trees' the offsets of the current local frame's fields that
+ * hold a tree or a pointer. Only fields that use an offset, are not the lhs
+ * element, and are either referenced or a parameter are considered. */
+void Compiler::findLocalTrees( CharSet &trees )
+{
+	/* We exclude "lhs" from being downrefed because we need to use it after
+	 * the frame is cleaned and so it must survive. */
+	for ( ObjFieldList::Iter ol = *curLocalFrame->objFieldList; ol.lte(); ol++ ) {
+		ObjField *field = ol->value;
+
+		/* FIXME: This test needs to be improved. Match_text was getting
+		 * through before useOffset was tested. What will? */
+		if ( !field->useOffset || field->isLhsEl )
+			continue;
+		if ( !field->beenReferenced && !field->isParam )
+			continue;
+
+		UniqueType *ut = field->typeRef->uniqueType;
+		if ( ut->typeId == TYPE_TREE || ut->typeId == TYPE_PTR )
+			trees.insert( field->offset );
+	}
+}
+
+/* For every production element with a capture field, append the pair
+ * (capture field offset, element position) to prod->copy. */
+void Compiler::makeProdCopies( Definition *prod )
+{
+	int position = 0;
+	ProdElList::Iter pel = *prod->prodElList;
+	while ( pel.lte() ) {
+		if ( pel->captureField != 0 ) {
+			prod->copy.append( pel->captureField->offset );
+			prod->copy.append( position );
+		}
+		pel++;
+		position++;
+	}
+}
+
+/* Compile the reduction block of a production into its revert (WV) code
+ * vector, then record which locals hold trees. */
+void Compiler::compileReductionCode( Definition *prod )
+{
+	CodeBlock *redBlock = prod->redBlock;
+
+	/* Compilation context: reduction, with revert code on. */
+	compileContext = CompileReduction;
+	curLocalFrame = redBlock->localFrame;
+	revertOn = true;
+	redBlock->frameId = nextFrameId++;
+
+	CodeVect &code = redBlock->codeWV;
+
+	/* Frame init with a placeholder size. The real size is only known
+	 * after the block has been compiled. */
+	code.append( IN_INIT_LOCALS );
+	code.appendHalf( 0 );
+	long afterInit = code.length();
+
+	redBlock->compile( this, code );
+
+	/* Fill in the operand of the frame init instruction. */
+	long frameSize = curLocalFrame->size();
+	code.setHalf( 1, frameSize );
+
+	/* Right hand side loads, lhs load and lhs push-back are given the
+	 * position just after the frame init. */
+	addProdRHSLoads( prod, code, afterInit );
+	addProdLHSLoad( prod, code, afterInit );
+	addPushBackLHS( prod, code, afterInit );
+
+	code.append( IN_PCR_RET );
+
+	/* Now that compilation is done variables are referenced. Make the
+	 * local trees descriptor. */
+	findLocalTrees( redBlock->trees );
+}
+
+/* Compile the translation block of a language element into its revert (WV)
+ * code vector. The local frame first receives the match_length, match_text,
+ * input and ctx fields. */
+void Compiler::compileTranslateBlock( LangEl *langEl )
+{
+	CodeBlock *transBlock = langEl->transBlock;
+
+	/* Compilation context: translation, with revert code on. */
+	compileContext = CompileTranslation;
+	curLocalFrame = transBlock->localFrame;
+	revertOn = true;
+	transBlock->frameId = nextFrameId++;
+
+	/* Fields available to the block. */
+	addMatchLength( curLocalFrame, langEl );
+	addMatchText( curLocalFrame, langEl );
+	addInput( curLocalFrame );
+	addCtx( curLocalFrame );
+
+	CodeVect &code = transBlock->codeWV;
+
+	/* Frame init with a placeholder size, filled in after compilation. */
+	code.append( IN_INIT_LOCALS );
+	code.appendHalf( 0 );
+
+	const long numCaptures = langEl->tokenDef->reCaptureVect.length();
+	if ( numCaptures > 0 ) {
+		code.append( IN_INIT_CAPTURES );
+		code.append( langEl->tokenDef->reCaptureVect.length() );
+
+		/* Reference the first numCaptures fields of the frame. */
+		ObjFieldList::Iter f = *curLocalFrame->objFieldList;
+		for ( int i = 0; i < langEl->tokenDef->reCaptureVect.length(); i++, f++ )
+			curLocalFrame->referenceField( this, f->value );
+	}
+
+	transBlock->compile( this, code );
+
+	/* Fill in the operand of the frame init instruction. */
+	long frameSize = curLocalFrame->size();
+	code.setHalf( 1, frameSize );
+
+	code.append( IN_PCR_RET );
+
+	/* Now that compilation is done variables are referenced. Make the
+	 * local trees descriptor. */
+	findLocalTrees( transBlock->trees );
+}
+
+/* Compile the pre-eof block of a token region into its revert (WV) code
+ * vector. The local frame first receives the input and ctx fields. */
+void Compiler::compilePreEof( TokenRegion *region )
+{
+	CodeBlock *eofBlock = region->preEofBlock;
+
+	/* Compilation context: translation, with revert code on. */
+	compileContext = CompileTranslation;
+	curLocalFrame = region->preEofBlock->localFrame;
+	revertOn = true;
+	eofBlock->frameId = nextFrameId++;
+
+	addInput( curLocalFrame );
+	addCtx( curLocalFrame );
+
+	CodeVect &code = eofBlock->codeWV;
+
+	/* Frame init with a placeholder size, filled in after compilation. */
+	code.append( IN_INIT_LOCALS );
+	code.appendHalf( 0 );
+
+	eofBlock->compile( this, code );
+
+	/* Fill in the operand of the frame init instruction. */
+	long frameSize = curLocalFrame->size();
+	code.setHalf( 1, frameSize );
+
+	code.append( IN_PCR_RET );
+
+	/* Now that compilation is done variables are referenced. Make the
+	 * local trees descriptor. */
+	findLocalTrees( eofBlock->trees );
+}
+
+/* Compile the root code block into its commit (WC) code vector. The root
+ * block never needs to be reverted, so revert code is off. */
+void Compiler::compileRootBlock( )
+{
+	CodeBlock *root = rootCodeBlock;
+
+	/* Compilation context. The root local frame is used for the compile. */
+	compileContext = CompileRoot;
+	curLocalFrame = rootLocalFrame;
+	revertOn = false;
+
+	/* The block needs a frame id. */
+	root->frameId = nextFrameId++;
+
+	CodeVect &code = root->codeWC;
+
+	/* Frame init with a placeholder size, filled in after compilation. */
+	code.append( IN_INIT_LOCALS );
+	code.appendHalf( 0 );
+
+	/* Load argv, giving the offset of the argv field. */
+	code.append( IN_LOAD_ARGV );
+	code.appendHalf( argvOffset() );
+
+	root->compile( this, code );
+
+	/* Fill in the operand of the frame init instruction. */
+	long frameSize = curLocalFrame->size();
+	code.setHalf( 1, frameSize );
+
+	code.append( IN_STOP );
+
+	/* Make the local trees descriptor. */
+	findLocalTrees( root->trees );
+}
+
+/* Run initField over every field of every language element that has an
+ * object definition, then over every field of the global object. */
+void Compiler::initAllLanguageObjects()
+{
+	/* Init all user object fields (need consistent size). */
+	for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
+		ObjectDef *def = lel->objectDef;
+		if ( def == 0 )
+			continue;
+
+		for ( ObjFieldList::Iter fld = *def->objFieldList; fld.lte(); fld++ )
+			def->initField( this, fld->value );
+	}
+
+	/* The global object's fields. */
+	for ( ObjFieldList::Iter gfld = *globalObjectDef->objFieldList; gfld.lte(); gfld++ )
+		globalObjectDef->initField( this, gfld->value );
+}
+
+/* Register the members of a map generic: the length field and the find,
+ * insert, store and remove methods. */
+void Compiler::initMapFunctions( GenericType *gen )
+{
+	ObjectDef *mapDef = gen->objDef;
+
+	addLengthField( mapDef, IN_MAP_LENGTH );
+
+	/* find( key ): same opcode for both variants. */
+	initFunction( gen->utArg, mapDef, "find",
+			IN_MAP_FIND, IN_MAP_FIND, gen->keyUT, true );
+
+	/* insert( key, value ) and store( key, value ) return int. */
+	initFunction( uniqueTypeInt, mapDef, "insert",
+			IN_MAP_INSERT_WV, IN_MAP_INSERT_WC, gen->keyUT, gen->utArg, false );
+	initFunction( uniqueTypeInt, mapDef, "store",
+			IN_MAP_STORE_WV, IN_MAP_STORE_WC, gen->keyUT, gen->utArg, false );
+
+	/* remove( key ). */
+	initFunction( gen->utArg, mapDef, "remove",
+			IN_MAP_REMOVE_WV, IN_MAP_REMOVE_WC, gen->keyUT, false );
+}
+
+/* Register the members of a list generic: the length field, append/push
+ * (both use the append opcodes) and remove_end/pop (both use the remove-end
+ * opcodes). */
+void Compiler::initListFunctions( GenericType *gen )
+{
+	ObjectDef *listDef = gen->objDef;
+
+	addLengthField( listDef, IN_LIST_LENGTH );
+
+	/* "push" is registered with the same opcodes as "append". */
+	initFunction( uniqueTypeInt, listDef, "append",
+			IN_LIST_APPEND_WV, IN_LIST_APPEND_WC, gen->utArg, false );
+	initFunction( uniqueTypeInt, listDef, "push",
+			IN_LIST_APPEND_WV, IN_LIST_APPEND_WC, gen->utArg, false );
+
+	/* "pop" is registered with the same opcodes as "remove_end". */
+	initFunction( gen->utArg, listDef, "remove_end",
+			IN_LIST_REMOVE_END_WV, IN_LIST_REMOVE_END_WC, false );
+	initFunction( gen->utArg, listDef, "pop",
+			IN_LIST_REMOVE_END_WV, IN_LIST_REMOVE_END_WC, false );
+}
+
+/* Add a field called 'name' to a list generic. The field has the list's
+ * element type, is accessed with the list-member opcodes and uses 'offset'
+ * to select the member. */
+void Compiler::initListField( GenericType *gen, const char *name, int offset )
+{
+	TypeRef *elemRef = new TypeRef( InputLoc(), gen->utArg );
+	ObjField *field = new ObjField( InputLoc(), elemRef, name );
+
+	/* Getters. */
+	field->inGetR = IN_GET_LIST_MEM_R;
+	field->inGetWV = IN_GET_LIST_MEM_WV;
+	field->inGetWC = IN_GET_LIST_MEM_WC;
+
+	/* Setters. */
+	field->inSetWV = IN_SET_LIST_MEM_WV;
+	field->inSetWC = IN_SET_LIST_MEM_WC;
+
+	/* The field is inserted before its flags are set, as in the original
+	 * statement order. */
+	gen->objDef->insertField( field->name, field );
+
+	field->beenInitialized = true;
+	field->beenReferenced = true;
+	field->useOffset = true;
+
+	/* Zero for head, One for tail. */
+	field->offset = offset;
+}
+
+/* Add the head, tail and top fields to a list generic. "top" shares offset 1
+ * with "tail". */
+void Compiler::initListFields( GenericType *gen )
+{
+	const int headOffset = 0;
+	const int tailOffset = 1;
+
+	initListField( gen, "head", headOffset );
+	initListField( gen, "tail", tailOffset );
+	initListField( gen, "top", tailOffset );
+}
+
+/* Register the members of a vector generic: the length field and the append
+ * and insert methods. */
+void Compiler::initVectorFunctions( GenericType *gen )
+{
+	ObjectDef *vecDef = gen->objDef;
+
+	addLengthField( vecDef, IN_VECTOR_LENGTH );
+
+	/* append( value ). */
+	initFunction( uniqueTypeInt, vecDef, "append",
+			IN_VECTOR_APPEND_WV, IN_VECTOR_APPEND_WC, gen->utArg, false );
+
+	/* insert( int, value ). */
+	initFunction( uniqueTypeInt, vecDef, "insert",
+			IN_VECTOR_INSERT_WV, IN_VECTOR_INSERT_WC, uniqueTypeInt, gen->utArg, false );
+}
+
+/* Register the "finish" method of a parser generic. It takes no arguments
+ * and returns the generic's argument type. */
+void Compiler::initParserFunctions( GenericType *gen )
+{
+	ObjectDef *parserDef = gen->objDef;
+	initFunction( gen->utArg, parserDef, "finish",
+			IN_PARSE_FINISH_WV, IN_PARSE_FINISH_WC, true );
+}
+
+/* Add a "ctx" field to a parser generic. Its type is the tree type of the
+ * context that the generic's argument language element belongs to. It is
+ * accessed with the accum-ctx opcodes and has no offset. */
+void Compiler::initCtxField( GenericType *gen )
+{
+	Context *context = gen->utArg->langEl->contextIn;
+
+	UniqueType *ctxUT = findUniqueType( TYPE_TREE, context->lel );
+	TypeRef *ctxRef = new TypeRef( InputLoc(), ctxUT );
+	ObjField *field = new ObjField( InputLoc(), ctxRef, "ctx" );
+
+	/* Getters. */
+	field->inGetR = IN_GET_ACCUM_CTX_R;
+	field->inGetWV = IN_GET_ACCUM_CTX_WV;
+	field->inGetWC = IN_GET_ACCUM_CTX_WC;
+
+	/* Setters. */
+	field->inSetWV = IN_SET_ACCUM_CTX_WV;
+	field->inSetWC = IN_SET_ACCUM_CTX_WC;
+
+	/* The field is inserted before its flags are set, as in the original
+	 * statement order. */
+	gen->objDef->insertField( field->name, field );
+
+	field->beenInitialized = true;
+	field->beenReferenced = true;
+	field->useOffset = false;
+}
+
+/* Add the ctx field to a parser generic, but only when the generic's
+ * argument language element is inside a context. */
+void Compiler::initParserFields( GenericType *gen )
+{
+	if ( gen->utArg->langEl->contextIn == 0 )
+		return;
+
+	initCtxField( gen );
+}
+
+/* For every generic type in every namespace: resolve its argument (and, for
+ * maps, key) unique types, create its builtin object definition, register
+ * the members for its kind and attach the definition to the generic's
+ * language element. */
+void Compiler::initGenericTypes()
+{
+	for ( NamespaceList::Iter ns = namespaceList; ns.lte(); ns++ ) {
+		for ( GenericList::Iter gen = ns->genericList; gen.lte(); gen++ ) {
+			gen->utArg = gen->typeArg->uniqueType;
+
+			/* Only maps have a key type. */
+			if ( gen->typeId == GEN_MAP )
+				gen->keyUT = gen->keyTypeArg->uniqueType;
+
+			gen->objDef = new ObjectDef( ObjectDef::BuiltinType,
+					gen->name, nextObjectId++ );
+
+			switch ( gen->typeId ) {
+				case GEN_MAP: {
+					initMapFunctions( gen );
+					break;
+				}
+				case GEN_LIST: {
+					initListFunctions( gen );
+					initListFields( gen );
+					break;
+				}
+				case GEN_VECTOR: {
+					initVectorFunctions( gen );
+					break;
+				}
+				case GEN_PARSER: {
+					/* A parser must be generated for the argument
+					 * type, so give it a parser id. */
+					gen->utArg->langEl->parserId = nextParserId++;
+					initParserFunctions( gen );
+					initParserFields( gen );
+					break;
+				}
+			}
+
+			gen->langEl->objectDef = gen->objDef;
+		}
+	}
+}
+
+/* Make a user function (or user iterator) callable: insert its parameters
+ * into its local frame, compute their frame offsets, and register an
+ * ObjMethod for it in the global object's method map. */
+void Compiler::makeFuncVisible( Function *func, bool isUserIter )
+{
+	func->localFrame = func->codeBlock->localFrame;
+
+	/* First pass: declare the parameters and total their sizes. The type
+	 * array is handed to func->paramUTs below. */
+	long nextPos = 0, totalSize = 0;
+	UniqueType **paramUTs = new UniqueType*[func->paramList->length()];
+	for ( ParameterList::Iter param = *func->paramList; param.lte(); param++ ) {
+		UniqueType *paramUT = param->typeRef->uniqueType;
+		paramUTs[nextPos] = paramUT;
+
+		if ( func->localFrame->findField( param->name ) != 0 )
+			error(param->loc) << "parameter " << param->name << " redeclared" << endp;
+
+		func->localFrame->insertField( param->name, param );
+		param->beenInitialized = true;
+		param->pos = nextPos;
+
+		/* Parameters are accessed as locals. Reference-typed parameters
+		 * get the local-ref instructions. */
+		if ( paramUT->typeId == TYPE_REF )
+			initLocalRefInstructions( param );
+		else
+			initLocalInstructions( param );
+
+		totalSize += sizeOfField( paramUT );
+		nextPos += 1;
+	}
+
+	/* How much space do we need to make for call overhead. */
+	const long frameAfterArgs = isUserIter ? IFR_AA : FR_AA;
+
+	/* Second pass: assign offsets. Param offset is relative to one past the
+	 * last item in the array of words containing the args. */
+	long paramOffset = 0;
+	for ( ParameterList::Iter param = *func->paramList; param.lte(); param++ ) {
+		/* Moving downward, and need the offset to point to the lower half
+		 * of the argument. */
+		paramOffset -= sizeOfField( paramUTs[param->pos] );
+
+		/* Going up first we have the frame data, then maybe the user
+		 * iterator, then the args from high to low. */
+		param->offset = frameAfterArgs +
+				( isUserIter ? ( sizeof(UserIter) / sizeof(Word) ) : 0 ) +
+				totalSize + paramOffset;
+	}
+
+	func->paramListSize = totalSize;
+	func->paramUTs = paramUTs;
+
+	/* A function without a declared type is registered as returning int. */
+	UniqueType *returnUT = uniqueTypeInt;
+	if ( func->typeRef != 0 )
+		returnUT = func->typeRef->uniqueType;
+
+	ObjMethod *method = new ObjMethod( returnUT, func->name,
+			IN_CALL_WV, IN_CALL_WC,
+			func->paramList->length(), paramUTs, func->paramList, false );
+	method->funcId = func->funcId;
+	method->useFuncId = true;
+	method->useCallObj = false;
+	method->func = func;
+
+	/* User iterators also carry an iterator definition. */
+	if ( isUserIter )
+		method->iterDef = findIterDef( IterDef::User, func );
+
+	globalObjectDef->objMethodMap->insert( func->name, method );
+}
+
+/* Compile the body of a user iterator into 'code'. Called once for each
+ * code flavour by the single-argument overload. */
+void Compiler::compileUserIter( Function *func, CodeVect &code )
+{
+	CodeBlock *body = func->codeBlock;
+
+	/* Frame init with a placeholder size, filled in after compilation. */
+	code.append( IN_INIT_LOCALS );
+	code.appendHalf( 0 );
+
+	body->compile( this, code );
+
+	/* Fill in the operand of the frame init instruction. */
+	int frameSize = func->localFrame->size();
+	code.setHalf( 1, frameSize );
+
+	/* Unless the last statement is a yield, finish by yielding nil. */
+	const bool endsInYield = body->stmtList->length() != 0 &&
+			body->stmtList->tail->type == LangStmt::YieldType;
+	if ( !endsInYield ) {
+		code.append( IN_LOAD_NIL );
+		code.append( IN_YIELD );
+	}
+}
+
+/* Compile a user iterator twice, once with revert code on (into codeWV) and
+ * once with it off (into codeWC), then record which locals hold trees. */
+void Compiler::compileUserIter( Function *func )
+{
+	CodeBlock *body = func->codeBlock;
+
+	/* Compilation context. */
+	compileContext = CompileFunction;
+	curFunction = func;
+	body->frameId = nextFrameId++;
+
+	/* The local frame comes from the function's code block. */
+	curLocalFrame = func->codeBlock->localFrame;
+
+	/* Revert flavour. */
+	revertOn = true;
+	compileUserIter( func, body->codeWV );
+
+	/* Commit flavour. */
+	revertOn = false;
+	compileUserIter( func, body->codeWC );
+
+	/* Now that compilation is done variables are referenced. Make the
+	 * local trees descriptor. */
+	findLocalTrees( body->trees );
+
+	/* FIXME: Need to deal with the freeing of local trees. */
+}
+
+/* Compile the body of a function into 'code'. Called for each type of
+ * function compile: revert and commit. Return jumps recorded while compiling
+ * the body are pointed at the final IN_RET, and the list is cleared. */
+void Compiler::compileFunction( Function *func, CodeVect &code )
+{
+	CodeBlock *body = func->codeBlock;
+
+	/* Frame init with a placeholder size, filled in after compilation. */
+	code.append( IN_INIT_LOCALS );
+	code.appendHalf( 0 );
+
+	body->compile( this, code );
+
+	/* Fill in the operand of the frame init instruction. */
+	int frameSize = func->localFrame->size();
+	code.setHalf( 1, frameSize );
+
+	/* Unless the last statement is a return, save nil as the return
+	 * value. */
+	const bool endsInReturn = body->stmtList->length() != 0 &&
+			body->stmtList->tail->type == LangStmt::ReturnType;
+	if ( !endsInReturn ) {
+		code.append( IN_LOAD_NIL );
+		code.append( IN_SAVE_RET );
+	}
+
+	/* Back-patch the return jumps to land here. The 3 is the size of the
+	 * jump instruction (opcode plus half-word operand). */
+	const long retTarget = code.length();
+	for ( LongVect::Iter rj = returnJumps; rj.lte(); rj++ ) {
+		long distance = retTarget - *rj - 3;
+		code.setHalf( *rj+1, distance );
+	}
+
+	/* Reset the vector of return jumps. */
+	returnJumps.empty();
+
+	/* Return cleans up the stack (including the args) and leaves the
+	 * return value on the top. */
+	code.append( IN_RET );
+}
+
+/* Compile a function twice, once with revert code on (into codeWV) and once
+ * with it off (into codeWC), then record which locals hold trees. */
+void Compiler::compileFunction( Function *func )
+{
+	CodeBlock *body = func->codeBlock;
+
+	/* Compilation context. */
+	compileContext = CompileFunction;
+	curFunction = func;
+
+	/* Assign a frame id. */
+	body->frameId = nextFrameId++;
+
+	/* The local frame comes from the function's code block. */
+	curLocalFrame = func->codeBlock->localFrame;
+
+	/* Revert flavour. */
+	revertOn = true;
+	compileFunction( func, body->codeWV );
+
+	/* Commit flavour. */
+	revertOn = false;
+	compileFunction( func, body->codeWC );
+
+	/* Now that compilation is done variables are referenced. Make the
+	 * local trees descriptor. */
+	findLocalTrees( body->trees );
+}
+
+/* Register the builtin iterators on the global object: triter, child,
+ * rev_child, repeat and rev_repeat. Each is declared with IN_HALT for both
+ * opcode arguments, takes a reference to any type, and is tagged with the
+ * matching iterator definition. */
+void Compiler::makeDefaultIterators()
+{
+	/* Tree iterator. */
+	{
+		UniqueType *refUT = findUniqueType( TYPE_REF, anyLangEl );
+		ObjMethod *method = initFunction( uniqueTypeAny, globalObjectDef,
+				"triter", IN_HALT, IN_HALT, refUT, true );
+		method->iterDef = findIterDef( IterDef::Tree );
+	}
+
+	/* Child iterator. */
+	{
+		UniqueType *refUT = findUniqueType( TYPE_REF, anyLangEl );
+		ObjMethod *method = initFunction( uniqueTypeAny, globalObjectDef,
+				"child", IN_HALT, IN_HALT, refUT, true );
+		method->iterDef = findIterDef( IterDef::Child );
+	}
+
+	/* Reverse child iterator. */
+	{
+		UniqueType *refUT = findUniqueType( TYPE_REF, anyLangEl );
+		ObjMethod *method = initFunction( uniqueTypeAny, globalObjectDef,
+				"rev_child", IN_HALT, IN_HALT, refUT, true );
+		method->iterDef = findIterDef( IterDef::RevChild );
+	}
+
+	/* Repeat iterator. */
+	{
+		UniqueType *refUT = findUniqueType( TYPE_REF, anyLangEl );
+		ObjMethod *method = initFunction( uniqueTypeAny, globalObjectDef,
+				"repeat", IN_HALT, IN_HALT, refUT, true );
+		method->iterDef = findIterDef( IterDef::Repeat );
+	}
+
+	/* Reverse repeat iterator. */
+	{
+		UniqueType *refUT = findUniqueType( TYPE_REF, anyLangEl );
+		ObjMethod *method = initFunction( uniqueTypeAny, globalObjectDef,
+				"rev_repeat", IN_HALT, IN_HALT, refUT, true );
+		method->iterDef = findIterDef( IterDef::RevRepeat );
+	}
+}
+
+/* Add the constant global field "stdin" (stream type), read with
+ * IN_GET_STDIN rather than through a frame offset. */
+void Compiler::addStdin()
+{
+    /* The field's type is the stream type. */
+    TypeRef *streamTypeRef = new TypeRef( InputLoc(), uniqueTypeStream );
+
+    /* Build the field, already marked referenced and initialized. */
+    ObjField *field = new ObjField( InputLoc(), streamTypeRef, "stdin" );
+    field->isConst = true;
+    field->useOffset = false;
+    field->beenReferenced = true;
+    field->beenInitialized = true;
+    field->inGetR = IN_GET_STDIN;
+
+    /* Publish it on the global object. */
+    globalObjectDef->insertField( field->name, field );
+}
+
+/* Add the constant global field "stdout", read with IN_GET_STDOUT rather
+ * than through a frame offset. The field was previously registered under the
+ * misspelled name "stout", unlike its siblings "stdin" and "stderr". */
+void Compiler::addStdout()
+{
+    /* Make the type ref. */
+    TypeRef *typeRef = new TypeRef( InputLoc(), uniqueTypeStr );
+
+    /* Create the field and insert it into the map. */
+    ObjField *el = new ObjField( InputLoc(), typeRef, "stdout" );
+    el->beenReferenced = true;
+    el->beenInitialized = true;
+    el->isConst = true;
+    el->useOffset = false;
+    el->inGetR = IN_GET_STDOUT;
+    globalObjectDef->insertField( el->name, el );
+}
+
+/* Add the constant global field "stderr", read with IN_GET_STDERR rather
+ * than through a frame offset. */
+void Compiler::addStderr()
+{
+    /* The field is declared with the str unique type. */
+    TypeRef *strTypeRef = new TypeRef( InputLoc(), uniqueTypeStr );
+
+    /* Build the field, already marked referenced and initialized. */
+    ObjField *field = new ObjField( InputLoc(), strTypeRef, "stderr" );
+    field->isConst = true;
+    field->useOffset = false;
+    field->beenReferenced = true;
+    field->beenInitialized = true;
+    field->inGetR = IN_GET_STDERR;
+
+    /* Publish it on the global object. */
+    globalObjectDef->insertField( field->name, field );
+}
+
+/* Add the constant global field "argv", flagged with isArgv so that
+ * argvOffset() can locate it later. */
+void Compiler::addArgv()
+{
+    ObjField *argvField = new ObjField( InputLoc(), argvTypeRef, "argv" );
+    argvField->isConst = true;
+    argvField->isArgv = true;
+    globalObjectDef->insertField( argvField->name, argvField );
+}
+
+/* Find the global field flagged isArgv, mark it referenced, and return its
+ * offset. The field is expected to exist (addArgv creates it); if it does
+ * not, this asserts, and returns -1 when assertions are compiled out. */
+int Compiler::argvOffset()
+{
+    for ( ObjFieldList::Iter field = *globalObjectDef->objFieldList;
+            field.lte(); field++ )
+    {
+        if ( field->value->isArgv ) {
+            globalObjectDef->referenceField( this, field->value );
+            return field->value->offset;
+        }
+    }
+
+    assert(false);
+
+    /* Only reached with NDEBUG: never fall off the end of a non-void
+     * function, which is undefined behaviour. */
+    return -1;
+}
+
+/* Register the built-in global functions (open, tolower, toupper, exit,
+ * error) on the global object, then add the stdin, stdout, stderr and argv
+ * fields. */
+void Compiler::initGlobalFunctions()
+{
+    /* open, tolower and toupper have useCallObj cleared. */
+    ObjMethod *openMethod = initFunction( uniqueTypeStream, globalObjectDef,
+            "open", IN_OPEN_FILE, IN_OPEN_FILE, uniqueTypeStr, uniqueTypeStr, true );
+    openMethod->useCallObj = false;
+
+    ObjMethod *lowerMethod = initFunction( uniqueTypeStr, globalObjectDef,
+            "tolower", IN_TO_LOWER, IN_TO_LOWER, uniqueTypeStr, true );
+    lowerMethod->useCallObj = false;
+
+    ObjMethod *upperMethod = initFunction( uniqueTypeStr, globalObjectDef,
+            "toupper", IN_TO_UPPER, IN_TO_UPPER, uniqueTypeStr, true );
+    upperMethod->useCallObj = false;
+
+    /* exit and error keep whatever useCallObj initFunction gave them. */
+    initFunction( uniqueTypeInt, globalObjectDef, "exit",
+            IN_EXIT, IN_EXIT, uniqueTypeInt, true );
+
+    initFunction( uniqueTypeStr, globalObjectDef, "error",
+            IN_ERROR, IN_ERROR, true );
+
+    /* Built-in global fields. */
+    addStdin();
+    addStdout();
+    addStderr();
+    addArgv();
+}
+
+/* Detach from replList every replacement whose parse flag is not set. The
+ * detached items are not deleted here. */
+void Compiler::removeNonUnparsableRepls()
+{
+    ReplList::Iter repl = replList;
+    while ( repl.lte() ) {
+        /* Advance before a possible detach so the iterator never sits on
+         * an element that has been removed from the list. */
+        Replacement *candidate = repl++;
+        if ( candidate->parse )
+            continue;
+        replList.detach( candidate );
+    }
+}
+
+/* Top-level driver for bytecode generation. Initializes the built-in
+ * objects, iterators and global functions, then compiles, in order: user
+ * functions and iterators, reduction code, token translation blocks, pre-eof
+ * blocks and finally the root block. The member `context` is set while a
+ * block that has a context is compiled and is reset to 0 afterwards. */
+void Compiler::compileByteCode()
+{
+// initUniqueTypes();
+ initIntObject();
+ initStrObject();
+ initStreamObject();
+ initInputObject();
+ initTokenObjects();
+ makeDefaultIterators();
+ initAllLanguageObjects();
+ initGenericTypes();
+
+ initGlobalFunctions();
+
+ /* Make every function visible before any body is compiled. */
+ for ( FunctionList::Iter f = functionList; f.lte(); f++ )
+ makeFuncVisible( f, f->isUserIter );
+
+ /* This may be comment rot: The function info structure relies on functions
+ * being compiled first, then iterators. */
+
+ /* Compile functions. */
+ for ( FunctionList::Iter f = functionList; f.lte(); f++ ) {
+ if ( f->inContext != 0 )
+ context = f->inContext;
+ if ( f->isUserIter )
+ compileUserIter( f );
+ else
+ compileFunction( f );
+ context = 0;
+ }
+
+ /* Compile the reduction code. Production copies are made for every
+ * production, whether or not it has a reduction block. */
+ for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) {
+ makeProdCopies( prod );
+ if ( prod->redBlock != 0 ) {
+ if ( prod->redBlock->context != 0 )
+ context = prod->redBlock->context;
+ compileReductionCode( prod );
+ context = 0;
+ }
+ }
+
+ /* Compile the token translation code. */
+ for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
+ if ( lel->transBlock != 0 ) {
+ if ( lel->transBlock->context != 0 )
+ context = lel->transBlock->context;
+ compileTranslateBlock( lel );
+ context = 0;
+ }
+ }
+
+ /* Compile preeof blocks. No context is set for these. */
+ for ( RegionList::Iter r = regionList; r.lte(); r++ ) {
+ if ( r->preEofBlock != 0 )
+ compilePreEof( r );
+ }
+
+ /* Compile the init code */
+ compileRootBlock( );
+
+ /* Drop the replacements whose parse flag is not set. */
+ removeNonUnparsableRepls();
+}
diff --git a/src/tree.c b/src/tree.c
new file mode 100644
index 00000000..14f7d81f
--- /dev/null
+++ b/src/tree.c
@@ -0,0 +1,2484 @@
+/*
+ * Copyright 2008-2012 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <pdarun.h>
+#include <tree.h>
+#include <pool.h>
+#include <bytecode.h>
+#include <debug.h>
+#include <map.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+
+#define true 1
+#define false 0
+
+#define BUFFER_INITIAL_SIZE 4096
+
+/* Insert new_el at the front of list (before the current head). */
+void listPrepend( List *list, ListEl *new_el)
+{
+    listAddBefore( list, list->head, new_el );
+}
+/* Insert new_el at the back of list (after the current tail). */
+void listAppend( List *list, ListEl *new_el)
+{
+    listAddAfter( list, list->tail, new_el );
+}
+
+ListEl *listDetach( List *list, ListEl *el );
+/* Detach the head element of list; returns whatever listDetach returns. */
+ListEl *listDetachFirst(List *list )
+{
+    ListEl *first = list->head;
+    return listDetach( list, first );
+}
+/* Detach the tail element of list; returns whatever listDetach returns. */
+ListEl *listDetachLast(List *list )
+{
+    ListEl *final = list->tail;
+    return listDetach( list, final );
+}
+
+/* Number of elements, as recorded in the list's listLen field. */
+long listLength(List *list)
+{
+    return list->listLen;
+}
+
+/* Reset a tree iterator: copy the root reference, record the search id and
+ * stack root, and clear the stack size and the current reference. */
+void initTreeIter( TreeIter *treeIter, const Ref *rootRef, int searchId, Tree **stackRoot )
+{
+    /* What to iterate over and what to look for. */
+    treeIter->rootRef = *rootRef;
+    treeIter->searchId = searchId;
+
+    /* Stack bookkeeping. */
+    treeIter->stackRoot = stackRoot;
+    treeIter->stackSize = 0;
+
+    /* No current position yet. */
+    treeIter->ref.next = 0;
+    treeIter->ref.kid = 0;
+}
+
+/* Reset a reverse tree iterator. stackSize and children both start at the
+ * supplied child count; the current reference and kidAtYield are cleared. */
+void initRevTreeIter( RevTreeIter *revTriter, const Ref *rootRef,
+        int searchId, Tree **stackRoot, int children )
+{
+    /* What to iterate over and what to look for. */
+    revTriter->rootRef = *rootRef;
+    revTriter->searchId = searchId;
+
+    /* Stack bookkeeping. */
+    revTriter->stackRoot = stackRoot;
+    revTriter->stackSize = children;
+    revTriter->children = children;
+
+    /* No current position yet. */
+    revTriter->kidAtYield = 0;
+    revTriter->ref.next = 0;
+    revTriter->ref.kid = 0;
+}
+
+/* Reset a user iterator: record the stack root, argument size and search
+ * id, and clear the stack size, resume point, frame and current reference. */
+void initUserIter( UserIter *userIter, Tree **stackRoot, long argSize, long searchId )
+{
+    /* Caller-supplied values. */
+    userIter->stackRoot = stackRoot;
+    userIter->argSize = argSize;
+    userIter->searchId = searchId;
+
+    /* Execution state starts out empty. */
+    userIter->stackSize = 0;
+    userIter->resume = 0;
+    userIter->frame = 0;
+
+    /* No current position yet. */
+    userIter->ref.next = 0;
+    userIter->ref.kid = 0;
+}
+
+/* Allocate a chain of `length` kids linked through next and return its head
+ * (0 when length <= 0). Only the next pointers are assigned here. */
+Kid *allocAttrs( Program *prg, long length )
+{
+    Kid *head = 0;
+    long remaining = length;
+
+    /* Each new kid is pushed on the front of the chain. */
+    while ( remaining-- > 0 ) {
+        Kid *fresh = kidAllocate( prg );
+        fresh->next = head;
+        head = fresh;
+    }
+
+    return head;
+}
+
+/* Free every kid in the chain starting at attrs. The trees the kids point
+ * at are not touched. */
+void freeAttrs( Program *prg, Kid *attrs )
+{
+    Kid *kid, *following;
+    for ( kid = attrs; kid != 0; kid = following ) {
+        /* Read the link before the kid is released. */
+        following = kid->next;
+        kidFree( prg, kid );
+    }
+}
+
+/* Free every kid in the list starting at kid. The trees the kids point at
+ * are not touched. */
+void freeKidList( Program *prg, Kid *kid )
+{
+    Kid *following;
+    for ( ; kid != 0; kid = following ) {
+        /* Read the link before the kid is released. */
+        following = kid->next;
+        kidFree( prg, kid );
+    }
+}
+
+/* Store val in the attribute slot at index pos of tree. The ignore kids (one
+ * per AF_LEFT_IGNORE / AF_RIGHT_IGNORE flag) come first in the child list and
+ * are skipped. No reference counts are adjusted. */
+void setAttr( Tree *tree, long pos, Tree *val )
+{
+    Kid *slot = tree->child;
+
+    if ( tree->flags & AF_LEFT_IGNORE )
+        slot = slot->next;
+    if ( tree->flags & AF_RIGHT_IGNORE )
+        slot = slot->next;
+
+    /* Walk forward pos slots. */
+    while ( pos-- > 0 )
+        slot = slot->next;
+
+    slot->tree = val;
+}
+
+/* Fetch attribute pos of the program's global object. */
+Tree *getGlobal( Program *prg, long pos )
+{
+    Tree *global = prg->global;
+    return getAttr( global, pos );
+}
+
+/* Return the tree stored in the attribute slot at index pos of tree. The
+ * ignore kids at the front of the child list are skipped. No reference
+ * counts are adjusted. */
+Tree *getAttr( Tree *tree, long pos )
+{
+    Kid *slot = tree->child;
+
+    if ( tree->flags & AF_LEFT_IGNORE )
+        slot = slot->next;
+    if ( tree->flags & AF_RIGHT_IGNORE )
+        slot = slot->next;
+
+    /* Walk forward pos slots. */
+    while ( pos-- > 0 )
+        slot = slot->next;
+
+    return slot->tree;
+}
+
+
+/* Return the tree held by the second kid after any ignore kids. */
+Tree *getRepeatNext( Tree *tree )
+{
+    Kid *first = tree->child;
+
+    if ( tree->flags & AF_LEFT_IGNORE )
+        first = first->next;
+    if ( tree->flags & AF_RIGHT_IGNORE )
+        first = first->next;
+
+    Kid *second = first->next;
+    return second->tree;
+}
+
+/* Return the tree held by the first kid after any ignore kids. */
+Tree *getRepeatVal( Tree *tree )
+{
+    Kid *first = tree->child;
+
+    if ( tree->flags & AF_LEFT_IGNORE )
+        first = first->next;
+    if ( tree->flags & AF_RIGHT_IGNORE )
+        first = first->next;
+
+    return first->tree;
+}
+
+/* Non-zero when the tree has no kids left after any ignore kids. */
+int repeatEnd( Tree *tree )
+{
+    Kid *first = tree->child;
+
+    if ( tree->flags & AF_LEFT_IGNORE )
+        first = first->next;
+    if ( tree->flags & AF_RIGHT_IGNORE )
+        first = first->next;
+
+    return first == 0;
+}
+
+/* Non-zero when exactly one kid remains after any ignore kids, i.e. the
+ * first such kid has no successor. */
+int listLast( Tree *tree )
+{
+    Kid *first = tree->child;
+
+    if ( tree->flags & AF_LEFT_IGNORE )
+        first = first->next;
+    if ( tree->flags & AF_RIGHT_IGNORE )
+        first = first->next;
+
+    return first->next == 0;
+}
+
+/* Return the kid (not the tree) of the attribute slot at index pos, skipping
+ * the ignore kids at the front of the child list. */
+Kid *getAttrKid( Tree *tree, long pos )
+{
+    Kid *slot = tree->child;
+
+    if ( tree->flags & AF_LEFT_IGNORE )
+        slot = slot->next;
+    if ( tree->flags & AF_RIGHT_IGNORE )
+        slot = slot->next;
+
+    /* Walk forward pos slots. */
+    while ( pos-- > 0 )
+        slot = slot->next;
+
+    return slot;
+}
+
+/* Append list2 to the end of list1 in place and return the head of the
+ * combined list. Either list may be empty (0). */
+Kid *kidListConcat( Kid *list1, Kid *list2 )
+{
+    if ( list1 == 0 )
+        return list2;
+
+    if ( list2 != 0 ) {
+        /* Find the last kid of list1 and hook list2 onto it. */
+        Kid *tail = list1;
+        for ( ; tail->next != 0; tail = tail->next )
+            ;
+        tail->next = list2;
+    }
+
+    return list1;
+}
+
+
+/* Build a Stream object around an already-open FILE. The stream records the
+ * FILE and gets a freshly created and initialized source stream. */
+Stream *openStreamFile( Program *prg, FILE *file )
+{
+    Stream *stream = (Stream*)mapElAllocate( prg );
+
+    stream->id = LEL_ID_STREAM;
+    stream->file = file;
+
+    /* Create the source stream, then initialize it. */
+    stream->in = newSourceStreamFile( file );
+    initSourceStream( stream->in );
+
+    return stream;
+}
+
+/* Build a Stream object around a file descriptor. The file member is not
+ * assigned here. */
+Stream *openStreamFd( Program *prg, long fd )
+{
+    Stream *stream = (Stream*)mapElAllocate( prg );
+
+    stream->id = LEL_ID_STREAM;
+
+    /* Create the source stream, then initialize it. */
+    stream->in = newSourceStreamFd( fd );
+    initSourceStream( stream->in );
+
+    return stream;
+}
+
+/* Open the file named by the string tree `name` using the mode given by the
+ * string tree `mode` and wrap the resulting FILE in a Stream.
+ *
+ * The mode must be exactly "r" (opened "rb") or "w" (opened "wb"); anything
+ * else is reported through fatal(). The mode is matched by length and
+ * character instead of memcmp over the mode's length, which accepted an
+ * empty mode as "r" and could read past the end of the one-character
+ * literal for long modes.
+ *
+ * NOTE(review): the result of fopen is passed on unchecked, as before;
+ * confirm how callers expect a failed open to be reported. */
+Stream *openFile( Program *prg, Tree *name, Tree *mode )
+{
+    Head *headName = ((Str*)name)->value;
+    Head *headMode = ((Str*)mode)->value;
+
+    const char *givenMode = stringData(headMode);
+    long modeLength = stringLength(headMode);
+    const char *fopenMode = 0;
+    if ( modeLength == 1 && givenMode[0] == 'r' )
+        fopenMode = "rb";
+    else if ( modeLength == 1 && givenMode[0] == 'w' )
+        fopenMode = "wb";
+    else {
+        fatal( "unknown file open mode: %s\n", givenMode );
+    }
+
+    /* Need to make a C-string (null terminated). */
+    long nameLength = stringLength(headName);
+    char *fileName = (char*)malloc(nameLength+1);
+    memcpy( fileName, stringData(headName), nameLength );
+    fileName[nameLength] = 0;
+    FILE *file = fopen( fileName, fopenMode );
+    free(fileName);
+    return openStreamFile( prg, file );
+}
+
+/* Allocate an Int tree holding i. The reference count is not set here. */
+Tree *constructInteger( Program *prg, long i )
+{
+    Int *result = (Int*) treeAllocate( prg );
+    result->value = i;
+    result->id = LEL_ID_INT;
+    return (Tree*)result;
+}
+
+/* Allocate a Str tree that stores the head s directly (the head is not
+ * copied). The reference count is not set here. */
+Tree *constructString( Program *prg, Head *s )
+{
+    Str *result = (Str*) treeAllocate( prg );
+    result->value = s;
+    result->id = LEL_ID_STR;
+    return (Tree*)result;
+}
+
+/* Push tree onto the front of the program's heap list inside a new kid and
+ * return a Pointer tree whose value is that kid. */
+Tree *constructPointer( Program *prg, Tree *tree )
+{
+    /* New heap entry, linked in at the head of prg->heap. */
+    Kid *heapKid = kidAllocate( prg );
+    heapKid->tree = tree;
+    heapKid->next = prg->heap;
+    prg->heap = heapKid;
+
+    /* The pointer object refers to the heap entry. */
+    Pointer *result = (Pointer*) treeAllocate( prg );
+    result->value = heapKid;
+    result->id = LEL_ID_PTR;
+
+    return (Tree*)result;
+}
+
+/* Allocate a tree with the given id and token data, a reference count of
+ * zero, and one attribute slot per objectLength of its language element. */
+Tree *constructTerm( Program *prg, Word id, Head *tokdata )
+{
+    Tree *term = treeAllocate( prg );
+    term->id = id;
+    term->refs = 0;
+    term->tokdata = tokdata;
+
+    /* The attribute count comes from the runtime language element table. */
+    LangElInfo *lelInfo = prg->rtd->lelInfo;
+    int attrCount = lelInfo[term->id].objectLength;
+    term->child = allocAttrs( prg, attrCount );
+
+    return term;
+}
+
+/* Allocate an Input object with zero references and a freshly malloc'd,
+ * initialized InputStream. */
+Tree *constructInput( Program *prg )
+{
+    Input *result = inputAllocate( prg );
+    result->id = LEL_ID_INPUT;
+    result->refs = 0;
+
+    /* The stream storage is heap allocated separately from the object. */
+    result->in = malloc( sizeof(InputStream) );
+    initInputStream( result->in );
+
+    return (Tree*)result;
+}
+
+Kid *constructReplacementKid( Tree **bindings, Program *prg, Kid *prev, long pat );
+
+/* Build a kid list of ignore trees by following the `next` chain of
+ * pattern/replacement nodes from ignoreInd until a negative index. Each tree
+ * has refs 1 and token data pointing at the node's data. Returns the first
+ * kid, or 0 when ignoreInd is already negative. */
+static Kid *constructIgnoreList( Program *prg, long ignoreInd )
+{
+    PatReplNode *nodes = prg->rtd->patReplNodes;
+    Kid *first = 0, *tail = 0;
+
+    for ( ; ignoreInd >= 0; ignoreInd = nodes[ignoreInd].next ) {
+        PatReplNode *node = &nodes[ignoreInd];
+        Head *tokdata = stringAllocPointer( prg, node->data, node->length );
+
+        Tree *ignTree = treeAllocate( prg );
+        ignTree->id = node->id;
+        ignTree->refs = 1;
+        ignTree->tokdata = tokdata;
+
+        Kid *ignKid = kidAllocate( prg );
+        ignKid->next = 0;
+        ignKid->tree = ignTree;
+
+        /* Append to the list under construction. */
+        if ( tail != 0 )
+            tail->next = ignKid;
+        else
+            first = ignKid;
+        tail = ignKid;
+    }
+
+    return first;
+}
+
+/* Build the ignore list that starts at the leftIgnore index of node pat. */
+static Kid *constructLeftIgnoreList( Program *prg, long pat )
+{
+    long start = prg->rtd->patReplNodes[pat].leftIgnore;
+    return constructIgnoreList( prg, start );
+}
+
+/* Build the ignore list that starts at the rightIgnore index of node pat. */
+static Kid *constructRightIgnoreList( Program *prg, long pat )
+{
+    long start = prg->rtd->patReplNodes[pat].rightIgnore;
+    return constructIgnoreList( prg, start );
+}
+
+/* Put ignoreList at the very front of tree's child list as its left ignore
+ * and set AF_LEFT_IGNORE. ignoreList gains one reference. The tree must not
+ * already have a left ignore. */
+static void insLeftIgnore( Program *prg, Tree *tree, Tree *ignoreList )
+{
+    assert( ! (tree->flags & AF_LEFT_IGNORE) );
+
+    /* New head kid holding the ignore list. */
+    Kid *head = kidAllocate( prg );
+    head->tree = ignoreList;
+    treeUpref( ignoreList );
+
+    /* The left ignore always comes first. */
+    head->next = tree->child;
+    tree->child = head;
+
+    tree->flags |= AF_LEFT_IGNORE;
+}
+
+/* Insert ignoreList into tree's child list as its right ignore and set
+ * AF_RIGHT_IGNORE. The right ignore sits directly after the left ignore kid
+ * when there is one, otherwise at the front. ignoreList gains one reference.
+ * The tree must not already have a right ignore. */
+static void insRightIgnore( Program *prg, Tree *tree, Tree *ignoreList )
+{
+    assert( ! (tree->flags & AF_RIGHT_IGNORE) );
+
+    /* New kid holding the ignore list. */
+    Kid *entry = kidAllocate( prg );
+    entry->tree = ignoreList;
+    treeUpref( ignoreList );
+
+    /* Pick the link the new kid is spliced into. */
+    Kid **link = ( tree->flags & AF_LEFT_IGNORE ) ?
+            &tree->child->next : &tree->child;
+    entry->next = *link;
+    *link = entry;
+
+    tree->flags |= AF_RIGHT_IGNORE;
+}
+
+/* Attach rightIgnore as the right ignore of pushTo. pushTo is passed through
+ * splitTree first and the resulting tree is returned, so callers must
+ * continue with the return value. If the tree already has a right ignore,
+ * that existing list is attached to rightIgnore as its left ignore and
+ * rightIgnore takes its place in the tree. */
+Tree *pushRightIgnore( Program *prg, Tree *pushTo, Tree *rightIgnore )
+{
+ /* About to alter the data tree. Split first. */
+ pushTo = splitTree( prg, pushTo );
+
+ if ( pushTo->flags & AF_RIGHT_IGNORE ) {
+ /* The previous token already has a right ignore. Merge by
+ * attaching it as a left ignore of the new list. */
+ Kid *curIgnore = treeRightIgnoreKid( prg, pushTo );
+ insLeftIgnore( prg, rightIgnore, curIgnore->tree );
+
+ /* Replace the current ignore. Decrementing refs directly cannot reach
+ * zero here because insLeftIgnore just upreffed the old list. */
+ curIgnore->tree->refs -= 1;
+ curIgnore->tree = rightIgnore;
+ treeUpref( rightIgnore );
+ }
+ else {
+ /* Attach the ignore list. */
+ insRightIgnore( prg, pushTo, rightIgnore );
+ }
+
+ return pushTo;
+}
+
+/* Attach leftIgnore as the left ignore of pushTo. pushTo is passed through
+ * splitTree first and the resulting tree is returned, so callers must
+ * continue with the return value. If the tree already has a left ignore,
+ * that existing list is attached to leftIgnore as its right ignore and
+ * leftIgnore takes its place in the tree. */
+Tree *pushLeftIgnore( Program *prg, Tree *pushTo, Tree *leftIgnore )
+{
+ /* About to alter the tree. Split first. */
+ pushTo = splitTree( prg, pushTo );
+
+ /* Attach as left ignore to the token we are sending. */
+ if ( pushTo->flags & AF_LEFT_IGNORE ) {
+ /* The token already has a left-ignore. Merge by attaching it as a
+ * right ignore of the new list. */
+ Kid *curIgnore = treeLeftIgnoreKid( prg, pushTo );
+ insRightIgnore( prg, leftIgnore, curIgnore->tree );
+
+ /* Replace the current ignore. Decrementing refs directly cannot reach
+ * zero here because insRightIgnore just upreffed the old list. */
+ curIgnore->tree->refs -= 1;
+ curIgnore->tree = leftIgnore;
+ treeUpref( leftIgnore );
+ }
+ else {
+ /* Attach the ignore list. */
+ insLeftIgnore( prg, pushTo, leftIgnore );
+ }
+
+ return pushTo;
+}
+
+/* Remove the left ignore of tree: drop one reference on the ignore list,
+ * free the kid that held it, and clear AF_LEFT_IGNORE. The tree must have a
+ * left ignore. */
+static void remLeftIgnore( Program *prg, Tree **sp, Tree *tree )
+{
+    assert( tree->flags & AF_LEFT_IGNORE );
+
+    /* The left ignore is always the first kid. */
+    Kid *ignoreKid = tree->child;
+    Kid *rest = ignoreKid->next;
+
+    treeDownref( prg, sp, ignoreKid->tree );
+    kidFree( prg, ignoreKid );
+    tree->child = rest;
+
+    tree->flags &= ~AF_LEFT_IGNORE;
+}
+
+/* Remove the right ignore of tree: drop one reference on the ignore list,
+ * free the kid that held it, and clear AF_RIGHT_IGNORE. The right ignore kid
+ * follows the left ignore kid when there is one, otherwise it is first. The
+ * tree must have a right ignore. */
+static void remRightIgnore( Program *prg, Tree **sp, Tree *tree )
+{
+    assert( tree->flags & AF_RIGHT_IGNORE );
+
+    /* Locate the link that points at the right ignore kid. */
+    Kid **link = ( tree->flags & AF_LEFT_IGNORE ) ?
+            &tree->child->next : &tree->child;
+
+    Kid *ignoreKid = *link;
+    Kid *rest = ignoreKid->next;
+
+    treeDownref( prg, sp, ignoreKid->tree );
+    kidFree( prg, ignoreKid );
+    *link = rest;
+
+    tree->flags &= ~AF_RIGHT_IGNORE;
+}
+
+/* Undo a pushRightIgnore on popFrom. The popped ignore list is stored,
+ * upreffed, in *rightIgnore. popFrom is passed through splitTree first and
+ * the resulting tree is returned, so callers must continue with the return
+ * value. popFrom must currently have a right ignore. */
+Tree *popRightIgnore( Program *prg, Tree **sp, Tree *popFrom, Tree **rightIgnore )
+{
+    /* Modifying the tree we are detaching from. */
+    popFrom = splitTree( prg, popFrom );
+
+    Kid *riKid = treeRightIgnoreKid( prg, popFrom );
+
+    /* If the right ignore has a left ignore, then that was the original
+     * right ignore. */
+    Kid *li = treeLeftIgnoreKid( prg, riKid->tree );
+    if ( li != 0 ) {
+        /* remLeftIgnore frees the kid li points at, so take the tree
+         * pointer out of it now; li must not be used after that call. */
+        Tree *origIgnore = li->tree;
+        treeUpref( origIgnore );
+        remLeftIgnore( prg, sp, riKid->tree );
+        *rightIgnore = riKid->tree;
+        treeUpref( *rightIgnore );
+        riKid->tree = origIgnore;
+    }
+    else {
+        *rightIgnore = riKid->tree;
+        treeUpref( *rightIgnore );
+        remRightIgnore( prg, sp, popFrom );
+    }
+
+    return popFrom;
+}
+
+/* Undo a pushLeftIgnore on popFrom. The popped ignore list is stored,
+ * upreffed, in *leftIgnore. popFrom is passed through splitTree first and
+ * the resulting tree is returned, so callers must continue with the return
+ * value. popFrom must currently have a left ignore. */
+Tree *popLeftIgnore( Program *prg, Tree **sp, Tree *popFrom, Tree **leftIgnore )
+{
+    /* Modifying, make the write safe. */
+    popFrom = splitTree( prg, popFrom );
+
+    Kid *liKid = treeLeftIgnoreKid( prg, popFrom );
+
+    /* If the left ignore has a right ignore, then that was the original
+     * left ignore. */
+    Kid *ri = treeRightIgnoreKid( prg, liKid->tree );
+    if ( ri != 0 ) {
+        /* remRightIgnore frees the kid ri points at, so take the tree
+         * pointer out of it now; ri must not be used after that call. */
+        Tree *origIgnore = ri->tree;
+        treeUpref( origIgnore );
+        remRightIgnore( prg, sp, liKid->tree );
+        *leftIgnore = liKid->tree;
+        treeUpref( *leftIgnore );
+        liKid->tree = origIgnore;
+    }
+    else {
+        *leftIgnore = liKid->tree;
+        treeUpref( *leftIgnore );
+        remLeftIgnore( prg, sp, popFrom );
+    }
+
+    return popFrom;
+}
+
+
+/* Build the tree for pattern/replacement node `pat`.
+ *
+ * If the node has a bind id, the already-bound tree from `bindings` is used
+ * and the node's left and right ignore lists, if any, are pushed onto it.
+ * Otherwise a new tree is allocated with refs 1 and given attribute slots,
+ * recursively constructed children, ignore lists and capture attributes.
+ *
+ * Returns an uprefed tree. This saves callers from having to downref the
+ * bindings in order to return a zero-ref tree. The `kid` parameter is not
+ * used in this function. */
+Tree *constructReplacementTree( Kid *kid, Tree **bindings, Program *prg, long pat )
+{
+ PatReplNode *nodes = prg->rtd->patReplNodes;
+ LangElInfo *lelInfo = prg->rtd->lelInfo;
+ Tree *tree = 0;
+
+ if ( nodes[pat].bindId > 0 ) {
+ /* All bindings have been uprefed. */
+ tree = bindings[nodes[pat].bindId];
+
+ /* Note: the inner `Kid *ignore` declarations below shadow this index. */
+ long ignore = nodes[pat].leftIgnore;
+ Tree *leftIgnore = 0;
+ if ( ignore >= 0 ) {
+ Kid *ignore = constructLeftIgnoreList( prg, pat );
+
+ /* refs is not assigned here; pushLeftIgnore takes the reference. */
+ leftIgnore = treeAllocate( prg );
+ leftIgnore->id = LEL_ID_IGNORE;
+ leftIgnore->child = ignore;
+
+ tree = pushLeftIgnore( prg, tree, leftIgnore );
+ }
+
+ ignore = nodes[pat].rightIgnore;
+ Tree *rightIgnore = 0;
+ if ( ignore >= 0 ) {
+ Kid *ignore = constructRightIgnoreList( prg, pat );
+
+ /* refs is not assigned here; pushRightIgnore takes the reference. */
+ rightIgnore = treeAllocate( prg );
+ rightIgnore->id = LEL_ID_IGNORE;
+ rightIgnore->child = ignore;
+
+ tree = pushRightIgnore( prg, tree, rightIgnore );
+ }
+ }
+ else {
+ tree = treeAllocate( prg );
+ tree->id = nodes[pat].id;
+ tree->refs = 1;
+ tree->tokdata = nodes[pat].length == 0 ? 0 :
+ stringAllocPointer( prg,
+ nodes[pat].data, nodes[pat].length );
+
+ int objectLength = lelInfo[tree->id].objectLength;
+
+ /* Child list layout: attribute slots first, then the real children. */
+ Kid *attrs = allocAttrs( prg, objectLength );
+ Kid *child = constructReplacementKid( bindings, prg,
+ 0, nodes[pat].child );
+
+ tree->child = kidListConcat( attrs, child );
+
+ /* Right first, then left. Each is inserted at the head of the child
+ * list, so the left ignore ends up in front of the right ignore. */
+ Kid *ignore = constructRightIgnoreList( prg, pat );
+ if ( ignore != 0 ) {
+ Tree *ignoreList = treeAllocate( prg );
+ ignoreList->id = LEL_ID_IGNORE;
+ ignoreList->refs = 1;
+ ignoreList->child = ignore;
+
+ Kid *ignoreHead = kidAllocate( prg );
+ ignoreHead->tree = ignoreList;
+ ignoreHead->next = tree->child;
+ tree->child = ignoreHead;
+
+ tree->flags |= AF_RIGHT_IGNORE;
+ }
+
+ ignore = constructLeftIgnoreList( prg, pat );
+ if ( ignore != 0 ) {
+ Tree *ignoreList = treeAllocate( prg );
+ ignoreList->id = LEL_ID_IGNORE;
+ ignoreList->refs = 1;
+ ignoreList->child = ignore;
+
+ Kid *ignoreHead = kidAllocate( prg );
+ ignoreHead->tree = ignoreList;
+ ignoreHead->next = tree->child;
+ tree->child = ignoreHead;
+
+ tree->flags |= AF_LEFT_IGNORE;
+ }
+
+ /* Capture attributes are read from the nodes directly following pat
+ * (pat+1, pat+2, ...) and stored at the offsets given by the capture
+ * attribute table. */
+ int i;
+ for ( i = 0; i < lelInfo[tree->id].numCaptureAttr; i++ ) {
+ long ci = pat+1+i;
+ CaptureAttr *ca = prg->rtd->captureAttr + lelInfo[tree->id].captureAttr + i;
+ Tree *attr = treeAllocate( prg );
+ attr->id = nodes[ci].id;
+ attr->refs = 1;
+ attr->tokdata = nodes[ci].length == 0 ? 0 :
+ stringAllocPointer( prg,
+ nodes[ci].data, nodes[ci].length );
+
+ setAttr( tree, ca->offset, attr );
+ }
+ }
+
+ return tree;
+}
+
+/* Build the kid list for the sibling chain that starts at node pat. Each
+ * kid holds the tree constructed for its node; the chain is followed through
+ * the nodes' `next` indices until -1. Returns 0 when pat is -1. */
+Kid *constructReplacementKid( Tree **bindings, Program *prg, Kid *prev, long pat )
+{
+    /* End of the sibling chain. */
+    if ( pat == -1 )
+        return 0;
+
+    PatReplNode *nodes = prg->rtd->patReplNodes;
+
+    Kid *kid = kidAllocate( prg );
+    kid->tree = constructReplacementTree( kid, bindings, prg, pat );
+
+    /* Recurse along the siblings. */
+    kid->next = constructReplacementKid( bindings, prg, kid, nodes[pat].next );
+
+    return kid;
+}
+
+/* Construct a token tree from nargs arguments on the VM stack. The token id
+ * is the Int at base[-1] and the text is the Str at base[-2]; any further
+ * arguments (base[-3], base[-4], ...) become attribute values, in order.
+ * The result has refs 1 and owns a copy of the text. Ignore tokens get no
+ * attribute slots. */
+Tree *constructToken( Program *prg, Tree **root, long nargs )
+{
+    /* sp is referenced by the vm_ptop() macro. */
+    Tree **const sp = root;
+    Tree **base = vm_ptop() + nargs;
+
+    Int *idInt = (Int*)base[-1];
+    Str *textStr = (Str*)base[-2];
+
+    long id = idInt->value;
+    Head *tokdata = stringCopy( prg, textStr->value );
+
+    LangElInfo *lelInfo = prg->rtd->lelInfo;
+    Tree *tree;
+
+    if ( lelInfo[id].ignore ) {
+        tree = treeAllocate( prg );
+        tree->id = id;
+        tree->refs = 1;
+        tree->tokdata = tokdata;
+    }
+    else {
+        long objectLength = lelInfo[id].objectLength;
+        Kid *attrs = allocAttrs( prg, objectLength );
+
+        tree = treeAllocate( prg );
+        tree->id = id;
+        tree->refs = 1;
+        tree->tokdata = tokdata;
+        tree->child = attrs;
+
+        /* The extra arguments must fit in the attribute slots. */
+        assert( nargs-2 <= objectLength );
+
+        long attr;
+        for ( attr = 0; attr < nargs-2; attr++ ) {
+            Tree *val = base[-3-attr];
+            setAttr( tree, attr, val );
+            treeUpref( val );
+        }
+    }
+
+    return tree;
+}
+
+/* Construct a tree from nargs arguments on the VM stack. The tree id is the
+ * Int at base[-1]; the remaining arguments (base[-2], base[-3], ...) become
+ * the children, in order, placed after the attribute slots. The result has
+ * refs 1 and each child gains one reference. */
+Tree *makeTree( Program *prg, Tree **root, long nargs )
+{
+    /* sp is referenced by the vm_ptop() macro. */
+    Tree **const sp = root;
+    Tree **base = vm_ptop() + nargs;
+
+    Int *idInt = (Int*)base[-1];
+    long id = idInt->value;
+
+    LangElInfo *lelInfo = prg->rtd->lelInfo;
+
+    Tree *tree = treeAllocate( prg );
+    tree->id = id;
+    tree->refs = 1;
+
+    Kid *attrs = allocAttrs( prg, lelInfo[id].objectLength );
+
+    /* Collect the children into their own list. */
+    Kid *firstChild = 0, *tail = 0;
+    long arg;
+    for ( arg = 0; arg < nargs-1; arg++ ) {
+        Kid *kid = kidAllocate( prg );
+        kid->tree = base[-2-arg];
+        treeUpref( kid->tree );
+
+        if ( tail != 0 )
+            tail->next = kid;
+        else
+            firstChild = kid;
+        tail = kid;
+    }
+
+    /* Attribute slots first, then the children. */
+    tree->child = kidListConcat( attrs, firstChild );
+
+    return tree;
+}
+
+/* Returns 1 when tree counts as false: a null tree, the program's false
+ * value, or an Int tree whose value is zero. Returns 0 otherwise. */
+int testFalse( Program *prg, Tree *tree )
+{
+    if ( tree == 0 )
+        return 1;
+
+    if ( tree == prg->falseVal )
+        return 1;
+
+    return tree->id == LEL_ID_INT && ((Int*)tree)->value == 0;
+}
+
+/* Copy an ignore list whose header kid stores the first list kid in its tree
+ * field (cast to Tree*). A new header and new list kids are allocated; the
+ * trees are shared and each gains one reference. */
+Kid *copyIgnoreList( Program *prg, Kid *ignoreHeader )
+{
+    Kid *newHeader = kidAllocate( prg );
+    Kid *tail = 0;
+    Kid *src;
+
+    for ( src = (Kid*)ignoreHeader->tree; src != 0; src = src->next ) {
+        Kid *dup = kidAllocate( prg );
+
+        dup->tree = src->tree;
+        dup->tree->refs += 1;
+
+        /* The first copy hangs off the header, the rest off the tail. */
+        if ( tail != 0 )
+            tail->next = dup;
+        else
+            newHeader->tree = (Tree*)dup;
+
+        tail = dup;
+    }
+
+    return newHeader;
+}
+
+/* Copy a kid list. New kids are allocated; the trees are shared and each
+ * one is upreffed. Returns the head of the copy, or 0 for an empty list. */
+Kid *copyKidList( Program *prg, Kid *kidList )
+{
+    Kid *head = 0, *tail = 0;
+    Kid *src;
+
+    for ( src = kidList; src != 0; src = src->next ) {
+        Kid *dup = kidAllocate( prg );
+
+        dup->tree = src->tree;
+        treeUpref( dup->tree );
+
+        /* Append to the copy. */
+        if ( tail != 0 )
+            tail->next = dup;
+        else
+            head = dup;
+
+        tail = dup;
+    }
+
+    return head;
+}
+
+/* Shallow-copy a regular (non-generic, non-builtin) tree. The id and ignore
+ * flags are carried over, the token data is copied with stringCopy, and
+ * every kid in the child list is duplicated while the trees they point at
+ * are shared (each non-null one gains a reference).
+ *
+ * If oldNextDown is one of the original kids, *newNextDown is set to its
+ * copy. refs is not assigned on the new tree here; per the original note,
+ * the new tree has zero ref. */
+Tree *copyRealTree( Program *prg, Tree *tree, Kid *oldNextDown, Kid **newNextDown )
+{
+ /* Need to keep a lookout for next down. If
+ * copying it, return the copy. */
+ Tree *newTree = treeAllocate( prg );
+
+ newTree->id = tree->id;
+ newTree->tokdata = stringCopy( prg, tree->tokdata );
+
+ /* Copy the child list. Start with ignores, then the list. */
+ Kid *child = tree->child, *last = 0;
+
+ /* Left ignores. Only the flag is set here: `child` is not advanced, so
+ * the ignore kid itself is copied by the general loop below. The
+ * commented-out code is a disabled dedicated copy. */
+ if ( tree->flags & AF_LEFT_IGNORE ) {
+ newTree->flags |= AF_LEFT_IGNORE;
+// Kid *newHeader = copyIgnoreList( prg, child );
+//
+// /* Always the head. */
+// newTree->child = newHeader;
+//
+// child = child->next;
+// last = newHeader;
+ }
+
+ /* Right ignores. Same as above: flag only, the kid is copied below. */
+ if ( tree->flags & AF_RIGHT_IGNORE ) {
+ newTree->flags |= AF_RIGHT_IGNORE;
+// Kid *newHeader = copyIgnoreList( prg, child );
+// if ( last == 0 )
+// newTree->child = newHeader;
+// else
+// last->next = newHeader;
+// child = child->next;
+// last = newHeader;
+ }
+
+ /* Ignores, attributes and children. */
+ while ( child != 0 ) {
+ Kid *newKid = kidAllocate( prg );
+
+ /* Watch out for next down. */
+ if ( child == oldNextDown )
+ *newNextDown = newKid;
+
+ newKid->tree = child->tree;
+ newKid->next = 0;
+
+ /* May be an attribute, which can be null. */
+ if ( newKid->tree != 0 )
+ newKid->tree->refs += 1;
+
+ /* Store the first child. */
+ if ( last == 0 )
+ newTree->child = newKid;
+ else
+ last->next = newKid;
+
+ child = child->next;
+ last = newKid;
+ }
+
+ return newTree;
+}
+
+/* Copy a generic list. A new list object and new list elements are
+ * allocated; the element values are shared and upreffed. If oldNextDown
+ * (viewed as a ListEl) is one of the source elements, *newNextDown is set
+ * to the corresponding new element. */
+List *copyList( Program *prg, List *list, Kid *oldNextDown, Kid **newNextDown )
+{
+// #ifdef COLM_LOG_BYTECODE
+// if ( colm_log_bytecode ) {
+// cerr << "splitting list: " << list << " refs: " <<
+// list->refs << endl;
+// }
+// #endif
+
+ /* The list object itself comes from the map element allocator. */
+ List *newList = (List*)mapElAllocate( prg );
+ newList->id = list->genericInfo->langElId;
+ newList->genericInfo = list->genericInfo;
+
+ ListEl *src = list->head;
+ while( src != 0 ) {
+ ListEl *newEl = listElAllocate( prg );
+ newEl->value = src->value;
+ treeUpref( newEl->value );
+
+ listAppend( newList, newEl );
+
+ /* Watch out for next down. */
+ if ( (Kid*)src == oldNextDown )
+ *newNextDown = (Kid*)newEl;
+
+ src = src->next;
+ }
+
+ return newList;
+}
+
+/* Copy a generic map. A new map object is allocated and, when the source
+ * has a root, the element tree is duplicated with mapCopyBranch. The value
+ * tree of every element in the new map is then upreffed. oldNextDown and
+ * newNextDown are forwarded to mapCopyBranch. */
+Map *copyMap( Program *prg, Map *map, Kid *oldNextDown, Kid **newNextDown )
+{
+// #ifdef COLM_LOG_BYTECODE
+// if ( colm_log_bytecode ) {
+// cerr << "splitting map: " << map << " refs: " <<
+// map->refs << endl;
+// }
+// #endif
+
+ Map *newMap = (Map*)mapElAllocate( prg );
+ newMap->id = map->genericInfo->langElId;
+ newMap->genericInfo = map->genericInfo;
+ newMap->treeSize = map->treeSize;
+ newMap->root = 0;
+
+ /* If there is a root, copy the tree. */
+ if ( map->root != 0 ) {
+ newMap->root = mapCopyBranch( prg, newMap, map->root,
+ oldNextDown, newNextDown );
+ }
+
+ /* Walk the new map's element list and take a reference on each value.
+ * Only tree-typed values are supported (asserted per element). */
+ MapEl *el;
+ for ( el = newMap->head; el != 0; el = el->next ) {
+ assert( map->genericInfo->typeArg == TYPE_TREE );
+ treeUpref( el->tree );
+ }
+
+ return newMap;
+}
+
+/* Copy a tree, dispatching on its kind. Generic lists and maps go to
+ * copyList and copyMap, and ordinary trees go to copyRealTree. Copying a
+ * parser is a fatal error. Pointer, bool, int, str and stream trees are not
+ * expected here and trip an assertion. The returned copy is asserted to
+ * have a zero reference count. */
+Tree *copyTree( Program *prg, Tree *tree, Kid *oldNextDown, Kid **newNextDown )
+{
+ LangElInfo *lelInfo = prg->rtd->lelInfo;
+ long genericId = lelInfo[tree->id].genericId;
+ if ( genericId > 0 ) {
+ GenericInfo *generic = &prg->rtd->genericInfo[genericId];
+ if ( generic->type == GEN_LIST )
+ tree = (Tree*) copyList( prg, (List*) tree, oldNextDown, newNextDown );
+ else if ( generic->type == GEN_MAP )
+ tree = (Tree*) copyMap( prg, (Map*) tree, oldNextDown, newNextDown );
+ else if ( generic->type == GEN_PARSER ) {
+ /* Need to figure out the semantics here. */
+ fatal( "ATTEMPT TO COPY PARSER\n" );
+ assert(false);
+ }
+ }
+ else if ( tree->id == LEL_ID_PTR )
+ assert(false);
+ else if ( tree->id == LEL_ID_BOOL )
+ assert(false);
+ else if ( tree->id == LEL_ID_INT )
+ assert(false);
+ else if ( tree->id == LEL_ID_STR )
+ assert(false);
+ else if ( tree->id == LEL_ID_STREAM )
+ assert(false);
+ else {
+ tree = copyRealTree( prg, tree, oldNextDown, newNextDown );
+ }
+
+ /* The caller is responsible for taking the first reference. */
+ assert( tree->refs == 0 );
+ return tree;
+}
+
+/* Make tree safe to modify. A tree with more than one reference is copied:
+ * the copy gets one reference, the original loses one, and the copy is
+ * returned. A tree with exactly one reference is returned as is. A null
+ * tree yields null. */
+Tree *splitTree( Program *prg, Tree *tree )
+{
+    if ( tree == 0 )
+        return tree;
+
+    assert( tree->refs >= 1 );
+
+    if ( tree->refs > 1 ) {
+        Kid *oldNextDown = 0, *newNextDown = 0;
+        Tree *copy = copyTree( prg, tree, oldNextDown, &newNextDown );
+        treeUpref( copy );
+
+        /* The original had more than one reference, so dropping one can
+         * never free it. */
+        tree->refs -= 1;
+
+        tree = copy;
+    }
+
+    assert( tree->refs == 1 );
+    return tree;
+}
+
+/* Create a new generic object (map, list or parser) described by entry
+ * genericId of the runtime generic table. All three kinds are allocated
+ * with mapElAllocate. A parser additionally gets malloc'd FsmRun and PdaRun
+ * state, which is initialized here. An unknown generic type asserts and
+ * returns 0. */
+Tree *createGeneric( Program *prg, long genericId )
+{
+ GenericInfo *genericInfo = &prg->rtd->genericInfo[genericId];
+ Tree *newGeneric = 0;
+ switch ( genericInfo->type ) {
+ case GEN_MAP: {
+ Map *map = (Map*)mapElAllocate( prg );
+ map->id = genericInfo->langElId;
+ map->genericInfo = genericInfo;
+ newGeneric = (Tree*) map;
+ break;
+ }
+ case GEN_LIST: {
+ List *list = (List*)mapElAllocate( prg );
+ list->id = genericInfo->langElId;
+ list->genericInfo = genericInfo;
+ newGeneric = (Tree*) list;
+ break;
+ }
+ case GEN_PARSER: {
+ Parser *parser = (Parser*)mapElAllocate( prg );
+ parser->id = genericInfo->langElId;
+ parser->genericInfo = genericInfo;
+ /* The run state is heap allocated; the malloc results are not checked. */
+ parser->fsmRun = malloc( sizeof(FsmRun) );
+ parser->pdaRun = malloc( sizeof(PdaRun) );
+
+ /* Start off the parsing process. */
+ initPdaRun( parser->pdaRun, prg, prg->rtd->pdaTables,
+ parser->fsmRun, genericInfo->parserId, false, false, 0 );
+ initFsmRun( parser->fsmRun, prg );
+ newToken( prg, parser->pdaRun, parser->fsmRun );
+
+ newGeneric = (Tree*) parser;
+ break;
+ }
+ default:
+ assert(false);
+ return 0;
+ }
+
+ return newGeneric;
+}
+
+
+/* Free a tree whose reference count has reached zero, together with
+ * everything it was the last owner of.
+ *
+ * We can't make recursive calls here since the tree we are freeing may be
+ * very large. The VM stack is used as a work list instead: the trees
+ * referenced by the object being freed are pushed, then popped one at a
+ * time and downreffed, and any that reach zero are freed by jumping back to
+ * free_tree. `top` remembers the stack position on entry, so the loop stops
+ * once only the caller's stack contents remain. */
+void treeFreeRec( Program *prg, Tree **sp, Tree *tree )
+{
+ Tree **top = sp;
+ LangElInfo *lelInfo;
+ long genericId;
+
+free_tree:
+ lelInfo = prg->rtd->lelInfo;
+ genericId = lelInfo[tree->id].genericId;
+ if ( genericId > 0 ) {
+ GenericInfo *generic = &prg->rtd->genericInfo[genericId];
+ if ( generic->type == GEN_LIST ) {
+ /* Queue every value, free the elements, then the list object. */
+ List *list = (List*) tree;
+ ListEl *el = list->head;
+ while ( el != 0 ) {
+ ListEl *next = el->next;
+ vm_push( el->value );
+ listElFree( prg, el );
+ el = next;
+ }
+ mapElFree( prg, (MapEl*)list );
+ }
+ else if ( generic->type == GEN_MAP ) {
+ /* Queue every key and value, free the elements, then the map. */
+ Map *map = (Map*)tree;
+ MapEl *el = map->head;
+ while ( el != 0 ) {
+ MapEl *next = el->next;
+ vm_push( el->key );
+ vm_push( el->tree );
+ mapElFree( prg, el );
+ el = next;
+ }
+ mapElFree( prg, (MapEl*)map );
+ }
+ else if ( generic->type == GEN_PARSER ) {
+ /* Note: the input is released with a direct treeDownref call rather
+ * than being queued on the VM stack. */
+ Parser *parser = (Parser*)tree;
+ clearFsmRun( prg, parser->fsmRun );
+ clearPdaRun( prg, sp, parser->pdaRun );
+ free( parser->pdaRun );
+ free( parser->fsmRun );
+ treeDownref( prg, sp, (Tree*)parser->input );
+ mapElFree( prg, (MapEl*)parser );
+ }
+ else {
+ assert(false);
+ }
+ }
+ else {
+ if ( tree->id == LEL_ID_STR ) {
+ Str *str = (Str*) tree;
+ stringFree( prg, str->value );
+ treeFree( prg, tree );
+ }
+ else if ( tree->id == LEL_ID_BOOL || tree->id == LEL_ID_INT )
+ treeFree( prg, tree );
+ else if ( tree->id == LEL_ID_PTR )
+ treeFree( prg, tree );
+ else if ( tree->id == LEL_ID_STREAM ) {
+ /* Release the source stream and close the FILE if there is one. */
+ Stream *stream = (Stream*)tree;
+ clearSourceStream( prg, sp, stream->in );
+ free( stream->in );
+ if ( stream->file != 0 )
+ fclose( stream->file );
+ streamFree( prg, stream );
+ }
+ else if ( tree->id == LEL_ID_INPUT ) {
+ Input *input = (Input*)tree;
+ clearInputStream( prg, sp, input->in );
+ free( input->in );
+ inputFree( prg, input );
+ }
+ else {
+ /* Ordinary tree. Token data is freed for everything except ignore
+ * list containers. */
+ if ( tree->id != LEL_ID_IGNORE )
+ stringFree( prg, tree->tokdata );
+
+ /* Attributes and grammar-based children (and ignore kids, which
+ * live in the same list): queue the trees, free the kids. */
+ Kid *child = tree->child;
+ while ( child != 0 ) {
+ Kid *next = child->next;
+ vm_push( child->tree );
+ kidFree( prg, child );
+ child = next;
+ }
+
+ treeFree( prg, tree );
+ }
+ }
+
+ /* Any trees to downref? Null entries (empty attributes) are skipped. */
+ while ( sp != top ) {
+ tree = vm_pop();
+ if ( tree != 0 ) {
+ assert( tree->refs > 0 );
+ tree->refs -= 1;
+ if ( tree->refs == 0 )
+ goto free_tree;
+ }
+ }
+}
+
+/* Add one reference to tree. A null tree is ignored. */
+void treeUpref( Tree *tree )
+{
+    if ( tree == 0 )
+        return;
+
+    tree->refs += 1;
+}
+
+/* Drop one reference from tree and free it with treeFreeRec when the count
+ * reaches zero. A null tree is ignored. */
+void treeDownref( Program *prg, Tree **sp, Tree *tree )
+{
+    if ( tree == 0 )
+        return;
+
+    assert( tree->refs > 0 );
+    tree->refs -= 1;
+
+    if ( tree->refs == 0 )
+        treeFreeRec( prg, sp, tree );
+}
+
+/* Return the first real child kid of tree: the kid that follows the ignore
+ * kids and the objectLength attribute slots. */
+Kid *treeChild( Program *prg, const Tree *tree )
+{
+    Kid *cur = tree->child;
+
+    /* Step over the ignore kids. */
+    if ( tree->flags & AF_LEFT_IGNORE )
+        cur = cur->next;
+    if ( tree->flags & AF_RIGHT_IGNORE )
+        cur = cur->next;
+
+    /* Step over the attribute slots. */
+    long remaining = prg->rtd->lelInfo[tree->id].objectLength;
+    while ( remaining-- > 0 )
+        cur = cur->next;
+
+    return cur;
+}
+
+/* Cut the kid list of a tree just before its first real child and return the
+ * detached tail. The attribute kids stay on the tree.
+ *
+ * NOTE(review): when the tree has ignore kids but objectLength is zero, the
+ * predecessor is never recorded and tree->child is cleared, which also drops
+ * the ignore kids from the tree. Confirm callers expect that. */
+Kid *treeExtractChild( Program *prg, Tree *tree )
+{
+    Kid *cur = tree->child;
+    Kid *prev = 0;
+    long remaining = prg->rtd->lelInfo[tree->id].objectLength;
+
+    if ( tree->flags & AF_LEFT_IGNORE )
+        cur = cur->next;
+    if ( tree->flags & AF_RIGHT_IGNORE )
+        cur = cur->next;
+
+    /* Walk over the attributes, remembering the last one passed. */
+    for ( ; remaining > 0; remaining-- ) {
+        prev = cur;
+        cur = cur->next;
+    }
+
+    /* Terminate what stays behind on the tree. */
+    if ( prev != 0 )
+        prev->next = 0;
+    else
+        tree->child = 0;
+
+    return cur;
+}
+
+
+/* Return the first kid after the optional left/right ignore kids. Unlike
+ * treeChild, the attribute kids are not skipped. */
+Kid *treeAttr( Program *prg, const Tree *tree )
+{
+    Kid *cur = tree->child;
+
+    if ( tree->flags & AF_LEFT_IGNORE )
+        cur = cur->next;
+
+    if ( tree->flags & AF_RIGHT_IGNORE )
+        cur = cur->next;
+
+    return cur;
+}
+
+/* Return the left-ignore tree, held by the first kid when AF_LEFT_IGNORE is
+ * set, or null when the flag is clear. */
+Tree *treeLeftIgnore( Program *prg, Tree *tree )
+{
+    if ( !( tree->flags & AF_LEFT_IGNORE ) )
+        return 0;
+
+    return tree->child->tree;
+}
+
+/* Return the right-ignore tree, or null when AF_RIGHT_IGNORE is clear. The
+ * right ignore sits in the second kid when a left ignore is also present,
+ * otherwise in the first. */
+Tree *treeRightIgnore( Program *prg, Tree *tree )
+{
+    if ( !( tree->flags & AF_RIGHT_IGNORE ) )
+        return 0;
+
+    Kid *slot = tree->child;
+    if ( tree->flags & AF_LEFT_IGNORE )
+        slot = slot->next;
+
+    return slot->tree;
+}
+
+/* Return the kid holding the left ignore (the first kid), or null when
+ * AF_LEFT_IGNORE is clear. */
+Kid *treeLeftIgnoreKid( Program *prg, Tree *tree )
+{
+    if ( !( tree->flags & AF_LEFT_IGNORE ) )
+        return 0;
+
+    return tree->child;
+}
+
+/* Return the kid holding the right ignore, or null when AF_RIGHT_IGNORE is
+ * clear. It is the second kid when a left ignore is present, else the first. */
+Kid *treeRightIgnoreKid( Program *prg, Tree *tree )
+{
+    if ( !( tree->flags & AF_RIGHT_IGNORE ) )
+        return 0;
+
+    Kid *slot = tree->child;
+    if ( tree->flags & AF_LEFT_IGNORE )
+        slot = slot->next;
+
+    return slot;
+}
+
+/* Return the tree the iterator currently points at, or null when the
+ * iterator has no current kid. */
+Tree *treeIterDerefCur( TreeIter *iter )
+{
+    if ( iter->ref.kid == 0 )
+        return 0;
+
+    return iter->ref.kid->tree;
+}
+
+/* Store v as the tree of the referenced kid. The leading run of chain
+ * entries that share the first entry's kid is walked and written. */
+void refSetValue( Ref *ref, Tree *v )
+{
+    Kid *target = ref->kid;
+
+    for ( ; ref != 0 && ref->kid == target; ref = ref->next )
+        ref->kid->tree = v;
+}
+
+/* Return the tree at a zero-based position among the real children of lhs.
+ * No bounds check is made on position. */
+Tree *getRhsEl( Program *prg, Tree *lhs, long position )
+{
+    Kid *pos = treeChild( prg, lhs );
+
+    for ( ; position > 0; position-- )
+        pos = pos->next;
+
+    return pos->tree;
+}
+
+/* Look up a right-hand-side element through a table. a[0] is the number of
+ * entries; each entry is a (prodNum, childNum) pair. The first entry whose
+ * prodNum equals the tree's production selects the child to return; null is
+ * returned when no entry matches. */
+Tree *getRhsVal( Program *prg, Tree *tree, int *a )
+{
+    const int *pair = a + 1;
+    int remaining;
+
+    for ( remaining = a[0]; remaining > 0; remaining--, pair += 2 ) {
+        if ( tree->prodNum == pair[0] )
+            return getRhsEl( prg, tree, pair[1] );
+    }
+
+    return 0;
+}
+
+/* Store value in an attribute slot of tree. The tree must be held by exactly
+ * one reference, and a non-null value must itself be referenced. */
+void setField( Program *prg, Tree *tree, long field, Tree *value )
+{
+    assert( tree->refs == 1 );
+
+    if ( value != 0 ) {
+        assert( value->refs >= 1 );
+    }
+
+    setAttr( tree, field, value );
+}
+
+/* Read an attribute slot of tree; forwards to getAttr. */
+Tree *getField( Tree *tree, Word field )
+{
+    Tree *attr = getAttr( tree, field );
+    return attr;
+}
+
+/* Return the kid behind an attribute slot of tree; forwards to getAttrKid. */
+Kid *getFieldKid( Tree *tree, Word field )
+{
+    Kid *attrKid = getAttrKid( tree, field );
+    return attrKid;
+}
+
+/* Run the attribute's current value through splitTree, store the result back
+ * in the same slot and return it. */
+Tree *getFieldSplit( Program *prg, Tree *tree, Word field )
+{
+    Tree *split = splitTree( prg, getAttr( tree, field ) );
+
+    setAttr( tree, field, split );
+    return split;
+}
+
+/* Replace the tree held by the user iterator's current kid. */
+void setUiterCur( Program *prg, UserIter *uiter, Tree *tree )
+{
+    Kid *cur = uiter->ref.kid;
+    cur->tree = tree;
+}
+
+/* Replace the tree held by the tree iterator's current kid. */
+void setTriterCur( Program *prg, TreeIter *iter, Tree *tree )
+{
+    Kid *cur = iter->ref.kid;
+    cur->tree = tree;
+}
+
+/* Return the tree stored in the kid that a pointer value refers to. */
+Tree *getPtrVal( Pointer *ptr )
+{
+    Tree *target = ptr->value->tree;
+    return target;
+}
+
+/* Run the pointed-to tree through splitTree, store the result back in the
+ * pointed-to kid and return it. */
+Tree *getPtrValSplit( Program *prg, Pointer *ptr )
+{
+    Tree *split = splitTree( prg, ptr->value->tree );
+
+    ptr->value->tree = split;
+    return split;
+}
+
+/* Match the pattern node pat (an index into prg->rtd->patReplNodes, -1 for
+ * none) against kid. Returns non-zero on a match. Matched trees are recorded
+ * in bindings[] under the node's bindId when that id is positive. When
+ * checkNext is set the following siblings of both sides must match as well.
+ *
+ * This must traverse in the same order that the bindId assignments are done
+ * in. */
+int matchPattern( Tree **bindings, Program *prg, long pat, Kid *kid, int checkNext )
+{
+    PatReplNode *nodes = prg->rtd->patReplNodes;
+
+    /* Both sides exhausted is a match; only one exhausted is a failure. */
+    if ( pat == -1 || kid == 0 ) {
+        if ( pat == -1 && kid == 0 )
+            return 1;
+        return false;
+    }
+
+    PatReplNode *node = &nodes[pat];
+    Tree *subject = kid->tree;
+
+    if ( node->id != subject->id )
+        return false;
+
+    /* A pattern node carrying data must equal the token text: length first,
+     * then the bytes. */
+    if ( node->data != 0 ) {
+        if ( node->length != stringLength( subject->tokdata ) )
+            return false;
+
+        if ( node->length > 0 && memcmp( node->data,
+                stringData( subject->tokdata ), node->length ) != 0 )
+            return false;
+    }
+
+    /* The node itself matches; record the binding if one was requested. */
+    if ( node->bindId > 0 )
+        bindings[node->bindId] = subject;
+
+    /* Unless this is a terminal duplicate of a nonterm, check the children. */
+    if ( !node->stop ) {
+        if ( !matchPattern( bindings, prg, node->child,
+                treeChild( prg, subject ), true ) )
+            return false;
+    }
+
+    /* Then the siblings, when asked to. */
+    if ( checkNext ) {
+        if ( !matchPattern( bindings, prg, node->next, kid->next, true ) )
+            return false;
+    }
+
+    return true;
+}
+
+
+/* Three-way comparison of two trees; returns negative, zero or positive.
+ * Order of comparison: null before non-null, then language element id, then
+ * the value (pointer, int, string, or token data for other ids), and finally
+ * the real children pairwise, with the shorter child list ordering first. */
+long cmpTree( Program *prg, const Tree *tree1, const Tree *tree2 )
+{
+    long cmpres = 0;
+
+    if ( tree1 == 0 )
+        return tree2 == 0 ? 0 : -1;
+    if ( tree2 == 0 )
+        return 1;
+
+    if ( tree1->id != tree2->id )
+        return tree1->id < tree2->id ? -1 : 1;
+
+    if ( tree1->id == LEL_ID_PTR ) {
+        if ( ((Pointer*)tree1)->value != ((Pointer*)tree2)->value )
+            return ((Pointer*)tree1)->value < ((Pointer*)tree2)->value ? -1 : 1;
+    }
+    else if ( tree1->id == LEL_ID_INT ) {
+        if ( ((Int*)tree1)->value != ((Int*)tree2)->value )
+            return ((Int*)tree1)->value < ((Int*)tree2)->value ? -1 : 1;
+    }
+    else if ( tree1->id == LEL_ID_STR ) {
+        cmpres = cmpString( ((Str*)tree1)->value, ((Str*)tree2)->value );
+        if ( cmpres != 0 )
+            return cmpres;
+    }
+    else {
+        /* Missing token data orders before present token data. */
+        const int has1 = tree1->tokdata != 0;
+        const int has2 = tree2->tokdata != 0;
+
+        if ( has1 != has2 )
+            return has1 ? 1 : -1;
+
+        if ( has1 ) {
+            cmpres = cmpString( tree1->tokdata, tree2->tokdata );
+            if ( cmpres != 0 )
+                return cmpres;
+        }
+    }
+
+    /* Values are equal; compare the children in lock step. */
+    Kid *kid1 = treeChild( prg, tree1 );
+    Kid *kid2 = treeChild( prg, tree2 );
+
+    for ( ; kid1 != 0 && kid2 != 0; kid1 = kid1->next, kid2 = kid2->next ) {
+        cmpres = cmpTree( prg, kid1->tree, kid2->tree );
+        if ( cmpres != 0 )
+            return cmpres;
+    }
+
+    if ( kid1 == 0 && kid2 == 0 )
+        return 0;
+
+    return kid1 == 0 ? -1 : 1;
+}
+
+
+/* Walk a reference chain and replace every tree on it that has more than one
+ * reference with a copy made by copyTree.
+ *
+ * The chain is first reversed in place through the next pointers, then
+ * walked in the reversed direction. For each shared tree, the copy is
+ * installed in every chain entry that points at the same kid, and the
+ * entries that follow are re-pointed from the old kid to the matching kid
+ * in the copy. The next pointers are cleared as the walk proceeds.
+ * psp is not used in this body. */
+void splitRef( Program *prg, Tree ***psp, Ref *fromRef )
+{
+ /* Go up the chain of kids, turning the pointers down. */
+ Ref *last = 0, *ref = fromRef, *next = 0;
+ while ( ref->next != 0 ) {
+ next = ref->next;
+ ref->next = last;
+ last = ref;
+ ref = next;
+ }
+ ref->next = last;
+
+ /* Now traverse the list, which goes down. */
+ while ( ref != 0 ) {
+ if ( ref->kid->tree->refs > 1 ) {
+// #ifdef COLM_LOG_BYTECODE
+// if ( colm_log_bytecode ) {
+// cerr << "splitting tree: " << ref->kid << " refs: " <<
+// ref->kid->tree->refs << endl;
+// }
+// #endif
+
+ /* Find the first chain entry that refers to a different kid. */
+ Ref *nextDown = ref->next;
+ while ( nextDown != 0 && nextDown->kid == ref->kid )
+ nextDown = nextDown->next;
+
+ /* copyTree is handed the old next-down kid and reports its
+ * counterpart in the copy through newNextKidDown. */
+ Kid *oldNextKidDown = nextDown != 0 ? nextDown->kid : 0;
+ Kid *newNextKidDown = 0;
+
+ Tree *newTree = copyTree( prg, ref->kid->tree,
+ oldNextKidDown, &newNextKidDown );
+ treeUpref( newTree );
+
+ /* Downref the original. Don't need to consider freeing because
+ * refs were > 1. */
+ ref->kid->tree->refs -= 1;
+
+ /* Install the copy in every entry up to nextDown, resetting the
+ * chain links on the way. */
+ while ( ref != 0 && ref != nextDown ) {
+ next = ref->next;
+ ref->next = 0;
+
+ ref->kid->tree = newTree;
+ ref = next;
+ }
+
+ /* Correct kid pointers down from ref. */
+ while ( nextDown != 0 && nextDown->kid == oldNextKidDown ) {
+ nextDown->kid = newNextKidDown;
+ nextDown = nextDown->next;
+ }
+ }
+ else {
+ /* Reset the list as we go down. */
+ next = ref->next;
+ ref->next = 0;
+ ref = next;
+ }
+ }
+}
+
+/* Split the reference chain leading to the iterator's current item. Does
+ * nothing when the iterator has no current kid. */
+void splitIterCur( Program *prg, Tree ***psp, TreeIter *iter )
+{
+    if ( iter->ref.kid != 0 )
+        splitRef( prg, psp, &iter->ref );
+}
+
+/* Overwrite the value at the head (field 0) or tail (field 1) of a list and
+ * return the value that was there before. Any other field asserts. The list
+ * must be held by exactly one reference. */
+Tree *setListMem( List *list, Half field, Tree *value )
+{
+    Tree *existing = 0;
+
+    assert( list->refs == 1 );
+    if ( value != 0 ) {
+        assert( value->refs >= 1 );
+    }
+
+    if ( field == 0 ) {
+        existing = list->head->value;
+        list->head->value = value;
+    }
+    else if ( field == 1 ) {
+        existing = list->tail->value;
+        list->tail->value = value;
+    }
+    else {
+        assert( false );
+    }
+
+    return existing;
+}
+
+/* Remove the element stored under key and return its key and value. Both
+ * members of the result are null when the key is not present. */
+TreePair mapRemove( Program *prg, Map *map, Tree *key )
+{
+    TreePair result = { 0, 0 };
+    MapEl *found = mapImplFind( prg, map, key );
+
+    if ( found == 0 )
+        return result;
+
+    mapDetach( prg, map, found );
+    result.key = found->key;
+    result.val = found->tree;
+    mapElFree( prg, found );
+
+    return result;
+}
+
+/* Reverse a store. With a null existing, the element under key is detached
+ * and freed; otherwise existing is put back as the element's value. Either
+ * way the value that was stored is returned. The key is expected to be
+ * present: the looked-up element is used without a null check. */
+Tree *mapUnstore( Program *prg, Map *map, Tree *key, Tree *existing )
+{
+    Tree *stored;
+
+    if ( existing != 0 ) {
+        MapEl *el = mapImplFind( prg, map, key );
+        stored = el->tree;
+        el->tree = existing;
+    }
+    else {
+        MapEl *el = mapDetachByKey( prg, map, key );
+        stored = el->tree;
+        mapElFree( prg, el );
+    }
+
+    return stored;
+}
+
+/* Return the value stored under key, or null when the key is not present. */
+Tree *mapFind( Program *prg, Map *map, Tree *key )
+{
+    MapEl *found = mapImplFind( prg, map, key );
+
+    if ( found == 0 )
+        return 0;
+
+    return found->tree;
+}
+
+/* Return the map's treeSize field. */
+long mapLength( Map *map )
+{
+    long count = map->treeSize;
+    return count;
+}
+
+/* Append val to a list in a newly allocated list element. The list must be
+ * held by exactly one reference, and a non-null val must be referenced. */
+void listAppend2( Program *prg, List *list, Tree *val )
+{
+    ListEl *fresh;
+
+    assert( list->refs == 1 );
+    if ( val != 0 ) {
+        assert( val->refs >= 1 );
+    }
+
+    fresh = listElAllocate( prg );
+    fresh->value = val;
+    listAppend( list, fresh );
+}
+
+/* Detach and free the last list element and return the value it held. The
+ * tail is read without a null check. */
+Tree *listRemoveEnd( Program *prg, List *list )
+{
+    Tree *value = list->tail->value;
+    ListEl *last = listDetachLast( list );
+
+    listElFree( prg, last );
+    return value;
+}
+
+/* Return the value at the head (field 0) or tail (field 1) of a list. Any
+ * other field asserts. */
+Tree *getListMem( List *list, Word field )
+{
+    Tree *result = 0;
+
+    if ( field == 0 )
+        result = list->head->value;
+    else if ( field == 1 )
+        result = list->tail->value;
+    else
+        assert( false );
+
+    return result;
+}
+
+/* Run the head (field 0) or tail (field 1) value through splitTree, store
+ * the result back in the same slot and return it. Any other field asserts. */
+Tree *getListMemSplit( Program *prg, List *list, Word field )
+{
+    Tree *sv = 0;
+
+    if ( field == 0 ) {
+        sv = splitTree( prg, list->head->value );
+        list->head->value = sv;
+    }
+    else if ( field == 1 ) {
+        sv = splitTree( prg, list->tail->value );
+        list->tail->value = sv;
+    }
+    else {
+        assert( false );
+    }
+
+    return sv;
+}
+
+
+/* Insert element under key. Returns true when mapInsertKey produced an
+ * element to hold it, and false when it returned null. */
+int mapInsert( Program *prg, Map *map, Tree *key, Tree *element )
+{
+    MapEl *fresh = mapInsertKey( prg, map, key, 0 );
+
+    if ( fresh == 0 )
+        return false;
+
+    fresh->tree = element;
+    return true;
+}
+
+/* Reverse a removal: put key back in the map with element as its value. The
+ * insertion is asserted to succeed. */
+void mapUnremove( Program *prg, Map *map, Tree *key, Tree *element )
+{
+    MapEl *restored = mapInsertKey( prg, map, key, 0 );
+
+    assert( restored != 0 );
+    restored->tree = element;
+}
+
+/* Reverse an insertion: detach the element under key, free it and return
+ * the value it held. The key is expected to be present: the detached
+ * element is used without a null check. */
+Tree *mapUninsert( Program *prg, Map *map, Tree *key )
+{
+    MapEl *detached = mapDetachByKey( prg, map, key );
+    Tree *value = detached->tree;
+
+    mapElFree( prg, detached );
+    return value;
+}
+
+/* Store element under key, overwriting any existing value. Returns the
+ * value that was replaced, or null when the key was newly inserted. */
+Tree *mapStore( Program *prg, Map *map, Tree *key, Tree *element )
+{
+    MapEl *present = 0;
+    MapEl *fresh = mapInsertKey( prg, map, key, &present );
+
+    if ( fresh != 0 ) {
+        fresh->tree = element;
+        return 0;
+    }
+
+    /* An element with this key exists: swap the value in place. */
+    Tree *replaced = present->tree;
+    present->tree = element;
+    return replaced;
+}
+
+/* Advance a tree iterator to the next tree whose id equals iter->searchId
+ * (or to any tree when searchId is the runtime's anyId), descending into
+ * children before moving on to siblings.
+ *
+ * Recursion is emulated with gotos. Each descent pushes the current
+ * iter->ref.next and iter->ref.kid on the VM stack and points iter->ref.next
+ * at the pushed pair. rec_call is the entry of a "call"; rec_return resumes
+ * the sibling loop of the enclosing level. When tryFirst is non-zero the
+ * current kid is itself a candidate. On a match the working stack pointer is
+ * written back through psp with the saved levels still on the stack. When
+ * the stack is back at iter->stackRoot the search is exhausted and
+ * iter->ref.kid is set to null. */
+void iterFind( Program *prg, Tree ***psp, TreeIter *iter, int tryFirst )
+{
+ int anyTree = iter->searchId == prg->rtd->anyId;
+ Tree **top = iter->stackRoot;
+ Kid *child;
+ Tree **sp = *psp;
+
+rec_call:
+ if ( tryFirst && ( iter->ref.kid->tree->id == iter->searchId || anyTree ) ) {
+ *psp = sp;
+ return;
+ }
+ else {
+ child = treeChild( prg, iter->ref.kid->tree );
+ if ( child != 0 ) {
+ /* Save the current level and descend to the first child. */
+ vm_push( (SW) iter->ref.next );
+ vm_push( (SW) iter->ref.kid );
+ iter->ref.kid = child;
+ iter->ref.next = (Ref*)vm_ptop();
+ while ( iter->ref.kid != 0 ) {
+ tryFirst = true;
+ goto rec_call;
+ rec_return:
+ iter->ref.kid = iter->ref.kid->next;
+ }
+ /* Siblings exhausted: restore the saved level. */
+ iter->ref.kid = (Kid*)vm_pop();
+ iter->ref.next = (Ref*)vm_pop();
+ }
+ }
+
+ /* Not back at the root of the iteration: resume the enclosing level. */
+ if ( top != vm_ptop() )
+ goto rec_return;
+
+ iter->ref.kid = 0;
+ *psp = sp;
+}
+
+/* Move a tree iterator to its next match and return prg->trueVal when one
+ * was found, prg->falseVal otherwise. iter->stackSize is refreshed from the
+ * stack pointer afterwards. */
+Tree *treeIterAdvance( Program *prg, Tree ***psp, TreeIter *iter )
+{
+    assert( iter->stackSize == iter->stackRoot - *psp );
+
+    /* With no current kid the search starts at the root, and the root itself
+     * is a candidate. Otherwise it continues past the current item. */
+    int fromRoot = iter->ref.kid == 0;
+    if ( fromRoot )
+        iter->ref = iter->rootRef;
+
+    iterFind( prg, psp, iter, fromRoot );
+
+    iter->stackSize = iter->stackRoot - *psp;
+
+    if ( iter->ref.kid )
+        return prg->trueVal;
+    return prg->falseVal;
+}
+
+/* Move a child iterator to the next direct child of the root tree whose id
+ * equals iter->searchId (any child when searchId is the runtime's anyId).
+ * Returns prg->trueVal when one was found and prg->falseVal otherwise. On
+ * the first step a copy of the root reference is pushed on the VM stack and
+ * iter->ref.next is pointed at it. */
+Tree *treeIterNextChild( Program *prg, Tree ***psp, TreeIter *iter )
+{
+    Tree **sp = *psp;
+    assert( iter->stackSize == iter->stackRoot - vm_ptop() );
+    Kid *kid = 0;
+
+    if ( iter->ref.kid != 0 ) {
+        /* Continue with the sibling after the current item. */
+        kid = iter->ref.kid->next;
+    }
+    else {
+        /* No current item: start from the first child of the root. */
+        Kid *child = treeChild( prg, iter->rootRef.kid->tree );
+
+        if ( child != 0 ) {
+            /* Make a reference to the root. */
+            vm_push( (SW) iter->rootRef.next );
+            vm_push( (SW) iter->rootRef.kid );
+            iter->ref.next = (Ref*)vm_ptop();
+
+            kid = child;
+        }
+        else {
+            iter->ref.next = 0;
+        }
+    }
+
+    /* When searching for one id, skip the siblings that do not match. */
+    if ( iter->searchId != prg->rtd->anyId ) {
+        while ( kid != 0 && kid->tree->id != iter->searchId )
+            kid = kid->next;
+    }
+
+    iter->ref.kid = kid;
+    iter->stackSize = iter->stackRoot - vm_ptop();
+    *psp = sp;
+
+    if ( iter->ref.kid )
+        return prg->trueVal;
+    return prg->falseVal;
+}
+
+/* Step a reverse child iterator and return prg->trueVal when it lands on a
+ * child, prg->falseVal when the children are exhausted.
+ *
+ * The kid pointers of the root's children are kept in the slots directly
+ * below iter->stackRoot, first child in the highest slot, and iter->cur
+ * walks those slots upwards. The slots are rewritten from the tree whenever
+ * the current kid differs from the one recorded at the previous yield.
+ * NOTE(review): the slots are written here without moving sp, so they are
+ * presumably reserved when the iterator is set up -- confirm. */
+Tree *treeRevIterPrevChild( Program *prg, Tree ***psp, RevTreeIter *iter )
+{
+ Tree **sp = *psp;
+
+ assert( iter->stackSize == iter->stackRoot - vm_ptop() );
+
+ if ( iter->kidAtYield != iter->ref.kid ) {
+ /* Need to reload the kids. */
+ Kid *kid = treeChild( prg, iter->rootRef.kid->tree );
+ Kid **dst = (Kid**)iter->stackRoot - 1;
+ while ( kid != 0 ) {
+ *dst-- = kid;
+ kid = kid->next;
+ }
+ }
+
+ /* No current item: start at the lowest slot. Otherwise move up one. */
+ if ( iter->ref.kid == 0 )
+ iter->cur = (Kid**)iter->stackRoot - iter->children;
+ else
+ iter->cur += 1;
+
+ if ( iter->searchId != prg->rtd->anyId ) {
+ /* Skip the slots whose tree id does not match the search id. */
+ while ( iter->cur != (Kid**)iter->stackRoot && (*iter->cur)->tree->id != iter->searchId )
+ iter->cur += 1;
+ }
+
+ /* Reaching stackRoot means every slot has been visited. */
+ if ( iter->cur == (Kid**)iter->stackRoot ) {
+ iter->ref.next = 0;
+ iter->ref.kid = 0;
+ }
+ else {
+ iter->ref.next = &iter->rootRef;
+ iter->ref.kid = *iter->cur;
+ }
+
+ /* We will use this to detect a split above the iterated tree. */
+ iter->kidAtYield = iter->ref.kid;
+
+ iter->stackSize = iter->stackRoot - vm_ptop();
+
+ *psp = sp;
+
+ return (iter->ref.kid ? prg->trueVal : prg->falseVal );
+}
+
+/* Advance a repeat iterator to the next tree whose id equals iter->searchId
+ * (or to any tree when searchId is the runtime's anyId).
+ *
+ * The goto-emulated recursion is the same as in iterFind: each descent
+ * pushes iter->ref.next and iter->ref.kid on the VM stack and points
+ * iter->ref.next at the pushed pair. The difference is the condition for
+ * descending, described in the comment inside the body. When the stack is
+ * back at iter->stackRoot the search is exhausted and iter->ref.kid is set
+ * to null. */
+void iterFindRepeat( Program *prg, Tree ***psp, TreeIter *iter, int tryFirst )
+{
+ Tree **sp = *psp;
+ int anyTree = iter->searchId == prg->rtd->anyId;
+ Tree **top = iter->stackRoot;
+ Kid *child;
+
+rec_call:
+ if ( tryFirst && ( iter->ref.kid->tree->id == iter->searchId || anyTree ) ) {
+ *psp = sp;
+ return;
+ }
+ else {
+ /* The repeat iterator is just like the normal top-down-left-right,
+ * except it only goes into the children of a node if the node is the
+ * root of the iteration, or if it does not have any neighbours to the
+ * right. */
+ if ( top == vm_ptop() || iter->ref.kid->next == 0 ) {
+ child = treeChild( prg, iter->ref.kid->tree );
+ if ( child != 0 ) {
+ /* Save the current level and descend to the first child. */
+ vm_push( (SW) iter->ref.next );
+ vm_push( (SW) iter->ref.kid );
+ iter->ref.kid = child;
+ iter->ref.next = (Ref*)vm_ptop();
+ while ( iter->ref.kid != 0 ) {
+ tryFirst = true;
+ goto rec_call;
+ rec_return:
+ iter->ref.kid = iter->ref.kid->next;
+ }
+ /* Siblings exhausted: restore the saved level. */
+ iter->ref.kid = (Kid*)vm_pop();
+ iter->ref.next = (Ref*)vm_pop();
+ }
+ }
+ }
+
+ /* Not back at the root of the iteration: resume the enclosing level. */
+ if ( top != vm_ptop() )
+ goto rec_return;
+
+ iter->ref.kid = 0;
+ *psp = sp;
+}
+
+/* Move a repeat iterator forward to its next match and return prg->trueVal
+ * when one was found, prg->falseVal otherwise. iter->stackSize is refreshed
+ * from the stack pointer afterwards. */
+Tree *treeIterNextRepeat( Program *prg, Tree ***psp, TreeIter *iter )
+{
+    assert( iter->stackSize == iter->stackRoot - *psp );
+
+    /* With no current kid the search starts at the root, and the root itself
+     * is a candidate. Otherwise it continues past the current item. */
+    int fromRoot = iter->ref.kid == 0;
+    if ( fromRoot )
+        iter->ref = iter->rootRef;
+
+    iterFindRepeat( prg, psp, iter, fromRoot );
+
+    iter->stackSize = iter->stackRoot - *psp;
+
+    if ( iter->ref.kid )
+        return prg->trueVal;
+    return prg->falseVal;
+}
+
+/* Step a repeat iterator in the reverse direction to the next tree whose id
+ * equals iter->searchId (or to any tree when searchId is the runtime's
+ * anyId).
+ *
+ * With tryFirst set, the iterator first walks down to the far end of the
+ * repeat, pushing iter->ref.next and iter->ref.kid on the VM stack at each
+ * descent, and then tests that item. Without it, the iterator moves back
+ * one step from the current item before testing. When the stack is back at
+ * iter->stackRoot the iteration is exhausted and iter->ref.kid is set to
+ * null.
+ *
+ * The working stack pointer is written back through psp on every exit,
+ * including the exhausted one, so the caller's stack pointer and the
+ * stackSize it derives always reflect the entries popped here. */
+void iterFindRevRepeat( Program *prg, Tree ***psp, TreeIter *iter, int tryFirst )
+{
+    Tree **sp = *psp;
+    int anyTree = iter->searchId == prg->rtd->anyId;
+    Tree **top = iter->stackRoot;
+    Kid *child;
+
+    if ( tryFirst ) {
+        while ( true ) {
+            if ( top == vm_ptop() || iter->ref.kid->next == 0 ) {
+                child = treeChild( prg, iter->ref.kid->tree );
+
+                if ( child == 0 )
+                    break;
+                vm_push( (SW) iter->ref.next );
+                vm_push( (SW) iter->ref.kid );
+                iter->ref.kid = child;
+                iter->ref.next = (Ref*)vm_ptop();
+            }
+            else {
+                /* Not at the top and there is a next sibling: move over to it. */
+                iter->ref.kid = iter->ref.kid->next;
+            }
+        }
+
+        goto first;
+    }
+
+    /* This loop only exits through the returns inside it. */
+    while ( true ) {
+        if ( top == vm_ptop() ) {
+            /* Exhausted. Publish the unwound stack pointer, as every other
+             * exit does. */
+            iter->ref.kid = 0;
+            *psp = sp;
+            return;
+        }
+
+        if ( iter->ref.kid->next == 0 ) {
+            /* Go up one and then down. Remember we can't use iter->ref.next
+             * because the chain may have been split, setting it null (to
+             * prevent repeated walks up). */
+            Ref *ref = (Ref*)vm_ptop();
+            iter->ref.kid = treeChild( prg, ref->kid->tree );
+        }
+        else {
+            iter->ref.kid = (Kid*)vm_pop();
+            iter->ref.next = (Ref*)vm_pop();
+        }
+first:
+        if ( iter->ref.kid->tree->id == iter->searchId || anyTree ) {
+            *psp = sp;
+            return;
+        }
+    }
+}
+
+
+/* Move a repeat iterator backwards to its next match and return
+ * prg->trueVal when one was found, prg->falseVal otherwise. iter->stackSize
+ * is refreshed from the stack pointer afterwards. */
+Tree *treeIterPrevRepeat( Program *prg, Tree ***psp, TreeIter *iter )
+{
+    assert( iter->stackSize == iter->stackRoot - *psp );
+
+    /* With no current kid the walk starts from the root. Otherwise it
+     * continues from the current item. */
+    int fromRoot = iter->ref.kid == 0;
+    if ( fromRoot )
+        iter->ref = iter->rootRef;
+
+    iterFindRevRepeat( prg, psp, iter, fromRoot );
+
+    iter->stackSize = iter->stackRoot - *psp;
+
+    if ( iter->ref.kid )
+        return prg->trueVal;
+    return prg->falseVal;
+}
+
+/* Pre-order search starting at kid: the kid's own tree first, then its
+ * children, then its following siblings. Returns the first tree whose id
+ * matches, or null when there is none. kid must not be null. */
+Tree *treeSearch( Program *prg, Kid *kid, long id )
+{
+    do {
+        /* This node the one? */
+        if ( kid->tree->id == id )
+            return kid->tree;
+
+        /* Search below this node before moving to its sibling. */
+        Kid *child = treeChild( prg, kid->tree );
+        if ( child != 0 ) {
+            Tree *found = treeSearch( prg, child, id );
+            if ( found != 0 )
+                return found;
+        }
+
+        kid = kid->next;
+    } while ( kid != 0 );
+
+    return 0;
+}
+
+/* Search a tree and everything below it for the first tree with the given
+ * id. Returns null when there is none. */
+Tree *treeSearch2( Program *prg, Tree *tree, long id )
+{
+    if ( tree->id == id )
+        return tree;
+
+    Kid *child = treeChild( prg, tree );
+    if ( child == 0 )
+        return 0;
+
+    return treeSearch( prg, child, id );
+}
+
+/* Write len bytes of data through printArgs->out, escaped for XML text.
+ * '<', '>' and '&' become entity references. Printable ASCII, tab, newline
+ * and carriage return pass through unchanged. Every other byte is written
+ * as a decimal character reference.
+ *
+ * Each byte is read as unsigned char, so bytes with the high bit set produce
+ * references in the range 128..255 rather than the sign-extended value that
+ * a plain char yields on platforms where char is signed. */
+void xmlEscapeData( struct ColmPrintArgs *printArgs, const char *data, long len )
+{
+    long i;
+    for ( i = 0; i < len; i++ ) {
+        const unsigned char ch = (unsigned char) data[i];
+
+        if ( ch == '<' )
+            printArgs->out( printArgs, "&lt;", 4 );
+        else if ( ch == '>' )
+            printArgs->out( printArgs, "&gt;", 4 );
+        else if ( ch == '&' )
+            printArgs->out( printArgs, "&amp;", 5 );
+        else if ( (32 <= ch && ch <= 126) || ch == '\t' || ch == '\n' || ch == '\r' )
+            printArgs->out( printArgs, &data[i], 1 );
+        else {
+            char out[64];
+            snprintf( out, sizeof(out), "&#%u;", (unsigned) ch );
+            printArgs->out( printArgs, out, strlen(out) );
+        }
+    }
+}
+
+/* Set up an empty collector backed by a BUFFER_INITIAL_SIZE-byte heap
+ * buffer. The allocation result is not checked. */
+void initStrCollect( StrCollect *collect )
+{
+    collect->length = 0;
+    collect->allocated = BUFFER_INITIAL_SIZE;
+    collect->data = (char*) malloc( BUFFER_INITIAL_SIZE );
+}
+
+/* Free the collector's buffer. The StrCollect structure itself is not
+ * freed and its fields are left as they were. */
+void strCollectDestroy( StrCollect *collect )
+{
+ free( collect->data );
+}
+
+/* Append len bytes of data to the collector, growing its buffer as needed.
+ * No terminating NUL is written.
+ *
+ * When the buffer is too small the capacity is set to twice the required
+ * length. Running out of memory aborts the process: this function has no
+ * way to report failure, and continuing would write through a null
+ * pointer. */
+void strCollectAppend( StrCollect *collect, const char *data, long len )
+{
+    long newLen = collect->length + len;
+
+    if ( newLen > collect->allocated ) {
+        long newAllocated = newLen * 2;
+        char *grown = (char*) realloc( collect->data, newAllocated );
+        if ( grown == 0 )
+            abort();
+
+        collect->data = grown;
+        collect->allocated = newAllocated;
+    }
+
+    memcpy( collect->data + collect->length, data, len );
+    collect->length += len;
+}
+
+/* Reset the collected length to zero. The buffer and its capacity are
+ * kept. */
+void strCollectClear( StrCollect *collect )
+{
+ collect->length = 0;
+}
+
+#define INT_SZ 32
+
+/* Write the bytes of a string head through printArgs->out. */
+void printStr( struct ColmPrintArgs *printArgs, Head *str )
+{
+    char *bytes = (char*)(str->data);
+    printArgs->out( printArgs, bytes, str->length );
+}
+
+/* Output callback that appends to the StrCollect passed in args->arg. */
+void appendCollect( struct ColmPrintArgs *args, const char *data, int length )
+{
+    StrCollect *collect = (StrCollect*) args->arg;
+    strCollectAppend( collect, data, length );
+}
+
+/* Output callback that writes to the FILE passed in args->arg. The data is
+ * written as one item of length bytes; the fwrite result is not checked. */
+void appendFile( struct ColmPrintArgs *args, const char *data, int length )
+{
+    FILE *file = (FILE*)args->arg;
+    fwrite( data, length, 1, file );
+}
+
+/* Attach two newly allocated LEL_ID_IGNORE trees to tree and return the
+ * result: one pushed as left ignore and flagged AF_SUPPRESS_RIGHT, one
+ * pushed as right ignore and flagged AF_SUPPRESS_LEFT. */
+Tree *treeTrim( struct ColmProgram *prg, Tree **sp, Tree *tree )
+{
+    Tree *leftIgnore, *rightIgnore;
+
+    debug( REALM_PARSE, "attaching left ignore\n" );
+
+    /* Left side: an ignore wrapper carrying the suppress-right flag. */
+    leftIgnore = treeAllocate( prg );
+    leftIgnore->id = LEL_ID_IGNORE;
+    leftIgnore->flags |= AF_SUPPRESS_RIGHT;
+    tree = pushLeftIgnore( prg, tree, leftIgnore );
+
+    debug( REALM_PARSE, "attaching ignore right\n" );
+
+    /* Right side: an ignore wrapper carrying the suppress-left flag. */
+    rightIgnore = treeAllocate( prg );
+    rightIgnore->id = LEL_ID_IGNORE;
+    rightIgnore->flags |= AF_SUPPRESS_LEFT;
+
+    return pushRightIgnore( prg, tree, rightIgnore );
+}
+
+/* Return tags used by printKid. One is pushed on the VM stack before each
+ * jump to rec_call and popped at the end of the visit to pick the label to
+ * resume at. */
+enum ReturnType
+{
+ Done = 1, /* resume at rec_return_top */
+ CollectIgnoreLeft, /* resume at rec_return_ign_left */
+ CollectIgnoreRight, /* resume at rec_return_ign_right */
+ RecIgnoreList, /* resume at rec_return_il */
+ ChildPrint /* resume at rec_return */
+};
+
+/* How printKid classifies the kid it is visiting. */
+enum VisitType
+{
+ IgnoreWrapper, /* the kid's tree id is LEL_ID_IGNORE */
+ IgnoreData, /* the parent's tree id is LEL_ID_IGNORE */
+ Term, /* tree id is below prg->rtd->firstNonTermId */
+ NonTerm, /* everything else */
+};
+
+#define TF_TERM_SEEN 0x1
+
+/* Print a kid and all kids that follow it through the callbacks in
+ * printArgs (openTree, printTerm, closeTree).
+ *
+ * The traversal is recursive in shape but implemented with gotos and the VM
+ * stack. Before each jump to rec_call the state to restore and an enum
+ * ReturnType tag are pushed; the switch at the end of the visit pops the tag
+ * and jumps to the matching rec_return label.
+ *
+ * Ignore trees met on the way are collected on the leadingIgnore list,
+ * newest first. When the next terminal is reached the list is reversed,
+ * printed subject to the suppress flags and printArgs->comm / ->trim, and
+ * then freed. */
+void printKid( Program *prg, Tree **sp, struct ColmPrintArgs *printArgs, Kid *kid )
+{
+ enum ReturnType rt;
+ Kid *parent = 0;
+ Kid *leadingIgnore = 0;
+ enum VisitType visitType;
+ int flags = 0;
+
+ /* Iterate the kids passed in. We are expecting a next, which will allow us
+ * to print the trailing ignore list. */
+ while ( kid != 0 ) {
+ vm_push( (SW) Done );
+ goto rec_call;
+ rec_return_top:
+ kid = kid->next;
+ }
+
+ return;
+
+rec_call:
+ if ( kid->tree == 0 )
+ goto skip_null;
+
+ /* If not currently skipping ignore data, then print it. Ignore data can
+ * be associated with terminals and nonterminals. */
+ if ( kid->tree->flags & AF_LEFT_IGNORE ) {
+ vm_push( (SW)parent );
+ vm_push( (SW)kid );
+ parent = kid;
+ kid = treeLeftIgnoreKid( prg, kid->tree );
+ vm_push( (SW) CollectIgnoreLeft );
+ goto rec_call;
+ rec_return_ign_left:
+ kid = (Kid*)vm_pop();
+ parent = (Kid*)vm_pop();
+ }
+
+ /* Classify the kid; see enum VisitType. */
+ if ( kid->tree->id == LEL_ID_IGNORE )
+ visitType = IgnoreWrapper;
+ else if ( parent != 0 && parent->tree->id == LEL_ID_IGNORE )
+ visitType = IgnoreData;
+ else if ( kid->tree->id < prg->rtd->firstNonTermId )
+ visitType = Term;
+ else
+ visitType = NonTerm;
+
+ debug( REALM_PRINT, "visit type: %d\n", visitType );
+
+ /* Ignore data is only queued here; it is printed when the next terminal
+ * flushes the list. */
+ if ( visitType == IgnoreData ) {
+ debug( REALM_PRINT, "putting %p on ignore list\n", kid->tree );
+ Kid *newIgnore = kidAllocate( prg );
+ newIgnore->next = leadingIgnore;
+ leadingIgnore = newIgnore;
+ leadingIgnore->tree = kid->tree;
+ goto skip_node;
+ }
+
+ /* The wrapper is queued as well; its suppress flags are examined when
+ * the list is flushed. */
+ if ( visitType == IgnoreWrapper ) {
+ Kid *newIgnore = kidAllocate( prg );
+ newIgnore->next = leadingIgnore;
+ leadingIgnore = newIgnore;
+ leadingIgnore->tree = kid->tree;
+ /* Don't skip. */
+ }
+
+ /* print leading ignore? Triggered by terminals. */
+ if ( visitType == Term ) {
+ /* Reverse the leading ignore list. */
+ if ( leadingIgnore != 0 ) {
+ Kid *ignore = 0, *last = 0;
+
+ /* Reverse the list and take the opportunity to implement the
+ * suppress left. */
+ while ( true ) {
+ Kid *next = leadingIgnore->next;
+ leadingIgnore->next = last;
+
+ if ( leadingIgnore->tree->flags & AF_SUPPRESS_LEFT ) {
+ /* We are moving left. Chop off the tail. */
+ debug( REALM_PRINT, "suppressing left\n" );
+ freeKidList( prg, next );
+ break;
+ }
+
+ if ( next == 0 )
+ break;
+
+ last = leadingIgnore;
+ leadingIgnore = next;
+ }
+
+ /* Print the leading ignore list. Also implement the suppress right
+ * in the process. */
+ if ( printArgs->comm && (!printArgs->trim || (flags & TF_TERM_SEEN && kid->tree->id > 0)) ) {
+ ignore = leadingIgnore;
+ while ( ignore != 0 ) {
+ if ( ignore->tree->flags & AF_SUPPRESS_RIGHT )
+ break;
+
+ if ( ignore->tree->id != LEL_ID_IGNORE ) {
+ /* Save the traversal state and visit the ignore item
+ * with an empty ignore list and no parent. */
+ vm_push( (SW)visitType );
+ vm_push( (SW)leadingIgnore );
+ vm_push( (SW)ignore );
+ vm_push( (SW)parent );
+ vm_push( (SW)kid );
+
+ leadingIgnore = 0;
+ kid = ignore;
+ parent = 0;
+
+ debug( REALM_PRINT, "rec call on %p\n", kid->tree );
+ vm_push( (SW) RecIgnoreList );
+ goto rec_call;
+ rec_return_il:
+
+ kid = (Kid*)vm_pop();
+ parent = (Kid*)vm_pop();
+ ignore = (Kid*)vm_pop();
+ leadingIgnore = (Kid*)vm_pop();
+ visitType = (enum VisitType)vm_pop();
+ }
+
+ ignore = ignore->next;
+ }
+ }
+
+ /* Free the leading ignore list. */
+ freeKidList( prg, leadingIgnore );
+ leadingIgnore = 0;
+ }
+ }
+
+ if ( visitType == Term || visitType == NonTerm ) {
+ /* Open the tree. */
+ printArgs->openTree( prg, sp, printArgs, parent, kid );
+ }
+
+ if ( visitType == Term )
+ flags |= TF_TERM_SEEN;
+
+ if ( visitType == Term || visitType == IgnoreData ) {
+ /* Print contents. */
+ if ( kid->tree->id < prg->rtd->firstNonTermId ) {
+ debug( REALM_PRINT, "printing terminal %p\n", kid->tree );
+ if ( kid->tree->id != 0 )
+ printArgs->printTerm( prg, sp, printArgs, kid );
+ }
+ }
+
+ /* Print children. */
+ Kid *child = printArgs->attr ?
+ treeAttr( prg, kid->tree ) :
+ treeChild( prg, kid->tree );
+
+ if ( child != 0 ) {
+ vm_push( (SW)visitType );
+ vm_push( (SW)parent );
+ vm_push( (SW)kid );
+ parent = kid;
+ kid = child;
+ while ( kid != 0 ) {
+ vm_push( (SW) ChildPrint );
+ goto rec_call;
+ rec_return:
+ kid = kid->next;
+ }
+ kid = (Kid*)vm_pop();
+ parent = (Kid*)vm_pop();
+ visitType = (enum VisitType)vm_pop();
+ }
+
+ if ( visitType == Term || visitType == NonTerm ) {
+ /* close the tree. */
+ printArgs->closeTree( prg, sp, printArgs, parent, kid );
+ }
+
+skip_node:
+
+ /* If not currently skipping ignore data, then print it. Ignore data can
+ * be associated with terminals and nonterminals. */
+ if ( kid->tree->flags & AF_RIGHT_IGNORE ) {
+ debug( REALM_PRINT, "right ignore\n" );
+ vm_push( (SW)parent );
+ vm_push( (SW)kid );
+ parent = kid;
+ kid = treeRightIgnoreKid( prg, kid->tree );
+ vm_push( (SW) CollectIgnoreRight );
+ goto rec_call;
+ rec_return_ign_right:
+ kid = (Kid*)vm_pop();
+ parent = (Kid*)vm_pop();
+ }
+
+/* For skipping over content on null. */
+skip_null:
+
+ /* Pop the return tag and resume at the matching site. */
+ rt = (enum ReturnType)vm_pop();
+ switch ( rt ) {
+ case Done:
+ debug( REALM_PRINT, "return: done\n" );
+ goto rec_return_top;
+ break;
+ case CollectIgnoreLeft:
+ debug( REALM_PRINT, "return: ignore left\n" );
+ goto rec_return_ign_left;
+ case CollectIgnoreRight:
+ debug( REALM_PRINT, "return: ignore right\n" );
+ goto rec_return_ign_right;
+ case RecIgnoreList:
+ debug( REALM_PRINT, "return: ignore list\n" );
+ goto rec_return_il;
+ case ChildPrint:
+ debug( REALM_PRINT, "return: child print\n" );
+ goto rec_return;
+ }
+}
+
+/* Print a tree through printArgs. A null tree is written as "NIL".
+ * Otherwise the tree is wrapped in a temporary kid that is followed by a
+ * zeroed terminal kid, so that printKid also emits trailing ignores. */
+void printTreeArgs( Program *prg, Tree **sp, struct ColmPrintArgs *printArgs, Tree *tree )
+{
+    if ( tree == 0 ) {
+        printArgs->out( printArgs, "NIL", 3 );
+        return;
+    }
+
+    /* This term tree allows us to print trailing ignores. */
+    Tree termTree;
+    memset( &termTree, 0, sizeof(termTree) );
+
+    /* Kid members in order: tree, next, flags. */
+    Kid term = { &termTree, 0, 0 };
+    Kid kid = { tree, &term, 0 };
+
+    printKid( prg, sp, printArgs, &kid );
+}
+
+/* Terminal printer for plain-text output. Ints print in decimal, bools as
+ * "true"/"false", pointers and streams as '#' followed by a %p address,
+ * strings as their bytes, and any other terminal as its token data when
+ * that is non-empty. */
+void printTermTree( Program *prg, Tree **sp, struct ColmPrintArgs *printArgs, Kid *kid )
+{
+    debug( REALM_PRINT, "printing term %p\n", kid->tree );
+
+    Tree *tree = kid->tree;
+    char buf[INT_SZ];
+
+    if ( tree->id == LEL_ID_INT ) {
+        sprintf( buf, "%ld", ((Int*)tree)->value );
+        printArgs->out( printArgs, buf, strlen(buf) );
+        return;
+    }
+
+    if ( tree->id == LEL_ID_BOOL ) {
+        if ( ((Int*)tree)->value )
+            printArgs->out( printArgs, "true", 4 );
+        else
+            printArgs->out( printArgs, "false", 5 );
+        return;
+    }
+
+    if ( tree->id == LEL_ID_PTR ) {
+        printArgs->out( printArgs, "#", 1 );
+        sprintf( buf, "%p", (void*) ((Pointer*)tree)->value );
+        printArgs->out( printArgs, buf, strlen(buf) );
+        return;
+    }
+
+    if ( tree->id == LEL_ID_STR ) {
+        printStr( printArgs, ((Str*)tree)->value );
+        return;
+    }
+
+    if ( tree->id == LEL_ID_STREAM ) {
+        printArgs->out( printArgs, "#", 1 );
+        sprintf( buf, "%p", (void*) ((Stream*)tree)->file );
+        printArgs->out( printArgs, buf, strlen(buf) );
+        return;
+    }
+
+    /* Any other terminal: print its token text, if it has any. */
+    if ( tree->tokdata != 0 && stringLength( tree->tokdata ) > 0 ) {
+        printArgs->out( printArgs, stringData( tree->tokdata ),
+                stringLength( tree->tokdata ) );
+    }
+}
+
+
+/* Callback that does nothing. printTreeCollect and printTreeFile pass it in
+ * the two callback positions where printXmlStdout passes openTreeXml and
+ * closeTreeXml. */
+void printNull( Program *prg, Tree **sp, struct ColmPrintArgs *args, Kid *parent, Kid *kid )
+{
+}
+
+/* Write the opening XML tag for kid, using the xmlTag of its language
+ * element. Nothing is written for the id-0 terminal that forces trailing
+ * ignores out, nor for a last child that continues its parent's repeat or
+ * list (list flattening). */
+void openTreeXml( Program *prg, Tree **sp, struct ColmPrintArgs *args, Kid *parent, Kid *kid )
+{
+    LangElInfo *lelInfo = prg->rtd->lelInfo;
+    Tree *tree = kid->tree;
+
+    /* Skip the terminal that is for forcing trailing ignores out. */
+    if ( tree->id == 0 )
+        return;
+
+    /* List flattening: a same-id last child of a repeat or list gets no tag
+     * of its own. */
+    if ( parent != 0 && parent->tree->id == tree->id && kid->next == 0 ) {
+        if ( lelInfo[parent->tree->id].repeat || lelInfo[parent->tree->id].list )
+            return;
+    }
+
+    const char *tag = lelInfo[tree->id].xmlTag;
+    args->out( args, "<", 1 );
+    args->out( args, tag, strlen( tag ) );
+    args->out( args, ">", 1 );
+}
+
+/* Terminal printer for XML output. Pointers print as a %p address followed
+ * by a newline, bools as "true"/"false", ints in decimal, strings
+ * XML-escaped, and other terminals (positive id below firstNonTermId, not
+ * LEL_ID_IGNORE) as their XML-escaped token data when that is non-empty.
+ *
+ * NOTE(review): the newline after the pointer address is kept from the
+ * original format string; confirm it is wanted in the XML output. */
+void printTermXml( Program *prg, Tree **sp, struct ColmPrintArgs *printArgs, Kid *kid )
+{
+    Tree *tree = kid->tree;
+
+    if ( tree->id == LEL_ID_PTR ) {
+        char ptr[32];
+        snprintf( ptr, sizeof(ptr), "%p\n", (void*)((Pointer*)tree)->value );
+        printArgs->out( printArgs, ptr, strlen(ptr) );
+    }
+    else if ( tree->id == LEL_ID_BOOL ) {
+        if ( ((Int*)tree)->value )
+            printArgs->out( printArgs, "true", 4 );
+        else
+            printArgs->out( printArgs, "false", 5 );
+    }
+    else if ( tree->id == LEL_ID_INT ) {
+        char num[32];
+        snprintf( num, sizeof(num), "%ld", ((Int*)tree)->value );
+        printArgs->out( printArgs, num, strlen(num) );
+    }
+    else if ( tree->id == LEL_ID_STR ) {
+        Head *head = (Head*) ((Str*)tree)->value;
+
+        xmlEscapeData( printArgs, (char*)(head->data), head->length );
+    }
+    else if ( 0 < tree->id && tree->id < prg->rtd->firstNonTermId &&
+            tree->id != LEL_ID_IGNORE &&
+            tree->tokdata != 0 &&
+            stringLength( tree->tokdata ) > 0 )
+    {
+        xmlEscapeData( printArgs, stringData( tree->tokdata ),
+                stringLength( tree->tokdata ) );
+    }
+}
+
+
+/* Write the closing XML tag for kid, using the xmlTag of its language
+ * element. The skip rules are the same as in openTreeXml, so open and close
+ * tags stay paired. */
+void closeTreeXml( Program *prg, Tree **sp, struct ColmPrintArgs *args, Kid *parent, Kid *kid )
+{
+    LangElInfo *lelInfo = prg->rtd->lelInfo;
+    Tree *tree = kid->tree;
+
+    /* Skip the terminal that is for forcing trailing ignores out. */
+    if ( tree->id == 0 )
+        return;
+
+    /* List flattening: a same-id last child of a repeat or list gets no tag
+     * of its own. */
+    if ( parent != 0 && parent->tree->id == tree->id && kid->next == 0 ) {
+        if ( lelInfo[parent->tree->id].repeat || lelInfo[parent->tree->id].list )
+            return;
+    }
+
+    const char *tag = lelInfo[tree->id].xmlTag;
+    args->out( args, "</", 2 );
+    args->out( args, tag, strlen( tag ) );
+    args->out( args, ">", 1 );
+}
+
+/* Print a tree as plain text into a string collector. Output goes through
+ * appendCollect, terminals through printTermTree, and the other two
+ * callbacks are printNull. */
+void printTreeCollect( Program *prg, Tree **sp, StrCollect *collect, Tree *tree, int trim )
+{
+    struct ColmPrintArgs printArgs = {
+        collect, true, false, trim,
+        &appendCollect,
+        &printNull, &printTermTree, &printNull
+    };
+
+    printTreeArgs( prg, sp, &printArgs, tree );
+}
+
+/* Print a tree as plain text to a FILE. Output goes through appendFile,
+ * terminals through printTermTree, and the other two callbacks are
+ * printNull. */
+void printTreeFile( Program *prg, Tree **sp, FILE *out, Tree *tree, int trim )
+{
+    struct ColmPrintArgs printArgs = {
+        out, true, false, trim,
+        &appendFile,
+        &printNull, &printTermTree, &printNull
+    };
+
+    printTreeArgs( prg, sp, &printArgs, tree );
+}
+
+/* Print a tree as XML on stdout, using openTreeXml, printTermXml and
+ * closeTreeXml. commAttr is passed for both the second and the third
+ * member of the print arguments. */
+void printXmlStdout( Program *prg, Tree **sp, Tree *tree, int commAttr, int trim )
+{
+    struct ColmPrintArgs printArgs = {
+        stdout, commAttr, commAttr, trim,
+        &appendFile,
+        &openTreeXml, &printTermXml, &closeTreeXml
+    };
+
+    printTreeArgs( prg, sp, &printArgs, tree );
+}
+
diff --git a/src/tree.h b/src/tree.h
new file mode 100644
index 00000000..29c0ee74
--- /dev/null
+++ b/src/tree.h
@@ -0,0 +1,355 @@
+/*
+ * Copyright 2010-2012 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef __COLM_TREE_H
+#define __COLM_TREE_H
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#include <colm.h>
+
+typedef unsigned char Code; /* Byte-sized unit; UserIter keeps a Code* in its resume member. */
+typedef unsigned long Word; /* Used below for field and id arguments (getField, constructTerm). */
+typedef unsigned long Half; /* Declared with the same underlying type as Word. */
+struct Bindings; /* Forward declaration only; not defined or used elsewhere in this header. */
+
+typedef struct _File /* Doubly linked node: only the prev and next links are declared here. */
+{
+ struct _File *prev;
+ struct _File *next;
+} File;
+
+typedef struct _Location /* Position record; Head carries a pointer to one of these. */
+{
+ File *file;
+ long line; /* NOTE(review): whether line/column/byte count from 0 or 1 is not visible here. */
+ long column;
+ long byte;
+} Location;
+
+/* Header located just before string data. The characters are reached through
+ * the data pointer and counted by length; location points at a Location
+ * record. NOTE(review): whether data is NUL-terminated is not visible from
+ * this header; confirm before treating it as a C string. */
+typedef struct _Head
+{
+ const char *data;
+ long length;
+ Location *location;
+} Head;
+
+typedef struct ColmKid
+{
+ /* The tree needs to be first since pointers to kids are used to reference
+ * trees on the stack. A pointer to the word that is a Tree* is cast to
+ * a Kid*. */
+ struct ColmTree *tree;
+ struct ColmKid *next; /* Link to the following Kid in a chain (see kidListConcat, reverseKidList). */
+ unsigned char flags; /* NOTE(review): flag bit meanings are defined outside this header. */
+} Kid;
+
+typedef struct _Ref /* A Kid pointer plus a link to another Ref; the iterator structs embed it by value. */
+{
+ struct ColmKid *kid;
+ struct _Ref *next;
+} Ref;
+
+typedef struct ColmTree
+{
+ /* First four will be overlaid in other structures. Int, Pointer, Str,
+ * Stream, Input and Parser repeat id, flags, refs and child in this order;
+ * List repeats the first three and declares a ListEl* in the fourth
+ * position. Changing these members means changing all of those structs. */
+ short id;
+ unsigned short flags;
+ long refs; /* Presumably the count adjusted by treeUpref/treeDownref; confirm in tree.c. */
+ Kid *child;
+
+ Head *tokdata; /* The overlaying structs place their own value member at this position. */
+
+ /* FIXME: this needs to go somewhere else. Will do for now. */
+ unsigned short prodNum;
+} Tree;
+
+
+typedef struct _TreePair /* A key Tree pointer together with a val Tree pointer. */
+{
+ Tree *key;
+ Tree *val;
+} TreePair;
+
+typedef struct _ParseTree /* Shares only its first two members (id, flags) with Tree; the rest of the layout differs. */
+{
+ short id;
+ unsigned short flags;
+
+ struct _ParseTree *child;
+ struct _ParseTree *next;
+ struct _ParseTree *leftIgnore;
+ struct _ParseTree *rightIgnore;
+ Kid *shadow; /* The only link from a ParseTree to a Kid declared in this struct. */
+
+ /* Parsing algorithm. */
+ long state;
+ long region;
+ short causeReduce;
+
+ /* FIXME: unify probably. */
+ char retryLower;
+ char retryUpper;
+} ParseTree;
+
+typedef struct _Int
+{
+ /* Must overlay Tree: the four members below match Tree's first four in
+ * type and order. */
+ short id;
+ unsigned short flags;
+ long refs;
+ Kid *child;
+
+ long value; /* Sits where Tree declares its Head *tokdata member. */
+} Int;
+
+typedef struct _Pointer
+{
+ /* Must overlay Tree: the four members below match Tree's first four in
+ * type and order. */
+ short id;
+ unsigned short flags;
+ long refs;
+ Kid *child;
+
+ Kid *value; /* Sits where Tree declares its Head *tokdata member. */
+} Pointer;
+
+typedef struct _Str
+{
+ /* Must overlay Tree: the four members below match Tree's first four in
+ * type and order. */
+ short id;
+ unsigned short flags;
+ long refs;
+ Kid *child;
+
+ Head *value; /* Same type and position as Tree's tokdata member. */
+} Str;
+
+typedef struct _ListEl
+{
+ /* Must overlay kid: value lines up with Kid's tree member and next with
+ * Kid's next member. prev is additional and occupies the third position,
+ * where Kid declares flags. */
+ Tree *value;
+ struct _ListEl *next;
+ struct _ListEl *prev;
+} ListEl;
+
+/*
+ * Maps
+ */
+typedef struct _GenericInfo /* Referenced by List and Parser through their genericInfo members. */
+{
+ long type; /* NOTE(review): the set of valid type/keyType values is defined outside this header. */
+ long typeArg;
+ long keyOffset;
+ long keyType;
+ long langElId;
+ long parserId;
+} GenericInfo;
+
+typedef struct _List
+{
+ /* Must overlay Tree: id, flags and refs match Tree; head takes the place
+ * of Tree's child member (a ListEl* instead of a Kid*). */
+ short id;
+ unsigned short flags;
+ long refs;
+ ListEl *head;
+
+ ListEl *tail; /* Sits where Tree declares its Head *tokdata member. */
+ long listLen;
+ GenericInfo *genericInfo;
+
+} List;
+
+typedef struct _Stream
+{
+ /* Must overlay Tree: the four members below match Tree's first four in
+ * type and order. */
+ short id;
+ unsigned short flags;
+ long refs;
+ Kid *child;
+
+ /* FILE and SourceStream are not declared in this header; they have to be
+ * provided by colm.h or by a header included before this one. */
+ FILE *file;
+ SourceStream *in;
+} Stream;
+
+typedef struct _Input
+{
+ /* Must overlay Tree: the four members below match Tree's first four in
+ * type and order. */
+ short id;
+ unsigned short flags;
+ long refs;
+ Kid *child;
+
+ InputStream *in; /* InputStream is not declared in this header; it must come from an earlier include. */
+} Input;
+
+typedef struct _Parser
+{
+ /* Must overlay Tree: the four members below match Tree's first four in
+ * type and order. */
+ short id;
+ unsigned short flags;
+ long refs;
+ Kid *child;
+
+ GenericInfo *genericInfo;
+
+ /* struct _PdaRun and struct _FsmRun are referenced by tag only; neither is
+ * defined in this header. struct _Input is the Input struct defined in
+ * this header. */
+ struct _PdaRun *pdaRun;
+ struct _FsmRun *fsmRun;
+ struct _Input *input;
+ Tree *result;
+} Parser;
+
+typedef struct _TreeIter /* RevTreeIter repeats these five members in the same order; keep the two in step. */
+{
+ Ref rootRef;
+ Ref ref;
+ long searchId;
+ Tree **stackRoot;
+ long stackSize;
+} TreeIter;
+
+/* This must overlay tree iter because some of the same bytecodes are used.
+ * The first five members repeat TreeIter exactly; only kidAtYield, children
+ * and cur are additional. */
+typedef struct _RevTreeIter
+{
+ Ref rootRef;
+ Ref ref;
+ long searchId;
+ Tree **stackRoot;
+ long stackSize;
+
+ /* For detecting a split at the leaf. */
+ Kid *kidAtYield;
+ long children;
+ Kid **cur;
+} RevTreeIter;
+
+
+typedef struct _UserIter /* Does not share TreeIter's member order: ref comes first and searchId last. */
+{
+ /* The current item. */
+ Ref ref;
+ Tree **stackRoot;
+ long argSize;
+ long stackSize;
+ Code *resume; /* NOTE(review): presumably the bytecode position to continue from; confirm in bytecode.c. */
+ Tree **frame;
+ long searchId;
+} UserIter;
+
+
+void treeUpref( Tree *tree ); /* Takes no program or stack argument, unlike treeDownref. */
+void treeDownref( struct ColmProgram *prg, Tree **sp, Tree *tree );
+long cmpTree( struct ColmProgram *prg, const Tree *tree1, const Tree *tree2 ); /* NOTE(review): sign convention of the result is defined in tree.c. */
+
+Tree *pushRightIgnore( struct ColmProgram *prg, Tree *pushTo, Tree *rightIgnore );
+Tree *pushLeftIgnore( struct ColmProgram *prg, Tree *pushTo, Tree *leftIgnore );
+Tree *popRightIgnore( struct ColmProgram *prg, Tree **sp, Tree *popFrom, Tree **rightIgnore );
+Tree *popLeftIgnore( struct ColmProgram *prg, Tree **sp, Tree *popFrom, Tree **leftIgnore );
+Tree *treeLeftIgnore( struct ColmProgram *prg, Tree *tree );
+Tree *treeRightIgnore( struct ColmProgram *prg, Tree *tree );
+Kid *treeLeftIgnoreKid( struct ColmProgram *prg, Tree *tree );
+Kid *treeRightIgnoreKid( struct ColmProgram *prg, Tree *tree );
+Kid *treeChild( struct ColmProgram *prg, const Tree *tree );
+Kid *treeAttr( struct ColmProgram *prg, const Tree *tree );
+Kid *kidListConcat( Kid *list1, Kid *list2 ); /* Works on Kid chains only; no program argument. */
+Kid *treeExtractChild( struct ColmProgram *prg, Tree *tree );
+Kid *reverseKidList( Kid *kid ); /* Works on Kid chains only; no program argument. */
+
+Tree *constructInteger( struct ColmProgram *prg, long i );
+Tree *constructPointer( struct ColmProgram *prg, Tree *tree );
+Tree *constructTerm( struct ColmProgram *prg, Word id, Head *tokdata );
+Tree *constructReplacementTree( Kid *kid, Tree **bindings, struct ColmProgram *prg, long pat ); /* prg is the third parameter here, not the first. */
+Tree *createGeneric( struct ColmProgram *prg, long genericId );
+Tree *constructToken( struct ColmProgram *prg, Tree **root, long nargs );
+Tree *constructInput( struct ColmProgram *prg );
+
+
+int testFalse( struct ColmProgram *prg, Tree *tree );
+Tree *makeTree( struct ColmProgram *prg, Tree **root, long nargs );
+Stream *openFile( struct ColmProgram *prg, Tree *name, Tree *mode );
+Stream *openStreamFd( struct ColmProgram *prg, long fd );
+Kid *copyIgnoreList( struct ColmProgram *prg, Kid *ignoreHeader );
+Kid *copyKidList( struct ColmProgram *prg, Kid *kidList );
+void streamFree( struct ColmProgram *prg, Stream *s );
+Tree *copyTree( struct ColmProgram *prg, Tree *tree, Kid *oldNextDown, Kid **newNextDown );
+
+/* The *Split variants below differ from their plain counterparts in the
+ * declaration only by the added program argument. */
+Tree *getPtrVal( Pointer *ptr );
+Tree *getPtrValSplit( struct ColmProgram *prg, Pointer *ptr );
+Tree *getField( Tree *tree, Word field );
+Tree *getFieldSplit( struct ColmProgram *prg, Tree *tree, Word field );
+Tree *getRhsEl( struct ColmProgram *prg, Tree *lhs, long position );
+void setField( struct ColmProgram *prg, Tree *tree, long field, Tree *value ); /* field is a long here, a Word in getField. */
+
+void setTriterCur( struct ColmProgram *prg, TreeIter *iter, Tree *tree );
+void setUiterCur( struct ColmProgram *prg, UserIter *uiter, Tree *tree );
+void refSetValue( Ref *ref, Tree *v );
+Tree *treeSearch( struct ColmProgram *prg, Kid *kid, long id );
+Tree *treeSearch2( struct ColmProgram *prg, Tree *tree, long id ); /* Takes a Tree* where treeSearch takes a Kid*. */
+
+int matchPattern( Tree **bindings, struct ColmProgram *prg, long pat, Kid *kid, int checkNext ); /* prg is the second parameter here. */
+Tree *treeIterDerefCur( TreeIter *iter );
+
+/* For making references of attributes. */
+Kid *getFieldKid( Tree *tree, Word field );
+
+Tree *copyRealTree( struct ColmProgram *prg, Tree *tree, Kid *oldNextDown, Kid **newNextDown ); /* Parameter list is identical to copyTree. */
+void splitIterCur( struct ColmProgram *prg, Tree ***psp, TreeIter *iter );
+Tree *setListMem( List *list, Half field, Tree *value ); /* field is a Half here and a Word in getListMem; both are typedefs of unsigned long. */
+
+void listAppend2( struct ColmProgram *prg, List *list, Tree *val );
+Tree *listRemoveEnd( struct ColmProgram *prg, List *list );
+Tree *getListMem( List *list, Word field );
+Tree *getListMemSplit( struct ColmProgram *prg, List *list, Word field );
+
+/* The iterator functions take Tree ***psp, a pointer to the caller's Tree**;
+ * presumably so the callee can move the stack pointer. Confirm in tree.c. */
+Tree *treeIterAdvance( struct ColmProgram *prg, Tree ***psp, TreeIter *iter );
+Tree *treeIterNextChild( struct ColmProgram *prg, Tree ***psp, TreeIter *iter );
+Tree *treeRevIterPrevChild( struct ColmProgram *prg, Tree ***psp, RevTreeIter *iter );
+Tree *treeIterNextRepeat( struct ColmProgram *prg, Tree ***psp, TreeIter *iter );
+Tree *treeIterPrevRepeat( struct ColmProgram *prg, Tree ***psp, TreeIter *iter );
+
+/* An automatically grown buffer for collecting tokens. Always reuses space;
+ * the allocation is never shrunk. */
+typedef struct _StrCollect
+{
+ char *data;
+ int allocated; /* NOTE(review): int here, while strCollectAppend takes its len as a long. */
+ int length;
+} StrCollect;
+
+void initStrCollect( StrCollect *collect );
+void strCollectDestroy( StrCollect *collect );
+void strCollectAppend( StrCollect *collect, const char *data, long len ); /* Takes an explicit length alongside data. */
+void strCollectClear( StrCollect *collect );
+Tree *treeTrim( struct ColmProgram *prg, Tree **sp, Tree *tree );
+
+/* The three printers differ in destination: a StrCollect, a caller-supplied
+ * FILE*, or stdout (printXmlStdout takes no destination argument). */
+void printTreeCollect( struct ColmProgram *prg, Tree **sp, StrCollect *collect, Tree *tree, int trim );
+void printTreeFile( struct ColmProgram *prg, Tree **sp, FILE *out, Tree *tree, int trim );
+void printXmlStdout( struct ColmProgram *prg, Tree **sp, Tree *tree, int commAttr, int trim );
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif
+