summary | refs | log | tree | commit | diff
path: root/colm
diff options
context:
space:
mode:
author Adrian Thurston <thurston@complang.org> 2012-07-01 12:48:22 -0400
committer Adrian Thurston <thurston@complang.org> 2012-07-01 12:48:22 -0400
commit 247904a84430b8c9151fa6afb68f01b60afb92c9 (patch)
tree 58d498f783a935b02255120c814c387745dc6e41 /colm
parent d8cdec468bb7efad768d25872147533312cffe91 (diff)
download colm-247904a84430b8c9151fa6afb68f01b60afb92c9.tar.gz
moved 'colm' dir to 'src'
Diffstat (limited to 'colm')
-rw-r--r-- colm/.gitignore 26
-rw-r--r-- colm/Makefile.am 183
-rw-r--r-- colm/buffer.h 55
-rw-r--r-- colm/bytecode.c 3579
-rw-r--r-- colm/bytecode.h 487
-rw-r--r-- colm/closure.cc 458
-rw-r--r-- colm/codegen.cc 50
-rw-r--r-- colm/codevect.c 183
-rw-r--r-- colm/colm.h 55
-rw-r--r-- colm/compiler.cc 1496
-rw-r--r-- colm/ctinput.cc 439
-rw-r--r-- colm/debug.c 78
-rw-r--r-- colm/debug.h 58
-rw-r--r-- colm/declare.cc 383
-rw-r--r-- colm/defs.h.in 49
-rw-r--r-- colm/dotgen.cc 113
-rw-r--r-- colm/dotgen.h 51
-rw-r--r-- colm/exports.cc 285
-rw-r--r-- colm/fsmap.cc 856
-rw-r--r-- colm/fsmattach.cc 425
-rw-r--r-- colm/fsmbase.cc 602
-rw-r--r-- colm/fsmcodegen.cc 1098
-rw-r--r-- colm/fsmcodegen.h 212
-rw-r--r-- colm/fsmexec.cc 208
-rw-r--r-- colm/fsmgraph.cc 1408
-rw-r--r-- colm/fsmgraph.h 1388
-rw-r--r-- colm/fsmmin.cc 732
-rw-r--r-- colm/fsmrun.h 36
-rw-r--r-- colm/fsmstate.cc 467
-rw-r--r-- colm/global.h 90
-rw-r--r-- colm/input.c 847
-rw-r--r-- colm/input.h 214
-rw-r--r-- colm/keyops.h 283
-rw-r--r-- colm/list.c 105
-rw-r--r-- colm/lmparse.kh 120
-rw-r--r-- colm/lmparse.kl 2677
-rw-r--r-- colm/lmscan.h 118
-rw-r--r-- colm/lmscan.rl 636
-rw-r--r-- colm/main.cc 623
-rw-r--r-- colm/map.c 763
-rw-r--r-- colm/map.cc 26
-rw-r--r-- colm/map.h 108
-rw-r--r-- colm/parsedata.h 1063
-rw-r--r-- colm/parsetree.cc 1776
-rw-r--r-- colm/parsetree.h 2253
-rw-r--r-- colm/pcheck.cc 154
-rw-r--r-- colm/pcheck.h 48
-rw-r--r-- colm/pdabuild.cc 2091
-rw-r--r-- colm/pdacodegen.cc 653
-rw-r--r-- colm/pdacodegen.h 106
-rw-r--r-- colm/pdagraph.cc 533
-rw-r--r-- colm/pdagraph.h 515
-rw-r--r-- colm/pdarun.c 2272
-rw-r--r-- colm/pdarun.h 473
-rw-r--r-- colm/pool.c 330
-rw-r--r-- colm/pool.h 86
-rw-r--r-- colm/program.c 254
-rw-r--r-- colm/program.h 128
-rw-r--r-- colm/redbuild.cc 650
-rw-r--r-- colm/redbuild.h 161
-rw-r--r-- colm/redfsm.cc 1112
-rw-r--r-- colm/redfsm.h 524
-rw-r--r-- colm/resolve.cc 805
-rw-r--r-- colm/rtvector.h 34
-rw-r--r-- colm/string.c 240
-rw-r--r-- colm/synthesis.cc 3277
-rw-r--r-- colm/tree.c 2484
-rw-r--r-- colm/tree.h 355
68 files changed, 0 insertions, 44447 deletions
diff --git a/colm/.gitignore b/colm/.gitignore
deleted file mode 100644
index fe077f2c..00000000
--- a/colm/.gitignore
+++ /dev/null
@@ -1,26 +0,0 @@
-# Common testing files.
-/tmp.lm
-/tmp.c
-/tmp.bin
-/input
-/out
-
-/*.o
-/Makefile.in
-/Makefile
-/.*.d
-/colm
-/lmparse.h
-/lmparse.cc
-/lmscan.cc
-/config.h.in
-/config.h
-/defs.h
-/version.h
-/tags
-/.deps
-/libcolmd.a
-/libcolmp.a
-/.libs
-/stamp-h1
-/stamp-h2
diff --git a/colm/Makefile.am b/colm/Makefile.am
deleted file mode 100644
index c4a3504a..00000000
--- a/colm/Makefile.am
+++ /dev/null
@@ -1,183 +0,0 @@
-#
-# Copyright 2007-2012 Adrian Thurston <thurston@complang.org>
-#
-
-# This file is part of Colm.
-#
-# Colm is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# Colm is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with Colm; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-# Header search path for the bundled aapl library. AM_CPPFLAGS is the current
-# name for what the deprecated INCLUDES variable used to provide.
-AM_CPPFLAGS = -I$(top_srcdir)/aapl
-
-bin_PROGRAMS = colm
-
-# C sources and installed headers of the runtime; both archives below are
-# built from the same source list.
-RUNTIME_SRC = \
-	map.c pdarun.c list.c input.c debug.c \
-	codevect.c pool.c string.c tree.c bytecode.c program.c
-
-RUNTIME_HDR = \
-	bytecode.h config.h defs.h debug.h pool.h input.h \
-	fsmrun.h pdarun.h map.h tree.h program.h colm.h
-
-lib_LIBRARIES = libcolmp.a libcolmd.a
-
-libcolmp_a_SOURCES = $(RUNTIME_SRC)
-libcolmp_a_CFLAGS = -I..
-
-# NOTE(review): libcolmd.a currently gets exactly the same flags as
-# libcolmp.a; confirm whether it was meant to be built with -DCOLM_LOG.
-libcolmd_a_SOURCES = $(RUNTIME_SRC)
-libcolmd_a_CFLAGS = -I..
-
-colm_CXXFLAGS = \
-	-Wall \
-	-DCOLM_LOG \
-	-DPREFIX='"$(prefix)"' \
-	-I..
-
-colm_LDADD = libcolmp.a
-
-# ADT DEFS_COLM += -DCOLM_LOG -DPREFIX='"$(prefix)"'
-
-# Headers first, then the compiler sources. Each file is listed once.
-colm_SOURCES = \
-	buffer.h bytecode.h colm.h debug.h dotgen.h fsmcodegen.h fsmgraph.h \
-	fsmrun.h input.h keyops.h lmparse.h lmscan.h map.h parsedata.h \
-	parsetree.h pcheck.h pdacodegen.h pdagraph.h pdarun.h pool.h redbuild.h \
-	redfsm.h rtvector.h tree.h version.h global.h \
-	\
-	resolve.cc synthesis.cc lmparse.cc lmscan.cc parsetree.cc \
-	fsmstate.cc fsmbase.cc fsmattach.cc fsmmin.cc \
-	fsmgraph.cc pdagraph.cc pdabuild.cc pdacodegen.cc fsmcodegen.cc \
-	redfsm.cc fsmexec.cc main.cc redbuild.cc closure.cc fsmap.cc \
-	dotgen.cc pcheck.cc ctinput.cc declare.cc codegen.cc \
-	exports.cc compiler.cc
-
-colmincdir = $(includedir)/colm
-
-colminc_HEADERS = $(RUNTIME_HDR)
-
-# Generated files that must exist before anything else is compiled.
-BUILT_SOURCES = \
-	version.h lmscan.cc lmparse.h lmparse.cc
-
-version.h: Makefile
-	echo '#define VERSION "$(PACKAGE_VERSION)"' > version.h
-	echo '#define PUBDATE "$(PUBDATE)"' >> version.h
-
-# The parser and scanner are only regenerated when configure enabled it.
-if BUILD_PARSERS
-
-lmparse.h: lmparse.kh
-	$(KELBT) -o $@ $<
-
-lmparse.cc: lmparse.kl lmparse.kh
-	$(KELBT) -o $@ $<
-
-lmscan.cc: lmparse.h
-
-lmscan.cc: lmscan.rl
-	$(RAGEL) -G2 -o $@ $<
-
-endif
-
-# ADT
-# ADT # Logging:
-# ADT # colm: rt on/off
-# ADT # rt_prd: off
-# ADT # rt_db: on
-# ADT # rt_clm: rt on/off
-# ADT
-# ADT INCS += -I../aapl
-# ADT
-# ADT DEFS_COLM += -DCOLM_LOG -DPREFIX='"$(prefix)"'
-# ADT DEFS_RT_P +=
-# ADT DEFS_RT_D += -DCOLM_LOG
-# ADT
-# ADT CFLAGS += -g -Wall -Wwrite-strings
-# ADT LDFLAGS +=
-# ADT
-# ADT # Files in ALL_SRC that are generated.
-# ADT GEN_SRC = version.h lmscan.cc lmparse.h lmparse.cc
-# ADT
-# ADT RUNTIME_P = libcolmp.a
-# ADT RUNTIME_D = libcolmd.a
-# ADT
-# ADT LIBS =
-# ADT
-# ADT #*************************************
-# ADT
-# ADT # Get the version info.
-# ADT include ../version.mk
-# ADT
-# ADT prefix = @prefix@
-# ADT
-# ADT BUILD_PARSERS = @BUILD_PARSERS@
-# ADT
-# ADT # Programs
-# ADT CXX = @CXX@
-# ADT CC = @CC@
-# ADT
-# ADT # Get objects and dependencies from sources.
-# ADT COLM_OBJ = $(COLM_SRC:%.cc=%.o)
-# ADT RUNTIME_OBJ_P = $(RUNTIME_SRC:%.c=%_p.o)
-# ADT RUNTIME_OBJ_D = $(RUNTIME_SRC:%.c=%_d.o)
-# ADT
-# ADT DEPS = $(COLM_SRC:%.cc=.%.d) $(RUNTIME_SRC:%.c=.%_p.d) $(RUNTIME_SRC:%.c=.%_d.d)
-# ADT
-# ADT # Rules.
-# ADT all: colm $(RUNTIME_P) $(RUNTIME_D)
-# ADT
-# ADT colm: $(GEN_SRC) $(COLM_OBJ) $(RUNTIME_OBJ_D) $(RUNTIME_OBJ_C_D) $(LIBS)
-# ADT $(CXX) $(LDFLAGS) -o $@ $(COLM_OBJ) $(RUNTIME_OBJ_D) $(RUNTIME_OBJ_C_D) $(LIBS)
-# ADT
-# ADT $(RUNTIME_P): $(RUNTIME_OBJ_P) $(RUNTIME_OBJ_C_P)
-# ADT ar -cr $@ $^
-# ADT
-# ADT $(RUNTIME_D): $(RUNTIME_OBJ_D) $(RUNTIME_OBJ_C_D)
-# ADT ar -cr $@ $^
-# ADT
-# ADT version.h: ../version.mk
-# ADT echo '#define VERSION "$(VERSION)"' > version.h
-# ADT echo '#define PUBDATE "$(PUBDATE)"' >> version.h
-# ADT
-# ADT
-# ADT $(COLM_OBJ): %.o: %.cc
-# ADT @$(CXX) -M $(DEFS_COLM) $(INCS) $< > .$*.d
-# ADT $(CXX) -c $(CFLAGS) $(DEFS_COLM) $(INCS) -o $@ $<
-# ADT
-# ADT $(RUNTIME_OBJ_P): %_p.o: %.c
-# ADT @$(CC) -M -MT $@ $(DEFS_RT_P) $< > .$*_p.d
-# ADT $(CC) -c $(CFLAGS) $(DEFS_RT_P) -o $@ $<
-# ADT
-# ADT $(RUNTIME_OBJ_D): %_d.o: %.c
-# ADT @$(CC) -M -MT $@ $(DEFS_RT_D) $< > .$*_d.d
-# ADT $(CC) -c $(CFLAGS) $(DEFS_RT_D) -o $@ $<
-# ADT
-# ADT distclean: clean
-# ADT rm -f Makefile config.h
-# ADT
-# ADT ifeq ($(BUILD_PARSERS),true)
-# ADT EXTRA_CLEAN = $(GEN_SRC)
-# ADT endif
-# ADT
-# ADT clean:
-# ADT rm -f tags .*.d *.o colm $(EXTRA_CLEAN) $(RUNTIME_P) $(RUNTIME_D)
-# ADT
-# ADT install: all
-# ADT install -d $(prefix)/bin
-# ADT install -d $(prefix)/include
-# ADT install -d $(prefix)/include/colm
-# ADT install -d $(prefix)/lib
-# ADT install -s colm $(prefix)/bin/colm
-# ADT install libcolmp.a libcolmd.a $(prefix)/lib
-# ADT install $(RUNTIME_HDR) $(prefix)/include/colm
-# ADT
-# ADT -include $(DEPS)
-
diff --git a/colm/buffer.h b/colm/buffer.h
deleted file mode 100644
index 9039ad4b..00000000
--- a/colm/buffer.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Copyright 2003 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Colm.
- *
- * Colm is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Colm is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Colm; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#ifndef _BUFFER_H
-#define _BUFFER_H
-
-#define BUFFER_INITIAL_SIZE 4096
-
-/* An automatically grown buffer for collecting tokens. Always reuses space;
- * never down resizes. The buffer owns `data` and frees it on destruction.
- * Running out of memory is fatal: the process is aborted instead of writing
- * through a null pointer. */
-struct Buffer
-{
-    /* Start with BUFFER_INITIAL_SIZE bytes of storage and no content. */
-    Buffer()
-    {
-        data = (char*) malloc( BUFFER_INITIAL_SIZE );
-        if ( data == 0 )
-            abort();
-        allocated = BUFFER_INITIAL_SIZE;
-        length = 0;
-    }
-    ~Buffer() { free(data); }
-
-    /* Append one character, doubling the storage first when it is full. */
-    void append( char p )
-    {
-        if ( length == allocated ) {
-            /* Only replace data once realloc is known to have succeeded. */
-            int grown = allocated * 2;
-            char *bigger = (char*) realloc( data, grown );
-            if ( bigger == 0 )
-                abort();
-            data = bigger;
-            allocated = grown;
-        }
-        data[length++] = p;
-    }
-
-    /* Drop the content but keep the storage for reuse. */
-    void clear() { length = 0; }
-
-    char *data;     /* Storage of `allocated` bytes. */
-    int allocated;  /* Current capacity in bytes. */
-    int length;     /* Number of bytes in use. */
-};
-
-#endif /* _BUFFER_H */
diff --git a/colm/bytecode.c b/colm/bytecode.c
deleted file mode 100644
index 57d31c78..00000000
--- a/colm/bytecode.c
+++ /dev/null
@@ -1,3579 +0,0 @@
-/*
- * Copyright 2006-2012 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Colm.
- *
- * Colm is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Colm is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Colm; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-//#define COLM_LOG
-
-#include <colm/pdarun.h>
-#include <colm/fsmrun.h>
-#include <colm/tree.h>
-#include <colm/bytecode.h>
-#include <colm/pool.h>
-#include <colm/debug.h>
-#include <colm/config.h>
-
-#include <alloca.h>
-#include <sys/mman.h>
-#include <string.h>
-#include <assert.h>
-#include <stdlib.h>
-
-#if SIZEOF_LONG != 4 && SIZEOF_LONG != 8
- #error "SIZEOF_LONG contained an unexpected value"
-#endif
-
-#define true 1
-#define false 0
-
-/* More common macros are in bytecode.h. */
-#define vm_top_off(n) (sp[n])
-#define vm_popn(n) (sp += (n))
-#define vm_pushn(n) (sp -= (n))
-#define vm_local(o) (exec->framePtr[o])
-#define vm_plocal(o) (&exec->framePtr[o])
-#define vm_local_iframe(o) (exec->iframePtr[o])
-#define vm_plocal_iframe(o) (&exec->iframePtr[o])
-
-/*
- * Operand decoding. Apart from read_word_p, every macro below works on the
- * `instr` pointer of the enclosing function and advances it past the bytes it
- * decodes. Multi-byte operands are stored least significant byte first.
- */
-
-/* Read one byte into i. */
-#define read_byte( i ) do { \
-    i = ((uchar) *instr++); \
-} while(0)
-
-/* Skip one byte. */
-#define consume_byte( ) do { \
-    instr += 1; \
-} while(0)
-
-/* Read four bytes starting at p into i; p itself is not advanced. Note that
- * only four bytes are read even when a word is eight bytes wide. */
-#define read_word_p( i, p ) do { \
-    i = ((Word) (p)[0]); \
-    i |= ((Word) (p)[1]) << 8; \
-    i |= ((Word) (p)[2]) << 16; \
-    i |= ((Word) (p)[3]) << 24; \
-} while(0)
-
-/* There are better ways. */
-#if SIZEOF_LONG == 4
-    /* Read a four byte word into i. */
-    #define read_word( i ) do { \
-        i = ((Word) *instr++); \
-        i |= ((Word) *instr++) << 8; \
-        i |= ((Word) *instr++) << 16; \
-        i |= ((Word) *instr++) << 24; \
-    } while(0)
-
-    /* Skip a four byte word. */
-    #define consume_word( ) do { \
-        instr += 4; \
-    } while(0)
-#else
-    /* Read an eight byte word into i. */
-    #define read_word( i ) do { \
-        i = ((Word) *instr++); \
-        i |= ((Word) *instr++) << 8; \
-        i |= ((Word) *instr++) << 16; \
-        i |= ((Word) *instr++) << 24; \
-        i |= ((Word) *instr++) << 32; \
-        i |= ((Word) *instr++) << 40; \
-        i |= ((Word) *instr++) << 48; \
-        i |= ((Word) *instr++) << 56; \
-    } while(0)
-
-    /* Skip an eight byte word. */
-    #define consume_word( ) do { \
-        instr += 8; \
-    } while(0)
-#endif
-
-/* Read a word and store it in i converted to Type. The temporary has an
- * unusual name so that it cannot hide a caller's variable passed as i. */
-#define read_word_type( Type, i ) do { \
-    Word read_word_tmp_; \
-    read_word( read_word_tmp_ ); \
-    i = (Type) read_word_tmp_; \
-} while(0)
-
-/* Read a word and store it in i as a tree pointer. */
-#define read_tree( i ) read_word_type( Tree*, i )
-
-/* Read a two byte quantity into i. */
-#define read_half( i ) do { \
-    i = ((Word) *instr++); \
-    i |= ((Word) *instr++) << 8; \
-} while(0)
-
-/* Logging switches, one per area; all start at 0.
- * NOTE(review): presumably consulted by the debug() facility -- confirm
- * against debug.h. */
-int colm_log_bytecode = 0;
-int colm_log_parse = 0;
-int colm_log_match = 0;
-int colm_log_compile = 0;
-int colm_log_conds = 0;
-
-/* Stub for growing the VM stack: it only emits a debug message and does not
- * touch the stack. `prg` is unused. */
-void vm_grow( Program *prg )
-{
- debug( REALM_BYTECODE, "growing stack\n" );
-}
-
-/* Install `val` as the context of the parser's PdaRun. The value stored is
- * whatever splitTree() returns for it. `sp` is unused. */
-void parserSetContext( Program *prg, Tree **sp, Parser *parser, Tree *val )
-{
-    Tree *context = splitTree( prg, val );
-    parser->pdaRun->context = context;
-}
-
-/* Print `tree` into a temporary collector and return the text as a newly
- * allocated string head. `trim` is handed through to printTreeCollect(). */
-Head *treeToStr( Program *prg, Tree **sp, Tree *tree, int trim )
-{
-    StrCollect text;
-    Head *head;
-
-    /* Gather the printed form of the tree. */
-    initStrCollect( &text );
-    printTreeCollect( prg, sp, &text, tree, trim );
-
-    /* Copy it out before the collector is torn down. */
-    head = stringAllocFull( prg, text.data, text.length );
-    strCollectDestroy( &text );
-
-    return head;
-}
-
-/* Append `input` to the end of `inputStream`.
- *
- * A stream or any other non-string tree is upref'd and appended as an
- * object; the return value is then 0. A string is printed to text, the text
- * is appended as data and its length is returned. */
-Word streamAppend( Program *prg, Tree **sp, Tree *input, InputStream *inputStream )
-{
-    StrCollect text;
-    long appended;
-
-    if ( input->id == LEL_ID_STREAM ) {
-        treeUpref( input );
-        appendStream( inputStream, input );
-        return 0;
-    }
-
-    if ( input->id != LEL_ID_STR ) {
-        treeUpref( input );
-        appendTree( inputStream, input );
-        return 0;
-    }
-
-    /* String: collect the printed text and load it into the input. */
-    initStrCollect( &text );
-    printTreeCollect( prg, sp, &text, input, true );
-    appendData( inputStream, text.data, text.length );
-    appended = text.length;
-    strCollectDestroy( &text );
-
-    return appended;
-}
-
-/* Run the parse loop over the parser's input, with stopId as stop target.
- *
- * The function is resumable: `entry` selects where execution starts.
- * PcrStart begins a fresh run, which is skipped entirely when the parser
- * already has a parse error. Whenever parseLoop() yields something other than
- * PcrDone that value is returned to the caller; calling again with
- * PcrReduction, PcrGeneration, PcrPreEof or PcrReverse jumps back into the
- * middle of the while loop and calls parseLoop() again with that entry.
- * Returns PcrDone once the loop has finished (or when entry is PcrDone). */
-long parseFrag( Program *prg, Tree **sp, Parser *parser, long stopId, long entry )
-{
-switch ( entry ) {
-case PcrStart:
-
- if ( ! parser->pdaRun->parseError ) {
- parser->pdaRun->stopTarget = stopId;
-
- long pcr = parseLoop( prg, sp, parser->pdaRun, parser->fsmRun, parser->input->in, entry );
-
- while ( pcr != PcrDone ) {
-
-/* Hand the request to the caller; the case labels below are where a later
- * call resumes. */
-return pcr;
-case PcrReduction:
-case PcrGeneration:
-case PcrPreEof:
-case PcrReverse:
-
- pcr = parseLoop( prg, sp, parser->pdaRun, parser->fsmRun, parser->input->in, entry );
- }
- }
-
-case PcrDone:
-break; }
-
- return PcrDone;
-}
-
-/* Finish a parse and store the parsed root in *result.
- *
- * Resumable in the same way as parseFrag(): `entry` is PcrStart for a fresh
- * call, or one of PcrReduction, PcrGeneration, PcrPreEof, PcrReverse to
- * continue after a previous call returned that value. On a fresh call, when
- * no stop target is set (stopTarget <= 0), EOF is set on the input and the
- * parse loop is run unless a parse error is already recorded. Afterwards the
- * parse is committed unless revertOn is set, and the root tree is upref'd
- * and written to *result. Returns PcrDone when finished; *result is not
- * written when entry is PcrDone. */
-long parseFinish( Tree **result, Program *prg, Tree **sp,
- Parser *parser, int revertOn, long entry )
-{
-switch ( entry ) {
-case PcrStart:
-
- if ( parser->pdaRun->stopTarget <= 0 ) {
- setEof( parser->input->in );
-
- if ( ! parser->pdaRun->parseError ) {
- long pcr = parseLoop( prg, sp, parser->pdaRun, parser->fsmRun, parser->input->in, entry );
-
- while ( pcr != PcrDone ) {
-
-/* Hand the request to the caller; the case labels below are where a later
- * call resumes. */
-return pcr;
-case PcrReduction:
-case PcrGeneration:
-case PcrPreEof:
-case PcrReverse:
-
- pcr = parseLoop( prg, sp, parser->pdaRun, parser->fsmRun, parser->input->in, entry );
- }
- }
- }
-
- /* FIXME: need something here to check that we aren't stopped waiting for
- * more data when we are actually expected to finish. This check doesn't
- * work (at time of writing). */
- //assert( (parser->pdaRun->stopTarget > 0 && parser->pdaRun->stopParsing) || parser->input->in->eofSent );
-
- if ( !revertOn )
- commitFull( prg, sp, parser->pdaRun, 0 );
-
- Tree *tree = getParsedRoot( parser->pdaRun, parser->pdaRun->stopTarget > 0 );
- treeUpref( tree );
-
- *result = tree;
-
-case PcrDone:
-break; }
-
- return PcrDone;
-}
-
-/* Run the parser backwards until its step count is down to `steps`.
- *
- * Resumable in the same way as parseFrag(): `entry` is PcrStart for a fresh
- * call, or one of PcrReduction, PcrGeneration, PcrPreEof, PcrReverse to
- * continue after a previous call returned that value. The debug message and
- * resetToken() run on every call, fresh or resumed. Nothing is undone when
- * `steps` is not below the current step count. Returns PcrDone when done. */
-long undoParseFrag( Program *prg, Tree **sp, Parser *parser, long steps, long entry )
-{
- InputStream *inputStream = parser->input->in;
- FsmRun *fsmRun = parser->fsmRun;
- PdaRun *pdaRun = parser->pdaRun;
-
- debug( REALM_PARSE, "undo parse frag, target steps: %ld, pdarun steps: %ld\n", steps, pdaRun->steps );
-
- resetToken( fsmRun );
-
-switch ( entry ) {
-case PcrStart:
-
- if ( steps < pdaRun->steps ) {
- /* Setup environment for going backwards until we reduced steps to
- * what we want. */
- pdaRun->numRetry += 1;
- pdaRun->targetSteps = steps;
- pdaRun->triggerUndo = 1;
-
- /* The parse loop will recognise the situation. */
- long pcr = parseLoop( prg, sp, pdaRun, fsmRun, inputStream, entry );
- while ( pcr != PcrDone ) {
-
-/* Hand the request to the caller; the case labels below are where a later
- * call resumes. */
-return pcr;
-case PcrReduction:
-case PcrGeneration:
-case PcrPreEof:
-case PcrReverse:
-
- pcr = parseLoop( prg, sp, pdaRun, fsmRun, inputStream, entry );
- }
-
- /* Reset environment. */
- pdaRun->triggerUndo = 0;
- pdaRun->targetSteps = -1;
- pdaRun->numRetry -= 1;
- }
-
-case PcrDone:
-break; }
-
- return PcrDone;
-}
-
-/* Pull data from the input stream and return it as a string tree. `length`
- * is read as an Int tree whose value is the amount requested. */
-Tree *streamPullBc( Program *prg, FsmRun *fsmRun, InputStream *in, Tree *length )
-{
-    Int *count = (Int*)length;
-    Head *pulled = streamPull( prg, fsmRun, in, count->value );
-    return constructString( prg, pulled );
-}
-
-/* Reverse a pull: hand the text held by the string tree `str` to
- * undoStreamPull(). `prg` is unused. */
-void undoPull( Program *prg, FsmRun *fsmRun, InputStream *in, Tree *str )
-{
-    Head *head = ( (Str*)str )->value;
-    const char *text = stringData( head );
-    long textLen = stringLength( head );
-
-    undoStreamPull( fsmRun, in, text, textLen );
-}
-
-/* Push `tree` onto the input stream.
- *
- * A non-string tree is upref'd and pushed as a tree; -1 is returned. A
- * string is printed to text and pushed as text, and the length of that text
- * is returned. Pushing a string with `ignore` set trips an assertion. */
-long streamPush( Program *prg, Tree **sp, FsmRun *fsmRun, InputStream *in, Tree *tree, int ignore )
-{
-    StrCollect text;
-    long pushed;
-
-    if ( tree->id != LEL_ID_STR ) {
-        treeUpref( tree );
-        streamPushTree( fsmRun, in, tree, ignore );
-        return -1;
-    }
-
-    /* This should become a compile error. If it's text, it's up to the
-     * scanner to decide. Want to force it then send a token. */
-    assert( !ignore );
-
-    /* Print the tree and push the resulting text. */
-    initStrCollect( &text );
-    printTreeCollect( prg, sp, &text, tree, true );
-    streamPushText( fsmRun, in, text.data, text.length );
-    pushed = text.length;
-    strCollectDestroy( &text );
-
-    return pushed;
-}
-
-/* Store `tree` in slot `field` of `frame`. A non-null tree is expected to
- * hold at least one reference already; that is asserted. */
-void setLocal( Tree **frame, long field, Tree *tree )
-{
-    assert( tree == 0 || tree->refs >= 1 );
-    frame[field] = tree;
-}
-
-/* Replace slot `field` of `frame` with the result of splitTree() on its
- * current value, and return that result. */
-Tree *getLocalSplit( Program *prg, Tree **frame, long field )
-{
-    Tree **slot = &frame[field];
-    Tree *fresh = splitTree( prg, *slot );
-
-    *slot = fresh;
-    return fresh;
-}
-
-/* Downref the local trees of a frame. `trees` holds `treesLen` frame
- * offsets, one per byte; each names a slot of `frame` to release. */
-void downrefLocalTrees( Program *prg, Tree **sp, Tree **frame, char *trees, long treesLen )
-{
-    long n = 0;
-    while ( n < treesLen ) {
-        long offset = (long)trees[n];
-        debug( REALM_BYTECODE, "local tree downref: %ld\n", offset );
-
-        treeDownref( prg, sp, frame[offset] );
-        n += 1;
-    }
-}
-
-/* Reserve room for a UserIter on the VM stack, initialise it there and
- * return it. The caller's stack pointer is updated through `psp`. */
-UserIter *uiterCreate( Program *prg, Tree ***psp, FunctionInfo *fi, long searchId )
-{
-    Tree **sp = *psp;
-    UserIter *uiter;
-
-    /* The iterator occupies this many stack words. */
-    vm_pushn( sizeof(UserIter) / sizeof(Word) );
-    uiter = (UserIter*) vm_ptop();
-
-    initUserIter( uiter, vm_ptop(), fi->argSize, searchId );
-
-    *psp = sp;
-    return uiter;
-}
-
-/* Prepare a user iterator for its first resume: clear its reference, record
- * the current stack size and frame, and point `resume` at the function's
- * code (the WV variant when revertOn is set, the WC variant otherwise). */
-void uiterInit( Program *prg, Tree **sp, UserIter *uiter,
-        FunctionInfo *fi, int revertOn )
-{
-    /* Set up the first yield so when we resume it starts at the beginning. */
-    uiter->ref.kid = 0;
-    uiter->stackSize = uiter->stackRoot - vm_ptop();
-    uiter->frame = &uiter->stackRoot[-IFR_AA];
-
-    uiter->resume = revertOn
-        ? prg->rtd->frameInfo[fi->frameId].codeWV
-        : prg->rtd->frameInfo[fi->frameId].codeWC;
-}
-
-/* Pop a tree iterator's stack usage. The distance from the iterator's stack
- * root to the current top must equal its recorded stack size (asserted). */
-void treeIterDestroy( Tree ***psp, TreeIter *iter )
-{
-    Tree **sp = *psp;
-    long inUse = iter->stackRoot - vm_ptop();
-
-    assert( iter->stackSize == inUse );
-    vm_popn( iter->stackSize );
-
-    *psp = sp;
-}
-
-/* Pop everything a user iterator put on the stack: the words above its
- * stack root, the iterator structure itself, and its arguments. */
-void userIterDestroy( Tree ***psp, UserIter *uiter )
-{
-    Tree **sp = *psp;
-    long argWords = uiter->argSize;
-
-    /* We should always be coming from a yield. The current stack size will be
-     * nonzero and the stack size in the iterator will be correct. */
-    long inUse = uiter->stackRoot - vm_ptop();
-    assert( uiter->stackSize == inUse );
-
-    vm_popn( uiter->stackRoot - vm_ptop() );
-    vm_popn( sizeof(UserIter) / sizeof(Word) );
-    vm_popn( argWords );
-
-    *psp = sp;
-}
-
-/* Build the argv list: a generic of type argvGenericId holding one string
- * tree per command line argument, in order. The list and every element are
- * upref'd. The strings are made with stringAllocPointer() on argv[i]. */
-Tree *constructArgv( Program *prg, int argc, const char **argv )
-{
-    Tree *list = createGeneric( prg, prg->rtd->argvGenericId );
-    int n = 0;
-
-    treeUpref( list );
-    while ( n < argc ) {
-        const char *text = argv[n];
-        Head *head = stringAllocPointer( prg, text, strlen(text) );
-        Tree *arg = constructString( prg, head );
-
-        treeUpref( arg );
-        listAppend2( prg, (List*)list, arg );
-        n += 1;
-    }
-    return list;
-}
-
-/*
- * Execution environment
- */
-
-/* Fill in an Execution: record the parser, runs, input stream and frame id
- * given, and zero the frame pointers and the reverse code unit length. */
-void initExecution( Execution *exec, Parser *parser, PdaRun *pdaRun, FsmRun *fsmRun, InputStream *inputStream, int frameId )
-{
-    /* Start with no frames and no reverse code. */
-    exec->framePtr = 0;
-    exec->iframePtr = 0;
-    exec->rcodeUnitLen = 0;
-
-    /* What this execution runs against. */
-    exec->parser = parser;
-    exec->pdaRun = pdaRun;
-    exec->fsmRun = fsmRun;
-    exec->inputStream = inputStream;
-    exec->frameId = frameId;
-}
-
-/* Release everything referenced from a reverse code vector. Blocks are
- * taken off the end one at a time: each ends in a length word, which locates
- * the start of the block; the block is run through rcodeDownref() and then
- * dropped from the vector. */
-void rcodeDownrefAll( Program *prg, Tree **sp, RtCodeVect *rev )
-{
-    while ( rev->tabLen > 0 ) {
-        Code *lenAt = rev->data + rev->tabLen - SIZEOF_WORD;
-        Code *block;
-        Word len;
-        long start;
-
-        /* The trailing word holds the block length. */
-        read_word_p( len, lenAt );
-
-        /* Locate the first instruction of the block and process it. */
-        start = rev->tabLen - len - SIZEOF_WORD;
-        block = rev->data + start;
-        rcodeDownref( prg, sp, block );
-
-        /* Drop the block together with its length word. */
-        rev->tabLen -= len + SIZEOF_WORD;
-    }
-}
-
-/* Walk one block of reverse code without executing it, releasing what it
- * references.
- *
- * Starting at `instr`, each instruction's operands are decoded only to step
- * over them; every tree operand is passed to treeDownref(), and the old token
- * data held by IN_SET_TOKEN_DATA_BKT is freed with stringFree(). The walk
- * ends at IN_PCR_RET, IN_PCR_END_DECK or IN_STOP. An opcode not listed here
- * is reported through fatal(). */
-void rcodeDownref( Program *prg, Tree **sp, Code *instr )
-{
-again:
- switch ( *instr++ ) {
- case IN_PARSE_LOAD_START: {
- debug( REALM_BYTECODE, "IN_PARSE_LOAD_START\n" );
- break;
- }
- case IN_PARSE_SAVE_STEPS: {
- debug( REALM_BYTECODE, "IN_PARSE_SAVE_STEPS\n" );
- break;
- }
- case IN_LOAD_TREE: {
- Word w;
- read_word( w );
- debug( REALM_BYTECODE, "IN_LOAD_TREE %p\n", (Tree*)w );
- treeDownref( prg, sp, (Tree*)w );
- break;
- }
- case IN_LOAD_WORD: {
- /* The operand is read only to advance past it. */
- Word w;
- read_word( w );
- debug( REALM_BYTECODE, "IN_LOAD_WORD\n" );
- break;
- }
- case IN_RESTORE_LHS: {
- Tree *restore;
- read_tree( restore );
- debug( REALM_BYTECODE, "IN_RESTORE_LHS\n" );
- treeDownref( prg, sp, restore );
- break;
- }
-
- case IN_PARSE_FRAG_BKT: {
- Half stopId;
- read_half( stopId );
- debug( REALM_BYTECODE, "IN_PARSE_FRAG_BKT\n" );
- break;
- }
- case IN_PARSE_FRAG_BKT3: {
- debug( REALM_BYTECODE, "IN_PARSE_FRAG_BKT3\n" );
- break;
- }
- case IN_PARSE_FINISH_BKT: {
- Half stopId;
- read_half( stopId );
- debug( REALM_BYTECODE, "IN_PARSE_FINISH_BKT\n" );
- break;
- }
- case IN_PARSE_FINISH_BKT3: {
- debug( REALM_BYTECODE, "IN_PARSE_FINISH_BKT3\n" );
- break;
- }
- case IN_PCR_CALL: {
- debug( REALM_BYTECODE, "IN_PCR_CALL\n" );
- break;
- }
- case IN_PCR_RET: {
- debug( REALM_BYTECODE, "IN_PCR_RET\n" );
- return;
- }
- case IN_PCR_END_DECK: {
- debug( REALM_BYTECODE, "IN_PCR_END_DECK\n" );
- return;
- }
- case IN_INPUT_APPEND_BKT: {
- Tree *parser;
- Tree *input;
- Word len;
- read_tree( parser );
- read_tree( input );
- read_word( len );
-
- debug( REALM_BYTECODE, "IN_INPUT_APPEND_BKT\n" );
-
- treeDownref( prg, sp, parser );
- treeDownref( prg, sp, input );
- break;
- }
- case IN_INPUT_PULL_BKT: {
- Word f;
- Tree *string;
- read_tree( string );
- read_word( f );
-
- debug( REALM_BYTECODE, "IN_INPUT_PULL_BKT\n" );
-
- treeDownref( prg, sp, string );
- break;
- }
- case IN_INPUT_PUSH_BKT: {
- Word len;
- read_word( len );
-
- debug( REALM_BYTECODE, "IN_INPUT_PUSH_BKT\n" );
- break;
- }
- case IN_LOAD_GLOBAL_BKT: {
- debug( REALM_BYTECODE, "IN_LOAD_GLOBAL_BKT\n" );
- break;
- }
- case IN_LOAD_CONTEXT_BKT: {
- debug( REALM_BYTECODE, "IN_LOAD_CONTEXT_BKT\n" );
- break;
- }
- case IN_LOAD_ACCUM_BKT: {
- /* Tree *parser; */
- consume_word();
- debug( REALM_BYTECODE, "IN_LOAD_ACCUM_BKT\n" );
- break;
- }
- case IN_LOAD_INPUT_BKT: {
- /* Tree *input; */
- consume_word();
- debug( REALM_BYTECODE, "IN_LOAD_INPUT_BKT\n" );
- break;
- }
- case IN_GET_FIELD_BKT: {
- short field;
- read_half( field );
-
- debug( REALM_BYTECODE, "IN_GET_FIELD_BKT %hd\n", field );
- break;
- }
- case IN_SET_FIELD_BKT: {
- short field;
- Tree *val;
- read_half( field );
- read_tree( val );
-
- debug( REALM_BYTECODE, "IN_SET_FIELD_BKT %hd\n", field );
-
- treeDownref( prg, sp, val );
- break;
- }
- case IN_PTR_DEREF_BKT: {
- Tree *ptr;
- read_tree( ptr );
-
- debug( REALM_BYTECODE, "IN_PTR_DEREF_BKT\n" );
-
- treeDownref( prg, sp, ptr );
- break;
- }
- case IN_SET_TOKEN_DATA_BKT: {
- Word oldval;
- read_word( oldval );
-
- debug( REALM_BYTECODE, "IN_SET_TOKEN_DATA_BKT\n" );
-
- /* The operand is a string head rather than a tree, so it is freed
- * directly. */
- Head *head = (Head*)oldval;
- stringFree( prg, head );
- break;
- }
- case IN_LIST_APPEND_BKT: {
- debug( REALM_BYTECODE, "IN_LIST_APPEND_BKT\n" );
- break;
- }
- case IN_LIST_REMOVE_END_BKT: {
- Tree *val;
- read_tree( val );
-
- debug( REALM_BYTECODE, "IN_LIST_REMOVE_END_BKT\n" );
-
- treeDownref( prg, sp, val );
- break;
- }
- case IN_GET_LIST_MEM_BKT: {
- short field;
- read_half( field );
-
- debug( REALM_BYTECODE, "IN_GET_LIST_MEM_BKT %hd\n", field );
- break;
- }
- case IN_SET_LIST_MEM_BKT: {
- Half field;
- Tree *val;
- read_half( field );
- read_tree( val );
-
- debug( REALM_BYTECODE, "IN_SET_LIST_MEM_BKT %hd\n", field );
-
- treeDownref( prg, sp, val );
- break;
- }
- case IN_MAP_INSERT_BKT: {
- /* uchar inserted; */
- Tree *key;
- consume_byte();
- read_tree( key );
-
- debug( REALM_BYTECODE, "IN_MAP_INSERT_BKT\n" );
-
- treeDownref( prg, sp, key );
- break;
- }
- case IN_MAP_STORE_BKT: {
- Tree *key, *val;
- read_tree( key );
- read_tree( val );
-
- debug( REALM_BYTECODE,"IN_MAP_STORE_BKT\n" );
-
- treeDownref( prg, sp, key );
- treeDownref( prg, sp, val );
- break;
- }
- case IN_MAP_REMOVE_BKT: {
- Tree *key, *val;
- read_tree( key );
- read_tree( val );
-
- debug( REALM_BYTECODE, "IN_MAP_REMOVE_BKT\n" );
-
- treeDownref( prg, sp, key );
- treeDownref( prg, sp, val );
- break;
- }
- case IN_STOP: {
- return;
- }
- default: {
- /* instr has already moved past the opcode, hence instr-1. */
- fatal( "UNKNOWN INSTRUCTION 0x%2x: -- reverse code downref\n", *(instr-1));
- assert(false);
- break;
- }
- }
- /* Continue with the next instruction of the block. */
- goto again;
-}
-
-/* Run `code` as the main program on the VM stack rooted at prg->vm_root.
- * Four zero words are pushed first, as if a call had been made; after
- * execution two words are discarded and the next one becomes
- * prg->returnVal. */
-void mainExecution( Program *prg, Execution *exec, Code *code )
-{
-    Tree **sp = prg->vm_root;
-    int slot;
-
-    /* Set up the stack as if we have called. We allow a return value. */
-    for ( slot = 0; slot < 4; slot++ ) {
-        vm_push( 0 );
-    }
-
-    /* Execution loop. */
-    executeCode( prg, exec, sp, code );
-
-    for ( slot = 0; slot < 2; slot++ ) {
-        vm_pop_ignore();
-    }
-    prg->returnVal = vm_pop();
-}
-
-/* Move the reverse code collected in pdaRun->rcodeCollect onto the global
- * pdaRun->reverseCode stack as one block.
- *
- * Returns false when nothing was collected, true otherwise. When this is the
- * first block since the count was last reset, a two-instruction terminator
- * block (IN_PCR_END_DECK, IN_PCR_RET) is pushed first. Each block pushed is
- * followed by its length as a word, counts as one in rcBlockCount, and
- * advances the step count once.
- *
- * Open question carried over from the original: do we need to revert the
- * left hand side? */
-int makeReverseCode( PdaRun *pdaRun )
-{
-    RtCodeVect *collected = &pdaRun->rcodeCollect;
-    RtCodeVect *stack = &pdaRun->reverseCode;
-    Code *group;
-    long blockStart;
-    long blockLen;
-
-    /* Check if there was anything generated. */
-    if ( collected->tabLen == 0 )
-        return false;
-
-    if ( pdaRun->rcBlockCount == 0 ) {
-        /* One reverse code run for the DECK terminator. */
-        append( stack, IN_PCR_END_DECK );
-        append( stack, IN_PCR_RET );
-        appendWord( stack, 2 );
-        pdaRun->rcBlockCount += 1;
-        incrementSteps( pdaRun );
-    }
-
-    blockStart = stack->tabLen;
-
-    /* Go backwards, group by group, through the collected code. The last
-     * byte of every group is its length. Push each group to the global
-     * reverse code stack. */
-    group = collected->data + collected->tabLen;
-    while ( group != collected->data ) {
-        long groupLen;
-
-        group -= 1;
-        groupLen = *group;
-        group -= groupLen;
-        append2( stack, group, groupLen );
-    }
-
-    /* Stop, then place a total length in the global stack. */
-    append( stack, IN_PCR_RET );
-    blockLen = stack->tabLen - blockStart;
-    appendWord( stack, blockLen );
-
-    /* Clear the reverse code buffer. */
-    collected->tabLen = 0;
-
-    pdaRun->rcBlockCount += 1;
-    incrementSteps( pdaRun );
-
-    return true;
-}
-
-/* If any reverse code blocks are pending, flag `parseTree` as carrying
- * reverse code and reset the pending block count. */
-void transferReverseCode( PdaRun *pdaRun, ParseTree *parseTree )
-{
-    if ( pdaRun->rcBlockCount <= 0 )
-        return;
-
-    debug( REALM_PARSE, "attaching reverse code to token\n" );
-    parseTree->flags |= PF_HAS_RCODE;
-    pdaRun->rcBlockCount = 0;
-}
-
-/* Take the last block off a reverse code vector and return a pointer to its
- * first instruction. The vector is shortened by the block and its trailing
- * length word; the returned pointer still points into the vector's data. */
-Code *popReverseCode( RtCodeVect *allRev )
-{
-    Code *lenAt = allRev->data + allRev->tabLen - SIZEOF_WORD;
-    Code *block;
-    Word len;
-    long start;
-
-    /* The trailing word holds the block length. */
-    read_word_p( len, lenAt );
-
-    /* Locate the first instruction of the block. */
-    start = allRev->tabLen - len - SIZEOF_WORD;
-    block = allRev->data + start;
-
-    /* Drop the block together with its length word. */
-    allRev->tabLen -= len + SIZEOF_WORD;
-    return block;
-}
-
-Tree **executeCode( Program *prg, Execution *exec, Tree **sp, Code *instr )
-{
- /* When we exit we are going to verify that we did not eat up any stack
- * space. */
- Tree **root = sp;
- Code c;
-
-again:
- c = *instr++;
- //debug( REALM_BYTECODE, "--in 0x%x\n", c );
-
- switch ( c ) {
- case IN_RESTORE_LHS: {
- Tree *restore;
- read_tree( restore );
-
- debug( REALM_BYTECODE, "IN_RESTORE_LHS\n" );
- treeDownref( prg, sp, exec->pdaRun->parseInput->shadow->tree );
- exec->pdaRun->parseInput->shadow->tree = restore;
- break;
- }
- case IN_LOAD_NIL: {
- debug( REALM_BYTECODE, "IN_LOAD_NIL\n" );
- vm_push( 0 );
- break;
- }
- case IN_LOAD_TREE: {
- debug( REALM_BYTECODE, "IN_LOAD_TREE\n" );
- Tree *tree;
- read_tree( tree );
- vm_push( tree );
- break;
- }
- case IN_LOAD_WORD: {
- debug( REALM_BYTECODE, "IN_LOAD_WORD\n" );
- Word w;
- read_word( w );
- vm_push( (SW)w );
- break;
- }
- case IN_LOAD_TRUE: {
- debug( REALM_BYTECODE, "IN_LOAD_TRUE\n" );
- treeUpref( prg->trueVal );
- vm_push( prg->trueVal );
- break;
- }
- case IN_LOAD_FALSE: {
- debug( REALM_BYTECODE, "IN_LOAD_FALSE\n" );
- treeUpref( prg->falseVal );
- vm_push( prg->falseVal );
- break;
- }
- case IN_LOAD_INT: {
- Word i;
- read_word( i );
-
- debug( REALM_BYTECODE, "IN_LOAD_INT %d\n", i );
-
- Tree *tree = constructInteger( prg, i );
- treeUpref( tree );
- vm_push( tree );
- break;
- }
- case IN_LOAD_STR: {
- Word offset;
- read_word( offset );
-
- debug( REALM_BYTECODE, "IN_LOAD_STR %d\n", offset );
-
- Head *lit = makeLiteral( prg, offset );
- Tree *tree = constructString( prg, lit );
- treeUpref( tree );
- vm_push( tree );
- break;
- }
- case IN_PRINT: {
- int n;
- read_byte( n );
- debug( REALM_BYTECODE, "IN_PRINT %d\n", n );
-
- while ( n-- > 0 ) {
- Tree *tree = vm_pop();
- printTreeFile( prg, sp, stdout, tree, true );
- treeDownref( prg, sp, tree );
- }
- break;
- }
- case IN_PRINT_XML_AC: {
- int n;
- read_byte( n );
-
- debug( REALM_BYTECODE, "IN_PRINT_XML_AC %d\n", n );
-
- while ( n-- > 0 ) {
- Tree *tree = vm_pop();
- printXmlStdout( prg, sp, tree, true, true );
- treeDownref( prg, sp, tree );
- }
- break;
- }
- case IN_PRINT_XML: {
- int n;
- read_byte( n );
- debug( REALM_BYTECODE, "IN_PRINT_XML %d", n );
-
- while ( n-- > 0 ) {
- Tree *tree = vm_pop();
- printXmlStdout( prg, sp, tree, false, true );
- treeDownref( prg, sp, tree );
- }
- break;
- }
- case IN_PRINT_STREAM: {
- int n;
- read_byte( n );
- debug( REALM_BYTECODE, "IN_PRINT_STREAM\n" );
-
- Stream *stream = (Stream*)vm_pop();
- while ( n-- > 0 ) {
- Tree *tree = vm_pop();
- printTreeFile( prg, sp, stream->file, tree, true );
- treeDownref( prg, sp, tree );
- }
- treeDownref( prg, sp, (Tree*)stream );
- break;
- }
- case IN_LOAD_CONTEXT_R: {
- debug( REALM_BYTECODE, "IN_LOAD_CONTEXT_R\n" );
-
- treeUpref( exec->pdaRun->context );
- vm_push( exec->pdaRun->context );
- break;
- }
- case IN_LOAD_CONTEXT_WV: {
- debug( REALM_BYTECODE, "IN_LOAD_CONTEXT_WV\n" );
-
- treeUpref( exec->pdaRun->context );
- vm_push( exec->pdaRun->context );
-
- /* Set up the reverse instruction. */
- append( &exec->pdaRun->rcodeCollect, IN_LOAD_CONTEXT_BKT );
- exec->rcodeUnitLen = SIZEOF_CODE;
- break;
- }
- case IN_LOAD_CONTEXT_WC: {
- debug( REALM_BYTECODE, "IN_LOAD_CONTEXT_WC\n" );
-
- /* This is identical to the _R version, but using it for writing
- * would be confusing. */
- treeUpref( exec->pdaRun->context );
- vm_push( exec->pdaRun->context );
- break;
- }
- case IN_LOAD_CONTEXT_BKT: {
- debug( REALM_BYTECODE, "IN_LOAD_CONTEXT_BKT\n" );
-
- treeUpref( exec->pdaRun->context );
- vm_push( exec->pdaRun->context );
- break;
- }
- case IN_LOAD_GLOBAL_R: {
- debug( REALM_BYTECODE, "IN_LOAD_GLOBAL_R\n" );
-
- treeUpref( prg->global );
- vm_push( prg->global );
- break;
- }
- case IN_LOAD_GLOBAL_WV: {
- debug( REALM_BYTECODE, "IN_LOAD_GLOBAL_WV\n" );
-
- treeUpref( prg->global );
- vm_push( prg->global );
-
- /* Set up the reverse instruction. */
- append( &exec->pdaRun->rcodeCollect, IN_LOAD_GLOBAL_BKT );
- exec->rcodeUnitLen = SIZEOF_CODE;
- break;
- }
- case IN_LOAD_GLOBAL_WC: {
- debug( REALM_BYTECODE, "IN_LOAD_GLOBAL_WC\n" );
-
- /* This is identical to the _R version, but using it for writing
- * would be confusing. */
- treeUpref( prg->global );
- vm_push( prg->global );
- break;
- }
- case IN_LOAD_GLOBAL_BKT: {
- debug( REALM_BYTECODE, "IN_LOAD_GLOBAL_BKT\n" );
-
- treeUpref( prg->global );
- vm_push( prg->global );
- break;
- }
- case IN_LOAD_ACCUM_R: {
- debug( REALM_BYTECODE, "IN_LOAD_ACCUM_R\n" );
-
- treeUpref( (Tree*)exec->parser );
- vm_push( (Tree*)exec->parser );
- assert( exec->parser != 0 );
- break;
- }
- case IN_LOAD_ACCUM_WV: {
- debug( REALM_BYTECODE, "IN_LOAD_ACCUM_WV\n" );
-
- treeUpref( (Tree*)exec->parser );
- vm_push( (Tree*)exec->parser );
- assert( exec->parser != 0 );
-
- /* Set up the reverse instruction. */
- append( &exec->pdaRun->rcodeCollect, IN_LOAD_ACCUM_BKT );
- appendWord( &exec->pdaRun->rcodeCollect, (Word)exec->parser );
- exec->rcodeUnitLen = SIZEOF_CODE + SIZEOF_WORD;
- break;
- }
- case IN_LOAD_ACCUM_WC: {
- debug( REALM_BYTECODE, "IN_LOAD_ACCUM_WC\n" );
-
- /* This is identical to the _R version, but using it for writing
- * would be confusing. */
- treeUpref( (Tree*)exec->parser );
- vm_push( (Tree*)exec->parser );
- assert( exec->parser != 0 );
- break;
- }
- case IN_LOAD_ACCUM_BKT: {
- Tree *parser;
- read_tree( parser );
-
- debug( REALM_BYTECODE, "IN_LOAD_ACCUM_BKT\n" );
-
- treeUpref( parser );
- vm_push( parser );
- break;
- }
- case IN_LOAD_INPUT_R: {
- debug( REALM_BYTECODE, "IN_LOAD_INPUT_R\n" );
-
- assert( exec->parser != 0 );
- treeUpref( (Tree*)exec->parser->input );
- vm_push( (Tree*)exec->parser->input );
- break;
- }
- case IN_LOAD_INPUT_WV: {
- debug( REALM_BYTECODE, "IN_LOAD_INPUT_WV\n" );
-
- assert( exec->parser != 0 );
- treeUpref( (Tree*)exec->parser->input );
- vm_push( (Tree*)exec->parser->input );
-
- /* Set up the reverse instruction. */
- append( &exec->pdaRun->rcodeCollect, IN_LOAD_INPUT_BKT );
- appendWord( &exec->pdaRun->rcodeCollect, (Word)exec->parser->input );
- exec->rcodeUnitLen = SIZEOF_CODE + SIZEOF_WORD;
- break;
- }
- case IN_LOAD_INPUT_WC: {
- debug( REALM_BYTECODE, "IN_LOAD_INPUT_WC\n" );
-
- /* This is identical to the _R version, but using it for writing
- * would be confusing. */
- assert( exec->parser != 0 );
- treeUpref( (Tree*)exec->parser->input );
- vm_push( (Tree*)exec->parser->input );
- break;
- }
- case IN_LOAD_INPUT_BKT: {
- Tree *accumStream;
- read_tree( accumStream );
-
- debug( REALM_BYTECODE, "IN_LOAD_INPUT_BKT\n" );
-
- treeUpref( accumStream );
- vm_push( accumStream );
- break;
- }
- case IN_LOAD_CTX_R: {
- debug( REALM_BYTECODE, "IN_LOAD_CTX_R\n" );
-
- treeUpref( exec->pdaRun->context );
- vm_push( exec->pdaRun->context );
- break;
- }
- case IN_LOAD_CTX_WV: {
- debug( REALM_BYTECODE, "IN_LOAD_CTX_WV\n" );
-
- treeUpref( exec->pdaRun->context );
- vm_push( exec->pdaRun->context );
-
- /* Set up the reverse instruction. */
- append( &exec->pdaRun->rcodeCollect, IN_LOAD_ACCUM_BKT );
- appendWord( &exec->pdaRun->rcodeCollect, (Word)exec->parser );
- exec->rcodeUnitLen = SIZEOF_CODE + SIZEOF_WORD;
- break;
- }
- case IN_LOAD_CTX_WC: {
- debug( REALM_BYTECODE, "IN_LOAD_CTX_WC\n" );
-
- /* This is identical to the _R version, but using it for writing
- * would be confusing. */
- treeUpref( exec->pdaRun->context );
- vm_push( exec->pdaRun->context );
- break;
- }
- case IN_LOAD_CTX_BKT: {
- debug( REALM_BYTECODE, "IN_LOAD_CTX_BKT\n" );
-
- treeUpref( exec->pdaRun->context );
- vm_push( exec->pdaRun->context );
- break;
- }
- case IN_INIT_CAPTURES: {
- /* uchar ncaps; */
- consume_byte();
-
- debug( REALM_BYTECODE, "IN_INIT_CAPTURES\n" );
-
- /* If there are captures (this is a translate block) then copy them into
- * the local frame now. */
- LangElInfo *lelInfo = prg->rtd->lelInfo;
- char **mark = exec->fsmRun->mark;
-
- int i;
- for ( i = 0; i < lelInfo[exec->pdaRun->tokenId].numCaptureAttr; i++ ) {
- CaptureAttr *ca = &prg->rtd->captureAttr[lelInfo[exec->pdaRun->tokenId].captureAttr + i];
- Head *data = stringAllocFull( prg,
- mark[ca->mark_enter], mark[ca->mark_leave] - mark[ca->mark_enter] );
- Tree *string = constructString( prg, data );
- treeUpref( string );
- setLocal( exec->framePtr, -1 - i, string );
- }
- break;
- }
- case IN_INIT_RHS_EL: {
- Half position;
- short field;
- read_half( position );
- read_half( field );
-
- debug( REALM_BYTECODE, "IN_INIT_RHS_EL %hd\n", field );
-
- Tree *val = getRhsEl( prg, exec->pdaRun->redLel->shadow->tree, position );
- treeUpref( val );
- vm_local(field) = val;
- break;
- }
-
- case IN_INIT_LHS_EL: {
- short field;
- read_half( field );
-
- debug( REALM_BYTECODE, "IN_INIT_LHS_EL %hd\n", field );
-
- /* We transfer it to to the local field. Possibly take a copy. */
- Tree *val = exec->pdaRun->redLel->shadow->tree;
-
- /* Save it. */
- treeUpref( val );
- exec->pdaRun->parsed = val;
-
- exec->pdaRun->redLel->shadow->tree = 0;
- vm_local(field) = val;
- break;
- }
- case IN_STORE_LHS_EL: {
- short field;
- read_half( field );
-
- debug( REALM_BYTECODE, "IN_STORE_LHS_EL %hd\n", field );
-
- Tree *val = vm_local(field);
- vm_local(field) = 0;
- exec->pdaRun->redLel->shadow->tree = val;
- break;
- }
- case IN_UITER_ADVANCE: {
- short field;
- read_half( field );
-
- debug( REALM_BYTECODE, "IN_UITER_ADVANCE\n" );
-
- /* Get the iterator. */
- UserIter *uiter = (UserIter*) vm_local(field);
-
- long stackSize = uiter->stackRoot - vm_ptop();
- assert( uiter->stackSize == stackSize );
-
- /* Fix the return instruction pointer. */
- uiter->stackRoot[-IFR_AA + IFR_RIN] = (SW)instr;
-
- instr = uiter->resume;
- exec->framePtr = uiter->frame;
- exec->iframePtr = &uiter->stackRoot[-IFR_AA];
- break;
- }
- case IN_UITER_GET_CUR_R: {
- short field;
- read_half( field );
-
- debug( REALM_BYTECODE, "IN_UITER_GET_CUR_R\n" );
-
- UserIter *uiter = (UserIter*) vm_local(field);
- Tree *val = uiter->ref.kid->tree;
- treeUpref( val );
- vm_push( val );
- break;
- }
- case IN_UITER_GET_CUR_WC: {
- short field;
- read_half( field );
-
- debug( REALM_BYTECODE, "IN_UITER_GET_CUR_WC\n" );
-
- UserIter *uiter = (UserIter*) vm_local(field);
- splitRef( prg, &sp, &uiter->ref );
- Tree *split = uiter->ref.kid->tree;
- treeUpref( split );
- vm_push( split );
- break;
- }
- case IN_UITER_SET_CUR_WC: {
- short field;
- read_half( field );
-
- debug( REALM_BYTECODE, "IN_UITER_SET_CUR_WC\n" );
-
- Tree *t = vm_pop();
- UserIter *uiter = (UserIter*) vm_local(field);
- splitRef( prg, &sp, &uiter->ref );
- Tree *old = uiter->ref.kid->tree;
- setUiterCur( prg, uiter, t );
- treeDownref( prg, sp, old );
- break;
- }
- case IN_GET_LOCAL_R: {
- short field;
- read_half( field );
-
- debug( REALM_BYTECODE, "IN_GET_LOCAL_R\n" );
-
- Tree *val = vm_local(field);
- treeUpref( val );
- vm_push( val );
- break;
- }
- case IN_GET_LOCAL_WC: {
- short field;
- read_half( field );
-
- debug( REALM_BYTECODE, "IN_GET_LOCAL_WC\n" );
-
- Tree *split = getLocalSplit( prg, exec->framePtr, field );
- treeUpref( split );
- vm_push( split );
- break;
- }
- case IN_SET_LOCAL_WC: {
- short field;
- read_half( field );
-
- debug( REALM_BYTECODE, "IN_SET_LOCAL_WC %d\n", field );
-
- Tree *val = vm_pop();
- treeDownref( prg, sp, vm_local(field) );
- setLocal( exec->framePtr, field, val );
- break;
- }
- case IN_SAVE_RET: {
- debug( REALM_BYTECODE, "IN_SAVE_RET\n" );
-
- Tree *val = vm_pop();
- vm_local(FR_RV) = val;
- break;
- }
- case IN_GET_LOCAL_REF_R: {
- short field;
- read_half( field );
-
- debug( REALM_BYTECODE, "IN_GET_LOCAL_REF_R\n" );
-
- Ref *ref = (Ref*) vm_plocal(field);
- Tree *val = ref->kid->tree;
- treeUpref( val );
- vm_push( val );
- break;
- }
- case IN_GET_LOCAL_REF_WC: {
- short field;
- read_half( field );
-
- debug( REALM_BYTECODE, "IN_GET_LOCAL_REF_WC\n" );
-
- Ref *ref = (Ref*) vm_plocal(field);
- splitRef( prg, &sp, ref );
- Tree *val = ref->kid->tree;
- treeUpref( val );
- vm_push( val );
- break;
- }
- case IN_SET_LOCAL_REF_WC: {
- short field;
- read_half( field );
-
- debug( REALM_BYTECODE, "IN_SET_LOCAL_REF_WC\n" );
-
- Tree *val = vm_pop();
- Ref *ref = (Ref*) vm_plocal(field);
- splitRef( prg, &sp, ref );
- refSetValue( ref, val );
- break;
- }
- case IN_GET_FIELD_R: {
- short field;
- read_half( field );
-
- debug( REALM_BYTECODE, "IN_GET_FIELD_R %d\n", field );
-
- Tree *obj = vm_pop();
- treeDownref( prg, sp, obj );
-
- Tree *val = getField( obj, field );
- treeUpref( val );
- vm_push( val );
- break;
- }
- case IN_GET_FIELD_WC: {
- short field;
- read_half( field );
-
- debug( REALM_BYTECODE, "IN_GET_FIELD_WC %d\n", field );
-
- Tree *obj = vm_pop();
- treeDownref( prg, sp, obj );
-
- Tree *split = getFieldSplit( prg, obj, field );
- treeUpref( split );
- vm_push( split );
- break;
- }
- case IN_GET_FIELD_WV: {
- short field;
- read_half( field );
-
- debug( REALM_BYTECODE, "IN_GET_FIELD_WV\n" );
-
- Tree *obj = vm_pop();
- treeDownref( prg, sp, obj );
-
- Tree *split = getFieldSplit( prg, obj, field );
- treeUpref( split );
- vm_push( split );
-
- /* Set up the reverse instruction. */
- append( &exec->pdaRun->rcodeCollect, IN_GET_FIELD_BKT );
- appendHalf( &exec->pdaRun->rcodeCollect, field );
- exec->rcodeUnitLen += SIZEOF_CODE + SIZEOF_HALF;
- break;
- }
- case IN_GET_FIELD_BKT: {
- short field;
- read_half( field );
-
- debug( REALM_BYTECODE, "IN_GET_FIELD_BKT\n" );
-
- Tree *obj = vm_pop();
- treeDownref( prg, sp, obj );
-
- Tree *split = getFieldSplit( prg, obj, field );
- treeUpref( split );
- vm_push( split );
- break;
- }
- case IN_SET_FIELD_WC: {
- short field;
- read_half( field );
-
- debug( REALM_BYTECODE, "IN_SET_FIELD_WC %d\n", field );
-
- Tree *obj = vm_pop();
- Tree *val = vm_pop();
- treeDownref( prg, sp, obj );
-
- /* Downref the old value. */
- Tree *prev = getField( obj, field );
- treeDownref( prg, sp, prev );
-
- setField( prg, obj, field, val );
- break;
- }
- case IN_SET_FIELD_WV: {
- short field;
- read_half( field );
-
- debug( REALM_BYTECODE, "IN_SET_FIELD_WV %d\n", field );
-
- Tree *obj = vm_pop();
- Tree *val = vm_pop();
- treeDownref( prg, sp, obj );
-
- /* Save the old value, then set the field. */
- Tree *prev = getField( obj, field );
- setField( prg, obj, field, val );
-
- /* Set up the reverse instruction. */
- append( &exec->pdaRun->rcodeCollect, IN_SET_FIELD_BKT );
- appendHalf( &exec->pdaRun->rcodeCollect, field );
- appendWord( &exec->pdaRun->rcodeCollect, (Word)prev );
- exec->rcodeUnitLen += SIZEOF_CODE + SIZEOF_HALF + SIZEOF_WORD;
- append( &exec->pdaRun->rcodeCollect, exec->rcodeUnitLen );
- /* FLUSH */
- break;
- }
- case IN_SET_FIELD_BKT: {
- short field;
- Tree *val;
- read_half( field );
- read_tree( val );
-
- debug( REALM_BYTECODE, "IN_SET_FIELD_BKT\n" );
-
- Tree *obj = vm_pop();
- treeDownref( prg, sp, obj );
-
- /* Downref the old value. */
- Tree *prev = getField( obj, field );
- treeDownref( prg, sp, prev );
-
- setField( prg, obj, field, val );
- break;
- }
- case IN_SET_FIELD_LEAVE_WC: {
- short field;
- read_half( field );
-
- debug( REALM_BYTECODE, "IN_SET_FIELD_LEAVE_WC\n" );
-
- /* Note that we don't downref the object here because we are
- * leaving it on the stack. */
- Tree *obj = vm_pop();
- Tree *val = vm_pop();
-
- /* Downref the old value. */
- Tree *prev = getField( obj, field );
- treeDownref( prg, sp, prev );
-
- /* Set the field. */
- setField( prg, obj, field, val );
-
- /* Leave the object on the top of the stack. */
- vm_push( obj );
- break;
- }
- case IN_GET_RHS_VAL_R: {
- debug( REALM_BYTECODE, "IN_GET_RHS_VAL_R\n" );
- int i, done = 0;
- uchar len;
-
- Tree *obj = vm_pop(), *val = 0;
- treeDownref( prg, sp, obj );
-
- read_byte( len );
- for ( i = 0; i < len; i++ ) {
- uchar prodNum, childNum;
- read_byte( prodNum );
- read_byte( childNum );
- if ( !done && obj->prodNum == prodNum ) {
- val = getRhsEl( prg, obj, childNum );
- done = 1;
- }
- }
-
- treeUpref( val );
- vm_push( val );
- break;
- }
- case IN_POP: {
- debug( REALM_BYTECODE, "IN_POP\n" );
-
- Tree *val = vm_pop();
- treeDownref( prg, sp, val );
- break;
- }
- case IN_POP_N_WORDS: {
- short n;
- read_half( n );
-
- debug( REALM_BYTECODE, "IN_POP_N_WORDS\n" );
-
- vm_popn( n );
- break;
- }
- case IN_SPRINTF: {
- debug( REALM_BYTECODE, "IN_SPRINTF\n" );
-
- Tree *f = vm_pop();
- f++;
- Tree *integer = vm_pop();
- Tree *format = vm_pop();
- Head *res = stringSprintf( prg, (Str*)format, (Int*)integer );
- Tree *str = constructString( prg, res );
- treeUpref( str );
- vm_push( str );
- treeDownref( prg, sp, integer );
- treeDownref( prg, sp, format );
- break;
- }
- case IN_STR_ATOI: {
- debug( REALM_BYTECODE, "IN_STR_ATOI\n" );
-
- Str *str = (Str*)vm_pop();
- Word res = strAtoi( str->value );
- Tree *integer = constructInteger( prg, res );
- treeUpref( integer );
- vm_push( integer );
- treeDownref( prg, sp, (Tree*)str );
- break;
- }
- case IN_INT_TO_STR: {
- debug( REALM_BYTECODE, "IN_INT_TO_STR\n" );
-
- Int *i = (Int*)vm_pop();
- Head *res = intToStr( prg, i->value );
- Tree *str = constructString( prg, res );
- treeUpref( str );
- vm_push( str );
- treeDownref( prg, sp, (Tree*) i );
- break;
- }
- case IN_TREE_TO_STR: {
- debug( REALM_BYTECODE, "IN_TREE_TO_STR\n" );
-
- Tree *tree = vm_pop();
- Head *res = treeToStr( prg, sp, tree, true );
- Tree *str = constructString( prg, res );
- treeUpref( str );
- vm_push( str );
- treeDownref( prg, sp, tree );
- break;
- }
- case IN_TREE_TO_STR_NOTRIM: {
- debug( REALM_BYTECODE, "IN_TREE_TO_STR_NOTRIM\n" );
-
- Tree *tree = vm_pop();
- Head *res = treeToStr( prg, sp, tree, false );
- Tree *str = constructString( prg, res );
- treeUpref( str );
- vm_push( str );
- treeDownref( prg, sp, tree );
- break;
- }
- case IN_TREE_TRIM: {
- debug( REALM_BYTECODE, "IN_TREE_TRIM\n" );
-
- Tree *tree = vm_pop();
- Tree *trimmed = treeTrim( prg, sp, tree );
- vm_push( trimmed );
- break;
- }
- case IN_CONCAT_STR: {
- debug( REALM_BYTECODE, "IN_CONCAT_STR\n" );
-
- Str *s2 = (Str*)vm_pop();
- Str *s1 = (Str*)vm_pop();
- Head *res = concatStr( s1->value, s2->value );
- Tree *str = constructString( prg, res );
- treeUpref( str );
- treeDownref( prg, sp, (Tree*)s1 );
- treeDownref( prg, sp, (Tree*)s2 );
- vm_push( str );
- break;
- }
- case IN_STR_UORD8: {
- debug( REALM_BYTECODE, "IN_STR_UORD8\n" );
-
- Str *str = (Str*)vm_pop();
- Word res = strUord8( str->value );
- Tree *tree = constructInteger( prg, res );
- treeUpref( tree );
- vm_push( tree );
- treeDownref( prg, sp, (Tree*)str );
- break;
- }
- case IN_STR_UORD16: {
- debug( REALM_BYTECODE, "IN_STR_UORD16\n" );
-
- Str *str = (Str*)vm_pop();
- Word res = strUord16( str->value );
- Tree *tree = constructInteger( prg, res );
- treeUpref( tree );
- vm_push( tree );
- treeDownref( prg, sp, (Tree*)str );
- break;
- }
-
- case IN_STR_LENGTH: {
- debug( REALM_BYTECODE, "IN_STR_LENGTH\n" );
-
- Str *str = (Str*)vm_pop();
- long len = stringLength( str->value );
- Tree *res = constructInteger( prg, len );
- treeUpref( res );
- vm_push( res );
- treeDownref( prg, sp, (Tree*)str );
- break;
- }
- case IN_JMP_FALSE: {
- short dist;
- read_half( dist );
-
- debug( REALM_BYTECODE, "IN_JMP_FALSE %d\n", dist );
-
- Tree *tree = vm_pop();
- if ( testFalse( prg, tree ) )
- instr += dist;
- treeDownref( prg, sp, tree );
- break;
- }
- case IN_JMP_TRUE: {
- short dist;
- read_half( dist );
-
- debug( REALM_BYTECODE, "IN_JMP_TRUE %d\n", dist );
-
- Tree *tree = vm_pop();
- if ( !testFalse( prg, tree ) )
- instr += dist;
- treeDownref( prg, sp, tree );
- break;
- }
- case IN_JMP: {
- short dist;
- read_half( dist );
-
- debug( REALM_BYTECODE, "IN_JMP\n" );
-
- instr += dist;
- break;
- }
- case IN_REJECT: {
- debug( REALM_BYTECODE, "IN_REJECT\n" );
- exec->pdaRun->reject = true;
- break;
- }
-
- /*
- * Binary comparison operators.
- */
- case IN_TST_EQL: {
- debug( REALM_BYTECODE, "IN_TST_EQL\n" );
-
- Tree *o2 = vm_pop();
- Tree *o1 = vm_pop();
- long r = cmpTree( prg, o1, o2 );
- Tree *val = r ? prg->falseVal : prg->trueVal;
- treeUpref( val );
- vm_push( val );
- treeDownref( prg, sp, o1 );
- treeDownref( prg, sp, o2 );
- break;
- }
- case IN_TST_NOT_EQL: {
- debug( REALM_BYTECODE, "IN_TST_NOT_EQL\n" );
-
- Tree *o2 = vm_pop();
- Tree *o1 = vm_pop();
- long r = cmpTree( prg, o1, o2 );
- Tree *val = r ? prg->trueVal : prg->falseVal;
- treeUpref( val );
- vm_push( val );
- treeDownref( prg, sp, o1 );
- treeDownref( prg, sp, o2 );
- break;
- }
- case IN_TST_LESS: {
- debug( REALM_BYTECODE, "IN_TST_LESS\n" );
-
- Tree *o2 = vm_pop();
- Tree *o1 = vm_pop();
- long r = cmpTree( prg, o1, o2 );
- Tree *val = r < 0 ? prg->trueVal : prg->falseVal;
- treeUpref( val );
- vm_push( val );
- treeDownref( prg, sp, o1 );
- treeDownref( prg, sp, o2 );
- break;
- }
- case IN_TST_LESS_EQL: {
- debug( REALM_BYTECODE, "IN_TST_LESS_EQL\n" );
-
- Tree *o2 = vm_pop();
- Tree *o1 = vm_pop();
- long r = cmpTree( prg, o1, o2 );
- Tree *val = r <= 0 ? prg->trueVal : prg->falseVal;
- treeUpref( val );
- vm_push( val );
- treeDownref( prg, sp, o1 );
- treeDownref( prg, sp, o2 );
- }
- case IN_TST_GRTR: {
- debug( REALM_BYTECODE, "IN_TST_GRTR\n" );
-
- Tree *o2 = vm_pop();
- Tree *o1 = vm_pop();
- long r = cmpTree( prg, o1, o2 );
- Tree *val = r > 0 ? prg->trueVal : prg->falseVal;
- treeUpref( val );
- vm_push( val );
- treeDownref( prg, sp, o1 );
- treeDownref( prg, sp, o2 );
- break;
- }
- case IN_TST_GRTR_EQL: {
- debug( REALM_BYTECODE, "IN_TST_GRTR_EQL\n" );
-
- Tree *o2 = (Tree*)vm_pop();
- Tree *o1 = (Tree*)vm_pop();
- long r = cmpTree( prg, o1, o2 );
- Tree *val = r >= 0 ? prg->trueVal : prg->falseVal;
- treeUpref( val );
- vm_push( val );
- treeDownref( prg, sp, o1 );
- treeDownref( prg, sp, o2 );
- break;
- }
- case IN_TST_LOGICAL_AND: {
- debug( REALM_BYTECODE, "IN_TST_LOGICAL_AND\n" );
-
- Tree *o2 = vm_pop();
- Tree *o1 = vm_pop();
- long v2 = !testFalse( prg, o2 );
- long v1 = !testFalse( prg, o1 );
- Word r = v1 && v2;
- Tree *val = r ? prg->trueVal : prg->falseVal;
- treeUpref( val );
- vm_push( val );
- treeDownref( prg, sp, o1 );
- treeDownref( prg, sp, o2 );
- break;
- }
- case IN_TST_LOGICAL_OR: {
- debug( REALM_BYTECODE, "IN_TST_LOGICAL_OR\n" );
-
- Tree *o2 = vm_pop();
- Tree *o1 = vm_pop();
- long v2 = !testFalse( prg, o2 );
- long v1 = !testFalse( prg, o1 );
- Word r = v1 || v2;
- Tree *val = r ? prg->trueVal : prg->falseVal;
- treeUpref( val );
- vm_push( val );
- treeDownref( prg, sp, o1 );
- treeDownref( prg, sp, o2 );
- break;
- }
- case IN_NOT: {
- debug( REALM_BYTECODE, "IN_NOT\n" );
-
- Tree *tree = (Tree*)vm_pop();
- long r = testFalse( prg, tree );
- Tree *val = r ? prg->trueVal : prg->falseVal;
- treeUpref( val );
- vm_push( val );
- treeDownref( prg, sp, tree );
- break;
- }
-
- case IN_ADD_INT: {
- debug( REALM_BYTECODE, "IN_ADD_INT\n" );
-
- Int *o2 = (Int*)vm_pop();
- Int *o1 = (Int*)vm_pop();
- long r = o1->value + o2->value;
- Tree *tree = constructInteger( prg, r );
- treeUpref( tree );
- vm_push( tree );
- treeDownref( prg, sp, (Tree*)o1 );
- treeDownref( prg, sp, (Tree*)o2 );
- break;
- }
- case IN_MULT_INT: {
- debug( REALM_BYTECODE, "IN_MULT_INT\n" );
-
- Int *o2 = (Int*)vm_pop();
- Int *o1 = (Int*)vm_pop();
- long r = o1->value * o2->value;
- Tree *tree = constructInteger( prg, r );
- treeUpref( tree );
- vm_push( tree );
- treeDownref( prg, sp, (Tree*)o1 );
- treeDownref( prg, sp, (Tree*)o2 );
- break;
- }
- case IN_DIV_INT: {
- debug( REALM_BYTECODE, "IN_DIV_INT\n" );
-
- Int *o2 = (Int*)vm_pop();
- Int *o1 = (Int*)vm_pop();
- long r = o1->value / o2->value;
- Tree *tree = constructInteger( prg, r );
- treeUpref( tree );
- vm_push( tree );
- treeDownref( prg, sp, (Tree*)o1 );
- treeDownref( prg, sp, (Tree*)o2 );
- break;
- }
- case IN_SUB_INT: {
- debug( REALM_BYTECODE, "IN_SUB_INT\n" );
-
- Int *o2 = (Int*)vm_pop();
- Int *o1 = (Int*)vm_pop();
- long r = o1->value - o2->value;
- Tree *tree = constructInteger( prg, r );
- treeUpref( tree );
- vm_push( tree );
- treeDownref( prg, sp, (Tree*)o1 );
- treeDownref( prg, sp, (Tree*)o2 );
- break;
- }
- case IN_DUP_TOP_OFF: {
- short off;
- read_half( off );
-
- debug( REALM_BYTECODE, "IN_DUP_TOP_OFF %hd\n", off );
-
- Tree *val = vm_top_off(off);
- treeUpref( val );
- vm_push( val );
- break;
- }
- case IN_DUP_TOP: {
- debug( REALM_BYTECODE, "IN_DUP_TOP\n" );
-
- Tree *val = vm_top();
- treeUpref( val );
- vm_push( val );
- break;
- }
- case IN_TRITER_FROM_REF: {
- short field;
- Half searchTypeId;
- read_half( field );
- read_half( searchTypeId );
-
- debug( REALM_BYTECODE, "IN_TRITER_FROM_REF\n" );
-
- Ref rootRef;
- rootRef.kid = (Kid*)vm_pop();
- rootRef.next = (Ref*)vm_pop();
- void *mem = vm_plocal(field);
- initTreeIter( (TreeIter*)mem, &rootRef, searchTypeId, vm_ptop() );
- break;
- }
- case IN_TRITER_DESTROY: {
- short field;
- read_half( field );
-
- debug( REALM_BYTECODE, "IN_TRITER_DESTROY\n" );
-
- TreeIter *iter = (TreeIter*) vm_plocal(field);
- treeIterDestroy( &sp, iter );
- break;
- }
- case IN_REV_TRITER_FROM_REF: {
- short field;
- Half searchTypeId;
- read_half( field );
- read_half( searchTypeId );
-
- debug( REALM_BYTECODE, "IN_REV_TRITER_FROM_REF\n" );
-
- Ref rootRef;
- rootRef.kid = (Kid*)vm_pop();
- rootRef.next = (Ref*)vm_pop();
-
- Tree **stackRoot = vm_ptop();
-
- int children = 0;
- Kid *kid = treeChild( prg, rootRef.kid->tree );
- while ( kid != 0 ) {
- children++;
- vm_push( (SW) kid );
- kid = kid->next;
- }
-
- void *mem = vm_plocal(field);
- initRevTreeIter( (RevTreeIter*)mem, &rootRef, searchTypeId, stackRoot, children );
- break;
- }
- case IN_REV_TRITER_DESTROY: {
- short field;
- read_half( field );
-
- debug( REALM_BYTECODE, "IN_REV_TRITER_DESTROY\n" );
-
- RevTreeIter *iter = (RevTreeIter*) vm_plocal(field);
- long curStackSize = iter->stackRoot - vm_ptop();
- assert( iter->stackSize == curStackSize );
- vm_popn( iter->stackSize );
- break;
- }
- case IN_TREE_SEARCH: {
- Word id;
- read_word( id );
-
- debug( REALM_BYTECODE, "IN_TREE_SEARCH\n" );
-
- Tree *tree = vm_pop();
- Tree *res = treeSearch2( prg, tree, id );
- treeUpref( res );
- vm_push( res );
- treeDownref( prg, sp, tree );
- break;
- }
- case IN_TRITER_ADVANCE: {
- short field;
- read_half( field );
-
- debug( REALM_BYTECODE, "IN_TRITER_ADVANCE\n" );
-
- TreeIter *iter = (TreeIter*) vm_plocal(field);
- Tree *res = treeIterAdvance( prg, &sp, iter );
- treeUpref( res );
- vm_push( res );
- break;
- }
- case IN_TRITER_NEXT_CHILD: {
- short field;
- read_half( field );
-
- debug( REALM_BYTECODE, "IN_TRITER_NEXT_CHILD\n" );
-
- TreeIter *iter = (TreeIter*) vm_plocal(field);
- Tree *res = treeIterNextChild( prg, &sp, iter );
- treeUpref( res );
- vm_push( res );
- break;
- }
- case IN_REV_TRITER_PREV_CHILD: {
- short field;
- read_half( field );
-
- debug( REALM_BYTECODE, "IN_REV_TRITER_PREV_CHILD\n" );
-
- RevTreeIter *iter = (RevTreeIter*) vm_plocal(field);
- Tree *res = treeRevIterPrevChild( prg, &sp, iter );
- treeUpref( res );
- vm_push( res );
- break;
- }
- case IN_TRITER_NEXT_REPEAT: {
- short field;
- read_half( field );
-
- debug( REALM_BYTECODE, "IN_TRITER_NEXT_REPEAT\n" );
-
- TreeIter *iter = (TreeIter*) vm_plocal(field);
- Tree *res = treeIterNextRepeat( prg, &sp, iter );
- treeUpref( res );
- vm_push( res );
- break;
- }
- case IN_TRITER_PREV_REPEAT: {
- short field;
- read_half( field );
-
- debug( REALM_BYTECODE, "IN_TRITER_PREV_REPEAT\n" );
-
- TreeIter *iter = (TreeIter*) vm_plocal(field);
- Tree *res = treeIterPrevRepeat( prg, &sp, iter );
- treeUpref( res );
- vm_push( res );
- break;
- }
- case IN_TRITER_GET_CUR_R: {
- short field;
- read_half( field );
-
- debug( REALM_BYTECODE, "IN_TRITER_GET_CUR_R\n" );
-
- TreeIter *iter = (TreeIter*) vm_plocal(field);
- Tree *tree = treeIterDerefCur( iter );
- treeUpref( tree );
- vm_push( tree );
- break;
- }
- case IN_TRITER_GET_CUR_WC: {
- short field;
- read_half( field );
-
- debug( REALM_BYTECODE, "IN_TRITER_GET_CUR_WC\n" );
-
- TreeIter *iter = (TreeIter*) vm_plocal(field);
- splitIterCur( prg, &sp, iter );
- Tree *tree = treeIterDerefCur( iter );
- treeUpref( tree );
- vm_push( tree );
- break;
- }
- case IN_TRITER_SET_CUR_WC: {
- short field;
- read_half( field );
-
- debug( REALM_BYTECODE, "IN_TRITER_SET_CUR_WC\n" );
-
- Tree *tree = vm_pop();
- TreeIter *iter = (TreeIter*) vm_plocal(field);
- splitIterCur( prg, &sp, iter );
- Tree *old = treeIterDerefCur( iter );
- setTriterCur( prg, iter, tree );
- treeDownref( prg, sp, old );
- break;
- }
- case IN_MATCH: {
- Half patternId;
- read_half( patternId );
-
- debug( REALM_BYTECODE, "IN_MATCH\n" );
-
- Tree *tree = vm_pop();
-
- /* Run the match, push the result. */
- int rootNode = prg->rtd->patReplInfo[patternId].offset;
-
- /* Bindings are indexed starting at 1. Zero bindId to represent no
- * binding. We make a space for it here rather than do math at
- * access them. */
- long numBindings = prg->rtd->patReplInfo[patternId].numBindings;
- Tree *bindings[1+numBindings];
- memset( bindings, 0, sizeof(Tree*)*(1+numBindings) );
-
- Kid kid;
- kid.tree = tree;
- kid.next = 0;
- int matched = matchPattern( bindings, prg, rootNode, &kid, false );
-
- if ( !matched )
- memset( bindings, 0, sizeof(Tree*)*(1+numBindings) );
- else {
- int b;
- for ( b = 1; b <= numBindings; b++ )
- assert( bindings[b] != 0 );
- }
-
- Tree *result = matched ? tree : 0;
- treeUpref( result );
- vm_push( result ? tree : 0 );
- int b;
- for ( b = 1; b <= numBindings; b++ ) {
- treeUpref( bindings[b] );
- vm_push( bindings[b] );
- }
-
- treeDownref( prg, sp, tree );
- break;
- }
-
- case IN_GET_ACCUM_CTX_R: {
- debug( REALM_BYTECODE, "IN_GET_ACCUM_CTX_R\n" );
-
- Tree *obj = vm_pop();
- Tree *ctx = ((Parser*)obj)->pdaRun->context;
- treeUpref( ctx );
- vm_push( ctx );
- treeDownref( prg, sp, obj );
- break;
- }
-
- case IN_SET_ACCUM_CTX_WC: {
- debug( REALM_BYTECODE, "IN_SET_ACCUM_CTX_WC\n" );
-
- Tree *parser = vm_pop();
- Tree *val = vm_pop();
- parserSetContext( prg, sp, (Parser*)parser, val );
- treeDownref( prg, sp, parser );
- break;
- }
-
-// case IN_GET_ACCUM_CTX_WC:
-// case IN_GET_ACCUM_CTX_WV:
-// case IN_SET_ACCUM_CTX_WC:
-// case IN_SET_ACCUM_CTX_WV:
-// break;
-
- case IN_INPUT_APPEND_WC: {
- debug( REALM_BYTECODE, "IN_INPUT_APPEND_WC \n" );
-
- Input *accumStream = (Input*)vm_pop();
- Tree *input = vm_pop();
- streamAppend( prg, sp, input, accumStream->in );
-
- vm_push( (Tree*)accumStream );
- treeDownref( prg, sp, input );
- break;
- }
- case IN_INPUT_APPEND_WV: {
- debug( REALM_BYTECODE, "IN_INPUT_APPEND_WV \n" );
-
- Input *accumStream = (Input*)vm_pop();
- Tree *input = vm_pop();
- Word len = streamAppend( prg, sp, input, accumStream->in );
-
- treeUpref( (Tree*)accumStream );
- vm_push( (Tree*)accumStream );
-
- append( &exec->pdaRun->rcodeCollect, IN_INPUT_APPEND_BKT );
- appendWord( &exec->pdaRun->rcodeCollect, (Word) accumStream );
- appendWord( &exec->pdaRun->rcodeCollect, (Word) input );
- appendWord( &exec->pdaRun->rcodeCollect, (Word) len );
- append( &exec->pdaRun->rcodeCollect, SIZEOF_CODE + 3 * SIZEOF_WORD );
- break;
- }
-
- case IN_INPUT_APPEND_BKT: {
- Tree *accumStream;
- Tree *input;
- Word len;
- read_tree( accumStream );
- read_tree( input );
- read_word( len );
-
- debug( REALM_BYTECODE, "IN_INPUT_APPEND_BKT\n" );
-
- undoStreamAppend( prg, sp, 0, ((Input*)accumStream)->in, input, len );
- treeDownref( prg, sp, accumStream );
- treeDownref( prg, sp, input );
- break;
- }
-
- case IN_PARSE_LOAD_START: {
- debug( REALM_BYTECODE, "IN_PARSE_LOAD_START\n" );
- vm_push( (SW) PcrStart );
- break;
- }
-
- case IN_PARSE_SAVE_STEPS: {
- debug( REALM_BYTECODE, "IN_PARSE_SAVE_STEPS\n" );
-
- Parser *parser = (Parser*)vm_pop();
- long steps = parser->pdaRun->steps;
-
- vm_push( (SW)steps );
- vm_push( (SW)parser );
- break;
- }
-
- case IN_PCR_CALL: {
- debug( REALM_BYTECODE, "IN_PCR_CALL\n" );
-
- long pcr = (long)vm_pop();
- Parser *parser = (Parser*)vm_pop();
- long steps = (long)vm_pop();
-
- vm_push( (SW)steps );
- vm_push( (SW)parser );
- vm_push( (SW)pcr );
-
- vm_push( (SW)exec->parser );
- vm_push( (SW)exec->pdaRun );
- vm_push( (SW)exec->fsmRun );
- vm_push( (SW)exec->inputStream );
- vm_push( (SW)exec->framePtr );
- vm_push( (SW)exec->iframePtr );
- vm_push( (SW)exec->frameId );
- vm_push( (SW)exec->rcodeUnitLen );
-
- Code *returnTo = instr - ( SIZEOF_CODE + SIZEOF_CODE + SIZEOF_HALF );
- vm_push( (SW)returnTo );
-
- initExecution( exec, parser, parser->pdaRun, parser->fsmRun, parser->input->in, parser->pdaRun->frameId );
- instr = parser->pdaRun->code;
- break;
- }
-
- case IN_PCR_RET: {
- debug( REALM_BYTECODE, "IN_PCR_RET\n" );
-
- FrameInfo *fi = &prg->rtd->frameInfo[exec->frameId];
- downrefLocalTrees( prg, sp, exec->framePtr, fi->trees, fi->treesLen );
- vm_popn( fi->frameSize );
-
- instr = (Code*) vm_pop();
- exec->rcodeUnitLen = ( long ) vm_pop();
- exec->frameId = ( long ) vm_pop();
- exec->iframePtr = ( Tree ** ) vm_pop();
- exec->framePtr = ( Tree ** ) vm_pop();
- exec->inputStream = ( InputStream * ) vm_pop();
- exec->fsmRun = ( FsmRun * ) vm_pop();
- exec->pdaRun = ( PdaRun * ) vm_pop();
- exec->parser = ( Parser * ) vm_pop();
-
- if ( instr == 0 ) {
- fflush( stdout );
- goto out;
- }
- break;
- }
-
- case IN_PCR_END_DECK: {
- debug( REALM_BYTECODE, "IN_PCR_END_DECK\n" );
- exec->pdaRun->onDeck = false;
- break;
- }
-
- case IN_PARSE_FRAG_WC: {
- debug( REALM_BYTECODE, "IN_PARSE_FRAG_WC\n" );
-
- Half stopId;
- read_half( stopId );
-
- long pcr = (long)vm_pop();
- Parser *parser = (Parser*)vm_pop();
- long steps = (long)vm_pop();
-
- pcr = parseFrag( prg, sp, parser, stopId, pcr );
-
- vm_push( (SW)steps );
- vm_push( (SW)parser );
- vm_push( (SW)pcr );
-
- /* If done, jump to the terminating instruction, otherwise fall
- * through to call some code, then jump back here. */
- if ( pcr == PcrDone )
- instr += SIZEOF_CODE;
- break;
- }
-
- case IN_PARSE_FRAG_WC3: {
- debug( REALM_BYTECODE, "IN_PARSE_FRAG_WC3\n" );
-
- vm_pop_ignore();
- Parser *parser = (Parser*)vm_pop();
- vm_pop_ignore();
-
- treeDownref( prg, sp, (Tree*)parser );
-
- if ( prg->induceExit )
- goto out;
-
- break;
- }
-
- case IN_PARSE_FRAG_WV: {
- Half stopId;
- read_half( stopId );
-
- debug( REALM_BYTECODE, "IN_PARSE_FRAG_WV\n" );
-
- long pcr = (long)vm_pop();
- Parser *parser = (Parser*)vm_pop();
- long steps = (long)vm_pop();
-
- pcr = parseFrag( prg, sp, parser, stopId, pcr );
-
- vm_push( (SW)steps );
- vm_push( (SW)parser );
- vm_push( (SW)pcr );
-
- /* If done, jump to the terminating instruction, otherwise fall
- * through to call some code, then jump back here. */
- if ( pcr == PcrDone )
- instr += SIZEOF_CODE;
- break;
- }
-
- case IN_PARSE_FRAG_WV3: {
- debug( REALM_BYTECODE, "IN_PARSE_FRAG_WV3 \n" );
-
- vm_pop_ignore();
- Parser *parser = (Parser*)vm_pop();
- long steps = (long)vm_pop();
-
- append( &exec->pdaRun->rcodeCollect, IN_LOAD_WORD );
- appendWord( &exec->pdaRun->rcodeCollect, steps );
- append( &exec->pdaRun->rcodeCollect, IN_LOAD_TREE );
- appendWord( &exec->pdaRun->rcodeCollect, (Word)parser );
- append( &exec->pdaRun->rcodeCollect, IN_PARSE_LOAD_START );
- append( &exec->pdaRun->rcodeCollect, IN_PARSE_FRAG_BKT );
- appendHalf( &exec->pdaRun->rcodeCollect, 0 );
- append( &exec->pdaRun->rcodeCollect, IN_PCR_CALL );
- append( &exec->pdaRun->rcodeCollect, IN_PARSE_FRAG_BKT3 );
- append( &exec->pdaRun->rcodeCollect, 6 * SIZEOF_CODE + 2 * SIZEOF_WORD + SIZEOF_HALF );
-
- if ( prg->induceExit )
- goto out;
- break;
- }
-
- case IN_PARSE_FRAG_BKT: {
- Half stopId;
- read_half( stopId );
-
- debug( REALM_BYTECODE, "IN_PARSE_FRAG_BKT\n" );
-
- long pcr = (long)vm_pop();
- Parser *parser = (Parser*)vm_pop();
- long steps = (long)vm_pop();
-
- pcr = undoParseFrag( prg, sp, parser, steps, pcr );
-
- vm_push( (SW)steps );
- vm_push( (SW)parser );
- vm_push( (SW)pcr );
-
- if ( pcr == PcrDone )
- instr += SIZEOF_CODE;
- break;
- }
-
- case IN_PARSE_FRAG_BKT3: {
- debug( REALM_BYTECODE, "IN_PARSE_FRAG_BKT3\n" );
-
- vm_pop_ignore();
- Parser *parser = (Parser*)vm_pop();
- vm_pop_ignore();
-
- treeDownref( prg, sp, (Tree*)parser );
- break;
- }
-
- case IN_PARSE_FINISH_WC: {
- Half stopId;
- read_half( stopId );
-
- debug( REALM_BYTECODE, "IN_PARSE_FINISH_WC\n" );
-
- long pcr = (long)vm_pop();
- Parser *parser = (Parser*)vm_pop();
- long steps = (long)vm_pop();
-
- parser->result = 0;
- pcr = parseFinish( &parser->result, prg, sp, parser, false, pcr );
-
- vm_push( (SW)steps );
- vm_push( (SW)parser );
- vm_push( (SW)pcr );
-
- /* If done, jump to the terminating instruction, otherwise fall
- * through to call some code, then jump back here. */
- if ( pcr == PcrDone )
- instr += SIZEOF_CODE;
- break;
- }
-
- case IN_PARSE_FINISH_WC3: {
- debug( REALM_BYTECODE, "IN_PARSE_FINISH_WC3\n" );
-
- vm_pop_ignore();
- Parser *parser = (Parser*)vm_pop();
- vm_pop_ignore();
-
- vm_push( parser->result );
- debug( REALM_BYTECODE, "parser refs: %d\n", parser->refs );
- treeDownref( prg, sp, (Tree*)parser );
- if ( prg->induceExit )
- goto out;
-
- break;
- }
-
- case IN_PARSE_FINISH_WV: {
- Half stopId;
- read_half( stopId );
-
- debug( REALM_BYTECODE, "IN_PARSE_FINISH_WV\n" );
-
- long pcr = (long)vm_pop();
- Parser *parser = (Parser*)vm_pop();
- long steps = (long)vm_pop();
-
- parser->result = 0;
- pcr = parseFinish( &parser->result, prg, sp, parser, true, pcr );
-
- vm_push( (SW)steps );
- vm_push( (SW)parser );
- vm_push( (SW)pcr );
-
- if ( pcr == PcrDone )
- instr += SIZEOF_CODE;
- break;
- }
-
- case IN_PARSE_FINISH_WV3: {
- debug( REALM_BYTECODE, "IN_PARSE_FINISH_WV3\n" );
-
- vm_pop_ignore();
- Parser *parser = (Parser*)vm_pop();
- long steps = (long)vm_pop();
-
- vm_push( parser->result );
-
- append( &exec->pdaRun->rcodeCollect, IN_LOAD_WORD );
- appendWord( &exec->pdaRun->rcodeCollect, steps );
- append( &exec->pdaRun->rcodeCollect, IN_LOAD_TREE );
- appendWord( &exec->pdaRun->rcodeCollect, (Word)parser );
- append( &exec->pdaRun->rcodeCollect, IN_PARSE_LOAD_START );
- append( &exec->pdaRun->rcodeCollect, IN_PARSE_FINISH_BKT );
- appendHalf( &exec->pdaRun->rcodeCollect, 0 );
- append( &exec->pdaRun->rcodeCollect, IN_PCR_CALL );
- append( &exec->pdaRun->rcodeCollect, IN_PARSE_FINISH_BKT3 );
- append( &exec->pdaRun->rcodeCollect, 6 * SIZEOF_CODE + 2 * SIZEOF_WORD + SIZEOF_HALF );
-
- if ( prg->induceExit )
- goto out;
-
- break;
- }
-
- case IN_PARSE_FINISH_BKT: {
- Half stopId;
- read_half( stopId );
-
- debug( REALM_BYTECODE, "IN_PARSE_FINISH_BKT\n" );
-
- long pcr = (long)vm_pop();
- Parser *parser = (Parser*)vm_pop();
- long steps = (long)vm_pop();
-
- pcr = undoParseFrag( prg, sp, parser, steps, pcr );
-
- vm_push( (SW)steps );
- vm_push( (SW)parser );
- vm_push( (SW)pcr );
-
- if ( pcr == PcrDone )
- instr += SIZEOF_CODE;
- break;
- }
-
- case IN_PARSE_FINISH_BKT3: {
- debug( REALM_BYTECODE, "IN_PARSE_FINISH_BKT3\n" );
-
- vm_pop_ignore();
- Parser *parser = (Parser*)vm_pop();
- vm_pop_ignore();
-
- unsetEof( parser->input->in );
- treeDownref( prg, sp, (Tree*)parser );
- break;
- }
-
- case IN_INPUT_PULL_WV: {
- debug( REALM_BYTECODE, "IN_INPUT_PULL_WV\n" );
-
- Input *accumStream = (Input*)vm_pop();
- Tree *len = vm_pop();
- Tree *string = streamPullBc( prg, exec->fsmRun, accumStream->in, len );
- treeUpref( string );
- vm_push( string );
-
- /* Single unit. */
- treeUpref( string );
- append( &exec->pdaRun->rcodeCollect, IN_INPUT_PULL_BKT );
- appendWord( &exec->pdaRun->rcodeCollect, (Word) string );
- appendWord( &exec->pdaRun->rcodeCollect, (Word) exec->fsmRun );
- exec->rcodeUnitLen += SIZEOF_CODE + 2 *SIZEOF_WORD;
- append( &exec->pdaRun->rcodeCollect, exec->rcodeUnitLen );
-
- treeDownref( prg, sp, (Tree*)accumStream );
- treeDownref( prg, sp, len );
- break;
- }
- case IN_INPUT_PULL_BKT: {
- Word f;
- Tree *string;
- read_tree( string );
- read_word( f );
- FsmRun *fsmRun = (FsmRun*)f;
-
- Tree *accumStream = vm_pop();
-
- debug( REALM_BYTECODE, "IN_INPUT_PULL_BKT\n" );
-
- undoPull( prg, fsmRun, ((Input*)accumStream)->in, string );
- treeDownref( prg, sp, accumStream );
- treeDownref( prg, sp, string );
- break;
- }
- case IN_INPUT_PUSH_WV: {
- debug( REALM_BYTECODE, "IN_INPUT_PUSH_WV\n" );
-
- Input *input = (Input*)vm_pop();
- Tree *tree = vm_pop();
- long len = streamPush( prg, sp, 0, input->in, tree, false );
- vm_push( 0 );
-
- /* Single unit. */
- append( &exec->pdaRun->rcodeCollect, IN_INPUT_PUSH_BKT );
- appendWord( &exec->pdaRun->rcodeCollect, len );
- exec->rcodeUnitLen += SIZEOF_CODE + SIZEOF_WORD;
- append( &exec->pdaRun->rcodeCollect, exec->rcodeUnitLen );
-
- treeDownref( prg, sp, (Tree*)input );
- treeDownref( prg, sp, tree );
- break;
- }
- case IN_INPUT_PUSH_IGNORE_WV: {
- debug( REALM_BYTECODE, "IN_INPUT_PUSH_IGNORE_WV\n" );
-
- Input *input = (Input*)vm_pop();
- Tree *tree = vm_pop();
- long len = streamPush( prg, sp, 0, input->in, tree, true );
- vm_push( 0 );
-
- /* Single unit. */
- append( &exec->pdaRun->rcodeCollect, IN_INPUT_PUSH_BKT );
- appendWord( &exec->pdaRun->rcodeCollect, len );
- exec->rcodeUnitLen += SIZEOF_CODE + SIZEOF_WORD;
- append( &exec->pdaRun->rcodeCollect, exec->rcodeUnitLen );
-
- treeDownref( prg, sp, (Tree*)input );
- treeDownref( prg, sp, tree );
- break;
- }
- case IN_INPUT_PUSH_BKT: {
- Word len;
- read_word( len );
-
- Input *input = (Input*)vm_pop();
-
- debug( REALM_BYTECODE, "IN_INPUT_PUSH_BKT\n" );
-
- undoStreamPush( prg, sp, 0, input->in, len );
- treeDownref( prg, sp, (Tree*)input );
- break;
- }
- case IN_CONSTRUCT: {
- Half patternId;
- read_half( patternId );
-
- debug( REALM_BYTECODE, "IN_CONSTRUCT\n" );
-
- int rootNode = prg->rtd->patReplInfo[patternId].offset;
-
- /* Note that bindIds are indexed at one. Add one spot for them. */
- int numBindings = prg->rtd->patReplInfo[patternId].numBindings;
- Tree *bindings[1+numBindings];
-
- int b;
- for ( b = 1; b <= numBindings; b++ ) {
- bindings[b] = vm_pop();
- assert( bindings[b] != 0 );
- }
-
- Tree *replTree = 0;
- PatReplNode *nodes = prg->rtd->patReplNodes;
- LangElInfo *lelInfo = prg->rtd->lelInfo;
- long genericId = lelInfo[nodes[rootNode].id].genericId;
- if ( genericId > 0 ) {
- replTree = createGeneric( prg, genericId );
- treeUpref( replTree );
- }
- else {
- replTree = constructReplacementTree( 0, bindings,
- prg, rootNode );
- }
-
- vm_push( replTree );
- break;
- }
- case IN_CONSTRUCT_INPUT: {
- debug( REALM_BYTECODE, "IN_CONSTRUCT_INPUT\n" );
-
- Tree *input = constructInput( prg );
- treeUpref( input );
- vm_push( input );
- break;
- }
- case IN_GET_INPUT: {
- debug( REALM_BYTECODE, "IN_GET_INPUT\n" );
-
- Parser *parser = (Parser*)vm_pop();
- treeUpref( (Tree*)parser->input );
- vm_push( (Tree*)parser->input );
- treeDownref( prg, sp, (Tree*)parser );
- break;
- }
- case IN_SET_INPUT: {
- debug( REALM_BYTECODE, "IN_SET_INPUT\n" );
-
- Parser *parser = (Parser*)vm_pop();
- Input *accumStream = (Input*)vm_pop();
- parser->input = accumStream;
- treeUpref( (Tree*)accumStream );
- treeDownref( prg, sp, (Tree*)parser );
- treeDownref( prg, sp, (Tree*)accumStream );
- break;
- }
- case IN_CONSTRUCT_TERM: {
- Half tokenId;
- read_half( tokenId );
-
- debug( REALM_BYTECODE, "IN_CONSTRUCT_TERM\n" );
-
- /* Pop the string we are constructing the token from. */
- Str *str = (Str*)vm_pop();
- Tree *res = constructTerm( prg, tokenId, str->value );
- treeUpref( res );
- vm_push( res );
- break;
- }
- case IN_MAKE_TOKEN: {
- uchar nargs;
- read_byte( nargs );
-
- debug( REALM_BYTECODE, "IN_MAKE_TOKEN\n" );
-
- Tree *result = constructToken( prg, sp, nargs );
- long i;
- for ( i = 0; i < nargs; i++ ) {
- Tree *arg = vm_pop();
- treeDownref( prg, sp, arg );
- }
- vm_push( result );
- break;
- }
- case IN_MAKE_TREE: {
- uchar nargs;
- read_byte( nargs );
-
- debug( REALM_BYTECODE, "IN_MAKE_TREE\n" );
-
- Tree *result = makeTree( prg, sp, nargs );
- long i;
- for ( i = 0; i < nargs; i++ ) {
- Tree *arg = vm_pop();
- treeDownref( prg, sp, arg );
- }
- vm_push( result );
- break;
- }
- case IN_TREE_NEW: {
- debug( REALM_BYTECODE, "IN_TREE_NEW \n" );
-
- Tree *tree = vm_pop();
- Tree *res = constructPointer( prg, tree );
- treeUpref( res );
- vm_push( res );
- break;
- }
- case IN_PTR_DEREF_R: {
- debug( REALM_BYTECODE, "IN_PTR_DEREF_R\n" );
-
- Pointer *ptr = (Pointer*)vm_pop();
- treeDownref( prg, sp, (Tree*)ptr );
-
- Tree *dval = getPtrVal( ptr );
- treeUpref( dval );
- vm_push( dval );
- break;
- }
- case IN_PTR_DEREF_WC: {
- debug( REALM_BYTECODE, "IN_PTR_DEREF_WC\n" );
-
- Pointer *ptr = (Pointer*)vm_pop();
- treeDownref( prg, sp, (Tree*)ptr );
-
- Tree *dval = getPtrValSplit( prg, ptr );
- treeUpref( dval );
- vm_push( dval );
- break;
- }
- case IN_PTR_DEREF_WV: {
- debug( REALM_BYTECODE, "IN_PTR_DEREF_WV\n" );
-
- Pointer *ptr = (Pointer*)vm_pop();
- /* Don't downref the pointer since it is going into the reverse
- * instruction. */
-
- Tree *dval = getPtrValSplit( prg, ptr );
- treeUpref( dval );
- vm_push( dval );
-
- /* This is an initial global load. Need to reverse execute it. */
- append( &exec->pdaRun->rcodeCollect, IN_PTR_DEREF_BKT );
- appendWord( &exec->pdaRun->rcodeCollect, (Word) ptr );
- exec->rcodeUnitLen = SIZEOF_CODE + SIZEOF_WORD;
- break;
- }
- case IN_PTR_DEREF_BKT: {
- Word p;
- read_word( p );
-
- debug( REALM_BYTECODE, "IN_PTR_DEREF_BKT\n" );
-
- Pointer *ptr = (Pointer*)p;
-
- Tree *dval = getPtrValSplit( prg, ptr );
- treeUpref( dval );
- vm_push( dval );
-
- treeDownref( prg, sp, (Tree*)ptr );
- break;
- }
- case IN_REF_FROM_LOCAL: {
- short int field;
- read_half( field );
-
- debug( REALM_BYTECODE, "IN_REF_FROM_LOCAL\n" );
-
- /* First push the null next pointer, then the kid pointer. */
- Tree **ptr = vm_plocal(field);
- vm_push( 0 );
- vm_push( (SW)ptr );
- break;
- }
- case IN_REF_FROM_REF: {
- short int field;
- read_half( field );
-
- debug( REALM_BYTECODE, "IN_REF_FROM_REF\n" );
-
- Ref *ref = (Ref*)vm_plocal(field);
- vm_push( (SW)ref );
- vm_push( (SW)ref->kid );
- break;
- }
- case IN_REF_FROM_QUAL_REF: {
- short int back;
- short int field;
- read_half( back );
- read_half( field );
-
- debug( REALM_BYTECODE, "IN_REF_FROM_QUAL_REF\n" );
-
- Ref *ref = (Ref*)(sp + back);
-
- Tree *obj = ref->kid->tree;
- Kid *attr_kid = getFieldKid( obj, field );
-
- vm_push( (SW)ref );
- vm_push( (SW)attr_kid );
- break;
- }
- case IN_TRITER_REF_FROM_CUR: {
- short int field;
- read_half( field );
-
- debug( REALM_BYTECODE, "IN_TRITER_REF_FROM_CUR\n" );
-
- /* Push the next pointer first, then the kid. */
- TreeIter *iter = (TreeIter*) vm_plocal(field);
- Ref *ref = &iter->ref;
- vm_push( (SW)ref );
- vm_push( (SW)iter->ref.kid );
- break;
- }
- case IN_UITER_REF_FROM_CUR: {
- short int field;
- read_half( field );
-
- debug( REALM_BYTECODE, "IN_UITER_REF_FROM_CUR\n" );
-
- /* Push the next pointer first, then the kid. */
- UserIter *uiter = (UserIter*) vm_local(field);
- vm_push( (SW)uiter->ref.next );
- vm_push( (SW)uiter->ref.kid );
- break;
- }
- case IN_GET_TOKEN_DATA_R: {
- debug( REALM_BYTECODE, "IN_GET_TOKEN_DATA_R\n" );
-
- Tree *tree = (Tree*) vm_pop();
- Head *data = stringCopy( prg, tree->tokdata );
- Tree *str = constructString( prg, data );
- treeUpref( str );
- vm_push( str );
- treeDownref( prg, sp, tree );
- break;
- }
- case IN_SET_TOKEN_DATA_WC: {
- debug( REALM_BYTECODE, "IN_SET_TOKEN_DATA_WC\n" );
-
- Tree *tree = vm_pop();
- Tree *val = vm_pop();
- Head *head = stringCopy( prg, ((Str*)val)->value );
- stringFree( prg, tree->tokdata );
- tree->tokdata = head;
-
- treeDownref( prg, sp, tree );
- treeDownref( prg, sp, val );
- break;
- }
- case IN_SET_TOKEN_DATA_WV: {
- debug( REALM_BYTECODE, "IN_SET_TOKEN_DATA_WV\n" );
-
- Tree *tree = vm_pop();
- Tree *val = vm_pop();
-
- Head *oldval = tree->tokdata;
- Head *head = stringCopy( prg, ((Str*)val)->value );
- tree->tokdata = head;
-
- /* Set up reverse code. Needs no args. */
- append( &exec->pdaRun->rcodeCollect, IN_SET_TOKEN_DATA_BKT );
- appendWord( &exec->pdaRun->rcodeCollect, (Word)oldval );
- exec->rcodeUnitLen += SIZEOF_CODE + SIZEOF_WORD;
- append( &exec->pdaRun->rcodeCollect, exec->rcodeUnitLen );
-
- treeDownref( prg, sp, tree );
- treeDownref( prg, sp, val );
- break;
- }
- case IN_SET_TOKEN_DATA_BKT: {
- debug( REALM_BYTECODE, "IN_SET_TOKEN_DATA_BKT \n" );
-
- Word oldval;
- read_word( oldval );
-
- Tree *tree = vm_pop();
- Head *head = (Head*)oldval;
- stringFree( prg, tree->tokdata );
- tree->tokdata = head;
- treeDownref( prg, sp, tree );
- break;
- }
- case IN_GET_TOKEN_POS_R: {
- debug( REALM_BYTECODE, "IN_GET_TOKEN_POS_R\n" );
-
- Tree *tree = (Tree*) vm_pop();
- Tree *integer = 0;
- if ( tree->tokdata->location ) {
- integer = constructInteger( prg, tree->tokdata->location->byte );
- treeUpref( integer );
- }
- vm_push( integer );
- treeDownref( prg, sp, tree );
- break;
- }
- case IN_GET_TOKEN_LINE_R: {
- debug( REALM_BYTECODE, "IN_GET_TOKEN_LINE_R\n" );
-
- Tree *tree = (Tree*) vm_pop();
- Tree *integer = 0;
- if ( tree->tokdata->location ) {
- integer = constructInteger( prg, tree->tokdata->location->line );
- treeUpref( integer );
- }
- vm_push( integer );
- treeDownref( prg, sp, tree );
- break;
- }
- case IN_GET_MATCH_LENGTH_R: {
- debug( REALM_BYTECODE, "IN_GET_MATCH_LENGTH_R\n" );
-
- Tree *integer = constructInteger( prg, stringLength(exec->pdaRun->tokdata) );
- treeUpref( integer );
- vm_push( integer );
- break;
- }
- case IN_GET_MATCH_TEXT_R: {
- debug( REALM_BYTECODE, "IN_GET_MATCH_TEXT_R\n" );
-
- Head *s = stringCopy( prg, exec->pdaRun->tokdata );
- Tree *tree = constructString( prg, s );
- treeUpref( tree );
- vm_push( tree );
- break;
- }
- case IN_LIST_LENGTH: {
- debug( REALM_BYTECODE, "IN_LIST_LENGTH\n" );
-
- List *list = (List*) vm_pop();
- long len = listLength( list );
- Tree *res = constructInteger( prg, len );
- treeDownref( prg, sp, (Tree*)list );
- treeUpref( res );
- vm_push( res );
- break;
- }
- case IN_LIST_APPEND_WV: {
- debug( REALM_BYTECODE, "IN_LIST_APPEND_WV\n" );
-
- Tree *obj = vm_pop();
- Tree *val = vm_pop();
-
- treeDownref( prg, sp, obj );
-
- listAppend2( prg, (List*)obj, val );
- treeUpref( prg->trueVal );
- vm_push( prg->trueVal );
-
- /* Set up reverse code. Needs no args. */
- append( &exec->pdaRun->rcodeCollect, IN_LIST_APPEND_BKT );
- exec->rcodeUnitLen += SIZEOF_CODE;
- append( &exec->pdaRun->rcodeCollect, exec->rcodeUnitLen );
- /* FLUSH */
- break;
- }
- case IN_LIST_APPEND_WC: {
- debug( REALM_BYTECODE, "IN_LIST_APPEND_WC\n" );
-
- Tree *obj = vm_pop();
- Tree *val = vm_pop();
-
- treeDownref( prg, sp, obj );
-
- listAppend2( prg, (List*)obj, val );
- treeUpref( prg->trueVal );
- vm_push( prg->trueVal );
- break;
- }
- case IN_LIST_APPEND_BKT: {
- debug( REALM_BYTECODE, "IN_LIST_APPEND_BKT\n" );
-
- Tree *obj = vm_pop();
- treeDownref( prg, sp, obj );
-
- Tree *tree = listRemoveEnd( prg, (List*)obj );
- treeDownref( prg, sp, tree );
- break;
- }
- case IN_LIST_REMOVE_END_WC: {
- debug( REALM_BYTECODE, "IN_LIST_REMOVE_END_WC\n" );
-
- Tree *obj = vm_pop();
- treeDownref( prg, sp, obj );
-
- Tree *end = listRemoveEnd( prg, (List*)obj );
- vm_push( end );
- break;
- }
- case IN_LIST_REMOVE_END_WV: {
- debug( REALM_BYTECODE, "IN_LIST_REMOVE_END_WV\n" );
-
- Tree *obj = vm_pop();
- treeDownref( prg, sp, obj );
-
- Tree *end = listRemoveEnd( prg, (List*)obj );
- vm_push( end );
-
- /* Set up reverse. The result comes off the list downrefed.
- * Need it up referenced for the reverse code too. */
- treeUpref( end );
- append( &exec->pdaRun->rcodeCollect, IN_LIST_REMOVE_END_BKT );
- appendWord( &exec->pdaRun->rcodeCollect, (Word)end );
- exec->rcodeUnitLen += SIZEOF_CODE + SIZEOF_WORD;
- append( &exec->pdaRun->rcodeCollect, exec->rcodeUnitLen );
- /* FLUSH */
- break;
- }
- case IN_LIST_REMOVE_END_BKT: {
- debug( REALM_BYTECODE, "IN_LIST_REMOVE_END_BKT\n" );
-
- Tree *val;
- read_tree( val );
-
- Tree *obj = vm_pop();
- treeDownref( prg, sp, obj );
-
- listAppend2( prg, (List*)obj, val );
- break;
- }
- case IN_GET_LIST_MEM_R: {
- short field;
- read_half( field );
-
- debug( REALM_BYTECODE, "IN_GET_LIST_MEM_R\n" );
-
- Tree *obj = vm_pop();
- treeDownref( prg, sp, obj );
-
- Tree *val = getListMem( (List*)obj, field );
- treeUpref( val );
- vm_push( val );
- break;
- }
- case IN_GET_LIST_MEM_WC: {
- short field;
- read_half( field );
-
- debug( REALM_BYTECODE, "IN_GET_LIST_MEM_WC\n" );
-
- Tree *obj = vm_pop();
- treeDownref( prg, sp, obj );
-
- Tree *val = getListMemSplit( prg, (List*)obj, field );
- treeUpref( val );
- vm_push( val );
- break;
- }
- case IN_GET_LIST_MEM_WV: {
- short field;
- read_half( field );
-
- debug( REALM_BYTECODE, "IN_GET_LIST_MEM_WV\n" );
-
- Tree *obj = vm_pop();
- treeDownref( prg, sp, obj );
-
- Tree *val = getListMemSplit( prg, (List*)obj, field );
- treeUpref( val );
- vm_push( val );
-
- /* Set up the reverse instruction. */
- append( &exec->pdaRun->rcodeCollect, IN_GET_LIST_MEM_BKT );
- appendHalf( &exec->pdaRun->rcodeCollect, field );
- exec->rcodeUnitLen += SIZEOF_CODE + SIZEOF_HALF;
- break;
- }
- case IN_GET_LIST_MEM_BKT: {
- short field;
- read_half( field );
-
- debug( REALM_BYTECODE, "IN_GET_LIST_MEM_BKT\n" );
-
- Tree *obj = vm_pop();
- treeDownref( prg, sp, obj );
-
- Tree *res = getListMemSplit( prg, (List*)obj, field );
- treeUpref( res );
- vm_push( res );
- break;
- }
- case IN_SET_LIST_MEM_WC: {
- Half field;
- read_half( field );
-
- debug( REALM_BYTECODE, "IN_SET_LIST_MEM_WC\n" );
-
- Tree *obj = vm_pop();
- treeDownref( prg, sp, obj );
-
- Tree *val = vm_pop();
- Tree *existing = setListMem( (List*)obj, field, val );
- treeDownref( prg, sp, existing );
- break;
- }
- case IN_SET_LIST_MEM_WV: {
- Half field;
- read_half( field );
-
- debug( REALM_BYTECODE, "IN_SET_LIST_MEM_WV\n" );
-
- Tree *obj = vm_pop();
- treeDownref( prg, sp, obj );
-
- Tree *val = vm_pop();
- Tree *existing = setListMem( (List*)obj, field, val );
-
- /* Set up the reverse instruction. */
- append( &exec->pdaRun->rcodeCollect, IN_SET_LIST_MEM_BKT );
- appendHalf( &exec->pdaRun->rcodeCollect, field );
- appendWord( &exec->pdaRun->rcodeCollect, (Word)existing );
- exec->rcodeUnitLen += SIZEOF_CODE + SIZEOF_HALF + SIZEOF_WORD;
- append( &exec->pdaRun->rcodeCollect, exec->rcodeUnitLen );
- /* FLUSH */
- break;
- }
- case IN_SET_LIST_MEM_BKT: {
- Half field;
- Tree *val;
- read_half( field );
- read_tree( val );
-
- debug( REALM_BYTECODE, "IN_SET_LIST_MEM_BKT\n" );
-
- Tree *obj = vm_pop();
- treeDownref( prg, sp, obj );
-
- Tree *undid = setListMem( (List*)obj, field, val );
- treeDownref( prg, sp, undid );
- break;
- }
- case IN_MAP_INSERT_WV: {
- debug( REALM_BYTECODE, "IN_MAP_INSERT_WV\n" );
-
- Tree *obj = vm_pop();
- Tree *val = vm_pop();
- Tree *key = vm_pop();
-
- treeDownref( prg, sp, obj );
-
- int inserted = mapInsert( prg, (Map*)obj, key, val );
- Tree *result = inserted ? prg->trueVal : prg->falseVal;
- treeUpref( result );
- vm_push( result );
-
- /* Set up the reverse instruction. If the insert fails still need
- * to pop the loaded map object. Just use the reverse instruction
- * since it's nice to see it in the logs. */
-
- /* Need to upref key for storage in reverse code. */
- treeUpref( key );
- append( &exec->pdaRun->rcodeCollect, IN_MAP_INSERT_BKT );
- append( &exec->pdaRun->rcodeCollect, inserted );
- appendWord( &exec->pdaRun->rcodeCollect, (Word)key );
- exec->rcodeUnitLen += SIZEOF_CODE + SIZEOF_CODE + SIZEOF_WORD;
- append( &exec->pdaRun->rcodeCollect, exec->rcodeUnitLen );
-
- if ( ! inserted ) {
- treeDownref( prg, sp, key );
- treeDownref( prg, sp, val );
- }
- break;
- }
- case IN_MAP_INSERT_WC: {
- debug( REALM_BYTECODE, "IN_MAP_INSERT_WC\n" );
-
- Tree *obj = vm_pop();
- Tree *val = vm_pop();
- Tree *key = vm_pop();
-
- treeDownref( prg, sp, obj );
-
- int inserted = mapInsert( prg, (Map*)obj, key, val );
- Tree *result = inserted ? prg->trueVal : prg->falseVal;
- treeUpref( result );
- vm_push( result );
-
- if ( ! inserted ) {
- treeDownref( prg, sp, key );
- treeDownref( prg, sp, val );
- }
- break;
- }
- case IN_MAP_INSERT_BKT: {
- uchar inserted;
- Tree *key;
- read_byte( inserted );
- read_tree( key );
-
- debug( REALM_BYTECODE, "IN_MAP_INSERT_BKT\n" );
-
- Tree *obj = vm_pop();
- if ( inserted ) {
- Tree *val = mapUninsert( prg, (Map*)obj, key );
- treeDownref( prg, sp, key );
- treeDownref( prg, sp, val );
- }
-
- treeDownref( prg, sp, obj );
- treeDownref( prg, sp, key );
- break;
- }
- case IN_MAP_STORE_WC: {
- debug( REALM_BYTECODE, "IN_MAP_STORE_WC\n" );
-
- Tree *obj = vm_pop();
- Tree *element = vm_pop();
- Tree *key = vm_pop();
-
- Tree *existing = mapStore( prg, (Map*)obj, key, element );
- Tree *result = existing == 0 ? prg->trueVal : prg->falseVal;
- treeUpref( result );
- vm_push( result );
-
- treeDownref( prg, sp, obj );
- if ( existing != 0 ) {
- treeDownref( prg, sp, key );
- treeDownref( prg, sp, existing );
- }
- break;
- }
- case IN_MAP_STORE_WV: {
- debug( REALM_BYTECODE, "IN_MAP_STORE_WV\n" );
-
- Tree *obj = vm_pop();
- Tree *element = vm_pop();
- Tree *key = vm_pop();
-
- Tree *existing = mapStore( prg, (Map*)obj, key, element );
- Tree *result = existing == 0 ? prg->trueVal : prg->falseVal;
- treeUpref( result );
- vm_push( result );
-
- /* Set up the reverse instruction. */
- treeUpref( key );
- treeUpref( existing );
- append( &exec->pdaRun->rcodeCollect, IN_MAP_STORE_BKT );
- appendWord( &exec->pdaRun->rcodeCollect, (Word)key );
- appendWord( &exec->pdaRun->rcodeCollect, (Word)existing );
- exec->rcodeUnitLen += SIZEOF_CODE + SIZEOF_WORD + SIZEOF_WORD;
- append( &exec->pdaRun->rcodeCollect, exec->rcodeUnitLen );
- /* FLUSH */
-
- treeDownref( prg, sp, obj );
- if ( existing != 0 ) {
- treeDownref( prg, sp, key );
- treeDownref( prg, sp, existing );
- }
- break;
- }
- case IN_MAP_STORE_BKT: {
- Tree *key, *val;
- read_tree( key );
- read_tree( val );
-
- debug( REALM_BYTECODE, "IN_MAP_STORE_BKT\n" );
-
- Tree *obj = vm_pop();
- Tree *stored = mapUnstore( prg, (Map*)obj, key, val );
-
- treeDownref( prg, sp, stored );
- if ( val == 0 )
- treeDownref( prg, sp, key );
-
- treeDownref( prg, sp, obj );
- treeDownref( prg, sp, key );
- break;
- }
- case IN_MAP_REMOVE_WC: {
- debug( REALM_BYTECODE, "IN_MAP_REMOVE_WC\n" );
-
- Tree *obj = vm_pop();
- Tree *key = vm_pop();
- TreePair pair = mapRemove( prg, (Map*)obj, key );
-
- vm_push( pair.val );
-
- treeDownref( prg, sp, obj );
- treeDownref( prg, sp, key );
- treeDownref( prg, sp, pair.key );
- break;
- }
- case IN_MAP_REMOVE_WV: {
- debug( REALM_BYTECODE, "IN_MAP_REMOVE_WV\n" );
-
- Tree *obj = vm_pop();
- Tree *key = vm_pop();
- TreePair pair = mapRemove( prg, (Map*)obj, key );
-
- treeUpref( pair.val );
- vm_push( pair.val );
-
- /* Reverse instruction. */
- append( &exec->pdaRun->rcodeCollect, IN_MAP_REMOVE_BKT );
- appendWord( &exec->pdaRun->rcodeCollect, (Word)pair.key );
- appendWord( &exec->pdaRun->rcodeCollect, (Word)pair.val );
- exec->rcodeUnitLen += SIZEOF_CODE + SIZEOF_WORD + SIZEOF_WORD;
- append( &exec->pdaRun->rcodeCollect, exec->rcodeUnitLen );
-
- treeDownref( prg, sp, obj );
- treeDownref( prg, sp, key );
- break;
- }
- case IN_MAP_REMOVE_BKT: {
- Tree *key, *val;
- read_tree( key );
- read_tree( val );
-
- debug( REALM_BYTECODE, "IN_MAP_REMOVE_BKT\n" );
-
- /* Either both or neither. */
- assert( ( key == 0 ) ^ ( val != 0 ) );
-
- Tree *obj = vm_pop();
- if ( key != 0 )
- mapUnremove( prg, (Map*)obj, key, val );
-
- treeDownref( prg, sp, obj );
- break;
- }
- case IN_MAP_LENGTH: {
- debug( REALM_BYTECODE, "IN_MAP_LENGTH\n" );
-
- Tree *obj = vm_pop();
- long len = mapLength( (Map*)obj );
- Tree *res = constructInteger( prg, len );
- treeUpref( res );
- vm_push( res );
-
- treeDownref( prg, sp, obj );
- break;
- }
- case IN_MAP_FIND: {
- debug( REALM_BYTECODE, "IN_MAP_FIND\n" );
-
- Tree *obj = vm_pop();
- Tree *key = vm_pop();
- Tree *result = mapFind( prg, (Map*)obj, key );
- treeUpref( result );
- vm_push( result );
-
- treeDownref( prg, sp, obj );
- treeDownref( prg, sp, key );
- break;
- }
- case IN_INIT_LOCALS: {
- Half size;
- read_half( size );
-
- debug( REALM_BYTECODE, "IN_INIT_LOCALS\n" );
-
- exec->framePtr = vm_ptop();
- vm_pushn( size );
- memset( vm_ptop(), 0, sizeof(Word) * size );
- break;
- }
- case IN_CALL_WV: {
- Half funcId;
- read_half( funcId );
-
- FunctionInfo *fi = &prg->rtd->functionInfo[funcId];
-
- debug( REALM_BYTECODE, "IN_CALL_WV %ld\n", fi->name );
-
- vm_push( 0 ); /* Return value. */
- vm_push( (SW)instr );
- vm_push( (SW)exec->framePtr );
- vm_push( (SW)exec->frameId );
-
- instr = prg->rtd->frameInfo[fi->frameId].codeWV;
- exec->framePtr = vm_ptop();
- exec->frameId = fi->frameId;
- break;
- }
- case IN_CALL_WC: {
- Half funcId;
- read_half( funcId );
-
- FunctionInfo *fi = &prg->rtd->functionInfo[funcId];
-
- debug( REALM_BYTECODE, "IN_CALL_WC %ld\n", fi->name );
-
- vm_push( 0 ); /* Return value. */
- vm_push( (SW)instr );
- vm_push( (SW)exec->framePtr );
- vm_push( (SW)exec->frameId );
-
- instr = prg->rtd->frameInfo[fi->frameId].codeWC;
- exec->framePtr = vm_ptop();
- exec->frameId = fi->frameId;
- break;
- }
- case IN_YIELD: {
- debug( REALM_BYTECODE, "IN_YIELD\n" );
-
- Kid *kid = (Kid*)vm_pop();
- Ref *next = (Ref*)vm_pop();
- UserIter *uiter = (UserIter*) vm_plocal_iframe( IFR_AA );
-
- if ( kid == 0 || kid->tree == 0 ||
- kid->tree->id == uiter->searchId ||
- uiter->searchId == prg->rtd->anyId )
- {
- /* Store the yielded value. */
- uiter->ref.kid = kid;
- uiter->ref.next = next;
- uiter->stackSize = uiter->stackRoot - vm_ptop();
- uiter->resume = instr;
- uiter->frame = exec->framePtr;
-
- /* Restore the instruction and frame pointer. */
- instr = (Code*) vm_local_iframe(IFR_RIN);
- exec->framePtr = (Tree**) vm_local_iframe(IFR_RFR);
- exec->iframePtr = (Tree**) vm_local_iframe(IFR_RIF);
-
- /* Return the yield result on the top of the stack. */
- Tree *result = uiter->ref.kid != 0 ? prg->trueVal : prg->falseVal;
- treeUpref( result );
- vm_push( result );
- }
- break;
- }
- case IN_UITER_CREATE_WV: {
- short field;
- Half funcId, searchId;
- read_half( field );
- read_half( funcId );
- read_half( searchId );
-
- debug( REALM_BYTECODE, "IN_UITER_CREATE_WV\n" );
-
- FunctionInfo *fi = prg->rtd->functionInfo + funcId;
- UserIter *uiter = uiterCreate( prg, &sp, fi, searchId );
- vm_local(field) = (SW) uiter;
-
- /* This setup is similar to that of a call, only the frame structure
- * is slightly different for user iterators. We aren't going to do
- * the call. We don't need to set up the return ip because the
- * uiter advance will set it. The frame we need to do because it
- * is set once for the lifetime of the iterator. */
- vm_push( 0 ); /* Return instruction pointer, */
- vm_push( (SW)exec->iframePtr ); /* Return iframe. */
- vm_push( (SW)exec->framePtr ); /* Return frame. */
-
- uiterInit( prg, sp, uiter, fi, true );
- break;
- }
- case IN_UITER_CREATE_WC: {
- short field;
- Half funcId, searchId;
- read_half( field );
- read_half( funcId );
- read_half( searchId );
-
- debug( REALM_BYTECODE, "IN_UITER_CREATE_WC\n" );
-
- FunctionInfo *fi = prg->rtd->functionInfo + funcId;
- UserIter *uiter = uiterCreate( prg, &sp, fi, searchId );
- vm_local(field) = (SW) uiter;
-
- /* This setup is similar to that of a call, only the frame structure
- * is slightly different for user iterators. We aren't going to do
- * the call. We don't need to set up the return ip because the
- * uiter advance will set it. The frame we need to do because it
- * is set once for the lifetime of the iterator. */
- vm_push( 0 ); /* Return instruction pointer, */
- vm_push( (SW)exec->iframePtr ); /* Return iframe. */
- vm_push( (SW)exec->framePtr ); /* Return frame. */
-
- uiterInit( prg, sp, uiter, fi, false );
- break;
- }
- case IN_UITER_DESTROY: {
- short field;
- read_half( field );
-
- debug( REALM_BYTECODE, "IN_UITER_DESTROY\n" );
-
- UserIter *uiter = (UserIter*) vm_local(field);
- userIterDestroy( &sp, uiter );
- break;
- }
- case IN_RET: {
- debug( REALM_BYTECODE, "IN_RET\n" );
-
- FrameInfo *fi = &prg->rtd->frameInfo[exec->frameId];
- downrefLocalTrees( prg, sp, exec->framePtr, fi->trees, fi->treesLen );
- vm_popn( fi->frameSize );
-
- exec->frameId = (long) vm_pop();
- exec->framePtr = (Tree**) vm_pop();
- instr = (Code*) vm_pop();
- Tree *retVal = vm_pop();
- vm_popn( fi->argSize );
- vm_push( retVal );
- break;
- }
- case IN_TO_UPPER: {
- debug( REALM_BYTECODE, "IN_TO_UPPER\n" );
-
- Tree *in = vm_pop();
- Head *head = stringToUpper( in->tokdata );
- Tree *upper = constructString( prg, head );
- treeUpref( upper );
- vm_push( upper );
- treeDownref( prg, sp, in );
- break;
- }
- case IN_TO_LOWER: {
- debug( REALM_BYTECODE, "IN_TO_LOWER\n" );
-
- Tree *in = vm_pop();
- Head *head = stringToLower( in->tokdata );
- Tree *lower = constructString( prg, head );
- treeUpref( lower );
- vm_push( lower );
- treeDownref( prg, sp, in );
- break;
- }
- case IN_ERROR: {
- debug( REALM_BYTECODE, "IN_ERROR\n" );
-
- /* Pop the global. */
- Tree *global = vm_pop();
- treeDownref( prg, sp, global );
- treeUpref( prg->lastParseError );
- vm_push( prg->lastParseError );
- break;
- }
- case IN_OPEN_FILE: {
- debug( REALM_BYTECODE, "IN_OPEN_FILE\n" );
-
- Tree *mode = vm_pop();
- Tree *name = vm_pop();
- Tree *res = (Tree*)openFile( prg, name, mode );
- treeUpref( res );
- vm_push( res );
- treeDownref( prg, sp, name );
- treeDownref( prg, sp, mode );
- break;
- }
- case IN_GET_STDIN: {
- debug( REALM_BYTECODE, "IN_GET_STDIN\n" );
-
- /* Pop the root object. */
- Tree *obj = vm_pop();
- treeDownref( prg, sp, obj );
- if ( prg->stdinVal == 0 ) {
- prg->stdinVal = openStreamFd( prg, 0 );
- treeUpref( (Tree*)prg->stdinVal );
- }
-
- treeUpref( (Tree*)prg->stdinVal );
- vm_push( (Tree*)prg->stdinVal );
- break;
- }
- case IN_LOAD_ARGV: {
- Half field;
- read_half( field );
- debug( REALM_BYTECODE, "IN_LOAD_ARGV %lu\n", field );
-
- /* Tree comes back upreffed. */
- Tree *tree = constructArgv( prg, prg->argc, prg->argv );
- setField( prg, prg->global, field, tree );
- break;
- }
-
- case IN_EXIT: {
- debug( REALM_BYTECODE, "IN_EXIT\n" );
-
- Tree *global = vm_pop();
- Int *status = (Int*)vm_pop();
- prg->exitStatus = status->value;
- prg->induceExit = 1;
- treeDownref( prg, sp, global );
- treeDownref( prg, sp, (Tree*)status );
-
- while ( true ) {
- FrameInfo *fi = &prg->rtd->frameInfo[exec->frameId];
- downrefLocalTrees( prg, sp, exec->framePtr, fi->trees, fi->treesLen );
- vm_popn( fi->frameSize );
-
- /* Call layout. */
- exec->frameId = (long) vm_pop();
- exec->framePtr = (Tree**) vm_pop();
- instr = (Code*) vm_pop();
- Tree *retVal = vm_pop();
- vm_popn( fi->argSize );
-
- treeDownref( prg, sp, retVal );
-
- /* We stop on the root, which doesn't have the full function
- * stack layout. */
- if ( exec->frameId == prg->rtd->rootFrameId )
- break;
- }
-
- goto out;
- }
-
- case IN_STOP: {
- debug( REALM_BYTECODE, "IN_STOP\n" );
-
- FrameInfo *fi = &prg->rtd->frameInfo[exec->frameId];
- downrefLocalTrees( prg, sp, exec->framePtr, fi->trees, fi->treesLen );
- vm_popn( fi->frameSize );
-
- fflush( stdout );
- goto out;
- }
-
- /* Halt is a default instruction given by the compiler when it is
- * asked to generate an instruction it doesn't have. It is deliberate
- * and can represent "not implemented" or "compiler error" because a
- * variable holding instructions was not properly initialized. */
- case IN_HALT: {
- fatal( "IN_HALT -- compiler did something wrong\n" );
- exit(1);
- break;
- }
- default: {
- fatal( "UNKNOWN INSTRUCTION: 0x%2x -- something is wrong\n", *(instr-1) );
- assert(false);
- break;
- }
- }
- goto again;
-
-out:
- if ( ! prg->induceExit )
- assert( sp == root );
- return sp;
-}
-
diff --git a/colm/bytecode.h b/colm/bytecode.h
deleted file mode 100644
index 2151544d..00000000
--- a/colm/bytecode.h
+++ /dev/null
@@ -1,487 +0,0 @@
-/*
- * Copyright 2007-2012 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Colm.
- *
- * Colm is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Colm is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Colm; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#ifndef _BYTECODE_H
-#define _BYTECODE_H
-
-#include <colm/pdarun.h>
-#include <colm/tree.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#if SIZEOF_LONG != 4 && SIZEOF_LONG != 8
- #error "SIZEOF_LONG contained an unexpected value"
-#endif
-
-typedef unsigned long ulong;
-typedef unsigned char uchar;
-
-#define IN_LOAD_INT 0x02
-#define IN_LOAD_STR 0x03
-#define IN_LOAD_NIL 0x04
-#define IN_LOAD_TRUE 0x05
-#define IN_LOAD_FALSE 0x06
-#define IN_LOAD_TREE 0xf4
-#define IN_LOAD_WORD 0xf5
-
-#define IN_ADD_INT 0x07
-#define IN_SUB_INT 0x08
-#define IN_MULT_INT 0x09
-#define IN_DIV_INT 0xd0
-
-#define IN_TST_EQL 0x0a
-#define IN_TST_NOT_EQL 0x0b
-#define IN_TST_LESS 0x0c
-#define IN_TST_GRTR 0x0d
-#define IN_TST_LESS_EQL 0x0e
-#define IN_TST_GRTR_EQL 0x0f
-#define IN_TST_LOGICAL_AND 0x10
-#define IN_TST_LOGICAL_OR 0x11
-
-#define IN_NOT 0x12
-
-#define IN_JMP 0x13
-#define IN_JMP_FALSE 0x14
-#define IN_JMP_TRUE 0x15
-
-#define IN_STR_ATOI 0x16
-#define IN_STR_LENGTH 0x17
-#define IN_CONCAT_STR 0x18
-#define IN_TREE_TRIM 0xfc
-
-#define IN_INIT_LOCALS 0x19
-#define IN_POP 0x1b
-#define IN_POP_N_WORDS 0x1c
-#define IN_DUP_TOP 0x1d
-#define IN_DUP_TOP_OFF 0xbc
-#define IN_REJECT 0x1e
-#define IN_MATCH 0x1f
-#define IN_CONSTRUCT 0x20
-#define IN_TREE_NEW 0x21
-
-#define IN_GET_LOCAL_R 0x22
-#define IN_GET_LOCAL_WC 0x23
-#define IN_SET_LOCAL_WC 0x24
-
-#define IN_GET_LOCAL_REF_R 0x25
-#define IN_GET_LOCAL_REF_WC 0x26
-#define IN_SET_LOCAL_REF_WC 0x27
-
-#define IN_SAVE_RET 0x28
-
-#define IN_GET_FIELD_R 0x29
-#define IN_GET_FIELD_WC 0x2a
-#define IN_GET_FIELD_WV 0x2b
-#define IN_GET_FIELD_BKT 0x2c
-
-#define IN_SET_FIELD_WV 0x2d
-#define IN_SET_FIELD_WC 0x2e
-#define IN_SET_FIELD_BKT 0x2f
-#define IN_SET_FIELD_LEAVE_WC 0x30
-
-#define IN_GET_MATCH_LENGTH_R 0x31
-#define IN_GET_MATCH_TEXT_R 0x32
-
-#define IN_GET_TOKEN_DATA_R 0x33
-#define IN_SET_TOKEN_DATA_WC 0x34
-#define IN_SET_TOKEN_DATA_WV 0x35
-#define IN_SET_TOKEN_DATA_BKT 0x36
-
-#define IN_GET_TOKEN_POS_R 0x37
-#define IN_GET_TOKEN_LINE_R 0xf6
-
-#define IN_INIT_RHS_EL 0x38
-#define IN_INIT_LHS_EL 0xef
-#define IN_INIT_CAPTURES 0x39
-#define IN_STORE_LHS_EL 0xf0
-#define IN_RESTORE_LHS 0x01
-
-#define IN_TRITER_FROM_REF 0x3a
-#define IN_TRITER_ADVANCE 0x3b
-#define IN_TRITER_NEXT_CHILD 0x3c
-#define IN_TRITER_GET_CUR_R 0x3d
-#define IN_TRITER_GET_CUR_WC 0x3e
-#define IN_TRITER_SET_CUR_WC 0x3f
-#define IN_TRITER_DESTROY 0x40
-#define IN_TRITER_NEXT_REPEAT 0x41
-#define IN_TRITER_PREV_REPEAT 0x42
-
-#define IN_REV_TRITER_FROM_REF 0x43
-#define IN_REV_TRITER_DESTROY 0x44
-#define IN_REV_TRITER_PREV_CHILD 0x45
-
-#define IN_UITER_DESTROY 0x46
-#define IN_UITER_CREATE_WV 0x47
-#define IN_UITER_CREATE_WC 0x48
-#define IN_UITER_ADVANCE 0x49
-#define IN_UITER_GET_CUR_R 0x4a
-#define IN_UITER_GET_CUR_WC 0x4b
-#define IN_UITER_SET_CUR_WC 0x4c
-
-#define IN_TREE_SEARCH 0x4d
-
-#define IN_LOAD_GLOBAL_R 0x4e
-#define IN_LOAD_GLOBAL_WV 0x4f
-#define IN_LOAD_GLOBAL_WC 0x50
-#define IN_LOAD_GLOBAL_BKT 0x51
-
-#define IN_PTR_DEREF_R 0x52
-#define IN_PTR_DEREF_WV 0x53
-#define IN_PTR_DEREF_WC 0x54
-#define IN_PTR_DEREF_BKT 0x55
-
-#define IN_REF_FROM_LOCAL 0x56
-#define IN_REF_FROM_REF 0x57
-#define IN_REF_FROM_QUAL_REF 0x58
-#define IN_TRITER_REF_FROM_CUR 0x59
-#define IN_UITER_REF_FROM_CUR 0x5a
-
-#define IN_MAP_LENGTH 0x5b
-#define IN_MAP_FIND 0x5c
-#define IN_MAP_INSERT_WV 0x5d
-#define IN_MAP_INSERT_WC 0x5e
-#define IN_MAP_INSERT_BKT 0x5f
-#define IN_MAP_STORE_WV 0x60
-#define IN_MAP_STORE_WC 0x61
-#define IN_MAP_STORE_BKT 0x62
-#define IN_MAP_REMOVE_WV 0x63
-#define IN_MAP_REMOVE_WC 0x64
-#define IN_MAP_REMOVE_BKT 0x65
-
-#define IN_LIST_LENGTH 0x66
-#define IN_LIST_APPEND_WV 0x67
-#define IN_LIST_APPEND_WC 0x68
-#define IN_LIST_APPEND_BKT 0x69
-#define IN_LIST_REMOVE_END_WV 0x6a
-#define IN_LIST_REMOVE_END_WC 0x6b
-#define IN_LIST_REMOVE_END_BKT 0x6c
-
-#define IN_GET_LIST_MEM_R 0x6d
-#define IN_GET_LIST_MEM_WC 0x6e
-#define IN_GET_LIST_MEM_WV 0x6f
-#define IN_GET_LIST_MEM_BKT 0x70
-#define IN_SET_LIST_MEM_WV 0x71
-#define IN_SET_LIST_MEM_WC 0x72
-#define IN_SET_LIST_MEM_BKT 0x73
-
-#define IN_VECTOR_LENGTH 0x74
-#define IN_VECTOR_APPEND_WV 0x75
-#define IN_VECTOR_APPEND_WC 0x76
-#define IN_VECTOR_APPEND_BKT 0x77
-#define IN_VECTOR_INSERT_WV 0x78
-#define IN_VECTOR_INSERT_WC 0x79
-#define IN_VECTOR_INSERT_BKT 0x7a
-
-#define IN_PRINT 0x7b
-#define IN_PRINT_XML_AC 0x7c
-#define IN_PRINT_XML 0x7d
-#define IN_PRINT_STREAM 0x7e
-
-#define IN_HALT 0x7f
-
-#define IN_CALL_WC 0x80
-#define IN_CALL_WV 0x81
-#define IN_RET 0x82
-#define IN_YIELD 0x83
-#define IN_STOP 0x84
-
-#define IN_STR_UORD8 0x85
-#define IN_STR_SORD8 0x86
-#define IN_STR_UORD16 0x87
-#define IN_STR_SORD16 0x88
-#define IN_STR_UORD32 0x89
-#define IN_STR_SORD32 0x8a
-
-#define IN_INT_TO_STR 0x8b
-#define IN_TREE_TO_STR 0x8c
-#define IN_TREE_TO_STR_NOTRIM 0xfd
-
-#define IN_CREATE_TOKEN 0x8d
-#define IN_MAKE_TOKEN 0x8e
-#define IN_MAKE_TREE 0x8f
-#define IN_CONSTRUCT_TERM 0x90
-
-#define IN_INPUT_PULL_WV 0xf7
-#define IN_INPUT_PULL_BKT 0xf8
-
-#define IN_PARSE_LOAD_START 0xf2
-#define IN_PARSE_SAVE_STEPS 0xf3
-#define IN_PARSE_FRAG_WC 0xc0
-#define IN_PARSE_FRAG_WC3 0xe1
-
-#define IN_PARSE_FRAG_WV 0xc1
-#define IN_PARSE_FRAG_WV3 0xe4
-
-#define IN_PARSE_FRAG_BKT 0xc2
-#define IN_PARSE_FRAG_BKT3 0xe6
-
-#define IN_INPUT_APPEND_WC 0x91
-#define IN_INPUT_APPEND_WV 0x92
-#define IN_INPUT_APPEND_BKT 0x93
-
-#define IN_PARSE_FINISH_WC 0x9d
-#define IN_PARSE_FINISH_WC3 0xea
-
-#define IN_PARSE_FINISH_WV 0xbd
-#define IN_PARSE_FINISH_WV3 0xeb
-
-#define IN_PARSE_FINISH_BKT 0xbf
-#define IN_PARSE_FINISH_BKT3 0xec
-
-#define IN_PCR_CALL 0xe0
-#define IN_PCR_RET 0xe3
-#define IN_PCR_END_DECK 0xed
-
-#define IN_PARSE_EXTRACT_INPUT
-
-#define IN_OPEN_FILE 0x9e
-#define IN_GET_STDIN 0x9f
-#define IN_GET_STDOUT 0xa0
-#define IN_GET_STDERR 0xa1
-#define IN_LOAD_ARGV 0xa2
-#define IN_TO_UPPER 0xa3
-#define IN_TO_LOWER 0xa4
-#define IN_EXIT 0xa5
-#define IN_ERROR 0xa6
-
-#define IN_LOAD_ACCUM_R 0xa8
-#define IN_LOAD_ACCUM_WV 0xa9
-#define IN_LOAD_ACCUM_WC 0xaa
-#define IN_LOAD_ACCUM_BKT 0xab
-
-#define IN_LOAD_INPUT_R 0x98
-#define IN_LOAD_INPUT_WV 0x99
-#define IN_LOAD_INPUT_WC 0x9a
-#define IN_LOAD_INPUT_BKT 0x9b
-
-#define IN_INPUT_PUSH_WV 0xf9
-#define IN_INPUT_PUSH_BKT 0xfa
-#define IN_INPUT_PUSH_IGNORE_WV 0xfb
-
-#define IN_LOAD_CONTEXT_R 0xac
-#define IN_LOAD_CONTEXT_WV 0xad
-#define IN_LOAD_CONTEXT_WC 0xae
-#define IN_LOAD_CONTEXT_BKT 0xaf
-
-#define IN_GET_ACCUM_CTX_R 0xb0
-#define IN_GET_ACCUM_CTX_WC 0xb1
-#define IN_GET_ACCUM_CTX_WV 0xb2
-#define IN_SET_ACCUM_CTX_WC 0xb3
-#define IN_SET_ACCUM_CTX_WV 0xb4
-
-#define IN_LOAD_CTX_R 0xb5
-#define IN_LOAD_CTX_WC 0xb6
-#define IN_LOAD_CTX_WV 0xb7
-#define IN_LOAD_CTX_BKT 0xb8
-
-#define IN_SPRINTF 0xcf
-
-#define IN_GET_RHS_VAL_R 0xd1
-#define IN_GET_RHS_VAL_WC 0xd2
-#define IN_GET_RHS_VAL_WV 0xd3
-#define IN_GET_RHS_VAL_BKT 0xd4
-#define IN_SET_RHS_VAL_WC 0xd5
-#define IN_SET_RHS_VAL_WV 0xd6
-#define IN_SET_RHS_VAL_BKT 0xd7
-
-#define IN_CONSTRUCT_INPUT 0x9c
-#define IN_SET_INPUT 0xa7
-#define IN_GET_INPUT 0xb9
-
-/* Types */
-#define TYPE_NIL 0x01
-#define TYPE_TREE 0x02
-#define TYPE_REF 0x03
-#define TYPE_PTR 0x04
-#define TYPE_ITER 0x05
-#define TYPE_IGNORE_LIST 0x06
-
-/* Types of Generics. */
-#define GEN_LIST 0x10
-#define GEN_MAP 0x11
-#define GEN_VECTOR 0x12
-#define GEN_PARSER 0x13
-
-/* Virtual machine stack size, number of pointers.
- * This will be mmapped. */
-#define VM_STACK_SIZE (SIZEOF_WORD*1024ll*1024ll)
-
-/* Known language element ids. */
-#define LEL_ID_PTR 1
-#define LEL_ID_BOOL 2
-#define LEL_ID_INT 3
-#define LEL_ID_STR 4
-#define LEL_ID_STREAM 5
-#define LEL_ID_INPUT 6
-#define LEL_ID_IGNORE 7
-
-/*
- * Flags
- */
-
-/* A tree that has been generated by a termDup. */
-#define PF_TERM_DUP 0x0001
-
-/* Has been processed by the commit function. All children have also been
- * processed. */
-#define PF_COMMITTED 0x0002
-
-/* Created by a token generation action, not made from the input. */
-#define PF_ARTIFICIAL 0x0004
-
-/* Named node from a pattern or constructor. */
-#define PF_NAMED 0x0008
-
-/* There is reverse code associated with this tree node. */
-#define PF_HAS_RCODE 0x0010
-
-#define PF_RIGHT_IGNORE 0x0020
-
-#define PF_LEFT_IL_ATTACHED 0x0400
-#define PF_RIGHT_IL_ATTACHED 0x0800
-
-#define AF_LEFT_IGNORE 0x0100
-#define AF_RIGHT_IGNORE 0x0200
-
-#define AF_SUPPRESS_LEFT 0x4000
-#define AF_SUPPRESS_RIGHT 0x8000
-
-/*
- * Call stack.
- */
-
-/* Number of spots in the frame, after the args. */
-#define FR_AA 4
-
-/* Positions relative to the frame pointer. */
-#define FR_RV 3 /* return value */
-#define FR_RI 2 /* return instruction */
-#define FR_RFP 1 /* return frame pointer */
-#define FR_RFD 0 /* return frame id. */
-
-/*
- * Calling Convention:
- * a1
- * a2
- * a3
- * ...
- * return value FR_RV
- * return instr FR_RI
- * return frame ptr FR_RFP
- * return frame id FR_RFD
- */
-
-/*
- * User iterator call stack.
- * Adds an iframe pointer, removes the return value.
- */
-
-/* Number of spots in the frame, after the args. */
-#define IFR_AA 3
-
-/* Positions relative to the frame pointer. */
-#define IFR_RIN 2 /* return instruction */
-#define IFR_RIF 1 /* return iframe pointer */
-#define IFR_RFR 0 /* return frame pointer */
-
-/* Exported to modules other than bytecode.c */
-#define vm_push(i) /*if ( sp == prg->se ) vm_grow( prg ); */ (*(--sp) = (i))
-#define vm_pop() (*sp++)
-#define vm_top() (*sp)
-#define vm_ptop() (sp)
-#define vm_pop_ignore() (sp++)
-
-void vm_grow( struct ColmProgram * );
-
-typedef Tree *SW;
-typedef Tree **StackPtr;
-
-
-/* Can't use sizeof() because we have used types that are bigger than the
- * serial representation. */
-#define SIZEOF_CODE 1
-#define SIZEOF_HALF 2
-#define SIZEOF_WORD sizeof(Word)
-
-typedef struct _Execution
-{
- Parser *parser;
- PdaRun *pdaRun;
- FsmRun *fsmRun;
- InputStream *inputStream;
- Tree **framePtr;
- Tree **iframePtr;
- long frameId;
- long rcodeUnitLen;
-} Execution;
-
-long stringLength( Head *str );
-const char *stringData( Head *str );
-Head *stringAllocFull( struct ColmProgram *prg, const char *data, long length );
-Head *stringCopy( struct ColmProgram *prg, Head *head );
-void stringFree( struct ColmProgram *prg, Head *head );
-void stringShorten( Head *tokdata, long newlen );
-Head *concatStr( Head *s1, Head *s2 );
-Word strAtoi( Head *str );
-Word strUord16( Head *head );
-Word strUord8( Head *head );
-Word cmpString( Head *s1, Head *s2 );
-Head *stringToUpper( Head *s );
-Head *stringToLower( Head *s );
-Head *stringSprintf( struct ColmProgram *prg, Str *format, Int *integer );
-
-Head *makeLiteral( struct ColmProgram *prg, long litoffset );
-Head *intToStr( struct ColmProgram *prg, Word i );
-
-Tree *constructString( struct ColmProgram *prg, Head *s );
-
-void initExecution( Execution *exec, Parser *parser, PdaRun *pdaRun, FsmRun *fsmRun, InputStream *inputStream, int frameId );
-
-void mainExecution( struct ColmProgram *prg, Execution *exec, Code *code );
-void reductionExecution( Execution *exec, Tree **sp );
-void generationExecution( Execution *exec, Tree **sp );
-void reverseExecution( Execution *exec, Tree **sp, RtCodeVect *allRev );
-
-Kid *allocAttrs( struct ColmProgram *prg, long length );
-void freeAttrs( struct ColmProgram *prg, Kid *attrs );
-void setAttr( Tree *tree, long pos, Tree *val );
-Kid *getAttrKid( Tree *tree, long pos );
-
-Tree *splitTree( struct ColmProgram *prg, Tree *t );
-void rcodeDownrefAll( struct ColmProgram *prg, Tree **sp, RtCodeVect *cv );
-void commitFull( struct ColmProgram *prg, Tree **sp, PdaRun *pdaRun, long commitReduce );
-Tree *getParsedRoot( PdaRun *pdaRun, int stop );
-void splitRef( struct ColmProgram *prg, Tree ***sp, Ref *fromRef );
-
-void allocGlobal( struct ColmProgram *prg );
-Tree **executeCode( struct ColmProgram *prg, Execution *exec, Tree **sp, Code *instr );
-void rcodeDownref( struct ColmProgram *prg, Tree **sp, Code *instr );
-Tree **stackAlloc();
-Code *popReverseCode( RtCodeVect *allRev );
-void sendBackBuffered( FsmRun *fsmRun, InputStream *inputStream );
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
diff --git a/colm/closure.cc b/colm/closure.cc
deleted file mode 100644
index 37b0e259..00000000
--- a/colm/closure.cc
+++ /dev/null
@@ -1,458 +0,0 @@
-/*
- * Copyright 2005-2012 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Colm.
- *
- * Colm is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Colm is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Colm; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include "global.h"
-#include "parsedata.h"
-
-#include "vector.h"
-#include <assert.h>
-#include <string.h>
-#include <iostream>
-
-using std::endl;
-using std::cerr;
-
-void Compiler::lr0BringInItem( PdaGraph *pdaGraph, PdaState *dest, PdaState *prodState,
- PdaTrans *expandFrom, Definition *prod )
-{
- /* We use dot sets for finding unique states. In the future, should merge
- * dot sets with the stateSet pointer (only need one of these). */
- assert( dest != prodState );
- dest->dotSet.insert( prodState->dotSet );
-
- /* Get the epsilons, context, out priorities. */
- dest->pendingCommits.insert( prodState->pendingCommits );
- //if ( prodState->pendingCommits.length() > 0 )
- // cerr << "THERE ARE PENDING COMMITS DRAWN IN" << endl;
-
- if ( prodState->transMap.length() > 0 ) {
- assert( prodState->transMap.length() == 1 );
- PdaTrans *srcTrans = prodState->transMap[0].value;
-
- /* Look for the source in the destination. */
- TransMapEl *destTel = dest->transMap.find( srcTrans->lowKey );
- if ( destTel == 0 ) {
- /* Make a new state and transition to it. */
- PdaState *newState = pdaGraph->addState();
- PdaTrans *newTrans = new PdaTrans();
-
- /* Attach the new transition to the new state. */
- newTrans->lowKey = srcTrans->lowKey;
- pdaGraph->attachTrans( dest, newState, newTrans );
- pdaGraph->addInTrans( newTrans, srcTrans );
-
- /* The transitions we make during lr0 closure are all shifts. */
- assert( newTrans->isShift );
- assert( srcTrans->isShift );
-
- /* The new state must have its state set setup. */
- newState->stateSet = new PdaStateSet;
- newState->stateSet->insert( srcTrans->toState );
-
- /* Insert the transition into the map. Be sure to set destTel, it
- * is needed below. */
- dest->transMap.insert( srcTrans->lowKey, newTrans, &destTel );
-
- /* If the item is a non-term, queue it for closure. */
- LangEl *langEl = langElIndex[srcTrans->lowKey];
- if ( langEl != 0 && langEl->type == LangEl::NonTerm ) {
- pdaGraph->transClosureQueue.append( newTrans );
- //cerr << "put to trans closure queue" << endl;
- }
- }
- else {
- //cerr << "merging transitions" << endl;
- destTel->value->toState->stateSet->insert( srcTrans->toState );
- pdaGraph->addInTrans( destTel->value, srcTrans );
- }
-
- /* If this is an expansion then we may need to bring in commits. */
- if ( expandFrom != 0 && expandFrom->commits.length() > 0 ) {
- //cerr << "SETTING COMMIT ON CLOSURE ROUND" << endl;
- destTel->value->commits.insert( expandFrom->commits );
-
- expandFrom->commits.empty();
- }
- }
- else {
- /* ProdState does not have any transitions out. It is at the end of a
- * production. */
- if ( expandFrom != 0 && expandFrom->commits.length() > 0 ) {
- //cerr << "SETTING COMMIT IN PENDING LOOKAHEAD" << endl;
- for ( LongSet::Iter len = expandFrom->commits; len.lte(); len++ )
- dest->pendingCommits.insert( ProdIdPair( prod->prodId, *len ) );
-
- expandFrom->commits.empty();
- }
- }
-}
-
-void Compiler::lr0InvokeClosure( PdaGraph *pdaGraph, PdaState *state )
-{
- /* State should not already be closed. */
- assert( !state->inClosedMap );
-
- /* This is used each time we invoke closure, it must be cleared. */
- pdaGraph->transClosureQueue.abandon();
-
- /* Drag in the core items. */
- for ( PdaStateSet::Iter ssi = *state->stateSet; ssi.lte(); ssi++ )
- lr0BringInItem( pdaGraph, state, *ssi, 0, 0 );
-
- /* Now bring in the derived items. */
- while ( pdaGraph->transClosureQueue.length() > 0 ) {
- PdaTrans *toClose = pdaGraph->transClosureQueue.detachFirst();
- //cerr << "have a transition to derive" << endl;
-
- /* Get the langEl. */
- LangEl *langEl = langElIndex[toClose->lowKey];
-
- /* Make graphs for all of the productions that the non
- * terminal goes to that are not already in the state's dotSet. */
- for ( LelDefList::Iter prod = langEl->defList; prod.lte(); prod++ ) {
- /* Bring in the start state of the production. */
- lr0BringInItem( pdaGraph, state, prod->fsm->startState, toClose, prod );
- }
- }
-
- /* Try and insert into the closed dict. */
- DotSetMapEl *lastFound;
- if ( pdaGraph->closedMap.insert( state, &lastFound ) ) {
- /* Insertion into closed dict succeeded. There is no state with the
- * same dot set. The state is now closed. It is guaranteed a spot in
- * the closed dict and it will never go away (states never deleted
- * during closure). */
- pdaGraph->stateClosedList.append( state );
- state->inClosedMap = true;
-
- /* Add all of the states in the out transitions to the closure queue.
- * This will give us a breadth first search of the graph. */
- for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) {
- /* Get the state the transEl goes to. */
- PdaState *targ = trans->value->toState;
-
- /* If the state on this transition has not already been slated
- * for closure, then add it to the queue. */
- if ( !targ->onClosureQueue && !targ->inClosedMap ) {
- pdaGraph->stateClosureQueue.append( targ );
- targ->onClosureQueue = true;
- }
- }
- }
- else {
- /* Insertion into closed dict failed. There is an existing state
- * with the same dot set. Get the existing state. */
- pdaGraph->inTransMove( lastFound, state );
- for ( TransMap::Iter tel = state->transMap; tel.lte(); tel++ ) {
- pdaGraph->stateList.detach( tel->value->toState );
- delete tel->value->toState;
- delete tel->value;
- }
- pdaGraph->stateList.detach( state );
- delete state;
- }
-}
-
-/* Invoke closure on the graph. We use a queue here to achieve a breadth
- * first search of the tree we build. Note, there are back edges in this
- * tree. They are the edges made when, upon closure, a dot set already
- * exists. */
-void Compiler::lr0CloseAllStates( PdaGraph *pdaGraph )
-{
- /* While there are items on the closure queue. */
- while ( pdaGraph->stateClosureQueue.length() > 0 ) {
- /* Pop the first item off. */
- PdaState *state = pdaGraph->stateClosureQueue.detachFirst();
- state->onClosureQueue = false;
-
- /* Invoke closure upon the state. */
- lr0InvokeClosure( pdaGraph, state );
- }
-}
-
-void Compiler::transferCommits( PdaGraph *pdaGraph, PdaTrans *trans,
- PdaState *state, long prodId )
-{
- ProdIdPairSet &pendingCommits = state->pendingCommits;
- for ( ProdIdPairSet::Iter pi = pendingCommits; pi.lte(); pi++ ) {
- if ( pi->onReduce == prodId )
- trans->commits.insert( pi->length );
- }
-}
-
-void Compiler::lalr1AddFollow2( PdaGraph *pdaGraph, PdaTrans *trans, FollowToAdd &followKeys )
-{
- for ( ExpandToSet::Iter ets = trans->expandTo; ets.lte(); ets++ ) {
- int prodId = ets->prodId;
- PdaState *expandTo = ets->state;
-
- for ( FollowToAdd::Iter fkey = followKeys; fkey.lte(); fkey++ ) {
- TransMapEl *transEl = expandTo->transMap.find( fkey->key );
-
- if ( transEl != 0 ) {
- /* Transition already exists: add the reduction into it. */
- PdaTrans *destTrans = transEl->value;
-
- transferCommits( pdaGraph, destTrans, expandTo, prodId );
-
- pdaGraph->addInReduction( destTrans, prodId, fkey->value );
- }
- else {
- /* Set up the follow transition. */
- PdaTrans *followTrans = new PdaTrans;
- followTrans->lowKey = fkey->key;
- followTrans->isShift = false;
- followTrans->reductions.insert( prodId, fkey->value );
-
- transferCommits( pdaGraph, followTrans, expandTo, prodId );
-
- pdaGraph->attachTrans( expandTo, actionDestState, followTrans );
- expandTo->transMap.insert( followTrans->lowKey, followTrans );
- pdaGraph->transClosureQueue.append( followTrans );
- }
- }
- }
-}
-
-long PdaTrans::maxPrior()
-{
- long prior = LONG_MIN;
- if ( isShift && shiftPrior > prior )
- prior = shiftPrior;
- for ( ReductionMap::Iter red = reductions; red.lte(); red++ ) {
- if ( red->value > prior )
- prior = red->value;
- }
- return prior;
-}
-
-void Compiler::lalr1AddFollow1( PdaGraph *pdaGraph, PdaState *state )
-{
- /* Finding non-terminals into the state. */
- for ( PdaTransInList::Iter in = state->inRange; in.lte(); in++ ) {
- long key = in->lowKey;
- LangEl *langEl = langElIndex[key];
- if ( langEl != 0 && langEl->type == LangEl::NonTerm ) {
- /* Finding the following transitions. */
- FollowToAdd followKeys;
- for ( TransMap::Iter fout = state->transMap; fout.lte(); fout++ ) {
- int fkey = fout->key;
- LangEl *flel = langElIndex[fkey];
- if ( flel == 0 || flel->type == LangEl::Term ) {
- long prior = fout->value->maxPrior();
- followKeys.insert( fkey, prior );
- }
- }
-
- if ( followKeys.length() > 0 )
- lalr1AddFollow2( pdaGraph, in, followKeys );
- }
- }
-}
-
-void Compiler::lalr1AddFollow2( PdaGraph *pdaGraph, PdaTrans *trans,
- long followKey, long prior )
-{
- for ( ExpandToSet::Iter ets = trans->expandTo; ets.lte(); ets++ ) {
- int prodId = ets->prodId;
- PdaState *expandTo = ets->state;
-
- TransMapEl *transEl = expandTo->transMap.find( followKey );
- if ( transEl != 0 ) {
- /* Add in the reductions, or in the shift. */
- PdaTrans *destTrans = transEl->value;
-
- transferCommits( pdaGraph, destTrans, expandTo, prodId );
-
- pdaGraph->addInReduction( destTrans, prodId, prior );
- }
- else {
- /* Set up the follow transition. */
- PdaTrans *followTrans = new PdaTrans;
- followTrans->lowKey = followKey;
- followTrans->isShift = false;
- followTrans->reductions.insert( prodId, prior );
-
- transferCommits( pdaGraph, followTrans, expandTo, prodId );
-
- pdaGraph->attachTrans( expandTo, actionDestState, followTrans );
- expandTo->transMap.insert( followTrans->lowKey, followTrans );
- pdaGraph->transClosureQueue.append( followTrans );
- }
- }
-}
-
-void Compiler::lalr1AddFollow1( PdaGraph *pdaGraph, PdaTrans *trans )
-{
- PdaState *state = trans->fromState;
- int fkey = trans->lowKey;
- LangEl *flel = langElIndex[fkey];
- if ( flel == 0 || flel->type == LangEl::Term ) {
- /* Finding non-terminals into the state. */
- for ( PdaTransInList::Iter in = state->inRange; in.lte(); in++ ) {
- long key = in->lowKey;
- LangEl *langEl = langElIndex[key];
- if ( langEl != 0 && langEl->type == LangEl::NonTerm ) {
- //cerr << "FOLLOW PRIOR TRANSFER 2: " << prior << endl;
- long prior = trans->maxPrior();
- lalr1AddFollow2( pdaGraph, in, fkey, prior );
- }
- }
- }
-}
-
-/* Add follow sets to an LR(0) graph to make it LALR(1). */
-void Compiler::lalr1AddFollowSets( PdaGraph *pdaGraph, LangElSet &parserEls )
-{
- /* Make the state that all reduction actions go to. Since a reduction pops
- * states off the stack and sets the new target state, this state is
- * actually never reached. It is just here to link the transitions to. */
- actionDestState = pdaGraph->addState();
- pdaGraph->setFinState( actionDestState );
-
- for ( LangElSet::Iter pe = parserEls; pe.lte(); pe++ ) {
- /* Get the entry into the graph and traverse over start. */
- PdaState *overStart = pdaGraph->followFsm( (*pe)->startState, (*pe)->rootDef->fsm );
-
- /* Add _eof after the initial _start. */
- PdaTrans *eofTrans = pdaGraph->insertNewTrans( overStart, actionDestState,
- (*pe)->eofLel->id, (*pe)->eofLel->id );
- eofTrans->isShift = true;
- }
-
- /* This was used during lr0 table construction. */
- pdaGraph->transClosureQueue.abandon();
-
- /* Need to pass over every state initially. */
- for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ )
- lalr1AddFollow1( pdaGraph, state );
-
- /* While the closure queue has items, pop them off and add follow
- * characters. */
- while ( pdaGraph->transClosureQueue.length() > 0 ) {
- /* Pop the first item off and add follow for it. */
- PdaTrans *trans = pdaGraph->transClosureQueue.detachFirst();
- lalr1AddFollow1( pdaGraph, trans );
- }
-}
-
-void Compiler::linkExpansions( PdaGraph *pdaGraph )
-{
- pdaGraph->setStateNumbers();
- for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
- /* Find transitions out on non terminals. */
- for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) {
- long key = trans->key;
- LangEl *langEl = langElIndex[key];
- if ( langEl != 0 && langEl->type == LangEl::NonTerm ) {
- /* For each production that the non terminal expands to ... */
- for ( LelDefList::Iter prod = langEl->defList; prod.lte(); prod++ ) {
- /* Follow the production and add to the trans's expand to set. */
- PdaState *followRes = pdaGraph->followFsm( state, prod->fsm );
-
- //LangEl *lel = langElIndex[key];
- //cerr << state->stateNum << ", ";
- //if ( lel != 0 )
- // cerr << lel->data;
- //else
- // cerr << (char)key;
- //cerr << " -> " << (*fto)->stateNum << " on " <<
- // prod->data << " (fss = " << fin.pos() << ")" << endl;
- trans->value->expandTo.insert( ExpandToEl( followRes, prod->prodId ) );
- }
- }
- }
- }
-}
-
-/* Add terminal versions of all nonterminal transitions. */
-void Compiler::addDupTerms( PdaGraph *pdaGraph )
-{
- for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
- PdaTransList newTranitions;
- for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) {
- LangEl *lel = langElIndex[trans->value->lowKey];
- if ( lel->type == LangEl::NonTerm ) {
- PdaTrans *dupTrans = new PdaTrans;
- dupTrans->lowKey = lel->termDup->id;
- dupTrans->isShift = true;
-
- /* Save the target state in toState. In the next loop when we
- * attach the transition we must clear this because the
- * attaching code requires the transition to be unattached. */
- dupTrans->toState = trans->value->toState;
- newTranitions.append( dupTrans );
-
- /* Commit code used? */
- //transferCommits( pdaGraph, followTrans, expandTo, prodId );
- }
- }
-
- for ( PdaTrans *dup = newTranitions.head; dup != 0; ) {
- PdaTrans *next = dup->next;
- PdaState *toState = dup->toState;
- dup->toState = 0;
- pdaGraph->attachTrans( state, toState, dup );
- state->transMap.insert( dup->lowKey, dup );
- dup = next;
- }
- }
-}
-
-/* Generate a LALR(1) graph. */
-void Compiler::lalr1GenerateParser( PdaGraph *pdaGraph, LangElSet &parserEls )
-{
- /* Make the initial graph. */
- pdaGraph->langElIndex = langElIndex;
-
- for ( Vector<LangEl*>::Iter r = parserEls; r.lte(); r++ ) {
- /* Create the entry point. */
- PdaState *rs = pdaGraph->addState();
- pdaGraph->entryStateSet.insert( rs );
-
- /* State set of just one state. */
- rs->stateSet = new PdaStateSet;
- rs->stateSet->insert( (*r)->rootDef->fsm->startState );
-
- /* Queue the start state for closure. */
- rs->onClosureQueue = true;
- pdaGraph->stateClosureQueue.append( rs );
-
- (*r)->startState = rs;
- }
-
- /* Run the lr0 closure. */
- lr0CloseAllStates( pdaGraph );
-
- /* Add terminal versions of all nonterminal transitions. */
- addDupTerms( pdaGraph );
-
- /* Link production expansions to the place they expand to. */
- linkExpansions( pdaGraph );
-
- /* Walk the graph adding follow sets to the LR(0) graph. */
- lalr1AddFollowSets( pdaGraph, parserEls );
-
-// /* Set the commit on the final eof shift. */
-// PdaTrans *overStart = pdaGraph->startState->findTrans( rootEl->id );
-// PdaTrans *eofTrans = overStart->toState->findTrans( eofLangEl->id );
-// eofTrans->afterShiftCommits.insert( 2 );
-}
diff --git a/colm/codegen.cc b/colm/codegen.cc
deleted file mode 100644
index 4403cf8f..00000000
--- a/colm/codegen.cc
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright 2006-2012 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Colm.
- *
- * Colm is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Colm is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Colm; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include "parsedata.h"
-#include "fsmcodegen.h"
-#include "redfsm.h"
-#include "bstmap.h"
-#include "fsmrun.h"
-#include "debug.h"
-#include <sstream>
-#include <string>
-
-
-void FsmCodeGen::writeMain()
-{
- out <<
- "int main( int argc, const char **argv )\n"
- "{\n"
- " struct ColmProgram *prg;\n"
- " int exitStatus;\n"
- " colmInit( " << colmActiveRealm << " );\n"
- " prg = colmNewProgram( &main_runtimeData, argc, argv );\n"
- " colmRunProgram( prg );\n"
- " exitStatus = colmDeleteProgram( prg );\n"
- " return exitStatus;\n"
- "}\n"
- "\n";
-
- out.flush();
-}
-
-
diff --git a/colm/codevect.c b/colm/codevect.c
deleted file mode 100644
index a05c7ea4..00000000
--- a/colm/codevect.c
+++ /dev/null
@@ -1,183 +0,0 @@
-/*
- * Copyright 2010-2012 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Aapl.
- *
- * Aapl is free software; you can redistribute it and/or modify it under the
- * terms of the GNU Lesser General Public License as published by the Free
- * Software Foundation; either version 2.1 of the License, or (at your option)
- * any later version.
- *
- * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY
- * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
- * more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with Aapl; if not, write to the Free Software Foundation, Inc., 59
- * Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include <colm/rtvector.h>
-#include <colm/pdarun.h>
-
-#include <string.h>
-#include <stdlib.h>
-#include <assert.h>
-
-
-/* Put a code vector into the empty state: no buffer, zero capacity and zero
- * length. */
-void initRtCodeVect( RtCodeVect *vect )
-{
-	vect->allocLen = 0;
-	vect->tabLen = 0;
-	vect->data = 0;
-}
-
-/* Growth policy: keep the existing capacity while it is large enough,
- * otherwise allocate twice what is needed. */
-static long newSizeUp( long existing, long needed )
-{
-	if ( needed <= existing )
-		return existing;
-	return needed << 1;
-}
-
-/* Shrink policy: once the needed length drops below a quarter of the
- * existing capacity, shrink to twice what is needed; otherwise keep the
- * existing capacity. */
-static long newSizeDown( long existing, long needed )
-{
-	const long quarter = existing >> 2;
-
-	if ( needed >= quarter )
-		return existing;
-	return needed << 1;
-}
-
-/* Grow the allocation so that it can hold at least len elements. The new
- * capacity is chosen by newSizeUp. Reads and writes allocLen and data. Does
- * not read or write tabLen.
- *
- * The buffer pointer and allocLen are only updated after the allocation has
- * succeeded, so a failed realloc never loses the existing buffer. This
- * function has no way to report an error and callers write into the buffer
- * right after it returns, so running out of memory aborts the process. */
-static void upResize( RtCodeVect *vect, long len )
-{
-	Code *grown;
-
-	/* Ask the growth policy what the new capacity will be. */
-	long newLen = newSizeUp( vect->allocLen, len );
-
-	/* Nothing to do unless the capacity actually grows. */
-	if ( newLen <= vect->allocLen )
-		return;
-
-	/* realloc on a null pointer behaves like malloc, so one call covers
-	 * both creating the table and growing an existing one. */
-	grown = (Code*) realloc( vect->data, sizeof(Code) * newLen );
-	if ( grown == 0 )
-		abort();
-
-	vect->data = grown;
-	vect->allocLen = newLen;
-}
-
-/* Shrink the allocation for len elements. The new capacity is chosen by
- * newSizeDown. Reads and writes allocLen and data. Does not read or write
- * tabLen.
- *
- * If the shrinking realloc fails, the existing (larger) buffer is still valid,
- * so it is kept and allocLen is left unchanged instead of overwriting data
- * with a null pointer. */
-static void downResize( RtCodeVect *vect, long len )
-{
-	Code *shrunk;
-
-	/* Ask the shrink policy what the new capacity will be. */
-	long newLen = newSizeDown( vect->allocLen, len );
-
-	/* Nothing to do unless the capacity actually shrinks. */
-	if ( newLen >= vect->allocLen )
-		return;
-
-	if ( newLen == 0 ) {
-		/* Shrinking to nothing, simply free the data. */
-		free( vect->data );
-		vect->data = 0;
-		vect->allocLen = 0;
-		return;
-	}
-
-	/* Not shrinking to size zero, realloc it to the smaller size. */
-	shrunk = (Code*) realloc( vect->data, sizeof(Code) * newLen );
-	if ( shrunk == 0 )
-		return;
-
-	vect->data = shrunk;
-	vect->allocLen = newLen;
-}
-
-
-/* Release the buffer of a code vector, if it has one, and reset its length
- * and capacity to zero. A vector without a buffer is left untouched. */
-void rtCodeVectEmpty( RtCodeVect *vect )
-{
-	if ( vect->data == 0 )
-		return;
-
-	free( vect->data );
-	vect->data = 0;
-	vect->allocLen = 0;
-	vect->tabLen = 0;
-}
-
-/* Overwrite len elements starting at pos with the elements at val, growing
- * the vector when the written range extends past its current length. A
- * negative pos is taken relative to the current length. */
-void rtCodeVectReplace( RtCodeVect *vect, long pos, const Code *val, long len )
-{
-	long stop, left;
-	Code *slot;
-
-	/* Negative positions count back from the end. */
-	if ( pos < 0 )
-		pos += vect->tabLen;
-
-	/* One past the last element that will be written. */
-	stop = pos + len;
-
-	/* Writing past the end extends the vector: make room first, then
-	 * record the new length. */
-	if ( stop > vect->tabLen ) {
-		upResize( vect, stop );
-		vect->tabLen = stop;
-	}
-
-	/* Copy the new elements in, front to back. */
-	slot = vect->data + pos;
-	for ( left = len; left > 0; left-- )
-		*slot++ = *val++;
-}
-
-/* Remove len elements starting at pos, sliding any following elements down
- * and shrinking the allocation when the shrink policy asks for it. A negative
- * pos is taken relative to the current length. */
-void rtCodeVectRemove( RtCodeVect *vect, long pos, long len )
-{
-	long tail, remaining;
-	Code *hole;
-
-	/* Negative positions count back from the end. */
-	if ( pos < 0 )
-		pos += vect->tabLen;
-
-	/* Where the removed range starts, and how many elements follow it. */
-	hole = vect->data + pos;
-	tail = vect->tabLen - ( pos + len );
-
-	/* Close the gap if there is anything behind it. */
-	if ( len > 0 && tail > 0 )
-		memmove( hole, hole + len, sizeof(Code) * tail );
-
-	/* Give memory back if possible, then record the new length. */
-	remaining = vect->tabLen - len;
-	downResize( vect, remaining );
-	vect->tabLen = remaining;
-}
-
-
diff --git a/colm/colm.h b/colm/colm.h
deleted file mode 100644
index 4f169254..00000000
--- a/colm/colm.h
+++ /dev/null
@@ -1,55 +0,0 @@
-#ifndef __COLM_COLM_H
-#define __COLM_COLM_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct ColmTree;
-struct ColmKid;
-struct ColmProgram;
-struct ColmRuntimeData;
-
-void colmInit( long debugRealm ); /* first call made by the generated main, which passes the active debug realm */
-struct ColmProgram *colmNewProgram( struct ColmRuntimeData *rtd, int argc, const char **argv ); /* generated main passes &main_runtimeData and its own argc/argv */
-void colmRunProgram( struct ColmProgram *prg ); /* called on the program returned by colmNewProgram */
-int colmDeleteProgram( struct ColmProgram *prg ); /* generated main returns this value as its exit status */
-
-struct ColmPrintArgs /* options and callbacks taken by printTreeArgs and the print callbacks declared below */
-{
- void *arg; /* presumably opaque user data for the callbacks -- TODO confirm */
- int comm; /* NOTE(review): flag meaning is not visible in this header */
- int attr; /* NOTE(review): flag meaning is not visible in this header */
- int trim; /* NOTE(review): flag meaning is not visible in this header */
- void (*out)( struct ColmPrintArgs *args, const char *data, int length ); /* receives these args plus a data pointer and its length */
- void (*openTree)( struct ColmProgram *prg, struct ColmTree **sp,
- struct ColmPrintArgs *args, struct ColmKid *parent, struct ColmKid *kid ); /* same signature as printNull */
- void (*printTerm)( struct ColmProgram *prg, struct ColmTree **sp,
- struct ColmPrintArgs *args, struct ColmKid *kid ); /* same signature as printTermTree */
- void (*closeTree)( struct ColmProgram *prg, struct ColmTree **sp,
- struct ColmPrintArgs *args, struct ColmKid *parent, struct ColmKid *kid ); /* same signature as printNull */
-};
-
-void printNull( struct ColmProgram *prg, struct ColmTree **sp,
- struct ColmPrintArgs *args, struct ColmKid *parent, struct ColmKid *kid );
-void printTermTree( struct ColmProgram *prg, struct ColmTree **sp,
- struct ColmPrintArgs *printArgs, struct ColmKid *kid );
-
-struct ColmTree **vm_root( struct ColmProgram *prg );
-struct ColmTree *returnVal( struct ColmProgram *prg );
-void printTreeArgs( struct ColmProgram *prg, struct ColmTree **sp,
- struct ColmPrintArgs *printArgs, struct ColmTree *tree );
-
-int repeatEnd( struct ColmTree *tree );
-int listLast( struct ColmTree *tree );
-struct ColmTree *getRhsVal( struct ColmProgram *prg, struct ColmTree *tree, int *a );
-struct ColmTree *getAttr( struct ColmTree *tree, long pos );
-struct ColmTree *getGlobal( struct ColmProgram *prg, long pos );
-struct ColmTree *getRepeatNext( struct ColmTree *tree );
-struct ColmTree *getRepeatVal( struct ColmTree *tree );
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
diff --git a/colm/compiler.cc b/colm/compiler.cc
deleted file mode 100644
index c1e775f2..00000000
--- a/colm/compiler.cc
+++ /dev/null
@@ -1,1496 +0,0 @@
-/*
- * Copyright 2006-2012 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Colm.
- *
- * Colm is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Colm is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Colm; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include <iostream>
-#include <iomanip>
-#include <errno.h>
-#include <stdlib.h>
-#include <limits.h>
-#include <sstream>
-
-#include "global.h"
-#include "lmparse.h"
-#include "parsedata.h"
-#include "parsetree.h"
-#include "mergesort.h"
-#include "redbuild.h"
-#include "pdacodegen.h"
-#include "fsmcodegen.h"
-#include "fsmrun.h"
-#include "pdarun.h"
-#include "colm.h"
-#include "pool.h"
-
-using namespace std;
-using std::ostringstream;
-
-char machineMain[] = "main";
-exit_object endp;
-/* Streaming an exit_object ends the current line, flushes the stream and
- * terminates the process with exit status 1. */
-void operator<<( ostream &out, exit_object & )
-{
-	endl( out );
-	exit( 1 );
-}
-
-/* Clean up and minimize a graph after an operation, but only when that
- * operation was the last one in its sequence. */
-void afterOpMinimize( FsmGraph *fsm, bool lastInSeq )
-{
-	if ( !lastInSeq )
-		return;
-
-	/* First clean up the graph. FsmGraph operations may leave unreachable
-	 * states lying around. There should be no dead end states: the subtract
-	 * and intersection operators are the only places where they may be
-	 * created and those operators clean them up. */
-	fsm->removeUnreachableStates();
-	fsm->minimizePartition2();
-}
-
-/* Sum the lengths of the out-transition lists of every state in the graph's
- * state list. */
-int countTransitions( FsmGraph *fsm )
-{
-	int total = 0;
-	for ( FsmState *st = fsm->stateList.head; st != 0; st = st->next )
-		total += st->outList.length();
-	return total;
-}
-
-/* Convert a hexadecimal literal to a key, checking it against the size of
- * the alphabet type.
- *
- * str is parsed with strtoul in base 16. If the literal does not fit in the
- * alphabet type an error is reported at loc. For signed alphabets narrower
- * than unsigned long, a value with the alphabet's top bit set is sign
- * extended so that it becomes negative when converted to long. pd is not
- * used.
- *
- * All shifts are done on unsigned long and only when the shift count is
- * smaller than the width of unsigned long, which keeps them well defined
- * regardless of the sizes of int and long. */
-Key makeFsmKeyHex( char *str, const InputLoc &loc, Compiler *pd )
-{
-	/* Reset errno so we can check for overflow from strtoul. */
-	errno = 0;
-	unsigned int size = keyOps->alphType->size;
-	unsigned int bits = size * 8;
-	bool unusedBits = size < sizeof(unsigned long);
-
-	unsigned long ul = strtoul( str, 0, 16 );
-
-	if ( errno == ERANGE || (unusedBits && ul >> bits) ) {
-		error(loc) << "literal " << str << " overflows the alphabet type" << endl;
-
-		/* Clamp to the bound that was tested against. When the alphabet
-		 * is as wide as unsigned long, strtoul has already returned its
-		 * own upper bound, so there is nothing to shift. */
-		if ( unusedBits )
-			ul = 1UL << bits;
-	}
-
-	/* Sign extend into all of the unused upper bits. */
-	if ( unusedBits && keyOps->alphType->isSigned && ul >> (bits - 1) )
-		ul |= ~0UL << bits;
-
-	return Key( (long)ul );
-}
-
-/* Convert a decimal literal to a key. Values outside the alphabet type's
- * [minVal, maxVal] range, or outside the range of strtoll, are reported at
- * loc and clamped to the violated bound. pd is not used. */
-Key makeFsmKeyDec( char *str, const InputLoc &loc, Compiler *pd )
-{
-	/* Reset errno so that a range error from strtoll can be detected. */
-	errno = 0;
-	const long long lowest = keyOps->alphType->minVal;
-	const long long highest = keyOps->alphType->maxVal;
-
-	long long parsed = strtoll( str, 0, 10 );
-	const bool rangeErr = ( errno == ERANGE );
-
-	if ( (rangeErr && parsed < 0) || parsed < lowest ) {
-		/* Underflow. */
-		error(loc) << "literal " << str << " underflows the alphabet type" << endl;
-		parsed = lowest;
-	}
-	else if ( (rangeErr && parsed > 0) || parsed > highest ) {
-		/* Overflow. */
-		error(loc) << "literal " << str << " overflows the alphabet type" << endl;
-		parsed = highest;
-	}
-
-	if ( !keyOps->alphType->isSigned )
-		return Key( (unsigned long)parsed );
-	return Key( (long)parsed );
-}
-
-/* Make an fsm key from an alphabet number returned by the parser. Literals
- * starting with "0x" go through the hex conversion, everything else through
- * the decimal conversion; both validate the number against the alphabet
- * type. */
-Key makeFsmKeyNum( char *str, const InputLoc &loc, Compiler *pd )
-{
-	const bool isHex = ( str[0] == '0' && str[1] == 'x' );
-
-	if ( isHex )
-		return makeFsmKeyHex( str, loc, pd );
-	return makeFsmKeyDec( str, loc, pd );
-}
-
-/* Make an fsm key from a single character. For unsigned alphabets the
- * character is first converted to an unsigned byte; for signed alphabets it
- * is used as a plain char. */
-Key makeFsmKeyChar( char c, Compiler *pd )
-{
-	if ( !keyOps->isSigned ) {
-		/* Copy from an unsigned byte type. */
-		return Key( (unsigned char)c );
-	}
-
-	/* Copy from a char type. */
-	return Key( c );
-}
-
-/* Fill result with len keys made from the characters at data. For signed
- * alphabets the characters are read as char, otherwise as unsigned bytes. */
-void makeFsmKeyArray( Key *result, char *data, int len, Compiler *pd )
-{
-	if ( keyOps->isSigned ) {
-		/* Read the input as plain chars. */
-		for ( int pos = 0; pos < len; pos++ )
-			result[pos] = Key( data[pos] );
-		return;
-	}
-
-	/* Read the input as unsigned bytes. */
-	unsigned char *bytes = (unsigned char*) data;
-	for ( int pos = 0; pos < len; pos++ )
-		result[pos] = Key( bytes[pos] );
-}
-
-/* Insert key into result. When caseInsensitive is set, also insert the
- * other-case counterpart of a lower or upper case key. */
-static void insertKeyFolded( KeySet &result, Key key, bool caseInsensitive )
-{
-	result.insert( key );
-	if ( !caseInsensitive )
-		return;
-
-	if ( key.isLower() )
-		result.insert( key.toUpper() );
-	else if ( key.isUpper() )
-		result.insert( key.toLower() );
-}
-
-/* Like makeFsmKeyArray except the keys are inserted into a set, so the
- * result has only unique keys and the ordering of the input is not kept. */
-void makeFsmUniqueKeyArray( KeySet &result, char *data, int len,
-		bool caseInsensitive, Compiler *pd )
-{
-	if ( keyOps->isSigned ) {
-		/* Read the input as plain chars. */
-		for ( int si = 0; si < len; si++ )
-			insertKeyFolded( result, Key( data[si] ), caseInsensitive );
-	}
-	else {
-		/* Read the input as unsigned bytes. */
-		unsigned char *bytes = (unsigned char*) data;
-		for ( int si = 0; si < len; si++ )
-			insertKeyFolded( result, Key( bytes[si] ), caseInsensitive );
-	}
-}
-
-/* Build a new graph with rangeFsm over the whole alphabet, from
- * keyOps->minKey to keyOps->maxKey. pd is not used. */
-FsmGraph *dotFsm( Compiler *pd )
-{
-	FsmGraph *anyChar = new FsmGraph();
-	anyChar->rangeFsm( keyOps->minKey, keyOps->maxKey );
-	return anyChar;
-}
-
-/* Build a new graph with rangeStarFsm over the whole alphabet, from
- * keyOps->minKey to keyOps->maxKey. pd is not used. */
-FsmGraph *dotStarFsm( Compiler *pd )
-{
-	FsmGraph *anyString = new FsmGraph();
-	anyString->rangeStarFsm( keyOps->minKey, keyOps->maxKey );
-	return anyString;
-}
-
-/* Make a builtin type. Depends on the signed nature of the alphabet type.
- *
- * Returns a newly allocated FsmGraph for the requested builtin. The switch
- * has no default case, so a value matching none of the cases yields the
- * initial null pointer. pd is only forwarded to dotFsm for BT_Any.
- * NOTE(review): the operand graphs handed to unionOp are not deleted here;
- * presumably unionOp takes ownership of them -- confirm. */
-FsmGraph *makeBuiltin( BuiltinMachine builtin, Compiler *pd )
-{
- /* FsmGraph created to return. */
- FsmGraph *retFsm = 0;
- bool isSigned = keyOps->isSigned;
-
- switch ( builtin ) {
- case BT_Any: {
- /* All characters. */
- retFsm = dotFsm( pd );
- break;
- }
- case BT_Ascii: {
- /* Ascii characters 0 to 127. */
- retFsm = new FsmGraph();
- retFsm->rangeFsm( 0, 127 );
- break;
- }
- case BT_Extend: {
- /* Ascii extended characters. This is the full byte range. Dependent
- * on signed, vs no signed. If the alphabet is one byte then just use
- * dot fsm.
- * NOTE(review): the code below never calls dotFsm for this case; both
- * branches build an explicit range. */
- if ( isSigned ) {
- retFsm = new FsmGraph();
- retFsm->rangeFsm( -128, 127 );
- }
- else {
- retFsm = new FsmGraph();
- retFsm->rangeFsm( 0, 255 );
- }
- break;
- }
- case BT_Alpha: {
- /* Alpha [A-Za-z]. */
- FsmGraph *upper = new FsmGraph(), *lower = new FsmGraph();
- upper->rangeFsm( 'A', 'Z' );
- lower->rangeFsm( 'a', 'z' );
- upper->unionOp( lower );
- upper->minimizePartition2();
- retFsm = upper;
- break;
- }
- case BT_Digit: {
- /* Digits [0-9]. */
- retFsm = new FsmGraph();
- retFsm->rangeFsm( '0', '9' );
- break;
- }
- case BT_Alnum: {
- /* Alpha numerics [0-9A-Za-z]. */
- FsmGraph *digit = new FsmGraph(), *lower = new FsmGraph();
- FsmGraph *upper = new FsmGraph();
- digit->rangeFsm( '0', '9' );
- upper->rangeFsm( 'A', 'Z' );
- lower->rangeFsm( 'a', 'z' );
- digit->unionOp( upper );
- digit->unionOp( lower );
- digit->minimizePartition2();
- retFsm = digit;
- break;
- }
- case BT_Lower: {
- /* Lower case characters. */
- retFsm = new FsmGraph();
- retFsm->rangeFsm( 'a', 'z' );
- break;
- }
- case BT_Upper: {
- /* Upper case characters. */
- retFsm = new FsmGraph();
- retFsm->rangeFsm( 'A', 'Z' );
- break;
- }
- case BT_Cntrl: {
- /* Control characters: the range 0 to 31 plus the single key 127. */
- FsmGraph *cntrl = new FsmGraph();
- FsmGraph *highChar = new FsmGraph();
- cntrl->rangeFsm( 0, 31 );
- highChar->concatFsm( 127 );
- cntrl->unionOp( highChar );
- cntrl->minimizePartition2();
- retFsm = cntrl;
- break;
- }
- case BT_Graph: {
- /* Graphical ascii characters [!-~]. */
- retFsm = new FsmGraph();
- retFsm->rangeFsm( '!', '~' );
- break;
- }
- case BT_Print: {
- /* Printable characters. Same as graph except includes space. */
- retFsm = new FsmGraph();
- retFsm->rangeFsm( ' ', '~' );
- break;
- }
- case BT_Punct: {
- /* Punctuation: the union of the four ranges [!-/], [:-@], [[-`]
- * and [{-~]. */
- FsmGraph *range1 = new FsmGraph();
- FsmGraph *range2 = new FsmGraph();
- FsmGraph *range3 = new FsmGraph();
- FsmGraph *range4 = new FsmGraph();
- range1->rangeFsm( '!', '/' );
- range2->rangeFsm( ':', '@' );
- range3->rangeFsm( '[', '`' );
- range4->rangeFsm( '{', '~' );
- range1->unionOp( range2 );
- range1->unionOp( range3 );
- range1->unionOp( range4 );
- range1->minimizePartition2();
- retFsm = range1;
- break;
- }
- case BT_Space: {
- /* Whitespace: [\t\v\f\n\r ]. Built as the range '\t' to '\r' plus
- * the space character. */
- FsmGraph *cntrl = new FsmGraph();
- FsmGraph *space = new FsmGraph();
- cntrl->rangeFsm( '\t', '\r' );
- space->concatFsm( ' ' );
- cntrl->unionOp( space );
- cntrl->minimizePartition2();
- retFsm = cntrl;
- break;
- }
- case BT_Xdigit: {
- /* Hex digits [0-9A-Fa-f]. */
- FsmGraph *digit = new FsmGraph();
- FsmGraph *upper = new FsmGraph();
- FsmGraph *lower = new FsmGraph();
- digit->rangeFsm( '0', '9' );
- upper->rangeFsm( 'A', 'F' );
- lower->rangeFsm( 'a', 'f' );
- digit->unionOp( upper );
- digit->unionOp( lower );
- digit->minimizePartition2();
- retFsm = digit;
- break;
- }
- case BT_Lambda: {
- retFsm = new FsmGraph();
- retFsm->lambdaFsm();
- break;
- }
- case BT_Empty: {
- retFsm = new FsmGraph();
- retFsm->emptyFsm();
- break;
- }}
-
- return retFsm;
-}
-
-/* Report whether this name inst, or any name inst in the tree below it, has
- * a non-zero reference count. The search stops at the first hit. */
-bool NameInst::anyRefsRec()
-{
-	bool found = numRefs > 0;
-
-	/* Recurse on children until one of them reports a reference. */
-	for ( NameVect::Iter child = childVect; !found && child.lte(); child++ )
-		found = (*child)->anyRefsRec();
-
-	return found;
-}
-
-/*
- * Compiler
- */
-
-/* Initialize the structure that will collect info during the parse of a
- * machine.
- *
- * fileName, sectionName, sectionLoc and out are copied into the members of
- * the same name, and parserName also starts out as sectionName. Most
- * counters and pointers start at zero. The exceptions are nextLocalErrKey,
- * nextTokenId, nextGenericId and nextObjectId, which start at 1, and
- * revertOn, which starts as true. */
-Compiler::Compiler( const String &fileName, const String &sectionName,
- const InputLoc &sectionLoc, ostream &out )
-:
- nextPriorKey(0),
- nextLocalErrKey(1), /* 0 is reserved for global error actions. */
- nextNameId(0),
- alphTypeSet(false),
- getKeyExpr(0),
- accessExpr(0),
- curStateExpr(0),
- lowerNum(0),
- upperNum(0),
- fileName(fileName),
- sectionName(sectionName),
- sectionLoc(sectionLoc),
- errorCount(0),
- curActionOrd(0),
- curPriorOrd(0),
- nextEpsilonResolvedLink(0),
- nextTokenId(1),
- rootCodeBlock(0),
- mainReturnUT(0),
- parserName(sectionName),
- out(out),
- access(0),
- tokenStruct(0),
- rootLangEl(0),
- eofLangEl(0),
- errorLangEl(0),
- defaultCharLangEl(0),
- rootRegion(0),
- defaultRegion(0),
- firstNonTermId(0),
- prodIdIndex(0),
- nextPatReplId(0),
- nextGenericId(1),
- nextFuncId(0),
- loopCleanup(0),
- nextObjectId(1), /* 0 is reserved for no object. */
- nextFrameId(0),
- nextParserId(0),
- nextLabelId(0),
- revertOn(true),
- predValue(0),
- nextMatchEndNum(0),
- argvTypeRef(0),
- context(0)
-{
-}
-
-/* Clean up the data collected during a parse. Emptying the action list is
- * the only explicit cleanup done here. */
-Compiler::~Compiler()
-{
- /* Delete all the nodes in the action list. Will cause all the
- * string data that represents the actions to be deallocated. */
- actionList.empty();
-}
-
-/* Create a new name instantiation in the current name scope and return it.
- * The instance gets the next free name id, is always appended to the
- * current scope's child vector and, when it has a name, is also entered
- * into the scope's name map. No check for an existing entry is made. */
-NameInst *Compiler::addNameInst( const InputLoc &loc, char *data, bool isLabel )
-{
-	NameInst *scope = curNameInst;
-
-	/* Create the name instantiation object and insert it. */
-	NameInst *inst = new NameInst( loc, scope, data, nextNameId++, isLabel );
-	scope->childVect.append( inst );
-
-	/* Anonymous instances are not reachable by name. */
-	if ( data != 0 )
-		scope->children.insertMulti( data, inst );
-
-	return inst;
-}
-
-/* Start a walk of the name tree at rootName, positioned on its first
- * child. */
-void Compiler::initNameWalk( NameInst *rootName )
-{
-	curNameChild = 0;
-	curNameInst = rootName;
-}
-
-/* Go into the next child scope. The number of the child is already set up.
- * We need this for the synchronous name tree and parse tree walk to work
- * properly. It is reset on entry into a scope and advanced on popping of a
- * scope. A call to enterNameScope should be accompanied by a corresponding
- * popNameScope. The returned frame holds the state to restore. */
-NameFrame Compiler::enterNameScope( bool isLocal, int numScopes )
-{
-	/* Remember where we were. */
-	NameFrame saved;
-	saved.prevLocalScope = localNameScope;
-	saved.prevNameChild = curNameChild;
-	saved.prevNameInst = curNameInst;
-
-	/* Descend numScopes levels. After the first step the child index has
-	 * been reset, so further steps take the first child. */
-	int remaining = numScopes;
-	while ( remaining-- > 0 ) {
-		curNameInst = curNameInst->childVect[curNameChild];
-		curNameChild = 0;
-	}
-
-	if ( isLocal )
-		localNameScope = curNameInst;
-
-	return saved;
-}
-
-/* Return from a child scope to its parent and advance to the parent's next
- * child. The frame must come from the corresponding call to
- * enterNameScope. */
-void Compiler::popNameScope( const NameFrame &frame )
-{
-	localNameScope = frame.prevLocalScope;
-	curNameInst = frame.prevNameInst;
-
-	/* Move past the child that was just walked. */
-	curNameChild = frame.prevNameChild + 1;
-}
-
-/* Restore the walk state saved in frame exactly as it was. Unlike
- * popNameScope, the child index is not advanced. */
-void Compiler::resetNameScope( const NameFrame &frame )
-{
-	localNameScope = frame.prevLocalScope;
-	curNameChild = frame.prevNameChild;
-	curNameInst = frame.prevNameInst;
-}
-
-
-/* Count one use of every name referenced from the current scope. Once a
- * name has been used as many times as it is referenced, its entry point is
- * no longer needed and is unset in graph. */
-void Compiler::unsetObsoleteEntries( FsmGraph *graph )
-{
-	for ( NameVect::Iter ref = curNameInst->referencedNames; ref.lte(); ref++ ) {
-		NameInst *name = *ref;
-		name->numUses += 1;
-
-		/* Still referenced from elsewhere, keep its entry. */
-		if ( name->numUses != name->numRefs )
-			continue;
-
-		assert( graph->entryPoints.find( name->id ) != 0 );
-		graph->unsetEntry( name->id );
-	}
-}
-
-/* Collect every name instance called data that can be reached from refFrom.
- * The name tree is walked breadth-first. Matches in a scope are recorded and
- * the scope's children are searched as well; when recLabelsOnly is set only
- * children that are labels are descended into. The result is empty if the
- * name is not found anywhere. */
-NameSet Compiler::resolvePart( NameInst *refFrom, const char *data, bool recLabelsOnly )
-{
-	/* Queue for the breadth-first search, loaded with the start node. */
-	NameInstList pending;
-	pending.append( refFrom );
-
-	NameSet found;
-	while ( pending.length() > 0 ) {
-		/* Take the next scope off the queue. */
-		NameInst *scope = pending.detachFirst();
-
-		/* Record all instances of the name in this scope. */
-		NameMapEl *first, *last;
-		if ( scope->children.findMulti( data, first, last ) ) {
-			for ( NameMapEl *el = first; el <= last; el++ )
-				found.insert( el->value );
-		}
-
-		/* Queue the children for processing. */
-		for ( NameVect::Iter child = scope->childVect; child.lte(); child++ ) {
-			if ( recLabelsOnly && !(*child)->isLabel )
-				continue;
-			pending.append( *child );
-		}
-	}
-
-	return found;
-}
-
-/* Resolve the components of nameRef from position namePos onward, starting
- * the search at refFrom, and add every instance the full reference can
- * denote to result. */
-void Compiler::resolveFrom( NameSet &result, NameInst *refFrom,
-		const NameRef &nameRef, int namePos )
-{
-	/* Everything the current component can refer to below refFrom. */
-	NameSet partResult = resolvePart( refFrom, nameRef[namePos], false );
-
-	const int nextPos = namePos + 1;
-	if ( nextPos >= nameRef.length() ) {
-		/* This was the last component, its matches are final results. */
-		result.insert( partResult );
-		return;
-	}
-
-	/* More components follow, search for them using each match as the
-	 * base. */
-	for ( NameSet::Iter name = partResult; name.lte(); name++ )
-		resolveFrom( result, *name, nameRef, nextPos );
-}
-
-ostream &operator<<( ostream &out, const Token &token ) /* writes token.data to out and returns out */
-{
- out << token.data;
- return out;
-}
-
-/* Write out a name reference with its components separated by "::". A null
- * first component is written as a leading "::". */
-ostream &operator<<( ostream &out, const NameRef &nameRef )
-{
-	int pos = 0;
-	const bool rooted = ( nameRef[pos] == 0 );
-
-	if ( rooted ) {
-		out << "::";
-		pos = 1;
-	}
-
-	/* First real component, then the rest with separators. */
-	out << nameRef[pos];
-	pos += 1;
-	while ( pos < nameRef.length() ) {
-		out << "::" << nameRef[pos];
-		pos += 1;
-	}
-	return out;
-}
-
-/* Write the qualified name of a name instance: every ancestor below the
- * root, then the instance itself, each preceded by "::". Unnamed parts are
- * written as "<ANON>". */
-ostream &operator<<( ostream &out, const NameInst &nameInst )
-{
-	/* Count the ancestors. */
-	int depth = 0;
-	for ( NameInst *up = nameInst.parent; up != 0; up = up->parent )
-		depth += 1;
-
-	/* Lay the ancestors out root first. */
-	NameInst **chain = new NameInst*[depth];
-	NameInst *up = nameInst.parent;
-	for ( int slot = depth; slot-- > 0; up = up->parent )
-		chain[slot] = up;
-
-	/* Write the ancestors out, skipping the root in slot 0. */
-	for ( int slot = 1; slot < depth; slot++ )
-		out << "::" << ( chain[slot]->name != 0 ? chain[slot]->name : "<ANON>" );
-
-	/* Write the name itself and clean up. */
-	out << "::" << ( nameInst.name != 0 ? nameInst.name : "<ANON>" );
-	delete[] chain;
-	return out;
-}
-
-/* Ordering of name instances by source location: by line first, then by
- * column. */
-struct CmpNameInstLoc
-{
-	static int compare( const NameInst *ni1, const NameInst *ni2 )
-	{
-		if ( ni1->loc.line != ni2->loc.line )
-			return ni1->loc.line < ni2->loc.line ? -1 : 1;
-		if ( ni1->loc.col != ni2->loc.col )
-			return ni1->loc.col < ni2->loc.col ? -1 : 1;
-		return 0;
-	}
-};
-
-void errorStateLabels( const NameSet &resolved )
-{
- MergeSort<NameInst*, CmpNameInstLoc> mergeSort;
- mergeSort.sort( resolved.data, resolved.length() );
- for ( NameSet::Iter res = resolved; res.lte(); res++ )
- error((*res)->loc) << " -> " << **res << endl;
-}
-
-
-/* Add one reference to every direct child of rootName. This will cause
- * their entry points to survive to the end of the graph generating walk. */
-void Compiler::referenceRegions( NameInst *rootName )
-{
-	for ( NameVect::Iter inst = rootName->childVect; inst.lte(); inst++ ) {
-		NameInst *region = *inst;
-		region->numRefs += 1;
-	}
-}
-
-/* Walk the name tree rooted at from and store every visited instance in
- * nameIndex under its id. */
-void Compiler::fillNameIndex( NameInst **nameIndex, NameInst *from )
-{
-	nameIndex[from->id] = from;
-
-	/* The implicit final state goes first, if there is one. */
-	if ( from->final != 0 )
-		fillNameIndex( nameIndex, from->final );
-
-	/* Then all children. */
-	for ( NameVect::Iter child = from->childVect; child.lte(); child++ )
-		fillNameIndex( nameIndex, *child );
-}
-
-/* Allocate and fill an index from name id to name instance for the tree
- * rooted at rootName. The array has nextNameId+1 slots and starts out all
- * null, which leaves a null pointer on the end to terminate it. The caller
- * receives the new[]-allocated array. */
-NameInst **Compiler::makeNameIndex( NameInst *rootName )
-{
-	/* The trailing () value-initializes every slot to a null pointer. */
-	NameInst **index = new NameInst*[nextNameId+1]();
-	fillNameIndex( index, rootName );
-	return index;
-}
-
-/* Register the builtin machine under name in the root namespace's graph
- * dictionary, wrapped as expression -> join -> variable definition. */
-void Compiler::createBuiltin( const char *name, BuiltinMachine builtin )
-{
-	Join *join = new Join( new Expression( builtin ) );
-	VarDef *def = new VarDef( name, join );
-	rootNamespace->rlMap.insert( new GraphDictEl( name, def ) );
-}
-
-/* Initialize the graph dict with the builtin types. Both "null" and "zlen"
- * name the lambda machine. */
-void Compiler::initGraphDict( )
-{
-	static const struct {
-		const char *name;
-		BuiltinMachine machine;
-	} builtins[] = {
-		{ "any", BT_Any },
-		{ "ascii", BT_Ascii },
-		{ "extend", BT_Extend },
-		{ "alpha", BT_Alpha },
-		{ "digit", BT_Digit },
-		{ "alnum", BT_Alnum },
-		{ "lower", BT_Lower },
-		{ "upper", BT_Upper },
-		{ "cntrl", BT_Cntrl },
-		{ "graph", BT_Graph },
-		{ "print", BT_Print },
-		{ "punct", BT_Punct },
-		{ "space", BT_Space },
-		{ "xdigit", BT_Xdigit },
-		{ "null", BT_Lambda },
-		{ "zlen", BT_Lambda },
-		{ "empty", BT_Empty },
-	};
-	const int count = sizeof(builtins) / sizeof(builtins[0]);
-
-	/* Register them in table order. */
-	for ( int b = 0; b < count; b++ )
-		createBuiltin( builtins[b].name, builtins[b].machine );
-}
-
-/* Initialize the key operators object that will be referenced by all fsms
- * created: pick the alphabet type, apply a user supplied key range if there
- * is one, and start condition keys just above the largest key. */
-void Compiler::initKeyOps( )
-{
-	/* Signedness and bounds come from the alphabet type. A user supplied
-	 * type wins over the host language default. */
-	HostType *alphType = hostLang->defaultAlphType;
-	if ( alphTypeSet )
-		alphType = userAlphType;
-	thisKeyOps.setAlphType( alphType );
-
-	/* If ranges are given then interpret them in the alphabet type. */
-	if ( lowerNum != 0 ) {
-		thisKeyOps.minKey = makeFsmKeyNum( lowerNum, rangeLowLoc, this );
-		thisKeyOps.maxKey = makeFsmKeyNum( upperNum, rangeHighLoc, this );
-	}
-
-	thisCondData.nextCondKey = thisKeyOps.maxKey;
-	thisCondData.nextCondKey.increment();
-}
-
-/* Print a name instance and, recursively, its children to cerr. Each line
- * is preceded by one indent string per level and shows the name (or
- * "<ANON>"), the id and the reference count. */
-void Compiler::printNameInst( NameInst *nameInst, int level )
-{
-	for ( int depth = level; depth > 0; depth-- )
-		cerr << " ";
-
-	const char *label = nameInst->name != 0 ? nameInst->name : "<ANON>";
-	cerr << label <<
-			" id: " << nameInst->id <<
-			" refs: " << nameInst->numRefs << endl;
-
-	for ( NameVect::Iter child = nameInst->childVect; child.lte(); child++ )
-		printNameInst( *child, level+1 );
-}
-
-/* Remove duplicate actions from an action table. The first occurrence of
- * each action value is kept and every later occurrence is removed. */
-void Compiler::removeDups( ActionTable &table )
-{
-	for ( int keep = 0; keep < table.length(); keep++ ) {
-		/* Scan everything ahead of keep. After a removal the same index
-		 * holds the next element, so only advance on a mismatch. */
-		int probe = keep + 1;
-		while ( probe < table.length() ) {
-			if ( table[probe].value == table[keep].value ) {
-				table.vremove( probe );
-				continue;
-			}
-			probe += 1;
-		}
-	}
-}
-
-/* Remove duplicates from the action tables of every state in the graph: the
- * tables on each out transition and the state's to-state, from-state and eof
- * tables. This should be called once all actions have been transferred to
- * their final resting place. */
-void Compiler::removeActionDups( FsmGraph *graph )
-{
-	for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
-		/* Per-transition tables. */
-		for ( TransList::Iter tr = st->outList; tr.lte(); tr++ ) {
-			removeDups( tr->actionTable );
-		}
-
-		/* Per-state tables. */
-		removeDups( st->toStateActionTable );
-		removeDups( st->fromStateActionTable );
-		removeDups( st->eofActionTable );
-	}
-}
-
-/* Create an action with the given name and inline list, append it to the
- * action list and return it. The action is given a location of line 1,
- * column 1 with no file name. */
-Action *Compiler::newAction( const String &name, InlineList *inlineList )
-{
-	InputLoc where;
-	where.fileName = 0;
-	where.line = 1;
-	where.col = 1;
-
-	Action *created = new Action( where, name, inlineList );
-	actionList.append( created );
-	return created;
-}
-
-/* Create the three actions used by longest-match scanning (initact,
- * tokstart, tokend) and reserve an ordering for each. Nothing is created
- * when there are no regions. */
-void Compiler::initLongestMatchData()
-{
-	if ( regionList.length() <= 0 )
-		return;
-
-	/* The initActId action gives act a default value. */
-	InlineList *initActCode = new InlineList;
-	initActCode->append( new InlineItem( InputLoc(), InlineItem::LmInitAct ) );
-	initActId = newAction( "initact", initActCode );
-	initActId->isLmAction = true;
-
-	/* The setTokStart action sets tokstart. */
-	InlineList *tokStartCode = new InlineList;
-	tokStartCode->append( new InlineItem( InputLoc(), InlineItem::LmSetTokStart ) );
-	setTokStart = newAction( "tokstart", tokStartCode );
-	setTokStart->isLmAction = true;
-
-	/* The setTokEnd action sets tokend. */
-	InlineList *tokEndCode = new InlineList;
-	tokEndCode->append( new InlineItem( InputLoc(), InlineItem::LmSetTokEnd ) );
-	setTokEnd = newAction( "tokend", tokEndCode );
-	setTokEnd->isLmAction = true;
-
-	/* The actions also need an ordering: ahead of all user action
-	 * embeddings. */
-	initActIdOrd = curActionOrd++;
-	setTokStartOrd = curActionOrd++;
-	setTokEndOrd = curActionOrd++;
-}
-
-void Compiler::finishGraphBuild( FsmGraph *graph )
-{
- /* Wrap up the construction of a graph. The steps below run in a fixed
- * order: make entry points deterministic, transfer error actions, remove
- * duplicate actions and unreachable states, clear action ordering and
- * priorities, then minimize and compress transitions.
- *
- * Resolve any labels that point to multiple states. Any labels that are
- * still around are referenced only by gotos and calls and they need to be
- * made into deterministic entry points. */
- graph->deterministicEntry();
-
- /*
- * All state construction is now complete.
- */
-
- /* Transfer global error actions. */
- for ( StateList::Iter state = graph->stateList; state.lte(); state++ )
- graph->transferErrorActions( state, 0 );
-
- removeActionDups( graph );
-
- /* Remove unreachable states. There should be no dead end states. The
- * subtract and intersection operators are the only places where they may
- * be created and those operators clean them up. */
- graph->removeUnreachableStates();
-
- /* No more fsm operations are to be done. Action ordering numbers are
- * no longer of use and will just hinder minimization. Clear them. */
- graph->nullActionKeys();
-
- /* Transition priorities are no longer of use. We can clear them
- * because they will just hinder minimization as well. Clear them. */
- graph->clearAllPriorities();
-
- /* Minimize here even if we minimized at every op. Now that function
- * keys have been cleared we may get a more minimal fsm. */
- graph->minimizePartition2();
- graph->compressTransitions();
-}
-
-/* Print the name tree below rootName to cerr. The root itself is not
- * printed; its children are printed starting at level 0. */
-void Compiler::printNameTree( NameInst *rootName )
-{
-	cerr << "name tree:" << endl;
-
-	for ( NameVect::Iter top = rootName->childVect; top.lte(); top++ ) {
-		printNameInst( *top, 0 );
-	}
-}
-
-/* Print a name index to cerr, one "position: name" line per entry.
- * Printing stops at the first null pointer, which terminates the index. */
-void Compiler::printNameIndex( NameInst **nameIndex )
-{
-	cerr << "name index:" << endl;
-
-	int ni = 0;
-	while ( nameIndex[ni] != 0 ) {
-		char *name = nameIndex[ni]->name;
-		cerr << ni << ": ";
-		cerr << ( name != 0 ? name : "<ANON>" ) << endl;
-		ni += 1;
-	}
-}
-
-
-/* Build the name tree and supporting data structures. */
-NameInst *Compiler::makeNameTree()
-{
- /* Create the root name. */
- nextNameId = 0;
- NameInst *rootName = new NameInst( InputLoc(), 0, 0, nextNameId++, false );
-
- /* First make the name tree. */
- initNameWalk( rootName );
- for ( RegionGraphList::Iter glel = instanceList; glel.lte(); glel++ ) {
- /* Recurse on the instance. */
- glel->value->makeNameTree( glel->loc, this );
- }
-
- return rootName;
-}
-
-FsmGraph *Compiler::makeAllRegions()
-{
- /* Build the name tree and supporting data structures. */
- NameInst *rootName = makeNameTree( );
- NameInst **nameIndex = makeNameIndex( rootName );
-
- /* Resovle the implicit name references to the nfa instantiations. */
- referenceRegions( rootName );
-
- int numGraphs = 0;
- FsmGraph **graphs = new FsmGraph*[instanceList.length()];
-
- /* Make all the instantiations, we know that main exists in this list. */
- initNameWalk( rootName );
- for ( RegionGraphList::Iter glel = instanceList; glel.lte(); glel++ ) {
- /* Build the graph from a walk of the parse tree. */
- FsmGraph *newGraph = glel->value->walk( this );
-
- /* Wrap up the construction. */
- finishGraphBuild( newGraph );
-
- /* Save off the new graph. */
- graphs[numGraphs++] = newGraph;
- }
-
- /* NOTE: If putting in minimization here we need to include eofTarget
- * into the minimization algorithm. It is currently set by the longest
- * match operator and not considered anywhere else. */
-
- /* Add all the other graphs into the first. */
- FsmGraph *all = graphs[0];
- all->globOp( graphs+1, numGraphs-1 );
- delete[] graphs;
-
- /* Go through all the token regions and check for lmRequiresErrorState. */
- for ( RegionList::Iter reg = regionList; reg.lte(); reg++ ) {
- if ( reg->lmSwitchHandlesError )
- all->lmRequiresErrorState = true;
- }
-
- all->rootName = rootName;
- all->nameIndex = nameIndex;
-
- return all;
-}
-
-void Compiler::analyzeAction( Action *action, InlineList *inlineList )
-{
- /* FIXME: Actions used as conditions should be very constrained. */
- for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
- //if ( item->type == InlineItem::Call || item->type == InlineItem::CallExpr )
- // action->anyCall = true;
-
- /* Need to recurse into longest match items. */
- if ( item->type == InlineItem::LmSwitch ) {
- TokenRegion *lm = item->tokenRegion;
- for ( TokenDefListReg::Iter lmi = lm->tokenDefList; lmi.lte(); lmi++ ) {
- if ( lmi->action != 0 )
- analyzeAction( action, lmi->action->inlineList );
- }
- }
-
- if ( item->type == InlineItem::LmOnLast ||
- item->type == InlineItem::LmOnNext ||
- item->type == InlineItem::LmOnLagBehind )
- {
- TokenDef *lmi = item->longestMatchPart;
- if ( lmi->action != 0 )
- analyzeAction( action, lmi->action->inlineList );
- }
-
- if ( item->children != 0 )
- analyzeAction( action, item->children );
- }
-}
-
-void Compiler::analyzeGraph( FsmGraph *graph )
-{
- for ( ActionList::Iter act = actionList; act.lte(); act++ )
- analyzeAction( act, act->inlineList );
-
- for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
- /* The transition list. */
- for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
- for ( ActionTable::Iter at = trans->actionTable; at.lte(); at++ )
- at->value->numTransRefs += 1;
- }
-
- for ( ActionTable::Iter at = st->toStateActionTable; at.lte(); at++ )
- at->value->numToStateRefs += 1;
-
- for ( ActionTable::Iter at = st->fromStateActionTable; at.lte(); at++ )
- at->value->numFromStateRefs += 1;
-
- for ( ActionTable::Iter at = st->eofActionTable; at.lte(); at++ )
- at->value->numEofRefs += 1;
-
- for ( StateCondList::Iter sc = st->stateCondList; sc.lte(); sc++ ) {
- for ( CondSet::Iter sci = sc->condSpace->condSet; sci.lte(); sci++ )
- (*sci)->numCondRefs += 1;
- }
- }
-}
-
-FsmGraph *Compiler::makeScanner()
-{
- /* Make the graph, do minimization. */
- FsmGraph *fsmGraph = makeAllRegions();
-
- /* If any errors have occured in the input file then don't write anything. */
- if ( gblErrorCount > 0 )
- return 0;
-
- analyzeGraph( fsmGraph );
-
- /* Decide if an error state is necessary.
- * 1. There is an error transition
- * 2. There is a gap in the transitions
- * 3. The longest match operator requires it. */
- if ( fsmGraph->lmRequiresErrorState || fsmGraph->hasErrorTrans() )
- fsmGraph->errState = fsmGraph->addState();
-
- /* State numbers need to be assigned such that all final states have a
- * larger state id number than all non-final states. This enables the
- * first_final mechanism to function correctly. We also want states to be
- * ordered in a predictable fashion. So we first apply a depth-first
- * search, then do a stable sort by final state status, then assign
- * numbers. */
-
- fsmGraph->depthFirstOrdering();
- fsmGraph->sortStatesByFinal();
- fsmGraph->setStateNumbers( 0 );
-
- return fsmGraph;
-}
-
-void Compiler::createDefaultScanner()
-{
- InputLoc loc = { 0, 0, 0 };
-
- const char *name = "___DEFAULT_SCANNER";
-
- /* Create the default namespace. */
- defaultNamespace = new Namespace( InputLoc(), name,
- namespaceList.length(), 0 );
- namespaceList.append( defaultNamespace );
-
- /* Create a scanner which will be used when no other scanner can be
- * figured out. It returns single characters. */
- defaultRegion = new TokenRegion( InputLoc(), name,
- regionList.length(), 0 );
- regionList.append( defaultRegion );
-
- /* Insert the machine definition into the graph dictionary. */
- RegionGraphDictEl *newEl = rootNamespace->graphDict.insert( name );
- assert( newEl != 0 );
- newEl->value = new RegionDef( name, defaultRegion );
- newEl->isInstance = true;
- instanceList.append( newEl );
-
- Join *join = new Join( new Expression( BT_Any ) );
-
- TokenDef *tokenDef = new TokenDef( name, String(), false, false,
- join, 0, loc, nextTokenId++,
- rootNamespace, defaultRegion, 0, 0, 0 );
-
- defaultRegion->tokenDefList.append( tokenDef );
-
- /* Now create the one and only token -> "<chr>" / any / */
- name = "___DEFAULT_SCANNER_CHR";
- defaultCharLangEl = addLangEl( this, defaultNamespace, name, LangEl::Term );
-
- tokenDef->tdLangEl = defaultCharLangEl;
- defaultCharLangEl->tokenDef = tokenDef;
-}
-
-LangEl *Compiler::makeRepeatProd( Namespace *nspace, const String &repeatName,
- NamespaceQual *nspaceQual, const String &name )
-{
- LangEl *prodName = addLangEl( this, nspace, repeatName, LangEl::NonTerm );
- prodName->isRepeat = true;
-
- ProdElList *prodElList1 = new ProdElList;
-
- /* Build the first production of the repeat. */
- TypeRef *typeRef1 = new TypeRef( InputLoc(), nspaceQual, name );
- ProdEl *factor1 = new ProdEl( ProdEl::ReferenceType, InputLoc(), 0, false, typeRef1, 0 );
-
- UniqueType *prodNameUT = findUniqueType( TYPE_TREE, prodName );
- TypeRef *typeRef2 = new TypeRef( InputLoc(), prodNameUT );
- ProdEl *factor2 = new ProdEl( ProdEl::ReferenceType, InputLoc(), 0, false, typeRef2, 0 );
-
- prodElList1->append( factor1 );
- prodElList1->append( factor2 );
-
- Definition *newDef1 = new Definition( InputLoc(),
- prodName, prodElList1, false, 0,
- prodList.length(), prodName->defList.length(),
- Definition::Production );
-
- prodName->defList.append( newDef1 );
- prodList.append( newDef1 );
-
- /* Build the second production of the repeat. */
- ProdElList *prodElList2 = new ProdElList;
-
- Definition *newDef2 = new Definition( InputLoc(),
- prodName, prodElList2, false, 0,
- prodList.length(), prodName->defList.length(),
- Definition::Production );
-
- prodName->defList.append( newDef2 );
- prodList.append( newDef2 );
-
- return prodName;
-}
-
-LangEl *Compiler::makeListProd( Namespace *nspace, const String &listName, NamespaceQual *nspaceQual, const String &name )
-{
- LangEl *prodName = addLangEl( this, nspace, listName, LangEl::NonTerm );
- prodName->isList = true;
-
- /* Build the first production of the list. */
- TypeRef *typeRef1 = new TypeRef( InputLoc(), nspaceQual, name );
- ProdEl *factor1 = new ProdEl( ProdEl::ReferenceType, InputLoc(), 0, false, typeRef1, 0 );
-
- UniqueType *prodNameUT = findUniqueType( TYPE_TREE, prodName );
- TypeRef *typeRef2 = new TypeRef( InputLoc(), prodNameUT );
- ProdEl *factor2 = new ProdEl( ProdEl::ReferenceType, InputLoc(), 0, false, typeRef2, 0 );
-
- ProdElList *prodElList1 = new ProdElList;
- prodElList1->append( factor1 );
- prodElList1->append( factor2 );
-
- Definition *newDef1 = new Definition( InputLoc(),
- prodName, prodElList1, false, 0,
- prodList.length(), prodName->defList.length(),
- Definition::Production );
-
- prodName->defList.append( newDef1 );
- prodList.append( newDef1 );
-
- /* Build the second production of the list. */
- TypeRef *typeRef3 = new TypeRef( InputLoc(), nspaceQual, name );
- ProdEl *factor3 = new ProdEl( ProdEl::ReferenceType, InputLoc(), 0, false, typeRef3, 0 );
-
- ProdElList *prodElList2 = new ProdElList;
- prodElList2->append( factor3 );
-
- Definition *newDef2 = new Definition( InputLoc(),
- prodName, prodElList2, false, 0,
- prodList.length(), prodName->defList.length(),
- Definition::Production );
-
- prodName->defList.append( newDef2 );
- prodList.append( newDef2 );
-
- return prodName;
-}
-
-LangEl *Compiler::makeOptProd( Namespace *nspace, const String &optName, NamespaceQual *nspaceQual, const String &name )
-{
- LangEl *prodName = addLangEl( this, nspace, optName, LangEl::NonTerm );
- prodName->isOpt = true;
-
- ProdElList *prodElList1 = new ProdElList;
-
- /* Build the first production of the repeat. */
- TypeRef *typeRef1 = new TypeRef( InputLoc(), nspaceQual, name );
- ProdEl *factor1 = new ProdEl( ProdEl::ReferenceType, InputLoc(), 0, false, typeRef1, 0 );
- prodElList1->append( factor1 );
-
- Definition *newDef1 = new Definition( InputLoc(),
- prodName, prodElList1, false, 0,
- prodList.length(), prodName->defList.length(),
- Definition::Production );
-
- prodName->defList.append( newDef1 );
- prodList.append( newDef1 );
-
- /* Build the second production of the repeat. */
- ProdElList *prodElList2 = new ProdElList;
-
- Definition *newDef2 = new Definition( InputLoc(),
- prodName, prodElList2, false, 0,
- prodList.length(), prodName->defList.length(),
- Definition::Production );
-
- prodName->defList.append( newDef2 );
- prodList.append( newDef2 );
-
- return prodName;
-}
-
-Namespace *Namespace::findNamespace( const String &name )
-{
- for ( NamespaceVect::Iter c = childNamespaces; c.lte(); c++ ) {
- if ( strcmp( name, (*c)->name ) == 0 )
- return *c;
- }
- return 0;
-}
-
-/* Search from a previously resolved qualification. (name 1+ in a qual list). */
-Namespace *NamespaceQual::searchFrom( Namespace *from, StringVect::Iter &qualPart )
-{
- /* While there are still parts in the qualification. */
- while ( qualPart.lte() ) {
- Namespace *child = from->findNamespace( *qualPart );
- if ( child == 0 )
- return 0;
-
- from = child;
- qualPart.increment();
- }
-
- return from;
-}
-
-Namespace *NamespaceQual::getQual( Compiler *pd )
-{
- /* Do the search only once. */
- if ( cachedNspaceQual != 0 )
- return cachedNspaceQual;
-
- if ( qualNames.length() == 0 ) {
- /* No qualification, use the region the qualification was
- * declared in. */
- cachedNspaceQual = declInNspace;
- }
- else if ( strcmp( qualNames[0], "root" ) == 0 ) {
- /* First item is "root." Start the downward search from there. */
- StringVect::Iter qualPart = qualNames;
- qualPart.increment();
- cachedNspaceQual = searchFrom( pd->rootNamespace, qualPart );
- return cachedNspaceQual;
- }
- else {
- /* Have a qualification. Move upwards through the declared
- * regions looking for the first part. */
- StringVect::Iter qualPart = qualNames;
- Namespace *parentNamespace = declInNspace;
- while ( parentNamespace != 0 ) {
- /* Search for the first part underneath the current parent. */
- Namespace *child = parentNamespace->findNamespace( *qualPart );
-
- if ( child != 0 ) {
- /* Found the first part. Start going below the result. */
- qualPart.increment();
- cachedNspaceQual = searchFrom( child, qualPart );
- return cachedNspaceQual;
- }
-
- /* Not found, move up to the parent. */
- parentNamespace = parentNamespace->parentNamespace;
- }
-
- /* Failed to find the place to start from. */
- cachedNspaceQual = 0;
- }
-
- return cachedNspaceQual;
-}
-
-void Compiler::initEmptyScanners()
-{
- for ( RegionList::Iter reg = regionList; reg.lte(); reg++ ) {
- if ( reg->tokenDefList.length() == 0 ) {
- reg->wasEmpty = true;
-
- static int def = 1;
- InputLoc loc = { 0, 0, 0 };
- String name( reg->name.length() + 16, "__%s_DEF_PAT_%d", reg->name.data, def++ );
-
- Join *join = new Join( new Expression( BT_Any ) );
-
- TokenDef *tokenDef = new TokenDef( name, String(), false, false, join,
- 0, loc, nextTokenId++, rootNamespace, reg, 0, 0, 0 );
- reg->tokenDefList.append( tokenDef );
-
- /* These do not go in the namespace so so they cannot get declared
- * in the declare pass. */
- LangEl *lel = addLangEl( this, rootNamespace, name, LangEl::Term );
-
- tokenDef->tdLangEl = lel;
- lel->tokenDef = tokenDef;
- }
- }
-}
-
-
-void Compiler::parsePatterns()
-{
- Program *prg = colmNewProgram( runtimeData, 0, 0 );
-
- /* Turn off context-dependent parsing. */
- prg->ctxDepParsing = 0;
-
- Tree **vm_stack = stackAlloc();
- Tree **root = &vm_stack[VM_STACK_SIZE];
-
- for ( ReplList::Iter repl = replList; repl.lte(); repl++ ) {
- if ( colm_log_compile ) {
- cerr << "parsing replacement at " <<
- repl->loc.line << ' ' << repl->loc.col << endl;
- }
-
- InputStream *in = new InputStream;
- FsmRun *fsmRun = new FsmRun;
- repl->pdaRun = new PdaRun;
-
- initInputStream( in );
- initPdaRun( repl->pdaRun, prg, pdaTables, fsmRun, repl->langEl->parserId, 0, false, 0 );
- initFsmRun( fsmRun, prg );
-
- Stream *res = streamAllocate( prg );
- res->id = LEL_ID_STREAM;
- res->in = newSourceStreamRepl( repl );
- appendStream( in, (Tree*)res );
- setEof( in );
-
- newToken( prg, repl->pdaRun, fsmRun );
- long pcr = parseLoop( prg, root, repl->pdaRun, fsmRun, in, PcrStart );
- assert( pcr == PcrDone );
- if ( repl->pdaRun->parseError )
- cout << "parse error" << endp;
- }
-
- for ( PatternList::Iter pat = patternList; pat.lte(); pat++ ) {
- if ( colm_log_compile ) {
- cerr << "parsing pattern at " <<
- pat->loc.line << ' ' << pat->loc.col << endl;
- }
-
- InputStream *in = new InputStream;
- FsmRun *fsmRun = new FsmRun;
- pat->pdaRun = new PdaRun;
-
- initInputStream( in );
- initPdaRun( pat->pdaRun, prg, pdaTables, fsmRun, pat->langEl->parserId, 0, false, 0 );
- initFsmRun( fsmRun, prg );
-
- Stream *res = streamAllocate( prg );
- res->id = LEL_ID_STREAM;
- res->in = newSourceStreamPattern( pat );
- appendStream( in, (Tree*)res );
- setEof( in );
-
- newToken( prg, pat->pdaRun, fsmRun );
- long pcr = parseLoop( prg, root, pat->pdaRun, fsmRun, in, PcrStart );
- assert( pcr == PcrDone );
- if ( pat->pdaRun->parseError )
- cout << "parse error" << endp;
- }
-
- fillInPatterns( prg );
-}
-
-void Compiler::collectParserEls( BstSet<LangEl*> &parserEls )
-{
- for ( PatternList::Iter pat = patternList; pat.lte(); pat++ ) {
- /* We assume the reduction action compilation phase was run before
- * pattern parsing and it decorated the pattern with the target type. */
- assert( pat->langEl != 0 );
- if ( pat->langEl->type != LangEl::NonTerm )
- error(pat->loc) << "pattern type is not a non-terminal" << endp;
-
- if ( pat->langEl->parserId < 0 ) {
- /* Make a parser for the language element. */
- parserEls.insert( pat->langEl );
- pat->langEl->parserId = nextParserId++;
- }
- }
-
- for ( ReplList::Iter repl = replList; repl.lte(); repl++ ) {
- /* We assume the reduction action compilation phase was run before
- * replacement parsing decorated the replacement with the target type. */
- assert( repl->langEl != 0 );
-
- if ( repl->langEl->parserId < 0 ) {
- /* Make a parser for the language element. */
- parserEls.insert( repl->langEl );
- repl->langEl->parserId = nextParserId++;
- }
- }
-
- /* Make parsers that we need. */
- for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
- if ( lel->parserId >= 0 )
- parserEls.insert( lel );
- }
-}
-
-
-void Compiler::generateOutput()
-{
- FsmCodeGen *fsmGen = new FsmCodeGen("<INPUT>", sectionName,
- *outStream, redFsm, fsmTables );
-
- PdaCodeGen *pdaGen = new PdaCodeGen( outputFileName, "parser", this, *outStream );
-
- fsmGen->writeIncludes();
- pdaGen->defineRuntime();
- fsmGen->writeCode();
-
- /* Make parsers that we need. */
- pdaGen->writeParserData( 0, pdaTables );
-
- /* Write the runtime data. */
- pdaGen->writeRuntimeData( runtimeData, pdaTables );
-
- if ( !gblLibrary )
- fsmGen->writeMain();
-
- outStream->flush();
-}
-
-
-void Compiler::prepGrammar()
-{
- /* This will create language elements. */
- wrapNonTerminals();
-
- makeLangElIds();
- makeLangElNames();
- makeDefinitionNames();
- noUndefindLangEls();
-
- /* Put the language elements in an index by language element id. */
- langElIndex = new LangEl*[nextSymbolId+1];
- memset( langElIndex, 0, sizeof(LangEl*)*(nextSymbolId+1) );
- for ( LelList::Iter lel = langEls; lel.lte(); lel++ )
- langElIndex[lel->id] = lel;
-
- makeProdFsms();
-
- /* Allocate the Runtime data now. Every PdaTable that we make
- * will reference it, but it will be filled in after all the tables are
- * built. */
- runtimeData = new RuntimeData;
-}
-
-void Compiler::compile()
-{
- beginProcessing();
- initKeyOps();
-
-
- /* Type declaration. */
- typeDeclaration();
-
- /* Type resolving. */
- typeResolve();
-
- makeTerminalWrappers();
- makeEofElements();
-
- /*
- * Parsers
- */
-
- /* Init the longest match data */
- initLongestMatchData();
- FsmGraph *fsmGraph = makeScanner();
-
- if ( colm_log_compile ) {
- printNameTree( fsmGraph->rootName );
- printNameIndex( fsmGraph->nameIndex );
- }
-
- prepGrammar();
-
- /* Compile bytecode. */
- compileByteCode();
-
- /* Make the reduced fsm. */
- RedFsmBuild reduce( sectionName, this, fsmGraph );
- redFsm = reduce.reduceMachine();
-
- BstSet<LangEl*> parserEls;
- collectParserEls( parserEls );
-
- makeParser( parserEls );
-
- /* Make the scanner tables. */
- fsmTables = redFsm->makeFsmTables();
-
- /* Now that all parsers are built, make the global runtimeData. */
- makeRuntimeData();
-
- /*
- * All compilation is now complete.
- */
-
- /* Parse patterns and replacements. */
- parsePatterns();
-}
-
diff --git a/colm/ctinput.cc b/colm/ctinput.cc
deleted file mode 100644
index b5086268..00000000
--- a/colm/ctinput.cc
+++ /dev/null
@@ -1,439 +0,0 @@
-/*
- * Copyright 2007-2012 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Colm.
- *
- * Colm is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Colm is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Colm; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include "parsedata.h"
-#include "parsetree.h"
-#include "input.h"
-#include "fsmrun.h"
-#include "debug.h"
-#include "pool.h"
-
-#include <iostream>
-
-using std::cerr;
-using std::endl;
-
-SourceFuncs patternFuncs;
-SourceFuncs replFuncs;
-
-/*
- * Pattern
- */
-
-SourceStream *newSourceStreamPattern( Pattern *pattern )
-{
- SourceStream *is = (SourceStream*)malloc(sizeof(SourceStream));
- memset( is, 0, sizeof(SourceStream) );
- is->handlesLine = true;
- is->pattern = pattern;
- is->patItem = pattern->list->head;
- is->funcs = &patternFuncs;
- return is;
-}
-
-LangEl *inputStreamPatternGetLangEl( SourceStream *is, long *bindId, char **data, long *length )
-{
- LangEl *klangEl = is->patItem->factor->langEl;
- *bindId = is->patItem->bindId;
- *data = 0;
- *length = 0;
- is->line = is->patItem->loc.line;
-
- is->patItem = is->patItem->next;
- is->offset = 0;
- return klangEl;
-}
-
-int inputStreamPatternGetData( SourceStream *is, int skip, char *dest, int length, int *copied )
-{
- *copied = 0;
-
- PatternItem *buf = is->patItem;
- int offset = is->offset;
-
- while ( true ) {
- if ( buf == 0 )
- return INPUT_EOD;
-
- if ( buf->type == PatternItem::FactorType )
- return INPUT_LANG_EL;
-
- if ( offset == 0 )
- is->line = buf->loc.line;
-
- assert ( buf->type == PatternItem::InputText );
- int avail = buf->data.length() - offset;
-
- if ( avail > 0 ) {
- /* The source data from the current buffer. */
- char *src = &buf->data[offset];
- int slen = avail <= length ? avail : length;
-
- /* Need to skip? */
- if ( skip > 0 && slen <= skip ) {
- /* Skipping the the whole source. */
- skip -= slen;
- }
- else {
- /* Either skip is zero, or less than slen. Skip goes to zero.
- * Some data left over, copy it. */
- src += skip;
- slen -= skip;
- skip = 0;
-
- memcpy( dest, src, slen ) ;
- *copied += slen;
- break;
- }
- }
-
- buf = buf->next;
- offset = 0;
- }
-
- return INPUT_DATA;
-}
-
-void inputStreamPatternBackup( SourceStream *is )
-{
- if ( is->patItem == 0 )
- is->patItem = is->pattern->list->tail;
- else
- is->patItem = is->patItem->prev;
-}
-
-void inputStreamPatternPushBackBuf( SourceStream *is, RunBuf *runBuf )
-{
- char *data = runBuf->data + runBuf->offset;
- long length = runBuf->length;
-
- if ( length == 0 )
- return;
-
- /* While pushing back past the current pattern item start. */
- while ( length > is->offset ) {
- length -= is->offset;
- if ( is->offset > 0 )
- assert( memcmp( is->patItem->data, data-length, is->offset ) == 0 );
- inputStreamPatternBackup( is );
- is->offset = is->patItem->data.length();
- }
-
- is->offset -= length;
- assert( memcmp( &is->patItem->data[is->offset], data, length ) == 0 );
-}
-
-void inputStreamPatternUndoConsumeLangEl( SourceStream *is )
-{
- inputStreamPatternBackup( is );
- is->offset = is->patItem->data.length();
-}
-
-int inputStreamPatternConsumeData( SourceStream *is, int length )
-{
- debug( REALM_INPUT, "consuming %ld bytes\n", length );
-
- int consumed = 0;
-
- while ( true ) {
- if ( is->patItem == 0 )
- break;
-
- int avail = is->patItem->data.length() - is->offset;
-
- if ( length >= avail ) {
- /* Read up to the end of the data. Advance the
- * pattern item. */
- is->patItem = is->patItem->next;
- is->offset = 0;
-
- length -= avail;
- consumed += avail;
-
- if ( length == 0 )
- break;
- }
- else {
- is->offset += length;
- consumed += length;
- break;
- }
- }
-
- return consumed;
-}
-
-int inputStreamPatternUndoConsumeData( SourceStream *is, const char *data, int length )
-{
- is->offset -= length;
- return length;
-}
-
-extern "C" void initPatternFuncs()
-{
- memset( &patternFuncs, 0, sizeof(SourceFuncs) );
-
- patternFuncs.getData = &inputStreamPatternGetData;
- patternFuncs.consumeData = &inputStreamPatternConsumeData;
- patternFuncs.undoConsumeData = &inputStreamPatternUndoConsumeData;
-
- patternFuncs.consumeLangEl = &inputStreamPatternGetLangEl;
- patternFuncs.undoConsumeLangEl = &inputStreamPatternUndoConsumeLangEl;
-}
-
-
-/*
- * Replacement
- */
-
-SourceStream *newSourceStreamRepl( Replacement *replacement )
-{
- SourceStream *is = (SourceStream*)malloc(sizeof(SourceStream));
- memset( is, 0, sizeof(SourceStream) );
- is->handlesLine = true;
- is->replacement = replacement;
- is->replItem = replacement->list->head;
- is->funcs = &replFuncs;
- return is;
-}
-
-LangEl *inputStreamReplGetLangEl( SourceStream *is, long *bindId, char **data, long *length )
-{
- LangEl *klangEl = is->replItem->type == ReplItem::ExprType ?
- is->replItem->langEl : is->replItem->factor->langEl;
- *bindId = is->replItem->bindId;
-
- *data = 0;
- *length = 0;
- is->line = is->replItem->loc.line;
-
- if ( is->replItem->type == ReplItem::FactorType ) {
- if ( is->replItem->factor->typeRef->pdaLiteral != 0 ) {
- bool unusedCI;
- prepareLitString( is->replItem->data, unusedCI,
- is->replItem->factor->typeRef->pdaLiteral->token.data,
- is->replItem->factor->typeRef->pdaLiteral->token.loc );
-
- *data = is->replItem->data;
- *length = is->replItem->data.length();
- }
- }
-
- is->replItem = is->replItem->next;
- is->offset = 0;
- return klangEl;
-}
-
-int inputStreamReplGetData( SourceStream *is, int skip, char *dest, int length, int *copied )
-{
- *copied = 0;
-
- ReplItem *buf = is->replItem;
- int offset = is->offset;
-
- while ( true ) {
- if ( buf == 0 )
- return INPUT_EOD;
-
- if ( buf->type == ReplItem::ExprType || buf->type == ReplItem::FactorType )
- return INPUT_LANG_EL;
-
- if ( offset == 0 )
- is->line = buf->loc.line;
-
- assert ( buf->type == ReplItem::InputText );
- int avail = buf->data.length() - offset;
-
- if ( avail > 0 ) {
- /* The source data from the current buffer. */
- char *src = &buf->data[offset];
- int slen = avail <= length ? avail : length;
-
- /* Need to skip? */
- if ( skip > 0 && slen <= skip ) {
- /* Skipping the the whole source. */
- skip -= slen;
- }
- else {
- /* Either skip is zero, or less than slen. Skip goes to zero.
- * Some data left over, copy it. */
- src += skip;
- slen -= skip;
- skip = 0;
-
- memcpy( dest, src, slen ) ;
- *copied += slen;
- break;
- }
- }
-
- buf = buf->next;
- offset = 0;
- }
-
- return INPUT_DATA;
-}
-
-void inputStreamReplBackup( SourceStream *is )
-{
- if ( is->replItem == 0 )
- is->replItem = is->replacement->list->tail;
- else
- is->replItem = is->replItem->prev;
-}
-
-void inputStreamReplPushBackBuf( SourceStream *is, RunBuf *runBuf )
-{
- char *data = runBuf->data + runBuf->offset;
- long length = runBuf->length;
-
- if ( colm_log_parse ) {
- cerr << "push back data: ";
- cerr.write( data, length );
- cerr << endl;
- }
-
- if ( length == 0 )
- return;
-
- /* While pushing back past the current pattern item start. */
- while ( length > is->offset ) {
- length -= is->offset;
- if ( is->offset > 0 )
- assert( memcmp( is->replItem->data, data-length, is->offset ) == 0 );
- inputStreamReplBackup( is );
- is->offset = is->replItem->data.length();
- }
-
- is->offset -= length;
- assert( memcmp( &is->replItem->data[is->offset], data, length ) == 0 );
-}
-
-void inputStreamReplUndoConsumeLangEl( SourceStream *is )
-{
- inputStreamReplBackup( is );
- is->offset = is->replItem->data.length();
-}
-
-int inputStreamReplConsumeData( SourceStream *is, int length )
-{
- int consumed = 0;
-
- while ( true ) {
- if ( is->replItem == 0 )
- break;
-
- int avail = is->replItem->data.length() - is->offset;
-
- if ( length >= avail ) {
- /* Read up to the end of the data. Advance the
- * pattern item. */
- is->replItem = is->replItem->next;
- is->offset = 0;
-
- length -= avail;
- consumed += avail;
-
- if ( length == 0 )
- break;
- }
- else {
- is->offset += length;
- consumed += length;
- break;
- }
- }
-
- return consumed;
-}
-
-int inputStreamReplUndoConsumeData( SourceStream *is, const char *data, int length )
-{
- is->offset -= length;
- return length;
-}
-
-extern "C" void initReplFuncs()
-{
- memset( &replFuncs, 0, sizeof(SourceFuncs) );
-
- replFuncs.getData = &inputStreamReplGetData;
- replFuncs.consumeData = &inputStreamReplConsumeData;
- replFuncs.undoConsumeData = &inputStreamReplUndoConsumeData;
-
- replFuncs.consumeLangEl = &inputStreamReplGetLangEl;
- replFuncs.undoConsumeLangEl = &inputStreamReplUndoConsumeLangEl;
-}
-
-void sendNamedLangEl( Program *prg, Tree **sp, PdaRun *pdaRun, FsmRun *fsmRun, InputStream *inputStream )
-{
- /* All three set by consumeLangEl. */
- long bindId;
- char *data;
- long length;
-
- LangEl *klangEl = consumeLangEl( inputStream, &bindId, &data, &length );
-
- #ifdef COLM_LOG_PARSE
- if ( colm_log_parse ) {
- cerr << "named langEl: " << prg->rtd->lelInfo[klangEl->id].name << endl;
- }
- #endif
-
- /* Copy the token data. */
- Head *tokdata = 0;
- if ( data != 0 )
- tokdata = stringAllocFull( prg, data, length );
-
- Kid *input = makeTokenWithData( prg, pdaRun, fsmRun, inputStream, klangEl->id, tokdata );
-
- incrementSteps( pdaRun );
-
- ParseTree *parseTree = parseTreeAllocate( prg );
- parseTree->id = input->tree->id;
- parseTree->flags |= PF_NAMED;
- parseTree->shadow = input;
-
- if ( bindId > 0 )
- pushBinding( pdaRun, parseTree );
-
- pdaRun->parseInput = parseTree;
-}
-
-void initBindings( PdaRun *pdaRun )
-{
- /* Bindings are indexed at 1. Need a no-binding. */
- pdaRun->bindings = new Bindings;
- pdaRun->bindings->push(0);
-}
-
-void pushBinding( PdaRun *pdaRun, ParseTree *parseTree )
-{
- /* If the item is bound then store it in the bindings array. */
- pdaRun->bindings->push( parseTree );
-}
-
-void popBinding( PdaRun *pdaRun, ParseTree *parseTree )
-{
- ParseTree *lastBound = pdaRun->bindings->top();
- if ( lastBound == parseTree )
- pdaRun->bindings->pop();
-}
diff --git a/colm/debug.c b/colm/debug.c
deleted file mode 100644
index 6d9689e0..00000000
--- a/colm/debug.c
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Copyright 2010-2012 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Colm.
- *
- * Colm is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Colm is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Colm; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include <colm/debug.h>
-#include <stdarg.h>
-#include <stdio.h>
-#include <stdlib.h>
-
-long colmActiveRealm = 0;
-const char *colmRealmNames[REALMS] =
- {
- "BYTECODE",
- "PARSE",
- "MATCH",
- "COMPILE",
- "POOL",
- "PRINT",
- "INPUT",
- "SCAN",
- };
-
-int _debug( long realm, const char *fmt, ... )
-{
- int result = 0;
- if ( colmActiveRealm & realm ) {
- /* Compute the index by shifting. */
- int ind = 0;
- while ( (realm & 0x1) != 0x1 ) {
- realm >>= 1;
- ind += 1;
- }
-
- fprintf( stderr, "%s: ", colmRealmNames[ind] );
- va_list args;
- va_start( args, fmt );
- result = vfprintf( stderr, fmt, args );
- va_end( args );
- }
-
- return result;
-}
-
-void fatal( const char *fmt, ... )
-{
- va_list args;
- fprintf( stderr, "fatal: " );
- va_start( args, fmt );
- vfprintf( stderr, fmt, args );
- va_end( args );
- exit(1);
-}
-
-void message( const char *fmt, ... )
-{
- va_list args;
- fprintf( stderr, "message: " );
- va_start( args, fmt );
- vfprintf( stderr, fmt, args );
- va_end( args );
-}
diff --git a/colm/debug.h b/colm/debug.h
deleted file mode 100644
index 3fd9bb8e..00000000
--- a/colm/debug.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright 2010-2012 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Colm.
- *
- * Colm is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Colm is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Colm; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "config.h"
-
-void fatal( const char *fmt, ... );
-
-#ifdef DEBUG
-#define debug( realm, ... ) _debug( realm, __VA_ARGS__ )
-#define check_realm( realm ) _check_realm( realm )
-#else
-#define debug( realm, ... )
-#define check_realm( realm )
-#endif
-
-int _debug( long realm, const char *fmt, ... );
-
-void message( const char *fmt, ... );
-
-#define REALM_BYTECODE 0x00000001
-#define REALM_PARSE 0x00000002
-#define REALM_MATCH 0x00000004
-#define REALM_COMPILE 0x00000008
-#define REALM_POOL 0x00000010
-#define REALM_PRINT 0x00000020
-#define REALM_INPUT 0x00000040
-#define REALM_SCAN 0x00000080
-
-#define REALMS 32
-
-extern long colmActiveRealm;
-extern const char *colmRealmNames[REALMS];
-
-#ifdef __cplusplus
-}
-#endif
diff --git a/colm/declare.cc b/colm/declare.cc
deleted file mode 100644
index 167fe050..00000000
--- a/colm/declare.cc
+++ /dev/null
@@ -1,383 +0,0 @@
-/*
- * Copyright 2012 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Colm.
- *
- * Colm is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Colm is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Colm; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include "bytecode.h"
-#include "parsedata.h"
-#include "fsmrun.h"
-#include <iostream>
-#include <assert.h>
-
-LangEl *declareLangEl( Compiler *pd, Namespace *nspace, const String &data, LangEl::Type type )
-{
- /* If the id is already in the dict, it will be placed in last found. If
- * it is not there then it will be inserted and last found will be set to it. */
- TypeMapEl *inDict = nspace->typeMap.find( data );
- if ( inDict != 0 )
- error() << "'" << data << "' already defined as something else" << endp;
-
- /* Language element not there. Make the new lang el and insert.. */
- LangEl *langEl = new LangEl( nspace, data, type );
- TypeMapEl *typeMapEl = new TypeMapEl( data, langEl );
- nspace->typeMap.insert( typeMapEl );
- pd->langEls.append( langEl );
-
- return langEl;
-}
-
-/* Does not map the new language element. */
-LangEl *addLangEl( Compiler *pd, Namespace *nspace, const String &data, LangEl::Type type )
-{
- LangEl *langEl = new LangEl( nspace, data, type );
- pd->langEls.append( langEl );
- return langEl;
-}
-
-void declareTypeAlias( Compiler *pd, Namespace *nspace, const String &data, TypeRef *typeRef )
-{
- /* If the id is already in the dict, it will be placed in last found. If
- * it is not there then it will be inserted and last found will be set to it. */
- TypeMapEl *inDict = nspace->typeMap.find( data );
- if ( inDict != 0 )
- error() << "'" << data << "' already defined as something else" << endp;
-
- /* Language element not there. Make the new lang el and insert.. */
- TypeMapEl *typeMapEl = new TypeMapEl( data, typeRef );
- nspace->typeMap.insert( typeMapEl );
-}
-
-LangEl *findType( Compiler *pd, Namespace *nspace, const String &data )
-{
- /* If the id is already in the dict, it will be placed in last found. If
- * it is not there then it will be inserted and last found will be set to it. */
- TypeMapEl *inDict = nspace->typeMap.find( data );
-
- if ( inDict == 0 )
- error() << "'" << data << "' not declared as anything" << endp;
-
- return inDict->value;
-}
-
-
-void Compiler::declareBaseLangEls()
-{
- /* Order here is important because we make assumptions about the inbuild
- * language elements in the runtime. Note tokens are have identifiers set
- * in an initial pass. */
-
- /* Make a "_notoken" language element. This element is used when a
- * generation action fails to generate anything, but there is reverse code
- * that needs to be associated with a language element. This allows us to
- * always associate reverse code with the first language element produced
- * after a generation action. */
- noTokenLangEl = declareLangEl( this, rootNamespace, "_notoken", LangEl::Term );
- noTokenLangEl->ignore = true;
-
- /* Make the "stream" language element */
- ptrLangEl = declareLangEl( this, rootNamespace, "ptr", LangEl::Term );
- boolLangEl = declareLangEl( this, rootNamespace, "bool", LangEl::Term );
- intLangEl = declareLangEl( this, rootNamespace, "int", LangEl::Term );
- strLangEl = declareLangEl( this, rootNamespace, "str", LangEl::Term );
- streamLangEl = declareLangEl( this, rootNamespace, "stream", LangEl::Term );
- inputLangEl = declareLangEl( this, rootNamespace, "accum_stream", LangEl::Term );
- ignoreLangEl = declareLangEl( this, rootNamespace, "il", LangEl::Term );
-
- /* Make the EOF language element. */
- eofLangEl = 0;
-
- /* Make the "any" language element */
- anyLangEl = declareLangEl( this, rootNamespace, "any", LangEl::NonTerm );
-}
-
-
-void Compiler::addProdRedObjectVar( ObjectDef *localFrame, LangEl *nonTerm )
-{
- UniqueType *prodNameUT = findUniqueType( TYPE_TREE, nonTerm );
- TypeRef *typeRef = new TypeRef( InputLoc(), prodNameUT );
- ObjField *el = new ObjField( InputLoc(), typeRef, "lhs" );
-
- el->isLhsEl = true;
-
- initLocalInstructions( el );
-
- localFrame->insertField( el->name, el );
-}
-
-void Compiler::addProdLHSLoad( Definition *prod, CodeVect &code, long &insertPos )
-{
- ObjField *lhsField = prod->redBlock->localFrame->findField("lhs");
- assert( lhsField != 0 );
-
- CodeVect loads;
- if ( lhsField->beenReferenced ) {
- loads.append( IN_INIT_LHS_EL );
- loads.appendHalf( lhsField->offset );
- }
-
- code.insert( insertPos, loads );
- insertPos += loads.length();
-}
-
-void Compiler::addPushBackLHS( Definition *prod, CodeVect &code, long &insertPos )
-{
- CodeBlock *block = prod->redBlock;
-
- /* If the lhs tree is dirty then we will need to save off the old lhs
- * before it gets modified. We want to avoid this for attribute
- * modifications. The computation of dirtyTree should deal with this for
- * us. */
- ObjField *lhsField = block->localFrame->findField("lhs");
- assert( lhsField != 0 );
-
- if ( lhsField->beenReferenced ) {
- code.append( IN_STORE_LHS_EL );
- code.appendHalf( lhsField->offset );
- }
-}
-
-void Compiler::addProdRHSVars( ObjectDef *localFrame, ProdElList *prodElList )
-{
- long position = 1;
- for ( ProdElList::Iter rhsEl = *prodElList; rhsEl.lte(); rhsEl++, position++ ) {
- if ( rhsEl->type == ProdEl::ReferenceType ) {
- /* Use an offset of zero. For frame objects we compute the offset on
- * demand. */
- String name( 8, "r%d", position );
- ObjField *el = new ObjField( InputLoc(), rhsEl->typeRef, name );
- rhsEl->objField = el;
-
- /* Right hand side elements are constant. */
- el->isConst = true;
- el->isRhsEl = true;
-
- /* Only ever fetch for reading since they are constant. */
- el->inGetR = IN_GET_LOCAL_R;
-
- localFrame->insertField( el->name, el );
- }
- }
-}
-
-void Compiler::addProdRHSLoads( Definition *prod, CodeVect &code, long &insertPos )
-{
- CodeVect loads;
- long elPos = 0;
- for ( ProdElList::Iter rhsEl = *prod->prodElList; rhsEl.lte(); rhsEl++, elPos++ ) {
- if ( rhsEl->type == ProdEl::ReferenceType ) {
- if ( rhsEl->objField->beenReferenced ) {
- loads.append ( IN_INIT_RHS_EL );
- loads.appendHalf( elPos );
- loads.appendHalf( rhsEl->objField->offset );
- }
- }
- }
-
- /* Insert and update the insert position. */
- code.insert( insertPos, loads );
- insertPos += loads.length();
-}
-
-void GenericType::declare( Compiler *pd, Namespace *nspace )
-{
- //std::cout << "generic " << g->name << std::endl;
-
- LangEl *langEl = declareLangEl( pd, nspace, name, LangEl::NonTerm );
-
- /* Add one empty production. */
- ProdElList *emptyList = new ProdElList;
- //addProduction( g->loc, langEl, emptyList, false, 0, 0 );
-
- {
- LangEl *prodName = langEl;
- assert( prodName->type == LangEl::NonTerm );
-
- Definition *newDef = new Definition( InputLoc(), prodName,
- emptyList, false, 0,
- pd->prodList.length(), prodName->defList.length(),
- Definition::Production );
-
- prodName->defList.append( newDef );
- pd->prodList.append( newDef );
- newDef->predOf = 0;
- }
-
- langEl->generic = this;
- this->langEl = langEl;
-}
-
-void Namespace::declare( Compiler *pd )
-{
- for ( GenericList::Iter g = genericList; g.lte(); g++ )
- g->declare( pd, this );
-
- for ( LiteralDict::Iter l = literalDict; l.lte(); l++ ) {
- if ( l->value->dupOf != 0 ) {
- /* Duplicate of another. Use the lang el of that token. */
- assert( l->value->dupOf->tdLangEl != 0 );
- l->value->tdLangEl = l->value->dupOf->tdLangEl;
- }
- else {
- if ( l->value->isZero ) {
- l->value->tdLangEl = l->value->tokenRegion->ciLel;
- assert( l->value->tokenRegion->ciLel != 0 );
- }
- else {
- /* Original. Create a token for the literal. */
- LangEl *newLangEl = declareLangEl( pd, this, l->value->name, LangEl::Term );
-
- newLangEl->lit = l->value->literal;
- newLangEl->isLiteral = true;
- newLangEl->tokenDef = l->value;
-
- l->value->tdLangEl = newLangEl;
-
- if ( l->value->noPreIgnore )
- newLangEl->noPreIgnore = true;
- if ( l->value->noPostIgnore )
- newLangEl->noPostIgnore = true;
- }
- }
- }
-
- for ( ContextDefList::Iter c = contextDefList; c.lte(); c++ ) {
- LangEl *lel = declareLangEl( pd, this, c->name, LangEl::NonTerm );
- ProdElList *emptyList = new ProdElList;
- //addProduction( c->context->loc, c->name, emptyList, false, 0, 0 );
-
- {
- LangEl *prodName = lel;
- assert( prodName->type == LangEl::NonTerm );
-
- Definition *newDef = new Definition( loc, prodName,
- emptyList, false, 0,
- pd->prodList.length(), prodName->defList.length(),
- Definition::Production );
-
- prodName->defList.append( newDef );
- pd->prodList.append( newDef );
- newDef->predOf = 0;
-
- /* If the token has the same name as the region it is in, then also
- * insert it into the symbol map for the parent region. */
- if ( strcmp( c->name, this->name ) == 0 ) {
- /* Insert the name into the top of the region stack after popping the
- * region just created. We need it in the parent. */
- TypeMapEl *typeMapEl = new TypeMapEl( c->name, prodName );
- this->parentNamespace->typeMap.insert( typeMapEl );
- }
- }
-
- c->context->lel = lel;
- lel->contextDef = c->context;
- lel->objectDef = c->context->contextObjDef;
- }
-
- for ( TokenDefListNs::Iter t = tokenDefList; t.lte(); t++ ) {
- /* Literals already taken care of. */
- if ( ! t->isLiteral ) {
- if ( t->dupOf != 0 ) {
- /* Duplicate of another. Use the lang el of that token. */
- assert( t->dupOf->tdLangEl != 0 );
- t->tdLangEl = t->dupOf->tdLangEl;
- }
- else {
- /* Create the token. */
- LangEl *tokEl = declareLangEl( pd, this, t->name, LangEl::Term );
- tokEl->ignore = t->ignore;
- tokEl->transBlock = t->codeBlock;
- tokEl->objectDef = t->objectDef;
- tokEl->contextIn = t->contextIn;
- tokEl->tokenDef = t;
-
- if ( t->noPreIgnore )
- tokEl->noPreIgnore = true;
- if ( t->noPostIgnore )
- tokEl->noPostIgnore = true;
-
- t->tdLangEl = tokEl;
- }
- }
- }
-
- for ( NtDefList::Iter n = ntDefList; n.lte(); n++ ) {
- /* Get the language element. */
- LangEl *langEl = declareLangEl( pd, this, n->name, LangEl::NonTerm );
- //$$->langEl = langEl;
-
- /* Get the language element. */
- langEl->objectDef = n->objectDef;
- langEl->reduceFirst = n->reduceFirst;
- langEl->contextIn = n->contextIn;
- langEl->defList.transfer( *n->defList );
-
- for ( LelDefList::Iter d = langEl->defList; d.lte(); d++ ) {
- d->prodName = langEl;
-
- if ( d->redBlock != 0 ) {
- pd->addProdRedObjectVar( d->redBlock->localFrame, langEl );
- pd->addProdRHSVars( d->redBlock->localFrame, d->prodElList );
- }
-
- /* References to the reduce item. */
- }
- }
-
- for ( TypeAliasList::Iter ta = typeAliasList; ta.lte(); ta++ )
- declareTypeAlias( pd, this, ta->name, ta->typeRef );
-
- /* Go into child aliases. */
- for ( NamespaceVect::Iter c = childNamespaces; c.lte(); c++ )
- (*c)->declare( pd );
-}
-
-void Compiler::setPrecedence()
-{
- for ( PredDeclList::Iter predDecl = predDeclList; predDecl != 0; predDecl++ ) {
- predDecl->typeRef->lookupType( this );
-
- LangEl *langEl = predDecl->typeRef->uniqueType->langEl;
- langEl->predType = predDecl->predType;
- langEl->predValue = predDecl->predValue;
- }
-}
-
-/*
- * Type Declaration Root.
- */
-void Compiler::typeDeclaration()
-{
- /* These must be declared first, since the runtime assumes their identifiers. */
- declareBaseLangEls();
-
- makeIgnoreCollectors();
-
- rootNamespace->declare( this );
-
- /* Fill any empty scanners with a default token. */
- initEmptyScanners();
-
- /* Create the default scanner which will return single characters for us
- * when we have no other scanner */
- createDefaultScanner();
-
- initUniqueTypes();
-
- setPrecedence();
-}
diff --git a/colm/defs.h.in b/colm/defs.h.in
deleted file mode 100644
index 06a3f9df..00000000
--- a/colm/defs.h.in
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright 2001 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Ragel.
- *
- * Ragel is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Ragel is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Ragel; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-
-/* Configuration */
-#undef COLM_LOG
-#undef COLM_LOG_BYTECODE
-#undef COLM_LOG_PARSE
-#undef COLM_LOG_MATCH
-#undef COLM_LOG_COMPILE
-
-/* If COLM_LOG is defined then turn on all logging options. */
-#ifdef COLM_LOG
-#define COLM_LOG_BYTECODE 1
-#define COLM_LOG_PARSE 1
-#define COLM_LOG_MATCH 1
-#define COLM_LOG_COMPILE 1
-#endif
-
-extern int colm_log_bytecode;
-extern int colm_log_parse;
-extern int colm_log_match;
-extern int colm_log_compile;
-extern int colm_log_conds;
-
-/* The size of `long', as computed by sizeof. */
-#undef SIZEOF_LONG
-
-#endif /* _CONFIG_H */
diff --git a/colm/dotgen.cc b/colm/dotgen.cc
deleted file mode 100644
index e4474958..00000000
--- a/colm/dotgen.cc
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- * Copyright 2006-2012 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Colm.
- *
- * Colm is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Colm is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Colm; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-
-#include "global.h"
-#include "parsedata.h"
-
-using namespace std;
-
-
-void Compiler::writeTransList( PdaState *state )
-{
- for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) {
- /* Write out the from and to states. */
- out << "\t" << state->stateNum << " -> " << trans->value->toState->stateNum;
-
- /* Begin the label. */
- out << " [ label = \"";
- long key = trans->key;
- LangEl *lel = langElIndex[key];
- if ( lel != 0 )
- out << lel->name;
- else
- out << (char)key;
-
- if ( trans->value->actions.length() > 0 ) {
- out << " / ";
- for ( ActDataList::Iter act = trans->value->actions; act.lte(); act++ ) {
- switch ( *act & 0x3 ) {
- case 1:
- out << "S(" << trans->value->actOrds[act.pos()] << ")";
- break;
- case 2: {
- out << "R(" << prodIdIndex[(*act >> 2)]->data <<
- ", " << trans->value->actOrds[act.pos()] << ")";
- break;
- }
- case 3: {
- out << "SR(" << prodIdIndex[(*act >> 2)]->data <<
- ", " << trans->value->actOrds[act.pos()] << ")";
- break;
- }}
- if ( ! act.last() )
- out << ", ";
- }
- }
-
- out << "\" ];\n";
- }
-}
-
-void Compiler::writeDotFile( PdaGraph *graph )
-{
- out <<
- "digraph " << parserName << " {\n"
- " rankdir=LR;\n"
- " ranksep=\"0\"\n"
- " nodesep=\"0.25\"\n"
- "\n";
-
- /* Define the psuedo states. Transitions will be done after the states
- * have been defined as either final or not final. */
- out <<
- " node [ shape = point ];\n";
-
- for ( int i = 0; i < graph->entryStateSet.length(); i++ )
- out << "\tENTRY" << i << " [ label = \"\" ];\n";
-
- out <<
- "\n"
- " node [ shape = circle, fixedsize = true, height = 0.6 ];\n";
-
- /* Walk the states. */
- for ( PdaStateList::Iter st = graph->stateList; st.lte(); st++ )
- out << " " << st->stateNum << " [ label = \"" << st->stateNum << "\" ];\n";
-
- out << "\n";
-
- /* Walk the states. */
- for ( PdaStateList::Iter st = graph->stateList; st.lte(); st++ )
- writeTransList( st );
-
- /* Start state and other entry points. */
- for ( PdaStateSet::Iter st = graph->entryStateSet; st.lte(); st++ )
- out << "\tENTRY" << st.pos() << " -> " << (*st)->stateNum << " [ label = \"\" ];\n";
-
- out <<
- "}\n";
-}
-
-void Compiler::writeDotFile()
-{
- writeDotFile( pdaGraph );
-}
-
diff --git a/colm/dotgen.h b/colm/dotgen.h
deleted file mode 100644
index d05a2410..00000000
--- a/colm/dotgen.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Copyright 2001-2012 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Colm.
- *
- * Colm is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Colm is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Colm; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#ifndef _GVDOTGEN_H
-#define _GVDOTGEN_H
-
-#include <iostream>
-
-#if 0
-
-class GraphvizDotGen : public CodeGenData
-{
-public:
- GraphvizDotGen( ostream &out ) : CodeGenData(out) { }
-
- /* Print an fsm to out stream. */
- void writeTransList( RedState *state );
- void writeDotFile( );
-
- virtual void finishRagelDef();
-
-private:
- /* Writing labels and actions. */
- std::ostream &ONCHAR( Key lowKey, Key highKey );
- std::ostream &TRANS_ACTION( RedState *fromState, RedTrans *trans );
- std::ostream &ACTION( RedAction *action );
- std::ostream &KEY( Key key );
-};
-
-#endif
-
-
-#endif /* _GVDOTGEN_H */
diff --git a/colm/exports.cc b/colm/exports.cc
deleted file mode 100644
index f5153330..00000000
--- a/colm/exports.cc
+++ /dev/null
@@ -1,285 +0,0 @@
-/*
- * Copyright 2006-2012 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Colm.
- *
- * Colm is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Colm is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Colm; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include "parsedata.h"
-#include "fsmcodegen.h"
-#include "redfsm.h"
-#include "bstmap.h"
-#include "fsmrun.h"
-#include "debug.h"
-#include <sstream>
-#include <string>
-
-using std::ostream;
-using std::ostringstream;
-using std::string;
-using std::cerr;
-using std::endl;
-
-void Compiler::openNameSpace( ostream &out, Namespace *nspace )
-{
- if ( nspace == defaultNamespace || nspace == rootNamespace )
- return;
-
- openNameSpace( out, nspace->parentNamespace );
- out << "namespace " << nspace->name << " { ";
-}
-
-void Compiler::closeNameSpace( ostream &out, Namespace *nspace )
-{
- if ( nspace == defaultNamespace || nspace == rootNamespace )
- return;
-
- openNameSpace( out, nspace->parentNamespace );
- out << " }";
-}
-
-void Compiler::generateExports()
-{
- ostream &out = *outStream;
-
- out <<
- "#ifndef _EXPORTS_H\n"
- "#define _EXPORTS_H\n"
- "\n"
- "#include <colm/colm.h>\n"
- "#include <string>\n"
- "\n";
-
- out <<
- "inline void appendString( ColmPrintArgs *args, const char *data, int length )\n"
- "{\n"
- " std::string *str = (std::string*)args->arg;\n"
- " *str += std::string( data, length );\n"
- "}\n"
- "\n";
-
- out <<
- "inline std::string printTreeStr( ColmProgram *prg, ColmTree *tree, bool trim )\n"
- "{\n"
- " std::string str;\n"
- " ColmPrintArgs printArgs = { &str, 1, 0, trim, &appendString, \n"
- " &printNull, &printTermTree, &printNull };\n"
- " printTreeArgs( prg, vm_root(prg), &printArgs, tree );\n"
- " return str;\n"
- "}\n"
- "\n";
-
- /* Declare. */
- for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
- if ( lel->isEOF ) {
- out << "// isEOF\n";
- continue;
- }
- if ( lel->tokenDef != 0 && lel->tokenDef->tokenRegion != 0 && lel->tokenDef->tokenRegion->isTokenOnly ) {
- out << "// isTokenOnly\n";
- continue;
- }
- if ( lel->tokenDef != 0 && lel->tokenDef->tokenRegion != 0 && lel->tokenDef->tokenRegion->isIgnoreOnly ) {
- out << "// isIgnoreOnly\n";
- continue;
- }
- if ( lel->tokenDef != 0 && lel->tokenDef->tokenRegion != 0 && lel->tokenDef->tokenRegion->isCiOnly ) {
- out << "// isCiOnly\n";
- continue;
- }
- if ( lel->ciRegion != 0 ) {
- out << "// ciRegion != 0\n";
- continue;
- }
- openNameSpace( out, lel->nspace );
- out << "struct " << lel->fullName << ";";
- closeNameSpace( out, lel->nspace );
- out << "\n";
- }
-
- /* Class definitions. */
- for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
- if ( lel->isEOF ) {
- out << "// isTokenOnly\n";
- continue;
- }
- if ( lel->tokenDef != 0 && lel->tokenDef->tokenRegion != 0 && lel->tokenDef->tokenRegion->isTokenOnly ) {
- out << "// isTokenOnly\n";
- continue;
- }
- if ( lel->tokenDef != 0 && lel->tokenDef->tokenRegion != 0 && lel->tokenDef->tokenRegion->isIgnoreOnly ) {
- out << "// isIgnoreOnly\n";
- continue;
- }
- if ( lel->tokenDef != 0 && lel->tokenDef->tokenRegion != 0 && lel->tokenDef->tokenRegion->isCiOnly ) {
- out << "// isCiOnly\n";
- continue;
- }
- if ( lel->ciRegion != 0 ) {
- out << "// ciRegion != 0\n";
- continue;
- }
-
- openNameSpace( out, lel->nspace );
- out << "struct " << lel->fullName << "\n";
- out << "{\n";
- out << " std::string text() { return printTreeStr( prg, tree, true ); }\n";
- out << " std::string text_notrim() { return printTreeStr( prg, tree, false ); }\n";
- out << " operator ColmTree *() { return tree; }\n";
- out << " ColmProgram *prg;\n";
- out << " ColmTree *tree;\n";
-
- if ( mainReturnUT != 0 && mainReturnUT->langEl == lel ) {
- out << " " << lel->fullName << "( ColmProgram *prg ) : prg(prg), tree(returnVal(prg)) {}\n";
- }
- out << " " << lel->fullName << "( ColmProgram *prg, ColmTree *tree ) : prg(prg), tree(tree) {}\n";
-
- if ( lel->objectDef != 0 && lel->objectDef->objFieldList != 0 ) {
- ObjFieldList *objFieldList = lel->objectDef->objFieldList;
- for ( ObjFieldList::Iter ofi = *objFieldList; ofi.lte(); ofi++ ) {
- ObjField *field = ofi->value;
- if ( field->useOffset && field->typeRef != 0 ) {
- UniqueType *ut = field->typeRef->lookupType( this );
-
- if ( ut != 0 && ut->typeId == TYPE_TREE ) {
- out << " " << ut->langEl->refName << " " << field->name << "();\n";
- }
- }
-
- if ( field->isRhsGet ) {
- UniqueType *ut = field->typeRef->lookupType( this );
-
- if ( ut != 0 && ut->typeId == TYPE_TREE ) {
- out << " " << ut->langEl->refName << " " << field->name << "();\n";
- }
- }
- }
- }
-
- if ( lel->isRepeat ) {
- out << " " << "int end() { return repeatEnd( tree ); }\n";
- out << " " << lel->refName << " next();\n";
- out << " " << lel->repeatOf->refName << " value();\n";
- }
-
- if ( lel->isList ) {
- out << " " << "int last() { return listLast( tree ); }\n";
- out << " " << lel->refName << " next();\n";
- out << " " << lel->repeatOf->refName << " value();\n";
- }
- out << "};";
- closeNameSpace( out, lel->nspace );
- out << "\n";
- }
-
- for ( ObjFieldList::Iter of = *globalObjectDef->objFieldList; of.lte(); of++ ) {
- ObjField *field = of->value;
- if ( field->isExport ) {
- UniqueType *ut = field->typeRef->lookupType(this);
- if ( ut != 0 && ut->typeId == TYPE_TREE ) {
- out << ut->langEl->refName << " " << field->name << "( ColmProgram *prg );\n";
- }
- }
- }
-
- out << "#endif\n";
-}
-
-void Compiler::generateExportsImpl()
-{
- ostream &out = *outStream;
-
- if ( gblExportTo != 0 ) {
- out << "#include \"" << gblExportTo << "\"\n";
- }
-
- /* Function implementations. */
- for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
- if ( lel->objectDef != 0 && lel->objectDef->objFieldList != 0 ) {
- ObjFieldList *objFieldList = lel->objectDef->objFieldList;
- for ( ObjFieldList::Iter ofi = *objFieldList; ofi.lte(); ofi++ ) {
- ObjField *field = ofi->value;
- if ( field->useOffset && field->typeRef != 0 ) {
- UniqueType *ut = field->typeRef->lookupType( this );
-
- if ( ut != 0 && ut->typeId == TYPE_TREE ) {
- out << ut->langEl->refName << " " << lel->declName << "::" << field->name <<
- "() { return " << ut->langEl->refName <<
- "( prg, getAttr( tree, " << field->offset << ") ); }\n";
- }
- }
-
- if ( field->isRhsGet ) {
- UniqueType *ut = field->typeRef->lookupType( this );
-
- if ( ut != 0 && ut->typeId == TYPE_TREE ) {
- out << ut->langEl->refName << " " << lel->declName << "::" << field->name <<
- "() { static int a[] = {";
-
- /* Need to place the array computing the val. */
- out << field->rhsVal.length();
- for ( Vector<RhsVal>::Iter rg = field->rhsVal; rg.lte(); rg++ ) {
- out << ", " << rg->prodNum;
- out << ", " << rg->childNum;
- }
-
- out << "}; return " << ut->langEl->refName <<
- "( prg, getRhsVal( prg, tree, a ) ); }\n";
- }
- }
- }
- }
-
- if ( lel->isRepeat ) {
- out << lel->refName << " " << lel->declName << "::" << " next"
- "() { return " << lel->refName <<
- "( prg, getRepeatNext( tree ) ); }\n";
-
- out << lel->repeatOf->refName << " " << lel->declName << "::" << " value"
- "() { return " << lel->repeatOf->refName <<
- "( prg, getRepeatVal( tree ) ); }\n";
-
- }
-
- if ( lel->isList ) {
- out << lel->refName << " " << lel->declName << "::" << " next"
- "() { return " << lel->refName <<
- "( prg, getRepeatNext( tree ) ); }\n";
-
- out << lel->repeatOf->refName << " " << lel->declName << "::" << " value"
- "() { return " << lel->repeatOf->refName <<
- "( prg, getRepeatVal( tree ) ); }\n";
- }
- }
-
- out << "\n";
-
- for ( ObjFieldList::Iter of = *globalObjectDef->objFieldList; of.lte(); of++ ) {
- ObjField *field = of->value;
- if ( field->isExport ) {
- UniqueType *ut = field->typeRef->lookupType(this);
- if ( ut != 0 && ut->typeId == TYPE_TREE ) {
- out <<
- ut->langEl->refName << " " << field->name << "(ColmProgram *prg)\n"
- "{ return " << ut->langEl->refName << "( prg, getGlobal( prg, " <<
- field->offset << ") ); }\n";
- }
- }
- }
-}
-
-
diff --git a/colm/fsmap.cc b/colm/fsmap.cc
deleted file mode 100644
index a4c072b6..00000000
--- a/colm/fsmap.cc
+++ /dev/null
@@ -1,856 +0,0 @@
-/*
- * Copyright 2006-2012 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Colm.
- *
- * Colm is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Colm is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Colm; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include "config.h"
-#include "defs.h"
-#include "fsmgraph.h"
-#include <iostream>
-
-using std::cerr;
-using std::endl;
-
-CondData *condData = 0;
-KeyOps *keyOps = 0;
-
-/* Add a single (ordering, action) entry to this action table. */
-void ActionTable::setAction( int ordering, Action *action )
-{
-    /* A multi-insert is required: the same action may be embedded in one
-     * transition several times and each instance must be kept. */
-    insertMulti( ordering, action );
-}
-
-/* Copy every (key, value) entry of another action table into this table. */
-void ActionTable::setActions( const ActionTable &other )
-{
-    ActionTable::Iter src = other;
-    while ( src.lte() ) {
-        insertMulti( src->key, src->value );
-        src++;
-    }
-}
-
-/* Insert nActs entries, pairing orderings[i] with actions[i] for each i in
- * [0, nActs). */
-void ActionTable::setActions( int *orderings, Action **actions, int nActs )
-{
-    int idx = 0;
-    while ( idx < nActs ) {
-        insertMulti( orderings[idx], actions[idx] );
-        idx += 1;
-    }
-}
-
-/* Return true if the given action is the value of at least one entry in
- * this table, false otherwise. The search stops at the first match. */
-bool ActionTable::hasAction( Action *action )
-{
-    bool found = false;
-    for ( int pos = 0; !found && pos < length(); pos++ )
-        found = ( data[pos].value == action );
-    return found;
-}
-
-/* Add a single (ordering, token definition) entry to this longest-match
- * action table. */
-void LmActionTable::setAction( int ordering, TokenDef *action )
-{
-    /* A multi-insert is required: the same item may be embedded in one
-     * transition several times and each instance must be kept. */
-    insertMulti( ordering, action );
-}
-
-/* Copy every (key, value) entry of another longest-match action table into
- * this table. */
-void LmActionTable::setActions( const LmActionTable &other )
-{
-    LmActionTable::Iter src = other;
-    while ( src.lte() ) {
-        insertMulti( src->key, src->value );
-        src++;
-    }
-}
-
-void ErrActionTable::setAction( int ordering, Action *action, int transferPoint )
-{
- insertMulti( ErrActionTableEl( action, ordering, transferPoint ) );
-}
-
-void ErrActionTable::setActions( const ErrActionTable &other )
-{
- for ( ErrActionTable::Iter act = other; act.lte(); act++ )
- insertMulti( ErrActionTableEl( act->action, act->ordering, act->transferPoint ) );
-}
-
-/* Record a priority in this table. If the table already holds an element
- * that collides with the new one, the existing element is replaced only when
- * the new ordering is greater than or equal to the stored ordering. */
-void PriorTable::setPrior( int ordering, PriorDesc *desc )
-{
-    PriorEl *existing = 0;
-
-    /* A non-null result means the element was newly inserted; done. */
-    if ( insert( PriorEl(ordering, desc), &existing ) != 0 )
-        return;
-
-    /* There is already a priority on the same key as desc. The later one
-     * (larger or equal ordering) wins. */
-    if ( ordering >= existing->ordering )
-        *existing = PriorEl( ordering, desc );
-}
-
-/* Merge all priorities of another priority table into this one. Each entry
- * goes through setPrior, so duplicates are resolved by ordering. */
-void PriorTable::setPriors( const PriorTable &other )
-{
-    for ( PriorTable::Iter src = other; src.lte(); src++ )
-        setPrior( src->ordering, src->desc );
-}
-
-/* Set the priority of the transitions leaving the start state. The start
- * state is isolated first so it has no other entry points, then every out
- * transition of the start state that has a target state receives the
- * priority. Transitions with a null target (toState == 0) are not touched. */
-void FsmGraph::startFsmPrior( int ordering, PriorDesc *prior )
-{
-    /* Make sure the start state has no other entry points. */
-    isolateStartState();
-
-    for ( TransList::Iter tr = startState->outList; tr.lte(); tr++ ) {
-        /* Skip transitions that do not lead to a state. */
-        if ( tr->toState == 0 )
-            continue;
-        tr->priorTable.setPrior( ordering, prior );
-    }
-}
-
-/* Set the priority on every transition in the graph that has a target
- * state. Walks the out list of each state in the state list. */
-void FsmGraph::allTransPrior( int ordering, PriorDesc *prior )
-{
-    for ( StateList::Iter st = stateList; st.lte(); st++ ) {
-        for ( TransList::Iter tr = st->outList; tr.lte(); tr++ ) {
-            /* Skip transitions that do not lead to a state. */
-            if ( tr->toState == 0 )
-                continue;
-            tr->priorTable.setPrior( ordering, prior );
-        }
-    }
-}
-
-/* Set the priority of every transition that currently enters a final state.
- * Only existing in-transitions are affected: transitions that are attached
- * to a final state later on will not receive the priority, since the graph
- * has no notion of pending in-transition data the way it has pending
- * out-transition data. */
-void FsmGraph::finishFsmPrior( int ordering, PriorDesc *prior )
-{
-    for ( StateSet::Iter fin = finStateSet; fin.lte(); fin++ ) {
-        /* Apply to each transition on the final state's in list. */
-        TransInList::Iter inTr = (*fin)->inList;
-        for ( ; inTr.lte(); inTr++ )
-            inTr->priorTable.setPrior( ordering, prior );
-    }
-}
-
-/* Set the priority for transitions that may later be made going out of this
- * machine. The priority is stored in the out priority table of each final
- * state. */
-void FsmGraph::leaveFsmPrior( int ordering, PriorDesc *prior )
-{
-    for ( StateSet::Iter fin = finStateSet; fin.lte(); fin++ ) {
-        (*fin)->outPriorTable.setPrior( ordering, prior );
-    }
-}
-
-
-/* Set an action to execute on the starting transitions. The start state is
- * isolated first so it has no other entry points, then the action is added
- * to every out transition of the start state that has a target state.
- * Transitions with a null target (toState == 0) are not touched. */
-void FsmGraph::startFsmAction( int ordering, Action *action )
-{
-    /* Make sure the start state has no other entry points. */
-    isolateStartState();
-
-    for ( TransList::Iter tr = startState->outList; tr.lte(); tr++ ) {
-        /* Skip transitions that do not lead to a state. */
-        if ( tr->toState == 0 )
-            continue;
-        tr->actionTable.setAction( ordering, action );
-    }
-}
-
-/* Set an action on every transition in the graph that has a target state.
- * Walks the out list of each state in the state list. */
-void FsmGraph::allTransAction( int ordering, Action *action )
-{
-    for ( StateList::Iter st = stateList; st.lte(); st++ ) {
-        for ( TransList::Iter tr = st->outList; tr.lte(); tr++ ) {
-            /* Skip transitions that do not lead to a state. */
-            if ( tr->toState == 0 )
-                continue;
-            tr->actionTable.setAction( ordering, action );
-        }
-    }
-}
-
-/* Set an action to execute upon entering a final state. The action is added
- * to every transition on the in list of each final state. If the start
- * state is final there is no transition for its initial entry, so in effect
- * the action runs whenever a final state is entered from within the same
- * machine. */
-void FsmGraph::finishFsmAction( int ordering, Action *action )
-{
-    for ( StateSet::Iter fin = finStateSet; fin.lte(); fin++ ) {
-        /* Apply to each transition on the final state's in list. */
-        TransInList::Iter inTr = (*fin)->inList;
-        for ( ; inTr.lte(); inTr++ )
-            inTr->actionTable.setAction( ordering, action );
-    }
-}
-
-/* Set an action for transitions that may later be made going out of this
- * machine. The action is stored in the out action table of each final
- * state. */
-void FsmGraph::leaveFsmAction( int ordering, Action *action )
-{
-    for ( StateSet::Iter fin = finStateSet; fin.lte(); fin++ ) {
-        (*fin)->outActionTable.setAction( ordering, action );
-    }
-}
-
-/* Add a longest-match item (used when constructing scanners) to the
- * longest-match action table of every transition that enters a final
- * state. */
-void FsmGraph::longMatchAction( int ordering, TokenDef *lmPart )
-{
-    for ( StateSet::Iter fin = finStateSet; fin.lte(); fin++ ) {
-        /* Apply to each transition on the final state's in list. */
-        TransInList::Iter inTr = (*fin)->inList;
-        for ( ; inTr.lte(); inTr++ )
-            inTr->lmActionTable.setAction( ordering, lmPart );
-    }
-}
-
-/* Make the out list of a state cover the whole key range, from
- * keyOps->minKey to keyOps->maxKey. For every range that no existing
- * transition covers, a new transition is attached with 0 passed as the
- * target; elsewhere in this file a toState of 0 is treated as the error
- * transition. Existing transitions are kept and re-appended in their
- * original order. */
-void FsmGraph::fillGaps( FsmState *state )
-{
- if ( state->outList.length() == 0 ) {
- /* No transitions at all: a single new transition spans the entire key
- * range. */
- attachNewTrans( state, 0, keyOps->minKey, keyOps->maxKey );
- }
- else {
- /* Move the existing transitions aside; the out list is rebuilt below
- * with gap-filling transitions interleaved. */
- TransList srcList;
- srcList.transfer( state->outList );
-
- /* Check for a gap at the beginning. */
- TransList::Iter trans = srcList, next;
- if ( keyOps->minKey < trans->lowKey ) {
- /* The gap ends one key below the first transition's low key. */
- Key highKey = trans->lowKey;
- highKey.decrement();
-
- attachNewTrans( state, 0, keyOps->minKey, highKey );
- }
-
- /* Put the first source transition back on the out list. The following
- * element is fetched before the append. */
- next = trans.next();
- state->outList.append( trans );
-
- /* Keep the last high end. */
- Key lastHigh = trans->highKey;
-
- /* Loop each remaining source range. */
- for ( trans = next; trans.lte(); trans = next ) {
- /* Make the next key following the last range. */
- Key nextKey = lastHigh;
- nextKey.increment();
-
- /* Check for a gap from last up to here. */
- if ( nextKey < trans->lowKey ) {
- /* Make the high end of the range that fills the gap. */
- Key highKey = trans->lowKey;
- highKey.decrement();
-
- attachNewTrans( state, 0, nextKey, highKey );
- }
-
- /* Put this source transition back on the out list, again fetching
- * the following element before the append. */
- next = trans.next();
- state->outList.append( trans );
-
- /* Keep the last high end. */
- lastHigh = trans->highKey;
- }
-
- /* Now check for a gap on the end to fill. */
- if ( lastHigh < keyOps->maxKey ) {
- /* The final gap starts one key past the last covered key. */
- lastHigh.increment();
-
- attachNewTrans( state, 0, lastHigh, keyOps->maxKey );
- }
- }
-}
-
-void FsmGraph::setErrorAction( FsmState *state, int ordering, Action *action )
-{
- /* Fill any gaps in the out list with an error transition. */
- fillGaps( state );
-
- /* Set error transitions in the transitions that go to error. */
- for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) {
- if ( trans->toState == 0 )
- trans->actionTable.setAction( ordering, action );
- }
-}
-
-void FsmGraph::setErrorActions( FsmState *state, const ActionTable &other )
-{
- /* Fill any gaps in the out list with an error transition. */
- fillGaps( state );
-
- /* Set error transitions in the transitions that go to error. */
- for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) {
- if ( trans->toState == 0 )
- trans->actionTable.setActions( other );
- }
-}
-
-
-/* Give a target state for error transitions. */
-void FsmGraph::setErrorTarget( FsmState *state, FsmState *target, int *orderings,
- Action **actions, int nActs )
-{
- /* Fill any gaps in the out list with an error transition. */
- fillGaps( state );
-
- /* Set error target in the transitions that go to error. */
- for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) {
- if ( trans->toState == 0 ) {
- /* The trans goes to error, redirect it. */
- redirectErrorTrans( trans->fromState, target, trans );
- trans->actionTable.setActions( orderings, actions, nActs );
- }
- }
-}
-
-void FsmGraph::transferErrorActions( FsmState *state, int transferPoint )
-{
- for ( int i = 0; i < state->errActionTable.length(); ) {
- ErrActionTableEl *act = state->errActionTable.data + i;
- if ( act->transferPoint == transferPoint ) {
- /* Transfer the error action and remove it. */
- setErrorAction( state, act->ordering, act->action );
- state->errActionTable.vremove( i );
- }
- else {
- /* Not transfering and deleting, skip over the item. */
- i += 1;
- }
- }
-}
-
-/* Set error actions in the start state. */
-void FsmGraph::startErrorAction( int ordering, Action *action, int transferPoint )
-{
- /* Make sure the start state has no other entry points. */
- isolateStartState();
-
- /* Add the actions. */
- startState->errActionTable.setAction( ordering, action, transferPoint );
-}
-
-/* Set error actions in all states where there is a transition out. */
-void FsmGraph::allErrorAction( int ordering, Action *action, int transferPoint )
-{
- /* Insert actions in the error action table of all states. */
- for ( StateList::Iter state = stateList; state.lte(); state++ )
- state->errActionTable.setAction( ordering, action, transferPoint );
-}
-
-/* Set error actions in final states. */
-void FsmGraph::finalErrorAction( int ordering, Action *action, int transferPoint )
-{
- /* Add the action to the error table of final states. */
- for ( StateSet::Iter state = finStateSet; state.lte(); state++ )
- (*state)->errActionTable.setAction( ordering, action, transferPoint );
-}
-
-void FsmGraph::notStartErrorAction( int ordering, Action *action, int transferPoint )
-{
- for ( StateList::Iter state = stateList; state.lte(); state++ ) {
- if ( state != startState )
- state->errActionTable.setAction( ordering, action, transferPoint );
- }
-}
-
-void FsmGraph::notFinalErrorAction( int ordering, Action *action, int transferPoint )
-{
- for ( StateList::Iter state = stateList; state.lte(); state++ ) {
- if ( ! state->isFinState() )
- state->errActionTable.setAction( ordering, action, transferPoint );
- }
-}
-
-/* Set error actions in the states that have transitions into a final state. */
-void FsmGraph::middleErrorAction( int ordering, Action *action, int transferPoint )
-{
- /* Isolate the start state in case it is reachable from in inside the
- * machine, in which case we don't want it set. */
- for ( StateList::Iter state = stateList; state.lte(); state++ ) {
- if ( state != startState && ! state->isFinState() )
- state->errActionTable.setAction( ordering, action, transferPoint );
- }
-}
-
-/* Add an EOF action to the EOF action table of the start state. */
-void FsmGraph::startEOFAction( int ordering, Action *action )
-{
-    /* Make sure the start state has no other entry points. */
-    isolateStartState();
-
-    startState->eofActionTable.setAction( ordering, action );
-}
-
-/* Add an EOF action to the EOF action table of every state in the state
- * list. */
-void FsmGraph::allEOFAction( int ordering, Action *action )
-{
-    for ( StateList::Iter st = stateList; st.lte(); st++ ) {
-        st->eofActionTable.setAction( ordering, action );
-    }
-}
-
-/* Add an EOF action to the EOF action table of every final state. */
-void FsmGraph::finalEOFAction( int ordering, Action *action )
-{
-    for ( StateSet::Iter fin = finStateSet; fin.lte(); fin++ ) {
-        (*fin)->eofActionTable.setAction( ordering, action );
-    }
-}
-
-/* Add an EOF action to the EOF action table of every state except the
- * start state. */
-void FsmGraph::notStartEOFAction( int ordering, Action *action )
-{
-    for ( StateList::Iter st = stateList; st.lte(); st++ ) {
-        if ( st != startState ) {
-            st->eofActionTable.setAction( ordering, action );
-        }
-    }
-}
-
-/* Add an EOF action to the EOF action table of every state that is not a
- * final state. */
-void FsmGraph::notFinalEOFAction( int ordering, Action *action )
-{
-    for ( StateList::Iter st = stateList; st.lte(); st++ ) {
-        /* Final states are excluded. */
-        if ( st->isFinState() )
-            continue;
-        st->eofActionTable.setAction( ordering, action );
-    }
-}
-
-/* Add an EOF action to the EOF action table of every "middle" state: one
- * that is neither the start state nor a final state. */
-void FsmGraph::middleEOFAction( int ordering, Action *action )
-{
-    for ( StateList::Iter st = stateList; st.lte(); st++ ) {
-        if ( st != startState ) {
-            if ( ! st->isFinState() )
-                st->eofActionTable.setAction( ordering, action );
-        }
-    }
-}
-
-/*
- * Set To State Actions.
- */
-
-/* Add a to-state action to the to-state action table of the start state. */
-void FsmGraph::startToStateAction( int ordering, Action *action )
-{
-    /* Make sure the start state has no other entry points. */
-    isolateStartState();
-
-    startState->toStateActionTable.setAction( ordering, action );
-}
-
-/* Add a to-state action to the to-state action table of every state in the
- * state list. */
-void FsmGraph::allToStateAction( int ordering, Action *action )
-{
-    for ( StateList::Iter st = stateList; st.lte(); st++ ) {
-        st->toStateActionTable.setAction( ordering, action );
-    }
-}
-
-/* Add a to-state action to the to-state action table of every final
- * state. */
-void FsmGraph::finalToStateAction( int ordering, Action *action )
-{
-    for ( StateSet::Iter fin = finStateSet; fin.lte(); fin++ ) {
-        (*fin)->toStateActionTable.setAction( ordering, action );
-    }
-}
-
-/* Add a to-state action to the to-state action table of every state except
- * the start state. */
-void FsmGraph::notStartToStateAction( int ordering, Action *action )
-{
-    for ( StateList::Iter st = stateList; st.lte(); st++ ) {
-        if ( st != startState ) {
-            st->toStateActionTable.setAction( ordering, action );
-        }
-    }
-}
-
-/* Add a to-state action to the to-state action table of every state that is
- * not a final state. */
-void FsmGraph::notFinalToStateAction( int ordering, Action *action )
-{
-    for ( StateList::Iter st = stateList; st.lte(); st++ ) {
-        /* Final states are excluded. */
-        if ( st->isFinState() )
-            continue;
-        st->toStateActionTable.setAction( ordering, action );
-    }
-}
-
-/* Add a to-state action to the to-state action table of every "middle"
- * state: one that is neither the start state nor a final state. */
-void FsmGraph::middleToStateAction( int ordering, Action *action )
-{
-    for ( StateList::Iter st = stateList; st.lte(); st++ ) {
-        if ( st != startState ) {
-            if ( ! st->isFinState() )
-                st->toStateActionTable.setAction( ordering, action );
-        }
-    }
-}
-
-/*
- * Set From State Actions.
- */
-
-/* Add a from-state action to the from-state action table of the start
- * state. */
-void FsmGraph::startFromStateAction( int ordering, Action *action )
-{
-    /* Make sure the start state has no other entry points. */
-    isolateStartState();
-
-    startState->fromStateActionTable.setAction( ordering, action );
-}
-
-/* Add a from-state action to the from-state action table of every state in
- * the state list. */
-void FsmGraph::allFromStateAction( int ordering, Action *action )
-{
-    for ( StateList::Iter st = stateList; st.lte(); st++ ) {
-        st->fromStateActionTable.setAction( ordering, action );
-    }
-}
-
-/* Add a from-state action to the from-state action table of every final
- * state. */
-void FsmGraph::finalFromStateAction( int ordering, Action *action )
-{
-    for ( StateSet::Iter fin = finStateSet; fin.lte(); fin++ ) {
-        (*fin)->fromStateActionTable.setAction( ordering, action );
-    }
-}
-
-/* Add a from-state action to the from-state action table of every state
- * except the start state. */
-void FsmGraph::notStartFromStateAction( int ordering, Action *action )
-{
-    for ( StateList::Iter st = stateList; st.lte(); st++ ) {
-        if ( st != startState ) {
-            st->fromStateActionTable.setAction( ordering, action );
-        }
-    }
-}
-
-/* Add a from-state action to the from-state action table of every state
- * that is not a final state. */
-void FsmGraph::notFinalFromStateAction( int ordering, Action *action )
-{
-    for ( StateList::Iter st = stateList; st.lte(); st++ ) {
-        /* Final states are excluded. */
-        if ( st->isFinState() )
-            continue;
-        st->fromStateActionTable.setAction( ordering, action );
-    }
-}
-
-/* Add a from-state action to the from-state action table of every "middle"
- * state: one that is neither the start state nor a final state. */
-void FsmGraph::middleFromStateAction( int ordering, Action *action )
-{
-    for ( StateList::Iter st = stateList; st.lte(); st++ ) {
-        if ( st != startState ) {
-            if ( ! st->isFinState() )
-                st->fromStateActionTable.setAction( ordering, action );
-        }
-    }
-}
-
-/* Renumber the action ordering keys on the start state's out transitions.
- * On each transition the keys are rewritten, in table order, to fromOrder,
- * fromOrder+1, and so on. Returns the largest number of ordering values
- * consumed by any single transition (0 if no transition has actions).
- * NOTE(review): the original comment says this is useful "before staring",
- * presumably meaning before applying the star operator -- confirm. */
-int FsmGraph::shiftStartActionOrder( int fromOrder )
-{
-    int widest = 0;
-
-    for ( TransList::Iter tr = startState->outList; tr.lte(); tr++ ) {
-        /* Hand out increasing keys, restarting at fromOrder for each
-         * transition. */
-        int nextOrder = fromOrder;
-        for ( ActionTable::Iter act = tr->actionTable; act.lte(); act++ ) {
-            act->key = nextOrder;
-            nextOrder += 1;
-        }
-
-        /* Track the maximum number of orders used by one transition. */
-        int used = nextOrder - fromOrder;
-        if ( used > widest )
-            widest = used;
-    }
-
-    return widest;
-}
-
-/* Remove all priority data from the graph: the out priority table of every
- * state and the priority table of every out transition. */
-void FsmGraph::clearAllPriorities()
-{
-    for ( StateList::Iter st = stateList; st.lte(); st++ ) {
-        /* State-level leaving priorities. */
-        st->outPriorTable.empty();
-
-        /* Priorities on the state's out transitions. */
-        TransList::Iter tr = st->outList;
-        for ( ; tr.lte(); tr++ )
-            tr->priorTable.empty();
-    }
-}
-
-/* Set every action ordering key in the graph to zero. This may be called
- * before minimization once it is known that no further fsm operations will
- * be performed; states are then no longer kept apart merely because their
- * action orderings differ, which allows greater reduction. */
-void FsmGraph::nullActionKeys( )
-{
-    for ( StateList::Iter st = stateList; st.lte(); st++ ) {
-        /* Tables carried by the out transitions of the state. */
-        for ( TransList::Iter tr = st->outList; tr.lte(); tr++ ) {
-            /* Regular action table of the transition. */
-            ActionTable::Iter act = tr->actionTable;
-            for ( ; act.lte(); act++ )
-                act->key = 0;
-
-            /* Longest-match action table of the transition. */
-            LmActionTable::Iter lmAct = tr->lmActionTable;
-            for ( ; lmAct.lte(); lmAct++ )
-                lmAct->key = 0;
-        }
-
-        /* To-state action table. */
-        ActionTable::Iter toAct = st->toStateActionTable;
-        for ( ; toAct.lte(); toAct++ )
-            toAct->key = 0;
-
-        /* From-state action table. */
-        ActionTable::Iter fromAct = st->fromStateActionTable;
-        for ( ; fromAct.lte(); fromAct++ )
-            fromAct->key = 0;
-
-        /* Out (leaving) action table. */
-        ActionTable::Iter outAct = st->outActionTable;
-        for ( ; outAct.lte(); outAct++ )
-            outAct->key = 0;
-
-        /* Error action table; its ordering lives in the 'ordering' field. */
-        ErrActionTable::Iter errAct = st->errActionTable;
-        for ( ; errAct.lte(); errAct++ )
-            errAct->ordering = 0;
-
-        /* EOF action table. */
-        ActionTable::Iter eofAct = st->eofActionTable;
-        for ( ; eofAct.lte(); eofAct++ )
-            eofAct->key = 0;
-    }
-}
-
-/* Sanity-check every state in the state list using assertions:
- *  - a state that is not final carries no leaving data (out actions, out
- *    conditions, out priorities);
- *  - the SB_BOTH state bits are all clear;
- *  - the state has at least one foreign in transition. */
-void FsmGraph::verifyStates()
-{
-    for ( StateList::Iter st = stateList; st.lte(); st++ ) {
-        const bool isFinal = ( st->stateBits & SB_ISFINAL ) != 0;
-
-        /* Non final states should not have leaving data. */
-        if ( ! isFinal ) {
-            assert( st->outActionTable.length() == 0 );
-            assert( st->outCondSet.length() == 0 );
-            assert( st->outPriorTable.length() == 0 );
-        }
-
-        /* Bits used temporarily by algorithms should be cleared. */
-        assert( (st->stateBits & SB_BOTH) == 0 );
-        assert( st->foreignInTrans > 0 );
-    }
-}
-
-/* Compare two priority tables. The tables are scanned side by side; when
- * both hold an entry with the same descriptor key and the priorities differ,
- * the result is -1 if the first table's priority is lower and 1 if it is
- * higher. If no shared key has differing priorities the result is 0.
- * NOTE(review): the side-by-side scan presumably relies on both tables being
- * ordered by descriptor key -- confirm against PriorTable. */
-int FsmGraph::comparePrior( const PriorTable &priorTable1, const PriorTable &priorTable2 )
-{
-    PriorTable::Iter pd1 = priorTable1;
-    PriorTable::Iter pd2 = priorTable2;
-
-    while ( pd1.lte() && pd2.lte() ) {
-        /* Keys differ: advance whichever side is behind. */
-        if ( pd1->desc->key < pd2->desc->key ) {
-            pd1.increment();
-            continue;
-        }
-        if ( pd1->desc->key > pd2->desc->key ) {
-            pd2.increment();
-            continue;
-        }
-
-        /* Same key: a difference in priority decides the comparison. */
-        if ( pd1->desc->priority < pd2->desc->priority )
-            return -1;
-        if ( pd1->desc->priority > pd2->desc->priority )
-            return 1;
-
-        /* Keys and priorities are equal, advance both. */
-        pd1.increment();
-        pd2.increment();
-    }
-
-    /* No differing priorities on the same key. */
-    return 0;
-}
-
-/* Compare the data embedded in two transitions. Neither pointer may be
- * null. The from and to states are not considered. The priority tables are
- * compared first, then the longest-match action tables, then the action
- * tables; the first non-zero comparison result is returned, or 0 if all
- * three are equal. */
-int FsmGraph::compareTransData( FsmTrans *trans1, FsmTrans *trans2 )
-{
-    /* Priority tables. */
-    int res = CmpPriorTable::compare( trans1->priorTable,
-            trans2->priorTable );
-
-    /* Longest-match action tables. */
-    if ( res == 0 ) {
-        res = CmpLmActionTable::compare(trans1->lmActionTable,
-                trans2->lmActionTable);
-    }
-
-    /* Action tables. */
-    if ( res == 0 ) {
-        res = CmpActionTable::compare(trans1->actionTable,
-                trans2->actionTable);
-    }
-
-    return res;
-}
-
-/* Callback invoked during merging when a transition (possibly destTrans
- * itself) is added into destTrans. It is called once for every transition
- * that the new, combined transition is to represent, and draws the
- * properties of srcTrans into destTrans. */
-void FsmGraph::addInTrans( FsmTrans *destTrans, FsmTrans *srcTrans )
-{
-    if ( srcTrans != destTrans ) {
-        /* Distinct transitions: take the actions and the priorities. */
-        destTrans->lmActionTable.setActions( srcTrans->lmActionTable );
-        destTrans->actionTable.setActions( srcTrans->actionTable );
-        destTrans->priorTable.setPriors( srcTrans->priorTable );
-        return;
-    }
-
-    /* Adding a transition into itself. The source tables are copied first
-     * and the copies are inserted. Priorities are skipped because setting
-     * them from ourselves would have no effect. */
-    destTrans->lmActionTable.setActions( LmActionTable(srcTrans->lmActionTable) );
-    destTrans->actionTable.setActions( ActionTable(srcTrans->actionTable) );
-}
-
-/* Compare the user-embedded data of two states. The tables are compared in
- * this order: out priorities, to-state actions, from-state actions, out
- * actions, out condition set, error actions, EOF actions. The result of the
- * first comparison that is non-zero is returned; 0 means all are equal. */
-int FsmGraph::compareStateData( const FsmState *state1, const FsmState *state2 )
-{
-    /* Out priority table. */
-    int res = CmpPriorTable::
-            compare( state1->outPriorTable, state2->outPriorTable );
-
-    /* To-state action tables. */
-    if ( res == 0 ) {
-        res = CmpActionTable::compare( state1->toStateActionTable,
-                state2->toStateActionTable );
-    }
-
-    /* From-state action tables. */
-    if ( res == 0 ) {
-        res = CmpActionTable::compare( state1->fromStateActionTable,
-                state2->fromStateActionTable );
-    }
-
-    /* Out action tables. */
-    if ( res == 0 ) {
-        res = CmpActionTable::compare( state1->outActionTable,
-                state2->outActionTable );
-    }
-
-    /* Out condition sets. */
-    if ( res == 0 ) {
-        res = CmpActionSet::compare( state1->outCondSet,
-                state2->outCondSet );
-    }
-
-    /* Error action tables. */
-    if ( res == 0 ) {
-        res = CmpErrActionTable::compare( state1->errActionTable,
-                state2->errActionTable );
-    }
-
-    /* EOF action tables. */
-    if ( res == 0 ) {
-        res = CmpActionTable::compare( state1->eofActionTable,
-                state2->eofActionTable );
-    }
-
-    return res;
-}
-
-
-/* Invoked when a state loses its final state status: the data that was
- * waiting to be embedded in leaving transitions is discarded. */
-void FsmGraph::clearOutData( FsmState *state )
-{
-    /* Drop the out actions, out conditions and out priorities. */
-    state->outActionTable.empty();
-    state->outCondSet.empty();
-    state->outPriorTable.empty();
-}
-
-/* Return true if the state holds any leaving data, i.e. if its out action
- * table, out condition set or out priority table is non-empty. */
-bool FsmGraph::hasOutData( FsmState *state )
-{
-    if ( state->outActionTable.length() > 0 )
-        return true;
-    if ( state->outCondSet.length() > 0 )
-        return true;
-    return state->outPriorTable.length() > 0;
-}
-
-/*
- * Setting Conditions.
- */
-
-void logNewExpansion( Expansion *exp );
-void logCondSpace( CondSpace *condSpace );
-
-/* Look up the condition space for a condition set in the global condData
- * map, creating and registering it if it does not exist yet. A new space is
- * given the current condData->nextCondKey as its base key, and nextCondKey
- * is advanced by (2 ^ number of conditions) * alphabet size. Returns the
- * existing or newly created condition space. */
-CondSpace *FsmGraph::addCondSpace( const CondSet &condSet )
-{
-    CondSpace *condSpace = condData->condSpaceMap.find( condSet );
-
-    /* Already known, nothing to create. */
-    if ( condSpace != 0 )
-        return condSpace;
-
-    /* Reserve a block of keys for the new space. */
-    Key baseKey = condData->nextCondKey;
-    condData->nextCondKey += (1 << condSet.length() ) * keyOps->alphSize();
-
-    condSpace = new CondSpace( condSet );
-    condSpace->baseKey = baseKey;
-    condData->condSpaceMap.insert( condSpace );
-
-    #ifdef COLM_LOG_CONDS
-    if ( colm_log_conds ) {
-        cerr << "adding new condition space" << endl;
-        cerr << " condition set: ";
-        logCondSpace( condSpace );
-        cerr << endl;
-        cerr << " baseKey: " << baseKey.getVal() << endl;
-    }
-    #endif
-
-    return condSpace;
-}
-
-/* Embed a condition in the start state, after isolating it. */
-void FsmGraph::startFsmCondition( Action *condAction )
-{
-    /* Make sure the start state has no other entry points. */
-    isolateStartState();
-
-    embedCondition( startState, condAction );
-}
-
-/* Embed a condition in every state of the state list. */
-void FsmGraph::allTransCondition( Action *condAction )
-{
-    for ( StateList::Iter st = stateList; st.lte(); st++ ) {
-        embedCondition( st, condAction );
-    }
-}
-
-/* Record a leaving condition: the condition action is inserted into the out
- * condition set of every final state. */
-void FsmGraph::leaveFsmCondition( Action *condAction )
-{
-    for ( StateSet::Iter fin = finStateSet; fin.lte(); fin++ ) {
-        (*fin)->outCondSet.insert( condAction );
-    }
-}
diff --git a/colm/fsmattach.cc b/colm/fsmattach.cc
deleted file mode 100644
index a58ed9a4..00000000
--- a/colm/fsmattach.cc
+++ /dev/null
@@ -1,425 +0,0 @@
-/*
- * Copyright 2006-2012 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Colm.
- *
- * Colm is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Colm is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Colm; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include <string.h>
-#include <assert.h>
-#include "fsmgraph.h"
-
-#include <iostream>
-using namespace std;
-
-/* Insert a transition into an inlist. The head must be supplied. */
-void FsmGraph::attachToInList( FsmState *from, FsmState *to,
- FsmTrans *&head, FsmTrans *trans )
-{
- trans->ilnext = head;
- trans->ilprev = 0;
-
- /* If in trans list is not empty, set the head->prev to trans. */
- if ( head != 0 )
- head->ilprev = trans;
-
- /* Now insert ourselves at the front of the list. */
- head = trans;
-
- /* Keep track of foreign transitions for from and to. */
- if ( from != to ) {
- if ( misfitAccounting ) {
- /* If the number of foreign in transitions is about to go up to 1 then
- * move it from the misfit list to the main list. */
- if ( to->foreignInTrans == 0 )
- stateList.append( misfitList.detach( to ) );
- }
-
- to->foreignInTrans += 1;
- }
-};
-
-/* Detach a transition from an inlist. The head of the inlist must be supplied. */
-void FsmGraph::detachFromInList( FsmState *from, FsmState *to,
- FsmTrans *&head, FsmTrans *trans )
-{
- /* Detach in the inTransList. */
- if ( trans->ilprev == 0 )
- head = trans->ilnext;
- else
- trans->ilprev->ilnext = trans->ilnext;
-
- if ( trans->ilnext != 0 )
- trans->ilnext->ilprev = trans->ilprev;
-
- /* Keep track of foreign transitions for from and to. */
- if ( from != to ) {
- to->foreignInTrans -= 1;
-
- if ( misfitAccounting ) {
- /* If the number of foreign in transitions goes down to 0 then move it
- * from the main list to the misfit list. */
- if ( to->foreignInTrans == 0 )
- misfitList.append( stateList.detach( to ) );
- }
- }
-}
-
-/* Attach states on the default transition, range list or on out/in list key.
- * First makes a new transition. If there is already a transition out from
- * fromState on the default, then will assertion fail. */
-FsmTrans *FsmGraph::attachNewTrans( FsmState *from, FsmState *to, Key lowKey, Key highKey )
-{
- /* Make the new transition. */
- FsmTrans *retVal = new FsmTrans();
-
- /* The transition is now attached. Remember the parties involved. */
- retVal->fromState = from;
- retVal->toState = to;
-
- /* Make the entry in the out list for the transitions. */
- from->outList.append( retVal );
-
- /* Set the the keys of the new trans. */
- retVal->lowKey = lowKey;
- retVal->highKey = highKey;
-
- /* Attach using inList as the head pointer. */
- if ( to != 0 )
- attachToInList( from, to, to->inList.head, retVal );
-
- return retVal;
-}
-
-/* Attach for range lists or for the default transition. This attach should
- * be used when a transition already is allocated and must be attached to a
- * target state. Does not handle adding the transition into the out list. */
-void FsmGraph::attachTrans( FsmState *from, FsmState *to, FsmTrans *trans )
-{
- assert( trans->fromState == 0 && trans->toState == 0 );
- trans->fromState = from;
- trans->toState = to;
-
- if ( to != 0 ) {
- /* Attach using the inList pointer as the head pointer. */
- attachToInList( from, to, to->inList.head, trans );
- }
-}
-
-/* Redirect a transition away from error and towards some state. This is just
- * like attachTrans except it requires fromState to be set and does not touch
- * it. */
-void FsmGraph::redirectErrorTrans( FsmState *from, FsmState *to, FsmTrans *trans )
-{
- assert( trans->fromState != 0 && trans->toState == 0 );
- trans->toState = to;
-
- if ( to != 0 ) {
- /* Attach using the inList pointer as the head pointer. */
- attachToInList( from, to, to->inList.head, trans );
- }
-}
-
-/* Detach for out/in lists or for default transition. */
-void FsmGraph::detachTrans( FsmState *from, FsmState *to, FsmTrans *trans )
-{
- assert( trans->fromState == from && trans->toState == to );
- trans->fromState = 0;
- trans->toState = 0;
-
- if ( to != 0 ) {
- /* Detach using to's inList pointer as the head. */
- detachFromInList( from, to, to->inList.head, trans );
- }
-}
-
-
-/* Detach a state from the graph. Detaches and deletes transitions in and out
- * of the state. Empties inList and outList. Removes the state from the final
- * state set. A detached state becomes useless and should be deleted. */
-void FsmGraph::detachState( FsmState *state )
-{
- /* Detach the in transitions from the inList list of transitions. */
- while ( state->inList.head != 0 ) {
- /* Get pointers to the trans and the state. */
- FsmTrans *trans = state->inList.head;
- FsmState *fromState = trans->fromState;
-
- /* Detach the transitions from the source state. */
- detachTrans( fromState, state, trans );
-
- /* Ok to delete the transition. */
- fromState->outList.detach( trans );
- delete trans;
- }
-
- /* Remove the entry points in on the machine. */
- while ( state->entryIds.length() > 0 )
- unsetEntry( state->entryIds[0], state );
-
- /* Detach out range transitions. */
- for ( TransList::Iter trans = state->outList; trans.lte(); ) {
- TransList::Iter next = trans.next();
- detachTrans( state, trans->toState, trans );
- delete trans;
- trans = next;
- }
-
- /* Delete all of the out range pointers. */
- state->outList.abandon();
-
- /* Unset final stateness before detaching from graph. */
- if ( state->stateBits & SB_ISFINAL )
- finStateSet.remove( state );
-}
-
-
-/* Duplicate a transition. Makes a new transition that is attached to the same
- * dest as srcTrans. The new transition has functions and priority taken from
- * srcTrans. Used for merging a transition in to a free spot. The trans can
- * just be dropped in. It does not conflict with an existing trans and need
- * not be crossed. Returns the new transition. */
-FsmTrans *FsmGraph::dupTrans( FsmState *from, FsmTrans *srcTrans )
-{
- /* Make a new transition. */
- FsmTrans *newTrans = new FsmTrans();
-
- /* We can attach the transition, one does not exist. */
- attachTrans( from, srcTrans->toState, newTrans );
-
- /* Call the user callback to add in the original source transition. */
- addInTrans( newTrans, srcTrans );
-
- return newTrans;
-}
-
-/* In crossing, src trans and dest trans both go to existing states. Make one
- * state from the sets of states that src and dest trans go to. */
-FsmTrans *FsmGraph::fsmAttachStates( MergeData &md, FsmState *from,
- FsmTrans *destTrans, FsmTrans *srcTrans )
-{
- /* The priorities are equal. We must merge the transitions. Does the
- * existing trans go to the state we are to attach to? ie, are we to
- * simply double up the transition? */
- FsmState *toState = srcTrans->toState;
- FsmState *existingState = destTrans->toState;
-
- if ( existingState == toState ) {
- /* The transition is a double up to the same state. Copy the src
- * trans into itself. We don't need to merge in the from out trans
- * data, that was done already. */
- addInTrans( destTrans, srcTrans );
- }
- else {
- /* The trans is not a double up. Dest trans cannot be the same as src
- * trans. Set up the state set. */
- StateSet stateSet;
-
- /* We go to all the states the existing trans goes to, plus... */
- if ( existingState->stateDictEl == 0 )
- stateSet.insert( existingState );
- else
- stateSet.insert( existingState->stateDictEl->stateSet );
-
- /* ... all the states that we have been told to go to. */
- if ( toState->stateDictEl == 0 )
- stateSet.insert( toState );
- else
- stateSet.insert( toState->stateDictEl->stateSet );
-
- /* Look for the state. If it is not there already, make it. */
- StateDictEl *lastFound;
- if ( md.stateDict.insert( stateSet, &lastFound ) ) {
- /* Make a new state representing the combination of states in
- * stateSet. It gets added to the fill list. This means that we
- * need to fill in it's transitions sometime in the future. We
- * don't do that now (ie, do not recurse). */
- FsmState *combinState = addState();
-
- /* Link up the dict element and the state. */
- lastFound->targState = combinState;
- combinState->stateDictEl = lastFound;
-
- /* Add to the fill list. */
- md.fillListAppend( combinState );
- }
-
- /* Get the state insertted/deleted. */
- FsmState *targ = lastFound->targState;
-
- /* Detach the state from existing state. */
- detachTrans( from, existingState, destTrans );
-
- /* Re-attach to the new target. */
- attachTrans( from, targ, destTrans );
-
- /* Add in src trans to the existing transition that we redirected to
- * the new state. We don't need to merge in the from out trans data,
- * that was done already. */
- addInTrans( destTrans, srcTrans );
- }
-
- return destTrans;
-}
-
-/* Two transitions are to be crossed, handle the possibility of either going
- * to the error state. */
-FsmTrans *FsmGraph::mergeTrans( MergeData &md, FsmState *from,
- FsmTrans *destTrans, FsmTrans *srcTrans )
-{
- FsmTrans *retTrans = 0;
- if ( destTrans->toState == 0 && srcTrans->toState == 0 ) {
- /* Error added into error. */
- addInTrans( destTrans, srcTrans );
- retTrans = destTrans;
- }
- else if ( destTrans->toState == 0 && srcTrans->toState != 0 ) {
- /* Non error added into error we need to detach and reattach, */
- detachTrans( from, destTrans->toState, destTrans );
- attachTrans( from, srcTrans->toState, destTrans );
- addInTrans( destTrans, srcTrans );
- retTrans = destTrans;
- }
- else if ( srcTrans->toState == 0 ) {
- /* Dest goes somewhere but src doesn't, just add it it in. */
- addInTrans( destTrans, srcTrans );
- retTrans = destTrans;
- }
- else {
- /* Both go somewhere, run the actual cross. */
- retTrans = fsmAttachStates( md, from, destTrans, srcTrans );
- }
-
- return retTrans;
-}
-
-/* Find the trans with the higher priority. If src is lower priority then dest then
- * src is ignored. If src is higher priority than dest, then src overwrites dest. If
- * the priorities are equal, then they are merged. */
-FsmTrans *FsmGraph::crossTransitions( MergeData &md, FsmState *from,
- FsmTrans *destTrans, FsmTrans *srcTrans )
-{
- FsmTrans *retTrans;
-
- /* Compare the priority of the dest and src transitions. */
- int compareRes = comparePrior( destTrans->priorTable, srcTrans->priorTable );
- if ( compareRes < 0 ) {
- /* Src trans has a higher priority than dest, src overwrites dest.
- * Detach dest and return a copy of src. */
- detachTrans( from, destTrans->toState, destTrans );
- retTrans = dupTrans( from, srcTrans );
- }
- else if ( compareRes > 0 ) {
- /* The dest trans has a higher priority, use dest. */
- retTrans = destTrans;
- }
- else {
- /* Src trans and dest trans have the same priority, they must be merged. */
- retTrans = mergeTrans( md, from, destTrans, srcTrans );
- }
-
- /* Return the transition that resulted from the cross. */
- return retTrans;
-}
-
-/* Copy the transitions in srcList to the outlist of dest. The srcList should
- * not be the outList of dest, otherwise you would be copying the contents of
- * srcList into itself as it's iterated: bad news. */
-void FsmGraph::outTransCopy( MergeData &md, FsmState *dest, FsmTrans *srcList )
-{
- /* The destination list. */
- TransList destList;
-
- /* Set up an iterator to stop at breaks. */
- PairIter<FsmTrans> outPair( dest->outList.head, srcList );
- for ( ; !outPair.end(); outPair++ ) {
- switch ( outPair.userState ) {
- case RangeInS1: {
- /* The pair iter is the authority on the keys. It may have needed
- * to break the dest range. */
- FsmTrans *destTrans = outPair.s1Tel.trans;
- destTrans->lowKey = outPair.s1Tel.lowKey;
- destTrans->highKey = outPair.s1Tel.highKey;
- destList.append( destTrans );
- break;
- }
- case RangeInS2: {
- /* Src range may get crossed with dest's default transition. */
- FsmTrans *newTrans = dupTrans( dest, outPair.s2Tel.trans );
-
- /* Set up the transition's keys and append to the dest list. */
- newTrans->lowKey = outPair.s2Tel.lowKey;
- newTrans->highKey = outPair.s2Tel.highKey;
- destList.append( newTrans );
- break;
- }
- case RangeOverlap: {
- /* Exact overlap, cross them. */
- FsmTrans *newTrans = crossTransitions( md, dest,
- outPair.s1Tel.trans, outPair.s2Tel.trans );
-
- /* Set up the transition's keys and append to the dest list. */
- newTrans->lowKey = outPair.s1Tel.lowKey;
- newTrans->highKey = outPair.s1Tel.highKey;
- destList.append( newTrans );
- break;
- }
- case BreakS1: {
- /* Since we are always writing to the dest trans, the dest needs
- * to be copied when it is broken. The copy goes into the first
- * half of the break to "break it off". */
- outPair.s1Tel.trans = dupTrans( dest, outPair.s1Tel.trans );
- break;
- }
- case BreakS2:
- break;
- }
- }
-
- /* Abandon the old outList and transfer destList into it. */
- dest->outList.transfer( destList );
-}
-
-
-/* Move all the transitions that go into src so that they go into dest. */
-void FsmGraph::inTransMove( FsmState *dest, FsmState *src )
-{
- /* Do not try to move in trans to and from the same state. */
- assert( dest != src );
-
- /* If src is the start state, dest becomes the start state. */
- if ( src == startState ) {
- unsetStartState();
- setStartState( dest );
- }
-
- /* For each entry point into, create an entry point into dest, when the
- * state is detached, the entry points to src will be removed. */
- for ( EntryIdSet::Iter enId = src->entryIds; enId.lte(); enId++ )
- changeEntry( *enId, dest, src );
-
- /* Move the transitions in inList. */
- while ( src->inList.head != 0 ) {
- /* Get trans and from state. */
- FsmTrans *trans = src->inList.head;
- FsmState *fromState = trans->fromState;
-
- /* Detach from src, reattach to dest. */
- detachTrans( fromState, src, trans );
- attachTrans( fromState, dest, trans );
- }
-}
diff --git a/colm/fsmbase.cc b/colm/fsmbase.cc
deleted file mode 100644
index 90341039..00000000
--- a/colm/fsmbase.cc
+++ /dev/null
@@ -1,602 +0,0 @@
-/*
- * Copyright 2006-2012 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Colm.
- *
- * Colm is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Colm is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Colm; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include <string.h>
-#include <assert.h>
-#include "fsmgraph.h"
-
-/* Simple singly linked list append routine for the fill list. The new state
- * goes to the end of the list. */
-void MergeData::fillListAppend( FsmState *state )
-{
- state->alg.next = 0;
-
- if ( stfillHead == 0 ) {
- /* List is empty, state becomes head and tail. */
- stfillHead = state;
- stfillTail = state;
- }
- else {
- /* List is not empty, state goes after last element. */
- stfillTail->alg.next = state;
- stfillTail = state;
- }
-}
-
-/* Graph constructor. */
-FsmGraph::FsmGraph()
-:
- /* No start state. */
- startState(0),
- errState(0),
-
- /* Misfit accounting is a switch, turned on only at specific times. It
- * controls what happens when states have no way in from the outside
- * world.. */
- misfitAccounting(false),
-
- lmRequiresErrorState(false)
-{
-}
-
-/* Copy all graph data including transitions. */
-FsmGraph::FsmGraph( const FsmGraph &graph )
-:
- /* Lists start empty. Will be filled by copy. */
- stateList(),
- misfitList(),
-
- /* Copy in the entry points,
- * pointers will be resolved later. */
- entryPoints(graph.entryPoints),
- startState(graph.startState),
- errState(0),
-
- /* Will be filled by copy. */
- finStateSet(),
-
- /* Misfit accounting is only on during merging. */
- misfitAccounting(false),
-
- lmRequiresErrorState(graph.lmRequiresErrorState)
-{
- /* Create the states and record their map in the original state. */
- StateList::Iter origState = graph.stateList;
- for ( ; origState.lte(); origState++ ) {
- /* Make the new state. */
- FsmState *newState = new FsmState( *origState );
-
- /* Add the state to the list. */
- stateList.append( newState );
-
- /* Set the mapsTo item of the old state. */
- origState->alg.stateMap = newState;
- }
-
- /* Derefernce all the state maps. */
- for ( StateList::Iter state = stateList; state.lte(); state++ ) {
- for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) {
- /* The points to the original in the src machine. The taget's duplicate
- * is in the statemap. */
- FsmState *toState = trans->toState != 0 ? trans->toState->alg.stateMap : 0;
-
- /* Attach The transition to the duplicate. */
- trans->toState = 0;
- attachTrans( state, toState, trans );
- }
- }
-
- /* Fix the state pointers in the entry points array. */
- EntryMapEl *eel = entryPoints.data;
- for ( int e = 0; e < entryPoints.length(); e++, eel++ ) {
- /* Get the duplicate of the state. */
- eel->value = eel->value->alg.stateMap;
-
- /* Foreign in transitions must be built up when duping machines so
- * increment it here. */
- eel->value->foreignInTrans += 1;
- }
-
- /* Fix the start state pointer and the new start state's count of in
- * transiions. */
- startState = startState->alg.stateMap;
- startState->foreignInTrans += 1;
-
- /* Build the final state set. */
- StateSet::Iter st = graph.finStateSet;
- for ( ; st.lte(); st++ )
- finStateSet.insert((*st)->alg.stateMap);
-}
-
-/* Deletes all transition data then deletes each state. */
-FsmGraph::~FsmGraph()
-{
- /* Delete all the transitions. */
- for ( StateList::Iter state = stateList; state.lte(); state++ ) {
- /* Iterate the out transitions, deleting them. */
- state->outList.empty();
- }
-
- /* Delete all the states. */
- stateList.empty();
-}
-
-/* Set a state final. The state has its isFinState set to true and the state
- * is added to the finStateSet. */
-void FsmGraph::setFinState( FsmState *state )
-{
- /* Is it already a fin state. */
- if ( state->stateBits & SB_ISFINAL )
- return;
-
- state->stateBits |= SB_ISFINAL;
- finStateSet.insert( state );
-}
-
-/* Set a state non-final. The has its isFinState flag set false and the state
- * is removed from the final state set. */
-void FsmGraph::unsetFinState( FsmState *state )
-{
- /* Is it already a non-final state? */
- if ( ! (state->stateBits & SB_ISFINAL) )
- return;
-
- /* When a state looses its final state status it must relinquish all the
- * properties that are allowed only for final states. */
- clearOutData( state );
-
- state->stateBits &= ~ SB_ISFINAL;
- finStateSet.remove( state );
-}
-
-/* Set and unset a state as the start state. */
-void FsmGraph::setStartState( FsmState *state )
-{
- /* Sould change from unset to set. */
- assert( startState == 0 );
- startState = state;
-
- if ( misfitAccounting ) {
- /* If the number of foreign in transitions is about to go up to 1 then
- * take it off the misfit list and put it on the head list. */
- if ( state->foreignInTrans == 0 )
- stateList.append( misfitList.detach( state ) );
- }
-
- /* Up the foreign in transitions to the state. */
- state->foreignInTrans += 1;
-}
-
-void FsmGraph::unsetStartState()
-{
- /* Should change from set to unset. */
- assert( startState != 0 );
-
- /* Decrement the entry's count of foreign entries. */
- startState->foreignInTrans -= 1;
-
- if ( misfitAccounting ) {
- /* If the number of foreign in transitions just went down to 0 then take
- * it off the main list and put it on the misfit list. */
- if ( startState->foreignInTrans == 0 )
- misfitList.append( stateList.detach( startState ) );
- }
-
- startState = 0;
-}
-
-/* Associate an id with a state. Makes the state a named entry point. Has no
- * effect if the entry point is already mapped to the state. */
-void FsmGraph::setEntry( int id, FsmState *state )
-{
- /* Insert the id into the state. If the state is already labelled with id,
- * nothing to do. */
- if ( state->entryIds.insert( id ) ) {
- /* Insert the entry and assert that it succeeds. */
- entryPoints.insertMulti( id, state );
-
- if ( misfitAccounting ) {
- /* If the number of foreign in transitions is about to go up to 1 then
- * take it off the misfit list and put it on the head list. */
- if ( state->foreignInTrans == 0 )
- stateList.append( misfitList.detach( state ) );
- }
-
- /* Up the foreign in transitions to the state. */
- state->foreignInTrans += 1;
- }
-}
-
-/* Remove the association of an id with a state. The state looses it's entry
- * point status. Assumes that the id is indeed mapped to state. */
-void FsmGraph::unsetEntry( int id, FsmState *state )
-{
- /* Find the entry point in on id. */
- EntryMapEl *enLow = 0, *enHigh = 0;
- entryPoints.findMulti( id, enLow, enHigh );
- while ( enLow->value != state )
- enLow += 1;
-
- /* Remove the record from the map. */
- entryPoints.remove( enLow );
-
- /* Remove the state's sense of the link. */
- state->entryIds.remove( id );
- state->foreignInTrans -= 1;
- if ( misfitAccounting ) {
- /* If the number of foreign in transitions just went down to 0 then take
- * it off the main list and put it on the misfit list. */
- if ( state->foreignInTrans == 0 )
- misfitList.append( stateList.detach( state ) );
- }
-}
-
-/* Remove all association of an id with states. Assumes that the id is indeed
- * mapped to a state. */
-void FsmGraph::unsetEntry( int id )
-{
- /* Find the entry point in on id. */
- EntryMapEl *enLow = 0, *enHigh = 0;
- entryPoints.findMulti( id, enLow, enHigh );
- for ( EntryMapEl *mel = enLow; mel <= enHigh; mel++ ) {
- /* Remove the state's sense of the link. */
- mel->value->entryIds.remove( id );
- mel->value->foreignInTrans -= 1;
- if ( misfitAccounting ) {
- /* If the number of foreign in transitions just went down to 0
- * then take it off the main list and put it on the misfit list. */
- if ( mel->value->foreignInTrans == 0 )
- misfitList.append( stateList.detach( mel->value ) );
- }
- }
-
- /* Remove the records from the entry points map. */
- entryPoints.removeMulti( enLow, enHigh );
-}
-
-
-void FsmGraph::changeEntry( int id, FsmState *to, FsmState *from )
-{
- /* Find the entry in the entry map. */
- EntryMapEl *enLow = 0, *enHigh = 0;
- entryPoints.findMulti( id, enLow, enHigh );
- while ( enLow->value != from )
- enLow += 1;
-
- /* Change it to the new target. */
- enLow->value = to;
-
- /* Remove from's sense of the link. */
- from->entryIds.remove( id );
- from->foreignInTrans -= 1;
- if ( misfitAccounting ) {
- /* If the number of foreign in transitions just went down to 0 then take
- * it off the main list and put it on the misfit list. */
- if ( from->foreignInTrans == 0 )
- misfitList.append( stateList.detach( from ) );
- }
-
- /* Add to's sense of the link. */
- if ( to->entryIds.insert( id ) != 0 ) {
- if ( misfitAccounting ) {
- /* If the number of foreign in transitions is about to go up to 1 then
- * take it off the misfit list and put it on the head list. */
- if ( to->foreignInTrans == 0 )
- stateList.append( misfitList.detach( to ) );
- }
-
- /* Up the foreign in transitions to the state. */
- to->foreignInTrans += 1;
- }
-}
-
-
-/* Clear all entry points from a machine. */
-void FsmGraph::unsetAllEntryPoints()
-{
- for ( EntryMap::Iter en = entryPoints; en.lte(); en++ ) {
- /* Kill all the state's entry points at once. */
- if ( en->value->entryIds.length() > 0 ) {
- en->value->foreignInTrans -= en->value->entryIds.length();
-
- if ( misfitAccounting ) {
- /* If the number of foreign in transitions just went down to 0
- * then take it off the main list and put it on the misfit
- * list. */
- if ( en->value->foreignInTrans == 0 )
- misfitList.append( stateList.detach( en->value ) );
- }
-
- /* Clear the set of ids out all at once. */
- en->value->entryIds.empty();
- }
- }
-
- /* Now clear out the entry map all at once. */
- entryPoints.empty();
-}
-
-/* Assigning an epsilon transition into final states. */
-void FsmGraph::epsilonTrans( int id )
-{
- for ( StateSet::Iter fs = finStateSet; fs.lte(); fs++ )
- (*fs)->epsilonTrans.append( id );
-}
-
-/* Mark all states reachable from state. Traverses transitions forward. Used
- * for removing states that have no path into them. */
-void FsmGraph::markReachableFromHere( FsmState *state )
-{
- /* Base case: return; */
- if ( state->stateBits & SB_ISMARKED )
- return;
-
- /* Set this state as processed. We are going to visit all states that this
- * state has a transition to. */
- state->stateBits |= SB_ISMARKED;
-
- /* Recurse on all out transitions. */
- for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) {
- if ( trans->toState != 0 )
- markReachableFromHere( trans->toState );
- }
-}
-
-void FsmGraph::markReachableFromHereStopFinal( FsmState *state )
-{
- /* Base case: return; */
- if ( state->stateBits & SB_ISMARKED )
- return;
-
- /* Set this state as processed. We are going to visit all states that this
- * state has a transition to. */
- state->stateBits |= SB_ISMARKED;
-
- /* Recurse on all out transitions. */
- for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) {
- FsmState *toState = trans->toState;
- if ( toState != 0 && !toState->isFinState() )
- markReachableFromHereStopFinal( toState );
- }
-}
-
-/* Mark all states reachable from state. Traverse transitions backwards. Used
- * for removing dead end paths in graphs. */
-void FsmGraph::markReachableFromHereReverse( FsmState *state )
-{
- /* Base case: return; */
- if ( state->stateBits & SB_ISMARKED )
- return;
-
- /* Set this state as processed. We are going to visit all states with
- * transitions into this state. */
- state->stateBits |= SB_ISMARKED;
-
- /* Recurse on all items in transitions. */
- for ( TransInList::Iter trans = state->inList; trans.lte(); trans++ )
- markReachableFromHereReverse( trans->fromState );
-}
-
-/* Determine if there are any entry points into a start state other than the
- * start state. Setting starting transitions requires that the start state be
- * isolated. In most cases a start state will already be isolated. */
-bool FsmGraph::isStartStateIsolated()
-{
- /* If there are any in transitions then the state is not isolated. */
- if ( startState->inList.head != 0 )
- return false;
-
- /* If there are any entry points then isolated. */
- if ( startState->entryIds.length() > 0 )
- return false;
-
- return true;
-}
-
-/* Bring in other's entry points. Assumes others states are going to be
- * copied into this machine. */
-void FsmGraph::copyInEntryPoints( FsmGraph *other )
-{
- /* Use insert multi because names are not unique. */
- for ( EntryMap::Iter en = other->entryPoints; en.lte(); en++ )
- entryPoints.insertMulti( en->key, en->value );
-}
-
-
-void FsmGraph::unsetAllFinStates()
-{
- for ( StateSet::Iter st = finStateSet; st.lte(); st++ )
- (*st)->stateBits &= ~ SB_ISFINAL;
- finStateSet.empty();
-}
-
-void FsmGraph::setFinBits( int finStateBits )
-{
- for ( int s = 0; s < finStateSet.length(); s++ )
- finStateSet.data[s]->stateBits |= finStateBits;
-}
-
-
-/* Tests the integrity of the transition lists and the fromStates. */
-void FsmGraph::verifyIntegrity()
-{
- for ( StateList::Iter state = stateList; state.lte(); state++ ) {
- /* Walk the out transitions and assert fromState is correct. */
- for ( TransList::Iter trans = state->outList; trans.lte(); trans++ )
- assert( trans->fromState == state );
-
- /* Walk the inlist and assert toState is correct. */
- for ( TransInList::Iter trans = state->inList; trans.lte(); trans++ )
- assert( trans->toState == state );
- }
-}
-
-void FsmGraph::verifyReachability()
-{
- /* Mark all the states that can be reached
- * through the set of entry points. */
- markReachableFromHere( startState );
- for ( EntryMap::Iter en = entryPoints; en.lte(); en++ )
- markReachableFromHere( en->value );
-
- /* Check that everything got marked. */
- for ( StateList::Iter st = stateList; st.lte(); st++ ) {
- /* Assert it got marked and then clear the mark. */
- assert( st->stateBits & SB_ISMARKED );
- st->stateBits &= ~ SB_ISMARKED;
- }
-}
-
-void FsmGraph::verifyNoDeadEndStates()
-{
- /* Mark all states that have paths to the final states. */
- for ( StateSet::Iter pst = finStateSet; pst.lte(); pst++ )
- markReachableFromHereReverse( *pst );
-
- /* Start state gets honorary marking. Must be done AFTER recursive call. */
- startState->stateBits |= SB_ISMARKED;
-
- /* Make sure everything got marked. */
- for ( StateList::Iter st = stateList; st.lte(); st++ ) {
- /* Assert the state got marked and unmark it. */
- assert( st->stateBits & SB_ISMARKED );
- st->stateBits &= ~ SB_ISMARKED;
- }
-}
-
-void FsmGraph::depthFirstOrdering( FsmState *state )
-{
- /* Nothing to do if the state is already on the list. */
- if ( state->stateBits & SB_ONLIST )
- return;
-
- /* Doing depth first, put state on the list. */
- state->stateBits |= SB_ONLIST;
- stateList.append( state );
-
- /* Recurse on everything ranges. */
- for ( TransList::Iter tel = state->outList; tel.lte(); tel++ ) {
- if ( tel->toState != 0 )
- depthFirstOrdering( tel->toState );
- }
-}
-
-/* Ordering states by transition connections. */
-void FsmGraph::depthFirstOrdering()
-{
- /* Init on state list flags. */
- for ( StateList::Iter st = stateList; st.lte(); st++ )
- st->stateBits &= ~SB_ONLIST;
-
- /* Clear out the state list, we will rebuild it. */
- int stateListLen = stateList.length();
- stateList.abandon();
-
- /* Add back to the state list from the start state and all other entry
- * points. */
- if ( errState != 0 )
- depthFirstOrdering( errState );
- depthFirstOrdering( startState );
- for ( EntryMap::Iter en = entryPoints; en.lte(); en++ )
- depthFirstOrdering( en->value );
-
- /* Make sure we put everything back on. */
- assert( stateListLen == stateList.length() );
-}
-
-/* Stable sort the states by final state status. */
-void FsmGraph::sortStatesByFinal()
-{
- /* Move forward through the list and throw final states onto the end. */
- FsmState *state = 0;
- FsmState *next = stateList.head;
- FsmState *last = stateList.tail;
- while ( state != last ) {
- /* Move forward and load up the next. */
- state = next;
- next = state->next;
-
- /* Throw to the end? */
- if ( state->isFinState() ) {
- stateList.detach( state );
- stateList.append( state );
- }
- }
-}
-
-void FsmGraph::setStateNumbers( int base )
-{
- for ( StateList::Iter state = stateList; state.lte(); state++ )
- state->alg.stateNum = base++;
-}
-
-
-bool FsmGraph::checkErrTrans( FsmState *state, FsmTrans *trans )
-{
- /* Might go directly to error state. */
- if ( trans->toState == 0 )
- return true;
-
- if ( trans->prev == 0 ) {
- /* If this is the first transition. */
- if ( keyOps->minKey < trans->lowKey )
- return true;
- }
- else {
- /* Not the first transition. Compare against the prev. */
- FsmTrans *prev = trans->prev;
- Key nextKey = prev->highKey;
- nextKey.increment();
- if ( nextKey < trans->lowKey )
- return true;
- }
- return false;
-}
-
-bool FsmGraph::checkErrTransFinish( FsmState *state )
-{
- /* Check if there are any ranges already. */
- if ( state->outList.length() == 0 )
- return true;
- else {
- /* Get the last and check for a gap on the end. */
- FsmTrans *last = state->outList.tail;
- if ( last->highKey < keyOps->maxKey )
- return true;
- }
- return 0;
-}
-
-bool FsmGraph::hasErrorTrans()
-{
- bool result;
- for ( StateList::Iter st = stateList; st.lte(); st++ ) {
- for ( TransList::Iter tr = st->outList; tr.lte(); tr++ ) {
- result = checkErrTrans( st, tr );
- if ( result )
- return true;
- }
- result = checkErrTransFinish( st );
- if ( result )
- return true;
- }
- return false;
-}
diff --git a/colm/fsmcodegen.cc b/colm/fsmcodegen.cc
deleted file mode 100644
index 86302c31..00000000
--- a/colm/fsmcodegen.cc
+++ /dev/null
@@ -1,1098 +0,0 @@
-/*
- * Copyright 2006-2012 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Colm.
- *
- * Colm is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Colm is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Colm; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include "parsedata.h"
-#include "fsmcodegen.h"
-#include "redfsm.h"
-#include "bstmap.h"
-#include "fsmrun.h"
-#include <sstream>
-#include <string>
-#include <assert.h>
-
-
-using std::ostream;
-using std::ostringstream;
-using std::string;
-using std::cerr;
-using std::endl;
-
-
-/* Initialise the code generator from its in-parameters. The source file
- * name, fsm name, output stream, reduced machine and fsm tables are stored
- * as given. The error count starts at zero and the dataPrefix,
- * writeFirstFinal and writeErr flags all start out true. */
-FsmCodeGen::FsmCodeGen( const char *sourceFileName, const char *fsmName, ostream &out,
- RedFsm *redFsm, FsmTables *fsmTables )
-:
- sourceFileName(sourceFileName),
- fsmName(fsmName),
- out(out),
- redFsm(redFsm),
- fsmTables(fsmTables),
- codeGenErrCount(0),
- dataPrefix(true),
- writeFirstFinal(true),
- writeErr(true)
-{
-}
-
-/* Size of the host type that keyOps->typeSubsumes picks for maxVal. A
- * suitable type must exist (asserted). */
-unsigned int FsmCodeGen::arrayTypeSize( unsigned long maxVal )
-{
-	long long widened = (long long) maxVal;
-	HostType *fit = keyOps->typeSubsumes( widened );
-	assert( fit != 0 );
-
-	unsigned int bytes = fit->size;
-	return bytes;
-}
-
-/* Name of the host type that keyOps->typeSubsumes picks for maxVal. The name
- * is data1, followed by a space and data2 when the type has a second word. */
-string FsmCodeGen::ARRAY_TYPE( unsigned long maxVal )
-{
-	long long widened = (long long) maxVal;
-	HostType *fit = keyOps->typeSubsumes( widened );
-	assert( fit != 0 );
-
-	string typeName = fit->data1;
-	if ( fit->data2 == 0 )
-		return typeName;
-
-	typeName += " ";
-	typeName += fit->data2;
-	return typeName;
-}
-
-
-/* The fsm name handed to the constructor, as a string. */
-string FsmCodeGen::FSM_NAME()
-{
-	string name = fsmName;
-	return name;
-}
-
-/* Return the id of the start state formatted as a decimal integer. */
-string FsmCodeGen::START_STATE_ID()
-{
-	ostringstream ret;
-	ret << redFsm->startState->id;
-	return ret.str();
-}
-
-/* Emit the flat actions array. It starts with a leading 0. Each action table
- * then contributes its length followed by the ids of its actions. A line
- * break is inserted every eight values. */
-std::ostream &FsmCodeGen::ACTIONS_ARRAY()
-{
-	/* The leading zero counts as the first value written. */
-	int emitted = 1;
-	out << "\t0, ";
-
-	for ( GenActionTableMap::Iter act = redFsm->actionMap; act.lte(); act++ ) {
-		/* The length is never the final value, so it always gets a comma. */
-		out << act->key.length() << ", ";
-		if ( emitted % 8 == 7 )
-			out << "\n\t";
-		emitted += 1;
-
-		for ( GenActionTable::Iter item = act->key; item.lte(); item++ ) {
-			out << item->value->actionId;
-
-			/* Comma after everything except the very last id. */
-			if ( !act.last() || !item.last() )
-				out << ", ";
-
-			if ( emitted % 8 == 7 )
-				out << "\n\t";
-			emitted += 1;
-		}
-	}
-
-	out << "\n";
-	return out;
-}
-
-
-/* Expression naming the current-state variable: ACCESS() followed by "cs". */
-string FsmCodeGen::CS()
-{
-	ostringstream expr;
-	expr << ACCESS();
-	expr << "cs";
-	return expr.str();
-}
-
-/* Key expression for the machine as a whole: the widened "_widec" variable
- * when the machine has any conditions, the plain key otherwise. */
-string FsmCodeGen::GET_WIDE_KEY()
-{
-	return redFsm->anyConditions() ? string( "_widec" ) : GET_KEY();
-}
-
-/* Key expression for one state: "_widec" when the state has entries in its
- * condition list, the plain key otherwise. */
-string FsmCodeGen::GET_WIDE_KEY( RedState *state )
-{
-	bool hasConds = state->stateCondList.length() > 0;
-	return hasConds ? string( "_widec" ) : GET_KEY();
-}
-
-/* Expression that reads the current key: a parenthesised dereference of the
- * P() pointer expression. */
-string FsmCodeGen::GET_KEY()
-{
-	ostringstream expr;
-	expr << "(*";
-	expr << P();
-	expr << ")";
-	return expr.str();
-}
-
-/* Build a string of level tab characters (empty when level is zero or
- * negative). Used to indent the nested binary search output. */
-string FsmCodeGen::TABS( int level )
-{
-	string::size_type count = level > 0 ? (string::size_type) level : 0;
-	return string( count, '\t' );
-}
-
-/* Format a key as a literal. Whether it is printed as a plain value or as an
- * unsigned value with a 'u' suffix depends on the alphabet's signedness and
- * on whether the host language wants explicit unsigned literals. */
-string FsmCodeGen::KEY( Key key )
-{
-	ostringstream text;
-	bool asUnsigned = !keyOps->isSigned && hostLang->explicitUnsigned;
-
-	if ( asUnsigned )
-		text << (unsigned long) key.getVal() << 'u';
-	else
-		text << key.getVal();
-
-	return text.str();
-}
-
-/* Emit an assignment of the item's longest-match id to the act variable. */
-void FsmCodeGen::SET_ACT( ostream &ret, InlineItem *item )
-{
-	ret << ACT() << " = ";
-	ret << item->longestMatchPart->longestMatchId << ";";
-}
-
-/* Emit an assignment that sets tokend to one past the current position. */
-void FsmCodeGen::SET_TOKEND( ostream &ret, InlineItem *item )
-{
-	ret << TOKEND() << " = ";
-	ret << P() << "+1;";
-}
-/* Emit an assignment that resets tokstart to zero. */
-void FsmCodeGen::INIT_TOKSTART( ostream &ret, InlineItem *item )
-{
-	ret << TOKSTART();
-	ret << " = 0;";
-}
-
-/* Emit an assignment that resets the act variable to zero. */
-void FsmCodeGen::INIT_ACT( ostream &ret, InlineItem *item )
-{
-	ret << ACT();
-	ret << " = 0;";
-}
-
-/* Emit an assignment that records the current position as tokstart. */
-void FsmCodeGen::SET_TOKSTART( ostream &ret, InlineItem *item )
-{
-	ret << TOKSTART() << " = ";
-	ret << P() << ";";
-}
-
-/* Emit an assignment of the token's id to the matched-token variable. */
-void FsmCodeGen::EMIT_TOKEN( ostream &ret, LangEl *token )
-{
-	ret << " " << MATCHED_TOKEN();
-	ret << " = " << token->id << ";\n";
-}
-
-/* Emit the longest-match switch. The generated code rewinds p to tokend and
- * switches on act: one case per token definition that takes part in the
- * selection, plus a case 0 that jumps to the error state when the region's
- * switch handles errors. The code returns after the switch. */
-void FsmCodeGen::LM_SWITCH( ostream &ret, InlineItem *item,
-		int targState, int inFinish )
-{
-	ret << " " << P() << " = " << TOKEND() << ";\n";
-	ret << " switch( " << ACT() << " ) {\n";
-
-	/* When the switch handles the error case the error state was forced,
-	 * so it is known to exist here. */
-	if ( item->tokenRegion->lmSwitchHandlesError ) {
-		ret << " case 0: " << P() << " = " << TOKSTART();
-		ret << "; goto st" << redFsm->errState->id << ";\n";
-	}
-
-	for ( TokenDefListReg::Iter lmi = item->tokenRegion->tokenDefList; lmi.lte(); lmi++ ) {
-		if ( !lmi->inLmSelect )
-			continue;
-
-		assert( lmi->tdLangEl != 0 );
-		ret << " case " << lmi->longestMatchId << ":\n";
-		EMIT_TOKEN( ret, lmi->tdLangEl );
-		ret << " break;\n";
-	}
-
-	ret << " }\n";
-	ret << "\t";
-	ret << " return;\n";
-}
-
-/* Emit the match-on-last-character action: advance p by one, record the
- * token and return. The longest-match part must have a language element. */
-void FsmCodeGen::LM_ON_LAST( ostream &ret, InlineItem *item )
-{
-	LangEl *token = item->longestMatchPart->tdLangEl;
-	assert( token != 0 );
-
-	ret << " " << P() << " += 1;\n";
-	EMIT_TOKEN( ret, token );
-	ret << " return;\n";
-}
-
-/* Emit the match-on-next-character action: record the token and return
- * without touching p. The longest-match part must have a language element. */
-void FsmCodeGen::LM_ON_NEXT( ostream &ret, InlineItem *item )
-{
-	LangEl *token = item->longestMatchPart->tdLangEl;
-	assert( token != 0 );
-
-	EMIT_TOKEN( ret, token );
-	ret << " return;\n";
-}
-
-/* Emit the lag-behind match action: rewind p to tokend, record the token and
- * return. The longest-match part must have a language element. */
-void FsmCodeGen::LM_ON_LAG_BEHIND( ostream &ret, InlineItem *item )
-{
-	LangEl *token = item->longestMatchPart->tdLangEl;
-	assert( token != 0 );
-
-	ret << " " << P() << " = " << TOKEND() << ";\n";
-	EMIT_TOKEN( ret, token );
-	ret << " return;\n";
-}
-
-
-/* Walk an inline list and emit code for each item by dispatching on its
- * type. Text and LmInitTokStart items are not expected here and trip an
- * assertion. */
-void FsmCodeGen::INLINE_LIST( ostream &ret, InlineList *inlineList,
-		int targState, bool inFinish )
-{
-	for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
-		switch ( item->type ) {
-		/* Item kinds that must never reach the code generator. */
-		case InlineItem::Text:
-		case InlineItem::LmInitTokStart:
-			assert( false );
-			break;
-
-		case InlineItem::LmSetActId:
-			SET_ACT( ret, item );
-			break;
-		case InlineItem::LmSetTokEnd:
-			SET_TOKEND( ret, item );
-			break;
-		case InlineItem::LmInitAct:
-			INIT_ACT( ret, item );
-			break;
-		case InlineItem::LmSetTokStart:
-			SET_TOKSTART( ret, item );
-			break;
-
-		case InlineItem::LmSwitch:
-			LM_SWITCH( ret, item, targState, inFinish );
-			break;
-		case InlineItem::LmOnLast:
-			LM_ON_LAST( ret, item );
-			break;
-		case InlineItem::LmOnNext:
-			LM_ON_NEXT( ret, item );
-			break;
-		case InlineItem::LmOnLagBehind:
-			LM_ON_LAG_BEHIND( ret, item );
-			break;
-		}
-	}
-}
-
-/* Escape a path for use in a line directive: every backslash is doubled and
- * all other characters are copied through unchanged. */
-string FsmCodeGen::LDIR_PATH( char *path )
-{
-	ostringstream escaped;
-	char *cursor = path;
-
-	while ( *cursor != 0 ) {
-		if ( *cursor != '\\' )
-			escaped << *cursor;
-		else
-			escaped << "\\\\";
-		cursor++;
-	}
-
-	return escaped.str();
-}
-
-/* Emit one action as a brace-enclosed block: the action's inline list, then
- * (when the action has a positive mark id) a store of p into the matching
- * mark slot. */
-void FsmCodeGen::ACTION( ostream &ret, GenAction *action, int targState, bool inFinish )
-{
-	ret << "\t{";
-	INLINE_LIST( ret, action->inlineList, targState, inFinish );
-
-	/* Mark ids are one-based; slot indices are zero-based. */
-	bool hasMark = action->markId > 0;
-	if ( hasMark ) {
-		ret << "mark[" << action->markId-1 << "] = ";
-		ret << P() << ";\n";
-	}
-
-	ret << "}\n";
-}
-
-/* Emit a condition: a newline followed by the condition's inline list,
- * generated with target state 0 and inFinish false. */
-void FsmCodeGen::CONDITION( ostream &ret, GenAction *condition )
-{
-	InlineList *body = condition->inlineList;
-	ret << "\n";
-	INLINE_LIST( ret, body, 0, false );
-}
-
-string FsmCodeGen::ERROR_STATE()
-{
- ostringstream ret;
- if ( redFsm->errState != 0 )
- ret << redFsm->errState->id;
- else
- ret << "-1";
- return ret.str();
-}
-
-/* Id of the first final state as decimal text. When the machine has no
- * first final state, the next unused state id is returned instead. */
-string FsmCodeGen::FIRST_FINAL_STATE()
-{
-	ostringstream id;
-	if ( redFsm->firstFinState == 0 )
-		id << redFsm->nextStateId;
-	else
-		id << redFsm->firstFinState->id;
-	return id.str();
-}
-
-/* Prefix for generated data names: the fsm name plus an underscore when the
- * dataPrefix flag is set, the empty string otherwise. */
-string FsmCodeGen::DATA_PREFIX()
-{
-	return dataPrefix ? FSM_NAME() + "_" : string( "" );
-}
-
-/* Return the name of the alphabet data type: data1, followed by a space and
- * data2 when the type name has a second word. */
-string FsmCodeGen::ALPH_TYPE()
-{
-	string ret = keyOps->alphType->data1;
-	if ( keyOps->alphType->data2 != 0 ) {
-		ret += " ";
-		ret += keyOps->alphType->data2;
-	}
-	return ret;
-}
-
-/* Return the name of the type used for widened keys. When the machine's
- * maximum key fits in the alphabet this is just the alphabet type; otherwise
- * it is the host type that keyOps->typeSubsumes picks for the machine's
- * maximum key, which must exist (asserted). */
-string FsmCodeGen::WIDE_ALPH_TYPE()
-{
-	if ( redFsm->maxKey <= keyOps->maxKey )
-		return ALPH_TYPE();
-
-	long long maxKeyVal = redFsm->maxKey.getLongLong();
-	HostType *wideType = keyOps->typeSubsumes( keyOps->isSigned, maxKeyVal );
-	assert( wideType != 0 );
-
-	string typeName = wideType->data1;
-	if ( wideType->data2 != 0 ) {
-		typeName += " ";
-		typeName += wideType->data2;
-	}
-	return typeName;
-}
-
-
-/* Qualifier text placed in front of pointer declarations. */
-string FsmCodeGen::PTR_CONST()
-{
-	string qualifier = "const ";
-	return qualifier;
-}
-
-/* Emit the opening of a static const array definition with the given
- * element type and name, up to and including the opening brace. */
-std::ostream &FsmCodeGen::OPEN_ARRAY( string type, string name )
-{
-	out << "static const " << type;
-	out << " " << name << "[] = {\n";
-	return out;
-}
-
-/* Emit the closing brace and semicolon of an array definition. */
-std::ostream &FsmCodeGen::CLOSE_ARRAY()
-{
-	out << "};\n";
-	return out;
-}
-
-/* Emit the start of a static const variable declaration (type and name
- * only); the caller appends the initialiser. */
-std::ostream &FsmCodeGen::STATIC_VAR( string type, string name )
-{
-	out << "static const " << type;
-	out << " " << name;
-	return out;
-}
-
-/* Name of the unsigned integer type used in generated code. */
-string FsmCodeGen::UINT( )
-{
-	string typeName = "unsigned int";
-	return typeName;
-}
-
-/* Expression adding an offset to an array pointer: "ptr + offset". */
-string FsmCodeGen::ARR_OFF( string ptr, string offset )
-{
-	string expr = ptr;
-	expr += " + ";
-	expr += offset;
-	return expr;
-}
-
-/* C-style cast prefix for the given type: the type name in parentheses. */
-string FsmCodeGen::CAST( string type )
-{
-	string expr = "(";
-	expr += type;
-	expr += ")";
-	return expr;
-}
-
-/* Emit one switch case (label, action body, break) for every action that is
- * referenced as a to-state action. */
-std::ostream &FsmCodeGen::TO_STATE_ACTION_SWITCH()
-{
-	for ( GenActionList::Iter act = redFsm->genActionList; act.lte(); act++ ) {
-		/* Unreferenced actions get no case. */
-		if ( act->numToStateRefs <= 0 )
-			continue;
-
-		out << "\tcase " << act->actionId << ":\n";
-		ACTION( out, act, 0, false );
-		out << "\tbreak;\n";
-	}
-
-	return out;
-}
-
-/* Emit one switch case (label, action body, break) for every action that is
- * referenced as a from-state action. */
-std::ostream &FsmCodeGen::FROM_STATE_ACTION_SWITCH()
-{
-	for ( GenActionList::Iter act = redFsm->genActionList; act.lte(); act++ ) {
-		/* Unreferenced actions get no case. */
-		if ( act->numFromStateRefs <= 0 )
-			continue;
-
-		out << "\tcase " << act->actionId << ":\n";
-		ACTION( out, act, 0, false );
-		out << "\tbreak;\n";
-	}
-
-	return out;
-}
-
-/* Emit one switch case (label, action body, break) for every action that is
- * referenced by a transition. */
-std::ostream &FsmCodeGen::ACTION_SWITCH()
-{
-	for ( GenActionList::Iter act = redFsm->genActionList; act.lte(); act++ ) {
-		/* Unreferenced actions get no case. */
-		if ( act->numTransRefs <= 0 )
-			continue;
-
-		out << "\tcase " << act->actionId << ":\n";
-		ACTION( out, act, 0, false );
-		out << "\tbreak;\n";
-	}
-
-	return out;
-}
-
-/* Emit the tests for a state's single-key transitions: nothing when there
- * are none, an if statement for exactly one key, and a switch with one case
- * per key otherwise. */
-void FsmCodeGen::emitSingleSwitch( RedState *state )
-{
-	int count = state->outSingle.length();
-	RedTransEl *singles = state->outSingle.data;
-
-	if ( count < 1 )
-		return;
-
-	if ( count == 1 ) {
-		/* A lone key is cheaper as a plain comparison. */
-		out << "\tif ( " << GET_WIDE_KEY(state) << " == " <<
-				KEY(singles[0].lowKey) << " )\n\t\t";
-
-		/* Virtual function for writing the target of the transition. */
-		TRANS_GOTO(singles[0].value, 0) << "\n";
-		return;
-	}
-
-	/* Several keys: one case label per key. */
-	out << "\tswitch( " << GET_WIDE_KEY(state) << " ) {\n";
-	for ( int idx = 0; idx < count; idx++ ) {
-		out << "\t\tcase " << KEY(singles[idx].lowKey) << ": ";
-		TRANS_GOTO(singles[idx].value, 0) << "\n";
-	}
-	out << "\t}\n";
-}
-
-/* Emit a nested if/else binary search over the state's range transitions
- * with indices low..high. The middle range is tested at this level and the
- * lower and upper halves are emitted recursively one indent level deeper.
- * Comparisons against an end of the alphabet are left out, since they can
- * never fail. */
-void FsmCodeGen::emitRangeBSearch( RedState *state, int level, int low, int high )
-{
-	/* Middle position, rounded towards the low end. */
-	int mid = (low + high) >> 1;
-	RedTransEl *data = state->outRange.data;
-
-	/* Is there anything left to search on either side of mid? */
-	bool anyLower = mid > low;
-	bool anyHigher = mid < high;
-
-	/* Does the middle range touch an end of the alphabet? */
-	bool limitLow = data[mid].lowKey == keyOps->minKey;
-	bool limitHigh = data[mid].highKey == keyOps->maxKey;
-
-	/* Text fragments shared by every branch below. */
-	const string indent = TABS(level);
-	const string key = GET_WIDE_KEY(state);
-	const string lowKey = KEY(data[mid].lowKey);
-	const string highKey = KEY(data[mid].highKey);
-
-	if ( anyLower ) {
-		/* Below mid: recurse into the lower half first. */
-		out << indent << "if ( " << key << " < " << lowKey << " ) {\n";
-		emitRangeBSearch( state, level+1, low, mid-1 );
-
-		if ( anyHigher ) {
-			out << indent << "} else if ( " << key << " > " << highKey << " ) {\n";
-			emitRangeBSearch( state, level+1, mid+1, high );
-			out << indent << "} else\n";
-		}
-		else if ( limitHigh ) {
-			/* Nothing can exceed the top of the alphabet; no test needed. */
-			out << indent << "} else\n";
-		}
-		else {
-			out << indent << "} else if ( " << key << " <= " << highKey << " )\n";
-		}
-	}
-	else if ( anyHigher ) {
-		/* Only the upper half remains to be searched. */
-		out << indent << "if ( " << key << " > " << highKey << " ) {\n";
-		emitRangeBSearch( state, level+1, mid+1, high );
-
-		if ( limitLow ) {
-			/* Nothing can fall below the bottom of the alphabet. */
-			out << indent << "} else\n";
-		}
-		else {
-			out << indent << "} else if ( " << key << " >= " << lowKey << " )\n";
-		}
-	}
-	else if ( !limitLow && !limitHigh ) {
-		/* Leaf: both bounds need testing. */
-		out << indent << "if ( " << lowKey << " <= " << key << " && " <<
-				key << " <= " << highKey << " )\n";
-	}
-	else if ( limitLow && !limitHigh ) {
-		out << indent << "if ( " << key << " <= " << highKey << " )\n";
-	}
-	else if ( !limitLow && limitHigh ) {
-		out << indent << "if ( " << lowKey << " <= " << key << " )\n";
-	}
-	/* Otherwise the range spans the whole alphabet and no test is emitted. */
-
-	/* Every path ends by taking the middle range's transition. */
-	TRANS_GOTO(data[mid].value, level+1) << "\n";
-}
-
-/* Emit the translation of the current key into the widened key space of a
- * condition space. _widec is first set to the space's base key plus the
- * key's offset from the alphabet minimum. Then, for each condition in the
- * set, an if statement adds (1 << position) * alphabet size when the
- * condition holds. */
-void FsmCodeGen::COND_TRANSLATE( GenStateCond *stateCond, int level )
-{
-	GenCondSpace *space = stateCond->condSpace;
-
-	out << TABS(level) << "_widec = " << CAST(WIDE_ALPH_TYPE()) << "(";
-	out << KEY(space->baseKey) << " + (" << GET_KEY();
-	out << " - " << KEY(keyOps->minKey) << "));\n";
-
-	for ( GenCondSet::Iter csi = space->condSet; csi.lte(); csi++ ) {
-		out << TABS(level) << "if ( ";
-		CONDITION( out, *csi );
-
-		/* Each condition owns one bit above the plain alphabet. */
-		Size condValOffset = ((1 << csi.pos()) * keyOps->alphSize());
-		out << " ) _widec += " << condValOffset << ";\n";
-	}
-}
-
-/* Emit a nested if/else binary search over the state's condition ranges
- * with indices low..high. The range that matches the current key has its
- * condition translation (COND_TRANSLATE) emitted inside a braced block.
- * Comparisons against an end of the alphabet are left out. When the middle
- * range spans the whole alphabet and nothing remains on either side, the
- * translation is emitted with no test and no block, at the current level.
- *
- * Every test opens its block as ") {\n". One branch used to emit ")\n {"
- * instead, which was valid but inconsistent output. */
-void FsmCodeGen::emitCondBSearch( RedState *state, int level, int low, int high )
-{
-	/* Middle position, rounded towards the low end. */
-	int mid = (low + high) >> 1;
-	GenStateCond **data = state->stateCondVect.data;
-
-	/* Is there anything left to search on either side of mid? */
-	bool anyLower = mid > low;
-	bool anyHigher = mid < high;
-
-	/* Does the middle range touch an end of the alphabet? */
-	bool limitLow = data[mid]->lowKey == keyOps->minKey;
-	bool limitHigh = data[mid]->highKey == keyOps->maxKey;
-
-	/* Text fragments shared by every branch below. */
-	const string indent = TABS(level);
-	const string key = GET_KEY();
-	const string lowKey = KEY(data[mid]->lowKey);
-	const string highKey = KEY(data[mid]->highKey);
-
-	if ( anyLower ) {
-		/* Below mid: recurse into the lower half first. */
-		out << indent << "if ( " << key << " < " << lowKey << " ) {\n";
-		emitCondBSearch( state, level+1, low, mid-1 );
-
-		if ( anyHigher ) {
-			out << indent << "} else if ( " << key << " > " << highKey << " ) {\n";
-			emitCondBSearch( state, level+1, mid+1, high );
-			out << indent << "} else {\n";
-		}
-		else if ( limitHigh ) {
-			/* Nothing can exceed the top of the alphabet; no test needed. */
-			out << indent << "} else {\n";
-		}
-		else {
-			out << indent << "} else if ( " << key << " <= " << highKey << " ) {\n";
-		}
-	}
-	else if ( anyHigher ) {
-		/* Only the upper half remains to be searched. */
-		out << indent << "if ( " << key << " > " << highKey << " ) {\n";
-		emitCondBSearch( state, level+1, mid+1, high );
-
-		if ( limitLow ) {
-			/* Nothing can fall below the bottom of the alphabet. */
-			out << indent << "} else {\n";
-		}
-		else {
-			out << indent << "} else if ( " << key << " >= " << lowKey << " ) {\n";
-		}
-	}
-	else if ( !limitLow && !limitHigh ) {
-		/* Leaf: both bounds need testing. */
-		out << indent << "if ( " << lowKey << " <= " << key << " && " <<
-				key << " <= " << highKey << " ) {\n";
-	}
-	else if ( limitLow && !limitHigh ) {
-		out << indent << "if ( " << key << " <= " << highKey << " ) {\n";
-	}
-	else if ( !limitLow && limitHigh ) {
-		out << indent << "if ( " << lowKey << " <= " << key << " ) {\n";
-	}
-	else {
-		/* The range spans the whole alphabet: translate unconditionally. */
-		COND_TRANSLATE(data[mid], level);
-		return;
-	}
-
-	/* Body and closing brace of whichever block was opened above. */
-	COND_TRANSLATE(data[mid], level+1);
-	out << indent << "}\n";
-}
-
-std::ostream &FsmCodeGen::STATE_GOTOS()
-{
- for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
- if ( st == redFsm->errState )
- STATE_GOTO_ERROR();
- else {
- /* Writing code above state gotos. */
- GOTO_HEADER( st );
-
- if ( st->stateCondVect.length() > 0 ) {
- out << " _widec = " << GET_KEY() << ";\n";
- emitCondBSearch( st, 1, 0, st->stateCondVect.length() - 1 );
- }
-
- /* Try singles. */
- if ( st->outSingle.length() > 0 )
- emitSingleSwitch( st );
-
- /* Default case is to binary search for the ranges, if that fails then */
- if ( st->outRange.length() > 0 )
- emitRangeBSearch( st, 1, 0, st->outRange.length() - 1 );
-
- /* Write the default transition. */
- TRANS_GOTO( st->defTrans, 1 ) << "\n";
- }
- }
- return out;
-}
-
-/* Index value for a state's to-state action: the action's location plus
- * one, or zero when the state has no to-state action. */
-unsigned int FsmCodeGen::TO_STATE_ACTION( RedState *state )
-{
-	if ( state->toStateAction == 0 )
-		return 0;
-
-	int index = state->toStateAction->location+1;
-	return index;
-}
-
-/* Index value for a state's from-state action: the action's location plus
- * one, or zero when the state has no from-state action. */
-unsigned int FsmCodeGen::FROM_STATE_ACTION( RedState *state )
-{
-	if ( state->fromStateAction == 0 )
-		return 0;
-
-	int index = state->fromStateAction->location+1;
-	return index;
-}
-
-/* Emit the to-state action index of every state id in 0..nextStateId-1 as a
- * comma-separated list, IALL values per line. Ids that no state in the list
- * carries are emitted as 0.
- *
- * The scratch table is sized to cover both the number of states and
- * nextStateId. It was previously sized by the state count alone while being
- * read up to nextStateId, which would read past the end if the two ever
- * differed. The separator test now uses the same bound as the loop. */
-std::ostream &FsmCodeGen::TO_STATE_ACTIONS()
-{
-	int numStates = redFsm->stateList.length();
-	int numIds = redFsm->nextStateId;
-	int capacity = numIds > numStates ? numIds : numStates;
-
-	/* The trailing () value-initialises every slot to zero. */
-	unsigned int *vals = new unsigned int[capacity]();
-
-	/* NOTE(review): state ids are presumed to be below capacity -- confirm. */
-	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ )
-		vals[st->id] = TO_STATE_ACTION(st);
-
-	out << "\t";
-	for ( int id = 0; id < numIds; id++ ) {
-		out << vals[id];
-		if ( id < numIds-1 ) {
-			out << ", ";
-			if ( (id+1) % IALL == 0 )
-				out << "\n\t";
-		}
-	}
-	out << "\n";
-
-	delete[] vals;
-	return out;
-}
-
-/* Emit the from-state action index of every state id in 0..nextStateId-1 as
- * a comma-separated list, IALL values per line. Ids that no state in the
- * list carries are emitted as 0.
- *
- * The scratch table is sized to cover both the number of states and
- * nextStateId. It was previously sized by the state count alone while being
- * read up to nextStateId, which would read past the end if the two ever
- * differed. The separator test now uses the same bound as the loop. */
-std::ostream &FsmCodeGen::FROM_STATE_ACTIONS()
-{
-	int numStates = redFsm->stateList.length();
-	int numIds = redFsm->nextStateId;
-	int capacity = numIds > numStates ? numIds : numStates;
-
-	/* The trailing () value-initialises every slot to zero. */
-	unsigned int *vals = new unsigned int[capacity]();
-
-	/* NOTE(review): state ids are presumed to be below capacity -- confirm. */
-	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ )
-		vals[st->id] = FROM_STATE_ACTION(st);
-
-	out << "\t";
-	for ( int id = 0; id < numIds; id++ ) {
-		out << vals[id];
-		if ( id < numIds-1 ) {
-			out << ", ";
-			if ( (id+1) % IALL == 0 )
-				out << "\n\t";
-		}
-	}
-	out << "\n";
-
-	delete[] vals;
-	return out;
-}
-
-/* Emit the labelled action blocks for every transition into state that has
- * an action and needs a label. Each block runs the transition's actions and
- * then jumps to the target state. Always returns false. */
-bool FsmCodeGen::IN_TRANS_ACTIONS( RedState *state )
-{
-	for ( int it = 0; it < state->numInTrans; it++ ) {
-		RedTrans *trans = state->inTrans[it];
-		if ( trans->action == 0 || !trans->labelNeeded )
-			continue;
-
-		/* The label that TRANS_GOTO jumps to. */
-		out << "tr" << trans->id << ":\n";
-
-		/* If the action contains a next statement, preload the current
-		 * state, since the action may or may not set it. */
-		if ( trans->action->anyNextStmt() )
-			out << " " << CS() << " = " << trans->targ->id << ";\n";
-
-		/* Write each action in the list. */
-		for ( GenActionTable::Iter item = trans->action->key; item.lte(); item++ )
-			ACTION( out, item->value, trans->targ->id, false );
-
-		out << "\tgoto st" << trans->targ->id << ";\n";
-	}
-
-	return false;
-}
-
-/* Emit everything that precedes a state's transition tests. In order: the
- * incoming transition action blocks, the state label (if needed), the
- * to-state actions, the switch case label, the buffer advance with its
- * end-of-buffer exit, the from-state actions and, when the state references
- * the current state, the _ps assignment. Called from STATE_GOTOS for every
- * state except the error state. */
-void FsmCodeGen::GOTO_HEADER( RedState *state )
-{
-	IN_TRANS_ACTIONS( state );
-
-	if ( state->labelNeeded )
-		out << "st" << state->id << ":\n";
-
-	/* To-state actions run before the state is entered through the switch. */
-	if ( state->toStateAction != 0 ) {
-		GenActionTable::Iter item = state->toStateAction->key;
-		for ( ; item.lte(); item++ )
-			ACTION( out, item->value, state->id, false );
-	}
-
-	/* Entry point used when resuming in this state. */
-	out << "case " << state->id << ":\n";
-
-	/* Advance the buffer position and leave if the end has been reached. */
-	out << " if ( ++" << P() << " == " << PE() << " )\n";
-	out << " goto out" << state->id << ";\n";
-
-	if ( state->fromStateAction != 0 ) {
-		GenActionTable::Iter item = state->fromStateAction->key;
-		for ( ; item.lte(); item++ )
-			ACTION( out, item->value, state->id, false );
-	}
-
-	/* Record the previous state if anything refers to it. */
-	if ( state->anyRegCurStateRef() )
-		out << " _ps = " << state->id << ";\n";
-}
-
-void FsmCodeGen::STATE_GOTO_ERROR()
-{
- /* In the error state we need to emit some stuff that usually goes into
- * the header. */
- RedState *state = redFsm->errState;
- IN_TRANS_ACTIONS( state );
-
- if ( state->labelNeeded )
- out << "st" << state->id << ":\n";
-
- /* We do not need a case label here because the the error state is checked
- * at the head of the loop. */
-
- /* Break out here. */
- out << " goto out" << state->id << ";\n";
-}
-
-
-/* Emit the goto for a transition, indented by level tabs. A transition with
- * an action jumps to its own "tr" label, which runs the action before going
- * on to the target. One without an action jumps directly to the target
- * state's "st" label. */
-std::ostream &FsmCodeGen::TRANS_GOTO( RedTrans *trans, int level )
-{
-	out << TABS(level);
-
-	if ( trans->action == 0 )
-		out << "goto st" << trans->targ->id << ";";
-	else
-		out << "goto tr" << trans->id << ";";
-
-	return out;
-}
-
-/* Emit the exit code for every state: a case label and "out" label. For
- * states with an eof transition, a test that takes the transition when the
- * buffer end is the end of input. Then an assignment of the state id to the
- * current state and a jump to the common exit. */
-std::ostream &FsmCodeGen::EXIT_STATES()
-{
-	for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
-		out << " case " << st->id << ": ";
-		out << "out" << st->id << ": ";
-
-		if ( st->eofTrans != 0 ) {
-			out << "if ( " << PE() << " == " << PEOF() << " ) {";
-			TRANS_GOTO( st->eofTrans, 0 );
-			out << "\n" << "}";
-		}
-
-		/* Save the state and leave. */
-		out << CS() << " = " << st->id;
-		out << "; goto out; \n";
-	}
-	return out;
-}
-
-/* Work out which states need a label in the generated code. All flags start
- * out false. The error state is flagged when a longest-match switch can jump
- * to it. The target of a transition is flagged unless the transition has an
- * action that contains a next statement. Finally outNeeded is copied from
- * labelNeeded for every state. */
-void FsmCodeGen::setLabelsNeeded()
-{
-	RedStateList::Iter st;
-
-	/* Start from a clean slate. */
-	for ( st = redFsm->stateList; st.lte(); st++ )
-		st->labelNeeded = false;
-
-	if ( redFsm->errState != 0 && redFsm->anyLmSwitchError() )
-		redFsm->errState->labelNeeded = true;
-
-	for ( RedTransSet::Iter trans = redFsm->transSet; trans.lte(); trans++ ) {
-		/* A next statement in the action means no label is recorded for
-		 * this transition's target. */
-		bool hasNext = trans->action != 0 && trans->action->anyNextStmt();
-		if ( !hasNext )
-			trans->targ->labelNeeded = true;
-	}
-
-	for ( st = redFsm->stateList; st.lte(); st++ )
-		st->outNeeded = st->labelNeeded;
-}
-
-/* Emit the static data of the generated scanner: the start, first-final and
- * error defines, false/true defines, the entry-by-region array and the
- * fsmTables_start structure. Only the region entries, the region count and
- * the three state constants are filled in; every other field is zero.
- * NOTE(review): the array is written from fsmTables->numRegions entries but
- * the count field uses redFsm->regionToEntry.length(). Presumably these
- * agree; confirm. */
-void FsmCodeGen::writeData()
-{
-	out << "#define " << START() << " " << START_STATE_ID() << "\n";
-	out << "#define " << FIRST_FINAL() << " " << FIRST_FINAL_STATE() << "\n";
-	out << "#define " << ERROR() << " " << ERROR_STATE() << "\n";
-	out << "#define false 0\n";
-	out << "#define true 1\n";
-	out << "\n";
-
-	/* Region entry points, eight per line. */
-	const int numRegions = fsmTables->numRegions;
-	out << "long " << ENTRY_BY_REGION() << "[] = {\n\t";
-	for ( int region = 0; region < numRegions; region++ ) {
-		out << fsmTables->entryByRegion[region];
-
-		bool isLast = region >= numRegions-1;
-		if ( isLast )
-			continue;
-
-		out << ", ";
-		if ( (region+1) % 8 == 0 )
-			out << "\n\t";
-	}
-	out << "\n};\n\n";
-
-	out <<
-		"FsmTables fsmTables_start =\n"
-		"{\n"
-		" 0, " /* actions */
-		" 0, " /* keyOffsets */
-		" 0, " /* transKeys */
-		" 0, " /* singleLengths */
-		" 0, " /* rangeLengths */
-		" 0, " /* indexOffsets */
-		" 0, " /* transTargsWI */
-		" 0, " /* transActionsWI */
-		" 0, " /* toStateActions */
-		" 0, " /* fromStateActions */
-		" 0, " /* eofActions */
-		" 0,\n" /* eofTargs */
-		" " << ENTRY_BY_REGION() << ",\n"
-
-		"\n"
-		" 0, " /* numStates */
-		" 0, " /* numActions */
-		" 0, " /* numTransKeys */
-		" 0, " /* numSingleLengths */
-		" 0, " /* numRangeLengths */
-		" 0, " /* numIndexOffsets */
-		" 0, " /* numTransTargsWI */
-		" 0,\n" /* numTransActionsWI */
-		" " << redFsm->regionToEntry.length() << ",\n"
-		"\n"
-		" " << START() << ",\n"
-		" " << FIRST_FINAL() << ",\n"
-		" " << ERROR() << ",\n"
-		"\n"
-		" 0,\n" /* actionSwitch */
-		" 0\n" /* numActionSwitch */
-		"};\n"
-		"\n";
-}
-
-/* Emit the scanner initialisation: the current state is set to the start
- * state, the stack top is zeroed when the machine has call or return
- * actions, and tokstart, tokend and act are zeroed. */
-void FsmCodeGen::writeInit()
-{
-	out << " " << CS() << " = " << START() << ";\n";
-
-	/* The stack top only needs initialising when calls or returns exist. */
-	bool usesStack = redFsm->anyActionCalls() || redFsm->anyActionRets();
-	if ( usesStack )
-		out << "\t" << TOP() << " = 0;\n";
-
-	out << " " << TOKSTART() << " = 0;\n";
-	out << " " << TOKEND() << " = 0;\n";
-	out << " " << ACT() << " = 0;\n";
-
-	out << "\n";
-}
-
-/* Emit the fsmExecute function. After computing which labels are needed it
- * writes the function header, an early exit when already in the error state
- * (if one exists), the empty-buffer check, the main switch containing the
- * state code and finally the out_switch containing the exit code. */
-void FsmCodeGen::writeExec()
-{
-	setLabelsNeeded();
-
-	out << "void fsmExecute( FsmRun *fsmRun, InputStream *inputStream )\n";
-	out << "{\n";
-	out << "/*_resume:*/\n";
-
-	if ( redFsm->errState != 0 ) {
-		out << " if ( " << CS() << " == " << redFsm->errState->id << " )\n";
-		out << " goto out;\n";
-	}
-
-	/* The state headers pre-increment p, so back it up by one first. */
-	out << " if ( " << P() << " == " << PE() << " )\n";
-	out << " goto out_switch;\n";
-	out << " --" << P() << ";\n";
-	out << "\n";
-	out << " switch ( " << CS() << " )\n {\n";
-	STATE_GOTOS() << " }\n";
-
-	out << "out_switch:\n";
-	out << " switch ( " << CS() << " )\n {\n";
-	EXIT_STATES() << " }\n";
-
-	out << " out: {}\n";
-	out << "}\n";
-	out << "\n";
-}
-
-/* Emit the #include lines that the generated code starts with, followed by
- * two blank lines. */
-void FsmCodeGen::writeIncludes()
-{
-	/* Headers in the order they must appear in the output. */
-	static const char *const headers[] = {
-		"colm/pdarun.h",
-		"colm/fsmrun.h",
-		"colm/debug.h",
-		"colm/bytecode.h",
-		"stdio.h",
-		"stdlib.h",
-		"string.h",
-		"assert.h",
-		"colm/config.h",
-		"colm/defs.h",
-		"colm/input.h",
-		"colm/tree.h",
-		"colm/program.h",
-		"colm/colm.h"
-	};
-	const int numHeaders = sizeof(headers) / sizeof(headers[0]);
-
-	for ( int h = 0; h < numHeaders; h++ )
-		out << "#include <" << headers[h] << ">\n";
-
-	out << "\n";
-	out << "\n";
-}
-
-/* Emit the complete scanner: order the states depth first, write the data
- * and the execute function, then write stubs for functions that the runtime
- * library references but only the compiler uses. */
-void FsmCodeGen::writeCode()
-{
-	redFsm->depthFirstOrdering();
-
-	writeData();
-	writeExec();
-
-	/* Referenced in the runtime lib, but used only in the compiler. These
-	 * should probably be made to go away with the preprocessor. */
-	out << "void sendNamedLangEl( Program *prg, Tree **tree, PdaRun *pdaRun,\n";
-	out << " FsmRun *fsmRun, InputStream *inputStream ) { }\n";
-	out << "void initBindings( PdaRun *pdaRun ) {}\n";
-	out << "void pushBinding( PdaRun *pdaRun, ParseTree *parseTree ) {}\n";
-	out << "void popBinding( PdaRun *pdaRun, ParseTree *tree ) {}\n";
-	out << "void initStaticFuncs() {}\n";
-	out << "void initPatternFuncs() {}\n";
-	out << "void initReplFuncs() {}\n";
-	out << "void initInputFuncs();\n";
-	out << "\n";
-	out << "\n";
-}
-
-/* Start a warning message on cerr with a "file:line:col: warning: " prefix
- * and return cerr so the caller can append the message text. */
-ostream &FsmCodeGen::source_warning( const InputLoc &loc )
-{
-	cerr << sourceFileName << ":";
-	cerr << loc.line << ":" << loc.col;
-	cerr << ": warning: ";
-	return cerr;
-}
-
-ostream &FsmCodeGen::source_error( const InputLoc &loc )
-{
- codeGenErrCount += 1;
- assert( sourceFileName != 0 );
- cerr << sourceFileName << ":" << loc.line << ":" << loc.col << ": ";
- return cerr;
-}
-
-
diff --git a/colm/fsmcodegen.h b/colm/fsmcodegen.h
deleted file mode 100644
index 41cd88ec..00000000
--- a/colm/fsmcodegen.h
+++ /dev/null
@@ -1,212 +0,0 @@
-/*
- * Copyright 2001-2012 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Colm.
- *
- * Colm is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Colm is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Colm; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#ifndef _FSMCODEGEN_H
-#define _FSMCODEGEN_H
-
-#include <iostream>
-#include <string>
-#include <stdio.h>
-#include "keyops.h"
-#include "parsedata.h"
-#include "redfsm.h"
-#include "fsmrun.h"
-
-using std::string;
-using std::ostream;
-
-/* Integer array line length. */
-#define IALL 8
-
-/* Forwards. */
-struct RedFsm;
-struct RedState;
-struct GenAction;
-struct NameInst;
-struct RedAction;
-struct LongestMatch;
-struct TokenDef;
-struct InlineList;
-struct InlineItem;
-struct NameInst;
-struct FsmCodeGen;
-
-typedef unsigned long ulong;
-typedef unsigned char uchar;
-
-
-/*
- * The interface to the parser
- */
-
-std::ostream *openOutput( char *inputFile );
-
-inline string itoa( int i )
-{
- char buf[16];
- sprintf( buf, "%i", i );
- return buf;
-}
-
-/*
- * class FsmCodeGen
- */
-class FsmCodeGen
-{
-public:
- FsmCodeGen( const char *sourceFileName, const char *fsmName, ostream &out,
- RedFsm *redFsm, FsmTables *fsmTables );
-
-protected:
- string FSM_NAME();
- string START_STATE_ID();
- ostream &ACTIONS_ARRAY();
- string GET_WIDE_KEY();
- string GET_WIDE_KEY( RedState *state );
- string TABS( int level );
- string KEY( Key key );
- string LDIR_PATH( char *path );
- void ACTION( ostream &ret, GenAction *action, int targState, bool inFinish );
- void CONDITION( ostream &ret, GenAction *condition );
- string ALPH_TYPE();
- string WIDE_ALPH_TYPE();
- string ARRAY_TYPE( unsigned long maxVal );
-
- string ARR_OFF( string ptr, string offset );
- string CAST( string type );
- string UINT();
- string GET_KEY();
-
- string ACCESS() { return "fsmRun->"; }
-
- string P() { return ACCESS() + "p"; }
- string PE() { return ACCESS() + "pe"; }
- string PEOF() { return ACCESS() + "peof"; }
-
- string CS();
- string TOP() { return ACCESS() + "top"; }
- string TOKSTART() { return ACCESS() + "tokstart"; }
- string TOKEND() { return ACCESS() + "tokend"; }
- string ACT() { return ACCESS() + "act"; }
- string MATCHED_TOKEN() { return ACCESS() + "matchedToken"; }
-
- string DATA_PREFIX();
-
- string START() { return DATA_PREFIX() + "start"; }
- string ERROR() { return DATA_PREFIX() + "error"; }
- string FIRST_FINAL() { return DATA_PREFIX() + "first_final"; }
-
- string ENTRY_BY_REGION() { return DATA_PREFIX() + "entryByRegion"; }
-
-
- void INLINE_LIST( ostream &ret, InlineList *inlineList,
- int targState, bool inFinish );
- void EXEC_TOKEND( ostream &ret, InlineItem *item, int targState, int inFinish );
- void EXECTE( ostream &ret, InlineItem *item, int targState, int inFinish );
- void LM_SWITCH( ostream &ret, InlineItem *item, int targState, int inFinish );
- void SET_ACT( ostream &ret, InlineItem *item );
- void INIT_TOKSTART( ostream &ret, InlineItem *item );
- void INIT_ACT( ostream &ret, InlineItem *item );
- void SET_TOKSTART( ostream &ret, InlineItem *item );
- void SET_TOKEND( ostream &ret, InlineItem *item );
- void GET_TOKEND( ostream &ret, InlineItem *item );
- void SUB_ACTION( ostream &ret, InlineItem *item, int targState, bool inFinish );
- void LM_ON_LAST( ostream &ret, InlineItem *item );
- void LM_ON_NEXT( ostream &ret, InlineItem *item );
- void LM_ON_LAG_BEHIND( ostream &ret, InlineItem *item );
- void EXEC_TOKEND( ostream &ret );
- void EMIT_TOKEN( ostream &ret, LangEl *token );
-
- string ERROR_STATE();
- string FIRST_FINAL_STATE();
-
- string PTR_CONST();
- ostream &OPEN_ARRAY( string type, string name );
- ostream &CLOSE_ARRAY();
- ostream &STATIC_VAR( string type, string name );
-
- string CTRL_FLOW();
-
- ostream &source_warning(const InputLoc &loc);
- ostream &source_error(const InputLoc &loc);
-
- unsigned int arrayTypeSize( unsigned long maxVal );
-
-/* subclass */
-
-public:
- const char *sourceFileName;
- const char *fsmName;
- ostream &out;
- RedFsm *redFsm;
- FsmTables *fsmTables;
- int codeGenErrCount;
-
- /* Write options. */
- bool dataPrefix;
- bool writeFirstFinal;
- bool writeErr;
-
- std::ostream &TO_STATE_ACTION_SWITCH();
- std::ostream &FROM_STATE_ACTION_SWITCH();
- std::ostream &ACTION_SWITCH();
- std::ostream &STATE_GOTOS();
- std::ostream &TRANSITIONS();
- std::ostream &EXEC_FUNCS();
-
- unsigned int TO_STATE_ACTION( RedState *state );
- unsigned int FROM_STATE_ACTION( RedState *state );
-
- std::ostream &TO_STATE_ACTIONS();
- std::ostream &FROM_STATE_ACTIONS();
-
- void COND_TRANSLATE( GenStateCond *stateCond, int level );
- void emitCondBSearch( RedState *state, int level, int low, int high );
- void STATE_CONDS( RedState *state, bool genDefault );
-
- void emitSingleSwitch( RedState *state );
- void emitRangeBSearch( RedState *state, int level, int low, int high );
-
- std::ostream &EXIT_STATES();
- std::ostream &TRANS_GOTO( RedTrans *trans, int level );
- std::ostream &FINISH_CASES();
-
- void writeIncludes();
- void writeData();
- void writeInit();
- void writeExec();
- void writeCode();
- void writeMain();
-
-protected:
- bool useAgainLabel();
-
- /* Called from GotoCodeGen::STATE_GOTOS just before writing the gotos for
- * each state. */
- bool IN_TRANS_ACTIONS( RedState *state );
- void GOTO_HEADER( RedState *state );
- void STATE_GOTO_ERROR();
-
- /* Set up labelNeeded flag for each state. */
- void setLabelsNeeded();
-};
-
-#endif /* _FSMCODEGEN_H */
diff --git a/colm/fsmexec.cc b/colm/fsmexec.cc
deleted file mode 100644
index f922c7a4..00000000
--- a/colm/fsmexec.cc
+++ /dev/null
@@ -1,208 +0,0 @@
-/*
- * Copyright 2007-2012 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Colm.
- *
- * Colm is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Colm is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Colm; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include <string.h>
-#include <iostream>
-
-#include "config.h"
-#include "defs.h"
-#include "fsmrun.h"
-#include "redfsm.h"
-#include "parsedata.h"
-#include "parsetree.h"
-#include "pdarun.h"
-#include "global.h"
-
-void execAction( FsmRun *fsmRun, GenAction *genAction )
-{
- for ( InlineList::Iter item = *genAction->inlineList; item.lte(); item++ ) {
- switch ( item->type ) {
- case InlineItem::Text:
- assert(false);
- break;
- case InlineItem::LmSetActId:
- fsmRun->act = item->longestMatchPart->longestMatchId;
- break;
- case InlineItem::LmSetTokEnd:
- fsmRun->tokend = fsmRun->p + 1;
- break;
- case InlineItem::LmInitTokStart:
- assert(false);
- break;
- case InlineItem::LmInitAct:
- fsmRun->act = 0;
- break;
- case InlineItem::LmSetTokStart:
- fsmRun->tokstart = fsmRun->p;
- break;
- case InlineItem::LmSwitch:
- /* If the switch handles error then we also forced the error state. It
- * will exist. */
- fsmRun->p = fsmRun->tokend;
- if ( item->tokenRegion->lmSwitchHandlesError && fsmRun->act == 0 ) {
- fsmRun->p = fsmRun->tokstart;
- fsmRun->cs = fsmRun->tables->errorState;
- }
- else {
- for ( TokenDefListReg::Iter lmi = item->tokenRegion->tokenDefList;
- lmi.lte(); lmi++ )
- {
- if ( lmi->inLmSelect && fsmRun->act == lmi->longestMatchId )
- fsmRun->matchedToken = lmi->tdLangEl->id;
- }
- }
- fsmRun->returnResult = true;
- break;
- case InlineItem::LmOnLast:
- fsmRun->p += 1;
- fsmRun->matchedToken = item->longestMatchPart->tdLangEl->id;
- fsmRun->returnResult = true;
- break;
- case InlineItem::LmOnNext:
- fsmRun->matchedToken = item->longestMatchPart->tdLangEl->id;
- fsmRun->returnResult = true;
- break;
- case InlineItem::LmOnLagBehind:
- fsmRun->p = fsmRun->tokend;
- fsmRun->matchedToken = item->longestMatchPart->tdLangEl->id;
- fsmRun->returnResult = true;
- break;
- }
- }
-
- if ( genAction->markType == MarkMark )
- fsmRun->mark[genAction->markId-1] = fsmRun->p;
-}
-
-void fsmExecute( FsmRun *fsmRun, InputStream *inputStream )
-{
- int _klen;
- unsigned int _trans;
- const long *_acts;
- unsigned int _nacts;
- const char *_keys;
-
- /* Init the token match to nothing (the sentinal). */
- fsmRun->matchedToken = 0;
-
-/*_resume:*/
- if ( fsmRun->cs == fsmRun->tables->errorState )
- goto out;
-
- if ( fsmRun->p == fsmRun->pe )
- goto out;
-
-_loop_head:
- _acts = fsmRun->tables->actions + fsmRun->tables->fromStateActions[fsmRun->cs];
- _nacts = (unsigned int) *_acts++;
- while ( _nacts-- > 0 )
- execAction( fsmRun, fsmRun->tables->actionSwitch[*_acts++] );
-
- _keys = fsmRun->tables->transKeys + fsmRun->tables->keyOffsets[fsmRun->cs];
- _trans = fsmRun->tables->indexOffsets[fsmRun->cs];
-
- _klen = fsmRun->tables->singleLengths[fsmRun->cs];
- if ( _klen > 0 ) {
- const char *_lower = _keys;
- const char *_mid;
- const char *_upper = _keys + _klen - 1;
- while (1) {
- if ( _upper < _lower )
- break;
-
- _mid = _lower + ((_upper-_lower) >> 1);
- if ( (*fsmRun->p) < *_mid )
- _upper = _mid - 1;
- else if ( (*fsmRun->p) > *_mid )
- _lower = _mid + 1;
- else {
- _trans += (_mid - _keys);
- goto _match;
- }
- }
- _keys += _klen;
- _trans += _klen;
- }
-
- _klen = fsmRun->tables->rangeLengths[fsmRun->cs];
- if ( _klen > 0 ) {
- const char *_lower = _keys;
- const char *_mid;
- const char *_upper = _keys + (_klen<<1) - 2;
- while (1) {
- if ( _upper < _lower )
- break;
-
- _mid = _lower + (((_upper-_lower) >> 1) & ~1);
- if ( (*fsmRun->p) < _mid[0] )
- _upper = _mid - 2;
- else if ( (*fsmRun->p) > _mid[1] )
- _lower = _mid + 2;
- else {
- _trans += ((_mid - _keys)>>1);
- goto _match;
- }
- }
- _trans += _klen;
- }
-
-_match:
- fsmRun->cs = fsmRun->tables->transTargsWI[_trans];
-
- if ( fsmRun->tables->transActionsWI[_trans] == 0 )
- goto _again;
-
- fsmRun->returnResult = false;
- _acts = fsmRun->tables->actions + fsmRun->tables->transActionsWI[_trans];
- _nacts = (unsigned int) *_acts++;
- while ( _nacts-- > 0 )
- execAction( fsmRun, fsmRun->tables->actionSwitch[*_acts++] );
- if ( fsmRun->returnResult )
- return;
-
-_again:
- _acts = fsmRun->tables->actions + fsmRun->tables->toStateActions[fsmRun->cs];
- _nacts = (unsigned int) *_acts++;
- while ( _nacts-- > 0 )
- execAction( fsmRun, fsmRun->tables->actionSwitch[*_acts++] );
-
- if ( fsmRun->cs == fsmRun->tables->errorState )
- goto out;
-
- if ( ++fsmRun->p != fsmRun->pe )
- goto _loop_head;
-out:
- if ( fsmRun->p == fsmRun->peof ) {
- fsmRun->returnResult = false;
- _acts = fsmRun->tables->actions + fsmRun->tables->eofActions[fsmRun->cs];
- _nacts = (unsigned int) *_acts++;
-
- if ( fsmRun->tables->eofTargs[fsmRun->cs] >= 0 )
- fsmRun->cs = fsmRun->tables->eofTargs[fsmRun->cs];
-
- while ( _nacts-- > 0 )
- execAction( fsmRun, fsmRun->tables->actionSwitch[*_acts++] );
- if ( fsmRun->returnResult )
- return;
- }
-}
-
-
diff --git a/colm/fsmgraph.cc b/colm/fsmgraph.cc
deleted file mode 100644
index 590d7902..00000000
--- a/colm/fsmgraph.cc
+++ /dev/null
@@ -1,1408 +0,0 @@
-/*
- * Copyright 2006-2012 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Colm.
- *
- * Colm is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Colm is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Colm; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include <assert.h>
-#include <iostream>
-
-#include "config.h"
-#include "defs.h"
-#include "fsmgraph.h"
-#include "mergesort.h"
-
-using std::cerr;
-using std::endl;
-
-/* Make a new state. The new state will be put on the graph's
- * list of state. The new state can be created final or non final. */
-FsmState *FsmGraph::addState()
-{
- /* Make the new state to return. */
- FsmState *state = new FsmState();
-
- if ( misfitAccounting ) {
- /* Create the new state on the misfit list. All states are created
- * with no foreign in transitions. */
- misfitList.append( state );
- }
- else {
- /* Create the new state. */
- stateList.append( state );
- }
-
- return state;
-}
-
-/* Construct an FSM that is the concatenation of an array of characters. A new
- * machine will be made that has len+1 states with one transition between each
- * state for each integer in str. IsSigned determines if the integers are to
- * be considered as signed or unsigned ints. */
-void FsmGraph::concatFsm( Key *str, int len )
-{
- /* Make the first state and set it as the start state. */
- FsmState *last = addState();
- setStartState( last );
-
- /* Attach subsequent states. */
- for ( int i = 0; i < len; i++ ) {
- FsmState *newState = addState();
- attachNewTrans( last, newState, str[i], str[i] );
- last = newState;
- }
-
- /* Make the last state the final state. */
- setFinState( last );
-}
-
-/* Case insensitive version of concatFsm. */
-void FsmGraph::concatFsmCI( Key *str, int len )
-{
- /* Make the first state and set it as the start state. */
- FsmState *last = addState();
- setStartState( last );
-
- /* Attach subsequent states. */
- for ( int i = 0; i < len; i++ ) {
- FsmState *newState = addState();
-
- KeySet keySet;
- if ( str[i].isLower() )
- keySet.insert( str[i].toUpper() );
- if ( str[i].isUpper() )
- keySet.insert( str[i].toLower() );
- keySet.insert( str[i] );
-
- for ( int i = 0; i < keySet.length(); i++ )
- attachNewTrans( last, newState, keySet[i], keySet[i] );
-
- last = newState;
- }
-
- /* Make the last state the final state. */
- setFinState( last );
-}
-
-/* Construct a machine that matches one character. A new machine will be made
- * that has two states with a single transition between the states. IsSigned
- * determines if the integers are to be considered as signed or unsigned ints. */
-void FsmGraph::concatFsm( Key chr )
-{
- /* Two states first start, second final. */
- setStartState( addState() );
-
- FsmState *end = addState();
- setFinState( end );
-
- /* Attach on the character. */
- attachNewTrans( startState, end, chr, chr );
-}
-
-/* Construct a machine that matches any character in set. A new machine will
- * be made that has two states and len transitions between the them. The set
- * should be ordered correctly accroding to KeyOps and should not contain
- * any duplicates. */
-void FsmGraph::orFsm( Key *set, int len )
-{
- /* Two states first start, second final. */
- setStartState( addState() );
-
- FsmState *end = addState();
- setFinState( end );
-
- for ( int i = 1; i < len; i++ )
- assert( set[i-1] < set[i] );
-
- /* Attach on all the integers in the given string of ints. */
- for ( int i = 0; i < len; i++ )
- attachNewTrans( startState, end, set[i], set[i] );
-}
-
-/* Construct a machine that matches a range of characters. A new machine will
- * be made with two states and a range transition between them. The range will
- * match any characters from low to high inclusive. Low should be less than or
- * equal to high otherwise undefined behaviour results. IsSigned determines
- * if the integers are to be considered as signed or unsigned ints. */
-void FsmGraph::rangeFsm( Key low, Key high )
-{
- /* Two states first start, second final. */
- setStartState( addState() );
-
- FsmState *end = addState();
- setFinState( end );
-
- /* Attach using the range of characters. */
- attachNewTrans( startState, end, low, high );
-}
-
-/* Construct a machine that a repeated range of characters. */
-void FsmGraph::rangeStarFsm( Key low, Key high)
-{
- /* One state which is final and is the start state. */
- setStartState( addState() );
- setFinState( startState );
-
- /* Attach start to start using range of characters. */
- attachNewTrans( startState, startState, low, high );
-}
-
-/* Construct a machine that matches the empty string. A new machine will be
- * made with only one state. The new state will be both a start and final
- * state. IsSigned determines if the machine has a signed or unsigned
- * alphabet. Fsm operations must be done on machines with the same alphabet
- * signedness. */
-void FsmGraph::lambdaFsm( )
-{
- /* Give it one state with no transitions making it
- * the start state and final state. */
- setStartState( addState() );
- setFinState( startState );
-}
-
-/* Construct a machine that matches nothing at all. A new machine will be
- * made with only one state. It will not be final. */
-void FsmGraph::emptyFsm( )
-{
- /* Give it one state with no transitions making it
- * the start state and final state. */
- setStartState( addState() );
-}
-
-void FsmGraph::transferOutData( FsmState *destState, FsmState *srcState )
-{
- for ( TransList::Iter trans = destState->outList; trans.lte(); trans++ ) {
- if ( trans->toState != 0 ) {
- /* Get the actions data from the outActionTable. */
- trans->actionTable.setActions( srcState->outActionTable );
-
- /* Get the priorities from the outPriorTable. */
- trans->priorTable.setPriors( srcState->outPriorTable );
- }
- }
-}
-
-/* Kleene star operator. Makes this machine the kleene star of itself. Any
- * transitions made going out of the machine and back into itself will be
- * notified that they are leaving transitions by having the leavingFromState
- * callback invoked. */
-void FsmGraph::starOp( )
-{
- /* For the merging process. */
- MergeData md;
-
- /* Turn on misfit accounting to possibly catch the old start state. */
- setMisfitAccounting( true );
-
- /* Create the new new start state. It will be set final after the merging
- * of the final states with the start state is complete. */
- FsmState *prevStartState = startState;
- unsetStartState();
- setStartState( addState() );
-
- /* Merge the new start state with the old one to isolate it. */
- mergeStates( md, startState, prevStartState );
-
- /* Merge the start state into all final states. Except the start state on
- * the first pass. If the start state is set final we will be doubling up
- * its transitions, which will get transfered to any final states that
- * follow it in the final state set. This will be determined by the order
- * of items in the final state set. To prevent this we just merge with the
- * start on a second pass. */
- for ( StateSet::Iter st = finStateSet; st.lte(); st++ ) {
- if ( *st != startState )
- mergeStatesLeaving( md, *st, startState );
- }
-
- /* Now it is safe to merge the start state with itself (provided it
- * is set final). */
- if ( startState->isFinState() )
- mergeStatesLeaving( md, startState, startState );
-
- /* Now ensure the new start state is a final state. */
- setFinState( startState );
-
- /* Fill in any states that were newed up as combinations of others. */
- fillInStates( md );
-
- /* Remove the misfits and turn off misfit accounting. */
- removeMisfits();
- setMisfitAccounting( false );
-}
-
-void FsmGraph::repeatOp( int times )
-{
- /* Must be 1 and up. 0 produces null machine and requires deleting this. */
- assert( times > 0 );
-
- /* A repeat of one does absolutely nothing. */
- if ( times == 1 )
- return;
-
- /* Make a machine to make copies from. */
- FsmGraph *copyFrom = new FsmGraph( *this );
-
- /* Concatentate duplicates onto the end up until before the last. */
- for ( int i = 1; i < times-1; i++ ) {
- FsmGraph *dup = new FsmGraph( *copyFrom );
- doConcat( dup, 0, false );
- }
-
- /* Now use the copyFrom on the end. */
- doConcat( copyFrom, 0, false );
-}
-
-void FsmGraph::optionalRepeatOp( int times )
-{
- /* Must be 1 and up. 0 produces null machine and requires deleting this. */
- assert( times > 0 );
-
- /* A repeat of one optional merely allows zero string. */
- if ( times == 1 ) {
- setFinState( startState );
- return;
- }
-
- /* Make a machine to make copies from. */
- FsmGraph *copyFrom = new FsmGraph( *this );
-
- /* The state set used in the from end of the concatentation. Starts with
- * the initial final state set, then after each concatenation, gets set to
- * the the final states that come from the the duplicate. */
- StateSet lastFinSet( finStateSet );
-
- /* Set the initial state to zero to allow zero copies. */
- setFinState( startState );
-
- /* Concatentate duplicates onto the end up until before the last. */
- for ( int i = 1; i < times-1; i++ ) {
- /* Make a duplicate for concating and set the fin bits to graph 2 so we
- * can pick out it's final states after the optional style concat. */
- FsmGraph *dup = new FsmGraph( *copyFrom );
- dup->setFinBits( SB_GRAPH2 );
- doConcat( dup, &lastFinSet, true );
-
- /* Clear the last final state set and make the new one by taking only
- * the final states that come from graph 2.*/
- lastFinSet.empty();
- for ( int i = 0; i < finStateSet.length(); i++ ) {
- /* If the state came from graph 2, add it to the last set and clear
- * the bits. */
- FsmState *fs = finStateSet[i];
- if ( fs->stateBits & SB_GRAPH2 ) {
- lastFinSet.insert( fs );
- fs->stateBits &= ~SB_GRAPH2;
- }
- }
- }
-
- /* Now use the copyFrom on the end, no bits set, no bits to clear. */
- doConcat( copyFrom, &lastFinSet, true );
-}
-
-
-/* Fsm concatentation worker. Supports treating the concatentation as optional,
- * which essentially leaves the final states of machine one as final. */
-void FsmGraph::doConcat( FsmGraph *other, StateSet *fromStates, bool optional )
-{
- /* For the merging process. */
- StateSet finStateSetCopy, startStateSet;
- MergeData md;
-
- /* Turn on misfit accounting for both graphs. */
- setMisfitAccounting( true );
- other->setMisfitAccounting( true );
-
- /* Get the other's start state. */
- FsmState *otherStartState = other->startState;
-
- /* Unset other's start state before bringing in the entry points. */
- other->unsetStartState();
-
- /* Bring in the rest of other's entry points. */
- copyInEntryPoints( other );
- other->entryPoints.empty();
-
- /* Bring in other's states into our state lists. */
- stateList.append( other->stateList );
- misfitList.append( other->misfitList );
-
- /* If from states is not set, then get a copy of our final state set before
- * we clobber it and use it instead. */
- if ( fromStates == 0 ) {
- finStateSetCopy = finStateSet;
- fromStates = &finStateSetCopy;
- }
-
- /* Unset all of our final states and get the final states from other. */
- if ( !optional )
- unsetAllFinStates();
- finStateSet.insert( other->finStateSet );
-
- /* Since other's lists are empty, we can delete the fsm without
- * affecting any states. */
- delete other;
-
- /* Merge our former final states with the start state of other. */
- for ( int i = 0; i < fromStates->length(); i++ ) {
- FsmState *state = fromStates->data[i];
-
- /* Merge the former final state with other's start state. */
- mergeStatesLeaving( md, state, otherStartState );
-
- /* If the former final state was not reset final then we must clear
- * the state's out trans data. If it got reset final then it gets to
- * keep its out trans data. This must be done before fillInStates gets
- * called to prevent the data from being sourced. */
- if ( ! state->isFinState() )
- clearOutData( state );
- }
-
- /* Fill in any new states made from merging. */
- fillInStates( md );
-
- /* Remove the misfits and turn off misfit accounting. */
- removeMisfits();
- setMisfitAccounting( false );
-}
-
-/* Concatenates other to the end of this machine. Other is deleted. Any
- * transitions made leaving this machine and entering into other are notified
- * that they are leaving transitions by having the leavingFromState callback
- * invoked. */
-void FsmGraph::concatOp( FsmGraph *other )
-{
- /* Assert same signedness and return graph concatenation op. */
- doConcat( other, 0, false );
-}
-
-
-void FsmGraph::doOr( FsmGraph *other )
-{
- /* For the merging process. */
- MergeData md;
-
- /* Build a state set consisting of both start states */
- StateSet startStateSet;
- startStateSet.insert( startState );
- startStateSet.insert( other->startState );
-
- /* Both of the original start states loose their start state status. */
- unsetStartState();
- other->unsetStartState();
-
- /* Bring in the rest of other's entry points. */
- copyInEntryPoints( other );
- other->entryPoints.empty();
-
- /* Merge the lists. This will move all the states from other
- * into this. No states will be deleted. */
- stateList.append( other->stateList );
- misfitList.append( other->misfitList );
-
- /* Move the final set data from other into this. */
- finStateSet.insert(other->finStateSet);
- other->finStateSet.empty();
-
- /* Since other's list is empty, we can delete the fsm without
- * affecting any states. */
- delete other;
-
- /* Create a new start state. */
- setStartState( addState() );
-
- /* Merge the start states. */
- mergeStates( md, startState, startStateSet.data, startStateSet.length() );
-
- /* Fill in any new states made from merging. */
- fillInStates( md );
-}
-
-/* Unions other with this machine. Other is deleted. */
-void FsmGraph::unionOp( FsmGraph *other )
-{
- /* Turn on misfit accounting for both graphs. */
- setMisfitAccounting( true );
- other->setMisfitAccounting( true );
-
- /* Call Worker routine. */
- doOr( other );
-
- /* Remove the misfits and turn off misfit accounting. */
- removeMisfits();
- setMisfitAccounting( false );
-}
-
-/* Intersects other with this machine. Other is deleted. */
-void FsmGraph::intersectOp( FsmGraph *other )
-{
- /* Turn on misfit accounting for both graphs. */
- setMisfitAccounting( true );
- other->setMisfitAccounting( true );
-
- /* Set the fin bits on this and other to want each other. */
- setFinBits( SB_GRAPH1 );
- other->setFinBits( SB_GRAPH2 );
-
- /* Call worker Or routine. */
- doOr( other );
-
- /* Unset any final states that are no longer to
- * be final due to final bits. */
- unsetIncompleteFinals();
-
- /* Remove the misfits and turn off misfit accounting. */
- removeMisfits();
- setMisfitAccounting( false );
-
- /* Remove states that have no path to a final state. */
- removeDeadEndStates();
-}
-
-/* Set subtracts other machine from this machine. Other is deleted. */
-void FsmGraph::subtractOp( FsmGraph *other )
-{
- /* Turn on misfit accounting for both graphs. */
- setMisfitAccounting( true );
- other->setMisfitAccounting( true );
-
- /* Set the fin bits of other to be killers. */
- other->setFinBits( SB_GRAPH1 );
-
- /* Call worker Or routine. */
- doOr( other );
-
- /* Unset any final states that are no longer to
- * be final due to final bits. */
- unsetKilledFinals();
-
- /* Remove the misfits and turn off misfit accounting. */
- removeMisfits();
- setMisfitAccounting( false );
-
- /* Remove states that have no path to a final state. */
- removeDeadEndStates();
-}
-
-bool FsmGraph::inEptVect( EptVect *eptVect, FsmState *state )
-{
- if ( eptVect != 0 ) {
- /* Vect is there, walk it looking for state. */
- for ( int i = 0; i < eptVect->length(); i++ ) {
- if ( eptVect->data[i].targ == state )
- return true;
- }
- }
- return false;
-}
-
-/* Fill epsilon vectors in a root state from a given starting point. Epmploys
- * a depth first search through the graph of epsilon transitions. */
-void FsmGraph::epsilonFillEptVectFrom( FsmState *root, FsmState *from, bool parentLeaving )
-{
- /* Walk the epsilon transitions out of the state. */
- for ( EpsilonTrans::Iter ep = from->epsilonTrans; ep.lte(); ep++ ) {
- /* Find the entry point, if the it does not resove, ignore it. */
- EntryMapEl *enLow, *enHigh;
- if ( entryPoints.findMulti( *ep, enLow, enHigh ) ) {
- /* Loop the targets. */
- for ( EntryMapEl *en = enLow; en <= enHigh; en++ ) {
- /* Do not add the root or states already in eptVect. */
- FsmState *targ = en->value;
- if ( targ != from && !inEptVect(root->eptVect, targ) ) {
- /* Maybe need to create the eptVect. */
- if ( root->eptVect == 0 )
- root->eptVect = new EptVect();
-
- /* If moving to a different graph or if any parent is
- * leaving then we are leaving. */
- bool leaving = parentLeaving ||
- root->owningGraph != targ->owningGraph;
-
- /* All ok, add the target epsilon and recurse. */
- root->eptVect->append( EptVectEl(targ, leaving) );
- epsilonFillEptVectFrom( root, targ, leaving );
- }
- }
- }
- }
-}
-
-void FsmGraph::shadowReadWriteStates( MergeData &md )
-{
- /* Init isolatedShadow algorithm data. */
- for ( StateList::Iter st = stateList; st.lte(); st++ )
- st->isolatedShadow = 0;
-
- /* Any states that may be both read from and written to must
- * be shadowed. */
- for ( StateList::Iter st = stateList; st.lte(); st++ ) {
- /* Find such states by looping through stateVect lists, which give us
- * the states that will be read from. May cause us to visit the states
- * that we are interested in more than once. */
- if ( st->eptVect != 0 ) {
- /* For all states that will be read from. */
- for ( EptVect::Iter ept = *st->eptVect; ept.lte(); ept++ ) {
- /* Check for read and write to the same state. */
- FsmState *targ = ept->targ;
- if ( targ->eptVect != 0 ) {
- /* State is to be written to, if the shadow is not already
- * there, create it. */
- if ( targ->isolatedShadow == 0 ) {
- FsmState *shadow = addState();
- mergeStates( md, shadow, targ );
- targ->isolatedShadow = shadow;
- }
-
- /* Write shadow into the state vector so that it is the
- * state that the epsilon transition will read from. */
- ept->targ = targ->isolatedShadow;
- }
- }
- }
- }
-}
-
-void FsmGraph::resolveEpsilonTrans( MergeData &md )
-{
- /* Walk the state list and invoke recursive worker on each state. */
- for ( StateList::Iter st = stateList; st.lte(); st++ )
- epsilonFillEptVectFrom( st, st, false );
-
- /* Prevent reading from and writing to of the same state. */
- shadowReadWriteStates( md );
-
- /* For all states that have epsilon transitions out, draw the transitions,
- * clear the epsilon transitions. */
- for ( StateList::Iter st = stateList; st.lte(); st++ ) {
- /* If there is a state vector, then create the pre-merge state. */
- if ( st->eptVect != 0 ) {
- /* Merge all the epsilon targets into the state. */
- for ( EptVect::Iter ept = *st->eptVect; ept.lte(); ept++ ) {
- if ( ept->leaving )
- mergeStatesLeaving( md, st, ept->targ );
- else
- mergeStates( md, st, ept->targ );
- }
-
- /* Clean up the target list. */
- delete st->eptVect;
- st->eptVect = 0;
- }
-
- /* Clear the epsilon transitions vector. */
- st->epsilonTrans.empty();
- }
-}
-
-void FsmGraph::epsilonOp()
-{
- /* For merging process. */
- MergeData md;
-
- setMisfitAccounting( true );
-
- for ( StateList::Iter st = stateList; st.lte(); st++ )
- st->owningGraph = 0;
-
- /* Perform merges. */
- resolveEpsilonTrans( md );
-
- /* Epsilons can caused merges which leave behind unreachable states. */
- fillInStates( md );
-
- /* Remove the misfits and turn off misfit accounting. */
- removeMisfits();
- setMisfitAccounting( false );
-}
-
-/* Make a new machine by joining together a bunch of machines without making
- * any transitions between them. A negative finalId results in there being no
- * final id.
- *
- * The states, entry points and final state sets of the machines in others are
- * moved into this graph and each of those graph objects is deleted. A new
- * start state is created and, if any entry points exist on startId, the
- * states on that entry id are merged into it. */
-void FsmGraph::joinOp( int startId, int finalId, FsmGraph **others, int numOthers )
-{
- /* For the merging process. */
- MergeData md;
-
- /* Set the owning machines. Start at one. Zero is reserved for the start
- * and final states. */
- for ( StateList::Iter st = stateList; st.lte(); st++ )
- st->owningGraph = 1;
- for ( int m = 0; m < numOthers; m++ ) {
- for ( StateList::Iter st = others[m]->stateList; st.lte(); st++ )
- st->owningGraph = 2+m;
- }
-
- /* All machines lose start state status. */
- unsetStartState();
- for ( int m = 0; m < numOthers; m++ )
- others[m]->unsetStartState();
-
- /* Bring the other machines into this. */
- for ( int m = 0; m < numOthers; m++ ) {
- /* Bring in the rest of other's entry points. */
- copyInEntryPoints( others[m] );
- others[m]->entryPoints.empty();
-
- /* Merge the lists. This will move all the states from other into
- * this. No states will be deleted. */
- stateList.append( others[m]->stateList );
- assert( others[m]->misfitList.length() == 0 );
-
- /* Move the final set data from other into this. */
- finStateSet.insert( others[m]->finStateSet );
- others[m]->finStateSet.empty();
-
- /* Since other's list is empty, we can delete the fsm without
- * affecting any states. */
- delete others[m];
- }
-
- /* Look up the start entry point. */
- EntryMapEl *enLow = 0, *enHigh = 0;
- bool findRes = entryPoints.findMulti( startId, enLow, enHigh );
- if ( ! findRes ) {
- /* No start state. Set a default one and proceed with the join. Note
- * that the result of the join will be a very uninteresting machine. */
- setStartState( addState() );
- }
- else {
- /* There is at least one start state, create a state that will become
- * the new start state. */
- FsmState *newStart = addState();
- setStartState( newStart );
-
- /* The start state is in an owning machine class all its own. */
- newStart->owningGraph = 0;
-
- /* Create the set of states to merge from. The range enLow..enHigh is
- * walked inclusively. */
- StateSet stateSet;
- for ( EntryMapEl *en = enLow; en <= enHigh; en++ )
- stateSet.insert( en->value );
-
- /* Merge in the set of start states into the new start state. */
- mergeStates( md, newStart, stateSet.data, stateSet.length() );
- }
-
- /* Take a copy of the final state set, before unsetting them all. This
- * will allow us to call clearOutData on the states that don't get
- * final state status back. */
- StateSet finStateSetCopy = finStateSet;
-
- /* Now all final states are unset. */
- unsetAllFinStates();
-
- if ( finalId >= 0 ) {
- /* Create the implicit final state. */
- FsmState *finState = addState();
- setFinState( finState );
-
- /* Assign an entry into the final state on the final state entry id. Note
- * that there may already be an entry on this id. That's ok. Also set the
- * final state owning machine id. It's in a class all its own. */
- setEntry( finalId, finState );
- finState->owningGraph = 0;
- }
-
- /* Hand over to workers for resolving epsilon trans. This will merge states
- * with the targets of their epsilon transitions. */
- resolveEpsilonTrans( md );
-
- /* Invoke the relinquish final callback on any states that did not get
- * final state status back. */
- for ( StateSet::Iter st = finStateSetCopy; st.lte(); st++ ) {
- if ( !((*st)->stateBits & SB_ISFINAL) )
- clearOutData( *st );
- }
-
- /* Fill in any new states made from merging. */
- fillInStates( md );
-
- /* Joining can be messy. Instead of having misfit accounting on (which is
- * tricky here) do a full cleaning. */
- removeUnreachableStates();
-}
-
-/* Absorb the states, entry points and final state sets of the given machines
- * into this graph without drawing any transitions. Each absorbed graph object
- * is deleted once it has been emptied. */
-void FsmGraph::globOp( FsmGraph **others, int numOthers )
-{
-    /* None of the incoming machines keeps its start state. */
-    for ( int i = 0; i < numOthers; i++ )
-        others[i]->unsetStartState();
-
-    /* Move the contents of each incoming machine into this one. */
-    for ( int i = 0; i < numOthers; i++ ) {
-        FsmGraph *other = others[i];
-
-        /* Take over the entry points. */
-        copyInEntryPoints( other );
-        other->entryPoints.empty();
-
-        /* Splice the state list onto ours. States are moved, not
-         * deleted. */
-        stateList.append( other->stateList );
-        assert( other->misfitList.length() == 0 );
-
-        /* Take over the final state set. */
-        finStateSet.insert( other->finStateSet );
-        other->finStateSet.empty();
-
-        /* The other graph no longer holds any states, so deleting it
-         * does not affect them. */
-        delete other;
-    }
-}
-
-/* Make each entry key lead to exactly one state. Keys that have several
- * entry states get a fresh state into which all of those states are merged. */
-void FsmGraph::deterministicEntry()
-{
-    /* For the merging process. */
-    MergeData md;
-
-    /* States may lose their entry points, so account for misfits. */
-    setMisfitAccounting( true );
-
-    /* Work from a copy of the entry map. The real map is cleared and then
-     * rebuilt with one entry per key as the copy is scanned. */
-    EntryMap prevEntry = entryPoints;
-    unsetAllEntryPoints();
-
-    int runStart = 0;
-    while ( runStart < prevEntry.length() ) {
-        /* Find the end (exclusive) of the run of elements sharing the key
-         * found at runStart. */
-        int runEnd = runStart + 1;
-        while ( runEnd < prevEntry.length() &&
-                prevEntry[runStart].key == prevEntry[runEnd].key )
-            runEnd += 1;
-
-        if ( runEnd - runStart > 1 ) {
-            /* Several states on this key: merge them all into a new state
-             * and make that the single entry point. */
-            FsmState *newEntry = addState();
-            for ( int en = runStart; en < runEnd; en++ )
-                mergeStates( md, newEntry, prevEntry[en].value );
-
-            setEntry( prevEntry[runStart].key, newEntry );
-        }
-        else {
-            /* Just one state on this key, restore the entry as it was. */
-            setEntry( prevEntry[runStart].key, prevEntry[runStart].value );
-        }
-
-        runStart = runEnd;
-    }
-
-    /* The old start state may be unreachable. Remove the misfits and turn off
-     * misfit accounting. */
-    removeMisfits();
-    setMisfitAccounting( false );
-}
-
-/* Take final state status away from every final state that has the SB_GRAPH1
- * bit set, and clear that bit on all states that were final on entry. */
-void FsmGraph::unsetKilledFinals()
-{
-    /* Walk a copy of the final state set because the set itself is modified
-     * along the way. */
-    StateSet snapshot( finStateSet );
-
-    for ( StateSet::Iter st = snapshot; st.lte(); st++ ) {
-        FsmState *state = *st;
-
-        /* A state carrying the killing bit stops being final. */
-        if ( state->stateBits & SB_GRAPH1 )
-            unsetFinState( state );
-
-        /* Clear the killing bit. Non final states should never have had it
-         * set in the first place. */
-        state->stateBits &= ~SB_GRAPH1;
-    }
-}
-
-/* Take final state status away from every final state that has one of the
- * two SB_BOTH bits set but not the other, and clear both bits on all states
- * that were final on entry. */
-void FsmGraph::unsetIncompleteFinals()
-{
-    /* Walk a copy of the final state set because the set itself is modified
-     * along the way. */
-    StateSet snapshot( finStateSet );
-
-    for ( StateSet::Iter st = snapshot; st.lte(); st++ ) {
-        FsmState *state = *st;
-
-        /* Some bit is set, but not the full pair: one side wants the other
-         * and it is not there. */
-        const int marks = state->stateBits & SB_BOTH;
-        if ( marks != 0 && marks != SB_BOTH )
-            unsetFinState( state );
-
-        /* Clear the wanting bits. Non final states should never have had
-         * them set in the first place. */
-        state->stateBits &= ~SB_BOTH;
-    }
-}
-
-/* Make sure the start state has no entry points other than being the start
- * state. When the existing start state can be entered in some other way,
- * changing its transitions would change more than the start transitions, so
- * a fresh start state is created and the old one is merged into it. Useful
- * before modifying start transitions. */
-void FsmGraph::isolateStartState( )
-{
-    /* For the merging process. */
-    MergeData md;
-
-    /* Nothing to do when the start state is already isolated. */
-    if ( isStartStateIsolated() )
-        return;
-
-    /* The old start state may end up a misfit, so account for those. */
-    setMisfitAccounting( true );
-
-    /* Swap in a brand new start state, remembering the old one. */
-    FsmState *oldStart = startState;
-    unsetStartState();
-    setStartState( addState() );
-
-    /* Draw the old start state into the new one. */
-    mergeStates( md, startState, oldStart );
-
-    /* The new state had no transitions, so the merge cannot have produced
-     * conflicts: the state dict and the fill list must both be empty. */
-    assert( md.stateDict.treeSize == 0 );
-    assert( md.stfillHead == 0 );
-
-    /* The old start state may be unreachable. Remove the misfits and turn off
-     * misfit accounting. */
-    removeMisfits();
-    setMisfitAccounting( false );
-}
-
-/* Debug helpers for condition expansion logging. They are guarded with
- * #ifdef, the same test used at every call site, so that the helpers exist
- * whenever the calls are compiled in. */
-#ifdef COLM_LOG_CONDS
-/* Write the action names of a condition space to cerr, last element first
- * and separated by commas. A null space is printed as "<empty>". */
-void logCondSpace( CondSpace *condSpace )
-{
-    if ( condSpace == 0 )
-        cerr << "<empty>";
-    else {
-        for ( CondSet::Iter csi = condSpace->condSet.last(); csi.gtb(); csi-- ) {
-            if ( ! csi.last() )
-                cerr << ',';
-            (*csi)->actionName( cerr );
-        }
-    }
-}
-
-/* Dump a freshly created expansion to cerr: its key range, the from/to
- * condition spaces, the from value and the list of to values. */
-void logNewExpansion( Expansion *exp )
-{
-    cerr << "created expansion:" << endl;
-    cerr << "  range: " << exp->lowKey.getVal() << " .. " <<
-            exp->highKey.getVal() << endl;
-
-    cerr << "  fromCondSpace: ";
-    logCondSpace( exp->fromCondSpace );
-    cerr << endl;
-    cerr << "  fromVals: " << exp->fromVals << endl;
-
-    cerr << "  toCondSpace: ";
-    logCondSpace( exp->toCondSpace );
-    cerr << endl;
-    cerr << "  toValsList: ";
-    for ( LongVect::Iter to = exp->toValsList; to.lte(); to++ )
-        cerr << " " << *to;
-    cerr << endl;
-}
-#endif
-
-
-/* Pair the out transitions of destState with the state conditions of
- * srcState. For every overlapping range, record an expansion of the dest
- * transition into all value combinations of the source condition space. */
-void FsmGraph::findTransExpansions( ExpansionList &expansionList,
-        FsmState *destState, FsmState *srcState )
-{
-    PairIter<FsmTrans, StateCond> iter( destState->outList.head,
-            srcState->stateCondList.head );
-    while ( !iter.end() ) {
-        if ( iter.userState == RangeOverlap ) {
-            FsmTrans *destTrans = iter.s1Tel.trans;
-            CondSpace *srcCS = iter.s2Tel.trans->condSpace;
-
-            /* The expansion covers the overlapping range and goes from no
-             * condition space to the source's condition space. */
-            Expansion *expansion = new Expansion( iter.s1Tel.lowKey,
-                    iter.s1Tel.highKey );
-            expansion->fromCondSpace = 0;
-            expansion->fromVals = 0;
-            expansion->toCondSpace = srcCS;
-
-            /* Keep a detached copy of the transition with the same target. */
-            expansion->fromTrans = new FsmTrans( *destTrans );
-            expansion->fromTrans->fromState = 0;
-            expansion->fromTrans->toState = destTrans->toState;
-
-            /* Expand to every combination of condition values. */
-            long numTargVals = (1 << srcCS->condSet.length());
-            long targVals = 0;
-            while ( targVals < numTargVals ) {
-                expansion->toValsList.append( targVals );
-                targVals++;
-            }
-
-            #ifdef COLM_LOG_CONDS
-            if ( colm_log_conds ) {
-                logNewExpansion( expansion );
-            }
-            #endif
-            expansionList.append( expansion );
-        }
-        iter++;
-    }
-}
-
-/* Look through the out transitions of state for those overlapping the key
- * range lowKey..highKey as placed in fromCondSpace under the values fromVals.
- * Each overlap yields an expansion from (fromCondSpace, fromVals) to
- * (toCondSpace, toValsList). */
-void FsmGraph::findCondExpInTrans( ExpansionList &expansionList, FsmState *state,
-        Key lowKey, Key highKey, CondSpace *fromCondSpace, CondSpace *toCondSpace,
-        long fromVals, LongVect &toValsList )
-{
-    /* A stand-alone transition used only as the search range. */
-    FsmTrans probe;
-    probe.prev = 0;
-    probe.next = 0;
-    probe.lowKey = fromCondSpace->baseKey + fromVals * keyOps->alphSize() +
-            (lowKey - keyOps->minKey);
-    probe.highKey = fromCondSpace->baseKey + fromVals * keyOps->alphSize() +
-            (highKey - keyOps->minKey);
-
-    for ( PairIter<FsmTrans> iter( state->outList.head, &probe );
-            !iter.end(); iter++ )
-    {
-        if ( iter.userState != RangeOverlap )
-            continue;
-
-        FsmTrans *found = iter.s1Tel.trans;
-
-        Expansion *expansion = new Expansion( lowKey, highKey );
-
-        /* Keep a detached copy of the transition with the same target. */
-        expansion->fromTrans = new FsmTrans( *found );
-        expansion->fromTrans->fromState = 0;
-        expansion->fromTrans->toState = found->toState;
-
-        expansion->fromCondSpace = fromCondSpace;
-        expansion->fromVals = fromVals;
-        expansion->toCondSpace = toCondSpace;
-        expansion->toValsList = toValsList;
-
-        expansionList.append( expansion );
-        #ifdef COLM_LOG_CONDS
-        if ( colm_log_conds ) {
-            logNewExpansion( expansion );
-        }
-        #endif
-    }
-}
-
-/* Pair the state conditions of destState with those of srcState. Wherever the
- * ranges overlap and the source condition set has items that the dest set
- * lacks, collect (via findCondExpInTrans) the expansions that take dest's
- * transitions from dest's condition space into the merged condition space. */
-void FsmGraph::findCondExpansions( ExpansionList &expansionList,
- FsmState *destState, FsmState *srcState )
-{
- PairIter<StateCond, StateCond> condCond( destState->stateCondList.head,
- srcState->stateCondList.head );
- for ( ; !condCond.end(); condCond++ ) {
- if ( condCond.userState == RangeOverlap ) {
- /* Loop over all existing condVals . */
- CondSet &destCS = condCond.s1Tel.trans->condSpace->condSet;
- long destLen = destCS.length();
-
- /* Find the items in src cond set that are not in dest
- * cond set. These are the items that we must expand. */
- CondSet srcOnlyCS = condCond.s2Tel.trans->condSpace->condSet;
- for ( CondSet::Iter dcsi = destCS; dcsi.lte(); dcsi++ )
- srcOnlyCS.remove( *dcsi );
- long srcOnlyLen = srcOnlyCS.length();
-
- if ( srcOnlyCS.length() > 0 ) {
- #ifdef COLM_LOG_CONDS
- if ( colm_log_conds ) {
- cerr << "there are " << srcOnlyCS.length() << " item(s) that are "
- "only in the srcCS" << endl;
- }
- #endif
-
- /* The merged set is the union of the dest and src sets. */
- CondSet mergedCS = destCS;
- mergedCS.insert( condCond.s2Tel.trans->condSpace->condSet );
-
- CondSpace *fromCondSpace = addCondSpace( destCS );
- CondSpace *toCondSpace = addCondSpace( mergedCS );
-
- /* Loop all values in the dest space. */
- for ( long destVals = 0; destVals < (1 << destLen); destVals++ ) {
- /* Re-express destVals in the merged set: each set bit moves
- * to the position its condition has in mergedCS. */
- long basicVals = 0;
- for ( CondSet::Iter csi = destCS; csi.lte(); csi++ ) {
- if ( destVals & (1 << csi.pos()) ) {
- Action **cim = mergedCS.find( *csi );
- long bitPos = (cim - mergedCS.data);
- basicVals |= 1 << bitPos;
- }
- }
-
- /* Loop all new values. Every combination of the src-only
- * conditions is OR-ed onto basicVals. */
- LongVect expandToVals;
- for ( long soVals = 0; soVals < (1 << srcOnlyLen); soVals++ ) {
- long targVals = basicVals;
- for ( CondSet::Iter csi = srcOnlyCS; csi.lte(); csi++ ) {
- if ( soVals & (1 << csi.pos()) ) {
- Action **cim = mergedCS.find( *csi );
- long bitPos = (cim - mergedCS.data);
- targVals |= 1 << bitPos;
- }
- }
- expandToVals.append( targVals );
- }
-
- /* Record expansions for the dest transitions that sit on
- * this range under destVals. */
- findCondExpInTrans( expansionList, destState,
- condCond.s1Tel.lowKey, condCond.s1Tel.highKey,
- fromCondSpace, toCondSpace, destVals, expandToVals );
- }
- }
- }
- }
-}
-
-/* Carry out the expansions in expList1 on destState. For every target value
- * of every expansion, the expansion's transition copy is re-keyed into the
- * target condition space and copied into destState's out transitions. */
-void FsmGraph::doExpand( MergeData &md, FsmState *destState, ExpansionList &expList1 )
-{
-    for ( ExpansionList::Iter exp = expList1; exp.lte(); exp++ ) {
-        /* The copy of the transition made when the expansion was created.
-         * It is reused for each target value: only the keys need to be set
-         * up again, the rest is constant and already prepared. */
-        FsmTrans *reused = exp->fromTrans;
-
-        for ( LongVect::Iter to = exp->toValsList; to.lte(); to++ ) {
-            long targVals = *to;
-
-            reused->lowKey = exp->toCondSpace->baseKey +
-                    targVals * keyOps->alphSize() + (exp->lowKey - keyOps->minKey);
-            reused->highKey = exp->toCondSpace->baseKey +
-                    targVals * keyOps->alphSize() + (exp->highKey - keyOps->minKey);
-
-            /* Wrap the transition in a temporary one-element list for the
-             * copy, then abandon the list so it does not take the
-             * transition with it. */
-            TransList single;
-            single.append( reused );
-            outTransCopy( md, destState, single.head );
-            single.abandon();
-        }
-    }
-}
-
-
-/* For each expansion in expList1, remove from destState's out list the
- * transitions covering the expansion's source range. Transitions outside the
- * range are kept; overlapping ones are detached and deleted. */
-void FsmGraph::doRemove( MergeData &md, FsmState *destState, ExpansionList &expList1 )
-{
- for ( ExpansionList::Iter exp = expList1; exp.lte(); exp++ ) {
- /* Build the key range to remove. Without a from-cond-space the plain
- * keys are used, otherwise the range is placed inside the from cond
- * space under fromVals. */
- Removal removal;
- if ( exp->fromCondSpace == 0 ) {
- removal.lowKey = exp->lowKey;
- removal.highKey = exp->highKey;
- }
- else {
- removal.lowKey = exp->fromCondSpace->baseKey +
- exp->fromVals * keyOps->alphSize() + (exp->lowKey - keyOps->minKey);
- removal.highKey = exp->fromCondSpace->baseKey +
- exp->fromVals * keyOps->alphSize() + (exp->highKey - keyOps->minKey);
- }
- removal.next = 0;
-
- /* Rebuild the out list in destList while walking the existing list
- * against the single removal range. */
- TransList destList;
- PairIter<FsmTrans, Removal> pairIter( destState->outList.head, &removal );
- for ( ; !pairIter.end(); pairIter++ ) {
- switch ( pairIter.userState ) {
- case RangeInS1: {
- /* Only in the existing list: keep it, with the keys of the
- * range the iterator reports. */
- FsmTrans *destTrans = pairIter.s1Tel.trans;
- destTrans->lowKey = pairIter.s1Tel.lowKey;
- destTrans->highKey = pairIter.s1Tel.highKey;
- destList.append( destTrans );
- break;
- }
- case RangeInS2:
- break;
- case RangeOverlap: {
- /* Covered by the removal: detach and delete. */
- FsmTrans *trans = pairIter.s1Tel.trans;
- detachTrans( trans->fromState, trans->toState, trans );
- delete trans;
- break;
- }
- case BreakS1: {
- /* The existing transition is being split: continue with a
- * duplicate of it. */
- pairIter.s1Tel.trans = dupTrans( destState,
- pairIter.s1Tel.trans );
- break;
- }
- case BreakS2:
- break;
- }
- }
- /* Install the rebuilt list as the state's out list. */
- destState->outList.transfer( destList );
- }
-}
-
-/* Merge the state condition list of srcState into that of destState. Ranges
- * found only in the source are copied, and overlapping ranges get the union
- * of both condition sets as their condition space. */
-void FsmGraph::mergeStateConds( FsmState *destState, FsmState *srcState )
-{
- /* The merged list is built here and transferred to destState at the end. */
- StateCondList destList;
- PairIter<StateCond> pairIter( destState->stateCondList.head,
- srcState->stateCondList.head );
- for ( ; !pairIter.end(); pairIter++ ) {
- switch ( pairIter.userState ) {
- case RangeInS1: {
- /* Only in dest: keep the element, with the reported keys. */
- StateCond *destCond = pairIter.s1Tel.trans;
- destCond->lowKey = pairIter.s1Tel.lowKey;
- destCond->highKey = pairIter.s1Tel.highKey;
- destList.append( destCond );
- break;
- }
- case RangeInS2: {
- /* Only in src: add a copy, the source list is left alone. */
- StateCond *newCond = new StateCond( *pairIter.s2Tel.trans );
- newCond->lowKey = pairIter.s2Tel.lowKey;
- newCond->highKey = pairIter.s2Tel.highKey;
- destList.append( newCond );
- break;
- }
- case RangeOverlap: {
- /* In both: the dest element takes on the union of the two
- * condition sets. */
- StateCond *destCond = pairIter.s1Tel.trans;
- StateCond *srcCond = pairIter.s2Tel.trans;
- CondSet mergedCondSet;
- mergedCondSet.insert( destCond->condSpace->condSet );
- mergedCondSet.insert( srcCond->condSpace->condSet );
- destCond->condSpace = addCondSpace( mergedCondSet );
-
- destCond->lowKey = pairIter.s1Tel.lowKey;
- destCond->highKey = pairIter.s1Tel.highKey;
- destList.append( destCond );
- break;
- }
- case BreakS1:
- /* The dest element is being split: continue with a copy. */
- pairIter.s1Tel.trans = new StateCond( *pairIter.s1Tel.trans );
- break;
-
- case BreakS2:
- break;
- }
- }
- destState->stateCondList.transfer( destList );
-}
-
-/* A state merge which represents the drawing in of leaving transitions. When
- * the destination has out data, the source state is first duplicated into a
- * fresh state, the out data is transferred onto that duplicate and the
- * duplicate is merged in instead. The duplicate will be reaped because it is
- * never given any in transitions. */
-void FsmGraph::mergeStatesLeaving( MergeData &md, FsmState *destState, FsmState *srcState )
-{
-    /* With no out data this is an ordinary merge. */
-    if ( !hasOutData( destState ) ) {
-        mergeStates( md, destState, srcState );
-        return;
-    }
-
-    /* Work on a mutable duplicate of the source. */
-    FsmState *duplicate = addState();
-    mergeStates( md, duplicate, srcState );
-    transferOutData( duplicate, destState );
-
-    /* Embed each of the destination's out conditions into the duplicate. */
-    for ( ActionSet::Iter outCond = destState->outCondSet; outCond.lte(); outCond++ )
-        embedCondition( md, duplicate, *outCond );
-
-    mergeStates( md, destState, duplicate );
-}
-
-/* Merge each of the numSrc states in srcStates into destState, in array
- * order. */
-void FsmGraph::mergeStates( MergeData &md, FsmState *destState,
-        FsmState **srcStates, int numSrc )
-{
-    int idx = 0;
-    while ( idx < numSrc ) {
-        mergeStates( md, destState, srcStates[idx] );
-        idx += 1;
-    }
-}
-
-/* Merge srcState into destState: out transitions (with the condition
- * expansions that requires), state bits, final state status, epsilon
- * transitions, out priorities and the action tables. srcState may be the same
- * state as destState. */
-void FsmGraph::mergeStates( MergeData &md, FsmState *destState, FsmState *srcState )
-{
- ExpansionList expList1;
- ExpansionList expList2;
-
- /* Collect the expansions in both directions before anything is modified. */
- findTransExpansions( expList1, destState, srcState );
- findCondExpansions( expList1, destState, srcState );
- findTransExpansions( expList2, srcState, destState );
- findCondExpansions( expList2, srcState, destState );
-
- mergeStateConds( destState, srcState );
-
- /* Copy in the source's out transitions. */
- outTransCopy( md, destState, srcState->outList.head );
-
- /* Add the expanded transitions, then remove the ranges they replace. Both
- * lists are applied to destState. */
- doExpand( md, destState, expList1 );
- doExpand( md, destState, expList2 );
-
- doRemove( md, destState, expList1 );
- doRemove( md, destState, expList2 );
-
- expList1.empty();
- expList2.empty();
-
- /* Get its bits and final state status. */
- destState->stateBits |= ( srcState->stateBits & ~SB_ISFINAL );
- if ( srcState->isFinState() )
- setFinState( destState );
-
- /* Draw in any properties of srcState into destState. */
- if ( srcState == destState ) {
- /* Duplicate the list to protect against write to source. The
- * priorities sets are not copied in because that would have no
- * effect. */
- destState->epsilonTrans.append( EpsilonTrans( srcState->epsilonTrans ) );
-
- /* Get all actions, duplicating to protect against write to source. */
- destState->toStateActionTable.setActions(
- ActionTable( srcState->toStateActionTable ) );
- destState->fromStateActionTable.setActions(
- ActionTable( srcState->fromStateActionTable ) );
- destState->outActionTable.setActions( ActionTable( srcState->outActionTable ) );
- destState->outCondSet.insert( ActionSet( srcState->outCondSet ) );
- destState->errActionTable.setActions( ErrActionTable( srcState->errActionTable ) );
- destState->eofActionTable.setActions( ActionTable( srcState->eofActionTable ) );
- }
- else {
- /* Get the epsilons, out priorities. */
- destState->epsilonTrans.append( srcState->epsilonTrans );
- destState->outPriorTable.setPriors( srcState->outPriorTable );
-
- /* Get all actions. */
- destState->toStateActionTable.setActions( srcState->toStateActionTable );
- destState->fromStateActionTable.setActions( srcState->fromStateActionTable );
- destState->outActionTable.setActions( srcState->outActionTable );
- destState->outCondSet.insert( srcState->outCondSet );
- destState->errActionTable.setActions( srcState->errActionTable );
- destState->eofActionTable.setActions( srcState->eofActionTable );
- }
-}
-
-/* Fill in the states waiting on the merge data's fill list: each one has the
- * states of its state set merged into it. Afterwards the state dict elements
- * of all states on the list are deleted. */
-void FsmGraph::fillInStates( MergeData &md )
-{
-    /* Merge every state that is awaiting merging. This will likely cause
-     * other states to be added to the fill list as we go. */
-    for ( FsmState *pending = md.stfillHead; pending != 0;
-            pending = pending->alg.next )
-    {
-        StateSet &sources = pending->stateDictEl->stateSet;
-        mergeStates( md, pending, sources.data, sources.length() );
-    }
-
-    /* Delete and reset the state dict element of each state on the list. */
-    for ( FsmState *filled = md.stfillHead; filled != 0;
-            filled = filled->alg.next )
-    {
-        delete filled->stateDictEl;
-        filled->stateDictEl = 0;
-    }
-
-    /* StateDict will still have its ptrs/size set but all of its elements
-     * will be deleted so we don't need to clean it up. */
-}
-
-/* Find the expansions needed to embed condAction into destState and rebuild
- * the state's condition list to include it. The state's out transitions are
- * paired against its own state conditions. Only the RangeInS1 and RangeInS2
- * cases are handled; any other pairing state trips the assertion. */
-void FsmGraph::findEmbedExpansions( ExpansionList &expansionList,
- FsmState *destState, Action *condAction )
-{
- /* The new state condition list, transferred to destState at the end. */
- StateCondList destList;
- PairIter<FsmTrans, StateCond> transCond( destState->outList.head,
- destState->stateCondList.head );
- for ( ; !transCond.end(); transCond++ ) {
- switch ( transCond.userState ) {
- case RangeInS1: {
- /* A transition range with no state condition on it. Only ranges
- * that start at or below keyOps->maxKey are given one. */
- if ( transCond.s1Tel.lowKey <= keyOps->maxKey ) {
- assert( transCond.s1Tel.highKey <= keyOps->maxKey );
-
- /* Make a new state cond. */
- StateCond *newStateCond = new StateCond( transCond.s1Tel.lowKey,
- transCond.s1Tel.highKey );
- newStateCond->condSpace = addCondSpace( CondSet( condAction ) );
- destList.append( newStateCond );
-
- /* Create the expansion. The only target value is 1. */
- Expansion *expansion = new Expansion( transCond.s1Tel.lowKey,
- transCond.s1Tel.highKey );
- expansion->fromTrans = new FsmTrans(*transCond.s1Tel.trans);
- expansion->fromTrans->fromState = 0;
- expansion->fromTrans->toState = transCond.s1Tel.trans->toState;
- expansion->fromCondSpace = 0;
- expansion->fromVals = 0;
- expansion->toCondSpace = newStateCond->condSpace;
- expansion->toValsList.append( 1 );
- #ifdef COLM_LOG_CONDS
- if ( colm_log_conds ) {
- logNewExpansion( expansion );
- }
- #endif
- expansionList.append( expansion );
- }
- break;
- }
- case RangeInS2: {
- /* Enhance state cond and find the expansion. */
- StateCond *stateCond = transCond.s2Tel.trans;
- stateCond->lowKey = transCond.s2Tel.lowKey;
- stateCond->highKey = transCond.s2Tel.highKey;
-
- CondSet &destCS = stateCond->condSpace->condSet;
- long destLen = destCS.length();
- CondSpace *fromCondSpace = stateCond->condSpace;
-
- /* The state cond moves to the space that also has condAction. */
- CondSet mergedCS = destCS;
- mergedCS.insert( condAction );
- CondSpace *toCondSpace = addCondSpace( mergedCS );
- stateCond->condSpace = toCondSpace;
- destList.append( stateCond );
-
- /* Loop all values in the dest space. */
- for ( long destVals = 0; destVals < (1 << destLen); destVals++ ) {
- /* Re-express destVals in the merged set: each set bit moves to
- * the position its condition has in mergedCS. */
- long basicVals = 0;
- for ( CondSet::Iter csi = destCS; csi.lte(); csi++ ) {
- if ( destVals & (1 << csi.pos()) ) {
- Action **cim = mergedCS.find( *csi );
- long bitPos = (cim - mergedCS.data);
- basicVals |= 1 << bitPos;
- }
- }
-
- /* The target value additionally has condAction's bit set. */
- long targVals = basicVals;
- Action **cim = mergedCS.find( condAction );
- long bitPos = (cim - mergedCS.data);
- targVals |= 1 << bitPos;
-
- LongVect expandToVals( targVals );
- findCondExpInTrans( expansionList, destState,
- transCond.s2Tel.lowKey, transCond.s2Tel.highKey,
- fromCondSpace, toCondSpace, destVals, expandToVals );
- }
- break;
- }
-
-
- case RangeOverlap:
- case BreakS1:
- case BreakS2:
- assert( false );
- break;
- }
- }
-
- destState->stateCondList.transfer( destList );
-}
-
-/* Embed the condition condAction into state. This is the stand-alone entry
- * point: it supplies its own merge data, runs the worker overload, fills in
- * any states created along the way and removes the resulting misfits. */
-void FsmGraph::embedCondition( FsmState *state, Action *condAction )
-{
-    /* For the merging process. */
-    MergeData md;
-
-    /* Turn on misfit accounting to catch states that the embedding leaves
-     * without in transitions. */
-    setMisfitAccounting( true );
-
-    /* Worker. */
-    embedCondition( md, state, condAction );
-
-    /* Fill in any states that were newed up as combinations of others. */
-    fillInStates( md );
-
-    /* Remove the misfits and turn off misfit accounting. */
-    removeMisfits();
-    setMisfitAccounting( false );
-}
-
-/* Worker for embedding condAction into state using the caller's merge data:
- * find the required expansions, add the expanded transitions, remove the
- * ranges they replace and release the expansion list. */
-void FsmGraph::embedCondition( MergeData &md, FsmState *state, Action *condAction )
-{
-    ExpansionList expansions;
-    findEmbedExpansions( expansions, state, condAction );
-
-    doExpand( md, state, expansions );
-    doRemove( md, state, expansions );
-
-    expansions.empty();
-}
diff --git a/colm/fsmgraph.h b/colm/fsmgraph.h
deleted file mode 100644
index fca23cc1..00000000
--- a/colm/fsmgraph.h
+++ /dev/null
@@ -1,1388 +0,0 @@
-/*
- * Copyright 2001-2012 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Colm.
- *
- * Colm is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Colm is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Colm; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#ifndef _FSMGRAPH_H
-#define _FSMGRAPH_H
-
-#include <assert.h>
-#include "keyops.h"
-#include "vector.h"
-#include "bstset.h"
-#include "compare.h"
-#include "avltree.h"
-#include "dlist.h"
-#include "bstmap.h"
-#include "sbstmap.h"
-#include "sbstset.h"
-#include "sbsttable.h"
-#include "avlset.h"
-#include "avlmap.h"
-
-/* Flags that control merging. */
-#define SB_GRAPH1 0x01
-#define SB_GRAPH2 0x02
-#define SB_BOTH 0x03
-#define SB_ISFINAL 0x04
-#define SB_ISMARKED 0x08
-#define SB_ONLIST 0x10
-
-struct FsmTrans;
-struct FsmState;
-struct FsmGraph;
-struct Action;
-struct TokenDef;
-struct NameInst;
-
-/* Link pointers for membership in a list of states. Kept in a struct of its
- * own for unambiguous access to the list element. */
-struct FsmListEl
-{
-    FsmState *prev;
-    FsmState *next;
-};
-
-/* This is the marked index for a state pair. Used in minimization. It keeps
- * track of whether or not the state pair is marked. Only declarations are
- * given here; the member functions are defined elsewhere. */
-struct MarkIndex
-{
- /* NOTE(review): presumably 'states' is the number of states to index;
- * confirm against the definition. */
- MarkIndex(int states);
- ~MarkIndex();
-
- /* Mark a pair of states / query whether a pair is marked. */
- void markPair(int state1, int state2);
- bool isPairMarked(int state1, int state2);
-
-private:
- int numStates;
- bool *array;
-};
-
-extern KeyOps *keyOps;
-
-/* Transition Action Element. */
-typedef SBstMapEl< int, Action* > ActionTableEl;
-
-/* Transition Action Table. A map, keyed on an integer ordering, of Action
- * pointers. Only declarations are given here; the member functions are
- * defined elsewhere. */
-struct ActionTable
- : public SBstMap< int, Action*, CmpOrd<int> >
-{
- /* Set a single action under the given ordering. */
- void setAction( int ordering, Action *action );
- /* Set nActs actions from parallel arrays of orderings and actions. */
- void setActions( int *orderings, Action **actions, int nActs );
- /* Set the actions found in another table. */
- void setActions( const ActionTable &other );
-
- bool hasAction( Action *action );
-};
-
-typedef SBstSet< Action*, CmpOrd<Action*> > ActionSet;
-typedef CmpSTable< Action*, CmpOrd<Action*> > CmpActionSet;
-
-/* Longest-match Transition Action Element. */
-typedef SBstMapEl< int, TokenDef* > LmActionTableEl;
-
-/* Transition action table holding TokenDef pointers, keyed on an integer
- * ordering. Only declarations are given here; the member functions are
- * defined elsewhere. */
-struct LmActionTable
- : public SBstMap< int, TokenDef*, CmpOrd<int> >
-{
- /* Set a single action under the given ordering. */
- void setAction( int ordering, TokenDef *action );
- /* Set the actions found in another table. */
- void setActions( const LmActionTable &other );
-};
-
-/* Three-way compare of whole action table elements: the key decides first,
- * the value breaks ties. Returns -1, 0 or 1. */
-struct CmpActionTableEl
-{
-    static int compare( const ActionTableEl &lhs,
-            const ActionTableEl &rhs )
-    {
-        if ( lhs.key < rhs.key )
-            return -1;
-        if ( lhs.key > rhs.key )
-            return 1;
-
-        /* Keys are equal, fall back to the values. */
-        if ( lhs.value < rhs.value )
-            return -1;
-        if ( lhs.value > rhs.value )
-            return 1;
-
-        return 0;
-    }
-};
-
-/* Compare for ActionTable. */
-typedef CmpSTable< ActionTableEl, CmpActionTableEl > CmpActionTable;
-
-/* Three-way compare of whole lm action table elements: the key decides
- * first, the value breaks ties. Returns -1, 0 or 1. */
-struct CmpLmActionTableEl
-{
-    static int compare( const LmActionTableEl &lhs,
-            const LmActionTableEl &rhs )
-    {
-        if ( lhs.key < rhs.key )
-            return -1;
-        if ( lhs.key > rhs.key )
-            return 1;
-
-        /* Keys are equal, fall back to the values. */
-        if ( lhs.value < rhs.value )
-            return -1;
-        if ( lhs.value > rhs.value )
-            return 1;
-
-        return 0;
-    }
-};
-
-/* Compare for LmActionTable. */
-typedef CmpSTable< LmActionTableEl, CmpLmActionTableEl > CmpLmActionTable;
-
-/* Action table element for error action tables. Adds the encoding of transfer
- * point. */
-struct ErrActionTableEl
-{
- ErrActionTableEl( Action *action, int ordering, int transferPoint )
- : ordering(ordering), action(action), transferPoint(transferPoint) { }
-
- /* Ordering and id of the action embedding. */
- int ordering;
- Action *action;
-
- /* Id of point of transfere from Error action table to transtions and
- * eofActionTable. */
- int transferPoint;
-
- int getKey() const { return ordering; }
-};
-
-/* Table of error action elements keyed on an int (ErrActionTableEl::getKey
- * returns the element's ordering). Only declarations are given here; the
- * member functions are defined elsewhere. */
-struct ErrActionTable
- : public SBstTable< ErrActionTableEl, int, CmpOrd<int> >
-{
- /* Set a single action with its ordering and transfer point. */
- void setAction( int ordering, Action *action, int transferPoint );
- /* Set the actions found in another table. */
- void setActions( const ErrActionTable &other );
-};
-
-/* Compare of an error action table element (key & value). */
-struct CmpErrActionTableEl
-{
- static int compare( const ErrActionTableEl &action1,
- const ErrActionTableEl &action2 )
- {
- if ( action1.ordering < action2.ordering )
- return -1;
- else if ( action1.ordering > action2.ordering )
- return 1;
- else if ( action1.action < action2.action )
- return -1;
- else if ( action1.action > action2.action )
- return 1;
- else if ( action1.transferPoint < action2.transferPoint )
- return -1;
- else if ( action1.transferPoint > action2.transferPoint )
- return 1;
- return 0;
- }
-};
-
-/* Compare for ErrActionTable. */
-typedef CmpSTable< ErrActionTableEl, CmpErrActionTableEl > CmpErrActionTable;
-
-
-/* Describe a priority, shared among PriorEls.
- * Has a key and a priority value. */
-struct PriorDesc
-{
- int key;
- int priority;
-};
-
-/* Element in the arrays of priorities for transitions and arrays. The
- * ordering is unique among instantiations of machines, while the descriptor
- * is shared. */
-struct PriorEl
-{
-    PriorEl( int ordering, PriorDesc *desc ) :
-        ordering(ordering),
-        desc(desc)
-    { }
-
-    int ordering;
-    PriorDesc *desc;
-};
-
-/* Three-way compare of priority elements by the key of their priority
- * descriptor only. Returns -1, 0 or 1. */
-struct PriorElCmp
-{
-    static inline int compare( const PriorEl &pel1, const PriorEl &pel2 )
-    {
-        if ( pel1.desc->key < pel2.desc->key )
-            return -1;
-        if ( pel1.desc->key > pel2.desc->key )
-            return 1;
-        return 0;
-    }
-};
-
-
-/* Priority Table. A set of PriorEl ordered with PriorElCmp, i.e. by the key
- * of the priority descriptor. Only declarations are given here; the member
- * functions are defined elsewhere. */
-struct PriorTable
- : public SBstSet< PriorEl, PriorElCmp >
-{
- /* Set a single priority with its ordering. */
- void setPrior( int ordering, PriorDesc *desc );
- /* Set the priorities found in another table. */
- void setPriors( const PriorTable &other );
-};
-
-/* Three-way compare of prior table elements for distinguishing state data.
- * The descriptor pointer decides first, the ordering breaks ties. Returns -1,
- * 0 or 1. */
-struct CmpPriorEl
-{
-    static inline int compare( const PriorEl &pel1, const PriorEl &pel2 )
-    {
-        if ( pel1.desc < pel2.desc )
-            return -1;
-        if ( pel1.desc > pel2.desc )
-            return 1;
-
-        /* Same descriptor, fall back to the ordering. */
-        if ( pel1.ordering < pel2.ordering )
-            return -1;
-        if ( pel1.ordering > pel2.ordering )
-            return 1;
-
-        return 0;
-    }
-};
-
-/* Compare of PriorTable distinguishing state data. Using a compare of the
- * pointers is a little more strict than it needs be. It requires that
- * priority tables have the exact same set of priority assignment operators
- * (from the input lang) to be considered equal.
- *
- * Really only key-value pairs need be tested and ordering be merged. However
- * this would require that in the fusing of states, priority descriptors be
- * chosen for the new fused state based on priority. Since the out transition
- * lists and ranges aren't necessarily going to line up, this is more work for
- * little gain. Final compression resets all priorities first, so this would
- * only be useful for compression at every operator, which is only an
- * undocumented test feature.
- */
-typedef CmpSTable<PriorEl, CmpPriorEl> CmpPriorTable;
-
-/* Plain action list that imposes no ordering. Elements are plain ints. */
-typedef Vector<int> TransFuncList;
-
-/* Comparison for TransFuncList, using CmpOrd<int> on the elements. */
-typedef CmpTable< int, CmpOrd<int> > TransFuncListCompare;
-
-/* Transition class that implements actions and priorities.
- *
- * The copy constructor copies the key range, actionTable and priorTable but
- * resets fromState and toState to 0, so a copy starts out unattached. It
- * asserts that neither side has any lmActionTable entries; lmActionTable
- * itself is not copied. Neither constructor initializes the list pointers
- * (prev, next, ilprev, ilnext). */
-struct FsmTrans
-{
- FsmTrans() : fromState(0), toState(0) {}
- FsmTrans( const FsmTrans &other ) :
- lowKey(other.lowKey),
- highKey(other.highKey),
- fromState(0), toState(0),
- actionTable(other.actionTable),
- priorTable(other.priorTable)
- {
- assert( lmActionTable.length() == 0 && other.lmActionTable.length() == 0 );
- }
-
- Key lowKey, highKey;
- FsmState *fromState;
- FsmState *toState;
-
- /* Pointers for outlist. */
- FsmTrans *prev, *next;
-
- /* Pointers for in-list. */
- FsmTrans *ilprev, *ilnext;
-
- /* The function table and priority for the transition. */
- ActionTable actionTable;
- PriorTable priorTable;
-
- LmActionTable lmActionTable;
-};
-
-/* In transition list. Like DList except only has head pointers, which is all
- * that is required. Insertion and deletion is handled by the graph. This
- * class provides the iterator of a single list. The iterator walks the
- * ilprev/ilnext pointers of FsmTrans. */
-struct TransInList
-{
- TransInList() : head(0) { }
-
- FsmTrans *head;
-
- struct Iter
- {
- /* Default construct. */
- Iter() : ptr(0) { }
-
- /* Construct, assign from a list. */
- Iter( const TransInList &il ) : ptr(il.head) { }
- Iter &operator=( const TransInList &dl ) { ptr = dl.head; return *this; }
-
- /* lte() is true while the iterator is on an element; end() is true
- * once it has run off the list (ptr is null). */
- bool lte() const { return ptr != 0; }
- bool end() const { return ptr == 0; }
-
- /* At the first, last element. */
- bool first() const { return ptr && ptr->ilprev == 0; }
- bool last() const { return ptr && ptr->ilnext == 0; }
-
- /* Cast, dereference, arrow ops. */
- operator FsmTrans*() const { return ptr; }
- FsmTrans &operator *() const { return *ptr; }
- FsmTrans *operator->() const { return ptr; }
-
- /* Increment, decrement. Both dereference ptr without a null check,
- * so they must not be used once end() is true. */
- inline void operator++(int) { ptr = ptr->ilnext; }
- inline void operator--(int) { ptr = ptr->ilprev; }
-
- /* The iterator is simply a pointer. */
- FsmTrans *ptr;
- };
-};
-
-typedef DList<FsmTrans> TransList; /* List of transitions; the type of FsmState::outList. */
-
-/* Set of states, list of states. */
-typedef BstSet<FsmState*> StateSet;
-typedef DList<FsmState> StateList;
-
-/* An element in a state dict: a set of states together with the target state
- * recorded for that set. The constructor copies the set and leaves targState
- * uninitialized. */
-struct StateDictEl
-:
- public AvlTreeEl<StateDictEl>
-{
- StateDictEl(const StateSet &stateSet)
- : stateSet(stateSet) { }
-
- const StateSet &getKey() { return stateSet; }
- StateSet stateSet;
- FsmState *targState;
-};
-
-/* Dictionary mapping a set of states to a target state. */
-typedef AvlTree< StateDictEl, StateSet, CmpTable<FsmState*> > StateDict;
-
-/* Data needed for a merge operation: the state dictionary plus the head and
- * tail pointers of a list of states (both start out null). fillListAppend is
- * only declared here. */
-struct MergeData
-{
- MergeData()
- : stfillHead(0), stfillTail(0) { }
-
- StateDict stateDict;
-
- FsmState *stfillHead;
- FsmState *stfillTail;
-
- void fillListAppend( FsmState *state );
-};
-
-struct TransEl /* A lowKey..highKey range and the FsmTrans pointer stored with it. */
-{
- /* Constructors. Only the three-argument form sets value; the default
- * form initializes none of the members explicitly. */
- TransEl() { }
- TransEl( Key lowKey, Key highKey )
- : lowKey(lowKey), highKey(highKey) { }
- TransEl( Key lowKey, Key highKey, FsmTrans *value )
- : lowKey(lowKey), highKey(highKey), value(value) { }
-
- Key lowKey, highKey;
- FsmTrans *value;
-};
-
-struct CmpKey /* Three-way compare of two Keys using Key's < and > operators. */
-{
- static int compare( const Key key1, const Key key2 )
- {
- if ( key1 < key2 )
- return -1;
- else if ( key1 > key2 )
- return 1;
- else
- return 0;
- }
-};
-
-/* Vector based set of key items, compared with CmpKey. */
-typedef BstSet<Key, CmpKey> KeySet;
-
-struct MinPartition /* A list of states plus an active flag (initially false). */
-{
- MinPartition() : active(false) { }
-
- StateList list;
- bool active;
-
- MinPartition *prev, *next; /* Not initialized by the constructor. */
-};
-
-/* Epsilon transitions stored in a state. Each int specifies a target. */
-typedef Vector<int> EpsilonTrans;
-
-/* Element of the list of states that are to be drawn into this: a target
- * state pointer and a leaving flag, both supplied to the constructor. */
-struct EptVectEl
-{
- EptVectEl( FsmState *targ, bool leaving )
- : targ(targ), leaving(leaving) { }
-
- FsmState *targ;
- bool leaving;
-};
-typedef Vector<EptVectEl> EptVect;
-
-/* Set of entry ids that go into this state. */
-typedef BstSet<int> EntryIdSet;
-
-/* Set of longest match items that may be active in a given state. */
-typedef BstSet<TokenDef*> LmItemSet;
-
-/* Conditions. A CondSet is a set of Action pointers; CmpCondSet compares
- * two such tables. Both use CmpOrd<Action*> on the elements. */
-typedef BstSet< Action*, CmpOrd<Action*> > CondSet;
-typedef CmpTable< Action*, CmpOrd<Action*> > CmpCondSet;
-
-struct CondSpace /* AVL tree element whose key (getKey) is its condSet. */
- : public AvlTreeEl<CondSpace>
-{
- CondSpace( const CondSet &condSet )
- : condSet(condSet) {}
-
- const CondSet &getKey() { return condSet; }
-
- CondSet condSet;
- Key baseKey; /* Not set by the constructor. */
- long condSpaceId; /* Not set by the constructor. */
-};
-
-typedef Vector<CondSpace*> CondSpaceVect;
-
-typedef AvlTree<CondSpace, CondSet, CmpCondSet> CondSpaceMap;
-
-struct StateCond /* Key range with an associated CondSpace pointer; element type of StateCondList. */
-{
- StateCond( Key lowKey, Key highKey ) :
- lowKey(lowKey), highKey(highKey) {}
-
- Key lowKey;
- Key highKey;
- CondSpace *condSpace; /* Left uninitialized by the constructor. */
-
- StateCond *prev, *next;
-};
-
-typedef DList<StateCond> StateCondList;
-typedef Vector<long> LongVect;
-
-struct Expansion /* Key range with from/to condition-space data; element type of ExpansionList. */
-{
- Expansion( Key lowKey, Key highKey ) :
- lowKey(lowKey), highKey(highKey),
- fromTrans(0), fromCondSpace(0),
- toCondSpace(0) {}
-
- ~Expansion()
- {
- if ( fromTrans != 0 ) /* The expansion deletes the transition it holds. */
- delete fromTrans;
- }
-
- Key lowKey;
- Key highKey;
-
- FsmTrans *fromTrans;
- CondSpace *fromCondSpace;
- long fromVals; /* Not initialized by the constructor. */
-
- CondSpace *toCondSpace;
- LongVect toValsList;
-
- Expansion *prev, *next;
-};
-
-typedef DList<Expansion> ExpansionList;
-
-struct Removal /* Key range with a next pointer only; no constructor, so nothing is initialized. */
-{
- Key lowKey;
- Key highKey;
-
- Removal *next;
-};
-
-struct CondData /* Holds the next condition key (constructed as 0) and the map of condition spaces. */
-{
- CondData() : nextCondKey(0) {}
-
- /* Condition info. */
- Key nextCondKey;
-
- CondSpaceMap condSpaceMap;
-};
-
-extern CondData *condData; /* Declared here; defined in another translation unit. */
-
-/* State class that implements actions and priorities. The constructors and
- * destructor are only declared here. */
-struct FsmState
-{
- FsmState();
- FsmState(const FsmState &other);
- ~FsmState();
-
- /* Is the state final? True when the SB_ISFINAL bit is set in stateBits. */
- bool isFinState() { return stateBits & SB_ISFINAL; }
-
- /* Out transition list. */
- TransList outList;
-
- /* In transition Lists. */
- TransInList inList;
-
- /* Entry points into the state. */
- EntryIdSet entryIds;
-
- /* Epsilon transitions. */
- EpsilonTrans epsilonTrans;
-
- /* Condition info. */
- StateCondList stateCondList;
-
- /* Number of in transitions from states other than ourselves. */
- int foreignInTrans;
-
- /* Temporary data for various algorithms. The members share storage, so
- * only one of them is meaningful at a time. */
- union {
- /* When duplicating the fsm we need to map each
- * state to the new state representing it. */
- FsmState *stateMap;
-
- /* When minimizing machines by partitioning, this maps to the group
- * the state is in. */
- MinPartition *partition;
-
- /* When merging states (state machine operations) this next pointer is
- * used for the list of states that need to be filled in. */
- FsmState *next;
-
- /* Identification for printing and stable minimization. */
- int stateNum;
-
- } alg;
-
- /* Data used in epsilon operation, maybe fit into alg? */
- FsmState *isolatedShadow;
- int owningGraph;
-
- /* A pointer to a dict element that contains the set of states this state
- * represents. This cannot go into alg, because alg.next is used during
- * the merging process. */
- StateDictEl *stateDictEl;
-
- /* When drawing epsilon transitions, holds the list of states to merge
- * with. */
- EptVect *eptVect;
-
- /* Bits controlling the behaviour of the state during collapsing to dfa. */
- int stateBits;
-
- /* State list elements. */
- FsmState *next, *prev;
-
- /*
- * Priority and Action data.
- */
-
- /* Out priorities transferred to out transitions. */
- PriorTable outPriorTable;
-
- /* The following two action tables are distinguished by when they run.
- * The toState actions are executed immediately after transition actions
- * of incoming transitions and the current character will be the same as
- * the one available then. The fromState actions are executed immediately
- * before the transition actions of outgoing transitions and the current
- * character is same as the one available then. */
-
- /* Actions to execute upon entering into a state. */
- ActionTable toStateActionTable;
-
- /* Actions to execute when going from the state to the transition. */
- ActionTable fromStateActionTable;
-
- /* Actions to add to any future transitions that leave via this state. */
- ActionTable outActionTable;
-
- /* Conditions to add to any future transitions that leave via this state. */
- ActionSet outCondSet;
-
- /* Error action tables. */
- ErrActionTable errActionTable;
-
- /* Actions to execute on eof. */
- ActionTable eofActionTable;
-
- /* Set of longest match items that may be active in this state. */
- LmItemSet lmItemSet;
-
- FsmState *eofTarget;
-};
-
-template <class ListItem> struct NextTrans /* Cursor over items linked by ->next; caches the current item's key range. */
-{
- Key lowKey, highKey;
- ListItem *trans;
- ListItem *next;
-
- void load() { /* Refresh next and the cached keys from trans; a null trans only clears next. */
- if ( trans == 0 )
- next = 0;
- else {
- next = trans->next;
- lowKey = trans->lowKey;
- highKey = trans->highKey;
- }
- }
-
- void set( ListItem *t ) { /* Point at t and load it. */
- trans = t;
- load();
- }
-
- void increment() { /* Step to the item saved in next and load it. */
- trans = next;
- load();
- }
-};
-
-
-/* Encodes the different states that are meaningful to the user of the
- * iterator. PairIter stores one of these in userState each time it yields. */
-enum PairIterUserState
-{
- RangeInS1, RangeInS2,
- RangeOverlap,
- BreakS1, BreakS2
-};
-
-template <class ListItem1, class ListItem2 = ListItem1> struct PairIter /* Walks two key-range lists together, yielding one range (or break notice) per step. */
-{
- /* Encodes the different states that an fsm iterator can be in. Each
- * value other than Begin and End names a resume point inside
- * findNext(). */
- enum IterState {
- Begin,
- ConsumeS1Range, ConsumeS2Range,
- OnlyInS1Range, OnlyInS2Range,
- S1SticksOut, S1SticksOutBreak,
- S2SticksOut, S2SticksOutBreak,
- S1DragsBehind, S1DragsBehindBreak,
- S2DragsBehind, S2DragsBehindBreak,
- ExactOverlap, End
- };
-
- PairIter( ListItem1 *list1, ListItem2 *list2 );
-
- /* Query iterator. Both forms of operator++ simply call findNext(). */
- bool lte() { return itState != End; }
- bool end() { return itState == End; }
- void operator++(int) { findNext(); }
- void operator++() { findNext(); }
-
- /* Iterator state. */
- ListItem1 *list1;
- ListItem2 *list2;
- IterState itState;
- PairIterUserState userState;
-
- NextTrans<ListItem1> s1Tel;
- NextTrans<ListItem2> s2Tel;
- Key bottomLow, bottomHigh;
- ListItem1 *bottomTrans1;
- ListItem2 *bottomTrans2;
-
-private:
- void findNext();
-};
-
-/* Init the iterator by advancing to the first item. Stores the two list
- * heads, sets itState to Begin and calls findNext() once; the remaining
- * members are first written inside findNext(). */
-template <class ListItem1, class ListItem2> PairIter<ListItem1, ListItem2>::PairIter(
- ListItem1 *list1, ListItem2 *list2 )
-:
- list1(list1),
- list2(list2),
- itState(Begin)
-{
- findNext();
-}
-
-/* Return and re-entry for the co-routine iterators. Records label in itState,
- * returns, and defines the label entry<label> at which execution resumes on
- * re-entry. It expands to several statements, so this should ALWAYS be
- * used inside of a block. */
-#define CO_RETURN(label) \
- itState = label; \
- return; \
- entry##label: {}
-
-/* Return and re-entry for the co-routine iterators, additionally storing
- * uState in userState before returning. This should ALWAYS be used inside of
- * a block. */
-#define CO_RETURN2(label, uState) \
- itState = label; \
- userState = uState; \
- return; \
- entry##label: {}
-
-/* Advance to the next range. On return itState and userState describe what
- * was found and s1Tel/s2Tel hold the ranges involved, unless there are no
- * more, in which case end() returns true.
- *
- * Written as a co-routine: every CO_RETURN/CO_RETURN2 saves a resume point in
- * itState and returns; the switch below jumps back to that point on the next
- * call, so the statements following each CO_RETURN run at the start of the
- * following call. */
-template <class ListItem1, class ListItem2> void PairIter<ListItem1, ListItem2>::findNext()
-{
- /* Jump into the iterator routine based on the iterator state. */
- switch ( itState ) {
- case Begin: goto entryBegin;
- case ConsumeS1Range: goto entryConsumeS1Range;
- case ConsumeS2Range: goto entryConsumeS2Range;
- case OnlyInS1Range: goto entryOnlyInS1Range;
- case OnlyInS2Range: goto entryOnlyInS2Range;
- case S1SticksOut: goto entryS1SticksOut;
- case S1SticksOutBreak: goto entryS1SticksOutBreak;
- case S2SticksOut: goto entryS2SticksOut;
- case S2SticksOutBreak: goto entryS2SticksOutBreak;
- case S1DragsBehind: goto entryS1DragsBehind;
- case S1DragsBehindBreak: goto entryS1DragsBehindBreak;
- case S2DragsBehind: goto entryS2DragsBehind;
- case S2DragsBehindBreak: goto entryS2DragsBehindBreak;
- case ExactOverlap: goto entryExactOverlap;
- case End: goto entryEnd;
- }
-
-entryBegin:
- /* Set up the next structs at the head of the transition lists. */
- s1Tel.set( list1 );
- s2Tel.set( list2 );
-
- /* Concurrently scan both out ranges. */
- while ( true ) {
- if ( s1Tel.trans == 0 ) {
- /* We are at the end of state1's ranges. Process the rest of
- * state2's ranges. */
- while ( s2Tel.trans != 0 ) {
- /* Range is only in s2. */
- CO_RETURN2( ConsumeS2Range, RangeInS2 );
- s2Tel.increment();
- }
- break;
- }
- else if ( s2Tel.trans == 0 ) {
- /* We are at the end of state2's ranges. Process the rest of
- * state1's ranges. */
- while ( s1Tel.trans != 0 ) {
- /* Range is only in s1. */
- CO_RETURN2( ConsumeS1Range, RangeInS1 );
- s1Tel.increment();
- }
- break;
- }
- /* Both state1's and state2's transition elements are good.
- * The signature of no overlap is a back key being in front of a
- * front key. */
- else if ( s1Tel.highKey < s2Tel.lowKey ) {
- /* A range exists in state1 that does not overlap with state2. */
- CO_RETURN2( OnlyInS1Range, RangeInS1 );
- s1Tel.increment();
- }
- else if ( s2Tel.highKey < s1Tel.lowKey ) {
- /* A range exists in state2 that does not overlap with state1. */
- CO_RETURN2( OnlyInS2Range, RangeInS2 );
- s2Tel.increment();
- }
- /* There is overlap, must mix the ranges in some way. */
- else if ( s1Tel.lowKey < s2Tel.lowKey ) {
- /* Range from state1 sticks out front. Must break it into
- * non-overlapping and overlapping segments. The overlapping
- * remainder is parked in bottomLow/bottomHigh/bottomTrans1. */
- bottomLow = s2Tel.lowKey;
- bottomHigh = s1Tel.highKey;
- s1Tel.highKey = s2Tel.lowKey;
- s1Tel.highKey.decrement();
- bottomTrans1 = s1Tel.trans;
-
- /* Notify the caller that we are breaking s1. This gives them a
- * chance to duplicate s1Tel[0,1].value. */
- CO_RETURN2( S1SticksOutBreak, BreakS1 );
-
- /* Broken off range is only in s1. */
- CO_RETURN2( S1SticksOut, RangeInS1 );
-
- /* Advance over the part sticking out front. */
- s1Tel.lowKey = bottomLow;
- s1Tel.highKey = bottomHigh;
- s1Tel.trans = bottomTrans1;
- }
- else if ( s2Tel.lowKey < s1Tel.lowKey ) {
- /* Range from state2 sticks out front. Must break it into
- * non-overlapping and overlapping segments. The overlapping
- * remainder is parked in bottomLow/bottomHigh/bottomTrans2. */
- bottomLow = s1Tel.lowKey;
- bottomHigh = s2Tel.highKey;
- s2Tel.highKey = s1Tel.lowKey;
- s2Tel.highKey.decrement();
- bottomTrans2 = s2Tel.trans;
-
- /* Notify the caller that we are breaking s2. This gives them a
- * chance to duplicate s2Tel[0,1].value. */
- CO_RETURN2( S2SticksOutBreak, BreakS2 );
-
- /* Broken off range is only in s2. */
- CO_RETURN2( S2SticksOut, RangeInS2 );
-
- /* Advance over the part sticking out front. */
- s2Tel.lowKey = bottomLow;
- s2Tel.highKey = bottomHigh;
- s2Tel.trans = bottomTrans2;
- }
- /* Low ends are even. Are the high ends even? */
- else if ( s1Tel.highKey < s2Tel.highKey ) {
- /* Range from state2 goes longer than the range from state1. We
- * must break the range from state2 into an evenly overlapping
- * segment. */
- bottomLow = s1Tel.highKey;
- bottomLow.increment();
- bottomHigh = s2Tel.highKey;
- s2Tel.highKey = s1Tel.highKey;
- bottomTrans2 = s2Tel.trans;
-
- /* Notify the caller that we are breaking s2. This gives them a
- * chance to duplicate s2Tel[0,1].value. */
- CO_RETURN2( S2DragsBehindBreak, BreakS2 );
-
- /* Breaking s2 produces exact overlap. */
- CO_RETURN2( S2DragsBehind, RangeOverlap );
-
- /* Advance over the front we just broke off of range 2. */
- s2Tel.lowKey = bottomLow;
- s2Tel.highKey = bottomHigh;
- s2Tel.trans = bottomTrans2;
-
- /* Advance over the entire s1Tel. We have consumed it. */
- s1Tel.increment();
- }
- else if ( s2Tel.highKey < s1Tel.highKey ) {
- /* Range from state1 goes longer than the range from state2. We
- * must break the range from state1 into an evenly overlapping
- * segment. */
- bottomLow = s2Tel.highKey;
- bottomLow.increment();
- bottomHigh = s1Tel.highKey;
- s1Tel.highKey = s2Tel.highKey;
- bottomTrans1 = s1Tel.trans;
-
- /* Notify the caller that we are breaking s1. This gives them a
- * chance to duplicate s1Tel[0,1].value. */
- CO_RETURN2( S1DragsBehindBreak, BreakS1 );
-
- /* Breaking s1 produces exact overlap. */
- CO_RETURN2( S1DragsBehind, RangeOverlap );
-
- /* Advance over the front we just broke off of range 1. */
- s1Tel.lowKey = bottomLow;
- s1Tel.highKey = bottomHigh;
- s1Tel.trans = bottomTrans1;
-
- /* Advance over the entire s2Tel. We have consumed it. */
- s2Tel.increment();
- }
- else {
- /* There is an exact overlap. */
- CO_RETURN2( ExactOverlap, RangeOverlap );
-
- s1Tel.increment();
- s2Tel.increment();
- }
- }
-
- /* Done, go into end state. Later calls re-enter at entryEnd and fall
- * straight out of the function. */
- CO_RETURN( End );
-}
-
-
-/* Compare lists of epsilon transitions. Entries are name ids of targets,
- * compared as plain ints with CmpOrd<int>. */
-typedef CmpTable< int, CmpOrd<int> > CmpEpsilonTrans;
-
-/* Compare class for the Approximate minimization. compare() takes two state
- * pointers and returns an int; it is only declared here. */
-class ApproxCompare
-{
-public:
- ApproxCompare() { }
- int compare( const FsmState *pState1, const FsmState *pState2 );
-};
-
-/* Compare class for the initial partitioning of a partition minimization.
- * compare() takes two state pointers and returns an int; it is only declared
- * here. */
-class InitPartitionCompare
-{
-public:
- InitPartitionCompare() { }
- int compare( const FsmState *pState1, const FsmState *pState2 );
-};
-
-/* Compare class for the regular partitioning of a partition minimization.
- * compare() takes two state pointers and returns an int; it is only declared
- * here. */
-class PartitionCompare
-{
-public:
- PartitionCompare() { }
- int compare( const FsmState *pState1, const FsmState *pState2 );
-};
-
-/* Compare class for a minimization that marks pairs. Provides the shouldMark
- * routine, which takes the mark index and two state pointers and returns a
- * bool; it is only declared here. */
-class MarkCompare
-{
-public:
- MarkCompare() { }
- bool shouldMark( MarkIndex &markIndex, const FsmState *pState1,
- const FsmState *pState2 );
-};
-
-/* List of partitions. */
-typedef DList< MinPartition > PartitionList;
-
-/* List of transitions out of a state. */
-typedef Vector<TransEl> TransListVect;
-
-/* Entry point map used for keeping track of entry points in a machine. Maps
- * an int entry id to a state pointer. EntryIdSet repeats, with the same
- * type, the typedef made earlier in this header. */
-typedef BstSet< int > EntryIdSet;
-typedef BstMapEl< int, FsmState* > EntryMapEl;
-typedef BstMap< int, FsmState* > EntryMap;
-typedef Vector<EntryMapEl> EntryMapBase;
-
-/* Graph class that implements actions and priorities. Apart from
- * setMisfitAccounting, every member function is only declared here. */
-struct FsmGraph
-{
- /* Constructors/Destructors. */
- FsmGraph( );
- FsmGraph( const FsmGraph &graph );
- ~FsmGraph();
-
- /* The list of states. */
- StateList stateList;
- StateList misfitList;
-
- /* The map of entry points. */
- EntryMap entryPoints;
-
- /* The start state. */
- FsmState *startState;
-
- /* Error state, possibly created only when the final machine has been
- * created and the XML machine is about to be written. No transitions
- * point to this state. */
- FsmState *errState;
-
- /* The set of final states. */
- StateSet finStateSet;
-
- /* Misfit Accounting. Are misfits put on a separate list. */
- bool misfitAccounting;
-
- bool lmRequiresErrorState;
- NameInst *rootName;
- NameInst **nameIndex;
-
- /*
- * Transition actions and priorities.
- */
-
- /* Set priorities on transitions. */
- void startFsmPrior( int ordering, PriorDesc *prior );
- void allTransPrior( int ordering, PriorDesc *prior );
- void finishFsmPrior( int ordering, PriorDesc *prior );
- void leaveFsmPrior( int ordering, PriorDesc *prior );
-
- /* Action setting support. */
- void transferErrorActions( FsmState *state, int transferPoint );
- void setErrorAction( FsmState *state, int ordering, Action *action );
- void setErrorActions( FsmState *state, const ActionTable &other );
-
- /* Fill all spaces in a transition list with an error transition. */
- void fillGaps( FsmState *state );
-
- /* Similar to setErrorAction, instead gives a state to go to on error. */
- void setErrorTarget( FsmState *state, FsmState *target, int *orderings,
- Action **actions, int nActs );
-
- /* Set actions to execute. */
- void startFsmAction( int ordering, Action *action );
- void allTransAction( int ordering, Action *action );
- void finishFsmAction( int ordering, Action *action );
- void leaveFsmAction( int ordering, Action *action );
- void longMatchAction( int ordering, TokenDef *lmPart );
-
- /* Set conditions. */
- CondSpace *addCondSpace( const CondSet &condSet );
-
- void findEmbedExpansions( ExpansionList &expansionList,
- FsmState *destState, Action *condAction );
- void embedCondition( MergeData &md, FsmState *state, Action *condAction );
- void embedCondition( FsmState *state, Action *condAction );
-
- void startFsmCondition( Action *condAction );
- void allTransCondition( Action *condAction );
- void leaveFsmCondition( Action *condAction );
-
- /* Set error actions to execute. */
- void startErrorAction( int ordering, Action *action, int transferPoint );
- void allErrorAction( int ordering, Action *action, int transferPoint );
- void finalErrorAction( int ordering, Action *action, int transferPoint );
- void notStartErrorAction( int ordering, Action *action, int transferPoint );
- void notFinalErrorAction( int ordering, Action *action, int transferPoint );
- void middleErrorAction( int ordering, Action *action, int transferPoint );
-
- /* Set EOF actions. */
- void startEOFAction( int ordering, Action *action );
- void allEOFAction( int ordering, Action *action );
- void finalEOFAction( int ordering, Action *action );
- void notStartEOFAction( int ordering, Action *action );
- void notFinalEOFAction( int ordering, Action *action );
- void middleEOFAction( int ordering, Action *action );
-
- /* Set To State actions. */
- void startToStateAction( int ordering, Action *action );
- void allToStateAction( int ordering, Action *action );
- void finalToStateAction( int ordering, Action *action );
- void notStartToStateAction( int ordering, Action *action );
- void notFinalToStateAction( int ordering, Action *action );
- void middleToStateAction( int ordering, Action *action );
-
- /* Set From State actions. */
- void startFromStateAction( int ordering, Action *action );
- void allFromStateAction( int ordering, Action *action );
- void finalFromStateAction( int ordering, Action *action );
- void notStartFromStateAction( int ordering, Action *action );
- void notFinalFromStateAction( int ordering, Action *action );
- void middleFromStateAction( int ordering, Action *action );
-
- /* Shift the action ordering of the start transitions to start at
- * fromOrder and increase in units of 1. Useful before kleene star
- * operation. */
- int shiftStartActionOrder( int fromOrder );
-
- /* Clear all priorities from the fsm so they won't affect minimization
- * of the final fsm. */
- void clearAllPriorities();
-
- /* Zero out all the function keys. */
- void nullActionKeys();
-
- /* Walk the list of states and verify state properties. */
- void verifyStates();
-
- /* Misfit Accounting. Are misfits put on a separate list. */
- void setMisfitAccounting( bool val )
- { misfitAccounting = val; }
-
- /* Set and Unset a state as final. */
- void setFinState( FsmState *state );
- void unsetFinState( FsmState *state );
-
- void setStartState( FsmState *state );
- void unsetStartState( );
-
- /* Set and unset a state as an entry point. */
- void setEntry( int id, FsmState *state );
- void changeEntry( int id, FsmState *to, FsmState *from );
- void unsetEntry( int id, FsmState *state );
- void unsetEntry( int id );
- void unsetAllEntryPoints();
-
- /* Epsilon transitions. */
- void epsilonTrans( int id );
- void shadowReadWriteStates( MergeData &md );
-
- /*
- * Basic attaching and detaching.
- */
-
- /* Common to attaching/detaching list and default. */
- void attachToInList( FsmState *from, FsmState *to, FsmTrans *&head, FsmTrans *trans );
- void detachFromInList( FsmState *from, FsmState *to, FsmTrans *&head, FsmTrans *trans );
-
- /* Attach with a new transition. */
- FsmTrans *attachNewTrans( FsmState *from, FsmState *to,
- Key onChar1, Key onChar2 );
-
- /* Attach with an existing transition that is already in an out list. */
- void attachTrans( FsmState *from, FsmState *to, FsmTrans *trans );
-
- /* Redirect a transition away from error and towards some state. */
- void redirectErrorTrans( FsmState *from, FsmState *to, FsmTrans *trans );
-
- /* Detach a transition from a target state. */
- void detachTrans( FsmState *from, FsmState *to, FsmTrans *trans );
-
- /* Detach a state from the graph. */
- void detachState( FsmState *state );
-
- /*
- * NFA to DFA conversion routines.
- */
-
- /* Duplicate a transition that will drop in to a free spot. */
- FsmTrans *dupTrans( FsmState *from, FsmTrans *srcTrans );
-
- /* In crossing, two transitions both go to real states. */
- FsmTrans *fsmAttachStates( MergeData &md, FsmState *from,
- FsmTrans *destTrans, FsmTrans *srcTrans );
-
- /* Two transitions are to be crossed, handle the possibility of either
- * going to the error state. */
- FsmTrans *mergeTrans( MergeData &md, FsmState *from,
- FsmTrans *destTrans, FsmTrans *srcTrans );
-
- /* Compare to determine relative priorities of two transition tables. */
- int comparePrior( const PriorTable &priorTable1, const PriorTable &priorTable2 );
-
- /* Cross a src transition with one that is already occupying a spot. */
- FsmTrans *crossTransitions( MergeData &md, FsmState *from,
- FsmTrans *destTrans, FsmTrans *srcTrans );
-
- void outTransCopy( MergeData &md, FsmState *dest, FsmTrans *srcList );
-
- void doRemove( MergeData &md, FsmState *destState, ExpansionList &expList1 );
- void doExpand( MergeData &md, FsmState *destState, ExpansionList &expList1 );
- void findCondExpInTrans( ExpansionList &expansionList, FsmState *state,
- Key lowKey, Key highKey, CondSpace *fromCondSpace, CondSpace *toCondSpace,
- long destVals, LongVect &toValsList );
- void findTransExpansions( ExpansionList &expansionList,
- FsmState *destState, FsmState *srcState );
- void findCondExpansions( ExpansionList &expansionList,
- FsmState *destState, FsmState *srcState );
- void mergeStateConds( FsmState *destState, FsmState *srcState );
-
- /* Merge a set of states into newState. */
- void mergeStates( MergeData &md, FsmState *destState,
- FsmState **srcStates, int numSrc );
- void mergeStatesLeaving( MergeData &md, FsmState *destState, FsmState *srcState );
- void mergeStates( MergeData &md, FsmState *destState, FsmState *srcState );
-
- /* Make all states that are combinations of other states and that
- * have not yet had their out transitions filled in. This will
- * empty out stateDict and the stfill list of the MergeData. */
- void fillInStates( MergeData &md );
-
- /*
- * Transition Comparison.
- */
-
- /* Compare transition data. Either of the pointers may be null. */
- static inline int compareDataPtr( FsmTrans *trans1, FsmTrans *trans2 );
-
- /* Compare target state and transition data. Either pointer may be null. */
- static inline int compareFullPtr( FsmTrans *trans1, FsmTrans *trans2 );
-
- /* Compare target partitions. Either pointer may be null. */
- static inline int comparePartPtr( FsmTrans *trans1, FsmTrans *trans2 );
-
- /* Check marked status of target states. Either pointer may be null. */
- static inline bool shouldMarkPtr( MarkIndex &markIndex,
- FsmTrans *trans1, FsmTrans *trans2 );
-
- /*
- * Callbacks.
- */
-
- /* Compare priority and function table of transitions. */
- static int compareTransData( FsmTrans *trans1, FsmTrans *trans2 );
-
- /* Add in the properties of srcTrans into this. */
- void addInTrans( FsmTrans *destTrans, FsmTrans *srcTrans );
-
- /* Compare states on data stored in the states. */
- static int compareStateData( const FsmState *state1, const FsmState *state2 );
-
- /* Out transition data. */
- void clearOutData( FsmState *state );
- bool hasOutData( FsmState *state );
- void transferOutData( FsmState *destState, FsmState *srcState );
-
- /*
- * Allocation.
- */
-
- /* New up a state and add it to the graph. */
- FsmState *addState();
-
- /*
- * Building basic machines
- */
-
- void concatFsm( Key c );
- void concatFsm( Key *str, int len );
- void concatFsmCI( Key *str, int len );
- void orFsm( Key *set, int len );
- void rangeFsm( Key low, Key high );
- void rangeStarFsm( Key low, Key high );
- void emptyFsm( );
- void lambdaFsm( );
-
- /*
- * Fsm operators.
- */
-
- void starOp( );
- void repeatOp( int times );
- void optionalRepeatOp( int times );
- void concatOp( FsmGraph *other );
- void unionOp( FsmGraph *other );
- void intersectOp( FsmGraph *other );
- void subtractOp( FsmGraph *other );
- void epsilonOp();
- void joinOp( int startId, int finalId, FsmGraph **others, int numOthers );
- void globOp( FsmGraph **others, int numOthers );
- void deterministicEntry();
-
- /*
- * Operator workers
- */
-
- /* Determine if there are any entry points into a start state other than
- * the start state. */
- bool isStartStateIsolated();
-
- /* Make a new start state that has no entry points. Will not change the
- * identity of the fsm. */
- void isolateStartState();
-
- /* Workers for resolving epsilon transitions. */
- bool inEptVect( EptVect *eptVect, FsmState *targ );
- void epsilonFillEptVectFrom( FsmState *root, FsmState *from, bool parentLeaving );
- void resolveEpsilonTrans( MergeData &md );
-
- /* Workers for concatenation and union. */
- void doConcat( FsmGraph *other, StateSet *fromStates, bool optional );
- void doOr( FsmGraph *other );
-
- /*
- * Final states
- */
-
- /* Unset any final states that are no longer to be final
- * due to final bits. */
- void unsetIncompleteFinals();
- void unsetKilledFinals();
-
- /* Bring in other's entry points. Assumes others states are going to be
- * copied into this machine. */
- void copyInEntryPoints( FsmGraph *other );
-
- /* Ordering states. */
- void depthFirstOrdering( FsmState *state );
- void depthFirstOrdering();
- void sortStatesByFinal();
-
- /* Set sequential state numbers starting at base. */
- void setStateNumbers( int base );
-
- /* Unset all final states. */
- void unsetAllFinStates();
-
- /* Set the bits of final states and clear the bits of non final states. */
- void setFinBits( int finStateBits );
-
- /*
- * Self-consistency checks.
- */
-
- /* Run a sanity check on the machine. */
- void verifyIntegrity();
-
- /* Verify that there are no unreachable states, or dead end states. */
- void verifyReachability();
- void verifyNoDeadEndStates();
-
- /*
- * Path pruning
- */
-
- /* Mark all states reachable from state. NOTE(review): the name suggests
- * this variant walks transitions in the reverse direction - confirm
- * against the definition. */
- void markReachableFromHereReverse( FsmState *state );
-
- /* Mark all states reachable from state. */
- void markReachableFromHere( FsmState *state );
- void markReachableFromHereStopFinal( FsmState *state );
-
- /* Removes states that cannot be reached by any path in the fsm and are
- * thus wasted silicon. NOTE(review): this text is identical to the
- * comment on removeUnreachableStates; presumably dead end states are
- * meant here - confirm against the definition. */
- void removeDeadEndStates();
-
- /* Removes states that cannot be reached by any path in the fsm and are
- * thus wasted silicon. */
- void removeUnreachableStates();
-
- /* Remove error actions from states on which the error transition will
- * never be taken. */
- bool outListCovers( FsmState *state );
- bool anyErrorRange( FsmState *state );
-
- /* Remove states that are on the misfit list. */
- void removeMisfits();
-
- /*
- * FSM Minimization
- */
-
- /* Minimization by partitioning. */
- void minimizePartition1();
- void minimizePartition2();
-
- /* Minimize the final state Machine. The result is the minimal fsm. Slow
- * but stable, correct minimization. Uses n^2 space (lookout) and average
- * n^2 time. Worst case n^3 time, but that is a very rare case. */
- void minimizeStable();
-
- /* Minimize the final state machine. Does not find the minimal fsm, but a
- * pretty good approximation. Does not use any extra space. Average n^2
- * time. Worst case n^3 time, but that is a very rare case. */
- void minimizeApproximate();
-
- /* This is the worker for the minimize approximate solution. It merges
- * states that have identical out transitions. */
- bool minimizeRound( );
-
- /* Given an initial partitioning of states, split partitions that have out trans
- * to differing partitions. */
- int partitionRound( FsmState **statePtrs, MinPartition *parts, int numParts );
-
- /* Split partitions that have a transition to a previously split partition, until
- * there are no more partitions to split. */
- int splitCandidates( FsmState **statePtrs, MinPartition *parts, int numParts );
-
- /* Fuse together states in the same partition. */
- void fusePartitions( MinPartition *parts, int numParts );
-
- /* Mark pairs where out final stateness differs, out trans data differs,
- * trans pairs go to a marked pair or trans data differs. Should get
- * a lot of pairs. */
- void initialMarkRound( MarkIndex &markIndex );
-
- /* One marking round on all state pairs. Considers if trans pairs go
- * to a marked state only. Returns whether or not a pair was marked. */
- bool markRound( MarkIndex &markIndex );
-
- /* Move the in trans into src into dest. */
- void inTransMove(FsmState *dest, FsmState *src);
-
- /* Make state src and dest the same state. */
- void fuseEquivStates(FsmState *dest, FsmState *src);
-
- /* Find any states that didn't get marked by the marking algorithm and
- * merge them into the primary states of their equivalence class. */
- void fuseUnmarkedPairs( MarkIndex &markIndex );
-
- /* Merge neighboring transitions that go to the same state and have the
- * same transition data. */
- void compressTransitions();
-
- /* Returns true if there is a transition (either explicit or by a gap) to
- * the error state. */
- bool checkErrTrans( FsmState *state, FsmTrans *trans );
- bool checkErrTransFinish( FsmState *state );
- bool hasErrorTrans();
-};
-
-
-#endif /* _FSMGRAPH_H */
diff --git a/colm/fsmmin.cc b/colm/fsmmin.cc
deleted file mode 100644
index cbb2b99f..00000000
--- a/colm/fsmmin.cc
+++ /dev/null
@@ -1,732 +0,0 @@
-/*
- * Copyright 2006-2012 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Colm.
- *
- * Colm is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Colm is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Colm; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include "fsmgraph.h"
-#include "mergesort.h"
-
-int FsmGraph::partitionRound( FsmState **statePtrs, MinPartition *parts, int numParts )
-{
- /* One refinement pass over the partitions. The states of each partition
- * are sorted with PartitionCompare and every run of states that compares
- * different from its predecessor is moved into a fresh slot at the end of
- * parts. Returns the partition count after the pass, which equals the
- * numParts passed in when nothing was split. statePtrs is scratch space
- * for the sort.
- *
- * Need a mergesort object and a single partition compare. */
- MergeSort<FsmState*, PartitionCompare> mergeSort;
- PartitionCompare partCompare;
-
- /* For each partition. numParts grows whenever a split happens, so
- * partitions created during this pass are visited by this loop too. */
- for ( int p = 0; p < numParts; p++ ) {
- /* Fill the pointer array with the states in the partition. */
- StateList::Iter state = parts[p].list;
- for ( int s = 0; state.lte(); state++, s++ )
- statePtrs[s] = state;
-
- /* Sort the states using the partitioning compare. */
- int numStates = parts[p].list.length();
- mergeSort.sort( statePtrs, numStates );
-
- /* Assign the states into partitions based on the results of the sort.
- * firstNewPart remembers where the partitions split off from p begin. */
- int destPart = p, firstNewPart = numParts;
- for ( int s = 1; s < numStates; s++ ) {
- /* If this state differs from the last then move to the next partition. */
- if ( partCompare.compare( statePtrs[s-1], statePtrs[s] ) < 0 ) {
- /* The new partition is the next avail spot. */
- destPart = numParts;
- numParts += 1;
- }
-
- /* If the state is not staying in the first partition, then
- * transfer it to its destination partition. */
- if ( destPart != p ) {
- FsmState *state = parts[p].list.detach( statePtrs[s] );
- parts[destPart].list.append( state );
- }
- }
-
- /* Fix the partition pointer for all the states that got moved to a new
- * partition. This must be done after the states are transferred so the
- * result of the sort is not altered. */
- for ( int newPart = firstNewPart; newPart < numParts; newPart++ ) {
- StateList::Iter state = parts[newPart].list;
- for ( ; state.lte(); state++ )
- state->alg.partition = &parts[newPart];
- }
- }
-
- return numParts;
-}
-
-/**
- * \brief Minimize by partitioning version 1.
- *
- * Builds an initial partitioning with InitPartitionCompare, then runs
- * partitionRound() repeatedly until a round creates no new partition. The
- * states left sharing a partition are fused. Produces the most minimal FSM
- * possible.
- */
-void FsmGraph::minimizePartition1()
-{
-    /* One sorter and one comparator drive the initial partitioning. */
-    MergeSort<FsmState*, InitPartitionCompare> sorter;
-    InitPartitionCompare initCmp;
-
-    /* An empty machine has nothing to minimize. */
-    if ( stateList.length() == 0 )
-        return;
-
-    /* Snapshot the state list into a flat array so that it can be sorted. */
-    const int numStates = stateList.length();
-    FsmState **statePtrs = new FsmState*[numStates];
-    int filled = 0;
-    for ( StateList::Iter st = stateList; st.lte(); st++ )
-        statePtrs[filled++] = st;
-
-    /* Order the states by final state status and transition data. */
-    sorter.sort( statePtrs, numStates );
-
-    /* One partition slot per state is allocated up front. */
-    MinPartition *parts = new MinPartition[numStates];
-
-    /* Walk the sorted array. Each change between neighbours opens the next
-     * partition; every state is linked into the partition that is current. */
-    int lastPart = 0;
-    for ( int s = 0; s < numStates; s++ ) {
-        FsmState *cur = statePtrs[s];
-        if ( s > 0 && initCmp.compare( statePtrs[s-1], cur ) < 0 )
-            lastPart += 1;
-
-        cur->alg.partition = &parts[lastPart];
-        parts[lastPart].list.append( cur );
-    }
-
-    /* The states were linked into the partition lists without first being
-     * taken off the main list, so the main list is abandoned here. */
-    stateList.abandon();
-
-    /* Keep refining until a round leaves the partition count unchanged. */
-    int numParts = lastPart + 1, before;
-    do {
-        before = numParts;
-        numParts = partitionRound( statePtrs, parts, before );
-    } while ( numParts != before );
-
-    /* Collapse each partition to one state; the survivors go back onto the
-     * main list. */
-    fusePartitions( parts, numParts );
-
-    /* Release the scratch arrays. */
-    delete[] statePtrs;
-    delete[] parts;
-}
-
-/* Split partitions that need splitting, decide which partitions might need
- * to be split as a result, continue until there are no more that might need
- * to be split. statePtrs is scratch space for sorting one partition at a
- * time and parts must have room for the partitions created here. Returns
- * the number of partitions once no candidates remain. */
-int FsmGraph::splitCandidates( FsmState **statePtrs, MinPartition *parts, int numParts )
-{
- /* Need a mergesort and a partition compare. */
- MergeSort<FsmState*, PartitionCompare> mergeSort;
- PartitionCompare partCompare;
-
- /* The lists of unsplittable (partList) and splittable partitions.
- * Only partitions in the splittable list are checked for needing splitting. */
- PartitionList partList, splittable;
-
- /* Initially, all partitions are born from a split (the initial
- * partitioning) and can cause other partitions to be split. So any
- * partition with a state with a transition out to another partition is a
- * candidate for splitting. This will make every partition except possibly
- * partitions of final states split candidates. */
- for ( int p = 0; p < numParts; p++ ) {
- /* Assume not active. */
- parts[p].active = false;
-
- /* Look for a trans out of any state in the partition. */
- for ( StateList::Iter state = parts[p].list; state.lte(); state++ ) {
- /* If there is at least one transition out to another state then
- * the partition becomes splittable. */
- if ( state->outList.length() > 0 ) {
- parts[p].active = true;
- break;
- }
- }
-
- /* If it was found active then it goes on the splittable list. */
- if ( parts[p].active )
- splittable.append( &parts[p] );
- else
- partList.append( &parts[p] );
- }
-
- /* While there are partitions that are splittable, pull one off and try
- * to split it. If it splits, determine which partitions may now be split
- * as a result of the newly split partition. */
- while ( splittable.length() > 0 ) {
- MinPartition *partition = splittable.detachFirst();
-
- /* Fill the pointer array with the states in the partition. */
- StateList::Iter state = partition->list;
- for ( int s = 0; state.lte(); state++, s++ )
- statePtrs[s] = state;
-
- /* Sort the states using the partitioning compare. */
- int numStates = partition->list.length();
- mergeSort.sort( statePtrs, numStates );
-
- /* Assign the states into partitions based on the results of the sort. */
- MinPartition *destPart = partition;
- int firstNewPart = numParts;
- for ( int s = 1; s < numStates; s++ ) {
- /* If this state differs from the last then move to the next partition. */
- if ( partCompare.compare( statePtrs[s-1], statePtrs[s] ) < 0 ) {
- /* The new partition is the next avail spot. */
- destPart = &parts[numParts];
- numParts += 1;
- }
-
- /* If the state is not staying in the first partition, then
- * transfer it to its destination partition. */
- if ( destPart != partition ) {
- FsmState *state = partition->list.detach( statePtrs[s] );
- destPart->list.append( state );
- }
- }
-
- /* Fix the partition pointer for all the states that got moved to a new
- * partition. This must be done after the states are transferred so the
- * result of the sort is not altered. */
- int newPart;
- for ( newPart = firstNewPart; newPart < numParts; newPart++ ) {
- StateList::Iter state = parts[newPart].list;
- for ( ; state.lte(); state++ )
- state->alg.partition = &parts[newPart];
- }
-
- /* Put the partition we just split and any new partitions that came out
- * of the split onto the inactive list. */
- partition->active = false;
- partList.append( partition );
- for ( newPart = firstNewPart; newPart < numParts; newPart++ ) {
- parts[newPart].active = false;
- partList.append( &parts[newPart] );
- }
-
- /* destPart only moves away from partition when a split happened, so
- * an unchanged destPart means there is nothing more to do for it. */
- if ( destPart == partition )
- continue;
-
- /* Now determine which partitions are splittable as a result of
- * splitting partition by walking the in lists of the states in
- * partitions that got split. Partition is the faked first item in the
- * loop: newPart starts one below firstNewPart so that the first pass
- * handles partition itself. After the last pass causalPart is set to
- * &parts[numParts]; that address is only computed, never dereferenced,
- * because the loop condition fails first. */
- MinPartition *causalPart = partition;
- newPart = firstNewPart - 1;
- while ( newPart < numParts ) {
- /* Loop all states in the causal partition. */
- StateList::Iter state = causalPart->list;
- for ( ; state.lte(); state++ ) {
- /* Walk all transition into the state and put the partition
- * that the from state is in onto the splittable list. */
- for ( TransInList::Iter trans = state->inList; trans.lte(); trans++ ) {
- MinPartition *fromPart = trans->fromState->alg.partition;
- if ( ! fromPart->active ) {
- fromPart->active = true;
- partList.detach( fromPart );
- splittable.append( fromPart );
- }
- }
- }
-
- newPart += 1;
- causalPart = &parts[newPart];
- }
- }
- return numParts;
-}
-
-
-/**
- * \brief Minimize by partitioning version 2 (best alg).
- *
- * Builds an initial partitioning with InitPartitionCompare, then hands the
- * refinement to splitCandidates(), which only re-examines partitions that
- * may have become splittable. The states left sharing a partition are
- * fused. Runs faster than version 1. Produces the most minimal fsm possible.
- */
-void FsmGraph::minimizePartition2()
-{
-    /* One sorter and one comparator drive the initial partitioning. */
-    MergeSort<FsmState*, InitPartitionCompare> sorter;
-    InitPartitionCompare initCmp;
-
-    /* An empty machine has nothing to minimize. */
-    if ( stateList.length() == 0 )
-        return;
-
-    /* Snapshot the state list into a flat array so that it can be sorted. */
-    const int numStates = stateList.length();
-    FsmState **statePtrs = new FsmState*[numStates];
-    int filled = 0;
-    for ( StateList::Iter st = stateList; st.lte(); st++ )
-        statePtrs[filled++] = st;
-
-    /* Order the states by final state status and transition data. */
-    sorter.sort( statePtrs, numStates );
-
-    /* One partition slot per state is allocated up front. */
-    MinPartition *parts = new MinPartition[numStates];
-
-    /* Walk the sorted array. Each change between neighbours opens the next
-     * partition; every state is linked into the partition that is current. */
-    int lastPart = 0;
-    for ( int s = 0; s < numStates; s++ ) {
-        FsmState *cur = statePtrs[s];
-        if ( s > 0 && initCmp.compare( statePtrs[s-1], cur ) < 0 )
-            lastPart += 1;
-
-        cur->alg.partition = &parts[lastPart];
-        parts[lastPart].list.append( cur );
-    }
-
-    /* The states were linked into the partition lists without first being
-     * taken off the main list, so the main list is abandoned here. */
-    stateList.abandon();
-
-    /* Refine the initial partitioning until no candidate remains. */
-    const int numParts = splitCandidates( statePtrs, parts, lastPart + 1 );
-
-    /* Collapse each partition to one state; the survivors go back onto the
-     * main list. */
-    fusePartitions( parts, numParts );
-
-    /* Release the scratch arrays. */
-    delete[] statePtrs;
-    delete[] parts;
-}
-
-/* First marking pass: mark every pair of states that InitPartitionCompare
- * reports as different. */
-void FsmGraph::initialMarkRound( MarkIndex &markIndex )
-{
-    /* Comparator on final state status, out transitions and transition data. */
-    InitPartitionCompare initCmp;
-
-    /* Visit each unordered pair exactly once: for every p, q ranges over the
-     * states that come before p on the state list. */
-    for ( FsmState *p = stateList.head; p != 0; p = p->next ) {
-        for ( FsmState *q = stateList.head; q != p; q = q->next ) {
-            /* Pairs that compare equal stay unmarked in this round. */
-            if ( initCmp.compare( p, q ) == 0 )
-                continue;
-
-            markIndex.markPair( p->alg.stateNum, q->alg.stateNum );
-        }
-    }
-}
-
-/* One marking pass over all state pairs that are still unmarked. Returns
- * true when at least one new pair was marked during the pass. */
-bool FsmGraph::markRound( MarkIndex &markIndex )
-{
-    /* Decides whether a pair has to be marked given the current marks. */
-    MarkCompare markCmp;
-    bool anyMarked = false;
-
-    /* Visit each unordered pair exactly once: for every p, q ranges over the
-     * states that come before p on the state list. */
-    for ( FsmState *p = stateList.head; p != 0; p = p->next ) {
-        for ( FsmState *q = stateList.head; q != p; q = q->next ) {
-            /* Pairs marked earlier need no further attention. */
-            if ( markIndex.isPairMarked( p->alg.stateNum, q->alg.stateNum ) )
-                continue;
-
-            if ( markCmp.shouldMark( markIndex, p, q ) ) {
-                markIndex.markPair( p->alg.stateNum, q->alg.stateNum );
-                anyMarked = true;
-            }
-        }
-    }
-
-    return anyMarked;
-}
-
-
-/**
- * \brief Minimize by pair marking.
- *
- * Decides for each pair of states whether the two are distinct. Uses O(n^2)
- * memory and should only be used on small graphs. Produces the most minimal
- * FSM possible.
- */
-void FsmGraph::minimizeStable()
-{
-    /* Number the states; the numbers index the pair table. */
-    setStateNumbers( 0 );
-
-    /* Table recording which pairs have been marked. */
-    MarkIndex markIndex( stateList.length() );
-
-    /* Seed the table with the pairs that differ on final stateness, out
-     * transitions or transition data. */
-    initialMarkRound( markIndex );
-
-    /* Run marking rounds until one of them marks nothing new. */
-    while ( markRound( markIndex ) ) {
-        /* Another round is needed. */
-    }
-
-    /* Whatever is still unmarked gets merged. */
-    fuseUnmarkedPairs( markIndex );
-}
-
-/* Worker for the approximate minimization. Sorts the states with
- * ApproxCompare and fuses every state that compares equal to the state kept
- * before it in sorted order. Returns true if at least one state was fused
- * away, false if the machine was left untouched. */
-bool FsmGraph::minimizeRound()
-{
-    /* Nothing to do if there are no states. */
-    if ( stateList.length() == 0 )
-        return false;
-
-    /* Need a mergesort on approx compare and an approx compare. */
-    MergeSort<FsmState*, ApproxCompare> mergeSort;
-    ApproxCompare approxCompare;
-
-    /* Take the state count once. fuseEquivStates() detaches the fused state
-     * from stateList, so bounding the scan below by stateList.length() would
-     * shrink the bound on every fuse and leave the tail of the sorted array
-     * unexamined in this round. */
-    const int numStates = stateList.length();
-
-    /* Fill up an array of pointers to the states. */
-    FsmState **statePtrs = new FsmState*[numStates];
-    StateList::Iter state = stateList;
-    for ( int s = 0; state.lte(); state++, s++ )
-        statePtrs[s] = state;
-
-    bool modified = false;
-
-    /* Sort the list. */
-    mergeSort.sort( statePtrs, numStates );
-
-    /* Walk the sorted array looking for duplicates next to each other and
-     * merge in any duplicates. pLast is the most recent state that was kept,
-     * pState the candidate being examined. */
-    FsmState **pLast = statePtrs;
-    FsmState **pState = statePtrs + 1;
-    for ( int i = 1; i < numStates; i++, pState++ ) {
-        if ( approxCompare.compare( *pLast, *pState ) == 0 ) {
-            /* Last and pState are the same, so fuse together. Move forward
-             * with pState but not with pLast, so that any further identical
-             * states are fused into the same survivor. The slot at pState
-             * now holds a deleted state and is not read again. */
-            fuseEquivStates( *pLast, *pState );
-            modified = true;
-        }
-        else {
-            /* Last and this are different, do not merge them. Move pLast to
-             * the current (it may be way behind from merging many states);
-             * pState moves forward to consider the next pair. */
-            pLast = pState;
-        }
-    }
-
-    delete[] statePtrs;
-    return modified;
-}
-
-/**
- * \brief Minimize by an approximation.
- *
- * Repeatedly tries to find states with transitions out to the same set of
- * states on the same set of keys until no more identical states can be found.
- * Does not produce the most minimal FSM possible.
- */
-void FsmGraph::minimizeApproximate()
-{
-    /* Run rounds for as long as the previous one compacted something. */
-    bool compacted;
-    do {
-        compacted = minimizeRound();
-    } while ( compacted );
-}
-
-
-/* Remove states that cannot be reached from the start state or from any
- * entry point. Marks everything reachable, then deletes the states that did
- * not get marked and clears the mark on the ones that did. */
-void FsmGraph::removeUnreachableStates()
-{
-    /* Misfit accounting must be off and the misfit list must be empty. */
-    assert( !misfitAccounting && misfitList.length() == 0 );
-
-    /* Mark from the start state and from each entry point. */
-    markReachableFromHere( startState );
-    for ( EntryMap::Iter en = entryPoints; en.lte(); en++ )
-        markReachableFromHere( en->value );
-
-    /* Sweep the state list. The successor is saved first because the current
-     * state may be deleted. */
-    for ( FsmState *cur = stateList.head, *following; cur; cur = following ) {
-        following = cur->next;
-
-        if ( cur->stateBits & SB_ISMARKED ) {
-            /* Reachable: keep it and reset the mark. */
-            cur->stateBits &= ~ SB_ISMARKED;
-            continue;
-        }
-
-        /* Unreachable: unhook it from the graph and free it. */
-        detachState( cur );
-        stateList.detach( cur );
-        delete cur;
-    }
-}
-
-/* Returns true when the out transitions of state cover every key from
- * keyOps->minKey to keyOps->maxKey with no gap between neighbouring ranges. */
-bool FsmGraph::outListCovers( FsmState *state )
-{
-    /* With no ranges at all nothing is covered. */
-    if ( state->outList.length() == 0 )
-        return false;
-
-    /* The first range has to begin at the lower bound. */
-    TransList::Iter tr = state->outList.first();
-    if ( keyOps->minKey < tr->lowKey )
-        return false;
-
-    /* From the second range on, each low key must sit directly after the
-     * previous range's high key. */
-    tr.increment();
-    while ( tr.lte() ) {
-        Key below = tr->lowKey;
-        below.decrement();
-        if ( tr->prev->highKey < below )
-            return false;
-        tr++;
-    }
-
-    /* The last range has to reach the upper bound. */
-    tr = state->outList.last();
-    return !( tr->highKey < keyOps->maxKey );
-}
-
-/* Remove states that do not lead to a final state. Walks the graph in
- * reverse from every final state, marking the states seen, then deletes the
- * states that were never marked. */
-void FsmGraph::removeDeadEndStates()
-{
-    /* Misfit accounting must be off and the misfit list must be empty. */
-    assert( !misfitAccounting && misfitList.length() == 0 );
-
-    /* Mark every state that has a path to some final state. */
-    FsmState **finals = finStateSet.data;
-    const int numFinals = finStateSet.length();
-    for ( int i = 0; i < numFinals; i++ )
-        markReachableFromHereReverse( finals[i] );
-
-    /* The start state is always kept, even when the machine accepts nothing.
-     * The mark is set only after the reverse walks above so that it does not
-     * cause the start state's in transitions to be skipped when the walk
-     * visits the start state. */
-    startState->stateBits |= SB_ISMARKED;
-
-    /* Sweep the state list. The successor is saved first because the current
-     * state may be deleted. */
-    for ( FsmState *cur = stateList.head, *following; cur != 0; cur = following ) {
-        following = cur->next;
-
-        if ( cur->stateBits & SB_ISMARKED ) {
-            /* Leads to a final state: keep it and reset the mark. */
-            cur->stateBits &= ~ SB_ISMARKED;
-            continue;
-        }
-
-        /* Dead end: unhook it from the graph and free it. */
-        detachState( cur );
-        stateList.detach( cur );
-        delete cur;
-    }
-}
-
-/* Empty the misfit list, deleting each state on it. Misfit accounting should
- * be on when this is called: detaching one state will likely put further
- * states onto the misfit list, and the loop picks those up as well. */
-void FsmGraph::removeMisfits()
-{
-    while ( misfitList.length() > 0 ) {
-        /* Always work on whatever is at the front of the list. */
-        FsmState *victim = misfitList.head;
-
-        /* Unhook it from the graph. */
-        detachState( victim );
-
-        /* Detaching can only remove in transitions, so the state is still
-         * on the misfit list; take it off and free it. */
-        misfitList.detach( victim );
-        delete victim;
-    }
-}
-
-/* Fuse src into dest because they have been deemed equivalent states.
- * The transitions going into src are moved to go into dest, then src is
- * detached from the graph, taken off the main state list and deleted. The
- * caller must not use src afterwards. dest and src must be different states. */
-void FsmGraph::fuseEquivStates( FsmState *dest, FsmState *src )
-{
- /* This would get ugly. */
- assert( dest != src );
-
- /* Src is a duplicate of dest. Move everything that goes into src so that
- * it goes into dest instead. */
- inTransMove( dest, src );
-
- detachState( src );
- stateList.detach( src );
- delete src;
-}
-
-/* Merge every state into the first earlier state on the list that it forms
- * an unmarked pair with. */
-void FsmGraph::fuseUnmarkedPairs( MarkIndex &markIndex )
-{
-    /* Definition: the primary state of an equivalence class is the first
-     * state encountered that belongs to the class. Every equivalence class
-     * has a primary state, including classes that contain a single state. */
-
-    /* For each unmarked pair p is merged into q and p is deleted. q is always
-     * the primary state of its equivalence class; had it not been, it would
-     * have been deleted before the walk could land on it.
-     *
-     * Proof: assume q is not the primary state of its class. Then q would
-     * have been merged into some state that came before it, and p would be
-     * equivalent to that earlier state. But q is the first state that p is
-     * equivalent to, which is a contradiction. */
-
-    /* Visit each unordered pair once: for every p, q ranges over the states
-     * that come before p. The successor of p is saved up front because p may
-     * be deleted by the fuse. */
-    for ( FsmState *p = stateList.head, *nextP; p != 0; p = nextP ) {
-        nextP = p->next;
-
-        for ( FsmState *q = stateList.head; q != p; q = q->next ) {
-            /* A marked pair is not equivalent; try the next q. */
-            if ( markIndex.isPairMarked( p->alg.stateNum, q->alg.stateNum ) )
-                continue;
-
-            /* Unmarked pair: fold p into q. p is gone now, so stop. */
-            fuseEquivStates( q, p );
-            break;
-        }
-    }
-}
-
-void FsmGraph::fusePartitions( MinPartition *parts, int numParts )
-{
- /* For each partition, fuse state 2, 3, ... into state 1. The first state
- * of every partition is put back onto the main state list and each
- * partition list is abandoned once it has been processed. */
- for ( int p = 0; p < numParts; p++ ) {
- /* Assume that there will always be at least one state. The successor of
- * the first state is read here, before the first state is appended to
- * the main list below. */
- FsmState *first = parts[p].list.head, *toFuse = first->next;
-
- /* Put the first state back onto the main state list. Don't bother
- * removing it from the partition list first. */
- stateList.append( first );
-
- /* Fuse the rest of the state into the first. */
- while ( toFuse != 0 ) {
- /* Save the next. We will trash it before it is needed. */
- FsmState *next = toFuse->next;
-
- /* Put the state to be fused into the first back onto the main
- * list before it is fused. The state needs to be on
- * the main list for the detach from the graph to work. Don't
- * bother removing the state from the partition list first. We
- * need not maintain it. */
- stateList.append( toFuse );
-
- /* Now fuse to the first. */
- fuseEquivStates( first, toFuse );
-
- /* Go to the next that we saved before trashing the next pointer. */
- toFuse = next;
- }
-
- /* We transferred the states from the partition list into the main list without
- * removing the states from the partition list first. Clean it up. */
- parts[p].list.abandon();
- }
-}
-
-
-/* Merge neighboring transitions that go to the same state and have the same
- * transition data. For every state with more than one out transition the
- * out list is walked as adjacent pairs (trans, next). When next begins
- * directly after trans ends (trans->highKey equals next->lowKey minus one),
- * both go to the same state and their action tables compare equal, trans is
- * widened to next's high key and next is detached and deleted; the pair is
- * then re-formed from trans and its new successor. Otherwise both iterators
- * move forward by one. */
-void FsmGraph::compressTransitions()
-{
- for ( StateList::Iter st = stateList; st.lte(); st++ ) {
- if ( st->outList.length() > 1 ) {
- for ( TransList::Iter trans = st->outList, next = trans.next(); next.lte(); ) {
- Key nextLow = next->lowKey;
- nextLow.decrement();
- if ( trans->highKey == nextLow && trans->toState == next->toState &&
- CmpActionTable::compare( trans->actionTable, next->actionTable ) == 0 )
- {
- trans->highKey = next->highKey;
- st->outList.detach( next );
- detachTrans( next->fromState, next->toState, next );
- delete next;
- next = trans.next();
- }
- else {
- trans.increment();
- next.increment();
- }
- }
- }
- }
-}
diff --git a/colm/fsmrun.h b/colm/fsmrun.h
deleted file mode 100644
index 821b3ccf..00000000
--- a/colm/fsmrun.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright 2007-2012 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Colm.
- *
- * Colm is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Colm is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Colm; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#ifndef _FSMRUN2_H
-#define _FSMRUN2_H
-
-#include <colm/input.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
diff --git a/colm/fsmstate.cc b/colm/fsmstate.cc
deleted file mode 100644
index dae1479b..00000000
--- a/colm/fsmstate.cc
+++ /dev/null
@@ -1,467 +0,0 @@
-/*
- * Copyright 2006-2012 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Colm.
- *
- * Colm is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Colm is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Colm; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include <string.h>
-#include <assert.h>
-#include "fsmgraph.h"
-
-#include <iostream>
-using namespace std;
-
-/* Construct a mark index for a specified number of states. Allocates an
- * array of states^2 flags, one per ordered pair of state numbers, with every
- * pair initially unmarked. */
-MarkIndex::MarkIndex( int states ) : numStates(states)
-{
-    /* Total pairs is states^2. Only half of these are actually used, but they
-     * are all allocated to make indexing into the array easier. The operands
-     * are widened before multiplying so that the product is not computed in
-     * int, where it overflows for large state counts. */
-    size_t total = (size_t)states * (size_t)states;
-
-    /* The flags start out false, meaning no pair is marked. */
-    array = new bool[total];
-    memset( array, 0, sizeof(bool) * total );
-}
-
-/* Free the array of pair flags that the constructor allocated. */
-MarkIndex::~MarkIndex()
-{
- delete[] array;
-}
-
-/* Mark a pair of states. States are specified by their number and may be
- * given in either order. */
-void MarkIndex::markPair(int state1, int state2)
-{
-    /* The larger number selects the row and the smaller one the column, so
-     * both orderings of a pair land on the same slot. The row offset is
-     * computed in size_t so that it does not overflow int when the state
-     * count is large. */
-    size_t pos = ( state1 >= state2 ) ?
-        ( (size_t)state1 * numStates ) + state2 :
-        ( (size_t)state2 * numStates ) + state1;
-
-    array[pos] = true;
-}
-
-/* Returns true if the pair of states is marked, false otherwise. The order
- * in which the two state numbers are given does not matter. */
-bool MarkIndex::isPairMarked(int state1, int state2)
-{
-    /* The larger number selects the row and the smaller one the column, so
-     * both orderings of a pair land on the same slot. The row offset is
-     * computed in size_t so that it does not overflow int when the state
-     * count is large. */
-    size_t pos = ( state1 >= state2 ) ?
-        ( (size_t)state1 * numStates ) + state2 :
-        ( (size_t)state2 * numStates ) + state1;
-
-    return array[pos];
-}
-
-/* Create a new fsm state. The state has no out transitions or in
- * transitions, no out transition data and no number. */
-FsmState::FsmState()
-:
- /* No out or in transitions. */
- outList(),
- inList(),
-
- /* No entry points, or epsilon trans. */
- entryIds(),
- epsilonTrans(),
-
- /* Conditions. */
- stateCondList(),
-
- /* No transitions in from other states. */
- foreignInTrans(0),
-
- /* Only used during merging. Normally null. */
- stateDictEl(0),
- eptVect(0),
-
- /* No state identification bits. */
- stateBits(0),
-
- /* No Priority data. */
- outPriorTable(),
-
- /* No Action data. */
- toStateActionTable(),
- fromStateActionTable(),
- outActionTable(),
- outCondSet(),
- errActionTable(),
- eofActionTable(),
-
- eofTarget(0)
-{
-}
-
-/* Copy the state data of other. The in list starts out empty and
- * foreignInTrans, stateDictEl, eptVect and eofTarget are reset to zero
- * rather than copied. The out transitions are duplicated in the body, but
- * each duplicate still points at the target state of the original; fixing
- * that up is left to the FsmGraph copy constructor. */
-FsmState::FsmState(const FsmState &other)
-:
- /* All lists are cleared. They will be filled in when the
- * individual transitions are duplicated and attached. */
- outList(),
- inList(),
-
- /* Duplicate the entry id set and epsilon transitions. These
- * are sets of integers and as such need no fixing. */
- entryIds(other.entryIds),
- epsilonTrans(other.epsilonTrans),
-
- /* Copy in the elements of the conditions. */
- stateCondList( other.stateCondList ),
-
- /* No transitions in from other states. */
- foreignInTrans(0),
-
- /* This is only used during merging. Normally null. */
- stateDictEl(0),
- eptVect(0),
-
- /* Fsm state data. */
- stateBits(other.stateBits),
-
- /* Copy in priority data. */
- outPriorTable(other.outPriorTable),
-
- /* Copy in action data. */
- toStateActionTable(other.toStateActionTable),
- fromStateActionTable(other.fromStateActionTable),
- outActionTable(other.outActionTable),
- outCondSet(other.outCondSet),
- errActionTable(other.errActionTable),
- eofActionTable(other.eofActionTable),
-
- eofTarget(0)
-{
- /* Duplicate all the transitions. */
- for ( TransList::Iter trans = other.outList; trans.lte(); trans++ ) {
- /* Duplicate and store the original target in the transition. This will
- * be corrected once all the states have been created. */
- FsmTrans *newTrans = new FsmTrans(*trans);
- newTrans->toState = trans->toState;
- outList.append( newTrans );
- }
-}
-
-/* Release the state dict element, if there is one. Everything else is left
- * up to the FsmGraph destructor. */
-FsmState::~FsmState()
-{
-    /* Deleting a null pointer does nothing, so no check is needed. */
-    delete stateDictEl;
-}
-
-/* Compare two states using pointers to the states. With the approximate
- * compare the idea is that if the compare finds them the same, they can
- * immediately be merged. Returns a negative value, zero or a positive value;
- * zero means the two states were found the same. */
-int ApproxCompare::compare( const FsmState *state1 , const FsmState *state2 )
-{
-    /* Final state status comes first: a final state orders ahead of a
-     * non-final one. */
-    const bool final1 = ( state1->stateBits & SB_ISFINAL ) != 0;
-    const bool final2 = ( state2->stateBits & SB_ISFINAL ) != 0;
-    if ( final1 != final2 )
-        return final1 ? -1 : 1;
-
-    /* Next the epsilon transition sets. */
-    int res = CmpEpsilonTrans::compare( state1->epsilonTrans,
-            state2->epsilonTrans );
-    if ( res != 0 )
-        return res;
-
-    /* Then the data kept on the states themselves. */
-    res = FsmGraph::compareStateData( state1, state2 );
-    if ( res != 0 )
-        return res;
-
-    /* Finally walk the two out lists in step with a pair iterator and
-     * compare the transitions it pairs up. A range found in only one of the
-     * states is compared against a null transition. */
-    PairIter<FsmTrans> outPair( state1->outList.head, state2->outList.head );
-    while ( !outPair.end() ) {
-        res = 0;
-        switch ( outPair.userState ) {
-        case RangeInS1:
-            res = FsmGraph::compareFullPtr( outPair.s1Tel.trans, 0 );
-            break;
-        case RangeInS2:
-            res = FsmGraph::compareFullPtr( 0, outPair.s2Tel.trans );
-            break;
-        case RangeOverlap:
-            res = FsmGraph::compareFullPtr(
-                    outPair.s1Tel.trans, outPair.s2Tel.trans );
-            break;
-        case BreakS1:
-        case BreakS2:
-            /* Nothing to compare at a break. */
-            break;
-        }
-
-        /* The first difference decides the ordering. */
-        if ( res != 0 )
-            return res;
-
-        outPair++;
-    }
-
-    /* Got through the entire state comparison, deem them equal. */
-    return 0;
-}
-
-/* Compare used by the sort that does the initial partition of compaction.
- * Returns a negative value, zero or a positive value; zero means the two
- * states belong in the same initial partition. */
-int InitPartitionCompare::compare( const FsmState *state1 , const FsmState *state2 )
-{
-    /* Final state status comes first: a final state orders ahead of a
-     * non-final one. */
-    const bool final1 = ( state1->stateBits & SB_ISFINAL ) != 0;
-    const bool final2 = ( state2->stateBits & SB_ISFINAL ) != 0;
-    if ( final1 != final2 )
-        return final1 ? -1 : 1;
-
-    /* Next the epsilon transition sets. */
-    int res = CmpEpsilonTrans::compare( state1->epsilonTrans,
-            state2->epsilonTrans );
-    if ( res != 0 )
-        return res;
-
-    /* Then the data kept on the states themselves. */
-    res = FsmGraph::compareStateData( state1, state2 );
-    if ( res != 0 )
-        return res;
-
-    /* Walk the two condition lists in step. A range present in only one
-     * state decides the order at once; overlapping ranges are ordered by
-     * the address of their condition space. */
-    PairIter<StateCond> condPair( state1->stateCondList.head, state2->stateCondList.head );
-    for ( ; !condPair.end(); condPair++ ) {
-        if ( condPair.userState == RangeInS1 )
-            return 1;
-        if ( condPair.userState == RangeInS2 )
-            return -1;
-
-        if ( condPair.userState == RangeOverlap ) {
-            CondSpace *space1 = condPair.s1Tel.trans->condSpace;
-            CondSpace *space2 = condPair.s2Tel.trans->condSpace;
-            if ( space1 < space2 )
-                return -1;
-            else if ( space1 > space2 )
-                return 1;
-        }
-
-        /* BreakS1 and BreakS2 carry nothing to compare. */
-    }
-
-    /* Walk the two out lists in step and compare the data of the transitions
-     * that get paired up. A range found in only one of the states is
-     * compared against a null transition. */
-    PairIter<FsmTrans> outPair( state1->outList.head, state2->outList.head );
-    while ( !outPair.end() ) {
-        res = 0;
-        switch ( outPair.userState ) {
-        case RangeInS1:
-            res = FsmGraph::compareDataPtr( outPair.s1Tel.trans, 0 );
-            break;
-        case RangeInS2:
-            res = FsmGraph::compareDataPtr( 0, outPair.s2Tel.trans );
-            break;
-        case RangeOverlap:
-            res = FsmGraph::compareDataPtr(
-                    outPair.s1Tel.trans, outPair.s2Tel.trans );
-            break;
-        case BreakS1:
-        case BreakS2:
-            /* Nothing to compare at a break. */
-            break;
-        }
-
-        /* The first difference decides the ordering. */
-        if ( res != 0 )
-            return res;
-
-        outPair++;
-    }
-
-    return 0;
-}
-
-/* Compare class for the sort that does the partitioning. */
-int PartitionCompare::compare( const FsmState *state1, const FsmState *state2 )
-{
- int compareRes;
-
- /* Use a pair iterator to get the transition pairs. */
- PairIter<FsmTrans> outPair( state1->outList.head, state2->outList.head );
- for ( ; !outPair.end(); outPair++ ) {
- switch ( outPair.userState ) {
-
- case RangeInS1:
- compareRes = FsmGraph::comparePartPtr( outPair.s1Tel.trans, 0 );
- if ( compareRes != 0 )
- return compareRes;
- break;
-
- case RangeInS2:
- compareRes = FsmGraph::comparePartPtr( 0, outPair.s2Tel.trans );
- if ( compareRes != 0 )
- return compareRes;
- break;
-
- case RangeOverlap:
- compareRes = FsmGraph::comparePartPtr(
- outPair.s1Tel.trans, outPair.s2Tel.trans );
- if ( compareRes != 0 )
- return compareRes;
- break;
-
- case BreakS1:
- case BreakS2:
- break;
- }
- }
-
- return 0;
-}
-
-/* Compare class for the sort that does the partitioning. */
-bool MarkCompare::shouldMark( MarkIndex &markIndex, const FsmState *state1,
- const FsmState *state2 )
-{
- /* Use a pair iterator to get the transition pairs. */
- PairIter<FsmTrans> outPair( state1->outList.head, state2->outList.head );
- for ( ; !outPair.end(); outPair++ ) {
- switch ( outPair.userState ) {
-
- case RangeInS1:
- if ( FsmGraph::shouldMarkPtr( markIndex, outPair.s1Tel.trans, 0 ) )
- return true;
- break;
-
- case RangeInS2:
- if ( FsmGraph::shouldMarkPtr( markIndex, 0, outPair.s2Tel.trans ) )
- return true;
- break;
-
- case RangeOverlap:
- if ( FsmGraph::shouldMarkPtr( markIndex,
- outPair.s1Tel.trans, outPair.s2Tel.trans ) )
- return true;
- break;
-
- case BreakS1:
- case BreakS2:
- break;
- }
- }
-
- return false;
-}
-
-/*
- * Transition Comparison.
- */
-
-/* Compare target partitions. Either pointer may be null. */
-int FsmGraph::comparePartPtr( FsmTrans *trans1, FsmTrans *trans2 )
-{
- if ( trans1 != 0 ) {
- /* If trans1 is set then so should trans2. The initial partitioning
- * guarantees this for us. */
- if ( trans1->toState == 0 && trans2->toState != 0 )
- return -1;
- else if ( trans1->toState != 0 && trans2->toState == 0 )
- return 1;
- else if ( trans1->toState != 0 ) {
- /* Both of targets are set. */
- return CmpOrd< MinPartition* >::compare(
- trans1->toState->alg.partition, trans2->toState->alg.partition );
- }
- }
- return 0;
-}
-
-
-/* Compares two transition pointers according to priority and functions.
- * Either pointer may be null. Does not consider to state or from state. */
-int FsmGraph::compareDataPtr( FsmTrans *trans1, FsmTrans *trans2 )
-{
- if ( trans1 == 0 && trans2 != 0 )
- return -1;
- else if ( trans1 != 0 && trans2 == 0 )
- return 1;
- else if ( trans1 != 0 ) {
- /* Both of the transition pointers are set. */
- int compareRes = compareTransData( trans1, trans2 );
- if ( compareRes != 0 )
- return compareRes;
- }
- return 0;
-}
-
-/* Compares two transitions according to target state, priority and functions.
- * Does not consider from state. Either of the pointers may be null. */
-int FsmGraph::compareFullPtr( FsmTrans *trans1, FsmTrans *trans2 )
-{
- if ( (trans1 != 0) ^ (trans2 != 0) ) {
- /* Exactly one of the transitions is set. */
- if ( trans1 != 0 )
- return -1;
- else
- return 1;
- }
- else if ( trans1 != 0 ) {
- /* Both of the transition pointers are set. Test target state,
- * priority and funcs. */
- if ( trans1->toState < trans2->toState )
- return -1;
- else if ( trans1->toState > trans2->toState )
- return 1;
- else if ( trans1->toState != 0 ) {
- /* Test transition data. */
- int compareRes = compareTransData( trans1, trans2 );
- if ( compareRes != 0 )
- return compareRes;
- }
- }
- return 0;
-}
-
-
-bool FsmGraph::shouldMarkPtr( MarkIndex &markIndex, FsmTrans *trans1,
- FsmTrans *trans2 )
-{
- if ( (trans1 != 0) ^ (trans2 != 0) ) {
- /* Exactly one of the transitions is set. The initial mark round
- * should rule out this case. */
- assert( false );
- }
- else if ( trans1 != 0 ) {
- /* Both of the transitions are set. If the target pair is marked, then
- * the pair we are considering gets marked. */
- return markIndex.isPairMarked( trans1->toState->alg.stateNum,
- trans2->toState->alg.stateNum );
- }
-
- /* Neither of the transitiosn are set. */
- return false;
-}
-
-
diff --git a/colm/global.h b/colm/global.h
deleted file mode 100644
index d67c55e4..00000000
--- a/colm/global.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright 2006-2012 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Colm.
- *
- * Colm is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Colm is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Colm; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#ifndef __COLM_GLOBAL_H
-#define __COLM_GLOBAL_H
-
-#include <stdio.h>
-#include <iostream>
-#include <fstream>
-#include <fstream>
-#include <string>
-
-#include "config.h"
-#include "defs.h"
-#include "avltree.h"
-#include "keyops.h"
-
-#define PROGNAME "colm"
-
-/* IO filenames and stream. */
-extern bool genGraphviz;
-extern int gblErrorCount;
-
-std::ostream &error();
-
-/* IO filenames and stream. */
-extern const char *outputFileName;
-extern std::ostream *outStream;
-extern bool generateGraphviz;
-extern bool branchPointInfo;
-extern bool verbose, logging;
-extern bool addUniqueEmptyProductions;
-
-extern int gblErrorCount;
-extern char startDefName[];
-
-/* Error reporting. */
-std::ostream &error();
-std::ostream &error( int first_line, int first_column );
-std::ostream &warning( );
-std::ostream &warning( int first_line, int first_column );
-
-extern std::ostream *outStream;
-extern bool printStatistics;
-
-extern int gblErrorCount;
-extern char machineMain[];
-extern bool gblLibrary;
-extern const char *gblExportTo;
-
-/* Location in an input file. */
-struct InputLoc
-{
- const char *fileName;
- int line;
- int col;
-};
-
-/* Error reporting. */
-std::ostream &error();
-std::ostream &error( const InputLoc &loc );
-std::ostream &warning( const InputLoc &loc );
-
-void scan( char *fileName, std::istream &input, std::ostream &output );
-void terminateAllParsers( );
-void checkMachines( );
-
-void xmlEscapeHost( std::ostream &out, char *data, int len );
-void openOutput();
-void escapeLiteralString( std::ostream &out, const char *data );
-
-#endif
diff --git a/colm/input.c b/colm/input.c
deleted file mode 100644
index b6c96369..00000000
--- a/colm/input.c
+++ /dev/null
@@ -1,847 +0,0 @@
-/*
- * Copyright 2007-2012 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Colm.
- *
- * Colm is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Colm is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Colm; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include <colm/input.h>
-#include <colm/fsmrun.h>
-#include <colm/pdarun.h>
-#include <colm/debug.h>
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <assert.h>
-#include <unistd.h>
-
-#define true 1
-#define false 0
-
-RunBuf *newRunBuf()
-{
- RunBuf *rb = (RunBuf*)malloc(sizeof(RunBuf));
- memset( rb, 0, sizeof(RunBuf) );
- return rb;
-}
-
-void initFdFuncs();
-void initFileFuncs();
-void initPatternFuncs();
-void initReplFuncs();
-
-struct SourceFuncs dynamicFuncs;
-struct SourceFuncs fileFuncs;
-struct SourceFuncs fdFuncs;
-
-void initSourceStream( SourceStream *inputStream )
-{
- /* FIXME: correct values here. */
- inputStream->line = 1;
- inputStream->column = 1;
- inputStream->byte = 0;
-}
-
-void clearSourceStream( struct ColmProgram *prg, Tree **sp, SourceStream *sourceStream )
-{
- RunBuf *buf = sourceStream->queue;
- while ( buf != 0 ) {
- switch ( buf->type ) {
- case RunBufDataType:
- break;
-
- case RunBufTokenType:
- case RunBufIgnoreType:
- case RunBufSourceType:
- treeDownref( prg, sp, buf->tree );
- break;
- }
-
- RunBuf *next = buf->next;
- free( buf );
- buf = next;
- }
-
- sourceStream->queue = 0;
-}
-
-SourceStream *newSourceStreamFile( FILE *file )
-{
- SourceStream *is = (SourceStream*)malloc(sizeof(SourceStream));
- memset( is, 0, sizeof(SourceStream) );
- is->line = 1;
- is->column = 1;
- is->file = file;
- is->funcs = &fileFuncs;
- return is;
-}
-
-SourceStream *newSourceStreamFd( long fd )
-{
- SourceStream *is = (SourceStream*)malloc(sizeof(SourceStream));
- memset( is, 0, sizeof(SourceStream) );
- is->line = 1;
- is->column = 1;
- is->fd = fd;
- is->funcs = &fdFuncs;
- return is;
-}
-
-static RunBuf *sourceStreamPopHead( SourceStream *is )
-{
- RunBuf *ret = is->queue;
- is->queue = is->queue->next;
- if ( is->queue == 0 )
- is->queueTail = 0;
- else
- is->queue->prev = 0;
- return ret;
-}
-
-static void sourceStreamAppend( SourceStream *is, RunBuf *runBuf )
-{
- if ( is->queue == 0 ) {
- runBuf->prev = runBuf->next = 0;
- is->queue = is->queueTail = runBuf;
- }
- else {
- is->queueTail->next = runBuf;
- runBuf->prev = is->queueTail;
- runBuf->next = 0;
- is->queueTail = runBuf;
- }
-}
-
-static void sourceStreamPrepend( SourceStream *is, RunBuf *runBuf )
-{
- if ( is->queue == 0 ) {
- runBuf->prev = runBuf->next = 0;
- is->queue = is->queueTail = runBuf;
- }
- else {
- is->queue->prev = runBuf;
- runBuf->prev = 0;
- runBuf->next = is->queue;
- is->queue = runBuf;
- }
-}
-
-void initInputFuncs()
-{
- initFdFuncs();
- initFileFuncs();
- initPatternFuncs();
- initReplFuncs();
-}
-
-/*
- * Base run-time input streams.
- */
-
-int fdGetData( SourceStream *is, int skip, char *dest, int length, int *copied )
-{
- int ret = 0;
- *copied = 0;
-
- if ( skip == 9 && length == 6 ) {
- debug( REALM_INPUT, "foo\n" );
- }
-
- /* Move over skip bytes. */
- RunBuf *buf = is->queue;
- while ( true ) {
- if ( buf == 0 ) {
- /* Got through the in-mem buffers without copying anything. */
- RunBuf *runBuf = newRunBuf();
- sourceStreamAppend( is, runBuf );
- int received = is->funcs->getDataImpl( is, runBuf->data, FSM_BUFSIZE );
- if ( received == 0 ) {
- ret = INPUT_EOD;
- break;
- }
- runBuf->length = received;
-
- int slen = received < length ? received : length;
- memcpy( dest, runBuf->data, slen );
- *copied = slen;
- ret = INPUT_DATA;
- break;
- }
-
- int avail = buf->length - buf->offset;
-
- /* Anything available in the current buffer. */
- if ( avail > 0 ) {
- /* The source data from the current buffer. */
- char *src = &buf->data[buf->offset];
-
- /* Need to skip? */
- if ( skip > 0 && skip >= avail ) {
- /* Skipping the the whole source. */
- skip -= avail;
- }
- else {
- /* Either skip is zero, or less than slen. Skip goes to zero.
- * Some data left over, copy it. */
- src += skip;
- avail -= skip;
- skip = 0;
-
- int slen = avail < length ? avail : length;
- memcpy( dest, src, slen ) ;
- *copied += slen;
- ret = INPUT_DATA;
- break;
- }
- }
-
- buf = buf->next;
- }
-
- return ret;
-}
-
-int fdConsumeData( SourceStream *is, int length )
-{
- debug( REALM_INPUT, "source consuming %ld bytes\n", length );
-
- int consumed = 0;
-
- /* Move over skip bytes. */
- while ( true ) {
- RunBuf *buf = is->queue;
-
- if ( buf == 0 )
- break;
-
- if ( buf->type == RunBufTokenType )
- break;
- else if ( buf->type == RunBufIgnoreType )
- break;
- else {
- /* Anything available in the current buffer. */
- int avail = buf->length - buf->offset;
- if ( avail > 0 ) {
- /* The source data from the current buffer. */
- int slen = avail <= length ? avail : length;
- debug( REALM_INPUT, "consumed: %.*s\n", slen, buf->data + buf->offset );
- consumed += slen;
- length -= slen;
- buf->offset += slen;
- }
- }
-
- if ( length == 0 )
- break;
-
- RunBuf *runBuf = sourceStreamPopHead( is );
- free( runBuf );
- }
-
- return consumed;
-}
-
-int fdUndoConsumeData( SourceStream *is, const char *data, int length )
-{
- debug( REALM_INPUT, "undoing consume of %ld bytes\n", length );
-
- RunBuf *newBuf = newRunBuf();
- newBuf->length = length;
- memcpy( newBuf->data, data, length );
- sourceStreamPrepend( is, newBuf );
-
- return length;
-}
-
-/*
- * File
- */
-
-int fileGetDataImpl( SourceStream *is, char *dest, int length )
-{
- debug( REALM_INPUT, "inputStreamFileGetDataImpl length = %ld\n", length );
- size_t res = fread( dest, 1, length, is->file );
- return res;
-}
-
-void initFileFuncs()
-{
- memset( &fileFuncs, 0, sizeof(struct SourceFuncs) );
- fileFuncs.getData = &fdGetData;
- fileFuncs.consumeData = &fdConsumeData;
- fileFuncs.undoConsumeData = &fdUndoConsumeData;
- fileFuncs.getDataImpl = &fileGetDataImpl;
-}
-
-/*
- * FD
- */
-
-int fdGetDataImpl( SourceStream *is, char *dest, int length )
-{
- long got = read( is->fd, dest, length );
- return got;
-}
-
-void initFdFuncs()
-{
- memset( &fdFuncs, 0, sizeof(struct SourceFuncs) );
- fdFuncs.getData = &fdGetData;
- fdFuncs.consumeData = &fdConsumeData;
- fdFuncs.undoConsumeData = &fdUndoConsumeData;
- fdFuncs.getDataImpl = &fdGetDataImpl;
-}
-
-/*
- * InputStream struct, this wraps the list of input streams.
- */
-
-void initInputStream( InputStream *inputStream )
-{
- memset( inputStream, 0, sizeof(InputStream) );
-
- /* FIXME: correct values here. */
- inputStream->line = 1;
- inputStream->column = 1;
- inputStream->byte = 0;
-}
-
-void clearInputStream( struct ColmProgram *prg, Tree **sp, InputStream *inputStream )
-{
- RunBuf *buf = inputStream->queue;
- while ( buf != 0 ) {
- switch ( buf->type ) {
- case RunBufDataType:
- break;
-
- case RunBufTokenType:
- case RunBufIgnoreType:
- case RunBufSourceType:
- treeDownref( prg, sp, buf->tree );
- break;
- }
-
- RunBuf *next = buf->next;
- free( buf );
- buf = next;
- }
-
- inputStream->queue = 0;
-}
-
-static void inputStreamPrepend( InputStream *is, RunBuf *runBuf )
-{
- if ( is->queue == 0 ) {
- runBuf->prev = runBuf->next = 0;
- is->queue = is->queueTail = runBuf;
- }
- else {
- is->queue->prev = runBuf;
- runBuf->prev = 0;
- runBuf->next = is->queue;
- is->queue = runBuf;
- }
-}
-
-static RunBuf *inputStreamPopHead( InputStream *is )
-{
- RunBuf *ret = is->queue;
- is->queue = is->queue->next;
- if ( is->queue == 0 )
- is->queueTail = 0;
- else
- is->queue->prev = 0;
- return ret;
-}
-
-static void inputStreamAppend( InputStream *is, RunBuf *runBuf )
-{
- if ( is->queue == 0 ) {
- runBuf->prev = runBuf->next = 0;
- is->queue = is->queueTail = runBuf;
- }
- else {
- is->queueTail->next = runBuf;
- runBuf->prev = is->queueTail;
- runBuf->next = 0;
- is->queueTail = runBuf;
- }
-}
-
-static RunBuf *inputStreamPopTail( InputStream *is )
-{
- RunBuf *ret = is->queueTail;
- is->queueTail = is->queueTail->prev;
- if ( is->queueTail == 0 )
- is->queue = 0;
- else
- is->queueTail->next = 0;
- return ret;
-}
-
-static int isSourceStream( InputStream *is )
-{
- if ( is->queue != 0 && is->queue->type == RunBufSourceType )
- return true;
- return false;
-}
-
-void setEof( InputStream *is )
-{
- debug( REALM_INPUT, "setting EOF in input stream\n" );
- is->eof = true;
-}
-
-void unsetEof( InputStream *is )
-{
- if ( isSourceStream( is ) ) {
- Stream *stream = (Stream*)is->queue->tree;
- stream->in->eof = false;
- }
- else {
- is->eof = false;
- }
-}
-
-int getData( FsmRun *fsmRun, InputStream *is, int skip, char *dest, int length, int *copied )
-{
- int ret = 0;
- *copied = 0;
-
- /* Move over skip bytes. */
- RunBuf *buf = is->queue;
- while ( true ) {
- if ( buf == 0 ) {
- /* Got through the in-mem buffers without copying anything. */
- ret = is->eof ? INPUT_EOF : INPUT_EOD;
- break;
- }
-
- if ( buf->type == RunBufSourceType ) {
- Stream *stream = (Stream*)buf->tree;
- int type = stream->in->funcs->getData( stream->in, skip, dest, length, copied );
-
- attachSource( fsmRun, stream->in );
-
- if ( type == INPUT_EOD && is->eof ) {
- ret = INPUT_EOF;
- break;
- }
-
- ret = type;
- break;
- }
-
- if ( buf->type == RunBufTokenType ) {
- ret = INPUT_TREE;
- break;
- }
-
- if ( buf->type == RunBufIgnoreType ) {
- ret = INPUT_IGNORE;
- break;
- }
-
- int avail = buf->length - buf->offset;
-
- /* Anything available in the current buffer. */
- if ( avail > 0 ) {
- /* The source data from the current buffer. */
- char *src = &buf->data[buf->offset];
-
- /* Need to skip? */
- if ( skip > 0 && skip >= avail ) {
- /* Skipping the the whole source. */
- skip -= avail;
- }
- else {
- /* Either skip is zero, or less than slen. Skip goes to zero.
- * Some data left over, copy it. */
- src += skip;
- avail -= skip;
- skip = 0;
-
- int slen = avail <= length ? avail : length;
- memcpy( dest, src, slen ) ;
- *copied += slen;
- ret = INPUT_DATA;
- break;
- }
- }
-
- buf = buf->next;
- }
-
- attachInput( fsmRun, is );
-
-#if DEBUG
- switch ( ret ) {
- case INPUT_DATA:
- debug( REALM_INPUT, "get data: DATA copied: %d: %.*s\n", *copied, (int)*copied, dest );
- break;
- case INPUT_EOD:
- debug( REALM_INPUT, "get data: EOD\n" );
- break;
- case INPUT_EOF:
- debug( REALM_INPUT, "get data: EOF\n" );
- break;
- case INPUT_TREE:
- debug( REALM_INPUT, "get data: TREE\n" );
- break;
- case INPUT_IGNORE:
- debug( REALM_INPUT, "get data: IGNORE\n" );
- break;
- case INPUT_LANG_EL:
- debug( REALM_INPUT, "get data: LANG_EL\n" );
- break;
- }
-#endif
-
- return ret;
-}
-
-int consumeData( InputStream *is, int length )
-{
- debug( REALM_INPUT, "consuming %d bytes\n", length );
-
- int consumed = 0;
-
- /* Move over skip bytes. */
- while ( true ) {
- RunBuf *buf = is->queue;
-
- if ( buf == 0 )
- break;
-
- if ( buf->type == RunBufSourceType ) {
- Stream *stream = (Stream*)buf->tree;
- int slen = stream->in->funcs->consumeData( stream->in, length );
-
- consumed += slen;
- length -= slen;
- }
- else if ( buf->type == RunBufTokenType )
- break;
- else if ( buf->type == RunBufIgnoreType )
- break;
- else {
- /* Anything available in the current buffer. */
- int avail = buf->length - buf->offset;
- if ( avail > 0 ) {
- /* The source data from the current buffer. */
- int slen = avail <= length ? avail : length;
- consumed += slen;
- length -= slen;
- buf->offset += slen;
- }
- }
-
- if ( length == 0 )
- break;
-
- RunBuf *runBuf = inputStreamPopHead( is );
- free( runBuf );
- }
-
- return consumed;
-}
-
-int undoConsumeData( FsmRun *fsmRun, InputStream *is, const char *data, int length )
-{
- debug( REALM_INPUT, "undoing consume of %ld bytes\n", length );
-
- if ( isSourceStream( is ) ) {
- Stream *stream = (Stream*)is->queue->tree;
- int len = stream->in->funcs->undoConsumeData( stream->in, data, length );
-
- if ( stream->in->attached != 0 )
- detachSource( stream->in->attached, stream->in );
-
- return len;
- }
- else {
- RunBuf *newBuf = newRunBuf();
- newBuf->length = length;
- memcpy( newBuf->data, data, length );
- inputStreamPrepend( is, newBuf );
-
- if ( is->attached != 0 )
- detachInput( is->attached, is );
-
- return length;
- }
-}
-
-Tree *consumeTree( InputStream *is )
-{
- while ( is->queue != 0 && is->queue->type == RunBufDataType && is->queue->offset == is->queue->length ) {
- RunBuf *runBuf = inputStreamPopHead( is );
- free( runBuf );
- }
-
- if ( is->queue != 0 && (is->queue->type == RunBufTokenType || is->queue->type == RunBufIgnoreType) ) {
- RunBuf *runBuf = inputStreamPopHead( is );
-
- /* FIXME: using runbufs here for this is a poor use of memory. */
- Tree *tree = runBuf->tree;
- free(runBuf);
- return tree;
- }
-
- return 0;
-}
-
-void undoConsumeTree( InputStream *is, Tree *tree, int ignore )
-{
- if ( is->attached != 0 )
- detachInput( is->attached, is );
-
- /* Create a new buffer for the data. This is the easy implementation.
- * Something better is needed here. It puts a max on the amount of
- * data that can be pushed back to the inputStream. */
- RunBuf *newBuf = newRunBuf();
- newBuf->type = ignore ? RunBufIgnoreType : RunBufTokenType;
- newBuf->tree = tree;
- inputStreamPrepend( is, newBuf );
-}
-
-struct LangEl *consumeLangEl( InputStream *is, long *bindId, char **data, long *length )
-{
- if ( isSourceStream( is ) ) {
- Stream *stream = (Stream*)is->queue->tree;
- return stream->in->funcs->consumeLangEl( stream->in, bindId, data, length );
- }
- else {
- assert( false );
- }
-}
-
-void undoConsumeLangEl( InputStream *is )
-{
- if ( isSourceStream( is ) ) {
- Stream *stream = (Stream*)is->queue->tree;
- return stream->in->funcs->undoConsumeLangEl( stream->in );
- }
- else {
- assert( false );
- }
-}
-
-void prependData( InputStream *is, const char *data, long length )
-{
- if ( is->attached != 0 )
- detachInput( is->attached, is );
-
- /* Create a new buffer for the data. This is the easy implementation.
- * Something better is needed here. It puts a max on the amount of
- * data that can be pushed back to the inputStream. */
- assert( length < FSM_BUFSIZE );
-
- RunBuf *newBuf = newRunBuf();
- newBuf->length = length;
- memcpy( newBuf->data, data, length );
-
- inputStreamPrepend( is, newBuf );
-}
-
-int undoPrependData( InputStream *is, int length )
-{
- if ( is->attached != 0 )
- detachInput( is->attached, is );
-
- debug( REALM_INPUT, "consuming %d bytes\n", length );
-
- int consumed = 0;
-
- /* Move over skip bytes. */
- while ( true ) {
- RunBuf *buf = is->queue;
-
- if ( buf == 0 )
- break;
-
- if ( buf->type == RunBufSourceType ) {
- Stream *stream = (Stream*)buf->tree;
- int slen = stream->in->funcs->consumeData( stream->in, length );
-
- consumed += slen;
- length -= slen;
- }
- else if ( buf->type == RunBufTokenType )
- break;
- else if ( buf->type == RunBufIgnoreType )
- break;
- else {
- /* Anything available in the current buffer. */
- int avail = buf->length - buf->offset;
- if ( avail > 0 ) {
- /* The source data from the current buffer. */
- int slen = avail <= length ? avail : length;
- consumed += slen;
- length -= slen;
- buf->offset += slen;
- }
- }
-
- if ( length == 0 )
- break;
-
- RunBuf *runBuf = inputStreamPopHead( is );
- free( runBuf );
- }
-
- return consumed;
-}
-
-void prependTree( InputStream *is, Tree *tree, int ignore )
-{
- if ( is->attached != 0 )
- detachInput( is->attached, is );
-
- /* Create a new buffer for the data. This is the easy implementation.
- * Something better is needed here. It puts a max on the amount of
- * data that can be pushed back to the inputStream. */
- RunBuf *newBuf = newRunBuf();
- newBuf->type = ignore ? RunBufIgnoreType : RunBufTokenType;
- newBuf->tree = tree;
- inputStreamPrepend( is, newBuf );
-}
-
-Tree *undoPrependTree( InputStream *is )
-{
- if ( is->attached != 0 )
- detachInput( is->attached, is );
-
- while ( is->queue != 0 && is->queue->type == RunBufDataType && is->queue->offset == is->queue->length ) {
- RunBuf *runBuf = inputStreamPopHead( is );
- free( runBuf );
- }
-
- if ( is->queue != 0 && (is->queue->type == RunBufTokenType || is->queue->type == RunBufIgnoreType) ) {
- RunBuf *runBuf = inputStreamPopHead( is );
-
- /* FIXME: using runbufs here for this is a poor use of memory. */
- Tree *tree = runBuf->tree;
- free(runBuf);
- return tree;
- }
-
- return 0;
-}
-
-void appendData( InputStream *is, const char *data, long len )
-{
- while ( len > 0 ) {
- RunBuf *ad = newRunBuf();
- inputStreamAppend( is, ad );
-
- long consume =
- len <= (long)sizeof(ad->data) ?
- len : (long)sizeof(ad->data);
-
- memcpy( ad->data, data, consume );
- ad->length = consume;
-
- len -= consume;
- data += consume;
- }
-}
-
-Tree *undoAppendData( InputStream *is, int length )
-{
- if ( is->attached != 0 )
- detachInput( is->attached, is );
-
- int consumed = 0;
-
- /* Move over skip bytes. */
- while ( true ) {
- RunBuf *buf = is->queueTail;
-
- if ( buf == 0 )
- break;
-
- if ( buf->type == RunBufTokenType )
- break;
- else if ( buf->type == RunBufIgnoreType )
- break;
- else {
- /* Anything available in the current buffer. */
- int avail = buf->length - buf->offset;
- if ( avail > 0 ) {
- /* The source data from the current buffer. */
- int slen = avail <= length ? avail : length;
- consumed += slen;
- length -= slen;
- buf->length -= slen;
- }
- }
-
- if ( length == 0 )
- break;
-
- RunBuf *runBuf = inputStreamPopTail( is );
- free( runBuf );
- }
-
- return 0;
-}
-
-void appendTree( InputStream *is, Tree *tree )
-{
- RunBuf *ad = newRunBuf();
-
- inputStreamAppend( is, ad );
-
- ad->type = RunBufTokenType;
- ad->tree = tree;
- ad->length = 0;
-}
-
-void appendStream( InputStream *in, struct ColmTree *tree )
-{
- RunBuf *ad = newRunBuf();
-
- inputStreamAppend( in, ad );
-
- ad->type = RunBufSourceType;
- ad->tree = tree;
- ad->length = 0;
-}
-
-Tree *undoAppendStream( InputStream *is )
-{
- if ( is->attached != 0 )
- detachInput( is->attached, is );
-
- RunBuf *runBuf = inputStreamPopTail( is );
- Tree *tree = runBuf->tree;
- free( runBuf );
- return tree;
-}
-
-Tree *undoAppendTree( InputStream *is )
-{
- if ( is->attached != 0 )
- detachInput( is->attached, is );
-
- RunBuf *runBuf = inputStreamPopTail( is );
- Tree *tree = runBuf->tree;
- free( runBuf );
- return tree;
-}
diff --git a/colm/input.h b/colm/input.h
deleted file mode 100644
index 882c6b31..00000000
--- a/colm/input.h
+++ /dev/null
@@ -1,214 +0,0 @@
-/*
- * Copyright 2007-2012 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Colm.
- *
- * Colm is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Colm is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Colm; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#ifndef _INPUT_H
-#define _INPUT_H
-
-#include <stdio.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define FSM_BUFSIZE 8192
-//#define FSM_BUFSIZE 8
-
-#define INPUT_DATA 1
-/* This is for data sources to return, not for the wrapper. */
-#define INPUT_EOD 2
-#define INPUT_EOF 3
-#define INPUT_LANG_EL 4
-#define INPUT_TREE 5
-#define INPUT_IGNORE 6
-
-/*
- * pdaRun <- fsmRun <- stream
- *
- * Activities we need to support:
- *
- * 1. Stuff data into an input stream each time we <<
- * 2. Detach an input stream, and attach another when we include
- * 3. Send data back to an input stream when the parser backtracks
- * 4. Temporarily stop parsing due to a lack of input.
- *
- * At any given time, the fsmRun struct may have a prefix of the stream's
- * input. If getting data we first get what we can out of the fsmRun, then
- * consult the stream. If sending data back, we first shift pointers in the
- * fsmRun, then ship to the stream. If changing streams the old stream needs to
- * take back unprocessed data from the fsmRun.
- */
-
-struct LangEl;
-struct Pattern;
-struct PatternItem;
-struct Replacement;
-struct ReplItem;
-struct _FsmRun;
-struct ColmTree;
-
-enum RunBufType {
- RunBufDataType = 0,
- RunBufTokenType,
- RunBufIgnoreType,
- RunBufSourceType
-};
-
-typedef struct _RunBuf
-{
- enum RunBufType type;
- char data[FSM_BUFSIZE];
- long length;
- struct ColmTree *tree;
- long offset;
- struct _RunBuf *next, *prev;
-} RunBuf;
-
-RunBuf *newRunBuf();
-
-typedef struct _SourceStream SourceStream;
-
-struct SourceFuncs
-{
- /* Data. */
- int (*getData)( SourceStream *is, int offset, char *dest, int length, int *copied );
- int (*consumeData)( SourceStream *is, int length );
- int (*undoConsumeData)( SourceStream *is, const char *data, int length );
-
- /* Language elments (compile-time). */
- struct LangEl *(*consumeLangEl)( SourceStream *is, long *bindId, char **data, long *length );
- void (*undoConsumeLangEl)( SourceStream *is );
-
- /* Private implmentation for some shared get data functions. */
- int (*getDataImpl)( SourceStream *is, char *dest, int length );
-};
-
-struct _SourceStream
-{
- struct SourceFuncs *funcs;
-
- struct _FsmRun *hasData;
-
- char eofSent;
- char eof;
-
- long line;
- long column;
- long byte;
-
- /* This is set true for input streams that do their own line counting.
- * Causes FsmRun to ignore NLs. */
- int handlesLine;
-
- RunBuf *queue;
- RunBuf *queueTail;
-
- const char *data;
- long dlen;
- int offset;
-
- FILE *file;
- long fd;
-
- struct Pattern *pattern;
- struct PatternItem *patItem;
- struct Replacement *replacement;
- struct ReplItem *replItem;
-
- struct _FsmRun *attached;
-};
-
-SourceStream *newSourceStreamPattern( struct Pattern *pattern );
-SourceStream *newSourceStreamRepl( struct Replacement *replacement );
-SourceStream *newSourceStreamFile( FILE *file );
-SourceStream *newSourceStreamFd( long fd );
-
-void initInputFuncs();
-void initStaticFuncs();
-void initPatternFuncs();
-void initReplFuncs();
-
-/* List of input streams. Enables streams to be pushed/popped. */
-struct _InputStream
-{
- char eofSent;
- char eof;
-
- long line;
- long column;
- long byte;
-
- /* This is set true for input streams that do their own line counting.
- * Causes FsmRun to ignore NLs. */
- int handlesLine;
-
- RunBuf *queue;
- RunBuf *queueTail;
-
- const char *data;
- long dlen;
- int offset;
-
- FILE *file;
- long fd;
-
- struct Pattern *pattern;
- struct PatternItem *patItem;
- struct Replacement *replacement;
- struct ReplItem *replItem;
-
- struct _FsmRun *attached;
-};
-
-typedef struct _InputStream InputStream;
-
-/* The input stream interface. */
-
-int getData( struct _FsmRun *fsmRun, InputStream *in, int offset, char *dest, int length, int *copied );
-int consumeData( InputStream *in, int length );
-int undoConsumeData( struct _FsmRun *fsmRun, InputStream *is, const char *data, int length );
-
-struct ColmTree *consumeTree( InputStream *in );
-void undoConsumeTree( InputStream *in, struct ColmTree *tree, int ignore );
-
-struct LangEl *consumeLangEl( InputStream *in, long *bindId, char **data, long *length );
-void undoConsumeLangEl( InputStream *in );
-
-void setEof( InputStream *is );
-void unsetEof( InputStream *is );
-
-void prependData( InputStream *in, const char *data, long len );
-int undoPrependData( InputStream *is, int length );
-
-void prependTree( InputStream *is, struct ColmTree *tree, int ignore );
-struct ColmTree *undoPrependTree( InputStream *is );
-
-void appendData( InputStream *in, const char *data, long len );
-void appendTree( InputStream *in, struct ColmTree *tree );
-void appendStream( InputStream *in, struct ColmTree *tree );
-struct ColmTree *undoAppendData( InputStream *in, int length );
-struct ColmTree *undoAppendStream( InputStream *in );
-struct ColmTree *undoAppendTree( InputStream *in );
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _INPUT_H */
diff --git a/colm/keyops.h b/colm/keyops.h
deleted file mode 100644
index 1808c6a6..00000000
--- a/colm/keyops.h
+++ /dev/null
@@ -1,283 +0,0 @@
-/*
- * Copyright 2001-2012 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Colm.
- *
- * Colm is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Colm is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Colm; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#ifndef _KEYOPS_H
-#define _KEYOPS_H
-
-#include <fstream>
-#include <climits>
-
-enum MarkType
-{
- MarkNone = 0,
- MarkMark
-};
-
-typedef unsigned long long Size;
-
-/* A single alphabet key, stored as a raw long.
- *
- * The stored value carries no signedness of its own: the ordering
- * comparisons, increment(), decrement() and getLongLong() consult the global
- * keyOps object to decide whether the value is read as signed or unsigned. */
-struct Key
-{
-private:
-    long key;
-
-public:
-    friend inline Key operator+(const Key key1, const Key key2);
-    friend inline Key operator-(const Key key1, const Key key2);
-    friend inline Key operator/(const Key key1, const Key key2);
-    friend inline long operator&(const Key key1, const Key key2);
-
-    friend inline bool operator<( const Key key1, const Key key2 );
-    friend inline bool operator<=( const Key key1, const Key key2 );
-    friend inline bool operator>( const Key key1, const Key key2 );
-    friend inline bool operator>=( const Key key1, const Key key2 );
-    friend inline bool operator==( const Key key1, const Key key2 );
-    friend inline bool operator!=( const Key key1, const Key key2 );
-
-    friend struct KeyOps;
-
-    /* A default-constructed key is zero. Without this, members such as
-     * KeyOps::minKey and KeyOps::maxKey hold an indeterminate value until
-     * setAlphType() is called. */
-    Key( ) : key(0) {}
-    Key( const Key &key ) : key(key.key) {}
-    Key( long key ) : key(key) {}
-
-    /* Returns the value used to represent the key. This value must be
-     * interpreted based on signedness. */
-    long getVal() const { return key; }
-
-    /* Returns the key cast to a long long. This form of the key does not
-     * require any signedness interpretation. */
-    long long getLongLong() const;
-
-    /* ASCII classification of the raw value. */
-    bool isUpper() const { return ( 'A' <= key && key <= 'Z' ); }
-    bool isLower() const { return ( 'a' <= key && key <= 'z' ); }
-    bool isPrintable() const { return ( 32 <= key && key < 127 ); }
-
-    /* Case conversion by offset only. Neither function checks its input:
-     * toUpper() is meaningful for keys where isLower() holds and toLower()
-     * for keys where isUpper() holds. */
-    Key toUpper() const
-        { return Key( 'A' + ( key - 'a' ) ); }
-    Key toLower() const
-        { return Key( 'a' + ( key - 'A' ) ); }
-
-    void operator+=( const Key other )
-    {
-        /* FIXME: must be made aware of isSigned. */
-        key += other.key;
-    }
-
-    void operator-=( const Key other )
-    {
-        /* FIXME: must be made aware of isSigned. */
-        key -= other.key;
-    }
-
-    void operator|=( const Key other )
-    {
-        /* FIXME: must be made aware of isSigned. */
-        key |= other.key;
-    }
-
-    /* Decrement and increment. Needed only for ranges. */
-    inline void decrement();
-    inline void increment();
-};
-
-struct HostType /* One entry of a HostLang type table; KeyOps reads its signedness and value range. */
-{
- const char *data1; /* NOTE(review): meaning of data1/data2 is not visible in this header -- confirm. */
- const char *data2;
- bool isSigned; /* Copied into KeyOps::isSigned by setAlphType(). */
- long long minVal; /* Becomes KeyOps::minKey in setAlphType(). */
- long long maxVal; /* Becomes KeyOps::maxKey in setAlphType(); also the bound tested by typeSubsumes(). */
- unsigned int size; /* NOTE(review): presumably the size of the type in bytes -- confirm. */
-};
-
-struct HostLang /* Type table for one target language; the active one is reached through the hostLang pointer. */
-{
- HostType *hostTypes; /* Array scanned in index order by KeyOps::typeSubsumes(). */
- int numHostTypes; /* Number of entries in hostTypes. */
- HostType *defaultAlphType; /* NOTE(review): presumably the alphabet type used when none is chosen -- confirm. */
- bool explicitUnsigned; /* NOTE(review): meaning is not visible in this header -- confirm. */
-};
-
-
-/* Target language. */
-enum HostLangType
-{
- CCode,
- DCode,
- JavaCode,
- RubyCode
-};
-
-extern HostLang *hostLang;
-extern HostLangType hostLangType;
-
-extern HostLang hostLangC;
-extern HostLang hostLangD;
-extern HostLang hostLangJava;
-extern HostLang hostLangRuby;
-
-/* An abstraction of the key operators that manages key operations such as
- * comparison and increment according to the signedness of the key. */
-struct KeyOps
-{
-    /* Default to signed alphabet, with no alphabet type selected. */
-    KeyOps() :
-        isSigned(true),
-        alphType(0)
-    {}
-
-    /* Use the given signedness. alphType starts out null here too, so it is
-     * never left indeterminate before setAlphType() is called. */
-    KeyOps( bool isSigned ) :
-        isSigned(isSigned),
-        alphType(0)
-    {}
-
-    bool isSigned;
-    Key minKey, maxKey;
-    HostType *alphType;
-
-    /* Select the alphabet type: record it, adopt its signedness and take its
-     * value range as [minKey, maxKey]. For an unsigned type the bounds are
-     * converted to unsigned long first and then to long. */
-    void setAlphType( HostType *alphType )
-    {
-        this->alphType = alphType;
-        isSigned = alphType->isSigned;
-        if ( isSigned ) {
-            minKey = (long) alphType->minVal;
-            maxKey = (long) alphType->maxVal;
-        }
-        else {
-            minKey = (long) (unsigned long) alphType->minVal;
-            maxKey = (long) (unsigned long) alphType->maxVal;
-        }
-    }
-
-    /* Compute the distance between two keys, counting both end points. */
-    Size span( Key key1, Key key2 )
-    {
-        if ( isSigned ) {
-            return (unsigned long long)(
-                    (long long)key2.key -
-                    (long long)key1.key + 1 );
-        }
-        return (unsigned long long)((unsigned long)key2.key) -
-                (unsigned long long)((unsigned long)key1.key) + 1;
-    }
-
-    /* Number of keys between minKey and maxKey inclusive. */
-    Size alphSize()
-        { return span( minKey, maxKey ); }
-
-    /* First host type, in table order, whose maxVal is at least maxVal,
-     * whatever its signedness. Returns null when there is none. */
-    HostType *typeSubsumes( long long maxVal )
-        { return typeSubsumes( false, maxVal ); }
-
-    /* First host type, in table order, whose maxVal is at least maxVal. When
-     * isSigned is true only signed host types are considered. Returns null
-     * when there is none. */
-    HostType *typeSubsumes( bool isSigned, long long maxVal )
-    {
-        for ( int i = 0; i < hostLang->numHostTypes; i++ ) {
-            HostType *candidate = hostLang->hostTypes + i;
-
-            /* A signed request cannot be met by an unsigned host type. */
-            if ( isSigned && !candidate->isSigned )
-                continue;
-
-            if ( maxVal <= candidate->maxVal )
-                return candidate;
-        }
-        return 0;
-    }
-};
-
-extern KeyOps *keyOps;
-
-/* Ordering comparisons follow the alphabet's signedness: the stored longs are
- * compared directly for a signed alphabet and as unsigned long otherwise. */
-inline bool operator<( const Key key1, const Key key2 )
-{
-    if ( keyOps->isSigned )
-        return key1.key < key2.key;
-    return (unsigned long)key1.key < (unsigned long)key2.key;
-}
-
-inline bool operator<=( const Key key1, const Key key2 )
-{
-    if ( keyOps->isSigned )
-        return key1.key <= key2.key;
-    return (unsigned long)key1.key <= (unsigned long)key2.key;
-}
-
-inline bool operator>( const Key key1, const Key key2 )
-{
-    if ( keyOps->isSigned )
-        return key1.key > key2.key;
-    return (unsigned long)key1.key > (unsigned long)key2.key;
-}
-
-inline bool operator>=( const Key key1, const Key key2 )
-{
-    if ( keyOps->isSigned )
-        return key1.key >= key2.key;
-    return (unsigned long)key1.key >= (unsigned long)key2.key;
-}
-
-/* Equality does not depend on signedness; the stored values are compared
- * as they are. */
-inline bool operator==( const Key key1, const Key key2 )
-{
-    return !( key1.key != key2.key );
-}
-
-inline bool operator!=( const Key key1, const Key key2 )
-{
-    return !( key1.key == key2.key );
-}
-
-/* Step the key down by one. Needed only for ranges. For an unsigned
- * alphabet the subtraction is carried out on unsigned long. */
-inline void Key::decrement()
-{
-    if ( keyOps->isSigned )
-        key = key - 1;
-    else
-        key = ((unsigned long)key) - 1;
-}
-
-/* Step the key up by one. Needed only for ranges. For an unsigned alphabet
- * the addition is carried out on unsigned long. */
-inline void Key::increment()
-{
-    if ( keyOps->isSigned )
-        key = key + 1;
-    else
-        key = ((unsigned long)key) + 1;
-}
-
-/* Widen the key to long long, going through unsigned long first when the
- * alphabet is unsigned. */
-inline long long Key::getLongLong() const
-{
-    if ( keyOps->isSigned )
-        return (long long)key;
-    return (long long)(unsigned long)key;
-}
-
-/* Arithmetic on the raw stored values. None of these consult keyOps. */
-inline Key operator+(const Key key1, const Key key2)
-{
-    /* FIXME: must be made aware of isSigned. */
-    const long sum = key1.key + key2.key;
-    return Key( sum );
-}
-
-inline Key operator-(const Key key1, const Key key2)
-{
-    /* FIXME: must be made aware of isSigned. */
-    const long difference = key1.key - key2.key;
-    return Key( difference );
-}
-
-inline long operator&(const Key key1, const Key key2)
-{
-    /* FIXME: must be made aware of isSigned. */
-    const long masked = key1.key & key2.key;
-    return masked;
-}
-
-inline Key operator/(const Key key1, const Key key2)
-{
-    /* FIXME: must be made aware of isSigned. */
-    const long quotient = key1.key / key2.key;
-    return Key( quotient );
-}
-
-const char *findFileExtension( const char *stemFile );
-char *fileNameFromStem( const char *stemFile, const char *suffix );
-
-#endif /* _KEYOPS_H */
diff --git a/colm/list.c b/colm/list.c
deleted file mode 100644
index d9180b73..00000000
--- a/colm/list.c
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- * Copyright 2007-2012 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Colm.
- *
- * Colm is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Colm is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Colm; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include <colm/pdarun.h>
-
-/* Link new_el into list directly after prev_el. A null prev_el inserts at
- * the head of the list. */
-void listAddAfter( List *list, ListEl *prev_el, ListEl *new_el )
-{
-    /* The element that will follow new_el: the old head when inserting at
-     * the front, otherwise whatever used to follow prev_el. */
-    ListEl *following = prev_el ? prev_el->next : list->head;
-
-    new_el->prev = prev_el;
-    new_el->next = following;
-
-    /* Forward link into new_el. */
-    if ( prev_el )
-        prev_el->next = new_el;
-    else
-        list->head = new_el;
-
-    /* Backward link into new_el, or the tail pointer when new_el is last. */
-    if ( following )
-        following->prev = new_el;
-    else
-        list->tail = new_el;
-
-    list->listLen += 1;
-}
-
-/* Link new_el into list directly before next_el. A null next_el inserts at
- * the tail of the list. */
-void listAddBefore( List *list, ListEl *next_el, ListEl *new_el)
-{
-    /* The element that will precede new_el: the old tail when inserting at
-     * the end, otherwise whatever used to precede next_el. */
-    ListEl *preceding = next_el ? next_el->prev : list->tail;
-
-    new_el->next = next_el;
-    new_el->prev = preceding;
-
-    /* Backward link into new_el. */
-    if ( next_el )
-        next_el->prev = new_el;
-    else
-        list->tail = new_el;
-
-    /* Forward link into new_el, or the head pointer when new_el is first. */
-    if ( preceding )
-        preceding->next = new_el;
-    else
-        list->head = new_el;
-
-    list->listLen += 1;
-}
-
-/* Unlink el from list and return it. The prev and next pointers stored in
- * el itself are left as they were. */
-ListEl *listDetach( List *list, ListEl *el )
-{
-    ListEl *before = el->prev;
-    ListEl *after = el->next;
-
-    /* Make the forward chain skip over el. */
-    if ( before )
-        before->next = after;
-    else
-        list->head = after;
-
-    /* Make the backward chain skip over el. */
-    if ( after )
-        after->prev = before;
-    else
-        list->tail = before;
-
-    list->listLen -= 1;
-    return el;
-}
-
-
diff --git a/colm/lmparse.kh b/colm/lmparse.kh
deleted file mode 100644
index 86b70b6f..00000000
--- a/colm/lmparse.kh
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- * Copyright 2001-2007 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Colm.
- *
- * Colm is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Colm is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Colm; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#ifndef RLPARSE_H
-#define RLPARSE_H
-
-#include <iostream>
-#include "avltree.h"
-#include "parsedata.h"
-
-#define PROPERTY_REDUCE_FIRST 0x1
-
-/* Parser object for Colm source: the Kelbt-generated parser instance data
- * plus the scratch members that the grammar actions build up while parsing. */
-struct ColmParser
-{
-    /* fileName and sectionLoc are accepted but not stored. The raw pointer
-     * scratch members start out null so that none of them is ever read while
-     * indeterminate. */
-    ColmParser( Compiler *pd, const char *fileName, const char *sectionName, const InputLoc &sectionLoc )
-        : pd(pd), sectionName(sectionName),
-        patternItemList(0), replItemList(0),
-        curDefList(0), curProdElList(0),
-        enterRl(false)
-    {}
-
-    %%{
-        parser ColmParser;
-
-        # Use a class for tokens.
-        token uses class Token;
-
-        # Atoms.
-        token TK_Word, TK_Literal, TK_SingleLit, TK_DoubleLit, TK_Number, TK_UInt,
-            TK_Hex, KW_Nil, KW_True, KW_False;
-
-        # General tokens.
-        token TK_DotDot, TK_ColonGt, TK_ColonGtGt, TK_LtColon,
-            TK_DoubleArrow, TK_StarStar, TK_NameSep, TK_DashDash, TK_DoubleEql,
-            TK_NotEql, TK_DoubleColon, TK_LessEql, TK_GrtrEql, TK_RightArrow,
-            TK_LitPat, TK_AmpAmp, TK_BarBar, TK_SqOpen, TK_SqOpenNeg, TK_SqClose,
-            TK_Dash, TK_ReChar, TK_LtLt;
-
-        # Defining things.
-        token KW_Rl, KW_Def, KW_Lex, KW_Context, KW_Ignore, KW_Token, KW_Commit, KW_Namespace,
-            KW_Literal, KW_ReduceFirst, KW_Map, KW_List, KW_Vector, KW_Accum, KW_Global, KW_Export,
-            KW_Iter, KW_Reject, KW_Ptr, KW_Ref, KW_Deref;
-
-        # Language.
-        token KW_If, KW_While, KW_Else, KW_Elsif, KW_For, KW_Return, KW_Yield, KW_In,
-            KW_Break, KW_PrintXMLAC, KW_PrintXML, KW_Print, KW_PrintStream, KW_Require;
-
-        # Patterns.
-        token KW_Match, KW_Construct, KW_Parse, KW_ParseStop, KW_New, KW_MakeToken,
-            KW_MakeTree, KW_TypeId, KW_Alias, KW_Send, KW_Ni, KW_Ci;
-
-        token KW_Include, KW_Preeof;
-
-        token KW_Left, KW_Right, KW_Nonassoc, KW_Prec;
-
-    }%%
-
-    %% write instance_data;
-
-
-    void init();
-    int parseLangEl( int type, const Token *token );
-
-    int token( InputLoc &loc, int tokId, char *tokstart, int toklen );
-    void addRegularDef( const InputLoc &loc, Namespace *nspace,
-            const String &name, Join *join );
-    TokenRegion *createRegion( String &name );
-    void addRegionDef( const InputLoc &loc, Namespace *nspace,
-            const String &name, TokenRegion *join );
-    void addProduction( const InputLoc &loc, const String &name,
-            ProdElList *prodElList, bool commit, CodeBlock *redBlock, LangEl *predOf );
-    void addArgvList();
-
-    /* Report an error encountered by the parser. */
-    ostream &parse_error( int tokId, Token &token );
-
-    Compiler *pd;
-
-    /* The name of the root section, this does not change during an include. */
-    const char *sectionName;
-
-    NameRef nameRef;
-    NameRefList nameRefList;
-
-    LangElVect langElVect;
-
-    /* Lists currently being filled by the pattern and replacement rules. */
-    PatternItemList *patternItemList;
-    ReplItemList *replItemList;
-    RegionVect regionStack;
-    NamespaceVect namespaceStack;
-    ContextVect contextStack;
-
-    /* State of the definition currently being parsed. */
-    String curDefineId;
-    LelDefList *curDefList;
-    ProdElList *curProdElList;
-
-    /* Set by the pred_type rules before the precedence token list is read. */
-    PredType predType;
-    ReCaptureVect reCaptureVect;
-
-    bool enterRl;
-};
-
-%% write token_defs;
-
-#endif
diff --git a/colm/lmparse.kl b/colm/lmparse.kl
deleted file mode 100644
index 3ead7c98..00000000
--- a/colm/lmparse.kl
+++ /dev/null
@@ -1,2677 +0,0 @@
-/*
- * Copyright 2006-2012 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Colm.
- *
- * Colm is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Colm is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Colm; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include <iostream>
-#include <errno.h>
-
-#include "config.h"
-#include "lmparse.h"
-#include "global.h"
-#include "input.h"
-#include "fsmrun.h"
-
-using std::cout;
-using std::cerr;
-using std::endl;
-
-ParserDict parserDict;
-
-%%{
-
-parser ColmParser;
-
-include "lmparse.kh";
-
-start: root_item_list
- final {
- if ( colm_log_compile ) {
- cerr << "parsing complete" << endl;
- }
-
- pd->rootCodeBlock = new CodeBlock( $1->stmtList );
- };
-
-nonterm root_item_list uses lang_stmt_list;
-
-root_item_list: root_item_list root_item
- final {
- $$->stmtList = $1->stmtList;
-
- /* Maybe a statement. */
- if ( $2->stmt != 0 )
- $$->stmtList->append( $2->stmt );
- };
-
-root_item_list:
- final {
- $$->stmtList = new StmtList;
- };
-
-nonterm root_item uses statement;
-
-root_item: literal_def commit final { $$->stmt = 0; };
-root_item: rl_def commit final { $$->stmt = 0; };
-root_item: token_def commit final { $$->stmt = 0; };
-root_item: cfl_def commit final { $$->stmt = 0; };
-root_item: region_def commit final { $$->stmt = 0; };
-root_item: context_def commit final { $$->stmt = 0; };
-root_item: namespace_def commit final { $$->stmt = 0; };
-root_item: function_def commit final { $$->stmt = 0; };
-root_item: iter_def commit final { $$->stmt = 0; };
-root_item: global_def commit final { $$->stmt = $1->stmt; };
-root_item: statement commit final { $$->stmt = $1->stmt; };
-root_item: pre_eof commit final { $$->stmt = 0; };
-root_item: precedence commit final { $$->stmt = 0; };
-root_item: typedef commit final { $$->stmt = 0; };
-
-nonterm block_open
-{
- ObjectDef *localFrame;
-};
-
-block_open: '{'
- final {
- /* Init the object representing the local frame. */
- $$->localFrame = new ObjectDef( ObjectDef::FrameType,
- "local", pd->nextObjectId++ );
-
- pd->curLocalFrame = $$->localFrame;
-
- /* Add captures to the local frame. We depend on these becoming the
- * first local variables so we can compute their location. */
-
- /* Make local variables corresponding to the local capture vector. */
- for ( ReCaptureVect::Iter c = reCaptureVect; c.lte(); c++ )
- {
- ObjField *objField = new ObjField( c->objField->loc,
- c->objField->typeRef, c->objField->name );
-
- /* Insert it into the field map. */
- pd->curLocalFrame->insertField( objField->name, objField );
- }
- };
-
-block_close: '}'
- final {
- /* Pop the cur local frame, back to the root. */
- pd->curLocalFrame = pd->rootLocalFrame;
- };
-
-
-iter_def:
- KW_Iter TK_Word '(' opt_param_list ')' block_open lang_stmt_list block_close
- final {
- CodeBlock *codeBlock = new CodeBlock( $7->stmtList );
- codeBlock->localFrame = $6->localFrame;
- Function *newFunction = new Function( 0, $2->data,
- $4->paramList, codeBlock, pd->nextFuncId++, true );
- pd->functionList.append( newFunction );
- };
-
-function_def:
- type_ref TK_Word '(' opt_param_list ')' block_open lang_stmt_list block_close
- final {
- CodeBlock *codeBlock = new CodeBlock( $7->stmtList );
- codeBlock->localFrame = $6->localFrame;
- Function *newFunction = new Function( $1->typeRef, $2->data,
- $4->paramList, codeBlock, pd->nextFuncId++, false );
- pd->functionList.append( newFunction );
-
- if ( contextStack.length() > 0 )
- newFunction->inContext = contextStack.top();
- };
-
-nonterm opt_param_list uses param_list;
-
-opt_param_list: param_list
- final {
- $$->paramList = $1->paramList;
- };
-
-opt_param_list:
- final {
- $$->paramList = new ParameterList;
- };
-
-nonterm param_list
-{
- ParameterList *paramList;
-};
-
-param_list: param_list param_var_def
- final {
- $$->paramList = $1->paramList;
- $$->paramList->append( $2->objField );
- };
-
-param_list: param_var_def
- final {
- /* Create the map and insert the first item. */
- $$->paramList = new ParameterList;
- $$->paramList->append( $1->objField );
- };
-
-nonterm param_var_def uses var_def;
-
-param_var_def: TK_Word ':' type_ref
- final {
- $$->objField = new ObjField( $1->loc, $3->typeRef, $1->data );
- $$->objField->isParam = true;
- };
-param_var_def: TK_Word ':' reference_type_ref
- final {
- $$->objField = new ObjField( $1->loc, $3->typeRef, $1->data );
- $$->objField->isParam = true;
- };
-
-nonterm reference_type_ref uses type_ref;
-
-reference_type_ref: KW_Ref type_ref
- final {
- $$->typeRef = new TypeRef( TypeRef::Ref, $1->loc, $2->typeRef );
- };
-
-nonterm global_def uses statement;
-
-global_def: KW_Export var_def opt_def_init
- final {
- $$->stmt = 0;
-
- if ( contextStack.length() != 0 )
- error($2->objField->loc) << "cannot export parser context variables" << endp;
-
- ObjectDef *object = pd->globalObjectDef;
-
- if ( object->checkRedecl( $2->objField->name ) != 0 )
- error($2->objField->loc) << "object field renamed" << endp;
-
- object->insertField( $2->objField->name, $2->objField );
- $2->objField->isExport = true;
-
- if ( $3->expr != 0 ) {
- LangVarRef *varRef = new LangVarRef( $2->objField->loc,
- new QualItemVect, $2->objField->name );
-
- $$->stmt = new LangStmt( $2->objField->loc,
- $3->assignType, varRef, $3->expr );
- }
- };
-
-global_def: KW_Global var_def opt_def_init
- final {
- $$->stmt = 0;
-
- ObjectDef *object;
- if ( contextStack.length() == 0 )
- object = pd->globalObjectDef;
- else {
- Context *context = contextStack.top();
- $2->objField->context = context;
- object = context->contextObjDef;
- }
-
- if ( object->checkRedecl( $2->objField->name ) != 0 )
- error($2->objField->loc) << "object field renamed" << endp;
-
- object->insertField( $2->objField->name, $2->objField );
-
- if ( $3->expr != 0 ) {
- LangVarRef *varRef = new LangVarRef( $2->objField->loc,
- new QualItemVect, $2->objField->name );
-
- $$->stmt = new LangStmt( $2->objField->loc,
- $3->assignType, varRef, $3->expr );
- }
- };
-
-precedence: pred_type pred_token_list final { pd->predValue++; };
-
-pred_type: KW_Left final { predType = PredLeft; };
-pred_type: KW_Right final { predType = PredRight; };
-pred_type: KW_Nonassoc final { predType = PredNonassoc; };
-
-pred_token_list: pred_token_list ',' pred_token
- final {
- };
-
-pred_token_list: pred_token;
-
-nonterm pred_token
-{
- ProdEl *factor; /* NOTE(review): neither pred_token action below assigns this attribute, yet the KW_Prec form of opt_prec dereferences it -- confirm it is set before use. */
- TypeRef *typeRef; /* NOTE(review): also never assigned below; the actions use a local variable of the same name instead. */
-};
-
-pred_token:
- region_qual TK_Word
- final { /* Record a precedence declaration for a token referred to by name. */
- TypeRef *typeRef = new TypeRef( $2->loc, $1->nspaceQual, $2->data );
-
- PredDecl *predDecl = new PredDecl( typeRef, predType, pd->predValue ); /* predType comes from the pred_type rule; the precedence rule bumps pd->predValue after each declaration. */
- pd->predDeclList.append( predDecl );
- };
-
-pred_token:
- region_qual TK_Literal
- final { /* Same as above, for a token referred to by its literal text. */
- PdaLiteral *literal = new PdaLiteral( $2->loc, *$2 );
- TypeRef *typeRef = new TypeRef( $2->loc, $1->nspaceQual, literal );
-
- PredDecl *predDecl = new PredDecl( typeRef, predType, pd->predValue );
- pd->predDeclList.append( predDecl );
- };
-
-typedef:
- KW_Alias TK_Word type_ref
- final {
- Namespace *nspace = namespaceStack.top();
- TypeAlias *typeAlias = new TypeAlias(
- $1->loc, nspace, $2->data, $3->typeRef );
- nspace->typeAliasList.append( typeAlias );
- };
-
-cfl_def: cfl_def_head obj_var_list properties_list cfl_prod_list
- final {
- Namespace *nspace = namespaceStack.top();
- NtDef *ntDef = new NtDef(
- curDefineId,
- nspace,
- $4->defList,
- pd->objectDef,
- contextStack.length() > 0 ? contextStack.top() : 0,
- $3->property & PROPERTY_REDUCE_FIRST );
-
- nspace->ntDefList.append( ntDef );
- };
-
-cfl_def_head: KW_Def TK_Word
- final {
- curDefineId = $2->data;
- curDefList = new LelDefList;
- };
-
-nonterm cfl_prod_list
-{
- LelDefList *defList;
-};
-
-cfl_prod_list: cfl_prod_list '|' define_prod
- final {
- $$->defList = $1->defList;
- $3->definition->prodNum = $$->defList->length();
- $$->defList->append( $3->definition );
- };
-cfl_prod_list: define_prod
- final {
- $$->defList = curDefList;
- $1->definition->prodNum = $$->defList->length();
- $$->defList->append( $1->definition );
- };
-
-nonterm property
-{
- long property;
-};
-
-nonterm properties_list uses property;
-
-properties_list: properties_list property
- final {
- $$->property = $1->property | $2->property;
- };
-properties_list:
- final {
- $$->property = 0;
- };
-
-property:
- KW_ReduceFirst
- final {
- $$->property = PROPERTY_REDUCE_FIRST;
- };
-
-nonterm opt_prec
-{
- LangEl *predOf;
-};
-
-opt_prec:
- final {
- $$->predOf = 0;
- };
-
-opt_prec:
- KW_Prec pred_token
- final {
- $$->predOf = $2->factor->langEl;
- };
-
-nonterm define_prod
-{
-    Definition *definition;
-};
-
-define_prod: '[' prod_el_list ']' opt_commit opt_reduce_code opt_prec
-    final {
-        /* Build the production from the element list collected so far. It
-         * is numbered by its position in the global production list and
-         * carries no production name. */
-        Definition *production = new Definition( $1->loc, 0/*prodName*/,
-                curProdElList, $4->commit, $5->codeBlock,
-                pd->prodList.length(), 0, Definition::Production );
-        production->predOf = $6->predOf;
-
-        /* Register it globally and hand it up to the production list rule. */
-        pd->prodList.append( production );
-        $$->definition = production;
-    };
-
-obj_var_list: obj_var_list var_def
-    final {
-        /* Reject a field whose name is already taken in the object being
-         * defined. The error is reported at the field's own location, as the
-         * other redeclaration checks in this grammar do. */
-        if ( pd->objectDef->checkRedecl( $2->objField->name ) != 0 )
-            error($2->objField->loc) << "object field renamed" << endp;
-
-        pd->objectDef->insertField( $2->objField->name, $2->objField );
-    };
-
-obj_var_list:
- final {
- pd->objectDef = new ObjectDef( ObjectDef::UserType,
- curDefineId, pd->nextObjectId++ );
- };
-
-
-nonterm type_ref
-{
-    TypeRef *typeRef;
-};
-
-type_ref: basic_type_ref
-    final {
-        $$->typeRef = $1->typeRef;
-    };
-
-type_ref: KW_Map '<' type_ref type_ref '>'
-    final {
-        /* Container types are qualified by the current namespace and region.
-         * The reference is located at the container keyword rather than at
-         * an empty location, so it can be reported in diagnostics. */
-        NamespaceQual *nspaceQual = new NamespaceQual(
-                namespaceStack.top(), regionStack.top() );
-        $$->typeRef = new TypeRef( TypeRef::Map, $1->loc, nspaceQual,
-                $3->typeRef, $4->typeRef );
-    };
-
-type_ref: KW_List '<' type_ref '>'
-    final {
-        NamespaceQual *nspaceQual = new NamespaceQual(
-                namespaceStack.top(), regionStack.top() );
-        $$->typeRef = new TypeRef( TypeRef::List, $1->loc, nspaceQual, $3->typeRef, 0 );
-    };
-type_ref: KW_Vector '<' type_ref '>'
-    final {
-        NamespaceQual *nspaceQual = new NamespaceQual(
-                namespaceStack.top(), regionStack.top() );
-        $$->typeRef = new TypeRef( TypeRef::Vector, $1->loc, nspaceQual, $3->typeRef, 0 );
-    };
-type_ref: KW_Accum '<' type_ref '>'
-    final {
-        /* The accum keyword produces a parser type reference. */
-        NamespaceQual *nspaceQual = new NamespaceQual(
-                namespaceStack.top(), regionStack.top() );
-        $$->typeRef = new TypeRef( TypeRef::Parser, $1->loc, nspaceQual, $3->typeRef, 0 );
-    };
-
-nonterm basic_type_ref uses type_ref;
-
-basic_type_ref: region_qual TK_Word opt_repeat
- final {
- $$->typeRef = new TypeRef( $2->loc, $1->nspaceQual, $2->data );
- $$->typeRef->repeatType = $3->repeatType;
- };
-
-basic_type_ref: KW_Ptr region_qual TK_Word opt_repeat
- final {
- $$->typeRef = new TypeRef( $1->loc, $2->nspaceQual, $3->data );
- $$->typeRef->repeatType = $4->repeatType;
- $$->typeRef = new TypeRef( TypeRef::Ptr, $1->loc, $$->typeRef );
- };
-
-
-nonterm var_def
-{
-    InputLoc loc;
-    ObjField *objField;
-};
-
-var_def: TK_Word ':' type_ref
-    final {
-        /* Record where the variable was named, so that users of this
-         * nonterminal reading its loc attribute get a real location. */
-        $$->loc = $1->loc;
-
-        /* Return an object field object. The user of this nonterminal must
-         * load it into the appropriate map and do error checking. */
-        $$->objField = new ObjField( $1->loc, $3->typeRef, $1->data );
-    };
-
-region_def:
- region_head '{' root_item_list '}'
- final {
- /* Pop the top of the stack. */
- regionStack.pop();
- };
-
-region_head:
- KW_Lex TK_Word
- final {
- /* Just for ignores. */
- String scannerNameIgn( $2->data.length() + 2, "<%s>-ign", $2->data.data );
- TokenRegion *tokenRegionIgn = createRegion( scannerNameIgn );
-
- /* Just for collect ignores. Will use the ignore-only start state. */
- String scannerNameCi( $2->data.length() + 2, "<%s>-ci", $2->data.data );
- TokenRegion *tokenRegionCi = createRegion( scannerNameCi );
-
- /* Just for tokens. */
- String scannerNameTok( $2->data.length() + 2, "<%s>-tok", $2->data.data );
- TokenRegion *tokenRegionTok = createRegion( scannerNameTok );
-
- /* Make the new token region. */
- String scannerName( $2->data.length() + 2, "<%s>", $2->data.data );
- TokenRegion *tokenRegion = createRegion( scannerName );
-
- regionStack.push( tokenRegion );
-
- tokenRegion->ignoreOnlyRegion = tokenRegionIgn;
- tokenRegion->tokenOnlyRegion = tokenRegionTok;
- tokenRegion->ciRegion = tokenRegionCi;
-
- tokenRegion->isFullRegion = true;
- tokenRegionIgn->isIgnoreOnly = true;
- tokenRegionCi->isCiOnly = true;
- tokenRegionTok->isTokenOnly = true;
-
- tokenRegionIgn->derivedFrom = tokenRegion;
- tokenRegionCi->derivedFrom = tokenRegion;
- tokenRegionTok->derivedFrom = tokenRegion;
- };
-
-namespace_def:
- namespace_head '{' root_item_list '}'
- final {
- namespaceStack.pop();
- };
-
-namespace_head:
- KW_Namespace TK_Word
- final {
- /* Make the new namespace. */
- Namespace *nspace = new Namespace( InputLoc(), $2->data,
- pd->namespaceList.length(), namespaceStack.top() );
- namespaceStack.top()->childNamespaces.append( nspace );
- pd->namespaceList.append( nspace );
- namespaceStack.push( nspace );
- };
-
-context_var_def:
-    var_def
-    final {
-        /* A context variable only makes sense inside a context. The location
-         * is taken from the field object, which var_def always creates. */
-        if ( contextStack.length() == 0 )
-            error($1->objField->loc) << "internal error: no context stack items found" << endp;
-
-        /* Tie the field to the innermost context and add it to that
-         * context's object definition. */
-        Context *context = contextStack.top();
-        $1->objField->context = context;
-        ObjectDef *object = context->contextObjDef;
-
-        if ( object->checkRedecl( $1->objField->name ) != 0 )
-            error($1->objField->loc) << "object field renamed" << endp;
-
-        object->insertField( $1->objField->name, $1->objField );
-    };
-
-
-context_item: context_var_def commit;
-context_item: literal_def commit;
-context_item: rl_def commit;
-context_item: token_def commit;
-context_item: cfl_def commit;
-context_item: region_def commit;
-context_item: context_def commit;
-context_item: function_def commit;
-context_item: iter_def commit;
-context_item: pre_eof commit;
-context_item: precedence commit;
-
-context_item_list:
- context_item_list context_item;
-context_item_list:
- ;
-
-context_def:
- context_head '{' context_item_list '}'
- final {
- contextStack.pop();
- namespaceStack.pop();
- };
-
-context_head:
- KW_Context TK_Word
- final {
- /* Make the new namespace. */
- Namespace *nspace = new Namespace( InputLoc(), $2->data,
- pd->namespaceList.length(), namespaceStack.top() );
- namespaceStack.top()->childNamespaces.append( nspace );
- pd->namespaceList.append( nspace );
- namespaceStack.push( nspace );
-
- Context *context = new Context( $1->loc, 0 );
- contextStack.push( context );
-
- ContextDef *contextDef = new ContextDef( $2->data, context, nspace );
- nspace->contextDefList.append( contextDef );
-
- context->contextObjDef = new ObjectDef( ObjectDef::UserType,
- $2->data, pd->nextObjectId++ );
- };
-
-pattern_list: pattern_list pattern;
-pattern_list: init_pattern_list pattern;
-
-init_pattern_list:
- final {
- patternItemList = new PatternItemList;
- };
-
-pattern: '"' litpat_el_list '"';
-pattern: '[' pattern_el_list ']';
-
-litpat_el_list: litpat_el_list litpat_el;
-litpat_el_list: ;
-
-litpat_el: TK_LitPat
- final {
- PatternItem *patternItem = new PatternItem( $1->loc, $1->data,
- PatternItem::InputText );
- patternItemList->append( patternItem );
- };
-
-litpat_el: '[' pattern_el_list ']';
-
-pattern_el_list: pattern_el_list pattern_el;
-pattern_el_list: ;
-
-pattern_el: opt_label pattern_el_type_or_lit
- final {
- /* Store the variable reference in the pattern item. */
- $2->patternItem->varRef = $1->varRef;
-
- if ( $1->varRef != 0 ) {
- if ( pd->curLocalFrame->checkRedecl( $1->varRef->name ) != 0 ) {
- error( $1->varRef->loc ) << "variable " << $1->varRef->name <<
- " redeclared" << endp;
- }
-
- TypeRef *typeRef = $2->patternItem->factor->typeRef;
- ObjField *objField = new ObjField( InputLoc(), typeRef, $1->varRef->name );
-
- /* Insert it into the field map. */
- pd->curLocalFrame->insertField( $1->varRef->name, objField );
- }
- };
-
-pattern_el: '"' litpat_el_list '"';
-pattern_el: '?' TK_Word
- final {
- /* FIXME: Implement */
- assert(false);
- };
-
-nonterm pattern_el_type_or_lit
-{
- PatternItem *patternItem;
-};
-
-pattern_el_type_or_lit: region_qual TK_Word opt_repeat
- final {
- TypeRef *typeRef = new TypeRef( $2->loc, $1->nspaceQual, $2->data );
- typeRef->repeatType = $3->repeatType;
- ProdEl *factor = new ProdEl( ProdEl::ReferenceType, $2->loc, 0, false, typeRef, 0 );
- $$->patternItem = new PatternItem( $2->loc, factor, PatternItem::FactorType );
- patternItemList->append( $$->patternItem );
- };
-
-pattern_el_type_or_lit: region_qual TK_Literal opt_repeat
- final {
- PdaLiteral *literal = new PdaLiteral( $2->loc, *$2 );
- TypeRef *typeRef = new TypeRef( $2->loc, $1->nspaceQual, literal );
- typeRef->repeatType = $3->repeatType;
-
- ProdEl *factor = new ProdEl( ProdEl::ReferenceType, $2->loc, 0, false, typeRef, 0 );
- $$->patternItem = new PatternItem( $2->loc, factor, PatternItem::FactorType );
- patternItemList->append( $$->patternItem );
- };
-
-nonterm opt_label
-{
- /* Variable reference. */
- LangVarRef *varRef;
-};
-
-opt_label: TK_Word ':'
- final {
- $$->varRef = new LangVarRef( $1->loc, new QualItemVect, $1->data );
- };
-opt_label:
- final {
- $$->varRef = 0;
- };
-
-#
-# Replacement
-#
-
-# A replacement list is one or more '"..."' or '[...]' pieces. The empty
-# init_repl_list production precedes the first piece and installs a fresh
-# ReplItemList in replItemList; the element actions below append to it.
-repl_list: repl_list replacement;
-repl_list: init_repl_list replacement;
-
-init_repl_list:
- final {
- replItemList = new ReplItemList;
- };
-
-replacement: '"' lit_repl_el_list '"';
-replacement: '[' repl_el_list ']';
-
-# Contents of a double-quoted piece: literal text and nested '[...]' groups.
-lit_repl_el_list: lit_repl_el_list lit_repl_el;
-lit_repl_el_list: ;
-
-lit_repl_el: TK_LitPat
- final {
- /* Literal text is recorded as an InputText item. */
- ReplItem *replItem = new ReplItem( $1->loc, ReplItem::InputText, $1->data );
- replItemList->append( replItem );
- };
-
-lit_repl_el: '[' repl_el_list ']';
-
-# Contents of a bracketed piece: literal tokens, nested quoted pieces and
-# code expressions.
-repl_el_list: repl_el_list repl_el;
-repl_el_list: ;
-
-repl_el: region_qual TK_Literal
- final {
- /* A literal token is recorded as a FactorType item with no repeat. */
- PdaLiteral *literal = new PdaLiteral( $2->loc, *$2 );
- TypeRef *typeRef = new TypeRef( $2->loc, $1->nspaceQual, literal );
- typeRef->repeatType = RepeatNone;
- ProdEl *factor = new ProdEl( ProdEl::LiteralType, $2->loc, 0, false, typeRef, 0 );
- ReplItem *replItem = new ReplItem( $2->loc, ReplItem::FactorType, factor );
- replItemList->append( replItem );
- };
-repl_el: '"' lit_repl_el_list '"';
-
-repl_el: code_expr
- final {
- /* An embedded expression is recorded as an ExprType item. */
- ReplItem *replItem = new ReplItem( $1->expr->loc, ReplItem::ExprType, $1->expr );
- replItemList->append( replItem );
- };
-
-#
-# Accum
-#
-# Text accumulated into a parser: either a list of '"..."' / '[...]' pieces or
-# a single code expression. Both forms start with init_repl_list, so the items
-# are collected in a fresh replItemList.
-accumulate: init_repl_list accum_list;
-accumulate: init_repl_list code_expr
- final {
- ReplItem *replItem = new ReplItem( $2->expr->loc, ReplItem::ExprType, $2->expr );
- replItemList->append( replItem );
- };
-
-accum_list: accum_list accum;
-accum_list: accum;
-
-# NOTE(review): init_accum_list is not referenced by the accumulate
-# productions above, which use init_repl_list -- confirm it is still needed.
-init_accum_list:
- final {
- replItemList = new ReplItemList;
- };
-
-accum: '"' lit_accum_el_list '"';
-accum: '[' accum_el_list ']';
-
-# Contents of a double-quoted piece: literal text and nested '[...]' groups.
-lit_accum_el_list: lit_accum_el_list lit_accum_el;
-lit_accum_el_list: ;
-
-lit_accum_el: TK_LitPat
- final {
- /* Literal text is recorded as an InputText item. */
- ReplItem *replItem = new ReplItem( $1->loc, ReplItem::InputText, $1->data );
- replItemList->append( replItem );
- };
-
-lit_accum_el: '[' accum_el_list ']';
-
-# Contents of a bracketed piece: code expressions and nested quoted pieces.
-accum_el_list: accum_el_list accum_el;
-accum_el_list: ;
-
-#accum_el: region_qual TK_Literal
-# final {
-# PdaLiteral *literal = new PdaLiteral( $2->loc, *$2 );
-# ProdEl *factor = new ProdEl( $2->loc, false, $1->nspaceQual,
-# literal, 0 );
-# ReplItem *replItem = new ReplItem( $2->loc, ReplItem::FactorType, factor );
-# replItemList->append( replItem );
-# };
-accum_el: code_expr
- final {
- /* An embedded expression is recorded as an ExprType item. */
- ReplItem *replItem = new ReplItem( $1->expr->loc, ReplItem::ExprType, $1->expr );
- replItemList->append( replItem );
- };
-
-accum_el: '"' lit_accum_el_list '"';
-
-
-#
-# String
-#
-
-# A string list is one or more '"..."' or '[...]' pieces. The empty
-# init_string_list production precedes the first piece and installs a fresh
-# ReplItemList in replItemList; the element actions below append to it.
-string_list: string_list string;
-string_list: init_string_list string;
-
-init_string_list:
- final {
- replItemList = new ReplItemList;
- };
-
-string: '"' lit_string_el_list '"';
-string: '[' string_el_list ']';
-
-# Contents of a double-quoted piece: literal text and nested '[...]' groups.
-lit_string_el_list: lit_string_el_list lit_string_el;
-lit_string_el_list: ;
-
-lit_string_el: TK_LitPat
- final {
- /* Literal text is recorded as an InputText item. */
- ReplItem *replItem = new ReplItem( $1->loc, ReplItem::InputText, $1->data );
- replItemList->append( replItem );
- };
-
-lit_string_el: '[' string_el_list ']';
-
-# Contents of a bracketed piece: code expressions and nested quoted pieces.
-string_el_list: string_el_list string_el;
-string_el_list: ;
-
-#accum_el: region_qual TK_Literal
-# final {
-# PdaLiteral *literal = new PdaLiteral( $2->loc, *$2 );
-# ProdEl *factor = new ProdEl( $2->loc, false, $1->nspaceQual,
-# literal, 0 );
-# ReplItem *replItem = new ReplItem( $2->loc, ReplItem::FactorType, factor );
-# replItemList->append( replItem );
-# };
-string_el: code_expr
- final {
- /* An embedded expression is recorded as an ExprType item. */
- ReplItem *replItem = new ReplItem( $1->expr->loc, ReplItem::ExprType, $1->expr );
- replItemList->append( replItem );
- };
-
-string_el: '"' lit_string_el_list '"';
-
-# List of production elements. The empty form allocates a new ProdElList
-# into curProdElList; the recursive form appends each element's factor to it.
-prod_el_list:
- prod_el_list prod_el
- final {
- curProdElList->append( $2->factor );
- };
-
-prod_el_list:
- final { curProdElList = new ProdElList; };
-
-# Optional KW_Ni marker: value is true when the keyword is present.
-nonterm opt_no_ignore { bool value; };
-
-opt_no_ignore: KW_Ni final { $$->value = true; };
-opt_no_ignore: final { $$->value = false; };
-
-# Semantic value: the ProdEl built for one right-hand-side element.
-nonterm prod_el
-{
- ProdEl *factor;
-};
-
-# A right-hand-side element that references a type by name. An optional
-# capture ("name:") also becomes a field of pd->objectDef.
-prod_el:
- opt_capture opt_commit region_qual TK_Word opt_repeat
- final {
- TypeRef *typeRef = new TypeRef( $4->loc, $3->nspaceQual, $4->data );
- typeRef->repeatType = $5->repeatType;
- $$->factor = new ProdEl( ProdEl::ReferenceType, $4->loc, $1->objField, $2->commit, typeRef, 0 );
-
- /* If there is a capture, create the field. */
- if ( $1->objField != 0 ) {
- /* Might already exist. */
- ObjField *objField = pd->objectDef->checkRedecl( $1->objField->name );
- if ( objField == 0 ) {
- objField = $1->objField;
- objField->typeRef = typeRef;
- pd->objectDef->insertField( objField->name, objField );
- }
- else {
- /* FIXME: check the types are the same. */
- //error() << "object field renamed" << endp;
- }
-
- /* Mark the field (new or pre-existing) as an rhs getter and record the
- * current lengths of curDefList and curProdElList for this capture. */
- objField->isRhsGet = true;
- objField->rhsVal.append( RhsVal( curDefList->length(), curProdElList->length() ) );
- }
- };
-
-# A right-hand-side element that is a literal token. Unlike the TK_Word form,
-# a capture whose name already exists in pd->objectDef is reported as an
-# error, and isRhsGet / rhsVal are not touched here.
-prod_el:
- opt_capture opt_commit region_qual TK_Literal opt_repeat
- final {
- /* Create a new factor node going to a concat literal. */
- PdaLiteral *literal = new PdaLiteral( $4->loc, *$4 );
- TypeRef *typeRef = new TypeRef( $4->loc, $3->nspaceQual, literal );
- typeRef->repeatType = $5->repeatType;
- $$->factor = new ProdEl( ProdEl::LiteralType, $4->loc, $1->objField, $2->commit, typeRef, 0 );
-
- /* If there is a capture, create the field. */
- if ( $1->objField != 0 ) {
- $1->objField->typeRef = typeRef;
- if ( pd->objectDef->checkRedecl( $1->objField->name ) != 0 )
- error() << "object field renamed" << endp;
-
- pd->objectDef->insertField( $1->objField->name, $1->objField );
- }
- };
-
-# Semantic value for the optional repeat suffix ('*', '+', '?' or nothing)
-# that may follow a type name or literal.
-nonterm opt_repeat
-{
- bool opt;
- bool repeat;
- RepeatType repeatType;
-};
-
-# '*' suffix.
-opt_repeat: '*'
- final {
- $$->repeatType = RepeatRepeat;
- $$->repeat = true;
- $$->opt = false;
- };
-
-# '+' suffix.
-opt_repeat: '+'
- final {
- $$->repeatType = RepeatList;
- $$->repeat = false;
- $$->opt = false;
- };
-
-# '?' suffix.
-opt_repeat: '?'
- final {
- $$->repeatType = RepeatOpt;
- $$->repeat = false;
- $$->opt = true;
- };
-
-# No suffix.
-opt_repeat:
- final {
- $$->repeatType = RepeatNone;
- $$->repeat = false;
- $$->opt = false;
- };
-
-# Optional chain of "Word::" qualifiers. The empty form creates a
-# NamespaceQual from the current namespace and region; each qualifier then
-# appends its name to qualNames on that same object.
-nonterm region_qual
-{
- NamespaceQual *nspaceQual;
-};
-
-region_qual: region_qual TK_Word TK_DoubleColon
- final {
- $$->nspaceQual = $1->nspaceQual;
- $$->nspaceQual->qualNames.append( $2->data );
- };
-
-region_qual:
- final {
- $$->nspaceQual = new NamespaceQual( namespaceStack.top(), regionStack.top() );
- };
-
-# "literal" definition: KW_Literal followed by a comma-separated list of
-# literal items.
-literal_def: KW_Literal literal_list;
-
-literal_list: literal_list ',' literal_item;
-literal_list: literal_item;
-
-# One literal, optionally wrapped in KW_Ni markers on either side. Creates a
-# TokenDef for the literal and registers it in the current namespace's
-# literalDict; a literal already in that dict is reported as an error.
-literal_item: opt_no_ignore TK_Literal opt_no_ignore
- final {
- /* Create a name for the literal. */
- String name( 32, "_literal_%.4x", pd->nextTokenId );
-
- /* When the literal is declared at the root region, four regions are
- * created for it and the full one is pushed for the rest of this action. */
- bool insideRegion = regionStack.top() != pd->rootRegion;
- if ( !insideRegion ) {
- /* Just for ignores. */
- String scannerNameIgn( name.length() + 2, "<%s>-ign", name.data );
- TokenRegion *tokenRegionIgn = createRegion( scannerNameIgn );
-
- /* Just for collect ignores. Will use the ignore-only start state. */
- String scannerNameCi( name.length() + 2, "<%s>-ci", name.data );
- TokenRegion *tokenRegionCi = createRegion( scannerNameCi );
-
- /* Just for tokens. */
- String scannerNameTok( name.length() + 2, "<%s>-tok", name.data );
- TokenRegion *tokenRegionTok = createRegion( scannerNameTok );
-
- /* Make a new token region just for the token. */
- String scannerName( name.length() + 2, "<%s>", name.data );
- TokenRegion *tokenRegion = createRegion( scannerName );
-
- regionStack.push( tokenRegion );
-
- /* Link the derived regions to the full region and flag each kind. */
- tokenRegion->ignoreOnlyRegion = tokenRegionIgn;
- tokenRegion->tokenOnlyRegion = tokenRegionTok;
- tokenRegion->ciRegion = tokenRegionCi;
-
- tokenRegion->isFullRegion = true;
- tokenRegionIgn->isIgnoreOnly = true;
- tokenRegionCi->isCiOnly = true;
- tokenRegionTok->isTokenOnly = true;
-
- tokenRegionIgn->derivedFrom = tokenRegion;
- tokenRegionCi->derivedFrom = tokenRegion;
- tokenRegionTok->derivedFrom = tokenRegion;
- }
-
- /* interp receives the prepared form of the literal text and is used as
- * the dictionary key below; the second output is not used here. */
- bool unusedCI;
- String interp;
- prepareLitString( interp, unusedCI, $2->data, $2->loc );
-
- /* Look for the production's associated region. */
- Namespace *nspace = namespaceStack.top();
- TokenRegion *region = regionStack.top();
-
-
- LiteralDictEl *ldel = nspace->literalDict.find( interp );
- if ( ldel != 0 )
- error( $2->loc ) << "literal already defined in this namespace" << endp;
- else {
- /* Wrap the literal in a regular-expression tree for the scanner. */
- Join *join = new Join( new Expression( new Term( new FactorWithAug(
- new FactorWithRep( $2->loc, new FactorWithNeg( $2->loc, new Factor(
- new Literal( $2->loc, $2->data,
- Literal::LitString ) ) ) ) ) ) ) );
-
- if ( strcmp( interp.data, "" ) == 0 ) {
- /* Empty literal: registered with the namespace only (not added to the
- * region's token list) and flagged isZero. */
- TokenDef *tokenDef = new TokenDef( name, $2->data, true, false, join,
- 0, $2->loc, pd->nextTokenId++, nspace, region, 0, 0, 0 );
-
- //region->tokenDefList.append( tokenDef );
-
- ldel = nspace->literalDict.insert( interp, tokenDef );
- nspace->tokenDefList.append( tokenDef );
-
- tokenDef->isZero = true;
- }
- else {
- TokenDef *tokenDef = new TokenDef( name, $2->data, true, false, join,
- 0, $2->loc, pd->nextTokenId++, nspace, region, 0, 0, 0 );
- region->tokenDefList.append( tokenDef );
- ldel = nspace->literalDict.insert( interp, tokenDef );
- nspace->tokenDefList.append( tokenDef );
-
- if ( $1->value )
- tokenDef->noPreIgnore = true;
- if ( $3->value )
- tokenDef->noPostIgnore = true;
-
- /* Duplicate definition for the token-only region, linked back through
- * dupOf and stored in the dict under "|" + interp + "_tok". */
- TokenDef *tokenDefTok = new TokenDef( name + "_tok", $2->data, true, false, join,
- 0, $2->loc, pd->nextTokenId++, nspace, region->tokenOnlyRegion, 0, 0, 0 );
- tokenDefTok->dupOf = tokenDef;
- region->tokenOnlyRegion->tokenDefList.append( tokenDefTok );
- ldel = nspace->literalDict.insert( "|" + interp + "_tok", tokenDefTok );
- nspace->tokenDefList.append( tokenDefTok );
- }
- }
-
- if ( !insideRegion ) {
- /* Leave the region just for this token. */
- regionStack.pop();
- }
- };
-
-
-# These two productions are responsible for setting and unsetting the Regular
-# language scanning context. Both are empty productions: the try block sets
-# the enterRl flag and the undo block assigns the opposite value (presumably
-# run when the parser backs out of the production -- confirm against kelbt).
-enter_rl:
- try {
- enterRl = true;
- }
- undo {
- enterRl = false;
- };
-leave_rl:
- try {
- enterRl = false;
- }
- undo {
- enterRl = true;
- };
-
-# Token or ignore definition: keyword, optional name, object variables, a
-# regular expression between '/' delimiters (each side optionally marked with
-# KW_Ni) and an optional translation block. Creates the main TokenDef plus a
-# duplicate in the ignore-only or token-only region.
-token_def:
- token_or_ignore token_def_name obj_var_list
- enter_rl opt_no_ignore '/' opt_rl_join leave_rl '/' opt_no_ignore
- opt_translate
- final {
- bool ignore = $1->ignore;
- String name = $2->name;
- Join *join = $7->join;
- CodeBlock *transBlock = $11->transBlock;
-
- /* Check the region if this is for an ignore. */
- if ( ignore && !pd->insideRegion )
- error($1->loc) << "ignore tokens can only appear inside scanners" << endp;
-
- /* Check the name if this is a token. */
- if ( !ignore && name == 0 )
- error($1->loc) << "tokens must have a name" << endp;
-
- /* Give a default name to ignores. */
- if ( name == 0 )
- name.setAs( 32, "_ignore_%.4x", pd->nextTokenId );
-
- Namespace *nspace = namespaceStack.top();
- TokenRegion *region = regionStack.top();
-
- /* The main definition, added to both the region and the namespace. */
- TokenDef *tokenDef = new TokenDef( name, String(), false, ignore, join,
- transBlock, $1->loc, pd->nextTokenId++, nspace, region,
- &reCaptureVect, pd->objectDef,
- contextStack.length() > 0 ? contextStack.top() : 0 );
-
- region->tokenDefList.append( tokenDef );
- nspace->tokenDefList.append( tokenDef );
-
- if ( $5->value )
- tokenDef->noPreIgnore = true;
- if ( $10->value )
- tokenDef->noPostIgnore = true;
-
- /* All again for the ignore. */
- if ( ignore ) {
- /* Duplicate (no translation block) for the ignore-only region. */
- TokenDef *tokenDefIgn = new TokenDef( name + "_ign", String(), false, ignore, join,
- 0, $1->loc, pd->nextTokenId++, nspace, region->ignoreOnlyRegion,
- &reCaptureVect, pd->objectDef,
- contextStack.length() > 0 ? contextStack.top() : 0 );
-
- tokenDefIgn->dupOf = tokenDef;
-
- region->ignoreOnlyRegion->tokenDefList.append( tokenDefIgn );
- nspace->tokenDefList.append( tokenDefIgn );
- }
- else {
- /* Duplicate (no translation block) for the token-only region. */
- TokenDef *tokenDefTok = new TokenDef( name + "_tok", String(), false, ignore, join,
- 0, $1->loc, pd->nextTokenId++, nspace, region->tokenOnlyRegion,
- &reCaptureVect, pd->objectDef,
- contextStack.length() > 0 ? contextStack.top() : 0 );
-
- tokenDefTok->dupOf = tokenDef;
-
- region->tokenOnlyRegion->tokenDefList.append( tokenDefTok );
- nspace->tokenDefList.append( tokenDefTok );
- }
-
- /* This is created and pushed in the name. */
- if ( !pd->insideRegion ) {
- /* Leave the region that we made just for this token. */
- regionStack.pop();
- }
-
- if ( join != 0 ) {
- /* Create a regular language definition so the token can be used to
- * make other tokens */
- addRegularDef( $1->loc, namespaceStack.top(), name, join );
- }
-
-
- reCaptureVect.empty();
- };
-
-# Leading keyword of a token definition: records the keyword's location and
-# whether it was KW_Ignore (ignore = true) or KW_Token (ignore = false).
-nonterm token_or_ignore
-{
- InputLoc loc;
- bool ignore;
-};
-
-token_or_ignore: KW_Token
- final { $$->loc = $1->loc; $$->ignore = false; };
-
-token_or_ignore: KW_Ignore
- final { $$->loc = $1->loc; $$->ignore = true; };
-
-# Name part of a token definition. Besides passing the name up, the action
-# records whether the definition sits inside a region (pd->insideRegion),
-# sets curDefineId, and -- when at the root region -- creates and pushes a
-# set of regions for this token. token_def pops that region again.
-nonterm class token_def_name
-{
- String name;
-};
-
-token_def_name:
- opt_name
- final {
- String name = $1->name;
-
- $$->name = name;
- pd->insideRegion = regionStack.top() != pd->rootRegion;
- curDefineId = name;
-
- /* NOTE(review): opt_name may leave the name unset; the "%s" formats below
- * then receive name.data for an empty String -- confirm that is safe. */
- if ( !pd->insideRegion ) {
- /* For just ignores. */
- String scannerNameIgn( name.length() + 2, "<%s>-ign", name.data );
- TokenRegion *tokenRegionIgn = createRegion( scannerNameIgn );
-
- /* Just for explicitly collecting ignores. */
- String scannerNameCi( name.length() + 2, "<%s>-ci", name.data );
- TokenRegion *tokenRegionCi = createRegion( scannerNameCi );
-
- /* Just for tokens. */
- String scannerNameTok( name.length() + 2, "<%s>-tok", name.data );
- TokenRegion *tokenRegionTok = createRegion( scannerNameTok );
-
- /* If not inside a region, make one for the token. */
- String scannerName( name.length() + 2, "<%s>", name.data );
- TokenRegion *tokenRegion = createRegion( scannerName );
-
- regionStack.push( tokenRegion );
-
- /* Link the derived regions to the full region and flag each kind. */
- tokenRegion->ignoreOnlyRegion = tokenRegionIgn;
- tokenRegion->tokenOnlyRegion = tokenRegionTok;
- tokenRegion->ciRegion = tokenRegionCi;
-
- tokenRegion->isFullRegion = true;
- tokenRegionIgn->isIgnoreOnly = true;
- tokenRegionCi->isCiOnly = true;
- tokenRegionTok->isTokenOnly = true;
-
- tokenRegionIgn->derivedFrom = tokenRegion;
- tokenRegionCi->derivedFrom = tokenRegion;
- tokenRegionTok->derivedFrom = tokenRegion;
- }
-
- /* Reset the label id counter. */
- pd->nextLabelId = 0;
- };
-
-# Optional name: set from the word when present; the empty form has no
-# action, so name keeps whatever value the String starts with.
-nonterm class opt_name
-{
- String name;
-};
-
-opt_name: TK_Word final { $$->name = $1->data; };
-opt_name: ;
-
-# Optional translation block attached to a token definition. transBlock is 0
-# when no block was written.
-nonterm opt_translate
-{
- CodeBlock *transBlock;
-};
-
-# No block given.
-opt_translate:
- final {
- $$->transBlock = 0;
- };
-
-# Block given: wrap its statements in a CodeBlock and record the local frame
-# opened by block_open and the innermost context, if any.
-opt_translate:
- block_open lang_stmt_list block_close
- final {
- CodeBlock *block = new CodeBlock( $2->stmtList );
- block->localFrame = $1->localFrame;
-
- block->context = 0;
- if ( contextStack.length() != 0 )
- block->context = contextStack.top();
-
- $$->transBlock = block;
- };
-
-# "preeof" block: wraps the statements in a CodeBlock and stores it as the
-# preEofBlock of the region on top of regionStack.
-pre_eof:
- KW_Preeof block_open lang_stmt_list block_close
- final {
- /* NOTE(review): this error ends with endl, whereas most errors in this
- * grammar end with endp. If endl does not stop processing, the block is
- * still attached to the root region below -- confirm intended. */
- bool insideRegion = regionStack.top() != pd->rootRegion;
- if ( !insideRegion )
- error($1->loc) << "preeof must be used inside an existing region" << endl;
-
- CodeBlock *codeBlock = new CodeBlock( $3->stmtList );
- codeBlock->localFrame = $2->localFrame;
- codeBlock->context = contextStack.length() == 0 ? 0 : contextStack.top();
-
- TokenRegion *region = regionStack.top();
- region->preEofBlock = codeBlock;
- };
-
-# Named regular-language definition: "rl name / join /". Registers the join
-# under the machine name in the current namespace, then rejects definitions
-# that captured variables.
-rl_def:
- KW_Rl machine_name enter_rl '/' rl_join leave_rl '/'
- final {
- /* Generic creation of machine for instantiation and assignment. */
- addRegularDef( $2->loc, namespaceStack.top(), $2->data, $5->join );
-
- /* NOTE(review): this error ends with endl rather than endp, and
- * reCaptureVect is not cleared here -- confirm intended. */
- if ( reCaptureVect.length() > 0 )
- error($1->loc) << "rl definitions cannot capture vars" << endl;
- };
-
-# Shared semantic-value type: a token's location and text.
-type class token_data
-{
- InputLoc loc;
- String data;
-};
-
-nonterm machine_name uses token_data;
-
-# Name of a regular-language definition. Looks up or creates the priority key
-# and local error key for the name, stores them as the current definition's
-# keys, and passes the word's location and text up.
-machine_name:
- TK_Word
- final {
- /* Make/get the priority key. The name may have already been referenced
- * and therefore exist. */
- PriorDictEl *priorDictEl;
- if ( pd->priorDict.insert( $1->data, pd->nextPriorKey, &priorDictEl ) )
- pd->nextPriorKey += 1;
- pd->curDefPriorKey = priorDictEl->value;
-
- /* Make/get the local error key. */
- LocalErrDictEl *localErrDictEl;
- if ( pd->localErrDict.insert( $1->data, pd->nextLocalErrKey, &localErrDictEl ) )
- pd->nextLocalErrKey += 1;
- pd->curDefLocalErrKey = localErrDictEl->value;
-
- $$->loc = $1->loc;
- $$->data = $1->data;
- };
-
-#
-# Reduce statements
-#
-
-# Optional reduction code block. codeBlock is 0 when absent; otherwise it
-# wraps the statement list and records the local frame from start_reduce and
-# the innermost context, if any.
-nonterm opt_reduce_code
-{
- CodeBlock *codeBlock;
-};
-
-opt_reduce_code:
- final { $$->codeBlock = 0; };
-
-opt_reduce_code:
- start_reduce lang_stmt_list block_close
- final {
- $$->codeBlock = new CodeBlock( $2->stmtList );
- $$->codeBlock->localFrame = $1->localFrame;
- $$->codeBlock->context = contextStack.length() == 0 ? 0 : contextStack.top();
- };
-
-nonterm start_reduce uses block_open;
-
-# Opening of a reduce block: forwards the local frame from block_open.
-start_reduce:
- block_open
- final {
- $$->localFrame = $1->localFrame;
- };
-
-# A statement list: zero or more statements followed by an optional trailing
-# require statement, all collected in one StmtList.
-nonterm lang_stmt_list
-{
- StmtList *stmtList;
-};
-
-lang_stmt_list: rec_stmt_list opt_require_stmt
- final {
- $$->stmtList = $1->stmtList;
- if ( $2->stmt != 0 )
- $$->stmtList->append( $2->stmt );
- };
-
-nonterm rec_stmt_list uses lang_stmt_list;
-
-# Left-recursive accumulation; the empty form allocates the StmtList.
-rec_stmt_list: rec_stmt_list statement
- final {
- $$->stmtList = $1->stmtList;
-
- /* Maybe a statement was generated. */
- if ( $2->stmt != 0 )
- $$->stmtList->append( $2->stmt );
- };
-
-rec_stmt_list:
- final {
- $$->stmtList = new StmtList;
- };
-
-# Optional "= expr" initializer on a variable definition. expr is 0 when
-# absent; assignType is only assigned when an initializer is present, so read
-# it only after checking expr.
-nonterm opt_def_init
-{
- LangExpr *expr;
- LangStmt::Type assignType;
-};
-
-opt_def_init: '=' code_expr
- final {
- $$->expr = $2->expr;
- $$->assignType = LangStmt::AssignType;
- };
-opt_def_init:
- final {
- $$->expr = 0;
- };
-
-# Empty marker productions that push / pop a scope on the current local
-# frame. They are placed around constructs such as while, for, if and else.
-scope_push:
- final {
- pd->curLocalFrame->pushScope();
- //cout << "push scope" << endl;
- };
-
-scope_pop:
- final {
- pd->curLocalFrame->popScope();
- //cout << "pop scope" << endl;
- };
-
-# Semantic value: the LangStmt built for one statement. May be 0, e.g. for a
-# variable definition without an initializer.
-nonterm statement
-{
- LangStmt *stmt;
-};
-nonterm for_scope uses statement;
-
-# Variable definition with optional initializer. The variable is inserted
-# into the current local frame; a statement is produced only when there is an
-# initializer.
-statement: var_def opt_def_init
- final {
- /* By default no statement here. Maybe will add an initialization. */
- $$->stmt = 0;
-
- /* Check for redeclaration. */
- if ( pd->curLocalFrame->checkRedecl( $1->objField->name ) != 0 ) {
- error( $1->objField->loc ) << "variable " << $1->objField->name <<
- " redeclared" << endp;
- }
-
- /* Insert it into the field map. */
- pd->curLocalFrame->insertField( $1->objField->name, $1->objField );
-
- //cout << "var def " << $1->objField->name << endl;
-
- if ( $2->expr != 0 ) {
- LangVarRef *varRef = new LangVarRef( $1->objField->loc,
- new QualItemVect, $1->objField->name );
-
- $$->stmt = new LangStmt( $1->objField->loc,
- $2->assignType, varRef, $2->expr );
- }
- };
-# Assignment to an existing variable reference.
-statement: var_ref '=' code_expr
- final {
- $$->stmt = new LangStmt( $2->loc, LangStmt::AssignType, $1->varRef, $3->expr );
- };
-# The four print forms differ only in the LangStmt type they create.
-statement: KW_Print '(' code_expr_list ')'
- final {
- $$->stmt = new LangStmt( $1->loc, LangStmt::PrintType, $3->exprVect );
- };
-statement: KW_PrintXMLAC '(' code_expr_list ')'
- final {
- $$->stmt = new LangStmt( $1->loc, LangStmt::PrintXMLACType, $3->exprVect );
- };
-statement: KW_PrintXML '(' code_expr_list ')'
- final {
- $$->stmt = new LangStmt( $1->loc, LangStmt::PrintXMLType, $3->exprVect );
- };
-statement: KW_PrintStream '(' code_expr_list ')'
- final {
- $$->stmt = new LangStmt( $1->loc, LangStmt::PrintStreamType, $3->exprVect );
- };
-# Bare expression used as a statement; created with a default InputLoc.
-statement: code_expr
- final {
- $$->stmt = new LangStmt( InputLoc(), LangStmt::ExprType, $1->expr );
- };
-statement: if_stmt
- final {
- $$->stmt = $1->stmt;
- };
-statement: KW_Reject
- final {
- $$->stmt = new LangStmt( $1->loc, LangStmt::RejectType );
- };
-# While loop; the condition and body sit between scope_push and scope_pop.
-statement: KW_While scope_push code_expr block_or_single scope_pop
- final {
- $$->stmt = new LangStmt( LangStmt::WhileType, $3->expr, $4->stmtList );
- };
-
-# Body of a for statement: "name : type in iterator-call body". The loop
-# variable is inserted into the current local frame.
-for_scope: TK_Word ':' type_ref KW_In iter_call block_or_single
- final {
- /* Check for redeclaration. */
- if ( pd->curLocalFrame->checkRedecl( $1->data ) != 0 )
- error( $1->loc ) << "variable " << $1->data << " redeclared" << endp;
-
- /* Note that we pass in a null type reference. This type is dependent
- * on the result of the iter_call lookup since it must contain a reference
- * to the iterator that is called. This lookup is done at compile time. */
- ObjField *iterField = new ObjField( $1->loc, (TypeRef*)0, $1->data );
- pd->curLocalFrame->insertField( $1->data, iterField );
-
- $$->stmt = new LangStmt( $1->loc, LangStmt::ForIterType,
- iterField, $3->typeRef, $5->langTerm, $6->stmtList );
- };
-
-# For loop; for_scope is wrapped in scope_push / scope_pop.
-statement: KW_For scope_push for_scope scope_pop
- final {
- $$->stmt = $3->stmt;
- };
-
-statement: KW_Return code_expr
- final {
- $$->stmt = new LangStmt( $1->loc, LangStmt::ReturnType, $2->expr );
- };
-statement: KW_Break
- final {
- $$->stmt = new LangStmt( LangStmt::BreakType );
- };
-statement: KW_Yield var_ref
- final {
- $$->stmt = new LangStmt( LangStmt::YieldType, $2->varRef );
- };
-# "var << accumulate": wraps the collected replItemList in a ParserText,
-# registers it in pd->parserTextList and builds a ParserType statement.
-statement: var_ref TK_LtLt accumulate
- final {
- Namespace *nspace = namespaceStack.top();
- TokenRegion *region = regionStack.top();
- ParserText *parserText = new ParserText( $2->loc, nspace, region, replItemList );
- pd->parserTextList.append( parserText );
-
- $$->stmt = new LangStmt( LangStmt::ParserType, $1->varRef, parserText );
- };
-# "send var accumulate": builds the same ParserType statement as the '<<'
-# form, using the keyword's location.
-statement: KW_Send var_ref accumulate
- final {
- Namespace *nspace = namespaceStack.top();
- TokenRegion *region = regionStack.top();
- ParserText *parserText = new ParserText( $1->loc, nspace, region, replItemList );
- pd->parserTextList.append( parserText );
-
- $$->stmt = new LangStmt( LangStmt::ParserType, $2->varRef, parserText );
- };
-
-nonterm opt_require_stmt uses statement;
-
-# Optional trailing "require" in a statement list. It becomes an IfType
-# statement whose condition is the pattern match and whose body is the rest
-# of the statement list; there is no else part.
-opt_require_stmt:
- scope_push require_pattern lang_stmt_list scope_pop
- final {
- $$->stmt = new LangStmt( LangStmt::IfType, $2->expr, $3->stmtList, 0 );
- };
-opt_require_stmt:
- final {
- $$->stmt = 0;
- };
-
-nonterm require_pattern uses code_expr;
-
-# "require var pattern": builds a Pattern from patternItemList, registers it
-# in pd->patternList and yields a MatchType expression on the variable.
-require_pattern:
- KW_Require var_ref pattern_list
- final {
- Namespace *nspace = namespaceStack.top();
- TokenRegion *region = regionStack.top();
- Pattern *pattern = new Pattern( $1->loc, nspace, region,
- patternItemList, pd->nextPatReplId++ );
- pd->patternList.append( pattern );
-
- $$->expr = new LangExpr(
- new LangTerm( LangTerm::MatchType, $2->varRef, pattern ) );
- };
-
-# Body of a control construct: either a braced statement list or a single
-# statement, always delivered as a StmtList.
-nonterm block_or_single uses lang_stmt_list;
-
-block_or_single: '{' lang_stmt_list '}'
- final {
- $$->stmtList = $2->stmtList;
- };
-block_or_single: statement
- final {
- $$->stmtList = new StmtList;
-
- /* A statement may yield no LangStmt (a variable definition without an
- * initializer leaves stmt at 0). Skip it, as rec_stmt_list does, rather
- * than appending a null element to the list. */
- if ( $1->stmt != 0 )
- $$->stmtList->append( $1->stmt );
- };
-
-# Iterator expression in a for statement: either a call with an optional
-# argument list, or a bare word treated as an unqualified variable reference.
-nonterm iter_call
-{
- LangTerm *langTerm;
-};
-
-iter_call: var_ref '(' opt_code_expr_list ')'
- final {
- $$->langTerm = new LangTerm( $1->varRef, $3->exprVect );
- };
-iter_call: TK_Word
- final {
- $$->langTerm = new LangTerm( LangTerm::VarRefType,
- new LangVarRef( $1->loc, new QualItemVect, $1->data ) );
- };
-
-#
-# If Statements
-#
-
-nonterm if_stmt uses statement;
-
-# "if cond body" followed by any elsif clauses and an optional else. The
-# chain that follows is passed as the fourth LangStmt argument.
-if_stmt: KW_If scope_push code_expr block_or_single scope_pop elsif_list
- final {
- $$->stmt = new LangStmt( LangStmt::IfType, $3->expr, $4->stmtList, $6->stmt );
- };
-
-# Chain of elsif clauses ending in an optional else. stmt is the head of the
-# chain, or 0 when there is neither an elsif nor an else.
-nonterm elsif_list
-{
- LangStmt *stmt;
-};
-
-elsif_list:
- elsif_clause elsif_list
- final {
- /* Put any of the following elsif part, an else, or null into the elsePart. */
- $$->stmt = $1->stmt;
- $$->stmt->elsePart = $2->stmt;
- };
-elsif_list:
- optional_else
- final {
- $$->stmt = $1->stmt;
- };
-
-# One "elsif cond body" clause, built as an IfType statement whose else part
-# is filled in by elsif_list.
-nonterm elsif_clause
-{
- LangStmt *stmt;
-};
-
-elsif_clause:
- KW_Elsif scope_push code_expr block_or_single scope_pop
- final {
- $$->stmt = new LangStmt( LangStmt::IfType, $3->expr, $4->stmtList, 0 );
- };
-
-# Optional final "else body"; stmt is 0 when absent.
-nonterm optional_else
-{
- LangStmt *stmt;
-};
-
-optional_else:
- KW_Else scope_push block_or_single scope_pop
- final {
- $$->stmt = new LangStmt( LangStmt::ElseType, $3->stmtList );
- };
-
-optional_else:
- final {
- $$->stmt = 0;
- };
-
-#
-# Code Expression Lists.
-#
-# One or more expressions collected into an ExprVect. The productions list
-# expressions by juxtaposition; no separator token appears between them.
-nonterm code_expr_list
-{
- ExprVect *exprVect;
-};
-
-code_expr_list: code_expr_list code_expr
- final {
- $$->exprVect = $1->exprVect;
- $$->exprVect->append( $2->expr );
- };
-code_expr_list: code_expr
- final {
- $$->exprVect = new ExprVect;
- $$->exprVect->append( $1->expr );
- };
-
-nonterm opt_code_expr_list uses code_expr_list;
-
-# Optional form: exprVect is 0 (not an empty vector) when no expressions
-# were given.
-opt_code_expr_list: code_expr_list
- final {
- $$->exprVect = $1->exprVect;
- };
-
-opt_code_expr_list:
- final {
- $$->exprVect = 0;
- };
-
-#
-# Type list
-#
-
-# Comma-separated list of type references collected into a TypeRefVect.
-nonterm type_list
-{
- TypeRefVect *typeRefVect;
-};
-
-type_list: type_list ',' type_ref
- final {
- $$->typeRefVect = $1->typeRefVect;
- $$->typeRefVect->append( $3->typeRef );
- };
-type_list: type_ref
- final {
- $$->typeRefVect = new TypeRefVect;
- $$->typeRefVect->append( $1->typeRef );
- };
-
-nonterm opt_type_list uses type_list;
-
-# Optional form: typeRefVect is 0 (not an empty vector) when no types were
-# given.
-opt_type_list: type_list
- final {
- $$->typeRefVect = $1->typeRefVect;
- };
-
-opt_type_list:
- final {
- $$->typeRefVect = 0;
- };
-
-
-#
-# Variable reference
-#
-
-# A variable reference: an optional chain of qualifiers followed by a name.
-nonterm var_ref
-{
- LangVarRef *varRef;
-};
-
-var_ref: qual TK_Word
- final {
- $$->varRef = new LangVarRef( $2->loc, $1->qual, $2->data );
- };
-
-# Qualifier chain: zero or more "word." or "word->" prefixes, each appended
-# to the QualItemVect as a Dot or Arrow item. The empty form allocates the
-# vector.
-nonterm qual
-{
- QualItemVect *qual;
-};
-
-qual: qual TK_Word '.'
- final {
- $$->qual = $1->qual;
- $$->qual->append( QualItem( $2->loc, $2->data, QualItem::Dot ) );
- };
-qual: qual TK_Word TK_RightArrow
- final {
- $$->qual = $1->qual;
- $$->qual->append( QualItem( $2->loc, $2->data, QualItem::Arrow ) );
- };
-qual:
- final {
- $$->qual = new QualItemVect;
- };
-
-#
-# Code expression
-#
-
-# Expression grammar, layered from the outermost level inward: code_expr
-# (&&, ||), code_relational, code_additive, code_multiplicitive, code_unary.
-# Every binary level is left-recursive and builds a LangExpr located at the
-# operator token.
-nonterm code_expr
-{
- LangExpr *expr;
-};
-
-code_expr: code_expr TK_AmpAmp code_relational
- final {
- $$->expr = new LangExpr( $2->loc, $1->expr, OP_LogicalAnd, $3->expr );
- };
-
-code_expr: code_expr TK_BarBar code_relational
- final {
- $$->expr = new LangExpr( $2->loc, $1->expr, OP_LogicalOr, $3->expr );
- };
-
-code_expr: code_relational
- final {
- $$->expr = $1->expr;
- };
-
-# Comparison operators. Multi-character operators use OP_* codes; '<' and '>'
-# pass the character itself as the operator.
-nonterm code_relational uses code_expr;
-
-code_relational: code_relational TK_DoubleEql code_additive
- final {
- $$->expr = new LangExpr( $2->loc, $1->expr, OP_DoubleEql, $3->expr );
- };
-
-code_relational: code_relational TK_NotEql code_additive
- final {
- $$->expr = new LangExpr( $2->loc, $1->expr, OP_NotEql, $3->expr );
- };
-
-code_relational: code_relational '<' code_additive
- final {
- $$->expr = new LangExpr( $2->loc, $1->expr, '<', $3->expr );
- };
-
-code_relational: code_relational '>' code_additive
- final {
- $$->expr = new LangExpr( $2->loc, $1->expr, '>', $3->expr );
- };
-
-code_relational: code_relational TK_LessEql code_additive
- final {
- $$->expr = new LangExpr( $2->loc, $1->expr, OP_LessEql, $3->expr );
- };
-
-code_relational: code_relational TK_GrtrEql code_additive
- final {
- $$->expr = new LangExpr( $2->loc, $1->expr, OP_GrtrEql, $3->expr );
- };
-
-
-code_relational: code_additive
- final {
- $$->expr = $1->expr;
- };
-
-# Addition and subtraction.
-nonterm code_additive uses code_expr;
-
-code_additive: code_additive '+' code_multiplicitive
- final {
- $$->expr = new LangExpr( $2->loc, $1->expr, '+', $3->expr );
- };
-
-code_additive: code_additive '-' code_multiplicitive
- final {
- $$->expr = new LangExpr( $2->loc, $1->expr, '-', $3->expr );
- };
-
-code_additive: code_multiplicitive
- final {
- $$->expr = $1->expr;
- };
-
-# Multiplication and division.
-nonterm code_multiplicitive uses code_expr;
-
-code_multiplicitive: code_multiplicitive '*' code_unary
- final {
- $$->expr = new LangExpr( $2->loc, $1->expr, '*', $3->expr );
- };
-
-code_multiplicitive: code_multiplicitive '/' code_unary
- final {
- $$->expr = new LangExpr( $2->loc, $1->expr, '/', $3->expr );
- };
-
-code_multiplicitive: code_unary
- final {
- $$->expr = $1->expr;
- };
-
-# Prefix operators '!', '$', '^' and '%'. The operand is a code_factor, not a
-# code_unary, so prefix operators do not stack directly on one another.
-nonterm code_unary uses code_expr;
-code_unary: '!' code_factor
- final {
- $$->expr = new LangExpr( $1->loc, '!', $2->expr );
- };
-code_unary: '$' code_factor
- final {
- $$->expr = new LangExpr( $1->loc, '$', $2->expr );
- };
-code_unary: '^' code_factor
- final {
- $$->expr = new LangExpr( $1->loc, '^', $2->expr );
- };
-code_unary: '%' code_factor
- final {
- $$->expr = new LangExpr( $1->loc, '%', $2->expr );
- };
-code_unary: code_factor
- final {
- $$->expr = $1->expr;
- };
-
-# Optional "name:" capture. Creates an ObjField with a null type reference
-# (the users of opt_capture fill in typeRef), or yields 0 when absent.
-nonterm opt_capture uses var_def;
-
-opt_capture: TK_Word ':'
- final {
- $$->objField = new ObjField( $1->loc, 0, $1->data );
- };
-opt_capture:
- final {
- $$->objField = 0;
- };
-
-# Primary expressions. Each form wraps a LangTerm in a LangExpr.
-nonterm code_factor uses code_expr;
-
-# Number and string literals.
-code_factor: TK_Number
- final {
- $$->expr = new LangExpr( new LangTerm( LangTerm::NumberType, $1->data ) );
- };
-code_factor: TK_Literal
- final {
- $$->expr = new LangExpr( new LangTerm( LangTerm::StringType, $1->data ) );
- };
-# Call with optional arguments (exprVect may be 0), and plain variable use.
-code_factor: var_ref '(' opt_code_expr_list ')'
- final {
- $$->expr = new LangExpr( new LangTerm( $1->varRef, $3->exprVect ) );
- };
-code_factor: var_ref
- final {
- $$->expr = new LangExpr( new LangTerm( LangTerm::VarRefType, $1->varRef ) );
- };
-# "match var pattern": builds a Pattern from patternItemList, registers it in
-# pd->patternList and yields a MatchType term on the variable.
-code_factor: KW_Match var_ref pattern_list
- final {
- Namespace *nspace = namespaceStack.top();
- TokenRegion *region = regionStack.top();
- Pattern *pattern = new Pattern( $1->loc, nspace, region,
- patternItemList, pd->nextPatReplId++ );
- pd->patternList.append( pattern );
-
- $$->expr = new LangExpr( new LangTerm( LangTerm::MatchType, $2->varRef, pattern ) );
- };
-code_factor: KW_New code_factor
- final {
- $$->expr = new LangExpr( new LangTerm( LangTerm::NewType, $2->expr ) );
- };
-# "construct [name:] type [(field inits)] replacement": builds a Replacement
-# from replItemList, registers it in pd->replList and yields a ConstructType
-# term. A capture name is also declared as a variable in the current local
-# frame with the constructed type.
-code_factor:
- KW_Construct opt_capture type_ref opt_field_init repl_list
- final {
- Namespace *nspace = namespaceStack.top();
- TokenRegion *region = regionStack.top();
- Replacement *replacement = new Replacement( $1->loc, nspace, region,
- replItemList, pd->nextPatReplId++ );
- pd->replList.append( replacement );
-
- /* A variable reference is made only when there is a capture. */
- LangVarRef *varRef = 0;
- if ( $2->objField != 0 )
- varRef = new LangVarRef( $2->objField->loc, new QualItemVect, $2->objField->name );
-
- $$->expr = new LangExpr( new LangTerm( $1->loc, LangTerm::ConstructType,
- varRef, $2->objField, $3->typeRef, $4->fieldInitVect, replacement ) );
-
- /* Check for redeclaration. */
- if ( $2->objField != 0 ) {
- if ( pd->curLocalFrame->checkRedecl( $2->objField->name ) != 0 ) {
- error( $2->objField->loc ) << "variable " << $2->objField->name <<
- " redeclared" << endp;
- }
-
- /* Insert it into the field map. */
- $2->objField->typeRef = $3->typeRef;
- pd->curLocalFrame->insertField( $2->objField->name, $2->objField );
- }
- };
-# "parse [name:] type ( args )": yields a ParseType term carrying the target
-# type, a Parser TypeRef wrapping it, and an empty Replacement created in the
-# root region. A capture name is also declared as a variable in the current
-# local frame with the parsed type.
-code_factor: KW_Parse opt_capture type_ref '(' opt_code_expr_list ')'
- final {
- /* Get the language element. */
- Namespace *nspace = namespaceStack.top();
-
- GenericType *generic = 0;
-
- NamespaceQual *nspaceQual = new NamespaceQual(
- namespaceStack.top(), regionStack.top() );
- TypeRef *parserTypeRef = new TypeRef( TypeRef::Parser,
- InputLoc(), nspaceQual, $3->typeRef, 0 );
-
- Replacement *replacement = new Replacement( $1->loc, nspace, pd->rootRegion,
- new ReplItemList, pd->nextPatReplId++ );
- pd->replList.append( replacement );
-
- /* A variable reference is made only when there is a capture. */
- LangVarRef *varRef = 0;
- if ( $2->objField != 0 )
- varRef = new LangVarRef( $2->objField->loc, new QualItemVect, $2->objField->name );
-
- $$->expr = new LangExpr( new LangTerm( $1->loc, LangTerm::ParseType,
- varRef, $2->objField, $3->typeRef, generic, parserTypeRef, replacement ) );
- $$->expr->term->args = $5->exprVect;
-
- /* Check for redeclaration. */
- if ( $2->objField != 0 ) {
- if ( pd->curLocalFrame->checkRedecl( $2->objField->name ) != 0 ) {
- error( $2->objField->loc ) << "variable " << $2->objField->name <<
- " redeclared" << endp;
- }
-
- /* Insert it into the field map. */
- $2->objField->typeRef = $3->typeRef;
- pd->curLocalFrame->insertField( $2->objField->name, $2->objField );
- }
- };
-# "parse_stop [name:] type ( args )": same construction as the KW_Parse form
-# except that the term is created with LangTerm::ParseStopType.
-code_factor: KW_ParseStop opt_capture type_ref '(' opt_code_expr_list ')'
- final {
- /* This is a silly clone. To be fixed later. */
-
- /* Get the language element. */
- Namespace *nspace = namespaceStack.top();
-
- GenericType *generic = 0;
-
- NamespaceQual *nspaceQual = new NamespaceQual(
- namespaceStack.top(), regionStack.top() );
- TypeRef *parserTypeRef = new TypeRef( TypeRef::Parser,
- InputLoc(), nspaceQual, $3->typeRef, 0 );
-
- Replacement *replacement = new Replacement( $1->loc, nspace, pd->rootRegion,
- new ReplItemList, pd->nextPatReplId++ );
- pd->replList.append( replacement );
-
- /* A variable reference is made only when there is a capture. */
- LangVarRef *varRef = 0;
- if ( $2->objField != 0 )
- varRef = new LangVarRef( $2->objField->loc, new QualItemVect, $2->objField->name );
-
- $$->expr = new LangExpr( new LangTerm( $1->loc, LangTerm::ParseStopType,
- varRef, $2->objField, $3->typeRef, generic, parserTypeRef, replacement ) );
- $$->expr->term->args = $5->exprVect;
-
- /* Check for redeclaration. */
- if ( $2->objField != 0 ) {
- if ( pd->curLocalFrame->checkRedecl( $2->objField->name ) != 0 ) {
- error( $2->objField->loc ) << "variable " << $2->objField->name <<
- " redeclared" << endp;
- }
-
- /* Insert it into the field map. */
- $2->objField->typeRef = $3->typeRef;
- pd->curLocalFrame->insertField( $2->objField->name, $2->objField );
- }
-
- };
-code_factor: KW_TypeId '<' type_ref '>'
- final {
- $$->expr = new LangExpr( new LangTerm( $1->loc,
- LangTerm::TypeIdType, $3->typeRef ) );
- };
-code_factor: type_ref KW_In var_ref
- final {
- $$->expr = new LangExpr( new LangTerm( $2->loc,
- LangTerm::SearchType, $1->typeRef, $3->varRef ) );
- };
-code_factor: KW_Nil
- final {
- $$->expr = new LangExpr( new LangTerm( $1->loc,
- LangTerm::NilType ) );
- };
-code_factor: KW_True
- final {
- $$->expr = new LangExpr( new LangTerm( $1->loc,
- LangTerm::TrueType ) );
- };
-code_factor: KW_False
- final {
- $$->expr = new LangExpr( new LangTerm( $1->loc,
- LangTerm::FalseType ) );
- };
-code_factor: '(' code_expr ')'
- final {
- $$->expr = $2->expr;
- };
-code_factor: KW_MakeTree '(' opt_code_expr_list ')'
- final {
- $$->expr = new LangExpr( new LangTerm( $1->loc,
- LangTerm::MakeTreeType, $3->exprVect ) );
- };
-code_factor: KW_MakeToken '(' opt_code_expr_list ')'
- final {
- $$->expr = new LangExpr( new LangTerm( $1->loc,
- LangTerm::MakeTokenType, $3->exprVect ) );
- };
-code_factor: KW_Deref code_expr
- final {
- $$->expr = new LangExpr( $1->loc, OP_Deref, $2->expr );
- };
-code_factor: string_list
- final {
- $$->expr = new LangExpr( new LangTerm( replItemList ) );
- };
-
-nonterm opt_field_init uses field_init_list;
-
-opt_field_init: '(' opt_field_init_list ')'
- final {
- $$->fieldInitVect = $2->fieldInitVect;
- };
-opt_field_init:
- final {
- $$->fieldInitVect = 0;
- };
-
-nonterm opt_field_init_list uses field_init_list;
-
-opt_field_init_list: field_init_list
- final {
- $$->fieldInitVect = $1->fieldInitVect;
- };
-opt_field_init_list:
- final {
- $$->fieldInitVect = 0;
- };
-
-nonterm field_init_list
-{
- FieldInitVect *fieldInitVect;
-};
-
-field_init_list: field_init_list field_init
- final {
- $$->fieldInitVect = $1->fieldInitVect;
- $$->fieldInitVect->append( $2->fieldInit );
- };
-field_init_list: field_init
- final {
- $$->fieldInitVect = new FieldInitVect;
- $$->fieldInitVect->append( $1->fieldInit );
- };
-
-nonterm field_init
-{
- FieldInit *fieldInit;
-};
-
-field_init: code_expr
- final {
- $$->fieldInit = new FieldInit( InputLoc(), "_name", $1->expr );
- };
-
-#
-# Regular Expressions
-#
-
-nonterm opt_rl_join uses rl_join;
-
-# A join followed by an optional trailing context. The join and the context
-# are both passed up; when a context is present it is also attached to the
-# join itself together with a freshly created mark action.
-opt_rl_join: rl_join opt_context
- final {
- $$->join = $1->join;
- $$->context = $2->context;
-
- if ( $2->context != 0 ) {
- /* Create the single mark action for this join and register it in the
- * global action list. It takes the next match-end number; presumably
- * it marks where the match proper ends and the context begins. */
- Action *mark = new Action( MarkMark, pd->nextMatchEndNum++ );
- pd->actionList.append( mark );
-
- $$->join->context = $2->context;
- $$->join->mark = mark;
- }
- };
-
-opt_rl_join:
- final {
- $$->join = 0;
- $$->context = 0;
- };
-
-nonterm rl_join
-{
- Join *join;
- Join *context;
-};
-
-rl_join:
- rl_join ',' rl_expr
- final {
- /* Append the expression to the list and return it. */
- $1->join->exprList.append( $3->expression );
- $$->join = $1->join;
- };
-rl_join:
- rl_expr
- final {
- $$->join = new Join( $1->expression );
- };
-
-# Context at the end of a pattern that is not included in the match
-nonterm opt_context uses rl_join;
-
-opt_context: '@' rl_join final { $$->context = $2->join; };
-opt_context: final { $$->context = 0; };
-
-nonterm rl_expr
-{
- Expression *expression;
-};
-
-rl_expr:
- rl_expr '|' rl_term_short final {
- $$->expression = new Expression( $1->expression,
- $3->term, Expression::OrType );
- };
-rl_expr:
- rl_expr '&' rl_term_short final {
- $$->expression = new Expression( $1->expression,
- $3->term, Expression::IntersectType );
- };
-# This priority specification overrides the innermost parsing strategy which
-# results in an ordered-choice interpretation of the grammar.
-rl_expr:
- rl_expr '-' rl_term_short final {
- $$->expression = new Expression( $1->expression,
- $3->term, Expression::SubtractType );
- };
-rl_expr:
- rl_expr TK_DashDash rl_term_short final {
- $$->expression = new Expression( $1->expression,
- $3->term, Expression::StrongSubtractType );
- };
-rl_expr:
- rl_term_short final {
- $$->expression = new Expression( $1->term );
- };
-
-nonterm rl_term_short
-{
- Term *term;
-};
-
-shortest rl_term_short;
-
-rl_term_short: rl_term
- final { $$->term = $1->term; };
-
-nonterm rl_term
-{
- Term *term;
-};
-
-rl_term:
- rl_term factor_with_label final {
- $$->term = new Term( $1->term, $2->factorWithAug );
- };
-rl_term:
- rl_term '.' factor_with_label final {
- $$->term = new Term( $1->term, $3->factorWithAug );
- };
-rl_term:
- rl_term TK_ColonGt factor_with_label final {
- $$->term = new Term( $1->term, $3->factorWithAug, Term::RightStartType );
- };
-rl_term:
- rl_term TK_ColonGtGt factor_with_label final {
- $$->term = new Term( $1->term, $3->factorWithAug, Term::RightFinishType );
- };
-rl_term:
- rl_term TK_LtColon factor_with_label final {
- $$->term = new Term( $1->term,
- $3->factorWithAug, Term::LeftType );
- };
-rl_term:
- factor_with_label final {
- $$->term = new Term( $1->factorWithAug );
- };
-
-nonterm factor_with_label
-{
- FactorWithAug *factorWithAug;
-};
-
-factor_with_label:
- factor_with_ep final {
- $$->factorWithAug = $1->factorWithAug;
- };
-
-# A labelled factor, "name: factor". The factor is passed through unchanged
-# apart from two extra actions; the label becomes a "str" field of the current
-# object definition and a capture record ties the two actions to that field.
-factor_with_label:
- TK_Word ':' factor_with_label final {
- $$->factorWithAug = $3->factorWithAug;
-
- /* Report a name clash. Processing continues after the error is issued. */
- if ( pd->objectDef->checkRedecl( $1->data ) != 0 )
- error($1->loc) << "label name \"" << $1->data << "\" already in use" << endp;
-
- /* Create the object field. */
- NamespaceQual *qual = new NamespaceQual( namespaceStack.top(), regionStack.top() );
- TypeRef *typeRef = new TypeRef( $1->loc, qual, "str" );
- ObjField *objField = new ObjField( $1->loc, typeRef, $1->data );
-
- /* Insert it into the map. */
- pd->objectDef->insertField( $1->data, objField );
-
- /* Create the enter and leaving actions that will mark the substring.
- * Each one consumes its own match-end number. */
- Action *enter = new Action( MarkMark, pd->nextMatchEndNum++ );
- Action *leave = new Action( MarkMark, pd->nextMatchEndNum++ );
- pd->actionList.append( enter );
- pd->actionList.append( leave );
-
- /* Add entering and leaving actions. */
- $$->factorWithAug->actions.append( ParserAction( $1->loc, at_start, 0, enter ) );
- $$->factorWithAug->actions.append( ParserAction( $1->loc, at_leave, 0, leave ) );
-
- /* Remember which pair of actions belongs to which field. */
- reCaptureVect.append( ReCapture( enter, leave, objField ) );
- };
-
-nonterm factor_with_ep
-{
- FactorWithAug *factorWithAug;
-};
-
-factor_with_ep:
- factor_with_aug final {
- $$->factorWithAug = $1->factorWithAug;
- };
-
-nonterm factor_with_aug
-{
- FactorWithAug *factorWithAug;
-};
-
-factor_with_aug:
- factor_with_rep final {
- $$->factorWithAug = new FactorWithAug( $1->factorWithRep );
- };
-
-
-# The fourth level of precedence. These are the trailing unary operators that
-# allow for repetition.
-
-nonterm factor_with_rep
-{
- FactorWithRep *factorWithRep;
-};
-
-factor_with_rep:
- factor_with_rep '*' final {
- $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep,
- 0, 0, FactorWithRep::StarType );
- };
-factor_with_rep:
- factor_with_rep TK_StarStar final {
- $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep,
- 0, 0, FactorWithRep::StarStarType );
- };
-factor_with_rep:
- factor_with_rep '?' final {
- $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep,
- 0, 0, FactorWithRep::OptionalType );
- };
-factor_with_rep:
- factor_with_rep '+' final {
- $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep,
- 0, 0, FactorWithRep::PlusType );
- };
-factor_with_rep:
- factor_with_rep '{' factor_rep_num '}' final {
- $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep,
- $3->rep, 0, FactorWithRep::ExactType );
- };
-factor_with_rep:
- factor_with_rep '{' ',' factor_rep_num '}' final {
- $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep,
- 0, $4->rep, FactorWithRep::MaxType );
- };
-factor_with_rep:
- factor_with_rep '{' factor_rep_num ',' '}' final {
- $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep,
- $3->rep, 0, FactorWithRep::MinType );
- };
-factor_with_rep:
- factor_with_rep '{' factor_rep_num ',' factor_rep_num '}' final {
- $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep,
- $3->rep, $5->rep, FactorWithRep::RangeType );
- };
-factor_with_rep:
- factor_with_neg final {
- $$->factorWithRep = new FactorWithRep(
- $1->factorWithNeg->loc, $1->factorWithNeg );
- };
-
-nonterm factor_rep_num
-{
- int rep;
-};
-
-factor_rep_num:
- TK_UInt final {
- // Convert the priority number to a long. Check for overflow.
- errno = 0;
- int rep = strtol( $1->data, 0, 10 );
- if ( errno == ERANGE && rep == LONG_MAX ) {
- // Repetition too large. Recover by returing repetition 1. */
- error($1->loc) << "repetition number " << $1->data << " overflows" << endl;
- $$->rep = 1;
- }
- else {
- // Cannot be negative, so no overflow.
- $$->rep = rep;
- }
- };
-
-
-#
-# The fifth level up in precedence. Negation.
-#
-
-nonterm factor_with_neg
-{
- FactorWithNeg *factorWithNeg;
-};
-
-factor_with_neg:
- '!' factor_with_neg final {
- $$->factorWithNeg = new FactorWithNeg( $1->loc,
- $2->factorWithNeg, FactorWithNeg::NegateType );
- };
-factor_with_neg:
- '^' factor_with_neg final {
- $$->factorWithNeg = new FactorWithNeg( $1->loc,
- $2->factorWithNeg, FactorWithNeg::CharNegateType );
- };
-factor_with_neg:
- rl_factor final {
- $$->factorWithNeg = new FactorWithNeg( $1->factor->loc, $1->factor );
- };
-
-nonterm rl_factor
-{
- Factor *factor;
-};
-
-rl_factor:
- TK_Literal final {
- /* Create a new factor node going to a concat literal. */
- $$->factor = new Factor( new Literal( $1->loc, $1->data, Literal::LitString ) );
- };
-rl_factor:
- alphabet_num final {
- /* Create a new factor node going to a literal number. */
- $$->factor = new Factor( new Literal( $1->loc,
- $1->data, Literal::Number ) );
- };
-# A reference to a named regular definition. The name is looked up in the
-# current namespace and then in each enclosing namespace in turn. On any
-# failure an error is issued and the factor is set to null.
-rl_factor:
- TK_Word final {
- /* Find the named graph. */
- Namespace *nspace = namespaceStack.top();
-
- /* Walk outwards through the namespaces. The loop is left with nspace
- * still non-null as soon as the name is found. */
- while ( nspace != 0 ) {
- GraphDictEl *gdNode = nspace->rlMap.find( $1->data );
- if ( gdNode != 0 ) {
- if ( gdNode->isInstance ) {
- /* Recover by returning null as the factor node. */
- error($1->loc) << "references to graph instantiations not allowed "
- "in expressions" << endl;
- $$->factor = 0;
- }
- else {
- /* Create a factor node that is a lookup of an expression. */
- $$->factor = new Factor( $1->loc, gdNode->value );
- }
- break;
- }
-
- nspace = nspace->parentNamespace;
- }
-
- /* A null nspace here means every namespace was searched without a hit. */
- if ( nspace == 0 ) {
- /* Recover by returning null as the factor node. */
- error($1->loc) << "graph lookup of \"" << $1->data << "\" failed" << endl;
- $$->factor = 0;
- }
- };
-rl_factor:
- TK_SqOpen regular_expr_or_data TK_SqClose final {
- /* Create a new factor node going to an OR expression. */
- $$->factor = new Factor( new ReItem( $1->loc, $2->reOrBlock, ReItem::OrBlock ) );
- };
-rl_factor:
- TK_SqOpenNeg regular_expr_or_data TK_SqClose final {
- /* Create a new factor node going to a negated OR expression. */
- $$->factor = new Factor( new ReItem( $1->loc, $2->reOrBlock, ReItem::NegOrBlock ) );
- };
-rl_factor:
- range_lit TK_DotDot range_lit final {
- /* Create a new factor node going to a range. */
- $$->factor = new Factor( new Range( $1->literal, $3->literal ) );
- };
-rl_factor:
- '(' rl_join ')' final {
- /* Create a new factor going to a parenthesized join. */
- $$->factor = new Factor( $2->join );
- };
-
-nonterm range_lit
-{
- Literal *literal;
-};
-
-# Literals which can be the end points of ranges.
-range_lit:
- TK_Literal final {
- /* Range literals must have only one char. We restrict this in the parse tree. */
- $$->literal = new Literal( $1->loc, $1->data, Literal::LitString );
- };
-range_lit:
- alphabet_num final {
- /* Create a new literal number. */
- $$->literal = new Literal( $1->loc, $1->data, Literal::Number );
- };
-
-nonterm alphabet_num uses token_data;
-
-# Any form of a number that can be used as a basic machine.
-alphabet_num:
- TK_UInt final {
- $$->loc = $1->loc;
- $$->data = $1->data;
- };
-alphabet_num:
- '-' TK_UInt final {
- $$->loc = $1->loc;
- $$->data = '+';
- $$->data += $2->data;
- };
-alphabet_num:
- TK_Hex final {
- $$->loc = $1->loc;
- $$->data = $1->data;
- };
-
-#
-# Regular Expressions.
-#
-
-
-# The data inside of a [] expression in a regular expression. Accepts any
-# number of characters or ranges.
-nonterm regular_expr_or_data
-{
- ReOrBlock *reOrBlock;
-};
-
-regular_expr_or_data:
- regular_expr_or_data regular_expr_or_char final {
- /* An optimization to lessen the tree size. If an or char is directly
- * under the left side on the right and the right side is another or
- * char then paste them together and return the left side. Otherwise
- * just put the two under a new or data node. */
- if ( $2->reOrItem->type == ReOrItem::Data &&
- $1->reOrBlock->type == ReOrBlock::RecurseItem &&
- $1->reOrBlock->item->type == ReOrItem::Data )
- {
- /* Append the right side to right side of the left and toss the
- * right side. */
- $1->reOrBlock->item->data += $2->reOrItem->data;
- delete $2->reOrItem;
- $$->reOrBlock = $1->reOrBlock;
- }
- else {
- /* Can't optimize, put the left and right under a new node. */
- $$->reOrBlock = new ReOrBlock( $1->reOrBlock, $2->reOrItem );
- }
- };
-regular_expr_or_data:
- final {
- $$->reOrBlock = new ReOrBlock();
- };
-
-# A single character inside of an or expression. Can either be a character or a
-# set of characters.
-nonterm regular_expr_or_char
-{
- ReOrItem *reOrItem;
-};
-
-regular_expr_or_char:
- TK_ReChar final {
- $$->reOrItem = new ReOrItem( $1->loc, $1->data );
- };
-regular_expr_or_char:
- TK_ReChar TK_Dash TK_ReChar final {
- $$->reOrItem = new ReOrItem( $2->loc, $1->data[0], $3->data[0] );
- };
-
-# A local state reference. Cannot have :: prefix.
-local_state_ref:
- no_name_sep state_ref_names;
-
-# Clear the name ref structure.
-no_name_sep:
- final {
- nameRef.empty();
- };
-
-# A qualified state reference.
-state_ref: opt_name_sep state_ref_names;
-
-# Optional leading name separator.
-opt_name_sep:
- TK_NameSep
- final {
- /* Insert an initial null pointer val to indicate the existence of the
- * initial name separator. */
- nameRef.setAs( 0 );
- };
-opt_name_sep:
- final {
- nameRef.empty();
- };
-
-# List of names separated by ::
-state_ref_names:
- state_ref_names TK_NameSep TK_Word
- final {
- nameRef.append( $3->data );
- };
-state_ref_names:
- TK_Word
- final {
- nameRef.append( $1->data );
- };
-
-nonterm opt_commit
-{
- bool commit;
-};
-
-opt_commit: final { $$->commit = false; };
-opt_commit: KW_Commit final { $$->commit = true; };
-
-#
-# Grammar Finished
-#
-
- write types;
- write data;
-}%%
-
-/* Prepare the parser for a new input: create the root namespace and the root
- * token region, create the global object and the root local frame, initialize
- * the graph dictionary, run the generated parser initialization and finally
- * build the argv type reference. The statement order matters because object
- * ids are handed out from pd->nextObjectId as the objects are created. */
-void ColmParser::init()
-{
- /* Set up the root namespace. */
- const char *rootNamespaceName = "___ROOT_NAMESPACE";
- Namespace *rootNamespace = new Namespace( InputLoc(),
- rootNamespaceName, pd->namespaceList.length(), 0 );
- pd->namespaceList.append( rootNamespace );
- namespaceStack.push( rootNamespace );
- pd->rootNamespace = rootNamespace;
-
- /* Set up the root token region. */
- const char *rootRegionName = "___ROOT_REGION";
-
- TokenRegion *rootRegion = new TokenRegion( InputLoc(), rootRegionName,
- pd->regionList.length(), 0 );
- pd->regionList.append( rootRegion );
- /* Register the region by name in the root namespace pushed above. */
- addRegionDef( InputLoc(), namespaceStack.top(), rootRegionName, rootRegion );
-
- regionStack.push( rootRegion );
-
- pd->rootRegion = rootRegion;
-
- /* Set up the global object. */
- String global = "global";
- pd->globalObjectDef = new ObjectDef( ObjectDef::UserType,
- global, pd->nextObjectId++ );
-
- /* The eofTokenRegion defaults to the root region. */
- pd->eofTokenRegion = rootRegion;
-
- /* Initialize the dictionary of graphs. This is our symbol table. The
- * initialization needs to be done on construction which happens at the
- * beginning of a machine spec so any assignment operators can reference
- * the builtins. */
- pd->initGraphDict();
-
- /* The root local frame is also the current one until another is entered. */
- pd->rootLocalFrame = new ObjectDef( ObjectDef::FrameType,
- "local", pd->nextObjectId++ );
- pd->curLocalFrame = pd->rootLocalFrame;
-
- /* Generated initialization of the parser state. */
- %% write init;
-
- addArgvList();
-}
-
-/* Build the type reference stored in pd->argvTypeRef: a list type whose
- * element type is "str". Both type references get their own qualifier made
- * from the current namespace and region. */
-void ColmParser::addArgvList()
-{
-	/* Element type first. */
-	NamespaceQual *elementQual = new NamespaceQual(
-			namespaceStack.top(), regionStack.top() );
-	TypeRef *elementType = new TypeRef( InputLoc(), elementQual, "str" );
-
-	/* Then the list type that wraps it. */
-	NamespaceQual *listQual = new NamespaceQual(
-			namespaceStack.top(), regionStack.top() );
-	TypeRef *listType = new TypeRef( TypeRef::List, InputLoc(),
-			listQual, elementType, 0 );
-
-	pd->argvTypeRef = listType;
-}
-
-/* Feed one language element to the generated parser. Returns 0 when no
- * errors have been counted and -1 otherwise. */
-int ColmParser::parseLangEl( int type, const Token *token )
-{
-	%% write exec;
-
-	if ( errCount != 0 )
-		return -1;
-	return 0;
-}
-
-/* Enter a named regular definition into the namespace's rlMap. A name that
- * is already present is reported as an error and the new definition is
- * dropped. */
-void ColmParser::addRegularDef( const InputLoc &loc, Namespace *nspace,
-		const String &name, Join *join )
-{
-	GraphDictEl *entry = nspace->rlMap.insert( name );
-	if ( entry == 0 ) {
-		/* Insert refused: recover by ignoring the duplicate. */
-		error(loc) << "regular definition \"" << name << "\" already exists" << endl;
-		return;
-	}
-
-	/* Fresh entry, fill it in. */
-	entry->loc = loc;
-	entry->isInstance = false;
-	entry->value = new VarDef( name, join );
-}
-
-/* Create a token region named scannerName as a child of the region on top of
- * the region stack, record it in the region list and register it by name in
- * the current namespace. Returns the new region. */
-TokenRegion *ColmParser::createRegion( String &scannerName )
-{
-	TokenRegion *parent = regionStack.top();
-
-	/* The current length of the region list is passed to the constructor. */
-	TokenRegion *created = new TokenRegion( InputLoc(), scannerName,
-			pd->regionList.length(), parent );
-
-	parent->childRegions.append( created );
-	pd->regionList.append( created );
-
-	addRegionDef( InputLoc(), namespaceStack.top(), scannerName, created );
-	return created;
-}
-
-
-/* Enter a named token region into the namespace's graphDict. New entries are
- * always flagged as instances and appended to the instance list. A name that
- * is already present is reported as an error and the region is not entered.
- * NOTE(review): the error text says "regular definition" although a region is
- * being defined; it is kept unchanged here. */
-void ColmParser::addRegionDef( const InputLoc &loc, Namespace *nspace,
-		const String &name, TokenRegion *tokenRegion )
-{
-	RegionGraphDictEl *entry = nspace->graphDict.insert( name );
-	if ( entry == 0 ) {
-		/* Insert refused: recover by ignoring the duplicate. */
-		error(loc) << "regular definition \"" << name << "\" already exists" << endl;
-		return;
-	}
-
-	/* Fresh entry, fill it in. */
-	entry->value = new RegionDef( name, tokenRegion );
-	entry->isInstance = true;
-	entry->loc = loc;
-
-	/* It is an instance, so it goes on the instance list. */
-	pd->instanceList.append( entry );
-}
-
-ostream &ColmParser::parse_error( int tokId, Token &token )
-{
- /* Maintain the error count. */
- gblErrorCount += 1;
-
- cerr << token.loc.fileName << ":" << token.loc.line << ":" << token.loc.col << ": ";
- cerr << "at token ";
- if ( tokId < 128 )
- cerr << "\"" << ColmParser_lelNames[tokId] << "\"";
- else
- cerr << ColmParser_lelNames[tokId];
- if ( token.data != 0 )
- cerr << " with data \"" << token.data << "\"";
- cerr << ": ";
-
- return cerr;
-}
-
-/* Wrap the raw token text and location in a Token and hand it to the parser.
- * A negative parser result is reported as a parse error and ends the process
- * with exit status 1; otherwise the parser result is returned. */
-int ColmParser::token( InputLoc &loc, int tokId, char *tokstart, int toklen )
-{
-	Token tok;
-	tok.loc = loc;
-
-	/* Tokens without text keep the default-constructed data. */
-	if ( toklen > 0 )
-		tok.data.setAs( tokstart, toklen );
-
-	int result = parseLangEl( tokId, &tok );
-	if ( result < 0 ) {
-		parse_error(tokId, tok) << "parse error" << endl;
-		exit(1);
-	}
-
-	return result;
-}
diff --git a/colm/lmscan.h b/colm/lmscan.h
deleted file mode 100644
index 5badaed5..00000000
--- a/colm/lmscan.h
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- * Copyright 2007-2012 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Colm.
- *
- * Colm is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Colm is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Colm; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#ifndef _RLSCAN_H
-#define _RLSCAN_H
-
-#include <iostream>
-#include <fstream>
-#include <string.h>
-
-#include "global.h"
-#include "lmparse.h"
-#include "parsedata.h"
-#include "avltree.h"
-#include "vector.h"
-#include "buffer.h"
-
-using std::ifstream;
-using std::istream;
-using std::ostream;
-using std::cout;
-using std::cerr;
-using std::endl;
-
-extern char *Parser_lelNames[];
-
-/* This is used for tracking the current stack of include files. It is used
- * to detect a recursive include structure. */
-struct IncludeStackItem
-{
- /* Stores the pointer as given; the string itself is not copied. */
- IncludeStackItem( const char *fileName )
- : fileName(fileName) {}
-
- /* Name of the included file. */
- const char *fileName;
-};
-
-typedef Vector<IncludeStackItem> IncludeStack;
-typedef Vector<const char *> ArgsVector;
-
-extern ArgsVector includePaths;
-
-/* Scanner for one input file. It reads from an input stream, keeps track of
- * the current line and column, and passes tokens on through the token()
- * overloads. */
-struct ColmScanner
-{
- /* The constructor stores its arguments and starts counting at line 1,
- * column 1. It does not initialize cs, word, lit, word_len, lit_len, ts
- * or te. */
- ColmScanner( const char *fileName, istream &input,
- ostream &output, ColmParser *parser, int includeDepth )
- :
- fileName(fileName), input(input), output(output),
- includeDepth(includeDepth),
- line(1), column(1), lastnl(0),
- parser(parser),
- parserExistsError(false),
- whitespaceOn(true)
- {
- }
-
- /* Include file handling. */
- ifstream *tryOpenInclude( char **pathChecks, long &found );
- char **makeIncludePathChecks( const char *thisFileName, const char *fileName );
- bool recursiveInclude( const char *inclFileName );
-
- void sectionParseInit();
- /* Token emitters: with text given as a range, as a single character, or
- * with no text at all. */
- void token( int type, char *start, char *end );
- void token( int type, char c );
- void token( int type );
- void updateCol();
- void endSection();
- void scan();
- void eof();
- ostream &scan_error();
-
- /* Name of the file being scanned; used in error messages and in the
- * locations attached to tokens. */
- const char *fileName;
- istream &input;
- ostream &output;
- /* Nesting level of this scanner; a scanner created for an included file
- * gets the including scanner's depth plus one. */
- int includeDepth;
-
- /* Current state of a generated state machine (presumably written by the
- * generated init and exec code). */
- int cs;
- /* Current line number, starting at 1. */
- int line;
- /* Text remembered from earlier tokens, e.g. the literal of an include
- * statement. */
- char *word, *lit;
- int word_len, lit_len;
- InputLoc sectionLoc;
- /* Start and end of the token currently being matched. */
- char *ts, *te;
- /* Current column, and the position of the last newline seen since the
- * column was last updated (null if there was none). */
- int column;
- char *lastnl;
-
- /* Set by machine statements, these persist from section to section
- * allowing for unnamed sections. */
- ColmParser *parser;
- IncludeStack includeStack;
-
- /* This is set if ragel has already emitted an error stating that
- * no section name has been seen and thus no parser exists. */
- bool parserExistsError;
-
- /* This is for inline code. By default it is on. It goes off for
- * statements and values in inline blocks which are parsed. */
- bool whitespaceOn;
-
- /* Collects the characters of a literal pattern until they are sent out
- * as one token. */
- Buffer litBuf;
-};
-
-#endif /* _RLSCAN_H */
diff --git a/colm/lmscan.rl b/colm/lmscan.rl
deleted file mode 100644
index 070a1e66..00000000
--- a/colm/lmscan.rl
+++ /dev/null
@@ -1,636 +0,0 @@
-/*
- * Copyright 2006-2012 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Colm.
- *
- * Colm is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Colm is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Colm; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include <iostream>
-#include <fstream>
-#include <string.h>
-
-#include "global.h"
-#include "lmscan.h"
-#include "lmparse.h"
-#include "parsedata.h"
-#include "avltree.h"
-#include "vector.h"
-
-//#define PRINT_TOKENS
-
-using std::ifstream;
-using std::istream;
-using std::ostream;
-using std::cout;
-using std::cerr;
-using std::endl;
-
-%%{
- machine section_parse;
- alphtype int;
- write data;
-}%%
-
-/* Run the generated initialization code of the section_parse machine. */
-void ColmScanner::sectionParseInit()
-{
- %% write init;
-}
-
-ostream &ColmScanner::scan_error()
-{
- /* Maintain the error count. */
- gblErrorCount += 1;
- cerr << fileName << ":" << line << ":" << column << ": ";
- return cerr;
-}
-
-/* Tell whether inclFileName is already on this scanner's include stack. File
- * names are compared as strings. */
-bool ColmScanner::recursiveInclude( const char *inclFileName )
-{
-	bool onStack = false;
-	for ( IncludeStack::Iter item = includeStack; !onStack && item.lte(); item++ )
-		onStack = strcmp( item->fileName, inclFileName ) == 0;
-	return onStack;
-}
-
-/* Advance the column counter past the current token. Counting starts at the
- * last newline seen if there was one, otherwise at the token start. The
- * newline marker is cleared afterwards. */
-void ColmScanner::updateCol()
-{
-	char *countFrom = ( lastnl != 0 ) ? lastnl : ts;
-	column += te - countFrom;
-	lastnl = 0;
-}
-
-/* Send a token whose text is the single character c. */
-void ColmScanner::token( int type, char c )
-{
-	char *first = &c;
-	token( type, first, first + 1 );
-}
-
-/* Send a token that carries no text. */
-void ColmScanner::token( int type )
-{
-	char *noText = 0;
-	token( type, noText, noText );
-}
-
-/* A path counts as absolute when its first character is a slash. */
-bool isAbsolutePath( const char *path )
-{
-	return *path == '/';
-}
-
-/* Try the candidate paths in order and return a new stream opened on the
- * first one that can be opened; found receives its index in pathChecks. When
- * none can be opened, found is set to -1 and null is returned. pathChecks
- * must end with a null entry. The caller owns the returned stream. */
-ifstream *ColmScanner::tryOpenInclude( char **pathChecks, long &found )
-{
-	ifstream *stream = new ifstream;
-
-	for ( long index = 0; pathChecks[index] != 0; index++ ) {
-		stream->open( pathChecks[index] );
-		if ( stream->is_open() ) {
-			found = index;
-			return stream;
-		}
-	}
-
-	/* Nothing could be opened. */
-	delete stream;
-	found = -1;
-	return 0;
-}
-
-/* Return a new[]-allocated, NUL-terminated string made of the first dirLen
- * characters of dir, an optional '/' and the first nameLen characters of
- * name. */
-static char *joinIncludePath( const char *dir, long dirLen, bool addSlash,
-		const char *name, long nameLen )
-{
-	long sepLen = addSlash ? 1 : 0;
-	char *path = new char[dirLen + sepLen + nameLen + 1];
-	memcpy( path, dir, dirLen );
-	if ( addSlash )
-		path[dirLen] = '/';
-	memcpy( path + dirLen + sepLen, name, nameLen );
-	path[dirLen + sepLen + nameLen] = 0;
-	return path;
-}
-
-/* Build the list of paths to try when opening the include file fileName from
- * within the file thisFileName.
- *
- * An absolute fileName yields just that name. Otherwise the first candidate is
- * fileName relative to the directory of thisFileName, followed by fileName
- * below each entry of includePaths, in order.
- *
- * Returns a new[]-allocated array terminated by a null entry. Every entry is
- * itself new[]-allocated, so all of them can be released the same way, and no
- * temporary copy of fileName is left behind. */
-char **ColmScanner::makeIncludePathChecks( const char *thisFileName, const char *fileName )
-{
-	char **checks = 0;
-	long nextCheck = 0;
-	long length = strlen(fileName);
-
-	/* Absolute path? */
-	if ( isAbsolutePath( fileName ) ) {
-		checks = new char*[2];
-		checks[nextCheck++] = joinIncludePath( "", 0, false, fileName, length );
-	}
-	else {
-		/* One slot for the current file's directory, one per include path
-		 * and one for the terminating null. */
-		checks = new char *[2 + includePaths.length()];
-
-		/* Search from the location of the current file. Without a slash in
-		 * thisFileName the directory part is empty. */
-		const char *lastSlash = strrchr( thisFileName, '/' );
-		long givenPathLen = lastSlash == 0 ? 0 : ( lastSlash - thisFileName ) + 1;
-		checks[nextCheck++] = joinIncludePath( thisFileName, givenPathLen,
-				false, fileName, length );
-
-		/* Search from the include paths given on the command line. */
-		for ( ArgsVector::Iter incp = includePaths; incp.lte(); incp++ ) {
-			checks[nextCheck++] = joinIncludePath( *incp, strlen( *incp ),
-					true, fileName, length );
-		}
-	}
-
-	checks[nextCheck] = 0;
-	return checks;
-}
-
-
-%%{
- machine section_parse;
- import "lmparse.h";
-
- action clear_words { word = lit = 0; word_len = lit_len = 0; }
- action store_lit { lit = tokdata; lit_len = toklen; }
-
- action mach_err { scan_error() << "bad machine statement" << endl; }
- action incl_err { scan_error() << "bad include statement" << endl; }
- action write_err { scan_error() << "bad write statement" << endl; }
-
- action handle_include
- {
- String src( lit, lit_len );
- String fileName;
- bool unused;
-
- /* Need a location. */
- InputLoc here;
- here.fileName = fileName;
- here.line = line;
- here.col = column;
-
- prepareLitString( fileName, unused, src, here );
- char **checks = makeIncludePathChecks( this->fileName, fileName );
-
- /* Open the input file for reading. */
- long found = 0;
- ifstream *inFile = tryOpenInclude( checks, found );
- if ( inFile == 0 ) {
- scan_error() << "include: could not open " <<
- fileName << " for reading" << endl;
- }
- else {
- /* Only proceed with the include if it was found. */
- if ( recursiveInclude( checks[found] ) )
- scan_error() << "include: this is a recursive include operation" << endl;
-
- /* Check for a recursive include structure. Add the current file/section
- * name then check if what we are including is already in the stack. */
- includeStack.append( IncludeStackItem( checks[found] ) );
-
- ColmScanner *scanner = new ColmScanner( fileName, *inFile, output, parser, includeDepth+1 );
- scanner->scan();
- delete inFile;
-
- /* Remove the last element (len-1) */
- includeStack.remove( -1 );
-
- delete scanner;
- }
- }
-
- include_target =
- TK_Literal >clear_words @store_lit;
-
- include_stmt =
- ( KW_Include include_target ) @handle_include
- <>err incl_err <>eof incl_err;
-
- # Runs for every token that is not part of an include statement: attaches
- # the current location to the token and passes it on to the parser.
- action handle_token
- {
-// cout << Parser_lelNames[type] << " ";
-// if ( start != 0 ) {
-// cout.write( start, end-start );
-// }
-// cout << endl;
-
- InputLoc loc;
-
- /* Optional trace of every token sent to the parser. */
- #ifdef PRINT_TOKENS
- cerr << "scanner:" << line << ":" << column <<
- ": sending token to the parser " << Parser_lelNames[*p];
- cerr << " " << toklen;
- if ( tokdata != 0 )
- cerr << " " << tokdata;
- cerr << endl;
- #endif
-
- loc.fileName = fileName;
- loc.line = line;
- loc.col = column;
-
- /* A token whose text ends in a newline is reported one line earlier
- * (presumably because the line counter has already moved past it). */
- if ( tokdata != 0 && tokdata[toklen-1] == '\n' )
- loc.line -= 1;
-
- parser->token( loc, type, tokdata, toklen );
- }
-
- # Catch everything else.
- everything_else = ^( KW_Include ) @handle_token;
-
- main := (
- include_stmt |
- everything_else
- )*;
-}%%
-
-/* Send one token through the section_parse machine. The token type is the
- * machine's single input symbol. When text is given (start is non-null) the
- * range start..end is copied into a NUL-terminated buffer that the machine's
- * actions see as tokdata with length toklen. The column counter is updated
- * afterwards. */
-void ColmScanner::token( int type, char *start, char *end )
-{
-	char *tokdata = 0;
-	int toklen = 0;
-
-	/* The machine runs over exactly one symbol, the token type. */
-	int *p = &type;
-	int *pe = &type + 1;
-	int *eof = 0;
-
-	if ( start != 0 ) {
-		toklen = end-start;
-		tokdata = new char[toklen+1];
-		memcpy( tokdata, start, toklen );
-		tokdata[toklen] = 0;
-	}
-
-	%%{
-		machine section_parse;
-		write exec;
-	}%%
-
-	/* Release the temporary copy. The parser copies the text into the token
-	 * it builds, and the include handling reads the stored literal within
-	 * the same machine run, so nothing uses the buffer after this point. */
-	delete[] tokdata;
-
-	updateCol();
-}
-
-/* Placeholder for end-of-section handling. It currently does nothing. */
-void ColmScanner::endSection( )
-{
- /* Execute the eof actions for the section parser. */
- /* Probably use: token( -1 ); */
-}
-
-%%{
- machine rlscan;
-
- # This is sent by the driver code.
- EOF = 0;
-
- action inc_nl {
- lastnl = p;
- column = 0;
- line++;
- }
- NL = '\n' @inc_nl;
-
- # Identifiers, numbers, comments, and other common things.
- ident = ( alpha | '_' ) ( alpha |digit |'_' )*;
- number = digit+;
- hex_number = '0x' [0-9a-fA-F]+;
-
- # These literal forms are common to C-like host code and ragel.
- s_literal = "'" ([^'\\] | NL | '\\' (any | NL))* "'";
- d_literal = '"' ([^"\\] | NL | '\\' (any | NL))* '"';
-
- whitespace = [ \t] | NL;
- pound_comment = '#' [^\n]* NL;
-
- or_literal := |*
- # Escape sequences in OR expressions.
- '\\0' => { token( TK_ReChar, '\0' ); };
- '\\a' => { token( TK_ReChar, '\a' ); };
- '\\b' => { token( TK_ReChar, '\b' ); };
- '\\t' => { token( TK_ReChar, '\t' ); };
- '\\n' => { token( TK_ReChar, '\n' ); };
- '\\v' => { token( TK_ReChar, '\v' ); };
- '\\f' => { token( TK_ReChar, '\f' ); };
- '\\r' => { token( TK_ReChar, '\r' ); };
- '\\\n' => { updateCol(); };
- '\\' any => { token( TK_ReChar, ts+1, te ); };
-
- # Range dash in an OR expression.
- '-' => { token( TK_Dash, 0, 0 ); };
-
- # Terminate an OR expression.
- ']' => { token( TK_SqClose ); fret; };
-
- EOF => {
- scan_error() << "unterminated OR literal" << endl;
- };
-
- # Characters in an OR expression.
- [^\]] => { token( TK_ReChar, ts, te ); };
-
- *|;
-
- regular_type := |*
- # Identifiers.
- ident => { token( TK_Word, ts, te ); } ;
-
- # Numbers
- number => { token( TK_UInt, ts, te ); };
- hex_number => { token( TK_Hex, ts, te ); };
-
- # Literals, with optionals.
- ( s_literal | d_literal ) [i]?
- => { token( TK_Literal, ts, te ); };
-
- '[' => { token( TK_SqOpen ); fcall or_literal; };
- '[^' => { token( TK_SqOpenNeg ); fcall or_literal; };
-
- '/' => { token( '/'); fret; };
-
- # Ignore.
- pound_comment => { updateCol(); };
-
- '..' => { token( TK_DotDot ); };
- '**' => { token( TK_StarStar ); };
- '--' => { token( TK_DashDash ); };
-
- ':>' => { token( TK_ColonGt ); };
- ':>>' => { token( TK_ColonGtGt ); };
- '<:' => { token( TK_LtColon ); };
-
- # Whitespace other than newline.
- [ \t\r]+ => { updateCol(); };
-
- # If we are in a single line machine then newline may end the spec.
- NL => { updateCol(); };
-
- # Consume eof.
- EOF;
-
- any => { token( *ts ); } ;
- *|;
-
- literal_pattern := |*
- # Scanner for the inside of a double-quoted pattern. Text is collected
- # in litBuf and flushed as a TK_LitPat token whenever the literal ends
- # or an embedded '[' ... ']' section begins.
-
- # Backslash escapes that map to a single control character.
- '\\' '0' { litBuf.append( '\0' ); };
- '\\' 'a' { litBuf.append( '\a' ); };
- '\\' 'b' { litBuf.append( '\b' ); };
- '\\' 't' { litBuf.append( '\t' ); };
- '\\' 'n' { litBuf.append( '\n' ); };
- '\\' 'v' { litBuf.append( '\v' ); };
- '\\' 'f' { litBuf.append( '\f' ); };
- '\\' 'r' { litBuf.append( '\r' ); };
-
- # Any other escaped character stands for itself.
- '\\' any {
- litBuf.append( ts[1] );
- };
- # Closing quote: flush pending text (if any), emit the quote and
- # return to the calling scanner.
- '"' => {
- if ( litBuf.length > 0 ) {
- token( TK_LitPat, litBuf.data, litBuf.data+litBuf.length );
- litBuf.clear();
- }
- token( '"' );
- fret;
- };
- # A newline also ends the literal. The newline is kept as part of the
- # text and a closing quote token is sent as if one had been written.
- NL => {
- litBuf.append( '\n' );
- token( TK_LitPat, litBuf.data, litBuf.data+litBuf.length );
- litBuf.clear();
- token( '"' );
- fret;
- };
- # An opening bracket switches back to the main scanner; main's ']'
- # rule returns here.
- '[' => {
- if ( litBuf.length > 0 ) {
- token( TK_LitPat, litBuf.data, litBuf.data+litBuf.length );
- litBuf.clear();
- }
- token( '[' );
- fcall main;
- };
- # Ordinary character: accumulate it.
- any => {
- litBuf.append( *ts );
- };
- *|;
-
- # Parser definitions.
- #
- # Top-level scanner. Keywords are listed before the generic identifier
- # rule so that, for matches of equal length, the keyword wins. Each
- # keyword is listed exactly once: a repeated alternative can never be
- # selected because the earlier identical pattern always takes priority.
- main := |*
- 'lex' => { token( KW_Lex ); };
- 'commit' => { token( KW_Commit ); };
- 'token' => { token( KW_Token ); };
- 'literal' => { token( KW_Literal ); };
- 'rl' => { token( KW_Rl ); };
- 'def' => { token( KW_Def ); };
- 'ignore' => { token( KW_Ignore ); };
- 'construct' => { token( KW_Construct ); };
- 'cons' => { token( KW_Construct ); };
- 'new' => { token( KW_New ); };
- 'if' => { token( KW_If ); };
- 'reject' => { token( KW_Reject ); };
- 'while' => { token( KW_While ); };
- 'else' => { token( KW_Else ); };
- 'elsif' => { token( KW_Elsif ); };
- 'match' => { token( KW_Match ); };
- 'for' => { token( KW_For ); };
- 'iter' => { token( KW_Iter ); };
- 'prints' => { token( KW_PrintStream ); };
- 'print' => { token( KW_Print ); };
- 'print_xml_ac' => { token( KW_PrintXMLAC ); };
- 'print_xml' => { token( KW_PrintXML ); };
- 'namespace' => { token( KW_Namespace ); };
- 'map' => { token( KW_Map ); };
- 'list' => { token( KW_List ); };
- 'vector' => { token( KW_Vector ); };
- 'accum' => { token( KW_Accum ); };
- 'parser' => { token( KW_Accum ); };
- 'return' => { token( KW_Return ); };
- 'break' => { token( KW_Break ); };
- 'yield' => { token( KW_Yield ); };
- 'typeid' => { token( KW_TypeId ); };
- 'make_token' => { token( KW_MakeToken ); };
- 'make_tree' => { token( KW_MakeTree ); };
- 'reducefirst' => { token( KW_ReduceFirst ); };
- 'in' => { token( KW_In ); };
- 'nil' => { token( KW_Nil ); };
- 'true' => { token( KW_True ); };
- 'false' => { token( KW_False ); };
- 'parse' => { token( KW_Parse ); };
- 'parse_stop' => { token( KW_ParseStop ); };
- 'global' => { token( KW_Global ); };
- 'export' => { token( KW_Export ); };
- 'ptr' => { token( KW_Ptr ); };
- 'ref' => { token( KW_Ref ); };
- 'deref' => { token( KW_Deref ); };
- 'require' => { token( KW_Require ); };
- 'preeof' => { token( KW_Preeof ); };
- 'left' => { token( KW_Left ); };
- 'right' => { token( KW_Right ); };
- 'nonassoc' => { token( KW_Nonassoc ); };
- 'prec' => { token( KW_Prec ); };
- 'include' => { token( KW_Include ); };
- 'context' => { token( KW_Context ); };
- 'alias' => { token( KW_Alias ); };
- 'send' => { token( KW_Send ); };
- 'ni' => { token( KW_Ni ); };
- 'ci' => { token( KW_Ci ); };
-
- # Identifiers.
- ident => { token( TK_Word, ts, te ); } ;
-
- number => { token( TK_Number, ts, te ); };
-
- # A slash opens a regular expression only when the parser has asked
- # for it through enterRl; otherwise it is an ordinary operator.
- '/' => {
- token( '/' );
- if ( parser->enterRl )
- fcall regular_type;
- };
-
- # Tilde literal: the rest of the line, newline included, is sent as a
- # quoted pattern.
- "~" [^\n]* NL => {
- token( '"' );
- token( TK_LitPat, ts+1, te );
- token( '"' );
- };
-
- # Single-quoted literal, ended by a closing quote or by a newline.
- "'" ([^'\\\n] | '\\' (any | NL))* ( "'" | NL ) => {
- token( TK_Literal, ts, te );
- };
-
- # Double-quoted pattern: its contents are scanned by literal_pattern.
- '"' => {
- token( '"' );
- litBuf.clear();
- fcall literal_pattern;
- };
- # Brackets nest by calling this scanner recursively.
- '[' => {
- token( '[' );
- fcall main;
- };
-
- # Only return when there is a caller on the scanner stack.
- ']' => {
- token( ']' );
- if ( top > 0 )
- fret;
- };
-
- # Ignore.
- pound_comment => { updateCol(); };
-
- '=>' => { token( TK_DoubleArrow ); };
- '==' => { token( TK_DoubleEql ); };
- '!=' => { token( TK_NotEql ); };
- '::' => { token( TK_DoubleColon ); };
- '<=' => { token( TK_LessEql ); };
- '>=' => { token( TK_GrtrEql ); };
- '->' => { token( TK_RightArrow ); };
- '&&' => { token( TK_AmpAmp ); };
- '||' => { token( TK_BarBar ); };
- '<<' => { token( TK_LtLt ); };
-
- ('+' | '-' | '*' | '/' | '(' | ')' | '@' | '$' | '^' ) => { token( *ts ); };
-
-
- # Whitespace other than newline.
- [ \t\r]+ => { updateCol(); };
- NL => { updateCol(); };
-
- # Consume eof.
- EOF;
-
- any => { token( *ts ); } ;
- *|;
-}%%
-
-%% write data;
-
-/* Run the Ragel scanner over the whole input stream.
- *
- * Input is read in pieces into a heap buffer that starts at 8 bytes and
- * doubles whenever a partially matched token fills it. After each pass of
- * the generated code the unfinished token (everything from ts to pe) is
- * moved to the front of the buffer so the next read can extend it. When a
- * read returns no data a single 0 byte is appended as the end-of-input
- * marker and the loop makes one final pass. The process exits with status 1
- * if the machine lands in its error state. */
-void ColmScanner::scan()
-{
- int bufsize = 8;
- char *buf = new char[bufsize];
- const char last_char = 0;
- int cs, act, have = 0;
- int top, stack[32];
- bool execute = true;
-
- sectionParseInit();
- %% write init;
-
- while ( execute ) {
- char *p = buf + have;
- int space = bufsize - have;
-
- if ( space == 0 ) {
- /* We filled up the buffer trying to scan a token. Grow it. */
- bufsize = bufsize * 2;
- char *newbuf = new char[bufsize];
-
- /* Recompute p and space. */
- p = newbuf + have;
- space = bufsize - have;
-
- /* Patch up pointers possibly in use. Only the ts assignment is
- * guarded by the test; te is rebased unconditionally. */
- if ( ts != 0 )
- ts = newbuf + ( ts - buf );
- te = newbuf + ( te - buf );
-
- /* Copy the new buffer in. */
- memcpy( newbuf, buf, have );
- delete[] buf;
- buf = newbuf;
- }
-
- input.read( p, space );
- int len = input.gcount();
-
- /* If we see eof then append the EOF char. */
- if ( len == 0 ) {
- p[0] = last_char, len = 1;
- execute = false;
- }
-
- char *pe = p + len;
- char *eof = 0;
- %% write exec;
-
- /* Check if we failed. */
- if ( cs == rlscan_error ) {
- /* Machine failed before finding a token. I'm not yet sure if this
- * is reachable. */
- scan_error() << "colm scanner error (metalanguage)" << endl;
- exit(1);
- }
-
- /* Decide if we need to preserve anything. */
- char *preserve = ts;
-
- /* Now set up the prefix. */
- if ( preserve == 0 )
- have = 0;
- else {
- /* There is data that needs to be shifted over. */
- have = pe - preserve;
- memmove( buf, preserve, have );
- unsigned int shiftback = preserve - buf;
- /* As above, only ts is guarded; te is always shifted. */
- if ( ts != 0 )
- ts -= shiftback;
- te -= shiftback;
-
- preserve = buf;
- }
- }
- delete[] buf;
-}
-
-/* Hand the parser its end-of-file token. The token is located in the
- * pseudo-file "<EOF>", at column 1 of the scanner's current line. */
-void ColmScanner::eof()
-{
-    InputLoc eofLoc;
-    eofLoc.col = 1;
-    eofLoc.line = line;
-    eofLoc.fileName = "<EOF>";
-
-    parser->token( eofLoc, ColmParser_tk_eof, 0, 0 );
-}
diff --git a/colm/main.cc b/colm/main.cc
deleted file mode 100644
index 435bb697..00000000
--- a/colm/main.cc
+++ /dev/null
@@ -1,623 +0,0 @@
-/*
- * Copyright 2006-2012 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Colm.
- *
- * Colm is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Colm is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Colm; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include <stdlib.h>
-#include <string.h>
-#include <stdio.h>
-#include <iostream>
-#include <fstream>
-#include <unistd.h>
-#include <sstream>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <unistd.h>
-
-#include "global.h"
-#include "debug.h"
-#include "lmscan.h"
-#include "pcheck.h"
-#include "vector.h"
-#include "version.h"
-#include "keyops.h"
-#include "parsedata.h"
-#include "vector.h"
-#include "version.h"
-#include "fsmcodegen.h"
-
-using std::istream;
-using std::ifstream;
-using std::ostream;
-using std::ios;
-using std::cin;
-using std::cout;
-using std::cerr;
-using std::endl;
-
-/* Graphviz dot file generation. */
-/* NOTE(review): nothing in this file reads genGraphviz; the -V option sets
- * generateGraphviz below. Confirm whether other files still use this one. */
-bool genGraphviz = false;
-
-/* NOTE(review): these using-declarations repeat the ones directly above
- * (ofstream is the only new name). */
-using std::ostream;
-using std::istream;
-using std::ifstream;
-using std::ofstream;
-using std::ios;
-using std::cout;
-using std::cerr;
-using std::cin;
-using std::endl;
-
-/* Io globals. */
-istream *inStream = 0;
-ostream *outStream = 0;
-const char *inputFileName = 0;
-const char *outputFileName = 0;
-const char *gblExportTo = 0;   /* Set by -e. */
-const char *gblExpImplTo = 0;  /* Set by -c. */
-bool exportCode = false;
-
-/* Option flags. processArgs() sets generateGraphviz (-V), verbose (-d),
- * logging (-l), branchPointInfo (-i) and gblLibrary (-L). */
-bool generateGraphviz = false;
-bool verbose = false;
-bool logging = false;
-bool branchPointInfo = false;
-bool addUniqueEmptyProductions = false;
-bool gblLibrary = false;
-
-/* Directories collected from -I options. */
-ArgsVector includePaths;
-
-/* Print version information. */
-void version();
-
-/* Total error count. */
-int gblErrorCount = 0;
-
-/* Host language description. Only a single host type (char) is listed. */
-HostType hostTypesC[] =
-{
- { "char", 0, true, CHAR_MIN, CHAR_MAX, sizeof(char) },
-};
-
-/* NOTE(review): the second initializer is 8 while hostTypesC has one entry;
- * if that field is a type count it looks stale -- confirm against HostLang. */
-HostLang hostLangC = { hostTypesC, 8, hostTypesC+0, true };
-
-HostLang *hostLang = &hostLangC;
-HostLangType hostLangType = CCode;
-
-/* Print the opening to an error in the input, then return the error ostream. */
-ostream &error( const InputLoc &loc )
-{
- /* Keep the error count. */
- gblErrorCount += 1;
-
- cerr << "error: " << inputFileName << ":" <<
- loc.line << ":" << loc.col << ": ";
- return cerr;
-}
-
-/* Print the opening to a program error, then return the error stream. */
-ostream &error()
-{
- gblErrorCount += 1;
- cerr << "error: " PROGNAME ": ";
- return cerr;
-}
-
-
-/* Start a warning message: writes "warning: <file>: " to cerr and returns
- * cerr. Warnings are not counted. */
-ostream &warning( )
-{
-    return cerr << "warning: " << inputFileName << ": ";
-}
-
-/* Start a warning message for a position in the input: writes
- * "warning: <file>:<line>:<col>: " to cerr and returns cerr. The input
- * file name must already be set. */
-ostream &warning( const InputLoc &loc )
-{
-    assert( inputFileName != 0 );
-
-    ostream &warnOut = cerr;
-    warnOut << "warning: ";
-    warnOut << inputFileName << ":" << loc.line << ":" << loc.col << ": ";
-    return warnOut;
-}
-
-/* Write path to out with every backslash doubled, so the result can be
- * placed inside a quoted #line directive. */
-void escapeLineDirectivePath( std::ostream &out, char *path )
-{
-    const char *cur = path;
-    while ( *cur != 0 ) {
-        if ( *cur != '\\' )
-            out << *cur;
-        else
-            out << "\\\\";
-        cur += 1;
-    }
-}
-
-void escapeLineDirectivePath( std::ostream &out, char *path );
-void scan( char *fileName, istream &input );
-
-bool printStatistics = false;
-
-/* Print a summary of the options to stdout.
- *
- * NOTE(review): processArgs() also accepts -I, -s, -V, -L, -e, -c and -D,
- * none of which are listed here. */
-void usage()
-{
- cout <<
-"usage: colm [options] file\n"
-"general:\n"
-" -h, -H, -?, --help print this usage and exit\n"
-" -v --version print version information and exit\n"
-" -o <file> write output to <file>\n"
-" -i show conflict information\n"
-" -d make colm verbose\n"
-" -l compile logging into the output executable\n"
- ;
-}
-
-/* Print the program version, publication date and copyright line to
- * stdout. */
-void version()
-{
-    cout << "Colm version " VERSION << " " PUBDATE << endl;
-    cout << "Copyright (c) 2007-2012 by Adrian D. Thurston" << endl;
-}
-
-/* Locate the extension of a file name.
- *
- * Returns a pointer to the last '.' in stemFile (so the result includes the
- * dot and points into the caller's string), or null when there is no
- * extension. A name has no extension when it contains no dot, when the last
- * dot is the very first character (e.g. ".hidden"), or when a '/' follows
- * the last dot (the dot then belongs to a directory name).
- *
- * The empty string is handled and yields null; the previous backwards scan
- * started one byte before the buffer in that case. */
-const char *findFileExtension( const char *stemFile )
-{
-    const char *dot = strrchr( stemFile, '.' );
-    const char *slash = strrchr( stemFile, '/' );
-
-    /* No dot at all, or only a leading dot. */
-    if ( dot == 0 || dot == stemFile )
-        return 0;
-
-    /* The last dot is part of a directory component. */
-    if ( slash != 0 && slash > dot )
-        return 0;
-
-    return dot;
-}
-
-/* Build a new file name by replacing the extension of stemFile (if it has
- * one) with suffix; with no extension the suffix is simply appended. The
- * result is allocated with new[] and owned by the caller. stemFile must not
- * be empty. */
-char *fileNameFromStem( const char *stemFile, const char *suffix )
-{
-    int stemLen = strlen( stemFile );
-    assert( stemLen > 0 );
-
-    /* Cut the stem at the start of its extension, when there is one. */
-    const char *ext = findFileExtension( stemFile );
-    if ( ext != 0 )
-        stemLen = ext - stemFile;
-
-    /* Stem, then suffix together with its terminating NUL. */
-    const size_t suffixLen = strlen( suffix );
-    char *result = new char[ stemLen + suffixLen + 1 ];
-    memcpy( result, stemFile, stemLen );
-    memcpy( result + stemLen, suffix, suffixLen + 1 );
-
-    return result;
-}
-
-
-/* Open the main output file and point outStream at it.
- *
- * When no output name was given with -o, one is derived from the input
- * name: a ".rh" input produces ".h", anything else produces ".c". If the
- * resulting name equals the input name the program now exits instead of
- * merely reporting the error, because opening the stream would truncate the
- * input file. Also exits when the file cannot be opened. */
-void openOutput( )
-{
-    /* Make a default output name if none was given. */
-    if ( outputFileName == 0 ) {
-        const char *ext = findFileExtension( inputFileName );
-        if ( ext != 0 && strcmp( ext, ".rh" ) == 0 )
-            outputFileName = fileNameFromStem( inputFileName, ".h" );
-        else
-            outputFileName = fileNameFromStem( inputFileName, ".c" );
-    }
-
-    if ( colm_log_compile ) {
-        cerr << "opening output file: " << outputFileName << endl;
-    }
-
-    /* Make sure we are not writing to the same file as the input file.
-     * outputFileName is always set by this point. */
-    if ( strcmp( inputFileName, outputFileName ) == 0 ) {
-        error() << "output file \"" << outputFileName <<
-                "\" is the same as the input file" << endl;
-        exit(1);
-    }
-
-    /* Open the output stream. */
-    ofstream *outFStream = new ofstream( outputFileName );
-    if ( !outFStream->is_open() ) {
-        error() << "error opening " << outputFileName << " for writing" << endl;
-        exit(1);
-    }
-
-    outStream = outFStream;
-}
-
-/* Point outStream at the exports file named by gblExportTo (-e), or at
- * cout when no name was given.
- *
- * Exits if the exports file is the input file (opening it would truncate
- * the input) or if it cannot be opened. The open-failure message names
- * gblExportTo, the file actually being opened. */
-void openExports( )
-{
-    if ( gblExportTo == 0 ) {
-        /* Writing out to std out. */
-        outStream = &cout;
-        return;
-    }
-
-    /* Make sure we are not writing to the same file as the input file. */
-    if ( strcmp( inputFileName, gblExportTo ) == 0 ) {
-        error() << "output file \"" << gblExportTo <<
-                "\" is the same as the input file" << endl;
-        exit(1);
-    }
-
-    /* Open the output stream. */
-    ofstream *outFStream = new ofstream( gblExportTo );
-    if ( !outFStream->is_open() ) {
-        error() << "error opening " << gblExportTo << " for writing" << endl;
-        exit(1);
-    }
-
-    outStream = outFStream;
-}
-
-/* Point outStream at the exports implementation file named by gblExpImplTo
- * (-c), or at cout when no name was given.
- *
- * Exits if that file is the input file (opening it would truncate the
- * input) or if it cannot be opened. The open-failure message names
- * gblExpImplTo, the file actually being opened. */
-void openExportsImpl( )
-{
-    if ( gblExpImplTo == 0 ) {
-        /* Writing out to std out. */
-        outStream = &cout;
-        return;
-    }
-
-    /* Make sure we are not writing to the same file as the input file. */
-    if ( strcmp( inputFileName, gblExpImplTo ) == 0 ) {
-        error() << "output file \"" << gblExpImplTo <<
-                "\" is the same as the input file" << endl;
-        exit(1);
-    }
-
-    /* Open the output stream. */
-    ofstream *outFStream = new ofstream( gblExpImplTo );
-    if ( !outFStream->is_open() ) {
-        error() << "error opening " << gblExpImplTo << " for writing" << endl;
-        exit(1);
-    }
-
-    outStream = outFStream;
-}
-
-/* Run a compiler command line through system(). The command is echoed to
- * stdout first when compile logging is on, and a note is printed to stdout
- * when system() returns non-zero. */
-void compileOutputCommand( const char *command )
-{
-    if ( colm_log_compile )
-        cout << "compiling with: " << command << endl;
-
-    const int status = system( command );
-    if ( status == 0 )
-        return;
-
-    cout << "there was a problem compiling the output" << endl;
-}
-
-/* Compile the generated C file into "<output stem>.bin" with gcc, using
- * the headers and library installed under PREFIX. Links against libcolmd
- * when logging is enabled and libcolmp otherwise.
- *
- * argv0 is kept for interface compatibility but is not needed: the command
- * refers only to PREFIX. The earlier code duplicated and trimmed argv0
- * without using the result, leaked it, and stepped before the buffer when
- * argv0 was empty.
- *
- * NOTE(review): the file names are placed on the shell command line
- * unquoted, so names containing spaces or shell metacharacters will not
- * work as intended. */
-void compileOutputInstalled( const char *argv0 )
-{
-    (void) argv0;
-
-    char *exec = fileNameFromStem( outputFileName, ".bin" );
-
-    /* Room for the fixed text plus both file names; snprintf bounds the
-     * write in any case. */
-    size_t length = 1024 + strlen(outputFileName) + strlen(exec);
-    char *command = new char[length];
-    snprintf( command, length,
-        "gcc -Wall -Wwrite-strings"
-        " -I" PREFIX "/include"
-        " -g"
-        " -o %s"
-        " %s"
-        " -L" PREFIX "/lib"
-        " -lcolm%c",
-        exec, outputFileName, logging ? 'd' : 'p' );
-
-    compileOutputCommand( command );
-
-    delete[] command;
-    delete[] exec;
-}
-
-/* Compile the generated C file into "<output stem>.bin" with gcc when colm
- * is run from its source tree. Headers are taken from the parent of the
- * directory holding the executable (and its aapl subdirectory) and the
- * library from the executable's own directory. Links against libcolmd when
- * logging is enabled and libcolmp otherwise.
- *
- * argv0 must contain a '/'; inSourceTree() only succeeds for such paths.
- * The temporary strings are released before returning.
- *
- * NOTE(review): paths are placed on the shell command line unquoted. */
-void compileOutputInSource( const char *argv0 )
-{
-    /* Find the location of the colm program that is executing: keep
-     * everything up to and including the last slash. */
-    char *location = strdup( argv0 );
-    char *last = strrchr( location, '/' );
-    assert( last != 0 );
-    last[1] = 0;
-
-    char *exec = fileNameFromStem( outputFileName, ".bin" );
-
-    /* location appears three times in the command. */
-    size_t length = 1024 + 3*strlen(location) + strlen(outputFileName) + strlen(exec);
-    char *command = new char[length];
-    snprintf( command, length,
-        "gcc -Wall -Wwrite-strings"
-        " -I%s.."
-        " -I%s../aapl"
-        " -g"
-        " -o %s"
-        " %s"
-        " -L%s"
-        " -lcolm%c",
-        location, location,
-        exec, outputFileName, location, logging ? 'd' : 'p' );
-
-    compileOutputCommand( command );
-
-    delete[] command;
-    delete[] exec;
-    free( location );
-}
-
-/* Decide whether colm is being run from its own source directory: true
- * when argv0 contains a directory part and a regular file named "main.cc"
- * exists in that directory. */
-bool inSourceTree( const char *argv0 )
-{
-    const char *lastSlash = strrchr( argv0, '/' );
-    if ( lastSlash == 0 )
-        return false;
-
-    /* Directory prefix, trailing slash included, followed by the probe
-     * file name and its terminating NUL. */
-    static const char probeName[] = "main.cc";
-    const int dirLen = lastSlash - argv0 + 1;
-    char *probePath = new char[dirLen + 16];
-    memcpy( probePath, argv0, dirLen );
-    memcpy( probePath + dirLen, probeName, sizeof(probeName) );
-
-    struct stat sb;
-    const bool found = stat( probePath, &sb ) == 0 && S_ISREG( sb.st_mode );
-    delete[] probePath;
-
-    return found;
-}
-
-/* Parse the command line and record the result in the option globals.
- *
- * Options that print information (-v, -h/-H/-?, --help, --version) exit
- * immediately with status 0. Problems are reported through error(), which
- * bumps gblErrorCount; the caller decides when to stop. An argument that is
- * not an option is taken as the input file name.
- *
- * NOTE(review): the option string also declares "S:" and "M:" (and lists
- * 'v' twice) but no case handles S or M, so those options are accepted and
- * ignored. */
-void processArgs( int argc, const char **argv )
-{
-    ParamCheck pc( "D:e:c:LI:vdlio:S:M:vHh?-:sV", argc, argv );
-
-    while ( pc.check() ) {
-        switch ( pc.state ) {
-        case ParamCheck::match:
-            switch ( pc.parameter ) {
-            case 'I':
-                includePaths.append( pc.parameterArg );
-                break;
-            case 'v':
-                version();
-                exit(0);
-                break;
-            case 'd':
-                verbose = true;
-                break;
-            case 'l':
-                logging = true;
-                break;
-            case 'i':
-                branchPointInfo = true;
-                break;
-            /* Output. */
-            case 'o':
-                if ( *pc.parameterArg == 0 )
-                    error() << "a zero length output file name was given" << endl;
-                else if ( outputFileName != 0 )
-                    error() << "more than one output file name was given" << endl;
-                else {
-                    /* Ok, remember the output file name. */
-                    outputFileName = pc.parameterArg;
-                }
-                break;
-
-            case 'H': case 'h': case '?':
-                usage();
-                exit(0);
-            case 's':
-                printStatistics = true;
-                break;
-            case 'V':
-                generateGraphviz = true;
-                break;
-            /* Long options arrive as the argument of '-'. */
-            case '-':
-                if ( strcasecmp(pc.parameterArg, "help") == 0 ) {
-                    usage();
-                    exit(0);
-                }
-                else if ( strcasecmp(pc.parameterArg, "version") == 0 ) {
-                    version();
-                    exit(0);
-                }
-                else {
-                    error() << "--" << pc.parameterArg <<
-                            " is an invalid argument" << endl;
-                }
-                break;
-            case 'L':
-                gblLibrary = true;
-                break;
-            case 'e':
-                gblExportTo = pc.parameterArg;
-                break;
-            case 'c':
-                gblExpImplTo = pc.parameterArg;
-                break;
-            /* Turn on one debug realm; only available in DEBUG builds. */
-            case 'D':
-#if DEBUG
-                if ( strcmp( pc.parameterArg, "BYTECODE" ) == 0 )
-                    colmActiveRealm |= REALM_BYTECODE;
-                else if ( strcmp( pc.parameterArg, "PARSE" ) == 0 )
-                    colmActiveRealm |= REALM_PARSE;
-                else if ( strcmp( pc.parameterArg, "MATCH" ) == 0 )
-                    colmActiveRealm |= REALM_MATCH;
-                else if ( strcmp( pc.parameterArg, "COMPILE" ) == 0 )
-                    colmActiveRealm |= REALM_COMPILE;
-                else if ( strcmp( pc.parameterArg, "POOL" ) == 0 )
-                    colmActiveRealm |= REALM_POOL;
-                else if ( strcmp( pc.parameterArg, "PRINT" ) == 0 )
-                    colmActiveRealm |= REALM_PRINT;
-                else if ( strcmp( pc.parameterArg, "INPUT" ) == 0 )
-                    colmActiveRealm |= REALM_INPUT;
-                else if ( strcmp( pc.parameterArg, "SCAN" ) == 0 )
-                    colmActiveRealm |= REALM_SCAN;
-                else
-                    fatal( "unknown argument to -D %s\n", pc.parameterArg );
-#else
-                fatal("-D option specified but debugging messages not compiled in");
-#endif
-                break;
-            }
-            break;
-
-        case ParamCheck::invalid:
-            error() << "-" << pc.parameter << " is an invalid argument" << endl;
-            break;
-
-        case ParamCheck::noparam:
-            /* It is interpreted as an input file. */
-            if ( *pc.curArg == 0 )
-                error() << "a zero length input file name was given" << endl;
-            else if ( inputFileName != 0 )
-                error() << "more than one input file name was given" << endl;
-            else {
-                /* OK, Remember the filename. */
-                inputFileName = pc.curArg;
-            }
-            break;
-        }
-    }
-}
-
-/* Program entry point: process the arguments, scan and parse the input,
- * compile it, then write the output (a dot file with -V, otherwise the
- * generated code, which is compiled with gcc unless -L was given, plus the
- * optional export files). Returns 0 on success; exits or returns 1 when
- * errors were reported. */
-int main(int argc, const char **argv)
-{
- processArgs( argc, argv );
-
- /* -d turns on every log category and every debug realm. */
- if ( verbose ) {
- colm_log_bytecode = 1;
- colm_log_parse = 1;
- colm_log_match = 1;
- colm_log_compile = 1;
- colm_log_conds = 1;
- colmActiveRealm = 0xffffffff;
- }
- initInputFuncs();
-
- /* Bail on above errors. */
- if ( gblErrorCount > 0 )
- exit(1);
-
- /* Make sure we are not writing to the same file as the input file. */
- if ( inputFileName != 0 && outputFileName != 0 &&
- strcmp( inputFileName, outputFileName ) == 0 )
- {
- error() << "output file \"" << outputFileName <<
- "\" is the same as the input file" << endl;
- }
-
- /* Open the input file for reading. */
- /* NOTE(review): this local hides the global inStream, which stays null. */
- istream *inStream;
- if ( inputFileName != 0 ) {
- /* Open the input file for reading. */
- ifstream *inFile = new ifstream( inputFileName );
- inStream = inFile;
- if ( ! inFile->is_open() )
- error() << "could not open " << inputFileName << " for reading" << endl;
- }
- else {
- /* No input file was named: read standard input. */
- inputFileName = "<stdin>";
- inStream = &cin;
- }
-
- /* Bail on above errors. */
- if ( gblErrorCount > 0 )
- exit(1);
-
- Compiler *pd = new Compiler( inputFileName, "machine", InputLoc(), std::cout );
- ColmParser *parser = new ColmParser( pd, inputFileName, "machine", InputLoc() );
- ColmScanner *scanner = new ColmScanner( inputFileName, *inStream, cout, parser, 0 );
-
- /* Scan the whole input, feeding tokens to the parser, then send eof. */
- parser->init();
- scanner->scan();
- scanner->eof();
-
- /* Parsing complete, check for errors.. */
- if ( gblErrorCount > 0 )
- return 1;
-
- /* Initiate a compile following a parse. */
- pd->compile();
-
- /*
- * Write output.
- */
- if ( generateGraphviz ) {
- outStream = &cout;
- pd->writeDotFile();
- }
- else {
- openOutput();
- pd->generateOutput();
-
- /* Deleting the stream closes the generated file before gcc reads it. */
- if ( outStream != 0 )
- delete outStream;
-
- if ( !gblLibrary ) {
- if ( inSourceTree( argv[0] ) )
- compileOutputInSource( argv[0] );
- else
- compileOutputInstalled( argv[0] );
- }
-
- if ( gblExportTo != 0 ) {
- openExports();
- pd->generateExports();
- delete outStream;
- }
- if ( gblExpImplTo != 0 ) {
- openExportsImpl();
- /* NOTE(review): reaches the compiler through the scanner here
- * rather than using pd directly -- presumably the same object. */
- scanner->parser->pd->generateExportsImpl();
- delete outStream;
- }
- }
-
- delete scanner;
- delete parser;
- delete pd;
-
- return 0;
-}
diff --git a/colm/map.c b/colm/map.c
deleted file mode 100644
index 4609db58..00000000
--- a/colm/map.c
+++ /dev/null
@@ -1,763 +0,0 @@
-/*
- * Copyright 2010-2012 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Colm.
- *
- * Colm is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Colm is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Colm; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include <assert.h>
-#include <colm/pdarun.h>
-#include <colm/map.h>
-#include <colm/pool.h>
-
-#define true 1
-#define false 0
-
-/* Drop the map's list view by clearing both end pointers. The elements
- * themselves are not touched. */
-void mapListAbandon( Map *map )
-{
-    map->tail = 0;
-    map->head = 0;
-}
-
-/* Link new_el into the map's doubly linked list immediately before
- * next_el. A null next_el means "append at the tail". Head and tail are
- * updated as needed. */
-void mapListAddBefore( Map *map, MapEl *next_el, MapEl *new_el )
-{
-    MapEl *pred;
-
-    /* The successor is always next_el, whatever the list looks like. */
-    new_el->next = next_el;
-
-    /* Backward links. */
-    if ( next_el != 0 ) {
-        /* Take over the successor's predecessor. */
-        new_el->prev = next_el->prev;
-        next_el->prev = new_el;
-    }
-    else {
-        /* Nothing follows: the new element becomes the tail. */
-        new_el->prev = map->tail;
-        map->tail = new_el;
-    }
-
-    /* Forward links. */
-    pred = new_el->prev;
-    if ( pred != 0 )
-        pred->next = new_el;
-    else
-        map->head = new_el;
-}
-
-/* Link new_el into the map's doubly linked list immediately after prev_el.
- * A null prev_el means "insert at the head". Head and tail are updated as
- * needed. */
-void mapListAddAfter( Map *map, MapEl *prev_el, MapEl *new_el )
-{
-    MapEl *succ;
-
-    /* The predecessor is always prev_el, whatever the list looks like. */
-    new_el->prev = prev_el;
-
-    /* Forward links. */
-    if ( prev_el != 0 ) {
-        /* Take over the predecessor's successor. */
-        new_el->next = prev_el->next;
-        prev_el->next = new_el;
-    }
-    else {
-        /* Nothing precedes: the new element becomes the head. */
-        new_el->next = map->head;
-        map->head = new_el;
-    }
-
-    /* Backward links. */
-    succ = new_el->next;
-    if ( succ != 0 )
-        succ->prev = new_el;
-    else
-        map->tail = new_el;
-}
-
-
-/* Unlink el from the map's doubly linked list and return it. The
- * neighbours (or the head/tail pointers) are joined around it; el's own
- * prev/next fields are left as they were. */
-MapEl *mapListDetach( Map *map, MapEl *el )
-{
-    MapEl *before = el->prev;
-    MapEl *after = el->next;
-
-    /* Forward direction skips over el. */
-    if ( before != 0 )
-        before->next = after;
-    else
-        map->head = after;
-
-    /* Reverse direction skips over el. */
-    if ( after != 0 )
-        after->prev = before;
-    else
-        map->tail = before;
-
-    return el;
-}
-
-
-/* Attach element to the tree at a position that has already been found,
- * then restore balance.
- *
- * parentEl is the node the element hangs under (null when the tree is
- * empty). lastLess is the last node at which the search went left; when it
- * equals parentEl the element becomes the left child, otherwise the right
- * child. The element is linked into the ordered list next to its parent. */
-void mapAttachRebal( Map *map, MapEl *element, MapEl *parentEl, MapEl *lastLess )
-{
-    MapEl *unbal;
-
-    /* One more element in the tree. */
-    map->treeSize += 1;
-
-    /* A new element always starts out as a leaf of height 1. */
-    element->parent = parentEl;
-    element->height = 1;
-    element->right = 0;
-    element->left = 0;
-
-    if ( parentEl == 0 ) {
-        /* No parent: the element is the root. */
-        map->root = element;
-        mapListAddAfter( map, map->tail, element );
-    }
-    else if ( lastLess == parentEl ) {
-        /* The last step of the search went left. */
-        parentEl->left = element;
-        mapListAddBefore( map, parentEl, element );
-    }
-    else {
-        /* The last step of the search went right. */
-        parentEl->right = element;
-        mapListAddAfter( map, parentEl, element );
-    }
-
-    /* Propagate the height change upward. */
-    mapRecalcHeights( map, parentEl );
-
-    /* Rotate at the first imbalance above the element, if there is one. */
-    unbal = mapFindFirstUnbalGP( map, element );
-    if ( unbal != 0 )
-        mapRebalance( map, unbal );
-}
-
-#if 0
-/* Disabled code. It still uses C++ delete and would need porting before it
- * could be enabled in this C file. */
-
-/* Recursively delete all the children of a element. */
-void mapDeleteChildrenOf( Map *map, MapEl *element )
-{
-    /* Recurse left. */
-    if ( element->left ) {
-        mapDeleteChildrenOf( map, element->left );
-
-        /* Delete left element. */
-        delete element->left;
-        element->left = 0;
-    }
-
-    /* Recurse right. */
-    if ( element->right ) {
-        mapDeleteChildrenOf( map, element->right );
-
-        /* Delete right element and clear the right pointer (this used to
-         * clear the left pointer a second time, leaving right dangling). */
-        delete element->right;
-        element->right = 0;
-    }
-}
-
-/* Delete every element and reset the map to empty. */
-void mapEmpty( Map *map )
-{
-    if ( map->root ) {
-        /* Recursively delete from the tree structure. */
-        mapDeleteChildrenOf( map, map->root );
-        delete map->root;
-        map->root = 0;
-        map->treeSize = 0;
-
-        mapListAbandon( map );
-    }
-}
-#endif
-
-/* Rebalance from an element whose grandparent is unbalanced. Only call on
- * an element that has a grandparent.
- *
- * The element n, its parent p and its grandparent gp are relabelled a, b, c
- * in key order, and their four outer subtrees t1..t4 likewise. The three
- * nodes are then rebuilt as b on top with children a (holding t1, t2) and c
- * (holding t3, t4). Returns the great grandparent, which may be null. */
-MapEl *mapRebalance( Map *map, MapEl *n )
-{
- long lheight, rheight;
- MapEl *a, *b, *c;
- MapEl *t1, *t2, *t3, *t4;
-
- MapEl *p = n->parent; /* parent (Non-NULL). */
- MapEl *gp = p->parent; /* Grand-parent (Non-NULL). */
- MapEl *ggp = gp->parent; /* Great grand-parent (may be NULL). */
-
- if (gp->right == p)
- {
- /* p is the right child of gp. */
- if (p->right == n)
- {
- /* Right-right: n is the right child of p, so the key order
- * is gp, p, n. */
- a = gp;
- b = p;
- c = n;
- t1 = gp->left;
- t2 = p->left;
- t3 = n->left;
- t4 = n->right;
- }
- else
- {
- /* Right-left: n is the left child of p, so the key order
- * is gp, n, p. */
- a = gp;
- b = n;
- c = p;
- t1 = gp->left;
- t2 = n->left;
- t3 = n->right;
- t4 = p->right;
- }
- }
- else
- {
- /* p is the left child of gp. */
- if (p->right == n)
- {
- /* Left-right: n is the right child of p, so the key order
- * is p, n, gp. */
- a = p;
- b = n;
- c = gp;
- t1 = p->left;
- t2 = n->left;
- t3 = n->right;
- t4 = gp->right;
- }
- else
- {
- /* Left-left: n is the left child of p, so the key order
- * is n, p, gp. */
- a = n;
- b = p;
- c = gp;
- t1 = n->left;
- t2 = n->right;
- t3 = p->right;
- t4 = gp->right;
- }
- }
-
- /* Perform rotation.
- */
-
- /* Tie b to the great grandparent. */
- if ( ggp == 0 )
- map->root = b;
- else if ( ggp->left == gp )
- ggp->left = b;
- else
- ggp->right = b;
- b->parent = ggp;
-
- /* Tie a as a leftchild of b. */
- b->left = a;
- a->parent = b;
-
- /* Tie c as a rightchild of b. */
- b->right = c;
- c->parent = b;
-
- /* Tie t1 as a leftchild of a. */
- a->left = t1;
- if ( t1 != 0 ) t1->parent = a;
-
- /* Tie t2 as a rightchild of a. */
- a->right = t2;
- if ( t2 != 0 ) t2->parent = a;
-
- /* Tie t3 as a leftchild of c. */
- c->left = t3;
- if ( t3 != 0 ) t3->parent = c;
-
- /* Tie t4 as a rightchild of c. */
- c->right = t4;
- if ( t4 != 0 ) t4->parent = c;
-
- /* The heights are all recalculated manually and the great
- * grand-parent is passed to mapRecalcHeights() to ensure
- * the heights are correct up the tree.
- *
- * Note that mapRecalcHeights() cuts out when it comes across
- * a height that hasn't changed.
- */
-
- /* Fix height of a. */
- lheight = a->left ? a->left->height : 0;
- rheight = a->right ? a->right->height : 0;
- a->height = (lheight > rheight ? lheight : rheight) + 1;
-
- /* Fix height of c. */
- lheight = c->left ? c->left->height : 0;
- rheight = c->right ? c->right->height : 0;
- c->height = (lheight > rheight ? lheight : rheight) + 1;
-
- /* Fix height of b. */
- lheight = a->height;
- rheight = c->height;
- b->height = (lheight > rheight ? lheight : rheight) + 1;
-
- /* Fix height of b's parents. */
- mapRecalcHeights( map, ggp );
- return ggp;
-}
-
-/* Recompute heights starting at element and walking up through its
- * ancestors. The walk stops at the first node whose height turns out to be
- * unchanged, since nothing above it can change either. */
-void mapRecalcHeights( Map *map, MapEl *element )
-{
-    MapEl *cur;
-
-    for ( cur = element; cur != 0; cur = cur->parent ) {
-        long leftHeight = cur->left ? cur->left->height : 0;
-        long rightHeight = cur->right ? cur->right->height : 0;
-        long newHeight = ( leftHeight > rightHeight ? leftHeight : rightHeight ) + 1;
-
-        /* Unchanged here means unchanged all the way up. */
-        if ( newHeight == cur->height )
-            break;
-
-        cur->height = newHeight;
-    }
-}
-
-/* Walk upward from element and return the first node on the path whose
- * grandparent is unbalanced (left and right heights differ by more than
- * one). Returns null when element has no grandparent or no unbalanced
- * grandparent is found. */
-MapEl *mapFindFirstUnbalGP( Map *map, MapEl *element )
-{
-    MapEl *gp;
-
-    /* Nothing to do without a grandparent. */
-    if ( element == 0 || element->parent == 0 ||
-            element->parent->parent == 0 )
-        return 0;
-
-    for ( gp = element->parent->parent; gp != 0; gp = gp->parent ) {
-        long leftHeight = gp->left ? gp->left->height : 0;
-        long rightHeight = gp->right ? gp->right->height : 0;
-        long balance = leftHeight - rightHeight;
-
-        if ( balance < -1 || balance > 1 )
-            return element;
-
-        /* Move the candidate up in step with the grandparent. */
-        element = element->parent;
-    }
-
-    return 0;
-}
-
-
-
-/* Walk upward from element (inclusive) and return the first node whose
- * left and right heights differ by more than one, or null if every node on
- * the path is balanced. A null element yields null. */
-MapEl *mapFindFirstUnbalEl( Map *map, MapEl *element )
-{
-    MapEl *cur;
-
-    for ( cur = element; cur != 0; cur = cur->parent ) {
-        long leftHeight = cur->left ? cur->left->height : 0;
-        long rightHeight = cur->right ? cur->right->height : 0;
-        long balance = leftHeight - rightHeight;
-
-        if ( balance < -1 || balance > 1 )
-            return cur;
-    }
-
-    return 0;
-}
-
-/* Put replacement (an element not currently in the tree) into the exact
- * tree position held by element: it takes over element's children, parent
- * link and height. The root pointer is updated when element was the root.
- * element's own fields are not modified. */
-void mapReplaceEl( Map *map, MapEl *element, MapEl *replacement )
-{
-    MapEl *oldParent = element->parent;
-    MapEl *oldLeft = element->left;
-    MapEl *oldRight = element->right;
-
-    /* Adopt the children. */
-    replacement->left = oldLeft;
-    if ( oldLeft != 0 )
-        oldLeft->parent = replacement;
-
-    replacement->right = oldRight;
-    if ( oldRight != 0 )
-        oldRight->parent = replacement;
-
-    /* Hook into the parent, or become the root. */
-    replacement->parent = oldParent;
-    if ( oldParent == 0 )
-        map->root = replacement;
-    else if ( oldParent->left == element )
-        oldParent->left = replacement;
-    else
-        oldParent->right = replacement;
-
-    replacement->height = element->height;
-}
-
-
-/* Take element out of the tree and put filler where it was. filler should
- * be null or a child of element. Only the link from element's parent (or
- * the root pointer) and filler's parent pointer are changed. */
-void mapRemoveEl( Map *map, MapEl *element, MapEl *filler )
-{
-    MapEl *above = element->parent;
-
-    if ( above == 0 )
-        map->root = filler;
-    else if ( above->left == element )
-        above->left = filler;
-    else
-        above->right = filler;
-
-    if ( filler != 0 )
-        filler->parent = above;
-}
-
-/* Recursive worker for tree copying.
- *
- * Allocates a new element for el, appends it to map's list between the
- * copies of its left and right subtrees (in-order), and returns it. When el
- * is the element that oldNextDown refers to, *newNextDown is set to the new
- * element.
- *
- * NOTE(review): nothing visible here copies el's fields into newEl, and the
- * subtree tests and recursive calls below read newEl->left / newEl->right
- * rather than el->left / el->right. Unless mapElAllocate() returns a copy
- * of some element (it only receives prg), the recursion never runs and the
- * key and value are not carried over. The comment below about constructors
- * presumably dates from a C++ version of this code -- confirm and fix. */
-MapEl *mapCopyBranch( Program *prg, Map *map, MapEl *el, Kid *oldNextDown, Kid **newNextDown )
-{
- /* Duplicate element. Either the base element's copy constructor or default
- * constructor will get called. Both will suffice for initting the
- * pointers to null when they need to be. */
- MapEl *newEl = mapElAllocate( prg );
-
- if ( (Kid*)el == oldNextDown )
- *newNextDown = (Kid*)newEl;
-
- /* If the left tree is there, copy it. */
- if ( newEl->left ) {
- newEl->left = mapCopyBranch( prg, map, newEl->left, oldNextDown, newNextDown );
- newEl->left->parent = newEl;
- }
-
- mapListAddAfter( map, map->tail, newEl );
-
- /* If the right tree is there, copy it. */
- if ( newEl->right ) {
- newEl->right = mapCopyBranch( prg, map, newEl->right, oldNextDown, newNextDown );
- newEl->right->parent = newEl;
- }
-
- return newEl;
-}
-
-/* Insert an already allocated element, ordered by element->key.
- *
- * Returns element when it was attached, or null when an element whose key
- * compares equal under cmpTree is already in the tree. When lastFound is
- * non-null it receives the attached element, or the existing equal-keyed
- * element when nothing was attached. */
-MapEl *mapInsertEl( Program *prg, Map *map, MapEl *element, MapEl **lastFound )
-{
-	MapEl *cursor = map->root;
-	MapEl *above = 0;
-	MapEl *lastLeftTurn = 0;
-
-	/* Descend until we run off the tree or meet an equal key. */
-	while ( cursor != 0 ) {
-		long rel = cmpTree( prg, element->key, cursor->key );
-
-		if ( rel == 0 ) {
-			/* We have hit the target: report it and refuse the insert. */
-			if ( lastFound != 0 )
-				*lastFound = cursor;
-			return 0;
-		}
-
-		above = cursor;
-		if ( rel < 0 ) {
-			/* Going left; remember the node we turned left at. */
-			lastLeftTurn = cursor;
-			cursor = cursor->left;
-		}
-		else {
-			cursor = cursor->right;
-		}
-	}
-
-	/* We are at an external position and did not find the key. Attach
-	 * underneath the leaf and rebalance. */
-	mapAttachRebal( map, element, above, lastLeftTurn );
-
-	if ( lastFound != 0 )
-		*lastFound = element;
-	return element;
-}
-
-/* Insert a new element for key, allocating the element here.
- *
- * Returns the newly created element (its tree field set to 0), or null when
- * an element whose key compares equal under cmpTree is already in the tree;
- * nothing is allocated in that case. When lastFound is non-null it receives
- * the new element, or the existing equal-keyed element. */
-MapEl *mapInsertKey( Program *prg, Map *map, Tree *key, MapEl **lastFound )
-{
-	MapEl *cursor = map->root;
-	MapEl *above = 0;
-	MapEl *lastLeftTurn = 0;
-	MapEl *created;
-
-	/* Descend until we run off the tree or meet an equal key. */
-	while ( cursor != 0 ) {
-		long rel = cmpTree( prg, key, cursor->key );
-
-		if ( rel == 0 ) {
-			/* We have hit the target: report it and refuse the insert. */
-			if ( lastFound != 0 )
-				*lastFound = cursor;
-			return 0;
-		}
-
-		above = cursor;
-		if ( rel < 0 ) {
-			/* Going left; remember the node we turned left at. */
-			lastLeftTurn = cursor;
-			cursor = cursor->left;
-		}
-		else {
-			cursor = cursor->right;
-		}
-	}
-
-	/* We are at an external position and did not find the key. Create the
-	 * new element, attach it underneath the leaf and rebalance. */
-	created = mapElAllocate( prg );
-	created->key = key;
-	created->tree = 0;
-	mapAttachRebal( map, created, above, lastLeftTurn );
-
-	if ( lastFound != 0 )
-		*lastFound = created;
-	return created;
-}
-
-
-/**
- * \brief Look up the element stored under a key.
- *
- * The search starts at map->root and uses cmpTree to decide whether to
- * descend left or right.
- *
- * \returns The element whose key compares equal, null if there is none.
- */
-MapEl *mapImplFind( Program *prg, Map *map, Tree *key )
-{
-	MapEl *cursor;
-	long rel;
-
-	for ( cursor = map->root; cursor != 0;
-			cursor = rel < 0 ? cursor->left : cursor->right )
-	{
-		rel = cmpTree( prg, key, cursor->key );
-
-		/* We have hit the target. */
-		if ( rel == 0 )
-			return cursor;
-	}
-
-	return 0;
-}
-
-
-/**
- * \brief Find an element by key, then detach it from the tree.
- *
- * The lookup is done with mapImplFind and the removal with mapDetach. The
- * element is not deleted.
- *
- * \returns The element detached if the key is found, otherwise returns null.
- */
-MapEl *mapDetachByKey( Program *prg, Map *map, Tree *key )
-{
-	MapEl *found = mapImplFind( prg, map, key );
-	if ( found == 0 )
-		return 0;
-
-	mapDetach( prg, map, found );
-	return found;
-}
-
-/**
- * \brief Detach an element from the tree.
- *
- * The element is first removed from the map's ordered list and treeSize is
- * decremented. Its position in the tree is then taken by the leftmost
- * element of its right subtree or, when there is no right subtree, by the
- * rightmost element of its left subtree; an element with no children is
- * simply unlinked. Finally the heights are recalculated and every unbalanced
- * element found on the way up the tree is rebalanced.
- *
- * The element is not freed here.
- * If the element is not in the tree then undefined behaviour results.
- *
- * \returns The element given.
- */
-MapEl *mapDetach( Program *prg, Map *map, MapEl *element )
-{
- MapEl *replacement, *fixfrom;
- long lheight, rheight;
-
- /* Remove the element from the ordered list. */
- mapListDetach( map, element );
-
- /* Update treeSize. */
- map->treeSize--;
-
- /* Find a replacement element. */
- if (element->right)
- {
- /* Find the leftmost element of the right subtree. */
- replacement = element->right;
- while (replacement->left)
- replacement = replacement->left;
-
- /* If replacing the element with its child then we need to start
- * fixing at the replacement, otherwise we start fixing at the
- * parent of the replacement. */
- if (replacement->parent == element)
- fixfrom = replacement;
- else
- fixfrom = replacement->parent;
-
- mapRemoveEl( map, replacement, replacement->right );
- mapReplaceEl( map, element, replacement );
- }
- else if (element->left)
- {
- /* Find the rightmost element of the left subtree. */
- replacement = element->left;
- while (replacement->right)
- replacement = replacement->right;
-
- /* If replacing the element with its child then we need to start
- * fixing at the replacement, otherwise we start fixing at the
- * parent of the replacement. */
- if (replacement->parent == element)
- fixfrom = replacement;
- else
- fixfrom = replacement->parent;
-
- mapRemoveEl( map, replacement, replacement->left );
- mapReplaceEl( map, element, replacement );
- }
- else
- {
- /* We need to start fixing at the parent of the element. */
- fixfrom = element->parent;
-
- /* The element we are deleting is a leaf element. */
- mapRemoveEl( map, element, 0 );
- }
-
- /* If fixfrom is null it means we just deleted
- * the root of the tree. */
- if ( fixfrom == 0 )
- return element;
-
- /* Fix the heights after the deletion. */
- mapRecalcHeights( map, fixfrom );
-
- /* Fix every unbalanced element going up in the tree. */
- MapEl *ub = mapFindFirstUnbalEl( map, fixfrom );
- while ( ub )
- {
- /* Find the element to rebalance by moving down from the first unbalanced
- * element 2 levels in the direction of the greatest heights. On the
- * second move down, the heights may be equal ( but not on the first ).
- * In which case go in the direction of the first move. */
- lheight = ub->left ? ub->left->height : 0;
- rheight = ub->right ? ub->right->height : 0;
- assert( lheight != rheight );
- if (rheight > lheight)
- {
- ub = ub->right;
- lheight = ub->left ?
- ub->left->height : 0;
- rheight = ub->right ?
- ub->right->height : 0;
- if (rheight > lheight)
- ub = ub->right;
- else if (rheight < lheight)
- ub = ub->left;
- else
- ub = ub->right;
- }
- else
- {
- ub = ub->left;
- lheight = ub->left ?
- ub->left->height : 0;
- rheight = ub->right ?
- ub->right->height : 0;
- if (rheight > lheight)
- ub = ub->right;
- else if (rheight < lheight)
- ub = ub->left;
- else
- ub = ub->left;
- }
-
-
- /* rebalance returns the grandparent of the subtree formed
- * by the elements that were rebalanced.
- * We must continue upward from there rebalancing. */
- fixfrom = mapRebalance( map, ub );
-
- /* Find the next unbalanced element. */
- ub = mapFindFirstUnbalEl( map, fixfrom );
- }
-
- return element;
-}
-
-
-
diff --git a/colm/map.cc b/colm/map.cc
deleted file mode 100644
index 52dd2697..00000000
--- a/colm/map.cc
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * Copyright 2008-2012 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Colm.
- *
- * Colm is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Colm is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Colm; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include "pdarun.h"
-#include <assert.h>
-
-
-
diff --git a/colm/map.h b/colm/map.h
deleted file mode 100644
index 993ca86e..00000000
--- a/colm/map.h
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright 2010-2012 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Colm.
- *
- * Colm is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Colm is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Colm; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#ifndef _MAP_H
-#define _MAP_H
-
-#if defined(__cplusplus)
-extern "C" {
-#endif
-
-#include <colm/program.h>
-
-typedef struct _MapEl
-{
- /* Must overlay Kid. */
- Tree *tree; /* Set to 0 by mapInsertKey on a newly created element. */
- struct _MapEl *next; /* Links for the map's ordered list (see mapListAddAfter, mapListDetach). */
- struct _MapEl *prev;
-
- struct _MapEl *left, *right, *parent; /* Search-tree links; parent is null at the root. */
- long height; /* Subtree height used by the balance checks; a missing child counts as 0. */
- Tree *key; /* Ordering key, compared with cmpTree. */
-} MapEl;
-
-typedef struct _Map
-{
- /* Must overlay Tree. */
- short id;
- unsigned short flags;
- long refs;
- MapEl *head;
-
- MapEl *tail; /* mapCopyBranch adds each copied element after this. */
- MapEl *root; /* Where tree searches start; set to the filler when the root element is removed. */
- long treeSize; /* Decremented by mapDetach for each element detached. */
- GenericInfo *genericInfo;
-} Map;
-
-void mapListAbandon( Map *map );
-
-void mapListAddBefore( Map *map, MapEl *next_el, MapEl *new_el );
-void mapListAddAfter( Map *map, MapEl *prev_el, MapEl *new_el );
-MapEl *mapListDetach( Map *map, MapEl *el );
-void mapAttachRebal( Map *map, MapEl *element, MapEl *parentEl, MapEl *lastLess );
-void mapDeleteChildrenOf( Map *map, MapEl *element );
-void mapEmpty( Map *map );
-MapEl *mapRebalance( Map *map, MapEl *n );
-void mapRecalcHeights( Map *map, MapEl *element );
-MapEl *mapFindFirstUnbalGP( Map *map, MapEl *element );
-MapEl *mapFindFirstUnbalEl( Map *map, MapEl *element );
-void mapRemoveEl( Map *map, MapEl *element, MapEl *filler );
-void mapReplaceEl( Map *map, MapEl *element, MapEl *replacement );
-MapEl *mapInsertEl( Program *prg, Map *map, MapEl *element, MapEl **lastFound );
-MapEl *mapInsertKey( Program *prg, Map *map, Tree *key, MapEl **lastFound );
-MapEl *mapImplFind( Program *prg, Map *map, Tree *key );
-MapEl *mapDetachByKey( Program *prg, Map *map, Tree *key );
-MapEl *mapDetach( Program *prg, Map *map, MapEl *element );
-MapEl *mapCopyBranch( Program *prg, Map *map, MapEl *el, Kid *oldNextDown, Kid **newNextDown );
-
-long cmpTree( Program *prg, const Tree *tree1, const Tree *tree2 );
-
-void mapImplRemoveEl( Program *prg, Map *map, MapEl *element );
-int mapImplRemoveKey( Program *prg, Map *map, Tree *key );
-
-/*
- * Iterators.
- */
-
-void initTreeIter( TreeIter *treeIter, const Ref *rootRef, int searchId, Tree **stackRoot );
-void initRevTreeIter( RevTreeIter *revTriter, const Ref *rootRef,
- int searchId, Tree **stackRoot, int children );
-
-
-void initUserIter( UserIter *userIter, Tree **stackRoot, long argSize, long searchId );
-
-Tree *mapFind( Program *prg, Map *map, Tree *key );
-long mapLength( Map *map );
-Tree *mapUnstore( Program *prg, Map *map, Tree *key, Tree *existing );
-int mapInsert( Program *prg, Map *map, Tree *key, Tree *element );
-void mapUnremove( Program *prg, Map *map, Tree *key, Tree *element );
-Tree *mapUninsert( Program *prg, Map *map, Tree *key );
-Tree *mapStore( Program *prg, Map *map, Tree *key, Tree *element );
-
-
-#if defined(__cplusplus)
-}
-#endif
-
-#endif
-
diff --git a/colm/parsedata.h b/colm/parsedata.h
deleted file mode 100644
index 79ba08c1..00000000
--- a/colm/parsedata.h
+++ /dev/null
@@ -1,1063 +0,0 @@
-/*
- * Copyright 2001-2012 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Colm.
- *
- * Colm is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Colm is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Colm; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#ifndef _PARSEDATA_H
-#define _PARSEDATA_H
-
-#include <iostream>
-#include <limits.h>
-#include "bstset.h"
-#include "global.h"
-#include "avlmap.h"
-#include "avlset.h"
-#include "bstmap.h"
-#include "vector.h"
-#include "dlist.h"
-#include "dlistmel.h"
-#include "fsmgraph.h"
-#include "compare.h"
-#include "vector.h"
-#include "keyops.h"
-#include "parsetree.h"
-#include "astring.h"
-#include "pdagraph.h"
-#include "compare.h"
-#include "pdarun.h"
-#include "bytecode.h"
-#include "program.h"
-
-using std::ostream;
-
-struct exit_object { };
-extern exit_object endp;
-void operator<<( std::ostream &out, exit_object & );
-
-/* Forwards. */
-struct RedFsm;
-struct LangEl;
-struct Compiler;
-struct PdaCodeGen;
-struct FsmCodeGen;
-
-#define SHIFT_CODE 0x1
-#define REDUCE_CODE 0x2
-#define SHIFT_REDUCE_CODE 0x3
-
-/* Pack a reduction number and an action kind into one value: the low two bits
- * hold REDUCE_CODE or SHIFT_REDUCE_CODE and the reduction number is shifted
- * left by two above them. */
-inline long makeReduceCode( long reduction, bool isShiftReduce )
-{
-	long kind = REDUCE_CODE;
-	if ( isShiftReduce )
-		kind = SHIFT_REDUCE_CODE;
-
-	return kind | ( reduction << 2 );
-}
-
-struct ProdEl;
-struct ProdElList;
-struct PdaLiteral;
-struct Definition;
-
-/* A pointer to this is in PdaRun, but its specification is not known by the
- * runtime code. The runtime functions that access it are defined in
- * ctinput.cc and stubbed in fsmcodegen.cc. */
-struct Bindings
- : public Vector<ParseTree*>
-{};
-
-struct DefListEl { Definition *prev, *next; };
-struct LelDefListEl { Definition *prev, *next; };
-typedef Vector< LangEl* > LangElVect;
-typedef Vector< ProdEl* > FactorVect;
-
-typedef AvlMap<String, long, CmpStr> StringMap;
-typedef AvlMapEl<String, long> StringMapEl;
-
-enum PredType {
- PredLeft,
- PredRight,
- PredNonassoc,
- PredNone
-};
-
-/* One precedence declaration: a type reference together with a precedence
- * type and value. Instances are kept on a PredDeclList and carry their own
- * list links. */
-struct PredDecl
-{
-	PredDecl( TypeRef *typeRef, PredType predType, long predValue )
-	:
-		typeRef(typeRef),
-		predType(predType),
-		predValue(predValue),
-		/* Start unlinked rather than with indeterminate pointers. */
-		prev(0),
-		next(0)
-	{}
-
-	TypeRef *typeRef;
-	PredType predType;
-	long predValue;
-
-	/* List links. */
-	PredDecl *prev, *next;
-};
-
-typedef DList<PredDecl> PredDeclList;
-
-/* A production. It derives from both DefListEl and LelDefListEl so that one
- * object can sit on a DefList and on a LelDefList at the same time. */
-struct Definition
-:
-	public DefListEl,
-	public LelDefListEl
-{
-	enum Type { Production };
-
-	/* Stores the arguments and zeroes the remaining pointer, counter and
-	 * flag members. */
-	Definition( const InputLoc &loc, LangEl *prodName, ProdElList *prodElList,
-			bool prodCommit, CodeBlock *redBlock, int prodId, int prodNum, Type type )
-	:
-		loc(loc),
-		prodName(prodName),
-		prodElList(prodElList),
-		prodCommit(prodCommit),
-		redBlock(redBlock),
-		prodId(prodId),
-		prodNum(prodNum),
-		type(type),
-		fsm(0),
-		fsmLength(0),
-		uniqueEmptyLeader(0),
-		isLeftRec(false),
-		localFrame(0),
-		lhsField(0),
-		predOf(0),
-		collectIgnoreRegion(0)
-	{}
-
-	/* Supplied to the constructor. */
-	InputLoc loc;
-	LangEl *prodName;
-	ProdElList *prodElList;
-	bool prodCommit;
-
-	CodeBlock *redBlock;
-
-	int prodId;
-	int prodNum;
-	Type type;
-
-	/* Everything below starts out null, zero, false or default-constructed. */
-	PdaGraph *fsm;
-	int fsmLength;
-	String data;
-	LongSet reducesTo;
-
-	LangEl *uniqueEmptyLeader;
-
-	ProdIdSet nonTermFirstSet;
-	AlphSet firstSet;
-	bool isLeftRec;
-
-	ObjectDef *localFrame;
-	ObjField *lhsField;
-
-	LangEl *predOf;
-
-	UnsignedCharVect copy;
-
-	TokenRegion *collectIgnoreRegion;
-};
-
-/* Three-way comparison of two definitions by their prodId. */
-struct CmpDefById
-{
-	/* Returns -1, 0 or 1 as d1's prodId is less than, equal to or greater
-	 * than d2's. */
-	static int compare( Definition *d1, Definition *d2 )
-	{
-		if ( d1->prodId == d2->prodId )
-			return 0;
-
-		return d1->prodId < d2->prodId ? -1 : 1;
-	}
-};
-
-
-/* Map dotItems to productions. */
-typedef BstMap< int, Definition*, CmpOrd<int> > DotItemIndex;
-typedef BstMapEl< int, Definition*> DotItemIndexEl;
-
-struct DefList
-:
- public DListMel<Definition, DefListEl>
-{};
-
-/* A list of productions, linked through LelDefListEl. Each non terminal can have many productions. */
-struct LelDefList
-:
- public DListMel<Definition, LelDefListEl>
-{};
-
-/* A set of machines made during a closure round. */
-typedef Vector< PdaGraph* > Machines;
-
-/* List of language elements. */
-typedef DList<LangEl> LelList;
-
-typedef Vector< TokenDef* > TokenDefVect;
-
-struct UniqueType;
-
-typedef Vector<LangEl*> LangElVect;
-typedef BstSet<LangEl*> LangElSet;
-
-/* A language element class. Can be a nonTerm or a term; the Type enum also
- * has an Unknown value. Derives from DListEl<LangEl>, and LelList is the
- * DList of these elements. The constructor and destructor are only declared
- * here. */
-struct LangEl : public DListEl<LangEl>
-{
- enum Type { Unknown, Term, NonTerm };
-
- LangEl( Namespace *nspace, const String &name, Type type );
- ~LangEl();
-
- /* The region the language element was defined in. */
- Namespace *nspace;
-
- String name;
- String lit;
-
- String fullName;
- String fullLit;
-
- /* For referencing the type. */
- String refName;
-
- /* For declaring things inside the type. */
- String declName;
-
- String xmlTag;
-
- Type type;
- long id;
- bool isUserTerm;
- bool isContext;
- String displayString;
- long numAppearances;
- bool commit;
- bool ignore;
- bool reduceFirst;
- bool isLiteral;
- bool isRepeat;
- bool isList;
- bool isOpt;
- bool parseStop;
- bool isEOF;
-
- LangEl *repeatOf;
-
- /* Productions from the language element if it is a non-terminal. */
- LelDefList defList;
-
- TokenDef *tokenDef;
- Definition *rootDef;
- LangEl *termDup;
- LangEl *eofLel;
-
- PdaGraph *pdaGraph;
- PdaTables *pdaTables;
-
- PdaState *startState;
-
- CodeBlock *transBlock;
-
- ObjectDef *objectDef;
- NamespaceQual *objectDefUsesQual;
- String objectDefUses;
-
- long thisSize;
- long ofiOffset;
-
- GenericType *generic;
-
- long parserId;
-
- PredType predType;
- long predValue;
-
- Context *contextDef;
- Context *contextIn;
- bool noPreIgnore;
- bool noPostIgnore;
- bool isCI;
- TokenRegion *ciRegion;
-};
-
-/* An element of a ProdElList. Carries its own list links. */
-struct ProdEl
-{
-	/* Language elements a factor node can be. */
-	enum Type {
-		LiteralType,
-		ReferenceType
-	};
-
-	/* Construct with a reference to a var def. The loc argument is accepted
-	 * but not stored. langEl and objField start out null. */
-	ProdEl( Type type, const InputLoc &loc, ObjField *captureField, bool commit, TypeRef *typeRef, int priorVal )
-	:
-		captureField(captureField),
-		commit(commit),
-		typeRef(typeRef),
-		langEl(0),
-		priorVal(priorVal),
-		type(type),
-		objField(0),
-		/* Start unlinked rather than with indeterminate pointers. */
-		prev(0),
-		next(0)
-	{}
-
-	/* Construct a ReferenceType element with no capture field, no commit
-	 * and a priority of zero. The loc argument is accepted but not stored. */
-	ProdEl( const InputLoc &loc, TypeRef *typeRef )
-	:
-		captureField(0),
-		commit(false),
-		typeRef(typeRef),
-		langEl(0),
-		priorVal(0),
-		type(ReferenceType),
-		objField(0),
-		prev(0),
-		next(0)
-	{}
-
-	ObjField *captureField;
-	bool commit;
-
-	TypeRef *typeRef;
-
-	LangEl *langEl;
-	int priorVal;
-	Type type;
-	ObjField *objField;
-
-	/* List links. */
-	ProdEl *prev, *next;
-};
-
-struct ProdElList : public DList<ProdEl>
-{
- PdaGraph *walk( Compiler *pd, Definition *prod );
-};
-
-/* This should be renamed. It is a literal string in a type reference. */
-struct PdaLiteral
-{
-	/* Keeps copies of the location and token; value starts at zero. */
-	PdaLiteral( const InputLoc &loc, const Token &token )
-	:
-		loc(loc),
-		token(token),
-		value(0)
-	{}
-
-	InputLoc loc;
-	Token token;
-	long value;
-};
-
-/* Nodes in the tree that use this action. */
-typedef Vector<NameInst*> ActionRefs;
-
-/* Element in list of actions. Contains the string for the code to execute.
- * Derives from both DListEl and AvlTreeEl so that one object can sit on an
- * ActionList and in an ActionDict (keyed by name) at the same time. */
-struct Action
-:
-	public DListEl<Action>,
-	public AvlTreeEl<Action>
-{
-public:
-
-	/* A named action with the given inline code. It is created with
-	 * MarkNone, no ids assigned (-1) and all reference counts at zero. */
-	Action( const InputLoc &loc, const String &name, InlineList *inlineList )
-	:
-		loc(loc),
-		name(name),
-		markType(MarkNone),
-		objField(0),
-		markId(-1),
-		inlineList(inlineList),
-		actionId(-1),
-		numTransRefs(0),
-		numToStateRefs(0),
-		numFromStateRefs(0),
-		numEofRefs(0),
-		numCondRefs(0),
-		anyCall(false),
-		isLmAction(false)
-	{
-	}
-
-	/* A mark action: named "mark", with a new empty InlineList. The loc
-	 * member is left default-constructed. */
-	Action( MarkType markType, long markId )
-	:
-		name("mark"),
-		markType(markType),
-		objField(0),
-		markId(markId),
-		inlineList(new InlineList),
-		actionId(-1),
-		numTransRefs(0),
-		numToStateRefs(0),
-		numFromStateRefs(0),
-		numEofRefs(0),
-		numCondRefs(0),
-		anyCall(false),
-		isLmAction(false)
-	{
-	}
-
-	/* Key for action dictionary. */
-	const String &getKey() const { return name; }
-
-	/* Data collected during parse. */
-	InputLoc loc;
-	String name;
-
-	MarkType markType;
-	ObjField *objField;
-	long markId;
-
-	InlineList *inlineList;
-	int actionId;
-
-	/* Write the action's name to out, or "line:col" of its location when the
-	 * name compares equal to 0. */
-	void actionName( ostream &out )
-	{
-		if ( name != 0 )
-			out << name;
-		else
-			out << loc.line << ":" << loc.col;
-	}
-
-	/* Places in the input text that reference the action. */
-	ActionRefs actionRefs;
-
-	/* Number of references in the final machine: the sum of the transition,
-	 * to-state, from-state and EOF reference counts (numCondRefs is not
-	 * part of the sum). Returns int so the total is reported as a count; a
-	 * bool return type would collapse every non-zero total to 1. Callers
-	 * that only test for non-zero are unaffected. */
-	int numRefs()
-		{ return numTransRefs + numToStateRefs + numFromStateRefs + numEofRefs; }
-	int numTransRefs;
-	int numToStateRefs;
-	int numFromStateRefs;
-	int numEofRefs;
-	int numCondRefs;
-	bool anyCall;
-
-	bool isLmAction;
-};
-
-/* A list of actions. */
-typedef DList<Action> ActionList;
-typedef AvlTree<Action, String, CmpStr> ActionDict;
-
-struct VarDef;
-struct Join;
-struct Expression;
-struct Term;
-struct FactorWithAug;
-struct FactorWithLabel;
-struct FactorWithRep;
-struct FactorWithNeg;
-struct Factor;
-struct Literal;
-struct Range;
-struct RegExpr;
-struct ReItem;
-struct ReOrBlock;
-struct ReOrItem;
-struct TokenRegion;
-
-/* Priority name dictionary. */
-typedef AvlMapEl<String, int> PriorDictEl;
-typedef AvlMap<String, int, CmpStr> PriorDict;
-
-/* Local error name dictionary. */
-typedef AvlMapEl<String, int> LocalErrDictEl;
-typedef AvlMap<String, int, CmpStr> LocalErrDict;
-
-/* Tree of instantiated names. */
-typedef BstMapEl<String, NameInst*> NameMapEl;
-typedef BstMap<String, NameInst*, CmpStr> NameMap;
-typedef Vector<NameInst*> NameVect;
-typedef BstSet<NameInst*> NameSet;
-
-/* Node in the tree of instantiated names. */
-struct NameInst
-{
-	/* Stores the arguments; the node is created as not a longest match,
-	 * with zero reference and use counts, no start/final targets and
-	 * unlinked queue pointers. */
-	NameInst( const InputLoc &loc, NameInst *parent, const String &name,
-			int id, bool isLabel )
-	:
-		loc(loc),
-		parent(parent),
-		name(name),
-		id(id),
-		isLabel(isLabel),
-		isLongestMatch(false),
-		numRefs(0),
-		numUses(0),
-		start(0),
-		final(0),
-		/* Start unlinked rather than with indeterminate pointers. */
-		prev(0),
-		next(0)
-	{}
-
-	InputLoc loc;
-
-	/* Keep parent pointers in the name tree to retrieve
-	 * fully qualified names. */
-	NameInst *parent;
-
-	String name;
-	int id;
-	bool isLabel;
-	bool isLongestMatch;
-
-	int numRefs;
-	int numUses;
-
-	/* Names underneath us, excludes anonymous names. */
-	NameMap children;
-
-	/* All names underneath us in order of appearance. */
-	NameVect childVect;
-
-	/* Join scopes need an implicit "final" target. */
-	NameInst *start, *final;
-
-	/* During a fsm generation walk, lists the names that are referenced by
-	 * epsilon operations in the current scope. After the link is made by the
-	 * epsilon reference and the join operation is complete, the label can
-	 * have its refcount decremented. Once there are no more references the
-	 * entry point can be removed from the fsm returned. */
-	NameVect referencedNames;
-
-	/* Pointers for the name search queue. */
-	NameInst *prev, *next;
-
-	/* Check if this name inst or any name inst below is referenced. */
-	bool anyRefsRec();
-};
-
-typedef DList<NameInst> NameInstList;
-
-/* Stack frame used in walking the name tree. */
-struct NameFrame
-{
- NameInst *prevNameInst;
- int prevNameChild;
- NameInst *prevLocalScope;
-};
-
-/* Class to collect information about the machine during the
- * parse of input. */
-struct Compiler
-{
- /* Create a new parse data object. This is done at the beginning of every
- * fsm specification. */
- Compiler( const String &fileName, const String &sectionName,
- const InputLoc &sectionLoc, ostream &out );
- ~Compiler();
-
- /*
- * Setting up the graph dict.
- */
-
- void compileLiteralTokens();
- void initEmptyScanners();
- void initUniqueTypes();
-
- /* Initialize a graph dict with the basic fsms. */
- void initGraphDict();
- void createBuiltin( const char *name, BuiltinMachine builtin );
-
- /* Make a name id in the current name instantiation scope if it is not
- * already there. */
- NameInst *addNameInst( const InputLoc &loc, char *data, bool isLabel );
- NameInst *makeJoinNameTree( Join *join );
- NameInst *makeNameTree( );
- void fillNameIndex( NameInst **nameIndex, NameInst *from );
- NameInst **makeNameIndex( NameInst *rootName );
-
-
- void printNameTree( NameInst *rootName );
- void printNameIndex( NameInst **nameIndex );
-
- /* Increments the usage count on entry names. Names that are no longer
- * needed will have their entry points unset. */
- void unsetObsoleteEntries( FsmGraph *graph );
-
- /* Resove name references in action code and epsilon transitions. */
- NameSet resolvePart( NameInst *refFrom, const char *data, bool recLabelsOnly );
- void resolveFrom( NameSet &result, NameInst *refFrom,
- const NameRef &nameRef, int namePos );
- void referenceRegions( NameInst *root );
-
- /* Set the alphabet type. If type types are not valid returns false. */
- bool setAlphType( char *s1, char *s2 );
- bool setAlphType( char *s1 );
-
- /* Unique actions. */
- void removeDups( ActionTable &actionTable );
- void removeActionDups( FsmGraph *graph );
-
- /* Dumping the name instantiation tree. */
- void printNameInst( NameInst *nameInst, int level );
-
- /* Make the graph from a graph dict node. Does minimization. */
- void finishGraphBuild( FsmGraph *graph );
- FsmGraph *makeAllRegions();
- FsmGraph *makeScanner();
-
- void analyzeAction( Action *action, InlineList *inlineList );
- void analyzeGraph( FsmGraph *graph );
- void resolvePrecedence( PdaGraph *pdaGraph );
- LangEl *predOf( PdaTrans *trans, long action );
- bool precedenceSwap( long action1, long action2, LangEl *l1, LangEl *l2 );
- bool precedenceRemoveBoth( LangEl *l1, LangEl *l2 );
-
- void initKeyOps();
-
- /*
- * Data collected during the parse.
- */
-
- /* The list of instances. */
- RegionGraphList instanceList;
-
- /* Dictionary of actions. Lets actions be defined and then referenced. */
- ActionDict actionDict;
-
- /* Dictionary of named priorities. */
- PriorDict priorDict;
-
- /* Dictionary of named local errors. */
- LocalErrDict localErrDict;
-
- /* List of actions. Will be pasted into a switch statement. */
- ActionList actionList;
-
- /* The id of the next priority name and label. */
- int nextPriorKey, nextLocalErrKey, nextNameId;
-
- /* The default priority number key for a machine. This is active during
- * the parse of the rhs of a machine assignment. */
- int curDefPriorKey;
-
- int curDefLocalErrKey;
-
- /* Alphabet type. */
- HostType *userAlphType;
- bool alphTypeSet;
-
- /* Element type and get key expression. */
- InlineList *getKeyExpr;
- InlineList *accessExpr;
- InlineList *curStateExpr;
-
- /* The alphabet range. */
- char *lowerNum, *upperNum;
- Key lowKey, highKey;
- InputLoc rangeLowLoc, rangeHighLoc;
-
- /* The name of the file the fsm is from, and the spec name. */
- String fileName;
- String sectionName;
- InputLoc sectionLoc;
-
- /* Number of errors encountered parsing the fsm spec. */
- int errorCount;
-
- /* Counting the action and priority ordering. */
- int curActionOrd;
- int curPriorOrd;
-
- /* Root of the name tree. */
- NameInst *curNameInst;
- int curNameChild;
-
- /* The place where resolved epsilon transitions go. These cannot go into
- * the parse tree because a single epsilon op can resolve more than once
- * to different nameInsts if the machine it's in is used more than once. */
- NameVect epsilonResolvedLinks;
- int nextEpsilonResolvedLink;
-
- /* Root of the name tree used for doing local name searches. */
- NameInst *localNameScope;
-
- void setLmInRetLoc( InlineList *inlineList );
- void initLongestMatchData();
- void initNameWalk( NameInst *rootName );
- NameInst *nextNameScope() { return curNameInst->childVect[curNameChild]; }
- NameFrame enterNameScope( bool isLocal, int numScopes );
- void popNameScope( const NameFrame &frame );
- void resetNameScope( const NameFrame &frame );
-
- /* Counter for assigning ids to longest match items. */
- int nextTokenId;
-
- /* List of all longest match parse tree items. */
- RegionList regionList;
-
- NamespaceList namespaceList;
-
- Action *newAction( const String &name, InlineList *inlineList );
-
- Action *setTokStart;
- int setTokStartOrd;
-
- Action *initActId;
- int initActIdOrd;
-
- Action *setTokEnd;
- int setTokEndOrd;
-
- CodeBlock *rootCodeBlock;
-
- void beginProcessing()
- {
- ::condData = &thisCondData;
- ::keyOps = &thisKeyOps;
- }
-
- CondData thisCondData;
- KeyOps thisKeyOps;
-
- UniqueType *mainReturnUT;
-
- /* CONTEXT FREE */
- ProdElList *makeProdElList( LangEl *langEl );
- void wrapNonTerminals();
- void makeDefinitionNames();
- void noUndefindLangEls();
- void declareBaseLangEls();
- void makeLangElIds();
- void makeLangElNames();
- void makeTerminalWrappers();
- void makeEofElements();
- void makeIgnoreCollectors();
- void setPrecedence();
-
- void typeDeclaration();
- void typeResolve();
-
- /* Parser generation. */
- void advanceReductions( PdaGraph *pdaGraph );
- void sortActions( PdaGraph *pdaGraph );
- void addDupTerms( PdaGraph *pdaGraph );
- void linkExpansions( PdaGraph *pdaGraph );
- void lalr1FollowEpsilonOp( PdaGraph *pdaGraph );
-
- void transferCommits( PdaGraph *pdaGraph, PdaTrans *trans, PdaState *state, long prodId );
-
- void lalr1AddFollow2( PdaGraph *pdaGraph, PdaTrans *trans, FollowToAdd &followKeys );
- void lalr1AddFollow1( PdaGraph *pdaGraph, PdaState *state );
-
- void lalr1AddFollow2( PdaGraph *pdaGraph, PdaTrans *trans, long followKey, long prior );
- void lalr1AddFollow1( PdaGraph *pdaGraph, PdaTrans *trans );
-
- void lalr1AddFollowSets( PdaGraph *pdaGraph, LangElSet &parserEls );
-
- void lr0BringInItem( PdaGraph *pdaGraph, PdaState *dest, PdaState *prodState,
- PdaTrans *expandFrom, Definition *prod );
- void lr0InvokeClosure( PdaGraph *pdaGraph, PdaState *state );
- void lr0CloseAllStates( PdaGraph *pdaGraph );
-
- void lalr1GenerateParser( PdaGraph *pdaGraph, LangElSet &parserEls );
-
- void reduceActions( PdaGraph *pdaGraph );
-
- bool makeNonTermFirstSetProd( Definition *prod, PdaState *state );
- void makeNonTermFirstSets();
-
- bool makeFirstSetProd( Definition *prod, PdaState *state );
- void makeFirstSets();
-
- int findIndexOff( PdaTables *pdaTables, PdaGraph *pdaGraph, PdaState *state, int &currLen );
- void trySetTime( PdaTrans *trans, long code, long &time );
- void addRegion( PdaState *tabState, PdaTrans *pdaTrans, long pdaKey,
- bool noPreIgnore, bool noPostIgnore );
- PdaState *followProd( PdaState *tabState, PdaState *prodState );
- void findFollow( AlphSet &result, PdaState *overTab,
- PdaState *overSrc, Definition *parentDef );
- void pdaActionOrder( PdaGraph *pdaGraph, LangElSet &parserEls );
- void pdaOrderFollow( LangEl *rootEl, PdaState *tabState,
- PdaTrans *tabTrans, PdaTrans *srcTrans,
- Definition *parentDef, Definition *definition, long &time );
- void pdaOrderProd( LangEl *rootEl, PdaState *tabState,
- PdaState *srcState, Definition *parentDef, long &time );
- void analyzeMachine( PdaGraph *pdaGraph, LangElSet &parserEls );
-
- void makeProdFsms();
- void insertUniqueEmptyProductions();
- void printNonTermFirstSets();
- void printFirstSets();
-
- LangEl *makeRepeatProd( Namespace *nspace, const String &repeatName,
- NamespaceQual *nspaceQual, const String &name );
- LangEl *makeListProd( Namespace *nspace, const String &listName,
- NamespaceQual *nspaceQual, const String &name );
- LangEl *makeOptProd( Namespace *nspace, const String &optName,
- NamespaceQual *nspaceQual, const String &name );
- void resolveFactor( ProdEl *fact );
- void resolveProductionEls();
- void resolvePatternEls();
- void resolveReplacementEls();
- void resolveParserEls();
-
- void addMatchText( ObjectDef *frame, LangEl *lel );
- void addMatchLength( ObjectDef *frame, LangEl *lel );
- void addInput( ObjectDef *frame );
- void addCtx( ObjectDef *frame );
- void addTransTokVar( ObjectDef *frame, LangEl *lel );
- void addProdRHSVars( ObjectDef *localFrame, ProdElList *prodElList );
- void addProdRedObjectVar( ObjectDef *localFrame, LangEl *langEl );
- void addProdObjects();
-
- void addProdRHSLoads( Definition *prod, CodeVect &code, long &insertPos );
- void addProdLHSLoad( Definition *prod, CodeVect &code, long &insertPos );
- void addPushBackLHS( Definition *prod, CodeVect &code, long &insertPos );
-
- void prepGrammar();
- void parsePatterns();
-
- void collectParserEls( LangElSet &parserEls );
- void makeParser( LangElSet &parserEls );
- PdaGraph *makePdaGraph( BstSet<LangEl*> &parserEls );
- PdaTables *makePdaTables( PdaGraph *pdaGraph );
-
- void fillInPatterns( Program *prg );
- void makeRuntimeData();
-
- /* Generate and write out the fsm. */
- void generateGraphviz();
-
- void verifyParseStopGrammar( LangEl *langEl, PdaGraph *pdaGraph );
- void computeAdvanceReductions( LangEl *langEl, PdaGraph *pdaGraph );
-
- void initFieldInstructions( ObjField *el );
- void initLocalInstructions( ObjField *el );
- void initLocalRefInstructions( ObjField *el );
-
- void initMapFunctions( GenericType *gen );
- void initListField( GenericType *gen, const char *name, int offset );
- void initListFields( GenericType *gen );
- void initListFunctions( GenericType *gen );
- void initVectorFunctions( GenericType *gen );
- void initParserFunctions( GenericType *gen );
- void initParserFields( GenericType *gen );
- void initCtxField( GenericType *gen );
-
- void addStdin();
- void addStdout();
- void addStderr();
- void addArgv();
- int argvOffset();
- void initGlobalFunctions();
- void makeDefaultIterators();
- void addLengthField( ObjectDef *objDef, Code getLength );
- ObjectDef *findObject( const String &name );
- void initAllLanguageObjects();
- void resolveListElementOf( ObjectDef *container, ObjectDef *obj, ElementOf *elof );
- void resolveMapElementOf( ObjectDef *container, ObjectDef *obj, ElementOf *elof );
- void resolveElementOf( ObjectDef *obj );
- void makeFuncVisible( Function *func, bool isUserIter );
-
- void resolveFunction( Function *func );
- void resolveUserIter( Function *func );
- void resolvePreEof( TokenRegion *region );
- void resolveRootBlock();
- void resolveTranslateBlock( LangEl *langEl );
- void resolveReductionCode( Definition *prod );
- void resolveParseTree();
- void resolveGenericTypes();
-
- void compileFunction( Function *func, CodeVect &code );
- void compileFunction( Function *func );
- void compileUserIter( Function *func, CodeVect &code );
- void compileUserIter( Function *func );
- void compilePreEof( TokenRegion *region );
- void compileRootBlock();
- void compileTranslateBlock( LangEl *langEl );
- void findLocalTrees( CharSet &trees );
- void makeProdCopies( Definition *prod );
- void compileReductionCode( Definition *prod );
- void initGenericTypes();
- void removeNonUnparsableRepls();
- void compileByteCode();
-
- void resolveUses();
- void createDefaultScanner();
- void generateOutput();
- void compile();
-
- void openNameSpace( ostream &out, Namespace *nspace );
- void closeNameSpace( ostream &out, Namespace *nspace );
- void refNameSpace( LangEl *lel, Namespace *nspace );
- void generateExports();
- void generateExportsImpl();
-
- /*
- * Graphviz Generation
- */
- void writeTransList( PdaState *state );
- void writeDotFile( PdaGraph *graph );
- void writeDotFile( );
-
-
- /*
- * Data collected during the parse.
- */
-
- LelList langEls;
- DefList prodList;
-
- /* Dumping. */
- DotItemIndex dotItemIndex;
-
- PredDeclList predDeclList;
-
- /* The name of the file the fsm is from, and the spec name. */
- // EXISTS IN RL: char *fileName;
- String parserName;
- ostream &out;
- // EXISTS IN RL: InputLoc sectionLoc;
-
- /* How to access the instance data. */
- String access;
-
- /* The name of the token structure. */
- String tokenStruct;
-
- GenericType *anyList;
- GenericType *anyMap;
- GenericType *anyVector;
-
- LangEl *ptrLangEl;
- LangEl *boolLangEl;
- LangEl *intLangEl;
- LangEl *strLangEl;
- LangEl *streamLangEl;
- LangEl *inputLangEl;
- LangEl *anyLangEl;
- LangEl *rootLangEl;
- LangEl *noTokenLangEl;
- LangEl *eofLangEl;
- LangEl *errorLangEl;
- LangEl *defaultCharLangEl;
- LangEl *ignoreLangEl;
-
- TokenRegion *rootRegion;
- TokenRegion *defaultRegion;
- TokenRegion *eofTokenRegion;
-
- Namespace *defaultNamespace;
- Namespace *rootNamespace;
-
- int nextSymbolId;
- int firstNonTermId;
-
- LangEl **langElIndex;
- PdaState *actionDestState;
- DefSetSet prodSetSet;
-
- Definition **prodIdIndex;
- AlphSet literalSet;
-
- PatternList patternList;
- ReplList replList;
- ParserTextList parserTextList;
-
- ObjectDef *globalObjectDef;
-
- VectorTypeIdMap vectorTypeIdMap;
- ObjectDef *curLocalFrame;
-
- UniqueType *findUniqueType( int typeId );
- UniqueType *findUniqueType( int typeId, LangEl *langEl );
- UniqueType *findUniqueType( int typeId, IterDef *iterDef );
-
- UniqueType *uniqueTypeNil;
- UniqueType *uniqueTypePtr;
- UniqueType *uniqueTypeBool;
- UniqueType *uniqueTypeInt;
- UniqueType *uniqueTypeStr;
- UniqueType *uniqueTypeStream;
- UniqueType *uniqueTypeInput;
- UniqueType *uniqueTypeIgnore;
- UniqueType *uniqueTypeAny;
-
- UniqueTypeMap uniqeTypeMap;
- UniqueRepeatMap uniqeRepeatMap;
- UniqueMapMap uniqueMapMap;
- UniqueListMap uniqueListMap;
- UniqueVectorMap uniqueVectorMap;
- UniqueParserMap uniqueParserMap;
-
- void initStrObject();
- void initStreamObject();
- void initInputObject();
- void initIntObject();
- void initTokenObjects();
-
- ObjectDef *intObj;
- ObjectDef *strObj;
- ObjectDef *streamObj;
- ObjectDef *inputObj;
- ObjectDef *tokenObj;
-
- FsmTables *fsmTables;
- RuntimeData *runtimeData;
-
- int nextPatReplId;
- int nextGenericId;
-
- FunctionList functionList;
- int nextFuncId;
-
- enum CompileContext {
- CompileTranslation,
- CompileReduction,
- CompileFunction,
- CompileRoot
- };
-
- CompileContext compileContext;
- LongVect returnJumps;
- LongVect breakJumps;
- Function *curFunction;
-
- /* Loops fill this in for return statements to use. */
- CodeVect *loopCleanup;
-
- ObjField *makeDataEl();
- ObjField *makePosEl();
- ObjField *makeLineEl();
-
- IterDef *findIterDef( IterDef::Type type, GenericType *generic );
- IterDef *findIterDef( IterDef::Type type, Function *func );
- IterDef *findIterDef( IterDef::Type type );
- IterDefSet iterDefSet;
-
- enum GeneratesType { GenToken, GenIgnore, GenCfl };
-
- int nextObjectId;
- GeneratesType generatesType;
- bool generatesIgnore;
- bool insideRegion;
-
- StringMap literalStrings;
-
- long nextFrameId;
- long nextParserId;
-
- ObjectDef *rootLocalFrame;
-
- long nextLabelId;
- ObjectDef *objectDef;
-
- bool revertOn;
-
- RedFsm *redFsm;
-
- PdaGraph *pdaGraph;
- PdaTables *pdaTables;
-
- long predValue;
- long nextMatchEndNum;
-
- TypeRef *argvTypeRef;
-
- Context *context;
-};
-
-/* Free helpers used while building FSM graphs. Declarations only. */
-void afterOpMinimize( FsmGraph *fsm, bool lastInSeq = true );
-Key makeFsmKeyHex( char *str, const InputLoc &loc, Compiler *pd );
-Key makeFsmKeyDec( char *str, const InputLoc &loc, Compiler *pd );
-Key makeFsmKeyNum( char *str, const InputLoc &loc, Compiler *pd );
-Key makeFsmKeyChar( char c, Compiler *pd );
-void makeFsmKeyArray( Key *result, char *data, int len, Compiler *pd );
-void makeFsmUniqueKeyArray( KeySet &result, char *data, int len,
- bool caseInsensitive, Compiler *pd );
-FsmGraph *makeBuiltin( BuiltinMachine builtin, Compiler *pd );
-FsmGraph *dotFsm( Compiler *pd );
-FsmGraph *dotStarFsm( Compiler *pd );
-
-void errorStateLabels( const NameSet &locations );
-
-struct ColmParser;
-
-/* Dictionary from a String key to a ColmParser pointer, compared with CmpStr. */
-typedef AvlMap<String, ColmParser *, CmpStr> ParserDict;
-typedef AvlMapEl<String, ColmParser *> ParserDictEl;
-
-/* Language-element and type-alias helpers that operate on a namespace. */
-LangEl *declareLangEl( Compiler *pd, Namespace *nspace, const String &data, LangEl::Type type );
-LangEl *addLangEl( Compiler *pd, Namespace *nspace, const String &data, LangEl::Type type );
-void declareTypeAlias( Compiler *pd, Namespace *nspace, const String &data, TypeRef *typeRef );
-LangEl *findType( Compiler *pd, Namespace *nspace, const String &data );
-
-#endif /* _PARSEDATA_H */
diff --git a/colm/parsetree.cc b/colm/parsetree.cc
deleted file mode 100644
index 084ffbb8..00000000
--- a/colm/parsetree.cc
+++ /dev/null
@@ -1,1776 +0,0 @@
-/*
- * Copyright 2006-2012 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Colm.
- *
- * Colm is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Colm is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Colm; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include "lmparse.h"
-#include "parsetree.h"
-#include "input.h"
-#include "fsmrun.h"
-
-#include <iostream>
-#include <iomanip>
-#include <errno.h>
-#include <limits.h>
-#include <stdlib.h>
-
-
-using namespace std;
-ostream &operator<<( ostream &out, const NameRef &nameRef );
-ostream &operator<<( ostream &out, const NameInst &nameInst );
-ostream &operator<<( ostream &out, const Token &token );
-
-/* Convert the literal string which comes in from the scanner into an array of
- * characters with escapes and options interpreted. Also null terminates the
- * string. Though this null termination should not be relied on for
- * interpreting literals in the parser because the string may contain a
- * literal string with \0.
- *
- * result:          receives the decoded characters, chopped to the decoded
- *                  length.
- * caseInsensitive: set to true when a trailing 'i' option is present,
- *                  otherwise false.
- * srcString:       raw literal text: an opening delimiter, the body, a closing
- *                  quote (or a newline), then any option characters.
- * loc:             location used when reporting an unsupported option.
- *
- * All pointer walks are bounded by the source buffer, so a malformed literal
- * (too short, no closing delimiter, or a backslash with nothing after it)
- * cannot run past either end of the data. */
-void prepareLitString( String &result, bool &caseInsensitive,
-        const String &srcString, const InputLoc &loc )
-{
-    result.setAs( String::Fresh(), srcString.length() );
-    caseInsensitive = false;
-
-    /* Anything shorter than an opening plus a closing delimiter has no body. */
-    if ( srcString.length() < 2 ) {
-        result.chop( 0 );
-        return;
-    }
-
-    char *begin = srcString.data;
-    char *src = begin + 1;
-    char *end = begin + srcString.length() - 1;
-
-    /* Step backwards over the option characters that follow the closing
-     * delimiter. Never step back past the first character. */
-    while ( end > begin && *end != '\'' && *end != '\"' && *end != '\n' ) {
-        if ( *end == 'i' )
-            caseInsensitive = true;
-        else {
-            error( loc ) << "literal string '" << *end <<
-                    "' option not supported" << endl;
-        }
-        end -= 1;
-    }
-
-    /* A terminating newline is part of the literal's content. */
-    if ( *end == '\n' )
-        end++;
-
-    char *dest = result.data;
-    int len = 0;
-    while ( src < end ) {
-        /* Only treat a backslash as an escape when the escaped character is
-         * still inside the body. */
-        if ( *src == '\\' && src + 1 < end ) {
-            switch ( src[1] ) {
-                case '0': dest[len++] = '\0'; break;
-                case 'a': dest[len++] = '\a'; break;
-                case 'b': dest[len++] = '\b'; break;
-                case 't': dest[len++] = '\t'; break;
-                case 'n': dest[len++] = '\n'; break;
-                case 'v': dest[len++] = '\v'; break;
-                case 'f': dest[len++] = '\f'; break;
-                case 'r': dest[len++] = '\r'; break;
-                /* Backslash-newline is a line continuation: emit nothing. */
-                case '\n': break;
-                default: dest[len++] = src[1]; break;
-            }
-            src += 2;
-        }
-        else {
-            dest[len++] = *src++;
-        }
-    }
-
-    result.chop( len );
-}
-
-/* Three-way ordering of unique types: first by type id, then by the field
- * that distinguishes instances of that type id (langEl for tree, pointer and
- * reference types, iterDef for iterator types). Any other type id that
- * compares equal is unimplemented and trips the assertion. */
-int CmpUniqueType::compare( const UniqueType &ut1, const UniqueType &ut2 )
-{
-    if ( ut1.typeId < ut2.typeId )
-        return -1;
-    if ( ut1.typeId > ut2.typeId )
-        return 1;
-
-    /* Same type id from here on. */
-    const bool keyedByLangEl =
-            ut1.typeId == TYPE_TREE ||
-            ut1.typeId == TYPE_PTR ||
-            ut1.typeId == TYPE_REF;
-
-    if ( keyedByLangEl ) {
-        if ( ut1.langEl < ut2.langEl )
-            return -1;
-        if ( ut1.langEl > ut2.langEl )
-            return 1;
-        return 0;
-    }
-
-    if ( ut1.typeId == TYPE_ITER ) {
-        if ( ut1.iterDef < ut2.iterDef )
-            return -1;
-        if ( ut1.iterDef > ut2.iterDef )
-            return 1;
-        return 0;
-    }
-
-    /* Fail on anything unimplemented. */
-    assert( false );
-    return 0;
-}
-
-/* Three-way ordering of unique repeats: by repeat type, then by langEl. */
-int CmpUniqueRepeat::compare( const UniqueRepeat &ut1, const UniqueRepeat &ut2 )
-{
-    if ( ut1.repeatType < ut2.repeatType )
-        return -1;
-    if ( ut1.repeatType > ut2.repeatType )
-        return 1;
-
-    /* Repeat types tie, fall back to the language element. */
-    if ( ut1.langEl < ut2.langEl )
-        return -1;
-    if ( ut1.langEl > ut2.langEl )
-        return 1;
-    return 0;
-}
-
-/* Three-way ordering of unique maps: by key, then by value. */
-int CmpUniqueMap::compare( const UniqueMap &ut1, const UniqueMap &ut2 )
-{
-    if ( ut1.key < ut2.key )
-        return -1;
-    if ( ut1.key > ut2.key )
-        return 1;
-
-    /* Keys tie, fall back to the value. */
-    if ( ut1.value < ut2.value )
-        return -1;
-    if ( ut1.value > ut2.value )
-        return 1;
-    return 0;
-}
-
-/* Three-way ordering of unique lists by their value field. */
-int CmpUniqueList::compare( const UniqueList &ut1, const UniqueList &ut2 )
-{
-    int order = 0;
-    if ( ut1.value < ut2.value )
-        order = -1;
-    else if ( ut1.value > ut2.value )
-        order = 1;
-    return order;
-}
-
-/* Three-way ordering of unique vectors by their value field. */
-int CmpUniqueVector::compare( const UniqueVector &ut1, const UniqueVector &ut2 )
-{
-    int order = 0;
-    if ( ut1.value < ut2.value )
-        order = -1;
-    else if ( ut1.value > ut2.value )
-        order = 1;
-    return order;
-}
-
-/* Three-way ordering of unique parsers by their parseType field. */
-int CmpUniqueParser::compare( const UniqueParser &ut1, const UniqueParser &ut2 )
-{
-    int order = 0;
-    if ( ut1.parseType < ut2.parseType )
-        order = -1;
-    else if ( ut1.parseType > ut2.parseType )
-        order = 1;
-    return order;
-}
-
-/* Build the machine for a variable definition: walk the join, move any local
- * error actions registered under this name onto the states, resolve epsilon
- * transitions for single-expression joins and drop obsolete entry points. */
-FsmGraph *VarDef::walk( Compiler *pd )
-{
-    /* Recurse on the expression. */
-    FsmGraph *fsm = join->walk( pd );
-
-    /* Transfer the local error actions, if any were recorded for this name. */
-    LocalErrDictEl *errEl = pd->localErrDict.find( name );
-    if ( errEl != 0 ) {
-        StateList::Iter state = fsm->stateList;
-        while ( state.lte() ) {
-            fsm->transferErrorActions( state, errEl->value );
-            state++;
-        }
-    }
-
-    /* A join of multiple expressions has already had its epsilon transitions
-     * resolved. A join with only a single expression has not, so run the
-     * epsilon op now. */
-    const bool singleExpr = join->exprList.length() == 1;
-    if ( singleExpr )
-        fsm->epsilonOp();
-
-    /* Entry points that are no longer used can now be unset. */
-    pd->unsetObsoleteEntries( fsm );
-
-    return fsm;
-}
-
-
-/* Build the machine for a region definition inside its own name scope: walk
- * the token region, move local error actions onto the states, drop obsolete
- * entry points and register an entry point when the name is referenced. */
-FsmGraph *RegionDef::walk( Compiler *pd )
-{
-    /* Open a new name scope for the region. */
-    NameFrame savedFrame = pd->enterNameScope( true, 1 );
-
-    /* Recurse on the expression. */
-    FsmGraph *fsm = tokenRegion->walk( pd );
-
-    /* Transfer the local error actions, if any were recorded for this name. */
-    LocalErrDictEl *errEl = pd->localErrDict.find( name );
-    if ( errEl != 0 ) {
-        StateList::Iter state = fsm->stateList;
-        while ( state.lte() ) {
-            fsm->transferErrorActions( state, errEl->value );
-            state++;
-        }
-    }
-
-    /* Entry points that are no longer used can now be unset. */
-    pd->unsetObsoleteEntries( fsm );
-
-    /* When the variable's name is referenced, add the entry point to the
-     * graph. */
-    NameInst *inst = pd->curNameInst;
-    if ( inst->numRefs > 0 )
-        fsm->setEntry( inst->id, fsm->startState );
-
-    /* Close the name scope again. */
-    pd->popNameScope( savedFrame );
-    return fsm;
-}
-
-/* Add this region to the name tree, create the token region's actions while
- * the region's name instantiation is current, and remember that
- * instantiation in the token region. */
-void RegionDef::makeNameTree( const InputLoc &loc, Compiler *pd )
-{
-    /* The definition opens a new scope; remember the one we came from. */
-    NameInst *outerNameInst = pd->curNameInst;
-    pd->curNameInst = pd->addNameInst( loc, name, false );
-
-    /* The actions are created here, with the new scope current. */
-    tokenRegion->makeActions( pd );
-
-    /* Store the name inst in the token region. This is legal only because
-     * token regions are referenced exactly once (near the root of the name
-     * tree), so they can never have more than one corresponding name inst. */
-    assert( tokenRegion->regionNameInst == 0 );
-    tokenRegion->regionNameInst = pd->curNameInst;
-
-    /* Leave the scope by restoring the previous name instantiation. */
-    pd->curNameInst = outerNameInst;
-}
-
-/* Location of the token definition: the action's location when there is an
- * action, otherwise the location stored in semiLoc. */
-InputLoc TokenDef::getLoc()
-{
-    if ( action == 0 )
-        return semiLoc;
-    return action->loc;
-}
-
-/*
- * If there are any LMs then all of the following entry points must reset
- * tokstart:
- *
- * 1. fentry(StateRef)
- * 2. fgoto(StateRef), fcall(StateRef), fnext(StateRef)
- * 3. target of any transition that has an fcall (the return loc).
- * 4. start state of all longest match routines.
- */
-
-/* Allocate an action, register it in the compiler's action list and flag it
- * as a longest-match action. Returns the new action. */
-Action *TokenRegion::newAction( Compiler *pd, const InputLoc &loc,
-        const String &name, InlineList *inlineList )
-{
-    Action *lmAction = new Action( loc, name, inlineList );
-
-    /* Register first, then flag, as callers have always seen it. */
-    pd->actionList.append( lmAction );
-    lmAction->isLmAction = true;
-
-    return lmAction;
-}
-
-/* Create the longest-match actions for every token definition in the region
- * (setActId, actOnLast, actOnNext, actLagBehind) plus the region-wide
- * lmActSelect action. Each kind is created in its own pass over the list, so
- * the actions are appended to the compiler's action list grouped by kind.
- *
- * The action names are formatted into a stack buffer. newAction() takes the
- * name as a String, so the character buffer is only a conversion source and
- * does not need to outlive the call. */
-void TokenRegion::makeActions( Compiler *pd )
-{
-    /* Large enough for the longest prefix plus any int. */
-    char actName[50];
-
-    /* Make actions that set the action id. */
-    for ( TokenDefListReg::Iter lmi = tokenDefList; lmi.lte(); lmi++ ) {
-        /* For each part create actions for setting the match type. We need
-         * to do this so that the actions will go into the actionIndex. */
-        InlineList *inlineList = new InlineList;
-        inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
-                InlineItem::LmSetActId ) );
-        snprintf( actName, sizeof(actName), "store%i", lmi->longestMatchId );
-        lmi->setActId = newAction( pd, lmi->getLoc(), actName, inlineList );
-    }
-
-    /* Make actions that execute the user action and restart on the last character. */
-    for ( TokenDefListReg::Iter lmi = tokenDefList; lmi.lte(); lmi++ ) {
-        InlineList *inlineList = new InlineList;
-        inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
-                InlineItem::LmOnLast ) );
-        snprintf( actName, sizeof(actName), "imm%i", lmi->longestMatchId );
-        lmi->actOnLast = newAction( pd, lmi->getLoc(), actName, inlineList );
-    }
-
-    /* Make actions that execute the user action and restart on the next
-     * character. These actions will set tokend themselves (it is the current
-     * char). */
-    for ( TokenDefListReg::Iter lmi = tokenDefList; lmi.lte(); lmi++ ) {
-        InlineList *inlineList = new InlineList;
-        inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
-                InlineItem::LmOnNext ) );
-        snprintf( actName, sizeof(actName), "lagh%i", lmi->longestMatchId );
-        lmi->actOnNext = newAction( pd, lmi->getLoc(), actName, inlineList );
-    }
-
-    /* Make actions that execute the user action and restart at tokend. These
-     * actions execute some time after matching the last char. */
-    for ( TokenDefListReg::Iter lmi = tokenDefList; lmi.lte(); lmi++ ) {
-        InlineList *inlineList = new InlineList;
-        inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
-                InlineItem::LmOnLagBehind ) );
-        snprintf( actName, sizeof(actName), "lag%i", lmi->longestMatchId );
-        lmi->actLagBehind = newAction( pd, lmi->getLoc(), actName, inlineList );
-    }
-
-    /* The select action is not tied to any token, give it a fixed location. */
-    InputLoc loc;
-    loc.line = 1;
-    loc.col = 1;
-
-    /* Create the error action. */
-    InlineList *il6 = new InlineList;
-    il6->append( new InlineItem( loc, this, 0, InlineItem::LmSwitch ) );
-    lmActSelect = newAction( pd, loc, "lagsel", il6 );
-}
-
-/* Redirect a transition so that it lands on the graph's start state while
- * keeping its source state. */
-void TokenRegion::restart( FsmGraph *graph, FsmTrans *trans )
-{
-    /* Capture the source before detaching the transition. */
-    FsmState *origin = trans->fromState;
-    FsmState *oldTarget = trans->toState;
-
-    graph->detachTrans( origin, oldTarget, trans );
-    graph->attachTrans( origin, graph->startState, trans );
-}
-
-/* Turn a unioned scanner graph into a longest-match machine. In order: fill
- * each state's lmItemSet, embed the token-start and act-id initialisation on
- * the start state, embed the immediate/tokend/act-id actions on transitions
- * that carry a longest-match action, redirect the immediately-recognised
- * transitions to the start state, install the error and eof actions that
- * complete a token, and finally make the start state final. */
-void TokenRegion::runLongestMatch( Compiler *pd, FsmGraph *graph )
-{
- /* Every state marked from the start state receives the null (0) item. */
- graph->markReachableFromHereStopFinal( graph->startState );
- for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
- if ( ms->stateBits & SB_ISMARKED ) {
- ms->lmItemSet.insert( 0 );
- ms->stateBits &= ~ SB_ISMARKED;
- }
- }
-
- /* Transfer the first item of non-empty lmAction tables to the item sets
- * of the states that follow. Exclude states that have no transitions out.
- * This must happen on a separate pass so that on each iteration of the
- * next pass we have the item set entries from all lmAction tables. */
- for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
- for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
- if ( trans->lmActionTable.length() > 0 ) {
- LmActionTableEl *lmAct = trans->lmActionTable.data;
- FsmState *toState = trans->toState;
- assert( toState );
-
- /* Check if there are transitions out, this may be a very
- * close approximation? Out transitions going nowhere?
- * FIXME: Check. */
- if ( toState->outList.length() > 0 ) {
- /* Fill the item sets. */
- graph->markReachableFromHereStopFinal( toState );
- for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
- if ( ms->stateBits & SB_ISMARKED ) {
- ms->lmItemSet.insert( lmAct->value );
- ms->stateBits &= ~ SB_ISMARKED;
- }
- }
- }
- }
- }
- }
-
- /* The lmItem sets are now filled, telling us which longest match rules
- * can succeed in which states. First determine if we need to make sure
- * act is defaulted to zero. */
- int maxItemSetLength = 0;
- graph->markReachableFromHereStopFinal( graph->startState );
- for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
- if ( ms->stateBits & SB_ISMARKED ) {
- if ( ms->lmItemSet.length() > maxItemSetLength )
- maxItemSetLength = ms->lmItemSet.length();
- ms->stateBits &= ~ SB_ISMARKED;
- }
- }
-
- /* The actions executed on starting to match a token. */
- graph->isolateStartState();
- graph->startState->fromStateActionTable.setAction( pd->setTokStartOrd, pd->setTokStart );
- if ( maxItemSetLength > 1 ) {
- /* The longest match action switch may be called when tokens are
- * matched, in which case act must be initialized, there must be a
- * case to handle the error, and the generated machine will require an
- * error state. */
- lmSwitchHandlesError = true;
- graph->startState->toStateActionTable.setAction( pd->initActIdOrd, pd->initActId );
- }
-
- /* The place to store transitions to restart. It may be possible for the
- * restarting to affect the searching through the graph that follows. For
- * now take the safe route and save the list of transitions to restart
- * until after all searching is done. */
- Vector<FsmTrans*> restartTrans;
-
- /* Set actions that do immediate token recognition, set the longest match part
- * id and set the token ending. */
- for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
- for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
- if ( trans->lmActionTable.length() > 0 ) {
- LmActionTableEl *lmAct = trans->lmActionTable.data;
- FsmState *toState = trans->toState;
- assert( toState );
-
- /* Check if there are transitions out, this may be a very
- * close approximation? Out transitions going nowhere?
- * FIXME: Check. */
- if ( toState->outList.length() == 0 ) {
- /* Can execute the immediate action for the longest match
- * part. Redirect the action to the start state. */
- trans->actionTable.setAction( lmAct->key,
- lmAct->value->actOnLast );
- restartTrans.append( trans );
- }
- else {
- /* Look for non final states that have a non-empty item
- * set. If these are present then we need to record the
- * end of the token. Also find the highest item set
- * length reachable from here (excluding at transitions to
- * final states). */
- bool nonFinalNonEmptyItemSet = false;
- maxItemSetLength = 0;
- graph->markReachableFromHereStopFinal( toState );
- for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
- if ( ms->stateBits & SB_ISMARKED ) {
- if ( ms->lmItemSet.length() > 0 && !ms->isFinState() )
- nonFinalNonEmptyItemSet = true;
- if ( ms->lmItemSet.length() > maxItemSetLength )
- maxItemSetLength = ms->lmItemSet.length();
- ms->stateBits &= ~ SB_ISMARKED;
- }
- }
-
- /* If there are reachable states that are not final and
- * have non empty item sets or that have an item set
- * length greater than one then we need to set tokend
- * because the error action that matches the token will
- * require it. */
- if ( nonFinalNonEmptyItemSet || maxItemSetLength > 1 )
- trans->actionTable.setAction( pd->setTokEndOrd, pd->setTokEnd );
-
- /* Some states may not know which longest match item to
- * execute, must set it. */
- if ( maxItemSetLength > 1 ) {
- /* There are transitions out, another match may come. */
- trans->actionTable.setAction( lmAct->key,
- lmAct->value->setActId );
- }
- }
- }
- }
- }
-
- /* Now that all graph searching is done it is certainly safe to set the
- * restarting. It may be safe above, however this must be verified. */
- for ( Vector<FsmTrans*>::Iter rs = restartTrans; rs.lte(); rs++ )
- restart( graph, *rs );
-
- /* One ordering shared by every error/eof action embedded below. */
- int lmErrActionOrd = pd->curActionOrd++;
-
- /* Embed the error for recognizing a char. */
- for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
- if ( st->lmItemSet.length() == 1 && st->lmItemSet[0] != 0 ) {
- if ( st->isFinState() ) {
- /* On error execute the actOnNext action, which knows that
- * the last character of the token was one back and restart. */
- graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
- &st->lmItemSet[0]->actOnNext, 1 );
- st->eofActionTable.setAction( lmErrActionOrd,
- st->lmItemSet[0]->actOnNext );
- st->eofTarget = graph->startState;
- }
- else {
- /* Not a final state: use the lag-behind action instead. */
- graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
- &st->lmItemSet[0]->actLagBehind, 1 );
- st->eofActionTable.setAction( lmErrActionOrd,
- st->lmItemSet[0]->actLagBehind );
- st->eofTarget = graph->startState;
- }
- }
- else if ( st->lmItemSet.length() > 1 ) {
- /* Need to use the select. Take note of which items the select
- * is needed for so only the necessary actions are included. */
- for ( LmItemSet::Iter plmi = st->lmItemSet; plmi.lte(); plmi++ ) {
- if ( *plmi != 0 )
- (*plmi)->inLmSelect = true;
- }
- /* On error, execute the action select and go to the start state. */
- graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
- &lmActSelect, 1 );
- st->eofActionTable.setAction( lmErrActionOrd, lmActSelect );
- st->eofTarget = graph->startState;
- }
- }
-
- /* Finally, the start state should be made final. */
- graph->setFinState( graph->startState );
-}
-
-/* For every state that carries leaving (out) actions, install those actions
- * as the state's error actions. */
-void TokenRegion::transferScannerLeavingActions( FsmGraph *graph )
-{
-    StateList::Iter st = graph->stateList;
-    while ( st.lte() ) {
-        const bool hasLeavingActions = st->outActionTable.length() > 0;
-        if ( hasLeavingActions )
-            graph->setErrorActions( st, st->outActionTable );
-        st++;
-    }
-}
-
-/* Build the scanner machine for this region.
- *
- * Every token definition that has a pattern is walked and tagged with its
- * longest-match action. The resulting machines are unioned and handed to
- * runLongestMatch(). A region without any patterns yields a fresh graph set
- * up with lambdaFsm().
- *
- * The first token whose machine accepts the zero-length word becomes the
- * region's defaultTokenDef; it is an error for that token to be an ignore
- * token.
- *
- * Returns the finished graph. The temporary array of part pointers is
- * released on every path, and no element of it is read unless at least one
- * part was produced. */
-FsmGraph *TokenRegion::walk( Compiler *pd )
-{
-    /* Make each part of the longest match. */
-    int numParts = 0;
-    FsmGraph **parts = new FsmGraph*[tokenDefList.length()];
-    for ( TokenDefListReg::Iter lmi = tokenDefList; lmi.lte(); lmi++ ) {
-        /* Watch out for patternless tokens. */
-        if ( lmi->join != 0 ) {
-            /* Create the machine and embed the setting of the longest match id. */
-            parts[numParts] = lmi->join->walk( pd );
-            parts[numParts]->longMatchAction( pd->curActionOrd++, lmi );
-
-            /* Look for tokens that accept the zero length-word. The first one found
-             * will be used as the default token. */
-            if ( defaultTokenDef == 0 && parts[numParts]->startState->isFinState() )
-                defaultTokenDef = lmi;
-
-            numParts += 1;
-        }
-    }
-
-    if ( defaultTokenDef != 0 && defaultTokenDef->tdLangEl->ignore )
-        error() << "ignore token cannot be a scanner's zero-length token" << endp;
-
-    FsmGraph *retFsm = 0;
-    if ( numParts == 0 ) {
-        /* The region is empty: there is no part to start from, so build a
-         * new graph with lambdaFsm(). */
-        retFsm = new FsmGraph();
-        retFsm->lambdaFsm();
-    }
-    else {
-        /* Before we union the patterns we need to deal with leaving actions. They
-         * are transferred to error transitions out of the final states (like local
-         * error actions) and to eof actions. In the scanner we need to forbid
-         * on_last for any final state that has a leaving action. */
-        for ( int i = 0; i < numParts; i++ )
-            transferScannerLeavingActions( parts[i] );
-
-        /* Union machines one and up with machine zero. */
-        retFsm = parts[0];
-        for ( int i = 1; i < numParts; i++ ) {
-            retFsm->unionOp( parts[i] );
-            afterOpMinimize( retFsm );
-        }
-
-        runLongestMatch( pd, retFsm );
-    }
-
-    /* Only the array of pointers is owned here; the graphs themselves were
-     * either consumed by the union or are being returned. */
-    delete[] parts;
-
-    return retFsm;
-}
-
-/* Construct a join that holds a single expression. The context and mark
- * members start out null. */
-Join::Join( Expression *expr )
-:
- context(0),
- mark(0)
-{
- exprList.append( expr );
-}
-
-/* Walk the join's single expression. When a context is present, embed the
- * mark as a leaving action and concatenate the context's machine. */
-FsmGraph *Join::walk( Compiler *pd )
-{
-    assert( exprList.length() == 1 );
-
-    FsmGraph *fsm = exprList.head->walk( pd );
-
-    /* No trailing context, the expression's machine is the result. */
-    if ( context == 0 )
-        return fsm;
-
-    /* Mark the point where the expression is left, then append the context. */
-    fsm->leaveFsmAction( pd->curActionOrd++, mark );
-    fsm->concatOp( context->walk( pd ) );
-
-    return fsm;
-}
-
-/* Release the children owned by this expression node. Binary nodes own both
- * an expression and a term, a term node owns only its term and a builtin
- * node owns nothing. */
-Expression::~Expression()
-{
-    const bool isBinary =
-            type == OrType ||
-            type == IntersectType ||
-            type == SubtractType ||
-            type == StrongSubtractType;
-
-    if ( isBinary )
-        delete expression;
-
-    if ( isBinary || type == TermType )
-        delete term;
-}
-
-/* Build the machine for one expression node. Term and builtin nodes are
- * returned as produced. Binary nodes evaluate the left expression, then the
- * right term, combine the two and minimize the result with lastInSeq. */
-FsmGraph *Expression::walk( Compiler *pd, bool lastInSeq )
-{
-    FsmGraph *lhs = 0;
-
-    switch ( type ) {
-        case TermType:
-            /* Return result of the term. */
-            return term->walk( pd );
-
-        case BuiltinType:
-            /* Duplicate the builtin. */
-            return makeBuiltin( builtin, pd );
-
-        case OrType: {
-            /* The left side of a union is walked with lastInSeq off. */
-            lhs = expression->walk( pd, false );
-            FsmGraph *rhs = term->walk( pd );
-            lhs->unionOp( rhs );
-            break;
-        }
-        case IntersectType: {
-            lhs = expression->walk( pd );
-            FsmGraph *rhs = term->walk( pd );
-            lhs->intersectOp( rhs );
-            break;
-        }
-        case SubtractType: {
-            lhs = expression->walk( pd );
-            FsmGraph *rhs = term->walk( pd );
-            lhs->subtractOp( rhs );
-            break;
-        }
-        case StrongSubtractType: {
-            lhs = expression->walk( pd );
-
-            /* Pad the term with any* machines on both sides before
-             * subtracting it. */
-            FsmGraph *padded = dotStarFsm( pd );
-            FsmGraph *middle = term->walk( pd );
-            FsmGraph *trailing = dotStarFsm( pd );
-            padded->concatOp( middle );
-            padded->concatOp( trailing );
-
-            lhs->subtractOp( padded );
-            break;
-        }
-        default:
-            /* Unknown node type, nothing to build. */
-            return 0;
-    }
-
-    /* Every binary operation is followed by a minimization. */
-    afterOpMinimize( lhs, lastInSeq );
-    return lhs;
-}
-
-/* Release the children owned by this term node. Concatenation-style nodes own
- * both a term and a factor, a plain factor node owns only the factor. */
-Term::~Term()
-{
-    const bool hasLeftTerm =
-            type == ConcatType ||
-            type == RightStartType ||
-            type == RightFinishType ||
-            type == LeftType;
-
-    if ( hasLeftTerm )
-        delete term;
-
-    if ( hasLeftTerm || type == FactorWithAugType )
-        delete factorWithAug;
-}
-
-/* Build the machine for one term node. A plain factor is returned as
- * produced. Every other node type evaluates the left term, then the right
- * factor, optionally assigns priorities to the two sides, concatenates them
- * and minimizes the result with lastInSeq. */
-FsmGraph *Term::walk( Compiler *pd, bool lastInSeq )
-{
-    if ( type == FactorWithAugType )
-        return factorWithAug->walk( pd );
-
-    FsmGraph *left = 0;
-    FsmGraph *right = 0;
-
-    switch ( type ) {
-        case ConcatType: {
-            /* The left side of a plain concatenation is walked with
-             * lastInSeq off. No priorities are involved. */
-            left = term->walk( pd, false );
-            right = factorWithAug->walk( pd );
-            break;
-        }
-        case RightStartType: {
-            left = term->walk( pd );
-            right = factorWithAug->walk( pd );
-
-            /* All transitions of the left machine get the lower priority. */
-            priorDescs[0].key = pd->nextPriorKey++;
-            priorDescs[0].priority = 0;
-            left->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
-
-            /* The start transitions of the right machine get the higher
-             * priority, under the same unique key. */
-            priorDescs[1].key = priorDescs[0].key;
-            priorDescs[1].priority = 1;
-            right->startFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
-            break;
-        }
-        case RightFinishType: {
-            left = term->walk( pd );
-            right = factorWithAug->walk( pd );
-
-            /* All transitions of the left machine get the lower priority. */
-            priorDescs[0].key = pd->nextPriorKey++;
-            priorDescs[0].priority = 0;
-            left->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
-
-            /* The finishing transitions of the right machine get the higher
-             * priority, under the same unique key. */
-            priorDescs[1].key = priorDescs[0].key;
-            priorDescs[1].priority = 1;
-            right->finishFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
-            break;
-        }
-        case LeftType: {
-            left = term->walk( pd );
-            right = factorWithAug->walk( pd );
-
-            /* Here the left machine gets the higher priority. */
-            priorDescs[0].key = pd->nextPriorKey++;
-            priorDescs[0].priority = 1;
-            left->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
-
-            /* The right machine gets the lower priority. Since
-             * startTransPrior might unnecessarily increase the number of
-             * states during the state machine construction process (due to
-             * isolation), allTransPrior is used instead, which has the same
-             * effect. */
-            priorDescs[1].key = priorDescs[0].key;
-            priorDescs[1].priority = 0;
-            right->allTransPrior( pd->curPriorOrd++, &priorDescs[1] );
-            break;
-        }
-        default:
-            /* Unknown node type, nothing to build. */
-            return 0;
-    }
-
-    /* Perform concatenation. */
-    left->concatOp( right );
-    afterOpMinimize( left, lastInSeq );
-    return left;
-}
-
-/* Destroy a factor with augmentation node: release the child repetition
- * node and the array of priority descriptors. */
-FactorWithAug::~FactorWithAug()
-{
-    delete factorWithRep;
-
-    /* The descriptor array is only allocated on demand by walk(). Deleting a
-     * null array pointer is a no-op, so no guard is required. */
-    delete[] priorDescs;
-}
-
-/* Embed every parser action of this node into the machine.
- *
- * graph:     machine receiving the actions.
- * actionOrd: ordering value for each entry of the actions list, indexed in
- *            parallel with it.
- * pd:        not used here.
- *
- * Start-type embeddings are each followed by afterOpMinimize(). */
-void FactorWithAug::assignActions( Compiler *pd, FsmGraph *graph, int *actionOrd )
-{
-    for ( int i = 0; i < actions.length(); i++ ) {
-        /* Pull out the pieces of the current action once. */
-        const int ord = actionOrd[i];
-        Action *const act = actions[i].action;
-        const int errKey = actions[i].localErrKey;
-
-        switch ( actions[i].type ) {
-            /* Transition actions. */
-            case at_start:
-                graph->startFsmAction( ord, act );
-                afterOpMinimize( graph );
-                break;
-            case at_all:
-                graph->allTransAction( ord, act );
-                break;
-            case at_finish:
-                graph->finishFsmAction( ord, act );
-                break;
-            case at_leave:
-                graph->leaveFsmAction( ord, act );
-                break;
-
-            /* Global error actions always use error key zero. */
-            case at_start_gbl_error:
-                graph->startErrorAction( ord, act, 0 );
-                afterOpMinimize( graph );
-                break;
-            case at_all_gbl_error:
-                graph->allErrorAction( ord, act, 0 );
-                break;
-            case at_final_gbl_error:
-                graph->finalErrorAction( ord, act, 0 );
-                break;
-            case at_not_start_gbl_error:
-                graph->notStartErrorAction( ord, act, 0 );
-                break;
-            case at_not_final_gbl_error:
-                graph->notFinalErrorAction( ord, act, 0 );
-                break;
-            case at_middle_gbl_error:
-                graph->middleErrorAction( ord, act, 0 );
-                break;
-
-            /* Local error actions carry the action's own error key. */
-            case at_start_local_error:
-                graph->startErrorAction( ord, act, errKey );
-                afterOpMinimize( graph );
-                break;
-            case at_all_local_error:
-                graph->allErrorAction( ord, act, errKey );
-                break;
-            case at_final_local_error:
-                graph->finalErrorAction( ord, act, errKey );
-                break;
-            case at_not_start_local_error:
-                graph->notStartErrorAction( ord, act, errKey );
-                break;
-            case at_not_final_local_error:
-                graph->notFinalErrorAction( ord, act, errKey );
-                break;
-            case at_middle_local_error:
-                graph->middleErrorAction( ord, act, errKey );
-                break;
-
-            /* EOF actions. */
-            case at_start_eof:
-                graph->startEOFAction( ord, act );
-                afterOpMinimize( graph );
-                break;
-            case at_all_eof:
-                graph->allEOFAction( ord, act );
-                break;
-            case at_final_eof:
-                graph->finalEOFAction( ord, act );
-                break;
-            case at_not_start_eof:
-                graph->notStartEOFAction( ord, act );
-                break;
-            case at_not_final_eof:
-                graph->notFinalEOFAction( ord, act );
-                break;
-            case at_middle_eof:
-                graph->middleEOFAction( ord, act );
-                break;
-
-            /* To-state actions. */
-            case at_start_to_state:
-                graph->startToStateAction( ord, act );
-                afterOpMinimize( graph );
-                break;
-            case at_all_to_state:
-                graph->allToStateAction( ord, act );
-                break;
-            case at_final_to_state:
-                graph->finalToStateAction( ord, act );
-                break;
-            case at_not_start_to_state:
-                graph->notStartToStateAction( ord, act );
-                break;
-            case at_not_final_to_state:
-                graph->notFinalToStateAction( ord, act );
-                break;
-            case at_middle_to_state:
-                graph->middleToStateAction( ord, act );
-                break;
-
-            /* From-state actions. */
-            case at_start_from_state:
-                graph->startFromStateAction( ord, act );
-                afterOpMinimize( graph );
-                break;
-            case at_all_from_state:
-                graph->allFromStateAction( ord, act );
-                break;
-            case at_final_from_state:
-                graph->finalFromStateAction( ord, act );
-                break;
-            case at_not_start_from_state:
-                graph->notStartFromStateAction( ord, act );
-                break;
-            case at_not_final_from_state:
-                graph->notFinalFromStateAction( ord, act );
-                break;
-            case at_middle_from_state:
-                graph->middleFromStateAction( ord, act );
-                break;
-
-            /* Anything else is expected to be rejected by the parser. */
-            default:
-                assert( false );
-                break;
-        }
-    }
-}
-
-/* Apply each priority augmentation to the machine. Entry i of priorityAugs
- * selects the embedding location and is paired with priorOrd[i] and
- * priorDescs[i]. */
-void FactorWithAug::assignPriorities( FsmGraph *graph, int *priorOrd )
-{
-    for ( int idx = 0; idx < priorityAugs.length(); idx++ ) {
-        PriorDesc *desc = &priorDescs[idx];
-        const int ord = priorOrd[idx];
-        const AugType where = priorityAugs[idx].type;
-
-        if ( where == at_start ) {
-            graph->startFsmPrior( ord, desc );
-            /* Start priorities are the one case that may need the machine
-             * minimized again afterwards. */
-            afterOpMinimize( graph );
-        }
-        else if ( where == at_all ) {
-            graph->allTransPrior( ord, desc );
-        }
-        else if ( where == at_finish ) {
-            graph->finishFsmPrior( ord, desc );
-        }
-        else if ( where == at_leave ) {
-            graph->leaveFsmPrior( ord, desc );
-        }
-        /* Any other location is prevented by the parser and is ignored. */
-    }
-}
-
-/* Embed the conditions of this node into the machine. Only the start, all
- * and leave locations are acted on; every other type is skipped. */
-void FactorWithAug::assignConditions( FsmGraph *graph )
-{
-    for ( int idx = 0; idx < conditions.length(); idx++ ) {
-        const AugType where = conditions[idx].type;
-
-        if ( where == at_start ) {
-            graph->startFsmCondition( conditions[idx].action );
-            afterOpMinimize( graph );
-        }
-        else if ( where == at_all ) {
-            graph->allTransCondition( conditions[idx].action );
-        }
-        else if ( where == at_leave ) {
-            graph->leaveFsmCondition( conditions[idx].action );
-        }
-    }
-}
-
-
-/* True for the augmentation types that embed at the start of a machine. */
-static bool isStartActionType( AugType type )
-{
-    switch ( type ) {
-        case at_start:
-        case at_start_gbl_error:
-        case at_start_local_error:
-        case at_start_to_state:
-        case at_start_from_state:
-        case at_start_eof:
-            return true;
-        default:
-            return false;
-    }
-}
-
-/* Evaluate a factor with augmentation node.
- *
- * Walks the child repetition node, then applies conditions, actions,
- * priorities and epsilon transitions to the resulting machine, which is
- * returned. Start-type actions receive their ordering before the child is
- * walked; all remaining actions receive theirs afterwards. */
-FsmGraph *FactorWithAug::walk( Compiler *pd )
-{
-    /* Ordering slot for every action, allocated only when there are any. */
-    int *actionOrd = 0;
-    if ( actions.length() > 0 )
-        actionOrd = new int[actions.length()];
-
-    /* Pass one: the starting actions are ordered ahead of the child. */
-    for ( int a = 0; a < actions.length(); a++ ) {
-        if ( isStartActionType( actions[a].type ) )
-            actionOrd[a] = pd->curActionOrd++;
-    }
-
-    /* Build the machine for the factor with repetition. */
-    FsmGraph *machine = factorWithRep->walk( pd );
-
-    /* Pass two: everything that is not a starting action. */
-    for ( int a = 0; a < actions.length(); a++ ) {
-        if ( !isStartActionType( actions[a].type ) )
-            actionOrd[a] = pd->curActionOrd++;
-    }
-
-    assignConditions( machine );
-    assignActions( pd, machine, actionOrd );
-
-    /* Priority orderings are local to this walk of the node. */
-    int *priorOrd = 0;
-    if ( priorityAugs.length() > 0 )
-        priorOrd = new int[priorityAugs.length()];
-
-    for ( int p = 0; p < priorityAugs.length(); p++ )
-        priorOrd[p] = pd->curPriorOrd++;
-
-    /* The descriptors are created the first time they are needed and then
-     * kept on the node. Each one records the key and value of the matching
-     * priority assignment. */
-    if ( priorDescs == 0 && priorityAugs.length() > 0 ) {
-        priorDescs = new PriorDesc[priorityAugs.length()];
-        for ( int p = 0; p < priorityAugs.length(); p++ ) {
-            priorDescs[p].key = priorityAugs[p].priorKey;
-            priorDescs[p].priority = priorityAugs[p].priorValue;
-        }
-    }
-
-    assignPriorities( machine, priorOrd );
-
-    /* Epsilon transitions. One resolved link is consumed per epsilon link. */
-    for ( int link = 0; link < epsilonLinks.length(); link++ ) {
-        NameInst *target = pd->epsilonResolvedLinks[pd->nextEpsilonResolvedLink++];
-
-        /* An unresolved name has already been reported, skip it quietly. */
-        if ( target == 0 )
-            continue;
-
-        machine->epsilonTrans( target->id );
-
-        /* Record that a link to the name has been made. */
-        pd->localNameScope->referencedNames.append( target );
-    }
-
-    /* Deleting a null array is harmless, so no guards are needed. */
-    delete[] priorOrd;
-    delete[] actionOrd;
-    return machine;
-}
-
-
-/* Destroy a factor with repetition node. The node type decides which child
- * pointer is owned: every repetition form owns a nested factorWithRep, the
- * pass-through form owns a factorWithNeg. */
-FactorWithRep::~FactorWithRep()
-{
-    if ( type == FactorWithNegType ) {
-        delete factorWithNeg;
-    }
-    else if ( type == StarType || type == StarStarType ||
-            type == OptionalType || type == PlusType ||
-            type == ExactType || type == MaxType ||
-            type == MinType || type == RangeType )
-    {
-        delete factorWithRep;
-    }
-}
-
-/* Evaluate a factor with repetition node.
- *
- * Walks the child and applies the repetition operator selected by the node
- * type (star, longest-match star, optional, plus, exact, max, min, range),
- * or passes the child factorWithNeg through untouched. A warning is issued
- * when a repetition is applied to a machine whose start state is final, and
- * when a repetition count of zero yields the null (lambda) machine. An
- * inverted range is reported as an error and recovered from by returning the
- * lambda machine.
- *
- * Returns the constructed machine. */
-FsmGraph *FactorWithRep::walk( Compiler *pd )
-{
-    FsmGraph *retFsm = 0;
-
-    switch ( type ) {
-    case StarType: {
-        /* Evaluate the FactorWithRep. */
-        retFsm = factorWithRep->walk( pd );
-        if ( retFsm->startState->isFinState() ) {
-            warning(loc) << "applying kleene star to a machine that "
-                    "accepts zero length word" << endl;
-        }
-
-        /* Shift over the start action orders then do the kleene star. */
-        pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
-        retFsm->starOp( );
-        afterOpMinimize( retFsm );
-        break;
-    }
-    case StarStarType: {
-        /* Evaluate the FactorWithRep. */
-        retFsm = factorWithRep->walk( pd );
-        if ( retFsm->startState->isFinState() ) {
-            warning(loc) << "applying kleene star to a machine that "
-                    "accepts zero length word" << endl;
-        }
-
-        /* Set up the prior descs. All gets priority one, whereas leaving gets
-         * priority zero. Make a unique key so that these priorities don't
-         * interfere with any priorities set by the user. */
-        priorDescs[0].key = pd->nextPriorKey++;
-        priorDescs[0].priority = 1;
-        retFsm->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
-
-        /* Leaving gets priority 0. Use the same unique key. */
-        priorDescs[1].key = priorDescs[0].key;
-        priorDescs[1].priority = 0;
-        retFsm->leaveFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
-
-        /* Shift over the start action orders then do the kleene star. */
-        pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
-        retFsm->starOp( );
-        afterOpMinimize( retFsm );
-        break;
-    }
-    case OptionalType: {
-        /* Make the null fsm. */
-        FsmGraph *nu = new FsmGraph();
-        nu->lambdaFsm( );
-
-        /* Evaluate the FactorWithRep. */
-        retFsm = factorWithRep->walk( pd );
-
-        /* Perform the question operator: union with the null machine. */
-        retFsm->unionOp( nu );
-        afterOpMinimize( retFsm );
-        break;
-    }
-    case PlusType: {
-        /* Evaluate the FactorWithRep. */
-        retFsm = factorWithRep->walk( pd );
-        if ( retFsm->startState->isFinState() ) {
-            warning(loc) << "applying plus operator to a machine that "
-                    "accepts zero length word" << endl;
-        }
-
-        /* Need a duplicate for the star end. */
-        FsmGraph *dup = new FsmGraph( *retFsm );
-
-        /* The start func orders need to be shifted before doing the star. */
-        pd->curActionOrd += dup->shiftStartActionOrder( pd->curActionOrd );
-
-        /* Star the duplicate. */
-        dup->starOp( );
-        afterOpMinimize( dup );
-
-        /* One copy followed by zero or more copies. */
-        retFsm->concatOp( dup );
-        afterOpMinimize( retFsm );
-        break;
-    }
-    case ExactType: {
-        if ( lowerRep == 0 ) {
-            /* No copies. Don't need to evaluate the factorWithRep.
-             * This defeats the purpose so give a warning. */
-            warning(loc) << "exactly zero repetitions results "
-                    "in the null machine" << endl;
-
-            retFsm = new FsmGraph();
-            retFsm->lambdaFsm();
-        }
-        else {
-            /* Evaluate the first FactorWithRep. */
-            retFsm = factorWithRep->walk( pd );
-            if ( retFsm->startState->isFinState() ) {
-                warning(loc) << "applying repetition to a machine that "
-                        "accepts zero length word" << endl;
-            }
-
-            /* The start func orders need to be shifted before doing the
-             * repetition. */
-            pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
-
-            /* Do the repetition on the machine. Already guarded against n == 0 */
-            retFsm->repeatOp( lowerRep );
-            afterOpMinimize( retFsm );
-        }
-        break;
-    }
-    case MaxType: {
-        if ( upperRep == 0 ) {
-            /* No copies. Don't need to evaluate the factorWithRep.
-             * This defeats the purpose so give a warning. */
-            warning(loc) << "max zero repetitions results "
-                    "in the null machine" << endl;
-
-            retFsm = new FsmGraph();
-            retFsm->lambdaFsm();
-        }
-        else {
-            /* Evaluate the first FactorWithRep. */
-            retFsm = factorWithRep->walk( pd );
-            if ( retFsm->startState->isFinState() ) {
-                warning(loc) << "applying max repetition to a machine that "
-                        "accepts zero length word" << endl;
-            }
-
-            /* The start func orders need to be shifted before doing the
-             * repetition. */
-            pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
-
-            /* Do the repetition on the machine. Already guarded against n == 0 */
-            retFsm->optionalRepeatOp( upperRep );
-            afterOpMinimize( retFsm );
-        }
-        break;
-    }
-    case MinType: {
-        /* Evaluate the repeated machine. */
-        retFsm = factorWithRep->walk( pd );
-        if ( retFsm->startState->isFinState() ) {
-            warning(loc) << "applying min repetition to a machine that "
-                    "accepts zero length word" << endl;
-        }
-
-        /* The start func orders need to be shifted before doing the repetition
-         * and the kleene star. */
-        pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
-
-        if ( lowerRep == 0 ) {
-            /* Acts just like a star op on the machine to return. */
-            retFsm->starOp( );
-            afterOpMinimize( retFsm );
-        }
-        else {
-            /* Take a duplicate for the star. */
-            FsmGraph *dup = new FsmGraph( *retFsm );
-
-            /* Do repetition on the first half. */
-            retFsm->repeatOp( lowerRep );
-            afterOpMinimize( retFsm );
-
-            /* Star the duplicate. */
-            dup->starOp( );
-            afterOpMinimize( dup );
-
-            /* Tack on the kleene star. */
-            retFsm->concatOp( dup );
-            afterOpMinimize( retFsm );
-        }
-        break;
-    }
-    case RangeType: {
-        /* Check for a bogus range. Compare directly rather than testing the
-         * sign of a difference. */
-        if ( upperRep < lowerRep ) {
-            error(loc) << "invalid range repetition" << endl;
-
-            /* Return null machine as recovery. */
-            retFsm = new FsmGraph();
-            retFsm->lambdaFsm();
-        }
-        else if ( lowerRep == 0 && upperRep == 0 ) {
-            /* No copies. Don't need to evaluate the factorWithRep. This
-             * defeats the purpose so give a warning. */
-            warning(loc) << "zero to zero repetitions results "
-                    "in the null machine" << endl;
-
-            retFsm = new FsmGraph();
-            retFsm->lambdaFsm();
-        }
-        else {
-            /* Now need to evaluate the repeated machine. */
-            retFsm = factorWithRep->walk( pd );
-            if ( retFsm->startState->isFinState() ) {
-                warning(loc) << "applying range repetition to a machine that "
-                        "accepts zero length word" << endl;
-            }
-
-            /* The start func orders need to be shifted before doing both kinds
-             * of repetition. */
-            pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
-
-            if ( lowerRep == 0 ) {
-                /* Just doing max repetition. Already guarded against n == 0. */
-                retFsm->optionalRepeatOp( upperRep );
-                afterOpMinimize( retFsm );
-            }
-            else if ( lowerRep == upperRep ) {
-                /* Just doing exact repetition. Already guarded against n == 0. */
-                retFsm->repeatOp( lowerRep );
-                afterOpMinimize( retFsm );
-            }
-            else {
-                /* This is the case that 0 < lowerRep < upperRep. Take a
-                 * duplicate for the optional repeat. */
-                FsmGraph *dup = new FsmGraph( *retFsm );
-
-                /* Do repetition on the first half. */
-                retFsm->repeatOp( lowerRep );
-                afterOpMinimize( retFsm );
-
-                /* Do optional repetition on the second half. */
-                dup->optionalRepeatOp( upperRep - lowerRep );
-                afterOpMinimize( dup );
-
-                /* Tack on the duplicate machine. */
-                retFsm->concatOp( dup );
-                afterOpMinimize( retFsm );
-            }
-        }
-        break;
-    }
-    case FactorWithNegType: {
-        /* Evaluate the Factor. Pass it up. */
-        retFsm = factorWithNeg->walk( pd );
-        break;
-    }}
-    return retFsm;
-}
-
-
-/* Destroy a factor with negation node. Both negation forms own a nested
- * factorWithNeg, the plain form owns a factor. */
-FactorWithNeg::~FactorWithNeg()
-{
-    if ( type == NegateType || type == CharNegateType )
-        delete factorWithNeg;
-    else if ( type == FactorType )
-        delete factor;
-}
-
-/* Evaluate a factor with negation node and return the resulting machine.
- * Negation subtracts the operand from dot-star, character negation subtracts
- * it from dot, and the plain form hands back the factor's machine as is. */
-FsmGraph *FactorWithNeg::walk( Compiler *pd )
-{
-    /* Nothing to negate, pass the factor up. */
-    if ( type == FactorType )
-        return factor->walk( pd );
-
-    /* Any type other than the two negations produces no machine. */
-    if ( type != NegateType && type != CharNegateType )
-        return 0;
-
-    /* The operand is walked before the base machine is created. */
-    FsmGraph *operand = factorWithNeg->walk( pd );
-    FsmGraph *result = ( type == NegateType ) ? dotStarFsm( pd ) : dotFsm( pd );
-
-    result->subtractOp( operand );
-    afterOpMinimize( result );
-    return result;
-}
-
-/* Destroy a factor node, releasing the child that goes with its type. A
- * reference factor deletes nothing. */
-Factor::~Factor()
-{
-    if ( type == LiteralType )
-        delete literal;
-    else if ( type == RangeType )
-        delete range;
-    else if ( type == OrExprType )
-        delete reItem;
-    else if ( type == RegExprType )
-        delete regExp;
-    else if ( type == ParenType )
-        delete join;
-    /* ReferenceType: nothing is deleted here. */
-}
-
-/* Evaluate a factor node by delegating to the child selected by its type.
- * Regular-expression children are walked as roots (null root pointer).
- * Returns the child's machine, or null for an unrecognised type. */
-FsmGraph *Factor::walk( Compiler *pd )
-{
-    switch ( type ) {
-        case LiteralType:
-            return literal->walk( pd );
-        case RangeType:
-            return range->walk( pd );
-        case OrExprType:
-            return reItem->walk( pd, 0 );
-        case RegExprType:
-            return regExp->walk( pd, 0 );
-        case ReferenceType:
-            return varDef->walk( pd );
-        case ParenType:
-            return join->walk( pd );
-    }
-
-    return 0;
-}
-
-
-/* Destroy a range node. The node owns the literals for both of its ends, so
- * each one is released here. */
-Range::~Range()
-{
-    delete lowerLit;
-    delete upperLit;
-}
-
-/* Check that a machine built for one end of a range matches exactly one
- * character. That requires: two states, a non-final start state, a single
- * final state with no transitions out, and a single transition out of the
- * start state whose low and high keys are equal.
- *
- * The tests short-circuit in the order written, so each later test may rely
- * on the ones before it. */
-bool Range::verifyRangeFsm( FsmGraph *rangeEnd )
-{
-    return rangeEnd->stateList.length() == 2
-        && !rangeEnd->startState->isFinState()
-        && rangeEnd->finStateSet.length() == 1
-        && rangeEnd->finStateSet[0]->outList.length() == 0
-        && rangeEnd->startState->outList.length() == 1
-        && !( rangeEnd->startState->outList.head->lowKey !=
-                rangeEnd->startState->outList.head->highKey );
-}
-
-/* Evaluate a range. Gets the lower and upper key and makes an fsm range.
- *
- * Both ends are walked and checked with verifyRangeFsm(). A bad end is
- * reported as an error. Since reporting does not stop the walk, and an
- * unverified machine is not guaranteed to have a transition out of its start
- * state to read a key from, a bad end is recovered from by returning the
- * null (lambda) machine instead of reading keys from it.
- *
- * An inverted range is reported and recovered from by collapsing the range
- * to the lower key.
- *
- * Returns a newly allocated machine. */
-FsmGraph *Range::walk( Compiler *pd )
-{
-    /* Construct and verify the suitability of the lower end of the range. */
-    FsmGraph *lowerFsm = lowerLit->walk( pd );
-    const bool lowerOk = verifyRangeFsm( lowerFsm );
-    if ( !lowerOk ) {
-        error(lowerLit->loc) <<
-            "bad range lower end, must be a single character" << endl;
-    }
-
-    /* Construct and verify the upper end. */
-    FsmGraph *upperFsm = upperLit->walk( pd );
-    const bool upperOk = verifyRangeFsm( upperFsm );
-    if ( !upperOk ) {
-        error(upperLit->loc) <<
-            "bad range upper end, must be a single character" << endl;
-    }
-
-    /* Only a verified machine is known to have exactly one transition out of
-     * its start state. Do not touch outList.head otherwise. */
-    if ( !lowerOk || !upperOk ) {
-        delete lowerFsm;
-        delete upperFsm;
-
-        /* Return null machine as recovery. */
-        FsmGraph *recovery = new FsmGraph();
-        recovery->lambdaFsm();
-        return recovery;
-    }
-
-    /* Grab the keys from the machines, then delete them. */
-    Key lowKey = lowerFsm->startState->outList.head->lowKey;
-    Key highKey = upperFsm->startState->outList.head->lowKey;
-    delete lowerFsm;
-    delete upperFsm;
-
-    /* Validate the range. */
-    if ( lowKey > highKey ) {
-        /* Recover by setting upper to lower. */
-        error(lowerLit->loc) << "lower end of range is greater then upper end" << endl;
-        highKey = lowKey;
-    }
-
-    /* Return the range now that it is validated. */
-    FsmGraph *retFsm = new FsmGraph();
-    retFsm->rangeFsm( lowKey, highKey );
-    return retFsm;
-}
-
-/* Evaluate a literal and return a new machine for it. A number becomes a
- * single-key machine. A string is first run through prepareLitString() and
- * then becomes a concatenation of its keys, case-insensitive when the
- * prepared string asks for it. Any other type yields null. */
-FsmGraph *Literal::walk( Compiler *pd )
-{
-    if ( type == Number ) {
-        /* Convert the text to a key, then build the one-key machine. */
-        Key numKey = makeFsmKeyNum( literal.data, loc, pd );
-        FsmGraph *numFsm = new FsmGraph();
-        numFsm->concatFsm( numKey );
-        return numFsm;
-    }
-
-    if ( type == LitString ) {
-        /* Interpret the literal text and convert it to an array of keys. */
-        String interp;
-        bool caseInsensitive;
-        prepareLitString( interp, caseInsensitive, literal, loc );
-
-        Key *keys = new Key[interp.length()];
-        makeFsmKeyArray( keys, interp.data, interp.length(), pd );
-
-        /* Build the machine, then drop the temporary key array. */
-        FsmGraph *strFsm = new FsmGraph();
-        if ( caseInsensitive )
-            strFsm->concatFsmCI( keys, interp.length() );
-        else
-            strFsm->concatFsm( keys, interp.length() );
-        delete[] keys;
-        return strFsm;
-    }
-
-    return 0;
-}
-
-/* Destroy a regular expression node. Only the recursive form owns children:
- * the preceding expression and the trailing item. */
-RegExpr::~RegExpr()
-{
-    if ( type == RecurseItem ) {
-        delete regExp;
-        delete item;
-    }
-}
-
-/* Evaluate a regular expression object.
- *
- * rootRegex is the outermost expression of the regex being walked. A null
- * value means this node is itself the root, in which case a pointer to this
- * node is passed down to the children.
- *
- * A recursive node concatenates the machine of the preceding expression with
- * the machine of its item. An empty nested node yields null, which the
- * enclosing recursive node treats as "nothing to concatenate".
- *
- * At the root a null result is replaced with the null (lambda) machine, so
- * that a completely empty regular expression still hands callers a machine
- * they can safely dereference. */
-FsmGraph *RegExpr::walk( Compiler *pd, RegExpr *rootRegex )
-{
-    /* This is the root regex, pass down a pointer to this. */
-    const bool isRoot = ( rootRegex == 0 );
-    if ( isRoot )
-        rootRegex = this;
-
-    FsmGraph *rtnVal = 0;
-    switch ( type ) {
-        case RecurseItem: {
-            /* Walk both items. */
-            FsmGraph *fsm1 = regExp->walk( pd, rootRegex );
-            FsmGraph *fsm2 = item->walk( pd, rootRegex );
-            if ( fsm1 == 0 )
-                rtnVal = fsm2;
-            else {
-                fsm1->concatOp( fsm2 );
-                rtnVal = fsm1;
-            }
-            break;
-        }
-        case Empty: {
-            /* Nested empties stay null so the parent skips the concat. */
-            rtnVal = 0;
-            break;
-        }
-    }
-
-    /* Never hand a null machine back from the root of the expression. */
-    if ( isRoot && rtnVal == 0 ) {
-        rtnVal = new FsmGraph();
-        rtnVal->lambdaFsm();
-    }
-    return rtnVal;
-}
-
-/* Destroy a regular expression item. Only the two or-block forms own a
- * child. Data and dot items delete nothing here. */
-ReItem::~ReItem()
-{
-    if ( type == OrBlock || type == NegOrBlock )
-        delete orBlock;
-}
-
-/* Evaluate one item of a regular expression and return its machine.
- *
- * rootRegex, when non-null, supplies the case-insensitivity setting used for
- * data items, and is passed on to or-blocks.
- *
- * An or-block with no items walks to null. That is replaced with the null
- * (lambda) machine before it is used, so that empty or-blocks do not cause a
- * null dereference here.
- *
- * If the item is starred, the kleene star is applied last, with a warning
- * when the machine already accepts the zero length word. */
-FsmGraph *ReItem::walk( Compiler *pd, RegExpr *rootRegex )
-{
-    /* The fsm to return, is the alphabet signed? */
-    FsmGraph *rtnVal = 0;
-
-    switch ( type ) {
-        case Data: {
-            /* Move the data into an integer array and make a concat fsm. */
-            Key *arr = new Key[data.length()];
-            makeFsmKeyArray( arr, data.data, data.length(), pd );
-
-            /* Make the concat fsm. */
-            rtnVal = new FsmGraph();
-            if ( rootRegex != 0 && rootRegex->caseInsensitive )
-                rtnVal->concatFsmCI( arr, data.length() );
-            else
-                rtnVal->concatFsm( arr, data.length() );
-            delete[] arr;
-            break;
-        }
-        case Dot: {
-            /* Make the dot fsm. */
-            rtnVal = dotFsm( pd );
-            break;
-        }
-        case OrBlock: {
-            /* Get the or block, guarding against an empty one. */
-            rtnVal = orBlock->walk( pd, rootRegex );
-            if ( rtnVal == 0 ) {
-                rtnVal = new FsmGraph();
-                rtnVal->lambdaFsm();
-            }
-            rtnVal->minimizePartition2();
-            break;
-        }
-        case NegOrBlock: {
-            /* Get the or block, guarding against an empty one. */
-            FsmGraph *fsm = orBlock->walk( pd, rootRegex );
-            if ( fsm == 0 ) {
-                fsm = new FsmGraph();
-                fsm->lambdaFsm();
-            }
-            fsm->minimizePartition2();
-
-            /* Make a dot fsm and subtract from it. */
-            rtnVal = dotFsm( pd );
-            rtnVal->subtractOp( fsm );
-            rtnVal->minimizePartition2();
-            break;
-        }
-    }
-
-    /* If the item is followed by a star, then apply the star op. */
-    if ( star ) {
-        if ( rtnVal->startState->isFinState() ) {
-            warning(loc) << "applying kleene star to a machine that "
-                    "accepts zero length word" << endl;
-        }
-
-        rtnVal->starOp();
-        rtnVal->minimizePartition2();
-    }
-    return rtnVal;
-}
-
-/* Destroy an or-block node. Only the recursive form owns children: the
- * preceding or-block and the trailing item. */
-ReOrBlock::~ReOrBlock()
-{
-    if ( type == RecurseItem ) {
-        delete orBlock;
-        delete item;
-    }
-}
-
-
-/* Evaluate an or-block of a regular expression. A recursive node unions the
- * machine of the preceding block with the machine of its item. When the
- * preceding block produced nothing, the item's machine is returned alone.
- * An empty node returns null. */
-FsmGraph *ReOrBlock::walk( Compiler *pd, RegExpr *rootRegex )
-{
-    if ( type != RecurseItem )
-        return 0;
-
-    /* Walk the earlier part of the block first, then the trailing item. */
-    FsmGraph *earlier = orBlock->walk( pd, rootRegex );
-    FsmGraph *current = item->walk( pd, rootRegex );
-
-    if ( earlier == 0 )
-        return current;
-
-    earlier->unionOp( current );
-    return earlier;
-}
-
-/* Evaluate an or block item of a regular expression.
- *
- * A data item becomes an or-machine over the unique keys of its characters.
- * A range item becomes a range machine. An inverted range is reported and
- * recovered from by collapsing it to the lower key.
- *
- * For a case-insensitive root regex the range is additionally unioned with
- * the opposite-case image of whatever part of it overlaps 'A'..'Z' and of
- * whatever part overlaps 'a'..'z'. The two overlaps are tested
- * independently, so a range that touches both letter cases only partially
- * gets both images. */
-FsmGraph *ReOrItem::walk( Compiler *pd, RegExpr *rootRegex )
-{
-    /* The return value, is the alphabet signed? */
-    FsmGraph *rtnVal = 0;
-    switch ( type ) {
-    case Data: {
-        /* Make the or machine. */
-        rtnVal = new FsmGraph();
-
-        /* Put the or data into an array of ints. Note that we find unique
-         * keys. Duplicates are silently ignored. The alternative would be to
-         * issue warning or an error but since we can't with [a0-9a] or 'a' |
-         * 'a' don't bother here. */
-        KeySet keySet;
-        makeFsmUniqueKeyArray( keySet, data.data, data.length(),
-            rootRegex != 0 ? rootRegex->caseInsensitive : false, pd );
-
-        /* Run the or operator. */
-        rtnVal->orFsm( keySet.data, keySet.length() );
-        break;
-    }
-    case Range: {
-        /* Make the upper and lower keys. */
-        Key lowKey = makeFsmKeyChar( lower, pd );
-        Key highKey = makeFsmKeyChar( upper, pd );
-
-        /* Validate the range. */
-        if ( lowKey > highKey ) {
-            /* Recover by setting upper to lower. */
-            error(loc) << "lower end of range is greater then upper end" << endl;
-            highKey = lowKey;
-        }
-
-        /* Make the range machine. */
-        rtnVal = new FsmGraph();
-        rtnVal->rangeFsm( lowKey, highKey );
-
-        if ( rootRegex != 0 && rootRegex->caseInsensitive ) {
-            /* Upper-case part of the range: add its lower-case image. */
-            if ( lowKey <= 'Z' && 'A' <= highKey ) {
-                Key otherLow = lowKey < 'A' ? Key('A') : lowKey;
-                Key otherHigh = 'Z' < highKey ? Key('Z') : highKey;
-
-                otherLow = 'a' + ( otherLow - 'A' );
-                otherHigh = 'a' + ( otherHigh - 'A' );
-
-                FsmGraph *otherRange = new FsmGraph();
-                otherRange->rangeFsm( otherLow, otherHigh );
-                rtnVal->unionOp( otherRange );
-                rtnVal->minimizePartition2();
-            }
-
-            /* Lower-case part of the range: add its upper-case image. This
-             * is tested on its own rather than as an else branch, so that a
-             * range overlapping both letter cases is folded both ways. */
-            if ( lowKey <= 'z' && 'a' <= highKey ) {
-                Key otherLow = lowKey < 'a' ? Key('a') : lowKey;
-                Key otherHigh = 'z' < highKey ? Key('z') : highKey;
-
-                otherLow = 'A' + ( otherLow - 'a' );
-                otherHigh = 'A' + ( otherHigh - 'a' );
-
-                FsmGraph *otherRange = new FsmGraph();
-                otherRange->rangeFsm( otherLow, otherHigh );
-                rtnVal->unionOp( otherRange );
-                rtnVal->minimizePartition2();
-            }
-        }
-
-        break;
-    }}
-    return rtnVal;
-}
diff --git a/colm/parsetree.h b/colm/parsetree.h
deleted file mode 100644
index c3a75df5..00000000
--- a/colm/parsetree.h
+++ /dev/null
@@ -1,2253 +0,0 @@
-/*
- * Copyright 2006-2012 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Colm.
- *
- * Colm is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Colm is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Colm; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#ifndef _PARSETREE_H
-#define _PARSETREE_H
-
-#include <iostream>
-#include <string.h>
-#include "global.h"
-#include "avlmap.h"
-#include "bstmap.h"
-#include "bstset.h"
-#include "vector.h"
-#include "dlist.h"
-#include "dlistval.h"
-#include "dlistmel.h"
-#include "astring.h"
-#include "bytecode.h"
-#include "avlbasic.h"
-#include "fsmrun.h"
-
-/* Operators that are represented with single symbol characters. */
-#define OP_DoubleEql 'e'
-#define OP_NotEql 'q'
-#define OP_LessEql 'l'
-#define OP_GrtrEql 'g'
-#define OP_LogicalAnd 'a'
-#define OP_LogicalOr 'o'
-#define OP_Deref 'd'
-
-#if SIZEOF_LONG != 4 && SIZEOF_LONG != 8
- #error "SIZEOF_LONG contained an unexpected value"
-#endif
-
-struct NameInst;
-struct FsmGraph;
-struct RedFsm;
-struct _FsmRun;
-struct ObjectDef;
-struct ElementOf;
-struct UniqueType;
-struct ObjField;
-struct TransBlock;
-struct CodeBlock;
-struct PdaLiteral;
-struct TypeAlias;
-typedef struct _PdaRun PdaRun;
-
-/*
- * Code Vector
- *
- * A vector of Code elements with helpers that write multi-byte values one
- * byte at a time, least significant byte first. A Half is written as two
- * bytes. A Word is written as SIZEOF_LONG bytes (four or eight).
- */
-struct CodeVect : public Vector<Code>
-{
-    /* Append the two bytes of a half. Not optimal. */
-    void appendHalf( Half half )
-    {
-        for ( int byte = 0; byte < 2; byte++ )
-            append( (half >> (8*byte)) & 0xff );
-    }
-
-    /* Append every byte of a word. Not optimal. */
-    void appendWord( Word word )
-    {
-        for ( int byte = 0; byte < SIZEOF_LONG; byte++ )
-            append( (word >> (8*byte)) & 0xff );
-    }
-
-    /* Overwrite the two bytes starting at pos with a half. Not optimal. */
-    void setHalf( long pos, Half half )
-    {
-        for ( int byte = 0; byte < 2; byte++ )
-            data[pos+byte] = (half >> (8*byte)) & 0xff;
-    }
-
-    /* Insert the two bytes of a half starting at pos. Not optimal. */
-    void insertHalf( long pos, Half half )
-    {
-        for ( int byte = 0; byte < 2; byte++ )
-            insert( pos+byte, (half >> (8*byte)) & 0xff );
-    }
-
-    /* Insert every byte of a word starting at pos. Not at all optimal. */
-    void insertWord( long pos, Word word )
-    {
-        for ( int byte = 0; byte < SIZEOF_LONG; byte++ )
-            insert( pos+byte, (word >> (8*byte)) & 0xff );
-    }
-
-    /* Insert a tree pointer, stored as a word, starting at pos. */
-    void insertTree( long pos, Tree *tree )
-    {
-        insertWord( pos, (Word) tree );
-    }
-};
-
-
-
-/* Types of builtin machines.
- * Each enumerator names one predefined machine. How each machine is
- * constructed is not visible from this declaration. */
-enum BuiltinMachine
-{
- BT_Any,
- BT_Ascii,
- BT_Extend,
- BT_Alpha,
- BT_Digit,
- BT_Alnum,
- BT_Lower,
- BT_Upper,
- BT_Cntrl,
- BT_Graph,
- BT_Print,
- BT_Punct,
- BT_Space,
- BT_Xdigit,
- BT_Lambda,
- BT_Empty
-};
-
-typedef BstSet<char> CharSet;
-typedef Vector<unsigned char> UnsignedCharVect;
-
-
-struct Compiler;
-struct TypeRef;
-
-/* Leaf type. */
-struct Literal;
-
-/* Tree nodes. */
-
-struct Term;
-struct FactorWithAug;
-struct FactorWithRep;
-struct FactorWithNeg;
-struct Factor;
-struct Expression;
-struct Join;
-struct JoinOrLm;
-struct RegionJoinOrLm;
-struct TokenRegion;
-struct Namespace;
-struct Context;
-struct TokenDef;
-struct TokenDefListReg;
-struct TokenDefListNs;
-struct Range;
-struct LangEl;
-
-/* Type of augmentation. Describes locations in the machine.
- * The first group holds the four transition locations. Each group after it
- * holds six state-based locations, always listed in the same order: start,
- * all, final, not_start, not_final, middle. */
-enum AugType
-{
- /* Transition actions/priorities. */
- at_start,
- at_all,
- at_finish,
- at_leave,
-
- /* Global error actions. */
- at_start_gbl_error,
- at_all_gbl_error,
- at_final_gbl_error,
- at_not_start_gbl_error,
- at_not_final_gbl_error,
- at_middle_gbl_error,
-
- /* Local error actions. */
- at_start_local_error,
- at_all_local_error,
- at_final_local_error,
- at_not_start_local_error,
- at_not_final_local_error,
- at_middle_local_error,
-
- /* To State Action embedding. */
- at_start_to_state,
- at_all_to_state,
- at_final_to_state,
- at_not_start_to_state,
- at_not_final_to_state,
- at_middle_to_state,
-
- /* From State Action embedding. */
- at_start_from_state,
- at_all_from_state,
- at_final_from_state,
- at_not_start_from_state,
- at_not_final_from_state,
- at_middle_from_state,
-
- /* EOF Action embedding. */
- at_start_eof,
- at_all_eof,
- at_final_eof,
- at_not_start_eof,
- at_not_final_eof,
- at_middle_eof
-};
-
-/* IMPORTANT: These must follow the same order as the state augs in AugType
- * since we will be using this to compose AugType.
- * The values start at zero and increase by one, so each value is the
- * position of a location within one six-entry state group of AugType.
- * NOTE(review): presumably an AugType is composed by adding one of these to
- * the first member of a group. Confirm against the code that composes it. */
-enum StateAugType
-{
- sat_start = 0,
- sat_all,
- sat_final,
- sat_not_start,
- sat_not_final,
- sat_middle
-};
-
-struct Action;
-struct PriorDesc;
-struct RegExpr;
-struct ReItem;
-struct ReOrBlock;
-struct ReOrItem;
-struct ExplicitMachine;
-struct InlineItem;
-struct InlineList;
-
-/* Reference to a named state. */
-typedef Vector<String> NameRef;
-typedef Vector<NameRef*> NameRefList;
-typedef Vector<NameInst*> NameTargList;
-
-/* Structure for storing location of epsilon transitions.
- * Holds an input location together with its own copy of the name reference
- * that the epsilon transition targets. */
-struct EpsilonLink
-{
- EpsilonLink( const InputLoc &loc, NameRef &target )
- : loc(loc), target(target) { }
-
- InputLoc loc;
- NameRef target; /* Copied from the constructor argument. */
-};
-
-struct Label /* A label: input location, text and an associated object field. */
-{
- Label( const InputLoc &loc, const String &data, ObjField *objField )
- : loc(loc), data(data), objField(objField) { }
-
- InputLoc loc;
- String data; /* Copied from the constructor argument. */
- ObjField *objField; /* Stored as given. Ownership is not established here. */
-};
-
-/* Structure represents an action assigned to some FactorWithAug node. The
- * factor with aug will keep an array of these. */
-struct ParserAction
-{
- ParserAction( const InputLoc &loc, AugType type, int localErrKey, Action *action )
- : loc(loc), type(type), localErrKey(localErrKey), action(action) { }
-
- InputLoc loc;
- AugType type;
- int localErrKey;
- Action *action;
-};
-
-struct Token
-{
- String data;
- InputLoc loc;
-};
-
-void prepareLitString( String &result, bool &caseInsensitive,
- const String &srcString, const InputLoc &loc );
-
-std::ostream &operator<<(std::ostream &out, const Token &token );
-
-typedef AvlMap< String, TokenDef*, CmpStr > LiteralDict;
-typedef AvlMapEl< String, TokenDef* > LiteralDictEl;
-
-/* Store the value and type of a priority augmentation. */
-struct PriorityAug
-{
- PriorityAug( AugType type, int priorKey, int priorValue ) :
- type(type), priorKey(priorKey), priorValue(priorValue) { }
-
- AugType type;
- int priorKey;
- int priorValue;
-};
-
-/*
- * A Variable Definition
- */
-struct VarDef
-{
- VarDef( const String &name, Join *join )
- : name(name), join(join) { }
-
- /* Parse tree traversal. */
- FsmGraph *walk( Compiler *pd );
- void makeNameTree( const InputLoc &loc, Compiler *pd );
-
- String name;
- Join *join;
-};
-
-/*
- * A Region Definition
- */
-struct RegionDef
-{
- RegionDef( const String &name, TokenRegion *tokenRegion )
- : name(name), tokenRegion(tokenRegion) { }
-
- /* Parse tree traversal. */
- FsmGraph *walk( Compiler *pd );
- void makeNameTree( const InputLoc &loc, Compiler *pd );
-
- String name;
- TokenRegion *tokenRegion;
-};
-
-typedef Vector<String> StringVect;
-typedef CmpTable<String, CmpStr> CmpStrVect;
-
-/* A namespace qualification: the namespace the reference was declared in plus
- * a vector of qualifying names (qualNames). */
-struct NamespaceQual
-{
- /* NOTE(review): declInRegion is accepted but neither stored nor read by
-  * this constructor -- confirm whether callers still need to pass it. */
- NamespaceQual( Namespace *declInNspace, TokenRegion *declInRegion ) :
- cachedNspaceQual(0), declInNspace(declInNspace) {}
-
- /* Starts out null. Presumably filled in by getQual() so that the lookup is
-  * done once -- TODO confirm against the implementation. */
- Namespace *cachedNspaceQual;
- Namespace *declInNspace;
-
- StringVect qualNames;
-
- Namespace *searchFrom( Namespace *from, StringVect::Iter &qualPart );
- Namespace *getQual( Compiler *pd );
-};
-
-struct ReCapture
-{
- ReCapture( Action *markEnter, Action *markLeave, ObjField *objField )
- : markEnter(markEnter), markLeave(markLeave), objField(objField) {}
-
- Action *markEnter;
- Action *markLeave;
- ObjField *objField;
-};
-
-typedef Vector<Context*> ContextVect;
-
-/* Pairs a source location with a LangEl and carries a pointer to an
- * ObjectDef that is not supplied at construction time. */
-struct Context
-{
- Context( InputLoc &loc, LangEl *lel )
- :
- loc(loc),
- lel(lel),
- contextObjDef(0)
- {}
-
- InputLoc loc;
- LangEl *lel;
-
- /* Starts out null so that "not yet assigned" is detectable rather than an
-  * indeterminate pointer. */
- ObjectDef *contextObjDef;
-};
-
-typedef Vector<ReCapture> ReCaptureVect;
-
-struct TokenDefPtr1
-{
- TokenDef *prev, *next;
-};
-
-struct TokenDefPtr2
-{
- TokenDef *prev, *next;
-};
-
-/* Definition of a token. Inherits two separate prev/next link pairs
- * (TokenDefPtr1, TokenDefPtr2); TokenDefListReg and TokenDefListNs each use
- * one of them as their list links. */
-struct TokenDef
-:
- public TokenDefPtr1,
- public TokenDefPtr2
-{
- /* pReCaptureVect may be null, in which case reCaptureVect stays
-  * default-constructed. The four scanner action pointers are not supplied
-  * here and start out null instead of indeterminate. */
- TokenDef( const String &name, const String &literal, bool isLiteral, bool ignore,
- Join *join, CodeBlock *codeBlock, InputLoc &semiLoc,
- int longestMatchId, Namespace *nspace, TokenRegion *tokenRegion,
- ReCaptureVect *pReCaptureVect, ObjectDef *objectDef, Context *contextIn )
- :
- name(name), literal(literal), isLiteral(isLiteral), ignore(ignore), join(join), action(0),
- codeBlock(codeBlock), tdLangEl(0), semiLoc(semiLoc),
- setActId(0), actOnLast(0), actOnNext(0), actLagBehind(0),
- longestMatchId(longestMatchId), inLmSelect(false),
- nspace(nspace), tokenRegion(tokenRegion), objectDef(objectDef),
- contextIn(contextIn),
- dupOf(0), noPostIgnore(false), noPreIgnore(false), isZero(false)
- {
- if ( pReCaptureVect != 0 )
- reCaptureVect = *pReCaptureVect;
- }
-
- InputLoc getLoc();
-
- String name;
- String literal;
- bool isLiteral;
- bool ignore;
- Join *join;
- Action *action;
- CodeBlock *codeBlock;
- LangEl *tdLangEl;
- InputLoc semiLoc;
-
- /* Scanner actions; null until assigned after construction. */
- Action *setActId;
- Action *actOnLast;
- Action *actOnNext;
- Action *actLagBehind;
- int longestMatchId;
- bool inLmSelect;
- Namespace *nspace;
- TokenRegion *tokenRegion;
- ReCaptureVect reCaptureVect;
- ObjectDef *objectDef;
- Context *contextIn;
-
- TokenDef *dupOf;
- bool noPostIgnore;
- bool noPreIgnore;
- bool isZero;
-};
-
-struct LelDefList;
-
-struct NtDef
-{
- NtDef( const String &name, Namespace *nspace,
- LelDefList *defList, ObjectDef *objectDef,
- Context *contextIn, bool reduceFirst )
- :
- name(name),
- nspace(nspace),
- defList(defList),
- objectDef(objectDef),
- contextIn(contextIn),
- reduceFirst(reduceFirst)
- {}
-
- String name;
- Namespace *nspace;
- LelDefList *defList;
- ObjectDef *objectDef;
- Context *contextIn;
- bool reduceFirst;
-
- NtDef *prev, *next;
-};
-
-struct NtDefList : DList<NtDef> {};
-
-/* Declare a new type so that ptreetypes.h need not include dlist.h. */
-struct TokenDefListReg : DListMel<TokenDef, TokenDefPtr1> {};
-struct TokenDefListNs : DListMel<TokenDef, TokenDefPtr2> {};
-
-struct ContextDef
-{
- ContextDef( const String &name, Context *context, Namespace *nspace )
- : name(name), context(context), nspace(nspace) {}
-
- String name;
- Context *context;
- Namespace *nspace;
-
- ContextDef *prev, *next;
-};
-
-struct ContextDefList : DList<ContextDef> {};
-
-struct TypeMapEl
- : public AvlTreeEl<TypeMapEl>
-{
- enum Type
- {
- TypeAliasType = 1,
- LangElType
- };
-
- const String &getKey() { return key; }
-
- TypeMapEl( const String &key, TypeRef *typeRef )
- : type(TypeAliasType), key(key), value(0), typeRef(typeRef) {}
-
- TypeMapEl( const String &key, LangEl *value )
- : type(LangElType), key(key), value(value), typeRef(0) {}
-
-
- Type type;
- String key;
- LangEl *value;
- TypeRef *typeRef;
-
- TypeMapEl *prev, *next;
-};
-
-/* Symbol Map. */
-typedef AvlTree< TypeMapEl, String, CmpStr > TypeMap;
-
-typedef Vector<TokenRegion*> RegionVect;
-
-/* A scanner region: a named, numbered list of token definitions with an
- * optional parent region and pointers to derived regions. */
-struct TokenRegion
-{
- /* Construct with a location, name, id and parent region. Every pointer
-  * member that is not passed in starts out null and every flag false. */
- TokenRegion( const InputLoc &loc, const String &name, int id,
- TokenRegion *parentRegion ) :
- loc(loc), name(name), id(id),
- lmActSelect(0),
- lmSwitchHandlesError(false), regionNameInst(0),
- parentRegion(parentRegion), defaultTokenDef(0),
- preEofBlock(0),
- ignoreOnlyRegion(0), tokenOnlyRegion(0), ciRegion(0),
- wasEmpty(false),
- isFullRegion(false),
- isIgnoreOnly(false),
- isTokenOnly(false),
- isCiOnly(false),
- ciLel(0),
- derivedFrom(0)
- { }
-
- /* Tree traversal. */
- FsmGraph *walk( Compiler *pd );
- void makeNameTree( Compiler *pd );
- void runLongestMatch( Compiler *pd, FsmGraph *graph );
- void transferScannerLeavingActions( FsmGraph *graph );
- Action *newAction( Compiler *pd, const InputLoc &loc, const String &name,
- InlineList *inlineList );
- void makeActions( Compiler *pd );
- void findName( Compiler *pd );
- void restart( FsmGraph *graph, FsmTrans *trans );
-
- InputLoc loc;
- TokenDefListReg tokenDefList;
- String name;
- int id;
-
- /* Null until assigned after construction. */
- Action *lmActSelect;
- bool lmSwitchHandlesError;
-
- /* This gets saved off during the name walk. Can save it off because token
-  * regions are referenced once only. */
- NameInst *regionNameInst;
-
- TokenRegion *parentRegion;
- RegionVect childRegions;
-
- TokenDef *defaultTokenDef;
-
- CodeBlock *preEofBlock;
-
- /* Dupe of the region, containing only the ignore tokens. */
- TokenRegion *ignoreOnlyRegion;
- TokenRegion *tokenOnlyRegion;
- TokenRegion *ciRegion;
-
- /* We always init empty scanners with a single token. If we had to do this
-  * then wasEmpty is true. */
- bool wasEmpty;
-
- bool isFullRegion;
- bool isIgnoreOnly;
- bool isTokenOnly;
- bool isCiOnly;
-
- LangEl *ciLel;
- TokenRegion *derivedFrom;
-
- TokenRegion *next, *prev;
-};
-
-typedef DList<TokenRegion> RegionList;
-typedef BstSet< TokenRegion*, CmpOrd<TokenRegion*> > RegionSet;
-
-typedef Vector<Namespace*> NamespaceVect;
-
-struct GenericType
- : public DListEl<GenericType>
-{
- GenericType( const String &name, long typeId, long id,
- LangEl *langEl, TypeRef *typeArg )
- :
- name(name), typeId(typeId), id(id), langEl(langEl),
- typeArg(typeArg), keyTypeArg(0),
- utArg(0), keyUT(0),
- objDef(0)
- {}
-
- const String &getKey() const
- { return name; };
-
- void declare( Compiler *pd, Namespace *nspace );
-
- String name;
- long typeId;
- long id;
- LangEl *langEl;
- TypeRef *typeArg;
- TypeRef *keyTypeArg;
- UniqueType *utArg;
- UniqueType *keyUT;
-
- ObjectDef *objDef;
-};
-
-typedef DList<GenericType> GenericList;
-
-typedef struct _UserIter UserIter;
-typedef AvlMap<String, UserIter*, CmpStr> UserIterMap;
-typedef AvlMapEl<String, UserIter*> UserIterMapEl;
-
-/* Graph dictionary. */
-struct GraphDictEl
-:
- public AvlTreeEl<GraphDictEl>,
- public DListEl<GraphDictEl>
-{
- GraphDictEl( const String &key )
- : key(key), value(0), isInstance(false) { }
- GraphDictEl( const String &key, VarDef *value )
- : key(key), value(value), isInstance(false) { }
-
- const String &getKey() { return key; }
-
- String key;
- VarDef *value;
- bool isInstance;
-
- /* Location info of graph definition. Points to variable name of assignment. */
- InputLoc loc;
-};
-
-typedef AvlTree<GraphDictEl, String, CmpStr> GraphDict;
-typedef DList<GraphDictEl> GraphList;
-
-/* Region graph dictionary. */
-struct RegionGraphDictEl
-:
- public AvlTreeEl<RegionGraphDictEl>,
- public DListEl<RegionGraphDictEl>
-{
- RegionGraphDictEl( const String &key )
- : key(key), value(0), isInstance(false) { }
- RegionGraphDictEl( const String &key, RegionDef *value )
- : key(key), value(value), isInstance(false) { }
-
- const String &getKey() { return key; }
-
- String key;
- RegionDef *value;
- bool isInstance;
-
- /* Location info of graph definition. Points to variable name of assignment. */
- InputLoc loc;
-};
-
-typedef AvlTree<RegionGraphDictEl, String, CmpStr> RegionGraphDict;
-typedef DList<RegionGraphDictEl> RegionGraphList;
-
-struct TypeAlias
-{
- TypeAlias( const InputLoc &loc, Namespace *nspace,
- const String &name, TypeRef *typeRef )
- :
- loc(loc),
- nspace(nspace),
- name(name),
- typeRef(typeRef)
- {}
-
- InputLoc loc;
- Namespace *nspace;
- String name;
- TypeRef *typeRef;
-
- TypeAlias *prev, *next;
-};
-
-typedef DList<TypeAlias> TypeAliasList;
-
-/* A namespace: holds the literal dictionary, token/nonterminal/context
- * definition lists, type map, generics, graph dictionaries and type aliases
- * declared inside it, plus links to its parent and child namespaces. */
-struct Namespace
-{
- /* Construct with a location, name, id and parent namespace. All container
-  * members are default-constructed. */
- Namespace( const InputLoc &loc, const String &name, int id,
- Namespace *parentNamespace ) :
- loc(loc), name(name), id(id),
- parentNamespace(parentNamespace) { }
-
- /* Look up a namespace by name. NOTE(review): presumably searches
-  * childNamespaces -- confirm against the implementation. */
- Namespace *findNamespace( const String &name );
-
- InputLoc loc;
- String name;
- int id;
-
- /* Literal patterns and the dictionary mapping literals to the underlying
- * tokens. */
- LiteralDict literalDict;
-
- /* List of tokens defs in the namespace. */
- TokenDefListNs tokenDefList;
-
- /* List of nonterminal defs in the namespace. */
- NtDefList ntDefList;
-
- /* List of context definitions for encapsulating the data of a parser. */
- ContextDefList contextDefList;
-
- /* Dictionary of symbols within the namespace. */
- TypeMap typeMap;
- GenericList genericList;
-
- /* Dictionary of graphs. Both instances and non-instances go here. */
- RegionGraphDict graphDict;
-
- /* regular language definitions. */
- GraphDict rlMap;
-
- TypeAliasList typeAliasList;
-
- Namespace *parentNamespace;
- NamespaceVect childNamespaces;
-
- /* List links; not set by the constructor. */
- Namespace *next, *prev;
-
- void declare( Compiler *pd );
-};
-
-typedef DList<Namespace> NamespaceList;
-typedef BstSet< Namespace*, CmpOrd<Namespace*> > NamespaceSet;
-
-/* List of Expressions. */
-typedef DList<Expression> ExprList;
-
-struct JoinOrLm
-{
- JoinOrLm( Join *join ) :
- join(join) {}
-
- FsmGraph *walk( Compiler *pd );
- void makeNameTree( Compiler *pd );
-
- Join *join;
-};
-
-struct RegionJoinOrLm
-{
- enum Type { LongestMatchType };
-
- RegionJoinOrLm( TokenRegion *tokenRegion ) :
- tokenRegion(tokenRegion) {}
-
- FsmGraph *walk( Compiler *pd );
- void makeNameTree( Compiler *pd );
-
- TokenRegion *tokenRegion;
-};
-
-/*
- * Join
- */
-struct Join
-{
- /* Construct with the first expression. */
- Join( Expression *expr );
-
- /* Tree traversal. */
- FsmGraph *walk( Compiler *pd );
- void makeNameTree( Compiler *pd );
-
- /* Data. */
- ExprList exprList;
-
- Join *context;
- Action *mark;
-};
-
-/*
- * Expression
- */
-/*
- * Expression
- *
- * A node that is either a binary operation between a left-hand expression and
- * a term, a lone term, or a builtin machine, as selected by 'type'.
- */
-struct Expression
-{
- enum Type {
- OrType,
- IntersectType,
- SubtractType,
- StrongSubtractType,
- TermType,
- BuiltinType
- };
-
- /* Construct with an expression on the left and a term on the right. No
-  * builtin is involved, so the member is value-initialized rather than
-  * initialized from itself. */
- Expression( Expression *expression, Term *term, Type type ) :
- expression(expression), term(term),
- builtin(), type(type), prev(this), next(this) { }
-
- /* Construct with only a term. The builtin member is value-initialized. */
- Expression( Term *term ) :
- expression(0), term(term), builtin(),
- type(TermType) , prev(this), next(this) { }
-
- /* Construct with a builtin type. */
- Expression( BuiltinMachine builtin ) :
- expression(0), term(0), builtin(builtin),
- type(BuiltinType), prev(this), next(this) { }
-
- ~Expression();
-
- /* Tree traversal. */
- FsmGraph *walk( Compiler *pd, bool lastInSeq = true );
- void makeNameTree( Compiler *pd );
-
- /* Node data. */
- Expression *expression;
- Term *term;
- BuiltinMachine builtin;
- Type type;
-
- /* List links; every constructor points both at the node itself. */
- Expression *prev, *next;
-};
-
-/*
- * Term
- */
-struct Term
-{
- enum Type {
- ConcatType,
- RightStartType,
- RightFinishType,
- LeftType,
- FactorWithAugType
- };
-
- Term( Term *term, FactorWithAug *factorWithAug ) :
- term(term), factorWithAug(factorWithAug), type(ConcatType) { }
-
- Term( Term *term, FactorWithAug *factorWithAug, Type type ) :
- term(term), factorWithAug(factorWithAug), type(type) { }
-
- Term( FactorWithAug *factorWithAug ) :
- term(0), factorWithAug(factorWithAug), type(FactorWithAugType) { }
-
- ~Term();
-
- FsmGraph *walk( Compiler *pd, bool lastInSeq = true );
- void makeNameTree( Compiler *pd );
-
- Term *term;
- FactorWithAug *factorWithAug;
- Type type;
-
- /* Priority descriptor for RightFinish type. */
- PriorDesc priorDescs[2];
-};
-
-
-/* Third level of precedence. Augmenting nodes with actions and priorities. */
-struct FactorWithAug
-{
- FactorWithAug( FactorWithRep *factorWithRep ) :
- priorDescs(0), factorWithRep(factorWithRep) { }
- ~FactorWithAug();
-
- /* Tree traversal. */
- FsmGraph *walk( Compiler *pd );
- void makeNameTree( Compiler *pd );
-
- void assignActions( Compiler *pd, FsmGraph *graph, int *actionOrd );
- void assignPriorities( FsmGraph *graph, int *priorOrd );
-
- void assignConditions( FsmGraph *graph );
-
- /* Actions and priorities assigned to the factor node. */
- Vector<ParserAction> actions;
- Vector<PriorityAug> priorityAugs;
- PriorDesc *priorDescs;
- Vector<EpsilonLink> epsilonLinks;
- Vector<ParserAction> conditions;
-
- FactorWithRep *factorWithRep;
-};
-
-/* Fourth level of precedence. Trailing unary operators. Provides Kleene star,
- * optional and plus. */
-/* Either a repetition applied to a nested FactorWithRep or a plain
- * FactorWithNeg, as selected by 'type'. */
-struct FactorWithRep
-{
- enum Type {
- StarType,
- StarStarType,
- OptionalType,
- PlusType,
- ExactType,
- MaxType,
- MinType,
- RangeType,
- FactorWithNegType
- };
-
- /* Repetition of a nested FactorWithRep with the given bounds. */
- FactorWithRep( const InputLoc &loc, FactorWithRep *factorWithRep,
- int lowerRep, int upperRep, Type type ) :
- loc(loc), factorWithRep(factorWithRep),
- factorWithNeg(0), lowerRep(lowerRep),
- upperRep(upperRep), type(type) { }
-
- /* Plain FactorWithNeg. The repetition members are unused for this kind
-  * and are zeroed so they never hold indeterminate values. */
- FactorWithRep( const InputLoc &loc, FactorWithNeg *factorWithNeg )
- : loc(loc), factorWithRep(0), factorWithNeg(factorWithNeg),
- lowerRep(0), upperRep(0), type(FactorWithNegType) { }
-
- ~FactorWithRep();
-
- /* Tree traversal. */
- FsmGraph *walk( Compiler *pd );
- void makeNameTree( Compiler *pd );
-
- InputLoc loc;
- FactorWithRep *factorWithRep;
- FactorWithNeg *factorWithNeg;
- int lowerRep, upperRep;
- Type type;
-
- /* Priority descriptor for StarStar type. */
- PriorDesc priorDescs[2];
-};
-
-/* Fifth level of precedence. Provides Negation. */
-struct FactorWithNeg
-{
- enum Type {
- NegateType,
- CharNegateType,
- FactorType
- };
-
- FactorWithNeg( const InputLoc &loc, FactorWithNeg *factorWithNeg, Type type) :
- loc(loc), factorWithNeg(factorWithNeg), factor(0), type(type) { }
-
- FactorWithNeg( const InputLoc &loc, Factor *factor ) :
- loc(loc), factorWithNeg(0), factor(factor), type(FactorType) { }
-
- ~FactorWithNeg();
-
- /* Tree traversal. */
- FsmGraph *walk( Compiler *pd );
- void makeNameTree( Compiler *pd );
-
- InputLoc loc;
- FactorWithNeg *factorWithNeg;
- Factor *factor;
- Type type;
-};
-
-/*
- * Factor
- */
-/*
- * Factor
- *
- * Holds exactly one of several kinds of operand, selected by 'type'. Every
- * constructor sets the pointer for its own kind and nulls the others, so no
- * pointer member is ever left indeterminate.
- */
-struct Factor
-{
- /* Language elements a factor node can be. */
- enum Type {
- LiteralType,
- RangeType,
- OrExprType,
- RegExprType,
- ReferenceType,
- ParenType,
- };
-
- /* Construct with a literal fsm. */
- Factor( Literal *literal ) :
- literal(literal), range(0), reItem(0), regExp(0),
- varDef(0), join(0), lower(0), upper(0), type(LiteralType) { }
-
- /* Construct with a range. */
- Factor( Range *range ) :
- literal(0), range(range), reItem(0), regExp(0),
- varDef(0), join(0), lower(0), upper(0), type(RangeType) { }
-
- /* Construct with the or part of a regular expression. */
- Factor( ReItem *reItem ) :
- literal(0), range(0), reItem(reItem), regExp(0),
- varDef(0), join(0), lower(0), upper(0), type(OrExprType) { }
-
- /* Construct with a regular expression. */
- Factor( RegExpr *regExp ) :
- literal(0), range(0), reItem(0), regExp(regExp),
- varDef(0), join(0), lower(0), upper(0), type(RegExprType) { }
-
- /* Construct with a reference to a var def. */
- Factor( const InputLoc &loc, VarDef *varDef ) :
- loc(loc), literal(0), range(0), reItem(0), regExp(0),
- varDef(varDef), join(0), lower(0), upper(0), type(ReferenceType) {}
-
- /* Construct with a parenthesized join. */
- Factor( Join *join ) :
- literal(0), range(0), reItem(0), regExp(0),
- varDef(0), join(join), lower(0), upper(0), type(ParenType) {}
-
- /* Cleanup. */
- ~Factor();
-
- /* Tree traversal. */
- FsmGraph *walk( Compiler *pd );
- void makeNameTree( Compiler *pd );
-
- /* Only the ReferenceType constructor takes a location; the others leave
-  * loc default-constructed. */
- InputLoc loc;
- Literal *literal;
- Range *range;
- ReItem *reItem;
- RegExpr *regExp;
- VarDef *varDef;
- Join *join;
- /* Not set from any constructor argument; zeroed on construction. */
- int lower, upper;
- Type type;
-};
-
-/* A range machine. Only ever composed of two literals. */
-struct Range
-{
- Range( Literal *lowerLit, Literal *upperLit )
- : lowerLit(lowerLit), upperLit(upperLit) { }
-
- ~Range();
- FsmGraph *walk( Compiler *pd );
- bool verifyRangeFsm( FsmGraph *rangeEnd );
-
- Literal *lowerLit;
- Literal *upperLit;
-};
-
-/* Some literal machine. Can be a number or literal string. */
-struct Literal
-{
- enum LiteralType { Number, LitString };
-
- Literal( const InputLoc &loc, const String &literal, LiteralType type )
- : loc(loc), literal(literal), type(type) { }
-
- FsmGraph *walk( Compiler *pd );
-
- InputLoc loc;
- String literal;
- LiteralType type;
-};
-
-/* Regular expression. */
-/* Regular expression: either Empty, or a RecurseItem node that links a
- * preceding RegExpr with one ReItem. */
-struct RegExpr
-{
- enum RegExpType { RecurseItem, Empty };
-
- /* Constructors. The Empty form has no children, so both pointers are
-  * nulled rather than left indeterminate. */
- RegExpr() :
- regExp(0), item(0),
- type(Empty), caseInsensitive(false) { }
- RegExpr(RegExpr *regExp, ReItem *item) :
- regExp(regExp), item(item),
- type(RecurseItem), caseInsensitive(false) { }
-
- ~RegExpr();
- FsmGraph *walk( Compiler *pd, RegExpr *rootRegex );
-
- RegExpr *regExp;
- ReItem *item;
- RegExpType type;
- bool caseInsensitive;
-};
-
-/* An item in a regular expression. */
-/* An item in a regular expression: literal data, a dot, or a (possibly
- * negated) or-block, as selected by 'type'. */
-struct ReItem
-{
- enum ReItemType { Data, Dot, OrBlock, NegOrBlock };
-
- /* Data item; carries no or-block, so the pointer is nulled. */
- ReItem( const InputLoc &loc, const String &data )
- : loc(loc), data(data), orBlock(0), star(false), type(Data) { }
- /* Item described by its type alone; the or-block pointer is nulled. */
- ReItem( const InputLoc &loc, ReItemType type )
- : loc(loc), orBlock(0), star(false), type(type) { }
- /* Item that carries an or-block. */
- ReItem( const InputLoc &loc, ReOrBlock *orBlock, ReItemType type )
- : loc(loc), orBlock(orBlock), star(false), type(type) { }
-
- ~ReItem();
- FsmGraph *walk( Compiler *pd, RegExpr *rootRegex );
-
- InputLoc loc;
- String data;
- ReOrBlock *orBlock;
- bool star;
- ReItemType type;
-};
-
-/* An or block item. */
-/* An or block: either Empty, or a RecurseItem node that links a preceding
- * ReOrBlock with one ReOrItem. */
-struct ReOrBlock
-{
- enum ReOrBlockType { RecurseItem, Empty };
-
- /* Constructors. The Empty form has no children, so both pointers are
-  * nulled rather than left indeterminate. */
- ReOrBlock()
- : orBlock(0), item(0), type(Empty) { }
- ReOrBlock(ReOrBlock *orBlock, ReOrItem *item)
- : orBlock(orBlock), item(item), type(RecurseItem) { }
-
- ~ReOrBlock();
- FsmGraph *walk( Compiler *pd, RegExpr *rootRegex );
-
- ReOrBlock *orBlock;
- ReOrItem *item;
- ReOrBlockType type;
-};
-
-/* An item in an or block. */
-/* An item in an or block: either literal data or a lower..upper character
- * range, as selected by 'type'. */
-struct ReOrItem
-{
- enum ReOrItemType { Data, Range };
-
- /* Data item; the range bounds are unused and zeroed. */
- ReOrItem( const InputLoc &loc, const String &data )
- : loc(loc), data(data), lower(0), upper(0), type(Data) {}
- /* Range item; data stays default-constructed. */
- ReOrItem( const InputLoc &loc, char lower, char upper )
- : loc(loc), lower(lower), upper(upper), type(Range) { }
-
- FsmGraph *walk( Compiler *pd, RegExpr *rootRegex );
-
- InputLoc loc;
- String data;
- char lower;
- char upper;
- ReOrItemType type;
-};
-
-
-/*
- * Inline code tree
- */
-struct InlineList;
-/* One item of an inline code list. Which of the payload members is
- * meaningful depends on the constructor used; every pointer a constructor
- * does not receive is set to null. */
-struct InlineItem
-{
- enum Type
- {
- Text,
- LmSwitch,
- LmSetActId,
- LmSetTokEnd,
- LmOnLast,
- LmOnNext,
- LmOnLagBehind,
- LmInitAct,
- LmInitTokStart,
- LmSetTokStart
- };
-
- /* Item carrying text data. */
- InlineItem( const InputLoc &loc, const String &data, Type type ) :
- loc(loc), data(data), nameRef(0), nameTarg(0), children(0),
- tokenRegion(0), longestMatchPart(0), type(type) { }
-
- /* Item carrying a name reference. */
- InlineItem( const InputLoc &loc, NameRef *nameRef, Type type ) :
- loc(loc), nameRef(nameRef), nameTarg(0), children(0),
- tokenRegion(0), longestMatchPart(0), type(type) { }
-
- /* Item carrying a token region and a longest-match part. */
- InlineItem( const InputLoc &loc, TokenRegion *tokenRegion,
- TokenDef *longestMatchPart, Type type ) : loc(loc),
- nameRef(0), nameTarg(0), children(0), tokenRegion(tokenRegion),
- longestMatchPart(longestMatchPart), type(type) { }
-
- /* Item carrying a resolved name target. */
- InlineItem( const InputLoc &loc, NameInst *nameTarg, Type type ) :
- loc(loc), nameRef(0), nameTarg(nameTarg), children(0),
- tokenRegion(0), longestMatchPart(0), type(type) { }
-
- /* Item described by its type alone. */
- InlineItem( const InputLoc &loc, Type type ) :
- loc(loc), nameRef(0), nameTarg(0), children(0),
- tokenRegion(0), longestMatchPart(0), type(type) { }
-
- InputLoc loc;
- String data;
- NameRef *nameRef;
- NameInst *nameTarg;
- InlineList *children;
- TokenRegion *tokenRegion;
- TokenDef *longestMatchPart;
- Type type;
-
- /* List links; not set by the constructors. */
- InlineItem *prev, *next;
-};
-
-/* Normally this would be a typedef, but that would entail including DList from
- * ptreetypes, which should be just typedef forwards. */
-struct InlineList : public DList<InlineItem> { };
-
-struct ProdEl;
-struct LangVarRef;
-struct ObjField;
-
-struct PatternItem
-{
- enum Type {
- FactorType,
- InputText
- };
-
- PatternItem( const InputLoc &loc, const String &data, Type type ) :
- loc(loc), factor(0), data(data), type(type), region(0),
- varRef(0), bindId(0) {}
-
- PatternItem( const InputLoc &loc, ProdEl *factor, Type type ) :
- loc(loc), factor(factor), type(type), region(0),
- varRef(0), bindId(0) {}
-
- InputLoc loc;
- ProdEl *factor;
- String data;
- Type type;
- TokenRegion *region;
- LangVarRef *varRef;
- long bindId;
-
- PatternItem *prev, *next;
-};
-
-struct LangExpr;
-typedef DList<PatternItem> PatternItemList;
-
-/* One item of a replacement list: input text, an expression, or a factor,
- * as selected by 'type'. Each constructor stores its own payload and nulls
- * the other pointer members. */
-struct ReplItem
-{
- enum Type {
- InputText,
- ExprType,
- FactorType
- };
-
- /* Text item. */
- ReplItem( const InputLoc &loc, Type type, const String &data ) :
- loc(loc), type(type), data(data), expr(0), langEl(0),
- factor(0), bindId(0) {}
-
- /* Expression item. */
- ReplItem( const InputLoc &loc, Type type, LangExpr *expr ) :
- loc(loc), type(type), expr(expr), langEl(0),
- factor(0), bindId(0) {}
-
- /* Factor item. There is no expression here, so expr is nulled instead of
-  * being initialized from its own indeterminate value. */
- ReplItem( const InputLoc &loc, Type type, ProdEl *factor ) :
- loc(loc), type(type), expr(0), langEl(0),
- factor(factor), bindId(0) {}
-
- InputLoc loc;
- Type type;
- String data;
- LangExpr *expr;
- /* Never passed to a constructor; null until assigned. */
- LangEl *langEl;
- ProdEl *factor;
- long bindId;
-
- /* List links; not set by the constructors. */
- ReplItem *prev, *next;
-};
-
-typedef DList<ReplItem> ReplItemList;
-
-
-struct Pattern
-{
- Pattern( const InputLoc &loc, Namespace *nspace, TokenRegion *region,
- PatternItemList *list, int patRepId ) :
- loc(loc), nspace(nspace), region(region), list(list), patRepId(patRepId),
- langEl(0), pdaRun(0), nextBindId(1) {}
-
- InputLoc loc;
- Namespace *nspace;
- TokenRegion *region;
- PatternItemList *list;
- long patRepId;
- LangEl *langEl;
- PdaRun *pdaRun;
- long nextBindId;
-
- Pattern *prev, *next;
-};
-
-typedef DList<Pattern> PatternList;
-
-struct Replacement
-{
- Replacement( const InputLoc &loc, Namespace *nspace,
- TokenRegion *region, ReplItemList *list, int patRepId ) :
- loc(loc), nspace(nspace), region(region), list(list),
- patRepId(patRepId), langEl(0), pdaRun(0), nextBindId(1), parse(true) {}
-
- InputLoc loc;
- Namespace *nspace;
- TokenRegion *region;
- ReplItemList *list;
- int patRepId;
- LangEl *langEl;
- PdaRun *pdaRun;
- long nextBindId;
- bool parse;
-
- Replacement *prev, *next;
-};
-
-typedef DList<Replacement> ReplList;
-
-struct ParserText
-{
- ParserText( const InputLoc &loc, Namespace *nspace,
- TokenRegion *region, ReplItemList *list ) :
- loc(loc), nspace(nspace), region(region), list(list),
- langEl(0), pdaRun(0), nextBindId(1), parse(true) {}
-
- InputLoc loc;
- Namespace *nspace;
- TokenRegion *region;
- ReplItemList *list;
- LangEl *langEl;
- PdaRun *pdaRun;
- long nextBindId;
- bool parse;
-
- ParserText *prev, *next;
-};
-
-typedef DList<ParserText> ParserTextList;
-
-struct Function;
-
-struct IterDef
-{
- enum Type { Tree, Child, RevChild, Repeat, RevRepeat, User };
-
- IterDef( Type type, Function *func );
- IterDef( Type type );
-
- Type type;
-
- Function *func;
- bool useFuncId;
- bool useSearchUT;
-
- Code inCreateWV;
- Code inCreateWC;
- Code inDestroy;
- Code inAdvance;
-
- Code inGetCurR;
- Code inGetCurWC;
- Code inSetCurWC;
-
- Code inRefFromCur;
-};
-
-/* Ordering for IterDef: by type first; two User iterators are further
- * ordered by their func pointer. Returns -1, 0 or 1. */
-struct CmpIterDef
-{
- static int compare( const IterDef &id1, const IterDef &id2 )
- {
- if ( id1.type < id2.type )
- return -1;
- if ( id1.type > id2.type )
- return 1;
-
- /* Types are equal from here on; only User iterators look further. */
- if ( id1.type != IterDef::User )
- return 0;
-
- if ( id1.func < id2.func )
- return -1;
- return ( id1.func > id2.func ) ? 1 : 0;
- }
-};
-
-typedef AvlSet<IterDef, CmpIterDef> IterDefSet;
-typedef AvlSetEl<IterDef> IterDefSetEl;
-
-
-/*
- * Unique Types.
- */
-
-/*
- * type_ref -> qualified_name
- * type_ref -> '*' type_ref
- * type_ref -> '&' type_ref
- * type_ref -> list type_ref type_ref
- * type_ref -> map type_ref type_ref
- * type_ref -> vector type_ref
- * type_ref -> parser type_ref
- * type_ref -> iter_tree type_ref
- * type_ref -> iter_child type_ref
- * type_ref -> iter_revchild type_ref
- * type_ref -> iter_repeat type_ref
- * type_ref -> iter_revrepeat type_ref
- * type_ref -> iter_user type_ref
- *
- * type -> nil
- * type -> def term
- * type -> def nonterm
- * type -> '*' type
- * type -> '&' type
- * type -> list type
- * type -> map type type
- * type -> vector type
- * type -> parser type
- * type -> iter_tree type
- * type -> iter_child type
- * type -> iter_revchild type
- * type -> iter_repeat type
- * type -> iter_revrepeat type
- * type -> iter_user type
- */
-
-/* A unique type record: a type id plus an optional LangEl or IterDef. Any
- * pointer a constructor does not receive is set to null. */
-struct UniqueType : public AvlTreeEl<UniqueType>
-{
- /* Type identified by its id alone. */
- UniqueType( int typeId ) :
- typeId(typeId),
- langEl(0),
- iterDef(0) {}
-
- /* Type referring to a language element. */
- UniqueType( int typeId, LangEl *langEl ) :
- typeId(typeId),
- langEl(langEl),
- iterDef(0) {}
-
- /* Type referring to an iterator definition. There is no language element
-  * here, so langEl is nulled instead of being initialized from its own
-  * indeterminate value. */
- UniqueType( int typeId, IterDef *iterDef ) :
- typeId(typeId),
- langEl(0),
- iterDef(iterDef) {}
-
- int typeId;
- LangEl *langEl;
- IterDef *iterDef;
-};
-
-struct CmpUniqueType
-{
- static int compare( const UniqueType &ut1, const UniqueType &ut2 );
-};
-
-typedef AvlBasic< UniqueType, CmpUniqueType > UniqueTypeMap;
-
-enum RepeatType {
- RepeatNone = 1,
- RepeatRepeat,
- RepeatList,
- RepeatOpt,
-};
-
-/*
- * Repeat types.
- */
-
-struct UniqueRepeat
- : public AvlTreeEl<UniqueRepeat>
-{
- UniqueRepeat( RepeatType repeatType, LangEl *langEl ) :
- repeatType(repeatType),
- langEl(langEl), declLangEl(0) {}
-
- RepeatType repeatType;
- LangEl *langEl;
- LangEl *declLangEl;
-};
-
-struct CmpUniqueRepeat
-{
- static int compare( const UniqueRepeat &ut1, const UniqueRepeat &ut2 );
-};
-
-typedef AvlBasic< UniqueRepeat, CmpUniqueRepeat > UniqueRepeatMap;
-
-/*
- * Unique Map Types
- */
-
-struct UniqueMap
- : public AvlTreeEl<UniqueMap>
-{
- UniqueMap( UniqueType *key, UniqueType *value ) :
- key(key), value(value), generic(0) {}
-
- UniqueType *key;
- UniqueType *value;
-
- GenericType *generic;
-};
-
-struct CmpUniqueMap
-{
- static int compare( const UniqueMap &ut1, const UniqueMap &ut2 );
-};
-
-typedef AvlBasic< UniqueMap, CmpUniqueMap > UniqueMapMap;
-
-/*
- * Unique List Types
- */
-
-struct UniqueList
- : public AvlTreeEl<UniqueList>
-{
- UniqueList( UniqueType *value ) :
- value(value), generic(0) {}
-
- UniqueType *value;
- GenericType *generic;
-};
-
-struct CmpUniqueList
-{
- static int compare( const UniqueList &ut1, const UniqueList &ut2 );
-};
-
-typedef AvlBasic< UniqueList, CmpUniqueList > UniqueListMap;
-
-/*
- * Unique Vector Types
- */
-
-struct UniqueVector
- : public AvlTreeEl<UniqueVector>
-{
- UniqueVector( UniqueType *value ) :
- value(value), generic(0) {}
-
- UniqueType *value;
- GenericType *generic;
-};
-
-struct CmpUniqueVector
-{
- static int compare( const UniqueVector &ut1, const UniqueVector &ut2 );
-};
-
-typedef AvlBasic< UniqueVector, CmpUniqueVector > UniqueVectorMap;
-
-/*
- * Unique Parser Types
- */
-
-struct UniqueParser
- : public AvlTreeEl<UniqueParser>
-{
- UniqueParser( UniqueType *parseType ) :
- parseType(parseType), generic(0) {}
-
- UniqueType *parseType;
- GenericType *generic;
-};
-
-struct CmpUniqueParser
-{
- static int compare( const UniqueParser &ut1, const UniqueParser &ut2 );
-};
-
-typedef AvlBasic< UniqueParser, CmpUniqueParser > UniqueParserMap;
-
-/*
- *
- */
-
-typedef AvlMap< StringVect, int, CmpStrVect > VectorTypeIdMap;
-typedef AvlMapEl< StringVect, int > VectorTypeIdMapEl;
-
-typedef Vector<TypeRef*> TypeRefVect;
-
-struct TypeRef
-{
- enum Type
- {
- Unspecified,
- Name,
- Literal,
- Iterator,
- Map,
- List,
- Vector,
- Parser,
- Ref,
- Ptr,
- };
-
- /* Qualification and a type name. These require lookup. */
- TypeRef( const InputLoc &loc, NamespaceQual *nspaceQual, String typeName ) :
- type(Name), loc(loc), nspaceQual(nspaceQual), typeName(typeName), pdaLiteral(0), iterDef(0),
- typeRef1(0), typeRef2(0),
- repeatType(RepeatNone),
- nspace(0), uniqueType(0), searchUniqueType(0), generic(0) {}
-
- /* Qualification and a type name. These require lookup. */
- TypeRef( const InputLoc &loc, NamespaceQual *nspaceQual, PdaLiteral *pdaLiteral ) :
- type(Literal), loc(loc), nspaceQual(nspaceQual), pdaLiteral(pdaLiteral), iterDef(0),
- typeRef1(0), typeRef2(0),
- repeatType(RepeatNone),
- nspace(0), uniqueType(0), searchUniqueType(0), generic(0) {}
-
- /* Generics. */
- TypeRef( Type type, const InputLoc &loc, NamespaceQual *nspaceQual, TypeRef *typeRef1, TypeRef *typeRef2 ) :
- type(type), loc(loc), nspaceQual(nspaceQual), pdaLiteral(0), iterDef(0),
- typeRef1(typeRef1), typeRef2(typeRef2),
- repeatType(RepeatNone),
- nspace(0), uniqueType(0), searchUniqueType(0), generic(0) {}
-
- /* Pointers and Refs. */
- TypeRef( Type type, const InputLoc &loc, TypeRef *typeRef1 ) :
- type(type), loc(loc), nspaceQual(0), pdaLiteral(0), iterDef(0),
- typeRef1(typeRef1), typeRef2(0),
- repeatType(RepeatNone),
- nspace(0), uniqueType(0), searchUniqueType(0), generic(0) {}
-
- /* Resolution not needed. */
-
- /* Iterator definition. */
- TypeRef( const InputLoc &loc, IterDef *iterDef, UniqueType *uniqueType,
- UniqueType *searchUniqueType ) :
- type(Iterator), loc(loc), nspaceQual(0), pdaLiteral(0), iterDef(iterDef),
- typeRef1(0), typeRef2(0),
- repeatType(RepeatNone),
- nspace(0), uniqueType(uniqueType), searchUniqueType(searchUniqueType), generic(0) {}
-
- /* Unique type is given directly. */
- TypeRef( const InputLoc &loc, UniqueType *uniqueType ) :
- type(Unspecified), loc(loc), nspaceQual(0), pdaLiteral(0), iterDef(0),
- typeRef1(0), typeRef2(0),
- repeatType(RepeatNone),
- nspace(0), uniqueType(uniqueType), searchUniqueType(0), generic(0) {}
-
- void resolveRepeat( Compiler *pd );
-
- UniqueType *lookupTypeName( Compiler *pd );
- UniqueType *lookupTypeLiteral( Compiler *pd );
- UniqueType *lookupTypeMap( Compiler *pd );
- UniqueType *lookupTypeList( Compiler *pd );
- UniqueType *lookupTypeVector( Compiler *pd );
- UniqueType *lookupTypeParser( Compiler *pd );
- UniqueType *lookupType( Compiler *pd );
- UniqueType *lookupTypePtr( Compiler *pd );
- UniqueType *lookupTypeRef( Compiler *pd );
-
- Type type;
- InputLoc loc;
- NamespaceQual *nspaceQual;
- String typeName;
- PdaLiteral *pdaLiteral;
- IterDef *iterDef;
- TypeRef *typeRef1;
- TypeRef *typeRef2;
- RepeatType repeatType;
-
- /* Resolved. */
- Namespace *nspace;
- UniqueType *uniqueType;
- UniqueType *searchUniqueType;
- GenericType *generic;
-};
-
-typedef DList<ObjField> ParameterList;
-
-/* Description of a method on an object: return type, name, opcodes and
- * parameter types, plus flags and optional function/iterator links. */
-struct ObjMethod
-{
- /* Copies the first numParams entries of 'types' into a newly allocated
-  * paramUTs array. 'types' is only read when numParams is positive, so a
-  * parameterless method may pass a null pointer.
-  * NOTE(review): paramUTs is allocated here and no destructor is visible in
-  * this struct -- confirm who owns and releases it. */
- ObjMethod( UniqueType *returnUT, String name,
- int opcodeWV, int opcodeWC, int numParams,
- UniqueType **types, ParameterList *paramList, bool isConst )
- :
- returnUT(returnUT),
- returnTypeId(0),
- name(name),
- opcodeWV(opcodeWV),
- opcodeWC(opcodeWC),
- numParams(numParams),
- paramList(paramList),
- isConst(isConst),
- funcId(0),
- useFuncId(false),
- useCallObj(true),
- isCustom(false),
- func(0),
- iterDef(0)
- {
- this->paramUTs = new UniqueType*[numParams];
- /* memcpy requires valid pointers even for a zero length, so skip it
-  * when there is nothing to copy. */
- if ( numParams > 0 )
- memcpy( this->paramUTs, types, sizeof(UniqueType*)*numParams );
- }
-
- UniqueType *returnUT;
- long returnTypeId;
- String name;
- long opcodeWV;
- long opcodeWC;
- long numParams;
- UniqueType **paramUTs;
- ParameterList *paramList;
- bool isConst;
- long funcId;
- bool useFuncId;
- bool useCallObj;
- bool isCustom;
- Function *func;
- IterDef *iterDef;
-};
-
-typedef AvlMap<String, ObjMethod*, CmpStr> ObjMethodMap;
-typedef AvlMapEl<String, ObjMethod*> ObjMethodMapEl;
-
-struct RhsVal { RhsVal( int prodNum, int childNum ) : prodNum(prodNum), childNum(childNum) { } int prodNum; int childNum; };
-
-struct ObjField
-{
- ObjField( const InputLoc &loc, TypeRef *typeRef, const String &name ) :
- loc(loc), typeRef(typeRef), name(name),
- context(0),
- pos(0), offset(0),
- beenReferenced(false),
- beenInitialized(false),
- useOffset(true),
- isConst(false),
- isLhsEl(false), isRhsEl(false),
- refActive(false),
- isArgv(false),
- isCustom(false),
- isParam(false),
- isRhsGet(false),
- isExport(false),
- dirtyTree(false),
- inGetR( IN_HALT ),
- inGetWC( IN_HALT ),
- inGetWV( IN_HALT ),
- inSetWC( IN_HALT ),
- inSetWV( IN_HALT )
- {}
-
- InputLoc loc;
- TypeRef *typeRef;
- String name;
- Context *context;
- long pos;
- long offset;
- bool beenReferenced;
- bool beenInitialized;
- bool useOffset;
- bool isConst;
- bool isLhsEl;
- bool isRhsEl;
- bool refActive;
- bool isArgv;
- bool isCustom;
- bool isParam;
- bool isRhsGet;
- bool isExport;
-
- /* True if some aspect of the tree has possibly been written to. This does
- * not include attributes. This is here so we can optimize the storage of
- * old lhs vars. If only a lhs attribute changes we don't need to preserve
- * the original for backtracking. */
- bool dirtyTree;
-
- Vector<RhsVal> rhsVal;
-
- Code inGetR;
- Code inGetWC;
- Code inGetWV;
- Code inSetWC;
- Code inSetWV;
-
- ObjField *prev, *next;
-};
-
-typedef AvlMap<String, ObjField*, CmpStr> ObjFieldMap;
-typedef AvlMapEl<String, ObjField*> ObjFieldMapEl;
-
-typedef DListVal<ObjField*> ObjFieldList;
-
-typedef DList<ObjField> ParameterList;
-
-struct TemplateType;
-
-/* Tree of name scopes for an object def. All of the object fields inside this
- * tree live in one object def. This is used for scoping names in functions. */
-struct ObjNameScope
-{
- ObjNameScope()
- : parentScope(0), childIter(0)
- {}
-
- ObjFieldMap *objFieldMap;
-
- ObjNameScope *parentScope;
- DList<ObjNameScope> children;
-
- /* For iteration after declaration. */
- ObjNameScope *childIter;
-
- ObjNameScope *prev, *next;
-};
-
-struct ObjectDef
-{
- enum Type {
- UserType,
- FrameType,
- IterType,
- BuiltinType
- };
-
- ObjectDef( Type type, String name, int id )
- :
- type(type), name(name), id(id),
- nextOffset(0), firstNonTree(0)
- {
- scope = new ObjNameScope;
- scope->objFieldMap = new ObjFieldMap;
-
- objFieldList = new ObjFieldList;
- objMethodMap = new ObjMethodMap();
- }
-
- Type type;
- String name;
- ObjFieldList *objFieldList;
- ObjMethodMap *objMethodMap;
-
- /* Head of stack of name scopes. */
- ObjNameScope *scope;
-
- void pushScope();
- void popScope();
- void iterPushScope();
- void iterPopScope();
-
- long id;
- long nextOffset;
- long firstNonTree;
-
- void referenceField( Compiler *pd, ObjField *field );
- void initField( Compiler *pd, ObjField *field );
- void createCode( Compiler *pd, CodeVect &code );
- ObjField *checkRedecl( const String &name );
- ObjMethod *findMethod( const String &name );
- ObjField *findFieldInScope( const String &name, ObjNameScope *inScope );
- ObjField *findField( const String &name );
- void insertField( const String &name, ObjField *value );
- void resolve( Compiler *pd );
- ObjField *findFieldNum( long offset );
-
- long size() { return nextOffset; }
- long sizeTrees() { return firstNonTree; }
-};
-
-typedef Vector<LangExpr*> ExprVect;
-typedef Vector<String> StringVect;
-
-struct FieldInit
-{
- FieldInit( const InputLoc &loc, String name, LangExpr *expr )
- : loc(loc), name(name), expr(expr) {}
-
- InputLoc loc;
- String name;
- LangExpr *expr;
-
- UniqueType *exprUT;
-};
-
-typedef Vector<FieldInit*> FieldInitVect;
-
-struct VarRefLookup
-{
- VarRefLookup( int lastPtrInQual, int firstConstPart, ObjectDef *inObject ) :
- lastPtrInQual(lastPtrInQual),
- firstConstPart(firstConstPart),
- inObject(inObject),
- objField(0),
- objMethod(0),
- uniqueType(0),
- iterSearchUT(0)
- {}
-
- int lastPtrInQual;
- int firstConstPart;
- ObjectDef *inObject;
- ObjField *objField;
- ObjMethod *objMethod;
- UniqueType *uniqueType;
- UniqueType *iterSearchUT;
-};
-
-struct QualItem
-{
- enum Type { Dot, Arrow };
-
- QualItem( const InputLoc &loc, const String &data, Type type )
- : loc(loc), data(data), type(type) {}
-
- InputLoc loc;
- String data;
- Type type;
-};
-
-typedef Vector<QualItem> QualItemVect;
-
-struct LangVarRef
-{
- LangVarRef( const InputLoc &loc, QualItemVect *qual, String name )
- : loc(loc), qual(qual), name(name) {}
-
- void resolve( Compiler *pd ) const;
-
- UniqueType *loadFieldInstr( Compiler *pd, CodeVect &code, ObjectDef *inObject,
- ObjField *el, bool forWriting, bool revert ) const;
- void setFieldInstr( Compiler *pd, CodeVect &code, ObjectDef *inObject,
- ObjField *el, UniqueType *exprUT, bool revert ) const;
-
- VarRefLookup lookupMethod( Compiler *pd ) ;
- VarRefLookup lookupField( Compiler *pd ) const;
-
- VarRefLookup lookupQualification( Compiler *pd, ObjectDef *rootDef ) const;
- VarRefLookup lookupObj( Compiler *pd ) const;
-
- bool isCustom( Compiler *pd ) const;
- bool isLocalRef( Compiler *pd ) const;
- bool isContextRef( Compiler *pd ) const;
- void loadQualification( Compiler *pd, CodeVect &code, ObjectDef *rootObj,
- int lastPtrInQual, bool forWriting, bool revert ) const;
- void loadCustom( Compiler *pd, CodeVect &code,
- int lastPtrInQual, bool forWriting ) const;
- void loadLocalObj( Compiler *pd, CodeVect &code,
- int lastPtrInQual, bool forWriting ) const;
- void loadContextObj( Compiler *pd, CodeVect &code, int lastPtrInQual, bool forWriting ) const;
- void loadGlobalObj( Compiler *pd, CodeVect &code,
- int lastPtrInQual, bool forWriting ) const;
- void loadObj( Compiler *pd, CodeVect &code, int lastPtrInQual, bool forWriting ) const;
- void canTakeRef( Compiler *pd, VarRefLookup &lookup ) const;
-
- void setFieldIter( Compiler *pd, CodeVect &code,
- ObjectDef *inObject, UniqueType *objUT, UniqueType *exprType, bool revert ) const;
- void setFieldSearch( Compiler *pd, CodeVect &code,
- ObjectDef *inObject, UniqueType *exprType ) const;
- void setField( Compiler *pd, CodeVect &code,
- ObjectDef *inObject, UniqueType *type, bool revert ) const;
-
- void assignValue( Compiler *pd, CodeVect &code, UniqueType *exprUT ) const;
- ObjField **evaluateArgs( Compiler *pd, CodeVect &code,
- VarRefLookup &lookup, ExprVect *args ) const;
- void callOperation( Compiler *pd, CodeVect &code, VarRefLookup &lookup ) const;
- UniqueType *evaluateCall( Compiler *pd, CodeVect &code, ExprVect *args );
- UniqueType *evaluate( Compiler *pd, CodeVect &code, bool forWriting = false ) const;
- ObjField *evaluateRef( Compiler *pd, CodeVect &code, long pushCount ) const;
- ObjField *preEvaluateRef( Compiler *pd, CodeVect &code ) const;
- void resetActiveRefs( Compiler *pd, VarRefLookup &lookup, ObjField **paramRefs ) const;
- long loadQualificationRefs( Compiler *pd, CodeVect &code ) const;
- void popRefQuals( Compiler *pd, CodeVect &code,
- VarRefLookup &lookup, ExprVect *args ) const;
-
- InputLoc loc;
- QualItemVect *qual;
- String name;
-};
-
-struct LangTerm
-{
- enum Type {
- VarRefType,
- MethodCallType,
- NumberType,
- StringType,
- MatchType,
- NewType,
- ConstructType,
- TypeIdType,
- SearchType,
- NilType,
- TrueType,
- FalseType,
- ParseType,
- ParseStopType,
- MakeTreeType,
- MakeTokenType,
- EmbedStringType
- };
-
- LangTerm( Type type, LangVarRef *varRef )
- : type(type), varRef(varRef) {}
-
- LangTerm( LangVarRef *varRef, ExprVect *args )
- : type(MethodCallType), varRef(varRef), args(args) {}
-
- LangTerm( const InputLoc &loc, Type type, ExprVect *args )
- : loc(loc), type(type), args(args) {}
-
- LangTerm( Type type, String data )
- : type(type), varRef(0), data(data) {}
-
- LangTerm( Type type, NamespaceQual *nspaceQual, const String &data )
- : type(type), varRef(0), nspaceQual(nspaceQual), data(data) {}
-
- LangTerm( const InputLoc &loc, Type type )
- : loc(loc), type(type), varRef(0), typeRef(0) {}
-
- LangTerm( const InputLoc &loc, Type type, TypeRef *typeRef )
- : loc(loc), type(type), varRef(0), typeRef(typeRef) {}
-
- LangTerm( const InputLoc &loc, Type type, LangVarRef *varRef )
- : loc(loc), type(type), varRef(varRef) {}
-
- LangTerm( Type type, LangVarRef *varRef, Pattern *pattern )
- : type(type), varRef(varRef), pattern(pattern) {}
-
- LangTerm( const InputLoc &loc, Type type, TypeRef *typeRef, LangVarRef *varRef )
- : loc(loc), type(type), varRef(varRef), typeRef(typeRef) {}
-
- LangTerm( const InputLoc &loc, Type type, TypeRef *typeRef, FieldInitVect *fieldInitArgs,
- Replacement *replacement )
- : loc(loc), type(type), typeRef(typeRef), fieldInitArgs(fieldInitArgs),
- replacement(replacement) {}
-
- LangTerm( const InputLoc &loc, Type type, LangVarRef *varRef, ObjField *objField,
- TypeRef *typeRef, FieldInitVect *fieldInitArgs, Replacement *replacement )
- : loc(loc), type(type), varRef(varRef), objField(objField), typeRef(typeRef),
- fieldInitArgs(fieldInitArgs), replacement(replacement) {}
-
- LangTerm( Type type, LangExpr *expr )
- : type(type), expr(expr) {}
-
- LangTerm( ReplItemList *replItemList )
- : type(EmbedStringType), replItemList(replItemList) {}
-
- LangTerm( const InputLoc &loc, Type type, LangVarRef *varRef,
- ObjField *objField, TypeRef *typeRef, GenericType *generic, TypeRef *parserTypeRef,
- Replacement *replacement )
- : loc(loc), type(type), varRef(varRef), objField(objField),
- typeRef(typeRef), generic(generic), parserTypeRef(parserTypeRef),
- replacement(replacement) {}
-
- void resolve( Compiler *pd );
-
- UniqueType *evaluateParse( Compiler *pd, CodeVect &code, bool stop ) const;
- UniqueType *evaluateNew( Compiler *pd, CodeVect &code ) const;
- UniqueType *evaluateConstruct( Compiler *pd, CodeVect &code ) const;
- UniqueType *evaluateMatch( Compiler *pd, CodeVect &code ) const;
- UniqueType *evaluate( Compiler *pd, CodeVect &code ) const;
- void assignFieldArgs( Compiler *pd, CodeVect &code, UniqueType *replUT ) const;
- UniqueType *evaluateMakeToken( Compiler *pd, CodeVect &code ) const;
- UniqueType *evaluateMakeTree( Compiler *pd, CodeVect &code ) const;
- UniqueType *evaluateEmbedString( Compiler *pd, CodeVect &code ) const;
-
- InputLoc loc;
- Type type;
- LangVarRef *varRef;
- ExprVect *args;
- NamespaceQual *nspaceQual;
- String data;
- ObjField *objField;
- TypeRef *typeRef;
- Pattern *pattern;
- FieldInitVect *fieldInitArgs;
- GenericType *generic;
- TypeRef *parserTypeRef;
- Replacement *replacement;
- LangExpr *expr;
- ReplItemList *replItemList;
-};
-
-struct LangExpr
-{
- enum Type {
- BinaryType,
- UnaryType,
- TermType
- };
-
- LangExpr( const InputLoc &loc, LangExpr *left, char op, LangExpr *right )
- : loc(loc), type(BinaryType), left(left), op(op), right(right) {}
-
- LangExpr( const InputLoc &loc, char op, LangExpr *right )
- : loc(loc), type(UnaryType), left(0), op(op), right(right) {}
-
- LangExpr( LangTerm *term )
- : type(TermType), term(term) {}
-
- void resolve( Compiler *pd ) const;
-
- UniqueType *evaluate( Compiler *pd, CodeVect &code ) const;
-
- InputLoc loc;
- Type type;
- LangExpr *left;
- char op;
- LangExpr *right;
- LangTerm *term;
-};
-
-struct LangStmt;
-typedef DList<LangStmt> StmtList;
-
-struct LangStmt
-{
- enum Type {
- AssignType,
- PrintType,
- PrintXMLACType,
- PrintXMLType,
- PrintStreamType,
- ExprType,
- IfType,
- ElseType,
- RejectType,
- WhileType,
- ReturnType,
- YieldType,
- ForIterType,
- BreakType,
- ParserType
- };
-
- LangStmt( const InputLoc &loc, Type type, FieldInitVect *fieldInitVect ) :
- loc(loc), type(type), varRef(0), expr(0), fieldInitVect(fieldInitVect), next(0) {}
-
- LangStmt( const InputLoc &loc, Type type, ExprVect *exprPtrVect ) :
- loc(loc), type(type), varRef(0), expr(0), exprPtrVect(exprPtrVect), next(0) {}
-
- LangStmt( const InputLoc &loc, Type type, LangExpr *expr ) :
- loc(loc), type(type), varRef(0), expr(expr), exprPtrVect(0), next(0) {}
-
- LangStmt( Type type, LangVarRef *varRef ) :
- type(type), varRef(varRef), expr(0), exprPtrVect(0), next(0) {}
-
- LangStmt( const InputLoc &loc, Type type, ObjField *objField ) :
- loc(loc), type(type), varRef(0), objField(objField), expr(0),
- exprPtrVect(0), next(0) {}
-
- LangStmt( const InputLoc &loc, Type type, LangVarRef *varRef, LangExpr *expr ) :
- loc(loc), type(type), varRef(varRef), expr(expr), exprPtrVect(0), next(0) {}
-
- LangStmt( Type type, LangExpr *expr, StmtList *stmtList ) :
- type(type), expr(expr), stmtList(stmtList), next(0) {}
-
- LangStmt( Type type, StmtList *stmtList ) :
- type(type), stmtList(stmtList), next(0) {}
-
- LangStmt( Type type, LangExpr *expr, StmtList *stmtList, LangStmt *elsePart ) :
- type(type), expr(expr), stmtList(stmtList), elsePart(elsePart), next(0) {}
-
- LangStmt( const InputLoc &loc, Type type ) :
- loc(loc), type(type), next(0) {}
-
- LangStmt( Type type, LangVarRef *varRef, Replacement *replacement ) :
- type(type), varRef(varRef), expr(0), replacement(replacement),
- exprPtrVect(0), next(0) {}
-
- LangStmt( Type type, LangVarRef *varRef, ParserText *parserText ) :
- type(type), varRef(varRef), expr(0), parserText(parserText),
- exprPtrVect(0), next(0) {}
-
- /* ForIterType */
- LangStmt( const InputLoc &loc, Type type, ObjField *objField,
- TypeRef *typeRef, LangTerm *langTerm, StmtList *stmtList ) :
- loc(loc), type(type), langTerm(langTerm), objField(objField), typeRef(typeRef),
- stmtList(stmtList), next(0) {}
-
- LangStmt( Type type ) :
- type(type), next(0) {}
-
- void resolve( Compiler *pd ) const;
- void resolveParserItems( Compiler *pd ) const;
-
- void evaluateParserItems( Compiler *pd, CodeVect &code ) const;
- LangTerm *chooseDefaultIter( Compiler *pd, LangTerm *fromVarRef ) const;
- void compileWhile( Compiler *pd, CodeVect &code ) const;
- void compileForIterBody( Compiler *pd, CodeVect &code, UniqueType *iterUT ) const;
- void compileForIter( Compiler *pd, CodeVect &code ) const;
- void compile( Compiler *pd, CodeVect &code ) const;
-
- InputLoc loc;
- Type type;
- LangVarRef *varRef;
- LangTerm *langTerm;
- ObjField *objField;
- TypeRef *typeRef;
- LangExpr *expr;
- Replacement *replacement;
- ParserText *parserText;
- ExprVect *exprPtrVect;
- FieldInitVect *fieldInitVect;
- StmtList *stmtList;
- /* Either another if, or an else. */
- LangStmt *elsePart;
- String name;
-
- /* Normally you don't need to initialize double list pointers, however, we
- * make use of the next pointer for returning a pair of statements using
- * one pointer to a LangStmt, so we need to initialize it above. */
- LangStmt *prev, *next;
-};
-
-struct CodeBlock
-{
- CodeBlock( StmtList *stmtList )
- :
- frameId(-1),
- stmtList(stmtList),
- localFrame(0),
- context(0) {}
-
- void compile( Compiler *pd, CodeVect &code ) const;
- void resolve( Compiler *pd ) const;
-
- long frameId;
- StmtList *stmtList;
- ObjectDef *localFrame;
- CharSet trees;
- Context *context;
-
- /* Each frame has two versions of
- * the code: revert and commit. */
- CodeVect codeWV, codeWC;
-};
-
-struct Function
-{
- Function( TypeRef *typeRef, const String &name,
- ParameterList *paramList, CodeBlock *codeBlock,
- int funcId, bool isUserIter )
- :
- typeRef(typeRef),
- name(name),
- paramList(paramList),
- codeBlock(codeBlock),
- funcId(funcId),
- isUserIter(isUserIter),
- paramListSize(0),
- paramUTs(0),
- inContext(0)
- {}
-
- TransBlock *transBlock;
- TypeRef *typeRef;
- String name;
- ParameterList *paramList;
- CodeBlock *codeBlock;
- ObjectDef *localFrame;
- long funcId;
- bool isUserIter;
- long paramListSize;
- UniqueType **paramUTs;
- Context *inContext;
-
- Function *prev, *next;
-};
-
-typedef DList<Function> FunctionList;
-
-#endif /* _PARSETREE_H */
diff --git a/colm/pcheck.cc b/colm/pcheck.cc
deleted file mode 100644
index d5401f7d..00000000
--- a/colm/pcheck.cc
+++ /dev/null
@@ -1,154 +0,0 @@
-/*
- * Copyright 2006-2012 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Colm.
- *
- * Colm is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Colm is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Colm; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include "pcheck.h"
-#include <assert.h>
-
-/* Construct a new parameter checker with for paramSpec. */
-ParamCheck::ParamCheck( const char *paramSpec, int argc, const char **argv )
-:
- state(noparam),
- argOffset(0),
- curArg(0),
- iCurArg(1),
- paramSpec(paramSpec),
- argc(argc),
- argv(argv)
-{
-}
-
-/* Check a single option. Returns the index of the next parameter. Sets p to
- * the arg character if valid, 0 otherwise. Sets parg to the parameter arg if
- * there is one, NULL otherwise. */
-bool ParamCheck::check()
-{
- bool requiresParam;
-
- if ( iCurArg >= argc ) { /* Off the end of the arg list. */
- state = noparam;
- return false;
- }
-
- if ( argOffset != 0 && *argOffset == 0 ) {
- /* We are at the end of an arg string. */
- iCurArg += 1;
- if ( iCurArg >= argc ) {
- state = noparam;
- return false;
- }
- argOffset = 0;
- }
-
- if ( argOffset == 0 ) {
- /* Set the current arg. */
- curArg = argv[iCurArg];
-
- /* We are at the beginning of an arg string. */
- if ( argv[iCurArg] == 0 || /* Argv[iCurArg] is null. */
- argv[iCurArg][0] != '-' || /* Not a param. */
- argv[iCurArg][1] == 0 ) { /* Only a dash. */
- parameter = 0;
- parameterArg = 0;
-
- iCurArg += 1;
- state = noparam;
- return true;
- }
- argOffset = argv[iCurArg] + 1;
- }
-
- /* Get the arg char. */
- char argChar = *argOffset;
-
- /* Loop over all the parms and look for a match. */
- const char *pSpec = paramSpec;
- while ( *pSpec != 0 ) {
- char pSpecChar = *pSpec;
-
- /* If there is a ':' following the char then
- * it requires a parm. If a parm is required
- * then move ahead two in the parmspec. Otherwise
- * move ahead one in the parm spec. */
- if ( pSpec[1] == ':' ) {
- requiresParam = true;
- pSpec += 2;
- }
- else {
- requiresParam = false;
- pSpec += 1;
- }
-
- /* Do we have a match. */
- if ( argChar == pSpecChar ) {
- if ( requiresParam ) {
- if ( argOffset[1] == 0 ) {
- /* The param must follow. */
- if ( iCurArg + 1 == argc ) {
- /* We are the last arg so there
- * cannot be a parameter to it. */
- parameter = argChar;
- parameterArg = 0;
- iCurArg += 1;
- argOffset = 0;
- state = invalid;
- return true;
- }
- else {
- /* the parameter to the arg is the next arg. */
- parameter = pSpecChar;
- parameterArg = argv[iCurArg + 1];
- iCurArg += 2;
- argOffset = 0;
- state = match;
- return true;
- }
- }
- else {
- /* The param for the arg is built in. */
- parameter = pSpecChar;
- parameterArg = argOffset + 1;
- iCurArg += 1;
- argOffset = 0;
- state = match;
- return true;
- }
- }
- else {
- /* Good, we matched the parm and no
- * arg is required. */
- parameter = pSpecChar;
- parameterArg = 0;
- argOffset += 1;
- state = match;
- return true;
- }
- }
- }
-
- /* We did not find a match. Bad Argument. */
- parameter = argChar;
- parameterArg = 0;
- argOffset += 1;
- state = invalid;
- return true;
-}
-
-
diff --git a/colm/pcheck.h b/colm/pcheck.h
deleted file mode 100644
index 5be60426..00000000
--- a/colm/pcheck.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright 2001, 2002 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Colm.
- *
- * Colm is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Colm is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Colm; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#ifndef _PCHECK_H
-#define _PCHECK_H
-
-class ParamCheck
-{
-public:
- ParamCheck( const char *paramSpec, int argc, const char **argv );
-
- bool check();
-
- const char *parameterArg; /* The argument to the parameter. */
- char parameter; /* The parameter matched. */
- enum { match, invalid, noparam } state;
-
- const char *argOffset; /* If we are reading params inside an
- * arg this points to the offset. */
-
- const char *curArg; /* Pointer to the current arg. */
- int iCurArg; /* Index to the current arg. */
-
-private:
- const char *paramSpec; /* Parameter spec supplied by the coder. */
- int argc; /* Arguement data from the command line. */
- const char **argv;
-};
-
-#endif /* _PCHECK_H */
diff --git a/colm/pdabuild.cc b/colm/pdabuild.cc
deleted file mode 100644
index 4bce96ce..00000000
--- a/colm/pdabuild.cc
+++ /dev/null
@@ -1,2091 +0,0 @@
-/*
- * Copyright 2006-2012 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Colm.
- *
- * Colm is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Colm is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Colm; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include <iostream>
-#include <iomanip>
-#include <errno.h>
-#include <stdlib.h>
-
-/* Parsing. */
-#include "global.h"
-#include "parsedata.h"
-#include "pdacodegen.h"
-#include "pdarun.h"
-#include "redfsm.h"
-#include "fsmcodegen.h"
-#include "redbuild.h"
-#include "fsmrun.h"
-
-/* Dumping the fsm. */
-#include "mergesort.h"
-
-using namespace std;
-
-char startDefName[] = "start";
-
-/* Count the transitions in the fsm by walking the state list. */
-int countTransitions( PdaGraph *fsm )
-{
- int numTrans = 0;
- PdaState *state = fsm->stateList.head;
- while ( state != 0 ) {
- numTrans += state->transMap.length();
- state = state->next;
- }
- return numTrans;
-}
-
-LangEl::LangEl( Namespace *nspace, const String &name, Type type )
-:
- nspace(nspace),
- name(name),
- lit(name),
- type(type),
- id(-1),
- isUserTerm(false),
- isContext(false),
- displayString(0),
- numAppearances(0),
- commit(false),
- ignore(false),
- reduceFirst(false),
- isLiteral(false),
- isRepeat(false),
- isList(false),
- isOpt(false),
- parseStop(false),
- isEOF(false),
- repeatOf(0),
- tokenDef(0),
- rootDef(0),
- termDup(0),
- eofLel(0),
- pdaGraph(0),
- pdaTables(0),
- transBlock(0),
- objectDef(0),
- thisSize(0),
- ofiOffset(0),
- generic(0),
- parserId(-1),
- predType(PredNone),
- predValue(0),
- contextDef(0),
- contextIn(0),
- noPreIgnore(false),
- noPostIgnore(false),
- isCI(false),
- ciRegion(0)
-{
-}
-
-PdaGraph *ProdElList::walk( Compiler *pd, Definition *prod )
-{
- PdaGraph *prodFsm = new PdaGraph();
- PdaState *last = prodFsm->addState();
- prodFsm->setStartState( last );
-
- if ( prod->collectIgnoreRegion != 0 ) {
-// cerr << "production " << prod->data << " has collect ignore region " <<
-// prod->collectIgnoreRegion->name << endl;
-
- /* Use the IGNORE TOKEN lang el for the region. */
- long value = prod->collectIgnoreRegion->ciLel->id;
-
- PdaState *newState = prodFsm->addState();
- PdaTrans *newTrans = prodFsm->appendNewTrans( last, newState, value, value );
-
- newTrans->isShift = true;
- newTrans->shiftPrior = 0; // WAT
- last = newState;
- }
-
- int prodLength = 0;
- for ( Iter prodEl = first(); prodEl.lte(); prodEl++, prodLength++ ) {
- //PdaGraph *itemFsm = prodEl->walk( pd );
- long value = prodEl->langEl->id;
-
- PdaState *newState = prodFsm->addState();
- PdaTrans *newTrans = prodFsm->appendNewTrans( last, newState, value, value );
-
- newTrans->isShift = true;
- newTrans->shiftPrior = prodEl->priorVal;
- //cerr << "PRIOR VAL: " << newTrans->shiftPrior << endl;
-
- if ( prodEl->commit ) {
- //cout << "COMMIT: inserting commit of length: " << pd->prodLength << endl;
- /* Insert the commit into transitions out of last */
- for ( TransMap::Iter trans = last->transMap; trans.lte(); trans++ )
- trans->value->commits.insert( prodLength );
- }
-
- last = newState;
- }
-
- /* Make the last state the final state. */
- prodFsm->setFinState( last );
- return prodFsm;
-}
-
-
-ProdElList *Compiler::makeProdElList( LangEl *langEl )
-{
- ProdElList *prodElList = new ProdElList();
- UniqueType *uniqueType = findUniqueType( TYPE_TREE, langEl );
- TypeRef *typeRef = new TypeRef( InputLoc(), uniqueType );
- prodElList->append( new ProdEl( InputLoc(), typeRef ) );
- prodElList->tail->langEl = langEl;
- return prodElList;
-}
-
-void Compiler::makeDefinitionNames()
-{
- for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
- int prodNum = 1;
- for ( LelDefList::Iter def = lel->defList; def.lte(); def++ ) {
- def->data.setAs( lel->name.length() + 32, "%s-%i",
- lel->name.data, prodNum++ );
- }
- }
-}
-
-/* Make sure there there are no language elements whose type is unkonwn. This
- * can happen when an id is used on the rhs of a definition but is not defined
- * as anything. */
-void Compiler::noUndefindLangEls()
-{
- for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
- if ( lel->type == LangEl::Unknown )
- error() << "'" << lel->name << "' was not defined as anything" << endp;
- }
-}
-
-void Compiler::makeLangElIds()
-{
- /* The first id 0 is reserved for the stack sentinal. A negative id means
- * error to the parsing function, inducing backtracking. */
- nextSymbolId = 1;
-
- /* First pass assigns to the user terminals. */
- for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
- /* Must be a term, and not any of the special reserved terminals.
- * Remember if the non terminal is a user non terminal. */
- if ( lel->type == LangEl::Term &&
- !lel->isEOF &&
- lel != errorLangEl &&
- lel != noTokenLangEl )
- {
- lel->isUserTerm = true;
- lel->id = nextSymbolId++;
- }
- }
-
- //eofLangEl->id = nextSymbolId++;
- for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
- /* Must be a term, and not any of the special reserved terminals.
- * Remember if the non terminal is a user non terminal. */
- if ( lel->isEOF )
- lel->id = nextSymbolId++;
- }
-
- /* Next assign to the eof notoken, which we always create. */
- noTokenLangEl->id = nextSymbolId++;
-
- /* Possibly assign to the error language element. */
- if ( errorLangEl != 0 )
- errorLangEl->id = nextSymbolId++;
-
- /* Save this for the code generation. */
- firstNonTermId = nextSymbolId;
-
- /* A third and final pass assigns to everything else. */
- for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
- /* Anything else not yet assigned gets assigned now. */
- if ( lel->id < 0 )
- lel->id = nextSymbolId++;
- }
-
- assert( ptrLangEl->id == LEL_ID_PTR );
- assert( boolLangEl->id == LEL_ID_BOOL );
- assert( intLangEl->id == LEL_ID_INT );
- assert( strLangEl->id == LEL_ID_STR );
- assert( streamLangEl->id == LEL_ID_STREAM );
- assert( inputLangEl->id == LEL_ID_INPUT );
- assert( ignoreLangEl->id == LEL_ID_IGNORE );
-}
-
-void Compiler::refNameSpace( LangEl *lel, Namespace *nspace )
-{
- if ( nspace == defaultNamespace || nspace == rootNamespace ) {
- lel->refName = "::" + lel->refName;
- return;
- }
-
- lel->refName = nspace->name + "::" + lel->refName;
- lel->declName = nspace->name + "::" + lel->declName;
- lel->xmlTag = nspace->name + "::" + lel->xmlTag;
- refNameSpace( lel, nspace->parentNamespace );
-}
-
-void Compiler::makeLangElNames()
-{
- for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
- if ( lel->id == LEL_ID_INT ) {
- lel->fullName = "_int";
- lel->fullLit = "_int";
- lel->refName = "_int";
- lel->declName = "_int";
- lel->xmlTag = "int";
- }
- else if ( lel->id == LEL_ID_BOOL ) {
- lel->fullName = "_bool";
- lel->fullLit = "_bool";
- lel->refName = "_bool";
- lel->declName = "_bool";
- lel->xmlTag = "bool";
- }
- else {
- lel->fullName = lel->name;
- lel->fullLit = lel->lit;
- lel->refName = lel->lit;
- lel->declName = lel->lit;
- lel->xmlTag = lel->name;
- }
-
- /* If there is also a namespace next to the type, we add a prefix to
- * the type. It's not convenient to name C++ classes the same as a
- * namespace in the same scope. We don't want to restrict colm, so we
- * add a workaround for the least-common case. The type gets t_ prefix.
- * */
- Namespace *nspace = lel->nspace->findNamespace( lel->name );
- if ( nspace != 0 ) {
- lel->refName = "t_" + lel->refName;
- lel->fullName = "t_" + lel->fullName;
- lel->declName = "t_" + lel->declName;
- lel->xmlTag = "t_" + lel->xmlTag;
- }
-
- refNameSpace( lel, lel->nspace );
- }
-}
-
-/* Set up dot sets, shift info, and prod sets. */
-void Compiler::makeProdFsms()
-{
- /* There are two items in the index for each production (high and low). */
- int indexLen = prodList.length() * 2;
- dotItemIndex.setAsNew( indexLen );
- int dsiLow = 0, indexPos = 0;
-
- /* Build FSMs for all production language elements. */
- for ( DefList::Iter prod = prodList; prod.lte(); prod++ )
- prod->fsm = prod->prodElList->walk( this, prod );
-
- makeNonTermFirstSets();
- makeFirstSets();
-
- /* Build FSMs for all production language elements. */
- for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) {
- if ( addUniqueEmptyProductions ) {
- /* This must be re-implemented. */
- assert( false );
- //if ( !prod->isLeftRec && prod->uniqueEmptyLeader != 0 ) {
- // PdaGraph *emptyLeader = prod->uniqueEmptyLeader->walk( this );
- // emptyLeader->concatOp( prod->fsm );
- // prod->fsm = emptyLeader;
- //}
- }
-
- /* Compute the machine's length. */
- prod->fsmLength = prod->fsm->fsmLength( );
-
- /* Productions have a unique production id for each final state.
- * This lets us use a production length specific to each final state.
- * Start states are always isolated therefore if the start state is
- * final then reductions from it will always have a fixed production
- * length. This is a simple method for determining the length
- * of zero-length derivations when reducing. */
-
- /* Number of dot items needed for the production is elements + 1
- * because the dot can be before the first and after the last element. */
- int numForProd = prod->fsm->stateList.length() + 1;
-
- /* Set up the low and high values in the index for this production. */
- dotItemIndex.data[indexPos].key = dsiLow;
- dotItemIndex.data[indexPos].value = prod;
- dotItemIndex.data[indexPos+1].key = dsiLow + numForProd - 1;
- dotItemIndex.data[indexPos+1].value = prod;
-
- int dsi = dsiLow;
- for ( PdaStateList::Iter state = prod->fsm->stateList; state.lte(); state++, dsi++ ) {
- /* All transitions are shifts. */
- for ( TransMap::Iter out = state->transMap; out.lte(); out++ )
- assert( out->value->isShift );
-
- state->dotSet.insert( dsi );
- }
-
- /* Move over the production. */
- dsiLow += numForProd;
- indexPos += 2;
-
- if ( prod->prodCommit ) {
- for ( PdaStateSet::Iter fin = prod->fsm->finStateSet; fin.lte(); fin++ ) {
- int length = prod->fsmLength;
- //cerr << "PENDING COMMIT IN FINAL STATE of " << prod->prodId <<
- // " with len: " << length << endl;
- (*fin)->pendingCommits.insert( ProdIdPair( prod->prodId, length ) );
- }
- }
- }
-
- /* Make the final state specific prod id to prod id mapping. */
- prodIdIndex = new Definition*[prodList.length()];
- for ( DefList::Iter prod = prodList; prod.lte(); prod++ )
- prodIdIndex[prod->prodId] = prod;
-}
-
-/* Want the first set of over src. If the first set contains epsilon, go over
- * it and over tab. If overSrc is the end of the production, find the follow
- * from the table, taking only the characters on which the parent is reduced.
- * */
-void Compiler::findFollow( AlphSet &result, PdaState *overTab,
- PdaState *overSrc, Definition *parentDef )
-{
- if ( overSrc->isFinState() ) {
- assert( overSrc->transMap.length() == 0 );
-
- /* At the end of the production. Turn to the table. */
- long redCode = makeReduceCode( parentDef->prodId, false );
- for ( TransMap::Iter tabTrans = overTab->transMap; tabTrans.lte(); tabTrans++ ) {
- for ( ActDataList::Iter adl = tabTrans->value->actions; adl.lte(); adl++ ) {
- if ( *adl == redCode )
- result.insert( tabTrans->key );
- }
- }
- }
- else {
- /* Get the first set of the item. If the first set contains epsilon
- * then move over overSrc and overTab and recurse. */
- assert( overSrc->transMap.length() == 1 );
- TransMap::Iter pastTrans = overSrc->transMap;
-
- LangEl *langEl = langElIndex[pastTrans->key];
- if ( langEl != 0 && langEl->type == LangEl::NonTerm ) {
- bool hasEpsilon = false;
- for ( LelDefList::Iter def = langEl->defList; def.lte(); def++ ) {
- result.insert( def->firstSet );
-
- if ( def->firstSet.find( -1 ) )
- hasEpsilon = true;
- }
-
- /* Find the equivalent state in the parser. */
- if ( hasEpsilon ) {
- PdaTrans *tabTrans = overTab->findTrans( pastTrans->key );
- findFollow( result, tabTrans->toState,
- pastTrans->value->toState, parentDef );
- }
-
- /* Now possibly the dup. */
- if ( langEl->termDup != 0 )
- result.insert( langEl->termDup->id );
- }
- else {
- result.insert( pastTrans->key );
- }
- }
-}
-
-PdaState *Compiler::followProd( PdaState *tabState, PdaState *prodState )
-{
- while ( prodState->transMap.length() == 1 ) {
- TransMap::Iter prodTrans = prodState->transMap;
- PdaTrans *tabTrans = tabState->findTrans( prodTrans->key );
- prodState = prodTrans->value->toState;
- tabState = tabTrans->toState;
- }
- return tabState;
-}
-
-void Compiler::trySetTime( PdaTrans *trans, long code, long &time )
-{
- /* Find the item. */
- for ( ActDataList::Iter adl = trans->actions; adl.lte(); adl++ ) {
- if ( *adl == code ) {
- /* If the time of the shift is not already set, set it. */
- if ( trans->actOrds[adl.pos()] == 0 ) {
- //cerr << "setting time: state = " << tabState->stateNum
- // << ", trans = " << tabTrans->lowKey
- // << ", time = " << time << endl;
- trans->actOrds[adl.pos()] = time++;
- }
- break;
- }
- }
-}
-
-/* Go down a defintiion and then handle the follow actions. */
-void Compiler::pdaOrderFollow( LangEl *rootEl, PdaState *tabState,
- PdaTrans *tabTrans, PdaTrans *srcTrans, Definition *parentDef,
- Definition *definition, long &time )
-{
- /* We need the follow from tabState/srcState over the defintion we are
- * currently processing. */
- PdaState *overTab = tabTrans->toState;
- PdaState *overSrc = srcTrans->toState;
-
- AlphSet alphSet;
- if ( parentDef == rootEl->rootDef )
- alphSet.insert( rootEl->eofLel->id );
- else
- findFollow( alphSet, overTab, overSrc, parentDef );
-
- /* Now follow the production to find out where it expands to. */
- PdaState *expandToState = followProd( tabState, definition->fsm->startState );
-
- /* Find the reduce item. */
- long redCode = makeReduceCode( definition->prodId, false );
-
- for ( TransMap::Iter tt = expandToState->transMap; tt.lte(); tt++ ) {
- if ( alphSet.find( tt->key ) ) {
- trySetTime( tt->value, redCode, time );
-
- /* If the items token region is not recorded in the state, do it now. */
- addRegion( expandToState, tt->value, tt->key,
- tt->value->noPreIgnore, tt->value->noPostIgnore );
- }
- }
-}
-
-bool regionVectHas( RegionVect &regVect, TokenRegion *region )
-{
- for ( RegionVect::Iter trvi = regVect; trvi.lte(); trvi++ ) {
- if ( *trvi == region )
- return true;
- }
- return false;
-}
-
-void Compiler::addRegion( PdaState *tabState, PdaTrans *tabTrans,
- long pdaKey, bool noPreIgnore, bool noPostIgnore )
-{
- LangEl *langEl = langElIndex[pdaKey];
- if ( langEl != 0 && langEl->type == LangEl::Term ) {
- TokenRegion *region = 0;
-
- /* If it is not the eof, then use the region associated
- * with the token definition. */
- if ( langEl->isCI ) {
- //cerr << "isCI" << endl;
- region = langEl->ciRegion->ciRegion;
- }
- else if ( !langEl->isEOF && langEl->tokenDef != 0 ) {
- region = langEl->tokenDef->tokenRegion;
- }
-
- if ( region != 0 ) {
- /* region. */
- TokenRegion *scanRegion = region;
-
- if ( langEl->noPreIgnore )
- scanRegion = region->tokenOnlyRegion;
-
- if ( !regionVectHas( tabState->regions, scanRegion ) ) {
- tabState->regions.append( scanRegion );
- }
-
- /* Pre-region of to state */
- PdaState *toState = tabTrans->toState;
- if ( !langEl->noPostIgnore &&
- region->ignoreOnlyRegion != 0 &&
- !regionVectHas( toState->preRegions, region->ignoreOnlyRegion ) )
- {
- toState->preRegions.append( region->ignoreOnlyRegion );
- }
- }
- }
-}
-
-#if 0
- orderState( tabState, prodState, time ):
- if not tabState.dotSet.find( prodState.dotID )
- tabState.dotSet.insert( prodState.dotID )
- tabTrans = tabState.findMatchingTransition( prodState.getTransition() )
-
- if tabTrans is NonTerminal:
- for production in tabTrans.nonTerm.prodList:
- orderState( tabState, production.startState, time )
-
- for all expandToState in tabTrans.expandToStates:
- for all followTrans in expandToState.transList
- reduceAction = findAction( production.reduction )
- if reduceAction.time is unset:
- reduceAction.time = time++
- end
- end
- end
- end
- end
-
- shiftAction = tabTrans.findAction( shift )
- if shiftAction.time is unset:
- shiftAction.time = time++
- end
-
- orderState( tabTrans.toState, prodTrans.toState, time )
- end
- end
-
- orderState( parseTable.startState, startProduction.startState, 1 )
-#endif
-
-void Compiler::pdaOrderProd( LangEl *rootEl, PdaState *tabState,
- PdaState *srcState, Definition *parentDef, long &time )
-{
- assert( srcState->dotSet.length() == 1 );
- if ( tabState->dotSet2.find( srcState->dotSet[0] ) )
- return;
- tabState->dotSet2.insert( srcState->dotSet[0] );
-
- assert( srcState->transMap.length() == 0 || srcState->transMap.length() == 1 );
-
- if ( srcState->transMap.length() == 1 ) {
- TransMap::Iter srcTrans = srcState->transMap;
-
- /* Find the equivalent state in the parser. */
- PdaTrans *tabTrans = tabState->findTrans( srcTrans->key );
-
- /* Recurse into the transition if it is a non-terminal. */
- LangEl *langEl = langElIndex[srcTrans->key];
- if ( langEl != 0 ) {
- if ( langEl->reduceFirst ) {
- /* Use a shortest match ordering for the contents of this
- * nonterminal. Does follows for all productions first, then
- * goes down the productions. */
- for ( LelDefList::Iter expDef = langEl->defList; expDef.lte(); expDef++ ) {
- pdaOrderFollow( rootEl, tabState, tabTrans, srcTrans->value,
- parentDef, expDef, time );
- }
- for ( LelDefList::Iter expDef = langEl->defList; expDef.lte(); expDef++ )
- pdaOrderProd( rootEl, tabState, expDef->fsm->startState, expDef, time );
-
- }
- else {
- /* The default action ordering. For each prod, goes down the
- * prod then sets the follow before going to the next prod. */
- for ( LelDefList::Iter expDef = langEl->defList; expDef.lte(); expDef++ ) {
- pdaOrderProd( rootEl, tabState, expDef->fsm->startState, expDef, time );
-
- pdaOrderFollow( rootEl, tabState, tabTrans, srcTrans->value,
- parentDef, expDef, time );
- }
- }
- }
-
- trySetTime( tabTrans, SHIFT_CODE, time );
-
- /* Now possibly for the dup. */
- if ( langEl != 0 && langEl->termDup != 0 ) {
- PdaTrans *dupTrans = tabState->findTrans( langEl->termDup->id );
- trySetTime( dupTrans, SHIFT_CODE, time );
- }
-
- /* If the items token region is not recorded in the state, do it now. */
- addRegion( tabState, tabTrans, srcTrans->key,
- srcTrans->value->noPreIgnore, srcTrans->value->noPostIgnore );
-
- /* Go over one in the production. */
- pdaOrderProd( rootEl, tabTrans->toState,
- srcTrans->value->toState, parentDef, time );
- }
-}
-
-void Compiler::pdaActionOrder( PdaGraph *pdaGraph, LangElSet &parserEls )
-{
- for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
- assert( (state->stateBits & SB_ISMARKED) == 0 );
-
- /* Traverse the src state's transitions. */
- long last = 0;
- for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) {
- if ( ! trans.first() )
- assert( last < trans->key );
- last = trans->key;
- }
- }
-
- /* Compute the action orderings, record the max value. */
- long time = 1;
- for ( LangElSet::Iter pe = parserEls; pe.lte(); pe++ ) {
- PdaState *startState = (*pe)->rootDef->fsm->startState;
- pdaOrderProd( *pe, (*pe)->startState, startState, (*pe)->rootDef, time );
-
- /* Walk over the start lang el and set the time for shift of
- * the eof action that completes the parse. */
- PdaTrans *overStart = (*pe)->startState->findTrans( (*pe)->id );
- PdaTrans *eofTrans = overStart->toState->findTrans( (*pe)->eofLel->id );
- eofTrans->actOrds[0] = time++;
- }
-
- for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
- if ( state->regions.length() == 0 ) {
- for ( TransMap::Iter tel = state->transMap; tel.lte(); tel++ ) {
- /* There are no regions and EOF leaves the state. Add the eof
- * token region. */
- PdaTrans *trans = tel->value;
- LangEl *lel = langElIndex[trans->lowKey];
- if ( lel != 0 && lel->isEOF )
- state->regions.append( eofTokenRegion );
- }
- }
- }
-
- if ( colm_log_compile ) {
- /* Warn about states with empty token region lists. */
- for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
- if ( state->regions.length() == 0 ) {
- warning() << "state has an empty token region, state: " <<
- state->stateNum << endl;
- }
- }
- }
-
- /* Some actions may not have an ordering. I believe these to be actions
- * that result in a parse error and they arise because the state tables
- * are LALR(1) but the action ordering is LR(1). LALR(1) causes some
- * reductions that lead nowhere. */
- for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
- assert( CmpDotSet::compare( state->dotSet, state->dotSet2 ) == 0 );
- for ( TransMap::Iter tel = state->transMap; tel.lte(); tel++ ) {
- PdaTrans *trans = tel->value;
- /* Check every action has an ordering. */
- for ( ActDataList::Iter adl = trans->actOrds; adl.lte(); adl++ ) {
- if ( *adl == 0 )
- *adl = time++;
- }
- }
- }
-}
-
-void Compiler::advanceReductions( PdaGraph *pdaGraph )
-{
- /* Loop all states. */
- for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
- if ( !state->advanceReductions )
- continue;
-
- bool outHasShift = false;
- ReductionMap outReds;
- LongSet outCommits;
- for ( TransMap::Iter out = state->transMap; out.lte(); out++ ) {
- /* Get the transition from the trans el. */
- if ( out->value->isShift )
- outHasShift = true;
- outReds.insert( out->value->reductions );
- outCommits.insert( out->value->commits );
- }
-
- bool inHasShift = false;
- ReductionMap inReds;
- for ( PdaTransInList::Iter in = state->inRange; in.lte(); in++ ) {
- /* Get the transition from the trans el. */
- if ( in->isShift )
- inHasShift = true;
- inReds.insert( in->reductions );
- }
-
- if ( !outHasShift && outReds.length() == 1 &&
- inHasShift && inReds.length() == 0 )
- {
- //cerr << "moving reduction to shift" << endl;
-
- /* Move the reduction to all in transitions. */
- for ( PdaTransInList::Iter in = state->inRange; in.lte(); in++ ) {
- assert( in->actions.length() == 1 );
- assert( in->actions[0] == SHIFT_CODE );
- in->actions[0] = makeReduceCode( outReds[0].key, true );
- in->afterShiftCommits.insert( outCommits );
- }
-
- /*
- * Remove all transitions out of the state.
- */
-
- /* Detach out range transitions. */
- for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) {
- pdaGraph->detachTrans( state, trans->value->toState, trans->value );
- delete trans->value;
- }
- state->transMap.empty();
-
- /* Redirect all the in transitions to the actionDestState. */
- pdaGraph->inTransMove( actionDestState, state );
- }
- }
-
- pdaGraph->removeUnreachableStates();
-}
-
-void Compiler::sortActions( PdaGraph *pdaGraph )
-{
- /* Sort the actions. */
- for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
- assert( CmpDotSet::compare( state->dotSet, state->dotSet2 ) == 0 );
- for ( TransMap::Iter tel = state->transMap; tel.lte(); tel++ ) {
- PdaTrans *trans = tel->value;
-
- /* Sort by the action ords. */
- ActDataList actions( trans->actions );
- ActDataList actOrds( trans->actOrds );
- ActDataList actPriors( trans->actPriors );
- trans->actions.empty();
- trans->actOrds.empty();
- trans->actPriors.empty();
- while ( actOrds.length() > 0 ) {
- int min = 0;
- for ( int i = 1; i < actOrds.length(); i++ ) {
- if ( actPriors[i] > actPriors[min] ||
- (actPriors[i] == actPriors[min] &&
- actOrds[i] < actOrds[min] ) )
- {
- min = i;
- }
- }
- trans->actions.append( actions[min] );
- trans->actOrds.append( actOrds[min] );
- trans->actPriors.append( actPriors[min] );
- actions.remove(min);
- actOrds.remove(min);
- actPriors.remove(min);
- }
-
- if ( branchPointInfo && trans->actions.length() > 1 ) {
- cerr << "info: branch point"
- << " state: " << state->stateNum
- << " trans: ";
- LangEl *lel = langElIndex[trans->lowKey];
- if ( lel == 0 )
- cerr << (char)trans->lowKey << endl;
- else
- cerr << lel->lit << endl;
-
- for ( ActDataList::Iter act = trans->actions; act.lte(); act++ ) {
- switch ( *act & 0x3 ) {
- case 1:
- cerr << " shift" << endl;
- break;
- case 2:
- cerr << " reduce " <<
- prodIdIndex[(*act >> 2)]->data << endl;
- break;
- case 3:
- cerr << " shift-reduce" << endl;
- break;
- }
- }
- }
-
- /* Verify that shifts of nonterminals don't have any branch
- * points or commits. */
- if ( trans->lowKey >= firstNonTermId ) {
- if ( trans->actions.length() != 1 ||
- (trans->actions[0] & 0x3) != 1 )
- {
- error() << "TRANS ON NONTERMINAL is something "
- "other than a shift" << endl;
- }
- if ( trans->commits.length() > 0 )
- error() << "TRANS ON NONTERMINAL has a commit" << endl;
- }
-
- /* TODO: Shift-reduces are optimizations. Verify that
- * shift-reduces exist only if they don't entail a conflict. */
- }
- }
-}
-
-void Compiler::reduceActions( PdaGraph *pdaGraph )
-{
- /* Reduce the actions. */
- for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
- for ( TransMap::Iter tel = state->transMap; tel.lte(); tel++ ) {
- PdaTrans *trans = tel->value;
- PdaActionSetEl *inSet;
-
- int commitLen = trans->commits.length() > 0 ?
- trans->commits[trans->commits.length()-1] : 0;
-
- if ( trans->afterShiftCommits.length() > 0 ) {
- int afterShiftCommit = trans->afterShiftCommits[
- trans->afterShiftCommits.length()-1];
-
- if ( commitLen > 0 && commitLen+1 > afterShiftCommit )
- commitLen = ( commitLen + 1 );
- else
- commitLen = afterShiftCommit;
- }
- else {
- commitLen = commitLen * -1;
- }
-
- //if ( commitLen != 0 ) {
- // cerr << "FINAL ACTION COMMIT LEN: " << commitLen << endl;
- //}
-
- pdaGraph->actionSet.insert( ActionData( trans->toState->stateNum,
- trans->actions, commitLen ), &inSet );
- trans->actionSetEl = inSet;
- }
- }
-}
-
-void Compiler::computeAdvanceReductions( LangEl *langEl, PdaGraph *pdaGraph )
-{
- /* Get the entry into the graph and traverse over the root. The resulting
- * state can have eof, nothing else can. */
- PdaState *overStart = pdaGraph->followFsm(
- langEl->startState,
- langEl->rootDef->fsm );
-
- /* The graph must reduce to root all on it's own. It cannot depend on
- * require EOF. */
- for ( PdaStateList::Iter st = pdaGraph->stateList; st.lte(); st++ ) {
- if ( st == overStart )
- continue;
-
- for ( TransMap::Iter tr = st->transMap; tr.lte(); tr++ ) {
- if ( tr->value->lowKey == langEl->eofLel->id )
- st->advanceReductions = true;
- }
- }
-}
-
-void Compiler::verifyParseStopGrammar( LangEl *langEl, PdaGraph *pdaGraph )
-{
- /* Get the entry into the graph and traverse over the root. The resulting
- * state can have eof, nothing else can. */
- PdaState *overStart = pdaGraph->followFsm(
- langEl->startState,
- langEl->rootDef->fsm );
-
- /* The graph must reduce to root all on it's own. It cannot depend on
- * require EOF. */
- for ( PdaStateList::Iter st = pdaGraph->stateList; st.lte(); st++ ) {
- if ( st == overStart )
- continue;
-
- for ( TransMap::Iter tr = st->transMap; tr.lte(); tr++ ) {
- if ( tr->value->lowKey == langEl->eofLel->id ) {
- /* This needs a better error message. Appears to be voodoo. */
- error() << "grammar is not usable with parse_stop" << endp;
- }
- }
- }
-}
-
-LangEl *Compiler::predOf( PdaTrans *trans, long action )
-{
- LangEl *lel;
- if ( action == SHIFT_CODE )
- lel = langElIndex[trans->lowKey];
- else
- lel = prodIdIndex[action >> 2]->predOf;
- return lel;
-}
-
-
-bool Compiler::precedenceSwap( long action1, long action2, LangEl *l1, LangEl *l2 )
-{
- bool swap = false;
- if ( l2->predValue > l1->predValue )
- swap = true;
- else if ( l1->predValue == l2->predValue ) {
- if ( l1->predType == PredLeft && action1 == SHIFT_CODE )
- swap = true;
- else if ( l1->predType == PredRight && action2 == SHIFT_CODE )
- swap = true;
- }
- return swap;
-}
-
-bool Compiler::precedenceRemoveBoth( LangEl *l1, LangEl *l2 )
-{
- if ( l1->predValue == l2->predValue && l1->predType == PredNonassoc )
- return true;
- return false;
-}
-
-void Compiler::resolvePrecedence( PdaGraph *pdaGraph )
-{
- for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
- assert( CmpDotSet::compare( state->dotSet, state->dotSet2 ) == 0 );
-
- for ( long t = 0; t < state->transMap.length(); /* increment at end */ ) {
- PdaTrans *trans = state->transMap[t].value;
-
-again:
- /* Find action with precedence. */
- for ( int i = 0; i < trans->actions.length(); i++ ) {
- LangEl *li = predOf( trans, trans->actions[i] );
-
- if ( li != 0 && li->predType != PredNone ) {
- /* Find another action with precedence. */
- for ( int j = i+1; j < trans->actions.length(); j++ ) {
- LangEl *lj = predOf( trans, trans->actions[j] );
-
- if ( lj != 0 && lj->predType != PredNone ) {
- /* Conflict to check. */
- bool swap = precedenceSwap( trans->actions[i],
- trans->actions[j], li, lj );
-
- if ( swap ) {
- long t = trans->actions[i];
- trans->actions[i] = trans->actions[j];
- trans->actions[j] = t;
- }
-
- trans->actions.remove( j );
- if ( precedenceRemoveBoth( li, lj ) )
- trans->actions.remove( i );
-
- goto again;
- }
- }
- }
- }
-
- /* If there are still actions then move to the next one. If not,
- * (due to nonassoc) then remove the transition. */
- if ( trans->actions.length() > 0 )
- t += 1;
- else
- state->transMap.vremove( t );
- }
- }
-}
-
-void Compiler::analyzeMachine( PdaGraph *pdaGraph, LangElSet &parserEls )
-{
- pdaGraph->maxState = pdaGraph->stateList.length() - 1;
- pdaGraph->maxLelId = nextSymbolId - 1;
- pdaGraph->maxOffset = pdaGraph->stateList.length() * pdaGraph->maxLelId;
-
- for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
- for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) {
- if ( trans->value->isShift ) {
- trans->value->actions.append( SHIFT_CODE );
- trans->value->actPriors.append( trans->value->shiftPrior );
- }
- for ( ReductionMap::Iter red = trans->value->reductions; red.lte(); red++ ) {
- trans->value->actions.append( makeReduceCode( red->key, false ) );
- trans->value->actPriors.append( red->value );
- }
- trans->value->actOrds.appendDup( 0, trans->value->actions.length() );
- }
- }
-
- pdaActionOrder( pdaGraph, parserEls );
- sortActions( pdaGraph );
- resolvePrecedence( pdaGraph );
-
- /* Verify that any type we parse_stop can actually be parsed that way. */
- for ( LangElSet::Iter pe = parserEls; pe.lte(); pe++ ) {
- LangEl *lel = *pe;
- if ( lel->parseStop )
- computeAdvanceReductions(lel , pdaGraph);
- }
-
- advanceReductions( pdaGraph );
- pdaGraph->setStateNumbers();
- reduceActions( pdaGraph );
-
- /* Set the action ids. */
- int actionSetId = 0;
- for ( PdaActionSet::Iter asi = pdaGraph->actionSet; asi.lte(); asi++ )
- asi->key.id = actionSetId++;
-
- /* Get the max index. */
- pdaGraph->maxIndex = actionSetId - 1;
-
- /* Compute the max prod length. */
- pdaGraph->maxProdLen = 0;
- for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) {
- if ( (unsigned)prod->fsmLength > pdaGraph->maxProdLen )
- pdaGraph->maxProdLen = prod->fsmLength;
- }
-
- /* Asserts that any transition with a nonterminal has a single action
- * which is either a shift or a shift-reduce. */
- for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
- for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) {
- LangEl *langEl = langElIndex[trans->value->lowKey];
- if ( langEl != 0 && langEl->type == LangEl::NonTerm ) {
- assert( trans->value->actions.length() == 1 );
- assert( trans->value->actions[0] == SHIFT_CODE ||
- (trans->value->actions[0] & 0x3) == SHIFT_REDUCE_CODE );
- }
- }
- }
-
- /* Assert that shift reduces always appear on their own. */
- for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
- for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) {
- for ( ActDataList::Iter act = trans->value->actions; act.lte(); act++ ) {
- if ( (*act & 0x3) == SHIFT_REDUCE_CODE )
- assert( trans->value->actions.length() == 1 );
- }
- }
- }
-
- /* Verify that any type we parse_stop can actually be parsed that way. */
- for ( LangElSet::Iter pe = parserEls; pe.lte(); pe++ ) {
- LangEl *lel = *pe;
- if ( lel->parseStop )
- verifyParseStopGrammar(lel , pdaGraph);
- }
-}
-
-void Compiler::wrapNonTerminals()
-{
- /* Make a language element that will be used to make the root productions.
- * These are used for making parsers rooted at any production (including
- * the start symbol). */
- rootLangEl = declareLangEl( this, rootNamespace, "_root", LangEl::NonTerm );
-
- for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
- /* Make a single production used when the lel is a root. */
- ProdElList *prodElList = makeProdElList( lel );
- lel->rootDef = new Definition( InputLoc(), rootLangEl,
- prodElList, false, 0,
- prodList.length(), rootLangEl->defList.length(),
- Definition::Production );
- prodList.append( lel->rootDef );
- rootLangEl->defList.append( lel->rootDef );
-
- /* First resolve. */
- for ( ProdElList::Iter fact = *prodElList; fact.lte(); fact++ )
- resolveFactor( fact );
- }
-}
-
-bool Compiler::makeNonTermFirstSetProd( Definition *prod, PdaState *state )
-{
- bool modified = false;
- for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) {
- if ( trans->key >= firstNonTermId ) {
- long *inserted = prod->nonTermFirstSet.insert( trans->key );
- if ( inserted != 0 )
- modified = true;
-
- bool hasEpsilon = false;
- LangEl *lel = langElIndex[trans->key];
- for ( LelDefList::Iter ldef = lel->defList; ldef.lte(); ldef++ ) {
- for ( ProdIdSet::Iter pid = ldef->nonTermFirstSet;
- pid.lte(); pid++ )
- {
- if ( *pid == -1 )
- hasEpsilon = true;
- else {
- long *inserted = prod->nonTermFirstSet.insert( *pid );
- if ( inserted != 0 )
- modified = true;
- }
- }
- }
-
- if ( hasEpsilon ) {
- if ( trans->value->toState->isFinState() ) {
- long *inserted = prod->nonTermFirstSet.insert( -1 );
- if ( inserted != 0 )
- modified = true;
- }
-
- bool lmod = makeNonTermFirstSetProd( prod, trans->value->toState );
- if ( lmod )
- modified = true;
- }
- }
- }
- return modified;
-}
-
-
-void Compiler::makeNonTermFirstSets()
-{
- bool modified = true;
- while ( modified ) {
- modified = false;
- for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) {
- if ( prod->fsm->startState->isFinState() ) {
- long *inserted = prod->nonTermFirstSet.insert( -1 );
- if ( inserted != 0 )
- modified = true;
- }
-
- bool lmod = makeNonTermFirstSetProd( prod, prod->fsm->startState );
- if ( lmod )
- modified = true;
- }
- }
-
- for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) {
- if ( prod->nonTermFirstSet.find( prod->prodName->id ) )
- prod->isLeftRec = true;
- }
-}
-
-void Compiler::printNonTermFirstSets()
-{
- for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) {
- cerr << prod->data << ": ";
- for ( ProdIdSet::Iter pid = prod->nonTermFirstSet; pid.lte(); pid++ )
- {
- if ( *pid < 0 )
- cerr << " <EPSILON>";
- else {
- LangEl *lel = langElIndex[*pid];
- cerr << " " << lel->name;
- }
- }
- cerr << endl;
-
- if ( prod->isLeftRec )
- cerr << "PROD IS LEFT REC: " << prod->data << endl;
- }
-}
-
-bool Compiler::makeFirstSetProd( Definition *prod, PdaState *state )
-{
- bool modified = false;
- for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) {
- if ( trans->key < firstNonTermId ) {
- long *inserted = prod->firstSet.insert( trans->key );
- if ( inserted != 0 )
- modified = true;
- }
- else {
- long *inserted = prod->firstSet.insert( trans->key );
- if ( inserted != 0 )
- modified = true;
-
- LangEl *klangEl = langElIndex[trans->key];
- if ( klangEl != 0 && klangEl->termDup != 0 ) {
- long *inserted2 = prod->firstSet.insert( klangEl->termDup->id );
- if ( inserted2 != 0 )
- modified = true;
- }
-
- bool hasEpsilon = false;
- LangEl *lel = langElIndex[trans->key];
- for ( LelDefList::Iter ldef = lel->defList; ldef.lte(); ldef++ ) {
- for ( ProdIdSet::Iter pid = ldef->firstSet;
- pid.lte(); pid++ )
- {
- if ( *pid == -1 )
- hasEpsilon = true;
- else {
- long *inserted = prod->firstSet.insert( *pid );
- if ( inserted != 0 )
- modified = true;
- }
- }
- }
-
- if ( hasEpsilon ) {
- if ( trans->value->toState->isFinState() ) {
- long *inserted = prod->firstSet.insert( -1 );
- if ( inserted != 0 )
- modified = true;
- }
-
- bool lmod = makeFirstSetProd( prod, trans->value->toState );
- if ( lmod )
- modified = true;
- }
- }
- }
- return modified;
-}
-
-
-void Compiler::makeFirstSets()
-{
- bool modified = true;
- while ( modified ) {
- modified = false;
- for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) {
- if ( prod->fsm->startState->isFinState() ) {
- long *inserted = prod->firstSet.insert( -1 );
- if ( inserted != 0 )
- modified = true;
- }
-
- bool lmod = makeFirstSetProd( prod, prod->fsm->startState );
- if ( lmod )
- modified = true;
- }
- }
-}
-
-void Compiler::printFirstSets()
-{
- for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) {
- cerr << prod->data << ": ";
- for ( ProdIdSet::Iter pid = prod->firstSet; pid.lte(); pid++ )
- {
- if ( *pid < 0 )
- cerr << " <EPSILON>";
- else {
- LangEl *lel = langElIndex[*pid];
- if ( lel != 0 )
- cerr << endl << " " << lel->name;
- else
- cerr << endl << " " << *pid;
- }
- }
- cerr << endl;
- }
-}
-
-void Compiler::insertUniqueEmptyProductions()
-{
- int limit = prodList.length();
- for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) {
- if ( prod->prodId == limit )
- break;
-
- /* Get a language element. */
- char name[20];
- sprintf(name, "U%li", prodList.length());
- LangEl *prodName = addLangEl( this, rootNamespace, name, LangEl::NonTerm );
- Definition *newDef = new Definition( InputLoc(), prodName,
- 0 /* FIXME new VarDef( name, 0 )*/,
- false, 0, prodList.length(), prodName->defList.length(),
- Definition::Production );
- prodName->defList.append( newDef );
- prodList.append( newDef );
-
- prod->uniqueEmptyLeader = prodName;
- }
-}
-
-void Compiler::makeRuntimeData()
-{
- long count = 0;
-
- /*
- * ProdLengths
- * ProdLhsIs
- * ProdNames
- * ProdCodeBlocks
- * ProdCodeBlockLens
- */
-
- runtimeData->frameInfo = new FrameInfo[nextFrameId];
- runtimeData->numFrames = nextFrameId;
- memset( runtimeData->frameInfo, 0, sizeof(FrameInfo) * nextFrameId );
-
- /*
- * Init code block.
- */
- if ( rootCodeBlock == 0 ) {
- runtimeData->rootCode = 0;
- runtimeData->rootCodeLen = 0;
- runtimeData->rootFrameId = 0;
- }
- else {
- runtimeData->rootCode = rootCodeBlock->codeWC.data;
- runtimeData->rootCodeLen = rootCodeBlock->codeWC.length();
- runtimeData->rootFrameId = rootCodeBlock->frameId;
- }
-
- runtimeData->frameInfo[rootCodeBlock->frameId].codeWV = 0;
- runtimeData->frameInfo[rootCodeBlock->frameId].codeLenWV = 0;
- runtimeData->frameInfo[rootCodeBlock->frameId].trees = rootCodeBlock->trees.data;
- runtimeData->frameInfo[rootCodeBlock->frameId].treesLen = rootCodeBlock->trees.length();
- runtimeData->frameInfo[rootCodeBlock->frameId].frameSize = rootLocalFrame->size();
- runtimeData->frameInfo[rootCodeBlock->frameId].argSize = 0;
-
- /*
- * prodInfo
- */
- count = prodList.length();
- runtimeData->prodInfo = new ProdInfo[count];
- runtimeData->numProds = count;
-
- count = 0;
- for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) {
- runtimeData->prodInfo[count].lhsId = prod->prodName->id;
- runtimeData->prodInfo[count].prodNum = prod->prodNum;
- runtimeData->prodInfo[count].length = prod->fsmLength;
- runtimeData->prodInfo[count].name = prod->data;
- runtimeData->prodInfo[count].frameId = -1;
-
- CodeBlock *block = prod->redBlock;
- if ( block != 0 ) {
- runtimeData->prodInfo[count].frameId = block->frameId;
- runtimeData->frameInfo[block->frameId].codeWV = block->codeWV.data;
- runtimeData->frameInfo[block->frameId].codeLenWV = block->codeWV.length();
-
- runtimeData->frameInfo[block->frameId].trees = block->trees.data;
- runtimeData->frameInfo[block->frameId].treesLen = block->trees.length();
-
- runtimeData->frameInfo[block->frameId].frameSize = block->localFrame->size();
- runtimeData->frameInfo[block->frameId].argSize = 0;
- }
-
- runtimeData->prodInfo[count].lhsUpref = true;
- runtimeData->prodInfo[count].copy = prod->copy.data;
- runtimeData->prodInfo[count].copyLen = prod->copy.length() / 2;
- count += 1;
- }
-
- /*
- * regionInfo
- */
- runtimeData->numRegions = regionList.length()+1;
- runtimeData->regionInfo = new RegionInfo[runtimeData->numRegions];
- memset( runtimeData->regionInfo, 0, sizeof(RegionInfo) * runtimeData->numRegions );
-
- runtimeData->regionInfo[0].name = "___EMPTY";
- runtimeData->regionInfo[0].defaultToken = -1;
- for ( RegionList::Iter reg = regionList; reg.lte(); reg++ ) {
- long regId = reg->id+1;
- runtimeData->regionInfo[regId].name = reg->name;
- runtimeData->regionInfo[regId].defaultToken =
- reg->defaultTokenDef == 0 ? -1 : reg->defaultTokenDef->tdLangEl->id;
- runtimeData->regionInfo[regId].eofFrameId = -1;
- runtimeData->regionInfo[regId].isIgnoreOnly = reg->isIgnoreOnly;
- runtimeData->regionInfo[regId].isCiOnly = reg->isCiOnly;
- runtimeData->regionInfo[regId].ciLelId = reg->isCiOnly ? reg->derivedFrom->ciLel->id : 0;
-
- CodeBlock *block = reg->preEofBlock;
- if ( block != 0 ) {
- runtimeData->regionInfo[regId].eofFrameId = block->frameId;
- runtimeData->frameInfo[block->frameId].codeWV = block->codeWV.data;
- runtimeData->frameInfo[block->frameId].codeLenWV = block->codeWV.length();
-
- runtimeData->frameInfo[block->frameId].trees = block->trees.data;
- runtimeData->frameInfo[block->frameId].treesLen = block->trees.length();
-
- runtimeData->frameInfo[block->frameId].frameSize = block->localFrame->size();
- runtimeData->frameInfo[block->frameId].argSize = 0;
- }
- }
-
- /*
- * lelInfo
- */
-
- count = nextSymbolId;
- runtimeData->lelInfo = new LangElInfo[count];
- runtimeData->numLangEls = count;
- memset( runtimeData->lelInfo, 0, sizeof(LangElInfo)*count );
-
- for ( int i = 0; i < nextSymbolId; i++ ) {
- LangEl *lel = langElIndex[i];
- if ( lel != 0 ) {
- runtimeData->lelInfo[i].name = lel->fullLit;
- runtimeData->lelInfo[i].xmlTag = lel->xmlTag;
- runtimeData->lelInfo[i].repeat = lel->isRepeat;
- runtimeData->lelInfo[i].list = lel->isList;
- runtimeData->lelInfo[i].literal = lel->isLiteral;
- runtimeData->lelInfo[i].ignore = lel->ignore;
- runtimeData->lelInfo[i].frameId = -1;
-
- CodeBlock *block = lel->transBlock;
- if ( block != 0 ) {
- runtimeData->lelInfo[i].frameId = block->frameId;
- runtimeData->frameInfo[block->frameId].codeWV = block->codeWV.data;
- runtimeData->frameInfo[block->frameId].codeLenWV = block->codeWV.length();
-
- runtimeData->frameInfo[block->frameId].trees = block->trees.data;
- runtimeData->frameInfo[block->frameId].treesLen = block->trees.length();
-
- runtimeData->frameInfo[block->frameId].frameSize = block->localFrame->size();
- runtimeData->frameInfo[block->frameId].argSize = 0;
- }
-
-
- runtimeData->lelInfo[i].objectTypeId =
- lel->objectDef == 0 ? 0 : lel->objectDef->id;
- runtimeData->lelInfo[i].ofiOffset = lel->ofiOffset;
- runtimeData->lelInfo[i].objectLength =
- ( lel->objectDef == 0 || lel->objectDef == tokenObj ) ? 0 :
- lel->objectDef->size();
-
-// runtimeData->lelInfo[i].contextTypeId = 0;
-// lel->context == 0 ? 0 : lel->context->contextObjDef->id;
-// runtimeData->lelInfo[i].contextLength = 0; //lel->context == 0 ? 0 :
-// lel->context->contextObjDef->size();
-// if ( lel->context != 0 ) {
-// cout << "type: " << runtimeData->lelInfo[i].contextTypeId << " length: " <<
-// runtimeData->lelInfo[i].contextLength << endl;
-// }
-
- runtimeData->lelInfo[i].termDupId = lel->termDup == 0 ? 0 : lel->termDup->id;
- runtimeData->lelInfo[i].genericId = lel->generic == 0 ? 0 : lel->generic->id;
-
- if ( lel->tokenDef != 0 && lel->tokenDef->join != 0 &&
- lel->tokenDef->join->context != 0 )
- runtimeData->lelInfo[i].markId = lel->tokenDef->join->mark->markId;
- else
- runtimeData->lelInfo[i].markId = -1;
-
- runtimeData->lelInfo[i].numCaptureAttr = 0;
- }
- else {
- memset(&runtimeData->lelInfo[i], 0, sizeof(LangElInfo) );
- runtimeData->lelInfo[i].name = "__UNUSED";
- runtimeData->lelInfo[i].xmlTag = "__UNUSED";
- runtimeData->lelInfo[i].frameId = -1;
- }
- }
-
- /*
- * FunctionInfo
- */
- count = functionList.length();
-
- runtimeData->functionInfo = new FunctionInfo[count];
- runtimeData->numFunctions = count;
- memset( runtimeData->functionInfo, 0, sizeof(FunctionInfo)*count );
- for ( FunctionList::Iter func = functionList; func.lte(); func++ ) {
- runtimeData->functionInfo[func->funcId].name = func->name;
- runtimeData->functionInfo[func->funcId].frameId = -1;
-
- CodeBlock *block = func->codeBlock;
- if ( block != 0 ) {
- runtimeData->functionInfo[func->funcId].frameId = block->frameId;
-
- runtimeData->frameInfo[block->frameId].codeWV = block->codeWV.data;
- runtimeData->frameInfo[block->frameId].codeLenWV = block->codeWV.length();
-
- runtimeData->frameInfo[block->frameId].codeWC = block->codeWC.data;
- runtimeData->frameInfo[block->frameId].codeLenWC = block->codeWC.length();
-
- runtimeData->frameInfo[block->frameId].trees = block->trees.data;
- runtimeData->frameInfo[block->frameId].treesLen = block->trees.length();
-
- runtimeData->frameInfo[block->frameId].frameSize = func->localFrame->size();
- runtimeData->frameInfo[block->frameId].argSize = func->paramListSize;
- }
-
- runtimeData->functionInfo[func->funcId].frameSize = func->localFrame->size();
- runtimeData->functionInfo[func->funcId].argSize = func->paramListSize;
- }
-
- /*
- * PatReplInfo
- */
-
- /* Filled in later after patterns are parsed. */
- runtimeData->patReplInfo = new PatReplInfo[nextPatReplId];
- memset( runtimeData->patReplInfo, 0, sizeof(PatReplInfo) * nextPatReplId );
- runtimeData->numPatterns = nextPatReplId;
- runtimeData->patReplNodes = 0;
- runtimeData->numPatternNodes = 0;
-
-
- /*
- * GenericInfo
- */
- count = 1;
- for ( NamespaceList::Iter nspace = namespaceList; nspace.lte(); nspace++ )
- count += nspace->genericList.length();
- assert( count == nextGenericId );
-
- runtimeData->genericInfo = new GenericInfo[count];
- runtimeData->numGenerics = count;
- memset( &runtimeData->genericInfo[0], 0, sizeof(GenericInfo) );
- for ( NamespaceList::Iter nspace = namespaceList; nspace.lte(); nspace++ ) {
- for ( GenericList::Iter gen = nspace->genericList; gen.lte(); gen++ ) {
- runtimeData->genericInfo[gen->id].type = gen->typeId;
- runtimeData->genericInfo[gen->id].typeArg = gen->utArg->typeId;
- runtimeData->genericInfo[gen->id].keyType = gen->keyUT != 0 ?
- gen->keyUT->typeId : 0;
- runtimeData->genericInfo[gen->id].keyOffset = 0;
- runtimeData->genericInfo[gen->id].langElId = gen->langEl->id;
- runtimeData->genericInfo[gen->id].parserId = gen->utArg->langEl->parserId;
- }
- }
-
- runtimeData->argvGenericId = argvTypeRef->generic->id;
-
- /*
- * Literals
- */
- runtimeData->numLiterals = literalStrings.length();
- runtimeData->litdata = new const char *[literalStrings.length()];
- runtimeData->litlen = new long [literalStrings.length()];
- runtimeData->literals = 0;
- for ( StringMap::Iter el = literalStrings; el.lte(); el++ ) {
- /* Data. */
- char *data = new char[el->key.length()+1];
- memcpy( data, el->key.data, el->key.length() );
- data[el->key.length()] = 0;
- runtimeData->litdata[el->value] = data;
-
- /* Length. */
- runtimeData->litlen[el->value] = el->key.length();
- }
-
- /* Captured attributes. Loop over tokens and count first. */
- long numCapturedAttr = 0;
-// for ( RegionList::Iter reg = regionList; reg.lte(); reg++ ) {
-// for ( TokenDefListReg::Iter td = reg->tokenDefList; td.lte(); td++ )
-// numCapturedAttr += td->reCaptureVect.length();
-// }
- runtimeData->captureAttr = new CaptureAttr[numCapturedAttr];
- runtimeData->numCapturedAttr = numCapturedAttr;
- memset( runtimeData->captureAttr, 0, sizeof( CaptureAttr ) * numCapturedAttr );
-
- count = 0;
-// for ( RegionList::Iter reg = regionList; reg.lte(); reg++ ) {
-// for ( TokenDefListReg::Iter td = reg->tokenDefList; td.lte(); td++ ) {
-// runtimeData->lelInfo[td->token->id].captureAttr = count;
-// runtimeData->lelInfo[td->token->id].numCaptureAttr = td->reCaptureVect.length();
-// for ( ReCaptureVect::Iter c = td->reCaptureVect; c.lte(); c++ ) {
-// runtimeData->captureAttr[count].mark_enter = c->markEnter->markId;
-// runtimeData->captureAttr[count].mark_leave = c->markLeave->markId;
-// runtimeData->captureAttr[count].offset = c->objField->offset;
-//
-// count += 1;
-// }
-// }
-// }
-
- runtimeData->fsmTables = fsmTables;
- runtimeData->pdaTables = pdaTables;
-
- /* FIXME: need a parser descriptor. */
- runtimeData->startStates = new int[nextParserId];
- runtimeData->eofLelIds = new int[nextParserId];
- runtimeData->parserLelIds = new int[nextParserId];
- runtimeData->numParsers = nextParserId;
- for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
- if ( lel->parserId >= 0 ) {
- runtimeData->startStates[lel->parserId] = lel->startState->stateNum;
- runtimeData->eofLelIds[lel->parserId] = lel->eofLel->id;
- runtimeData->parserLelIds[lel->parserId] = lel->id;
- }
- }
-
- runtimeData->globalSize = globalObjectDef->size();
-
- /*
- * firstNonTermId
- */
- runtimeData->firstNonTermId = firstNonTermId;
-
- /* Special trees. */
- runtimeData->integerId = intLangEl->id;
- runtimeData->stringId = strLangEl->id;
- runtimeData->anyId = anyLangEl->id;
- runtimeData->eofId = 0; //eofLangEl->id;
- runtimeData->noTokenId = noTokenLangEl->id;
-}
-
-/* Borrow alg->state for mapsTo. */
-/* Add to count the number of PatReplNode slots needed for the tree at
- * (parseTree, kid): one for each kid, one for every item on its left and
- * right ignore lists, and the same again for its following siblings and,
- * unless the parse tree node is flagged PF_NAMED or PF_ARTIFICIAL, for its
- * children. */
-void countNodes( Program *prg, int &count, ParseTree *parseTree, Kid *kid )
-{
-	/* Siblings are walked in a loop; only children are recursed into. */
-	while ( kid != 0 ) {
-		/* The kid itself. */
-		count++;
-
-		/* One slot for each left-ignore item. */
-		Tree *leftIgnore = treeLeftIgnore( prg, kid->tree );
-		if ( leftIgnore != 0 ) {
-			for ( Kid *item = leftIgnore->child; item != 0; item = item->next )
-				count++;
-		}
-
-		/* One slot for each right-ignore item. */
-		Tree *rightIgnore = treeRightIgnore( prg, kid->tree );
-		if ( rightIgnore != 0 ) {
-			for ( Kid *item = rightIgnore->child; item != 0; item = item->next )
-				count++;
-		}
-
-		//count += prg->rtd->lelInfo[kid->tree->id].numCaptureAttr;
-
-		/* Descend only when neither flag is set and there is a child. */
-		if ( ( parseTree->flags & ( PF_NAMED | PF_ARTIFICIAL ) ) == 0 &&
-				treeChild( prg, kid->tree ) != 0 )
-		{
-			countNodes( prg, count, parseTree->child, treeChild( prg, kid->tree ) );
-		}
-
-		/* Step both lists to the next sibling. */
-		parseTree = parseTree->next;
-		kid = kid->next;
-	}
-}
-
-/* Write the tree at (parseTree, kid) into the flat array nodes.
- *
- * The node for kid goes in nodes[ind]. nextAvail is the next unassigned index
- * in nodes; it is advanced each time a slot is handed out to an ignore item,
- * a first child or a next sibling. Links between nodes (leftIgnore,
- * rightIgnore, child, next) are stored as indices into nodes, with -1 meaning
- * none. bindId is the next binding to look for in bindings; it is advanced
- * when bindings->data[bindId] is this parseTree. Does nothing when kid is
- * null. */
-void fillNodes( Program *prg, int &nextAvail, Bindings *bindings, long &bindId,
- PatReplNode *nodes, ParseTree *parseTree, Kid *kid, int ind )
-{
- if ( kid != 0 ) {
- PatReplNode &node = nodes[ind];
-
- /* The child is followed only when the parse tree node is neither
- * PF_NAMED nor PF_ARTIFICIAL and the tree has a child. Same condition
- * as in countNodes. */
- Kid *child =
- !( parseTree->flags & PF_NAMED ) &&
- !( parseTree->flags & PF_ARTIFICIAL ) &&
- treeChild( prg, kid->tree ) != 0
- ?
- treeChild( prg, kid->tree ) : 0;
-
- ParseTree *ptChild =
- !( parseTree->flags & PF_NAMED ) &&
- !( parseTree->flags & PF_ARTIFICIAL ) &&
- treeChild( prg, kid->tree ) != 0
- ?
- parseTree->child : 0;
-
- /* Set up the fields. */
- node.id = kid->tree->id;
- node.prodNum = kid->tree->prodNum;
- node.length = stringLength( kid->tree->tokdata );
- node.data = stringData( kid->tree->tokdata );
-
- /* Left ignore items. They take consecutive slots starting at nextAvail
- * and are chained through their next field. */
- Tree *ignoreList = treeLeftIgnore( prg, kid->tree );
- Kid *ignore = ignoreList == 0 ? 0 : ignoreList->child;
- node.leftIgnore = ignore == 0 ? -1 : nextAvail;
-
- while ( ignore != 0 ) {
- /* Shadows the outer node for the duration of the loop body. */
- PatReplNode &node = nodes[nextAvail++];
-
- /* NOTE(review): the memset leaves child, leftIgnore, rightIgnore,
- * bindId and stop at 0 for ignore nodes, whereas the main node uses
- * -1 for an absent link. Confirm the runtime never follows these
- * links on ignore nodes. */
- memset( &node, 0, sizeof(PatReplNode) );
- node.id = ignore->tree->id;
- node.prodNum = ignore->tree->prodNum;
- node.next = ignore->next == 0 ? -1 : nextAvail;
-
- node.length = stringLength( ignore->tree->tokdata );
- node.data = stringData( ignore->tree->tokdata );
-
- ignore = ignore->next;
- }
-
- /* Right ignore items, laid out the same way as the left ones. */
- ignoreList = treeRightIgnore( prg, kid->tree );
- ignore = ignoreList == 0 ? 0 : ignoreList->child;
- node.rightIgnore = ignore == 0 ? -1 : nextAvail;
-
- while ( ignore != 0 ) {
- PatReplNode &node = nodes[nextAvail++];
-
- memset( &node, 0, sizeof(PatReplNode) );
- node.id = ignore->tree->id;
- node.prodNum = ignore->tree->prodNum;
- node.next = ignore->next == 0 ? -1 : nextAvail;
-
- node.length = stringLength( ignore->tree->tokdata );
- node.data = stringData( ignore->tree->tokdata );
-
- ignore = ignore->next;
- }
-
- ///* The captured attributes. */
- //for ( int i = 0; i < prg->rtd->lelInfo[kid->tree->id].numCaptureAttr; i++ ) {
- // CaptureAttr *cap = prg->rtd->captureAttr +
- // prg->rtd->lelInfo[kid->tree->id].captureAttr + i;
- //
- // Tree *attr = getAttr( kid->tree, cap->offset );
- //
- // PatReplNode &node = nodes[nextAvail++];
- // memset( &node, 0, sizeof(PatReplNode) );
- //
- // node.id = attr->id;
- // node.prodNum = attr->prodNum;
- // node.length = stringLength( attr->tokdata );
- // node.data = stringData( attr->tokdata );
- //}
-
- node.stop = parseTree->flags & PF_TERM_DUP;
-
- /* Reserve the slot for the first child before recursing into it. */
- node.child = child == 0 ? -1 : nextAvail++;
-
- /* Recurse. */
- fillNodes( prg, nextAvail, bindings, bindId, nodes, ptChild, child, node.child );
-
- /* Since the parser is bottom up the bindings are in a bottom up
- * traversal order. Check after recursing. */
- node.bindId = 0;
- if ( bindId < bindings->length() && bindings->data[bindId] == parseTree ) {
- /* Remember that binding ids are indexed from one. */
- node.bindId = bindId++;
-
- //cout << "binding match in " << __PRETTY_FUNCTION__ << endl;
- //cout << "bindId: " << node.bindId << endl;
- }
-
- /* Reserve the slot for the next sibling, if there is one. */
- node.next = kid->next == 0 ? -1 : nextAvail++;
-
- /* Move to the next child. */
- fillNodes( prg, nextAvail, bindings, bindId, nodes, parseTree->next, kid->next, node.next );
- }
-}
-
-/* Build runtimeData->patReplNodes from the parsed patterns and replacements.
- *
- * A first pass uses countNodes to size the array. A second pass records, for
- * each pattern and then each replacement, the offset of its root node and its
- * number of bindings in runtimeData->patReplInfo, and calls fillNodes to write
- * the nodes. The number of slots handed out must equal the counted total. */
-void Compiler::fillInPatterns( Program *prg )
-{
-	/*
-	 * patReplNodes
-	 */
-
-	/* Pass one: total number of nodes, accumulated by countNodes. */
-	int total = 0;
-	for ( PatternList::Iter pat = patternList; pat.lte(); pat++ ) {
-		ParseTree *root = pat->pdaRun->stackTop->next;
-		countNodes( prg, total, root, root->shadow );
-	}
-
-	for ( ReplList::Iter repl = replList; repl.lte(); repl++ ) {
-		ParseTree *root = repl->pdaRun->stackTop->next;
-		countNodes( prg, total, root, root->shadow );
-	}
-
-	runtimeData->patReplNodes = new PatReplNode[total];
-	runtimeData->numPatternNodes = total;
-
-	/* Pass two: hand out slots and fill them in. */
-	int nextAvail = 0;
-
-	for ( PatternList::Iter pat = patternList; pat.lte(); pat++ ) {
-		/* The root node takes the next free slot. */
-		int rootSlot = nextAvail++;
-		PatReplInfo &info = runtimeData->patReplInfo[pat->patRepId];
-		info.offset = rootSlot;
-
-		/* BindIds are indexed base one. */
-		info.numBindings = pat->pdaRun->bindings->length() - 1;
-
-		/* Binding search starts at id one. */
-		long bindId = 1;
-		ParseTree *root = pat->pdaRun->stackTop->next;
-		fillNodes( prg, nextAvail, pat->pdaRun->bindings, bindId,
-				runtimeData->patReplNodes, root, root->shadow, rootSlot );
-	}
-
-	for ( ReplList::Iter repl = replList; repl.lte(); repl++ ) {
-		/* The root node takes the next free slot. */
-		int rootSlot = nextAvail++;
-		PatReplInfo &info = runtimeData->patReplInfo[repl->patRepId];
-		info.offset = rootSlot;
-
-		/* BindIds are indexed base one. */
-		info.numBindings = repl->pdaRun->bindings->length() - 1;
-
-		/* Binding search starts at id one. */
-		long bindId = 1;
-		ParseTree *root = repl->pdaRun->stackTop->next;
-		fillNodes( prg, nextAvail, repl->pdaRun->bindings, bindId,
-				runtimeData->patReplNodes, root, root->shadow, rootSlot );
-	}
-
-	/* Every counted slot must have been handed out, and no more. */
-	assert( nextAvail == total );
-}
-
-
-/* Find the lowest offset below curLen at which the transitions of state can
- * be placed in pdaTables->owners without landing on a slot that is already
- * owned (owner != -1). Transition slots are spaced by the difference between
- * consecutive keys. Returns curLen when every lower offset conflicts. */
-int Compiler::findIndexOff( PdaTables *pdaTables, PdaGraph *pdaGraph, PdaState *state, int &curLen )
-{
-	for ( int candidate = 0; candidate < curLen; candidate++ ) {
-		bool conflict = false;
-		int slot = candidate;
-
-		for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) {
-			/* Slot already taken by another state: give up on this candidate. */
-			if ( pdaTables->owners[slot] != -1 ) {
-				conflict = true;
-				break;
-			}
-
-			/* Advance by the key distance to the following transition. */
-			slot += 1;
-			if ( ! trans.last() ) {
-				TransMap::Iter following = trans.next();
-				slot += following->key - trans->key - 1;
-			}
-		}
-
-		/* Got through the whole list without a conflict. */
-		if ( !conflict )
-			return candidate;
-	}
-
-	return curLen;
-}
-
-/* Comparison policy ordering states by the span of their transition keys,
- * widest span first. */
-struct CmpSpan
-{
-	/* Last key minus first key of the state's transitions; 0 when the state
-	 * has no transitions. */
-	static int keySpan( PdaState *state )
-	{
-		int span = 0;
-		if ( state->transMap.length() > 0 ) {
-			TransMap::Iter lowest = state->transMap.first();
-			TransMap::Iter highest = state->transMap.last();
-			span = highest->key - lowest->key;
-		}
-		return span;
-	}
-
-	/* Returns 1 when state1 has the narrower span, -1 when state2 does and
-	 * 0 when the spans are equal. */
-	static int compare( PdaState *state1, PdaState *state2 )
-	{
-		const int span1 = keySpan( state1 );
-		const int span2 = keySpan( state2 );
-
-		if ( span1 == span2 )
-			return 0;
-		return span1 < span2 ? 1 : -1;
-	}
-};
-
-/* Allocate a new PdaGraph, run lalr1GenerateParser on it for parserEls,
- * number its states and run analyzeMachine. The caller receives the newly
- * allocated graph. */
-PdaGraph *Compiler::makePdaGraph( LangElSet &parserEls )
-{
-	PdaGraph *graph = new PdaGraph();
-
-	lalr1GenerateParser( graph, parserEls );
-	graph->setStateNumbers();
-	analyzeMachine( graph, parserEls );
-
-	return graph;
-}
-
-/* Pack pdaGraph into a newly allocated PdaTables and return it.
- *
- * Tables filled in, in this order: indicies/owners (transitions of all states
- * overlaid into one array, with offsets giving each state's base), keys
- * (first and last transition key per state), targs, actInds, actions,
- * commitLen (all driven by the graph's action set), and tokenRegionInds,
- * tokenRegions and tokenPreRegions (driven by each state's regions). The
- * local counters count and pos are reused from one table to the next. */
-PdaTables *Compiler::makePdaTables( PdaGraph *pdaGraph )
-{
- int count, pos;
- PdaTables *pdaTables = new PdaTables;
-
- /*
- * Counting max indices.
- */
- /* Each state contributes one slot per transition plus the gaps between
- * consecutive keys. */
- count = 0;
- for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
- for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) {
- count++;
- if ( ! trans.last() ) {
- TransMap::Iter next = trans.next();
- count += next->key - trans->key - 1;
- }
- }
- }
-
-
- /* Allocate indicies and owners. -1 marks a free slot. */
- pdaTables->numIndicies = count;
- pdaTables->indicies = new int[count];
- pdaTables->owners = new int[count];
- for ( long i = 0; i < count; i++ ) {
- pdaTables->indicies[i] = -1;
- pdaTables->owners[i] = -1;
- }
-
- /* Allocate offsets. */
- int numStates = pdaGraph->stateList.length();
- pdaTables->offsets = new unsigned int[numStates];
- pdaTables->numStates = numStates;
-
- /* Place transitions into indicies/owners */
- PdaState **states = new PdaState*[numStates];
- long ds = 0;
- for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ )
- states[ds++] = state;
-
- /* Sorting based on span length. Gives an improvement, but incurs a
- * cost. Off for now. */
- //MergeSort< PdaState*, CmpSpan > mergeSort;
- //mergeSort.sort( states, numStates );
-
- /* indLen tracks the used length of indicies/owners so far. */
- int indLen = 0;
- for ( int s = 0; s < numStates; s++ ) {
- PdaState *state = states[s];
-
- /* Lowest conflict-free base for this state, or indLen to append. */
- int indOff = findIndexOff( pdaTables, pdaGraph, state, indLen );
- pdaTables->offsets[state->stateNum] = indOff;
-
- for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) {
- pdaTables->indicies[indOff] = trans->value->actionSetEl->key.id;
- pdaTables->owners[indOff] = state->stateNum;
- indOff++;
-
- if ( ! trans.last() ) {
- TransMap::Iter next = trans.next();
- indOff += next->key - trans->key - 1;
- }
- }
-
- if ( indOff > indLen )
- indLen = indOff;
- }
-
- /* We allocated the max, but compression gives us less. */
- pdaTables->numIndicies = indLen;
- delete[] states;
-
-
- /*
- * Keys
- */
- /* Two entries per state: first and last transition key, or 0, 0 when the
- * state has no transitions. */
- count = pdaGraph->stateList.length() * 2;;
- pdaTables->keys = new int[count];
- pdaTables->numKeys = count;
-
- count = 0;
- for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
- if ( state->transMap.length() == 0 ) {
- pdaTables->keys[count+0] = 0;
- pdaTables->keys[count+1] = 0;
- }
- else {
- TransMap::Iter first = state->transMap.first();
- TransMap::Iter last = state->transMap.last();
- pdaTables->keys[count+0] = first->key;
- pdaTables->keys[count+1] = last->key;
- }
- count += 2;
- }
-
- /*
- * Targs
- */
- count = pdaGraph->actionSet.length();
- pdaTables->targs = new unsigned int[count];
- pdaTables->numTargs = count;
-
- count = 0;
- for ( PdaActionSet::Iter asi = pdaGraph->actionSet; asi.lte(); asi++ )
- pdaTables->targs[count++] = asi->key.targ;
-
- /*
- * ActInds
- */
- /* Start position of each action set element's list within actions. Each
- * list takes its length plus one slot for the terminator. */
- count = pdaGraph->actionSet.length();
- pdaTables->actInds = new unsigned int[count];
- pdaTables->numActInds = count;
-
- count = pos = 0;
- for ( PdaActionSet::Iter asi = pdaGraph->actionSet; asi.lte(); asi++ ) {
- pdaTables->actInds[count++] = pos;
- pos += asi->key.actions.length() + 1;
- }
-
- /*
- * Actions
- */
- count = 0;
- for ( PdaActionSet::Iter asi = pdaGraph->actionSet; asi.lte(); asi++ )
- count += asi->key.actions.length() + 1;
-
- pdaTables->actions = new unsigned int[count];
- pdaTables->numActions = count;
-
- count = 0;
- for ( PdaActionSet::Iter asi = pdaGraph->actionSet; asi.lte(); asi++ ) {
- for ( ActDataList::Iter ali = asi->key.actions; ali.lte(); ali++ )
- pdaTables->actions[count++] = *ali;
-
- /* Zero terminates each list. */
- pdaTables->actions[count++] = 0;
- }
-
- /*
- * CommitLen
- */
- count = pdaGraph->actionSet.length();
- pdaTables->commitLen = new int[count];
- pdaTables->numCommitLen = count;
-
- count = 0;
- for ( PdaActionSet::Iter asi = pdaGraph->actionSet; asi.lte(); asi++ )
- pdaTables->commitLen[count++] = asi->key.commitLen;
-
- /*
- * tokenRegionInds. Start at one so region index 0 is null (unset).
- */
- count = 0;
- pos = 1;
- pdaTables->tokenRegionInds = new int[pdaTables->numStates];
- for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
- pdaTables->tokenRegionInds[count++] = pos;
- pos += state->regions.length() + 1;
- }
-
-
- /*
- * tokenRegions. Build in a null at the beginning.
- */
-
- count = 1;
- for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ )
- count += state->regions.length() + 1;
-
- pdaTables->numRegionItems = count;
- pdaTables->tokenRegions = new int[pdaTables->numRegionItems];
-
- count = 0;
- pdaTables->tokenRegions[count++] = 0;
- for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
- /* Region ids are stored plus one; zero terminates each state's list. */
- for ( RegionVect::Iter reg = state->regions; reg.lte(); reg++ )
- pdaTables->tokenRegions[count++] = (*reg)->id + 1;
-
- pdaTables->tokenRegions[count++] = 0;
- }
-
- /*
- * tokenPreRegions. Build in a null at the beginning.
- */
-
- /* Sized and laid out in parallel with tokenRegions: one entry per region
- * of the state, then a zero. */
- count = 1;
- for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ )
- count += state->regions.length() + 1;
-
- pdaTables->numPreRegionItems = count;
- pdaTables->tokenPreRegions = new int[pdaTables->numPreRegionItems];
-
- count = 0;
- pdaTables->tokenPreRegions[count++] = 0;
- for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
- for ( RegionVect::Iter reg = state->regions; reg.lte(); reg++ ) {
- /* Every entry for a state carries the same value: -1 when the state
- * has no pre-region or it was empty, otherwise its id plus one. */
- assert( state->preRegions.length() <= 1 );
- if ( state->preRegions.length() == 0 || state->preRegions[0]->wasEmpty )
- pdaTables->tokenPreRegions[count++] = -1;
- else
- pdaTables->tokenPreRegions[count++] = state->preRegions[0]->id + 1;
- }
-
- pdaTables->tokenPreRegions[count++] = 0;
- }
-
-
- return pdaTables;
-}
-
-/* Build the parser graph for parserEls, then derive its tables; both results
- * are stored in the pdaGraph and pdaTables members. */
-void Compiler::makeParser( LangElSet &parserEls )
-{
-	PdaGraph *graph = makePdaGraph( parserEls );
-	pdaGraph = graph;
-
-	pdaTables = makePdaTables( graph );
-}
-
diff --git a/colm/pdacodegen.cc b/colm/pdacodegen.cc
deleted file mode 100644
index 9e3dca47..00000000
--- a/colm/pdacodegen.cc
+++ /dev/null
@@ -1,653 +0,0 @@
-/*
- * Copyright 2006-2012 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Colm.
- *
- * Colm is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Colm is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Colm; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include <iostream>
-#include <stdlib.h>
-#include <ctype.h>
-#include <limits.h>
-#include "global.h"
-#include "parsedata.h"
-#include "avlmap.h"
-#include "avlbasic.h"
-#include "avlset.h"
-#include "mergesort.h"
-#include "pdacodegen.h"
-
-using std::cerr;
-using std::endl;
-
-/* NOTE(review): none of these macros is referenced in the part of this file
- * that was reviewed. The string-valued ones look like constants meant to be
- * pasted into generated C text; confirm against their users before relying on
- * that. */
-#define FRESH_BLOCK 8128
-#define act_sb "0x1"
-#define act_rb "0x2"
-#define lower "0x0000ffff"
-#define upper "0xffff0000"
-
-/* Write the first length bytes of path to out in a form suitable for the
- * inside of a double-quoted C string literal. Backslash, double quote and the
- * control characters \a \b \t \n \v \f \r are written as their two-character
- * escape sequences; every other byte is written unchanged. */
-void escapeLiteralString( std::ostream &out, const char *path, int length )
-{
-	const char *const stop = path + length;
-
-	for ( const char *cur = path; cur != stop; ++cur ) {
-		const char ch = *cur;
-
-		/* Escape sequence for ch, or null when ch is written as is. */
-		const char *escaped = 0;
-		if ( ch == '\\' )
-			escaped = "\\\\";
-		else if ( ch == '"' )
-			escaped = "\\\"";
-		else if ( ch == '\a' )
-			escaped = "\\a";
-		else if ( ch == '\b' )
-			escaped = "\\b";
-		else if ( ch == '\t' )
-			escaped = "\\t";
-		else if ( ch == '\n' )
-			escaped = "\\n";
-		else if ( ch == '\v' )
-			escaped = "\\v";
-		else if ( ch == '\f' )
-			escaped = "\\f";
-		else if ( ch == '\r' )
-			escaped = "\\r";
-
-		if ( escaped != 0 )
-			out << escaped;
-		else
-			out << ch;
-	}
-}
-
-/* Convenience overload for a NUL-terminated string: escapes everything up to,
- * but not including, the terminator. */
-void escapeLiteralString( std::ostream &out, const char *path )
-{
-	const int length = strlen(path);
-	escapeLiteralString( out, path, length );
-}
-
-/* Write a C comment to out listing every language element in pd->langEls,
- * one per line: its name (when it has one) followed by its id. */
-void PdaCodeGen::writeTokenIds()
-{
-	out << "/*\n";
-
-	for ( LelList::Iter lel = pd->langEls; lel.lte(); lel++ ) {
-		out << " ";
-
-		/* Unnamed elements are listed by id alone. */
-		if ( lel->name != 0 )
-			out << lel->name << " ";
-
-		out << lel->id << endl;
-	}
-
-	out << "*/\n\n";
-}
-
-/* Write the extern declaration of main_runtimeData to out, followed by a
- * blank line. */
-void PdaCodeGen::defineRuntime()
-{
-	out << "extern RuntimeData main_runtimeData;\n";
-	out << "\n";
-}
-
-/* Write runtimeData to out as C source text.
- *
- * Emitted in order: the code and tree byte arrays of each frame, the copy
- * arrays of each production, the root code array, then one initialized array
- * per info table (lelInfo, frameInfo, prodInfo, patReplInfo, patReplNodes,
- * functionInfo, regionInfo, genericInfo), the literal tables, the per-parser
- * arrays, captureAttr, and finally the main_runtimeData initializer that ties
- * them together. Arrays are only emitted for non-empty code/tree/copy blocks;
- * the tables refer to them by name or use 0.
- *
- * The pdaTables parameter is not referenced in the body; the initializer
- * names &fsmTables_start and &pid_0_pdaTables literally. */
-void PdaCodeGen::writeRuntimeData( RuntimeData *runtimeData, PdaTables *pdaTables )
-{
- /*
- * Blocks of code in frames.
- */
- for ( int i = 0; i < runtimeData->numFrames; i++ ) {
- /* FIXME: horrible code cloning going on here. */
- if ( runtimeData->frameInfo[i].codeLenWV > 0 ) {
- out << "Code code_" << i << "_wv[] = {\n\t";
-
- Code *block = runtimeData->frameInfo[i].codeWV;
- for ( int j = 0; j < runtimeData->frameInfo[i].codeLenWV; j++ ) {
- out << (unsigned long) block[j];
-
- /* Comma between items, line break after every eighth. */
- if ( j < runtimeData->frameInfo[i].codeLenWV-1 ) {
- out << ", ";
- if ( (j+1) % 8 == 0 )
- out << "\n\t";
- }
- }
- out << "\n};\n\n";
- }
-
- if ( runtimeData->frameInfo[i].codeLenWC > 0 ) {
- out << "Code code_" << i << "_wc[] = {\n\t";
-
- Code *block = runtimeData->frameInfo[i].codeWC;
- for ( int j = 0; j < runtimeData->frameInfo[i].codeLenWC; j++ ) {
- out << (unsigned long) block[j];
-
- if ( j < runtimeData->frameInfo[i].codeLenWC-1 ) {
- out << ", ";
- if ( (j+1) % 8 == 0 )
- out << "\n\t";
- }
- }
- out << "\n};\n\n";
- }
-
- if ( runtimeData->frameInfo[i].treesLen > 0 ) {
- out << "char trees_" << i << "[] = {\n\t";
-
- char *block = runtimeData->frameInfo[i].trees;
- for ( int j = 0; j < runtimeData->frameInfo[i].treesLen; j++ ) {
- out << (long) block[j];
-
- if ( j < runtimeData->frameInfo[i].treesLen-1 ) {
- out << ", ";
- if ( (j+1) % 8 == 0 )
- out << "\n\t";
- }
- }
- out << "\n};\n\n";
- }
- }
-
- /*
- * Blocks in production info.
- */
- for ( int i = 0; i < runtimeData->numProds; i++ ) {
- if ( runtimeData->prodInfo[i].copyLen > 0 ) {
- out << "unsigned char copy_" << i << "[] = {\n\t";
-
- /* copyLen counts pairs: two bytes are written per entry. */
- unsigned char *block = runtimeData->prodInfo[i].copy;
- for ( int j = 0; j < runtimeData->prodInfo[i].copyLen; j++ ) {
- out << (long) block[j*2] << ", " << (long) block[j*2+1];
-
- if ( j < runtimeData->prodInfo[i].copyLen-1 ) {
- out << ", ";
- if ( (j+1) % 8 == 0 )
- out << "\n\t";
- }
- }
- out << "\n};\n\n";
- }
- }
-
- /*
- * Init code.
- */
- out << "Code " << rootCode() << "[] = {\n\t";
- Code *block = runtimeData->rootCode ;
- for ( int j = 0; j < runtimeData->rootCodeLen; j++ ) {
- out << (unsigned int) block[j];
-
- if ( j < runtimeData->rootCodeLen-1 ) {
- out << ", ";
- if ( (j+1) % 8 == 0 )
- out << "\n\t";
- }
- }
- out << "\n};\n\n";
-
- /*
- * lelInfo
- */
- out << "LangElInfo " << lelInfo() << "[] = {\n";
- for ( int i = 0; i < runtimeData->numLangEls; i++ ) {
- out << "\t{";
-
- /* Name. */
- out << " \"";
- escapeLiteralString( out, runtimeData->lelInfo[i].name );
- out << "\", ";
-
- /* XML tag. */
- out << " \"";
- escapeLiteralString( out, runtimeData->lelInfo[i].xmlTag );
- out << "\", ";
-
- /* Repeat, list, literal, ignore flags. */
- out << (int)runtimeData->lelInfo[i].repeat << ", " <<
- (int)runtimeData->lelInfo[i].list << ", " <<
- (int)runtimeData->lelInfo[i].literal << ", " <<
- (int)runtimeData->lelInfo[i].ignore << ", ";
-
- out << runtimeData->lelInfo[i].frameId << ", ";
-
- out << runtimeData->lelInfo[i].objectTypeId << ", ";
-
- out << runtimeData->lelInfo[i].ofiOffset << ", ";
-
- out << runtimeData->lelInfo[i].objectLength << ", ";
-
-// out << runtimeData->lelInfo[i].contextTypeId << ", ";
-// out << runtimeData->lelInfo[i].contextLength << ", ";
-
- out << runtimeData->lelInfo[i].termDupId << ", ";
-
- out << runtimeData->lelInfo[i].genericId << ", ";
-
- out << runtimeData->lelInfo[i].markId << ", ";
-
- out << runtimeData->lelInfo[i].captureAttr << ", ";
-
- out << runtimeData->lelInfo[i].numCaptureAttr;
-
- out << " }";
-
- if ( i < runtimeData->numLangEls-1 )
- out << ",\n";
- }
- out << "\n};\n\n";
-
- /*
- * frameInfo
- */
- out << "FrameInfo " << frameInfo() << "[] = {\n";
- for ( int i = 0; i < runtimeData->numFrames; i++ ) {
- out << "\t{ ";
-
- /* Refer to the arrays emitted above by name, or 0 when empty. */
- if ( runtimeData->frameInfo[i].codeLenWV > 0 )
- out << "code_" << i << "_wv, ";
- else
- out << "0, ";
- out << runtimeData->frameInfo[i].codeLenWV << ", ";
-
- if ( runtimeData->frameInfo[i].codeLenWC > 0 )
- out << "code_" << i << "_wc, ";
- else
- out << "0, ";
- out << runtimeData->frameInfo[i].codeLenWC << ", ";
-
- if ( runtimeData->frameInfo[i].treesLen > 0 )
- out << "trees_" << i << ", ";
- else
- out << "0, ";
-
- out <<
- runtimeData->frameInfo[i].treesLen << ", " <<
- runtimeData->frameInfo[i].argSize << ", " <<
- runtimeData->frameInfo[i].frameSize;
-
- out << " }";
-
- if ( i < runtimeData->numFrames-1 )
- out << ",\n";
- }
- out << "\n};\n\n";
-
-
- /*
- * prodInfo
- */
- out << "ProdInfo " << prodInfo() << "[] = {\n";
- for ( int i = 0; i < runtimeData->numProds; i++ ) {
- out << "\t{ ";
-
- out << runtimeData->prodInfo[i].lhsId << ", ";
- out << runtimeData->prodInfo[i].prodNum << ", ";
- out << runtimeData->prodInfo[i].length << ", ";
-
- /* The production name is written without escaping. */
- out <<
- '"' << runtimeData->prodInfo[i].name << "\", " <<
- runtimeData->prodInfo[i].frameId << ", " <<
- (int)runtimeData->prodInfo[i].lhsUpref << ", ";
-
- if ( runtimeData->prodInfo[i].copyLen > 0 )
- out << "copy_" << i << ", ";
- else
- out << "0, ";
-
- out << runtimeData->prodInfo[i].copyLen << ", ";
-
-
- out << " }";
-
- if ( i < runtimeData->numProds-1 )
- out << ",\n";
- }
- out << "\n};\n\n";
-
- /*
- * patReplInfo
- */
- out << "PatReplInfo " << patReplInfo() << "[] = {\n";
- for ( int i = 0; i < runtimeData->numPatterns; i++ ) {
- out << " { " << runtimeData->patReplInfo[i].offset << ", " <<
- runtimeData->patReplInfo[i].numBindings << " },\n";
- }
- out << "};\n\n";
-
- /*
- * patReplNodes
- */
- out << "PatReplNode " << patReplNodes() << "[] = {\n";
- for ( int i = 0; i < runtimeData->numPatternNodes; i++ ) {
- PatReplNode &node = runtimeData->patReplNodes[i];
- out << " { " << node.id << ", " <<
- node.prodNum << ", " << node.next << ", " <<
- node.child << ", " << node.bindId << ", ";
- /* Token data is written with its explicit length, or as 0 when absent. */
- if ( node.data == 0 )
- out << "0";
- else {
- out << '\"';
- escapeLiteralString( out, node.data, node.length );
- out << '\"';
- }
- out << ", " << node.length << ", ";
-
- out << node.leftIgnore << ", ";
- out << node.rightIgnore << ", ";
-
- out << (int)node.stop << " },\n";
- }
- out << "};\n\n";
-
- /*
- * functionInfo
- */
- out << "FunctionInfo " << functionInfo() << "[] = {\n";
- for ( int i = 0; i < runtimeData->numFunctions; i++ ) {
- out << "\t{ " <<
- "\"" << runtimeData->functionInfo[i].name << "\", " <<
- runtimeData->functionInfo[i].frameId << ", " <<
- runtimeData->functionInfo[i].argSize << ", " <<
- runtimeData->functionInfo[i].frameSize;
- out << " }";
-
- if ( i < runtimeData->numFunctions-1 )
- out << ",\n";
- }
- out << "\n};\n\n";
-
- /*
- * regionInfo
- */
- out << "RegionInfo " << regionInfo() << "[] = {\n";
- for ( int i = 0; i < runtimeData->numRegions; i++ ) {
- out << "\t{ \"";
- /* Name. */
- escapeLiteralString( out, runtimeData->regionInfo[i].name );
- out << "\", " << runtimeData->regionInfo[i].defaultToken <<
- ", " << runtimeData->regionInfo[i].eofFrameId <<
- ", " << runtimeData->regionInfo[i].isIgnoreOnly <<
- ", " << runtimeData->regionInfo[i].isCiOnly <<
- ", " << runtimeData->regionInfo[i].ciLelId <<
- " }";
-
- if ( i < runtimeData->numRegions-1 )
- out << ",\n";
- }
- out << "\n};\n\n";
-
- /*
- * genericInfo
- */
- out << "GenericInfo " << genericInfo() << "[] = {\n";
- for ( int i = 0; i < runtimeData->numGenerics; i++ ) {
- out << "\t{ " <<
- runtimeData->genericInfo[i].type << ", " <<
- runtimeData->genericInfo[i].typeArg << ", " <<
- runtimeData->genericInfo[i].keyOffset << ", " <<
- runtimeData->genericInfo[i].keyType << ", " <<
- runtimeData->genericInfo[i].langElId << ", " <<
- runtimeData->genericInfo[i].parserId << " },\n";
- }
- out << "};\n\n";
-
- /*
- * literals
- */
- /* NOTE(review): the literal text is escaped up to its first NUL byte,
- * while litlen below is written from the stored length. Confirm literals
- * cannot contain embedded NULs. */
- out << "const char *" << litdata() << "[] = {\n";
- for ( int i = 0; i < runtimeData->numLiterals; i++ ) {
- out << "\t\"";
- escapeLiteralString( out, runtimeData->litdata[i] );
- out << "\",\n";
- }
- out << "};\n\n";
-
- out << "long " << litlen() << "[] = {\n\t";
- for ( int i = 0; i < runtimeData->numLiterals; i++ )
- out << runtimeData->litlen[i] << ", ";
- out << "};\n\n";
-
- /* One null Head pointer per literal. */
- out << "Head *" << literals() << "[] = {\n\t";
- for ( int i = 0; i < runtimeData->numLiterals; i++ )
- out << "0, ";
- out << "};\n\n";
-
- out << "int startStates[] = {\n\t";
- for ( long i = 0; i < runtimeData->numParsers; i++ ) {
- out << runtimeData->startStates[i] << ", ";
- }
- out << "};\n\n";
-
- out << "int eofLelIds[] = {\n\t";
- for ( long i = 0; i < runtimeData->numParsers; i++ ) {
- out << runtimeData->eofLelIds[i] << ", ";
- }
- out << "};\n\n";
-
- out << "int parserLelIds[] = {\n\t";
- for ( long i = 0; i < runtimeData->numParsers; i++ ) {
- out << runtimeData->parserLelIds[i] << ", ";
- }
- out << "};\n\n";
-
- out << "CaptureAttr captureAttr[] = {\n";
- for ( long i = 0; i < runtimeData->numCapturedAttr; i++ ) {
- out << "\t{ " <<
- runtimeData->captureAttr[i].mark_enter << ", " <<
- runtimeData->captureAttr[i].mark_leave << ", " <<
- runtimeData->captureAttr[i].offset << " },\n";
- }
-
- out << "};\n\n";
-
- /* The aggregate initializer; the order of the entries below is the order
- * in which they are written to the generated text. */
- out <<
- "RuntimeData main_runtimeData = \n"
- "{\n"
- " " << lelInfo() << ",\n"
- " " << runtimeData->numLangEls << ",\n"
- "\n"
- " " << prodInfo() << ",\n"
- " " << runtimeData->numProds << ",\n"
- "\n"
- " " << regionInfo() << ",\n"
- " " << runtimeData->numRegions << ",\n"
- "\n"
- " " << rootCode() << ",\n"
- " " << runtimeData->rootCodeLen << ",\n"
- " " << runtimeData->rootFrameId << ",\n"
- "\n"
- " " << frameInfo() << ",\n"
- " " << runtimeData->numFrames << ",\n"
- "\n"
- " " << functionInfo() << ",\n"
- " " << runtimeData->numFunctions << ",\n"
- "\n"
- " " << patReplInfo() << ",\n"
- " " << runtimeData->numPatterns << ",\n"
- "\n"
- " " << patReplNodes() << ",\n"
- " " << runtimeData->numPatternNodes << ",\n"
- "\n"
- " " << genericInfo() << ",\n"
- " " << runtimeData->numGenerics << ",\n"
- " " << runtimeData->argvGenericId << ",\n"
- "\n"
- " " << litdata() << ",\n"
- " " << litlen() << ",\n"
- " " << literals() << ",\n"
- " " << runtimeData->numLiterals << ",\n"
- "\n"
- " captureAttr,\n"
- " " << runtimeData->numCapturedAttr << ",\n"
- "\n"
- " &fsmTables_start,\n"
- " &pid_0_pdaTables,\n"
- " startStates, eofLelIds, parserLelIds, " << runtimeData->numParsers << ",\n"
- "\n"
- " " << runtimeData->globalSize << ",\n"
- "\n"
- " " << runtimeData->firstNonTermId << ",\n"
- " " << runtimeData->integerId << ",\n"
- " " << runtimeData->stringId << ",\n"
- " " << runtimeData->anyId << ",\n"
- " " << runtimeData->eofId << ",\n"
- " " << runtimeData->noTokenId << "\n"
- "};\n"
- "\n";
-}
-
-void PdaCodeGen::writeParserData( long id, PdaTables *tables )
-{
- String prefix = "pid_" + String(0, "%ld", id) + "_";
-
- out << "int " << prefix << indicies() << "[] = {\n\t";
- for ( int i = 0; i < tables->numIndicies; i++ ) {
- out << tables->indicies[i];
-
- if ( i < tables->numIndicies-1 ) {
- out << ", ";
- if ( (i+1) % 8 == 0 )
- out << "\n\t";
- }
- }
- out << "\n};\n\n";
-
- out << "int " << prefix << owners() << "[] = {\n\t";
- for ( int i = 0; i < tables->numIndicies; i++ ) {
- out << tables->owners[i];
-
- if ( i < tables->numIndicies-1 ) {
- out << ", ";
- if ( (i+1) % 8 == 0 )
- out << "\n\t";
- }
- }
- out << "\n};\n\n";
-
- out << "int " << prefix << keys() << "[] = {\n\t";
- for ( int i = 0; i < tables->numKeys; i++ ) {
- out << tables->keys[i];
-
- if ( i < tables->numKeys-1 ) {
- out << ", ";
- if ( (i+1) % 8 == 0 )
- out << "\n\t";
- }
- }
- out << "\n};\n\n";
-
- out << "unsigned int " << prefix << offsets() << "[] = {\n\t";
- for ( int i = 0; i < tables->numStates; i++ ) {
- out << tables->offsets[i];
-
- if ( i < tables->numStates-1 ) {
- out << ", ";
- if ( (i+1) % 8 == 0 )
- out << "\n\t";
- }
- }
- out << "\n};\n\n";
-
- out << "unsigned int " << prefix << targs() << "[] = {\n\t";
- for ( int i = 0; i < tables->numTargs; i++ ) {
- out << tables->targs[i];
-
- if ( i < tables->numTargs-1 ) {
- out << ", ";
- if ( (i+1) % 8 == 0 )
- out << "\n\t";
- }
- }
- out << "\n};\n\n";
-
- out << "unsigned int " << prefix << actInds() << "[] = {\n\t";
- for ( int i = 0; i < tables->numActInds; i++ ) {
- out << tables->actInds[i];
-
- if ( i < tables->numActInds-1 ) {
- out << ", ";
- if ( (i+1) % 8 == 0 )
- out << "\n\t";
- }
- }
- out << "\n};\n\n";
-
- out << "unsigned int " << prefix << actions() << "[] = {\n\t";
- for ( int i = 0; i < tables->numActions; i++ ) {
- out << tables->actions[i];
-
- if ( i < tables->numActions-1 ) {
- out << ", ";
- if ( (i+1) % 8 == 0 )
- out << "\n\t";
- }
- }
- out << "\n};\n\n";
-
- out << "int " << prefix << commitLen() << "[] = {\n\t";
- for ( int i = 0; i < tables->numCommitLen; i++ ) {
- out << tables->commitLen[i];
-
- if ( i < tables->numCommitLen-1 ) {
- out << ", ";
- if ( (i+1) % 8 == 0 )
- out << "\n\t";
- }
- }
- out << "\n};\n\n";
-
- out << "int " << prefix << tokenRegionInds() << "[] = {\n\t";
- for ( int i = 0; i < tables->numStates; i++ ) {
- out << tables->tokenRegionInds[i];
-
- if ( i < tables->numStates-1 ) {
- out << ", ";
- if ( (i+1) % 8 == 0 )
- out << "\n\t";
- }
- }
- out << "\n};\n\n";
-
- out << "int " << prefix << tokenRegions() << "[] = {\n\t";
- for ( int i = 0; i < tables->numRegionItems; i++ ) {
- out << tables->tokenRegions[i];
-
- if ( i < tables->numRegionItems-1 ) {
- out << ", ";
- if ( (i+1) % 8 == 0 )
- out << "\n\t";
- }
- }
- out << "\n};\n\n";
-
- out << "int " << prefix << tokenPreRegions() << "[] = {\n\t";
- for ( int i = 0; i < tables->numPreRegionItems; i++ ) {
- out << tables->tokenPreRegions[i];
-
- if ( i < tables->numPreRegionItems-1 ) {
- out << ", ";
- if ( (i+1) % 8 == 0 )
- out << "\n\t";
- }
- }
- out << "\n};\n\n";
-
- out <<
- "PdaTables " << prefix << "pdaTables =\n"
- "{\n"
- " " << prefix << indicies() << ",\n"
- " " << prefix << owners() << ",\n"
- " " << prefix << keys() << ",\n"
- " " << prefix << offsets() << ",\n"
- " " << prefix << targs() << ",\n"
- " " << prefix << actInds() << ",\n"
- " " << prefix << actions() << ",\n"
- " " << prefix << commitLen() << ",\n"
-
- " " << prefix << tokenRegionInds() << ",\n"
- " " << prefix << tokenRegions() << ",\n"
- " " << prefix << tokenPreRegions() << ",\n"
- "\n"
- " " << tables->numIndicies << ",\n"
- " " << tables->numKeys << ",\n"
- " " << tables->numStates << ",\n"
- " " << tables->numTargs << ",\n"
- " " << tables->numActInds << ",\n"
- " " << tables->numActions << ",\n"
- " " << tables->numCommitLen << ",\n"
- " " << tables->numRegionItems << ",\n"
- " " << tables->numPreRegionItems << "\n"
- "};\n"
- "\n";
-}
-
diff --git a/colm/pdacodegen.h b/colm/pdacodegen.h
deleted file mode 100644
index 8e5e7a3a..00000000
--- a/colm/pdacodegen.h
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Copyright 2007-2012 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Colm.
- *
- * Colm is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Colm is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Colm; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-
-#ifndef _PDACODEGEN_H
-#define _PDACODEGEN_H
-
-struct Compiler;
-
-struct PdaCodeGen
-{
- PdaCodeGen( const char *fileName, const char *parserName, Compiler *pd, ostream &out )
- :
- fileName(fileName),
- parserName(parserName),
- pd(pd),
- out(out)
- {}
-
- /*
- * Code Generation.
- */
- void startCodeGen();
- void endCodeGen( int endLine );
-
- void writeTokenIds();
- void writeLangEls();
-
- void writeReference( Definition *prod, char *data );
- void writeUndoReference( Definition *prod, char *data );
- void writeFinalReference( Definition *prod, char *data );
- void writeFirstLocate( Definition *prod );
- void writeRhsLocate( Definition *prod );
-
- void defineRuntime();
- void writeRuntimeData( RuntimeData *runtimeData, PdaTables *pdaTables );
- void writeParserData( long id, PdaTables *tables );
-
- String PARSER() { return "parser_"; }
-
- String startState() { return PARSER() + "startState"; }
- String indicies() { return PARSER() + "indicies"; }
- String owners() { return PARSER() + "owners"; }
- String keys() { return PARSER() + "keys"; }
- String offsets() { return PARSER() + "offsets"; }
- String targs() { return PARSER() + "targs"; }
- String actInds() { return PARSER() + "actInds"; }
- String actions() { return PARSER() + "actions"; }
- String commitLen() { return PARSER() + "commitLen"; }
- String fssProdIdIndex() { return PARSER() + "fssProdIdIndex"; }
- String prodLengths() { return PARSER() + "prodLengths"; }
- String prodLhsIds() { return PARSER() + "prodLhsIds"; }
- String prodNames() { return PARSER() + "prodNames"; }
- String lelInfo() { return PARSER() + "lelInfo"; }
- String prodInfo() { return PARSER() + "prodInfo"; }
- String tokenRegionInds() { return PARSER() + "tokenRegionInds"; }
- String tokenRegions() { return PARSER() + "tokenRegions"; }
- String tokenPreRegions() { return PARSER() + "tokenPreRegions"; }
- String prodCodeBlocks() { return PARSER() + "prodCodeBlocks"; }
- String prodCodeBlockLens() { return PARSER() + "prodCodeBlockLens"; }
- String rootCode() { return PARSER() + "rootCode"; }
- String frameInfo() { return PARSER() + "frameInfo"; }
- String functionInfo() { return PARSER() + "functionInfo"; }
- String objFieldInfo() { return PARSER() + "objFieldInfo"; }
- String patReplInfo() { return PARSER() + "patReplInfo"; }
- String patReplNodes() { return PARSER() + "patReplNodes"; }
- String regionInfo() { return PARSER() + "regionInfo"; }
- String genericInfo() { return PARSER() + "genericInfo"; }
- String litdata() { return PARSER() + "litdata"; }
- String litlen() { return PARSER() + "litlen"; }
- String literals() { return PARSER() + "literals"; }
- String fsmTables() { return PARSER() + "fsmTables"; }
-
- /*
- * Graphviz Generation
- */
- void writeTransList( PdaState *state );
- void writeDotFile( PdaGraph *graph );
- void writeDotFile( );
-
-
- const char *fileName;
- const char *parserName;
- Compiler *pd;
- ostream &out;
-};
-
-#endif
diff --git a/colm/pdagraph.cc b/colm/pdagraph.cc
deleted file mode 100644
index 8f17b7a5..00000000
--- a/colm/pdagraph.cc
+++ /dev/null
@@ -1,533 +0,0 @@
-/*
- * Copyright 2006-2012 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Colm.
- *
- * Colm is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Colm is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Colm; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include <string.h>
-#include <iostream>
-#include <string.h>
-#include <assert.h>
-#include "global.h"
-#include "pdagraph.h"
-#include "mergesort.h"
-
-using std::cerr;
-using std::endl;
-
-/* Create a new fsm state. State has not out transitions or in transitions, not
- * out out transition data and not number. */
-PdaState::PdaState()
-:
- /* No in transitions. */
- inRange(),
-
- /* No entry points, or epsilon trans. */
- pendingCommits(),
-
- stateSet(0),
-
- /* Only used during merging. Normally null. */
- stateDictEl(0),
-
- /* No state identification bits. */
- stateBits(0),
-
- onClosureQueue(false),
- inClosedMap(false),
- followMarked(false),
-
- advanceReductions(false)
-{
-}
-
-/* Copy everything except the action transitions. That is left up to the
- * PdaGraph copy constructor. */
-PdaState::PdaState(const PdaState &other)
-:
- inRange(),
-
- /* Duplicate the entry id set, epsilon transitions and context sets. These
- * are sets of integers and as such need no fixing. */
- pendingCommits(other.pendingCommits),
-
- stateSet(0),
-
- /* This is only used during merging. Normally null. */
- stateDictEl(0),
-
- /* Fsm state data. */
- stateBits(other.stateBits),
-
- dotSet(other.dotSet),
- onClosureQueue(false),
- inClosedMap(false),
- followMarked(false),
-
- transMap()
-{
- /* Duplicate all the transitions. */
- for ( TransMap::Iter trans = other.transMap; trans.lte(); trans++ ) {
- /* Dupicate and store the orginal target in the transition. This will
- * be corrected once all the states have been created. */
- PdaTrans *newTrans = new PdaTrans(*trans->value);
- newTrans->toState = trans->value->toState;
- transMap.append( TransMapEl( newTrans->lowKey, newTrans ) );
- }
-}
-
-/* If there is a state dict element, then delete it. Everything else is left
- * up to the FsmGraph destructor. */
-PdaState::~PdaState()
-{
- if ( stateDictEl != 0 )
- delete stateDictEl;
-}
-
-/* Graph constructor. */
-PdaGraph::PdaGraph()
-:
- /* No start state. */
- startState(0)
-{
-}
-
-/* Copy all graph data including transitions. */
-PdaGraph::PdaGraph( const PdaGraph &graph )
-:
- /* Lists start empty. Will be filled by copy. */
- stateList(),
- misfitList(),
-
- /* Copy in the entry points,
- * pointers will be resolved later. */
- startState(graph.startState),
-
- /* Will be filled by copy. */
- finStateSet()
-{
- /* Create the states and record their map in the original state. */
- PdaStateList::Iter origState = graph.stateList;
- for ( ; origState.lte(); origState++ ) {
- /* Make the new state. */
- PdaState *newState = new PdaState( *origState );
-
- /* Add the state to the list. */
- stateList.append( newState );
-
- /* Set the mapsTo item of the old state. */
- origState->stateMap = newState;
- }
-
- /* Derefernce all the state maps. */
- for ( PdaStateList::Iter state = stateList; state.lte(); state++ ) {
- for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) {
- /* The points to the original in the src machine. The taget's duplicate
- * is in the statemap. */
- PdaState *toState = trans->value->toState != 0 ?
- trans->value->toState->stateMap : 0;
-
- /* Attach The transition to the duplicate. */
- trans->value->toState = 0;
- attachTrans( state, toState, trans->value );
- }
- }
-
- /* Fix the start state pointer and the new start state's count of in
- * transiions. */
- startState = startState->stateMap;
-
- /* Build the final state set. */
- PdaStateSet::Iter st = graph.finStateSet;
- for ( ; st.lte(); st++ )
- finStateSet.insert((*st)->stateMap);
-}
-
-/* Deletes all transition data then deletes each state. */
-PdaGraph::~PdaGraph()
-{
- /* Delete all the transitions. */
- PdaStateList::Iter state = stateList;
- for ( ; state.lte(); state++ ) {
- for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ )
- delete trans->value;
- }
-
- /* Delete all the states. */
- stateList.empty();
-}
-
-/* Set a state final. The state has its isFinState set to true and the state
- * is added to the finStateSet. */
-void PdaGraph::setFinState( PdaState *state )
-{
- /* Is it already a fin state. */
- if ( state->stateBits & SB_ISFINAL )
- return;
-
- state->stateBits |= SB_ISFINAL;
- finStateSet.insert( state );
-}
-
-void PdaGraph::unsetAllFinStates( )
-{
- for ( PdaStateSet::Iter st = finStateSet; st.lte(); st++ ) {
- PdaState *state = *st;
- state->stateBits &= ~ SB_ISFINAL;
- }
- finStateSet.empty();
-}
-
-/* Set and unset a state as the start state. */
-void PdaGraph::setStartState( PdaState *state )
-{
- /* Sould change from unset to set. */
- assert( startState == 0 );
- startState = state;
-}
-
-/* Mark all states reachable from state. Traverses transitions forward. Used
- * for removing states that have no path into them. */
-void PdaGraph::markReachableFromHere( PdaState *state )
-{
- /* Base case: return; */
- if ( state->stateBits & SB_ISMARKED )
- return;
-
- /* Set this state as processed. We are going to visit all states that this
- * state has a transition to. */
- state->stateBits |= SB_ISMARKED;
-
- /* Recurse on all out transitions. */
- for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) {
- if ( trans->value->toState != 0 )
- markReachableFromHere( trans->value->toState );
- }
-}
-
-void PdaGraph::setStateNumbers()
-{
- int curNum = 0;
- PdaStateList::Iter state = stateList;
- for ( ; state.lte(); state++ )
- state->stateNum = curNum++;
-}
-
-/* Insert a transition into an inlist. The head must be supplied. */
-void PdaGraph::attachToInList( PdaState *from, PdaState *to,
- PdaTrans *&head, PdaTrans *trans )
-{
- trans->ilnext = head;
- trans->ilprev = 0;
-
- /* If in trans list is not empty, set the head->prev to trans. */
- if ( head != 0 )
- head->ilprev = trans;
-
- /* Now insert ourselves at the front of the list. */
- head = trans;
-};
-
-/* Detach a transition from an inlist. The head of the inlist must be supplied. */
-void PdaGraph::detachFromInList( PdaState *from, PdaState *to,
- PdaTrans *&head, PdaTrans *trans )
-{
- /* Detach in the inTransList. */
- if ( trans->ilprev == 0 )
- head = trans->ilnext;
- else
- trans->ilprev->ilnext = trans->ilnext;
-
- if ( trans->ilnext != 0 )
- trans->ilnext->ilprev = trans->ilprev;
-}
-
-/* Attach states on the default transition, range list or on out/in list key.
- * Type of attaching and is controlled by keyType. First makes a new
- * transition. If there is already a transition out from fromState on the
- * default, then will assertion fail. */
-PdaTrans *PdaGraph::appendNewTrans( PdaState *from, PdaState *to, long lowKey, long )
-{
- /* Make the new transition. */
- PdaTrans *retVal = new PdaTrans();
-
- /* The transition is now attached. Remember the parties involved. */
- retVal->fromState = from;
- retVal->toState = to;
-
- /* Make the entry in the out list for the transitions. */
- from->transMap.append( TransMapEl( lowKey, retVal ) );
-
- /* Set the the keys of the new trans. */
- retVal->lowKey = lowKey;
-
- /* Attach using inRange as the head pointer. */
- attachToInList( from, to, to->inRange.head, retVal );
-
- return retVal;
-}
-
-PdaTrans *PdaGraph::insertNewTrans( PdaState *from, PdaState *to, long lowKey, long )
-{
- /* Make the new transition. */
- PdaTrans *retVal = new PdaTrans();
-
- /* The transition is now attached. Remember the parties involved. */
- retVal->fromState = from;
- retVal->toState = to;
-
- /* Make the entry in the out list for the transitions. */
- from->transMap.insert( lowKey, retVal );
-
- /* Set the the keys of the new trans. */
- retVal->lowKey = lowKey;
-
- /* Attach using inRange as the head pointer. */
- attachToInList( from, to, to->inRange.head, retVal );
-
- return retVal;
-}
-
-/* Attach for range lists or for the default transition. Type of attaching is
- * controlled by the keyType parameter. This attach should be used when a
- * transition already is allocated and must be attached to a target state.
- * Does not handle adding the transition into the out list. */
-void PdaGraph::attachTrans( PdaState *from, PdaState *to, PdaTrans *trans )
-{
- assert( trans->fromState == 0 && trans->toState == 0 );
- trans->fromState = from;
- trans->toState = to;
-
- /* Attach using the inRange pointer as the head pointer. */
- attachToInList( from, to, to->inRange.head, trans );
-}
-
-/* Detach for out/in lists or for default transition. The type of detaching is
- * controlled by the keyType parameter. */
-void PdaGraph::detachTrans( PdaState *from, PdaState *to, PdaTrans *trans )
-{
- assert( trans->fromState == from && trans->toState == to );
- trans->fromState = 0;
- trans->toState = 0;
-
- /* Detach using to's inRange pointer as the head. */
- detachFromInList( from, to, to->inRange.head, trans );
-}
-
-
-/* Detach a state from the graph. Detaches and deletes transitions in and out
- * of the state. Empties inList and outList. Removes the state from the final
- * state set. A detached state becomes useless and should be deleted. */
-void PdaGraph::detachState( PdaState *state )
-{
- /* Detach the in transitions from the inRange list of transitions. */
- while ( state->inRange.head != 0 ) {
- /* Get pointers to the trans and the state. */
- PdaTrans *trans = state->inRange.head;
- PdaState *fromState = trans->fromState;
-
- /* Detach the transitions from the source state. */
- detachTrans( fromState, state, trans );
-
- /* Ok to delete the transition. */
- fromState->transMap.remove( trans->lowKey );
- delete trans;
- }
-
- /* Detach out range transitions. */
- for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) {
- detachTrans( state, trans->value->toState, trans->value );
- delete trans->value;
- }
-
- /* Delete all of the out range pointers. */
- state->transMap.empty();
-
- /* Unset final stateness before detaching from graph. */
- if ( state->stateBits & SB_ISFINAL )
- finStateSet.remove( state );
-}
-
-/* Move all the transitions that go into src so that they go into dest. */
-void PdaGraph::inTransMove( PdaState *dest, PdaState *src )
-{
- /* Do not try to move in trans to and from the same state. */
- assert( dest != src );
-
- /* If src is the start state, dest becomes the start state. */
- assert( src != startState );
-
- /* Move the transitions in inRange. */
- while ( src->inRange.head != 0 ) {
- /* Get trans and from state. */
- PdaTrans *trans = src->inRange.head;
- PdaState *fromState = trans->fromState;
-
- /* Detach from src, reattach to dest. */
- detachTrans( fromState, src, trans );
- attachTrans( fromState, dest, trans );
- }
-}
-
-void PdaGraph::addInReduction( PdaTrans *dest, long prodId, long prior )
-{
- /* Look for the reduction. If not there insert it, otherwise take
- * the max of the priorities. */
- ReductionMapEl *redMapEl = dest->reductions.find( prodId );
- if ( redMapEl == 0 )
- dest->reductions.insert( prodId, prior );
- else if ( prior > redMapEl->value )
- redMapEl->value = prior;
-}
-
-/* Callback invoked when another trans (or possibly this) is added into this
- * transition during the merging process. Draw in any properties of srcTrans
- * into this transition. AddInTrans is called when a new transitions is made
- * that will be a duplicate of another transition or a combination of several
- * other transitions. AddInTrans will be called for each transition that the
- * new transition is to represent. */
-void PdaGraph::addInTrans( PdaTrans *destTrans, PdaTrans *srcTrans )
-{
- /* Protect against adding in from ourselves. */
- if ( srcTrans != destTrans ) {
-
- /* Add in the shift priority. */
- if ( destTrans->isShift && srcTrans->isShift ) {
- /* Both shifts are set. We want the max of the two. */
- if ( srcTrans->shiftPrior > destTrans->shiftPrior )
- destTrans->shiftPrior = srcTrans->shiftPrior;
- }
- else if ( srcTrans->isShift ) {
- /* Just the source is set, copy the source prior over. */
- destTrans->shiftPrior = srcTrans->shiftPrior;
- }
-
- /* If either is a shift, dest is a shift. */
- destTrans->isShift = destTrans->isShift || srcTrans->isShift;
-
- /* Add in the reductions. */
- for ( ReductionMap::Iter red = srcTrans->reductions; red.lte(); red++ )
- addInReduction( destTrans, red->key, red->value );
-
- /* Add in the commit points. */
- destTrans->commits.insert( srcTrans->commits );
-
- if ( srcTrans->toState->advanceReductions )
- destTrans->toState->advanceReductions = true;
-
- if ( srcTrans->noPreIgnore )
- destTrans->noPreIgnore = true;
- if ( srcTrans->noPostIgnore )
- destTrans->noPostIgnore = true;
- }
-}
-
-/* NO LONGER USED. */
-void PdaGraph::addInState( PdaState *destState, PdaState *srcState )
-{
- /* Draw in any properties of srcState into destState. */
- if ( srcState != destState ) {
- /* Get the epsilons, context, out priorities. */
- destState->pendingCommits.insert( srcState->pendingCommits );
- if ( srcState->pendingCommits.length() > 0 )
- cerr << "THERE ARE PENDING COMMITS DRAWN IN" << endl;
-
- /* Parser generation data. */
- destState->dotSet.insert( srcState->dotSet );
-
- if ( srcState->onClosureQueue && !destState->onClosureQueue ) {
- stateClosureQueue.append( destState );
- destState->onClosureQueue = true;
- }
- }
-}
-
-/* Make a new state. The new state will be put on the graph's
- * list of state. The new state can be created final or non final. */
-PdaState *PdaGraph::addState()
-{
- /* Make the new state to return. */
- PdaState *state = new PdaState();
-
- /* Create the new state. */
- stateList.append( state );
-
- return state;
-}
-
-
-/* Follow from to the final state of srcFsm. */
-PdaState *PdaGraph::followFsm( PdaState *from, PdaGraph *srcFsm )
-{
- PdaState *followSrc = srcFsm->startState;
-
- while ( ! followSrc->isFinState() ) {
- assert( followSrc->transMap.length() == 1 );
- PdaTrans *followTrans = followSrc->transMap[0].value;
-
- PdaTrans *inTrans = from->findTrans( followTrans->lowKey );
- assert( inTrans != 0 );
-
- from = inTrans->toState;
- followSrc = followTrans->toState;
- }
-
- return from;
-}
-
-int PdaGraph::fsmLength( )
-{
- int length = 0;
- PdaState *state = startState;
- while ( ! state->isFinState() ) {
- length += 1;
- state = state->transMap[0].value->toState;
- }
- return length;
-}
-
-/* Remove states that have no path to them from the start state. Recursively
- * traverses the graph marking states that have paths into them. Then removes
- * all states that did not get marked. */
-void PdaGraph::removeUnreachableStates()
-{
- /* Mark all the states that can be reached
- * through the existing set of entry points. */
- if ( startState != 0 )
- markReachableFromHere( startState );
-
- for ( PdaStateSet::Iter si = entryStateSet; si.lte(); si++ )
- markReachableFromHere( *si );
-
- /* Delete all states that are not marked
- * and unmark the ones that are marked. */
- PdaState *state = stateList.head;
- while ( state ) {
- PdaState *next = state->next;
-
- if ( state->stateBits & SB_ISMARKED )
- state->stateBits &= ~ SB_ISMARKED;
- else {
- detachState( state );
- stateList.detach( state );
- delete state;
- }
-
- state = next;
- }
-}
diff --git a/colm/pdagraph.h b/colm/pdagraph.h
deleted file mode 100644
index dc11b3e1..00000000
--- a/colm/pdagraph.h
+++ /dev/null
@@ -1,515 +0,0 @@
-/*
- * Copyright 2006-2012 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Colm.
- *
- * Colm is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Colm is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Colm; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#ifndef _PDAGRAPH_H
-#define _PDAGRAPH_H
-
-#include <assert.h>
-#include "vector.h"
-#include "bstset.h"
-#include "compare.h"
-#include "avltree.h"
-#include "dlist.h"
-#include "bstmap.h"
-#include "sbstmap.h"
-#include "sbstset.h"
-#include "sbsttable.h"
-#include "avlset.h"
-#include "dlistmel.h"
-#include "avltree.h"
-
-/* Flags for states. */
-#define SB_ISFINAL 0x04
-#define SB_ISMARKED 0x08
-#define SB_ISSTART 0x10
-
-/* Flags for transitions. */
-#define TB_ISMARKED 0x01
-
-struct PdaTrans;
-struct PdaState;
-struct PdaGraph;
-struct TokenDef;
-struct Definition;
-struct LangEl;
-struct TokenRegion;
-
-typedef Vector<TokenRegion*> RegionVect;
-
-typedef Vector<long> ActDataList;
-
-struct ActionData
-{
- ActionData( int targ, ActDataList &actions, int commitLen )
- : targ(targ), commitLen(commitLen), id(0), actions(actions) { }
-
- int targ;
- int commitLen;
- int id;
-
- ActDataList actions;
-};
-
-
-struct CmpActionData
-{
- static int compare( const ActionData &ap1, const ActionData &ap2 )
- {
- if ( ap1.targ < ap2.targ )
- return -1;
- else if ( ap1.targ > ap2.targ )
- return 1;
- else if ( ap1.commitLen < ap2.commitLen )
- return -1;
- else if ( ap1.commitLen > ap2.commitLen )
- return 1;
- else if ( ap1.id < ap2.id )
- return -1;
- else if ( ap1.id > ap2.id )
- return 1;
-
- return CmpTable< long, CmpOrd<long> >::
- compare( ap1.actions, ap2.actions );
- }
-};
-
-typedef AvlSet<ActionData, CmpActionData> PdaActionSet;
-typedef AvlSetEl<ActionData> PdaActionSetEl;
-
-/* List pointers for the closure queue. Goes into state. */
-struct ClosureQueueListEl { PdaState *prev, *next; };
-
-/* Queue of states, transitions to be closed. */
-typedef DListMel< PdaState, ClosureQueueListEl > StateClosureQueue;
-typedef DList<PdaTrans> TransClosureQueue;
-
-typedef BstSet< Definition*, CmpOrd<Definition*> > DefSet;
-typedef CmpTable< Definition*, CmpOrd<Definition*> > CmpDefSet;
-typedef BstSet< DefSet, CmpDefSet > DefSetSet;
-
-typedef Vector< Definition* > DefVect;
-typedef BstSet< long, CmpOrd<long> > AlphSet;
-
-struct ExpandToEl
-{
- ExpandToEl( PdaState *state, int prodId )
- : state(state), prodId(prodId) { }
-
- PdaState *state;
- int prodId;
-};
-
-struct CmpExpandToEl
-{
- static inline int compare( const ExpandToEl &etel1, const ExpandToEl &etel2 )
- {
- if ( etel1.state < etel2.state )
- return -1;
- else if ( etel1.state > etel2.state )
- return 1;
- else if ( etel1.prodId < etel2.prodId )
- return -1;
- else if ( etel1.prodId > etel2.prodId )
- return 1;
- else
- return 0;
- }
-};
-
-typedef BstSet<ExpandToEl, CmpExpandToEl> ExpandToSet;
-typedef BstSet< int, CmpOrd<int> > IntSet;
-typedef CmpTable< int, CmpOrd<int> > CmpIntSet;
-
-typedef BstSet< long, CmpOrd<long> > LongSet;
-typedef CmpTable< long, CmpOrd<long> > CmpLongSet;
-
-typedef BstMap< long, long, CmpOrd<long> > LongMap;
-typedef BstMapEl< long, long > LongMapEl;
-
-typedef LongSet ProdIdSet;
-typedef CmpLongSet CmpProdIdSet;
-
-/* Set of states, list of states. */
-typedef BstSet<PdaState*> PdaStateSet;
-typedef Vector<PdaState*> StateVect;
-typedef DList<PdaState> PdaStateList;
-
-typedef LongMap FollowToAdd;
-typedef LongMap ReductionMap;
-typedef LongMapEl ReductionMapEl;
-
-struct ProdIdPair
-{
- ProdIdPair( int onReduce, int length )
- : onReduce(onReduce), length(length) {}
-
- int onReduce;
- int length;
-};
-
-struct CmpProdIdPair
-{
- static inline int compare( const ProdIdPair &pair1, const ProdIdPair &pair2 )
- {
- if ( pair1.onReduce < pair2.onReduce )
- return -1;
- else if ( pair1.onReduce > pair2.onReduce )
- return 1;
- else if ( pair1.length < pair2.length )
- return -1;
- else if ( pair1.length > pair2.length )
- return 1;
- else
- return 0;
- }
-};
-
-typedef BstSet< ProdIdPair, CmpProdIdPair > ProdIdPairSet;
-
-/* Transition class that implements actions and priorities. */
-struct PdaTrans
-{
- PdaTrans() :
- fromState(0),
- toState(0),
- isShift(false),
- isShiftReduce(false),
- shiftPrior(0),
- noPreIgnore(false),
- noPostIgnore(false)
- { }
-
- PdaTrans( const PdaTrans &other ) :
- lowKey(other.lowKey),
- fromState(0), toState(0),
- isShift(other.isShift),
- isShiftReduce(other.isShiftReduce),
- shiftPrior(other.shiftPrior),
- reductions(other.reductions),
- commits(other.commits),
- noPreIgnore(false),
- noPostIgnore(false)
- { }
-
- long lowKey;
- PdaState *fromState;
- PdaState *toState;
-
- /* Pointers for outlist. */
- PdaTrans *prev, *next;
-
- /* Pointers for in-list. */
- PdaTrans *ilprev, *ilnext;
-
- long maxPrior();
-
- /* Parse Table construction data. */
- bool isShift, isShiftReduce;
- int shiftPrior;
- ReductionMap reductions;
- ActDataList actions;
- ActDataList actOrds;
- ActDataList actPriors;
-
- ExpandToSet expandTo;
-
- PdaActionSetEl *actionSetEl;
-
- LongSet commits;
- LongSet afterShiftCommits;
-
- bool noPreIgnore;
- bool noPostIgnore;
-};
-
-/* In transition list. Like DList except only has head pointers, which is all
- * that is required. Insertion and deletion is handled by the graph. This
- * class provides the iterator of a single list. */
-struct PdaTransInList
-{
- PdaTransInList() : head(0) { }
-
- PdaTrans *head;
-
- struct Iter
- {
- /* Default construct. */
- Iter() : ptr(0) { }
-
- /* Construct, assign from a list. */
- Iter( const PdaTransInList &il ) : ptr(il.head) { }
- Iter &operator=( const PdaTransInList &dl ) { ptr = dl.head; return *this; }
-
- /* At the end */
- bool lte() const { return ptr != 0; }
- bool end() const { return ptr == 0; }
-
- /* At the first, last element. */
- bool first() const { return ptr && ptr->ilprev == 0; }
- bool last() const { return ptr && ptr->ilnext == 0; }
-
- /* Cast, dereference, arrow ops. */
- operator PdaTrans*() const { return ptr; }
- PdaTrans &operator *() const { return *ptr; }
- PdaTrans *operator->() const { return ptr; }
-
- /* Increment, decrement. */
- inline void operator++(int) { ptr = ptr->ilnext; }
- inline void operator--(int) { ptr = ptr->ilprev; }
-
- /* The iterator is simply a pointer. */
- PdaTrans *ptr;
- };
-};
-
-typedef DList<PdaTrans> PdaTransList;
-
-/* An element in a state dict. Keyed by a set of states; targState is the
- * state the set maps to. */
-struct PdaStateDictEl
-:
-	public AvlTreeEl<PdaStateDictEl>
-{
-	/* Copy the key set. The target starts out null so that it is never read
-	 * as an indeterminate pointer before the owner assigns it. */
-	PdaStateDictEl(const PdaStateSet &stateSet)
-		: stateSet(stateSet), targState(0) { }
-
-	/* Key accessor used by the tree. */
-	const PdaStateSet &getKey() { return stateSet; }
-	PdaStateSet stateSet;
-	PdaState *targState;
-};
-
-/* Dictionary mapping a set of states to a target state. */
-typedef AvlTree< PdaStateDictEl, PdaStateSet, CmpTable<PdaState*> > PdaStateDict;
-
-/* What items does a particular state encompass. */
-typedef BstSet< long, CmpOrd<long> > DotSet;
-typedef CmpTable< long, CmpOrd<long> > CmpDotSet;
-
-/* Map of dot sets to states. */
-typedef AvlTree< PdaState, DotSet, CmpDotSet > DotSetMap;
-typedef PdaState DotSetMapEl;
-
-typedef BstMap< long, PdaTrans* > TransMap;
-typedef BstMapEl< long, PdaTrans* > TransMapEl;
-
-/* State class that implements actions and priorities. A state is both a
- * closure queue list element and an AVL tree element keyed by its dot set
- * (see getKey). */
-struct PdaState
-:
- public ClosureQueueListEl,
- public AvlTreeEl< PdaState >
-{
- PdaState();
- PdaState(const PdaState &other);
- ~PdaState();
-
- /* Is the state final? */
- bool isFinState() { return stateBits & SB_ISFINAL; }
-
- /* Look up the out transition stored under key in transMap. Returns 0 when
- * there is no such transition. */
- PdaTrans *findTrans( long key )
- {
- TransMapEl *transMapEl = transMap.find( key );
- if ( transMapEl == 0 )
- return 0;
- return transMapEl->value;
- }
-
- /* In transition list. */
- PdaTransInList inRange;
-
- ProdIdPairSet pendingCommits;
-
- /* When duplicating the fsm we need to map each
- * state to the new state representing it. */
- PdaState *stateMap;
-
- /* When merging states (state machine operations) this next pointer is
- * used for the list of states that need to be filled in. */
- PdaState *alg_next;
-
- PdaStateSet *stateSet;
-
- /* Identification for printing and stable minimization. */
- int stateNum;
-
- /* A pointer to a dict element that contains the set of states this state
- * represents. This cannot go into alg, because alg.next is used during
- * the merging process. */
- PdaStateDictEl *stateDictEl;
-
- /* Bits controlling the behaviour of the state during collapsing to dfa.
- * SB_ISFINAL is tested by isFinState(). */
- int stateBits;
-
- /* State list elements. */
- PdaState *next, *prev;
-
- /* For dotset map. */
- DotSet &getKey() { return dotSet; }
-
- /* Closure management. */
- DotSet dotSet;
- DotSet dotSet2;
- bool onClosureQueue;
- bool inClosedMap;
- bool followMarked;
- bool onStateList;
-
- /* Out transitions, keyed by a long (see findTrans). */
- TransMap transMap;
-
- RegionVect regions;
- RegionVect preRegions;
-
- bool advanceReductions;
-};
-
-/* Compare lists of epsilon transitions. Entries are name ids of targets. */
-typedef CmpTable< int, CmpOrd<int> > CmpEpsilonTrans;
-
-/* Compare sets of context values. */
-typedef CmpTable< int, CmpOrd<int> > CmpContextSets;
-
-/* Graph class that implements actions and priorities. Only declarations live
- * here; the member functions are defined elsewhere. */
-struct PdaGraph
-{
- /* Constructors/Destructors. */
- PdaGraph();
- PdaGraph( const PdaGraph &graph );
- ~PdaGraph();
-
- /* The list of states. */
- PdaStateList stateList;
- PdaStateList misfitList;
-
- /* The start state. */
- PdaState *startState;
- PdaStateSet entryStateSet;
-
- /* The set of final states. */
- PdaStateSet finStateSet;
-
- /* Closure queues and maps. */
- DotSetMap closedMap;
- StateClosureQueue stateClosureQueue;
- StateClosureQueue stateClosedList;
-
- TransClosureQueue transClosureQueue;
- PdaState *stateClosureHead;
-
- LangEl **langElIndex;
-
- void setStartState( PdaState *state );
- void unsetStartState( );
-
- /*
- * Basic attaching and detaching.
- */
-
- /* Common to attaching/detaching list and default. */
- void attachToInList( PdaState *from, PdaState *to, PdaTrans *&head, PdaTrans *trans );
- void detachFromInList( PdaState *from, PdaState *to, PdaTrans *&head, PdaTrans *trans );
-
- /* Attach with a new transition. The trailing long parameter is unnamed
- * in both declarations. */
- PdaTrans *appendNewTrans( PdaState *from, PdaState *to, long onChar1, long );
- PdaTrans *insertNewTrans( PdaState *from, PdaState *to, long lowKey, long );
-
- /* Attach with an existing transition that is already in an out list. */
- void attachTrans( PdaState *from, PdaState *to, PdaTrans *trans );
-
- /* Detach a transition from a target state. */
- void detachTrans( PdaState *from, PdaState *to, PdaTrans *trans );
-
- /* Detach a state from the graph. */
- void detachState( PdaState *state );
-
- /*
- * Callbacks.
- */
-
- /* Add in the properties of srcTrans into this. */
- void addInReduction( PdaTrans *dest, long prodId, long prior );
- void addInTrans( PdaTrans *destTrans, PdaTrans *srcTrans );
- void addInState( PdaState *destState, PdaState *srcState );
-
- /*
- * Allocation.
- */
-
- /* New up a state and add it to the graph. */
- PdaState *addState();
-
- /*
- * Fsm operators.
- */
-
- /* Follow to the fin state of src fsm. */
- PdaState *followFsm( PdaState *from, PdaGraph *srcFsm );
-
- /*
- * Final states
- */
-
- /* Set and Unset a state as final. */
- void setFinState( PdaState *state );
- void unsetFinState( PdaState *state );
- void unsetAllFinStates( );
-
- /* Set State numbers starting at 0. */
- void setStateNumbers();
-
- /*
- * Path pruning
- */
-
- /* Mark all states reachable from state. */
- void markReachableFromHere( PdaState *state );
-
- /* Removes states that cannot be reached by any path in the fsm and are
- * thus wasted silicon. */
- void removeUnreachableStates();
-
- /* Remove error actions from states on which the error transition will
- * never be taken.
- * NOTE(review): this description does not match a bool-returning function
- * named outListCovers; it reads as if it was left behind by a different
- * declaration -- confirm against the definition. */
- bool outListCovers( PdaState *state );
-
- /* Remove states that are on the misfit list. */
- void removeMisfits();
-
-
- /*
- * Other
- */
-
- /* Move the in trans into src into dest. */
- void inTransMove(PdaState *dest, PdaState *src);
-
- int fsmLength( );
-
- /* Collected machine information. */
- unsigned long long maxState;
- unsigned long long maxAction;
- unsigned long long maxLelId;
- unsigned long long maxOffset;
- unsigned long long maxIndex;
- unsigned long long maxProdLen;
-
- PdaActionSet actionSet;
-};
-
-
-#endif /* _FSMGRAPH_H */
diff --git a/colm/pdarun.c b/colm/pdarun.c
deleted file mode 100644
index 62ab107e..00000000
--- a/colm/pdarun.c
+++ /dev/null
@@ -1,2272 +0,0 @@
-/*
- * Copyright 2007-2012 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Colm.
- *
- * Colm is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Colm is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Colm; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include "config.h"
-#include "debug.h"
-#include "pdarun.h"
-#include "fsmrun.h"
-#include "bytecode.h"
-#include "tree.h"
-#include "pool.h"
-
-#include <errno.h>
-#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
-#include <assert.h>
-
-#define true 1
-#define false 0
-
-#define act_sb 0x1
-#define act_rb 0x2
-#define lower 0x0000ffff
-#define upper 0xffff0000
-
-/* Assemble a Word from the four bytes at p, least significant byte first, and
- * store it in i. Both arguments are parenthesized so that expression
- * arguments (p + 1, *dest, ...) expand with the intended precedence. */
-#define read_word_p( i, p ) do { \
-	(i) = ((Word) (p)[0]); \
-	(i) |= ((Word) (p)[1]) << 8; \
-	(i) |= ((Word) (p)[2]) << 16; \
-	(i) |= ((Word) (p)[3]) << 24; \
-} while(0)
-
-/* Same as read_word_p, but the assembled value is converted to a Tree
- * pointer. The temporary has a macro-specific name so it cannot capture a
- * caller variable called w.
- * NOTE(review): only four bytes are read, which cannot hold a full pointer
- * on targets with pointers wider than 32 bits -- confirm this is intended. */
-#define read_tree_p( i, p ) do { \
-	Word read_tree_p_w; \
-	read_tree_p_w = ((Word) (p)[0]); \
-	read_tree_p_w |= ((Word) (p)[1]) << 8; \
-	read_tree_p_w |= ((Word) (p)[2]) << 16; \
-	read_tree_p_w |= ((Word) (p)[3]) << 24; \
-	(i) = (Tree*)read_tree_p_w; \
-} while(0)
-
-/* Prepare a scanner run: bind the program's fsm tables, give the run one
- * empty run buffer, and mark it as attached to nothing. */
-void initFsmRun( FsmRun *fsmRun, Program *prg )
-{
-	fsmRun->tables = prg->rtd->fsmTables;
-
-	/* Run buffers need to stick around because
-	 * token strings point into them. */
-	RunBuf *first = newRunBuf();
-	first->next = 0;
-	fsmRun->runBuf = first;
-
-	/* Nothing buffered yet: both cursors sit at the start of the data. */
-	fsmRun->pe = first->data;
-	fsmRun->p = fsmRun->pe;
-	fsmRun->peof = 0;
-
-	fsmRun->attachedInput = 0;
-	fsmRun->attachedSource = 0;
-	fsmRun->preRegion = -1;
-}
-
-/* Release the run buffers of fsmRun by splicing the whole list onto the front
- * of the program's allocRunBuf list. Does nothing when the run owns no
- * buffers. */
-void clearFsmRun( Program *prg, FsmRun *fsmRun )
-{
-	if ( fsmRun->runBuf != 0 ) {
-		/* Transfer the run buf list to the program */
-		RunBuf *head = fsmRun->runBuf;
-		RunBuf *tail = head;
-		while ( tail->next != 0 )
-			tail = tail->next;
-
-		tail->next = prg->allocRunBuf;
-		prg->allocRunBuf = head;
-
-		/* The list now belongs to the program. Forget it here so that a
-		 * repeated clear cannot splice the same chain in a second time,
-		 * which would link the tail back to the head and make allocRunBuf
-		 * circular. */
-		fsmRun->runBuf = 0;
-	}
-}
-
-/* Keep the position up to date after consuming text. Line and column are
- * tracked here only when the stream does not handle lines itself; the byte
- * offset is always advanced by length. */
-void updatePosition( InputStream *inputStream, const char *data, long length )
-{
-	if ( !inputStream->handlesLine ) {
-		/* The index has the same type as length so the comparison cannot
-		 * involve an int that overflows on very long spans. */
-		long i;
-		for ( i = 0; i < length; i++ ) {
-			if ( data[i] != '\n' )
-				inputStream->column += 1;
-			else {
-				inputStream->line += 1;
-				inputStream->column = 1;
-			}
-		}
-	}
-
-	inputStream->byte += length;
-}
-
-/* Keep the position up to date after sending back text. Only the line count
- * and byte offset are rewound; the column is left untouched. */
-void undoPosition( InputStream *inputStream, const char *data, long length )
-{
-	/* FIXME: this needs to fetch the position information from the parsed
-	 * token and restore based on that.. */
-	if ( !inputStream->handlesLine ) {
-		/* Index type matches length to avoid int overflow on long spans. */
-		long i;
-		for ( i = 0; i < length; i++ ) {
-			if ( data[i] == '\n' )
-				inputStream->line -= 1;
-		}
-	}
-
-	inputStream->byte -= length;
-}
-
-/* Count one more parse step and log the new total. */
-void incrementSteps( PdaRun *pdaRun )
-{
-	pdaRun->steps = pdaRun->steps + 1;
-	debug( REALM_PARSE, "steps up to %ld\n", pdaRun->steps );
-}
-
-/* Count one parse step less and log the new total. */
-void decrementSteps( PdaRun *pdaRun )
-{
-	pdaRun->steps = pdaRun->steps - 1;
-	debug( REALM_PARSE, "steps down to %ld\n", pdaRun->steps );
-}
-
-/* Pull length bytes of raw input into a fresh run buffer, consume them from
- * the stream and return them as a string head that points into the buffer.
- * Must not be called while a token is partially scanned. */
-Head *streamPull( Program *prg, FsmRun *fsmRun, InputStream *inputStream, long length )
-{
-	/* We should not be in the midst of getting a token. */
-	assert( fsmRun->tokstart == 0 );
-
-	/* Put a new buffer on the front of the run buffer list. */
-	RunBuf *pullBuf = newRunBuf();
-	pullBuf->next = fsmRun->runBuf;
-	fsmRun->runBuf = pullBuf;
-
-	/* NOTE(review): the count written to got is never inspected, and length
-	 * is not checked against the capacity of the buffer -- confirm that
-	 * callers bound it. */
-	int got = 0;
-	getData( fsmRun, inputStream, 0, pullBuf->data, length, &got );
-	consumeData( inputStream, length );
-
-	/* Leave nothing in this buffer for the scanner to run over. */
-	fsmRun->pe = pullBuf->data + length;
-	fsmRun->p = fsmRun->pe;
-
-	Head *pulled = stringAllocPointer( prg, pullBuf->data, length );
-	updatePosition( inputStream, pullBuf->data, length );
-
-	return pulled;
-}
-
-/* Reverse a stream pull by putting the pulled text back on the front of the
- * input stream. fsmRun is not used. */
-void undoStreamPull( FsmRun *fsmRun, InputStream *inputStream, const char *data, long length )
-{
- debug( REALM_PARSE, "undoing stream pull\n" );
-
- prependData( inputStream, data, length );
-}
-
-/* Push text onto the front of the input stream. fsmRun is not used. */
-void streamPushText( FsmRun *fsmRun, InputStream *inputStream, const char *data, long length )
-{
- prependData( inputStream, data, length );
-}
-
-/* Push a tree onto the front of the input stream; the ignore flag is passed
- * through to prependTree unchanged. fsmRun is not used. */
-void streamPushTree( FsmRun *fsmRun, InputStream *inputStream, Tree *tree, int ignore )
-{
- prependTree( inputStream, tree, ignore );
-}
-
-/* Reverse a push onto the front of the input stream. A negative length means
- * the pushed item was a tree, which is taken back off and released.
- * Otherwise length bytes of pushed text are removed. */
-void undoStreamPush( Program *prg, Tree **sp, FsmRun *fsmRun, InputStream *inputStream, long length )
-{
-	if ( length >= 0 ) {
-		undoPrependData( inputStream, length );
-		return;
-	}
-
-	Tree *pushed = undoPrependTree( inputStream );
-	treeDownref( prg, sp, pushed );
-}
-
-/* Reverse an append to the input stream. The kind of item that was appended
- * is taken from the id of input: string data, a stream, or (anything else)
- * a tree, which is released after being taken back off. */
-void undoStreamAppend( Program *prg, Tree **sp, FsmRun *fsmRun, InputStream *inputStream, Tree *input, long length )
-{
-	if ( input->id == LEL_ID_STR ) {
-		undoAppendData( inputStream, length );
-		return;
-	}
-
-	if ( input->id == LEL_ID_STREAM ) {
-		undoAppendStream( inputStream );
-		return;
-	}
-
-	Tree *appended = undoAppendTree( inputStream );
-	treeDownref( prg, sp, appended );
-}
-
-/* Give consumed text back to the input stream and rewind the position.
- * Should only be sending back whole tokens/ignores, therefore the send back
- * should never cross a buffer boundary. Empty text is a no-op apart from the
- * debug trace. */
-static void sendBackText( FsmRun *fsmRun, InputStream *inputStream, const char *data, long length )
-{
-	debug( REALM_PARSE, "push back of %ld characters\n", length );
-
-	if ( length != 0 ) {
-		debug( REALM_PARSE, "sending back text: %.*s\n",
-				(int)length, data );
-
-		undoConsumeData( fsmRun, inputStream, data, length );
-		undoPosition( inputStream, data, length );
-	}
-}
-
-/* Give a consumed tree back to the input stream. The last argument of
- * undoConsumeTree is always passed as false here. */
-void sendBackTree( InputStream *inputStream, Tree *tree )
-{
- undoConsumeTree( inputStream, tree, false );
-}
-
-/*
- * Return one ignore item to the input. Its text is un-consumed unless the
- * item is artificial or carries no token data. The step count is wound back,
- * pending reverse code is put on deck, and the parse is stopped once the
- * target step count is reached.
- *
- * Stops on:
- * PcrRevIgnore
- */
-static void sendBackIgnore( Program *prg, Tree **sp, PdaRun *pdaRun, FsmRun *fsmRun,
-		InputStream *inputStream, ParseTree *parseTree )
-{
-	#ifdef DEBUG
-	LangElInfo *lelInfo = prg->rtd->lelInfo;
-	debug( REALM_PARSE, "sending back: %s%s\n",
-		lelInfo[parseTree->shadow->tree->id].name,
-		parseTree->flags & PF_ARTIFICIAL ? " (artificial)" : "" );
-	#endif
-
-	Head *tokdata = parseTree->shadow->tree->tokdata;
-
-	/* Only text that was actually scanned goes back to the stream. */
-	if ( tokdata != 0 && !( parseTree->flags & PF_ARTIFICIAL ) )
-		sendBackText( fsmRun, inputStream, stringData( tokdata ), tokdata->length );
-
-	decrementSteps( pdaRun );
-
-	/* Check for reverse code. */
-	if ( parseTree->flags & PF_HAS_RCODE ) {
-		pdaRun->onDeck = true;
-		parseTree->flags &= ~PF_HAS_RCODE;
-	}
-
-	if ( pdaRun->steps == pdaRun->targetSteps ) {
-		debug( REALM_PARSE, "trigger parse stop, steps = target = %d\n", pdaRun->targetSteps );
-		pdaRun->stop = true;
-	}
-}
-
-/* Make fsmRun the scanner attached to the input stream. Whatever other run
- * is currently attached is detached first. Nothing happens when fsmRun is
- * already the attached run. */
-void attachInput( FsmRun *fsmRun, InputStream *is )
-{
-	if ( is->attached == fsmRun )
-		return;
-
-	/* Some other run holds the stream: release it. */
-	if ( is->attached != 0 )
-		detachInput( is->attached, is );
-
-	if ( is->attached != fsmRun ) {
-		debug( REALM_INPUT, "attaching fsm run to input stream: %p %p\n", fsmRun, is );
-		fsmRun->attachedInput = is;
-		is->attached = fsmRun;
-	}
-}
-
-/* Make fsmRun the scanner attached to the source stream. Whatever other run
- * is currently attached is detached first. Nothing happens when fsmRun is
- * already the attached run. */
-void attachSource( FsmRun *fsmRun, SourceStream *ss )
-{
-	if ( ss->attached == fsmRun )
-		return;
-
-	/* Some other run holds the stream: release it. */
-	if ( ss->attached != 0 )
-		detachSource( ss->attached, ss );
-
-	if ( ss->attached != fsmRun ) {
-		debug( REALM_INPUT, "attaching fsm run to source stream: %p %p\n", fsmRun, ss );
-		fsmRun->attachedSource = ss;
-		ss->attached = fsmRun;
-	}
-}
-
-/* Break the link between fsmRun and the input stream, discard whatever the
- * run has buffered, and also break the link to any attached source stream. */
-void detachInput( FsmRun *fsmRun, InputStream *is )
-{
-	debug( REALM_INPUT, "detaching fsm run from input stream: %p %p\n", fsmRun, is );
-
-	fsmRun->attachedInput = 0;
-	is->attached = 0;
-
-	clearBuffered( fsmRun );
-
-	SourceStream *source = fsmRun->attachedSource;
-	if ( source != 0 ) {
-		source->attached = 0;
-		fsmRun->attachedSource = 0;
-	}
-}
-
-/* Break the link between fsmRun and the source stream, discard whatever the
- * run has buffered, and also break the link to any attached input stream. */
-void detachSource( FsmRun *fsmRun, SourceStream *is )
-{
-	debug( REALM_INPUT, "detaching fsm run from source stream: %p %p\n", fsmRun, is );
-
-	fsmRun->attachedSource = 0;
-	is->attached = 0;
-
-	clearBuffered( fsmRun );
-
-	InputStream *input = fsmRun->attachedInput;
-	if ( input != 0 ) {
-		input->attached = 0;
-		fsmRun->attachedInput = 0;
-	}
-}
-
-/* Throw away what the scanner has buffered but not yet turned into a token.
- * With a token in progress both cursors are pulled back to its start and the
- * token is abandoned; otherwise the end of data is cut back to the current
- * position. */
-void clearBuffered( FsmRun *fsmRun )
-{
-	if ( fsmRun->tokstart == 0 ) {
-		fsmRun->pe = fsmRun->p;
-		return;
-	}
-
-	fsmRun->pe = fsmRun->tokstart;
-	fsmRun->p = fsmRun->pe;
-	fsmRun->tokstart = 0;
-}
-
-/* If there is a token started, but never finished for a lack of data, back
- * the scan position up to where it began and forget the token. */
-void resetToken( FsmRun *fsmRun )
-{
-	if ( fsmRun->tokstart == 0 )
-		return;
-
-	fsmRun->p = fsmRun->tokstart;
-	fsmRun->tokstart = 0;
-}
-
-/* Return one parsed item to the input and free its parse tree node.
- * Artificial items go back as trees; scanned items go back as text.
- *
- * Stops on:
- * PcrRevToken
- */
-static void sendBack( Program *prg, Tree **sp, PdaRun *pdaRun, FsmRun *fsmRun,
-		InputStream *inputStream, ParseTree *parseTree )
-{
-	debug( REALM_PARSE, "sending back: %s\n", prg->rtd->lelInfo[parseTree->id].name );
-
-	/* Send the named lang el back first, then send back any leading
-	 * whitespace. */
-	if ( parseTree->flags & PF_NAMED )
-		undoConsumeLangEl( inputStream );
-
-	decrementSteps( pdaRun );
-
-	/* Check for reverse code. This applies to artificial and scanned items
-	 * alike, so it is done once up front. */
-	if ( parseTree->flags & PF_HAS_RCODE ) {
-		debug( REALM_PARSE, "tree has rcode, setting on deck\n" );
-		pdaRun->onDeck = true;
-		parseTree->flags &= ~PF_HAS_RCODE;
-	}
-
-	if ( parseTree->flags & PF_ARTIFICIAL ) {
-		/* Artificial items were not parsed, instead sent in as items. The
-		 * stream gets its own reference to the tree. */
-		treeUpref( parseTree->shadow->tree );
-		sendBackTree( inputStream, parseTree->shadow->tree );
-	}
-	else {
-		/* Push back the token data. */
-		Head *tokdata = parseTree->shadow->tree->tokdata;
-		sendBackText( fsmRun, inputStream, stringData( tokdata ),
-				stringLength( parseTree->shadow->tree->tokdata ) );
-
-		/* If eof was just sent back remember that it needs to be sent again. */
-		if ( parseTree->id == prg->rtd->eofLelIds[pdaRun->parserId] )
-			inputStream->eofSent = false;
-
-		/* If the item is bound then remove it from the bindings array. */
-		popBinding( pdaRun, parseTree );
-	}
-
-	if ( pdaRun->steps == pdaRun->targetSteps ) {
-		debug( REALM_PARSE, "trigger parse stop, steps = target = %d\n", pdaRun->targetSteps );
-		pdaRun->stop = true;
-	}
-
-	/* Downref the tree that was sent back and free the kid. */
-	treeDownref( prg, sp, parseTree->shadow->tree );
-	kidFree( prg, parseTree->shadow );
-	parseTreeFree( prg, parseTree );
-}
-
-/* Record on tree the scanning region index the parser is about to use. This
- * is done only when no ignore tokens had been accumulated (emptyIgnore). If
- * the token region table has a further nonzero entry after that index, one
- * more retry is counted. */
-void setRegion( PdaRun *pdaRun, int emptyIgnore, ParseTree *tree )
-{
-	if ( !emptyIgnore )
-		return;
-
-	/* Recording the next region. */
-	tree->region = pdaRun->nextRegionInd;
-
-	if ( pdaRun->tables->tokenRegions[tree->region+1] != 0 )
-		pdaRun->numRetry += 1;
-}
-
-/* Wrap a scanned ignore tree in a parse tree node and push it on the front
- * of the accumulated ignore list. The node is flagged as a right ignore when
- * the scanner ran with a pre region. */
-void ignoreTree( Program *prg, FsmRun *fsmRun, PdaRun *pdaRun, Tree *tree )
-{
-	/* Must be sampled before the list is modified below. */
-	int emptyIgnore = ( pdaRun->accumIgnore == 0 );
-
-	incrementSteps( pdaRun );
-
-	ParseTree *parseTree = parseTreeAllocate( prg );
-	Kid *shadow = kidAllocate( prg );
-	shadow->tree = tree;
-	parseTree->shadow = shadow;
-
-	/* Push on the front of the ignore list. */
-	parseTree->next = pdaRun->accumIgnore;
-	pdaRun->accumIgnore = parseTree;
-
-	transferReverseCode( pdaRun, parseTree );
-
-	if ( fsmRun->preRegion >= 0 )
-		parseTree->flags |= PF_RIGHT_IGNORE;
-
-	setRegion( pdaRun, emptyIgnore, pdaRun->accumIgnore );
-}
-
-/* Variant of ignoreTree for trees that were not scanned: the new node is
- * flagged artificial and no right-ignore marking takes place. */
-void ignoreTree2( Program *prg, PdaRun *pdaRun, Tree *tree )
-{
-	/* Must be sampled before the list is modified below. */
-	int emptyIgnore = ( pdaRun->accumIgnore == 0 );
-
-	incrementSteps( pdaRun );
-
-	ParseTree *parseTree = parseTreeAllocate( prg );
-	parseTree->flags |= PF_ARTIFICIAL;
-
-	Kid *shadow = kidAllocate( prg );
-	shadow->tree = tree;
-	parseTree->shadow = shadow;
-
-	/* Push on the front of the ignore list. */
-	parseTree->next = pdaRun->accumIgnore;
-	pdaRun->accumIgnore = parseTree;
-
-	transferReverseCode( pdaRun, parseTree );
-
-	setRegion( pdaRun, emptyIgnore, pdaRun->accumIgnore );
-}
-
-/* Build the kid and tree for a token with the given id and token data. The
- * attribute list becomes the child of the tree, and every capture attribute
- * of the language element is filled with a string copied from between its
- * enter and leave marks. Returns the new kid; its tree has one reference. */
-Kid *makeTokenWithData( Program *prg, PdaRun *pdaRun, FsmRun *fsmRun,
-		InputStream *inputStream, int id, Head *tokdata )
-{
-	LangElInfo *lelInfo = prg->rtd->lelInfo;
-
-	/* Make the token object. */
-	Kid *attrs = allocAttrs( prg, lelInfo[id].objectLength );
-
-	Kid *input = kidAllocate( prg );
-	input->tree = treeAllocate( prg );
-
-	debug( REALM_PARSE, "made token %p\n", input->tree );
-
-	input->tree->refs = 1;
-	input->tree->id = id;
-	input->tree->tokdata = tokdata;
-
-	/* No children and ignores get added later. */
-	input->tree->child = attrs;
-
-	/* The loop simply does not run when there are no capture attributes. */
-	int ci;
-	for ( ci = 0; ci < lelInfo[id].numCaptureAttr; ci++ ) {
-		CaptureAttr *ca = &prg->rtd->captureAttr[lelInfo[id].captureAttr + ci];
-		Head *data = stringAllocFull( prg,
-				fsmRun->mark[ca->mark_enter], fsmRun->mark[ca->mark_leave]
-				- fsmRun->mark[ca->mark_enter] );
-		Tree *string = constructString( prg, data );
-		treeUpref( string );
-		setAttr( input->tree, ca->offset, string );
-	}
-
-	return input;
-}
-
-/* Release every element of a kid list: drop the reference on each tree and
- * free each kid. */
-void clearIgnoreList( Program *prg, Tree **sp, Kid *kid )
-{
-	Kid *following;
-	for ( ; kid != 0; kid = following ) {
-		/* Save the link before the kid is freed. */
-		following = kid->next;
-		treeDownref( prg, sp, kid->tree );
-		kidFree( prg, kid );
-	}
-}
-
-static void reportParseError( Program *prg, Tree **sp, PdaRun *pdaRun )
-{
- Kid *kid = pdaRun->btPoint;
- Head *deepest = 0;
- while ( kid != 0 ) {
- Head *head = kid->tree->tokdata;
- if ( head != 0 && head->location != 0 ) {
- if ( deepest == 0 || head->location->byte > deepest->location->byte )
- deepest = head;
- }
- kid = kid->next;
- }
-
- Head *errorHead = 0;
-
- /* If there are no error points on record assume the error occurred at the beginning of the stream. */
- if ( deepest == 0 )
- errorHead = stringAllocFull( prg, "PARSE ERROR at 1:1", 18 );
- else {
- debug( REALM_PARSE, "deepest location byte: %d\n", deepest->location->byte );
-
- long line = deepest->location->line;
- long i, column = deepest->location->column;
-
- for ( i = 0; i < deepest->length; i++ ) {
- if ( deepest->data[i] != '\n' )
- column += 1;
- else {
- line += 1;
- column = 1;
- }
- }
-
- char formatted[128];
- sprintf( formatted, "PARSE ERROR at %ld:%ld", line, column );
- errorHead = stringAllocFull( prg, formatted, strlen(formatted) );
- }
-
- Tree *tree = constructString( prg, errorHead );
- treeDownref( prg, sp, prg->lastParseError );
- prg->lastParseError = tree;
- treeUpref( prg->lastParseError );
-}
-
-/* Move accumulated ignore items onto parseTree as its right ignore. Nothing
- * is done when no ignores are accumulated, or when the id of the stack top is
- * not strictly between 0 and firstNonTermId. */
-static void attachRightIgnore( Program *prg, Tree **sp, PdaRun *pdaRun, ParseTree *parseTree )
-{
- if ( pdaRun->accumIgnore == 0 )
- return;
-
- if ( pdaRun->stackTop->id > 0 && pdaRun->stackTop->id < prg->rtd->firstNonTermId ) {
- /* OK, do it */
- debug( REALM_PARSE, "attaching right ignore\n" );
-
- /* A right ignore list must not be attached already. */
- assert( ! ( parseTree->flags & PF_RIGHT_IL_ATTACHED ) );
-
- ParseTree *accum = pdaRun->accumIgnore;
-
- /* Find the last item in the list that is not flagged as a right
- * ignore. Only the items after it are taken. */
- ParseTree *stopAt = 0, *use = accum;
- while ( use != 0 ) {
- if ( ! (use->flags & PF_RIGHT_IGNORE) )
- stopAt = use;
- use = use->next;
- }
-
- if ( stopAt != 0 ) {
- /* Stop at was set. Make it the last item in the ignore list. Take
- * the rest. */
- accum = stopAt->next;
- stopAt->next = 0;
- }
- else {
- /* Stop at was never set. All right ignore. Use it all. */
- pdaRun->accumIgnore = 0;
- }
-
- /* Walk the taken items, reversing the parse tree list and the data
- * (kid) list in lockstep. */
- ParseTree *child = accum, *last = 0;
- Kid *dataChild = 0, *dataLast = 0;
-
- while ( child ) {
- dataChild = child->shadow;
- ParseTree *next = child->next;
-
- /* Reverse the lists. */
- dataChild->next = dataLast;
- child->next = last;
-
- /* Detach the parse tree from the data tree. */
- child->shadow = 0;
-
- /* Keep the last for reversal. */
- dataLast = dataChild;
- last = child;
-
- child = next;
- }
-
- /* Last is now the first. */
- parseTree->rightIgnore = last;
-
- /* dataChild is nonzero only if at least one item was taken. */
- if ( dataChild != 0 ) {
- debug( REALM_PARSE, "attaching ignore right\n" );
-
- Kid *ignoreKid = dataLast;
-
- /* Make the ignore tree that holds the reversed data list. */
- Tree *rightIgnore = 0;
-
- rightIgnore = treeAllocate( prg );
- rightIgnore->id = LEL_ID_IGNORE;
- rightIgnore->child = ignoreKid;
-
- Tree *pushTo = parseTree->shadow->tree;
-
- /* The push returns the tree to use from here on, so it is stored
- * back on the shadow. */
- pushTo = pushRightIgnore( prg, pushTo, rightIgnore );
-
- parseTree->shadow->tree = pushTo;
-
- parseTree->flags |= PF_RIGHT_IL_ATTACHED;
- }
- }
-}
-
-/* Move all accumulated ignore items onto parseTree as its left ignore. The
- * accumulated list is always emptied. */
-static void attachLeftIgnore( Program *prg, Tree **sp, PdaRun *pdaRun, ParseTree *parseTree )
-{
- /* A left ignore list must not be attached already. */
- assert( ! ( parseTree->flags & PF_LEFT_IL_ATTACHED ) );
-
- ParseTree *accum = pdaRun->accumIgnore;
- pdaRun->accumIgnore = 0;
-
- /* Walk the items, reversing the parse tree list and the data (kid) list
- * in lockstep. */
- ParseTree *child = accum, *last = 0;
- Kid *dataChild = 0, *dataLast = 0;
-
- while ( child ) {
- dataChild = child->shadow;
- ParseTree *next = child->next;
-
- /* Reverse the lists. */
- dataChild->next = dataLast;
- child->next = last;
-
- /* Detach the parse tree from the data tree. */
- child->shadow = 0;
-
- /* Keep the last for reversal. */
- dataLast = dataChild;
- last = child;
-
- child = next;
- }
-
- /* Last is now the first. */
- parseTree->leftIgnore = last;
-
- /* dataChild is nonzero only if the list was not empty. */
- if ( dataChild != 0 ) {
- debug( REALM_PARSE, "attaching left ignore\n" );
-
- /* After the loop dataChild is the head of the reversed data list. */
- Kid *ignoreKid = dataChild;
-
- /* Make the ignore list for the left-ignore. */
- Tree *leftIgnore = treeAllocate( prg );
- leftIgnore->id = LEL_ID_IGNORE;
- leftIgnore->child = ignoreKid;
-
- Tree *pushTo = parseTree->shadow->tree;
-
- /* The push returns the tree to use from here on, so it is stored
- * back on the shadow. */
- pushTo = pushLeftIgnore( prg, pushTo, leftIgnore );
-
- parseTree->shadow->tree = pushTo;
-
- parseTree->flags |= PF_LEFT_IL_ATTACHED;
- }
-}
-
-/* Not currently used. Need to revive this. WARNING: untested changes here.
- *
- * Take the right ignore back off parseTree and make its items the
- * accumulated ignore list again. */
-static void detachRightIgnore( Program *prg, Tree **sp, PdaRun *pdaRun, ParseTree *parseTree )
-{
- /* Right ignore are immediately discarded since they are copies of
- * left-ignores. */
- Tree *rightIgnore = 0;
- if ( parseTree->flags & PF_RIGHT_IL_ATTACHED ) {
- Tree *popFrom = parseTree->shadow->tree;
-
- /* The pop hands back the ignore tree through rightIgnore and returns
- * the tree to keep on the shadow. */
- popFrom = popRightIgnore( prg, sp, popFrom, &rightIgnore );
-
- parseTree->shadow->tree = popFrom;
-
- parseTree->flags &= ~PF_RIGHT_IL_ATTACHED;
- }
-
- if ( parseTree->rightIgnore != 0 ) {
- assert( rightIgnore != 0 );
-
- /* Transfer the trees to accumIgnore. */
- ParseTree *ignore = parseTree->rightIgnore;
- parseTree->rightIgnore = 0;
-
- Kid *dataIgnore = rightIgnore->child;
- rightIgnore->child = 0;
-
- /* Walk both lists in lockstep, pairing each parse tree with its data
- * kid again and reversing both. The data list is assumed to be at
- * least as long as the parse tree list. */
- ParseTree *last = 0;
- Kid *dataLast = 0;
- while ( ignore != 0 ) {
- ParseTree *next = ignore->next;
- Kid *dataNext = dataIgnore->next;
-
- /* Put the data trees underneath the parse trees. */
- ignore->shadow = dataIgnore;
-
- /* Reverse. */
- ignore->next = last;
- dataIgnore->next = dataLast;
-
- /* Keep last for reversal. */
- last = ignore;
- dataLast = dataIgnore;
-
- ignore = next;
- dataIgnore = dataNext;
- }
-
- /* This replaces whatever accumIgnore held before. */
- pdaRun->accumIgnore = last;
-
- treeDownref( prg, sp, rightIgnore );
- }
-}
-
-/* Take the left ignore back off parseTree and make its items the accumulated
- * ignore list again. fsmRun is not used. */
-static void detachLeftIgnore( Program *prg, Tree **sp, PdaRun *pdaRun, FsmRun *fsmRun, ParseTree *parseTree )
-{
- /* Detach left. */
- Tree *leftIgnore = 0;
- if ( parseTree->flags & PF_LEFT_IL_ATTACHED ) {
- Tree *popFrom = parseTree->shadow->tree;
-
- /* The pop hands back the ignore tree through leftIgnore and returns
- * the tree to keep on the shadow. */
- popFrom = popLeftIgnore( prg, sp, popFrom, &leftIgnore );
-
- parseTree->shadow->tree = popFrom;
-
- parseTree->flags &= ~PF_LEFT_IL_ATTACHED;
- }
-
- if ( parseTree->leftIgnore != 0 ) {
- assert( leftIgnore != 0 );
-
- /* Transfer the trees to accumIgnore. */
- ParseTree *ignore = parseTree->leftIgnore;
- parseTree->leftIgnore = 0;
-
- Kid *dataIgnore = leftIgnore->child;
- leftIgnore->child = 0;
-
- /* Walk both lists in lockstep, pairing each parse tree with its data
- * kid again and reversing both. The data list is assumed to be at
- * least as long as the parse tree list. */
- ParseTree *last = 0;
- Kid *dataLast = 0;
- while ( ignore != 0 ) {
- ParseTree *next = ignore->next;
- Kid *dataNext = dataIgnore->next;
-
- /* Put the data trees underneath the parse trees. */
- ignore->shadow = dataIgnore;
-
- /* Reverse. */
- ignore->next = last;
- dataIgnore->next = dataLast;
-
- /* Keep last for reversal. */
- last = ignore;
- dataLast = dataIgnore;
-
- ignore = next;
- dataIgnore = dataNext;
- }
-
- /* This replaces whatever accumIgnore held before. */
- pdaRun->accumIgnore = last;
- }
-
- /* Unlike detachRightIgnore this runs on every path, so leftIgnore may
- * still be 0 here.
- * NOTE(review): presumably treeDownref accepts a null tree -- confirm. */
- treeDownref( prg, sp, leftIgnore );
-}
-
-void handleError( Program *prg, Tree **sp, PdaRun *pdaRun )
-{
- /* Check the result. */
- if ( pdaRun->parseError ) {
- /* Error occured in the top-level parser. */
- reportParseError( prg, sp, pdaRun );
- }
- else {
- if ( isParserStopFinished( pdaRun ) ) {
- debug( REALM_PARSE, "stopping the parse\n" );
- pdaRun->stopParsing = true;
- }
- }
-}
-
-/* Turn the text the scanner just matched into an ignore tree with the given
- * id and hand it to the parser's accumulated ignore list. */
-void sendIgnore( Program *prg, Tree **sp, InputStream *inputStream, FsmRun *fsmRun, PdaRun *pdaRun, long id )
-{
-	debug( REALM_PARSE, "ignoring: %s\n", prg->rtd->lelInfo[id].name );
-
-	/* Make the ignore string. */
-	Head *ignoreStr = extractMatch( prg, fsmRun, inputStream );
-	updatePosition( inputStream, fsmRun->tokstart, ignoreStr->length );
-
-	/* The precision of %.*s must be an int, so the length is cast the same
-	 * way sendBackText does it. */
-	debug( REALM_PARSE, "ignoring: %.*s\n", (int)ignoreStr->length, ignoreStr->data );
-
-	Tree *tree = treeAllocate( prg );
-	tree->refs = 1;
-	tree->id = id;
-	tree->tokdata = ignoreStr;
-
-	/* Send it to the pdaRun. */
-	ignoreTree( prg, fsmRun, pdaRun, tree );
-}
-
-
-/* Make a string head for the text between tokstart and the scan position,
- * stamped with the stream's current line, column and byte. Doesn't consume. */
-Head *peekMatch( Program *prg, FsmRun *fsmRun, InputStream *inputStream )
-{
-	long matchLen = fsmRun->p - fsmRun->tokstart;
-	Head *match = stringAllocPointer( prg, fsmRun->tokstart, matchLen );
-
-	/* Remember where in the input the match begins. */
-	match->location = locationAllocate( prg );
-	match->location->line = inputStream->line;
-	match->location->column = inputStream->column;
-	match->location->byte = inputStream->byte;
-
-	debug( REALM_PARSE, "location byte: %d\n", inputStream->byte );
-
-	return match;
-}
-
-/* Same result as peekMatch, but the matched text is also consumed from the
- * input stream. */
-Head *extractMatch( Program *prg, FsmRun *fsmRun, InputStream *inputStream )
-{
-	long length = fsmRun->p - fsmRun->tokstart;
-
-	/* peekMatch builds the head and its location without touching the
-	 * stream or the scanner state. */
-	Head *head = peekMatch( prg, fsmRun, inputStream );
-
-	consumeData( inputStream, length );
-
-	return head;
-}
-
-/* Turn the text the scanner just matched into a token with the given id and
- * make it the parser's next input. */
-static void sendToken( Program *prg, Tree **sp, InputStream *inputStream, FsmRun *fsmRun, PdaRun *pdaRun, long id )
-{
-	/* Must be sampled before anything below can change the ignore list. */
-	int emptyIgnore = pdaRun->accumIgnore == 0;
-
-	/* Make the token data. */
-	Head *tokdata = extractMatch( prg, fsmRun, inputStream );
-
-	/* The precision of %.*s must be an int, so the length is cast the same
-	 * way sendBackText does it. */
-	debug( REALM_PARSE, "token: %s text: %.*s\n",
-		prg->rtd->lelInfo[id].name,
-		(int)stringLength(tokdata), stringData(tokdata) );
-
-	updatePosition( inputStream, fsmRun->tokstart, tokdata->length );
-
-	Kid *input = makeTokenWithData( prg, pdaRun, fsmRun, inputStream, id, tokdata );
-
-	incrementSteps( pdaRun );
-
-	ParseTree *parseTree = parseTreeAllocate( prg );
-	parseTree->id = input->tree->id;
-	parseTree->shadow = input;
-
-	pdaRun->parseInput = parseTree;
-
-	/* Store any alternate scanning region. input has already been
-	 * dereferenced above, so only the parser state needs checking. */
-	if ( pdaRun->cs >= 0 )
-		setRegion( pdaRun, emptyIgnore, parseTree );
-}
-
-/* Take the next tree off the input stream and make it the parser's next
- * input, flagged as artificial. */
-static void sendTree( Program *prg, Tree **sp, PdaRun *pdaRun, FsmRun *fsmRun, InputStream *inputStream )
-{
-	Kid *treeKid = kidAllocate( prg );
-	treeKid->tree = consumeTree( inputStream );
-
-	incrementSteps( pdaRun );
-
-	ParseTree *node = parseTreeAllocate( prg );
-	node->id = treeKid->tree->id;
-	node->flags |= PF_ARTIFICIAL;
-	node->shadow = treeKid;
-
-	pdaRun->parseInput = node;
-}
-
-/* Take the next tree off the input stream and add it to the accumulated
- * ignore list as an artificial ignore. */
-static void sendIgnoreTree( Program *prg, Tree **sp, PdaRun *pdaRun, FsmRun *fsmRun, InputStream *inputStream )
-{
-	ignoreTree2( prg, pdaRun, consumeTree( inputStream ) );
-}
-
-/* Send a token with the given id whose token data is a freshly allocated,
- * location-stamped head rather than matched text. The new token becomes the
- * parser's next input. */
-static void sendCi( Program *prg, Tree **sp, InputStream *inputStream, FsmRun *fsmRun, PdaRun *pdaRun, int id )
-{
-	debug( REALM_PARSE, "token: CI\n" );
-
-	/* Must be sampled before anything below can change the ignore list. */
-	int emptyIgnore = pdaRun->accumIgnore == 0;
-
-	/* Make the token data. */
-	Head *tokdata = headAllocate( prg );
-	tokdata->location = locationAllocate( prg );
-	tokdata->location->line = inputStream->line;
-	tokdata->location->column = inputStream->column;
-	tokdata->location->byte = inputStream->byte;
-
-	/* The precision of %.*s must be an int, so the length is cast the same
-	 * way sendBackText does it. */
-	debug( REALM_PARSE, "token: %s text: %.*s\n",
-		prg->rtd->lelInfo[id].name,
-		(int)stringLength(tokdata), stringData(tokdata) );
-
-	updatePosition( inputStream, fsmRun->tokstart, tokdata->length );
-
-	Kid *input = makeTokenWithData( prg, pdaRun, fsmRun, inputStream, id, tokdata );
-
-	incrementSteps( pdaRun );
-
-	ParseTree *parseTree = parseTreeAllocate( prg );
-	parseTree->id = input->tree->id;
-	parseTree->shadow = input;
-
-	pdaRun->parseInput = parseTree;
-
-	/* Store any alternate scanning region. input has already been
-	 * dereferenced above, so only the parser state needs checking. */
-	if ( pdaRun->cs >= 0 )
-		setRegion( pdaRun, emptyIgnore, parseTree );
-}
-
-
-/* Build the end-of-file token for this parser, stamped with the stream's
- * current location, make it the parser's next input, and point the scanner
- * at the entry state of the parser's next region. */
-static void sendEof( Program *prg, Tree **sp, InputStream *inputStream, FsmRun *fsmRun, PdaRun *pdaRun )
-{
-	debug( REALM_PARSE, "token: _EOF\n" );
-
-	incrementSteps( pdaRun );
-
-	/* Token data carries only a location. */
-	Head *eofHead = headAllocate( prg );
-	eofHead->location = locationAllocate( prg );
-	eofHead->location->line = inputStream->line;
-	eofHead->location->column = inputStream->column;
-	eofHead->location->byte = inputStream->byte;
-
-	Kid *input = kidAllocate( prg );
-	Tree *eofTree = treeAllocate( prg );
-	input->tree = eofTree;
-
-	eofTree->refs = 1;
-	eofTree->id = prg->rtd->eofLelIds[pdaRun->parserId];
-	eofTree->tokdata = eofHead;
-
-	/* Set the state using the state of the parser. */
-	fsmRun->region = pdaRunGetNextRegion( pdaRun, 0 );
-	fsmRun->preRegion = pdaRunGetNextPreRegion( pdaRun );
-	fsmRun->cs = fsmRun->tables->entryByRegion[fsmRun->region];
-
-	ParseTree *parseTree = parseTreeAllocate( prg );
-	parseTree->id = input->tree->id;
-	parseTree->shadow = input;
-
-	pdaRun->parseInput = parseTree;
-}
-
-/* Reset the scanner for a fresh token and choose its entry state from the
- * parser's next region. With a positive pre region the scan starts there and
- * the token region's entry state is kept in ncs.
- * NOTE(review): the test here is preRegion > 0 while ignoreTree tests
- * preRegion >= 0 -- confirm which one is meant for region 0. */
-void newToken( Program *prg, PdaRun *pdaRun, FsmRun *fsmRun )
-{
-	/* Init the scanner vars. */
-	fsmRun->act = 0;
-	fsmRun->tokstart = 0;
-	fsmRun->tokend = 0;
-	fsmRun->matchedToken = 0;
-
-	/* Set the state using the state of the parser. */
-	fsmRun->region = pdaRunGetNextRegion( pdaRun, 0 );
-	fsmRun->preRegion = pdaRunGetNextPreRegion( pdaRun );
-
-	if ( fsmRun->preRegion <= 0 ) {
-		debug( REALM_PARSE, "scanning using token region: %s\n",
-				prg->rtd->regionInfo[fsmRun->region].name );
-
-		fsmRun->cs = fsmRun->tables->entryByRegion[fsmRun->region];
-	}
-	else {
-		debug( REALM_PARSE, "pre region for next token: %s\n",
-				prg->rtd->regionInfo[fsmRun->preRegion].name );
-
-		fsmRun->cs = fsmRun->tables->entryByRegion[fsmRun->preRegion];
-		fsmRun->ncs = fsmRun->tables->entryByRegion[fsmRun->region];
-	}
-
-	/* Clear the mark array. */
-	memset( fsmRun->mark, 0, sizeof(fsmRun->mark) );
-}
-
-/* Push a backtrack point onto pdaRun->btPoint. The point references the tree
- * of the most recent accumulated ignore if there is one, otherwise the tree
- * of the most recent entry in the token list. Nothing is pushed when neither
- * exists. */
-static void pushBtPoint( Program *prg, PdaRun *pdaRun )
-{
-	Tree *btTree = 0;
-	if ( pdaRun->accumIgnore != 0 )
-		btTree = pdaRun->accumIgnore->shadow->tree;
-	else if ( pdaRun->tokenList != 0 )
-		btTree = pdaRun->tokenList->kid->tree;
-
-	if ( btTree == 0 )
-		return;
-
-	debug( REALM_PARSE, "pushing bt point with location byte %d\n",
-			( btTree->tokdata != 0 && btTree->tokdata->location != 0 ) ?
-			btTree->tokdata->location->byte : 0 );
-
-	/* The list entry holds its own reference to the tree. */
-	Kid *point = kidAllocate( prg );
-	treeUpref( btTree );
-	point->tree = btTree;
-
-	/* Link at the head of the list. */
-	point->next = pdaRun->btPoint;
-	pdaRun->btPoint = point;
-}
-
-
-/* Special (negative) results of scanToken. Values greater than zero are
- * token ids. The replacement lists are parenthesized so that the constants
- * behave as a single operand in any expression they are expanded into. */
-#define SCAN_UNDO              (-7)
-#define SCAN_IGNORE            (-6)
-#define SCAN_TREE              (-5)
-#define SCAN_TRY_AGAIN_LATER   (-4)
-#define SCAN_ERROR             (-3)
-#define SCAN_LANG_EL           (-2)
-#define SCAN_EOF               (-1)
-
-/*
- * Run the scanner until it yields a result for the parser.
- *
- * Returns one of:
- *   - a value > 0: fsmRun->matchedToken, the id of the matched token;
- *   - the current region's defaultToken, when the machine hits its error
- *     state and the region defines a default (defaultToken >= 0);
- *   - SCAN_UNDO when pdaRun->triggerUndo is set on entry;
- *   - SCAN_ERROR when the machine hits its error state with no default;
- *   - SCAN_EOF, SCAN_LANG_EL, SCAN_TREE or SCAN_IGNORE when getData reports
- *     the corresponding input type and no token is in progress;
- *   - SCAN_TRY_AGAIN_LATER when getData reports INPUT_EOD.
- *
- * When the run buffer is full a new one is chained in front of the old one
- * and the prefix of any partially scanned token is copied across.
- */
-long scanToken( Program *prg, PdaRun *pdaRun, FsmRun *fsmRun, InputStream *inputStream )
-{
-	if ( pdaRun->triggerUndo )
-		return SCAN_UNDO;
-
-	while ( true ) {
-		fsmExecute( fsmRun, inputStream );
-
-		/* First check if scanning stopped because we have a token. */
-		if ( fsmRun->matchedToken > 0 ) {
-			/* If the token has a marker indicating the end (due to trailing
-			 * context) then adjust data now. */
-			LangElInfo *lelInfo = prg->rtd->lelInfo;
-			if ( lelInfo[fsmRun->matchedToken].markId >= 0 )
-				fsmRun->p = fsmRun->mark[lelInfo[fsmRun->matchedToken].markId];
-
-			return fsmRun->matchedToken;
-		}
-
-		/* Check for error. */
-		if ( fsmRun->cs == fsmRun->tables->errorState ) {
-			/* If a token was started, but not finished (tokstart != 0) then
-			 * restore data to the beginning of that token. */
-			if ( fsmRun->tokstart != 0 )
-				fsmRun->p = fsmRun->tokstart;
-
-			/* Check for a default token in the region. If one is there
-			 * then send it and continue with the processing loop. */
-			if ( prg->rtd->regionInfo[fsmRun->region].defaultToken >= 0 ) {
-				fsmRun->tokstart = fsmRun->tokend = fsmRun->p;
-				return prg->rtd->regionInfo[fsmRun->region].defaultToken;
-			}
-
-			return SCAN_ERROR;
-		}
-
-		/* Got here because the state machine didn't match a token or
-		 * encounter an error. Must be because we got to the end of the buffer
-		 * data. */
-		assert( fsmRun->p == fsmRun->pe );
-
-		/* There may be space left in the current buffer. If not then we need
-		 * to make some. */
-		long space = fsmRun->runBuf->data + FSM_BUFSIZE - fsmRun->pe;
-		if ( space == 0 ) {
-			/* Create a new run buf. */
-			RunBuf *newBuf = newRunBuf();
-
-			/* If partway through a token then preserve the prefix. This is
-			 * the number of bytes carried over into the new buffer. */
-			long prefix = 0;
-
-			if ( fsmRun->tokstart == 0 ) {
-				/* No prefix. We filled the previous buffer. */
-				fsmRun->runBuf->length = FSM_BUFSIZE;
-			}
-			else {
-				int i;
-
-				debug( REALM_SCAN, "copying data over to new buffer\n" );
-				assert( fsmRun->runBuf->offset == 0 );
-
-				if ( fsmRun->tokstart == fsmRun->runBuf->data ) {
-					/* A token is started and it is already at the beginning
-					 * of the current buffer. This means buffer is full and it
-					 * must be grown. Probably need to do this sooner. */
-					fatal( "OUT OF BUFFER SPACE\n" );
-				}
-
-				/* There is data that needs to be shifted over. */
-				prefix = fsmRun->pe - fsmRun->tokstart;
-				memcpy( newBuf->data, fsmRun->tokstart, prefix );
-
-				/* Compute the length of the previous buffer. */
-				fsmRun->runBuf->length = FSM_BUFSIZE - prefix;
-
-				/* Compute tokstart and tokend. Every pointer into the token
-				 * moves by the same distance as its start does. */
-				long dist = fsmRun->tokstart - newBuf->data;
-
-				fsmRun->tokend -= dist;
-				fsmRun->tokstart = newBuf->data;
-
-				/* Shift any markers. */
-				for ( i = 0; i < MARK_SLOTS; i++ ) {
-					if ( fsmRun->mark[i] != 0 )
-						fsmRun->mark[i] -= dist;
-				}
-			}
-
-			fsmRun->p = fsmRun->pe = newBuf->data + prefix;
-			fsmRun->peof = 0;
-
-			/* The new buffer goes on the front of the run buffer list. */
-			newBuf->next = fsmRun->runBuf;
-			fsmRun->runBuf = newBuf;
-		}
-
-		/* We don't have any data. What is next in the input inputStream? */
-		space = fsmRun->runBuf->data + FSM_BUFSIZE - fsmRun->pe;
-		assert( space > 0 );
-
-		/* Get more data. */
-		int have = fsmRun->tokstart != 0 ? fsmRun->p - fsmRun->tokstart : 0;
-		int len = 0;
-		/* space is a long, so it must be printed with %ld. */
-		debug( REALM_SCAN, "fetching data: have: %d space: %ld\n", have, space );
-		int type = getData( fsmRun, inputStream, have, fsmRun->p, space, &len );
-
-		/* For the non-data input types: if a token is in progress, set peof
-		 * and go around again so the machine can finish it; otherwise report
-		 * the input type to the caller. */
-		switch ( type ) {
-			case INPUT_DATA:
-				fsmRun->pe = fsmRun->p + len;
-				break;
-
-			case INPUT_EOF:
-				if ( fsmRun->tokstart != 0 )
-					fsmRun->peof = fsmRun->pe;
-				else
-					return SCAN_EOF;
-				break;
-
-			case INPUT_EOD:
-				return SCAN_TRY_AGAIN_LATER;
-
-			case INPUT_LANG_EL:
-				if ( fsmRun->tokstart != 0 )
-					fsmRun->peof = fsmRun->pe;
-				else
-					return SCAN_LANG_EL;
-				break;
-
-			case INPUT_TREE:
-				if ( fsmRun->tokstart != 0 )
-					fsmRun->peof = fsmRun->pe;
-				else
-					return SCAN_TREE;
-				break;
-
-			case INPUT_IGNORE:
-				if ( fsmRun->tokstart != 0 )
-					fsmRun->peof = fsmRun->pe;
-				else
-					return SCAN_IGNORE;
-				break;
-
-			default:
-				/* Any other input type: just run the machine again. */
-				break;
-		}
-	}
-
-	/* Should not be reached. */
-	return SCAN_ERROR;
-}
-
-/*
- * Main scan-and-parse loop, written as a resumable routine.
- *
- * The function returns in the middle of its loop at several points; each of
- * those returns is immediately followed by a case label of the outer switch,
- * so calling again with that value as 'entry' resumes right after the return.
- *
- * Returns to the caller with:
- * PcrPreEof (after setting pdaRun->fi/frameId/code for the eof frame)
- * PcrGeneration (after setting pdaRun->fi/frameId/code for the token frame)
- * any value other than PcrDone returned by parseToken
- * PcrDone (the loop has finished)
- *
- * Accepts as entry:
- * PcrStart, PcrPreEof, PcrGeneration, PcrReduction, PcrReverse, PcrDone
- */
-
-long parseLoop( Program *prg, Tree **sp, PdaRun *pdaRun,
- FsmRun *fsmRun, InputStream *inputStream, long entry )
-{
- /* Initialized ahead of the switch so it is valid for every entry point. */
- LangElInfo *lelInfo = prg->rtd->lelInfo;
-
-switch ( entry ) {
-case PcrStart:
-
- pdaRun->stop = false;
-
- while ( true ) {
- debug( REALM_PARSE, "parse loop start %d:%d\n", inputStream->line, inputStream->column );
-
- /* Pull the current scanner from the parser. This can change during
- * parsing due to inputStream pushes, usually for the purpose of includes.
- * */
- pdaRun->tokenId = scanToken( prg, pdaRun, fsmRun, inputStream );
-
- /* A scan error while a pre region is recorded: switch to the entry state
- * saved in ncs and scan again.
- * NOTE(review): newToken only sets ncs when preRegion > 0, but the test
- * here is >= 0 -- confirm whether a preRegion of 0 can reach this point. */
- if ( pdaRun->tokenId == SCAN_ERROR ) {
- if ( fsmRun->preRegion >= 0 ) {
- fsmRun->preRegion = -1;
- fsmRun->cs = fsmRun->ncs;
- debug( REALM_PARSE, "moving from pre region to main region: %s\n",
- prg->rtd->regionInfo[fsmRun->region].name );
- continue;
- }
- }
-
- /* A scan error in a region with a ciLelId: send that element and go
- * straight to the parse step. */
- if ( pdaRun->tokenId == SCAN_ERROR &&
- ( prg->rtd->regionInfo[fsmRun->region].ciLelId > 0 ) )
- {
- debug( REALM_PARSE, "sending a collect ignore\n" );
- sendCi( prg, sp, inputStream, fsmRun, pdaRun, prg->rtd->regionInfo[fsmRun->region].ciLelId );
- goto yes;
- }
-
- if ( pdaRun->tokenId == SCAN_TRY_AGAIN_LATER ) {
- debug( REALM_PARSE, "scanner says try again later\n" );
- break;
- }
-
- assert( pdaRun->parseInput == 0 );
- pdaRun->parseInput = 0;
-
- /* Check for EOF. */
- if ( pdaRun->tokenId == SCAN_EOF ) {
- inputStream->eofSent = true;
- sendEof( prg, sp, inputStream, fsmRun, pdaRun );
-
- pdaRun->frameId = prg->rtd->regionInfo[fsmRun->region].eofFrameId;
-
- if ( prg->ctxDepParsing && pdaRun->frameId >= 0 ) {
- debug( REALM_PARSE, "HAVE PRE_EOF BLOCK\n" );
-
- pdaRun->fi = &prg->rtd->frameInfo[pdaRun->frameId];
- pdaRun->code = pdaRun->fi->codeWV;
-
- /* Hand control back to the caller; execution resumes at the label below
- * when the caller re-enters with PcrPreEof. */
-return PcrPreEof;
-case PcrPreEof:
- makeReverseCode( pdaRun );
- }
- }
- else if ( pdaRun->tokenId == SCAN_UNDO ) {
- /* Fall through with parseInput = 0. FIXME: Do we need to send back ignore? */
- debug( REALM_PARSE, "invoking undo from the scanner\n" );
- }
- else if ( pdaRun->tokenId == SCAN_ERROR ) {
- /* Scanner error, maybe retry. */
- if ( pdaRun->accumIgnore == 0 && pdaRunGetNextRegion( pdaRun, 1 ) != 0 ) {
- debug( REALM_PARSE, "scanner failed, trying next region\n" );
-
- pdaRun->nextRegionInd += 1;
- goto skipSend;
- }
- else if ( pdaRun->numRetry > 0 ) {
- debug( REALM_PARSE, "invoking parse error from the scanner\n" );
-
- /* Fall through to send null (error). */
- pushBtPoint( prg, pdaRun );
- }
- else {
- debug( REALM_PARSE, "no alternate scanning regions\n" );
-
- /* There are no alternative scanning regions to try, nor are
- * there any alternatives stored in the current parse tree. No
- * choice but to end the parse. */
- pushBtPoint( prg, pdaRun );
-
- reportParseError( prg, sp, pdaRun );
- pdaRun->parseError = 1;
- goto skipSend;
- }
- }
- else if ( pdaRun->tokenId == SCAN_LANG_EL ) {
- debug( REALM_PARSE, "sending an named lang el\n" );
-
- /* A named language element (parsing colm program). */
- sendNamedLangEl( prg, sp, pdaRun, fsmRun, inputStream );
- }
- else if ( pdaRun->tokenId == SCAN_TREE ) {
- debug( REALM_PARSE, "sending a tree\n" );
-
- /* A tree already built. */
- sendTree( prg, sp, pdaRun, fsmRun, inputStream );
- }
- else if ( pdaRun->tokenId == SCAN_IGNORE ) {
- debug( REALM_PARSE, "sending an ignore token\n" );
-
- /* A tree to ignore. */
- sendIgnoreTree( prg, sp, pdaRun, fsmRun, inputStream );
- goto skipSend;
- }
- else if ( prg->ctxDepParsing && lelInfo[pdaRun->tokenId].frameId >= 0 ) {
- /* Has a generation action. */
- debug( REALM_PARSE, "token gen action: %s\n",
- prg->rtd->lelInfo[pdaRun->tokenId].name );
-
- /* Make the token data. */
- pdaRun->tokdata = peekMatch( prg, fsmRun, inputStream );
-
- /* Note that we don't update the position now. It is done when the token
- * data is pulled from the inputStream. */
-
- fsmRun->p = fsmRun->tokstart;
- fsmRun->tokstart = 0;
-
- pdaRun->fi = &prg->rtd->frameInfo[prg->rtd->lelInfo[pdaRun->tokenId].frameId];
- pdaRun->frameId = prg->rtd->lelInfo[pdaRun->tokenId].frameId;
- pdaRun->code = pdaRun->fi->codeWV;
-
- /* Hand control back to the caller; execution resumes at the label below
- * when the caller re-enters with PcrGeneration. */
-return PcrGeneration;
-case PcrGeneration:
-
- makeReverseCode( pdaRun );
-
- /* Finished with the match text. */
- stringFree( prg, pdaRun->tokdata );
-
- goto skipSend;
- }
- else if ( lelInfo[pdaRun->tokenId].ignore ) {
- debug( REALM_PARSE, "sending an ignore token: %s\n",
- prg->rtd->lelInfo[pdaRun->tokenId].name );
-
- /* Is an ignore token. */
- sendIgnore( prg, sp, inputStream, fsmRun, pdaRun, pdaRun->tokenId );
- goto skipSend;
- }
- else {
- debug( REALM_PARSE, "sending an a plain old token: %s\n",
- prg->rtd->lelInfo[pdaRun->tokenId].name );
-
- /* Is a plain token. */
- sendToken( prg, sp, inputStream, fsmRun, pdaRun, pdaRun->tokenId );
- }
-yes:
-
- if ( pdaRun->parseInput != 0 )
- transferReverseCode( pdaRun, pdaRun->parseInput );
-
- if ( pdaRun->parseInput != 0 ) {
- /* If it's a nonterminal with a termdup then flip the parse tree to the terminal. */
- if ( pdaRun->parseInput->id >= prg->rtd->firstNonTermId ) {
- pdaRun->parseInput->id = prg->rtd->lelInfo[pdaRun->parseInput->id].termDupId;
- pdaRun->parseInput->flags |= PF_TERM_DUP;
- }
- }
-
- long pcr = parseToken( prg, sp, pdaRun, fsmRun, inputStream, PcrStart );
-
- /* While parseToken has not finished, pass its result up to the caller.
- * On re-entry (PcrReduction or PcrReverse) pcr is reassigned from
- * parseToken before the loop condition reads it again. */
- while ( pcr != PcrDone ) {
-
-return pcr;
-case PcrReduction:
-case PcrReverse:
-
- pcr = parseToken( prg, sp, pdaRun, fsmRun, inputStream, entry );
- }
-
- assert( pcr == PcrDone );
-
- handleError( prg, sp, pdaRun );
-
-skipSend:
- newToken( prg, pdaRun, fsmRun );
-
- /* Various stop conditions. This should all be covered by one test
- * eventually. */
-
- if ( pdaRun->triggerUndo ) {
- debug( REALM_PARSE, "parsing stopped by triggerUndo\n" );
- break;
- }
-
- if ( inputStream->eofSent ) {
- debug( REALM_PARSE, "parsing stopped by EOF\n" );
- break;
- }
-
- if ( pdaRun->stopParsing ) {
- debug( REALM_PARSE, "scanner has been stopped\n" );
- break;
- }
-
- if ( pdaRun->stop ) {
- debug( REALM_PARSE, "parsing has been stopped by consumedCount\n" );
- break;
- }
-
- if ( prg->induceExit ) {
- debug( REALM_PARSE, "parsing has been stopped by a call to exit\n" );
- break;
- }
-
- if ( pdaRun->parseError ) {
- debug( REALM_PARSE, "parsing stopped by a parse error\n" );
- break;
- }
- }
-
- /* Leaving the while loop falls into PcrDone, which just exits the switch. */
-case PcrDone:
-break; }
-
- return PcrDone;
-}
-
-/* Look up a token region relative to the parser's current region index.
- * An offset of zero gives the region at nextRegionInd; a non-zero offset
- * peeks at a following entry of the tokenRegions table. */
-int pdaRunGetNextRegion( PdaRun *pdaRun, int offset )
-{
-	PdaTables *tables = pdaRun->tables;
-	return tables->tokenRegions[pdaRun->nextRegionInd + offset];
-}
-
-/* Look up the pre-region stored at the parser's current region index. */
-int pdaRunGetNextPreRegion( PdaRun *pdaRun )
-{
-	PdaTables *tables = pdaRun->tables;
-	return tables->tokenPreRegions[pdaRun->nextRegionInd];
-}
-
-/* Fetch the data tree that is the result of the parse. Returns 0 if the
- * parse ended in error or if the chosen stack entry has no shadow. With
- * 'stop' set the result is taken from the top of the stack, otherwise from
- * the entry directly underneath it. */
-Tree *getParsedRoot( PdaRun *pdaRun, int stop )
-{
-	if ( pdaRun->parseError )
-		return 0;
-
-	ParseTree *holder = stop ? pdaRun->stackTop : pdaRun->stackTop->next;
-	return holder->shadow != 0 ? holder->shadow->tree : 0;
-}
-
-/* Free a list of parse trees linked through 'next'. For each node the shadow
- * data tree is downreffed and its kid freed, then the child list and the left
- * and right ignore lists are cleared recursively before the node itself is
- * freed. */
-void clearParseTree( Program *prg, Tree **sp, ParseTree *parseTree )
-{
-	ParseTree *cur, *following;
-
-	for ( cur = parseTree; cur != 0; cur = following ) {
-		/* Grab the link first, the node is freed below. */
-		following = cur->next;
-
-		Kid *data = cur->shadow;
-		if ( data != 0 ) {
-			treeDownref( prg, sp, data->tree );
-			kidFree( prg, data );
-		}
-
-		if ( cur->child != 0 )
-			clearParseTree( prg, sp, cur->child );
-
-		if ( cur->leftIgnore != 0 )
-			clearParseTree( prg, sp, cur->leftIgnore );
-
-		if ( cur->rightIgnore != 0 )
-			clearParseTree( prg, sp, cur->rightIgnore );
-
-		parseTreeFree( prg, cur );
-	}
-}
-
-/* Release what a PdaRun still holds: the parse stack, the token list, the
- * backtrack points, any accumulated ignores, the context tree and the
- * reverse code vectors. */
-void clearPdaRun( Program *prg, Tree **sp, PdaRun *pdaRun )
-{
-	Ref *tok, *tokNext;
-	Kid *point, *pointNext;
-
-	/* The stack, along with the parse trees hanging off of it. */
-	clearParseTree( prg, sp, pdaRun->stackTop );
-	pdaRun->stackTop = 0;
-
-	/* Token list entries are freed; the kids they point at are not touched
-	 * here. */
-	for ( tok = pdaRun->tokenList; tok != 0; tok = tokNext ) {
-		tokNext = tok->next;
-		kidFree( prg, (Kid*)tok );
-	}
-	pdaRun->tokenList = 0;
-
-	/* Each backtrack point gives up its tree reference and is freed. */
-	for ( point = pdaRun->btPoint; point != 0; point = pointNext ) {
-		pointNext = point->next;
-		treeDownref( prg, sp, point->tree );
-		kidFree( prg, (Kid*)point );
-	}
-	pdaRun->btPoint = 0;
-
-	/* Ignores that were accumulated but never attached. */
-	clearParseTree( prg, sp, pdaRun->accumIgnore );
-	pdaRun->accumIgnore = 0;
-
-	if ( pdaRun->context != 0 )
-		treeDownref( prg, sp, pdaRun->context );
-
-	/* Reverse code: downref what it references, then empty both vectors. */
-	rcodeDownrefAll( prg, sp, &pdaRun->reverseCode );
-	rtCodeVectEmpty( &pdaRun->reverseCode );
-	rtCodeVectEmpty( &pdaRun->rcodeCollect );
-}
-
-/* Returns 1 when the stack holds exactly one entry above the bottom entry
- * and the id of that top entry equals the stop target; otherwise 0. */
-int isParserStopFinished( PdaRun *pdaRun )
-{
-	ParseTree *below = pdaRun->stackTop->next;
-
-	if ( below == 0 )
-		return 0;
-
-	if ( below->next != 0 )
-		return 0;
-
-	return pdaRun->stackTop->id == pdaRun->stopTarget;
-}
-
-/* Initialize a PdaRun for the given parser. The struct is zeroed, the tables
- * and the caller's parameters are recorded, the start state is loaded, a
- * sentinel entry is placed on the stack and the bindings and reverse code
- * vectors are initialized. The fsmRun argument is not used here. */
-void initPdaRun( PdaRun *pdaRun, Program *prg, PdaTables *tables,
-		FsmRun *fsmRun, int parserId, long stopTarget, int revertOn, Tree *context )
-{
-	memset( pdaRun, 0, sizeof(PdaRun) );
-
-	/* Parameters supplied by the caller. */
-	pdaRun->revertOn = revertOn;
-	pdaRun->stopTarget = stopTarget;
-	pdaRun->parserId = parserId;
-	pdaRun->tables = tables;
-	pdaRun->targetSteps = -1;
-
-	debug( REALM_PARSE, "initializing PdaRun\n" );
-
-	/* FIXME: need the right one here. */
-	pdaRun->cs = prg->rtd->startStates[pdaRun->parserId];
-
-	/* The data side of the stack sentinel: an empty tree, one reference. */
-	Kid *sentinal = kidAllocate( prg );
-	sentinal->tree = treeAllocate( prg );
-	sentinal->tree->refs = 1;
-
-	/* The parse tree side of the sentinel becomes the initial stack top. */
-	ParseTree *bottom = parseTreeAllocate( prg );
-	bottom->state = -1;
-	bottom->shadow = sentinal;
-	pdaRun->stackTop = bottom;
-
-	/* The first scanning region is determined by the start state. */
-	pdaRun->nextRegionInd = pdaRun->tables->tokenRegionInds[pdaRun->cs];
-
-	/* Backtracking and scanner control state. */
-	pdaRun->numRetry = 0;
-	pdaRun->stopParsing = false;
-	pdaRun->accumIgnore = 0;
-	pdaRun->btPoint = 0;
-	pdaRun->checkNext = false;
-	pdaRun->checkStop = false;
-
-	initBindings( pdaRun );
-
-	initRtCodeVect( &pdaRun->reverseCode );
-	initRtCodeVect( &pdaRun->rcodeCollect );
-
-	pdaRun->context = splitTree( prg, context );
-
-	/* Remaining per-parse state. */
-	pdaRun->parseError = 0;
-	pdaRun->parseInput = 0;
-	pdaRun->triggerUndo = 0;
-	pdaRun->tokenId = 0;
-	pdaRun->onDeck = false;
-	pdaRun->parsed = 0;
-	pdaRun->reject = false;
-	pdaRun->rcBlockCount = 0;
-}
-
-/* Compute the parser state reached from the entry on top of the stack. If
- * that entry's recorded state is negative the parser's start state is
- * returned. Otherwise the transition for the entry's id out of its recorded
- * state is looked up in the tables and the transition's target is returned. */
-long stackTopTarget( Program *prg, PdaRun *pdaRun )
-{
-	ParseTree *top = pdaRun->stackTop;
-	PdaTables *tables = pdaRun->tables;
-
-	if ( top->state < 0 )
-		return prg->rtd->startStates[pdaRun->parserId];
-
-	/* Position of the transition: the state's offset plus the distance of
-	 * the id from the state's low key. */
-	int pos = (int)tables->indicies[tables->offsets[top->state] +
-			(top->id - tables->keys[top->state<<1])];
-
-	return tables->targs[pos];
-}
-
-/*
- * Local commit:
- * -clears reparse flags underneath
- * -must be possible to backtrack after
- * Global commit (revertOn)
- * -clears all reparse flags
- * -must be possible to backtrack after
- * Global commit (!revertOn)
- * -clears all reparse flags
- * -clears all 'parsed' reverse code
- * -clears all reverse code
- * -clears all alg structures
- */
-
-/* Non-zero if the parse tree carries the PF_COMMITTED flag. The value is the
- * masked flag bits, not a normalized 0/1. */
-int beenCommitted( ParseTree *parseTree )
-{
-	int committedBits = parseTree->flags & PF_COMMITTED;
-	return committedBits;
-}
-
-/* Step backwards over one block of reverse code. The pointer is moved back
- * by SIZEOF_WORD, a length is obtained there with read_word_p, and the
- * pointer is then moved back by that length. Returns the resulting position. */
-Code *backupOverRcode( Code *rcode )
-{
-	Word blockLen;
-	Code *lenPos = rcode - SIZEOF_WORD;
-
-	read_word_p( blockLen, lenPos );
-
-	return lenPos - blockLen;
-}
-
-/* The top level of the stack is linked right-to-left. Trees underneath are
- * linked left-to-right. */
-/*
- * Commit one stack entry and the parse trees below it.
- *
- * prg, pdaRun: program and parser run; numRetry of the run is reset to zero
- * at the end.
- * root: base of the explicit traversal stack. The local sp starts here and
- * is expected to be back here on exit (see the final assert). vm_push and
- * vm_pop presumably operate on this local sp -- confirm against their
- * definitions.
- * lel: the entry to commit.
- * rcode: in/out position in the reverse code, moved backwards with
- * backupOverRcode as entries carrying PF_HAS_RCODE are visited.
- * causeReduce: in/out count used to delay backing up over rcode until the
- * nonterminals counted by an entry's causeReduce have been visited.
- *
- * Every visited entry gets PF_COMMITTED set.
- */
-void commitKid( Program *prg, PdaRun *pdaRun, Tree **root, ParseTree *lel, Code **rcode, long *causeReduce )
-{
- ParseTree *tree = 0;
- Tree **sp = root;
- //Tree *restore = 0;
-
- /* Visit the element in lel. */
-head:
- /* Commit */
- debug( REALM_PARSE, "commit: visiting %s\n",
- prg->rtd->lelInfo[lel->id].name );
-
- /* Load up the parsed tree. */
- tree = lel;
-
- /* Check for reverse code. */
- //restore = 0;
- if ( tree->flags & PF_HAS_RCODE ) {
- /* If tree caused some reductions, now is not the right time to backup
- * over the reverse code. We need to backup over the reductions first. Store
- * the count of the reductions and do it when the count drops to zero. */
- if ( tree->causeReduce > 0 ) {
- /* The top reduce block does not correspond to this alg. */
- debug( REALM_PARSE, "commit: causeReduce found, delaying backup: %ld\n",
- (long)tree->causeReduce );
- *causeReduce = tree->causeReduce;
- }
- else {
- *rcode = backupOverRcode( *rcode );
-
- //if ( **rcode == IN_RESTORE_LHS ) {
- // debug( REALM_PARSE, "commit: has restore_lhs\n" );
- // read_tree_p( restore, (*rcode+1) );
- //}
- }
- }
-
- //FIXME: what was this about?
- //if ( restore != 0 )
- // tree = restore;
-
- /* All the parse algorithm data except for the RCODE flag is in the
- * original. That is why we restore first, then we can clear the retry
- * values. */
-
- /* Check causeReduce, might be time to backup over the reverse code
- * belonging to a nonterminal that caused previous reductions. */
- if ( *causeReduce > 0 &&
- tree->id >= prg->rtd->firstNonTermId &&
- !(tree->flags & PF_TERM_DUP) )
- {
- *causeReduce -= 1;
-
- if ( *causeReduce == 0 ) {
- debug( REALM_PARSE, "commit: causeReduce dropped to zero, backing up over rcode\n" );
-
- /* Cause reduce just dropped down to zero. */
- *rcode = backupOverRcode( *rcode );
- }
- }
-
- ///* FIXME: why was this here?
- // * Reset retries. */
- //if ( tree->flags & AF_PARSED ) {
- // if ( tree->retryLower > 0 ) {
- // pdaRun->numRetry -= 1;
- // tree->retryLower = 0;
- // }
- // if ( tree->retryUpper > 0 ) {
- // pdaRun->numRetry -= 1;
- // tree->retryUpper = 0;
- // }
- //}
-
- tree->flags |= PF_COMMITTED;
-
- /* Do not recurse on trees that are terminal dups. */
- if ( !(tree->flags & PF_TERM_DUP) &&
- !(tree->flags & PF_NAMED) &&
- !(tree->flags & PF_ARTIFICIAL) &&
- tree->child != 0 )
- {
- /* Push the parent, then every child in list order. When the loop ends
- * lel is 0, so the first pop below (the last child, whose next is 0)
- * is taken as a backwards move. */
- vm_push( (Tree*)lel );
- lel = tree->child;
-
- if ( lel != 0 ) {
- while ( lel != 0 ) {
- vm_push( (Tree*)lel );
- lel = lel->next;
- }
- }
- }
-
- /* Unwind the traversal stack until it is back at root. */
-backup:
- if ( sp != root ) {
- ParseTree *next = (ParseTree*)vm_pop();
- if ( next->next == lel ) {
- /* Moving backwards. */
- lel = next;
-
- if ( !beenCommitted( lel ) )
- goto head;
- }
- else {
- /* Moving upwards. */
- lel = next;
- }
-
- goto backup;
- }
-
- pdaRun->numRetry = 0;
- assert( sp == root );
-}
-
-/* Commit the stack from the top down, stopping at the first entry that has
- * already been committed. If the run was not created with revertOn, the
- * references held by the reverse code are then released. */
-void commitFull( Program *prg, Tree **sp, PdaRun *pdaRun, long causeReduce )
-{
-	debug( REALM_PARSE, "running full commit" );
-
-	/* Start at the end of the reverse code; commitKid walks it backwards. */
-	Code *rcode = pdaRun->reverseCode.data + pdaRun->reverseCode.tabLen;
-
-	/* The top level of the stack is linked right to left, which is the
-	 * order in which entries must be committed. */
-	ParseTree *entry;
-	for ( entry = pdaRun->stackTop;
-			entry != 0 && !beenCommitted( entry );
-			entry = entry->next )
-	{
-		commitKid( prg, pdaRun, sp, entry, &rcode, &causeReduce );
-	}
-
-	/* With revertOn set it may still be necessary to back up over the parse
-	 * statement, so the reverse code has to stay. */
-	if ( pdaRun->revertOn )
-		return;
-
-	rcodeDownrefAll( prg, sp, &pdaRun->reverseCode );
-}
-
-/*
- * shift: retry goes into lower of shifted node.
- * reduce: retry goes into upper of reduced node.
- * shift-reduce: cannot be a retry
- */
-
-/* Stops on:
- * PcrReduction
- * PcrRevToken
- * PcrRevReduction
- */
-long parseToken( Program *prg, Tree **sp, PdaRun *pdaRun,
- FsmRun *fsmRun, InputStream *inputStream, long entry )
-{
- int pos;
- unsigned int *action;
- int rhsLen;
- int owner;
- int induceReject;
- int indPos;
- //LangElInfo *lelInfo = prg->rtd->lelInfo;
-
-switch ( entry ) {
-case PcrStart:
-
- /* The scanner will send a null token if it can't find a token. */
- if ( pdaRun->parseInput == 0 )
- goto parseError;
-
- /* This will cause parseInput to be lost. This
- * path should be traced. */
- if ( pdaRun->cs < 0 )
- return PcrDone;
-
- /* Record the state in the parse tree. */
- pdaRun->parseInput->state = pdaRun->cs;
-
-again:
- if ( pdaRun->parseInput == 0 )
- goto _out;
-
- pdaRun->lel = pdaRun->parseInput;
- pdaRun->curState = pdaRun->cs;
-
- if ( pdaRun->lel->id < pdaRun->tables->keys[pdaRun->curState<<1] ||
- pdaRun->lel->id > pdaRun->tables->keys[(pdaRun->curState<<1)+1] ) {
- debug( REALM_PARSE, "parse error, no transition 1\n" );
- pushBtPoint( prg, pdaRun );
- goto parseError;
- }
-
- indPos = pdaRun->tables->offsets[pdaRun->curState] +
- (pdaRun->lel->id - pdaRun->tables->keys[pdaRun->curState<<1]);
-
- owner = pdaRun->tables->owners[indPos];
- if ( owner != pdaRun->curState ) {
- debug( REALM_PARSE, "parse error, no transition 2\n" );
- pushBtPoint( prg, pdaRun );
- goto parseError;
- }
-
- pos = pdaRun->tables->indicies[indPos];
- if ( pos < 0 ) {
- debug( REALM_PARSE, "parse error, no transition 3\n" );
- pushBtPoint( prg, pdaRun );
- goto parseError;
- }
-
- /* Checking complete. */
-
- induceReject = false;
- pdaRun->cs = pdaRun->tables->targs[pos];
- action = pdaRun->tables->actions + pdaRun->tables->actInds[pos];
- if ( pdaRun->lel->retryLower )
- action += pdaRun->lel->retryLower;
-
- /*
- * Shift
- */
-
- if ( *action & act_sb ) {
- debug( REALM_PARSE, "shifted: %s\n",
- prg->rtd->lelInfo[pdaRun->lel->id].name );
- /* Consume. */
- pdaRun->parseInput = pdaRun->parseInput->next;
-
- pdaRun->lel->state = pdaRun->curState;
-
- /* If its a token then attach ignores and record it in the token list
- * of the next ignore attachment to use. */
- if ( pdaRun->lel->id < prg->rtd->firstNonTermId ) {
- if ( pdaRun->lel->causeReduce == 0 )
- attachRightIgnore( prg, sp, pdaRun, pdaRun->stackTop );
- }
-
- pdaRun->lel->next = pdaRun->stackTop;
- pdaRun->stackTop = pdaRun->lel;
-
- /* If its a token then attach ignores and record it in the token list
- * of the next ignore attachment to use. */
- if ( pdaRun->lel->id < prg->rtd->firstNonTermId ) {
- attachLeftIgnore( prg, sp, pdaRun, pdaRun->lel );
-
- Ref *ref = (Ref*)kidAllocate( prg );
- ref->kid = pdaRun->lel->shadow;
- //treeUpref( pdaRun->tree );
- ref->next = pdaRun->tokenList;
- pdaRun->tokenList = ref;
- }
-
- if ( action[1] == 0 )
- pdaRun->lel->retryLower = 0;
- else {
- debug( REALM_PARSE, "retry: %p\n", pdaRun->stackTop );
- pdaRun->lel->retryLower += 1;
- assert( pdaRun->lel->retryUpper == 0 );
- /* FIXME: Has the retry already been counted? */
- pdaRun->numRetry += 1;
- }
- }
-
- /*
- * Commit
- */
-
- if ( pdaRun->tables->commitLen[pos] != 0 ) {
- long causeReduce = 0;
- if ( pdaRun->parseInput != 0 ) {
- if ( pdaRun->parseInput->flags & PF_HAS_RCODE )
- causeReduce = pdaRun->parseInput->causeReduce;
- }
- commitFull( prg, sp, pdaRun, causeReduce );
- }
-
- /*
- * Reduce
- */
-
- if ( *action & act_rb ) {
- int r, objectLength;
- ParseTree *last, *child;
- Kid *attrs;
- Kid *dataLast, *dataChild;
-
- /* If there was shift don't attach again. */
- if ( !( *action & act_sb ) && pdaRun->lel->id < prg->rtd->firstNonTermId )
- attachRightIgnore( prg, sp, pdaRun, pdaRun->stackTop );
-
- pdaRun->reduction = *action >> 2;
-
- if ( pdaRun->parseInput != 0 )
- pdaRun->parseInput->causeReduce += 1;
-
- Kid *value = kidAllocate( prg );
- value->tree = treeAllocate( prg );
- value->tree->refs = 1;
- value->tree->id = prg->rtd->prodInfo[pdaRun->reduction].lhsId;
- value->tree->prodNum = prg->rtd->prodInfo[pdaRun->reduction].prodNum;
-
- pdaRun->redLel = parseTreeAllocate( prg );
- pdaRun->redLel->id = prg->rtd->prodInfo[pdaRun->reduction].lhsId;
- pdaRun->redLel->next = 0;
- pdaRun->redLel->causeReduce = 0;
- pdaRun->redLel->retryLower = 0;
- pdaRun->redLel->shadow = value;
-
- /* Transfer. */
- pdaRun->redLel->retryUpper = pdaRun->lel->retryLower;
- pdaRun->lel->retryLower = 0;
-
- /* Allocate the attributes. */
- objectLength = prg->rtd->lelInfo[pdaRun->redLel->id].objectLength;
- attrs = allocAttrs( prg, objectLength );
-
- /* Build the list of children. We will be giving up a reference when we
- * detach parse tree and data tree, but gaining the reference when we
- * put the children under the new data tree. No need to alter refcounts
- * here. */
- rhsLen = prg->rtd->prodInfo[pdaRun->reduction].length;
- child = last = 0;
- dataChild = dataLast = 0;
- for ( r = 0; r < rhsLen; r++ ) {
-
- /* The child. */
- child = pdaRun->stackTop;
- dataChild = child->shadow;
-
- /* Pop. */
- pdaRun->stackTop = pdaRun->stackTop->next;
-
- /* Detach the parse tree from the data. */
- child->shadow = 0;
-
- /* Reverse list. */
- child->next = last;
- dataChild->next = dataLast;
-
- /* Track last for reversal. */
- last = child;
- dataLast = dataChild;
- }
-
- pdaRun->redLel->child = child;
- pdaRun->redLel->shadow->tree->child = kidListConcat( attrs, dataChild );
-
- debug( REALM_PARSE, "reduced: %s rhsLen %d\n",
- prg->rtd->prodInfo[pdaRun->reduction].name, rhsLen );
- if ( action[1] == 0 )
- pdaRun->redLel->retryUpper = 0;
- else {
- pdaRun->redLel->retryUpper += 1;
- assert( pdaRun->lel->retryLower == 0 );
- pdaRun->numRetry += 1;
- debug( REALM_PARSE, "retry: %p\n", pdaRun->redLel );
- }
-
- /* When the production is of zero length we stay in the same state.
- * Otherwise we use the state stored in the first child. */
- pdaRun->cs = rhsLen == 0 ? pdaRun->curState : child->state;
-
- if ( prg->ctxDepParsing && prg->rtd->prodInfo[pdaRun->reduction].frameId >= 0 ) {
- /* Frame info for reduction. */
- pdaRun->fi = &prg->rtd->frameInfo[prg->rtd->prodInfo[pdaRun->reduction].frameId];
- pdaRun->frameId = prg->rtd->prodInfo[pdaRun->reduction].frameId;
- pdaRun->reject = false;
- pdaRun->parsed = 0;
- pdaRun->code = pdaRun->fi->codeWV;
-
-return PcrReduction;
-case PcrReduction:
-
- if ( prg->induceExit )
- goto fail;
-
- /* If the lhs was stored and it changed then we need to restore the
- * original upon backtracking, otherwise downref since we took a
- * copy above. */
- if ( pdaRun->parsed != 0 ) {
- if ( pdaRun->parsed != pdaRun->redLel->shadow->tree ) {
- debug( REALM_PARSE, "lhs tree was modified, adding a restore instruction\n" );
-//
-// /* Make it into a parse tree. */
-// Tree *newPt = prepParseTree( prg, sp, pdaRun->redLel->tree );
-// treeDownref( prg, sp, pdaRun->redLel->tree );
-//
-// /* Copy it in. */
-// pdaRun->redLel->tree = newPt;
-// treeUpref( pdaRun->redLel->tree );
-
- /* Add the restore instruct. */
- append( &pdaRun->rcodeCollect, IN_RESTORE_LHS );
- appendWord( &pdaRun->rcodeCollect, (Word)pdaRun->parsed );
- append( &pdaRun->rcodeCollect, SIZEOF_CODE + SIZEOF_WORD );
- }
- else {
- /* Not changed. Done with parsed. */
- treeDownref( prg, sp, pdaRun->parsed );
- }
- pdaRun->parsed = 0;
- }
-
- /* Pull out the reverse code, if any. */
- makeReverseCode( pdaRun );
- transferReverseCode( pdaRun, pdaRun->redLel );
-
- /* Perhaps the execution environment is telling us we need to
- * reject the reduction. */
- induceReject = pdaRun->reject;
- }
-
- /* If the left hand side was replaced then the only parse algorithm
- * data that is contained in it will the PF_HAS_RCODE flag. Everthing
- * else will be in the original. This requires that we restore first
- * when going backwards and when doing a commit. */
-
- if ( induceReject ) {
- debug( REALM_PARSE, "error induced during reduction of %s\n",
- prg->rtd->lelInfo[pdaRun->redLel->id].name );
- pdaRun->redLel->state = pdaRun->curState;
- pdaRun->redLel->next = pdaRun->stackTop;
- pdaRun->stackTop = pdaRun->redLel;
- /* FIXME: What is the right argument here? */
- pushBtPoint( prg, pdaRun );
- goto parseError;
- }
-
- pdaRun->redLel->next = pdaRun->parseInput;
- pdaRun->parseInput = pdaRun->redLel;
- }
-
- goto again;
-
-parseError:
- debug( REALM_PARSE, "hit error, backtracking\n" );
-
- if ( pdaRun->numRetry == 0 ) {
- debug( REALM_PARSE, "out of retries failing parse\n" );
- goto fail;
- }
-
- while ( 1 ) {
- if ( pdaRun->onDeck ) {
- debug( REALM_BYTECODE, "dropping out for reverse code call\n" );
-
- pdaRun->frameId = -1;
- pdaRun->code = popReverseCode( &pdaRun->reverseCode );
-
-return PcrReverse;
-case PcrReverse:
-
- decrementSteps( pdaRun );
- }
- else if ( pdaRun->checkNext ) {
- pdaRun->checkNext = false;
-
- if ( pdaRun->next > 0 && pdaRun->tables->tokenRegions[pdaRun->next] != 0 ) {
- debug( REALM_PARSE, "found a new region\n" );
- pdaRun->numRetry -= 1;
- pdaRun->cs = stackTopTarget( prg, pdaRun );
- pdaRun->nextRegionInd = pdaRun->next;
- return PcrDone;
- }
- }
- else if ( pdaRun->checkStop ) {
- pdaRun->checkStop = false;
-
- if ( pdaRun->stop ) {
- debug( REALM_PARSE, "stopping the backtracking, steps is %d\n", pdaRun->steps );
-
- pdaRun->cs = stackTopTarget( prg, pdaRun );
- goto _out;
- }
- }
- else if ( pdaRun->parseInput != 0 ) {
- /* Either we are dealing with a terminal that was
- * shifted or a nonterminal that was reduced. */
- if ( pdaRun->parseInput->id < prg->rtd->firstNonTermId ) {
- assert( pdaRun->parseInput->retryUpper == 0 );
-
- if ( pdaRun->parseInput->retryLower != 0 ) {
- debug( REALM_PARSE, "found retry targ: %p\n", pdaRun->parseInput );
-
- pdaRun->numRetry -= 1;
- pdaRun->cs = pdaRun->parseInput->state;
- goto again;
- }
-
- if ( pdaRun->parseInput->causeReduce != 0 ) {
- pdaRun->undoLel = pdaRun->stackTop;
-
- /* Check if we've arrived at the stack sentinal. This guard
- * is here to allow us to initially set numRetry to one to
- * cause the parser to backup all the way to the beginning
- * when an error occurs. */
- if ( pdaRun->undoLel->next == 0 )
- break;
-
- /* Either we are dealing with a terminal that was
- * shifted or a nonterminal that was reduced. */
- assert( !(pdaRun->stackTop->id < prg->rtd->firstNonTermId) );
-
- debug( REALM_PARSE, "backing up over non-terminal: %s\n",
- prg->rtd->lelInfo[pdaRun->stackTop->id].name );
-
- /* Pop the item from the stack. */
- pdaRun->stackTop = pdaRun->stackTop->next;
-
- /* Queue it as next parseInput item. */
- pdaRun->undoLel->next = pdaRun->parseInput;
- pdaRun->parseInput = pdaRun->undoLel;
- }
- else {
- long region = pdaRun->parseInput->region;
- pdaRun->next = region > 0 ? region + 1 : 0;
- pdaRun->checkNext = true;
- pdaRun->checkStop = true;
-
- sendBack( prg, sp, pdaRun, fsmRun, inputStream, pdaRun->parseInput );
-
- pdaRun->parseInput = 0;
- }
- }
- else if ( pdaRun->parseInput->flags & PF_HAS_RCODE ) {
- debug( REALM_PARSE, "tree has rcode, setting on deck\n" );
- pdaRun->onDeck = true;
- pdaRun->parsed = 0;
-
- /* Only the RCODE flag was in the replaced lhs. All the rest is in
- * the the original. We read it after restoring. */
-
- pdaRun->parseInput->flags &= ~PF_HAS_RCODE;
- }
- else {
- /* Remove it from the input queue. */
- pdaRun->undoLel = pdaRun->parseInput;
- pdaRun->parseInput = pdaRun->parseInput->next;
-
- /* Extract children from the child list. */
- ParseTree *first = pdaRun->undoLel->child;
- pdaRun->undoLel->child = 0;
-
- /* This will skip the ignores/attributes, etc. */
- Kid *dataFirst = treeExtractChild( prg, pdaRun->undoLel->shadow->tree );
-
- /* Walk the child list and and push the items onto the parsing
- * stack one at a time. */
- while ( first != 0 ) {
- /* Get the next item ahead of time. */
- ParseTree *next = first->next;
- Kid *dataNext = dataFirst->next;
-
- /* Push onto the stack. */
- first->next = pdaRun->stackTop;
- pdaRun->stackTop = first;
-
- /* Reattach the data and the parse tree. */
- first->shadow = dataFirst;
-
- first = next;
- dataFirst = dataNext;
- }
-
- /* If there is an parseInput queued, this is one less reduction it has
- * caused. */
- if ( pdaRun->parseInput != 0 )
- pdaRun->parseInput->causeReduce -= 1;
-
- if ( pdaRun->undoLel->retryUpper != 0 ) {
- /* There is always an parseInput item here because reduce
- * conflicts only happen on a lookahead character. */
- assert( pdaRun->parseInput != pdaRun->undoLel );
- assert( pdaRun->parseInput != 0 );
- assert( pdaRun->undoLel->retryLower == 0 );
- assert( pdaRun->parseInput->retryUpper == 0 );
-
- /* Transfer the retry from undoLel to parseInput. */
- pdaRun->parseInput->retryLower = pdaRun->undoLel->retryUpper;
- pdaRun->parseInput->retryUpper = 0;
- pdaRun->parseInput->state = stackTopTarget( prg, pdaRun );
- }
-
- /* Free the reduced item. */
- treeDownref( prg, sp, pdaRun->undoLel->shadow->tree );
- kidFree( prg, pdaRun->undoLel->shadow );
- parseTreeFree( prg, pdaRun->undoLel );
-
- /* If the stacktop had right ignore attached, detach now. */
- if ( pdaRun->stackTop->flags & PF_RIGHT_IL_ATTACHED )
- detachRightIgnore( prg, sp, pdaRun, pdaRun->stackTop );
- }
- }
- else if ( pdaRun->accumIgnore != 0 ) {
- debug( REALM_PARSE, "have accumulated ignore to undo\n" );
-
- /* Send back any accumulated ignore tokens, then trigger error
- * in the the parser. */
- ParseTree *ignore = pdaRun->accumIgnore;
- pdaRun->accumIgnore = pdaRun->accumIgnore->next;
- ignore->next = 0;
-
- long region = ignore->region;
- pdaRun->next = region > 0 ? region + 1 : 0;
- pdaRun->checkNext = true;
- pdaRun->checkStop = true;
-
- sendBackIgnore( prg, sp, pdaRun, fsmRun, inputStream, ignore );
-
- treeDownref( prg, sp, ignore->shadow->tree );
- kidFree( prg, ignore->shadow );
- parseTreeFree( prg, ignore );
- }
- else {
- /* Now it is time to undo something. Pick an element from the top of
- * the stack. */
- pdaRun->undoLel = pdaRun->stackTop;
-
- /* Check if we've arrived at the stack sentinal. This guard is
- * here to allow us to initially set numRetry to one to cause the
- * parser to backup all the way to the beginning when an error
- * occurs. */
- if ( pdaRun->undoLel->next == 0 )
- break;
-
- /* Either we are dealing with a terminal that was
- * shifted or a nonterminal that was reduced. */
- if ( pdaRun->stackTop->id < prg->rtd->firstNonTermId ) {
- debug( REALM_PARSE, "backing up over effective terminal: %s\n",
- prg->rtd->lelInfo[pdaRun->stackTop->id].name );
-
- /* Pop the item from the stack. */
- pdaRun->stackTop = pdaRun->stackTop->next;
-
- /* Queue it as next parseInput item. */
- pdaRun->undoLel->next = pdaRun->parseInput;
- pdaRun->parseInput = pdaRun->undoLel;
-
- /* Pop from the token list. */
- Ref *ref = pdaRun->tokenList;
- pdaRun->tokenList = ref->next;
- kidFree( prg, (Kid*)ref );
-
- assert( pdaRun->accumIgnore == 0 );
- detachLeftIgnore( prg, sp, pdaRun, fsmRun, pdaRun->parseInput );
- }
- else {
- debug( REALM_PARSE, "backing up over non-terminal: %s\n",
- prg->rtd->lelInfo[pdaRun->stackTop->id].name );
-
- /* Pop the item from the stack. */
- pdaRun->stackTop = pdaRun->stackTop->next;
-
- /* Queue it as next parseInput item. */
- pdaRun->undoLel->next = pdaRun->parseInput;
- pdaRun->parseInput = pdaRun->undoLel;
- }
-
- /* Undo attach of right ignore. */
- if ( pdaRun->stackTop->flags & PF_RIGHT_IL_ATTACHED )
- detachRightIgnore( prg, sp, pdaRun, pdaRun->stackTop );
- }
- }
-
-fail:
- pdaRun->cs = -1;
- pdaRun->parseError = 1;
-
- /* If we failed parsing on tree we must free it. The caller expected us to
- * either consume it or send it back to the parseInput. */
- if ( pdaRun->parseInput != 0 ) {
- //treeDownref( prg, sp, (Tree*)pdaRun->parseInput->tree );
- //ptKidFree( prg, pdaRun->parseInput );
- pdaRun->parseInput = 0;
- }
-
- /* FIXME: do we still need to fall through here? A fail is permanent now,
- * no longer called into again. */
-
- return PcrDone;
-
-_out:
- pdaRun->nextRegionInd = pdaRun->tables->tokenRegionInds[pdaRun->cs];
-
-case PcrDone:
-break; }
-
- return PcrDone;
-}
diff --git a/colm/pdarun.h b/colm/pdarun.h
deleted file mode 100644
index 4ab648a6..00000000
--- a/colm/pdarun.h
+++ /dev/null
@@ -1,473 +0,0 @@
-/*
- * Copyright 2007-2012 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Colm.
- *
- * Colm is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Colm is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Colm; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#ifndef __COLM_PDARUN_H
-#define __COLM_PDARUN_H
-
-#include <colm/input.h>
-#include <colm/fsmrun.h>
-#include <colm/defs.h>
-#include <colm/tree.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct ColmProgram;
-
-#define MARK_SLOTS 32
-
-/* Tables for the scanner state machine. An FsmRun refers to one set through
- * its `tables` pointer and RuntimeData carries one as `fsmTables`.
- * NOTE(review): the arrays are presumably emitted by the code generator;
- * nothing in this header shows who fills them in. */
-typedef struct _FsmTables
-{
- long *actions;
- long *keyOffsets;
- char *transKeys;
- long *singleLengths;
- long *rangeLengths;
- long *indexOffsets;
- long *transTargsWI;
- long *transActionsWI;
- long *toStateActions;
- long *fromStateActions;
- long *eofActions;
- long *eofTargs;
- long *entryByRegion;
-
- /* Counts. Several of the arrays above have a same-named num* field here;
-  * the others have none. NOTE(review): those are presumably sized by
-  * numStates / numRegions -- confirm against the generator. */
- long numStates;
- long numActions;
- long numTransKeys;
- long numSingleLengths;
- long numRangeLengths;
- long numIndexOffsets;
- long numTransTargsWI;
- long numTransActionsWI;
- long numRegions;
-
- long startState;
- long firstFinal;
- long errorState;
-
- struct GenAction **actionSwitch;
- long numActionSwitch;
-} FsmTables;
-
-/* State of one scanner run. Initialised with initFsmRun() and released with
- * clearFsmRun(), both declared right after this struct. */
-typedef struct _FsmRun
-{
- /* State machine tables this run executes. */
- FsmTables *tables;
-
- RunBuf *runBuf;
-
- /* FsmRun State. */
- long region, preRegion;
- long cs, ncs, act;
- char *tokstart, *tokend;
- char *p, *pe, *peof;
- int returnResult;
- /* MARK_SLOTS saved buffer positions. NOTE(review): presumably filled in by
-  * scanner actions for capture attributes -- confirm. */
- char *mark[MARK_SLOTS];
- long matchedToken;
-
- /* NOTE(review): presumably maintained by attachInput()/attachSource() and
-  * their detach counterparts declared at the end of this header. */
- InputStream *attachedInput;
- SourceStream *attachedSource;
-} FsmRun;
-
-void initFsmRun( FsmRun *fsmRun, struct ColmProgram *prg );
-void clearFsmRun( struct ColmProgram *prg, FsmRun *fsmRun );
-void updatePosition( InputStream *inputStream, const char *data, long length );
-void undoPosition( InputStream *inputStream, const char *data, long length );
-void sendBackRunBufHead( FsmRun *fsmRun, InputStream *inputStream );
-void undoStreamPull( FsmRun *fsmRun, InputStream *inputStream, const char *data, long length );
-
-
-#if SIZEOF_LONG != 4 && SIZEOF_LONG != 8
- #error "SIZEOF_LONG contained an unexpected value"
-#endif
-
-struct _Execution;
-
-/* Vector of Code units. The append helpers further down in this header add
- * new units at index tabLen by calling rtCodeVectReplace(). */
-typedef struct _RtCodeVect
-{
- /* Storage for the code units. */
- Code *data;
- /* Position at which the append helpers write, i.e. the current length. */
- long tabLen;
- /* NOTE(review): presumably the allocated capacity of data -- confirm in
-  * codevect.c. */
- long allocLen;
-
- /* FIXME: leak when freed. */
-} RtCodeVect;
-
-void listAddAfter( List *list, ListEl *prev_el, ListEl *new_el );
-void listAddBefore( List *list, ListEl *next_el, ListEl *new_el );
-
-void listPrepend( List *list, ListEl *new_el );
-void listAppend( List *list, ListEl *new_el );
-
-ListEl *listDetach( List *list, ListEl *el );
-ListEl *listDetachFirst(List *list );
-ListEl *listDetachLast(List *list );
-
-long listLength(List *list);
-
-/* Descriptor of one function; RuntimeData.functionInfo points at an array of
- * these with numFunctions giving the count. */
-typedef struct _FunctionInfo
-{
- const char *name;
- /* NOTE(review): presumably an index into RuntimeData.frameInfo. */
- long frameId;
- long argSize;
- long frameSize;
-} FunctionInfo;
-
-/*
- * Program Data.
- */
-
-/* Entry type of RuntimeData.patReplInfo.
- * NOTE(review): offset presumably locates the entry's first node inside
- * RuntimeData.patReplNodes -- confirm against the users of this table. */
-typedef struct _PatReplInfo
-{
- long offset;
- long numBindings;
-} PatReplInfo;
-
-/* Entry type of RuntimeData.patReplNodes.
- * NOTE(review): next and child are longs rather than pointers, so they are
- * presumably indices of other nodes in the same array -- confirm. */
-typedef struct _PatReplNode
-{
- long id;
- long prodNum;
- long next;
- long child;
- long bindId;
- /* Text carried by the node together with its length. */
- const char *data;
- long length;
- long leftIgnore;
- long rightIgnore;
-
- /* Just match nonterminal, don't go inside. */
- unsigned char stop;
-} PatReplNode;
-
-/* FIXME: should have a descriptor for object types to give the length. */
-
-/* Information about one language element. RuntimeData.lelInfo is indexed by
- * element id; the parser prints lelInfo[id].name in its debug output. */
-typedef struct _LangElInfo
-{
- const char *name;
- const char *xmlTag;
- /* Single-byte flags. */
- unsigned char repeat;
- unsigned char list;
- unsigned char literal;
- unsigned char ignore;
-
- long frameId;
-
- long objectTypeId;
- long ofiOffset;
- long objectLength;
-
-// long contextTypeId;
-// long contextLength;
-
- long termDupId;
- long genericId;
- long markId;
- /* NOTE(review): presumably the first index and the number of this
-  * element's entries in RuntimeData.captureAttr -- confirm. */
- long captureAttr;
- long numCaptureAttr;
-} LangElInfo;
-
-/* Type information for one object field.
- * NOTE(review): presumably addressed through LangElInfo.ofiOffset; the table
- * holding these entries is not visible in this header. */
-typedef struct _ObjFieldInfo
-{
- int typeId;
-} ObjFieldInfo;
-
-/* Information about one production; RuntimeData.prodInfo points at an array
- * of these with numProds giving the count. */
-typedef struct _ProdInfo
-{
- /* Id of the production's left-hand side. */
- unsigned long lhsId;
- short prodNum;
- long length;
- const char *name;
- long frameId;
- unsigned char lhsUpref;
- /* Byte array with its length in copyLen. NOTE(review): contents are not
-  * explained in this header. */
- unsigned char *copy;
- long copyLen;
-} ProdInfo;
-
-/* Information about one code frame; RuntimeData.frameInfo points at an array
- * of these and PdaRun.fi points at a single one.
- * NOTE(review): the difference between the WV and WC code variants is not
- * explained in this header. */
-typedef struct _FrameInfo
-{
- Code *codeWV;
- long codeLenWV;
- Code *codeWC;
- long codeLenWC;
- char *trees;
- long treesLen;
- long argSize;
- long frameSize;
-} FrameInfo;
-
-/* Information about one scanner region; RuntimeData.regionInfo points at an
- * array of these with numRegions giving the count. */
-typedef struct _RegionInfo
-{
- const char *name;
- long defaultToken;
- long eofFrameId;
- /* Flags stored as ints. */
- int isIgnoreOnly;
- int isCiOnly;
- int ciLelId;
-} RegionInfo;
-
-/* Entry type of RuntimeData.captureAttr.
- * NOTE(review): mark_enter / mark_leave are presumably slot numbers in
- * FsmRun.mark[] delimiting the captured text -- confirm. */
-typedef struct _CaptureAttr
-{
- long mark_enter;
- long mark_leave;
- long offset;
-} CaptureAttr;
-
-/* Parser tables. A PdaRun refers to one set through its `tables` pointer and
- * RuntimeData carries one as `pdaTables`. */
-typedef struct _PdaTables
-{
- /* Parser table data. */
- int *indicies;
- int *owners;
- int *keys;
- unsigned int *offsets;
- unsigned int *targs;
- unsigned int *actInds;
- unsigned int *actions;
- int *commitLen;
- /* Indexed by parser state: the parser reads tokenRegionInds[pdaRun->cs]
-  * into pdaRun->nextRegionInd. */
- int *tokenRegionInds;
- int *tokenRegions;
- int *tokenPreRegions;
-
- /* Element counts. */
- int numIndicies;
- int numKeys;
- int numStates;
- int numTargs;
- int numActInds;
- int numActions;
- int numCommitLen;
- int numRegionItems;
- int numPreRegionItems;
-} PdaTables;
-
-/* One chunk of pool storage. pool.c allocates data as room for FRESH_BLOCK
- * items and links the blocks of a pool together through next. */
-typedef struct _PoolBlock
-{
- void *data;
- struct _PoolBlock *next;
-} PoolBlock;
-
-/* Free-list link. poolAllocFree() casts the freed item itself to a PoolItem,
- * so the link lives in the first bytes of the freed item. */
-typedef struct _PoolItem
-{
- struct _PoolItem *next;
-} PoolItem;
-
-/* Allocator for fixed-size items (see pool.c). Items are carved out of blocks
- * holding FRESH_BLOCK items each and are recycled through a free list. */
-typedef struct _PoolAlloc
-{
- /* Most recently allocated block; older blocks follow through ->next. */
- PoolBlock *head;
- /* Index of the next never-used item in the head block. initPoolAlloc()
-  * starts it at FRESH_BLOCK, which makes the first allocation create a
-  * block. */
- long nextel;
- /* Free list of items handed back with poolAllocFree(). */
- PoolItem *pool;
- /* Size of one item in bytes. */
- int sizeofT;
-} PoolAlloc;
-
-/* State of one parser run. Set up with initPdaRun() and released with
- * clearPdaRun(), both declared further down in this header. */
-typedef struct _PdaRun
-{
- int numRetry;
- /* Top of the parse stack, linked through ParseTree.next. The backtracking
-  * code treats the entry whose next is 0 as the stack sentinel. */
- ParseTree *stackTop;
- /* List of Refs; one is popped when the parser backs up over a terminal. */
- Ref *tokenList;
- /* Current parser state; set to -1 when the parse fails. */
- int cs;
- /* Loaded from tables->tokenRegionInds[cs] when the parser yields. */
- int nextRegionInd;
-
- PdaTables *tables;
- int parserId;
-
- /* Reused. */
- RtCodeVect rcodeCollect;
- RtCodeVect reverseCode;
-
- int stopParsing;
- long stopTarget;
-
- /* Ignore tokens accumulated so far, linked through next; they are sent
-  * back one at a time during backtracking. */
- ParseTree *accumIgnore;
-
- Kid *btPoint;
-
- struct Bindings *bindings;
-
- int revertOn;
-
- Tree *context;
-
- int stop;
- /* Set to 1 when the parse fails. */
- int parseError;
-
- long steps;
- long targetSteps;
-
- /* Set while backtracking when the input tree carries reverse code. */
- int onDeck;
-
- /*
-  * Data we added when refactoring the parsing engine into a coroutine.
-  */
-
- /* Queue of items waiting to be parsed, linked through next. */
- ParseTree *parseInput;
- FrameInfo *fi;
- int reduction;
- ParseTree *redLel;
- int curState;
- ParseTree *lel;
- int triggerUndo;
-
- int tokenId;
- Head *tokdata;
- int frameId;
- /* Derived from the region of the item being sent back (region + 1, or 0). */
- int next;
- /* Element currently being undone by the backtracking code. */
- ParseTree *undoLel;
-
- /* Both are set to true whenever an item is sent back. */
- int checkNext;
- int checkStop;
-
- /* The lhs is sometimes saved before reduction actions in case it is
-  * replaced and we need to restore it on backtracking */
- Tree *parsed;
-
- int reject;
-
- /* Instruction pointer to use when we stop parsing and execute code. */
- Code *code;
-
- int rcBlockCount;
-} PdaRun;
-
-void rtCodeVectReplace( RtCodeVect *vect, long pos, const Code *val, long len );
-void rtCodeVectEmpty( RtCodeVect *vect );
-void rtCodeVectRemove( RtCodeVect *vect, long pos, long len );
-
-void initRtCodeVect( RtCodeVect *codeVect );
-
-//inline static void remove( RtCodeVect *vect, long pos );
-inline static void append( RtCodeVect *vect, const Code val );
-inline static void append2( RtCodeVect *vect, const Code *val, long len );
-inline static void appendHalf( RtCodeVect *vect, Half half );
-inline static void appendWord( RtCodeVect *vect, Word word );
-
-/* Append the len code units starting at val to the end of vect. */
-inline static void append2( RtCodeVect *vect, const Code *val, long len )
-{
-    const long tail = vect->tabLen;
-    rtCodeVectReplace( vect, tail, val, len );
-}
-
-/* Append the single code unit val to the end of vect. */
-inline static void append( RtCodeVect *vect, const Code val )
-{
-    const Code *single = &val;
-    rtCodeVectReplace( vect, vect->tabLen, single, 1 );
-}
-
-/* Append half to vect as two code units, least significant byte first. */
-inline static void appendHalf( RtCodeVect *vect, Half half )
-{
-    int shift;
-
-    /* not optimal. */
-    for ( shift = 0; shift < 16; shift += 8 )
-        append( vect, (half >> shift) & 0xff );
-}
-
-/* Append word to vect one byte per code unit, least significant byte first.
- * SIZEOF_LONG (checked to be 4 or 8 earlier in this header) decides how many
- * bytes are written. */
-inline static void appendWord( RtCodeVect *vect, Word word )
-{
-    int shift;
-
-    /* not optimal. */
-    for ( shift = 0; shift < SIZEOF_LONG * 8; shift += 8 )
-        append( vect, (word >> shift) & 0xff );
-}
-
-void incrementSteps( PdaRun *pdaRun );
-void decrementSteps( PdaRun *pdaRun );
-
-int makeReverseCode( PdaRun *pdaRun );
-void transferReverseCode( PdaRun *pdaRun, ParseTree *tree );
-
-void initPdaRun( PdaRun *pdaRun, struct ColmProgram *prg, PdaTables *tables,
- FsmRun *fsmRun, int parserId, long stopTarget, int revertOn, Tree *context );
-void clearPdaRun( struct ColmProgram *prg, Tree **root, PdaRun *pdaRun );
-
-void initInputStream( InputStream *inputStream );
-void clearInputStream( struct ColmProgram *prg, Tree **sp, InputStream *inputStream );
-void initSourceStream( SourceStream *in );
-void clearSourceStream( struct ColmProgram *prg, Tree **sp, SourceStream *sourceStream );
-
-
-void clearContext( PdaRun *pdaRun, Tree **sp );
-Kid *extractIgnore( PdaRun *pdaRun );
-long stackTopTarget( struct ColmProgram *prg, PdaRun *pdaRun );
-void runCommit( PdaRun *pdaRun );
-int isParserStopFinished( PdaRun *pdaRun );
-void pdaRunMatch( PdaRun *pdaRun, Kid *tree, Kid *pattern );
-
-/* Offset can be used to look at the next nextRegionInd. */
-int pdaRunGetNextRegion( PdaRun *pdaRun, int offset );
-int pdaRunGetNextPreRegion( PdaRun *pdaRun );
-
-#define PcrStart 1
-#define PcrDone 2
-#define PcrReduction 3
-#define PcrGeneration 4
-#define PcrPreEof 5
-#define PcrReverse 6
-
-long parseToken( struct ColmProgram *prg, Tree **sp, PdaRun *pdaRun,
- FsmRun *fsmRun, InputStream *inputStream, long entry );
-
-long undoParse( Tree **sp, PdaRun *pdaRun, FsmRun *fsmRun, InputStream *inputStream, Tree *tree );
-
-Head *streamPull( struct ColmProgram *prg, FsmRun *fsmRun, InputStream *inputStream, long length );
-Head *stringAllocPointer( struct ColmProgram *prg, const char *data, long length );
-
-void streamPushText( FsmRun *fsmRun, InputStream *inputStream, const char *data, long length );
-void streamPushTree( FsmRun *fsmRun, InputStream *inputStream, Tree *tree, int ignore );
-void undoStreamPush( struct ColmProgram *prg, Tree **sp, FsmRun *fsmRun, InputStream *inputStream, long length );
-void undoStreamAppend( struct ColmProgram *prg, Tree **sp, FsmRun *fsmRun, InputStream *inputStream, struct ColmTree *tree, long length );
-Kid *makeTokenWithData( struct ColmProgram *prg, PdaRun *pdaRun, FsmRun *fsmRun,
- InputStream *inputStream, int id, Head *tokdata );
-
-void pushBinding( PdaRun *pdaRun, ParseTree *parseTree );
-void popBinding( PdaRun *pdaRun, ParseTree *parseTree );
-
-void executeGenerationAction( struct ColmProgram *prg, Tree **sp, FsmRun *fsmRun, PdaRun *pdaRun,
- InputStream *inputStream, int frameId, Code *code, long id, Head *tokdata );
-Kid *extractIgnore( PdaRun *pdaRun );
-long sendBackQueuedIgnore( struct ColmProgram *prg, Tree **sp, InputStream *inputStream,
- FsmRun *fsmRun, PdaRun *pdaRun, long entry );
-void clearIgnoreList( struct ColmProgram *prg, Tree **sp, Kid *kid );
-Head *extractMatch( struct ColmProgram *prg, FsmRun *fsmRun, InputStream *inputStream );
-Head *extractMatch( struct ColmProgram *prg, FsmRun *fsmRun, InputStream *inputStream );
-void newToken( struct ColmProgram *prg, PdaRun *pdaRun, FsmRun *fsmRun );
-void fsmExecute( FsmRun *fsmRun, InputStream *inputStream );
-void sendNamedLangEl( struct ColmProgram *prg, Tree **sp, PdaRun *pdaRun, FsmRun *fsmRun, InputStream *inputStream );
-long parseLoop( struct ColmProgram *prg, Tree **sp, PdaRun *pdaRun,
- FsmRun *fsmRun, InputStream *inputStream, long entry );
-void initBindings( PdaRun *pdaRun );
-Tree *getParsedRoot( PdaRun *pdaRun, int stop );
-void undoParseStream( struct ColmProgram *prg, Tree **sp, InputStream *inputStream, FsmRun *fsmRun,
- PdaRun *pdaRun, long steps );
-
-void clearBuffered( FsmRun *fsmRun );
-void resetToken( FsmRun *fsmRun );
-
-void detachInput( FsmRun *fsmRun, InputStream *is );
-void attachInput( FsmRun *fsmRun, InputStream *is );
-void detachSource( FsmRun *fsmRun, SourceStream *ss );
-void attachSource( FsmRun *fsmRun, SourceStream *ss );
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
diff --git a/colm/pool.c b/colm/pool.c
deleted file mode 100644
index f531338d..00000000
--- a/colm/pool.c
+++ /dev/null
@@ -1,330 +0,0 @@
-/*
- * Copyright 2010-2012 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Colm.
- *
- * Colm is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Colm is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Colm; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include <string.h>
-#include <stdlib.h>
-
-#include <colm/pdarun.h>
-#include <colm/pool.h>
-#include <colm/debug.h>
-
-/* Put poolAlloc into its empty state for items of sizeofT bytes: no blocks,
- * no free items, and nextel at FRESH_BLOCK so that the first allocation
- * creates a block. */
-void initPoolAlloc( PoolAlloc *poolAlloc, int sizeofT )
-{
-    *poolAlloc = (PoolAlloc) {
-        .head = 0,
-        .nextel = FRESH_BLOCK,
-        .pool = 0,
-        .sizeofT = sizeofT
-    };
-}
-
-/*
- * Hand out one zero-filled item of poolAlloc->sizeofT bytes.
- *
- * The free list is used first. When it is empty the item is carved off the
- * head block, and a new block of FRESH_BLOCK items is allocated once the head
- * block is used up. With POOL_MALLOC defined every item is a separate heap
- * allocation instead.
- *
- * Returns 0 when the underlying allocation fails; the pool is left exactly as
- * it was in that case.
- */
-void *poolAllocAllocate( PoolAlloc *poolAlloc )
-{
-    debug( REALM_POOL, "pool allocation\n" );
-
-#ifdef POOL_MALLOC
-    /* calloc supplies the zero fill and reports failure as a null result. */
-    return calloc( 1, poolAlloc->sizeofT );
-#else
-    const size_t itemSize = (size_t)poolAlloc->sizeofT;
-    void *newEl = 0;
-
-    if ( poolAlloc->pool == 0 ) {
-        if ( poolAlloc->nextel == FRESH_BLOCK ) {
-            /* Head block exhausted (or no block yet): add a fresh one. The
-             * block is linked in only after both allocations succeeded. */
-            PoolBlock *newBlock = (PoolBlock*)malloc( sizeof(PoolBlock) );
-            if ( newBlock == 0 )
-                return 0;
-
-            /* The product is computed in size_t, not int. */
-            newBlock->data = malloc( itemSize * FRESH_BLOCK );
-            if ( newBlock->data == 0 ) {
-                free( newBlock );
-                return 0;
-            }
-
-            newBlock->next = poolAlloc->head;
-            poolAlloc->head = newBlock;
-            poolAlloc->nextel = 0;
-        }
-
-        newEl = (char*)poolAlloc->head->data + itemSize * poolAlloc->nextel++;
-    }
-    else {
-        /* Reuse the most recently freed item. */
-        newEl = poolAlloc->pool;
-        poolAlloc->pool = poolAlloc->pool->next;
-    }
-
-    memset( newEl, 0, itemSize );
-    return newEl;
-#endif
-}
-
-/*
- * Return el to poolAlloc.
- *
- * In pool mode the item is pushed onto the free list; its first bytes are
- * reused as the list link. With POOL_MALLOC defined the item is handed to
- * free(). A null el is ignored in both modes (previously only the
- * POOL_MALLOC build tolerated it).
- */
-void poolAllocFree( PoolAlloc *poolAlloc, void *el )
-{
-    if ( el == 0 )
-        return;
-
-    #if 0
-    /* Some sanity checking. Best not to normally run with this on. Needs
-     * <assert.h> when enabled. */
-    unsigned char *p = (unsigned char*)el + sizeof(PoolItem*);
-    unsigned char *pe = (unsigned char*)el + poolAlloc->sizeofT;
-    for ( ; p < pe; p++ )
-        assert( *p != 0xcc );
-    memset( el, 0xcc, poolAlloc->sizeofT );
-    #endif
-
-#ifdef POOL_MALLOC
-    free( el );
-#else
-    PoolItem *pi = (PoolItem*) el;
-    pi->next = poolAlloc->pool;
-    poolAlloc->pool = pi;
-#endif
-}
-
-/*
- * Free every block owned by poolAlloc and return it to the empty state that
- * initPoolAlloc() establishes. All items previously handed out become
- * invalid.
- */
-void poolAllocClear( PoolAlloc *poolAlloc )
-{
-    PoolBlock *block = poolAlloc->head;
-    while ( block != 0 ) {
-        PoolBlock *next = block->next;
-        free( block->data );
-        free( block );
-        block = next;
-    }
-
-    poolAlloc->head = 0;
-    /* Must be FRESH_BLOCK, not 0: with no head block left, a later
-     * poolAllocAllocate() has to create a block first instead of indexing
-     * into the null head. */
-    poolAlloc->nextel = FRESH_BLOCK;
-    poolAlloc->pool = 0;
-}
-
-/*
- * Number of items taken from poolAlloc that are not on its free list.
- *
- * The head block contributes nextel items, every older block FRESH_BLOCK
- * items, and each entry on the free list takes one away again.
- */
-long poolAllocNumLost( PoolAlloc *poolAlloc )
-{
-    long handedOut = 0;
-    const PoolBlock *blk = poolAlloc->head;
-    const PoolItem *freeItem;
-
-    if ( blk != 0 ) {
-        handedOut = poolAlloc->nextel;
-        for ( blk = blk->next; blk != 0; blk = blk->next )
-            handedOut += FRESH_BLOCK;
-    }
-
-    for ( freeItem = poolAlloc->pool; freeItem != 0; freeItem = freeItem->next )
-        handedOut -= 1;
-
-    return handedOut;
-}
-
-/*
- * Kid
- */
-
-/* Take a zero-filled Kid from the program's kid pool. */
-Kid *kidAllocate( Program *prg )
-{
-    PoolAlloc *kids = &prg->kidPool;
-    return (Kid*) poolAllocAllocate( kids );
-}
-
-/* Hand el back to the kid pool. */
-void kidFree( Program *prg, Kid *el )
-{
-    PoolAlloc *kids = &prg->kidPool;
-    poolAllocFree( kids, el );
-}
-
-/* Release all storage held by the kid pool. */
-void kidClear( Program *prg )
-{
-    PoolAlloc *kids = &prg->kidPool;
-    poolAllocClear( kids );
-}
-
-/* Count the Kids taken from the pool that were never handed back. */
-long kidNumLost( Program *prg )
-{
-    PoolAlloc *kids = &prg->kidPool;
-    return poolAllocNumLost( kids );
-}
-
-/*
- * Tree
- */
-
-/* Take a zero-filled Tree from the program's tree pool. */
-Tree *treeAllocate( Program *prg )
-{
-    PoolAlloc *trees = &prg->treePool;
-    return (Tree*) poolAllocAllocate( trees );
-}
-
-/* Hand el back to the tree pool. */
-void treeFree( Program *prg, Tree *el )
-{
-    PoolAlloc *trees = &prg->treePool;
-    poolAllocFree( trees, el );
-}
-
-/* Release all storage held by the tree pool. */
-void treeClear( Program *prg )
-{
-    PoolAlloc *trees = &prg->treePool;
-    poolAllocClear( trees );
-}
-
-/* Count the Trees taken from the pool that were never handed back. */
-long treeNumLost( Program *prg )
-{
-    PoolAlloc *trees = &prg->treePool;
-    return poolAllocNumLost( trees );
-}
-
-/*
- * ParseTree
- */
-
-/* Take a zero-filled ParseTree from the program's parse tree pool. */
-ParseTree *parseTreeAllocate( Program *prg )
-{
-    PoolAlloc *parseTrees = &prg->parseTreePool;
-    return (ParseTree*) poolAllocAllocate( parseTrees );
-}
-
-/* Hand el back to the parse tree pool. */
-void parseTreeFree( Program *prg, ParseTree *el )
-{
-    PoolAlloc *parseTrees = &prg->parseTreePool;
-    poolAllocFree( parseTrees, el );
-}
-
-/* Release all storage held by the parse tree pool. */
-void parseTreeClear( Program *prg )
-{
-    PoolAlloc *parseTrees = &prg->parseTreePool;
-    poolAllocClear( parseTrees );
-}
-
-/* Count the ParseTrees taken from the pool that were never handed back. */
-long parseTreeNumLost( Program *prg )
-{
-    PoolAlloc *parseTrees = &prg->parseTreePool;
-    return poolAllocNumLost( parseTrees );
-}
-
-/*
- * ListEl
- */
-
-/* Take a zero-filled ListEl from the program's list element pool. */
-ListEl *listElAllocate( Program *prg )
-{
-    PoolAlloc *listEls = &prg->listElPool;
-    return (ListEl*) poolAllocAllocate( listEls );
-}
-
-/* Hand el back to the list element pool. */
-void listElFree( Program *prg, ListEl *el )
-{
-    PoolAlloc *listEls = &prg->listElPool;
-    poolAllocFree( listEls, el );
-}
-
-/* Release all storage held by the list element pool. */
-void listElClear( Program *prg )
-{
-    PoolAlloc *listEls = &prg->listElPool;
-    poolAllocClear( listEls );
-}
-
-/* Count the ListEls taken from the pool that were never handed back. */
-long listElNumLost( Program *prg )
-{
-    PoolAlloc *listEls = &prg->listElPool;
-    return poolAllocNumLost( listEls );
-}
-
-/*
- * MapEl
- */
-
-/* Take a zero-filled MapEl from the program's map element pool. */
-MapEl *mapElAllocate( Program *prg )
-{
-    PoolAlloc *mapEls = &prg->mapElPool;
-    return (MapEl*) poolAllocAllocate( mapEls );
-}
-
-/* Hand el back to the map element pool. */
-void mapElFree( Program *prg, MapEl *el )
-{
-    PoolAlloc *mapEls = &prg->mapElPool;
-    poolAllocFree( mapEls, el );
-}
-
-/* Release all storage held by the map element pool. */
-void mapElClear( Program *prg )
-{
-    PoolAlloc *mapEls = &prg->mapElPool;
-    poolAllocClear( mapEls );
-}
-
-/* Count the MapEls taken from the pool that were never handed back. */
-long mapElNumLost( Program *prg )
-{
-    PoolAlloc *mapEls = &prg->mapElPool;
-    return poolAllocNumLost( mapEls );
-}
-
-/*
- * Head
- */
-
-/* Take a zero-filled Head from the program's head pool. */
-Head *headAllocate( Program *prg )
-{
-    PoolAlloc *heads = &prg->headPool;
-    return (Head*) poolAllocAllocate( heads );
-}
-
-/* Hand el back to the head pool. */
-void headFree( Program *prg, Head *el )
-{
-    PoolAlloc *heads = &prg->headPool;
-    poolAllocFree( heads, el );
-}
-
-/* Release all storage held by the head pool. */
-void headClear( Program *prg )
-{
-    PoolAlloc *heads = &prg->headPool;
-    poolAllocClear( heads );
-}
-
-/* Count the Heads taken from the pool that were never handed back. */
-long headNumLost( Program *prg )
-{
-    PoolAlloc *heads = &prg->headPool;
-    return poolAllocNumLost( heads );
-}
-
-/*
- * Location
- */
-
-/* Take a zero-filled Location from the program's location pool. */
-Location *locationAllocate( Program *prg )
-{
-    PoolAlloc *locations = &prg->locationPool;
-    return (Location*) poolAllocAllocate( locations );
-}
-
-/* Hand el back to the location pool. */
-void locationFree( Program *prg, Location *el )
-{
-    PoolAlloc *locations = &prg->locationPool;
-    poolAllocFree( locations, el );
-}
-
-/* Release all storage held by the location pool. */
-void locationClear( Program *prg )
-{
-    PoolAlloc *locations = &prg->locationPool;
-    poolAllocClear( locations );
-}
-
-/* Count the Locations taken from the pool that were never handed back. */
-long locationNumLost( Program *prg )
-{
-    PoolAlloc *locations = &prg->locationPool;
-    return poolAllocNumLost( locations );
-}
-
-/*
- * Stream
- */
-
-/* Streams have no pool of their own: they are stored in slots taken from the
- * map element pool. colmRunProgram() asserts that a Stream fits in a MapEl. */
-Stream *streamAllocate( Program *prg )
-{
-    MapEl *slot = mapElAllocate( prg );
-    return (Stream*)slot;
-}
-
-/* Hand the slot occupied by stream back to the map element pool. */
-void streamFree( Program *prg, Stream *stream )
-{
-    MapEl *slot = (MapEl*)stream;
-    mapElFree( prg, slot );
-}
-
-
-/*
- * Input
- */
-
-/* Inputs are stored in slots taken from the map element pool.
- * NOTE(review): no size check for Input against MapEl is visible next to the
- * ones colmRunProgram() performs for the other types -- confirm it fits. */
-Input *inputAllocate( Program *prg )
-{
-    MapEl *slot = mapElAllocate( prg );
-    return (Input*)slot;
-}
-
-/* Hand the slot occupied by accumStream back to the map element pool. */
-void inputFree( Program *prg, Input *accumStream )
-{
-    MapEl *slot = (MapEl*)accumStream;
-    mapElFree( prg, slot );
-}
diff --git a/colm/pool.h b/colm/pool.h
deleted file mode 100644
index 454a5354..00000000
--- a/colm/pool.h
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Copyright 2010-2012 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Colm.
- *
- * Colm is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Colm is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Colm; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#ifndef _POOL_H
-#define _POOL_H
-
-/* Allocation, number of items. */
-#define FRESH_BLOCK 8128
-
-#include <colm/pdarun.h>
-#include <colm/map.h>
-#include <colm/tree.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-void initPoolAlloc( PoolAlloc *poolAlloc, int sizeofT );
-
-Kid *kidAllocate( Program *prg );
-void kidFree( Program *prg, Kid *el );
-void kidClear( Program *prg );
-long kidNumLost( Program *prg );
-
-Tree *treeAllocate( Program *prg );
-void treeFree( Program *prg, Tree *el );
-void treeClear( Program *prg );
-long treeNumLost( Program *prg );
-
-ParseTree *parseTreeAllocate( Program *prg );
-void parseTreeFree( Program *prg, ParseTree *el );
-void parseTreeClear( Program *prg );
-long parseTreeNumLost( Program *prg );
-
-ListEl *listElAllocate( Program *prg );
-void listElFree( Program *prg, ListEl *el );
-void listElClear( Program *prg );
-long listElNumLost( Program *prg );
-
-MapEl *mapElAllocate( Program *prg );
-void mapElFree( Program *prg, MapEl *el );
-void mapElClear( Program *prg );
-long mapElNumLost( Program *prg );
-
-Head *headAllocate( Program *prg );
-void headFree( Program *prg, Head *el );
-void headClear( Program *prg );
-long headNumLost( Program *prg );
-
-Location *locationAllocate( Program *prg );
-void locationFree( Program *prg, Location *el );
-void locationClear( Program *prg );
-long locationNumLost( Program *prg );
-
-Stream *streamAllocate( Program *prg );
-void streamFree( Program *prg, Stream *stream );
-
-Input *inputAllocate( Program *prg );
-void inputFree( Program *prg, Input *stream );
-
-/* Wrong place. */
-TreePair mapRemove( Program *prg, Map *map, Tree *key );
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
diff --git a/colm/program.c b/colm/program.c
deleted file mode 100644
index 50b41fa1..00000000
--- a/colm/program.c
+++ /dev/null
@@ -1,254 +0,0 @@
-/*
- * Copyright 2006-2012 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Colm.
- *
- * Colm is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Colm is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Colm; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include <colm/pdarun.h>
-#include <colm/fsmrun.h>
-#include <colm/tree.h>
-#include <colm/bytecode.h>
-#include <colm/pool.h>
-#include <colm/debug.h>
-#include <colm/config.h>
-
-#include <alloca.h>
-#include <sys/mman.h>
-#include <string.h>
-#include <assert.h>
-#include <stdlib.h>
-
-/* Runtime-wide setup: switch on every colm_log_* flag, store debugRealm as
- * the active debug realm and call initInputFuncs(). */
-void colmInit( long debugRealm )
-{
-    /* The flags are always on because logging is controlled with ifdefs in
-     * the runtime lib. */
-    colm_log_bytecode = colm_log_parse = colm_log_match = 1;
-    colm_log_compile = colm_log_conds = 1;
-
-    colmActiveRealm = debugRealm;
-    initInputFuncs();
-}
-
-/*
- * Run the program's root code.
- *
- * Checks the size relations the runtime relies on when it stores one type in
- * the pool slot of another, allocates the global object and the VM stack,
- * and then executes the root code if there is any.
- */
-void colmRunProgram( Program *prg )
-{
-    /* Types stored in Tree-sized slots. */
-    assert( sizeof(Int) <= sizeof(Tree) );
-    assert( sizeof(Str) <= sizeof(Tree) );
-    assert( sizeof(Pointer) <= sizeof(Tree) );
-
-    /* Types stored in MapEl-sized slots. */
-    assert( sizeof(Map) <= sizeof(MapEl) );
-    assert( sizeof(List) <= sizeof(MapEl) );
-    assert( sizeof(Stream) <= sizeof(MapEl) );
-    assert( sizeof(Parser) <= sizeof(MapEl) );
-
-    allocGlobal( prg );
-
-    /* The root pointer sits one past the last slot of the stack. The result
-     * of stackAlloc() is used as returned. */
-    prg->vm_stack = stackAlloc();
-    prg->vm_root = prg->vm_stack + VM_STACK_SIZE;
-
-    if ( prg->rtd->rootCodeLen <= 0 )
-        return;
-
-    Execution execution;
-    initExecution( &execution, 0, 0, 0, 0, prg->rtd->rootFrameId );
-    mainExecution( prg, &execution, prg->rtd->rootCode );
-}
-
-/* Drop the reference held on every field of the global object, then free its
- * attribute storage (when it has fields) and the object itself. */
-void clearGlobal( Program *prg, Tree **sp )
-{
-    const long numFields = prg->rtd->globalSize;
-    int g = 0;
-
-    while ( g < numFields ) {
-        treeDownref( prg, sp, getAttr( prg->global, g ) );
-        g += 1;
-    }
-
-    /* Attribute storage exists only when there is at least one field. */
-    if ( numFields > 0 )
-        freeAttrs( prg, prg->global->child );
-
-    treeFree( prg, prg->global );
-}
-
-/* Create the global object: a Tree holding one reference whose child is
- * attribute storage for rtd->globalSize fields. */
-void allocGlobal( Program *prg )
-{
-    Tree *global = treeAllocate( prg );
-
-    global->refs = 1;
-    global->child = allocAttrs( prg, prg->rtd->globalSize );
-
-    prg->global = global;
-}
-
-/* Map an anonymous, private, read/write region with room for VM_STACK_SIZE
- * Tree pointers. The mmap() result is returned as-is and is not compared
- * with MAP_FAILED here. */
-Tree **stackAlloc()
-{
-    const size_t bytes = sizeof(Tree*) * VM_STACK_SIZE;
-    void *region = mmap( 0, bytes, PROT_READ | PROT_WRITE,
-            MAP_ANON | MAP_PRIVATE, -1, 0 );
-
-    return (Tree**)region;
-}
-
-/* Accessor for the program's VM stack root pointer. */
-Tree **vm_root( struct ColmProgram *prg )
-{
-    Tree **root = prg->vm_root;
-    return root;
-}
-
-/* Accessor for the program's stored return value. */
-Tree *returnVal( struct ColmProgram *prg )
-{
-    Tree *value = prg->returnVal;
-    return value;
-}
-
-
-/*
- * Create a Program for the runtime data rtd.
- *
- * argc/argv are stored as given (argv is not copied). The item pools are
- * initialised and the shared true/false values are created, each holding one
- * reference.
- *
- * Returns the new program, or 0 if memory for it could not be allocated.
- * Release it with colmDeleteProgram().
- */
-Program *colmNewProgram( RuntimeData *rtd, int argc, const char **argv )
-{
-    /* calloc zero-fills, so every field not assigned below starts out as 0:
-     * global, heap, the std stream values, induceExit, exitStatus,
-     * allocRunBuf, returnVal and lastParseError among them. */
-    Program *prg = (Program*)calloc( 1, sizeof(Program) );
-    if ( prg == 0 )
-        return 0;
-
-    prg->argc = argc;
-    prg->argv = argv;
-    prg->rtd = rtd;
-    prg->ctxDepParsing = 1;
-
-    initPoolAlloc( &prg->kidPool, sizeof(Kid) );
-    initPoolAlloc( &prg->treePool, sizeof(Tree) );
-    initPoolAlloc( &prg->parseTreePool, sizeof(ParseTree) );
-    initPoolAlloc( &prg->listElPool, sizeof(ListEl) );
-    initPoolAlloc( &prg->mapElPool, sizeof(MapEl) );
-    initPoolAlloc( &prg->headPool, sizeof(Head) );
-    initPoolAlloc( &prg->locationPool, sizeof(Location) );
-
-    /* The boolean values are Ints stored in Tree-sized pool slots. */
-    Int *trueInt = (Int*) treeAllocate( prg );
-    trueInt->id = LEL_ID_BOOL;
-    trueInt->refs = 1;
-    trueInt->value = 1;
-
-    Int *falseInt = (Int*) treeAllocate( prg );
-    falseInt->id = LEL_ID_BOOL;
-    falseInt->refs = 1;
-    falseInt->value = 0;
-
-    prg->trueVal = (Tree*)trueInt;
-    prg->falseVal = (Tree*)falseInt;
-
-    return prg;
-}
-
-/*
- * Tear down a program created with colmNewProgram().
- *
- * Drops the references the program itself holds (return value, last parse
- * error, global object, heap entries, boolean values, std streams), reports
- * leaked pool items in DEBUG builds, releases the pools, the VM stack and the
- * run buffers, and finally frees prg.
- *
- * Returns the exit status that was stored in the program.
- */
-int colmDeleteProgram( Program *prg )
-{
-    Tree **sp = prg->vm_root;
-    int exitStatus = prg->exitStatus;
-
-    #ifdef COLM_LOG_BYTECODE
-    /* This file is C, so the message goes through message() rather than the
-     * C++ stream the old code used here. */
-    if ( colm_log_bytecode )
-        message( "clearing the prg\n" );
-    #endif
-
-    treeDownref( prg, sp, prg->returnVal );
-    treeDownref( prg, sp, prg->lastParseError );
-    clearGlobal( prg, sp );
-
-    /* Clear the heap. */
-    Kid *a = prg->heap;
-    while ( a != 0 ) {
-        Kid *next = a->next;
-        treeDownref( prg, sp, a->tree );
-        kidFree( prg, a );
-        a = next;
-    }
-
-    //assert( trueVal->refs == 1 );
-    //assert( falseVal->refs == 1 );
-    treeDownref( prg, sp, prg->trueVal );
-    treeDownref( prg, sp, prg->falseVal );
-
-    treeDownref( prg, sp, (Tree*)prg->stdinVal );
-    treeDownref( prg, sp, (Tree*)prg->stdoutVal );
-    treeDownref( prg, sp, (Tree*)prg->stderrVal );
-
-#if DEBUG
-    /* Everything should be back in its pool by now; report what is not. */
-    long kidLost = kidNumLost( prg );
-    long treeLost = treeNumLost( prg );
-    long parseTreeLost = parseTreeNumLost( prg );
-    long listLost = listElNumLost( prg );
-    long mapLost = mapElNumLost( prg );
-    long headLost = headNumLost( prg );
-    long locationLost = locationNumLost( prg );
-
-    if ( kidLost )
-        message( "warning: lost kids: %ld\n", kidLost );
-
-    if ( treeLost )
-        message( "warning: lost trees: %ld\n", treeLost );
-
-    if ( parseTreeLost )
-        message( "warning: lost parse trees: %ld\n", parseTreeLost );
-
-    if ( listLost )
-        message( "warning: lost listEls: %ld\n", listLost );
-
-    if ( mapLost )
-        message( "warning: lost mapEls: %ld\n", mapLost );
-
-    if ( headLost )
-        message( "warning: lost heads: %ld\n", headLost );
-
-    if ( locationLost )
-        message( "warning: lost locations: %ld\n", locationLost );
-#endif
-
-    kidClear( prg );
-    treeClear( prg );
-    headClear( prg );
-    parseTreeClear( prg );
-    listElClear( prg );
-    mapElClear( prg );
-    locationClear( prg );
-
-    /* Give the VM stack back. It is mapped by stackAlloc() with this same
-     * size; vm_stack is still 0 if colmRunProgram() was never called, and
-     * MAP_FAILED if the mapping itself failed. sp is not used past this
-     * point. */
-    if ( prg->vm_stack != 0 && prg->vm_stack != (Tree**)MAP_FAILED )
-        munmap( prg->vm_stack, sizeof(Tree*) * VM_STACK_SIZE );
-
-    RunBuf *rb = prg->allocRunBuf;
-    while ( rb != 0 ) {
-        RunBuf *next = rb->next;
-        free( rb );
-        rb = next;
-    }
-
-    free( prg );
-
-    return exitStatus;
-}
-
-
diff --git a/colm/program.h b/colm/program.h
deleted file mode 100644
index b5de90b3..00000000
--- a/colm/program.h
+++ /dev/null
@@ -1,128 +0,0 @@
-/*
- * Copyright 2007-2012 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Colm.
- *
- * Colm is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Colm is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Colm; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#ifndef __COLM_PROGRAM_H
-#define __COLM_PROGRAM_H
-
-#include <colm/pdarun.h>
-
-typedef struct ColmRuntimeData
-{
- LangElInfo *lelInfo;
- long numLangEls;
-
- ProdInfo *prodInfo;
- long numProds;
-
- RegionInfo *regionInfo;
- long numRegions;
-
- Code *rootCode;
- long rootCodeLen;
- long rootFrameId;
-
- FrameInfo *frameInfo;
- long numFrames;
-
- FunctionInfo *functionInfo;
- long numFunctions;
-
- PatReplInfo *patReplInfo;
- long numPatterns;
-
- PatReplNode *patReplNodes;
- long numPatternNodes;
-
- GenericInfo *genericInfo;
- long numGenerics;
-
- long argvGenericId;
-
- const char **litdata;
- long *litlen;
- Head **literals;
- long numLiterals;
-
- CaptureAttr *captureAttr;
- long numCapturedAttr;
-
- FsmTables *fsmTables;
- PdaTables *pdaTables;
- int *startStates;
- int *eofLelIds;
- int *parserLelIds;
- long numParsers;
-
- long globalSize;
-
- long firstNonTermId;
-
- long integerId;
- long stringId;
- long anyId;
- long eofId;
- long noTokenId;
-} RuntimeData;
-
-
-typedef struct ColmProgram
-{
- int argc;
- const char **argv;
-
- unsigned char ctxDepParsing;
- RuntimeData *rtd;
- Tree *global;
- int induceExit;
- int exitStatus;
-
- PoolAlloc kidPool;
- PoolAlloc treePool;
- PoolAlloc parseTreePool;
- PoolAlloc listElPool;
- PoolAlloc mapElPool;
- PoolAlloc headPool;
- PoolAlloc locationPool;
-
- Tree *trueVal;
- Tree *falseVal;
-
- Kid *heap;
-
- Tree **se;
-
- Stream *stdinVal;
- Stream *stdoutVal;
- Stream *stderrVal;
-
- RunBuf *allocRunBuf;
-
- Tree **vm_stack;
- Tree **vm_root;
-
- /* Returned from the main line. Should have exports instead. */
- Tree *returnVal;
-
- /* The most recent parse error. Should be returned from the parsing function. */
- Tree *lastParseError;
-} Program;
-
-#endif
diff --git a/colm/redbuild.cc b/colm/redbuild.cc
deleted file mode 100644
index ae5faf38..00000000
--- a/colm/redbuild.cc
+++ /dev/null
@@ -1,650 +0,0 @@
-/*
- * Copyright 2006-2012 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Colm.
- *
- * Colm is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Colm is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Colm; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-
-#include "global.h"
-#include "redbuild.h"
-#include "fsmgraph.h"
-#include "redfsm.h"
-#include "fsmcodegen.h"
-#include <string.h>
-
-using namespace std;
-
-RedFsmBuild::RedFsmBuild( char *fsmName, Compiler *pd, FsmGraph *fsm )
-:
- fsmName(fsmName),
- pd(pd),
- fsm(fsm),
- nextActionTableId(0),
- startState(-1),
- errState(-1)
-{
-}
-
-void RedFsmBuild::initActionList( unsigned long length )
-{
- redFsm->allActions = new GenAction[length];
- memset( redFsm->allActions, 0, sizeof(GenAction) * length );
- for ( unsigned long a = 0; a < length; a++ )
- redFsm->genActionList.append( redFsm->allActions+a );
-}
-
-
-void RedFsmBuild::makeActionList()
-{
- /* Determine which actions to write. */
- int nextActionId = 0;
- for ( ActionList::Iter act = pd->actionList; act.lte(); act++ ) {
- if ( act->numRefs() > 0 || act->numCondRefs > 0 )
- act->actionId = nextActionId++;
- }
-
- initActionList( nextActionId );
- curAction = 0;
-
- for ( ActionList::Iter act = pd->actionList; act.lte(); act++ ) {
- if ( act->actionId >= 0 )
- makeAction( act );
- }
-}
-
-void RedFsmBuild::initActionTableList( unsigned long length )
-{
- redFsm->allActionTables = new RedAction[length];
-}
-
-void RedFsmBuild::initStateList( unsigned long length )
-{
- redFsm->allStates = new RedState[length];
- for ( unsigned long s = 0; s < length; s++ )
- redFsm->stateList.append( redFsm->allStates+s );
-
- /* We get the start state as an offset, set the pointer now. */
- assert( startState >= 0 );
- redFsm->startState = redFsm->allStates + startState;
- if ( errState >= 0 )
- redFsm->errState = redFsm->allStates + errState;
- for ( EntryIdVect::Iter en = redFsm->entryPointIds; en.lte(); en++ )
- redFsm->entryPoints.insert( redFsm->allStates + *en );
-
- /* The nextStateId is no longer used to assign state ids (they come in set
- * from the frontend now), however generation code still depends on it.
- * Should eventually remove this variable. */
- redFsm->nextStateId = redFsm->stateList.length();
-}
-
-void RedFsmBuild::addEntryPoint( int entryId, char *name, unsigned long entryState )
-{
- redFsm->entryPointIds.append( entryState );
- redFsm->entryPointNames.append( name );
- redFsm->redEntryMap.insert( entryId, entryState );
-}
-
-void RedFsmBuild::addRegionToEntry( int regionId, int entryId )
-{
- assert( regionId == redFsm->regionToEntry.length() );
- redFsm->regionToEntry.append( entryId );
-}
-
-void RedFsmBuild::initTransList( int snum, unsigned long length )
-{
- /* Could preallocate the out range to save time growing it. For now do
- * nothing. */
-}
-
-void RedFsmBuild::newTrans( int snum, int tnum, Key lowKey,
- Key highKey, long targ, long action )
-{
- /* Get the current state and range. */
- RedState *curState = redFsm->allStates + snum;
- RedTransList &destRange = curState->outRange;
-
- if ( curState == redFsm->errState )
- return;
-
- /* Make the new transitions. */
- RedState *targState = targ >= 0 ? (redFsm->allStates + targ) :
- redFsm->wantComplete ? redFsm->getErrorState() : 0;
- RedAction *actionTable = action >= 0 ? (redFsm->allActionTables + action) : 0;
- RedTrans *trans = redFsm->allocateTrans( targState, actionTable );
- RedTransEl transEl( lowKey, highKey, trans );
-
- if ( redFsm->wantComplete ) {
- /* If the machine is to be complete then we need to fill any gaps with
- * the error transitions. */
- if ( destRange.length() == 0 ) {
- /* Range is currently empty. */
- if ( keyOps->minKey < lowKey ) {
- /* The first range doesn't start at the low end. */
- Key fillHighKey = lowKey;
- fillHighKey.decrement();
-
- /* Create the filler with the state's error transition. */
- RedTransEl newTel( keyOps->minKey, fillHighKey, redFsm->getErrorTrans() );
- destRange.append( newTel );
- }
- }
- else {
- /* The range list is not empty, get the the last range. */
- RedTransEl *last = &destRange[destRange.length()-1];
- Key nextKey = last->highKey;
- nextKey.increment();
- if ( nextKey < lowKey ) {
- /* There is a gap to fill. Make the high key. */
- Key fillHighKey = lowKey;
- fillHighKey.decrement();
-
- /* Create the filler with the state's error transtion. */
- RedTransEl newTel( nextKey, fillHighKey, redFsm->getErrorTrans() );
- destRange.append( newTel );
- }
- }
- }
-
- /* Filler taken care of. Append the range. */
- destRange.append( RedTransEl( lowKey, highKey, trans ) );
-}
-
-void RedFsmBuild::finishTransList( int snum )
-{
- /* Get the current state and range. */
- RedState *curState = redFsm->allStates + snum;
- RedTransList &destRange = curState->outRange;
-
- if ( curState == redFsm->errState )
- return;
-
- /* If building a complete machine we may need filler on the end. */
- if ( redFsm->wantComplete ) {
- /* Check if there are any ranges already. */
- if ( destRange.length() == 0 ) {
- /* Fill with the whole alphabet. */
- /* Add the range on the lower and upper bound. */
- RedTransEl newTel( keyOps->minKey, keyOps->maxKey, redFsm->getErrorTrans() );
- destRange.append( newTel );
- }
- else {
- /* Get the last and check for a gap on the end. */
- RedTransEl *last = &destRange[destRange.length()-1];
- if ( last->highKey < keyOps->maxKey ) {
- /* Make the high key. */
- Key fillLowKey = last->highKey;
- fillLowKey.increment();
-
- /* Create the new range with the error trans and append it. */
- RedTransEl newTel( fillLowKey, keyOps->maxKey, redFsm->getErrorTrans() );
- destRange.append( newTel );
- }
- }
- }
-}
-
-void RedFsmBuild::setId( int snum, int id )
-{
- RedState *curState = redFsm->allStates + snum;
- curState->id = id;
-}
-
-void RedFsmBuild::setEofTrans( int snum, int eofTarget, int actId )
-{
- RedState *curState = redFsm->allStates + snum;
- RedState *targState = redFsm->allStates + eofTarget;
- RedAction *eofAct = redFsm->allActionTables + actId;
- curState->eofTrans = redFsm->allocateTrans( targState, eofAct );
-}
-
-void RedFsmBuild::setFinal( int snum )
-{
- RedState *curState = redFsm->allStates + snum;
- curState->isFinal = true;
-}
-
-
-void RedFsmBuild::setStateActions( int snum, long toStateAction,
- long fromStateAction, long eofAction )
-{
- RedState *curState = redFsm->allStates + snum;
- if ( toStateAction >= 0 )
- curState->toStateAction = redFsm->allActionTables + toStateAction;
- if ( fromStateAction >= 0 )
- curState->fromStateAction = redFsm->allActionTables + fromStateAction;
- if ( eofAction >= 0 )
- curState->eofAction = redFsm->allActionTables + eofAction;
-}
-
-void RedFsmBuild::closeMachine()
-{
- //for ( GenActionList::Iter a = redFsm->actionList; a.lte(); a++ )
- // resolveTargetStates( a->inlineList );
-
- /* Note that even if we want a complete graph we do not give the error
- * state a default transition. All machines break out of the processing
- * loop when in the error state. */
-
- for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
- for ( GenStateCondList::Iter sci = st->stateCondList; sci.lte(); sci++ )
- st->stateCondVect.append( sci );
- }
-}
-
-void RedFsmBuild::initCondSpaceList( ulong length )
-{
- redFsm->allCondSpaces = new GenCondSpace[length];
- for ( ulong c = 0; c < length; c++ )
- redFsm->condSpaceList.append( redFsm->allCondSpaces + c );
-}
-
-void RedFsmBuild::newCondSpace( int cnum, int condSpaceId, Key baseKey )
-{
- GenCondSpace *cond = redFsm->allCondSpaces + cnum;
- cond->condSpaceId = condSpaceId;
- cond->baseKey = baseKey;
-}
-
-void RedFsmBuild::condSpaceItem( int cnum, long condActionId )
-{
- GenCondSpace *cond = redFsm->allCondSpaces + cnum;
- cond->condSet.append( redFsm->allActions + condActionId );
-}
-
-void RedFsmBuild::initStateCondList( int snum, ulong length )
-{
- /* Could preallocate these, as we could with transitions. */
-}
-
-void RedFsmBuild::addStateCond( int snum, Key lowKey, Key highKey, long condNum )
-{
- RedState *curState = redFsm->allStates + snum;
-
- /* Create the new state condition. */
- GenStateCond *stateCond = new GenStateCond;
- stateCond->lowKey = lowKey;
- stateCond->highKey = highKey;
-
- /* Assign it a cond space. */
- GenCondSpace *condSpace = redFsm->allCondSpaces + condNum;
- stateCond->condSpace = condSpace;
-
- curState->stateCondList.append( stateCond );
-}
-
-
-void RedFsmBuild::setForcedErrorState()
-{
- redFsm->forcedErrorState = true;
-}
-
-Key RedFsmBuild::findMaxKey()
-{
- Key maxKey = keyOps->maxKey;
- for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
- assert( st->outSingle.length() == 0 );
- assert( st->defTrans == 0 );
-
- long rangeLen = st->outRange.length();
- if ( rangeLen > 0 ) {
- Key highKey = st->outRange[rangeLen-1].highKey;
- if ( highKey > maxKey )
- maxKey = highKey;
- }
- }
- return maxKey;
-}
-
-
-void RedFsmBuild::makeActionTableList()
-{
- /* Must first order the action tables based on their id. */
- int numTables = nextActionTableId;
- RedActionTable **tables = new RedActionTable*[numTables];
- for ( ActionTableMap::Iter at = actionTableMap; at.lte(); at++ )
- tables[at->id] = at;
-
- initActionTableList( numTables );
- curActionTable = 0;
-
- for ( int t = 0; t < numTables; t++ ) {
- long length = tables[t]->key.length();
-
- /* Collect the action table. */
- RedAction *redAct = redFsm->allActionTables + curActionTable;
- redAct->actListId = curActionTable;
- redAct->key.setAsNew( length );
-
- int pos = 0;
- for ( ActionTable::Iter atel = tables[t]->key; atel.lte(); atel++ ) {
- int actionId = atel->value->actionId;
- redAct->key[pos].key = 0;
- redAct->key[pos].value = redFsm->allActions+actionId;
- pos += 1;
- }
-
- /* Insert into the action table map. */
- redFsm->actionMap.insert( redAct );
-
- curActionTable += 1;
-
- }
-
- delete[] tables;
-}
-
-void RedFsmBuild::reduceActionTables()
-{
- /* Reduce the actions tables to a set. */
- for ( StateList::Iter st = fsm->stateList; st.lte(); st++ ) {
- RedActionTable *actionTable = 0;
-
- /* Reduce To State Actions. */
- if ( st->toStateActionTable.length() > 0 ) {
- if ( actionTableMap.insert( st->toStateActionTable, &actionTable ) )
- actionTable->id = nextActionTableId++;
- }
-
- /* Reduce From State Actions. */
- if ( st->fromStateActionTable.length() > 0 ) {
- if ( actionTableMap.insert( st->fromStateActionTable, &actionTable ) )
- actionTable->id = nextActionTableId++;
- }
-
- /* Reduce EOF actions. */
- if ( st->eofActionTable.length() > 0 ) {
- if ( actionTableMap.insert( st->eofActionTable, &actionTable ) )
- actionTable->id = nextActionTableId++;
- }
-
- /* Loop the transitions and reduce their actions. */
- for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
- if ( trans->actionTable.length() > 0 ) {
- if ( actionTableMap.insert( trans->actionTable, &actionTable ) )
- actionTable->id = nextActionTableId++;
- }
- }
- }
-}
-
-void RedFsmBuild::appendTrans( TransListVect &outList, Key lowKey,
- Key highKey, FsmTrans *trans )
-{
- if ( trans->toState != 0 || trans->actionTable.length() > 0 )
- outList.append( TransEl( lowKey, highKey, trans ) );
-}
-
-void RedFsmBuild::makeTrans( Key lowKey, Key highKey, FsmTrans *trans )
-{
- /* First reduce the action. */
- RedActionTable *actionTable = 0;
- if ( trans->actionTable.length() > 0 )
- actionTable = actionTableMap.find( trans->actionTable );
-
- long targ = trans->toState == 0 ? -1 : trans->toState->alg.stateNum;
- long action = actionTable == 0 ? -1 : actionTable->id;
-
- newTrans( curState, curTrans++, lowKey, highKey, targ, action );
-}
-
-void RedFsmBuild::makeTransList( FsmState *state )
-{
- TransListVect outList;
-
- /* If there is only are no ranges the task is simple. */
- if ( state->outList.length() > 0 ) {
- /* Loop each source range. */
- for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) {
- /* Reduce the transition. If it reduced to anything then add it. */
- appendTrans( outList, trans->lowKey, trans->highKey, trans );
- }
- }
-
- long length = outList.length();
- initTransList( curState, length );
- curTrans = 0;
-
- for ( TransListVect::Iter tvi = outList; tvi.lte(); tvi++ )
- makeTrans( tvi->lowKey, tvi->highKey, tvi->value );
- finishTransList( curState );
-}
-
-void RedFsmBuild::newAction( int anum, char *name, int line, int col, Action *action )
-{
- redFsm->allActions[anum].actionId = anum;
- redFsm->allActions[anum].name = name;
- redFsm->allActions[anum].loc.line = line;
- redFsm->allActions[anum].loc.col = col;
- redFsm->allActions[anum].inlineList = action->inlineList;
- redFsm->allActions[anum].objField = action->objField;
- redFsm->allActions[anum].markType = action->markType;
- redFsm->allActions[anum].markId = action->markId + 1;
-}
-
-void RedFsmBuild::makeAction( Action *action )
-{
- int line = action->loc.line;
- int col = action->loc.col;
-
- char *name = 0;
- if ( action->name != 0 )
- name = action->name;
-
- newAction( curAction++, name, line, col, action );
-}
-
-void xmlEscapeHost( std::ostream &out, char *data, int len )
-{
- char *end = data + len;
- while ( data != end ) {
- switch ( *data ) {
- case '<': out << "&lt;"; break;
- case '>': out << "&gt;"; break;
- case '&': out << "&amp;"; break;
- default: out << *data; break;
- }
- data += 1;
- }
-}
-
-void RedFsmBuild::makeStateActions( FsmState *state )
-{
- RedActionTable *toStateActions = 0;
- if ( state->toStateActionTable.length() > 0 )
- toStateActions = actionTableMap.find( state->toStateActionTable );
-
- RedActionTable *fromStateActions = 0;
- if ( state->fromStateActionTable.length() > 0 )
- fromStateActions = actionTableMap.find( state->fromStateActionTable );
-
- RedActionTable *eofActions = 0;
- if ( state->eofActionTable.length() > 0 )
- eofActions = actionTableMap.find( state->eofActionTable );
-
- if ( toStateActions != 0 || fromStateActions != 0 || eofActions != 0 ) {
- long toStateAction = -1;
- long fromStateAction = -1;
- long eofAction = -1;
-
- if ( toStateActions != 0 )
- toStateAction = toStateActions->id;
- if ( fromStateActions != 0 )
- fromStateAction = fromStateActions->id;
- if ( eofActions != 0 )
- eofAction = eofActions->id;
-
- setStateActions( curState, toStateAction,
- fromStateAction, eofAction );
- }
-}
-
-void RedFsmBuild::makeStateConditions( FsmState *state )
-{
- if ( state->stateCondList.length() > 0 ) {
-
- long length = state->stateCondList.length();
- initStateCondList( curState, length );
- curStateCond = 0;
-
- for ( StateCondList::Iter scdi = state->stateCondList; scdi.lte(); scdi++ ) {
- Key lowKey = scdi->lowKey;
- Key highKey = scdi->highKey;
- long condId = scdi->condSpace->condSpaceId;
- addStateCond( curState, lowKey, highKey, condId );
- }
- }
-}
-
-void RedFsmBuild::makeStateList()
-{
- /* Write the list of states. */
- long length = fsm->stateList.length();
- initStateList( length );
- curState = 0;
-
- for ( StateList::Iter st = fsm->stateList; st.lte(); st++ ) {
- /* Both or neither should be set. */
- assert( !( (st->eofTarget != 0) xor (st->eofActionTable.length() > 0) ) );
-
- makeStateActions( st );
- makeStateConditions( st );
- makeTransList( st );
-
- setId( curState, st->alg.stateNum );
- if ( st->isFinState() )
- setFinal( curState );
-
- /* If there is an eof target, make an eof transition. */
- if ( st->eofTarget != 0 ) {
- /* Find the eof actions. */
- RedActionTable *eofActions = 0;
- eofActions = actionTableMap.find( st->eofActionTable );
- setEofTrans( curState, st->eofTarget->alg.stateNum, eofActions->id );
- }
-
- curState += 1;
- }
-}
-
-void RedFsmBuild::makeEntryPoints()
-{
- if ( fsm->lmRequiresErrorState )
- setForcedErrorState();
-
- for ( EntryMap::Iter en = fsm->entryPoints; en.lte(); en++ ) {
- /* Get the name instantiation from nameIndex. */
- NameInst *nameInst = fsm->nameIndex[en->key];
- FsmState *state = en->value;
- char *name = nameInst->name;
- long entry = state->alg.stateNum;
- addEntryPoint( en->key, name, entry );
- }
-
- for ( RegionList::Iter reg = pd->regionList; reg.lte(); reg++ ) {
- assert( reg->regionNameInst != 0 );
-
- TokenRegion *use = reg;
-
- if ( use->isCiOnly )
- use = use->derivedFrom->ignoreOnlyRegion;
-
- NameInst *regionName = use->regionNameInst;
- addRegionToEntry( reg->id, regionName->id );
- }
-}
-
-void RedFsmBuild::makeMachine()
-{
- /* Action tables. */
- reduceActionTables();
-
- makeActionList();
- makeActionTableList();
- makeConditions();
-
- /* Start state. */
- startState = fsm->startState->alg.stateNum;
-
- /* Error state. */
- if ( fsm->errState != 0 )
- errState = fsm->errState->alg.stateNum;
-
- makeEntryPoints();
- makeStateList();
-}
-
-void RedFsmBuild::makeConditions()
-{
- if ( condData->condSpaceMap.length() > 0 ) {
- long nextCondSpaceId = 0;
- for ( CondSpaceMap::Iter cs = condData->condSpaceMap; cs.lte(); cs++ )
- cs->condSpaceId = nextCondSpaceId++;
-
- long length = condData->condSpaceMap.length();
- initCondSpaceList( length );
- curCondSpace = 0;
-
- for ( CondSpaceMap::Iter cs = condData->condSpaceMap; cs.lte(); cs++ ) {
- long condSpaceId = cs->condSpaceId;
- Key baseKey = cs->baseKey;
-
- newCondSpace( curCondSpace, condSpaceId, baseKey );
- for ( CondSet::Iter csi = cs->condSet; csi.lte(); csi++ ) {
- long actionOffset = (*csi)->actionId;
- condSpaceItem( curCondSpace, actionOffset );
- }
-
- curCondSpace += 1;
- }
- }
-}
-
-RedFsm *RedFsmBuild::reduceMachine()
-{
- redFsm = new RedFsm();
- redFsm->wantComplete = true;
-
- /* Open the definition. */
- makeMachine();
-
- /* Do this before distributing transitions out to singles and defaults
- * makes life easier. */
- redFsm->maxKey = findMaxKey();
-
- redFsm->assignActionLocs();
-
- /* Find the first final state (The final state with the lowest id). */
- redFsm->findFirstFinState();
-
- /* Choose default transitions and the single transition. */
- redFsm->chooseDefaultSpan();
-
- /* Maybe do flat expand, otherwise choose single. */
- redFsm->chooseSingle();
-
- /* Set up incoming transitions. */
- redFsm->setInTrans();
-
- /* Anlayze Machine will find the final action reference counts, among
- * other things. We will use these in reporting the usage
- * of fsm directives in action code. */
- redFsm->analyzeMachine();
-
- return redFsm;
-}
-
diff --git a/colm/redbuild.h b/colm/redbuild.h
deleted file mode 100644
index dbbb3e19..00000000
--- a/colm/redbuild.h
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
- * Copyright 2006-2012 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Colm.
- *
- * Colm is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Colm is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Colm; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#ifndef _FSMREDUCE_H
-#define _FSMREDUCE_H
-
-#include <iostream>
-#include "avltree.h"
-#include "fsmgraph.h"
-#include "parsedata.h"
-#include "fsmrun.h"
-
-/* Forwards. */
-struct FsmTrans;
-struct FsmGraph;
-struct Compiler;
-struct FsmCodeGen;
-struct RedFsm;
-struct GenCondSpace;
-struct Condition;
-
-struct RedActionTable
-:
- public AvlTreeEl<RedActionTable>
-{
- RedActionTable( const ActionTable &key )
- :
- key(key),
- id(0)
- { }
-
- const ActionTable &getKey()
- { return key; }
-
- ActionTable key;
- int id;
-};
-
-typedef AvlTree<RedActionTable, ActionTable, CmpActionTable> ActionTableMap;
-
-struct NextRedTrans
-{
- Key lowKey, highKey;
- FsmTrans *trans;
- FsmTrans *next;
-
- void load() {
- if ( trans != 0 ) {
- next = trans->next;
- lowKey = trans->lowKey;
- highKey = trans->highKey;
- }
- }
-
- NextRedTrans( FsmTrans *t ) {
- trans = t;
- load();
- }
-
- void increment() {
- trans = next;
- load();
- }
-};
-
-class RedFsmBuild
-{
-public:
- RedFsmBuild( char *fsmName, Compiler *pd, FsmGraph *fsm );
- RedFsm *reduceMachine( );
-
-private:
- void appendTrans( TransListVect &outList, Key lowKey, Key highKey, FsmTrans *trans );
- void makeStateActions( FsmState *state );
- void makeStateList();
- void makeStateConditions( FsmState *state );
-
- void initActionList( unsigned long length );
- void newAction( int anum, char *name, int line, int col, Action *action );
- void initActionTableList( unsigned long length );
- void initCondSpaceList( ulong length );
- void condSpaceItem( int cnum, long condActionId );
- void newCondSpace( int cnum, int condSpaceId, Key baseKey );
- void initStateCondList( int snum, ulong length );
- void addStateCond( int snum, Key lowKey, Key highKey, long condNum );
- void initStateList( unsigned long length );
- void addRegionToEntry( int regionId, int entryId );
- void addEntryPoint( int entryId, char *name, unsigned long entryState );
- void setId( int snum, int id );
- void initTransList( int snum, unsigned long length );
- void newTrans( int snum, int tnum, Key lowKey, Key highKey,
- long targ, long act );
- void finishTransList( int snum );
- void setFinal( int snum );
- void setEofTrans( int snum, int eofTarget, int actId );
- void setStateActions( int snum, long toStateAction,
- long fromStateAction, long eofAction );
- void setForcedErrorState();
- void closeMachine();
- Key findMaxKey();
-
-
- void makeEntryPoints();
- void makeGetKeyExpr();
- void makeAccessExpr();
- void makeCurStateExpr();
- void makeConditions();
- void makeInlineList( InlineList *inlineList, InlineItem *context );
- void makeActionList();
- void makeActionTableList();
- void reduceTrans( FsmTrans *trans );
- void reduceActionTables();
- void makeTransList( FsmState *state );
- void makeTrans( Key lowKey, Key highKey, FsmTrans *defTrans );
- void makeAction( Action *action );
- void makeLmSwitch( InlineItem *item );
- void makeMachine();
- void makeActionExec( InlineItem *item );
- void makeActionExecTE( InlineItem *item );
-
- char *fsmName;
- Compiler *pd;
- FsmGraph *fsm;
- ActionTableMap actionTableMap;
- int nextActionTableId;
-
- int startState;
- int errState;
-
-public:
- RedFsm *redFsm;
-
-private:
- int curAction;
- int curActionTable;
- int curTrans;
- int curState;
- int curCondSpace;
- int curStateCond;
-};
-
-
-#endif /* _FSMREDUCE_H */
diff --git a/colm/redfsm.cc b/colm/redfsm.cc
deleted file mode 100644
index d3a65b7c..00000000
--- a/colm/redfsm.cc
+++ /dev/null
@@ -1,1112 +0,0 @@
-/*
- * Copyright 2006-2012 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Colm.
- *
- * Colm is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Colm is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Colm; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include <iostream>
-#include <sstream>
-#include "redfsm.h"
-#include "avlmap.h"
-#include "mergesort.h"
-#include "fsmgraph.h"
-#include "parsetree.h"
-#include "fsmrun.h"
-
-using std::ostringstream;
-
-string nameOrLoc( GenAction *genAction )
-{
- if ( genAction->name != 0 )
- return string(genAction->name);
- else {
- ostringstream ret;
- ret << genAction->loc.line << ":" << genAction->loc.col;
- return ret.str();
- }
-}
-
-RedFsm::RedFsm()
-:
- wantComplete(false),
- forcedErrorState(false),
- nextActionId(0),
- nextTransId(0),
- errState(0),
- errTrans(0),
- firstFinState(0),
- numFinStates(0),
- allActions(0),
- allActionTables(0),
- allConditions(0),
- allCondSpaces(0),
- allStates(0),
- bAnyToStateActions(false),
- bAnyFromStateActions(false),
- bAnyRegActions(false),
- bAnyEofActions(false),
- bAnyActionGotos(false),
- bAnyActionCalls(false),
- bAnyActionRets(false),
- bAnyRegActionRets(false),
- bAnyRegActionByValControl(false),
- bAnyRegNextStmt(false),
- bAnyRegCurStateRef(false),
- bAnyRegBreak(false),
- bAnyLmSwitchError(false),
- bAnyConditions(false)
-{
-}
-
-/* Does the machine have any actions. */
-bool RedFsm::anyActions()
-{
- return actionMap.length() > 0;
-}
-
-void RedFsm::depthFirstOrdering( RedState *state )
-{
- /* Nothing to do if the state is already on the list. */
- if ( state->onStateList )
- return;
-
- /* Doing depth first, put state on the list. */
- state->onStateList = true;
- stateList.append( state );
-
-// /* At this point transitions should only be in ranges. */
-// assert( state->outSingle.length() == 0 );
-// assert( state->defTrans == 0 );
-
- /* Recurse on singles. */
- for ( RedTransList::Iter stel = state->outSingle; stel.lte(); stel++ ) {
- if ( stel->value->targ != 0 )
- depthFirstOrdering( stel->value->targ );
- }
-
- /* Recurse on all ranges. */
- for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) {
- if ( rtel->value->targ != 0 )
- depthFirstOrdering( rtel->value->targ );
- }
-
- if ( state->defTrans != 0 && state->defTrans->targ != 0 )
- depthFirstOrdering( state->defTrans->targ );
-}
-
-/* Ordering states by transition connections. */
-void RedFsm::depthFirstOrdering()
-{
- /* Init on state list flags. */
- for ( RedStateList::Iter st = stateList; st.lte(); st++ )
- st->onStateList = false;
-
- /* Clear out the state list, we will rebuild it. */
- int stateListLen = stateList.length();
- stateList.abandon();
-
- /* Add back to the state list from the start state and all other entry
- * points. */
- depthFirstOrdering( startState );
- for ( RedStateSet::Iter en = entryPoints; en.lte(); en++ )
- depthFirstOrdering( *en );
- if ( forcedErrorState )
- depthFirstOrdering( errState );
-
- /* Make sure we put everything back on. */
- assert( stateListLen == stateList.length() );
-}
-
-/* Assign state ids by appearance in the state list. */
-void RedFsm::sequentialStateIds()
-{
- /* Table based machines depend on the state numbers starting at zero. */
- nextStateId = 0;
- for ( RedStateList::Iter st = stateList; st.lte(); st++ )
- st->id = nextStateId++;
-}
-
-/* Stable sort the states by final state status. */
-void RedFsm::sortStatesByFinal()
-{
- /* Move forward through the list and throw final states onto the end. */
- RedState *state = 0;
- RedState *next = stateList.head;
- RedState *last = stateList.tail;
- while ( state != last ) {
- /* Move forward and load up the next. */
- state = next;
- next = state->next;
-
- /* Throw to the end? */
- if ( state->isFinal ) {
- stateList.detach( state );
- stateList.append( state );
- }
- }
-}
-
-/* Assign state ids by final state status. */
-void RedFsm::sortStateIdsByFinal()
-{
- /* Table based machines depend on this starting at zero. */
- nextStateId = 0;
-
- /* First pass to assign non final ids. */
- for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
- if ( ! st->isFinal )
- st->id = nextStateId++;
- }
-
- /* Second pass to assign final ids. */
- for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
- if ( st->isFinal )
- st->id = nextStateId++;
- }
-}
-
-struct CmpStateById
-{
- static int compare( RedState *st1, RedState *st2 )
- {
- if ( st1->id < st2->id )
- return -1;
- else if ( st1->id > st2->id )
- return 1;
- else
- return 0;
- }
-};
-
-void RedFsm::sortByStateId()
-{
- /* Make the array. */
- int pos = 0;
- RedState **ptrList = new RedState*[stateList.length()];
- for ( RedStateList::Iter st = stateList; st.lte(); st++ )
- ptrList[pos++] = st;
-
- MergeSort<RedState*, CmpStateById> mergeSort;
- mergeSort.sort( ptrList, stateList.length() );
-
- stateList.abandon();
- for ( int st = 0; st < pos; st++ )
- stateList.append( ptrList[st] );
-
- delete[] ptrList;
-}
-
-/* Find the final state with the lowest id. */
-void RedFsm::findFirstFinState()
-{
- for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
- if ( st->isFinal && (firstFinState == 0 || st->id < firstFinState->id) )
- firstFinState = st;
- }
-}
-
-void RedFsm::assignActionLocs()
-{
- int nextLocation = 0;
- for ( GenActionTableMap::Iter act = actionMap; act.lte(); act++ ) {
- /* Store the loc, skip over the array and a null terminator. */
- act->location = nextLocation;
- nextLocation += act->key.length() + 1;
- }
-}
-
-/* Check if we can extend the current range by displacing any ranges
- * ahead to the singles. */
-bool RedFsm::canExtend( const RedTransList &list, int pos )
-{
- /* Get the transition that we want to extend. */
- RedTrans *extendTrans = list[pos].value;
-
- /* Look ahead in the transition list. */
- for ( int next = pos + 1; next < list.length(); pos++, next++ ) {
- /* If they are not continuous then cannot extend. */
- Key nextKey = list[next].lowKey;
- nextKey.decrement();
- if ( list[pos].highKey != nextKey )
- break;
-
- /* Check for the extension property. */
- if ( extendTrans == list[next].value )
- return true;
-
- /* If the span of the next element is more than one, then don't keep
- * checking, it won't be moved to single. */
- unsigned long long nextSpan = keyOps->span( list[next].lowKey, list[next].highKey );
- if ( nextSpan > 1 )
- break;
- }
- return false;
-}
-
-/* Move ranges to the singles list. */
-void RedFsm::moveTransToSingle( RedState *state )
-{
- RedTransList &range = state->outRange;
- RedTransList &single = state->outSingle;
- for ( int rpos = 0; rpos < range.length(); ) {
- /* Check if this is a range we can extend. */
- if ( canExtend( range, rpos ) ) {
- /* Transfer singles over. */
- while ( range[rpos].value != range[rpos+1].value ) {
- /* Transfer the range to single. */
- single.append( range[rpos+1] );
- range.remove( rpos+1 );
- }
-
- /* Extend. */
- range[rpos].highKey = range[rpos+1].highKey;
- range.remove( rpos+1 );
- }
- /* Maybe move it to the singles. */
- else if ( keyOps->span( range[rpos].lowKey, range[rpos].highKey ) == 1 ) {
- single.append( range[rpos] );
- range.remove( rpos );
- }
- else {
- /* Keeping it in the ranges. */
- rpos += 1;
- }
- }
-}
-
-/* Look through ranges and choose suitable single character transitions. */
-void RedFsm::chooseSingle()
-{
- /* Loop the states. */
- for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
- /* Rewrite the transition list taking out the suitable single
- * transitions. */
- moveTransToSingle( st );
- }
-}
-
-void RedFsm::makeFlat()
-{
- for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
- if ( st->stateCondList.length() == 0 ) {
- st->condLowKey = 0;
- st->condHighKey = 0;
- }
- else {
- st->condLowKey = st->stateCondList.head->lowKey;
- st->condHighKey = st->stateCondList.tail->highKey;
-
- unsigned long long span = keyOps->span( st->condLowKey, st->condHighKey );
- st->condList = new GenCondSpace*[ span ];
- memset( st->condList, 0, sizeof(GenCondSpace*)*span );
-
- for ( GenStateCondList::Iter sci = st->stateCondList; sci.lte(); sci++ ) {
- unsigned long long base, trSpan;
- base = keyOps->span( st->condLowKey, sci->lowKey )-1;
- trSpan = keyOps->span( sci->lowKey, sci->highKey );
- for ( unsigned long long pos = 0; pos < trSpan; pos++ )
- st->condList[base+pos] = sci->condSpace;
- }
- }
-
- if ( st->outRange.length() == 0 ) {
- st->lowKey = st->highKey = 0;
- st->transList = 0;
- }
- else {
- st->lowKey = st->outRange[0].lowKey;
- st->highKey = st->outRange[st->outRange.length()-1].highKey;
- unsigned long long span = keyOps->span( st->lowKey, st->highKey );
- st->transList = new RedTrans*[ span ];
- memset( st->transList, 0, sizeof(RedTrans*)*span );
-
- for ( RedTransList::Iter trans = st->outRange; trans.lte(); trans++ ) {
- unsigned long long base, trSpan;
- base = keyOps->span( st->lowKey, trans->lowKey )-1;
- trSpan = keyOps->span( trans->lowKey, trans->highKey );
- for ( unsigned long long pos = 0; pos < trSpan; pos++ )
- st->transList[base+pos] = trans->value;
- }
-
- /* Fill in the gaps with the default transition. */
- for ( unsigned long long pos = 0; pos < span; pos++ ) {
- if ( st->transList[pos] == 0 )
- st->transList[pos] = st->defTrans;
- }
- }
- }
-}
-
-
-/* A default transition has been picked, move it from the outRange to the
- * default pointer. */
-void RedFsm::moveToDefault( RedTrans *defTrans, RedState *state )
-{
- /* Rewrite the outRange, omitting any ranges that use
- * the picked default. */
- RedTransList outRange;
- for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) {
- /* If it does not take the default, copy it over. */
- if ( rtel->value != defTrans )
- outRange.append( *rtel );
- }
-
- /* Save off the range we just created into the state's range. */
- state->outRange.transfer( outRange );
-
- /* Store the default. */
- state->defTrans = defTrans;
-}
-
-bool RedFsm::alphabetCovered( RedTransList &outRange )
-{
- /* Cannot cover without any out ranges. */
- if ( outRange.length() == 0 )
- return false;
-
- /* If the first range doesn't start at the lower bound then the
- * alphabet is not covered. */
- RedTransList::Iter rtel = outRange;
- if ( keyOps->minKey < rtel->lowKey )
- return false;
-
- /* Check that every range is next to the previous one. */
- rtel.increment();
- for ( ; rtel.lte(); rtel++ ) {
- Key highKey = rtel[-1].highKey;
- highKey.increment();
- if ( highKey != rtel->lowKey )
- return false;
- }
-
- /* The last must extend to the upper bound. */
- RedTransEl *last = &outRange[outRange.length()-1];
- if ( last->highKey < keyOps->maxKey )
- return false;
-
- return true;
-}
-
-RedTrans *RedFsm::chooseDefaultSpan( RedState *state )
-{
- /* Make a set of transitions from the outRange. */
- RedTransPtrSet stateTransSet;
- for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ )
- stateTransSet.insert( rtel->value );
-
- /* For each transition in the set, find how many alphabet characters the
- * transition spans. */
- unsigned long long *span = new unsigned long long[stateTransSet.length()];
- memset( span, 0, sizeof(unsigned long long) * stateTransSet.length() );
- for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) {
- /* Lookup the transition in the set. */
- RedTrans **inSet = stateTransSet.find( rtel->value );
- int pos = inSet - stateTransSet.data;
- span[pos] += keyOps->span( rtel->lowKey, rtel->highKey );
- }
-
- /* Find the max span, choose it for making the default. */
- RedTrans *maxTrans = 0;
- unsigned long long maxSpan = 0;
- for ( RedTransPtrSet::Iter rtel = stateTransSet; rtel.lte(); rtel++ ) {
- if ( span[rtel.pos()] > maxSpan ) {
- maxSpan = span[rtel.pos()];
- maxTrans = *rtel;
- }
- }
-
- delete[] span;
- return maxTrans;
-}
-
-/* Pick default transitions from ranges for the states. */
-void RedFsm::chooseDefaultSpan()
-{
- /* Loop the states. */
- for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
- /* Only pick a default transition if the alphabet is covered. This
- * avoids any transitions in the out range that go to error and avoids
- * the need for an ERR state. */
- if ( alphabetCovered( st->outRange ) ) {
- /* Pick a default transition by largest span. */
- RedTrans *defTrans = chooseDefaultSpan( st );
-
- /* Rewrite the transition list taking out the transition we picked
- * as the default and store the default. */
- moveToDefault( defTrans, st );
- }
- }
-}
-
-RedTrans *RedFsm::chooseDefaultGoto( RedState *state )
-{
- /* Make a set of transitions from the outRange. */
- RedTransPtrSet stateTransSet;
- for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) {
- if ( rtel->value->targ == state->next )
- return rtel->value;
- }
- return 0;
-}
-
-void RedFsm::chooseDefaultGoto()
-{
- /* Loop the states. */
- for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
- /* Pick a default transition. */
- RedTrans *defTrans = chooseDefaultGoto( st );
- if ( defTrans == 0 )
- defTrans = chooseDefaultSpan( st );
-
- /* Rewrite the transition list taking out the transition we picked
- * as the default and store the default. */
- moveToDefault( defTrans, st );
- }
-}
-
-RedTrans *RedFsm::chooseDefaultNumRanges( RedState *state )
-{
- /* Make a set of transitions from the outRange. */
- RedTransPtrSet stateTransSet;
- for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ )
- stateTransSet.insert( rtel->value );
-
- /* For each transition in the set, find how many ranges use the transition. */
- int *numRanges = new int[stateTransSet.length()];
- memset( numRanges, 0, sizeof(int) * stateTransSet.length() );
- for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) {
- /* Lookup the transition in the set. */
- RedTrans **inSet = stateTransSet.find( rtel->value );
- numRanges[inSet - stateTransSet.data] += 1;
- }
-
- /* Find the max number of ranges. */
- RedTrans *maxTrans = 0;
- int maxNumRanges = 0;
- for ( RedTransPtrSet::Iter rtel = stateTransSet; rtel.lte(); rtel++ ) {
- if ( numRanges[rtel.pos()] > maxNumRanges ) {
- maxNumRanges = numRanges[rtel.pos()];
- maxTrans = *rtel;
- }
- }
-
- delete[] numRanges;
- return maxTrans;
-}
-
-void RedFsm::chooseDefaultNumRanges()
-{
- /* Loop the states. */
- for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
- /* Pick a default transition. */
- RedTrans *defTrans = chooseDefaultNumRanges( st );
-
- /* Rewrite the transition list taking out the transition we picked
- * as the default and store the default. */
- moveToDefault( defTrans, st );
- }
-}
-
-RedTrans *RedFsm::getErrorTrans( )
-{
- /* If the error trans has not been made already, make it. */
- if ( errTrans == 0 ) {
- /* This insert should always succeed since no transition created by
- * the user can point to the error state. */
- errTrans = new RedTrans( getErrorState(), 0, nextTransId++ );
- RedTrans *inRes = transSet.insert( errTrans );
- assert( inRes != 0 );
- }
- return errTrans;
-}
-
-RedState *RedFsm::getErrorState()
-{
- /* Something went wrong. An error state is needed but one was not supplied
- * by the frontend. */
- assert( errState != 0 );
- return errState;
-}
-
-
-RedTrans *RedFsm::allocateTrans( RedState *targ, RedAction *action )
-{
- /* Create a reduced trans and look for it in the transition set. */
- RedTrans redTrans( targ, action, 0 );
- RedTrans *inDict = transSet.find( &redTrans );
- if ( inDict == 0 ) {
- inDict = new RedTrans( targ, action, nextTransId++ );
- transSet.insert( inDict );
- }
- return inDict;
-}
-
-void RedFsm::partitionFsm( int nparts )
-{
- /* At this point the states are ordered by a depth-first traversal. We
- * will allocate to partitions based on this ordering. */
- this->nParts = nparts;
- int partSize = stateList.length() / nparts;
- int remainder = stateList.length() % nparts;
- int numInPart = partSize;
- int partition = 0;
- if ( remainder-- > 0 )
- numInPart += 1;
- for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
- st->partition = partition;
-
- numInPart -= 1;
- if ( numInPart == 0 ) {
- partition += 1;
- numInPart = partSize;
- if ( remainder-- > 0 )
- numInPart += 1;
- }
- }
-}
-
-void RedFsm::setInTrans()
-{
- /* First pass counts the number of transitions. */
- for ( RedTransSet::Iter trans = transSet; trans.lte(); trans++ )
- trans->targ->numInTrans += 1;
-
- /* Pass over states to allocate the needed memory. Reset the counts so we
- * can use them as the current size. */
- for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
- st->inTrans = new RedTrans*[st->numInTrans];
- st->numInTrans = 0;
- }
-
- /* Second pass over transitions copies pointers into the in trans list. */
- for ( RedTransSet::Iter trans = transSet; trans.lte(); trans++ )
- trans->targ->inTrans[trans->targ->numInTrans++] = trans;
-}
-
-GenCondSpace *RedFsm::findCondSpace( Key lowKey, Key highKey )
-{
- for ( CondSpaceList::Iter cs = condSpaceList; cs.lte(); cs++ ) {
- Key csHighKey = cs->baseKey;
- csHighKey += keyOps->alphSize() * (1 << cs->condSet.length());
-
- if ( lowKey >= cs->baseKey && highKey <= csHighKey )
- return cs;
- }
- return 0;
-}
-
-Condition *RedFsm::findCondition( Key key )
-{
- for ( ConditionList::Iter cond = conditionList; cond.lte(); cond++ ) {
- Key upperKey = cond->baseKey + (1 << cond->condSet.length());
- if ( cond->baseKey <= key && key <= upperKey )
- return cond;
- }
- return 0;
-}
-
-void RedFsm::setValueLimits()
-{
- maxSingleLen = 0;
- maxRangeLen = 0;
- maxKeyOffset = 0;
- maxIndexOffset = 0;
- maxActListId = 0;
- maxActionLoc = 0;
- maxActArrItem = 0;
- maxSpan = 0;
- maxCondSpan = 0;
- maxFlatIndexOffset = 0;
- maxCondOffset = 0;
- maxCondLen = 0;
- maxCondSpaceId = 0;
- maxCondIndexOffset = 0;
-
- /* In both of these cases the 0 index is reserved for no value, so the max
- * is one more than it would be if they started at 0. */
- maxIndex = transSet.length();
- maxCond = condSpaceList.length();
-
- /* The nextStateId - 1 is the last state id assigned. */
- maxState = nextStateId - 1;
-
- for ( CondSpaceList::Iter csi = condSpaceList; csi.lte(); csi++ ) {
- if ( csi->condSpaceId > maxCondSpaceId )
- maxCondSpaceId = csi->condSpaceId;
- }
-
- for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
- /* Maximum cond length. */
- if ( st->stateCondList.length() > maxCondLen )
- maxCondLen = st->stateCondList.length();
-
- /* Maximum single length. */
- if ( st->outSingle.length() > maxSingleLen )
- maxSingleLen = st->outSingle.length();
-
- /* Maximum range length. */
- if ( st->outRange.length() > maxRangeLen )
- maxRangeLen = st->outRange.length();
-
- /* The key offset and index offset for the state after the last one are not used, so skip them. */
- if ( ! st.last() ) {
- maxCondOffset += st->stateCondList.length();
- maxKeyOffset += st->outSingle.length() + st->outRange.length()*2;
- maxIndexOffset += st->outSingle.length() + st->outRange.length() + 1;
- }
-
- /* Max cond span. */
- if ( st->condList != 0 ) {
- unsigned long long span = keyOps->span( st->condLowKey, st->condHighKey );
- if ( span > maxCondSpan )
- maxCondSpan = span;
- }
-
- /* Max key span. */
- if ( st->transList != 0 ) {
- unsigned long long span = keyOps->span( st->lowKey, st->highKey );
- if ( span > maxSpan )
- maxSpan = span;
- }
-
- /* Max cond index offset. */
- if ( ! st.last() ) {
- if ( st->condList != 0 )
- maxCondIndexOffset += keyOps->span( st->condLowKey, st->condHighKey );
- }
-
- /* Max flat index offset. */
- if ( ! st.last() ) {
- if ( st->transList != 0 )
- maxFlatIndexOffset += keyOps->span( st->lowKey, st->highKey );
- maxFlatIndexOffset += 1;
- }
- }
-
- for ( GenActionTableMap::Iter at = actionMap; at.lte(); at++ ) {
- /* Maximum id of action lists. */
- if ( at->actListId+1 > maxActListId )
- maxActListId = at->actListId+1;
-
- /* Maximum location of items in action array. */
- if ( at->location+1 > maxActionLoc )
- maxActionLoc = at->location+1;
-
- /* Maximum values going into the action array. */
- if ( at->key.length() > maxActArrItem )
- maxActArrItem = at->key.length();
- for ( GenActionTable::Iter item = at->key; item.lte(); item++ ) {
- if ( item->value->actionId > maxActArrItem )
- maxActArrItem = item->value->actionId;
- }
- }
-}
-
-void RedFsm::findFinalActionRefs()
-{
- for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
- /* Reference count out of single transitions. */
- for ( RedTransList::Iter rtel = st->outSingle; rtel.lte(); rtel++ ) {
- if ( rtel->value->action != 0 ) {
- rtel->value->action->numTransRefs += 1;
- for ( GenActionTable::Iter item = rtel->value->action->key; item.lte(); item++ )
- item->value->numTransRefs += 1;
- }
- }
-
- /* Reference count out of range transitions. */
- for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) {
- if ( rtel->value->action != 0 ) {
- rtel->value->action->numTransRefs += 1;
- for ( GenActionTable::Iter item = rtel->value->action->key; item.lte(); item++ )
- item->value->numTransRefs += 1;
- }
- }
-
- /* Reference count default transition. */
- if ( st->defTrans != 0 && st->defTrans->action != 0 ) {
- st->defTrans->action->numTransRefs += 1;
- for ( GenActionTable::Iter item = st->defTrans->action->key; item.lte(); item++ )
- item->value->numTransRefs += 1;
- }
-
- /* Reference count to state actions. */
- if ( st->toStateAction != 0 ) {
- st->toStateAction->numToStateRefs += 1;
- for ( GenActionTable::Iter item = st->toStateAction->key; item.lte(); item++ )
- item->value->numToStateRefs += 1;
- }
-
- /* Reference count from state actions. */
- if ( st->fromStateAction != 0 ) {
- st->fromStateAction->numFromStateRefs += 1;
- for ( GenActionTable::Iter item = st->fromStateAction->key; item.lte(); item++ )
- item->value->numFromStateRefs += 1;
- }
-
- /* Reference count EOF actions. */
- if ( st->eofAction != 0 ) {
- st->eofAction->numEofRefs += 1;
- for ( GenActionTable::Iter item = st->eofAction->key; item.lte(); item++ )
- item->value->numEofRefs += 1;
- }
- }
-}
-
-void RedFsm::analyzeAction( GenAction *act, InlineList *inlineList )
-{
- for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
- /* Check for various things in regular actions. */
- if ( act->numTransRefs > 0 || act->numToStateRefs > 0 ||
- act->numFromStateRefs > 0 || act->numEofRefs > 0 )
- {
- if ( item->type == InlineItem::LmSwitch &&
- item->tokenRegion->lmSwitchHandlesError )
- {
- bAnyLmSwitchError = true;
- }
- }
-
- if ( item->children != 0 )
- analyzeAction( act, item->children );
- }
-}
-
-void RedFsm::analyzeActionList( RedAction *redAct, InlineList *inlineList )
-{
- for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
- if ( item->children != 0 )
- analyzeActionList( redAct, item->children );
- }
-}
-
-/* Assign ids to referenced actions. */
-void RedFsm::assignActionIds()
-{
- int nextActionId = 0;
- for ( GenActionList::Iter act = genActionList; act.lte(); act++ ) {
- /* Only ever interested in referenced actions. */
- if ( numRefs( act ) > 0 )
- act->actionId = nextActionId++;
- }
-}
-
-/* Gather various info on the machine. */
-void RedFsm::analyzeMachine()
-{
- /* Find the true count of action references. */
- findFinalActionRefs();
-
- /* Check if there are any calls in action code. */
- for ( GenActionList::Iter act = genActionList; act.lte(); act++ ) {
- /* Record the occurrence of various kinds of actions. */
- if ( act->numToStateRefs > 0 )
- bAnyToStateActions = true;
- if ( act->numFromStateRefs > 0 )
- bAnyFromStateActions = true;
- if ( act->numEofRefs > 0 )
- bAnyEofActions = true;
- if ( act->numTransRefs > 0 )
- bAnyRegActions = true;
-
- /* Recurse through the action's parse tree looking for various things. */
- analyzeAction( act, act->inlineList );
- }
-
- /* Analyze reduced action lists. */
- for ( GenActionTableMap::Iter redAct = actionMap; redAct.lte(); redAct++ ) {
- for ( GenActionTable::Iter act = redAct->key; act.lte(); act++ )
- analyzeActionList( redAct, act->value->inlineList );
- }
-
- /* Find states that have transitions with actions that have next
- * statements. */
- for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
- /* Check any actions out of outSingle. */
- for ( RedTransList::Iter rtel = st->outSingle; rtel.lte(); rtel++ ) {
- if ( rtel->value->action != 0 && rtel->value->action->anyCurStateRef() )
- st->bAnyRegCurStateRef = true;
- }
-
- /* Check any actions out of outRange. */
- for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) {
- if ( rtel->value->action != 0 && rtel->value->action->anyCurStateRef() )
- st->bAnyRegCurStateRef = true;
- }
-
- /* Check any action out of default. */
- if ( st->defTrans != 0 && st->defTrans->action != 0 &&
- st->defTrans->action->anyCurStateRef() )
- st->bAnyRegCurStateRef = true;
-
- if ( st->stateCondList.length() > 0 )
- bAnyConditions = true;
- }
-
- /* Assign ids to actions that are referenced. */
- assignActionIds();
-
- /* Set the maximums of various values used for deciding types. */
- setValueLimits();
-}
-
-int transAction( RedTrans *trans )
-{
- int retAct = 0;
- if ( trans->action != 0 )
- retAct = trans->action->location+1;
- return retAct;
-}
-
-int toStateAction( RedState *state )
-{
- int act = 0;
- if ( state->toStateAction != 0 )
- act = state->toStateAction->location+1;
- return act;
-}
-
-int fromStateAction( RedState *state )
-{
- int act = 0;
- if ( state->fromStateAction != 0 )
- act = state->fromStateAction->location+1;
- return act;
-}
-
-int eofAction( RedState *state )
-{
- int act = 0;
- if ( state->eofAction != 0 )
- act = state->eofAction->location+1;
- return act;
-}
-
-
-FsmTables *RedFsm::makeFsmTables()
-{
- /* The fsm runtime needs states sorted by id. */
- sortByStateId();
-
- int pos, curKeyOffset, curIndOffset;
- FsmTables *fsmTables = new FsmTables;
- fsmTables->numStates = stateList.length();
-
- /*
- * actions
- */
-
- fsmTables->numActions = 1;
- for ( GenActionTableMap::Iter act = actionMap; act.lte(); act++ )
- fsmTables->numActions += 1 + act->key.length();
-
- pos = 0;
- fsmTables->actions = new long[fsmTables->numActions];
- fsmTables->actions[pos++] = 0;
- for ( GenActionTableMap::Iter act = actionMap; act.lte(); act++ ) {
- fsmTables->actions[pos++] = act->key.length();
- for ( GenActionTable::Iter item = act->key; item.lte(); item++ )
- fsmTables->actions[pos++] = item->value->actionId;
- }
-
- /*
- * keyOffset
- */
- pos = 0, curKeyOffset = 0;
- fsmTables->keyOffsets = new long[fsmTables->numStates];
- for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
- /* Store the current offset. */
- fsmTables->keyOffsets[pos++] = curKeyOffset;
-
- /* Move the key offset ahead. */
- curKeyOffset += st->outSingle.length() + st->outRange.length()*2;
- }
-
- /*
- * transKeys
- */
- fsmTables->numTransKeys = 0;
- for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
- fsmTables->numTransKeys += st->outSingle.length();
- fsmTables->numTransKeys += 2 * st->outRange.length();
- }
-
- pos = 0;
- fsmTables->transKeys = new char[fsmTables->numTransKeys];
- for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
- for ( RedTransList::Iter stel = st->outSingle; stel.lte(); stel++ )
- fsmTables->transKeys[pos++] = stel->lowKey.getVal();
- for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) {
- fsmTables->transKeys[pos++] = rtel->lowKey.getVal();
- fsmTables->transKeys[pos++] = rtel->highKey.getVal();
- }
- }
-
- /*
- * singleLengths
- */
- pos = 0;
- fsmTables->singleLengths = new long[fsmTables->numStates];
- for ( RedStateList::Iter st = stateList; st.lte(); st++ )
- fsmTables->singleLengths[pos++] = st->outSingle.length();
-
- /*
- * rangeLengths
- */
- pos = 0;
- fsmTables->rangeLengths = new long[fsmTables->numStates];
- for ( RedStateList::Iter st = stateList; st.lte(); st++ )
- fsmTables->rangeLengths[pos++] = st->outRange.length();
-
- /*
- * indexOffsets
- */
- pos = 0, curIndOffset = 0;
- fsmTables->indexOffsets = new long[fsmTables->numStates];
- for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
- fsmTables->indexOffsets[pos++] = curIndOffset;
-
- curIndOffset += st->outSingle.length() + st->outRange.length();
- if ( st->defTrans != 0 )
- curIndOffset += 1;
- }
-
- /*
- * transTargsWI
- */
- fsmTables->numTransTargsWI = 0;
- for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
- fsmTables->numTransTargsWI += st->outSingle.length();
- fsmTables->numTransTargsWI += st->outRange.length();
- if ( st->defTrans != 0 )
- fsmTables->numTransTargsWI += 1;
- }
-
- pos = 0;
- fsmTables->transTargsWI = new long[fsmTables->numTransTargsWI];
- for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
- for ( RedTransList::Iter stel = st->outSingle; stel.lte(); stel++ )
- fsmTables->transTargsWI[pos++] = stel->value->targ->id;
-
- for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ )
- fsmTables->transTargsWI[pos++] = rtel->value->targ->id;
-
- if ( st->defTrans != 0 )
- fsmTables->transTargsWI[pos++] = st->defTrans->targ->id;
- }
-
- /*
- * transActionsWI
- */
- fsmTables->numTransActionsWI = 0;
- for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
- fsmTables->numTransActionsWI += st->outSingle.length();
- fsmTables->numTransActionsWI += st->outRange.length();
- if ( st->defTrans != 0 )
- fsmTables->numTransActionsWI += 1;
- }
-
- pos = 0;
- fsmTables->transActionsWI = new long[fsmTables->numTransActionsWI];
- for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
- for ( RedTransList::Iter stel = st->outSingle; stel.lte(); stel++ )
- fsmTables->transActionsWI[pos++] = transAction( stel->value );
-
- for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ )
- fsmTables->transActionsWI[pos++] = transAction( rtel->value );
-
- if ( st->defTrans != 0 )
- fsmTables->transActionsWI[pos++] = transAction( st->defTrans );
- }
-
- /*
- * toStateActions
- */
- pos = 0;
- fsmTables->toStateActions = new long[fsmTables->numStates];
- for ( RedStateList::Iter st = stateList; st.lte(); st++ )
- fsmTables->toStateActions[pos++] = toStateAction( st );
-
- /*
- * fromStateActions
- */
- pos = 0;
- fsmTables->fromStateActions = new long[fsmTables->numStates];
- for ( RedStateList::Iter st = stateList; st.lte(); st++ )
- fsmTables->fromStateActions[pos++] = fromStateAction( st );
-
- /*
- * eofActions
- */
- pos = 0;
- fsmTables->eofActions = new long[fsmTables->numStates];
- for ( RedStateList::Iter st = stateList; st.lte(); st++ )
- fsmTables->eofActions[pos++] = eofAction( st );
-
- /*
- * eofTargs
- */
- pos = 0;
- fsmTables->eofTargs = new long[fsmTables->numStates];
- for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
- int targ = -1;
- if ( st->eofTrans != 0 )
- targ = st->eofTrans->targ->id;
- fsmTables->eofTargs[pos++] = targ;
- }
-
- /* Start state. */
- fsmTables->startState = startState->id;
-
- /* First final state. */
- fsmTables->firstFinal = ( firstFinState != 0 ) ?
- firstFinState->id : nextStateId;
-
- /* The error state. */
- fsmTables->errorState = ( errState != 0 ) ?
- errState->id : -1;
-
- /* The array pointing to actions. */
- pos = 0;
- fsmTables->numActionSwitch = genActionList.length();
- fsmTables->actionSwitch = new GenAction*[fsmTables->numActionSwitch];
- for ( GenActionList::Iter act = genActionList; act.lte(); act++ )
- fsmTables->actionSwitch[pos++] = act;
-
- /*
- * entryByRegion
- */
-
- fsmTables->numRegions = regionToEntry.length()+1;
- fsmTables->entryByRegion = new long[fsmTables->numRegions];
- fsmTables->entryByRegion[0] = fsmTables->errorState;
-
- pos = 1;
- for ( RegionToEntry::Iter en = regionToEntry; en.lte(); en++ ) {
- /* Find the entry state from the entry id. */
- RedEntryMapEl *entryMapEl = redEntryMap.find( *en );
-
- /* Save it off. */
- fsmTables->entryByRegion[pos++] = entryMapEl != 0 ? entryMapEl->value
- : fsmTables->errorState;
- }
-
- return fsmTables;
-}
-
-
diff --git a/colm/redfsm.h b/colm/redfsm.h
deleted file mode 100644
index 39b98d5f..00000000
--- a/colm/redfsm.h
+++ /dev/null
@@ -1,524 +0,0 @@
-/*
- * Copyright 2006-2012 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Colm.
- *
- * Colm is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Colm is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Colm; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#ifndef _REDFSM_H
-#define _REDFSM_H
-
-#include <assert.h>
-#include <string.h>
-#include <string>
-#include "keyops.h"
-#include "vector.h"
-#include "dlist.h"
-#include "compare.h"
-#include "bstmap.h"
-#include "bstset.h"
-#include "avlmap.h"
-#include "avltree.h"
-#include "avlbasic.h"
-#include "mergesort.h"
-#include "sbstmap.h"
-#include "sbstset.h"
-#include "sbsttable.h"
-#include "global.h"
-#include "pdarun.h"
-
-#define TRANS_ERR_TRANS 0
-#define STATE_ERR_STATE 0
-#define FUNC_NO_FUNC 0
-
-using std::string;
-
-struct RedState;
-struct InlineList;
-struct Compiler;
-struct ObjField;
-
-/* Element in list of actions. Contains the string for the code to exectute. */
-struct GenAction
-{
- /* Data collected during parse. */
- InputLoc loc;
- char *name;
- InlineList *inlineList;
- int actionId;
- MarkType markType;
- ObjField *objField;
- long markId;
-
- int numTransRefs;
- int numToStateRefs;
- int numFromStateRefs;
- int numEofRefs;
-
- GenAction *prev, *next;
-};
-
-typedef DList<GenAction> GenActionList;
-string nameOrLoc( GenAction *genAction );
-
-/* Number of references in the final machine. */
-inline int numRefs( GenAction *genAction )
-{
- return genAction->numTransRefs +
- genAction->numToStateRefs +
- genAction->numFromStateRefs +
- genAction->numEofRefs;
-}
-
-
-/* Forwards. */
-struct RedState;
-struct FsmState;
-
-/* Transistion GenAction Element. */
-typedef SBstMapEl< int, GenAction* > GenActionTableEl;
-
-/* Transition GenAction Table. */
-struct GenActionTable
- : public SBstMap< int, GenAction*, CmpOrd<int> >
-{
- void setAction( int ordering, GenAction *action );
- void setActions( int *orderings, GenAction **actions, int nActs );
- void setActions( const GenActionTable &other );
-};
-
-/* Compare of a whole action table element (key & value). */
-struct GenCmpActionTableEl
-{
- static int compare( const GenActionTableEl &action1,
- const GenActionTableEl &action2 )
- {
- if ( action1.key < action2.key )
- return -1;
- else if ( action1.key > action2.key )
- return 1;
- else if ( action1.value < action2.value )
- return -1;
- else if ( action1.value > action2.value )
- return 1;
- return 0;
- }
-};
-
-/* Compare for GenActionTable. */
-typedef CmpSTable< GenActionTableEl, GenCmpActionTableEl > GenCmpActionTable;
-
-/* Set of states. */
-typedef BstSet<RedState*> RedStateSet;
-typedef BstSet<int> IntSet;
-
-/* Reduced action. */
-struct RedAction
-:
- public AvlTreeEl<RedAction>
-{
- RedAction( )
- :
- key(),
- eofRefs(0),
- numTransRefs(0),
- numToStateRefs(0),
- numFromStateRefs(0),
- numEofRefs(0),
- bAnyNextStmt(false),
- bAnyCurStateRef(false),
- bAnyBreakStmt(false)
- { }
-
- const GenActionTable &getKey()
- { return key; }
-
- GenActionTable key;
- int actListId;
- int location;
- IntSet *eofRefs;
-
- /* Number of references in the final machine. */
- bool numRefs()
- { return numTransRefs + numToStateRefs + numFromStateRefs + numEofRefs; }
- int numTransRefs;
- int numToStateRefs;
- int numFromStateRefs;
- int numEofRefs;
-
- bool anyNextStmt() { return bAnyNextStmt; }
- bool anyCurStateRef() { return bAnyCurStateRef; }
- bool anyBreakStmt() { return bAnyBreakStmt; }
-
- bool bAnyNextStmt;
- bool bAnyCurStateRef;
- bool bAnyBreakStmt;
-};
-typedef AvlTree<RedAction, GenActionTable, GenCmpActionTable> GenActionTableMap;
-
-/* Reduced transition. */
-struct RedTrans
-:
- public AvlTreeEl<RedTrans>
-{
- RedTrans( RedState *targ, RedAction *action, int id )
- : targ(targ), action(action), id(id), labelNeeded(true) { }
-
- RedState *targ;
- RedAction *action;
- int id;
- bool partitionBoundary;
- bool labelNeeded;
-};
-
-/* Compare of transitions for the final reduction of transitions. Comparison
- * is on target and the pointer to the shared action table. It is assumed that
- * when this is used the action tables have been reduced. */
-struct CmpRedTrans
-{
- static int compare( const RedTrans &t1, const RedTrans &t2 )
- {
- if ( t1.targ < t2.targ )
- return -1;
- else if ( t1.targ > t2.targ )
- return 1;
- else if ( t1.action < t2.action )
- return -1;
- else if ( t1.action > t2.action )
- return 1;
- else
- return 0;
- }
-};
-
-typedef AvlBasic<RedTrans, CmpRedTrans> RedTransSet;
-
-/* Element in out range. */
-struct RedTransEl
-{
- /* Constructors. */
- RedTransEl( Key lowKey, Key highKey, RedTrans *value )
- : lowKey(lowKey), highKey(highKey), value(value) { }
-
- Key lowKey, highKey;
- RedTrans *value;
-};
-
-typedef Vector<RedTransEl> RedTransList;
-typedef Vector<RedState*> RedStateVect;
-
-typedef BstMapEl<RedState*, unsigned long long> RedSpanMapEl;
-typedef BstMap<RedState*, unsigned long long> RedSpanMap;
-
-/* Compare used by span map sort. Reverse sorts by the span. */
-struct CmpRedSpanMapEl
-{
- static int compare( const RedSpanMapEl &smel1, const RedSpanMapEl &smel2 )
- {
- if ( smel1.value > smel2.value )
- return -1;
- else if ( smel1.value < smel2.value )
- return 1;
- else
- return 0;
- }
-};
-
-/* Sorting state-span map entries by span. */
-typedef MergeSort<RedSpanMapEl, CmpRedSpanMapEl> RedSpanMapSort;
-
-/* Set of entry ids that go into this state. */
-typedef Vector<int> EntryIdVect;
-typedef Vector<char*> EntryNameVect;
-
-/* Maps entry ids (defined by the frontend, to reduced state ids. */
-typedef BstMap<int, int> RedEntryMap;
-typedef BstMapEl<int, int> RedEntryMapEl;
-
-typedef Vector<int> RegionToEntry;
-
-typedef Vector< GenAction* > GenCondSet;
-
-struct Condition
-{
- Condition( )
- : key(0), baseKey(0) {}
-
- Key key;
- Key baseKey;
- GenCondSet condSet;
-
- Condition *next, *prev;
-};
-typedef DList<Condition> ConditionList;
-
-struct GenCondSpace
-{
- Key baseKey;
- GenCondSet condSet;
- int condSpaceId;
-
- GenCondSpace *next, *prev;
-};
-typedef DList<GenCondSpace> CondSpaceList;
-
-struct GenStateCond
-{
- Key lowKey;
- Key highKey;
-
- GenCondSpace *condSpace;
-
- GenStateCond *prev, *next;
-};
-typedef DList<GenStateCond> GenStateCondList;
-typedef Vector<GenStateCond*> StateCondVect;
-
-/* Reduced state. */
-struct RedState
-{
- RedState()
- :
- defTrans(0),
- condList(0),
- transList(0),
- isFinal(false),
- labelNeeded(false),
- outNeeded(false),
- onStateList(false),
- toStateAction(0),
- fromStateAction(0),
- eofAction(0),
- eofTrans(0),
- id(0),
- bAnyRegCurStateRef(false),
- partitionBoundary(false),
- inTrans(0),
- numInTrans(0)
- { }
-
- /* Transitions out. */
- RedTransList outSingle;
- RedTransList outRange;
- RedTrans *defTrans;
-
- /* For flat conditions. */
- Key condLowKey, condHighKey;
- GenCondSpace **condList;
-
- /* For flat keys. */
- Key lowKey, highKey;
- RedTrans **transList;
-
- /* The list of states that transitions from this state go to. */
- RedStateVect targStates;
-
- bool isFinal;
- bool labelNeeded;
- bool outNeeded;
- bool onStateList;
- RedAction *toStateAction;
- RedAction *fromStateAction;
- RedAction *eofAction;
- RedTrans *eofTrans;
- int id;
- GenStateCondList stateCondList;
- StateCondVect stateCondVect;
-
- /* Pointers for the list of states. */
- RedState *prev, *next;
-
- bool anyRegCurStateRef() { return bAnyRegCurStateRef; }
- bool bAnyRegCurStateRef;
-
- int partition;
- bool partitionBoundary;
-
- RedTrans **inTrans;
- int numInTrans;
-};
-
-/* List of states. */
-typedef DList<RedState> RedStateList;
-
-/* Set of reduced transitons. Comparison is by pointer. */
-typedef BstSet< RedTrans*, CmpOrd<RedTrans*> > RedTransPtrSet;
-
-/* Next version of the fsm machine. */
-struct RedFsm
-{
- RedFsm();
-
- bool wantComplete;
- bool forcedErrorState;
-
- int nextActionId;
- int nextTransId;
-
- /* Next State Id doubles as the total number of state ids. */
- int nextStateId;
-
- RedTransSet transSet;
- GenActionTableMap actionMap;
- RedStateList stateList;
- RedStateSet entryPoints;
- RedState *startState;
- RedState *errState;
- RedTrans *errTrans;
- RedTrans *errActionTrans;
- RedState *firstFinState;
- int numFinStates;
- int nParts;
-
- GenAction *allActions;
- RedAction *allActionTables;
- Condition *allConditions;
- GenCondSpace *allCondSpaces;
- RedState *allStates;
- GenActionList genActionList;
- ConditionList conditionList;
- CondSpaceList condSpaceList;
- EntryIdVect entryPointIds;
- EntryNameVect entryPointNames;
- RedEntryMap redEntryMap;
- RegionToEntry regionToEntry;
-
- bool bAnyToStateActions;
- bool bAnyFromStateActions;
- bool bAnyRegActions;
- bool bAnyEofActions;
- bool bAnyActionGotos;
- bool bAnyActionCalls;
- bool bAnyActionRets;
- bool bAnyRegActionRets;
- bool bAnyRegActionByValControl;
- bool bAnyRegNextStmt;
- bool bAnyRegCurStateRef;
- bool bAnyRegBreak;
- bool bAnyLmSwitchError;
- bool bAnyConditions;
-
- int maxState;
- int maxSingleLen;
- int maxRangeLen;
- int maxKeyOffset;
- int maxIndexOffset;
- int maxIndex;
- int maxActListId;
- int maxActionLoc;
- int maxActArrItem;
- unsigned long long maxSpan;
- unsigned long long maxCondSpan;
- int maxFlatIndexOffset;
- Key maxKey;
- int maxCondOffset;
- int maxCondLen;
- int maxCondSpaceId;
- int maxCondIndexOffset;
- int maxCond;
-
- bool anyActions();
- bool anyToStateActions() { return bAnyToStateActions; }
- bool anyFromStateActions() { return bAnyFromStateActions; }
- bool anyRegActions() { return bAnyRegActions; }
- bool anyEofActions() { return bAnyEofActions; }
- bool anyActionGotos() { return bAnyActionGotos; }
- bool anyActionCalls() { return bAnyActionCalls; }
- bool anyActionRets() { return bAnyActionRets; }
- bool anyRegActionRets() { return bAnyRegActionRets; }
- bool anyRegActionByValControl() { return bAnyRegActionByValControl; }
- bool anyRegNextStmt() { return bAnyRegNextStmt; }
- bool anyRegCurStateRef() { return bAnyRegCurStateRef; }
- bool anyRegBreak() { return bAnyRegBreak; }
- bool anyLmSwitchError() { return bAnyLmSwitchError; }
- bool anyConditions() { return bAnyConditions; }
-
- GenCondSpace *findCondSpace( Key lowKey, Key highKey );
- Condition *findCondition( Key key );
-
- /* Is is it possible to extend a range by bumping ranges that span only
- * one character to the singles array. */
- bool canExtend( const RedTransList &list, int pos );
-
- /* Pick single transitions from the ranges. */
- void moveTransToSingle( RedState *state );
- void chooseSingle();
-
- void makeFlat();
-
- /* Move a selected transition from ranges to default. */
- void moveToDefault( RedTrans *defTrans, RedState *state );
-
- /* Pick a default transition by largest span. */
- RedTrans *chooseDefaultSpan( RedState *state );
- void chooseDefaultSpan();
-
- /* Pick a default transition by most number of ranges. */
- RedTrans *chooseDefaultNumRanges( RedState *state );
- void chooseDefaultNumRanges();
-
- /* Pick a default transition tailored towards goto driven machine. */
- RedTrans *chooseDefaultGoto( RedState *state );
- void chooseDefaultGoto();
-
- /* Ordering states by transition connections. */
- void optimizeStateOrdering( RedState *state );
- void optimizeStateOrdering();
-
- /* Ordering states by transition connections. */
- void depthFirstOrdering( RedState *state );
- void depthFirstOrdering();
-
- /* Set state ids. */
- void sequentialStateIds();
- void sortStateIdsByFinal();
-
- /* Arrange states in by final id. This is a stable sort. */
- void sortStatesByFinal();
-
- /* Sorting states by id. */
- void sortByStateId();
-
- /* Locating the first final state. This is the final state with the lowest
- * id. */
- void findFirstFinState();
-
- void assignActionLocs();
-
- RedTrans *getErrorTrans();
- RedState *getErrorState();
-
- /* Is every char in the alphabet covered? */
- bool alphabetCovered( RedTransList &outRange );
-
- RedTrans *allocateTrans( RedState *targState, RedAction *actionTable );
-
- void partitionFsm( int nParts );
-
- void setInTrans();
- void setValueLimits();
- void assignActionIds();
- void analyzeActionList( RedAction *redAct, InlineList *inlineList );
- void analyzeAction( GenAction *act, InlineList *inlineList );
- void findFinalActionRefs();
- void analyzeMachine();
-
- FsmTables *makeFsmTables();
-};
-
-
-#endif /* _REDFSM_H */
diff --git a/colm/resolve.cc b/colm/resolve.cc
deleted file mode 100644
index a661e68e..00000000
--- a/colm/resolve.cc
+++ /dev/null
@@ -1,805 +0,0 @@
-/*
- * Copyright 2009-2012 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Colm.
- *
- * Colm is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Colm is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Colm; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include "bytecode.h"
-#include "parsedata.h"
-#include "fsmrun.h"
-#include <iostream>
-#include <assert.h>
-
-using std::cout;
-using std::cerr;
-using std::endl;
-
-UniqueType *TypeRef::lookupTypeName( Compiler *pd )
-{
- /* Lookup up the qualifiction and then the name. */
- nspace = nspaceQual->getQual( pd );
-
- if ( nspace == 0 )
- error(loc) << "do not have region for resolving reference" << endp;
-
- while ( nspace != 0 ) {
- /* Search for the token in the region by typeName. */
- TypeMapEl *inDict = nspace->typeMap.find( typeName );
-
- if ( inDict != 0 ) {
- switch ( inDict->type ) {
- /* Defer to the typeRef we are an alias of. We need to guard against loops here. */
- case TypeMapEl::TypeAliasType:
- return inDict->typeRef->lookupType( pd );
-
- case TypeMapEl::LangElType:
- return pd->findUniqueType( TYPE_TREE, inDict->value );
- }
- }
-
- nspace = nspace->parentNamespace;
- }
-
- error(loc) << "unknown type in typeof expression" << endp;
- return 0;
-}
-
-UniqueType *TypeRef::lookupTypeLiteral( Compiler *pd )
-{
- /* Lookup up the qualifiction and then the name. */
- nspace = nspaceQual->getQual( pd );
-
- if ( nspace == 0 )
- error(loc) << "do not have region for resolving reference" << endp;
-
- /* Interpret escape sequences and remove quotes. */
- bool unusedCI;
- String interp;
- prepareLitString( interp, unusedCI, pdaLiteral->token.data,
- pdaLiteral->token.loc );
-
- while ( nspace != 0 ) {
- LiteralDictEl *ldel = nspace->literalDict.find( interp );
-
- if ( ldel != 0 )
- return pd->findUniqueType( TYPE_TREE, ldel->value->tdLangEl );
-
- nspace = nspace->parentNamespace;
- }
-
- error(loc) << "unknown type in typeof expression" << endp;
- return 0;
-}
-
-UniqueType *TypeRef::lookupTypeMap( Compiler *pd )
-{
- /* Lookup up the qualifiction and then the name. */
- nspace = nspaceQual->getQual( pd );
-
- UniqueType *utKey = typeRef1->lookupType( pd );
- UniqueType *utValue = typeRef2->lookupType( pd );
-
- UniqueMap searchKey( utKey, utValue );
- UniqueMap *inMap = pd->uniqueMapMap.find( &searchKey );
- if ( inMap == 0 ) {
- inMap = new UniqueMap( utKey, utValue );
- pd->uniqueMapMap.insert( inMap );
-
- /* FIXME: Need uniqe name allocator for types. */
- static int mapId = 0;
- String name( 36, "__map%d", mapId++ );
-
- GenericType *generic = new GenericType( name, GEN_MAP,
- pd->nextGenericId++, 0/*langEl*/, typeRef2 );
- generic->keyTypeArg = typeRef1;
-
- nspace->genericList.append( generic );
-
- generic->declare( pd, nspace );
-
- inMap->generic = generic;
- }
-
- generic = inMap->generic;
- return pd->findUniqueType( TYPE_TREE, inMap->generic->langEl );
-}
-
-UniqueType *TypeRef::lookupTypeList( Compiler *pd )
-{
- /* Lookup up the qualifiction and then the name. */
- nspace = nspaceQual->getQual( pd );
-
- UniqueType *utValue = typeRef1->lookupType( pd );
-
- UniqueList searchKey( utValue );
- UniqueList *inMap = pd->uniqueListMap.find( &searchKey );
- if ( inMap == 0 ) {
- inMap = new UniqueList( utValue );
- pd->uniqueListMap.insert( inMap );
-
- /* FIXME: Need uniqe name allocator for types. */
- static int listId = 0;
- String name( 36, "__list%d", listId++ );
-
- GenericType *generic = new GenericType( name, GEN_LIST,
- pd->nextGenericId++, 0/*langEl*/, typeRef1 );
-
- nspace->genericList.append( generic );
-
- generic->declare( pd, nspace );
-
- inMap->generic = generic;
- }
-
- generic = inMap->generic;
- return pd->findUniqueType( TYPE_TREE, inMap->generic->langEl );
-}
-
-UniqueType *TypeRef::lookupTypeVector( Compiler *pd )
-{
- /* Lookup up the qualifiction and then the name. */
- nspace = nspaceQual->getQual( pd );
-
- UniqueType *utValue = typeRef1->lookupType( pd );
-
- UniqueVector searchKey( utValue );
- UniqueVector *inMap = pd->uniqueVectorMap.find( &searchKey );
- if ( inMap == 0 ) {
- inMap = new UniqueVector( utValue );
- pd->uniqueVectorMap.insert( inMap );
-
- /* FIXME: Need uniqe name allocator for types. */
- static int vectorId = 0;
- String name( 36, "__vector%d", vectorId++ );
-
- GenericType *generic = new GenericType( name, GEN_VECTOR,
- pd->nextGenericId++, 0/*langEl*/, typeRef1 );
-
- nspace->genericList.append( generic );
-
- generic->declare( pd, nspace );
-
- inMap->generic = generic;
- }
-
- generic = inMap->generic;
- return pd->findUniqueType( TYPE_TREE, inMap->generic->langEl );
-}
-
-UniqueType *TypeRef::lookupTypeParser( Compiler *pd )
-{
- /* Lookup up the qualifiction and then the name. */
- nspace = nspaceQual->getQual( pd );
-
- UniqueType *utParse = typeRef1->lookupType( pd );
-
- UniqueParser searchKey( utParse );
- UniqueParser *inMap = pd->uniqueParserMap.find( &searchKey );
- if ( inMap == 0 ) {
- inMap = new UniqueParser( utParse );
- pd->uniqueParserMap.insert( inMap );
-
- /* FIXME: Need uniqe name allocator for types. */
- static int accumId = 0;
- String name( 36, "__accum%d", accumId++ );
-
- GenericType *generic = new GenericType( name, GEN_PARSER,
- pd->nextGenericId++, 0/*langEl*/, typeRef1 );
-
- nspace->genericList.append( generic );
-
- generic->declare( pd, nspace );
-
- inMap->generic = generic;
- }
-
- generic = inMap->generic;
- return pd->findUniqueType( TYPE_TREE, inMap->generic->langEl );
-}
-
-UniqueType *TypeRef::lookupTypePtr( Compiler *pd )
-{
- typeRef1->lookupType( pd );
- return pd->findUniqueType( TYPE_PTR, typeRef1->uniqueType->langEl );
-}
-
-UniqueType *TypeRef::lookupTypeRef( Compiler *pd )
-{
- typeRef1->lookupType( pd );
- return pd->findUniqueType( TYPE_REF, typeRef1->uniqueType->langEl );
-}
-
-void TypeRef::resolveRepeat( Compiler *pd )
-{
- if ( uniqueType->typeId != TYPE_TREE )
- error(loc) << "cannot repeat non-tree type" << endp;
-
- UniqueRepeat searchKey( repeatType, uniqueType->langEl );
- UniqueRepeat *uniqueRepeat = pd->uniqeRepeatMap.find( &searchKey );
- if ( uniqueRepeat == 0 ) {
- uniqueRepeat = new UniqueRepeat( repeatType, uniqueType->langEl );
- pd->uniqeRepeatMap.insert( uniqueRepeat );
-
- LangEl *declLangEl = 0;
-
- switch ( repeatType ) {
- case RepeatRepeat: {
- /* If the factor is a repeat, create the repeat element and link the
- * factor to it. */
- String repeatName( 128, "_repeat_%s", typeName.data );
- declLangEl = pd->makeRepeatProd( nspace, repeatName, nspaceQual, typeName );
- break;
- }
- case RepeatList: {
- /* If the factor is a repeat, create the repeat element and link the
- * factor to it. */
- String listName( 128, "_list_%s", typeName.data );
- declLangEl = pd->makeListProd( nspace, listName, nspaceQual, typeName );
- break;
- }
- case RepeatOpt: {
- /* If the factor is an opt, create the opt element and link the factor
- * to it. */
- String optName( 128, "_opt_%s", typeName.data );
- declLangEl = pd->makeOptProd( nspace, optName, nspaceQual, typeName );
- break;
- }
-
- case RepeatNone:
- break;
- }
-
- uniqueRepeat->declLangEl = declLangEl;
- declLangEl->repeatOf = uniqueRepeat->langEl;
- }
-
- uniqueType = pd->findUniqueType( TYPE_TREE, uniqueRepeat->declLangEl );
-}
-
-
-UniqueType *TypeRef::lookupType( Compiler *pd )
-{
- if ( uniqueType != 0 )
- return uniqueType;
-
- /* Not an iterator. May be a reference. */
- switch ( type ) {
- case Name:
- uniqueType = lookupTypeName( pd );
- break;
- case Literal:
- uniqueType = lookupTypeLiteral( pd );
- break;
- case Map:
- uniqueType = lookupTypeMap( pd );
- break;
- case List:
- uniqueType = lookupTypeList( pd );
- break;
- case Vector:
- uniqueType = lookupTypeVector( pd );
- break;
- case Parser:
- uniqueType = lookupTypeParser( pd );
- break;
- case Ptr:
- uniqueType = lookupTypePtr( pd );
- break;
- case Ref:
- uniqueType = lookupTypeRef( pd );
- break;
- case Iterator:
- case Unspecified:
- /* No lookup needed, unique type(s) set when constructed. */
- break;
- }
-
- if ( repeatType != RepeatNone )
- resolveRepeat( pd );
-
- return uniqueType;
-}
-
-void Compiler::resolveFactor( ProdEl *fact )
-{
- fact->typeRef->lookupType( this );
- fact->langEl = fact->typeRef->uniqueType->langEl;
-}
-
-void LangTerm::resolve( Compiler *pd )
-{
- switch ( type ) {
- case ConstructType:
- typeRef->lookupType( pd );
-
- /* Evaluate the initialization expressions. */
- if ( fieldInitArgs != 0 ) {
- for ( FieldInitVect::Iter pi = *fieldInitArgs; pi.lte(); pi++ )
- (*pi)->expr->resolve( pd );
- }
- break;
- case VarRefType:
- break;
-
- case MakeTreeType:
- case MakeTokenType:
- case MethodCallType:
- if ( args != 0 ) {
- for ( ExprVect::Iter pe = *args; pe.lte(); pe++ )
- (*pe)->resolve( pd );
- }
- break;
-
- case NumberType:
- case StringType:
- case MatchType:
- break;
- case NewType:
- expr->resolve( pd );
- break;
- case TypeIdType:
- typeRef->lookupType( pd );
- break;
- case SearchType:
- typeRef->lookupType( pd );
- break;
- case NilType:
- case TrueType:
- case FalseType:
- break;
-
- case ParseType:
- case ParseStopType:
- typeRef->lookupType( pd );
- parserTypeRef->lookupType( pd );
- generic = parserTypeRef->generic;
- break;
-
- case EmbedStringType:
- break;
- }
-}
-
-void LangVarRef::resolve( Compiler *pd ) const
-{
-
-}
-
-void LangExpr::resolve( Compiler *pd ) const
-{
- switch ( type ) {
- case BinaryType: {
- left->resolve( pd );
- right->resolve( pd );
- break;
- }
- case UnaryType: {
- right->resolve( pd );
- break;
- }
- case TermType: {
- term->resolve( pd );
- break;
- }
- }
-}
-
-void LangStmt::resolveParserItems( Compiler *pd ) const
-{
- /* Assign bind ids to the variables in the replacement. */
- for ( ReplItemList::Iter item = *parserText->list; item.lte(); item++ ) {
- varRef->resolve( pd );
-
- switch ( item->type ) {
- case ReplItem::FactorType:
- break;
- case ReplItem::InputText:
- break;
- case ReplItem::ExprType:
- item->expr->resolve( pd );
- break;
- }
- }
-}
-
-void LangStmt::resolve( Compiler *pd ) const
-{
- switch ( type ) {
- case PrintType:
- case PrintXMLACType:
- case PrintXMLType:
- case PrintStreamType: {
- /* Push the args backwards. */
- for ( ExprVect::Iter pex = exprPtrVect->last(); pex.gtb(); pex-- )
- (*pex)->resolve( pd );
- break;
- }
- case ExprType: {
- /* Evaluate the exrepssion, then pop it immediately. */
- expr->resolve( pd );
- break;
- }
- case IfType: {
- /* Evaluate the test. */
- expr->resolve( pd );
-
- /* Analyze the if true branch. */
- for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ )
- stmt->resolve( pd );
-
- if ( elsePart != 0 )
- elsePart->resolve( pd );
- break;
- }
- case ElseType: {
- for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ )
- stmt->resolve( pd );
- break;
- }
- case RejectType:
- break;
- case WhileType: {
- expr->resolve( pd );
-
- /* Compute the while block. */
- for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ )
- stmt->resolve( pd );
- break;
- }
- case AssignType: {
- /* Evaluate the exrepssion. */
-// cout << "Assign Type" << endl;
- expr->resolve( pd );
- break;
- }
- case ForIterType: {
- typeRef->lookupType( pd );
-
- /* Evaluate and push the arguments. */
- langTerm->resolve( pd );
-
- /* Compile the contents. */
- for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ )
- stmt->resolve( pd );
-
- break;
- }
- case ReturnType: {
- /* Evaluate the exrepssion. */
- expr->resolve( pd );
- break;
- }
- case BreakType: {
- break;
- }
- case YieldType: {
- /* take a reference and yield it. Immediately reset the referece. */
- varRef->resolve( pd );
- break;
- }
- case ParserType: {
- //for ( )
- break;
- }
- }
-}
-
-void ObjectDef::resolve( Compiler *pd )
-{
- for ( ObjFieldList::Iter fli = *objFieldList; fli.lte(); fli++ ) {
- ObjField *field = fli->value;
-
- if ( field->typeRef != 0 ) {
- field->typeRef->lookupType( pd );
- }
- }
-}
-
-void CodeBlock::resolve( Compiler *pd ) const
-{
- if ( localFrame != 0 )
- localFrame->resolve( pd );
-
- for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ )
- stmt->resolve( pd );
-}
-
-void Compiler::resolveFunction( Function *func )
-{
- CodeBlock *block = func->codeBlock;
- block->resolve( this );
-}
-
-void Compiler::resolveUserIter( Function *func )
-{
- CodeBlock *block = func->codeBlock;
- block->resolve( this );
-}
-
-void Compiler::resolvePreEof( TokenRegion *region )
-{
- CodeBlock *block = region->preEofBlock;
- block->resolve( this );
-}
-
-void Compiler::resolveRootBlock()
-{
- rootLocalFrame->resolve( this );
-
- CodeBlock *block = rootCodeBlock;
- block->resolve( this );
-}
-
-void Compiler::resolveTranslateBlock( LangEl *langEl )
-{
- CodeBlock *block = langEl->transBlock;
- block->resolve( this );
-}
-
-void Compiler::resolveReductionCode( Definition *prod )
-{
- CodeBlock *block = prod->redBlock;
- block->resolve( this );
-}
-
-void Compiler::resolveParseTree()
-{
- /* Compile functions. */
- for ( FunctionList::Iter f = functionList; f.lte(); f++ ) {
- if ( f->isUserIter )
- resolveUserIter( f );
- else
- resolveFunction( f );
-
- if ( f->typeRef != 0 )
- f->typeRef->lookupType( this );
-
- for ( ParameterList::Iter param = *f->paramList; param.lte(); param++ )
- param->typeRef->lookupType( this );
- }
-
- /* Compile the reduction code. */
- for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) {
- if ( prod->redBlock != 0 )
- resolveReductionCode( prod );
- }
-
- /* Compile the token translation code. */
- for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
- if ( lel->transBlock != 0 )
- resolveTranslateBlock( lel );
- }
-
- /* Compile preeof blocks. */
- for ( RegionList::Iter r = regionList; r.lte(); r++ ) {
- if ( r->preEofBlock != 0 )
- resolvePreEof( r );
- }
-
- /* Compile the init code */
- resolveRootBlock( );
-
- /* Init all user object fields (need consistent size). */
- for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
- ObjectDef *objDef = lel->objectDef;
- if ( objDef != 0 ) {
- /* Init all fields of the object. */
- for ( ObjFieldList::Iter f = *objDef->objFieldList; f.lte(); f++ ) {
- f->value->typeRef->lookupType( this );
- }
- }
- }
-
- /* Init all fields of the global object. */
- for ( ObjFieldList::Iter f = *globalObjectDef->objFieldList; f.lte(); f++ ) {
- f->value->typeRef->lookupType( this );
- }
-
-}
-
-
-void Compiler::resolveUses()
-{
- for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
- if ( lel->objectDefUses != 0 ) {
- /* Look for the production's associated region. */
- Namespace *nspace = lel->objectDefUsesQual->getQual( this );
-
- if ( nspace == 0 )
- error() << "do not have namespace for resolving reference" << endp;
-
- /* Look up the language element in the region. */
- LangEl *langEl = findType( this, nspace, lel->objectDefUses );
- lel->objectDef = langEl->objectDef;
- }
- }
-}
-
-void Compiler::resolvePatternEls()
-{
- for ( PatternList::Iter pat = patternList; pat.lte(); pat++ ) {
- for ( PatternItemList::Iter item = *pat->list; item.lte(); item++ ) {
- switch ( item->type ) {
- case PatternItem::FactorType:
- /* Use pdaFactor reference resolving. */
- resolveFactor( item->factor );
- break;
- case PatternItem::InputText:
- /* Nothing to do here. */
- break;
- }
- }
- }
-}
-
-void Compiler::resolveReplacementEls()
-{
- for ( ReplList::Iter repl = replList; repl.lte(); repl++ ) {
- for ( ReplItemList::Iter item = *repl->list; item.lte(); item++ ) {
- switch ( item->type ) {
- case ReplItem::FactorType:
- /* Use pdaFactor reference resolving. */
- resolveFactor( item->factor );
- break;
- case ReplItem::InputText:
- case ReplItem::ExprType:
- break;
- }
- }
- }
-}
-
-void Compiler::resolveParserEls()
-{
- for ( ParserTextList::Iter accum = parserTextList; accum.lte(); accum++ ) {
- for ( ReplItemList::Iter item = *accum->list; item.lte(); item++ ) {
- switch ( item->type ) {
- case ReplItem::FactorType:
- resolveFactor( item->factor );
- break;
- case ReplItem::InputText:
- case ReplItem::ExprType:
- break;
- }
- }
- }
-}
-
-/* Resolves production els and computes the precedence of each prod. */
-void Compiler::resolveProductionEls()
-{
- /* NOTE: as we process this list it may be growing! */
- for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) {
- /* First resolve. */
- for ( ProdElList::Iter fact = *prod->prodElList; fact.lte(); fact++ )
- resolveFactor( fact );
-
- /* If there is no explicit precdence ... */
- if ( prod->predOf == 0 ) {
- /* Compute the precedence of the productions. */
- for ( ProdElList::Iter fact = prod->prodElList->last(); fact.gtb(); fact-- ) {
- /* Production inherits the precedence of the last terminal with
- * precedence. */
- if ( fact->langEl->predType != PredNone ) {
- prod->predOf = fact->langEl;
- break;
- }
- }
- }
- }
-}
-
-void Compiler::resolveGenericTypes()
-{
- for ( NamespaceList::Iter ns = namespaceList; ns.lte(); ns++ ) {
- for ( GenericList::Iter gen = ns->genericList; gen.lte(); gen++ ) {
-// cout << __PRETTY_FUNCTION__ << " " << gen->name.data << " " << gen->typeArg << endl;
-
- gen->utArg = gen->typeArg->lookupType( this );
-
- if ( gen->typeId == GEN_MAP )
- gen->keyUT = gen->keyTypeArg->lookupType( this );
- }
- }
-}
-
-void Compiler::makeTerminalWrappers()
-{
- /* Make terminal language elements corresponding to each nonterminal in
- * the grammar. */
- for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
- if ( lel->type == LangEl::NonTerm ) {
- String name( lel->name.length() + 5, "_T_%s", lel->name.data );
- LangEl *termDup = new LangEl( lel->nspace, name, LangEl::Term );
-
- /* Give the dup the attributes of the nonterminal. This ensures
- * that the attributes are allocated when patterns and
- * constructors are parsed. */
- termDup->objectDef = lel->objectDef;
-
- langEls.append( termDup );
- lel->termDup = termDup;
- termDup->termDup = lel;
- }
- }
-}
-
-void Compiler::makeEofElements()
-{
- /* Make eof language elements for each user terminal. This is a bit excessive and
- * need to be reduced to the ones that we need parsers for, but we don't know that yet.
- * Another pass before this one is needed. */
- for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
- if ( lel->eofLel == 0 &&
- lel != eofLangEl &&
- lel != errorLangEl &&
- lel != noTokenLangEl &&
- !( lel->tokenDef != 0 && lel->tokenDef->dupOf != 0 ) )
- {
- String name( lel->name.length() + 5, "_eof_%s", lel->name.data );
- LangEl *eofLel = new LangEl( lel->nspace, name, LangEl::Term );
-
- langEls.append( eofLel );
- lel->eofLel = eofLel;
- eofLel->eofLel = lel;
- eofLel->isEOF = true;
- }
- }
-}
-
-void Compiler::makeIgnoreCollectors()
-{
- for ( RegionList::Iter region = regionList; region.lte(); region++ ) {
- if ( region->isFullRegion ) {
- String name( region->name.length() + 5, "_ign_%s", region->name.data );
- LangEl *ignLel = new LangEl( rootNamespace, name, LangEl::Term );
- langEls.append( ignLel );
- ignLel->isCI = true;
- ignLel->ciRegion = region;
-
- region->ciLel = ignLel;
- }
- }
-}
-
-void Compiler::typeResolve()
-{
- /*
- * Type Resolving.
- */
-
- /* Resolve uses statements. */
- resolveUses();
-
- /* Resolve pattern and replacement elements. */
- resolvePatternEls();
- resolveReplacementEls();
- resolveParserEls();
-
- resolveParseTree();
-
- resolveGenericTypes();
-
- argvTypeRef->lookupType( this );
-
- /* We must do this as the last step in the type resolution process because
- * all type resolves can cause new language elments with associated
- * productions. They get tacked onto the end of the list of productions.
- * Doing it at the end results processing a growing list. */
- resolveProductionEls();
-}
diff --git a/colm/rtvector.h b/colm/rtvector.h
deleted file mode 100644
index e03a17f9..00000000
--- a/colm/rtvector.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright 2002, 2006, 2009 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Aapl.
- *
- * Aapl is free software; you can redistribute it and/or modify it under the
- * terms of the GNU Lesser General Public License as published by the Free
- * Software Foundation; either version 2.1 of the License, or (at your option)
- * any later version.
- *
- * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY
- * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
- * more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with Aapl; if not, write to the Free Software Foundation, Inc., 59
- * Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#ifndef _RT_VECTOR_H
-#define _RT_VECTOR_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
-
diff --git a/colm/string.c b/colm/string.c
deleted file mode 100644
index d670b68c..00000000
--- a/colm/string.c
+++ /dev/null
@@ -1,240 +0,0 @@
-/*
- * Copyright 2007-2012 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Colm.
- *
- * Colm is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Colm is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Colm; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include <colm/pool.h>
-#include <colm/pdarun.h>
-#include <colm/bytecode.h>
-
-#include <assert.h>
-#include <string.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <ctype.h>
-
-/*
- * In this system strings are not null terminated. Often strings come from a
- * parse, in which case the string is just a pointer into the data string.
- * A block in a parsed stream can house many tokens and there is no room for
- * nulls.
- */
-
-/* Duplicate a string head. A head whose data sits directly behind it gets a
- * full copy of its bytes. A head that points elsewhere yields a new head
- * referring to the same data. A null head yields null. */
-Head *stringCopy( Program *prg, Head *head )
-{
-	if ( head == 0 )
-		return 0;
-
-	if ( (char*)(head+1) != head->data )
-		return stringAllocPointer( prg, head->data, head->length );
-
-	return stringAllocFull( prg, head->data, head->length );
-}
-
-/* Release a string head along with its location record, if it has one.
- * Passing a null head is a no-op. */
-void stringFree( Program *prg, Head *head )
-{
-	if ( head == 0 )
-		return;
-
-	if ( head->location != 0 )
-		locationFree( prg, head->location );
-
-	if ( (char*)(head+1) != head->data ) {
-		/* Only the head is ours; hand it back with headFree. */
-		headFree( prg, head );
-	}
-	else {
-		/* Head and data were allocated as one block. */
-		free( head );
-	}
-}
-
-/* Data pointer of a string head, or null for a null head. The data is not
- * NUL-terminated; pair it with stringLength. */
-const char *stringData( Head *head )
-{
-	return head != 0 ? head->data : 0;
-}
-
-/* Length in bytes of a string head. A null head has length zero. */
-long stringLength( Head *head )
-{
-	return head != 0 ? head->length : 0;
-}
-
-/* Truncate a string in place by lowering its recorded length. The data is
- * left untouched. head must not be null and newlen must not exceed the
- * current length (asserted). */
-void stringShorten( Head *head, long newlen )
-{
- assert( newlen <= head->length );
- head->length = newlen;
-}
-
-/* Allocate a head and `length` bytes of string data in a single block, with
- * the data placed directly behind the head. The data bytes are left
- * uninitialized. Never returns null: allocation failure ends the process,
- * which is what the disabled bad_alloc throw this replaces amounted to. */
-Head *initStrSpace( long length )
-{
-	/* One block for both the header and the data. */
-	Head *head = (Head*) malloc( sizeof(Head) + length );
-	if ( head == 0 ) {
-		/* No caller checks for null, so fail here in a defined way rather
-		 * than writing through a null pointer below. */
-		fprintf( stderr, "out of memory allocating string of length %ld\n", length );
-		abort();
-	}
-
-	/* Init the header. */
-	head->data = (char*)(head+1);
-	head->length = length;
-	head->location = 0;
-
-	return head;
-}
-
-/* Create a self-contained string holding a copy of `length` bytes taken
- * from `data`. No terminator is read or written. prg is unused. */
-Head *stringAllocFull( Program *prg, const char *data, long length )
-{
-	Head *copy = initStrSpace( length );
-
-	/* initStrSpace points copy->data at the space just behind the head. */
-	memcpy( (char*)copy->data, data, length );
-
-	return copy;
-}
-
-/* Create a string head that refers to `length` bytes at `data` without
- * copying them. The caller must keep `data` alive for as long as the head is
- * in use. */
-Head *stringAllocPointer( Program *prg, const char *data, long length )
-{
- /* Only the head is allocated. No space is made for the data. */
- Head *head = headAllocate( prg );
-
- /* Init the header. */
- /* NOTE(review): head->location is not set here, yet stringFree reads it.
- * Presumably headAllocate returns zeroed memory -- confirm. */
- head->data = data;
- head->length = length;
-
- return head;
-}
-
-/* Build a new self-contained string holding the bytes of s1 followed by the
- * bytes of s2. Neither argument may be null. */
-Head *concatStr( Head *s1, Head *s2 )
-{
-	const long firstLen = s1->length;
-	const long secondLen = s2->length;
-
-	Head *joined = initStrSpace( firstLen + secondLen );
-	char *out = (char*)(joined+1);
-
-	memcpy( out, s1->data, firstLen );
-	memcpy( out + firstLen, s2->data, secondLen );
-
-	return joined;
-}
-
-/* Return a new self-contained string holding an upper-cased copy of s, as
- * defined by toupper in the current locale. s must not be null. */
-Head *stringToUpper( Head *s )
-{
-	/* Init space for the data. */
-	long len = s->length;
-	Head *head = initStrSpace( len );
-
-	/* Convert byte by byte. Each byte goes through unsigned char because
-	 * passing a negative plain char to toupper is undefined. The index is a
-	 * long so it matches the type of the length. */
-	const char *src = s->data;
-	char *dst = (char*)(head+1);
-	long i;
-	for ( i = 0; i < len; i++ )
-		dst[i] = (char)toupper( (unsigned char)src[i] );
-
-	return head;
-}
-
-/* Return a new self-contained string holding a lower-cased copy of s, as
- * defined by tolower in the current locale. s must not be null. */
-Head *stringToLower( Head *s )
-{
-	/* Init space for the data. */
-	long len = s->length;
-	Head *head = initStrSpace( len );
-
-	/* Convert byte by byte. Each byte goes through unsigned char because
-	 * passing a negative plain char to tolower is undefined. The index is a
-	 * long so it matches the type of the length. */
-	const char *src = s->data;
-	char *dst = (char*)(head+1);
-	long i;
-	for ( i = 0; i < len; i++ )
-		dst[i] = (char)tolower( (unsigned char)src[i] );
-
-	return head;
-}
-
-
-/* Compare two strings. Returns 0 when they are identical. A shorter string
- * orders before a longer one (-1 / 1) regardless of content; strings of equal
- * length are ordered by memcmp. Neither argument may be null.
- * NOTE(review): the result is returned as a Word; if Word is unsigned the
- * negative results only read as negative after a cast -- confirm at callers. */
-Word cmpString( Head *s1, Head *s2 )
-{
- if ( s1->length < s2->length )
- return -1;
- else if ( s1->length > s2->length )
- return 1;
- else {
- char *d1 = (char*)(s1->data);
- char *d2 = (char*)(s2->data);
- return memcmp( d1, d2, s1->length );
- }
-}
-
-/* Parse a leading decimal integer from a string. The data has no terminator,
- * so a terminated copy is made first. Returns 0 when nothing can be parsed
- * or when the temporary copy cannot be allocated. str must not be null. */
-Word strAtoi( Head *str )
-{
-	/* FIXME: need to implement this by hand. There is no null terminator. */
-	char *nulled = (char*)malloc( str->length + 1 );
-	if ( nulled == 0 )
-		return 0;
-
-	memcpy( nulled, str->data, str->length );
-	nulled[str->length] = 0;
-
-	/* strtol in place of atoi: same result for values that fit, but the
-	 * behaviour on out-of-range input is defined. The result is still
-	 * narrowed to int, as before. */
-	int res = (int)strtol( nulled, 0, 10 );
-	free( nulled );
-	return res;
-}
-
-/* Format an integer as a decimal string and return it as a new
- * self-contained string. */
-Head *intToStr( Program *prg, Word i )
-{
-	/* A 64-bit long can take 20 characters ("-9223372036854775808") plus
-	 * the terminator, which did not fit in the previous 20-byte buffer.
-	 * snprintf bounds the write in any case. */
-	char data[32];
-	int len = snprintf( data, sizeof(data), "%ld", (long)i );
-	if ( len < 0 )
-		len = 0;
-	return stringAllocFull( prg, data, len );
-}
-
-/* Read the first two bytes of the string as an unsigned 16-bit value, most
- * significant byte first. The length is not checked; the caller must supply
- * at least two bytes. */
-Word strUord16( Head *head )
-{
-	uchar *bytes = (uchar*)(head->data);
-	ulong high = ((ulong)bytes[0]) << 8;
-	ulong low = (ulong)bytes[1];
-	return low | high;
-}
-
-/* Read the first byte of the string as an unsigned value. The length is not
- * checked; the caller must supply at least one byte. */
-Word strUord8( Head *head )
-{
-	uchar *bytes = (uchar*)(head->data);
-	return (ulong)bytes[0];
-}
-
-/* Create a string head for the literal at index `offset` in the program's
- * runtime data. The head refers to the literal's bytes (litdata) and length
- * (litlen) directly; nothing is copied. offset is not range checked. */
-Head *makeLiteral( Program *prg, long offset )
-{
- return stringAllocPointer( prg,
- prg->rtd->litdata[offset],
- prg->rtd->litlen[offset] );
-}
-
-/* Format a single integer with a caller-supplied printf-style format and
- * return the result as a new self-contained string. Returns null (which
- * stringData and stringLength treat as the empty string) if the format cannot
- * be copied or snprintf reports an error. prg is unused.
- * NOTE(review): the format comes from the running program and goes to
- * snprintf unchecked; a format that consumes anything other than one integer
- * argument is undefined behaviour. */
-Head *stringSprintf( Program *prg, Str *format, Int *integer )
-{
-	Head *formatHead = format->value;
-	long fmtLen = stringLength( formatHead );
-
-	/* Strings in this system carry no terminator, so snprintf must be given
-	 * a terminated copy rather than the head's data. */
-	char *fmt = (char*)malloc( fmtLen + 1 );
-	if ( fmt == 0 )
-		return 0;
-	if ( fmtLen > 0 )
-		memcpy( fmt, stringData(formatHead), fmtLen );
-	fmt[fmtLen] = 0;
-
-	/* First pass measures, second pass writes. */
-	long written = snprintf( 0, 0, fmt, integer->value );
-	if ( written < 0 ) {
-		free( fmt );
-		return 0;
-	}
-
-	Head *head = initStrSpace( written+1 );
-	snprintf( (char*)head->data, written+1, fmt, integer->value );
-	free( fmt );
-
-	/* The terminator snprintf wrote is not part of the string. */
-	head->length -= 1;
-	return head;
-}
diff --git a/colm/synthesis.cc b/colm/synthesis.cc
deleted file mode 100644
index 794927ad..00000000
--- a/colm/synthesis.cc
+++ /dev/null
@@ -1,3277 +0,0 @@
-/*
- * Copyright 2007-2012 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Colm.
- *
- * Colm is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Colm is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Colm; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include "bytecode.h"
-#include "parsedata.h"
-#include "fsmrun.h"
-#include "pdarun.h"
-#include "input.h"
-#include <iostream>
-#include <assert.h>
-
-using std::cout;
-using std::cerr;
-using std::endl;
-
-/* Create the unique types for nil and for the built-in language elements
- * (ptr, bool, int, str, stream, input, ignore, any) and register each one in
- * the unique type map. */
-void Compiler::initUniqueTypes( )
-{
-	uniqueTypeNil = new UniqueType( TYPE_NIL );
-	uniqeTypeMap.insert( uniqueTypeNil );
-
-	uniqueTypePtr = new UniqueType( TYPE_TREE, ptrLangEl );
-	uniqeTypeMap.insert( uniqueTypePtr );
-
-	uniqueTypeBool = new UniqueType( TYPE_TREE, boolLangEl );
-	uniqeTypeMap.insert( uniqueTypeBool );
-
-	uniqueTypeInt = new UniqueType( TYPE_TREE, intLangEl );
-	uniqeTypeMap.insert( uniqueTypeInt );
-
-	uniqueTypeStr = new UniqueType( TYPE_TREE, strLangEl );
-	uniqeTypeMap.insert( uniqueTypeStr );
-
-	uniqueTypeStream = new UniqueType( TYPE_TREE, streamLangEl );
-	uniqeTypeMap.insert( uniqueTypeStream );
-
-	uniqueTypeInput = new UniqueType( TYPE_TREE, inputLangEl );
-	uniqeTypeMap.insert( uniqueTypeInput );
-
-	uniqueTypeIgnore = new UniqueType( TYPE_TREE, ignoreLangEl );
-	uniqeTypeMap.insert( uniqueTypeIgnore );
-
-	uniqueTypeAny = new UniqueType( TYPE_TREE, anyLangEl );
-	uniqeTypeMap.insert( uniqueTypeAny );
-}
-
-/* Build the definition of a built-in iterator by choosing the instruction
- * used for each iterator operation. User iterators must use the
- * two-argument constructor; asking for one here asserts. */
-IterDef::IterDef( Type type ) :
-	type(type),
-	func(0),
-	useFuncId(false),
-	useSearchUT(false)
-{
-	/* Everything but the advance instruction. The built-in iterators share
-	 * the current-value instructions; only the reverse child iterator has
-	 * its own create and destroy. */
-	switch ( type ) {
-		case Tree:
-		case Child:
-		case RevChild:
-		case Repeat:
-		case RevRepeat:
-			if ( type == RevChild ) {
-				inCreateWV = IN_REV_TRITER_FROM_REF;
-				inCreateWC = IN_REV_TRITER_FROM_REF;
-				inDestroy = IN_REV_TRITER_DESTROY;
-			}
-			else {
-				inCreateWV = IN_TRITER_FROM_REF;
-				inCreateWC = IN_TRITER_FROM_REF;
-				inDestroy = IN_TRITER_DESTROY;
-			}
-
-			inGetCurR = IN_TRITER_GET_CUR_R;
-			inGetCurWC = IN_TRITER_GET_CUR_WC;
-			inSetCurWC = IN_TRITER_SET_CUR_WC;
-			inRefFromCur = IN_TRITER_REF_FROM_CUR;
-			useSearchUT = true;
-			break;
-
-		case User:
-			assert(false);
-			break;
-	}
-
-	/* The advance instruction is what tells the iterators apart. */
-	switch ( type ) {
-		case Tree:
-			inAdvance = IN_TRITER_ADVANCE;
-			break;
-		case Child:
-			inAdvance = IN_TRITER_NEXT_CHILD;
-			break;
-		case RevChild:
-			inAdvance = IN_REV_TRITER_PREV_CHILD;
-			break;
-		case Repeat:
-			inAdvance = IN_TRITER_NEXT_REPEAT;
-			break;
-		case RevRepeat:
-			inAdvance = IN_TRITER_PREV_REPEAT;
-			break;
-		case User:
-			break;
-	}
-}
-
-/* Build an iterator definition backed by a function. Every operation is set
- * to the corresponding IN_UITER_* instruction, and both useFuncId and
- * useSearchUT are turned on. */
-IterDef::IterDef( Type type, Function *func ) :
- type(type),
- func(func),
- useFuncId(true),
- useSearchUT(true),
- inCreateWV(IN_UITER_CREATE_WV),
- inCreateWC(IN_UITER_CREATE_WC),
- inDestroy(IN_UITER_DESTROY),
- inAdvance(IN_UITER_ADVANCE),
- inGetCurR(IN_UITER_GET_CUR_R),
- inGetCurWC(IN_UITER_GET_CUR_WC),
- inSetCurWC(IN_UITER_SET_CUR_WC),
- inRefFromCur(IN_UITER_REF_FROM_CUR)
-{}
-
-/* Create a method taking no arguments and register it under `name` in the
- * method map of `obj`. methIdWV and methIdWC are passed through to the
- * ObjMethod constructor unchanged. Returns the new method. */
-ObjMethod *initFunction( UniqueType *retType, ObjectDef *obj,
- const String &name, int methIdWV, int methIdWC, bool isConst )
-{
- ObjMethod *objMethod = new ObjMethod( retType, name,
- methIdWV, methIdWC, 0, 0, 0, isConst );
- obj->objMethodMap->insert( name, objMethod );
- return objMethod;
-}
-
-/* As above, for a method taking one argument of type arg1. */
-ObjMethod *initFunction( UniqueType *retType, ObjectDef *obj,
- const String &name, int methIdWV, int methIdWC, UniqueType *arg1, bool isConst )
-{
- UniqueType *args[] = { arg1 };
- ObjMethod *objMethod = new ObjMethod( retType, name,
- methIdWV, methIdWC, 1, args, 0, isConst );
- obj->objMethodMap->insert( name, objMethod );
- return objMethod;
-}
-
-/* As above, for a method taking two arguments of types arg1 and arg2. */
-ObjMethod *initFunction( UniqueType *retType, ObjectDef *obj,
- const String &name, int methIdWV, int methIdWC,
- UniqueType *arg1, UniqueType *arg2, bool isConst )
-{
- UniqueType *args[] = { arg1, arg2 };
- ObjMethod *objMethod = new ObjMethod( retType, name,
- methIdWV, methIdWC, 2, args, 0, isConst );
- obj->objMethodMap->insert( name, objMethod );
- return objMethod;
-}
-
-/* Return the iterator definition for (type, func) held in iterDefSet,
- * inserting it first if it is not there yet. */
-IterDef *Compiler::findIterDef( IterDef::Type type, Function *func )
-{
-	IterDefSetEl *found = iterDefSet.find( IterDef( type, func ) );
-	if ( found != 0 )
-		return &found->key;
-
-	IterDefSetEl *added = iterDefSet.insert( IterDef( type, func ) );
-	return &added->key;
-}
-
-/* Return the built-in iterator definition for `type` held in iterDefSet,
- * inserting it first if it is not there yet. */
-IterDef *Compiler::findIterDef( IterDef::Type type )
-{
-	IterDefSetEl *found = iterDefSet.find( IterDef( type ) );
-	if ( found != 0 )
-		return &found->key;
-
-	IterDefSetEl *added = iterDefSet.insert( IterDef( type ) );
-	return &added->key;
-}
-
-/* Return the unique type for a bare type id, creating and registering it in
- * the unique type map on first use. */
-UniqueType *Compiler::findUniqueType( int typeId )
-{
-	UniqueType probe( typeId );
-	UniqueType *existing = uniqeTypeMap.find( &probe );
-	if ( existing != 0 )
-		return existing;
-
-	UniqueType *created = new UniqueType( typeId );
-	uniqeTypeMap.insert( created );
-	return created;
-}
-
-/* Return the unique type for a type id paired with a language element,
- * creating and registering it on first use. */
-UniqueType *Compiler::findUniqueType( int typeId, LangEl *langEl )
-{
-	UniqueType probe( typeId, langEl );
-	UniqueType *existing = uniqeTypeMap.find( &probe );
-	if ( existing != 0 )
-		return existing;
-
-	UniqueType *created = new UniqueType( typeId, langEl );
-	uniqeTypeMap.insert( created );
-	return created;
-}
-
-/* Return the unique type for a type id paired with an iterator definition,
- * creating and registering it on first use. */
-UniqueType *Compiler::findUniqueType( int typeId, IterDef *iterDef )
-{
-	UniqueType probe( typeId, iterDef );
-	UniqueType *existing = uniqeTypeMap.find( &probe );
-	if ( existing != 0 )
-		return existing;
-
-	UniqueType *created = new UniqueType( typeId, iterDef );
-	uniqeTypeMap.insert( created );
-	return created;
-}
-
-/* Descend into the next child of the current scope. The first call on a
- * scope selects its first child and each later call selects the following
- * one, wrapping back to the first after the last. */
-void ObjectDef::iterPushScope()
-{
-	ObjNameScope *following = 0;
-	if ( scope->childIter != 0 )
-		following = scope->childIter->next;
-
-	/* Not started yet, or ran off the end: start again at the head. */
-	if ( following == 0 )
-		following = scope->children.head;
-
-	scope->childIter = following;
-	scope = scope->childIter;
-}
-
-/* Leave the current scope by moving back to its parent. */
-void ObjectDef::iterPopScope()
-{
- //cout << "iter pop scope" << endl;
- scope = scope->parentScope;
-}
-
-/* Open a new, empty name scope as the last child of the current scope and
- * make it the current scope. */
-void ObjectDef::pushScope()
-{
-	ObjNameScope *child = new ObjNameScope;
-	child->objFieldMap = new ObjFieldMap;
-	child->parentScope = scope;
-
-	scope->children.append( child );
-	scope = child;
-}
-
-/* Close the current name scope by moving back to its parent. */
-void ObjectDef::popScope()
-{
- scope = scope->parentScope;
-}
-
-/* Add a field under `name` to the current scope's field map and append it to
- * the object's field list. */
-void ObjectDef::insertField( const String &name, ObjField *value )
-{
- scope->objFieldMap->insert( name, value );
- objFieldList->append( value );
-}
-
-/* Look a field up by name, starting in inScope and walking outward through
- * the parent scopes of a single object def. Returns 0 if no scope has it. */
-ObjField *ObjectDef::findFieldInScope( const String &name, ObjNameScope *inScope )
-{
-	ObjNameScope *cur = inScope;
-	do {
-		ObjFieldMapEl *hit = cur->objFieldMap->find( name );
-		if ( hit != 0 )
-			return hit->value;
-		cur = cur->parentScope;
-	} while ( cur != 0 );
-
-	return 0;
-}
-
-/* Look for `name` in the current scope only, ignoring parent scopes. Returns
- * the field already declared there, or 0 if there is none. */
-ObjField *ObjectDef::checkRedecl( const String &name )
-{
-	ObjFieldMapEl *hit = scope->objFieldMap->find( name );
-	return hit != 0 ? hit->value : 0;
-}
-
-/* Return the field at position `offset` (0-based) in the field list. The
- * position is not range checked. */
-ObjField *ObjectDef::findFieldNum( long offset )
-{
-	ObjFieldList::Iter field = *objFieldList;
-	for ( int fn = 0; fn < offset; fn++ )
-		field++;
-	return field->value;
-}
-
-/* Look a field up by name, starting in the current scope and continuing
- * through its parents. Returns 0 if it is not found. */
-ObjField *ObjectDef::findField( const String &name )
-{
-	return findFieldInScope( name, scope );
-}
-
-/* Look a method up by name in this object's method map. Returns 0 if it is
- * not found. */
-ObjMethod *ObjectDef::findMethod( const String &name )
-{
-	ObjMethodMapEl *hit = objMethodMap->find( name );
-	return hit != 0 ? hit->value : 0;
-}
-
-/* Number of words a field of the given type occupies. */
-long sizeOfField( UniqueType *fieldUT )
-{
-	/* References take two words. */
-	if ( fieldUT->typeId == TYPE_REF )
-		return 2;
-
-	/* Anything that is not an iterator takes one. */
-	if ( fieldUT->typeId != TYPE_ITER )
-		return 1;
-
-	/* Select on the iterator type. */
-	switch ( fieldUT->iterDef->type ) {
-		case IterDef::Tree:
-		case IterDef::Child:
-		case IterDef::Repeat:
-		case IterDef::RevRepeat:
-			return sizeof(TreeIter) / sizeof(Word);
-
-		case IterDef::RevChild:
-			return sizeof(RevTreeIter) / sizeof(Word);
-
-		case IterDef::User:
-			/* User iterators are just a pointer to the UserIter struct. The
-			 * struct needs to go right beneath the call to the user iterator
-			 * so it can be found by a yield. It is therefore allocated on the
-			 * stack right before the call. */
-			return 1;
-	}
-
-	return 0;
-}
-
-/* Mark a field as referenced and initialize it if that has not been done
- * yet (see initField). */
-void ObjectDef::referenceField( Compiler *pd, ObjField *field )
-{
- field->beenReferenced = true;
- initField( pd, field );
-}
-
-/* Initialize a field on first use: give it an offset and access
- * instructions. Does nothing if the field was already initialized. */
-void ObjectDef::initField( Compiler *pd, ObjField *field )
-{
- if ( !field->beenInitialized ) {
- field->beenInitialized = true;
- UniqueType *fieldUT = field->typeRef->uniqueType;
-
- if ( type == FrameType ) {
- /* Frame fields get negative offsets: grow first, then negate. */
- nextOffset += sizeOfField( fieldUT );
- field->offset = -nextOffset;
-
- pd->initLocalInstructions( field );
- }
- else if ( field->isRhsGet ) {
- /* Rhs values take no offset, only the RHS_VAL instructions. */
- field->useOffset = false;
- field->inGetR = IN_GET_RHS_VAL_R;
- field->inGetWC = IN_GET_RHS_VAL_WC;
- field->inGetWV = IN_GET_RHS_VAL_WV;
- field->inSetWC = IN_SET_RHS_VAL_WC;
- /* NOTE(review): inSetWV is given the _WC instruction, unlike the
- * get pair above. Confirm whether this is intentional. */
- field->inSetWV = IN_SET_RHS_VAL_WC;
- }
- else {
- /* Other fields get non-negative offsets: assign, then grow. */
- field->offset = nextOffset;
- nextOffset += sizeOfField( fieldUT );
-
- /* Initialize the instructions. */
- pd->initFieldInstructions( field );
- }
- }
-}
-
-/* Append the instruction that loads field `el` of `inObject`, followed by its
- * operands. Returns the type of the loaded value; for an iterator field that
- * is the iterator's search type rather than the iterator type itself. */
-UniqueType *LangVarRef::loadFieldInstr( Compiler *pd, CodeVect &code,
- ObjectDef *inObject, ObjField *el, bool forWriting, bool revert ) const
-{
- /* Ensure that the field is referenced. */
- inObject->referenceField( pd, el );
-
- UniqueType *elUT = el->typeRef->uniqueType;
-
- /* Choose the get instruction. Iterators always go through the iterator's
- * own get-current instructions. */
- if ( forWriting ) {
- /* The instruction, depends on whether or not we are reverting. */
- if ( elUT->typeId == TYPE_ITER )
- code.append( elUT->iterDef->inGetCurWC );
- else if ( pd->revertOn && revert )
- code.append( el->inGetWV );
- else
- code.append( el->inGetWC );
- }
- else {
- /* Loading something for reading. */
- if ( elUT->typeId == TYPE_ITER )
- code.append( elUT->iterDef->inGetCurR );
- else
- code.append( el->inGetR );
- }
-
- if ( el->useOffset ) {
- /* Gets of locals and fields require offsets. Fake vars like token
- * data and lhs don't require it. */
- code.appendHalf( el->offset );
- }
- else if ( el->isRhsGet ) {
- /* Need to place the array computing the val. */
- code.append( el->rhsVal.length() );
- for ( Vector<RhsVal>::Iter rg = el->rhsVal; rg.lte(); rg++ ) {
- code.append( rg->prodNum );
- code.append( rg->childNum );
- }
- }
-
- /* If we are dealing with an iterator then dereference it. */
- if ( elUT->typeId == TYPE_ITER )
- elUT = el->typeRef->searchUniqueType;
-
- return elUT;
-}
-
-/* Return the object definition of the language element behind a tree or
- * reference type. Any other type is a caller error. */
-ObjectDef *objDefFromUT( Compiler *pd, UniqueType *ut )
-{
-	if ( ut->typeId == TYPE_TREE || ut->typeId == TYPE_REF )
-		return ut->langEl->objectDef;
-
-	/* This should have generated a compiler error. */
-	assert(false);
-	return 0;
-}
-
-/* Append one reference-taking instruction for each item of the
- * qualification and return how many items were processed. The qualification
- * must start at a local frame. There cannot be any pointer. */
-long LangVarRef::loadQualificationRefs( Compiler *pd, CodeVect &code ) const
-{
- long count = 0;
- ObjectDef *rootObj = pd->curLocalFrame;
-
- /* Start the search from the root object. */
- ObjectDef *searchObjDef = rootObj;
-
- for ( QualItemVect::Iter qi = *qual; qi.lte(); qi++ ) {
- /* Lookup the field in the current qualification. */
- ObjField *el = searchObjDef->findField( qi->data );
- if ( el == 0 )
- error(qi->loc) << "cannot resolve qualification " << qi->data << endp;
-
- /* Only the first item is taken from the frame. Every later item is a
- * reference taken from the previous reference. */
- if ( qi.pos() > 0 ) {
- code.append( IN_REF_FROM_QUAL_REF );
- code.appendHalf( 0 );
- code.appendHalf( el->offset );
- }
- else if ( el->typeRef->iterDef != 0 ) {
- code.append( el->typeRef->iterDef->inRefFromCur );
- code.appendHalf( el->offset );
- }
- else if ( el->typeRef->type == TypeRef::Ref ) {
- code.append( IN_REF_FROM_REF );
- code.appendHalf( el->offset );
- }
- else {
- code.append( IN_REF_FROM_LOCAL );
- code.appendHalf( el->offset );
- }
-
- /* Continue the search in the object of this item, looking through an
- * iterator to the type it searches for. */
- UniqueType *elUT = el->typeRef->uniqueType;
- if ( elUT->typeId == TYPE_ITER )
- elUT = el->typeRef->searchUniqueType;
-
- assert( qi->type == QualItem::Dot );
-
- searchObjDef = objDefFromUT( pd, elUT );
- count += 1;
- }
- return count;
-}
-
-/* Append the loads for every item of the qualification, starting the lookup
- * in rootObj. lastPtrInQual is the position of the last pointer in the
- * qualification, or negative if there is none. */
-void LangVarRef::loadQualification( Compiler *pd, CodeVect &code,
- ObjectDef *rootObj, int lastPtrInQual, bool forWriting, bool revert ) const
-{
- /* Start the search from the root object. */
- ObjectDef *searchObjDef = rootObj;
-
- for ( QualItemVect::Iter qi = *qual; qi.lte(); qi++ ) {
- /* Lookup the field in the current qualification. */
- ObjField *el = searchObjDef->findField( qi->data );
- if ( el == 0 )
- error(qi->loc) << "cannot resolve qualification " << qi->data << endp;
-
- if ( forWriting && el->refActive )
- error(qi->loc) << "reference active, cannot write to object" << endp;
-
- bool lfForWriting = forWriting;
- bool lfRevert = revert;
-
- /* If there is a pointer in the qualification, we need to compute
- * forWriting and revert. */
- if ( lastPtrInQual >= 0 ) {
- if ( qi.pos() <= lastPtrInQual ) {
- /* If we are before or at the pointer we are strictly read
- * only, regardless of the origin. */
- lfForWriting = false;
- lfRevert = false;
- }
- else {
- /* If we are past the pointer then we are always reverting
- * because the object is global. Forwriting is as passed in.
- * */
- lfRevert = true;
- }
- }
-
- UniqueType *qualUT = loadFieldInstr( pd, code, searchObjDef,
- el, lfForWriting, lfRevert );
-
- if ( qi->type == QualItem::Dot ) {
- /* Cannot dot a pointer. Iterator yes (access of the iterator not
- * the current) */
- if ( qualUT->typeId == TYPE_PTR )
- error(loc) << "dot cannot be used to access a pointer" << endp;
- }
- else if ( qi->type == QualItem::Arrow ) {
- if ( qualUT->typeId == TYPE_PTR ) {
- /* Always dereference pointers when used for qualification. If
- * this is the last one then we must start with the reverse
- * execution business. */
- if ( pd->revertOn && qi.pos() == lastPtrInQual && forWriting ) {
- /* This is like a global load. */
- code.append( IN_PTR_DEREF_WV );
- }
- else {
- /* If reading or not yet the last pointer then we only need a
- * reading deref. */
- code.append( IN_PTR_DEREF_R );
- }
-
- qualUT = pd->findUniqueType( TYPE_TREE, qualUT->langEl );
- }
- else {
- error(loc) << "arrow operator cannot be used to access this type" << endp;
- }
- }
-
- searchObjDef = objDefFromUT( pd, qualUT );
- }
-}
-
-/* Append the load of the context object followed by the loads for the
- * qualification, looked up from the context's object definition. */
-void LangVarRef::loadContextObj( Compiler *pd, CodeVect &code,
- int lastPtrInQual, bool forWriting ) const
-{
- /* Start the search in the context object. */
- ObjectDef *rootObj = pd->context->contextObjDef;
-
- if ( forWriting && lastPtrInQual < 0 ) {
- /* If we are writing and no pointer was found in the qualification
- * then load the context for writing, with a revert if reverting is
- * on. */
- if ( pd->revertOn )
- code.append( IN_LOAD_CONTEXT_WV );
- else
- code.append( IN_LOAD_CONTEXT_WC );
- }
- else {
- /* Either we are reading or we are loading a pointer that will be
- * dereferenced. */
- code.append( IN_LOAD_CONTEXT_R );
- }
-
- loadQualification( pd, code, rootObj, lastPtrInQual, forWriting, true );
-}
-
-/* Append the load of the global object followed by the loads for the
- * qualification, looked up from the global object definition. */
-void LangVarRef::loadGlobalObj( Compiler *pd, CodeVect &code,
- int lastPtrInQual, bool forWriting ) const
-{
- /* Start the search in the global object. */
- ObjectDef *rootObj = pd->globalObjectDef;
-
- if ( forWriting && lastPtrInQual < 0 ) {
- /* If we are writing and no pointer was found in the qualification
- * then load the global for writing, with a revert if reverting is
- * on. */
- if ( pd->revertOn )
- code.append( IN_LOAD_GLOBAL_WV );
- else
- code.append( IN_LOAD_GLOBAL_WC );
- }
- else {
- /* Either we are reading or we are loading a pointer that will be
- * dereferenced. */
- code.append( IN_LOAD_GLOBAL_R );
- }
-
- loadQualification( pd, code, rootObj, lastPtrInQual, forWriting, true );
-}
-
-/* Append the loads for the qualification, looked up from the current local
- * frame. The only difference from loadLocalObj is that revert is passed as
- * true. */
-void LangVarRef::loadCustom( Compiler *pd, CodeVect &code,
- int lastPtrInQual, bool forWriting ) const
-{
- /* Start the search in the local frame. */
- loadQualification( pd, code, pd->curLocalFrame, lastPtrInQual, forWriting, true );
-}
-
-/* Append the loads for the qualification, looked up from the current local
- * frame, with revert passed as false. */
-void LangVarRef::loadLocalObj( Compiler *pd, CodeVect &code,
- int lastPtrInQual, bool forWriting ) const
-{
- /* Start the search in the local frame. */
- loadQualification( pd, code, pd->curLocalFrame, lastPtrInQual, forWriting, false );
-}
-
-/* Does this reference start in the current local frame? With a
- * qualification, the first item must be a field of the frame. Without one,
- * the name must be a field or a method of the frame. */
-bool LangVarRef::isLocalRef( Compiler *pd ) const
-{
-	ObjectDef *frame = pd->curLocalFrame;
-
-	if ( qual->length() > 0 )
-		return frame->findField( qual->data[0].data ) != 0;
-
-	return frame->findField( name ) != 0 ||
-			frame->findMethod( name ) != 0;
-}
-
-/* Does this reference start in the context object? Always false when there
- * is no context. Otherwise the first qualification item, or the name when
- * there is no qualification, is looked up in the context's object def. */
-bool LangVarRef::isContextRef( Compiler *pd ) const
-{
-	if ( pd->context == 0 )
-		return false;
-
-	if ( qual->length() > 0 )
-		return pd->context->contextObjDef->findField( qual->data[0].data ) != 0;
-
-	return pd->context->contextObjDef->findField( name ) != 0 ||
-			pd->context->contextObjDef->findMethod( name ) != 0;
-}
-
-/* Does this reference start at something in the current local frame that is
- * flagged isCustom? With a qualification only its first item is considered.
- * Without one, the name is tried as a field and, only if no such field
- * exists, as a method. */
-bool LangVarRef::isCustom( Compiler *pd ) const
-{
-	ObjectDef *frame = pd->curLocalFrame;
-
-	if ( qual->length() > 0 ) {
-		ObjField *first = frame->findField( qual->data[0].data );
-		return first != 0 && first->isCustom;
-	}
-
-	ObjField *field = frame->findField( name );
-	if ( field != 0 )
-		return field->isCustom;
-
-	ObjMethod *method = frame->findMethod( name );
-	return method != 0 && method->isCustom;
-}
-
-/* Append the loads for the object this reference lives in. The root is
- * chosen by the first test that holds: custom, local frame, context, and
- * otherwise the global object. */
-void LangVarRef::loadObj( Compiler *pd, CodeVect &code,
- int lastPtrInQual, bool forWriting ) const
-{
- if ( isCustom( pd ) )
- loadCustom( pd, code, lastPtrInQual, forWriting );
- else if ( isLocalRef( pd ) )
- loadLocalObj( pd, code, lastPtrInQual, forWriting );
- else if ( isContextRef( pd ) )
- loadContextObj( pd, code, lastPtrInQual, forWriting );
- else
- loadGlobalObj( pd, code, lastPtrInQual, forWriting );
-}
-
-/* Walk the qualification from rootDef without emitting code. The result
- * holds the position of the last pointer in the qualification, the position
- * of the first const item (each -1 if there is none), and the object
- * definition the walk ends in. */
-VarRefLookup LangVarRef::lookupQualification( Compiler *pd, ObjectDef *rootDef ) const
-{
- int lastPtrInQual = -1;
- ObjectDef *searchObjDef = rootDef;
- int firstConstPart = -1;
-
- for ( QualItemVect::Iter qi = *qual; qi.lte(); qi++ ) {
- /* Lookup the field in the current qualification. */
- ObjField *el = searchObjDef->findField( qi->data );
- if ( el == 0 )
- error(qi->loc) << "cannot resolve qualification " << qi->data << endp;
-
- /* Lookup the type of the field. */
- UniqueType *qualUT = el->typeRef->uniqueType;
-
- /* If we are dealing with an iterator then dereference it. */
- if ( qualUT->typeId == TYPE_ITER )
- qualUT = el->typeRef->searchUniqueType;
-
- /* Is it const? */
- if ( firstConstPart < 0 && el->isConst )
- firstConstPart = qi.pos();
-
- /* Check for pointers. When loop is done we will have the last one
- * present, if any. */
- if ( qualUT->typeId == TYPE_PTR )
- lastPtrInQual = qi.pos();
-
- if ( qi->type == QualItem::Dot ) {
- /* Cannot dot a pointer. Iterator yes (access of the iterator
- * not the current) */
- if ( qualUT->typeId == TYPE_PTR )
- error(loc) << "dot cannot be used to access a pointer" << endp;
- }
- else if ( qi->type == QualItem::Arrow ) {
- /* NOTE(review): iterators were already dereferenced above, so the
- * TYPE_ITER test here looks redundant -- confirm. */
- if ( qualUT->typeId == TYPE_ITER )
- qualUT = el->typeRef->searchUniqueType;
- else if ( qualUT->typeId == TYPE_PTR )
- qualUT = pd->findUniqueType( TYPE_TREE, qualUT->langEl );
- }
-
- searchObjDef = objDefFromUT( pd, qualUT );
- }
-
- return VarRefLookup( lastPtrInQual, firstConstPart, searchObjDef );
-}
-
-/* Pick the root object for this reference -- the local frame, then the
- * context object, then the global object -- and walk the qualification from
- * there. */
-VarRefLookup LangVarRef::lookupObj( Compiler *pd ) const
-{
- ObjectDef *rootDef;
- if ( isLocalRef( pd ) )
- rootDef = pd->curLocalFrame;
- else if ( isContextRef( pd ) )
- rootDef = pd->context->contextObjDef;
- else
- rootDef = pd->globalObjectDef;
-
- return lookupQualification( pd, rootDef );
-}
-
-/* Resolve this reference to a field. On top of the result of lookupObj, the
- * returned lookup carries the field, its unique type and, when the field's
- * type reference has one, its iterator search type. */
-VarRefLookup LangVarRef::lookupField( Compiler *pd ) const
-{
- /* Lookup the object that the field is in. */
- VarRefLookup lookup = lookupObj( pd );
-
- /* Lookup the field. */
- ObjField *field = lookup.inObject->findField( name );
- if ( field == 0 )
- error(loc) << "cannot find name " << name << " in object" << endp;
-
- lookup.objField = field;
- lookup.uniqueType = field->typeRef->uniqueType;
-
- if ( field->typeRef->searchUniqueType != 0 )
- lookup.iterSearchUT = field->typeRef->searchUniqueType;
-
- return lookup;
-}
-
-
-/* Resolve this reference to a method. If the name is not a method of the
- * object found, the name is moved onto the end of the qualification and the
- * method "finish" is looked up on the object it names instead. Note that this
- * fallback modifies both qual and name of this LangVarRef. */
-VarRefLookup LangVarRef::lookupMethod( Compiler *pd )
-{
- /* Lookup the object that the method is in. */
- VarRefLookup lookup = lookupObj( pd );
-
- /* Find the method. */
- assert( lookup.inObject->objMethodMap != 0 );
- ObjMethod *method = lookup.inObject->findMethod( name );
- if ( method == 0 ) {
- /* Not found as a method, try it as an object on which we will call a
- * default function. */
- qual->append( QualItem( InputLoc(), name, QualItem::Dot ) );
- name = "finish";
-
- /* Lookup the object that the method is in. */
- /* NOTE(review): this declaration shadows the outer lookup, so the value
- * returned below still carries inObject and lastPtrInQual from before
- * the qualification was extended. Confirm that this is intended. */
- VarRefLookup lookup = lookupObj( pd );
-
- /* Find the method. */
- assert( lookup.inObject->objMethodMap != 0 );
- method = lookup.inObject->findMethod( name );
- if ( method == 0 )
- error(loc) << "cannot find " << name << "(...) in object" << endp;
- }
-
- lookup.objMethod = method;
- lookup.uniqueType = method->returnUT;
-
- return lookup;
-}
-
-/* Append the instruction that stores into field `el` of `inObject`, followed
- * by the field offset when the field uses one. exprUT is not used here. */
-void LangVarRef::setFieldInstr( Compiler *pd, CodeVect &code,
- ObjectDef *inObject, ObjField *el, UniqueType *exprUT, bool revert ) const
-{
- /* Ensure that the field is referenced. */
- inObject->referenceField( pd, el );
-
- /* The WV form is used only when reverting is on and was asked for. */
- if ( pd->revertOn && revert )
- code.append( el->inSetWV );
- else
- code.append( el->inSetWC );
-
- /* Maybe write out an offset. */
- if ( el->useOffset )
- code.appendHalf( el->offset );
-}
-
-/* Decide whether a value of type srcUT may be assigned to a destination of
- * type destUT. destSearchUT is consulted only when the destination is an
- * iterator. No code is appended; `code` is unused. */
-bool castAssignment( Compiler *pd, CodeVect &code, UniqueType *destUT,
-		UniqueType *destSearchUT, UniqueType *srcUT )
-{
-	/* Identical types always match. */
-	if ( destUT == srcUT )
-		return true;
-
-	if ( destUT->typeId == TYPE_TREE ) {
-		/* Any tree can be cast to any. */
-		if ( srcUT->typeId == TYPE_TREE && destUT->langEl == pd->anyLangEl )
-			return true;
-
-		/* A tree can be set from a reference to the same language element. */
-		if ( srcUT->typeId == TYPE_REF && destUT->langEl == srcUT->langEl )
-			return true;
-
-		/* Nil can be assigned to a tree. */
-		return srcUT->typeId == TYPE_NIL;
-	}
-
-	if ( destUT->typeId == TYPE_REF ) {
-		/* A reference can be set from a tree of the same language element. */
-		return srcUT->typeId == TYPE_TREE &&
-				destUT->langEl == srcUT->langEl;
-	}
-
-	if ( destUT->typeId == TYPE_ITER ) {
-		/* An iterator can be set from a tree of the type it searches for. */
-		return srcUT->typeId == TYPE_TREE &&
-				destSearchUT->langEl == srcUT->langEl;
-	}
-
-	if ( destUT->typeId == TYPE_PTR ) {
-		/* Nil can be assigned to a pointer. */
-		return srcUT->typeId == TYPE_NIL;
-	}
-
-	return false;
-}
-
-/* Find the field named by this reference in inObject and append the
- * instruction that stores into it. Reports an error if there is no such
- * field. */
-void LangVarRef::setField( Compiler *pd, CodeVect &code,
- ObjectDef *inObject, UniqueType *exprUT, bool revert ) const
-{
- ObjField *el = inObject->findField( name );
- if ( el == 0 )
- error(loc) << "cannot find name " << name << " in object" << endp;
-
- setFieldInstr( pd, code, inObject, el, exprUT, revert );
-}
-
-/* Append the instruction that sets the current value of the iterator named by
- * this reference, followed by the field offset. The instruction comes from
- * the iterator definition of objUT. exprType and revert are not used here. */
-void LangVarRef::setFieldIter( Compiler *pd, CodeVect &code,
- ObjectDef *inObject, UniqueType *objUT, UniqueType *exprType, bool revert ) const
-{
- ObjField *el = inObject->findField( name );
- if ( el == 0 )
- error(loc) << "cannot find name " << name << " in object" << endp;
-
- code.append( objUT->iterDef->inSetCurWC );
- code.appendHalf( el->offset );
-}
-
-/* Append the code that loads the value this reference names: first the
- * object it lives in, then the field itself. Returns the type of the value
- * loaded. */
-UniqueType *LangVarRef::evaluate( Compiler *pd, CodeVect &code, bool forWriting ) const
-{
-	/* Resolve the object and the field. */
-	VarRefLookup found = lookupField( pd );
-
-	/* Load the object, if any. */
-	loadObj( pd, code, found.lastPtrInQual, forWriting );
-
-	/* Load the field. The final load is never a reverting one. */
-	return loadFieldInstr( pd, code, found.inObject,
-			found.objField, forWriting, false );
-}
-
-/* Check that a reference may be taken of the looked-up variable, reporting an
- * error if not. */
-void LangVarRef::canTakeRef( Compiler *pd, VarRefLookup &lookup ) const
-{
-	/* The variable must be a local, or else a non-pointer attribute reached
-	 * from a local without going through any pointer. */
-	const bool inFrame = lookup.inObject->type == ObjectDef::FrameType;
-	const bool canTake = inFrame ||
-			( isLocalRef(pd) && lookup.lastPtrInQual < 0 &&
-			lookup.uniqueType->typeId != TYPE_PTR );
-
-	if ( !canTake ) {
-		error(loc) << "can only take references of locals or "
-				"attributes accessed via a local" << endp;
-	}
-
-	/* Only one reference to a field may be active at a time. */
-	if ( lookup.objField->refActive )
-		error(loc) << "reference currently active, cannot take another" << endp;
-}
-
-/* First step of passing this variable by reference: check that a reference
- * may be taken, then emit the loads for the qualification. Returns the field
- * being referenced. */
-ObjField *LangVarRef::preEvaluateRef( Compiler *pd, CodeVect &code ) const
-{
-	VarRefLookup lookup = lookupField( pd );
-	canTakeRef( pd, lookup );
-
-	loadQualificationRefs( pd, code );
-	return lookup.objField;
-}
-
-/* Emit the instruction that creates a reference to this variable and return
- * the field being referenced. pushCount is written as an operand only when
- * the variable is qualified. */
-ObjField *LangVarRef::evaluateRef( Compiler *pd, CodeVect &code, long pushCount ) const
-{
-	VarRefLookup lookup = lookupField( pd );
-	canTakeRef( pd, lookup );
-
-	ObjField *field = lookup.objField;
-
-	/* Make sure the field counts as referenced. */
-	lookup.inObject->referenceField( pd, field );
-
-	/* An unqualified variable now has a live reference. */
-	const bool qualified = qual->length() > 0;
-	if ( !qualified )
-		field->refActive = true;
-
-	/* Taking a reference implies a possible write, so the tree has to be
-	 * treated as dirty. */
-	field->dirtyTree = true;
-
-	/* Choose the reference-creating opcode. */
-	if ( qualified ) {
-		code.append( IN_REF_FROM_QUAL_REF );
-		code.appendHalf( pushCount );
-	}
-	else if ( field->typeRef->iterDef != 0 )
-		code.append( field->typeRef->iterDef->inRefFromCur );
-	else if ( field->typeRef->type == TypeRef::Ref )
-		code.append( IN_REF_FROM_REF );
-	else
-		code.append( IN_REF_FROM_LOCAL );
-
-	/* Every form ends with the field offset. */
-	code.appendHalf( field->offset );
-
-	return field;
-}
-
-/* Emit the code that pushes the arguments of a call and return a freshly
- * allocated array with one slot per argument. The slot of an argument bound
- * to a reference parameter holds the field the reference was taken of, all
- * other slots are null. The array is allocated with new[] and must be
- * released by the caller. Fatal errors: wrong argument count, a reference
- * parameter that is not given a plain variable, or an argument that cannot
- * be cast to its parameter type. */
-ObjField **LangVarRef::evaluateArgs( Compiler *pd, CodeVect &code,
-		VarRefLookup &lookup, ExprVect *args ) const
-{
-	/* Only user defined methods carry a parameter list; it is null
-	 * otherwise. */
-	ParameterList *paramList = lookup.objMethod->paramList;
-
-	/* The call has to supply exactly as many arguments as there are
-	 * parameters. */
-	const int numArgs = ( args == 0 ) ? 0 : args->length();
-	if ( numArgs != lookup.objMethod->numParams )
-		error(loc) << "wrong number of arguments" << endp;
-
-	/* Fields used by reference arguments; every slot starts out null. */
-	ObjField **paramRefs = new ObjField*[numArgs]();
-
-	if ( args == 0 )
-		return paramRefs;
-
-	/* Walked in step with the arguments, but only when there is a parameter
-	 * list. */
-	ParameterList::Iter p;
-	long pushCount = 0;
-
-	/* Pass one: push the object loads that reference parameters need. */
-	if ( paramList != 0 )
-		p = *paramList;
-	for ( ExprVect::Iter arg = *args; arg.lte(); arg++ ) {
-		LangExpr *expression = *arg;
-		UniqueType *paramUT = lookup.objMethod->paramUTs[arg.pos()];
-
-		if ( paramUT->typeId == TYPE_REF ) {
-			/* A reference can only be taken of a variable. */
-			if ( expression->type != LangExpr::TermType )
-				error(loc) << "not a term: argument must be a local variable" << endp;
-			if ( expression->term->type != LangTerm::VarRefType )
-				error(loc) << "not a variable: argument must be a local variable" << endp;
-
-			LangVarRef *argRef = expression->term->varRef;
-			paramRefs[arg.pos()] = argRef->preEvaluateRef( pd, code );
-
-			pushCount += argRef->qual->length() * 2;
-		}
-
-		if ( paramList != 0 )
-			p.increment();
-	}
-
-	/* Pass two: push the arguments themselves. */
-	if ( paramList != 0 )
-		p = *paramList;
-	for ( ExprVect::Iter arg = *args; arg.lte(); arg++ ) {
-		LangExpr *expression = *arg;
-		UniqueType *paramUT = lookup.objMethod->paramUTs[arg.pos()];
-
-		if ( paramUT->typeId == TYPE_REF ) {
-			/* A reference can only be taken of a variable. */
-			if ( expression->type != LangExpr::TermType )
-				error(loc) << "not a term: argument must be a local variable" << endp;
-			if ( expression->term->type != LangTerm::VarRefType )
-				error(loc) << "not a variable: argument must be a local variable" << endp;
-
-			LangVarRef *argRef = expression->term->varRef;
-
-			/* Discount this argument's own qualification words before
-			 * handing the count to the reference instruction. */
-			pushCount -= argRef->qual->length() * 2;
-
-			paramRefs[arg.pos()] = argRef->evaluateRef( pd, code, pushCount );
-
-			pushCount += 2;
-		}
-		else {
-			UniqueType *exprUT = expression->evaluate( pd, code );
-
-			if ( !castAssignment( pd, code, paramUT, 0, exprUT ) )
-				error(loc) << "arg " << arg.pos()+1 << " is of the wrong type" << endp;
-
-			pushCount += 1;
-		}
-
-		if ( paramList != 0 )
-			p.increment();
-	}
-
-	return paramRefs;
-}
-
-/* Clear the refActive flag on every field recorded in paramRefs. One slot
- * is visited per parameter of the looked-up method; null slots are
- * skipped. */
-void LangVarRef::resetActiveRefs( Compiler *pd, VarRefLookup &lookup, ObjField **paramRefs ) const
-{
-	const long numParams = lookup.objMethod->numParams;
-	for ( long i = 0; i < numParams; i++ ) {
-		ObjField *ref = paramRefs[i];
-		if ( ref == 0 )
-			continue;
-		ref->refActive = false;
-	}
-}
-
-
-/* Emit the call instruction of the looked-up method, loading the call
- * object first when the method asks for it. The method's WV opcode is used
- * when reverting is on and the call needs reverting, the WC opcode
- * otherwise. A parse-finish opcode is expanded into its full instruction
- * sequence. */
-void LangVarRef::callOperation( Compiler *pd, CodeVect &code, VarRefLookup &lookup ) const
-{
-	/* Only a non-const builtin (no user function attached) writes. */
-	const bool forWriting = lookup.objMethod->func == 0 &&
-			!lookup.objMethod->isConst;
-
-	/* Load the object, if the method operates on one. */
-	if ( lookup.objMethod->useCallObj )
-		loadObj( pd, code, lookup.lastPtrInQual, forWriting );
-
-	/* A call has to be reverted when it goes through a pointer or when it
-	 * does not act on a local. */
-	const bool revert = lookup.lastPtrInQual >= 0 || !isLocalRef(pd);
-	const bool useWV = pd->revertOn && revert;
-
-	const bool isParseFinish = useWV
-		? lookup.objMethod->opcodeWV == IN_PARSE_FINISH_WV
-		: lookup.objMethod->opcodeWC == IN_PARSE_FINISH_WC;
-
-	if ( isParseFinish ) {
-		code.append( IN_PARSE_SAVE_STEPS );
-		code.append( IN_PARSE_LOAD_START );
-		code.append( useWV ? IN_PARSE_FINISH_WV : IN_PARSE_FINISH_WC );
-		code.appendHalf( 0 );
-		code.append( IN_PCR_CALL );
-		code.append( useWV ? IN_PARSE_FINISH_WV3 : IN_PARSE_FINISH_WC3 );
-	}
-	else {
-		code.append( useWV ? lookup.objMethod->opcodeWV : lookup.objMethod->opcodeWC );
-	}
-
-	/* Some methods carry their function id as an operand. */
-	if ( lookup.objMethod->useFuncId )
-		code.appendHalf( lookup.objMethod->funcId );
-}
-
-/* After a call, emit a single IN_POP_N_WORDS that removes the words counted
- * for the qualifications of the reference arguments: two per qualification
- * item. Nothing is emitted when there are no arguments or nothing to pop. */
-void LangVarRef::popRefQuals( Compiler *pd, CodeVect &code,
-		VarRefLookup &lookup, ExprVect *args ) const
-{
-	if ( args == 0 )
-		return;
-
-	/* Add up the words belonging to reference arguments. */
-	long popCount = 0;
-	for ( ExprVect::Iter arg = *args; arg.lte(); arg++ ) {
-		UniqueType *paramUT = lookup.objMethod->paramUTs[arg.pos()];
-		if ( paramUT->typeId != TYPE_REF )
-			continue;
-
-		LangExpr *expression = *arg;
-		LangVarRef *argRef = expression->term->varRef;
-		popCount += argRef->qual->length() * 2;
-	}
-
-	if ( popCount > 0 ) {
-		code.append( IN_POP_N_WORDS );
-		code.appendHalf( (short)popCount );
-	}
-}
-
-/* Generate a complete method call: push the arguments, emit the call, pop
- * the qualification words of reference arguments and release the references
- * that were taken. Returns the type recorded by the method lookup. */
-UniqueType *LangVarRef::evaluateCall( Compiler *pd, CodeVect &code, ExprVect *args )
-{
-	/* Resolve the method being called. */
-	VarRefLookup lookup = lookupMethod( pd );
-
-	/* Arguments first; this also records which fields were referenced. */
-	ObjField **paramRefs = evaluateArgs( pd, code, lookup, args );
-
-	/* Then the call opcode and the cleanup of reference qualifications. */
-	callOperation( pd, code, lookup );
-	popRefQuals( pd, code, lookup, args );
-
-	/* The references stop being active once the call is over. */
-	resetActiveRefs( pd, lookup, paramRefs );
-	delete[] paramRefs;
-
-	return lookup.uniqueType;
-}
-
-/* Generate a pattern match against the tree named by varRef: evaluate the
- * tree, emit IN_MATCH with the pattern's id, then emit the stores for the
- * variables bound by the pattern. Returns the type of the matched tree. */
-UniqueType *LangTerm::evaluateMatch( Compiler *pd, CodeVect &code ) const
-{
-	/* Give a bind id to every pattern item that binds a variable. */
-	for ( PatternItemList::Iter item = *pattern->list; item.lte(); item++ ) {
-		if ( item->varRef == 0 )
-			continue;
-		item->bindId = pattern->nextBindId++;
-	}
-
-	UniqueType *matchUT = varRef->evaluate( pd, code );
-	if ( matchUT->typeId != TYPE_TREE )
-		error(varRef->loc) << "expected match against a tree type" << endp;
-
-	/* The pattern parser needs to know the language element type. */
-	pattern->langEl = matchUT->langEl;
-
-	code.append( IN_MATCH );
-	code.appendHalf( pattern->patRepId );
-
-	/* Store the bound variables, walking the items last to first. */
-	for ( PatternItemList::Iter item = pattern->list->last(); item.gtb(); item-- ) {
-		if ( item->varRef == 0 )
-			continue;
-
-		/* The value bound is a tree of the factor's language element. */
-		UniqueType *boundUT = pd->findUniqueType( TYPE_TREE, item->factor->langEl );
-
-		/* Locate the variable being assigned and store into it. */
-		VarRefLookup lookup = item->varRef->lookupField( pd );
-
-		item->varRef->loadObj( pd, code, lookup.lastPtrInQual, false );
-		item->varRef->setField( pd, code, lookup.inObject, boundUT, false );
-	}
-
-	return matchUT;
-}
-
-/* Generate code for "new": evaluate the operand, which must be a tree, emit
- * IN_TREE_NEW and return the pointer type for the operand's language
- * element. */
-UniqueType *LangTerm::evaluateNew( Compiler *pd, CodeVect &code ) const
-{
-	UniqueType *operandUT = expr->evaluate( pd, code );
-	if ( operandUT->typeId != TYPE_TREE )
-		error() << "new can only be applied to tree types" << endp;
-
-	code.append( IN_TREE_NEW );
-	return pd->findUniqueType( TYPE_PTR, operandUT->langEl );
-}
-
-/* Emit the stores for the field initializers of a construct expression. The
- * initializer values were evaluated in order, so the last one is on the top
- * of the stack and the fields are assigned in reverse. Does nothing when
- * there are no initializers. */
-void LangTerm::assignFieldArgs( Compiler *pd, CodeVect &code, UniqueType *replUT ) const
-{
-	if ( fieldInitArgs == 0 || fieldInitArgs->length() <= 0 )
-		return;
-
-	ObjectDef *objDef = objDefFromUT( pd, replUT );
-
-	/* Note the backwards walk. */
-	for ( FieldInitVect::Iter init = fieldInitArgs->last(); init.gtb(); init-- ) {
-		FieldInit *fieldInit = *init;
-
-		/* Initializers are positional: the n-th one sets field n. */
-		ObjField *field = objDef->findFieldNum( init.pos() );
-		if ( field == 0 ) {
-			error(fieldInit->loc) << "failed to find init pos " <<
-				init.pos() << " in object" << endp;
-		}
-
-		/* The expression type must be assignable to the field type. */
-		UniqueType *fieldUT = field->typeRef->uniqueType;
-		if ( !castAssignment( pd, code, fieldUT, 0, fieldInit->exprUT ) )
-			error(fieldInit->loc) << "type mismatch in initialization" << endp;
-
-		/* This set-field variant has to leave the object on the top of the
-		 * stack for the next initializer. */
-		code.append( IN_SET_FIELD_LEAVE_WC );
-		code.appendHalf( field->offset );
-	}
-}
-
-/* Generate a construct expression: evaluate the field initializers and the
- * expressions embedded in the replacement, emit IN_CONSTRUCT, assign the
- * initialized fields and, when the term names a variable, store a copy of
- * the result in it. Returns the constructed type. */
-UniqueType *LangTerm::evaluateConstruct( Compiler *pd, CodeVect &code ) const
-{
-	/* Evaluate the field initializers in order, remembering their types. */
-	if ( fieldInitArgs != 0 && fieldInitArgs->length() > 0 ) {
-		for ( FieldInitVect::Iter init = *fieldInitArgs; init.lte(); init++ ) {
-			FieldInit *fieldInit = *init;
-			fieldInit->exprUT = fieldInit->expr->evaluate( pd, code );
-		}
-	}
-
-	/* Give a bind id to every replacement item that has an expression. */
-	for ( ReplItemList::Iter item = *replacement->list; item.lte(); item++ ) {
-		if ( item->expr == 0 )
-			continue;
-		item->bindId = replacement->nextBindId++;
-	}
-
-	/* Evaluate the embedded expressions, last item first. */
-	for ( ReplItemList::Iter item = replacement->list->last(); item.gtb(); item-- ) {
-		if ( item->type != ReplItem::ExprType )
-			continue;
-
-		UniqueType *itemUT = item->expr->evaluate( pd, code );
-		if ( itemUT->typeId != TYPE_TREE )
-			error() << "variables used in replacements must be trees" << endp;
-
-		item->langEl = itemUT->langEl;
-	}
-
-	/* Build the tree from the information stored in the compiled code. */
-	code.append( IN_CONSTRUCT );
-	code.appendHalf( replacement->patRepId );
-
-	/* The declared type is the target of replacement parsing. */
-	UniqueType *replUT = typeRef->uniqueType;
-	if ( replUT->typeId != TYPE_TREE )
-		error(loc) << "don't know how to construct this type" << endp;
-
-	/* A parser generic additionally gets an input constructed and set. */
-	const bool isParser = replUT->langEl->generic != 0 &&
-			replUT->langEl->generic->typeId == GEN_PARSER;
-	if ( isParser ) {
-		code.append( IN_CONSTRUCT_INPUT );
-		code.append( IN_DUP_TOP_OFF );
-		code.appendHalf( 1 );
-		code.append( IN_SET_INPUT );
-	}
-
-	replacement->langEl = replUT->langEl;
-	assignFieldArgs( pd, code, replUT );
-
-	/* Optionally capture the result in a variable. */
-	if ( varRef != 0 ) {
-		code.append( IN_DUP_TOP );
-
-		VarRefLookup lookup = varRef->lookupField( pd );
-
-		varRef->loadObj( pd, code, lookup.lastPtrInQual, false );
-		varRef->setField( pd, code, lookup.inObject, replUT, false );
-	}
-
-	return replUT;
-}
-
-/* Generate a parse (or parse_stop, when stop is true) expression.
- *
- * The code constructs the parser, loads the context (nil when the parsed type
- * has none), attaches the input, runs the parse fragment and finishes it
- * straight away. When the term names a variable a copy of the result is
- * stored in it.
- *
- * Takes one argument (the input) when the parsed type has no context, and two
- * (context, then input) when it has one. Each must be a stream or a tree.
- * All violations are fatal compile errors. Returns the parsed type. */
-UniqueType *LangTerm::evaluateParse( Compiler *pd, CodeVect &code, bool stop ) const
-{
-	UniqueType *ut = typeRef->uniqueType;
-	assert( ut != 0 );
-
-	/* This must be fatal (endp): everything below uses ut->langEl on the
-	 * assumption that the check passed. */
-	if ( ut->typeId != TYPE_TREE )
-		error(loc) << "can only parse trees" << endp;
-
-	/* Either just the input, or a context followed by the input. */
-	if ( args == 0 || ( args->length() != 1 && args->length() != 2 ) )
-		error(loc) << "expecting one or two args" << endp;
-
-	/* Positions of the two arguments in args; context is -1 when absent. */
-	int context, input;
-	if ( ut->langEl->contextIn == 0 ) {
-		if ( args->length() != 1 )
-			error(loc) << "parse command requires just input" << endp;
-		context = -1;
-		input = 0;
-	}
-	else {
-		if ( args->length() != 2 )
-			error(loc) << "parse command requires context and input" << endp;
-		context = 0;
-		input = 1;
-	}
-
-	/*
-	 * Make the parser.
-	 */
-	code.append( IN_CONSTRUCT );
-	code.appendHalf( replacement->patRepId );
-
-	/* Duplicate the parser once; the later steps re-fetch it with
-	 * IN_DUP_TOP_OFF. */
-	code.append( IN_DUP_TOP );
-
-	/*
-	 * First load the context into the parser.
-	 */
-	if ( context < 0 ) {
-		code.append( IN_LOAD_NIL );
-	}
-	else {
-		UniqueType *ctxUT = args->data[context]->evaluate( pd, code );
-		if ( ctxUT != pd->uniqueTypeStream && ctxUT->typeId != TYPE_TREE )
-			error(loc) << "context argument must be a stream or a tree" << endp;
-	}
-
-	/* FIXME: need to select right one here. */
-	code.append( IN_DUP_TOP_OFF );
-	code.appendHalf( 1 );
-	code.append( IN_SET_ACCUM_CTX_WC );
-
-	/*
-	 * Evaluate the parse input.
-	 */
-	UniqueType *argUT = args->data[input]->evaluate( pd, code );
-	if ( argUT != pd->uniqueTypeStream && argUT->typeId != TYPE_TREE )
-		error(loc) << "input argument must be a stream or a tree" << endp;
-
-	/* Allocate a parser id. This will cause a parser to be built for
-	 * the type. */
-	if ( ut->langEl->parserId < 0 )
-		ut->langEl->parserId = pd->nextParserId++;
-
-	/* Record that the type is used with parse stop so that compatibility
-	 * can be verified. */
-	if ( stop )
-		ut->langEl->parseStop = true;
-
-	/* Revert (WV) or commit (WC) flavour of the instructions below. */
-	const bool useWV = pd->revertOn;
-
-	/* Anything that is not already an input gets wrapped in a new one. */
-	if ( argUT != pd->uniqueTypeInput ) {
-		code.append( IN_CONSTRUCT_INPUT );
-		code.append( useWV ? IN_INPUT_APPEND_WV : IN_INPUT_APPEND_WC );
-	}
-
-	code.append( IN_DUP_TOP_OFF );
-	code.appendHalf( 1 );
-	code.append( IN_SET_INPUT );
-
-	const int stopId = stop ? ut->langEl->id : 0;
-
-	/* Parse the fragment. */
-	code.append( IN_PARSE_SAVE_STEPS );
-	code.append( IN_PARSE_LOAD_START );
-	code.append( useWV ? IN_PARSE_FRAG_WV : IN_PARSE_FRAG_WC );
-	code.appendHalf( stopId );
-	code.append( IN_PCR_CALL );
-	code.append( useWV ? IN_PARSE_FRAG_WV3 : IN_PARSE_FRAG_WC3 );
-
-	/* Finish immediately. */
-	code.append( IN_PARSE_SAVE_STEPS );
-	code.append( IN_PARSE_LOAD_START );
-	code.append( useWV ? IN_PARSE_FINISH_WV : IN_PARSE_FINISH_WC );
-	code.appendHalf( stopId );
-	code.append( IN_PCR_CALL );
-	code.append( useWV ? IN_PARSE_FINISH_WV3 : IN_PARSE_FINISH_WC3 );
-
-	/* Store the generic's language element in the replacement object so
-	 * that replacement parsing has a target. */
-	replacement->langEl = generic->langEl;
-
-	/* Optionally capture the result in a variable. */
-	if ( varRef != 0 ) {
-		code.append( IN_DUP_TOP );
-
-		/* Get the type of the variable being assigned to. */
-		VarRefLookup lookup = varRef->lookupField( pd );
-
-		varRef->loadObj( pd, code, lookup.lastPtrInQual, false );
-		varRef->setField( pd, code, lookup.inObject, ut, false );
-	}
-
-	return ut;
-}
-
-/* Generate a string with embedded expressions. Every item pushes one value:
- * literal factors and input text are interned in pd->literalStrings and
- * loaded with IN_LOAD_STR, expressions are evaluated in place. The values
- * are then joined with one IN_CONCAT_STR fewer than there are items.
- * Returns the string type. */
-UniqueType *LangTerm::evaluateEmbedString( Compiler *pd, CodeVect &code ) const
-{
-	for ( ReplItemList::Iter item = *replItemList; item.lte(); item++ ) {
-		switch ( item->type ) {
-			case ReplItem::FactorType: {
-				/* Process the literal's text first. */
-				String literal;
-				bool ignoredCI;
-				prepareLitString( literal, ignoredCI,
-						item->factor->typeRef->pdaLiteral->token.data,
-						item->factor->typeRef->pdaLiteral->token.loc );
-
-				/* Intern it; a newly inserted string takes the next index. */
-				StringMapEl *strEl = 0;
-				if ( pd->literalStrings.insert( literal, &strEl ) )
-					strEl->value = pd->literalStrings.length()-1;
-
-				code.append( IN_LOAD_STR );
-				code.appendWord( strEl->value );
-				break;
-			}
-			case ReplItem::InputText: {
-				/* Intern it; a newly inserted string takes the next index. */
-				StringMapEl *strEl = 0;
-				if ( pd->literalStrings.insert( item->data, &strEl ) )
-					strEl->value = pd->literalStrings.length()-1;
-
-				code.append( IN_LOAD_STR );
-				code.appendWord( strEl->value );
-				break;
-			}
-			case ReplItem::ExprType:
-				item->expr->evaluate( pd, code );
-				break;
-		}
-	}
-
-	/* N pushed values need N-1 concatenations. */
-	long remaining = replItemList->length();
-	while ( --remaining > 0 )
-		code.append( IN_CONCAT_STR );
-
-	return pd->uniqueTypeStr;
-}
-
-/* Generate the code for a term and return the type of the value produced.
- * Simple terms (nil, true, false, numbers, strings, typeid, search) are
- * handled here; the rest is dispatched to the evaluate* helper for the term
- * kind. Returns null when the term type is none of the known kinds. */
-UniqueType *LangTerm::evaluate( Compiler *pd, CodeVect &code ) const
-{
-	switch ( type ) {
-		case VarRefType:
-			return varRef->evaluate( pd, code );
-		case MethodCallType:
-			return varRef->evaluateCall( pd, code, args );
-		case NilType:
-			code.append( IN_LOAD_NIL );
-			return pd->uniqueTypeNil;
-		case TrueType:
-			code.append( IN_LOAD_TRUE );
-			return pd->uniqueTypeBool;
-		case FalseType:
-			code.append( IN_LOAD_FALSE );
-			return pd->uniqueTypeBool;
-		case MakeTokenType:
-			return evaluateMakeToken( pd, code );
-		case MakeTreeType:
-			return evaluateMakeTree( pd, code );
-		case NumberType: {
-			/* The literal text is converted with atoi and loaded as a word. */
-			unsigned int n = atoi( data );
-			code.append( IN_LOAD_INT );
-			code.appendWord( n );
-			return pd->uniqueTypeInt;
-		}
-		case StringType: {
-			String interp;
-			bool unused;
-			prepareLitString( interp, unused, data, InputLoc() );
-
-			/* Intern the string; a newly inserted one takes the next index. */
-			StringMapEl *mapEl = 0;
-			if ( pd->literalStrings.insert( interp, &mapEl ) )
-				mapEl->value = pd->literalStrings.length()-1;
-
-			code.append( IN_LOAD_STR );
-			code.appendWord( mapEl->value );
-			return pd->uniqueTypeStr;
-		}
-		case MatchType:
-			return evaluateMatch( pd, code );
-		case ParseType:
-			return evaluateParse( pd, code, false );
-		case ParseStopType:
-			return evaluateParse( pd, code, true );
-		case ConstructType:
-			return evaluateConstruct( pd, code );
-		case NewType:
-			return evaluateNew( pd, code );
-		case TypeIdType: {
-			/* typeid loads the id of the type's language element. */
-			UniqueType *ut = typeRef->uniqueType;
-			if ( ut->typeId != TYPE_TREE )
-				error() << "typeid can only be applied to tree types" << endp;
-
-			code.append( IN_LOAD_INT );
-			code.appendWord( ut->langEl->id );
-			return pd->uniqueTypeInt;
-		}
-		case SearchType: {
-			/* The type being searched for. */
-			UniqueType *ut = typeRef->uniqueType;
-			if ( ut->typeId != TYPE_TREE )
-				error(loc) << "can only search for tree types" << endp;
-
-			/* The tree being searched. The error has to be fatal (endp),
-			 * like every other type error here, so that no search
-			 * instruction is generated for a non-tree operand. */
-			UniqueType *treeUT = varRef->evaluate( pd, code );
-			if ( treeUT->typeId != TYPE_TREE )
-				error(loc) << "search can be applied only to tree types" << endp;
-
-			code.append( IN_TREE_SEARCH );
-			code.appendWord( ut->langEl->id );
-			return ut;
-		}
-		case EmbedStringType: {
-			return evaluateEmbedString( pd, code );
-		}
-	}
-	return 0;
-}
-
-/* Generate the code for an expression and return the type of its value.
- *
- * Binary: + on two ints or two strings; - * / on two ints; == and != on
- * operands of the same type; < > <= >= without a type check; && and || with
- * a jump over the right operand. Unary: ! $ % ^ and pointer dereference.
- * Terms are passed on to LangTerm::evaluate.
- *
- * Type violations are fatal compile errors. Returns null only for an
- * unknown expression type. */
-UniqueType *LangExpr::evaluate( Compiler *pd, CodeVect &code ) const
-{
-	switch ( type ) {
-		case BinaryType: {
-			switch ( op ) {
-				case '+': {
-					UniqueType *lt = left->evaluate( pd, code );
-					UniqueType *rt = right->evaluate( pd, code );
-
-					if ( lt == pd->uniqueTypeInt && rt == pd->uniqueTypeInt ) {
-						code.append( IN_ADD_INT );
-						return pd->uniqueTypeInt;
-					}
-
-					if ( lt == pd->uniqueTypeStr && rt == pd->uniqueTypeStr ) {
-						code.append( IN_CONCAT_STR );
-						return pd->uniqueTypeStr;
-					}
-
-					error(loc) << "do not have an addition operator for these types" << endp;
-					break;
-				}
-				case '-': {
-					UniqueType *lt = left->evaluate( pd, code );
-					UniqueType *rt = right->evaluate( pd, code );
-
-					if ( lt == pd->uniqueTypeInt && rt == pd->uniqueTypeInt ) {
-						code.append( IN_SUB_INT );
-						return pd->uniqueTypeInt;
-					}
-
-					error(loc) << "do not have a subtraction operator for these types" << endp;
-					break;
-				}
-				case '*': {
-					UniqueType *lt = left->evaluate( pd, code );
-					UniqueType *rt = right->evaluate( pd, code );
-
-					if ( lt == pd->uniqueTypeInt && rt == pd->uniqueTypeInt ) {
-						code.append( IN_MULT_INT );
-						return pd->uniqueTypeInt;
-					}
-
-					error(loc) << "do not have a multiplication "
-							"operator for these types" << endp;
-					break;
-				}
-				case '/': {
-					UniqueType *lt = left->evaluate( pd, code );
-					UniqueType *rt = right->evaluate( pd, code );
-
-					if ( lt == pd->uniqueTypeInt && rt == pd->uniqueTypeInt ) {
-						code.append( IN_DIV_INT );
-						return pd->uniqueTypeInt;
-					}
-
-					error(loc) << "do not have a division "
-							"operator for these types" << endp;
-					break;
-				}
-				case OP_DoubleEql: {
-					UniqueType *lt = left->evaluate( pd, code );
-					UniqueType *rt = right->evaluate( pd, code );
-
-					if ( lt != rt )
-						error(loc) << "comparison of different types" << endp;
-
-					code.append( IN_TST_EQL );
-					return pd->uniqueTypeBool;
-				}
-				case OP_NotEql: {
-					UniqueType *lt = left->evaluate( pd, code );
-					UniqueType *rt = right->evaluate( pd, code );
-
-					if ( lt != rt )
-						error(loc) << "comparison of different types" << endp;
-
-					code.append( IN_TST_NOT_EQL );
-					return pd->uniqueTypeBool;
-				}
-				case '<': {
-					left->evaluate( pd, code );
-					right->evaluate( pd, code );
-
-					code.append( IN_TST_LESS );
-					return pd->uniqueTypeBool;
-				}
-				case '>': {
-					left->evaluate( pd, code );
-					right->evaluate( pd, code );
-
-					code.append( IN_TST_GRTR );
-					return pd->uniqueTypeBool;
-				}
-				case OP_LessEql: {
-					left->evaluate( pd, code );
-					right->evaluate( pd, code );
-
-					code.append( IN_TST_LESS_EQL );
-					return pd->uniqueTypeBool;
-				}
-				case OP_GrtrEql: {
-					left->evaluate( pd, code );
-					right->evaluate( pd, code );
-
-					code.append( IN_TST_GRTR_EQL );
-					return pd->uniqueTypeBool;
-				}
-				case OP_LogicalAnd: {
-					/* Evaluate the left and duplicate it. */
-					left->evaluate( pd, code );
-					code.append( IN_DUP_TOP );
-
-					/* Jump over the right if false, leaving the original left
-					 * result on the top of the stack. We don't know the
-					 * distance yet so record the position of the jump. */
-					long jump = code.length();
-					code.append( IN_JMP_FALSE );
-					code.appendHalf( 0 );
-
-					/* Evaluate the right, add the test. */
-					right->evaluate( pd, code );
-					code.append( IN_TST_LOGICAL_AND );
-
-					/* Patch the jump operand. The distance is counted from
-					 * the end of the three byte jump instruction. */
-					long distance = code.length() - jump - 3;
-					code.setHalf( jump+1, distance );
-
-					return pd->uniqueTypeInt;
-				}
-				case OP_LogicalOr: {
-					/* Evaluate the left and duplicate it. */
-					left->evaluate( pd, code );
-					code.append( IN_DUP_TOP );
-
-					/* Jump over the right if true, leaving the original left
-					 * result on the top of the stack. We don't know the
-					 * distance yet so record the position of the jump. */
-					long jump = code.length();
-					code.append( IN_JMP_TRUE );
-					code.appendHalf( 0 );
-
-					/* Evaluate the right, add the test. */
-					right->evaluate( pd, code );
-					code.append( IN_TST_LOGICAL_OR );
-
-					/* Patch the jump operand. The distance is counted from
-					 * the end of the three byte jump instruction. */
-					long distance = code.length() - jump - 3;
-					code.setHalf( jump+1, distance );
-
-					return pd->uniqueTypeInt;
-				}
-			}
-
-			/* Reached by an unknown operator, and by the arithmetic cases
-			 * that break after reporting an error. */
-			assert(false);
-			return 0;
-		}
-		case UnaryType: {
-			switch ( op ) {
-				case '!': {
-					right->evaluate( pd, code );
-					code.append( IN_NOT );
-					return pd->uniqueTypeBool;
-				}
-				case '$': {
-					right->evaluate( pd, code );
-					code.append( IN_TREE_TO_STR );
-					return pd->uniqueTypeStr;
-				}
-				case '%': {
-					right->evaluate( pd, code );
-					code.append( IN_TREE_TO_STR_NOTRIM );
-					return pd->uniqueTypeStr;
-				}
-				case '^': {
-					/* Trimming keeps the type of the operand. */
-					UniqueType *rt = right->evaluate( pd, code );
-					code.append( IN_TREE_TRIM );
-					return rt;
-				}
-				case OP_Deref: {
-					/* The error has to be fatal (endp) so that no dereference
-					 * is generated for something that is not a pointer. */
-					UniqueType *ut = right->evaluate( pd, code );
-					if ( ut->typeId != TYPE_PTR )
-						error(loc) << "can only dereference pointers" << endp;
-
-					/* The result is the tree type of the pointer's language
-					 * element. */
-					code.append( IN_PTR_DEREF_R );
-					ut = pd->findUniqueType( TYPE_TREE, ut->langEl );
-					return ut;
-				}
-				default:
-					assert(false);
-			}
-			return 0;
-		}
-		case TermType: {
-			return term->evaluate( pd, code );
-		}
-	}
-	return 0;
-}
-
-/* Generate the store of an already evaluated expression of type exprUT into
- * the variable this reference names. Fatal errors: the field has an active
- * reference, part of the qualification or the field itself is const, or the
- * expression type cannot be cast to the field type. */
-void LangVarRef::assignValue( Compiler *pd, CodeVect &code,
-		UniqueType *exprUT ) const
-{
-	/* Resolve the left hand side. */
-	VarRefLookup lookup = lookupField( pd );
-	ObjField *target = lookup.objField;
-
-	if ( target->refActive )
-		error(loc) << "reference active, cannot write to object" << endp;
-
-	if ( lookup.firstConstPart >= 0 ) {
-		error(loc) << "left hand side qualification \"" <<
-			qual->data[lookup.firstConstPart].data << "\" is const" << endp;
-	}
-
-	if ( target->isConst )
-		error(loc) << "field \"" << name << "\" is const" << endp;
-
-	/* A write always leaves the tree dirty. */
-	target->dirtyTree = true;
-
-	/* Type check the assignment, casting where that is possible. */
-	UniqueType *objUT = target->typeRef->uniqueType;
-	assert( lookup.uniqueType == target->typeRef->uniqueType );
-	if ( !castAssignment( pd, code, objUT, lookup.iterSearchUT, exprUT ) )
-		error(loc) << "type mismatch in assignment" << endp;
-
-	/* Writes through a pointer or to a non-local have to be reverted. */
-	const bool revert = lookup.lastPtrInQual >= 0 || !isLocalRef(pd);
-
-	/* Load the object, then emit the store. Iterators are set through
-	 * their own instruction and never reverted. */
-	loadObj( pd, code, lookup.lastPtrInQual, true );
-
-	if ( lookup.uniqueType->typeId != TYPE_ITER )
-		setField( pd, code, lookup.inObject, exprUT, revert );
-	else
-		setFieldIter( pd, code, lookup.inObject, lookup.uniqueType, exprUT, false );
-}
-
-/* Generate a make_token call: evaluate all arguments in order, then emit
- * IN_MAKE_TOKEN followed by the argument count. At least two arguments are
- * required; the first must be an int and the second a string. Returns the
- * "any" type. */
-UniqueType *LangTerm::evaluateMakeToken( Compiler *pd, CodeVect &code ) const
-{
-	/* The restriction to translation blocks is switched off here:
-	 *   if ( pd->compileContext != Compiler::CompileTranslation )
-	 *       error(loc) << "make_token can be used only in a translation block" << endp;
-	 */
-
-	/* Check the argument count. */
-	const int numArgs = ( args == 0 ) ? 0 : args->length();
-	if ( numArgs < 2 )
-		error(loc) << "need at least two arguments" << endp;
-
-	for ( ExprVect::Iter arg = *args; arg.lte(); arg++ ) {
-		UniqueType *argUT = (*arg)->evaluate( pd, code );
-
-		if ( arg.pos() == 0 && argUT != pd->uniqueTypeInt )
-			error(loc) << "first arg, id, must be an int" << endp;
-
-		if ( arg.pos() == 1 && argUT != pd->uniqueTypeStr )
-			error(loc) << "second arg, length, must be a string" << endp;
-	}
-
-	/* All arguments are pushed, emit the instruction and the count. */
-	code.append( IN_MAKE_TOKEN );
-	code.append( args->length() );
-
-	return pd->uniqueTypeAny;
-}
-
-/* Generate a make_tree call: evaluate all arguments in order, then emit
- * IN_MAKE_TREE followed by the argument count. Only allowed while compiling
- * a translation block. At least one argument is required and the first must
- * be an int. Returns the "any" type. */
-UniqueType *LangTerm::evaluateMakeTree( Compiler *pd, CodeVect &code ) const
-{
-	if ( pd->compileContext != Compiler::CompileTranslation )
-		error(loc) << "make_tree can be used only in a translation block" << endp;
-
-	/* Check the argument count. */
-	const int numArgs = ( args == 0 ) ? 0 : args->length();
-	if ( numArgs < 1 )
-		error(loc) << "need at least one argument" << endp;
-
-	for ( ExprVect::Iter arg = *args; arg.lte(); arg++ ) {
-		UniqueType *argUT = (*arg)->evaluate( pd, code );
-
-		if ( arg.pos() == 0 && argUT != pd->uniqueTypeInt )
-			error(loc) << "first arg, nonterm id, must be an int" << endp;
-	}
-
-	/* All arguments are pushed, emit the instruction and the count. */
-	code.append( IN_MAKE_TREE );
-	code.append( args->length() );
-
-	return pd->uniqueTypeAny;
-}
-
-/* Generate the loop of a for-iterator statement, given an iterator that has
- * already been created in objField. Layout of the generated code: advance
- * the iterator, jump out when the advance yields false, the body, a jump
- * back to the advance, and finally the destruction of the iterator. Break
- * jumps recorded in pd->breakJumps while compiling the body are pointed at
- * the destroy instruction and the list is then emptied. */
-void LangStmt::compileForIterBody( Compiler *pd,
-		CodeVect &code, UniqueType *iterUT ) const
-{
-	/* The loop re-enters at the advance instruction. */
-	const long loopTop = code.length();
-	code.append( iterUT->iterDef->inAdvance );
-	code.appendHalf( objField->offset );
-
-	/* Leave the loop when the test is false. The distance is not known
-	 * yet, so write a placeholder and remember where. */
-	const long exitJump = code.length();
-	code.append( IN_JMP_FALSE );
-	code.appendHalf( 0 );
-
-	/*
-	 * Loop cleanup code that is in effect while the body is compiled: a
-	 * copy of the enclosing cleanup, if any, followed by the destruction
-	 * of this loop's iterator.
-	 */
-	CodeVect bodyCleanup;
-	if ( pd->loopCleanup != 0 )
-		bodyCleanup.setAs( *pd->loopCleanup );
-
-	bodyCleanup.append( iterUT->iterDef->inDestroy );
-	bodyCleanup.appendHalf( objField->offset );
-
-	/* Install it for the body and restore the previous one afterwards. */
-	CodeVect *const outerCleanup = pd->loopCleanup;
-	pd->loopCleanup = &bodyCleanup;
-
-	for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ )
-		stmt->compile( pd, code );
-
-	pd->loopCleanup = outerCleanup;
-
-	/* Jump back to the top to advance and retest. The three accounts for
-	 * the jump instruction itself. */
-	const long retestDist = code.length() - loopTop + 3;
-	code.append( IN_JMP );
-	code.appendHalf( -retestDist );
-
-	/* The exit jump lands right here. */
-	const long exitDist = code.length() - exitJump - 3;
-	code.setHalf( exitJump+1, exitDist );
-
-	/* So do all the break jumps collected from the body. */
-	for ( LongVect::Iter brk = pd->breakJumps; brk.lte(); brk++ ) {
-		const long brkDist = code.length() - *brk - 3;
-		code.setHalf( *brk+1, brkDist );
-	}
-	pd->breakJumps.empty();
-
-	/* Destroy the iterator. */
-	code.append( iterUT->iterDef->inDestroy );
-	code.appendHalf( objField->offset );
-}
-
-/* Build the iterator call used when a for statement names a plain variable
- * instead of an iterator call. For trees, references, iterators and pointers
- * the result is a call of "triter" with the variable as its only argument.
- * It is a fatal error when the variable is not a local or has a type with no
- * default iterator. Returns a newly allocated method-call term. */
-LangTerm *LangStmt::chooseDefaultIter( Compiler *pd, LangTerm *fromVarRef ) const
-{
-	/* The choice of iterator depends on the type of the variable. */
-	VarRefLookup lookup = fromVarRef->varRef->lookupField( pd );
-
-	if ( lookup.inObject->type != ObjectDef::FrameType )
-		error(loc) << "root of iteration must be a local" << endp;
-
-	LangVarRef *iterName = 0;
-	if ( lookup.uniqueType->typeId == TYPE_TREE ||
-			lookup.uniqueType->typeId == TYPE_REF ||
-			lookup.uniqueType->typeId == TYPE_ITER ||
-			lookup.uniqueType->typeId == TYPE_PTR )
-	{
-		/* An unqualified reference to the iterator's name. */
-		iterName = new LangVarRef( loc, new QualItemVect, "triter" );
-	}
-	else {
-		error(loc) << "there is no default iterator for a "
-			"root of that type" << endp;
-	}
-
-	/* The single argument: the variable being iterated. */
-	ExprVect *iterArgs = new ExprVect;
-	LangExpr *rootExpr = new LangExpr( new LangTerm(
-			LangTerm::VarRefType, fromVarRef->varRef ) );
-	iterArgs->append( rootExpr );
-
-	return new LangTerm( iterName, iterArgs );
-}
-
-/* Generate a complete for-iterator statement: work out the iterator call
- * (falling back to the default iterator when the term is not a method call),
- * give the loop variable its iterator type, push the call arguments, create
- * the iterator, compile the loop and clean up the references taken for the
- * arguments. The whole statement runs inside its own iterator scope of the
- * current local frame. */
-void LangStmt::compileForIter( Compiler *pd, CodeVect &code ) const
-{
-	pd->curLocalFrame->iterPushScope();
-
-	/* Use the term as the iterator call when it is one, otherwise wrap it
-	 * in the default iterator. */
-	LangTerm *iterCall = langTerm;
-	if ( iterCall->type != LangTerm::MethodCallType )
-		iterCall = chooseDefaultIter( pd, langTerm );
-
-	/* The type we are searching for. */
-	UniqueType *searchUT = typeRef->uniqueType;
-
-	/*
-	 * Declare the iterator variable.
-	 */
-	VarRefLookup lookup = iterCall->varRef->lookupMethod( pd );
-	if ( lookup.objMethod->iterDef == 0 ) {
-		error(loc) << "attempt to iterate using something "
-			"that is not an iterator" << endp;
-	}
-
-	/* With the iterator known, the loop variable's type reference can be
-	 * made. */
-	UniqueType *iterVarUT = pd->findUniqueType( TYPE_ITER, lookup.objMethod->iterDef );
-	objField->typeRef = new TypeRef( loc, lookup.objMethod->iterDef, iterVarUT, searchUT );
-
-	/* Also force the field to be initialized. */
-	pd->curLocalFrame->initField( pd, objField );
-
-	/*
-	 * Create the iterator from the local var.
-	 */
-	UniqueType *iterUT = objField->typeRef->uniqueType;
-
-	/* Evaluate and push the arguments. */
-	ObjField **paramRefs = iterCall->varRef->evaluateArgs(
-			pd, code, lookup, iterCall->args );
-
-	/* The create opcode comes in a revert and a commit flavour. */
-	if ( pd->revertOn )
-		code.append( iterUT->iterDef->inCreateWV );
-	else
-		code.append( iterUT->iterDef->inCreateWC );
-
-	/* Operands: the variable's offset, the function id when the iterator
-	 * is backed by a function, and the id of the searched-for type when
-	 * the iterator uses one. */
-	code.appendHalf( objField->offset );
-	if ( lookup.objMethod->func != 0 )
-		code.appendHalf( lookup.objMethod->func->funcId );
-
-	if ( iterUT->iterDef->useSearchUT ) {
-		if ( searchUT->typeId == TYPE_PTR )
-			code.appendHalf( pd->uniqueTypePtr->langEl->id );
-		else
-			code.appendHalf( searchUT->langEl->id );
-	}
-
-	compileForIterBody( pd, code, iterUT );
-
-	/* Undo what the argument evaluation set up. */
-	iterCall->varRef->popRefQuals( pd, code, lookup, iterCall->args );
-
-	iterCall->varRef->resetActiveRefs( pd, lookup, paramRefs );
-	delete[] paramRefs;
-
-	pd->curLocalFrame->iterPopScope();
-}
-
-/*
- * Compile a while loop. Emits: the test, a jump-if-false past the loop, the
- * body, and a jump back to the test. Forward jump distances are not known
- * when the jump is emitted, so a zero placeholder is written and patched
- * once the target is reached.
- *
- * Break statements in the body record their jump positions in
- * pd->breakJumps. Only the breaks recorded while compiling THIS loop's body
- * are patched here. Breaks already pending for an enclosing loop are set
- * aside first and restored afterwards, so that a nested while does not
- * redirect the outer loop's breaks to the end of the inner loop.
- */
-void LangStmt::compileWhile( Compiler *pd, CodeVect &code ) const
-{
-    pd->curLocalFrame->iterPushScope();
-
-    /* Set aside break jumps that belong to an enclosing loop. */
-    LongVect outerBreaks;
-    for ( LongVect::Iter brk = pd->breakJumps; brk.lte(); brk++ )
-        outerBreaks.append( *brk );
-    pd->breakJumps.empty();
-
-    /* Generate code for the while test. Remember the top. */
-    long top = code.length();
-    expr->evaluate( pd, code );
-
-    /* Jump past the while block if false. Note that we don't have the
-     * distance yet. */
-    long jumpFalse = code.length();
-    code.append( IN_JMP_FALSE );
-    code.appendHalf( 0 );
-
-    /* Compute the while block. */
-    for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ )
-        stmt->compile( pd, code );
-
-    /* Jump back to the top to retest. The +3 accounts for the jump
-     * instruction itself (opcode plus half-word operand). */
-    long retestDist = code.length() - top + 3;
-    code.append( IN_JMP );
-    code.appendHalf( -retestDist );
-
-    /* Set the jump false distance. */
-    long falseDist = code.length() - jumpFalse - 3;
-    code.setHalf( jumpFalse+1, falseDist );
-
-    /* Patch the break jumps recorded by this loop's body so they land just
-     * past the loop. */
-    for ( LongVect::Iter brk = pd->breakJumps; brk.lte(); brk++ ) {
-        long distance = code.length() - *brk - 3;
-        code.setHalf( *brk+1, distance );
-    }
-    pd->breakJumps.empty();
-
-    /* Hand the enclosing loop's pending breaks back. */
-    for ( LongVect::Iter brk = outerBreaks; brk.lte(); brk++ )
-        pd->breakJumps.append( *brk );
-
-    pd->curLocalFrame->iterPopScope();
-}
-
-/*
- * Emit code for a parser statement. The parser reference is evaluated once,
- * then for every item of the parser text the item's value is loaded,
- * appended to the parser's input and a parse-fragment sequence is emitted.
- * The parser reference is popped at the end.
- */
-void LangStmt::evaluateParserItems( Compiler *pd, CodeVect &code ) const
-{
-    varRef->evaluate( pd, code );
-
-    ReplItemList::Iter item = *parserText->list;
-    while ( item.lte() ) {
-        /* Load the value of the item. */
-        switch ( item->type ) {
-        case ReplItem::FactorType: {
-            String literal;
-            bool ignoredCI;
-            prepareLitString( literal, ignoredCI,
-                    item->factor->typeRef->pdaLiteral->token.data,
-                    item->factor->typeRef->pdaLiteral->token.loc );
-
-            /* Register the string if it is new, then load it by index. */
-            StringMapEl *strEl = 0;
-            if ( pd->literalStrings.insert( literal, &strEl ) )
-                strEl->value = pd->literalStrings.length()-1;
-
-            code.append( IN_LOAD_STR );
-            code.appendWord( strEl->value );
-            break;
-        }
-        case ReplItem::InputText: {
-            /* Register the string if it is new, then load it by index. */
-            StringMapEl *strEl = 0;
-            if ( pd->literalStrings.insert( item->data, &strEl ) )
-                strEl->value = pd->literalStrings.length()-1;
-
-            code.append( IN_LOAD_STR );
-            code.appendWord( strEl->value );
-            break;
-        }
-        case ReplItem::ExprType:
-            item->expr->evaluate( pd, code );
-            break;
-        }
-
-        code.append( IN_DUP_TOP_OFF );
-        code.appendHalf( 1 );
-
-        /* Not a stream. Get the input first, then append to it. */
-        code.append( IN_GET_INPUT );
-        if ( pd->revertOn )
-            code.append( IN_INPUT_APPEND_WV );
-        else
-            code.append( IN_INPUT_APPEND_WC );
-        code.append( IN_POP );
-
-        code.append( IN_DUP_TOP );
-
-        /* The parse sequence. Only the two fragment opcodes depend on
-         * whether revert or commit code is being produced. */
-        code.append( IN_PARSE_SAVE_STEPS );
-        code.append( IN_PARSE_LOAD_START );
-        if ( pd->revertOn )
-            code.append( IN_PARSE_FRAG_WV );
-        else
-            code.append( IN_PARSE_FRAG_WC );
-        code.appendHalf( 0 );
-        code.append( IN_PCR_CALL );
-        if ( pd->revertOn )
-            code.append( IN_PARSE_FRAG_WV3 );
-        else
-            code.append( IN_PARSE_FRAG_WC3 );
-
-        item++;
-    }
-    code.append( IN_POP );
-}
-
-/*
- * Compile a single statement into 'code', dispatching on the statement
- * type. Forward jumps are emitted with a zero half-word placeholder and
- * patched with setHalf once the distance is known. A jump instruction
- * occupies three code units (opcode plus half-word), hence the "- 3" in
- * the distance computations.
- */
-void LangStmt::compile( Compiler *pd, CodeVect &code ) const
-{
-    switch ( type ) {
-        case PrintType:
-        case PrintXMLACType:
-        case PrintXMLType:
-        case PrintStreamType: {
-            /* Push the args backwards. The resulting types are not needed
-             * here, so they are not collected. */
-            for ( ExprVect::Iter pex = exprPtrVect->last(); pex.gtb(); pex-- )
-                (*pex)->evaluate( pd, code );
-
-            /* Run the printing forwards. */
-            if ( type == PrintType ) {
-                code.append( IN_PRINT );
-                code.append( exprPtrVect->length() );
-            }
-            else if ( type == PrintXMLACType ) {
-                code.append( IN_PRINT_XML_AC );
-                code.append( exprPtrVect->length() );
-            }
-            else if ( type == PrintXMLType ) {
-                code.append( IN_PRINT_XML );
-                code.append( exprPtrVect->length() );
-            }
-            else if ( type == PrintStreamType ) {
-                /* Minus one because the first arg is the stream. */
-                code.append( IN_PRINT_STREAM );
-                code.append( exprPtrVect->length() - 1 );
-            }
-
-            break;
-        }
-        case ExprType: {
-            /* Evaluate the expression, then pop it immediately. */
-            expr->evaluate( pd, code );
-            code.append( IN_POP );
-            break;
-        }
-        case IfType: {
-            pd->curLocalFrame->iterPushScope();
-
-            long jumpFalse = 0, jumpPastElse = 0, distance = 0;
-
-            /* Evaluate the test. */
-            expr->evaluate( pd, code );
-
-            /* Jump past the if block if false. We don't know the distance
-             * yet so store the location of the jump. */
-            jumpFalse = code.length();
-            code.append( IN_JMP_FALSE );
-            code.appendHalf( 0 );
-
-            /* Compile the if true branch. */
-            for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ )
-                stmt->compile( pd, code );
-
-            if ( elsePart != 0 ) {
-                /* Jump past the else code for the if true branch. */
-                jumpPastElse = code.length();
-                code.append( IN_JMP );
-                code.appendHalf( 0 );
-            }
-
-            /* Set the distance for the jump false case. */
-            distance = code.length() - jumpFalse - 3;
-            code.setHalf( jumpFalse+1, distance );
-
-            pd->curLocalFrame->iterPopScope();
-
-            if ( elsePart != 0 ) {
-                /* Compile the else branch. */
-                elsePart->compile( pd, code );
-
-                /* Set the distance for jump over the else part. */
-                distance = code.length() - jumpPastElse - 3;
-                code.setHalf( jumpPastElse+1, distance );
-            }
-
-            break;
-        }
-        case ElseType: {
-            pd->curLocalFrame->iterPushScope();
-
-            /* Compile the else branch. */
-            for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ )
-                stmt->compile( pd, code );
-
-            pd->curLocalFrame->iterPopScope();
-            break;
-        }
-        case RejectType: {
-            code.append( IN_REJECT );
-            break;
-        }
-        case WhileType: {
-            compileWhile( pd, code );
-            break;
-        }
-        case AssignType: {
-            /* Evaluate the expression. */
-            UniqueType *exprUT = expr->evaluate( pd, code );
-
-            /* Do the assignment. */
-            varRef->assignValue( pd, code, exprUT );
-            break;
-        }
-        case ForIterType: {
-            compileForIter( pd, code );
-            break;
-        }
-        case ReturnType: {
-            /* Evaluate the expression. */
-            UniqueType *exprUT = expr->evaluate( pd, code );
-
-            if ( pd->curFunction == 0 ) {
-                /* In the main function. */
-                pd->mainReturnUT = exprUT;
-            }
-            else {
-                UniqueType *resUT = pd->curFunction->typeRef->uniqueType;
-                if ( !castAssignment( pd, code, resUT, 0, exprUT ) )
-                    error(loc) << "return value wrong type" << endp;
-            }
-
-            code.append( IN_SAVE_RET );
-
-            /* The loop cleanup code. */
-            if ( pd->loopCleanup != 0 )
-                code.append( *pd->loopCleanup );
-
-            /* Jump to the return label. The distance will be filled in
-             * later. */
-            pd->returnJumps.append( code.length() );
-            code.append( IN_JMP );
-            code.appendHalf( 0 );
-            break;
-        }
-        case BreakType: {
-            /* Record the jump position; the enclosing loop patches it. */
-            pd->breakJumps.append( code.length() );
-            code.append( IN_JMP );
-            code.appendHalf( 0 );
-            break;
-        }
-        case YieldType: {
-            /* Take a reference and yield it. Immediately reset the
-             * reference. */
-            varRef->preEvaluateRef( pd, code );
-            ObjField *objField = varRef->evaluateRef( pd, code, 0 );
-            code.append( IN_YIELD );
-
-            if ( varRef->qual->length() > 0 ) {
-                code.append( IN_POP_N_WORDS );
-                code.appendHalf( (short)(varRef->qual->length()*2) );
-            }
-
-            objField->refActive = false;
-            break;
-        }
-        case ParserType: {
-            evaluateParserItems( pd, code );
-            break;
-        }
-    }
-}
-
-/* Compile every statement of the block, in order, into 'code'. */
-void CodeBlock::compile( Compiler *pd, CodeVect &code ) const
-{
-    StmtList::Iter cur = *stmtList;
-    while ( cur.lte() ) {
-        cur->compile( pd, code );
-        cur++;
-    }
-}
-
-/*
- * Add a constant, int-typed "match_length" field to 'frame'. The field has
- * no offset; it is read with IN_GET_MATCH_LENGTH_R. 'lel' is not used.
- */
-void Compiler::addMatchLength( ObjectDef *frame, LangEl *lel )
-{
-    ObjField *field = new ObjField( InputLoc(),
-            new TypeRef( InputLoc(), uniqueTypeInt ), "match_length" );
-
-    field->inGetR = IN_GET_MATCH_LENGTH_R;
-    field->useOffset = false;
-    field->isConst = true;
-    field->beenInitialized = true;
-    field->beenReferenced = true;
-
-    frame->insertField( field->name, field );
-}
-
-/*
- * Add a constant, str-typed "match_text" field to 'frame'. The field has no
- * offset; it is read with IN_GET_MATCH_TEXT_R. 'lel' is not used.
- */
-void Compiler::addMatchText( ObjectDef *frame, LangEl *lel )
-{
-    ObjField *field = new ObjField( InputLoc(),
-            new TypeRef( InputLoc(), uniqueTypeStr ), "match_text" );
-
-    field->inGetR = IN_GET_MATCH_TEXT_R;
-    field->useOffset = false;
-    field->isConst = true;
-    field->beenInitialized = true;
-    field->beenReferenced = true;
-
-    frame->insertField( field->name, field );
-}
-
-/*
- * Add the custom, non-const "input" field to 'frame'. It has no offset and
- * is loaded with the IN_LOAD_INPUT_* instructions.
- */
-void Compiler::addInput( ObjectDef *frame )
-{
-    ObjField *field = new ObjField( InputLoc(),
-            new TypeRef( InputLoc(), uniqueTypeInput ), "input" );
-
-    /* Load instructions. */
-    field->inGetR = IN_LOAD_INPUT_R;
-    field->inGetWV = IN_LOAD_INPUT_WV;
-    field->inGetWC = IN_LOAD_INPUT_WC;
-
-    /* Flags. */
-    field->isCustom = true;
-    field->useOffset = false;
-    field->isConst = false;
-    field->beenInitialized = true;
-    field->beenReferenced = true;
-
-    frame->insertField( field->name, field );
-}
-
-/*
- * Add the custom, non-const "ctx" field to 'frame'. It has no offset and is
- * loaded with the IN_LOAD_CTX_* instructions.
- */
-void Compiler::addCtx( ObjectDef *frame )
-{
-    ObjField *field = new ObjField( InputLoc(),
-            new TypeRef( InputLoc(), uniqueTypeStream ), "ctx" );
-
-    /* Load instructions. */
-    field->inGetR = IN_LOAD_CTX_R;
-    field->inGetWV = IN_LOAD_CTX_WV;
-    field->inGetWC = IN_LOAD_CTX_WC;
-
-    /* Flags. */
-    field->isCustom = true;
-    field->useOffset = false;
-    field->isConst = false;
-    field->beenInitialized = true;
-    field->beenReferenced = true;
-
-    frame->insertField( field->name, field );
-}
-
-/* Give 'el' the get/set instructions used for object fields. */
-void Compiler::initFieldInstructions( ObjField *el )
-{
-    /* Setters. */
-    el->inSetWV = IN_SET_FIELD_WV;
-    el->inSetWC = IN_SET_FIELD_WC;
-
-    /* Getters. */
-    el->inGetWV = IN_GET_FIELD_WV;
-    el->inGetWC = IN_GET_FIELD_WC;
-    el->inGetR = IN_GET_FIELD_R;
-}
-
-/* Give 'el' the get/set instructions used for local variables. Only the
- * R and WC forms are assigned. */
-void Compiler::initLocalInstructions( ObjField *el )
-{
-    el->inSetWC = IN_SET_LOCAL_WC;
-    el->inGetWC = IN_GET_LOCAL_WC;
-    el->inGetR = IN_GET_LOCAL_R;
-}
-
-/* Give 'el' the get/set instructions used for locals that hold a
- * reference. Only the R and WC forms are assigned. */
-void Compiler::initLocalRefInstructions( ObjField *el )
-{
-    el->inSetWC = IN_SET_LOCAL_REF_WC;
-    el->inGetWC = IN_GET_LOCAL_REF_WC;
-    el->inGetR = IN_GET_LOCAL_REF_R;
-}
-
-/* Create the builtin "int" object definition, attach it to the int
- * language element and register its to_string method. */
-void Compiler::initIntObject( )
-{
-    ObjectDef *def = new ObjectDef( ObjectDef::BuiltinType, "int", nextObjectId++ );
-    intObj = def;
-    intLangEl->objectDef = def;
-
-    initFunction( uniqueTypeStr, def, "to_string",
-            IN_INT_TO_STR, IN_INT_TO_STR, true );
-}
-
-/* Insert a constant, int-typed "length" field into 'objDef'. The field has
- * no offset; the caller supplies the opcode that reads it. */
-void Compiler::addLengthField( ObjectDef *objDef, Code getLength )
-{
-    ObjField *lengthEl = new ObjField( InputLoc(),
-            new TypeRef( InputLoc(), uniqueTypeInt ), "length" );
-
-    lengthEl->inGetR = getLength;
-    lengthEl->useOffset = false;
-    lengthEl->isConst = true;
-    lengthEl->beenInitialized = true;
-    lengthEl->beenReferenced = true;
-
-    objDef->insertField( lengthEl->name, lengthEl );
-}
-
-/*
- * Create the builtin "str" object definition and attach it to the str
- * language element. Registers the int-returning conversion methods and the
- * length field on it, and the global sprintf function.
- */
-void Compiler::initStrObject( )
-{
-    ObjectDef *def = new ObjectDef( ObjectDef::BuiltinType, "str", nextObjectId++ );
-    strObj = def;
-    strLangEl->objectDef = def;
-
-    /* Methods on str that return an int. */
-    UniqueType *intUT = uniqueTypeInt;
-    initFunction( intUT, def, "atoi", IN_STR_ATOI, IN_STR_ATOI, true );
-    initFunction( intUT, def, "uord8", IN_STR_UORD8, IN_STR_UORD8, true );
-    initFunction( intUT, def, "sord8", IN_STR_SORD8, IN_STR_SORD8, true );
-    initFunction( intUT, def, "uord16", IN_STR_UORD16, IN_STR_UORD16, true );
-    initFunction( intUT, def, "sord16", IN_STR_SORD16, IN_STR_SORD16, true );
-    initFunction( intUT, def, "uord32", IN_STR_UORD32, IN_STR_UORD32, true );
-    initFunction( intUT, def, "sord32", IN_STR_SORD32, IN_STR_SORD32, true );
-
-    addLengthField( def, IN_STR_LENGTH );
-
-    /* sprintf lives on the global object: (str, int) -> str. */
-    initFunction( uniqueTypeStr, globalObjectDef, "sprintf",
-            IN_SPRINTF, IN_SPRINTF, uniqueTypeStr, intUT, true );
-}
-
-/* Create the builtin "stream" object definition and attach it to the
- * stream language element. */
-void Compiler::initStreamObject( )
-{
-    ObjectDef *def = new ObjectDef( ObjectDef::BuiltinType,
-            "stream", nextObjectId++ );
-    streamObj = def;
-    streamLangEl->objectDef = def;
-}
-
-/*
- * Create the builtin "accum_stream" object definition, attach it to the
- * input language element and register pull, push and push_ignore on it.
- * The same opcode is passed for both instruction slots of each method.
- */
-void Compiler::initInputObject( )
-{
-    ObjectDef *def = new ObjectDef( ObjectDef::BuiltinType,
-            "accum_stream", nextObjectId++ );
-    inputObj = def;
-    inputLangEl->objectDef = def;
-
-    UniqueType *strUT = uniqueTypeStr;
-    initFunction( strUT, def, "pull",
-            IN_INPUT_PULL_WV, IN_INPUT_PULL_WV, uniqueTypeInt, false );
-    initFunction( strUT, def, "push",
-            IN_INPUT_PUSH_WV, IN_INPUT_PUSH_WV, uniqueTypeAny, false );
-    initFunction( strUT, def, "push_ignore",
-            IN_INPUT_PUSH_IGNORE_WV, IN_INPUT_PUSH_IGNORE_WV, uniqueTypeAny, false );
-}
-
-/*
- * Build (but do not insert) the str-typed "data" token field. It has no
- * offset and uses the token-data get/set instructions.
- */
-ObjField *Compiler::makeDataEl()
-{
-    ObjField *dataField = new ObjField( InputLoc(),
-            new TypeRef( InputLoc(), uniqueTypeStr ), "data" );
-
-    dataField->inGetR = IN_GET_TOKEN_DATA_R;
-    dataField->inSetWC = IN_SET_TOKEN_DATA_WC;
-    dataField->inSetWV = IN_SET_TOKEN_DATA_WV;
-
-    /* Setting beenReferenced to true prevents us from assigning
-     * instructions and an offset to the field. */
-    dataField->useOffset = false;
-    dataField->beenInitialized = true;
-    dataField->beenReferenced = true;
-
-    return dataField;
-}
-
-/*
- * Build (but do not insert) the constant, int-typed "pos" token field. It
- * has no offset and is read with IN_GET_TOKEN_POS_R.
- */
-ObjField *Compiler::makePosEl()
-{
-    ObjField *posField = new ObjField( InputLoc(),
-            new TypeRef( InputLoc(), uniqueTypeInt ), "pos" );
-
-    posField->inGetR = IN_GET_TOKEN_POS_R;
-
-    /* Setting beenReferenced to true prevents us from assigning
-     * instructions and an offset to the field. */
-    posField->useOffset = false;
-    posField->beenInitialized = true;
-    posField->beenReferenced = true;
-    posField->isConst = true;
-
-    return posField;
-}
-
-/*
- * Build (but do not insert) the constant, int-typed "line" token field. It
- * has no offset and is read with IN_GET_TOKEN_LINE_R.
- */
-ObjField *Compiler::makeLineEl()
-{
-    ObjField *lineField = new ObjField( InputLoc(),
-            new TypeRef( InputLoc(), uniqueTypeInt ), "line" );
-
-    lineField->inGetR = IN_GET_TOKEN_LINE_R;
-
-    /* Setting beenReferenced to true prevents us from assigning
-     * instructions and an offset to the field. */
-    lineField->useOffset = false;
-    lineField->beenInitialized = true;
-    lineField->beenReferenced = true;
-    lineField->isConst = true;
-
-    return lineField;
-}
-
-/*
- * Create the default "token" object definition with data, pos and line
- * fields. User terminals without an object definition share it; user
- * terminals that already have one get their own data, pos and line fields.
- */
-void Compiler::initTokenObjects( )
-{
-    /* The shared default definition. */
-    tokenObj = new ObjectDef( ObjectDef::BuiltinType, "token", nextObjectId++ );
-
-    ObjField *sharedData = makeDataEl();
-    tokenObj->insertField( sharedData->name, sharedData );
-
-    ObjField *sharedPos = makePosEl();
-    tokenObj->insertField( sharedPos->name, sharedPos );
-
-    ObjField *sharedLine = makeLineEl();
-    tokenObj->insertField( sharedLine->name, sharedLine );
-
-    /* Give all user terminals the token fields. */
-    for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
-        if ( !lel->isUserTerm )
-            continue;
-
-        if ( lel->objectDef == 0 ) {
-            lel->objectDef = tokenObj;
-            continue;
-        }
-
-        /* The terminal has its own definition: add fresh fields to it. */
-        ObjField *ownData = makeDataEl();
-        lel->objectDef->insertField( ownData->name, ownData );
-
-        ObjField *ownPos = makePosEl();
-        lel->objectDef->insertField( ownPos->name, ownPos );
-
-        ObjField *ownLine = makeLineEl();
-        lel->objectDef->insertField( ownLine->name, ownLine );
-    }
-}
-
-/*
- * Collect into 'trees' the offsets of the current local frame's fields that
- * hold a tree or pointer value. Only fields that use an offset, are not the
- * lhs element, and have been referenced or are parameters are considered.
- */
-void Compiler::findLocalTrees( CharSet &trees )
-{
-    /* We exclude "lhs" from being downrefed because we need to use it after
-     * the frame is cleaned and so it must survive. */
-    ObjFieldList::Iter ol = *curLocalFrame->objFieldList;
-    for ( ; ol.lte(); ol++ ) {
-        ObjField *field = ol->value;
-
-        /* FIXME: This test needs to be improved. Match_text was getting
-         * through before useOffset was tested. What will? */
-        if ( !field->useOffset || field->isLhsEl )
-            continue;
-        if ( !field->beenReferenced && !field->isParam )
-            continue;
-
-        UniqueType *fieldUT = field->typeRef->uniqueType;
-        if ( fieldUT->typeId == TYPE_TREE || fieldUT->typeId == TYPE_PTR )
-            trees.insert( field->offset );
-    }
-}
-
-/* For each production element with a capture field, append the pair
- * (capture field offset, element position) to prod->copy. */
-void Compiler::makeProdCopies( Definition *prod )
-{
-    int elemPos = 0;
-    ProdElList::Iter pel = *prod->prodElList;
-    while ( pel.lte() ) {
-        if ( pel->captureField != 0 ) {
-            prod->copy.append( pel->captureField->offset );
-            prod->copy.append( elemPos );
-        }
-        pel++;
-        elemPos++;
-    }
-}
-
-/*
- * Compile the reduction block of a production into its revert (WV) code
- * vector. The IN_INIT_LOCALS operand is patched once the frame size is
- * known, then the RHS/LHS load and push-back helpers run against the
- * position recorded right after the init instruction.
- */
-void Compiler::compileReductionCode( Definition *prod )
-{
-    CodeBlock *redBlock = prod->redBlock;
-
-    /* Init the compilation context. */
-    compileContext = CompileReduction;
-    curLocalFrame = redBlock->localFrame;
-    revertOn = true;
-    redBlock->frameId = nextFrameId++;
-
-    CodeVect &out = redBlock->codeWV;
-
-    /* Frame init with a placeholder size. */
-    out.append( IN_INIT_LOCALS );
-    out.appendHalf( 0 );
-    long bodyStart = out.length();
-
-    /* The reduce block itself. */
-    redBlock->compile( this, out );
-
-    /* The frame size is known now. Patch the placeholder. */
-    long localsSize = curLocalFrame->size();
-    out.setHalf( 1, localsSize );
-
-    /* Might need to load right hand side values, and the left hand side. */
-    addProdRHSLoads( prod, out, bodyStart );
-    addProdLHSLoad( prod, out, bodyStart );
-    addPushBackLHS( prod, out, bodyStart );
-
-    out.append( IN_PCR_RET );
-
-    /* Now that compilation is done variables are referenced. Make the
-     * local trees descriptor. */
-    findLocalTrees( redBlock->trees );
-}
-
-/*
- * Compile the translation block of a language element into its revert (WV)
- * code vector. The local frame is given match_length, match_text, input
- * and ctx fields first. When the token definition has regex captures, an
- * IN_INIT_CAPTURES instruction is emitted and the leading frame fields are
- * marked referenced.
- */
-void Compiler::compileTranslateBlock( LangEl *langEl )
-{
-    CodeBlock *transBlock = langEl->transBlock;
-
-    /* Set up compilation context. */
-    compileContext = CompileTranslation;
-    curLocalFrame = transBlock->localFrame;
-    revertOn = true;
-    transBlock->frameId = nextFrameId++;
-
-    /* References to the reduce item. */
-    addMatchLength( curLocalFrame, langEl );
-    addMatchText( curLocalFrame, langEl );
-    addInput( curLocalFrame );
-    addCtx( curLocalFrame );
-
-    CodeVect &out = transBlock->codeWV;
-
-    /* Frame init with a placeholder size. */
-    out.append( IN_INIT_LOCALS );
-    out.appendHalf( 0 );
-
-    if ( langEl->tokenDef->reCaptureVect.length() > 0 ) {
-        out.append( IN_INIT_CAPTURES );
-        out.append( langEl->tokenDef->reCaptureVect.length() );
-
-        /* Reference one frame field per capture, starting from the head
-         * of the field list. */
-        ObjFieldList::Iter fieldIt = *curLocalFrame->objFieldList;
-        int captureNum = 0;
-        while ( captureNum < langEl->tokenDef->reCaptureVect.length() ) {
-            curLocalFrame->referenceField( this, fieldIt->value );
-            captureNum++;
-            fieldIt++;
-        }
-    }
-
-    /* Compile the block body. */
-    transBlock->compile( this, out );
-
-    /* The frame size is known now. Patch the placeholder. */
-    long localsSize = curLocalFrame->size();
-    out.setHalf( 1, localsSize );
-
-    out.append( IN_PCR_RET );
-
-    /* Now that compilation is done variables are referenced. Make the
-     * local trees descriptor. */
-    findLocalTrees( transBlock->trees );
-}
-
-/*
- * Compile the pre-eof block of a token region into its revert (WV) code
- * vector. The local frame is given input and ctx fields first.
- */
-void Compiler::compilePreEof( TokenRegion *region )
-{
-    CodeBlock *eofBlock = region->preEofBlock;
-
-    /* Set up compilation context. */
-    compileContext = CompileTranslation;
-    curLocalFrame = region->preEofBlock->localFrame;
-    revertOn = true;
-    eofBlock->frameId = nextFrameId++;
-
-    addInput( curLocalFrame );
-    addCtx( curLocalFrame );
-
-    CodeVect &out = eofBlock->codeWV;
-
-    /* Frame init with a placeholder size. */
-    out.append( IN_INIT_LOCALS );
-    out.appendHalf( 0 );
-
-    /* Compile the block body. */
-    eofBlock->compile( this, out );
-
-    /* The frame size is known now. Patch the placeholder. */
-    long localsSize = curLocalFrame->size();
-    out.setHalf( 1, localsSize );
-
-    out.append( IN_PCR_RET );
-
-    /* Now that compilation is done variables are referenced. Make the
-     * local trees descriptor. */
-    findLocalTrees( eofBlock->trees );
-}
-
-/*
- * Compile the root code block into its commit (WC) code vector; the root
- * block never needs to be reverted. After the frame init an IN_LOAD_ARGV
- * with the argv offset is emitted, and the code ends with IN_STOP.
- */
-void Compiler::compileRootBlock( )
-{
-    CodeBlock *root = rootCodeBlock;
-
-    /* Set up the compile context. No locals are needed for the root code
-     * block, but we need an empty local frame for the compile. */
-    compileContext = CompileRoot;
-    curLocalFrame = rootLocalFrame;
-    revertOn = false;
-
-    /* The block needs a frame id. */
-    root->frameId = nextFrameId++;
-
-    CodeVect &out = root->codeWC;
-
-    /* Frame init with a placeholder size. */
-    out.append( IN_INIT_LOCALS );
-    out.appendHalf( 0 );
-
-    out.append( IN_LOAD_ARGV );
-    out.appendHalf( argvOffset() );
-
-    root->compile( this, out );
-
-    /* The frame size is known now. Store it in frame init. */
-    long localsSize = curLocalFrame->size();
-    out.setHalf( 1, localsSize );
-
-    out.append( IN_STOP );
-
-    /* Make the local trees descriptor. */
-    findLocalTrees( root->trees );
-}
-
-/*
- * Run initField over every field of every language element that has an
- * object definition, then over every field of the global object.
- */
-void Compiler::initAllLanguageObjects()
-{
-    /* Init all user object fields (need consistent size). */
-    for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
-        ObjectDef *def = lel->objectDef;
-        if ( def == 0 )
-            continue;
-
-        ObjFieldList::Iter field = *def->objFieldList;
-        while ( field.lte() ) {
-            def->initField( this, field->value );
-            field++;
-        }
-    }
-
-    /* Init all fields of the global object. */
-    ObjFieldList::Iter globalField = *globalObjectDef->objFieldList;
-    while ( globalField.lte() ) {
-        globalObjectDef->initField( this, globalField->value );
-        globalField++;
-    }
-}
-
-/* Register the length field and the find, insert, store and remove
- * methods on a map generic's object definition. */
-void Compiler::initMapFunctions( GenericType *gen )
-{
-    ObjectDef *mapDef = gen->objDef;
-
-    addLengthField( mapDef, IN_MAP_LENGTH );
-
-    /* find( key ) -> value. */
-    initFunction( gen->utArg, mapDef, "find",
-            IN_MAP_FIND, IN_MAP_FIND, gen->keyUT, true );
-
-    /* insert( key, value ) -> int and store( key, value ) -> int. */
-    initFunction( uniqueTypeInt, mapDef, "insert",
-            IN_MAP_INSERT_WV, IN_MAP_INSERT_WC, gen->keyUT, gen->utArg, false );
-    initFunction( uniqueTypeInt, mapDef, "store",
-            IN_MAP_STORE_WV, IN_MAP_STORE_WC, gen->keyUT, gen->utArg, false );
-
-    /* remove( key ) -> value. */
-    initFunction( gen->utArg, mapDef, "remove",
-            IN_MAP_REMOVE_WV, IN_MAP_REMOVE_WC, gen->keyUT, false );
-}
-
-/*
- * Register the length field and the list methods on a list generic's
- * object definition. "push" uses the same opcodes as "append", and "pop"
- * the same opcodes as "remove_end".
- */
-void Compiler::initListFunctions( GenericType *gen )
-{
-    ObjectDef *listDef = gen->objDef;
-
-    addLengthField( listDef, IN_LIST_LENGTH );
-
-    /* Adding at the end. */
-    initFunction( uniqueTypeInt, listDef, "append",
-            IN_LIST_APPEND_WV, IN_LIST_APPEND_WC, gen->utArg, false );
-    initFunction( uniqueTypeInt, listDef, "push",
-            IN_LIST_APPEND_WV, IN_LIST_APPEND_WC, gen->utArg, false );
-
-    /* Removing from the end. */
-    initFunction( gen->utArg, listDef, "remove_end",
-            IN_LIST_REMOVE_END_WV, IN_LIST_REMOVE_END_WC, false );
-    initFunction( gen->utArg, listDef, "pop",
-            IN_LIST_REMOVE_END_WV, IN_LIST_REMOVE_END_WC, false );
-}
-
-/*
- * Add a field called 'name', typed as the list's element type, to a list
- * generic's object definition. The field uses the list-member get/set
- * instructions and the given fixed offset.
- */
-void Compiler::initListField( GenericType *gen, const char *name, int offset )
-{
-    ObjField *member = new ObjField( InputLoc(),
-            new TypeRef( InputLoc(), gen->utArg ), name );
-
-    member->inGetR = IN_GET_LIST_MEM_R;
-    member->inGetWC = IN_GET_LIST_MEM_WC;
-    member->inGetWV = IN_GET_LIST_MEM_WV;
-    member->inSetWC = IN_SET_LIST_MEM_WC;
-    member->inSetWV = IN_SET_LIST_MEM_WV;
-
-    gen->objDef->insertField( member->name, member );
-
-    /* Zero for head, One for tail. */
-    member->offset = offset;
-    member->useOffset = true;
-    member->beenInitialized = true;
-    member->beenReferenced = true;
-}
-
-/* Add the head, tail and top fields to a list generic. "top" uses the
- * same offset as "tail". */
-void Compiler::initListFields( GenericType *gen )
-{
-    const int headOffset = 0;
-    const int tailOffset = 1;
-
-    initListField( gen, "head", headOffset );
-    initListField( gen, "tail", tailOffset );
-    initListField( gen, "top", tailOffset );
-}
-
-/* Register the length field and the append and insert methods on a vector
- * generic's object definition. */
-void Compiler::initVectorFunctions( GenericType *gen )
-{
-    ObjectDef *vecDef = gen->objDef;
-
-    addLengthField( vecDef, IN_VECTOR_LENGTH );
-
-    /* append( value ) -> int. */
-    initFunction( uniqueTypeInt, vecDef, "append",
-            IN_VECTOR_APPEND_WV, IN_VECTOR_APPEND_WC, gen->utArg, false );
-
-    /* insert( int, value ) -> int. */
-    initFunction( uniqueTypeInt, vecDef, "insert",
-            IN_VECTOR_INSERT_WV, IN_VECTOR_INSERT_WC, uniqueTypeInt, gen->utArg, false );
-}
-
-/* Register the "finish" method on a parser generic's object definition.
- * It returns the generic's argument type. */
-void Compiler::initParserFunctions( GenericType *gen )
-{
-    UniqueType *resultUT = gen->utArg;
-    initFunction( resultUT, gen->objDef, "finish",
-            IN_PARSE_FINISH_WV, IN_PARSE_FINISH_WC, true );
-}
-
-/*
- * Add a "ctx" field to a parser generic's object definition. The field is
- * typed as a tree of the context's language element, has no offset and
- * uses the accum-ctx get/set instructions.
- */
-void Compiler::initCtxField( GenericType *gen )
-{
-    Context *parseCtx = gen->utArg->langEl->contextIn;
-
-    /* Make the type ref and create the field. */
-    UniqueType *ctxTreeUT = findUniqueType( TYPE_TREE, parseCtx->lel );
-    ObjField *ctxField = new ObjField( InputLoc(),
-            new TypeRef( InputLoc(), ctxTreeUT ), "ctx" );
-
-    ctxField->inGetR = IN_GET_ACCUM_CTX_R;
-    ctxField->inGetWC = IN_GET_ACCUM_CTX_WC;
-    ctxField->inGetWV = IN_GET_ACCUM_CTX_WV;
-    ctxField->inSetWC = IN_SET_ACCUM_CTX_WC;
-    ctxField->inSetWV = IN_SET_ACCUM_CTX_WV;
-
-    gen->objDef->insertField( ctxField->name, ctxField );
-
-    ctxField->beenInitialized = true;
-    ctxField->beenReferenced = true;
-    ctxField->useOffset = false;
-}
-
-/* Add the ctx field to a parser generic, but only when the parsed
- * language element has an input context. */
-void Compiler::initParserFields( GenericType *gen )
-{
-    if ( gen->utArg->langEl->contextIn == 0 )
-        return;
-
-    initCtxField( gen );
-}
-
-/*
- * For every generic type in every namespace: resolve its argument (and, for
- * maps, key) unique types, create its builtin object definition, register
- * the methods and fields for its kind, and attach the definition to the
- * generic's language element.
- */
-void Compiler::initGenericTypes()
-{
-    for ( NamespaceList::Iter ns = namespaceList; ns.lte(); ns++ ) {
-        GenericList::Iter gen = ns->genericList;
-        while ( gen.lte() ) {
-            gen->utArg = gen->typeArg->uniqueType;
-
-            if ( gen->typeId == GEN_MAP )
-                gen->keyUT = gen->keyTypeArg->uniqueType;
-
-            gen->objDef = new ObjectDef( ObjectDef::BuiltinType,
-                    gen->name, nextObjectId++ );
-
-            if ( gen->typeId == GEN_MAP ) {
-                initMapFunctions( gen );
-            }
-            else if ( gen->typeId == GEN_LIST ) {
-                initListFunctions( gen );
-                initListFields( gen );
-            }
-            else if ( gen->typeId == GEN_VECTOR ) {
-                initVectorFunctions( gen );
-            }
-            else if ( gen->typeId == GEN_PARSER ) {
-                /* Need to generate a parser for the type. */
-                gen->utArg->langEl->parserId = nextParserId++;
-                initParserFunctions( gen );
-                initParserFields( gen );
-            }
-
-            gen->langEl->objectDef = gen->objDef;
-            gen++;
-        }
-    }
-}
-
-/*
- * Make a user function (or user iterator) callable: insert its parameters
- * into the function's local frame, compute each parameter's frame offset,
- * and register an ObjMethod for it in the global object's method map.
- */
-void Compiler::makeFuncVisible( Function *func, bool isUserIter )
-{
-    func->localFrame = func->codeBlock->localFrame;
-
-    /* First pass: declare the parameters and total up their size. */
-    long nextPos = 0, argWords = 0;
-    UniqueType **argUTs = new UniqueType*[func->paramList->length()];
-    for ( ParameterList::Iter param = *func->paramList; param.lte(); param++ ) {
-        argUTs[nextPos] = param->typeRef->uniqueType;
-
-        if ( func->localFrame->findField( param->name ) != 0 )
-            error(param->loc) << "parameter " << param->name << " redeclared" << endp;
-
-        func->localFrame->insertField( param->name, param );
-        param->beenInitialized = true;
-        param->pos = nextPos;
-
-        /* Initialize the object field as a local variable. We also want
-         * trees downreffed. */
-        if ( argUTs[nextPos]->typeId == TYPE_REF )
-            initLocalRefInstructions( param );
-        else
-            initLocalInstructions( param );
-
-        argWords += sizeOfField( argUTs[nextPos] );
-        nextPos += 1;
-    }
-
-    /* How much space do we need to make for call overhead. */
-    long frameAfterArgs = isUserIter ? IFR_AA : FR_AA;
-
-    /* Second pass: offsets. The running offset is relative to one past the
-     * last item in the array of words containing the args. It moves
-     * downward and must point to the lower half of each argument. */
-    long belowTop = 0;
-    for ( ParameterList::Iter param = *func->paramList; param.lte(); param++ ) {
-        belowTop -= sizeOfField( argUTs[param->pos] );
-
-        /* Going up first we have the frame data, then maybe the user
-         * iterator, then the args from high to low. */
-        param->offset = frameAfterArgs +
-                ( isUserIter ? ( sizeof(UserIter) / sizeof(Word) ) : 0 ) +
-                argWords + belowTop;
-    }
-
-    func->paramListSize = argWords;
-    func->paramUTs = argUTs;
-
-    /* Functions without a declared type return int. */
-    UniqueType *returnUT = uniqueTypeInt;
-    if ( func->typeRef != 0 )
-        returnUT = func->typeRef->uniqueType;
-
-    /* Insert the function into the global function map. */
-    ObjMethod *method = new ObjMethod( returnUT, func->name,
-            IN_CALL_WV, IN_CALL_WC,
-            func->paramList->length(), argUTs, func->paramList, false );
-    method->funcId = func->funcId;
-    method->useFuncId = true;
-    method->useCallObj = false;
-    method->func = func;
-
-    if ( isUserIter )
-        method->iterDef = findIterDef( IterDef::User, func );
-
-    globalObjectDef->objMethodMap->insert( func->name, method );
-}
-
-/*
- * Compile the body of a user iterator into 'code'. If the body is empty or
- * does not end in a yield statement, a nil yield is appended.
- */
-void Compiler::compileUserIter( Function *func, CodeVect &code )
-{
-    CodeBlock *body = func->codeBlock;
-
-    /* Frame init with a placeholder size, patched after the body. */
-    code.append( IN_INIT_LOCALS );
-    code.appendHalf( 0 );
-
-    body->compile( this, code );
-
-    int localsSize = func->localFrame->size();
-    code.setHalf( 1, localsSize );
-
-    /* Does the body already finish with a yield? */
-    bool endsInYield = body->stmtList->length() != 0 &&
-            body->stmtList->tail->type == LangStmt::YieldType;
-    if ( !endsInYield ) {
-        code.append( IN_LOAD_NIL );
-        code.append( IN_YIELD );
-    }
-}
-
-/*
- * Compile a user iterator twice, once with revert on into codeWV and once
- * with revert off into codeWC, then build the local trees descriptor.
- */
-void Compiler::compileUserIter( Function *func )
-{
-    CodeBlock *body = func->codeBlock;
-
-    /* Set up the context. */
-    compileContext = CompileFunction;
-    curFunction = func;
-    body->frameId = nextFrameId++;
-
-    /* Need an object for the local frame. */
-    curLocalFrame = func->codeBlock->localFrame;
-
-    /* Revert version. */
-    revertOn = true;
-    compileUserIter( func, body->codeWV );
-
-    /* Commit version. */
-    revertOn = false;
-    compileUserIter( func, body->codeWC );
-
-    /* Now that compilation is done variables are referenced. Make the
-     * local trees descriptor. */
-    findLocalTrees( body->trees );
-
-    /* FIXME: Need to deal with the freeing of local trees. */
-}
-
-/*
- * Compile a function body into 'code'. Called once per flavour: revert and
- * commit. A nil return is appended when the body is empty or does not end
- * in a return statement. All recorded return jumps are then patched to
- * land on the final IN_RET, and the return-jump list is reset.
- */
-void Compiler::compileFunction( Function *func, CodeVect &code )
-{
-    CodeBlock *body = func->codeBlock;
-
-    /* Frame init with a placeholder size, patched after the body. */
-    code.append( IN_INIT_LOCALS );
-    code.appendHalf( 0 );
-
-    body->compile( this, code );
-
-    int localsSize = func->localFrame->size();
-    code.setHalf( 1, localsSize );
-
-    /* Does the body already finish with a return? */
-    bool endsInReturn = body->stmtList->length() != 0 &&
-            body->stmtList->tail->type == LangStmt::ReturnType;
-    if ( !endsInReturn ) {
-        /* Push the return value. */
-        code.append( IN_LOAD_NIL );
-        code.append( IN_SAVE_RET );
-    }
-
-    /* Patch the return jumps. A jump is three code units long. */
-    LongVect::Iter jump = returnJumps;
-    while ( jump.lte() ) {
-        long distance = code.length() - *jump - 3;
-        code.setHalf( *jump+1, distance );
-        jump++;
-    }
-
-    /* Reset the vector of return jumps. */
-    returnJumps.empty();
-
-    /* Return cleans up the stack (including the args) and leaves the
-     * return value on the top. */
-    code.append( IN_RET );
-}
-
-/*
- * Compile a function twice, once with revert on into codeWV and once with
- * revert off into codeWC, then build the local trees descriptor.
- */
-void Compiler::compileFunction( Function *func )
-{
-    CodeBlock *body = func->codeBlock;
-
-    /* Set up the compilation context. */
-    compileContext = CompileFunction;
-    curFunction = func;
-
-    /* Assign a frame Id. */
-    body->frameId = nextFrameId++;
-
-    /* Need an object for the local frame. */
-    curLocalFrame = func->codeBlock->localFrame;
-
-    /* Revert version. */
-    revertOn = true;
-    compileFunction( func, body->codeWV );
-
-    /* Commit version. */
-    revertOn = false;
-    compileFunction( func, body->codeWC );
-
-    /* Now that compilation is done variables are referenced. Make the
-     * local trees descriptor. */
-    findLocalTrees( body->trees );
-}
-
-/*
- * Register the builtin iterators on the global object: triter, child,
- * rev_child, repeat and rev_repeat. Each takes a reference to "any", is
- * registered with IN_HALT for both instruction slots, and is tied to the
- * matching iterator definition.
- */
-void Compiler::makeDefaultIterators()
-{
-    /* Tree iterator. */
-    {
-        UniqueType *refAnyUT = findUniqueType( TYPE_REF, anyLangEl );
-        ObjMethod *method = initFunction( uniqueTypeAny, globalObjectDef,
-                "triter", IN_HALT, IN_HALT, refAnyUT, true );
-        method->iterDef = findIterDef( IterDef::Tree );
-    }
-
-    /* Child iterator. */
-    {
-        UniqueType *refAnyUT = findUniqueType( TYPE_REF, anyLangEl );
-        ObjMethod *method = initFunction( uniqueTypeAny, globalObjectDef,
-                "child", IN_HALT, IN_HALT, refAnyUT, true );
-        method->iterDef = findIterDef( IterDef::Child );
-    }
-
-    /* Reverse child iterator. */
-    {
-        UniqueType *refAnyUT = findUniqueType( TYPE_REF, anyLangEl );
-        ObjMethod *method = initFunction( uniqueTypeAny, globalObjectDef,
-                "rev_child", IN_HALT, IN_HALT, refAnyUT, true );
-        method->iterDef = findIterDef( IterDef::RevChild );
-    }
-
-    /* Repeat iterator. */
-    {
-        UniqueType *refAnyUT = findUniqueType( TYPE_REF, anyLangEl );
-        ObjMethod *method = initFunction( uniqueTypeAny, globalObjectDef,
-                "repeat", IN_HALT, IN_HALT, refAnyUT, true );
-        method->iterDef = findIterDef( IterDef::Repeat );
-    }
-
-    /* Reverse repeat iterator. */
-    {
-        UniqueType *refAnyUT = findUniqueType( TYPE_REF, anyLangEl );
-        ObjMethod *method = initFunction( uniqueTypeAny, globalObjectDef,
-                "rev_repeat", IN_HALT, IN_HALT, refAnyUT, true );
-        method->iterDef = findIterDef( IterDef::RevRepeat );
-    }
-}
-
-/* Add the constant global field "stdin", typed as a stream and read with
- * the IN_GET_STDIN instruction rather than through an object offset. */
-void Compiler::addStdin()
-{
-    TypeRef *streamType = new TypeRef( InputLoc(), uniqueTypeStream );
-    ObjField *field = new ObjField( InputLoc(), streamType, "stdin" );
-
-    /* Built-in: always considered referenced and initialized. */
-    field->beenReferenced = true;
-    field->beenInitialized = true;
-    field->isConst = true;
-
-    /* Fetched by a dedicated instruction, not by offset. */
-    field->useOffset = false;
-    field->inGetR = IN_GET_STDIN;
-
-    globalObjectDef->insertField( field->name, field );
-}
-
-/* Add the constant global field "stdout", read with the IN_GET_STDOUT
- * instruction rather than through an object offset.
- *
- * The field used to be registered under the name "stout", which does not
- * match its siblings "stdin" and "stderr" and made it unreachable under
- * the expected name. */
-void Compiler::addStdout()
-{
-    /* Make the type ref. */
-    TypeRef *typeRef = new TypeRef( InputLoc(), uniqueTypeStr );
-
-    /* Create the field and insert it into the map. */
-    ObjField *el = new ObjField( InputLoc(), typeRef, "stdout" );
-    el->beenReferenced = true;
-    el->beenInitialized = true;
-    el->isConst = true;
-    el->useOffset = false;
-    el->inGetR = IN_GET_STDOUT;
-    globalObjectDef->insertField( el->name, el );
-}
-
-/* Add the constant global field "stderr", read with the IN_GET_STDERR
- * instruction rather than through an object offset. */
-void Compiler::addStderr()
-{
-    TypeRef *fieldType = new TypeRef( InputLoc(), uniqueTypeStr );
-    ObjField *field = new ObjField( InputLoc(), fieldType, "stderr" );
-
-    /* Built-in: always considered referenced and initialized. */
-    field->beenReferenced = true;
-    field->beenInitialized = true;
-    field->isConst = true;
-
-    /* Fetched by a dedicated instruction, not by offset. */
-    field->useOffset = false;
-    field->inGetR = IN_GET_STDERR;
-
-    globalObjectDef->insertField( field->name, field );
-}
-
-/* Add the constant global field "argv". It is flagged with isArgv so that
- * it can be located again by scanning the global object's fields. */
-void Compiler::addArgv()
-{
-    ObjField *argvField = new ObjField( InputLoc(), argvTypeRef, "argv" );
-    argvField->isArgv = true;
-    argvField->isConst = true;
-
-    globalObjectDef->insertField( argvField->name, argvField );
-}
-
-/* Return the offset of the argv field within the global object, marking
- * the field as referenced. The field is found by its isArgv flag.
- *
- * Returns -1 if no such field exists. That is a programming error and is
- * caught by the assert in debug builds; the explicit return keeps the
- * function well-defined when asserts are compiled out (NDEBUG). */
-int Compiler::argvOffset()
-{
-    for ( ObjFieldList::Iter field = *globalObjectDef->objFieldList;
-            field.lte(); field++ )
-    {
-        if ( field->value->isArgv ) {
-            globalObjectDef->referenceField( this, field->value );
-            return field->value->offset;
-        }
-    }
-
-    /* Only reached if no field carries the isArgv flag. */
-    assert(false);
-    return -1;
-}
-
-/* Register the built-in global functions (open, tolower, toupper, exit,
- * error) and the built-in global fields (stdin, stdout, stderr, argv). */
-void Compiler::initGlobalFunctions()
-{
-    /* open( str, str ) -> stream. */
-    ObjMethod *openFn = initFunction( uniqueTypeStream, globalObjectDef, "open",
-            IN_OPEN_FILE, IN_OPEN_FILE, uniqueTypeStr, uniqueTypeStr, true );
-    openFn->useCallObj = false;
-
-    /* tolower( str ) -> str. */
-    ObjMethod *lowerFn = initFunction( uniqueTypeStr, globalObjectDef, "tolower",
-            IN_TO_LOWER, IN_TO_LOWER, uniqueTypeStr, true );
-    lowerFn->useCallObj = false;
-
-    /* toupper( str ) -> str. */
-    ObjMethod *upperFn = initFunction( uniqueTypeStr, globalObjectDef, "toupper",
-            IN_TO_UPPER, IN_TO_UPPER, uniqueTypeStr, true );
-    upperFn->useCallObj = false;
-
-    /* exit( int ) -> int. The returned method needs no adjustment. */
-    initFunction( uniqueTypeInt, globalObjectDef, "exit",
-            IN_EXIT, IN_EXIT, uniqueTypeInt, true );
-
-    /* error() -> str. The returned method needs no adjustment. */
-    initFunction( uniqueTypeStr, globalObjectDef, "error",
-            IN_ERROR, IN_ERROR, true );
-
-    /* Built-in global fields. */
-    addStdin();
-    addStdout();
-    addStderr();
-    addArgv();
-}
-
-/* Detach every replacement whose 'parse' flag is not set from replList.
- * The iterator is advanced before the element is detached so that the
- * walk stays valid. */
-void Compiler::removeNonUnparsableRepls()
-{
-    ReplList::Iter cursor = replList;
-    while ( cursor.lte() ) {
-        Replacement *candidate = cursor++;
-        if ( candidate->parse )
-            continue;
-        replList.detach( candidate );
-    }
-}
-
-/* Top-level driver for bytecode generation: initialize the built-in
- * objects, iterators and functions, then compile user functions and
- * iterators, reduction code, token translation blocks, pre-eof blocks and
- * finally the root block. */
-void Compiler::compileByteCode()
-{
-// initUniqueTypes();
-    initIntObject();
-    initStrObject();
-    initStreamObject();
-    initInputObject();
-    initTokenObjects();
-    makeDefaultIterators();
-    initAllLanguageObjects();
-    initGenericTypes();
-
-    initGlobalFunctions();
-
-    /* Make every function visible before any body is compiled. */
-    for ( FunctionList::Iter func = functionList; func.lte(); func++ )
-        makeFuncVisible( func, func->isUserIter );
-
-    /* This may be comment rot: The function info structure relies on functions
-     * being compiled first, then iterators. */
-
-    /* Functions and user iterators. The context is set only for the
-     * duration of one compilation and cleared afterwards. */
-    for ( FunctionList::Iter func = functionList; func.lte(); func++ ) {
-        if ( func->inContext != 0 )
-            context = func->inContext;
-
-        if ( func->isUserIter )
-            compileUserIter( func );
-        else
-            compileFunction( func );
-
-        context = 0;
-    }
-
-    /* Reduction code. Production copies are made for every production,
-     * whether or not it has a reduction block. */
-    for ( DefList::Iter def = prodList; def.lte(); def++ ) {
-        makeProdCopies( def );
-        if ( def->redBlock == 0 )
-            continue;
-
-        if ( def->redBlock->context != 0 )
-            context = def->redBlock->context;
-        compileReductionCode( def );
-        context = 0;
-    }
-
-    /* Token translation code. */
-    for ( LelList::Iter langEl = langEls; langEl.lte(); langEl++ ) {
-        if ( langEl->transBlock == 0 )
-            continue;
-
-        if ( langEl->transBlock->context != 0 )
-            context = langEl->transBlock->context;
-        compileTranslateBlock( langEl );
-        context = 0;
-    }
-
-    /* Pre-eof blocks. */
-    for ( RegionList::Iter region = regionList; region.lte(); region++ ) {
-        if ( region->preEofBlock != 0 )
-            compilePreEof( region );
-    }
-
-    /* The init (root) code, then drop replacements that are not parsed. */
-    compileRootBlock( );
-    removeNonUnparsableRepls();
-}
diff --git a/colm/tree.c b/colm/tree.c
deleted file mode 100644
index 66e5e025..00000000
--- a/colm/tree.c
+++ /dev/null
@@ -1,2484 +0,0 @@
-/*
- * Copyright 2008-2012 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Colm.
- *
- * Colm is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Colm is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Colm; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include <colm/pdarun.h>
-#include <colm/tree.h>
-#include <colm/pool.h>
-#include <colm/bytecode.h>
-#include <colm/debug.h>
-#include <colm/map.h>
-#include <string.h>
-#include <stdlib.h>
-#include <assert.h>
-
-#define true 1
-#define false 0
-
-#define BUFFER_INITIAL_SIZE 4096
-
-/* Insert new_el in front of the current head of the list. */
-void listPrepend( List *list, ListEl *new_el )
-{
-    listAddBefore( list, list->head, new_el );
-}
-
-/* Insert new_el after the current tail of the list. */
-void listAppend( List *list, ListEl *new_el )
-{
-    listAddAfter( list, list->tail, new_el );
-}
-
-ListEl *listDetach( List *list, ListEl *el );
-
-/* Detach the head element, returning whatever listDetach returns. */
-ListEl *listDetachFirst( List *list )
-{
-    return listDetach( list, list->head );
-}
-
-/* Detach the tail element, returning whatever listDetach returns. */
-ListEl *listDetachLast( List *list )
-{
-    return listDetach( list, list->tail );
-}
-
-/* Return the element count stored in the list. */
-long listLength( List *list )
-{
-    return list->listLen;
-}
-
-/* Reset a tree iterator: copy the root reference, record the search id and
- * the stack root, and clear the stack size and current reference. */
-void initTreeIter( TreeIter *treeIter, const Ref *rootRef, int searchId, Tree **stackRoot )
-{
-    /* What is being searched and for what. */
-    treeIter->rootRef = *rootRef;
-    treeIter->searchId = searchId;
-
-    /* Stack bookkeeping. */
-    treeIter->stackRoot = stackRoot;
-    treeIter->stackSize = 0;
-
-    /* No current position yet. */
-    treeIter->ref.next = 0;
-    treeIter->ref.kid = 0;
-}
-
-/* Reset a reverse tree iterator. The stack size starts out equal to the
- * given number of children. */
-void initRevTreeIter( RevTreeIter *revTriter, const Ref *rootRef,
-        int searchId, Tree **stackRoot, int children )
-{
-    /* What is being searched and for what. */
-    revTriter->rootRef = *rootRef;
-    revTriter->searchId = searchId;
-
-    /* Stack bookkeeping. */
-    revTriter->stackRoot = stackRoot;
-    revTriter->children = children;
-    revTriter->stackSize = children;
-
-    /* No current position yet. */
-    revTriter->kidAtYield = 0;
-    revTriter->ref.next = 0;
-    revTriter->ref.kid = 0;
-}
-
-/* Reset a user iterator: record the stack root, argument size and search
- * id, and clear the stack size, resume point, frame and current reference. */
-void initUserIter( UserIter *userIter, Tree **stackRoot, long argSize, long searchId )
-{
-    /* Values supplied by the caller. */
-    userIter->stackRoot = stackRoot;
-    userIter->argSize = argSize;
-    userIter->searchId = searchId;
-
-    /* Execution state starts out cleared. */
-    userIter->stackSize = 0;
-    userIter->resume = 0;
-    userIter->frame = 0;
-
-    /* No current position yet. */
-    userIter->ref.next = 0;
-    userIter->ref.kid = 0;
-}
-
-/* Allocate a chain of 'length' kids linked through 'next' and return its
- * head. Returns 0 when length is not positive. Each new kid is pushed onto
- * the front of the chain. */
-Kid *allocAttrs( Program *prg, long length )
-{
-    Kid *head = 0;
-    long remaining;
-
-    for ( remaining = length; remaining > 0; remaining-- ) {
-        Kid *fresh = kidAllocate( prg );
-        fresh->next = head;
-        head = fresh;
-    }
-
-    return head;
-}
-
-/* Release every kid in the chain starting at attrs. Only the kids are
- * freed; the trees they point to are not touched. */
-void freeAttrs( Program *prg, Kid *attrs )
-{
-    Kid *following;
-
-    for ( ; attrs != 0; attrs = following ) {
-        /* Read the link before the kid goes back to the pool. */
-        following = attrs->next;
-        kidFree( prg, attrs );
-    }
-}
-
-/* Release every kid in the chain starting at kid. Only the kids are freed;
- * the trees they point to are not touched. */
-void freeKidList( Program *prg, Kid *kid )
-{
-    Kid *cur = kid;
-
-    while ( cur != 0 ) {
-        /* Read the link before the kid goes back to the pool. */
-        Kid *following = cur->next;
-        kidFree( prg, cur );
-        cur = following;
-    }
-}
-
-/* Store val in the attribute slot at index pos. The ignore heads flagged
- * by AF_LEFT_IGNORE / AF_RIGHT_IGNORE sit in front of the attributes and
- * are skipped first. No reference counts are changed. */
-void setAttr( Tree *tree, long pos, Tree *val )
-{
-    Kid *slot = tree->child;
-    long steps;
-
-    if ( tree->flags & AF_LEFT_IGNORE )
-        slot = slot->next;
-    if ( tree->flags & AF_RIGHT_IGNORE )
-        slot = slot->next;
-
-    for ( steps = pos; steps > 0; steps-- )
-        slot = slot->next;
-
-    slot->tree = val;
-}
-
-/* Fetch attribute 'pos' of the program's global object. */
-Tree *getGlobal( Program *prg, long pos )
-{
-    Tree *globalObj = prg->global;
-    return getAttr( globalObj, pos );
-}
-
-/* Return the tree stored in the attribute slot at index pos. The ignore
- * heads flagged by AF_LEFT_IGNORE / AF_RIGHT_IGNORE sit in front of the
- * attributes and are skipped first. */
-Tree *getAttr( Tree *tree, long pos )
-{
-    Kid *slot = tree->child;
-    long steps;
-
-    if ( tree->flags & AF_LEFT_IGNORE )
-        slot = slot->next;
-    if ( tree->flags & AF_RIGHT_IGNORE )
-        slot = slot->next;
-
-    for ( steps = pos; steps > 0; steps-- )
-        slot = slot->next;
-
-    return slot->tree;
-}
-
-
-/* Return the tree held by the second kid after the ignore heads. */
-Tree *getRepeatNext( Tree *tree )
-{
-    Kid *first = tree->child;
-
-    if ( tree->flags & AF_LEFT_IGNORE )
-        first = first->next;
-    if ( tree->flags & AF_RIGHT_IGNORE )
-        first = first->next;
-
-    Kid *second = first->next;
-    return second->tree;
-}
-
-/* Return the tree held by the first kid after the ignore heads. */
-Tree *getRepeatVal( Tree *tree )
-{
-    Kid *first = tree->child;
-
-    if ( tree->flags & AF_LEFT_IGNORE )
-        first = first->next;
-    if ( tree->flags & AF_RIGHT_IGNORE )
-        first = first->next;
-
-    return first->tree;
-}
-
-/* Return non-zero when no kid follows the ignore heads. */
-int repeatEnd( Tree *tree )
-{
-    Kid *first = tree->child;
-
-    if ( tree->flags & AF_LEFT_IGNORE )
-        first = first->next;
-    if ( tree->flags & AF_RIGHT_IGNORE )
-        first = first->next;
-
-    return first == 0;
-}
-
-/* Return non-zero when the first kid after the ignore heads has no
- * successor. */
-int listLast( Tree *tree )
-{
-    Kid *first = tree->child;
-
-    if ( tree->flags & AF_LEFT_IGNORE )
-        first = first->next;
-    if ( tree->flags & AF_RIGHT_IGNORE )
-        first = first->next;
-
-    return first->next == 0;
-}
-
-/* Return the kid (not the tree) for the attribute slot at index pos. The
- * ignore heads are skipped first. */
-Kid *getAttrKid( Tree *tree, long pos )
-{
-    Kid *slot = tree->child;
-    long steps;
-
-    if ( tree->flags & AF_LEFT_IGNORE )
-        slot = slot->next;
-    if ( tree->flags & AF_RIGHT_IGNORE )
-        slot = slot->next;
-
-    for ( steps = pos; steps > 0; steps-- )
-        slot = slot->next;
-
-    return slot;
-}
-
-/* Append list2 to the end of list1 in place and return the head of the
- * combined chain. If either list is empty the other one is returned. */
-Kid *kidListConcat( Kid *list1, Kid *list2 )
-{
-    Kid *tail;
-
-    if ( list1 == 0 )
-        return list2;
-    if ( list2 == 0 )
-        return list1;
-
-    /* Walk to the last kid of the first list and hook the second one on. */
-    for ( tail = list1; tail->next != 0; tail = tail->next )
-        ;
-    tail->next = list2;
-
-    return list1;
-}
-
-
-/* Create a stream object around an already-open FILE. The FILE pointer is
- * kept in the stream and a source stream is created and initialized for
- * it. */
-Stream *openStreamFile( Program *prg, FILE *file )
-{
-    Stream *stream = (Stream*)mapElAllocate( prg );
-
-    stream->id = LEL_ID_STREAM;
-    stream->file = file;
-
-    stream->in = newSourceStreamFile( file );
-    initSourceStream( stream->in );
-
-    return stream;
-}
-
-/* Create a stream object around a file descriptor. A source stream is
- * created and initialized for the descriptor.
- *
- * The stream's FILE pointer is cleared explicitly: treeFreeRec only calls
- * fclose when stream->file is non-zero, so the field must not be left to
- * whatever the allocator happens to hand back. */
-Stream *openStreamFd( Program *prg, long fd )
-{
-    Stream *res = (Stream*)mapElAllocate( prg );
-    res->id = LEL_ID_STREAM;
-    res->file = 0;
-    res->in = newSourceStreamFd( fd );
-    initSourceStream( res->in );
-    return res;
-}
-
-/* Open the file named by the string tree 'name' and wrap it in a stream.
- * 'mode' must be the one-character string "r" or "w"; the file is opened
- * in binary mode ("rb" / "wb"). Any other mode is reported with fatal().
- *
- * The mode used to be checked with memcmp( givenMode, "r", length ). That
- * accepted the empty string as "r" (a zero-length compare is always equal)
- * and, for modes longer than two bytes, read past the end of the two-byte
- * literal. The length is now checked explicitly.
- *
- * NOTE(review): a failed fopen() is not detected here; the null FILE is
- * passed on to openStreamFile() exactly as before. Confirm what callers
- * expect before changing that.
- * NOTE(review): givenMode is printed with %s although string data looks
- * not to be null terminated (see the file name copy below) -- confirm. */
-Stream *openFile( Program *prg, Tree *name, Tree *mode )
-{
-    Head *headName = ((Str*)name)->value;
-    Head *headMode = ((Str*)mode)->value;
-
-    const char *givenMode = stringData(headMode);
-    long modeLength = stringLength(headMode);
-    const char *fopenMode = 0;
-    if ( modeLength == 1 && givenMode[0] == 'r' )
-        fopenMode = "rb";
-    else if ( modeLength == 1 && givenMode[0] == 'w' )
-        fopenMode = "wb";
-    else {
-        fatal( "unknown file open mode: %s\n", givenMode );
-    }
-
-    /* Need to make a C-string (null terminated). */
-    char *fileName = (char*)malloc(stringLength(headName)+1);
-    memcpy( fileName, stringData(headName), stringLength(headName) );
-    fileName[stringLength(headName)] = 0;
-    FILE *file = fopen( fileName, fopenMode );
-    free(fileName);
-    return openStreamFile( prg, file );
-}
-
-/* Allocate an integer tree holding the value i. The reference count is
- * not set here. */
-Tree *constructInteger( Program *prg, long i )
-{
-    Int *boxed = (Int*) treeAllocate( prg );
-
-    boxed->id = LEL_ID_INT;
-    boxed->value = i;
-    return (Tree*)boxed;
-}
-
-/* Allocate a string tree that holds the given head. The head is stored,
- * not copied, and the reference count is not set here. */
-Tree *constructString( Program *prg, Head *s )
-{
-    Str *boxed = (Str*) treeAllocate( prg );
-
-    boxed->id = LEL_ID_STR;
-    boxed->value = s;
-    return (Tree*)boxed;
-}
-
-/* Allocate a pointer tree for 'tree'. A kid holding the tree is pushed on
- * the front of the program's heap list and the pointer refers to that
- * kid. */
-Tree *constructPointer( Program *prg, Tree *tree )
-{
-    /* Heap cell: new head of prg->heap. */
-    Kid *cell = kidAllocate( prg );
-    cell->tree = tree;
-    cell->next = prg->heap;
-    prg->heap = cell;
-
-    /* The pointer object refers to the cell. */
-    Pointer *ptr = (Pointer*) treeAllocate( prg );
-    ptr->id = LEL_ID_PTR;
-    ptr->value = cell;
-
-    return (Tree*)ptr;
-}
-
-/* Allocate a terminal tree with the given id and token data, a reference
- * count of zero, and one empty attribute slot per entry of the language
- * element's objectLength. */
-Tree *constructTerm( Program *prg, Word id, Head *tokdata )
-{
-    Tree *term = treeAllocate( prg );
-
-    term->id = id;
-    term->refs = 0;
-    term->tokdata = tokdata;
-
-    /* Attribute slots are sized from the runtime data for this id. */
-    int attrCount = prg->rtd->lelInfo[term->id].objectLength;
-    term->child = allocAttrs( prg, attrCount );
-
-    return term;
-}
-
-/* Allocate an input object with a reference count of zero and a freshly
- * malloc'ed, initialized InputStream. */
-Tree *constructInput( Program *prg )
-{
-    Input *obj = inputAllocate( prg );
-
-    obj->refs = 0;
-    obj->id = LEL_ID_INPUT;
-
-    obj->in = malloc( sizeof(InputStream) );
-    initInputStream( obj->in );
-
-    return (Tree*)obj;
-}
-
-Kid *constructReplacementKid( Tree **bindings, Program *prg, Kid *prev, long pat );
-
-/* Build a kid list of ignore tokens by following the 'next' links of the
- * pattern/replacement nodes, starting at ignoreInd and stopping at the
- * first negative index. Every token tree is created with one reference.
- * Returns 0 when ignoreInd is already negative. */
-static Kid *constructIgnoreList( Program *prg, long ignoreInd )
-{
-    PatReplNode *nodes = prg->rtd->patReplNodes;
-    Kid *head = 0;
-    Kid **link = &head;
-    long ind;
-
-    for ( ind = ignoreInd; ind >= 0; ind = nodes[ind].next ) {
-        Head *text = stringAllocPointer( prg, nodes[ind].data, nodes[ind].length );
-
-        Tree *token = treeAllocate( prg );
-        token->refs = 1;
-        token->id = nodes[ind].id;
-        token->tokdata = text;
-
-        Kid *holder = kidAllocate( prg );
-        holder->tree = token;
-        holder->next = 0;
-
-        /* Hook onto the end of the list built so far. */
-        *link = holder;
-        link = &holder->next;
-    }
-
-    return head;
-}
-
-/* Build the ignore list that starts at the leftIgnore index of node pat. */
-static Kid *constructLeftIgnoreList( Program *prg, long pat )
-{
-    long start = prg->rtd->patReplNodes[pat].leftIgnore;
-    return constructIgnoreList( prg, start );
-}
-
-/* Build the ignore list that starts at the rightIgnore index of node pat. */
-static Kid *constructRightIgnoreList( Program *prg, long pat )
-{
-    long start = prg->rtd->patReplNodes[pat].rightIgnore;
-    return constructIgnoreList( prg, start );
-}
-
-/* Attach ignoreList as the left ignore of tree. The tree must not already
- * have one. The new kid becomes the head of the child list, the ignore
- * list gains a reference, and AF_LEFT_IGNORE is set. */
-static void insLeftIgnore( Program *prg, Tree *tree, Tree *ignoreList )
-{
-    assert( ! (tree->flags & AF_LEFT_IGNORE) );
-
-    Kid *head = kidAllocate( prg );
-    head->tree = ignoreList;
-    treeUpref( ignoreList );
-
-    /* The left ignore always sits first in the child list. */
-    head->next = tree->child;
-    tree->child = head;
-
-    tree->flags |= AF_LEFT_IGNORE;
-}
-
-/* Attach ignoreList as the right ignore of tree. The tree must not
- * already have one. The new kid goes right after the left-ignore head if
- * there is one, otherwise at the head of the child list. The ignore list
- * gains a reference and AF_RIGHT_IGNORE is set. */
-static void insRightIgnore( Program *prg, Tree *tree, Tree *ignoreList )
-{
-    assert( ! (tree->flags & AF_RIGHT_IGNORE) );
-
-    Kid *holder = kidAllocate( prg );
-    holder->tree = ignoreList;
-    treeUpref( ignoreList );
-
-    /* Pick the link to splice into: behind the left ignore, or the head. */
-    Kid **link = ( tree->flags & AF_LEFT_IGNORE ) ?
-            &tree->child->next : &tree->child;
-    holder->next = *link;
-    *link = holder;
-
-    tree->flags |= AF_RIGHT_IGNORE;
-}
-
-/* Push rightIgnore onto pushTo as its right ignore and return the tree
- * that was actually modified (pushTo is split first, so this may be a
- * copy). If a right ignore is already present it is kept by making it the
- * left ignore of the new list. */
-Tree *pushRightIgnore( Program *prg, Tree *pushTo, Tree *rightIgnore )
-{
-    /* About to alter the data tree. Split first. */
-    Tree *target = splitTree( prg, pushTo );
-
-    if ( ! (target->flags & AF_RIGHT_IGNORE) ) {
-        /* Nothing there yet, attach the ignore list directly. */
-        insRightIgnore( prg, target, rightIgnore );
-        return target;
-    }
-
-    /* Merge: the existing right ignore becomes the left ignore of the new
-     * list. */
-    Kid *slot = treeRightIgnoreKid( prg, target );
-    Tree *previous = slot->tree;
-    insLeftIgnore( prg, rightIgnore, previous );
-
-    /* Move the slot over to the new list. Touching refs directly is safe
-     * because insLeftIgnore just took a reference on the old one. */
-    previous->refs -= 1;
-    slot->tree = rightIgnore;
-    treeUpref( rightIgnore );
-
-    return target;
-}
-
-/* Push leftIgnore onto pushTo as its left ignore and return the tree that
- * was actually modified (pushTo is split first, so this may be a copy).
- * If a left ignore is already present it is kept by making it the right
- * ignore of the new list. */
-Tree *pushLeftIgnore( Program *prg, Tree *pushTo, Tree *leftIgnore )
-{
-    /* About to alter the tree. Split first. */
-    Tree *target = splitTree( prg, pushTo );
-
-    if ( ! (target->flags & AF_LEFT_IGNORE) ) {
-        /* Nothing there yet, attach the ignore list directly. */
-        insLeftIgnore( prg, target, leftIgnore );
-        return target;
-    }
-
-    /* Merge: the existing left ignore becomes the right ignore of the new
-     * list. */
-    Kid *slot = treeLeftIgnoreKid( prg, target );
-    Tree *previous = slot->tree;
-    insRightIgnore( prg, leftIgnore, previous );
-
-    /* Move the slot over to the new list. Touching refs directly is safe
-     * because insRightIgnore just took a reference on the old one. */
-    previous->refs -= 1;
-    slot->tree = leftIgnore;
-    treeUpref( leftIgnore );
-
-    return target;
-}
-
-/* Remove the left ignore of tree, which must have one: drop the reference
- * held on the ignore list, free the head kid of the child list and clear
- * AF_LEFT_IGNORE. */
-static void remLeftIgnore( Program *prg, Tree **sp, Tree *tree )
-{
-    assert( tree->flags & AF_LEFT_IGNORE );
-
-    Kid *head = tree->child;
-    Kid *rest = head->next;
-
-    treeDownref( prg, sp, head->tree );
-    kidFree( prg, head );
-    tree->child = rest;
-
-    tree->flags &= ~AF_LEFT_IGNORE;
-}
-
-/* Remove the right ignore of tree, which must have one: drop the
- * reference held on the ignore list, unlink and free its kid (second in
- * the child list when a left ignore is present, first otherwise) and
- * clear AF_RIGHT_IGNORE. */
-static void remRightIgnore( Program *prg, Tree **sp, Tree *tree )
-{
-    assert( tree->flags & AF_RIGHT_IGNORE );
-
-    /* The link that currently points at the right-ignore kid. */
-    Kid **link = ( tree->flags & AF_LEFT_IGNORE ) ?
-            &tree->child->next : &tree->child;
-
-    Kid *holder = *link;
-    Kid *rest = holder->next;
-
-    treeDownref( prg, sp, holder->tree );
-    kidFree( prg, holder );
-    *link = rest;
-
-    tree->flags &= ~AF_RIGHT_IGNORE;
-}
-
-/* Undo a pushRightIgnore. The most recently pushed right-ignore list is
- * handed back through *rightIgnore with an extra reference, and the tree
- * that was actually modified is returned (popFrom is split first, so this
- * may be a copy). popFrom must carry a right ignore.
- *
- * If the pushed list carries a left ignore, that was the original right
- * ignore and it is put back in place; otherwise the right ignore is
- * removed altogether.
- *
- * The original right ignore is now read out of its kid before
- * remLeftIgnore() runs. remLeftIgnore() frees exactly that kid, so reading
- * li->tree afterwards, as was done before, touched freed memory. */
-Tree *popRightIgnore( Program *prg, Tree **sp, Tree *popFrom, Tree **rightIgnore )
-{
-    /* Modifying the tree we are detaching from. */
-    popFrom = splitTree( prg, popFrom );
-
-    Kid *riKid = treeRightIgnoreKid( prg, popFrom );
-
-    /* If the right ignore has a left ignore, then that was the original
-     * right ignore. */
-    Kid *li = treeLeftIgnoreKid( prg, riKid->tree );
-    if ( li != 0 ) {
-        /* Keep the original alive across the removal and remember it
-         * while its kid is still valid. */
-        Tree *original = li->tree;
-        treeUpref( original );
-        remLeftIgnore( prg, sp, riKid->tree );
-
-        *rightIgnore = riKid->tree;
-        treeUpref( *rightIgnore );
-        riKid->tree = original;
-    }
-    else {
-        *rightIgnore = riKid->tree;
-        treeUpref( *rightIgnore );
-        remRightIgnore( prg, sp, popFrom );
-    }
-
-    return popFrom;
-}
-
-/* Undo a pushLeftIgnore. The most recently pushed left-ignore list is
- * handed back through *leftIgnore with an extra reference, and the tree
- * that was actually modified is returned (popFrom is split first, so this
- * may be a copy). popFrom must carry a left ignore.
- *
- * If the pushed list carries a right ignore, that was the original left
- * ignore and it is put back in place; otherwise the left ignore is
- * removed altogether.
- *
- * The original left ignore is now read out of its kid before
- * remRightIgnore() runs. remRightIgnore() frees exactly that kid, so
- * reading ri->tree afterwards, as was done before, touched freed memory. */
-Tree *popLeftIgnore( Program *prg, Tree **sp, Tree *popFrom, Tree **leftIgnore )
-{
-    /* Modifying, make the write safe. */
-    popFrom = splitTree( prg, popFrom );
-
-    Kid *liKid = treeLeftIgnoreKid( prg, popFrom );
-
-    /* If the left ignore has a right ignore, then that was the original
-     * left ignore. */
-    Kid *ri = treeRightIgnoreKid( prg, liKid->tree );
-    if ( ri != 0 ) {
-        /* Keep the original alive across the removal and remember it
-         * while its kid is still valid. */
-        Tree *original = ri->tree;
-        treeUpref( original );
-        remRightIgnore( prg, sp, liKid->tree );
-
-        *leftIgnore = liKid->tree;
-        treeUpref( *leftIgnore );
-        liKid->tree = original;
-    }
-    else {
-        *leftIgnore = liKid->tree;
-        treeUpref( *leftIgnore );
-        remLeftIgnore( prg, sp, popFrom );
-    }
-
-    return popFrom;
-}
-
-
-/* Build the tree for pattern/replacement node 'pat'.
- *
- * If the node is bound (bindId > 0) the tree is taken from bindings[] and
- * any left/right ignore lists recorded on the node are pushed onto it.
- * Otherwise a new tree is allocated with one reference, given its token
- * data, attribute slots, children, ignore lists and capture attributes.
- *
- * Returns an uprefed tree. Saves us having to downref and bindings to zero to
- * return a zero-ref tree.
- *
- * The 'kid' parameter is not used in this function. */
-Tree *constructReplacementTree( Kid *kid, Tree **bindings, Program *prg, long pat )
-{
- PatReplNode *nodes = prg->rtd->patReplNodes;
- LangElInfo *lelInfo = prg->rtd->lelInfo;
- Tree *tree = 0;
-
- if ( nodes[pat].bindId > 0 ) {
- /* All bindings have been uprefed. */
- tree = bindings[nodes[pat].bindId];
-
- /* A non-negative index means the node has a left ignore list. Note
- * that the inner 'ignore' (a Kid*) shadows the outer long. */
- long ignore = nodes[pat].leftIgnore;
- Tree *leftIgnore = 0;
- if ( ignore >= 0 ) {
- Kid *ignore = constructLeftIgnoreList( prg, pat );
-
- /* refs is not set here; pushLeftIgnore takes the reference. */
- leftIgnore = treeAllocate( prg );
- leftIgnore->id = LEL_ID_IGNORE;
- leftIgnore->child = ignore;
-
- /* May return a copy of the bound tree (it is split first). */
- tree = pushLeftIgnore( prg, tree, leftIgnore );
- }
-
- /* Same again for the right ignore list. */
- ignore = nodes[pat].rightIgnore;
- Tree *rightIgnore = 0;
- if ( ignore >= 0 ) {
- Kid *ignore = constructRightIgnoreList( prg, pat );
-
- rightIgnore = treeAllocate( prg );
- rightIgnore->id = LEL_ID_IGNORE;
- rightIgnore->child = ignore;
-
- tree = pushRightIgnore( prg, tree, rightIgnore );
- }
- }
- else {
- /* Unbound node: construct a fresh tree. Token data is only allocated
- * when the node has a non-zero length. */
- tree = treeAllocate( prg );
- tree->id = nodes[pat].id;
- tree->refs = 1;
- tree->tokdata = nodes[pat].length == 0 ? 0 :
- stringAllocPointer( prg,
- nodes[pat].data, nodes[pat].length );
-
- int objectLength = lelInfo[tree->id].objectLength;
-
- /* Attribute slots first, then the recursively constructed children. */
- Kid *attrs = allocAttrs( prg, objectLength );
- Kid *child = constructReplacementKid( bindings, prg,
- 0, nodes[pat].child );
-
- tree->child = kidListConcat( attrs, child );
-
- /* Right first, then left. Each head is prepended to the child list,
- * so the left ignore ends up in front of the right ignore. */
- Kid *ignore = constructRightIgnoreList( prg, pat );
- if ( ignore != 0 ) {
- Tree *ignoreList = treeAllocate( prg );
- ignoreList->id = LEL_ID_IGNORE;
- ignoreList->refs = 1;
- ignoreList->child = ignore;
-
- Kid *ignoreHead = kidAllocate( prg );
- ignoreHead->tree = ignoreList;
- ignoreHead->next = tree->child;
- tree->child = ignoreHead;
-
- tree->flags |= AF_RIGHT_IGNORE;
- }
-
- ignore = constructLeftIgnoreList( prg, pat );
- if ( ignore != 0 ) {
- Tree *ignoreList = treeAllocate( prg );
- ignoreList->id = LEL_ID_IGNORE;
- ignoreList->refs = 1;
- ignoreList->child = ignore;
-
- Kid *ignoreHead = kidAllocate( prg );
- ignoreHead->tree = ignoreList;
- ignoreHead->next = tree->child;
- tree->child = ignoreHead;
-
- tree->flags |= AF_LEFT_IGNORE;
- }
-
- /* Capture attributes: the i-th one is built from node pat+1+i and
- * stored at the offset given by the language element's capture
- * attribute table. */
- int i;
- for ( i = 0; i < lelInfo[tree->id].numCaptureAttr; i++ ) {
- long ci = pat+1+i;
- CaptureAttr *ca = prg->rtd->captureAttr + lelInfo[tree->id].captureAttr + i;
- Tree *attr = treeAllocate( prg );
- attr->id = nodes[ci].id;
- attr->refs = 1;
- attr->tokdata = nodes[ci].length == 0 ? 0 :
- stringAllocPointer( prg,
- nodes[ci].data, nodes[ci].length );
-
- setAttr( tree, ca->offset, attr );
- }
- }
-
- return tree;
-}
-
-/* Build the kid list for the sibling chain of pattern/replacement nodes
- * that starts at 'pat' and follows the nodes' 'next' links until -1.
- * Returns 0 when pat is -1. The 'prev' parameter is not used. */
-Kid *constructReplacementKid( Tree **bindings, Program *prg, Kid *prev, long pat )
-{
-    PatReplNode *nodes = prg->rtd->patReplNodes;
-    Kid *head = 0;
-    Kid **link = &head;
-
-    /* Walk the siblings in order, appending one kid per node. */
-    while ( pat != -1 ) {
-        Kid *kid = kidAllocate( prg );
-        kid->tree = constructReplacementTree( kid, bindings, prg, pat );
-
-        *link = kid;
-        link = &kid->next;
-        pat = nodes[pat].next;
-    }
-
-    /* Terminate the list (a no-op when nothing was built). */
-    *link = 0;
-    return head;
-}
-
-/* Build a token tree from nargs arguments on the VM stack. Relative to
- * base = top + nargs: base[-1] is the integer id, base[-2] the text and
- * base[-3] onwards the attribute values. The text is copied. The result
- * has one reference. Ignore tokens get no attribute slots; other tokens
- * get objectLength slots, the first nargs-2 of which are filled from the
- * stack (each value gaining a reference). */
-Tree *constructToken( Program *prg, Tree **root, long nargs )
-{
-    Tree **const sp = root;
-    Tree **base = vm_ptop() + nargs;
-
-    Int *idInt = (Int*)base[-1];
-    Str *textStr = (Str*)base[-2];
-
-    long id = idInt->value;
-    Head *tokdata = stringCopy( prg, textStr->value );
-
-    LangElInfo *lelInfo = prg->rtd->lelInfo;
-    int isIgnore = lelInfo[id].ignore;
-
-    /* Attribute slots are allocated ahead of the tree for real tokens. */
-    long objectLength = 0;
-    Kid *attrs = 0;
-    if ( !isIgnore ) {
-        objectLength = lelInfo[id].objectLength;
-        attrs = allocAttrs( prg, objectLength );
-    }
-
-    Tree *tree = treeAllocate( prg );
-    tree->id = id;
-    tree->refs = 1;
-    tree->tokdata = tokdata;
-
-    if ( !isIgnore ) {
-        tree->child = attrs;
-
-        assert( nargs-2 <= objectLength );
-        long slot;
-        for ( slot = 0; slot < nargs-2; slot++ ) {
-            Tree *arg = base[-3-slot];
-            setAttr( tree, slot, arg );
-            treeUpref( arg );
-        }
-    }
-
-    return tree;
-}
-
-/* Build a tree from nargs arguments on the VM stack. Relative to
- * base = top + nargs: base[-1] is the integer id and base[-2] onwards are
- * the children, in order. The result has one reference, objectLength empty
- * attribute slots, and then one kid per child (each child gaining a
- * reference). */
-Tree *makeTree( Program *prg, Tree **root, long nargs )
-{
-    Tree **const sp = root;
-    Tree **base = vm_ptop() + nargs;
-
-    Int *idInt = (Int*)base[-1];
-    long id = idInt->value;
-
-    Tree *tree = treeAllocate( prg );
-    tree->id = id;
-    tree->refs = 1;
-
-    Kid *attrs = allocAttrs( prg, prg->rtd->lelInfo[id].objectLength );
-
-    /* Chain the children up in stack order. */
-    Kid *firstChild = 0, *tail = 0;
-    long n;
-    for ( n = 0; n < nargs-1; n++ ) {
-        Kid *kid = kidAllocate( prg );
-        kid->tree = base[-2-n];
-        treeUpref( kid->tree );
-
-        if ( tail != 0 )
-            tail->next = kid;
-        else
-            firstChild = kid;
-
-        tail = kid;
-    }
-
-    tree->child = kidListConcat( attrs, firstChild );
-
-    return tree;
-}
-
-/* Return non-zero when tree counts as false: a null tree, the program's
- * falseVal, or an integer tree whose value is zero. */
-int testFalse( Program *prg, Tree *tree )
-{
-    if ( tree == 0 )
-        return 1;
-    if ( tree == prg->falseVal )
-        return 1;
-    if ( tree->id == LEL_ID_INT && ((Int*)tree)->value == 0 )
-        return 1;
-    return 0;
-}
-
-/* Copy an ignore list hanging off a header kid. The header's tree field is
- * treated as the first kid of the list. A new header is returned whose
- * tree field points at the first of the copied kids; every copied kid
- * shares the original's tree, which gains a reference. */
-Kid *copyIgnoreList( Program *prg, Kid *ignoreHeader )
-{
-    Kid *header = kidAllocate( prg );
-    Kid *tail = 0;
-    Kid *src;
-
-    for ( src = (Kid*)ignoreHeader->tree; src != 0; src = src->next ) {
-        Kid *dup = kidAllocate( prg );
-
-        dup->tree = src->tree;
-        dup->tree->refs += 1;
-
-        /* The first copy hangs off the header, the rest off the tail. */
-        if ( tail != 0 )
-            tail->next = dup;
-        else
-            header->tree = (Tree*)dup;
-
-        tail = dup;
-    }
-
-    return header;
-}
-
-/* Copy a kid list. The new kids share the trees of the originals, and each
- * shared tree gains a reference. Returns 0 for an empty list. */
-Kid *copyKidList( Program *prg, Kid *kidList )
-{
-    Kid *head = 0, *tail = 0;
-    Kid *src;
-
-    for ( src = kidList; src != 0; src = src->next ) {
-        Kid *dup = kidAllocate( prg );
-
-        dup->tree = src->tree;
-        treeUpref( dup->tree );
-
-        /* The first copy is the head, the rest hang off the tail. */
-        if ( tail != 0 )
-            tail->next = dup;
-        else
-            head = dup;
-
-        tail = dup;
-    }
-
-    return head;
-}
-
-/* Make a shallow copy of a grammar tree: the id is copied, the token data
- * is duplicated with stringCopy, and every kid of the child list is
- * duplicated while sharing (and taking a reference on) the tree it holds.
- *
- * If one of the copied kids is oldNextDown, the address of its copy is
- * stored in *newNextDown.
- *
- * New tree has zero ref. (refs is not assigned here, so this presumably
- * relies on treeAllocate returning a cleared tree -- confirm.) */
-Tree *copyRealTree( Program *prg, Tree *tree, Kid *oldNextDown, Kid **newNextDown )
-{
- /* Need to keep a lookout for next down. If
- * copying it, return the copy. */
- Tree *newTree = treeAllocate( prg );
-
- newTree->id = tree->id;
- newTree->tokdata = stringCopy( prg, tree->tokdata );
-
- /* Copy the child list. Start with ignores, then the list. */
- Kid *child = tree->child, *last = 0;
-
- /* Left ignores. Only the flag is carried over here; the ignore head is
- * an ordinary kid at the front of the child list and is duplicated by
- * the loop below. */
- if ( tree->flags & AF_LEFT_IGNORE ) {
- newTree->flags |= AF_LEFT_IGNORE;
- }
-
- /* Right ignores. As above: flag here, kid copied by the loop below. */
- if ( tree->flags & AF_RIGHT_IGNORE ) {
- newTree->flags |= AF_RIGHT_IGNORE;
- }
-
- /* Ignore heads, attributes and children. */
- while ( child != 0 ) {
- Kid *newKid = kidAllocate( prg );
-
- /* Watch out for next down. */
- if ( child == oldNextDown )
- *newNextDown = newKid;
-
- newKid->tree = child->tree;
- newKid->next = 0;
-
- /* May be an attribute, in which case the slot can be empty. */
- if ( newKid->tree != 0 )
- newKid->tree->refs += 1;
-
- /* Store the first child. */
- if ( last == 0 )
- newTree->child = newKid;
- else
- last->next = newKid;
-
- child = child->next;
- last = newKid;
- }
-
- return newTree;
-}
-
-/* Copy a generic list. The new list shares the element values of the
- * original, each of which gains a reference. If one of the copied
- * elements is oldNextDown (compared as a Kid pointer), the address of its
- * copy is stored in *newNextDown. */
-List *copyList( Program *prg, List *list, Kid *oldNextDown, Kid **newNextDown )
-{
-    /* Not a need copy. */
-    List *dup = (List*)mapElAllocate( prg );
-    dup->id = list->genericInfo->langElId;
-    dup->genericInfo = list->genericInfo;
-
-    ListEl *src;
-    for ( src = list->head; src != 0; src = src->next ) {
-        ListEl *el = listElAllocate( prg );
-        el->value = src->value;
-        treeUpref( el->value );
-
-        listAppend( dup, el );
-
-        /* Watch out for next down. */
-        if ( (Kid*)src == oldNextDown )
-            *newNextDown = (Kid*)el;
-    }
-
-    return dup;
-}
-
-/* Copy a generic map. The element structure is duplicated with
- * mapCopyBranch when the map has a root; afterwards every element tree of
- * the new map gains a reference. oldNextDown / newNextDown are passed
- * through to mapCopyBranch. */
-Map *copyMap( Program *prg, Map *map, Kid *oldNextDown, Kid **newNextDown )
-{
-    Map *dup = (Map*)mapElAllocate( prg );
-    dup->id = map->genericInfo->langElId;
-    dup->genericInfo = map->genericInfo;
-    dup->treeSize = map->treeSize;
-    dup->root = 0;
-
-    /* If there is a root, copy the tree. */
-    if ( map->root != 0 ) {
-        dup->root = mapCopyBranch( prg, dup, map->root,
-                oldNextDown, newNextDown );
-    }
-
-    /* Take a reference on every value now held by the copy. */
-    MapEl *cur = dup->head;
-    while ( cur != 0 ) {
-        assert( map->genericInfo->typeArg == TYPE_TREE );
-        treeUpref( cur->tree );
-        cur = cur->next;
-    }
-
-    return dup;
-}
-
-/* Copy a tree according to its kind. Lists and maps are copied with
- * copyList / copyMap, grammar trees with copyRealTree. Copying a parser is
- * a fatal error, and pointers, bools, ints, strings and streams must never
- * get here (asserted). The result is expected to have zero references. */
-Tree *copyTree( Program *prg, Tree *tree, Kid *oldNextDown, Kid **newNextDown )
-{
-    LangElInfo *lelInfo = prg->rtd->lelInfo;
-    long genericId = lelInfo[tree->id].genericId;
-
-    if ( genericId > 0 ) {
-        GenericInfo *generic = &prg->rtd->genericInfo[genericId];
-        switch ( generic->type ) {
-            case GEN_LIST:
-                tree = (Tree*) copyList( prg, (List*) tree, oldNextDown, newNextDown );
-                break;
-            case GEN_MAP:
-                tree = (Tree*) copyMap( prg, (Map*) tree, oldNextDown, newNextDown );
-                break;
-            case GEN_PARSER:
-                /* Need to figure out the semantics here. */
-                fatal( "ATTEMPT TO COPY PARSER\n" );
-                assert(false);
-                break;
-            default:
-                break;
-        }
-    }
-    else if ( tree->id == LEL_ID_PTR || tree->id == LEL_ID_BOOL ||
-            tree->id == LEL_ID_INT || tree->id == LEL_ID_STR ||
-            tree->id == LEL_ID_STREAM )
-    {
-        /* None of the built-in value kinds may be copied through here. */
-        assert(false);
-    }
-    else {
-        tree = copyRealTree( prg, tree, oldNextDown, newNextDown );
-    }
-
-    assert( tree->refs == 0 );
-    return tree;
-}
-
-/* Make tree safe to modify. A tree with a single reference is returned
- * as is. A shared tree is copied: the copy gets one reference, the
- * original loses one, and the copy is returned. A null tree yields null. */
-Tree *splitTree( Program *prg, Tree *tree )
-{
-    if ( tree == 0 )
-        return tree;
-
-    assert( tree->refs >= 1 );
-
-    if ( tree->refs > 1 ) {
-        Kid *oldNextDown = 0, *newNextDown = 0;
-        Tree *copy = copyTree( prg, tree, oldNextDown, &newNextDown );
-        treeUpref( copy );
-
-        /* Downref the original. Don't need to consider freeing because
-         * refs were > 1. */
-        tree->refs -= 1;
-
-        tree = copy;
-    }
-
-    assert( tree->refs == 1 );
-    return tree;
-}
-
-/* Create a new, empty instance of the generic type with the given id: a
- * map, a list, or a parser. For a parser the FsmRun and PdaRun are
- * malloc'ed and initialized, and the first token is set up. An unknown
- * generic type asserts and returns 0. */
-Tree *createGeneric( Program *prg, long genericId )
-{
-    GenericInfo *genericInfo = &prg->rtd->genericInfo[genericId];
-
-    if ( genericInfo->type == GEN_MAP ) {
-        Map *map = (Map*)mapElAllocate( prg );
-        map->id = genericInfo->langElId;
-        map->genericInfo = genericInfo;
-        return (Tree*) map;
-    }
-
-    if ( genericInfo->type == GEN_LIST ) {
-        List *list = (List*)mapElAllocate( prg );
-        list->id = genericInfo->langElId;
-        list->genericInfo = genericInfo;
-        return (Tree*) list;
-    }
-
-    if ( genericInfo->type == GEN_PARSER ) {
-        Parser *parser = (Parser*)mapElAllocate( prg );
-        parser->id = genericInfo->langElId;
-        parser->genericInfo = genericInfo;
-        parser->fsmRun = malloc( sizeof(FsmRun) );
-        parser->pdaRun = malloc( sizeof(PdaRun) );
-
-        /* Start off the parsing process. */
-        initPdaRun( parser->pdaRun, prg, prg->rtd->pdaTables,
-                parser->fsmRun, genericInfo->parserId, false, false, 0 );
-        initFsmRun( parser->fsmRun, prg );
-        newToken( prg, parser->pdaRun, parser->fsmRun );
-
-        return (Tree*) parser;
-    }
-
-    assert(false);
-    return 0;
-}
-
-
-/* Free a tree whose reference count has dropped to zero, together with
- * everything that becomes unreferenced as a result.
- *
- * We can't make recursive calls here since the tree we are freeing may be
- * very large. Need the VM stack. Trees held by the node being freed are
- * pushed on the stack at sp; the loop at the bottom pops them again, drops
- * one reference from each and jumps back to free_tree for any that reach
- * zero. The function is done once the stack is back at its starting
- * position. */
-void treeFreeRec( Program *prg, Tree **sp, Tree *tree )
-{
- /* Remember where the stack started so we know when we are done. */
- Tree **top = sp;
- LangElInfo *lelInfo;
- long genericId;
-
-free_tree:
- lelInfo = prg->rtd->lelInfo;
- genericId = lelInfo[tree->id].genericId;
- if ( genericId > 0 ) {
- GenericInfo *generic = &prg->rtd->genericInfo[genericId];
- if ( generic->type == GEN_LIST ) {
- /* Queue every element value, freeing the elements as we go. */
- List *list = (List*) tree;
- ListEl *el = list->head;
- while ( el != 0 ) {
- ListEl *next = el->next;
- vm_push( el->value );
- listElFree( prg, el );
- el = next;
- }
- mapElFree( prg, (MapEl*)list );
- }
- else if ( generic->type == GEN_MAP ) {
- /* Queue every key and value, freeing the elements as we go. */
- Map *map = (Map*)tree;
- MapEl *el = map->head;
- while ( el != 0 ) {
- MapEl *next = el->next;
- vm_push( el->key );
- vm_push( el->tree );
- mapElFree( prg, el );
- el = next;
- }
- mapElFree( prg, (MapEl*)map );
- }
- else if ( generic->type == GEN_PARSER ) {
- /* Tear down the run state, then release the parser's input. */
- Parser *parser = (Parser*)tree;
- clearFsmRun( prg, parser->fsmRun );
- clearPdaRun( prg, sp, parser->pdaRun );
- free( parser->pdaRun );
- free( parser->fsmRun );
- treeDownref( prg, sp, (Tree*)parser->input );
- mapElFree( prg, (MapEl*)parser );
- }
- else {
- assert(false);
- }
- }
- else {
- if ( tree->id == LEL_ID_STR ) {
- Str *str = (Str*) tree;
- stringFree( prg, str->value );
- treeFree( prg, tree );
- }
- else if ( tree->id == LEL_ID_BOOL || tree->id == LEL_ID_INT )
- treeFree( prg, tree );
- else if ( tree->id == LEL_ID_PTR )
- treeFree( prg, tree );
- else if ( tree->id == LEL_ID_STREAM ) {
- /* The FILE is closed only if the stream has one. */
- Stream *stream = (Stream*)tree;
- clearSourceStream( prg, sp, stream->in );
- free( stream->in );
- if ( stream->file != 0 )
- fclose( stream->file );
- streamFree( prg, stream );
- }
- else if ( tree->id == LEL_ID_INPUT ) {
- Input *input = (Input*)tree;
- clearInputStream( prg, sp, input->in );
- free( input->in );
- inputFree( prg, input );
- }
- else {
- /* Grammar tree. Ignore-list trees have no token data to free. */
- if ( tree->id != LEL_ID_IGNORE )
- stringFree( prg, tree->tokdata );
-
- /* Attributes and grammar-based children. */
- Kid *child = tree->child;
- while ( child != 0 ) {
- Kid *next = child->next;
- vm_push( child->tree );
- kidFree( prg, child );
- child = next;
- }
-
- treeFree( prg, tree );
- }
- }
-
- /* Any trees to downref? Null entries (empty slots) are skipped. */
- while ( sp != top ) {
- tree = vm_pop();
- if ( tree != 0 ) {
- assert( tree->refs > 0 );
- tree->refs -= 1;
- if ( tree->refs == 0 )
- goto free_tree;
- }
- }
-}
-
-/* Add one reference to tree. A null tree is ignored. */
-void treeUpref( Tree *tree )
-{
-    if ( tree == 0 )
-        return;
-
-    tree->refs += 1;
-}
-
-/* Drop one reference from tree and free it with treeFreeRec once the
- * count reaches zero. A null tree is ignored. */
-void treeDownref( Program *prg, Tree **sp, Tree *tree )
-{
-    if ( tree == 0 )
-        return;
-
-    assert( tree->refs > 0 );
-    tree->refs -= 1;
-
-    if ( tree->refs == 0 )
-        treeFreeRec( prg, sp, tree );
-}
-
-/* Find the first real child of a tree: the kid that follows the ignore
- * heads and the objectLength attribute slots. */
-Kid *treeChild( Program *prg, const Tree *tree )
-{
-    Kid *cursor = tree->child;
-    long remaining;
-
-    if ( tree->flags & AF_LEFT_IGNORE )
-        cursor = cursor->next;
-    if ( tree->flags & AF_RIGHT_IGNORE )
-        cursor = cursor->next;
-
-    /* Step over the attribute slots. */
-    for ( remaining = prg->rtd->lelInfo[tree->id].objectLength;
-            remaining > 0; remaining-- )
-        cursor = cursor->next;
-
-    return cursor;
-}
-
-/* Detach at the first real child of a tree. */
-Kid *treeExtractChild( Program *prg, Tree *tree )
-{
- LangElInfo *lelInfo = prg->rtd->lelInfo;
- Kid *kid = tree->child, *last = 0;
-
- if ( tree->flags & AF_LEFT_IGNORE )
- kid = kid->next;
- if ( tree->flags & AF_RIGHT_IGNORE )
- kid = kid->next;
-
- /* Skip over attributes. */
- long a, objectLength = lelInfo[tree->id].objectLength;
- for ( a = 0; a < objectLength; a++ ) {
- last = kid;
- kid = kid->next;
- }
-
- if ( last == 0 )
- tree->child = 0;
- else
- last->next = 0;
-
- return kid;
-}
-
-
-/* Find the first child of a tree. */
-Kid *treeAttr( Program *prg, const Tree *tree )
-{
- Kid *kid = tree->child;
-
- if ( tree->flags & AF_LEFT_IGNORE )
- kid = kid->next;
- if ( tree->flags & AF_RIGHT_IGNORE )
- kid = kid->next;
-
- return kid;
-}
-
-Tree *treeLeftIgnore( Program *prg, Tree *tree )
-{
- if ( tree->flags & AF_LEFT_IGNORE )
- return tree->child->tree;
- return 0;
-}
-
-Tree *treeRightIgnore( Program *prg, Tree *tree )
-{
- if ( tree->flags & AF_RIGHT_IGNORE ) {
- if ( tree->flags & AF_LEFT_IGNORE )
- return tree->child->next->tree;
- else
- return tree->child->tree;
- }
- return 0;
-}
-
-Kid *treeLeftIgnoreKid( Program *prg, Tree *tree )
-{
- if ( tree->flags & AF_LEFT_IGNORE )
- return tree->child;
- return 0;
-}
-
-Kid *treeRightIgnoreKid( Program *prg, Tree *tree )
-{
- if ( tree->flags & AF_RIGHT_IGNORE ) {
- if ( tree->flags & AF_LEFT_IGNORE )
- return tree->child->next;
- else
- return tree->child;
- }
- return 0;
-}
-
-Tree *treeIterDerefCur( TreeIter *iter )
-{
- return iter->ref.kid == 0 ? 0 : iter->ref.kid->tree;
-}
-
-void refSetValue( Ref *ref, Tree *v )
-{
- Kid *firstKid = ref->kid;
- while ( ref != 0 && ref->kid == firstKid ) {
- ref->kid->tree = v;
- ref = ref->next;
- }
-}
-
-Tree *getRhsEl( Program *prg, Tree *lhs, long position )
-{
- Kid *pos = treeChild( prg, lhs );
- while ( position > 0 ) {
- pos = pos->next;
- position -= 1;
- }
- return pos->tree;
-}
-
-Tree *getRhsVal( Program *prg, Tree *tree, int *a )
-{
- int i, len = a[0];
- for ( i = 0; i < len; i++ ) {
- int prodNum = a[1 + i * 2];
- int childNum = a[1 + i * 2 + 1];
- if ( tree->prodNum == prodNum )
- return getRhsEl( prg, tree, childNum );
- }
- return 0;
-}
-
-void setField( Program *prg, Tree *tree, long field, Tree *value )
-{
- assert( tree->refs == 1 );
- if ( value != 0 )
- assert( value->refs >= 1 );
- setAttr( tree, field, value );
-}
-
-Tree *getField( Tree *tree, Word field )
-{
- return getAttr( tree, field );
-}
-
-Kid *getFieldKid( Tree *tree, Word field )
-{
- return getAttrKid( tree, field );
-}
-
-Tree *getFieldSplit( Program *prg, Tree *tree, Word field )
-{
- Tree *val = getAttr( tree, field );
- Tree *split = splitTree( prg, val );
- setAttr( tree, field, split );
- return split;
-}
-
-void setUiterCur( Program *prg, UserIter *uiter, Tree *tree )
-{
- uiter->ref.kid->tree = tree;
-}
-
-void setTriterCur( Program *prg, TreeIter *iter, Tree *tree )
-{
- iter->ref.kid->tree = tree;
-}
-
-Tree *getPtrVal( Pointer *ptr )
-{
- return ptr->value->tree;
-}
-
-Tree *getPtrValSplit( Program *prg, Pointer *ptr )
-{
- Tree *val = ptr->value->tree;
- Tree *split = splitTree( prg, val );
- ptr->value->tree = split;
- return split;
-}
-
-/* This must traverse in the same order that the bindId assignments are done
- * in. */
-int matchPattern( Tree **bindings, Program *prg, long pat, Kid *kid, int checkNext )
-{
- PatReplNode *nodes = prg->rtd->patReplNodes;
-
-// #ifdef COLM_LOG_MATCH
-// if ( colm_log_match ) {
-// LangElInfo *lelInfo = prg->rtd->lelInfo;
-// cerr << "match pattern " << ( pat == -1 ? "NULL" : lelInfo[nodes[pat].id].name ) <<
-// " vs " << ( kid == 0 ? "NULL" : lelInfo[kid->tree->id].name ) << endl;
-// }
-// #endif
-
- /* match node, recurse on children. */
- if ( pat != -1 && kid != 0 ) {
- if ( nodes[pat].id == kid->tree->id ) {
- /* If the pattern node has data, then this means we need to match
- * the data against the token data. */
- if ( nodes[pat].data != 0 ) {
- /* Check the length of token text. */
- if ( nodes[pat].length != stringLength( kid->tree->tokdata ) )
- return false;
-
- /* Check the token text data. */
- if ( nodes[pat].length > 0 && memcmp( nodes[pat].data,
- stringData( kid->tree->tokdata ), nodes[pat].length ) != 0 )
- return false;
- }
-
- /* No failure, all okay. */
- if ( nodes[pat].bindId > 0 ) {
-// #ifdef COLM_LOG_MATCH
-// if ( colm_log_match ) {
-// cerr << "bindId: " << nodes[pat].bindId << endl;
-// }
-// #endif
- bindings[nodes[pat].bindId] = kid->tree;
- }
-
- /* If we didn't match a terminal duplicate of a nonterm then check
- * down the children. */
- if ( !nodes[pat].stop ) {
- /* Check for failure down child branch. */
- int childCheck = matchPattern( bindings, prg,
- nodes[pat].child, treeChild( prg, kid->tree ), true );
- if ( ! childCheck )
- return false;
- }
-
- /* If checking next, then look for failure there. */
- if ( checkNext ) {
- int nextCheck = matchPattern( bindings, prg,
- nodes[pat].next, kid->next, true );
- if ( ! nextCheck )
- return false;
- }
-
- return true;
- }
- }
- else if ( pat == -1 && kid == 0 ) {
- /* Both null is a match. */
- return 1;
- }
-
- return false;
-}
-
-
-long cmpTree( Program *prg, const Tree *tree1, const Tree *tree2 )
-{
- long cmpres = 0;
- if ( tree1 == 0 ) {
- if ( tree2 == 0 )
- return 0;
- else
- return -1;
- }
- else if ( tree2 == 0 )
- return 1;
- else if ( tree1->id < tree2->id )
- return -1;
- else if ( tree1->id > tree2->id )
- return 1;
- else if ( tree1->id == LEL_ID_PTR ) {
- if ( ((Pointer*)tree1)->value < ((Pointer*)tree2)->value )
- return -1;
- else if ( ((Pointer*)tree1)->value > ((Pointer*)tree2)->value )
- return 1;
- }
- else if ( tree1->id == LEL_ID_INT ) {
- if ( ((Int*)tree1)->value < ((Int*)tree2)->value )
- return -1;
- else if ( ((Int*)tree1)->value > ((Int*)tree2)->value )
- return 1;
- }
- else if ( tree1->id == LEL_ID_STR ) {
- cmpres = cmpString( ((Str*)tree1)->value, ((Str*)tree2)->value );
- if ( cmpres != 0 )
- return cmpres;
- }
- else {
- if ( tree1->tokdata == 0 && tree2->tokdata != 0 )
- return -1;
- else if ( tree1->tokdata != 0 && tree2->tokdata == 0 )
- return 1;
- else if ( tree1->tokdata != 0 && tree2->tokdata != 0 ) {
- cmpres = cmpString( tree1->tokdata, tree2->tokdata );
- if ( cmpres != 0 )
- return cmpres;
- }
- }
-
- Kid *kid1 = treeChild( prg, tree1 );
- Kid *kid2 = treeChild( prg, tree2 );
-
- while ( true ) {
- if ( kid1 == 0 && kid2 == 0 )
- return 0;
- else if ( kid1 == 0 && kid2 != 0 )
- return -1;
- else if ( kid1 != 0 && kid2 == 0 )
- return 1;
- else {
- cmpres = cmpTree( prg, kid1->tree, kid2->tree );
- if ( cmpres != 0 )
- return cmpres;
- }
- kid1 = kid1->next;
- kid2 = kid2->next;
- }
-}
-
-
-void splitRef( Program *prg, Tree ***psp, Ref *fromRef )
-{
- /* Go up the chain of kids, turing the pointers down. */
- Ref *last = 0, *ref = fromRef, *next = 0;
- while ( ref->next != 0 ) {
- next = ref->next;
- ref->next = last;
- last = ref;
- ref = next;
- }
- ref->next = last;
-
- /* Now traverse the list, which goes down. */
- while ( ref != 0 ) {
- if ( ref->kid->tree->refs > 1 ) {
-// #ifdef COLM_LOG_BYTECODE
-// if ( colm_log_bytecode ) {
-// cerr << "splitting tree: " << ref->kid << " refs: " <<
-// ref->kid->tree->refs << endl;
-// }
-// #endif
-
- Ref *nextDown = ref->next;
- while ( nextDown != 0 && nextDown->kid == ref->kid )
- nextDown = nextDown->next;
-
- Kid *oldNextKidDown = nextDown != 0 ? nextDown->kid : 0;
- Kid *newNextKidDown = 0;
-
- Tree *newTree = copyTree( prg, ref->kid->tree,
- oldNextKidDown, &newNextKidDown );
- treeUpref( newTree );
-
- /* Downref the original. Don't need to consider freeing because
- * refs were > 1. */
- ref->kid->tree->refs -= 1;
-
- while ( ref != 0 && ref != nextDown ) {
- next = ref->next;
- ref->next = 0;
-
- ref->kid->tree = newTree;
- ref = next;
- }
-
- /* Correct kid pointers down from ref. */
- while ( nextDown != 0 && nextDown->kid == oldNextKidDown ) {
- nextDown->kid = newNextKidDown;
- nextDown = nextDown->next;
- }
- }
- else {
- /* Reset the list as we go down. */
- next = ref->next;
- ref->next = 0;
- ref = next;
- }
- }
-}
-
-void splitIterCur( Program *prg, Tree ***psp, TreeIter *iter )
-{
- if ( iter->ref.kid == 0 )
- return;
-
- splitRef( prg, psp, &iter->ref );
-}
-
-Tree *setListMem( List *list, Half field, Tree *value )
-{
- assert( list->refs == 1 );
- if ( value != 0 )
- assert( value->refs >= 1 );
-
- Tree *existing = 0;
- switch ( field ) {
- case 0:
- existing = list->head->value;
- list->head->value = value;
- break;
- case 1:
- existing = list->tail->value;
- list->tail->value = value;
- break;
- default:
- assert( false );
- break;
- }
- return existing;
-}
-
-TreePair mapRemove( Program *prg, Map *map, Tree *key )
-{
- MapEl *mapEl = mapImplFind( prg, map, key );
- TreePair result = { 0, 0 };
- if ( mapEl != 0 ) {
- mapDetach( prg, map, mapEl );
- result.key = mapEl->key;
- result.val = mapEl->tree;
- mapElFree( prg, mapEl );
- }
-
- return result;
-}
-
-Tree *mapUnstore( Program *prg, Map *map, Tree *key, Tree *existing )
-{
- Tree *stored = 0;
- if ( existing == 0 ) {
- MapEl *mapEl = mapDetachByKey( prg, map, key );
- stored = mapEl->tree;
- mapElFree( prg, mapEl );
- }
- else {
- MapEl *mapEl = mapImplFind( prg, map, key );
- stored = mapEl->tree;
- mapEl->tree = existing;
- }
- return stored;
-}
-
-Tree *mapFind( Program *prg, Map *map, Tree *key )
-{
- MapEl *mapEl = mapImplFind( prg, map, key );
- return mapEl == 0 ? 0 : mapEl->tree;
-}
-
-long mapLength( Map *map )
-{
- return map->treeSize;
-}
-
-void listAppend2( Program *prg, List *list, Tree *val )
-{
- assert( list->refs == 1 );
- if ( val != 0 )
- assert( val->refs >= 1 );
- ListEl *listEl = listElAllocate( prg );
- listEl->value = val;
- listAppend( list, listEl );
-}
-
-Tree *listRemoveEnd( Program *prg, List *list )
-{
- Tree *tree = list->tail->value;
- listElFree( prg, listDetachLast( list ) );
- return tree;
-}
-
-Tree *getListMem( List *list, Word field )
-{
- Tree *result = 0;
- switch ( field ) {
- case 0:
- result = list->head->value;
- break;
- case 1:
- result = list->tail->value;
- break;
- default:
- assert( false );
- break;
- }
- return result;
-}
-
-Tree *getListMemSplit( Program *prg, List *list, Word field )
-{
- Tree *sv = 0;
- switch ( field ) {
- case 0:
- sv = splitTree( prg, list->head->value );
- list->head->value = sv;
- break;
- case 1:
- sv = splitTree( prg, list->tail->value );
- list->tail->value = sv;
- break;
- default:
- assert( false );
- break;
- }
- return sv;
-}
-
-
-int mapInsert( Program *prg, Map *map, Tree *key, Tree *element )
-{
- MapEl *mapEl = mapInsertKey( prg, map, key, 0 );
-
- if ( mapEl != 0 ) {
- mapEl->tree = element;
- return true;
- }
-
- return false;
-}
-
-void mapUnremove( Program *prg, Map *map, Tree *key, Tree *element )
-{
- MapEl *mapEl = mapInsertKey( prg, map, key, 0 );
- assert( mapEl != 0 );
- mapEl->tree = element;
-}
-
-Tree *mapUninsert( Program *prg, Map *map, Tree *key )
-{
- MapEl *el = mapDetachByKey( prg, map, key );
- Tree *val = el->tree;
- mapElFree( prg, el );
- return val;
-}
-
-Tree *mapStore( Program *prg, Map *map, Tree *key, Tree *element )
-{
- Tree *oldTree = 0;
- MapEl *elInTree = 0;
- MapEl *mapEl = mapInsertKey( prg, map, key, &elInTree );
-
- if ( mapEl != 0 )
- mapEl->tree = element;
- else {
- /* Element with key exists. Overwriting the value. */
- oldTree = elInTree->tree;
- elInTree->tree = element;
- }
-
- return oldTree;
-}
-
-void iterFind( Program *prg, Tree ***psp, TreeIter *iter, int tryFirst )
-{
- int anyTree = iter->searchId == prg->rtd->anyId;
- Tree **top = iter->stackRoot;
- Kid *child;
- Tree **sp = *psp;
-
-rec_call:
- if ( tryFirst && ( iter->ref.kid->tree->id == iter->searchId || anyTree ) ) {
- *psp = sp;
- return;
- }
- else {
- child = treeChild( prg, iter->ref.kid->tree );
- if ( child != 0 ) {
- vm_push( (SW) iter->ref.next );
- vm_push( (SW) iter->ref.kid );
- iter->ref.kid = child;
- iter->ref.next = (Ref*)vm_ptop();
- while ( iter->ref.kid != 0 ) {
- tryFirst = true;
- goto rec_call;
- rec_return:
- iter->ref.kid = iter->ref.kid->next;
- }
- iter->ref.kid = (Kid*)vm_pop();
- iter->ref.next = (Ref*)vm_pop();
- }
- }
-
- if ( top != vm_ptop() )
- goto rec_return;
-
- iter->ref.kid = 0;
- *psp = sp;
-}
-
-Tree *treeIterAdvance( Program *prg, Tree ***psp, TreeIter *iter )
-{
- assert( iter->stackSize == iter->stackRoot - *psp );
-
- if ( iter->ref.kid == 0 ) {
- /* Kid is zero, start from the root. */
- iter->ref = iter->rootRef;
- iterFind( prg, psp, iter, true );
- }
- else {
- /* Have a previous item, continue searching from there. */
- iterFind( prg, psp, iter, false );
- }
-
- iter->stackSize = iter->stackRoot - *psp;
-
- return (iter->ref.kid ? prg->trueVal : prg->falseVal );
-}
-
-Tree *treeIterNextChild( Program *prg, Tree ***psp, TreeIter *iter )
-{
- Tree **sp = *psp;
- assert( iter->stackSize == iter->stackRoot - vm_ptop() );
- Kid *kid = 0;
-
- if ( iter->ref.kid == 0 ) {
- /* Kid is zero, start from the first child. */
- Kid *child = treeChild( prg, iter->rootRef.kid->tree );
-
- if ( child == 0 )
- iter->ref.next = 0;
- else {
- /* Make a reference to the root. */
- vm_push( (SW) iter->rootRef.next );
- vm_push( (SW) iter->rootRef.kid );
- iter->ref.next = (Ref*)vm_ptop();
-
- kid = child;
- }
- }
- else {
- /* Start at next. */
- kid = iter->ref.kid->next;
- }
-
- if ( iter->searchId != prg->rtd->anyId ) {
- /* Have a previous item, go to the next sibling. */
- while ( kid != 0 && kid->tree->id != iter->searchId )
- kid = kid->next;
- }
-
- iter->ref.kid = kid;
- iter->stackSize = iter->stackRoot - vm_ptop();
- *psp = sp;
- return ( iter->ref.kid ? prg->trueVal : prg->falseVal );
-}
-
-Tree *treeRevIterPrevChild( Program *prg, Tree ***psp, RevTreeIter *iter )
-{
- Tree **sp = *psp;
-
- assert( iter->stackSize == iter->stackRoot - vm_ptop() );
-
- if ( iter->kidAtYield != iter->ref.kid ) {
- /* Need to reload the kids. */
- Kid *kid = treeChild( prg, iter->rootRef.kid->tree );
- Kid **dst = (Kid**)iter->stackRoot - 1;
- while ( kid != 0 ) {
- *dst-- = kid;
- kid = kid->next;
- }
- }
-
- if ( iter->ref.kid == 0 )
- iter->cur = (Kid**)iter->stackRoot - iter->children;
- else
- iter->cur += 1;
-
- if ( iter->searchId != prg->rtd->anyId ) {
- /* Have a previous item, go to the next sibling. */
- while ( iter->cur != (Kid**)iter->stackRoot && (*iter->cur)->tree->id != iter->searchId )
- iter->cur += 1;
- }
-
- if ( iter->cur == (Kid**)iter->stackRoot ) {
- iter->ref.next = 0;
- iter->ref.kid = 0;
- }
- else {
- iter->ref.next = &iter->rootRef;
- iter->ref.kid = *iter->cur;
- }
-
- /* We will use this to detect a split above the iterated tree. */
- iter->kidAtYield = iter->ref.kid;
-
- iter->stackSize = iter->stackRoot - vm_ptop();
-
- *psp = sp;
-
- return (iter->ref.kid ? prg->trueVal : prg->falseVal );
-}
-
-void iterFindRepeat( Program *prg, Tree ***psp, TreeIter *iter, int tryFirst )
-{
- Tree **sp = *psp;
- int anyTree = iter->searchId == prg->rtd->anyId;
- Tree **top = iter->stackRoot;
- Kid *child;
-
-rec_call:
- if ( tryFirst && ( iter->ref.kid->tree->id == iter->searchId || anyTree ) ) {
- *psp = sp;
- return;
- }
- else {
- /* The repeat iterator is just like the normal top-down-left-right,
- * execept it only goes into the children of a node if the node is the
- * root of the iteration, or if does not have any neighbours to the
- * right. */
- if ( top == vm_ptop() || iter->ref.kid->next == 0 ) {
- child = treeChild( prg, iter->ref.kid->tree );
- if ( child != 0 ) {
- vm_push( (SW) iter->ref.next );
- vm_push( (SW) iter->ref.kid );
- iter->ref.kid = child;
- iter->ref.next = (Ref*)vm_ptop();
- while ( iter->ref.kid != 0 ) {
- tryFirst = true;
- goto rec_call;
- rec_return:
- iter->ref.kid = iter->ref.kid->next;
- }
- iter->ref.kid = (Kid*)vm_pop();
- iter->ref.next = (Ref*)vm_pop();
- }
- }
- }
-
- if ( top != vm_ptop() )
- goto rec_return;
-
- iter->ref.kid = 0;
- *psp = sp;
-}
-
-Tree *treeIterNextRepeat( Program *prg, Tree ***psp, TreeIter *iter )
-{
- assert( iter->stackSize == iter->stackRoot - *psp );
-
- if ( iter->ref.kid == 0 ) {
- /* Kid is zero, start from the root. */
- iter->ref = iter->rootRef;
- iterFindRepeat( prg, psp, iter, true );
- }
- else {
- /* Have a previous item, continue searching from there. */
- iterFindRepeat( prg, psp, iter, false );
- }
-
- iter->stackSize = iter->stackRoot - *psp;
-
- return (iter->ref.kid ? prg->trueVal : prg->falseVal );
-}
-
-void iterFindRevRepeat( Program *prg, Tree ***psp, TreeIter *iter, int tryFirst )
-{
- Tree **sp = *psp;
- int anyTree = iter->searchId == prg->rtd->anyId;
- Tree **top = iter->stackRoot;
- Kid *child;
-
- if ( tryFirst ) {
- while ( true ) {
- if ( top == vm_ptop() || iter->ref.kid->next == 0 ) {
- child = treeChild( prg, iter->ref.kid->tree );
-
- if ( child == 0 )
- break;
- vm_push( (SW) iter->ref.next );
- vm_push( (SW) iter->ref.kid );
- iter->ref.kid = child;
- iter->ref.next = (Ref*)vm_ptop();
- }
- else {
- /* Not the top and not there is a next, go over to it. */
- iter->ref.kid = iter->ref.kid->next;
- }
- }
-
- goto first;
- }
-
- while ( true ) {
- if ( top == vm_ptop() ) {
- iter->ref.kid = 0;
- return;
- }
-
- if ( iter->ref.kid->next == 0 ) {
- /* Go up one and then down. Remember we can't use iter->ref.next
- * because the chain may have been split, setting it null (to
- * prevent repeated walks up). */
- Ref *ref = (Ref*)vm_ptop();
- iter->ref.kid = treeChild( prg, ref->kid->tree );
- }
- else {
- iter->ref.kid = (Kid*)vm_pop();
- iter->ref.next = (Ref*)vm_pop();
- }
-first:
- if ( iter->ref.kid->tree->id == iter->searchId || anyTree ) {
- *psp = sp;
- return;
- }
- }
- *psp = sp;
- return;
-}
-
-
-Tree *treeIterPrevRepeat( Program *prg, Tree ***psp, TreeIter *iter )
-{
- assert( iter->stackSize == iter->stackRoot - *psp );
-
- if ( iter->ref.kid == 0 ) {
- /* Kid is zero, start from the root. */
- iter->ref = iter->rootRef;
- iterFindRevRepeat( prg, psp, iter, true );
- }
- else {
- /* Have a previous item, continue searching from there. */
- iterFindRevRepeat( prg, psp, iter, false );
- }
-
- iter->stackSize = iter->stackRoot - *psp;
-
- return (iter->ref.kid ? prg->trueVal : prg->falseVal );
-}
-
-Tree *treeSearch( Program *prg, Kid *kid, long id )
-{
- /* This node the one? */
- if ( kid->tree->id == id )
- return kid->tree;
-
- Tree *res = 0;
-
- /* Search children. */
- Kid *child = treeChild( prg, kid->tree );
- if ( child != 0 )
- res = treeSearch( prg, child, id );
-
- /* Search siblings. */
- if ( res == 0 && kid->next != 0 )
- res = treeSearch( prg, kid->next, id );
-
- return res;
-}
-
-Tree *treeSearch2( Program *prg, Tree *tree, long id )
-{
- Tree *res = 0;
- if ( tree->id == id )
- res = tree;
- else {
- Kid *child = treeChild( prg, tree );
- if ( child != 0 )
- res = treeSearch( prg, child, id );
- }
- return res;
-}
-
-void xmlEscapeData( struct ColmPrintArgs *printArgs, const char *data, long len )
-{
- int i;
- for ( i = 0; i < len; i++ ) {
- if ( data[i] == '<' )
- printArgs->out( printArgs, "&lt;", 4 );
- else if ( data[i] == '>' )
- printArgs->out( printArgs, "&gt;", 4 );
- else if ( data[i] == '&' )
- printArgs->out( printArgs, "&amp;", 5 );
- else if ( (32 <= data[i] && data[i] <= 126) || data[i] == '\t' || data[i] == '\n' || data[i] == '\r' )
- printArgs->out( printArgs, &data[i], 1 );
- else {
- char out[64];
- sprintf( out, "&#%u;", ((unsigned)data[i]) );
- printArgs->out( printArgs, out, strlen(out) );
- }
- }
-}
-
-void initStrCollect( StrCollect *collect )
-{
- collect->data = (char*) malloc( BUFFER_INITIAL_SIZE );
- collect->allocated = BUFFER_INITIAL_SIZE;
- collect->length = 0;
-}
-
-void strCollectDestroy( StrCollect *collect )
-{
- free( collect->data );
-}
-
-void strCollectAppend( StrCollect *collect, const char *data, long len )
-{
- long newLen = collect->length + len;
- if ( newLen > collect->allocated ) {
- collect->allocated *= newLen * 2;
- collect->data = (char*) realloc( collect->data, collect->allocated );
- }
- memcpy( collect->data + collect->length, data, len );
- collect->length += len;
-}
-
-void strCollectClear( StrCollect *collect )
-{
- collect->length = 0;
-}
-
-#define INT_SZ 32
-
-void printStr( struct ColmPrintArgs *printArgs, Head *str )
-{
- printArgs->out( printArgs, (char*)(str->data), str->length );
-}
-
-void appendCollect( struct ColmPrintArgs *args, const char *data, int length )
-{
- strCollectAppend( (StrCollect*) args->arg, data, length );
-}
-
-void appendFile( struct ColmPrintArgs *args, const char *data, int length )
-{
- fwrite( data, length, 1, (FILE*)args->arg );
-}
-
-Tree *treeTrim( struct ColmProgram *prg, Tree **sp, Tree *tree )
-{
- debug( REALM_PARSE, "attaching left ignore\n" );
-
- /* Make the ignore list for the left-ignore. */
- Tree *leftIgnore = treeAllocate( prg );
- leftIgnore->id = LEL_ID_IGNORE;
- leftIgnore->flags |= AF_SUPPRESS_RIGHT;
-
- tree = pushLeftIgnore( prg, tree, leftIgnore );
-
- debug( REALM_PARSE, "attaching ignore right\n" );
-
- /* Copy the ignore list first if we need to attach it as a right
- * ignore. */
- Tree *rightIgnore = 0;
- rightIgnore = treeAllocate( prg );
- rightIgnore->id = LEL_ID_IGNORE;
- rightIgnore->flags |= AF_SUPPRESS_LEFT;
-
- tree = pushRightIgnore( prg, tree, rightIgnore );
-
- return tree;
-}
-
-enum ReturnType
-{
- Done = 1,
- CollectIgnoreLeft,
- CollectIgnoreRight,
- RecIgnoreList,
- ChildPrint
-};
-
-enum VisitType
-{
- IgnoreWrapper,
- IgnoreData,
- Term,
- NonTerm,
-};
-
-#define TF_TERM_SEEN 0x1
-
-void printKid( Program *prg, Tree **sp, struct ColmPrintArgs *printArgs, Kid *kid )
-{
- enum ReturnType rt;
- Kid *parent = 0;
- Kid *leadingIgnore = 0;
- enum VisitType visitType;
- int flags = 0;
-
- /* Iterate the kids passed in. We are expecting a next, which will allow us
- * to print the trailing ignore list. */
- while ( kid != 0 ) {
- vm_push( (SW) Done );
- goto rec_call;
- rec_return_top:
- kid = kid->next;
- }
-
- return;
-
-rec_call:
- if ( kid->tree == 0 )
- goto skip_null;
-
- /* If not currently skipping ignore data, then print it. Ignore data can
- * be associated with terminals and nonterminals. */
- if ( kid->tree->flags & AF_LEFT_IGNORE ) {
- vm_push( (SW)parent );
- vm_push( (SW)kid );
- parent = kid;
- kid = treeLeftIgnoreKid( prg, kid->tree );
- vm_push( (SW) CollectIgnoreLeft );
- goto rec_call;
- rec_return_ign_left:
- kid = (Kid*)vm_pop();
- parent = (Kid*)vm_pop();
- }
-
- if ( kid->tree->id == LEL_ID_IGNORE )
- visitType = IgnoreWrapper;
- else if ( parent != 0 && parent->tree->id == LEL_ID_IGNORE )
- visitType = IgnoreData;
- else if ( kid->tree->id < prg->rtd->firstNonTermId )
- visitType = Term;
- else
- visitType = NonTerm;
-
- debug( REALM_PRINT, "visit type: %d\n", visitType );
-
- if ( visitType == IgnoreData ) {
- debug( REALM_PRINT, "putting %p on ignore list\n", kid->tree );
- Kid *newIgnore = kidAllocate( prg );
- newIgnore->next = leadingIgnore;
- leadingIgnore = newIgnore;
- leadingIgnore->tree = kid->tree;
- goto skip_node;
- }
-
- if ( visitType == IgnoreWrapper ) {
- Kid *newIgnore = kidAllocate( prg );
- newIgnore->next = leadingIgnore;
- leadingIgnore = newIgnore;
- leadingIgnore->tree = kid->tree;
- /* Don't skip. */
- }
-
- /* print leading ignore? Triggered by terminals. */
- if ( visitType == Term ) {
- /* Reverse the leading ignore list. */
- if ( leadingIgnore != 0 ) {
- Kid *ignore = 0, *last = 0;
-
- /* Reverse the list and take the opportunity to implement the
- * suppress left. */
- while ( true ) {
- Kid *next = leadingIgnore->next;
- leadingIgnore->next = last;
-
- if ( leadingIgnore->tree->flags & AF_SUPPRESS_LEFT ) {
- /* We are moving left. Chop off the tail. */
- debug( REALM_PRINT, "suppressing left\n" );
- freeKidList( prg, next );
- break;
- }
-
- if ( next == 0 )
- break;
-
- last = leadingIgnore;
- leadingIgnore = next;
- }
-
- /* Print the leading ignore list. Also implement the suppress right
- * in the process. */
- if ( printArgs->comm && (!printArgs->trim || (flags & TF_TERM_SEEN && kid->tree->id > 0)) ) {
- ignore = leadingIgnore;
- while ( ignore != 0 ) {
- if ( ignore->tree->flags & AF_SUPPRESS_RIGHT )
- break;
-
- if ( ignore->tree->id != LEL_ID_IGNORE ) {
- vm_push( (SW)visitType );
- vm_push( (SW)leadingIgnore );
- vm_push( (SW)ignore );
- vm_push( (SW)parent );
- vm_push( (SW)kid );
-
- leadingIgnore = 0;
- kid = ignore;
- parent = 0;
-
- debug( REALM_PRINT, "rec call on %p\n", kid->tree );
- vm_push( (SW) RecIgnoreList );
- goto rec_call;
- rec_return_il:
-
- kid = (Kid*)vm_pop();
- parent = (Kid*)vm_pop();
- ignore = (Kid*)vm_pop();
- leadingIgnore = (Kid*)vm_pop();
- visitType = (enum VisitType)vm_pop();
- }
-
- ignore = ignore->next;
- }
- }
-
- /* Free the leading ignore list. */
- freeKidList( prg, leadingIgnore );
- leadingIgnore = 0;
- }
- }
-
- if ( visitType == Term || visitType == NonTerm ) {
- /* Open the tree. */
- printArgs->openTree( prg, sp, printArgs, parent, kid );
- }
-
- if ( visitType == Term )
- flags |= TF_TERM_SEEN;
-
- if ( visitType == Term || visitType == IgnoreData ) {
- /* Print contents. */
- if ( kid->tree->id < prg->rtd->firstNonTermId ) {
- debug( REALM_PRINT, "printing terminal %p\n", kid->tree );
- if ( kid->tree->id != 0 )
- printArgs->printTerm( prg, sp, printArgs, kid );
- }
- }
-
- /* Print children. */
- Kid *child = printArgs->attr ?
- treeAttr( prg, kid->tree ) :
- treeChild( prg, kid->tree );
-
- if ( child != 0 ) {
- vm_push( (SW)visitType );
- vm_push( (SW)parent );
- vm_push( (SW)kid );
- parent = kid;
- kid = child;
- while ( kid != 0 ) {
- vm_push( (SW) ChildPrint );
- goto rec_call;
- rec_return:
- kid = kid->next;
- }
- kid = (Kid*)vm_pop();
- parent = (Kid*)vm_pop();
- visitType = (enum VisitType)vm_pop();
- }
-
- if ( visitType == Term || visitType == NonTerm ) {
- /* close the tree. */
- printArgs->closeTree( prg, sp, printArgs, parent, kid );
- }
-
-skip_node:
-
- /* If not currently skipping ignore data, then print it. Ignore data can
- * be associated with terminals and nonterminals. */
- if ( kid->tree->flags & AF_RIGHT_IGNORE ) {
- debug( REALM_PRINT, "right ignore\n" );
- vm_push( (SW)parent );
- vm_push( (SW)kid );
- parent = kid;
- kid = treeRightIgnoreKid( prg, kid->tree );
- vm_push( (SW) CollectIgnoreRight );
- goto rec_call;
- rec_return_ign_right:
- kid = (Kid*)vm_pop();
- parent = (Kid*)vm_pop();
- }
-
-/* For skiping over content on null. */
-skip_null:
-
- rt = (enum ReturnType)vm_pop();
- switch ( rt ) {
- case Done:
- debug( REALM_PRINT, "return: done\n" );
- goto rec_return_top;
- break;
- case CollectIgnoreLeft:
- debug( REALM_PRINT, "return: ignore left\n" );
- goto rec_return_ign_left;
- case CollectIgnoreRight:
- debug( REALM_PRINT, "return: ignore right\n" );
- goto rec_return_ign_right;
- case RecIgnoreList:
- debug( REALM_PRINT, "return: ignore list\n" );
- goto rec_return_il;
- case ChildPrint:
- debug( REALM_PRINT, "return: child print\n" );
- goto rec_return;
- }
-}
-
-void printTreeArgs( Program *prg, Tree **sp, struct ColmPrintArgs *printArgs, Tree *tree )
-{
- if ( tree == 0 )
- printArgs->out( printArgs, "NIL", 3 );
- else {
- /* This term tree allows us to print trailing ignores. */
- Tree termTree;
- memset( &termTree, 0, sizeof(termTree) );
-
- Kid kid, term;
- term.tree = &termTree;
- term.next = 0;
- term.flags = 0;
-
- kid.tree = tree;
- kid.next = &term;
- kid.flags = 0;
-
- printKid( prg, sp, printArgs, &kid );
- }
-}
-
-void printTermTree( Program *prg, Tree **sp, struct ColmPrintArgs *printArgs, Kid *kid )
-{
- debug( REALM_PRINT, "printing term %p\n", kid->tree );
-
- if ( kid->tree->id == LEL_ID_INT ) {
- char buf[INT_SZ];
- sprintf( buf, "%ld", ((Int*)kid->tree)->value );
- printArgs->out( printArgs, buf, strlen(buf) );
- }
- else if ( kid->tree->id == LEL_ID_BOOL ) {
- if ( ((Int*)kid->tree)->value )
- printArgs->out( printArgs, "true", 4 );
- else
- printArgs->out( printArgs, "false", 5 );
- }
- else if ( kid->tree->id == LEL_ID_PTR ) {
- char buf[INT_SZ];
- printArgs->out( printArgs, "#", 1 );
- sprintf( buf, "%p", (void*) ((Pointer*)kid->tree)->value );
- printArgs->out( printArgs, buf, strlen(buf) );
- }
- else if ( kid->tree->id == LEL_ID_STR ) {
- printStr( printArgs, ((Str*)kid->tree)->value );
- }
- else if ( kid->tree->id == LEL_ID_STREAM ) {
- char buf[INT_SZ];
- printArgs->out( printArgs, "#", 1 );
- sprintf( buf, "%p", (void*) ((Stream*)kid->tree)->file );
- printArgs->out( printArgs, buf, strlen(buf) );
- }
- else if ( kid->tree->tokdata != 0 &&
- stringLength( kid->tree->tokdata ) > 0 )
- {
- printArgs->out( printArgs, stringData( kid->tree->tokdata ),
- stringLength( kid->tree->tokdata ) );
- }
-}
-
-
-void printNull( Program *prg, Tree **sp, struct ColmPrintArgs *args, Kid *parent, Kid *kid )
-{
-}
-
-void openTreeXml( Program *prg, Tree **sp, struct ColmPrintArgs *args, Kid *parent, Kid *kid )
-{
- /* Skip the terminal that is for forcing trailing ignores out. */
- if ( kid->tree->id == 0 )
- return;
-
- LangElInfo *lelInfo = prg->rtd->lelInfo;
-
- /* List flattening: skip the repeats and lists that are a continuation of
- * the list. */
- if ( parent != 0 && parent->tree->id == kid->tree->id && kid->next == 0 &&
- ( lelInfo[parent->tree->id].repeat || lelInfo[parent->tree->id].list ) )
- {
- return;
- }
-
- const char *name = lelInfo[kid->tree->id].xmlTag;
- args->out( args, "<", 1 );
- args->out( args, name, strlen( name ) );
- args->out( args, ">", 1 );
-}
-
-void printTermXml( Program *prg, Tree **sp, struct ColmPrintArgs *printArgs, Kid *kid )
-{
- //Kid *child;
-
- /*child = */ treeChild( prg, kid->tree );
- if ( kid->tree->id == LEL_ID_PTR ) {
- char ptr[32];
- sprintf( ptr, "%p\n", (void*)((Pointer*)kid->tree)->value );
- printArgs->out( printArgs, ptr, strlen(ptr) );
- }
- else if ( kid->tree->id == LEL_ID_BOOL ) {
- if ( ((Int*)kid->tree)->value )
- printArgs->out( printArgs, "true", 4 );
- else
- printArgs->out( printArgs, "false", 5 );
- }
- else if ( kid->tree->id == LEL_ID_INT ) {
- char ptr[32];
- sprintf( ptr, "%ld", ((Int*)kid->tree)->value );
- printArgs->out( printArgs, ptr, strlen(ptr) );
- }
- else if ( kid->tree->id == LEL_ID_STR ) {
- Head *head = (Head*) ((Str*)kid->tree)->value;
-
- xmlEscapeData( printArgs, (char*)(head->data), head->length );
- }
- else if ( 0 < kid->tree->id && kid->tree->id < prg->rtd->firstNonTermId &&
- kid->tree->id != LEL_ID_IGNORE &&
- kid->tree->tokdata != 0 &&
- stringLength( kid->tree->tokdata ) > 0 )
- {
- xmlEscapeData( printArgs, stringData( kid->tree->tokdata ),
- stringLength( kid->tree->tokdata ) );
- }
-}
-
-
-void closeTreeXml( Program *prg, Tree **sp, struct ColmPrintArgs *args, Kid *parent, Kid *kid )
-{
- /* Skip the terminal that is for forcing trailing ignores out. */
- if ( kid->tree->id == 0 )
- return;
-
- LangElInfo *lelInfo = prg->rtd->lelInfo;
-
- /* List flattening: skip the repeats and lists that are a continuation of
- * the list. */
- if ( parent != 0 && parent->tree->id == kid->tree->id && kid->next == 0 &&
- ( lelInfo[parent->tree->id].repeat || lelInfo[parent->tree->id].list ) )
- {
- return;
- }
-
- const char *name = lelInfo[kid->tree->id].xmlTag;
- args->out( args, "</", 2 );
- args->out( args, name, strlen( name ) );
- args->out( args, ">", 1 );
-}
-
-void printTreeCollect( Program *prg, Tree **sp, StrCollect *collect, Tree *tree, int trim )
-{
- struct ColmPrintArgs printArgs = { collect, true, false, trim, &appendCollect,
- &printNull, &printTermTree, &printNull };
- printTreeArgs( prg, sp, &printArgs, tree );
-}
-
-void printTreeFile( Program *prg, Tree **sp, FILE *out, Tree *tree, int trim )
-{
- struct ColmPrintArgs printArgs = { out, true, false, trim, &appendFile,
- &printNull, &printTermTree, &printNull };
- printTreeArgs( prg, sp, &printArgs, tree );
-}
-
-void printXmlStdout( Program *prg, Tree **sp, Tree *tree, int commAttr, int trim )
-{
- struct ColmPrintArgs printArgs = { stdout, commAttr, commAttr, trim, &appendFile,
- &openTreeXml, &printTermXml, &closeTreeXml };
- printTreeArgs( prg, sp, &printArgs, tree );
-}
-
diff --git a/colm/tree.h b/colm/tree.h
deleted file mode 100644
index 4425cfc5..00000000
--- a/colm/tree.h
+++ /dev/null
@@ -1,355 +0,0 @@
-/*
- * Copyright 2010-2012 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Colm.
- *
- * Colm is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Colm is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Colm; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#ifndef __COLM_TREE_H
-#define __COLM_TREE_H
-
-#if defined(__cplusplus)
-extern "C" {
-#endif
-
-#include <colm/colm.h>
-
-typedef unsigned char Code; /* Presumably one unit of bytecode; UserIter.resume is a Code pointer. */
-typedef unsigned long Word; /* Used below for field indices and element ids (getField, constructTerm). */
-typedef unsigned long Half; /* NOTE(review): same underlying type as Word despite the name -- confirm this is intended. */
-struct Bindings; /* Forward declaration only; nothing else in this header refers to it. */
-
-typedef struct _File /* Node that holds only prev/next links to other File nodes. */
-{
- struct _File *prev;
- struct _File *next;
-} File;
-
-typedef struct _Location /* A File pointer together with line, column and byte numbers. */
-{
- File *file;
- long line; /* Whether line/column/byte count from 0 or 1 is not visible in this header. */
- long column;
- long byte;
-} Location;
-
-/* Header located just before string data. */
-typedef struct _Head
-{
- const char *data; /* NOTE(review): nothing here says this is NUL-terminated; pair it with length. */
- long length; /* Number of chars at data; tree.c passes data and length together. */
- Location *location;
-} Head;
-
-typedef struct ColmKid /* List link: one Tree pointer plus the pointer to the next link. */
-{
- /* The tree needs to be first since pointers to kids are used to reference
- * trees on the stack. A pointer to the word that is a Tree* is cast to
- * a Kid*. */
- struct ColmTree *tree;
- struct ColmKid *next; /* Following kid in the list; tree.c treats next == 0 as "last". */
- unsigned char flags;
-} Kid;
-
-typedef struct _Ref /* A reference to a Kid that can be chained through next. */
-{
- struct ColmKid *kid;
- struct _Ref *next; /* Presumably the enclosing reference in the chain -- TODO confirm. */
-} Ref;
-
-typedef struct ColmTree /* Base node; Int, Pointer, Str, List, Stream, Input and Parser below overlay it. */
-{
- /* First four will be overlaid in other structures. */
- short id; /* Element id; tree.c uses it to index the runtime lelInfo table. */
- unsigned short flags;
- long refs; /* Presumably the count managed by treeUpref/treeDownref -- TODO confirm. */
- Kid *child; /* First kid of this node's kid list. */
-
- Head *tokdata; /* Token text; tree.c checks it against 0 before use. */
-
- /* FIXME: this needs to go somewhere else. Will do for now. */
- unsigned short prodNum;
-} Tree;
-
-
-typedef struct _TreePair /* A key tree paired with a value tree. */
-{
- Tree *key;
- Tree *val;
-} TreePair;
-
-typedef struct _ParseTree /* Parser-side node; shares only the leading id/flags pair with Tree. */
-{
- short id;
- unsigned short flags;
-
- struct _ParseTree *child;
- struct _ParseTree *next;
- struct _ParseTree *leftIgnore;
- struct _ParseTree *rightIgnore;
- Kid *shadow; /* Presumably the Kid this parse node corresponds to -- TODO confirm. */
-
- /* Parsing algorithm. */
- long state;
- long region;
- short causeReduce;
-
- /* FIXME: unify probably. */
- char retryLower;
- char retryUpper;
-} ParseTree;
-
-typedef struct _Int /* A long value stored behind the common Tree prefix. */
-{
- /* Must overlay Tree: these four members mirror Tree's first four. */
- short id;
- unsigned short flags;
- long refs;
- Kid *child;
-
- long value;
-} Int;
-
-typedef struct _Pointer /* A Kid pointer stored behind the common Tree prefix. */
-{
- /* Must overlay Tree: these four members mirror Tree's first four. */
- short id;
- unsigned short flags;
- long refs;
- Kid *child;
-
- Kid *value;
-} Pointer;
-
-typedef struct _Str /* A Head pointer (string data) stored behind the common Tree prefix. */
-{
- /* Must overlay Tree: these four members mirror Tree's first four. */
- short id;
- unsigned short flags;
- long refs;
- Kid *child;
-
- Head *value; /* tree.c reaches this by casting a Tree pointer to Str pointer. */
-} Str;
-
-typedef struct _ListEl /* Element of a List, linked in both directions. */
-{
- /* Must overlay kid: value and next line up with Kid.tree and Kid.next. */
- Tree *value;
- struct _ListEl *next;
- struct _ListEl *prev; /* NOTE(review): Kid has unsigned char flags in this slot, so only the first two members overlay. */
-} ListEl;
-
-/*
- * Maps
- */
-typedef struct _GenericInfo /* Presumably describes one generic type; List and Parser each point to one. */
-{
- long type;
- long typeArg;
- long keyOffset;
- long keyType;
- long langElId;
- long parserId;
-} GenericInfo; /* NOTE(review): member meanings are not documented or used in this header -- check their users. */
-
-typedef struct _List /* List container: head and tail ListEl pointers, a length, and its GenericInfo. */
-{
- /* Must overlay Tree: head sits where Tree has child. */
- short id;
- unsigned short flags;
- long refs;
- ListEl *head;
-
- ListEl *tail;
- long listLen;
- GenericInfo *genericInfo;
-
-} List;
-
-typedef struct _Stream /* Tree-prefixed wrapper around a FILE pointer and a SourceStream pointer. */
-{
- /* Must overlay Tree: these four members mirror Tree's first four. */
- short id;
- unsigned short flags;
- long refs;
- Kid *child;
-
- FILE *file;
- SourceStream *in; /* SourceStream is not declared here; it must arrive through colm/colm.h or the includer. */
-} Stream;
-
-typedef struct _Input /* Tree-prefixed wrapper around an InputStream pointer. */
-{
- /* Must overlay Tree: these four members mirror Tree's first four. */
- short id;
- unsigned short flags;
- long refs;
- Kid *child;
-
- InputStream *in; /* InputStream is not declared here; it must arrive through colm/colm.h or the includer. */
-} Input;
-
-typedef struct _Parser /* Tree-prefixed record tying together a PdaRun, an FsmRun, an Input and a result tree. */
-{
- /* Must overlay Tree: these four members mirror Tree's first four. */
- short id;
- unsigned short flags;
- long refs;
- Kid *child;
-
- GenericInfo *genericInfo;
-
- struct _PdaRun *pdaRun; /* struct _PdaRun and struct _FsmRun are only named here, not defined. */
- struct _FsmRun *fsmRun;
- struct _Input *input;
- Tree *result;
-} Parser;
-
-typedef struct _TreeIter /* Iterator state; RevTreeIter repeats exactly these five members as its prefix. */
-{
- Ref rootRef;
- Ref ref; /* Presumably the current position (cf. setTriterCur, treeIterDerefCur) -- TODO confirm. */
- long searchId;
- Tree **stackRoot;
- long stackSize;
-} TreeIter;
-
-/* This must overlay tree iter because some of the same bytecodes are used. */
-typedef struct _RevTreeIter
-{
- Ref rootRef; /* These first five members match TreeIter in type and order. */
- Ref ref;
- long searchId;
- Tree **stackRoot;
- long stackSize;
-
- /* For detecting a split at the leaf. */
- Kid *kidAtYield;
- long children;
- Kid **cur;
-} RevTreeIter;
-
-
-typedef struct _UserIter /* Iterator state with its own layout; it does not share TreeIter's member order. */
-{
- /* The current item. */
- Ref ref;
- Tree **stackRoot;
- long argSize;
- long stackSize;
- Code *resume; /* Presumably the bytecode position to continue from -- TODO confirm. */
- Tree **frame;
- long searchId;
-} UserIter;
-
-
-void treeUpref( Tree *tree ); /* Presumably bumps tree->refs; the definition is outside this header. */
-void treeDownref( struct ColmProgram *prg, Tree **sp, Tree *tree ); /* Presumably the matching release -- TODO confirm what sp is used for. */
-long cmpTree( struct ColmProgram *prg, const Tree *tree1, const Tree *tree2 ); /* Result convention is not visible here; check the definition before relying on its sign. */
-
-Tree *pushRightIgnore( struct ColmProgram *prg, Tree *pushTo, Tree *rightIgnore ); /* The push and pop helpers return a Tree pointer; whether it can differ from the tree passed in is not visible here. */
-Tree *pushLeftIgnore( struct ColmProgram *prg, Tree *pushTo, Tree *leftIgnore );
-Tree *popRightIgnore( struct ColmProgram *prg, Tree **sp, Tree *popFrom, Tree **rightIgnore ); /* The popped ignore presumably comes back through the last parameter -- TODO confirm. */
-Tree *popLeftIgnore( struct ColmProgram *prg, Tree **sp, Tree *popFrom, Tree **leftIgnore );
-Tree *treeLeftIgnore( struct ColmProgram *prg, Tree *tree );
-Tree *treeRightIgnore( struct ColmProgram *prg, Tree *tree );
-Kid *treeLeftIgnoreKid( struct ColmProgram *prg, Tree *tree ); /* Kid-returning variants of the two accessors above. */
-Kid *treeRightIgnoreKid( struct ColmProgram *prg, Tree *tree );
-Kid *treeChild( struct ColmProgram *prg, const Tree *tree );
-Kid *treeAttr( struct ColmProgram *prg, const Tree *tree );
-Kid *kidListConcat( Kid *list1, Kid *list2 ); /* Operates on kid lists only; takes no program argument. */
-Kid *treeExtractChild( struct ColmProgram *prg, Tree *tree );
-Kid *reverseKidList( Kid *kid ); /* Returns a Kid pointer, presumably the new list head -- TODO confirm. */
-
-Tree *constructInteger( struct ColmProgram *prg, long i ); /* Presumably yields an Int (declared above) viewed as a Tree -- TODO confirm. */
-Tree *constructPointer( struct ColmProgram *prg, Tree *tree );
-Tree *constructTerm( struct ColmProgram *prg, Word id, Head *tokdata ); /* id is a Word here although Tree.id is a short. */
-Tree *constructReplacementTree( Kid *kid, Tree **bindings, struct ColmProgram *prg, long pat ); /* Note the argument order: prg is third here, not first. */
-Tree *createGeneric( struct ColmProgram *prg, long genericId );
-Tree *constructToken( struct ColmProgram *prg, Tree **root, long nargs );
-Tree *constructInput( struct ColmProgram *prg );
-
-
-int testFalse( struct ColmProgram *prg, Tree *tree );
-Tree *makeTree( struct ColmProgram *prg, Tree **root, long nargs ); /* Same parameter list as constructToken above. */
-Stream *openFile( struct ColmProgram *prg, Tree *name, Tree *mode ); /* name and mode are passed as trees, not C strings. */
-Stream *openStreamFd( struct ColmProgram *prg, long fd );
-Kid *copyIgnoreList( struct ColmProgram *prg, Kid *ignoreHeader );
-Kid *copyKidList( struct ColmProgram *prg, Kid *kidList );
-void streamFree( struct ColmProgram *prg, Stream *s );
-Tree *copyTree( struct ColmProgram *prg, Tree *tree, Kid *oldNextDown, Kid **newNextDown ); /* Same parameter list as copyRealTree further down; how the two differ is not visible here. */
-
-Tree *getPtrVal( Pointer *ptr );
-Tree *getPtrValSplit( struct ColmProgram *prg, Pointer *ptr ); /* The Split variants in this header all take prg as an extra first argument. */
-Tree *getField( Tree *tree, Word field );
-Tree *getFieldSplit( struct ColmProgram *prg, Tree *tree, Word field );
-Tree *getRhsEl( struct ColmProgram *prg, Tree *lhs, long position );
-void setField( struct ColmProgram *prg, Tree *tree, long field, Tree *value ); /* NOTE(review): field is long here but Word in getField. */
-
-void setTriterCur( struct ColmProgram *prg, TreeIter *iter, Tree *tree );
-void setUiterCur( struct ColmProgram *prg, UserIter *uiter, Tree *tree );
-void refSetValue( Ref *ref, Tree *v );
-Tree *treeSearch( struct ColmProgram *prg, Kid *kid, long id ); /* Starts from a Kid; treeSearch2 below starts from a Tree. */
-Tree *treeSearch2( struct ColmProgram *prg, Tree *tree, long id );
-
-int matchPattern( Tree **bindings, struct ColmProgram *prg, long pat, Kid *kid, int checkNext ); /* Note the argument order: bindings comes before prg. */
-Tree *treeIterDerefCur( TreeIter *iter );
-
-/* For making references of attributes. */
-Kid *getFieldKid( Tree *tree, Word field );
-
-Tree *copyRealTree( struct ColmProgram *prg, Tree *tree, Kid *oldNextDown, Kid **newNextDown );
-void splitIterCur( struct ColmProgram *prg, Tree ***psp, TreeIter *iter ); /* psp is a pointer to the Tree** that other functions take as sp. */
-Tree *setListMem( List *list, Half field, Tree *value ); /* NOTE(review): field is Half here but Word in getListMem; both are unsigned long. */
-
-void listAppend2( struct ColmProgram *prg, List *list, Tree *val );
-Tree *listRemoveEnd( struct ColmProgram *prg, List *list ); /* Returns a Tree pointer, presumably the removed value -- TODO confirm. */
-Tree *getListMem( List *list, Word field );
-Tree *getListMemSplit( struct ColmProgram *prg, List *list, Word field );
-
-Tree *treeIterAdvance( struct ColmProgram *prg, Tree ***psp, TreeIter *iter ); /* What the returned Tree pointer signals at the end of iteration is not visible here. */
-Tree *treeIterNextChild( struct ColmProgram *prg, Tree ***psp, TreeIter *iter );
-Tree *treeRevIterPrevChild( struct ColmProgram *prg, Tree ***psp, RevTreeIter *iter ); /* The only function in this group that takes a RevTreeIter. */
-Tree *treeIterNextRepeat( struct ColmProgram *prg, Tree ***psp, TreeIter *iter );
-Tree *treeIterPrevRepeat( struct ColmProgram *prg, Tree ***psp, TreeIter *iter );
-
-/* An automatically grown buffer for collecting tokens. Always reuses space;
- * never shrinks its allocation. */
-typedef struct _StrCollect
-{
- char *data;
- int allocated; /* Presumably the capacity of data in bytes -- TODO confirm. */
- int length; /* Presumably the number of bytes currently in use -- TODO confirm. */
-} StrCollect;
-
-void initStrCollect( StrCollect *collect );
-void strCollectDestroy( StrCollect *collect );
-void strCollectAppend( StrCollect *collect, const char *data, long len ); /* NOTE(review): len is long while StrCollect.length and .allocated are int. */
-void strCollectClear( StrCollect *collect );
-Tree *treeTrim( struct ColmProgram *prg, Tree **sp, Tree *tree );
-
-void printTreeCollect( struct ColmProgram *prg, Tree **sp, StrCollect *collect, Tree *tree, int trim ); /* tree.c: prints into collect via appendCollect. */
-void printTreeFile( struct ColmProgram *prg, Tree **sp, FILE *out, Tree *tree, int trim ); /* tree.c: prints to out via appendFile. */
-void printXmlStdout( struct ColmProgram *prg, Tree **sp, Tree *tree, int commAttr, int trim ); /* tree.c: XML on stdout; commAttr feeds two print-argument slots. */
-
-#if defined(__cplusplus)
-}
-#endif
-
-#endif
-