summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorAdrian Thurston <thurston@colm.net>2020-03-14 15:29:52 +0200
committerAdrian Thurston <thurston@colm.net>2020-03-14 15:29:52 +0200
commitf653735830d537715f2885bd832cf04851d35401 (patch)
tree95e6551e39407543366d4f49aedf7b78c6e8bbe1 /src
parentbcc54d5df10cf425e7134b06f70d7ffe1abee4e4 (diff)
downloadcolm-f653735830d537715f2885bd832cf04851d35401.tar.gz
moved source files into commit repository
Diffstat (limited to 'src')
-rw-r--r--src/.gitignore37
-rw-r--r--src/CMakeLists.txt188
-rw-r--r--src/ChangeLog92
-rw-r--r--src/Makefile.am207
-rw-r--r--src/buffer.h57
-rw-r--r--src/bytecode.c5025
-rw-r--r--src/bytecode.h678
-rw-r--r--src/closure.cc458
-rw-r--r--src/codegen.cc62
-rw-r--r--src/codevect.c183
-rw-r--r--src/colm-config.cmake.in3
-rw-r--r--src/colm-wrap.sh79
-rw-r--r--src/colm.h159
-rw-r--r--src/colm.lm910
-rw-r--r--src/colmex.h109
-rw-r--r--src/commit.c111
-rw-r--r--src/compiler.cc1263
-rw-r--r--src/compiler.h1158
-rw-r--r--src/config.h.cmake.in1
-rw-r--r--src/consinit.cc922
-rw-r--r--src/consinit.h113
-rw-r--r--src/cstring.h862
-rw-r--r--src/ctinput.cc570
-rw-r--r--src/debug.c82
-rw-r--r--src/debug.h65
-rw-r--r--src/declare.cc1623
-rw-r--r--src/defs.h.cmake.in11
-rw-r--r--src/defs.h.in40
-rw-r--r--src/dotgen.cc117
-rw-r--r--src/dotgen.h52
-rw-r--r--src/exports.cc260
-rw-r--r--src/fsmap.cc806
-rw-r--r--src/fsmattach.cc427
-rw-r--r--src/fsmbase.cc603
-rw-r--r--src/fsmcodegen.cc918
-rw-r--r--src/fsmcodegen.h211
-rw-r--r--src/fsmexec.cc220
-rw-r--r--src/fsmgraph.cc981
-rw-r--r--src/fsmgraph.h1321
-rw-r--r--src/fsmmin.cc737
-rw-r--r--src/fsmstate.cc441
-rw-r--r--src/global.h110
-rw-r--r--src/input.c759
-rw-r--r--src/input.h232
-rw-r--r--src/internal.h33
-rw-r--r--src/iter.c648
-rw-r--r--src/keyops.h196
-rw-r--r--src/list.c255
-rw-r--r--src/lmparse.kh86
-rw-r--r--src/lmparse.kl2139
-rw-r--r--src/lmscan.h104
-rw-r--r--src/lmscan.rl637
-rw-r--r--src/loadboot2.cc3
-rw-r--r--src/loadcolm.cc2
-rw-r--r--src/loadfinal.cc2978
-rw-r--r--src/loadfinal.h31
-rw-r--r--src/loadinit.cc416
-rw-r--r--src/loadinit.h77
-rw-r--r--src/lookup.cc323
-rw-r--r--src/main.cc836
-rw-r--r--src/map.c876
-rw-r--r--src/map.cc27
-rw-r--r--src/map.h86
-rw-r--r--src/parser.cc1128
-rw-r--r--src/parser.h197
-rw-r--r--src/parsetree.cc1495
-rw-r--r--src/parsetree.h3607
-rw-r--r--src/pcheck.cc156
-rw-r--r--src/pcheck.h50
-rw-r--r--src/pdabuild.cc2205
-rw-r--r--src/pdacodegen.cc698
-rw-r--r--src/pdacodegen.h107
-rw-r--r--src/pdagraph.cc533
-rw-r--r--src/pdagraph.h517
-rw-r--r--src/pdarun.c2265
-rw-r--r--src/pdarun.h471
-rw-r--r--src/pool.c248
-rw-r--r--src/pool.h73
-rw-r--r--src/print.c775
-rw-r--r--src/prog.lm88
-rw-r--r--src/program.c333
-rw-r--r--src/program.h186
-rw-r--r--src/redbuild.cc562
-rw-r--r--src/redbuild.h161
-rw-r--r--src/redfsm.cc1049
-rw-r--r--src/redfsm.h479
-rw-r--r--src/reduce.cc954
-rw-r--r--src/resolve.cc988
-rw-r--r--src/rtvector.h35
-rw-r--r--src/stream.c828
-rw-r--r--src/string.c281
-rw-r--r--src/struct.c185
-rw-r--r--src/struct.h180
-rw-r--r--src/synthesis.cc3370
-rw-r--r--src/tree.c1655
-rw-r--r--src/tree.h401
-rw-r--r--src/type.h43
-rw-r--r--src/version.h.cmake.in9
98 files changed, 59322 insertions, 6 deletions
diff --git a/src/.gitignore b/src/.gitignore
index 8b728643..082d9db4 100644
--- a/src/.gitignore
+++ b/src/.gitignore
@@ -1,11 +1,40 @@
+/*.o
+/*.lo
+/Makefile.in
+/Makefile
+/.*.d
+/colm
+/defs.h
/config.h
-/config.h.in
-/config.h.in~
+/version.h
+/tags
+/.deps
+/libcolm.a
+/libcolm.la
+/.libs
/stamp-h1
/stamp-h2
-/Makefile
+/bootstrap[012]
+/gen
+/tar
+/include
+
+/libprog.a
/CMakeFiles
/cmake_install.cmake
-/install_manifest.txt
+/*.exe
+
+# Common testing files.
+/tmp.lm
+/tmp.c
+/tmp
+/input[0-9]
+/input
+/output
+/log
+
+/colm-wrap
+
+/config.h.in
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 7afa276d..3d855523 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -1 +1,187 @@
-configure_file(config.h.cmake.in config.h @ONLY)
+# Package name
+set(_PACKAGE_NAME colm)
+
+# Read project configuration from ../configure.ac file.
+# Only the lines that assign COLM_VERSION or COLM_PUBDATE are kept; the value
+# between the double quotes is captured and stripped of surrounding whitespace.
+file(STRINGS "${CMAKE_CURRENT_LIST_DIR}/../configure.ac" _PROJECT_CONFIGS
+ REGEX "(COLM_VERSION=)|(COLM_PUBDATE=)")
+foreach(_PROJECT_CONFIG ${_PROJECT_CONFIGS})
+ # The capture is quoted so that a value containing ';' is passed to
+ # string(STRIP) as one argument instead of being split into several.
+ if(_PROJECT_CONFIG MATCHES "COLM_VERSION=\"([^\"]+)")
+ string(STRIP "${CMAKE_MATCH_1}" COLM_VERSION)
+ endif()
+ if(_PROJECT_CONFIG MATCHES "COLM_PUBDATE=\"([^\"]+)")
+ string(STRIP "${CMAKE_MATCH_1}" COLM_PUBDATE)
+ endif()
+endforeach()
+
+# COLM_VERSION is passed to write_basic_package_version_file() further down
+# in this file; make a failed lookup visible instead of silently continuing
+# with an empty value.
+if("${COLM_VERSION}" STREQUAL "")
+ message(WARNING "COLM_VERSION was not found in ../configure.ac")
+endif()
+
+# Generate headers
+configure_file(version.h.cmake.in version.h @ONLY)
+configure_file(defs.h.cmake.in defs.h @ONLY)
+configure_file(colm-config.cmake.in
+"${PROJECT_BINARY_DIR}/${_PACKAGE_NAME}-config.cmake" @ONLY)
+
+# Headers that make up the runtime's public interface. Most live in the
+# source directory; defs.h is produced by configure_file() and is therefore
+# referenced by its location in the build directory.
+set(RUNTIME_HDR
+ bytecode.h
+ debug.h
+ pool.h
+ input.h
+ pdarun.h
+ map.h
+ type.h
+ tree.h
+ struct.h
+ program.h
+ colm.h
+ internal.h)
+
+list(APPEND RUNTIME_HDR "${CMAKE_CURRENT_BINARY_DIR}/defs.h")
+
+# Other CMake modules
+include(GNUInstallDirs)
+
+# libcolm
+#
+# The runtime library, built from the C sources listed below. No STATIC or
+# SHARED keyword is given, so the library type follows BUILD_SHARED_LIBS.
+
+add_library(libcolm
+ map.c pdarun.c list.c input.c stream.c debug.c
+ codevect.c pool.c string.c tree.c iter.c
+ bytecode.c program.c struct.c commit.c
+ print.c)
+
+# Include paths are PUBLIC, so they also apply to targets that link libcolm.
+# Within the build tree these are the parent of this source directory plus
+# the build directories; for an installed package it is the install include
+# directory.
+target_include_directories(libcolm
+ PUBLIC
+ $<BUILD_INTERFACE:${CMAKE_CURRENT_LIST_DIR}/..>
+ $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>
+ $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}/..>
+ $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}/../src>
+ $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>)
+
+# The target is called libcolm, but the produced file uses the base name
+# "colm".
+set_target_properties(libcolm PROPERTIES
+ OUTPUT_NAME colm)
+
+# libprog
+#
+# The C++ sources shared by every executable defined in this file
+# (bootstrap0, bootstrap1, bootstrap2 and colm all link against it).
+# Each header is listed once; the second colm.h entry was redundant.
+
+add_library(libprog
+ buffer.h bytecode.h colm.h debug.h dotgen.h fsmcodegen.h fsmgraph.h
+ input.h keyops.h map.h compiler.h
+ parsetree.h pcheck.h pdacodegen.h pdagraph.h pdarun.h pool.h redbuild.h
+ redfsm.h rtvector.h tree.h global.h parser.h cstring.h
+ internal.h
+ resolve.cc lookup.cc synthesis.cc parsetree.cc
+ fsmstate.cc fsmbase.cc fsmattach.cc fsmmin.cc
+ fsmgraph.cc pdagraph.cc pdabuild.cc pdacodegen.cc fsmcodegen.cc
+ redfsm.cc fsmexec.cc redbuild.cc closure.cc fsmap.cc
+ dotgen.cc pcheck.cc ctinput.cc declare.cc codegen.cc
+ exports.cc compiler.cc parser.cc reduce.cc)
+
+# Build-tree include paths only: this directory, its parent, the aapl
+# directory next to it, and the matching build directories. libprog is not
+# part of the install(TARGETS ...) call, so no INSTALL_INTERFACE is given.
+target_include_directories(libprog
+ PUBLIC
+ $<BUILD_INTERFACE:${CMAKE_CURRENT_LIST_DIR}>
+ $<BUILD_INTERFACE:${CMAKE_CURRENT_LIST_DIR}/..>
+ $<BUILD_INTERFACE:${CMAKE_CURRENT_LIST_DIR}/../aapl>
+ $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>
+ $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}/..>)
+
+set_target_properties(libprog PROPERTIES
+ OUTPUT_NAME prog)
+
+# bootstrap0
+
+# First bootstrap stage: compiled with CONS_INIT defined and used below to
+# generate the sources of bootstrap1.
+add_executable(bootstrap0
+ consinit.cc consinit.h main.cc)
+
+target_link_libraries(bootstrap0 PRIVATE libprog libcolm)
+
+set_property(TARGET bootstrap0 APPEND PROPERTY
+ COMPILE_DEFINITIONS CONS_INIT ${common_COMPILE_DEFINITIONS})
+
+# COMPILE_FLAGS is a single command-line string, not a list. APPEND would
+# join a new value to an existing one with ';', so APPEND_STRING with a
+# leading space is used instead.
+set_property(TARGET bootstrap0 APPEND_STRING PROPERTY
+ COMPILE_FLAGS " -fpermissive")
+
+# bootstrap1
+
+# The generator commands below run inside <build>/gen, so the directory has
+# to exist at configure time. file(MAKE_DIRECTORY) replaces the deprecated
+# make_directory() command.
+file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/gen")
+
+# Run bootstrap0 to produce the parser and interface sources for bootstrap1.
+# DEPENDS on the bootstrap0 target makes the command re-run whenever
+# bootstrap0 is rebuilt; VERBATIM keeps argument escaping platform-independent.
+add_custom_command(OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/gen/parse1.c"
+ "${CMAKE_CURRENT_BINARY_DIR}/gen/if1.h"
+ "${CMAKE_CURRENT_BINARY_DIR}/gen/if1.cc"
+ COMMAND bootstrap0
+ ARGS -c -o parse1.c -e if1.h -x if1.cc
+ DEPENDS bootstrap0
+ WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/gen"
+ VERBATIM)
+
+add_executable(bootstrap1
+ loadinit.h loadinit.cc main.cc
+ "${CMAKE_CURRENT_BINARY_DIR}/gen/parse1.c"
+ "${CMAKE_CURRENT_BINARY_DIR}/gen/if1.cc")
+
+target_link_libraries(bootstrap1 PRIVATE libprog libcolm)
+
+set_property(TARGET bootstrap1 APPEND PROPERTY
+ COMPILE_DEFINITIONS LOAD_INIT ${common_COMPILE_DEFINITIONS})
+
+# bootstrap2
+
+# Run bootstrap1 on colm.lm to produce the parser and interface sources for
+# bootstrap2. Both the generator and the grammar are listed in DEPENDS so the
+# outputs are regenerated when either changes.
+add_custom_command(OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/gen/parse2.c"
+ "${CMAKE_CURRENT_BINARY_DIR}/gen/if2.h"
+ "${CMAKE_CURRENT_BINARY_DIR}/gen/if2.cc"
+ COMMAND bootstrap1
+ ARGS -c -o parse2.c -e if2.h -x if2.cc "${CMAKE_CURRENT_LIST_DIR}/colm.lm"
+ DEPENDS bootstrap1 "${CMAKE_CURRENT_LIST_DIR}/colm.lm"
+ WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/gen"
+ VERBATIM)
+
+add_executable(bootstrap2
+ main.cc loadboot2.cc loadfinal.h version.h
+ "${CMAKE_CURRENT_BINARY_DIR}/gen/parse2.c"
+ "${CMAKE_CURRENT_BINARY_DIR}/gen/if2.cc")
+
+target_link_libraries(bootstrap2 PRIVATE libprog libcolm)
+
+set_property(TARGET bootstrap2 APPEND PROPERTY
+ COMPILE_DEFINITIONS LOAD_COLM ${common_COMPILE_DEFINITIONS})
+
+# colm
+
+# Run bootstrap2 on prog.lm to produce the parser and interface sources for
+# the final colm executable. This source directory is passed with -I; the
+# automake rule for the same step lists both prog.lm and colm.lm as
+# prerequisites, so both are tracked here together with the generator.
+add_custom_command(OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/gen/parse3.c"
+ "${CMAKE_CURRENT_BINARY_DIR}/gen/if3.h"
+ "${CMAKE_CURRENT_BINARY_DIR}/gen/if3.cc"
+ COMMAND bootstrap2
+ ARGS -I "${CMAKE_CURRENT_LIST_DIR}" -c -o parse3.c -e if3.h -x if3.cc "${CMAKE_CURRENT_LIST_DIR}/prog.lm"
+ DEPENDS bootstrap2
+ "${CMAKE_CURRENT_LIST_DIR}/prog.lm"
+ "${CMAKE_CURRENT_LIST_DIR}/colm.lm"
+ WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/gen"
+ VERBATIM)
+
+add_executable(colm
+ main.cc loadcolm.cc loadfinal.h version.h
+ "${CMAKE_CURRENT_BINARY_DIR}/gen/parse3.c"
+ "${CMAKE_CURRENT_BINARY_DIR}/gen/if3.cc")
+
+# Optional fully static executable; only wired up for the GNU toolchain.
+if(BUILD_STANDALONE)
+ if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
+ target_link_libraries(colm PRIVATE -static)
+ else()
+ message(FATAL_ERROR "Unsupported toolset for standalone build.")
+ endif()
+endif()
+
+# Every target_link_libraries() call on colm uses the PRIVATE keyword; the
+# keyword and plain signatures must not be mixed on one target.
+target_link_libraries(colm PRIVATE libprog libcolm)
+
+set_property(TARGET colm APPEND PROPERTY
+ COMPILE_DEFINITIONS LOAD_COLM ${common_COMPILE_DEFINITIONS})
+
+# Installation and package export. Everything in this block is skipped
+# unless <PROJECT_NAME>_MAKE_INSTALL evaluates to true.
+if(${PROJECT_NAME}_MAKE_INSTALL)
+ # Where the CMake package files go; can be overridden from the cache.
+ if(NOT DEFINED CMAKE_INSTALL_CMAKEDIR)
+ set(CMAKE_INSTALL_CMAKEDIR
+ "${CMAKE_INSTALL_LIBDIR}/cmake/${_PACKAGE_NAME}"
+ CACHE STRING "CMake packages")
+ endif()
+ # Runtime headers are installed under <includedir>/colm.
+ install(FILES ${RUNTIME_HDR}
+ DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/colm")
+ # Only the runtime library and the colm executable are installed and
+ # exported; libprog and the bootstrap executables are not.
+ install(TARGETS libcolm colm
+ EXPORT ${_PACKAGE_NAME}-targets
+ RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}"
+ LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}"
+ ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}")
+ # Export file for the install tree.
+ install(EXPORT ${_PACKAGE_NAME}-targets
+ NAMESPACE ${_PACKAGE_NAME}::
+ DESTINATION "${CMAKE_INSTALL_CMAKEDIR}")
+ # Export file for using the targets straight from the build tree.
+ export(EXPORT ${_PACKAGE_NAME}-targets
+ NAMESPACE ${_PACKAGE_NAME}::
+ FILE "${PROJECT_BINARY_DIR}/${_PACKAGE_NAME}-targets.cmake")
+ include(CMakePackageConfigHelpers)
+ # Version file built from the COLM_VERSION read out of configure.ac.
+ write_basic_package_version_file(
+ "${PROJECT_BINARY_DIR}/${_PACKAGE_NAME}-config-version.cmake"
+ VERSION ${COLM_VERSION}
+ COMPATIBILITY AnyNewerVersion)
+ # The config file itself is generated by configure_file() earlier in this
+ # file.
+ install(FILES
+ "${PROJECT_BINARY_DIR}/${_PACKAGE_NAME}-config.cmake"
+ "${PROJECT_BINARY_DIR}/${_PACKAGE_NAME}-config-version.cmake"
+ DESTINATION "${CMAKE_INSTALL_CMAKEDIR}")
+endif()
diff --git a/src/ChangeLog b/src/ChangeLog
new file mode 100644
index 00000000..1479d2d5
--- /dev/null
+++ b/src/ChangeLog
@@ -0,0 +1,92 @@
+colm 0.12.0 - Feb 1, 2014
+-------------------------
+ * allow matching against reference types
+ * refcount fixes for setting references
+ * can use 'in' expr to search ref types
+ * can take refs of temps in expressions
+ * error handling improvements
+ * fixed the colm input open check
+ * added a default capture name to all production definition rhs elements,
+ defaults to the type name
+ * allow var ref as the target of a send construct
+ * added the cast operator
+ * return nil from open if file open fails
+ * improvements to locals downrefs (large func offset overflow and iters)
+ * fixed the typeref for stdout and stderr, now working
+ * evaluate print arguments left to right
+ * improved separation of declare, resolve/lookup, and compile passes
+ * lookup of types in cons/pats uses the cons type or pattern tree namespace
+ * added the void type, useful for calling func in send expression
+ * can re-enter namespaces that already exist
+ * don't search parent scopes when qualifications are present
+ * added a new syntax for literal tokens; a single backtick starts the
+ literal, ends at whitespace or ] in second or greater position
+ * removed comma as separator in literal statement
+ * single-quoted strings are now treated like double-quoted strings; they
+ may appear in expressions and are concatenated with the other string forms
+ * new syntax for collect-ignore tokens "token <id> -"
+ * added the system function, which calls C's system
+ * fixes for contiguous stack regions that fixes some segfaults
+ * don't use stack top offsets in print instructions
+ * allow make_tree anywhere
+ * don't use sp offset for make_tree, removes need for contiguous
+ * eliminated stack offset from make-token instruction
+ * print function names in call op debug stmts
+
+colm 0.11 - May 26, 2013
+------------------------
+ * Require <> around ref and ptr type declarations (eg: ptr<type_ref>)
+ * Added production labels, which follow a production with the form
+ :Label. Labels are exposed in the C++ interface using an enum. They
+ are prefixed with _ to avoid conflict with member access functions.
+ * It's now possible to access Tree::tokdata from the C++ interface.
+ * The print, stream print, stream push, and stream append operations all
+ no longer trim trees by default. This makes these functions
+ consistent with constructor, which refrains from trimming to avoid
+ extra work. A construct with a tree trim is accomplished with an
+ additional operator. The above print and stream operations now have
+ the same usage patterns.
+ * Fixes to prodNum preservation through tree copy, and to the parser
+
+colm 0.10 - Apr 18, 2013
+------------------------
+ * Implemented the colm parser in colm. Bootstrapping with a parse tree
+ construction in C++. Using that generated parser to parse the colm
+ grammar, from which the primary parser is produced.
+ * Improved error reporting.
+ * Added some tracking of progress through a stream for better
+ backtracking over includes that are pushed onto a stream.
+ * Updated the vim syntax, no longer highlighting parser, list, etc
+ (types).
+ * Parse expressions are now returning trees again. Returning the parser
+ results in semantics inconsistent with many other areas of the
+ program.
+ * Can now put '.' or eos on the end of a send expression to terminate
+ the parse
+ * Removed the original send syntax (<<).
+ * Lowercased and otherwise improved the C interface.
+ * The default binary name no longer has .bin suffix.
+ * Fixed -o option, now using -c to mean compile on (library). The
+ export filename options are now -e and -x.
+
+colm 0.9 - Feb 19, 2013
+-----------------------
+ * The parse loop now scans data that is owned by the input stream. It
+ is copied into a contiguous block in the scanner when the token is
+ consumed.
+ * The syntax of lexical regions was altered to omit the name. The
+ curlies were replaced with lex ... end syntax.
+ * The syntax of namespaces was altered. Curlies were replaced with
+ namespace <ID> ... end.
+
+colm 0.8 - Dec 29, 2012
+-----------------------
+ * The parse statement now includes a call to the finish operation. It
+ returns a value of type parser<Type>. The result tree and any error
+ message can be retrieved from this object using 'tree' and 'error'
+ members.
+ * Dropped curly brackets and the name from the syntax of lexical region
+ definition. Using "lex ... end"
+ * Dropped curly brackets from the syntax of namespaces and context blocks.
+ Using "namespace N ... end N" and "context N ... end N"
+ * Now have a growable stack instead of a large, fixed, pre-allocated stack.
diff --git a/src/Makefile.am b/src/Makefile.am
new file mode 100644
index 00000000..5a53f040
--- /dev/null
+++ b/src/Makefile.am
@@ -0,0 +1,207 @@
+#
+# Copyright 2007-2018 Adrian Thurston <thurston@colm.net>
+#
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to
+# deal in the Software without restriction, including without limitation the
+# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+# sell copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+AM_CPPFLAGS = -I$(top_srcdir)/aapl
+
+AUTOMAKE_OPTIONS = subdir-objects
+
+bin_PROGRAMS = colm
+bin_SCRIPTS = colm-wrap
+
+RUNTIME_SRC = \
+ map.c pdarun.c list.c input.c stream.c debug.c \
+ codevect.c pool.c string.c tree.c iter.c \
+ bytecode.c program.c struct.c commit.c \
+ print.c
+
+RUNTIME_HDR = \
+ config.h bytecode.h defs.h debug.h pool.h input.h \
+ pdarun.h map.h type.h tree.h struct.h program.h colm.h \
+ internal.h colmex.h
+
+lib_LTLIBRARIES = libcolm.la
+noinst_LIBRARIES = libprog.a
+
+libcolm_la_SOURCES = $(RUNTIME_SRC)
+libcolm_la_LDFLAGS = -release ${COLM_VERSION} -no-undefined
+
+if LINKER_NO_UNDEFINED
+libcolm_la_LDFLAGS += -Wl,--no-undefined
+endif
+
+common_CFLAGS = \
+ -Wall \
+ -DPREFIX='"$(prefix)"'
+
+libprog_a_SOURCES = \
+ buffer.h bytecode.h colm.h debug.h dotgen.h fsmcodegen.h fsmgraph.h \
+ input.h keyops.h map.h compiler.h \
+ parsetree.h pcheck.h pdacodegen.h pdagraph.h pdarun.h pool.h redbuild.h \
+ redfsm.h rtvector.h tree.h version.h global.h colm.h parser.h cstring.h \
+ internal.h \
+ \
+ resolve.cc lookup.cc synthesis.cc parsetree.cc \
+ fsmstate.cc fsmbase.cc fsmattach.cc fsmmin.cc \
+ fsmgraph.cc pdagraph.cc pdabuild.cc pdacodegen.cc fsmcodegen.cc \
+ redfsm.cc fsmexec.cc redbuild.cc closure.cc fsmap.cc \
+ dotgen.cc pcheck.cc ctinput.cc declare.cc codegen.cc \
+ exports.cc compiler.cc parser.cc reduce.cc
+
+libprog_a_CXXFLAGS = $(common_CFLAGS)
+
+colmincdir = $(includedir)/colm
+
+colminc_HEADERS = $(RUNTIME_HDR)
+
+BUILT_SOURCES = version.h include/colm
+
+if EXTERNAL_COLM
+
+#
+# Generate the parser using a single run with an external colm program.
+#
+BUILD_PARSE_3_WITH = $(EXTERNAL_COLM)/bin/colm$(EXEEXT)
+AM_CPPFLAGS += $(EXTERNAL_INC)
+AM_LDFLAGS = $(EXTERNAL_LIBS)
+
+else
+
+noinst_PROGRAMS = bootstrap0 bootstrap1 bootstrap2
+
+BUILD_PARSE_3_WITH = $(builddir)/bootstrap2$(EXEEXT)
+AM_CPPFLAGS += -Iinclude
+AM_LDFLAGS = -L.
+
+#
+# bootstrap0: The input program for bootstrap0 is constructed using internal
+# data structure constructors. It produces a program that can parse a grammar
+# using limited features. No code is supported.
+#
+bootstrap0_CXXFLAGS = $(common_CFLAGS) -DCONS_INIT
+bootstrap0_SOURCES = consinit.cc consinit.h main.cc version.h
+bootstrap0_LDADD = libprog.a libcolm.la
+
+#
+# bootstrap1: The input program is specified using a stripped down colm syntax.
+# It produces a program that can parse most colm syntax, with the exception of
+# the colm syntax/semantics that is implemented in colm itself.
+#
+
+gen/bootstrap1.pack: colm-wrap bootstrap0$(EXEEXT)
+ mkdir -p gen
+ $(builddir)/colm-wrap -w bootstrap0 -o $@ \
+ -c -p gen/parse1.c -e gen/if1.h -x gen/if1.cc no-input
+
+gen/parse1.c: gen/bootstrap1.pack
+ $(builddir)/colm-wrap -o $@ $<
+
+gen/if1.h: gen/bootstrap1.pack
+ $(builddir)/colm-wrap -o $@ $<
+
+gen/if1.cc: gen/bootstrap1.pack
+ $(builddir)/colm-wrap -o $@ $<
+
+# Listing if1.h in BUILT_SOURCES isn't sufficient because it depends on the
+# building of bootstrap0. Automake wants to put all built sources into a list
+# of files built before ANYTHING else (which includes bootstrap0). Not sure if
+# it is rejected by automake or make. But in any case, it doesn't work. Fixed
+# with the following additional dependency.
+BUILT_SOURCES += gen/parse1.c gen/if1.h gen/if1.cc
+loadinit.cc: gen/if1.h
+
+bootstrap1_CXXFLAGS = $(common_CFLAGS) -DLOAD_INIT
+bootstrap1_CFLAGS = $(common_CFLAGS)
+bootstrap1_SOURCES = loadinit.h loadinit.cc main.cc version.h
+nodist_bootstrap1_SOURCES = gen/if1.h gen/if1.cc gen/parse1.c
+bootstrap1_LDADD = libprog.a libcolm.la
+
+#
+# bootstrap2: The input program is specified using the colm grammar used in
+# bootstrap1, plus some rewrite rules that implement the final parts of
+# syntax/semantics. It produces a program that can parse full colm programs,
+# and thus generates the sources used in the colm binary.
+#
+
+gen/bootstrap2.pack: colm-wrap bootstrap1$(EXEEXT) colm.lm
+ mkdir -p gen
+ $(builddir)/colm-wrap -w bootstrap1 -o $@ \
+ -c -p gen/parse2.c -e gen/if2.h -x gen/if2.cc colm.lm
+
+gen/parse2.c: gen/bootstrap2.pack
+ $(builddir)/colm-wrap -o $@ $<
+
+gen/if2.h: gen/bootstrap2.pack
+ $(builddir)/colm-wrap -o $@ $<
+
+gen/if2.cc: gen/bootstrap2.pack
+ $(builddir)/colm-wrap -o $@ $<
+
+BUILT_SOURCES += gen/parse2.c gen/if2.h gen/if2.cc
+loadboot2.cc: gen/if2.h
+
+bootstrap2_CXXFLAGS = $(common_CFLAGS) -DLOAD_COLM
+bootstrap2_CFLAGS = $(common_CFLAGS)
+bootstrap2_SOURCES = main.cc loadboot2.cc loadfinal.h version.h
+nodist_bootstrap2_SOURCES = gen/if2.h gen/if2.cc gen/parse2.c
+bootstrap2_LDADD = libprog.a libcolm.la
+
+endif
+
+gen/bootstrap3.pack: colm-wrap $(BUILD_PARSE_3_WITH) prog.lm colm.lm
+ mkdir -p gen
+ $(builddir)/colm-wrap -w $(BUILD_PARSE_3_WITH) -o $@ \
+ -c -p gen/parse3.c -e gen/if3.h -x gen/if3.cc prog.lm
+
+gen/parse3.c: gen/bootstrap3.pack
+ $(builddir)/colm-wrap -o $@ $<
+
+gen/if3.h: gen/bootstrap3.pack
+ $(builddir)/colm-wrap -o $@ $<
+
+gen/if3.cc: gen/bootstrap3.pack
+ $(builddir)/colm-wrap -o $@ $<
+
+BUILT_SOURCES += gen/parse3.c gen/if3.h gen/if3.cc
+loadcolm.cc: gen/if3.h
+
+colm_CXXFLAGS = $(common_CFLAGS) -DLOAD_COLM
+colm_CFLAGS = $(common_CFLAGS)
+colm_SOURCES = main.cc loadcolm.cc loadfinal.h version.h
+nodist_colm_SOURCES = gen/if3.h gen/if3.cc gen/parse3.c
+colm_LDADD = libprog.a -lcolm
+
+include/colm:
+ mkdir -p include
+ ln -s .. include/colm
+
+version.h: Makefile
+ echo '#define COLM_VERSION "$(COLM_VERSION)"' > version.h
+ echo '#define COLM_PUBDATE "$(COLM_PUBDATE)"' >> version.h
+
+config.h: ../src/config.h
+ cp ../src/config.h ./
+
+CLEANFILES = $(BUILT_SOURCES) gen/bootstrap1.pack gen/bootstrap2.pack gen/bootstrap3.pack
+EXTRA_DIST = prog.lm colm.lm loadfinal.cc colm-wrap.sh
+
+colm-wrap: colm-wrap.sh
+ @$(top_srcdir)/sedsubst $< $@ -w,+x $(SED_SUBST)
diff --git a/src/buffer.h b/src/buffer.h
new file mode 100644
index 00000000..58db85de
--- /dev/null
+++ b/src/buffer.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright 2003-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _COLM_BUFFER_H
+#define _COLM_BUFFER_H
+
+#define BUFFER_INITIAL_SIZE 4096
+
+/* An automatically grown buffer for collecting tokens. Always reuses space;
+ * never down resizes.
+ *
+ * Storage starts at BUFFER_INITIAL_SIZE bytes and doubles each time an append
+ * finds it full. Nothing here NUL-terminates the contents; 'length' is the
+ * number of valid bytes in 'data'.
+ *
+ * NOTE(review): the results of malloc() and realloc() are not checked, and
+ * no copy constructor or assignment operator is declared, so a copied Buffer
+ * would free the same 'data' twice. Confirm that callers never copy one. */
+struct Buffer
+{
+ /* Allocate the initial storage and start out empty. */
+ Buffer()
+ {
+ data = (char*) malloc( BUFFER_INITIAL_SIZE );
+ allocated = BUFFER_INITIAL_SIZE;
+ length = 0;
+ }
+ ~Buffer() { free(data); }
+
+ /* Add one character at the end, doubling the storage first if it is full. */
+ void append( char p )
+ {
+ if ( length == allocated ) {
+ allocated *= 2;
+ data = (char*) realloc( data, allocated );
+ }
+ data[length++] = p;
+ }
+
+ /* Forget the contents but keep the allocated storage for reuse. */
+ void clear() { length = 0; }
+
+ char *data; /* Start of the storage. */
+ int allocated; /* Size of the storage in bytes. */
+ int length; /* Number of bytes currently in use. */
+};
+
+#endif /* _COLM_BUFFER_H */
+
diff --git a/src/bytecode.c b/src/bytecode.c
new file mode 100644
index 00000000..39aee070
--- /dev/null
+++ b/src/bytecode.c
@@ -0,0 +1,5025 @@
+/*
+ * Copyright 2006-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <colm/bytecode.h>
+
+#include <sys/types.h>
+#if defined(HAVE_SYS_WAIT_H)
+#include <sys/wait.h>
+#endif
+#include <assert.h>
+#include <string.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+
+#include <colm/pool.h>
+#include <colm/debug.h>
+#include <colm/colm.h>
+
+#define TRUE_VAL 1
+#define FALSE_VAL 0
+
+#if SIZEOF_LONG != 4 && SIZEOF_LONG != 8
+ #error "SIZEOF_LONG contained an unexpected value"
+#endif
+
+#define read_byte( i ) do { \
+ i = ((uchar) *instr++); \
+} while(0)
+
+#define read_half( i ) do { \
+ i = ((word_t) *instr++); \
+ i |= ((word_t) *instr++) << 8; \
+} while(0)
+
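+/* Decode a machine word from the instruction stream one byte at a time,
+ * least-significant byte first. The number of bytes follows SIZEOF_LONG.
+ * There are better ways. */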
+#if SIZEOF_LONG == 4
+
+ #define read_type( type, i ) do { \
+ word_t _w; \
+ _w = ((word_t) *instr++); \
+ _w |= ((word_t) *instr++) << 8; \
+ _w |= ((word_t) *instr++) << 16; \
+ _w |= ((word_t) *instr++) << 24; \
+ i = (type) _w; \
+ } while(0)
+
+ #define read_type_p( Type, i, p ) do { \
+ i = ((Type) p[0]); \
+ i |= ((Type) p[1]) << 8; \
+ i |= ((Type) p[2]) << 16; \
+ i |= ((Type) p[3]) << 24; \
+ } while(0)
+
+ #define consume_word() instr += 4
+
+#else
+
+ #define read_type( type, i ) do { \
+ word_t _w; \
+ _w = ((word_t) *instr++); \
+ _w |= ((word_t) *instr++) << 8; \
+ _w |= ((word_t) *instr++) << 16; \
+ _w |= ((word_t) *instr++) << 24; \
+ _w |= ((word_t) *instr++) << 32; \
+ _w |= ((word_t) *instr++) << 40; \
+ _w |= ((word_t) *instr++) << 48; \
+ _w |= ((word_t) *instr++) << 56; \
+ i = (type) _w; \
+ } while(0)
+
+ #define read_type_p( type, i, p ) do { \
+ i = ((type) p[0]); \
+ i |= ((type) p[1]) << 8; \
+ i |= ((type) p[2]) << 16; \
+ i |= ((type) p[3]) << 24; \
+ i |= ((type) p[4]) << 32; \
+ i |= ((type) p[5]) << 40; \
+ i |= ((type) p[6]) << 48; \
+ i |= ((type) p[7]) << 56; \
+ } while(0)
+
+ #define consume_word() instr += 8
+#endif
+
+#define read_tree( i ) read_type( tree_t*, i )
+#define read_parser( i ) read_type( parser_t*, i )
+#define read_word( i ) read_type( word_t, i )
+#define read_stream( i ) read_type( stream_t*, i )
+#define read_input( i ) read_type( input_t*, i )
+
+#define read_word_p( i, p ) read_type_p( word_t, i, p )
+
+#define consume_byte() instr += 1
+#define consume_half() instr += 2
+
+static void rcode_downref( program_t *prg, tree_t **sp, code_t *instr );
+
+/* Open the program's stdin stream on file descriptor 0, unless
+ * prg->stdin_val has already been set. */
+static void make_stdin( program_t *prg )
+{
+ if ( prg->stdin_val == 0 )
+ prg->stdin_val = colm_stream_open_fd( prg, "<stdin>", 0 );
+}
+
+/* Open the program's stdout stream on file descriptor 1, unless
+ * prg->stdout_val has already been set. */
+static void make_stdout( program_t *prg )
+{
+ if ( prg->stdout_val == 0 )
+ prg->stdout_val = colm_stream_open_fd( prg, "<stdout>", 1 );
+}
+
+/* Open the program's stderr stream on file descriptor 2, unless
+ * prg->stderr_val has already been set. */
+static void make_stderr( program_t *prg )
+{
+ if ( prg->stderr_val == 0 )
+ prg->stderr_val = colm_stream_open_fd( prg, "<stderr>", 2 );
+}
+
+/* Flush the stdout and stderr streams through their impl's flush_stream
+ * function. A stream that was never opened (value still 0) is skipped. */
+static void flush_streams( program_t *prg )
+{
+ if ( prg->stdout_val != 0 ) {
+ struct stream_impl *si = prg->stdout_val->impl;
+ si->funcs->flush_stream( prg, si );
+ }
+
+ if ( prg->stderr_val != 0 ) {
+ struct stream_impl *si = prg->stderr_val->impl;
+ si->funcs->flush_stream( prg, si );
+ }
+}
+
+/* Store val as the context of the parser's pda_run. The prg and sp
+ * arguments are not used here. */
+void colm_parser_set_context( program_t *prg, tree_t **sp, parser_t *parser, struct_t *val )
+{
+ parser->pda_run->context = val;
+}
+
+/* Render tree into a temporary collector with colm_print_tree_collect_xml()
+ * and return the collected text as a string head. The attrs argument is
+ * accepted but not used by this function. */
+static head_t *tree_to_str_xml( program_t *prg, tree_t **sp, tree_t *tree, int trim, int attrs )
+{
+ /* Collect the tree data. */
+ str_collect_t collect;
+ init_str_collect( &collect );
+
+ colm_print_tree_collect_xml( prg, sp, &collect, tree, trim );
+
+ /* Build the result string from the collected text. */
+ head_t *ret = string_alloc_full( prg, collect.data, collect.length );
+
+ str_collect_destroy( &collect );
+
+ return ret;
+}
+
+/* Same as tree_to_str_xml(), except that the text is produced by
+ * colm_print_tree_collect_xml_ac(). The attrs argument is accepted but not
+ * used by this function. */
+static head_t *tree_to_str_xml_ac( program_t *prg, tree_t **sp, tree_t *tree, int trim, int attrs )
+{
+ /* Collect the tree data. */
+ str_collect_t collect;
+ init_str_collect( &collect );
+
+ colm_print_tree_collect_xml_ac( prg, sp, &collect, tree, trim );
+
+ /* Build the result string from the collected text. */
+ head_t *ret = string_alloc_full( prg, collect.data, collect.length );
+
+ str_collect_destroy( &collect );
+
+ return ret;
+}
+
+/* Render tree into a temporary collector with colm_postfix_tree_collect()
+ * and return the collected text as a string head. The attrs argument is
+ * accepted but not used by this function. */
+static head_t *tree_to_str_postfix( program_t *prg, tree_t **sp, tree_t *tree, int trim, int attrs )
+{
+ /* Collect the tree data. */
+ str_collect_t collect;
+ init_str_collect( &collect );
+
+ colm_postfix_tree_collect( prg, sp, &collect, tree, trim );
+
+ /* Build the result string from the collected text. */
+ head_t *ret = string_alloc_full( prg, collect.data, collect.length );
+
+ str_collect_destroy( &collect );
+
+ return ret;
+}
+
+/* Prepend length characters of data, tagged with location loc, to the input
+ * through its prepend_data function. */
+static void input_push_text( struct colm_program *prg, struct input_impl *is,
+ struct colm_location *loc, const char *data, long length )
+{
+ is->funcs->prepend_data( prg, is, loc, colm_alph_from_cstr( data ), length );
+}
+
+/* Prepend a tree to the input through its prepend_tree function; the ignore
+ * flag is passed through unchanged. */
+static void colm_stream_push_tree( struct colm_program *prg, struct input_impl *is,
+ tree_t *tree, int ignore )
+{
+ is->funcs->prepend_tree( prg, is, tree, ignore );
+}
+
+/* Prepend a stream to the input through its prepend_stream function. */
+static void colm_stream_push_stream( struct colm_program *prg, struct input_impl *is, stream_t *stream )
+{
+ is->funcs->prepend_stream( prg, is, stream );
+}
+
+/* Reverse an earlier push onto the input. A negative length selects
+ * undo_prepend_tree; any other length undoes that many characters of
+ * prepended data. The tree returned by undo_prepend_tree is discarded; the
+ * downref that used to follow is commented out. */
+static void colm_undo_stream_push( program_t *prg, tree_t **sp, struct input_impl *is, long length )
+{
+ if ( length < 0 ) {
+ /* tree_t *tree = */ is->funcs->undo_prepend_tree( prg, is );
+ // colm_tree_downref( prg, sp, tree );
+ }
+ else {
+ is->funcs->undo_prepend_data( prg, is, length );
+ }
+}
+
+
+/* Print the input tree to text (honouring trim) and append that text to
+ * dest. Returns the number of characters appended. A tree with id
+ * LEL_ID_PTR is not supported and trips an assertion. */
+static word_t stream_append_text( program_t *prg, tree_t **sp, input_t *dest, tree_t *input, int trim )
+{
+ long length = 0;
+ struct input_impl *impl = input_to_impl( dest );
+
+ if ( input->id == LEL_ID_PTR ) {
+ assert(false);
+ }
+ else {
+ /* Collect the tree data. */
+ str_collect_t collect;
+ init_str_collect( &collect );
+ colm_print_tree_collect( prg, sp, &collect, input, trim );
+
+ /* Load it into the input. */
+ impl->funcs->append_data( prg, impl, colm_alph_from_cstr( collect.data ), collect.length );
+ length = collect.length;
+ str_collect_destroy( &collect );
+ }
+
+ return length;
+}
+
+/* Append to_append to dest. A tree with id LEL_ID_STR is printed (without
+ * trimming) and appended as text, and the text length is returned. Any other
+ * tree gets an extra reference and is appended as a tree, in which case the
+ * return value is 0. A tree with id LEL_ID_PTR trips an assertion. */
+static word_t stream_append_tree( program_t *prg, tree_t **sp, input_t *dest, tree_t *to_append )
+{
+ long length = 0;
+ struct input_impl *impl = input_to_impl( dest );
+
+ if ( to_append->id == LEL_ID_PTR ) {
+ assert(false);
+ }
+ else if ( to_append->id == LEL_ID_STR ) {
+ /* Collect the tree data. */
+ str_collect_t collect;
+ init_str_collect( &collect );
+ colm_print_tree_collect( prg, sp, &collect, to_append, false );
+
+ /* Load it into the input. */
+ impl->funcs->append_data( prg, impl, colm_alph_from_cstr( collect.data ), collect.length );
+ length = collect.length;
+ str_collect_destroy( &collect );
+ }
+ else {
+ colm_tree_upref( prg, to_append );
+ impl->funcs->append_tree( prg, impl, to_append );
+ }
+
+ return length;
+}
+
+/* Append a stream to dest through its append_stream function. Always
+ * returns 0; the sp argument is not used. */
+static word_t stream_append_stream( program_t *prg, tree_t **sp, input_t *dest, stream_t *stream )
+{
+ long length = 0;
+
+ struct input_impl *impl = input_to_impl( dest );
+ impl->funcs->append_stream( prg, impl, stream );
+
+ return length;
+}
+
+ /* Undo an append of `input` to the input implementation `is`. Pointer trees
+  * are not accepted (asserts). Every other kind of tree is undone through
+  * undo_append_data with the supplied length.
+  *
+  * NOTE(review): string and non-string trees take the same undo_append_data
+  * path here -- confirm that non-string trees should not be undone through a
+  * tree-specific callback instead. */
+ static void stream_undo_append( program_t *prg, tree_t **sp,
+ 		struct input_impl *is, tree_t *input, long length )
+ {
+ 	if ( input->id == LEL_ID_PTR ) {
+ 		assert(false);
+ 		return;
+ 	}
+
+ 	is->funcs->undo_append_data( prg, is, length );
+ }
+
+ /* Undo an appended stream. The `input` and `length` arguments are unused. */
+ static void stream_undo_append_stream( program_t *prg, tree_t **sp, struct input_impl *is,
+ 		tree_t *input, long length )
+ {
+ 	(*is->funcs->undo_append_stream)( prg, is );
+ }
+
+ /* Pull data from `input` and return it as a string tree. The `length`
+  * argument carries the amount to pull as an integer stored in a tree_t
+  * pointer; it is converted with a plain cast. */
+ static tree_t *stream_pull_bc( program_t *prg, tree_t **sp, struct pda_run *pda_run,
+ 		input_t *input, tree_t *length )
+ {
+ 	long amount = (long)length;
+ 	struct input_impl *impl = input_to_impl( input );
+
+ 	head_t *pulled = colm_stream_pull( prg, sp, pda_run, impl, amount );
+ 	return construct_string( prg, pulled );
+ }
+
+
+ /* Return previously pulled characters to the input through the
+  * implementation's undo_consume_data function. */
+ static void undo_stream_pull( struct colm_program *prg, struct input_impl *is,
+ 		const char *data, long length )
+ {
+ 	(*is->funcs->undo_consume_data)( prg, is, colm_alph_from_cstr( data ), length );
+ }
+
+ /* Undo a pull: take the text held by the string tree `str` and give it back
+  * to `input`. */
+ static void undo_pull( program_t *prg, input_t *input, tree_t *str )
+ {
+ 	struct input_impl *impl = input_to_impl( input );
+ 	str_t *pulled = (str_t*)str;
+
+ 	const char *text = string_data( pulled->value );
+ 	long text_len = string_length( pulled->value );
+
+ 	undo_stream_pull( prg, impl, text, text_len );
+ }
+
+ /* Push `tree` onto the front of the input `in`.
+  *
+  * String trees are printed and pushed as text; the return value is the
+  * length of that text. Any other non-pointer tree is upref'd and pushed as
+  * a tree, in which case -1 is returned. Pointer trees are not accepted
+  * (asserts). */
+ static long input_push( program_t *prg, tree_t **sp, struct input_impl *in, tree_t *tree, int ignore )
+ {
+ 	if ( tree->id == LEL_ID_PTR ) {
+ 		assert(false);
+ 		return -1;
+ 	}
+
+ 	if ( tree->id != LEL_ID_STR ) {
+ 		colm_tree_upref( prg, tree );
+ 		colm_stream_push_tree( prg, in, tree, ignore );
+ 		return -1;
+ 	}
+
+ 	/* Pushing text as ignore should eventually be a compile error. Text is
+ 	 * for the scanner to interpret; to force an ignore, send a token. */
+ 	assert( !ignore );
+
+ 	/* Render the string tree to text. */
+ 	str_collect_t text;
+ 	init_str_collect( &text );
+ 	colm_print_tree_collect( prg, sp, &text, tree, false );
+
+ 	input_push_text( prg, in, tree->tokdata->location, text.data, text.length );
+ 	long pushed = text.length;
+ 	str_collect_destroy( &text );
+
+ 	return pushed;
+ }
+
+ /* Push `stream` onto the front of the input `in`. Always returns -1. */
+ static long input_push_stream( program_t *prg, tree_t **sp,
+ 		struct input_impl *in, stream_t *stream )
+ {
+ 	const long no_length = -1;
+
+ 	colm_stream_push_stream( prg, in, stream );
+ 	return no_length;
+ }
+
+ /* Store `tree` in local `field` of the current frame. A non-null tree must
+  * already hold at least one reference. */
+ static void set_local( execution_t *exec, long field, tree_t *tree )
+ {
+ 	assert( tree == 0 || tree->refs >= 1 );
+ 	vm_set_local( exec, field, tree );
+ }
+
+ /* Run split_tree on the tree held in local `field`, store the result back
+  * into the same local and return it. */
+ static tree_t *get_local_split( program_t *prg, execution_t *exec, long field )
+ {
+ 	tree_t *current = vm_get_local( exec, field );
+ 	tree_t *result = split_tree( prg, current );
+
+ 	vm_set_local( exec, field, result );
+ 	return result;
+ }
+
+ /* Walk the locals table from last entry to first and downref every local
+  * whose type is LI_Tree. Other kinds of locals are left untouched. */
+ static void downref_local_trees( program_t *prg, tree_t **sp,
+ 		execution_t *exec, struct local_info *locals, long locals_len )
+ {
+ 	long idx = locals_len;
+ 	while ( idx-- > 0 ) {
+ 		struct local_info *li = &locals[idx];
+ 		if ( li->type != LI_Tree )
+ 			continue;
+
+ 		debug( prg, REALM_BYTECODE, "local tree downref: %ld\n",
+ 				(long)li->offset );
+
+ 		tree_t *tree = (tree_t*) vm_get_local( exec, (long)li->offset );
+ 		colm_tree_downref( prg, sp, tree );
+ 	}
+ }
+
+ /* Release every local described by the locals table, walking it from the
+  * last entry to the first. Trees are downref'd, tree iterators and reverse
+  * tree iterators are destroyed, and user iterators are unwound. The stack
+  * pointer is passed by address because the iterator routines receive `psp`
+  * itself. */
+ static void downref_locals( program_t *prg, tree_t ***psp,
+ 		execution_t *exec, struct local_info *locals, long locals_len )
+ {
+ 	long idx = locals_len;
+ 	while ( idx-- > 0 ) {
+ 		struct local_info *li = &locals[idx];
+
+ 		switch ( li->type ) {
+ 			case LI_Tree: {
+ 				debug( prg, REALM_BYTECODE, "local tree downref: %ld\n",
+ 						(long)li->offset );
+ 				tree_t *tree = (tree_t*) vm_get_local( exec, (long)li->offset );
+ 				colm_tree_downref( prg, *psp, tree );
+ 				break;
+ 			}
+ 			case LI_Iter: {
+ 				debug( prg, REALM_BYTECODE, "local iter downref: %ld\n",
+ 						(long)li->offset );
+ 				tree_iter_t *iter = (tree_iter_t*) vm_get_plocal( exec, (long)li->offset );
+ 				colm_tree_iter_destroy( prg, psp, iter );
+ 				break;
+ 			}
+ 			case LI_RevIter: {
+ 				debug( prg, REALM_BYTECODE, "local rev iter downref: %ld\n",
+ 						(long)li->offset );
+ 				rev_tree_iter_t *riter = (rev_tree_iter_t*) vm_get_plocal( exec,
+ 						(long)li->offset );
+ 				colm_rev_tree_iter_destroy( prg, psp, riter );
+ 				break;
+ 			}
+ 			case LI_UserIter: {
+ 				debug( prg, REALM_BYTECODE, "local user iter downref: %ld\n",
+ 						(long)li->offset );
+ 				user_iter_t *uiter = (user_iter_t*) vm_get_local( exec, li->offset );
+ 				colm_uiter_unwind( prg, psp, uiter );
+ 				break;
+ 			}
+ 		}
+ 	}
+ }
+
+
+ /* Build a string tree for argv[0]. The length comes from argl[0] when an
+  * argl array is supplied, otherwise from strlen. The returned tree has been
+  * upref'd. Returns 0 when argc is not positive. */
+ static tree_t *construct_arg0( program_t *prg, int argc, const char **argv, const int *argl )
+ {
+ 	if ( argc <= 0 )
+ 		return 0;
+
+ 	size_t len;
+ 	if ( argl != 0 )
+ 		len = argl[0];
+ 	else
+ 		len = strlen( argv[0] );
+
+ 	head_t *head = colm_string_alloc_pointer( prg, argv[0], len );
+ 	tree_t *arg0 = construct_string( prg, head );
+ 	colm_tree_upref( prg, arg0 );
+ 	return arg0;
+ }
+
+ /* Build the list of program arguments, argv[1] through argv[argc-1]. Each
+  * argument becomes an upref'd string tree stored in field 0 of a freshly
+  * allocated struct, whose list element (field 1) is appended to the list.
+  * Lengths come from argl when it is supplied, otherwise from strlen. */
+ static list_t *construct_argv( program_t *prg, int argc, const char **argv, const int *argl )
+ {
+ 	list_t *args = (list_t*)colm_construct_generic( prg, prg->rtd->argv_generic_id, 0 );
+
+ 	int i;
+ 	for ( i = 1; i < argc; i++ ) {
+ 		size_t len;
+ 		if ( argl != 0 )
+ 			len = argl[i];
+ 		else
+ 			len = strlen( argv[i] );
+
+ 		/* The argument text as a string tree. */
+ 		head_t *head = colm_string_alloc_pointer( prg, argv[i], len );
+ 		tree_t *arg = construct_string( prg, head );
+ 		colm_tree_upref( prg, arg );
+
+ 		/* Wrap it in a list element struct and link it in. */
+ 		struct_t *el = colm_struct_new_size( prg, 16 );
+ 		el->id = prg->rtd->argv_el_id;
+ 		colm_struct_set_field( el, tree_t*, 0, arg );
+ 		list_el_t *list_el = colm_struct_get_addr( el, list_el_t*, 1 );
+ 		colm_list_append( args, list_el );
+ 	}
+
+ 	return args;
+ }
+
+
+ /* Build the list of standard streams. make_stdout is called first, then a
+  * single element holding prg->stdout_val is appended to a new list. */
+ static list_t *construct_stds( program_t *prg )
+ {
+ 	make_stdout( prg );
+
+ 	list_t *stds = (list_t*)colm_construct_generic( prg, prg->rtd->stds_generic_id, 0 );
+
+ 	/* One element: field 0 is the stream, field 1 is the list linkage. */
+ 	struct_t *el = colm_struct_new_size( prg, 16 );
+ 	el->id = prg->rtd->stds_el_id;
+ 	colm_struct_set_field( el, stream_t*, 0, prg->stdout_val );
+
+ 	list_el_t *list_el = colm_struct_get_addr( el, list_el_t*, 1 );
+ 	colm_list_append( stds, list_el );
+
+ 	return stds;
+ }
+
+/*
+ * Execution environment
+ */
+
+ /* Drain a reverse-code vector. Blocks are stored back to back, each one
+  * followed by a word holding its length. Starting from the end, every block
+  * is passed to rcode_downref and then removed, until the vector is empty. */
+ void colm_rcode_downref_all( program_t *prg, tree_t **sp, struct rt_code_vect *rev )
+ {
+ 	while ( rev->tab_len > 0 ) {
+ 		/* The trailing word is the length of the last block. */
+ 		code_t *cursor = rev->data + rev->tab_len - SIZEOF_WORD;
+ 		word_t block_len;
+ 		read_word_p( block_len, cursor );
+
+ 		/* Locate the first instruction of that block. */
+ 		long block_start = rev->tab_len - block_len - SIZEOF_WORD;
+ 		cursor = rev->data + block_start;
+
+ 		/* Release what the block refers to. */
+ 		rcode_downref( prg, sp, cursor );
+
+ 		/* Drop the block together with its length word. */
+ 		rev->tab_len -= block_len + SIZEOF_WORD;
+ 	}
+ }
+
+ /* Switch execution into the code attached to `parser`'s pda_run.
+  *
+  * The current execution state (frame pointers, frame id, steps, pcr,
+  * parser, WV) and a return address are pushed on the VM stack, then `exec`
+  * is reset for the parser's code. When the pda_run names a frame
+  * (frame_id >= 0) a zeroed frame of that size is allocated on the stack.
+  * The updated stack pointer is written back through `psp` and the first
+  * instruction of the parser's code is returned. */
+ static code_t *pcr_call( program_t *prg, execution_t *exec, tree_t ***psp, code_t *instr, parser_t *parser )
+ {
+ 	tree_t **sp = *psp;
+
+ 	/* Look up the frame, if there is one, so the stack reservation can
+ 	 * cover it. */
+ 	struct frame_info *fi = 0;
+ 	int frame_size = 0;
+ 	if ( parser->pda_run->frame_id >= 0 ) {
+ 		fi = &prg->rtd->frame_info[parser->pda_run->frame_id];
+ 		frame_size = fi->frame_size;
+ 	}
+
+ 	/* Eight saved words plus the frame. */
+ 	vm_contiguous( 8 + frame_size );
+
+ 	vm_push_type( tree_t**, exec->frame_ptr );
+ 	vm_push_type( tree_t**, exec->iframe_ptr );
+ 	vm_push_type( long, exec->frame_id );
+ 	vm_push_type( word_t, exec->steps );
+ 	vm_push_type( word_t, exec->pcr );
+ 	vm_push_parser( exec->parser );
+ 	vm_push_type( word_t, exec->WV );
+
+ 	/* Return back to the calling instruction. Execution alternates
+ 	 * between parsing and calling instructions. */
+ 	code_t *return_to = instr - SIZEOF_CODE;
+ 	vm_push_type( code_t*, return_to );
+
+ 	/* Fresh execution state for the parser's code. */
+ 	exec->frame_ptr = 0;
+ 	exec->iframe_ptr = 0;
+ 	exec->steps = 0;
+ 	exec->parser = parser;
+ 	exec->WV = 1;
+ 	exec->frame_id = parser->pda_run->frame_id;
+
+ 	code_t *entry = parser->pda_run->code;
+
+ 	if ( fi != 0 ) {
+ 		exec->frame_ptr = vm_ptop();
+ 		vm_pushn( fi->frame_size );
+ 		memset( vm_ptop(), 0, sizeof(word_t) * fi->frame_size );
+ 	}
+
+ 	*psp = sp;
+ 	return entry;
+ }
+
+ /* Run `code` in the program's root frame.
+  *
+  * The stack is laid out as though a call had been made: five empty slots
+  * followed by a zeroed frame of the root frame's size. After the code has
+  * run, the frame's locals are released, the frame is popped, and the slot
+  * holding the return value replaces prg->return_val (the previous value is
+  * downref'd). The final stack pointer is saved in prg->stack_root. */
+ void colm_execute( program_t *prg, execution_t *exec, code_t *code )
+ {
+ 	struct frame_info *fi = &prg->rtd->frame_info[prg->rtd->root_frame_id];
+ 	tree_t **sp = prg->stack_root;
+
+ 	/* Reserve the call slots and the frame in one contiguous stretch. */
+ 	long needed = FR_AA + fi->frame_size;
+ 	vm_contiguous( needed );
+
+ 	/* The call slots. A return value is allowed. */
+ 	vm_push_tree( 0 );
+ 	vm_push_tree( 0 );
+ 	vm_push_tree( 0 );
+ 	vm_push_tree( 0 );
+ 	vm_push_tree( 0 );
+
+ 	/* The zero-initialised frame. */
+ 	exec->frame_ptr = vm_ptop();
+ 	vm_pushn( fi->frame_size );
+ 	memset( vm_ptop(), 0, sizeof(word_t) * fi->frame_size );
+
+ 	/* Run the code. */
+ 	sp = colm_execute_code( prg, exec, sp, code );
+
+ 	/* Tear the frame down. */
+ 	downref_locals( prg, &sp, exec, fi->locals, fi->locals_len );
+ 	vm_popn( fi->frame_size );
+
+ 	/* Unwind the call slots, keeping the return value. */
+ 	vm_pop_ignore();
+ 	vm_pop_ignore();
+ 	colm_tree_downref( prg, sp, prg->return_val );
+ 	prg->return_val = vm_pop_tree();
+ 	vm_pop_ignore();
+
+ 	prg->stack_root = sp;
+ }
+
+ /* Call the function identified by `frame_id`, passing `params` as string
+  * arguments, and return its result.
+  *
+  * prg:         the program to run in; its argc/argv/argl are cleared.
+  * frame_id:    index into prg->rtd->frame_info of the function to run.
+  * params:      array of `param_count` C strings; a null entry is passed as
+  *              a zero value.
+  * param_count: number of entries in `params`.
+  *
+  * Each non-null parameter is wrapped in an upref'd string tree. The result
+  * is stored in prg->return_val (the previous value is downref'd) and also
+  * returned. On return the stack must be back at prg->stack_root
+  * (asserted). */
+ tree_t *colm_run_func( struct colm_program *prg, int frame_id,
+ 		const char **params, int param_count )
+ {
+ 	/* Make the arguments available to the program. */
+ 	prg->argc = 0;
+ 	prg->argv = 0;
+ 	prg->argl = 0;
+
+ 	execution_t execution;
+ 	memset( &execution, 0, sizeof(execution) );
+
+ 	tree_t **sp = prg->stack_root;
+
+ 	struct frame_info *fi = &prg->rtd->frame_info[frame_id];
+ 	code_t *code = fi->codeWC;
+
+ 	/* Reserve and clear one stack slot per parameter. */
+ 	vm_pushn( param_count );
+ 	execution.call_args = vm_ptop();
+ 	memset( vm_ptop(), 0, sizeof(word_t) * param_count );
+
+ 	int p;
+ 	for ( p = 0; p < param_count; p++ ) {
+ 		if ( params[p] == 0 ) {
+ 			((value_t*)execution.call_args)[p] = 0;
+ 		}
+ 		else {
+ 			const char *param_p = params[p];
+ 			size_t param_len = strlen(params[p]);
+ 			head_t *head = colm_string_alloc_pointer( prg, param_p, param_len );
+ 			tree_t *tree = construct_string( prg, head );
+ 			colm_tree_upref( prg, tree );
+ 			((tree_t**)execution.call_args)[p] = tree;
+ 		}
+ 	}
+
+ 	long stretch = FR_AA + fi->frame_size;
+ 	vm_contiguous( stretch );
+
+ 	/* Set up the stack as if we have called. We allow a return value. */
+ 	vm_push_tree( (tree_t*)execution.call_args );
+ 	vm_push_tree( 0 );
+ 	vm_push_tree( 0 );
+ 	vm_push_tree( 0 );
+ 	vm_push_tree( 0 );
+
+ 	execution.frame_id = frame_id;
+
+ 	execution.frame_ptr = vm_ptop();
+ 	vm_pushn( fi->frame_size );
+ 	memset( vm_ptop(), 0, sizeof(word_t) * fi->frame_size );
+
+ 	/* Execution loop. */
+ 	sp = colm_execute_code( prg, &execution, sp, code );
+
+ 	/* Replace the program's return value with the one just produced. */
+ 	colm_tree_downref( prg, sp, prg->return_val );
+ 	prg->return_val = execution.ret_val;
+
+ 	/* Release the parameter slots reserved above. */
+ 	vm_popn( param_count );
+
+ 	assert( sp == prg->stack_root );
+
+ 	return prg->return_val;
+ }
+
+ /* Move the reverse code collected during the last execution onto the
+  * pda_run's main reverse-code vector.
+  *
+  * The collected units are copied in reverse order (last unit first), then
+  * terminated with IN_PCR_RET and followed by a word holding the total
+  * length of the new block. If this is the first block since the count was
+  * reset, a small IN_PCR_END_DECK block is emitted ahead of it. Each block
+  * emitted bumps rc_block_count and the step count.
+  *
+  * Returns false when no reverse code had been collected, true otherwise. */
+ int colm_make_reverse_code( struct pda_run *pda_run )
+ {
+ 	struct rt_code_vect *collected = &pda_run->rcode_collect;
+ 	struct rt_code_vect *target = &pda_run->reverse_code;
+
+ 	/* Nothing generated, nothing to do. */
+ 	if ( collected->tab_len == 0 )
+ 		return false;
+
+ 	if ( pda_run->rc_block_count == 0 ) {
+ 		/* One reverse code run for the DECK terminator. */
+ 		append_code_val( target, IN_PCR_END_DECK );
+ 		append_code_val( target, IN_PCR_RET );
+ 		append_word( target, 2 );
+ 		pda_run->rc_block_count += 1;
+ 		colm_increment_steps( pda_run );
+ 	}
+
+ 	long block_start = target->tab_len;
+
+ 	/* Walk the collected code backwards one unit at a time. Each unit is
+ 	 * followed by a single code holding its length. */
+ 	code_t *cursor = collected->data + collected->tab_len;
+ 	while ( cursor != collected->data ) {
+ 		cursor -= 1;
+ 		long unit_len = *cursor;
+ 		cursor -= unit_len;
+ 		append_code_vect( target, cursor, unit_len );
+ 	}
+
+ 	/* Terminate the block and record its total length. */
+ 	append_code_val( target, IN_PCR_RET );
+ 	long block_len = target->tab_len - block_start;
+ 	append_word( target, block_len );
+
+ 	/* Empty the collection buffer for the next run. */
+ 	collected->tab_len = 0;
+
+ 	pda_run->rc_block_count += 1;
+ 	colm_increment_steps( pda_run );
+
+ 	return true;
+ }
+
+ /* If any reverse-code blocks were produced since the last transfer, flag
+  * `parse_tree` with PF_HAS_RCODE and reset the block count. */
+ void colm_transfer_reverse_code( struct pda_run *pda_run, parse_tree_t *parse_tree )
+ {
+ 	if ( pda_run->rc_block_count <= 0 )
+ 		return;
+
+ 	parse_tree->flags |= PF_HAS_RCODE;
+ 	pda_run->rc_block_count = 0;
+ }
+
+ /* Close the current reverse-code unit: append its accumulated length to the
+  * collection buffer and reset the running length. */
+ static void rcode_unit_term( execution_t *exec )
+ {
+ 	struct rt_code_vect *collect = &exec->parser->pda_run->rcode_collect;
+
+ 	append_code_val( collect, exec->rcode_unit_len );
+ 	exec->rcode_unit_len = 0;
+ }
+
+ /* Begin a new reverse-code unit by zeroing the running unit length. */
+ static void rcode_unit_start( execution_t *exec )
+ {
+ 	exec->rcode_unit_len = 0;
+ }
+
+ /* Append one code to the reverse-code collection buffer and add its size to
+  * the current unit length. */
+ static void rcode_code( execution_t *exec, const code_t code )
+ {
+ 	struct rt_code_vect *collect = &exec->parser->pda_run->rcode_collect;
+
+ 	append_code_val( collect, code );
+ 	exec->rcode_unit_len += SIZEOF_CODE;
+ }
+
+ /* Append one half-word to the reverse-code collection buffer and add its
+  * size to the current unit length. */
+ static void rcode_half( execution_t *exec, const half_t half )
+ {
+ 	struct rt_code_vect *collect = &exec->parser->pda_run->rcode_collect;
+
+ 	append_half( collect, half );
+ 	exec->rcode_unit_len += SIZEOF_HALF;
+ }
+
+ /* Append one word to the reverse-code collection buffer and add its size to
+  * the current unit length. */
+ static void rcode_word( execution_t *exec, const word_t word )
+ {
+ 	struct rt_code_vect *collect = &exec->parser->pda_run->rcode_collect;
+
+ 	append_word( collect, word );
+ 	exec->rcode_unit_len += SIZEOF_WORD;
+ }
+
+ /* Remove the last block from a reverse-code vector and return a pointer to
+  * its first instruction. Each block is followed by a word holding its
+  * length; both the block and that word are dropped from the vector's
+  * length. The returned pointer refers to storage inside all_rev->data. */
+ code_t *colm_pop_reverse_code( struct rt_code_vect *all_rev )
+ {
+ 	/* The trailing word is the length of the last block. */
+ 	code_t *cursor = all_rev->data + all_rev->tab_len - SIZEOF_WORD;
+ 	word_t block_len;
+ 	read_word_p( block_len, cursor );
+
+ 	/* Locate the first instruction of that block. */
+ 	long block_start = all_rev->tab_len - block_len - SIZEOF_WORD;
+ 	cursor = all_rev->data + block_start;
+
+ 	/* Drop the block together with its length word. */
+ 	all_rev->tab_len -= block_len + SIZEOF_WORD;
+ 	return cursor;
+ }
+
+tree_t **colm_execute_code( program_t *prg, execution_t *exec, tree_t **sp, code_t *instr )
+{
+ /* When we exit we are going to verify that we did not eat up any stack
+ * space. */
+ tree_t **root = sp;
+ code_t c;
+
+again:
+ c = *instr++;
+ //debug( REALM_BYTECODE, "--in 0x%x\n", c );
+
+ switch ( c ) {
+ case IN_RESTORE_LHS: {
+ tree_t *restore;
+ read_tree( restore );
+
+ debug( prg, REALM_BYTECODE, "IN_RESTORE_LHS\n" );
+ colm_tree_downref( prg, sp, exec->parser->pda_run->parse_input->shadow->tree );
+ exec->parser->pda_run->parse_input->shadow->tree = restore;
+ break;
+ }
+ case IN_LOAD_NIL: {
+ debug( prg, REALM_BYTECODE, "IN_LOAD_NIL\n" );
+ vm_push_tree( 0 );
+ break;
+ }
+ case IN_LOAD_TREE: {
+ tree_t *tree;
+ read_tree( tree );
+ vm_push_tree( tree );
+ debug( prg, REALM_BYTECODE, "IN_LOAD_TREE %p id: %d refs: %d\n",
+ tree, tree->id, tree->refs );
+ break;
+ }
+ case IN_LOAD_WORD: {
+ debug( prg, REALM_BYTECODE, "IN_LOAD_WORD\n" );
+ word_t w;
+ read_word( w );
+ vm_push_type( word_t, w );
+ break;
+ }
+ case IN_LOAD_TRUE: {
+ debug( prg, REALM_BYTECODE, "IN_LOAD_TRUE\n" );
+ //colm_tree_upref( prg, prg->trueVal );
+ vm_push_tree( prg->true_val );
+ break;
+ }
+ case IN_LOAD_FALSE: {
+ debug( prg, REALM_BYTECODE, "IN_LOAD_FALSE\n" );
+ //colm_tree_upref( prg, prg->falseVal );
+ vm_push_tree( prg->false_val );
+ break;
+ }
+ case IN_LOAD_INT: {
+ word_t i;
+ read_word( i );
+
+ debug( prg, REALM_BYTECODE, "IN_LOAD_INT %d\n", i );
+
+ value_t value = i;
+ vm_push_value( value );
+ break;
+ }
+ case IN_LOAD_STR: {
+ word_t offset;
+ read_word( offset );
+
+ debug( prg, REALM_BYTECODE, "IN_LOAD_STR %d\n", offset );
+
+ head_t *lit = make_literal( prg, offset );
+ tree_t *tree = construct_string( prg, lit );
+ colm_tree_upref( prg, tree );
+ vm_push_tree( tree );
+ break;
+ }
+ case IN_READ_REDUCE: {
+ half_t generic_id;
+ half_t reducer_id;
+ read_half( generic_id );
+ read_half( reducer_id );
+
+ input_t *input = vm_pop_input();
+
+ debug( prg, REALM_BYTECODE, "IN_READ_REDUCE %hd %hd\n", generic_id, reducer_id );
+
+ prg->rtd->read_reduce( prg, reducer_id, input );
+
+ vm_push_tree( 0 );
+
+ break;
+ }
+
+ /*
+ * LOAD_GLOBAL
+ */
+ case IN_LOAD_GLOBAL_R: {
+ debug( prg, REALM_BYTECODE, "IN_LOAD_GLOBAL_R\n" );
+
+ vm_push_struct( prg->global );
+ break;
+ }
+ case IN_LOAD_GLOBAL_WV: {
+ debug( prg, REALM_BYTECODE, "IN_LOAD_GLOBAL_WV\n" );
+
+ assert( exec->WV );
+
+ vm_push_struct( prg->global );
+
+ /* Set up the reverse instruction. */
+ rcode_unit_start( exec );
+ rcode_code( exec, IN_LOAD_GLOBAL_BKT );
+ break;
+ }
+ case IN_LOAD_GLOBAL_WC: {
+ debug( prg, REALM_BYTECODE, "IN_LOAD_GLOBAL_WC\n" );
+
+ assert( !exec->WV );
+
+ /* This is identical to the _R version, but using it for writing
+ * would be confusing. */
+ vm_push_struct( prg->global );
+ break;
+ }
+ case IN_LOAD_GLOBAL_BKT: {
+ debug( prg, REALM_BYTECODE, "IN_LOAD_GLOBAL_BKT\n" );
+
+ vm_push_struct( prg->global );
+ break;
+ }
+
+ case IN_LOAD_INPUT_R: {
+ debug( prg, REALM_BYTECODE, "IN_LOAD_INPUT_R\n" );
+
+ assert( exec->parser != 0 );
+ vm_push_input( exec->parser->input );
+ break;
+ }
+ case IN_LOAD_INPUT_WV: {
+ debug( prg, REALM_BYTECODE, "IN_LOAD_INPUT_WV\n" );
+
+ assert( exec->WV );
+
+ assert( exec->parser != 0 );
+ vm_push_input( exec->parser->input );
+
+ /* Set up the reverse instruction. */
+ rcode_unit_start( exec );
+ rcode_code( exec, IN_LOAD_INPUT_BKT );
+ rcode_word( exec, (word_t)exec->parser->input );
+ break;
+ }
+ case IN_LOAD_INPUT_WC: {
+ debug( prg, REALM_BYTECODE, "IN_LOAD_INPUT_WC\n" );
+
+ assert( !exec->WV );
+
+ assert( exec->parser != 0 );
+ vm_push_input( exec->parser->input );
+ break;
+ }
+ case IN_LOAD_INPUT_BKT: {
+ tree_t *accum_stream;
+ read_tree( accum_stream );
+
+ debug( prg, REALM_BYTECODE, "IN_LOAD_INPUT_BKT\n" );
+
+ colm_tree_upref( prg, accum_stream );
+ vm_push_tree( accum_stream );
+ break;
+ }
+
+ case IN_LOAD_CONTEXT_R: {
+ debug( prg, REALM_BYTECODE, "IN_LOAD_CONTEXT_R\n" );
+
+ vm_push_type( struct_t*, exec->parser->pda_run->context );
+ break;
+ }
+ case IN_LOAD_CONTEXT_WV: {
+ debug( prg, REALM_BYTECODE, "IN_LOAD_CONTEXT_WV\n" );
+
+ assert( exec->WV );
+
+ vm_push_type( struct_t *, exec->parser->pda_run->context );
+
+ /* Set up the reverse instruction. */
+ rcode_unit_start( exec );
+ rcode_code( exec, IN_LOAD_CONTEXT_BKT );
+ break;
+ }
+ case IN_LOAD_CONTEXT_WC: {
+ debug( prg, REALM_BYTECODE, "IN_LOAD_CONTEXT_WC\n" );
+
+ assert( !exec->WV );
+
+ /* This is identical to the _R version, but using it for writing
+ * would be confusing. */
+ vm_push_type( struct_t *, exec->parser->pda_run->context );
+ break;
+ }
+ case IN_LOAD_CONTEXT_BKT: {
+ debug( prg, REALM_BYTECODE, "IN_LOAD_CONTEXT_BKT\n" );
+
+ vm_push_type( struct_t *, exec->parser->pda_run->context );
+ break;
+ }
+
+ case IN_SET_PARSER_CONTEXT: {
+ debug( prg, REALM_BYTECODE, "IN_SET_PARSER_CONTEXT\n" );
+
+ struct_t *strct = vm_pop_struct();
+ parser_t *parser = vm_pop_parser();
+
+ colm_parser_set_context( prg, sp, parser, strct );
+
+ vm_push_parser( parser );
+ break;
+ }
+
+ case IN_SET_PARSER_INPUT: {
+ debug( prg, REALM_BYTECODE, "IN_SET_PARSER_INPUT\n" );
+
+ input_t *to_replace_with = vm_pop_input();
+ parser_t *parser = vm_pop_parser();
+
+ parser->input = to_replace_with;
+
+ vm_push_parser( parser );
+
+ break;
+ }
+
+ case IN_INIT_CAPTURES: {
+ consume_byte();
+
+ debug( prg, REALM_BYTECODE, "IN_INIT_CAPTURES\n" );
+
+ /* If there are captures (this is a translate block) then copy them into
+ * the local frame now. */
+ struct lang_el_info *lel_info = prg->rtd->lel_info;
+ struct pda_run *pda_run = exec->parser->pda_run;
+ alph_t **mark = pda_run->mark;
+
+ int i, num_capture_attr = lel_info[pda_run->token_id].num_capture_attr;
+ for ( i = 0; i < num_capture_attr; i++ ) {
+ struct lang_el_info *lei = &lel_info[exec->parser->pda_run->token_id];
+ CaptureAttr *ca = &prg->rtd->capture_attr[lei->capture_attr + i];
+ head_t *data = string_alloc_full( prg,
+ colm_cstr_from_alph( mark[ca->mark_enter] ),
+ mark[ca->mark_leave] - mark[ca->mark_enter] );
+ tree_t *string = construct_string( prg, data );
+ colm_tree_upref( prg, string );
+ set_local( exec, -1 - i, string );
+ }
+ break;
+ }
+ case IN_INIT_RHS_EL: {
+ half_t position;
+ short field;
+ read_half( position );
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_INIT_RHS_EL %hd\n", field );
+
+ tree_t *val = get_rhs_el( prg, exec->parser->pda_run->red_lel->shadow->tree, position );
+ colm_tree_upref( prg, val );
+ vm_set_local(exec, field, val);
+ break;
+ }
+
+ case IN_INIT_LHS_EL: {
+ short field;
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_INIT_LHS_EL %hd\n", field );
+
+ /* We transfer it to to the local field. Possibly take a copy. */
+ tree_t *val = exec->parser->pda_run->red_lel->shadow->tree;
+
+ /* Save it. */
+ colm_tree_upref( prg, val );
+ exec->parser->pda_run->parsed = val;
+
+ exec->parser->pda_run->red_lel->shadow->tree = 0;
+ vm_set_local(exec, field, val);
+ break;
+ }
+ case IN_STORE_LHS_EL: {
+ short field;
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_STORE_LHS_EL %hd\n", field );
+
+ tree_t *val = vm_get_local(exec, field);
+ vm_set_local(exec, field, 0);
+ exec->parser->pda_run->red_lel->shadow->tree = val;
+ break;
+ }
+ case IN_UITER_ADVANCE: {
+ short field;
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_UITER_ADVANCE\n" );
+
+ /* Get the iterator. */
+ user_iter_t *uiter = (user_iter_t*) vm_get_local(exec, field);
+
+ long yield_size = vm_ssize() - uiter->root_size;
+ assert( uiter->yield_size == yield_size );
+
+ /* Fix the return instruction pointer. */
+ uiter->stack_root[-IFR_AA + IFR_RIN] = (SW)instr;
+
+ instr = uiter->resume;
+ exec->frame_ptr = uiter->frame;
+ exec->iframe_ptr = &uiter->stack_root[-IFR_AA];
+ break;
+ }
+ case IN_UITER_GET_CUR_R: {
+ short field;
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_UITER_GET_CUR_R\n" );
+
+ user_iter_t *uiter = (user_iter_t*) vm_get_local(exec, field);
+ tree_t *val = uiter->ref.kid->tree;
+ colm_tree_upref( prg, val );
+ vm_push_tree( val );
+ break;
+ }
+ case IN_UITER_GET_CUR_WC: {
+ short field;
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_UITER_GET_CUR_WC\n" );
+
+ user_iter_t *uiter = (user_iter_t*) vm_get_local(exec, field);
+ split_ref( prg, &sp, &uiter->ref );
+ tree_t *split = uiter->ref.kid->tree;
+ colm_tree_upref( prg, split );
+ vm_push_tree( split );
+ break;
+ }
+ case IN_UITER_SET_CUR_WC: {
+ short field;
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_UITER_SET_CUR_WC\n" );
+
+ tree_t *t = vm_pop_tree();
+ user_iter_t *uiter = (user_iter_t*) vm_get_local(exec, field);
+ split_ref( prg, &sp, &uiter->ref );
+ tree_t *old = uiter->ref.kid->tree;
+ set_uiter_cur( prg, uiter, t );
+ colm_tree_downref( prg, sp, old );
+ break;
+ }
+ case IN_GET_LOCAL_R: {
+ short field;
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_GET_LOCAL_R %hd\n", field );
+
+ tree_t *val = vm_get_local(exec, field);
+ colm_tree_upref( prg, val );
+ vm_push_tree( val );
+ break;
+ }
+ case IN_GET_LOCAL_WC: {
+ short field;
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_GET_LOCAL_WC %hd\n", field );
+
+ tree_t *split = get_local_split( prg, exec, field );
+ colm_tree_upref( prg, split );
+ vm_push_tree( split );
+ break;
+ }
+ case IN_SET_LOCAL_WC: {
+ short field;
+ read_half( field );
+ debug( prg, REALM_BYTECODE, "IN_SET_LOCAL_WC %hd\n", field );
+
+ tree_t *val = vm_pop_tree();
+ colm_tree_downref( prg, sp, vm_get_local(exec, field) );
+ set_local( exec, field, val );
+ break;
+ }
+ case IN_GET_LOCAL_VAL_R: {
+ short field;
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_GET_LOCAL_VAL_R %hd\n", field );
+
+ tree_t *val = vm_get_local(exec, field);
+ vm_push_tree( val );
+ break;
+ }
+ case IN_SET_LOCAL_VAL_WC: {
+ short field;
+ read_half( field );
+ debug( prg, REALM_BYTECODE, "IN_SET_LOCAL_VAL_WC %hd\n", field );
+
+ tree_t *val = vm_pop_tree();
+ vm_set_local(exec, field, val);
+ break;
+ }
+ case IN_SAVE_RET: {
+ debug( prg, REALM_BYTECODE, "IN_SAVE_RET\n" );
+
+ value_t val = vm_pop_value();
+ vm_set_local(exec, FR_RV, (tree_t*)val);
+ break;
+ }
+ case IN_GET_LOCAL_REF_R: {
+ short field;
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_GET_LOCAL_REF_R\n" );
+
+ ref_t *ref = (ref_t*) vm_get_plocal(exec, field);
+ tree_t *val = ref->kid->tree;
+ colm_tree_upref( prg, val );
+ vm_push_tree( val );
+ break;
+ }
+ case IN_GET_LOCAL_REF_WC: {
+ short field;
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_GET_LOCAL_REF_WC\n" );
+
+ ref_t *ref = (ref_t*) vm_get_plocal(exec, field);
+ split_ref( prg, &sp, ref );
+ tree_t *val = ref->kid->tree;
+ colm_tree_upref( prg, val );
+ vm_push_tree( val );
+ break;
+ }
+ case IN_SET_LOCAL_REF_WC: {
+ short field;
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_SET_LOCAL_REF_WC\n" );
+
+ tree_t *val = vm_pop_tree();
+ ref_t *ref = (ref_t*) vm_get_plocal(exec, field);
+ split_ref( prg, &sp, ref );
+ ref_set_value( prg, sp, ref, val );
+ break;
+ }
+ case IN_GET_FIELD_TREE_R: {
+ short field;
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_GET_FIELD_TREE_R %d\n", field );
+
+ tree_t *obj = vm_pop_tree();
+ colm_tree_downref( prg, sp, obj );
+
+ tree_t *val = colm_tree_get_field( obj, field );
+ colm_tree_upref( prg, val );
+ vm_push_tree( val );
+ break;
+ }
+ case IN_GET_FIELD_TREE_WC: {
+ short field;
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_GET_FIELD_TREE_WC %d\n", field );
+
+ tree_t *obj = vm_pop_tree();
+ colm_tree_downref( prg, sp, obj );
+
+ tree_t *split = get_field_split( prg, obj, field );
+ colm_tree_upref( prg, split );
+ vm_push_tree( split );
+ break;
+ }
+ case IN_GET_FIELD_TREE_WV: {
+ short field;
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_GET_FIELD_TREE_WV\n" );
+
+ tree_t *obj = vm_pop_tree();
+ colm_tree_downref( prg, sp, obj );
+
+ tree_t *split = get_field_split( prg, obj, field );
+ colm_tree_upref( prg, split );
+ vm_push_tree( split );
+
+ /* Set up the reverse instruction. */
+ rcode_code( exec, IN_GET_FIELD_TREE_BKT );
+ rcode_half( exec, field );
+ break;
+ }
+ case IN_GET_FIELD_TREE_BKT: {
+ short field;
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_GET_FIELD_TREE_BKT\n" );
+
+ tree_t *obj = vm_pop_tree();
+ colm_tree_downref( prg, sp, obj );
+
+ tree_t *split = get_field_split( prg, obj, field );
+ colm_tree_upref( prg, split );
+ vm_push_tree( split );
+ break;
+ }
+ case IN_SET_FIELD_TREE_WC: {
+ short field;
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_SET_FIELD_TREE_WC %d\n", field );
+
+ tree_t *obj = vm_pop_tree();
+ tree_t *val = vm_pop_tree();
+ colm_tree_downref( prg, sp, obj );
+
+ /* Downref the old value. */
+ tree_t *prev = colm_tree_get_field( obj, field );
+ colm_tree_downref( prg, sp, prev );
+
+ colm_tree_set_field( prg, obj, field, val );
+ break;
+ }
+ case IN_SET_FIELD_TREE_WV: {
+ short field;
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_SET_FIELD_TREE_WV %d\n", field );
+
+ tree_t *obj = vm_pop_tree();
+ tree_t *val = vm_pop_tree();
+ colm_tree_downref( prg, sp, obj );
+
+ /* Save the old value, then set the field. */
+ tree_t *prev = colm_tree_get_field( obj, field );
+ colm_tree_set_field( prg, obj, field, val );
+
+ /* Set up the reverse instruction. */
+ rcode_code( exec, IN_SET_FIELD_TREE_BKT );
+ rcode_half( exec, field );
+ rcode_word( exec, (word_t)prev );
+ rcode_unit_term( exec );
+ break;
+ }
+ case IN_SET_FIELD_TREE_BKT: {
+ short field;
+ tree_t *val;
+ read_half( field );
+ read_tree( val );
+
+ debug( prg, REALM_BYTECODE, "IN_SET_FIELD_TREE_BKT\n" );
+
+ tree_t *obj = vm_pop_tree();
+ colm_tree_downref( prg, sp, obj );
+
+ /* Downref the old value. */
+ tree_t *prev = colm_tree_get_field( obj, field );
+ colm_tree_downref( prg, sp, prev );
+
+ colm_tree_set_field( prg, obj, field, val );
+ break;
+ }
+ case IN_SET_FIELD_TREE_LEAVE_WC: {
+ short field;
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_SET_FIELD_TREE_LEAVE_WC\n" );
+
+ /* Note that we don't downref the object here because we are
+ * leaving it on the stack. */
+ tree_t *obj = vm_pop_tree();
+ tree_t *val = vm_pop_tree();
+
+ /* Downref the old value. */
+ tree_t *prev = colm_tree_get_field( obj, field );
+ colm_tree_downref( prg, sp, prev );
+
+ /* Set the field. */
+ colm_tree_set_field( prg, obj, field, val );
+
+ /* Leave the object on the top of the stack. */
+ vm_push_tree( obj );
+ break;
+ }
+ case IN_GET_FIELD_VAL_R: {
+ short field;
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_GET_FIELD_VAL_R %d\n", field );
+
+ tree_t *obj = vm_pop_tree();
+ colm_tree_downref( prg, sp, obj );
+
+ tree_t *pointer = colm_tree_get_field( obj, field );
+ value_t value = 0;
+ if ( pointer != 0 )
+ value = colm_get_pointer_val( pointer );
+ vm_push_value( value );
+ break;
+ }
+ case IN_SET_FIELD_VAL_WC: {
+ short field;
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_SET_FIELD_VAL_WC %d\n", field );
+
+ tree_t *obj = vm_pop_tree();
+ value_t value = vm_pop_value();
+ colm_tree_downref( prg, sp, obj );
+
+ /* Downref the old value. */
+ tree_t *prev = colm_tree_get_field( obj, field );
+ colm_tree_downref( prg, sp, prev );
+
+ /* Make it into a pointer. */
+ tree_t *pointer = colm_construct_pointer( prg, value );
+ colm_tree_upref( prg, pointer );
+
+ colm_tree_set_field( prg, obj, field, pointer );
+ break;
+ }
+ case IN_NEW_STRUCT: {
+ short id;
+ read_half( id );
+
+ debug( prg, REALM_BYTECODE, "IN_NEW_STRUCT %hd\n", id );
+ struct_t *item = colm_struct_new( prg, id );
+ vm_push_struct( item );
+ break;
+ }
+ case IN_NEW_STREAM: {
+ debug( prg, REALM_BYTECODE, "IN_NEW_STREAM\n" );
+ stream_t *item = colm_stream_open_collect( prg );
+ vm_push_stream( item );
+ break;
+ }
+ case IN_GET_COLLECT_STRING: {
+ debug( prg, REALM_BYTECODE, "IN_GET_COLLECT_STRING\n" );
+ stream_t *stream = vm_pop_stream();
+ str_t *str = collect_string( prg, stream );
+ colm_tree_upref( prg, (tree_t*)str );
+ vm_push_string( str );
+ break;
+ }
+ case IN_GET_STRUCT_R: {
+ short field;
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_GET_STRUCT_R %d\n", field );
+
+ tree_t *obj = vm_pop_tree();
+ tree_t *val = colm_struct_get_field( obj, tree_t*, field );
+ colm_tree_upref( prg, val );
+ vm_push_tree( val );
+ break;
+ }
+ case IN_GET_STRUCT_WC: {
+ short field;
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_GET_STRUCT_WC %d\n", field );
+
+ tree_t *obj = vm_pop_tree();
+ tree_t *val = colm_struct_get_field( obj, tree_t*, field );
+ colm_tree_upref( prg, val );
+ vm_push_tree( val );
+
+ break;
+ }
+ case IN_GET_STRUCT_WV: {
+ short field;
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_GET_STRUCT_WV\n" );
+
+ tree_t *obj = vm_pop_tree();
+ tree_t *val = colm_struct_get_field( obj, tree_t*, field );
+ colm_tree_upref( prg, val );
+ vm_push_tree( val );
+
+ /* Set up the reverse instruction. */
+ rcode_code( exec, IN_GET_STRUCT_BKT );
+ rcode_half( exec, field );
+ break;
+ }
+ case IN_GET_STRUCT_BKT: {
+ short field;
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_GET_STRUCT_BKT\n" );
+
+ tree_t *obj = vm_pop_tree();
+ colm_tree_downref( prg, sp, obj );
+
+ tree_t *split = get_field_split( prg, obj, field );
+ colm_tree_upref( prg, split );
+ vm_push_tree( split );
+ break;
+ }
+ case IN_SET_STRUCT_WC: {
+ short field;
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_SET_STRUCT_WC %d\n", field );
+
+ tree_t *obj = vm_pop_tree();
+ tree_t *val = vm_pop_tree();
+
+ /* Downref the old value. */
+ tree_t *prev = colm_struct_get_field( obj, tree_t*, field );
+ colm_tree_downref( prg, sp, prev );
+ colm_struct_set_field( obj, tree_t*, field, val );
+ break;
+ }
+ case IN_SET_STRUCT_WV: {
+ short field;
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_SET_STRUCT_WV %d\n", field );
+
+ struct_t *obj = vm_pop_struct();
+ tree_t *val = vm_pop_tree();
+
+ /* Save the old value, then set the field. */
+ tree_t *prev = colm_struct_get_field( obj, tree_t*, field );
+ colm_struct_set_field( obj, tree_t*, field, val );
+
+ /* Set up the reverse instruction. */
+ rcode_code( exec, IN_SET_STRUCT_BKT );
+ rcode_half( exec, field );
+ rcode_word( exec, (word_t)prev );
+ rcode_unit_term( exec );
+ break;
+ }
+ case IN_SET_STRUCT_BKT: {
+ short field;
+ tree_t *val;
+ read_half( field );
+ read_tree( val );
+
+ debug( prg, REALM_BYTECODE, "IN_SET_STRUCT_BKT\n" );
+
+ tree_t *obj = vm_pop_tree();
+
+ /* Downref the old value. */
+ tree_t *prev = colm_struct_get_field( obj, tree_t*, field );
+ colm_tree_downref( prg, sp, prev );
+
+ colm_struct_set_field( obj, tree_t*, field, val );
+ break;
+ }
+ case IN_GET_STRUCT_VAL_R: {
+ short field;
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_GET_STRUCT_VAL_R %d\n", field );
+
+ tree_t *obj = vm_pop_tree();
+ tree_t *val = colm_struct_get_field( obj, tree_t*, field );
+ vm_push_tree( val );
+ break;
+ }
+ case IN_SET_STRUCT_VAL_WC: {
+ short field;
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_SET_STRUCT_VAL_WC %d\n", field );
+
+ struct_t *strct = vm_pop_struct();
+ tree_t *val = vm_pop_tree();
+
+ colm_struct_set_field( strct, tree_t*, field, val );
+ break;
+ }
+ case IN_SET_STRUCT_VAL_WV: {
+ short field;
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_SET_STRUCT_VAL_WV %d\n", field );
+
+ struct_t *strct = vm_pop_struct();
+ tree_t *val = vm_pop_tree();
+
+ tree_t *prev = colm_struct_get_field( strct, tree_t*, field );
+ colm_struct_set_field( strct, tree_t*, field, val );
+
+ rcode_code( exec, IN_SET_STRUCT_VAL_BKT );
+ rcode_half( exec, field );
+ rcode_word( exec, (word_t)prev );
+ rcode_unit_term( exec );
+ break;
+ }
+ case IN_SET_STRUCT_VAL_BKT: {
+ short field;
+ tree_t *val;
+ read_half( field );
+ read_tree( val );
+
+ debug( prg, REALM_BYTECODE, "IN_SET_STRUCT_VAL_BKT\n" );
+
+ tree_t *obj = vm_pop_tree();
+
+ colm_struct_set_field( obj, tree_t*, field, val );
+ break;
+ }
+ case IN_GET_RHS_VAL_R: {
+ /* Pop a tree and push the right-hand-side element chosen by the
+ * first (prod_num, child_num) operand pair whose prod_num equals the
+ * tree's production number. If no pair matches, null is pushed. */
+ debug( prg, REALM_BYTECODE, "IN_GET_RHS_VAL_R\n" );
+ int i, done = 0;
+ uchar len;
+
+ tree_t *obj = vm_pop_tree(), *val = 0;
+
+ /* Every operand pair is read, even after a match, so that all of
+ * this instruction's operands are consumed. */
+ read_byte( len );
+ for ( i = 0; i < len; i++ ) {
+ uchar prod_num, child_num;
+ read_byte( prod_num );
+ read_byte( child_num );
+ if ( !done && obj->prod_num == prod_num ) {
+ val = get_rhs_el( prg, obj, child_num );
+ done = 1;
+ }
+ }
+
+ colm_tree_upref( prg, val );
+ vm_push_tree( val );
+
+ /* Drop the popped reference only after the last read of obj and after
+ * val holds its own reference. Releasing it before the loop may free
+ * obj (and the element taken from it) while they are still in use. */
+ colm_tree_downref( prg, sp, obj );
+ break;
+ }
+ /* The write and backtrack variants of the RHS getter are not
+ * implemented; reaching one of them is a fatal error. */
+ case IN_GET_RHS_VAL_WC:
+ fatal( "UNIMPLEMENTED INSTRUCTION: IN_GET_RHS_VAL_WC\n" );
+ break;
+ case IN_GET_RHS_VAL_WV:
+ fatal( "UNIMPLEMENTED INSTRUCTION: IN_GET_RHS_VAL_WV\n" );
+ break;
+ case IN_GET_RHS_VAL_BKT:
+ fatal( "UNIMPLEMENTED INSTRUCTION: IN_GET_RHS_VAL_BKT\n" );
+ break;
+
+ case IN_SET_RHS_VAL_WC: {
+ /* Pop the target tree, then the new value. For the first
+ * (prod_num, child_num) operand pair whose prod_num equals the
+ * target's production number, release the element currently at
+ * child_num and store the new value there. Nothing is pushed.
+ *
+ * The arm is braced like its siblings so its locals do not leak into
+ * the enclosing switch scope. */
+ debug( prg, REALM_BYTECODE, "IN_SET_RHS_VAL_WC\n" );
+ int i, done = 0;
+ uchar len;
+
+ tree_t *obj = vm_pop_tree();
+ tree_t *val = vm_pop_tree();
+
+ /* Every operand pair is read, even after a match, so that all of
+ * this instruction's operands are consumed. */
+ read_byte( len );
+ for ( i = 0; i < len; i++ ) {
+ uchar prod_num, child_num;
+ read_byte( prod_num );
+ read_byte( child_num );
+ if ( !done && obj->prod_num == prod_num ) {
+ tree_t *prev = get_rhs_el( prg, obj, child_num );
+ colm_tree_downref( prg, sp, prev );
+ set_rhs_el( prg, obj, child_num, val );
+ done = 1;
+ }
+ }
+
+ /* Drop the popped reference only after obj is no longer read or
+ * written. Releasing it before the loop may free obj while it is
+ * still being modified. */
+ colm_tree_downref( prg, sp, obj );
+ break;
+ }
+ /* The reversible-write and backtrack variants of the RHS setter are
+ * not implemented; reaching one of them is a fatal error. */
+ case IN_SET_RHS_VAL_WV:
+ fatal( "UNIMPLEMENTED INSTRUCTION: IN_SET_RHS_VAL_WV\n" );
+ break;
+ case IN_SET_RHS_VAL_BKT:
+ fatal( "UNIMPLEMENTED INSTRUCTION: IN_SET_RHS_VAL_BKT\n" );
+ break;
+ case IN_POP_TREE: {
+ debug( prg, REALM_BYTECODE, "IN_POP_TREE\n" );
+
+ tree_t *val = vm_pop_tree();
+ colm_tree_downref( prg, sp, val );
+ break;
+ }
+ case IN_POP_VAL: {
+ debug( prg, REALM_BYTECODE, "IN_POP_VAL\n" );
+
+ vm_pop_tree();
+ break;
+ }
+ case IN_POP_N_WORDS: {
+ short n;
+ read_half( n );
+
+ debug( prg, REALM_BYTECODE, "IN_POP_N_WORDS %hd\n", n );
+
+ vm_popn( n );
+ break;
+ }
+ case IN_INT_TO_STR: {
+ debug( prg, REALM_BYTECODE, "IN_INT_TO_STR\n" );
+
+ value_t i = vm_pop_value();
+ head_t *res = int_to_str( prg, (long)i );
+ tree_t *str = construct_string( prg, res );
+ colm_tree_upref( prg, str );
+ vm_push_tree( str );
+ break;
+ }
+ case IN_TREE_TO_STR_XML: {
+ /* Pop a tree, render it with tree_to_str_xml, and push the result as
+ * a newly constructed string tree. The trace names this opcode, not
+ * the _AC variant handled by a separate arm. */
+ debug( prg, REALM_BYTECODE, "IN_TREE_TO_STR_XML\n" );
+
+ tree_t *tree = vm_pop_tree();
+ head_t *res = tree_to_str_xml( prg, sp, tree, false, false );
+ tree_t *str = construct_string( prg, res );
+ colm_tree_upref( prg, str );
+ vm_push_tree( str );
+
+ /* The input tree is released only after the string has been built. */
+ colm_tree_downref( prg, sp, tree );
+ break;
+ }
+ case IN_TREE_TO_STR_XML_AC: {
+ debug( prg, REALM_BYTECODE, "IN_TREE_TO_STR_XML_AC\n" );
+
+ tree_t *tree = vm_pop_tree();
+ head_t *res = tree_to_str_xml_ac( prg, sp, tree, false, false );
+ tree_t *str = construct_string( prg, res );
+ colm_tree_upref( prg, str );
+ vm_push_tree( str );
+ colm_tree_downref( prg, sp, tree );
+ break;
+ }
+ case IN_TREE_TO_STR_POSTFIX: {
+ /* Pop a tree, render it with tree_to_str_postfix, and push the
+ * result as a newly constructed string tree. The trace names this
+ * opcode so it can be told apart from the XML variants. */
+ debug( prg, REALM_BYTECODE, "IN_TREE_TO_STR_POSTFIX\n" );
+
+ tree_t *tree = vm_pop_tree();
+ head_t *res = tree_to_str_postfix( prg, sp, tree, false, false );
+ tree_t *str = construct_string( prg, res );
+ colm_tree_upref( prg, str );
+ vm_push_tree( str );
+
+ /* The input tree is released only after the string has been built. */
+ colm_tree_downref( prg, sp, tree );
+ break;
+ }
+ case IN_TREE_TO_STR: {
+ debug( prg, REALM_BYTECODE, "IN_TREE_TO_STR\n" );
+
+ tree_t *tree = vm_pop_tree();
+ head_t *res = tree_to_str( prg, sp, tree, false, false );
+ tree_t *str = construct_string( prg, res );
+ colm_tree_upref( prg, str );
+ vm_push_tree( str );
+ colm_tree_downref( prg, sp, tree );
+ break;
+ }
+ case IN_TREE_TO_STR_TRIM: {
+ debug( prg, REALM_BYTECODE, "IN_TREE_TO_STR_TRIM\n" );
+
+ tree_t *tree = vm_pop_tree();
+ head_t *res = tree_to_str( prg, sp, tree, true, false );
+ tree_t *str = construct_string( prg, res );
+ colm_tree_upref( prg, str );
+ vm_push_tree( str );
+ colm_tree_downref( prg, sp, tree );
+ break;
+ }
+ case IN_TREE_TO_STR_TRIM_A: {
+ debug( prg, REALM_BYTECODE, "IN_TREE_TO_STR_TRIM_A\n" );
+
+ tree_t *tree = vm_pop_tree();
+ head_t *res = tree_to_str( prg, sp, tree, true, true );
+ tree_t *str = construct_string( prg, res );
+ colm_tree_upref( prg, str );
+ vm_push_tree( str );
+ colm_tree_downref( prg, sp, tree );
+ break;
+ }
+ case IN_TREE_TRIM: {
+ debug( prg, REALM_BYTECODE, "IN_TREE_TRIM\n" );
+
+ tree_t *tree = vm_pop_tree();
+ tree_t *trimmed = tree_trim( prg, sp, tree );
+ vm_push_tree( trimmed );
+ break;
+ }
+ case IN_CONCAT_STR: {
+ debug( prg, REALM_BYTECODE, "IN_CONCAT_STR\n" );
+
+ str_t *s2 = vm_pop_string();
+ str_t *s1 = vm_pop_string();
+ head_t *res = concat_str( s1->value, s2->value );
+ tree_t *str = construct_string( prg, res );
+ colm_tree_upref( prg, str );
+ colm_tree_downref( prg, sp, (tree_t*)s1 );
+ colm_tree_downref( prg, sp, (tree_t*)s2 );
+ vm_push_tree( str );
+ break;
+ }
+
+ case IN_STR_LENGTH: {
+ debug( prg, REALM_BYTECODE, "IN_STR_LENGTH\n" );
+
+ str_t *str = vm_pop_string();
+ long len = string_length( str->value );
+ value_t res = len;
+ vm_push_value( res );
+ colm_tree_downref( prg, sp, (tree_t*)str );
+ break;
+ }
+ case IN_JMP_FALSE_TREE: {
+ short dist;
+ read_half( dist );
+
+ debug( prg, REALM_BYTECODE, "IN_JMP_FALSE_TREE %d\n", dist );
+
+ tree_t *tree = vm_pop_tree();
+ if ( test_false( prg, tree ) )
+ instr += dist;
+ colm_tree_downref( prg, sp, tree );
+ break;
+ }
+ case IN_JMP_TRUE_TREE: {
+ short dist;
+ read_half( dist );
+
+ debug( prg, REALM_BYTECODE, "IN_JMP_TRUE_TREE %d\n", dist );
+
+ tree_t *tree = vm_pop_tree();
+ if ( !test_false( prg, tree ) )
+ instr += dist;
+ colm_tree_downref( prg, sp, tree );
+ break;
+ }
+ case IN_JMP_FALSE_VAL: {
+ short dist;
+ read_half( dist );
+
+ debug( prg, REALM_BYTECODE, "IN_JMP_FALSE_VAL %d\n", dist );
+
+ tree_t *tree = vm_pop_tree();
+ if ( tree == 0 )
+ instr += dist;
+ break;
+ }
+ case IN_JMP_TRUE_VAL: {
+ short dist;
+ read_half( dist );
+
+ debug( prg, REALM_BYTECODE, "IN_JMP_TRUE_VAL %d\n", dist );
+
+ tree_t *tree = vm_pop_tree();
+ if ( tree != 0 )
+ instr += dist;
+ break;
+ }
+ case IN_JMP: {
+ short dist;
+ read_half( dist );
+
+ debug( prg, REALM_BYTECODE, "IN_JMP\n" );
+
+ instr += dist;
+ break;
+ }
+ case IN_REJECT: {
+ debug( prg, REALM_BYTECODE, "IN_REJECT\n" );
+ exec->parser->pda_run->reject = true;
+ break;
+ }
+
+ /*
+ * Binary comparison operators.
+ */
+ case IN_TST_EQL_TREE: {
+ debug( prg, REALM_BYTECODE, "IN_TST_EQL_TREE\n" );
+
+ tree_t *o2 = vm_pop_tree();
+ tree_t *o1 = vm_pop_tree();
+ long r = colm_cmp_tree( prg, o1, o2 );
+ value_t val = r == 0 ? TRUE_VAL : FALSE_VAL;
+ vm_push_value( val );
+ colm_tree_downref( prg, sp, o1 );
+ colm_tree_downref( prg, sp, o2 );
+ break;
+ }
+ case IN_TST_EQL_VAL: {
+ debug( prg, REALM_BYTECODE, "IN_TST_EQL_VAL\n" );
+
+ value_t o2 = vm_pop_value();
+ value_t o1 = vm_pop_value();
+ value_t val = o1 == o2 ? TRUE_VAL : FALSE_VAL;
+ vm_push_value( val );
+ break;
+ }
+ case IN_TST_NOT_EQL_TREE: {
+ debug( prg, REALM_BYTECODE, "IN_TST_NOT_EQL_TREE\n" );
+
+ tree_t *o2 = vm_pop_tree();
+ tree_t *o1 = vm_pop_tree();
+ long r = colm_cmp_tree( prg, o1, o2 );
+ value_t val = r != 0 ? TRUE_VAL : FALSE_VAL;
+ vm_push_value( val );
+ colm_tree_downref( prg, sp, o1 );
+ colm_tree_downref( prg, sp, o2 );
+ break;
+ }
+ case IN_TST_NOT_EQL_VAL: {
+ debug( prg, REALM_BYTECODE, "IN_TST_NOT_EQL_VAL\n" );
+
+ value_t o2 = vm_pop_value();
+ value_t o1 = vm_pop_value();
+ value_t val = o1 != o2 ? TRUE_VAL : FALSE_VAL;
+ vm_push_value( val );
+ break;
+ }
+ case IN_TST_LESS_VAL: {
+ debug( prg, REALM_BYTECODE, "IN_TST_LESS_VAL\n" );
+
+ value_t o2 = vm_pop_value();
+ value_t o1 = vm_pop_value();
+ value_t res = (long)o1 < (long)o2 ? TRUE_VAL : FALSE_VAL;
+ vm_push_value( res );
+ break;
+ }
+ case IN_TST_LESS_TREE: {
+ debug( prg, REALM_BYTECODE, "IN_TST_LESS_TREE\n" );
+
+ tree_t *o2 = vm_pop_tree();
+ tree_t *o1 = vm_pop_tree();
+ long r = colm_cmp_tree( prg, o1, o2 );
+ value_t val = r < 0 ? TRUE_VAL : FALSE_VAL;
+ vm_push_value( val );
+ colm_tree_downref( prg, sp, o1 );
+ colm_tree_downref( prg, sp, o2 );
+ break;
+ }
+ case IN_TST_LESS_EQL_VAL: {
+ debug( prg, REALM_BYTECODE, "IN_TST_LESS_EQL_VAL\n" );
+
+ value_t o2 = vm_pop_value();
+ value_t o1 = vm_pop_value();
+ value_t val = (long)o1 <= (long)o2 ? TRUE_VAL : FALSE_VAL;
+ vm_push_value( val );
+ break;
+ }
+ case IN_TST_LESS_EQL_TREE: {
+ debug( prg, REALM_BYTECODE, "IN_TST_LESS_EQL_TREE\n" );
+
+ tree_t *o2 = vm_pop_tree();
+ tree_t *o1 = vm_pop_tree();
+ long r = colm_cmp_tree( prg, o1, o2 );
+ value_t val = r <= 0 ? TRUE_VAL : FALSE_VAL;
+ vm_push_value( val );
+ colm_tree_downref( prg, sp, o1 );
+ colm_tree_downref( prg, sp, o2 );
+ break;
+ }
+ case IN_TST_GRTR_VAL: {
+ debug( prg, REALM_BYTECODE, "IN_TST_GRTR_VAL\n" );
+
+ value_t o2 = vm_pop_value();
+ value_t o1 = vm_pop_value();
+ value_t val = (long)o1 > (long)o2 ? TRUE_VAL : FALSE_VAL;
+ vm_push_value( val );
+ break;
+ }
+ case IN_TST_GRTR_TREE: {
+ debug( prg, REALM_BYTECODE, "IN_TST_GRTR_TREE\n" );
+
+ tree_t *o2 = vm_pop_tree();
+ tree_t *o1 = vm_pop_tree();
+ long r = colm_cmp_tree( prg, o1, o2 );
+ value_t val = r > 0 ? TRUE_VAL : FALSE_VAL;
+ vm_push_value( val );
+ colm_tree_downref( prg, sp, o1 );
+ colm_tree_downref( prg, sp, o2 );
+ break;
+ }
+ case IN_TST_GRTR_EQL_VAL: {
+ debug( prg, REALM_BYTECODE, "IN_TST_GRTR_EQL_VAL\n" );
+
+ value_t o2 = vm_pop_value();
+ value_t o1 = vm_pop_value();
+
+ value_t val = (long)o1 >= (long)o2 ? TRUE_VAL : FALSE_VAL;
+ vm_push_value( val );
+ break;
+ }
+ case IN_TST_GRTR_EQL_TREE: {
+ debug( prg, REALM_BYTECODE, "IN_TST_GRTR_EQL_TREE\n" );
+
+ tree_t *o2 = vm_pop_tree();
+ tree_t *o1 = vm_pop_tree();
+ long r = colm_cmp_tree( prg, o1, o2 );
+ value_t val = r >= 0 ? TRUE_VAL : FALSE_VAL;
+ vm_push_value( val );
+ colm_tree_downref( prg, sp, o1 );
+ colm_tree_downref( prg, sp, o2 );
+ break;
+ }
+ case IN_TST_LOGICAL_AND: {
+ debug( prg, REALM_BYTECODE, "IN_TST_LOGICAL_AND\n" );
+
+ value_t o2 = vm_pop_value();
+ value_t o1 = vm_pop_value();
+ value_t val = o1 && o2 ? TRUE_VAL : FALSE_VAL;
+ vm_push_value( val );
+ break;
+ }
+ case IN_TST_LOGICAL_OR: {
+ debug( prg, REALM_BYTECODE, "IN_TST_LOGICAL_OR\n" );
+
+ value_t o2 = vm_pop_value();
+ value_t o1 = vm_pop_value();
+ value_t val = o1 || o2 ? TRUE_VAL : FALSE_VAL;
+ vm_push_value( val );
+ break;
+ }
+
+ case IN_TST_NZ_TREE: {
+ debug( prg, REALM_BYTECODE, "IN_TST_NZ_TREE\n" );
+
+ tree_t *tree = vm_pop_tree();
+ long r = !test_false( prg, tree );
+ colm_tree_downref( prg, sp, tree );
+ vm_push_value( r );
+ break;
+ }
+
+ case IN_NOT_VAL: {
+ debug( prg, REALM_BYTECODE, "IN_NOT_VAL\n" );
+
+ value_t o1 = vm_pop_value();
+ value_t val = o1 == 0 ? TRUE_VAL : FALSE_VAL;
+ vm_push_value( val );
+ break;
+ }
+
+ case IN_NOT_TREE: {
+ debug( prg, REALM_BYTECODE, "IN_NOT_TREE\n" );
+
+ tree_t *tree = vm_pop_tree();
+ long r = test_false( prg, tree );
+ value_t val = r ? TRUE_VAL : FALSE_VAL;
+ vm_push_value( val );
+ colm_tree_downref( prg, sp, tree );
+ break;
+ }
+
+ case IN_ADD_INT: {
+ debug( prg, REALM_BYTECODE, "IN_ADD_INT\n" );
+
+ value_t o2 = vm_pop_value();
+ value_t o1 = vm_pop_value();
+ long r = (long)o1 + (long)o2;
+ value_t val = r;
+ vm_push_value( val );
+ break;
+ }
+ case IN_MULT_INT: {
+ debug( prg, REALM_BYTECODE, "IN_MULT_INT\n" );
+
+ value_t o2 = vm_pop_value();
+ value_t o1 = vm_pop_value();
+ long r = (long)o1 * (long)o2;
+ value_t val = r;
+ vm_push_value( val );
+ break;
+ }
+ case IN_DIV_INT: {
+ debug( prg, REALM_BYTECODE, "IN_DIV_INT\n" );
+
+ value_t o2 = vm_pop_value();
+ value_t o1 = vm_pop_value();
+ long r = (long)o1 / (long)o2;
+ value_t val = r;
+ vm_push_value( val );
+ break;
+ }
+ case IN_SUB_INT: {
+ debug( prg, REALM_BYTECODE, "IN_SUB_INT\n" );
+
+ value_t o2 = vm_pop_value();
+ value_t o1 = vm_pop_value();
+ long r = (long)o1 - (long)o2;
+ value_t val = r;
+ vm_push_value( val );
+ break;
+ }
+ case IN_DUP_VAL: {
+ debug( prg, REALM_BYTECODE, "IN_DUP_VAL\n" );
+
+ word_t val = (word_t)vm_top();
+ vm_push_type( word_t, val );
+ break;
+ }
+ case IN_DUP_TREE: {
+ debug( prg, REALM_BYTECODE, "IN_DUP_TREE\n" );
+
+ tree_t *val = vm_top();
+ colm_tree_upref( prg, val );
+ vm_push_tree( val );
+ break;
+ }
+ case IN_TRITER_FROM_REF: {
+ short field;
+ half_t arg_size;
+ half_t search_type_id;
+ read_half( field );
+ read_half( arg_size );
+ read_half( search_type_id );
+
+ debug( prg, REALM_BYTECODE, "IN_TRITER_FROM_REF "
+ "%hd %hd %hd\n", field, arg_size, search_type_id );
+
+ ref_t root_ref;
+ root_ref.kid = vm_pop_kid();
+ root_ref.next = vm_pop_ref();
+ void *mem = vm_get_plocal(exec, field);
+
+ tree_t **stack_root = vm_ptop();
+ long root_size = vm_ssize();
+
+ colm_init_tree_iter( (tree_iter_t*)mem, stack_root,
+ arg_size, root_size, &root_ref, search_type_id );
+ break;
+ }
+ case IN_TRITER_UNWIND:
+ case IN_TRITER_DESTROY: {
+ short field;
+ read_half( field );
+
+ tree_iter_t *iter = (tree_iter_t*) vm_get_plocal(exec, field);
+ debug( prg, REALM_BYTECODE, "IN_TRITER_DESTROY %hd %d\n",
+ field, iter->yield_size );
+ colm_tree_iter_destroy( prg, &sp, iter );
+ break;
+ }
+ case IN_REV_TRITER_FROM_REF: {
+ short field;
+ half_t arg_size;
+ half_t search_type_id;
+ read_half( field );
+ read_half( arg_size );
+ read_half( search_type_id );
+
+ debug( prg, REALM_BYTECODE, "IN_REV_TRITER_FROM_REF "
+ "%hd %hd %hd\n", field, arg_size, search_type_id );
+
+ ref_t root_ref;
+ root_ref.kid = vm_pop_kid();
+ root_ref.next = vm_pop_ref();
+
+ tree_t **stack_root = vm_ptop();
+ long root_size = vm_ssize();
+
+ int children = 0;
+ kid_t *kid = tree_child( prg, root_ref.kid->tree );
+ while ( kid != 0 ) {
+ vm_push_kid( kid );
+ kid = kid->next;
+ children++;
+ }
+
+ void *mem = vm_get_plocal(exec, field);
+ colm_init_rev_tree_iter( (rev_tree_iter_t*)mem, stack_root,
+ arg_size, root_size, &root_ref, search_type_id, children );
+ break;
+ }
+ case IN_REV_TRITER_UNWIND:
+ case IN_REV_TRITER_DESTROY: {
+ short field;
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_REV_TRITER_DESTROY\n" );
+
+ rev_tree_iter_t *iter = (rev_tree_iter_t*) vm_get_plocal(exec, field);
+ colm_rev_tree_iter_destroy( prg, &sp, iter );
+ break;
+ }
+ case IN_TREE_SEARCH: {
+ word_t id;
+ read_word( id );
+
+ debug( prg, REALM_BYTECODE, "IN_TREE_SEARCH\n" );
+
+ tree_t *tree = vm_pop_tree();
+ tree_t *res = tree_search( prg, tree, id );
+ colm_tree_upref( prg, res );
+ vm_push_tree( res );
+ colm_tree_downref( prg, sp, tree );
+ break;
+ }
+ case IN_TRITER_ADVANCE: {
+ short field;
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_TRITER_ADVANCE\n" );
+
+ tree_iter_t *iter = (tree_iter_t*) vm_get_plocal(exec, field);
+ tree_t *res = tree_iter_advance( prg, &sp, iter );
+ //colm_tree_upref( prg, res );
+ vm_push_tree( res );
+ break;
+ }
+ case IN_TRITER_NEXT_CHILD: {
+ short field;
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_TRITER_NEXT_CHILD\n" );
+
+ tree_iter_t *iter = (tree_iter_t*) vm_get_plocal(exec, field);
+ tree_t *res = tree_iter_next_child( prg, &sp, iter );
+ //colm_tree_upref( prg, res );
+ vm_push_tree( res );
+ break;
+ }
+ case IN_REV_TRITER_PREV_CHILD: {
+ short field;
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_REV_TRITER_PREV_CHILD\n" );
+
+ rev_tree_iter_t *iter = (rev_tree_iter_t*) vm_get_plocal(exec, field);
+ tree_t *res = tree_rev_iter_prev_child( prg, &sp, iter );
+ //colm_tree_upref( prg, res );
+ vm_push_tree( res );
+ break;
+ }
+ case IN_TRITER_NEXT_REPEAT: {
+ short field;
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_TRITER_NEXT_REPEAT\n" );
+
+ tree_iter_t *iter = (tree_iter_t*) vm_get_plocal(exec, field);
+ tree_t *res = tree_iter_next_repeat( prg, &sp, iter );
+ //colm_tree_upref( prg, res );
+ vm_push_tree( res );
+ break;
+ }
+ case IN_TRITER_PREV_REPEAT: {
+ short field;
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_TRITER_PREV_REPEAT\n" );
+
+ tree_iter_t *iter = (tree_iter_t*) vm_get_plocal(exec, field);
+ tree_t *res = tree_iter_prev_repeat( prg, &sp, iter );
+ //colm_tree_upref( prg, res );
+ vm_push_tree( res );
+ break;
+ }
+ case IN_TRITER_GET_CUR_R: {
+ short field;
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_TRITER_GET_CUR_R\n" );
+
+ tree_iter_t *iter = (tree_iter_t*) vm_get_plocal(exec, field);
+ tree_t *tree = tree_iter_deref_cur( iter );
+ colm_tree_upref( prg, tree );
+ vm_push_tree( tree );
+ break;
+ }
+ case IN_TRITER_GET_CUR_WC: {
+ short field;
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_TRITER_GET_CUR_WC\n" );
+
+ tree_iter_t *iter = (tree_iter_t*) vm_get_plocal(exec, field);
+ split_iter_cur( prg, &sp, iter );
+ tree_t *tree = tree_iter_deref_cur( iter );
+ colm_tree_upref( prg, tree );
+ vm_push_tree( tree );
+ break;
+ }
+ case IN_TRITER_SET_CUR_WC: {
+ short field;
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_TRITER_SET_CUR_WC\n" );
+
+ tree_t *tree = vm_pop_tree();
+ tree_iter_t *iter = (tree_iter_t*) vm_get_plocal(exec, field);
+ split_iter_cur( prg, &sp, iter );
+ tree_t *old = tree_iter_deref_cur( iter );
+ set_triter_cur( prg, iter, tree );
+ colm_tree_downref( prg, sp, old );
+ break;
+ }
+ case IN_GEN_ITER_FROM_REF: {
+ short field;
+ half_t arg_size;
+ half_t generic_id;
+ read_half( field );
+ read_half( arg_size );
+ read_half( generic_id );
+
+ debug( prg, REALM_BYTECODE, "IN_GEN_ITER_FROM_REF "
+ "%hd %hd %hd\n", field, arg_size, generic_id );
+
+ ref_t root_ref;
+ root_ref.kid = vm_pop_kid();
+ root_ref.next = vm_pop_ref();
+ void *mem = vm_get_plocal(exec, field);
+
+ tree_t **stack_root = vm_ptop();
+ long root_size = vm_ssize();
+
+ colm_init_list_iter( (generic_iter_t*)mem, stack_root, arg_size,
+ root_size, &root_ref, generic_id );
+ break;
+ }
+ /* Both the unwind and the destroy opcode tear down the generic
+ * iterator stored in the local slot named by the operand. */
+ case IN_GEN_ITER_UNWIND:
+ case IN_GEN_ITER_DESTROY: {
+ short field;
+ read_half( field );
+
+ generic_iter_t *iter = (generic_iter_t*) vm_get_plocal(exec, field);
+
+ /* Trace under the opcode's current name rather than the stale
+ * IN_LIST_ITER_DESTROY label. */
+ debug( prg, REALM_BYTECODE, "IN_GEN_ITER_DESTROY %d\n", iter->yield_size );
+
+ colm_list_iter_destroy( prg, &sp, iter );
+ break;
+ }
+ case IN_LIST_ITER_ADVANCE: {
+ short field;
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_LIST_ITER_ADVANCE\n" );
+
+ generic_iter_t *iter = (generic_iter_t*) vm_get_plocal(exec, field);
+ tree_t *res = colm_list_iter_advance( prg, &sp, iter );
+ //colm_tree_upref( prg, res );
+ vm_push_tree( res );
+ break;
+ }
+ case IN_REV_LIST_ITER_ADVANCE: {
+ short field;
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_REV_LIST_ITER_ADVANCE\n" );
+
+ generic_iter_t *iter = (generic_iter_t*) vm_get_plocal(exec, field);
+ tree_t *res = colm_rev_list_iter_advance( prg, &sp, iter );
+ //colm_tree_upref( prg, res );
+ vm_push_tree( res );
+ break;
+ }
+ case IN_MAP_ITER_ADVANCE: {
+ short field;
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_MAP_ITER_ADVANCE\n" );
+
+ generic_iter_t *iter = (generic_iter_t*) vm_get_plocal(exec, field);
+ tree_t *res = colm_map_iter_advance( prg, &sp, iter );
+ //colm_tree_upref( prg, res );
+ vm_push_tree( res );
+ break;
+ }
+ case IN_GEN_ITER_GET_CUR_R: {
+ short field;
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_GEN_ITER_GET_CUR_R\n" );
+
+ generic_iter_t *iter = (generic_iter_t*) vm_get_plocal(exec, field);
+ tree_t *tree = colm_list_iter_deref_cur( prg, iter );
+ //colm_tree_upref( prg, tree );
+ vm_push_tree( tree );
+ break;
+ }
+ case IN_GEN_VITER_GET_CUR_R: {
+ short field;
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_GEN_VITER_GET_CUR_R\n" );
+
+ generic_iter_t *iter = (generic_iter_t*) vm_get_plocal(exec, field);
+ value_t value = colm_viter_deref_cur( prg, iter );
+ vm_push_value( value );
+ break;
+ }
+ case IN_MATCH: {
+ half_t pattern_id;
+ read_half( pattern_id );
+
+ debug( prg, REALM_BYTECODE, "IN_MATCH\n" );
+
+ tree_t *tree = vm_pop_tree();
+
+ /* Run the match, push the result. */
+ int root_node = prg->rtd->pat_repl_info[pattern_id].offset;
+
+ /* Bindings are indexed starting at 1. Zero bindId to represent no
+ * binding. We make a space for it here rather than do math at
+ * access them. */
+ long num_bindings = prg->rtd->pat_repl_info[pattern_id].num_bindings;
+ tree_t *bindings[1+num_bindings];
+ memset( bindings, 0, sizeof(tree_t*)*(1+num_bindings) );
+
+ kid_t kid;
+ kid.tree = tree;
+ kid.next = 0;
+ int matched = match_pattern( bindings, prg, root_node, &kid, false );
+
+ if ( !matched )
+ memset( bindings, 0, sizeof(tree_t*)*(1+num_bindings) );
+ else {
+ int b;
+ for ( b = 1; b <= num_bindings; b++ )
+ assert( bindings[b] != 0 );
+ }
+
+ tree_t *result = matched ? tree : 0;
+ colm_tree_upref( prg, result );
+ vm_push_tree( result ? tree : 0 );
+ int b;
+ for ( b = 1; b <= num_bindings; b++ ) {
+ colm_tree_upref( prg, bindings[b] );
+ vm_push_tree( bindings[b] );
+ }
+
+ colm_tree_downref( prg, sp, tree );
+ break;
+ }
+
+ case IN_PROD_NUM: {
+ /* Pop a tree and push its production number as a value. */
+ debug( prg, REALM_BYTECODE, "IN_PROD_NUM\n" );
+
+ tree_t *tree = vm_pop_tree();
+
+ /* Read the field before dropping the popped reference; the downref
+ * may free the tree if that was its last reference. */
+ value_t v = tree->prod_num;
+ colm_tree_downref( prg, sp, tree );
+
+ vm_push_value( v );
+ break;
+ }
+
+ case IN_PRINT_TREE: {
+ uchar trim;
+ read_byte( trim );
+
+ debug( prg, REALM_BYTECODE, "IN_PRINT_TREE %d\n", (int)trim );
+
+ tree_t *to_send = vm_pop_tree();
+ stream_t *stream = vm_pop_stream();
+
+ struct stream_impl *si = stream_to_impl( stream );
+
+ int auto_trim;
+ if ( trim == TRIM_YES )
+ auto_trim = true;
+ else if ( trim == TRIM_NO )
+ auto_trim = false;
+ else
+ auto_trim = si->funcs->get_option( prg, si, 0 );
+
+ si->funcs->print_tree( prg, sp, si, to_send, auto_trim );
+ vm_push_stream( stream );
+ colm_tree_downref( prg, sp, to_send );
+ break;
+ }
+
+ case IN_SEND_TEXT_W: {
+ uchar trim;
+ read_byte( trim );
+
+ debug( prg, REALM_BYTECODE, "IN_SEND_TEXT_W %d\n", (int)trim );
+
+ tree_t *to_send = vm_pop_tree();
+ parser_t *parser = vm_pop_parser();
+
+ struct input_impl *si = input_to_impl( parser->input );
+
+ int auto_trim;
+ if ( trim == TRIM_YES )
+ auto_trim = true;
+ else if ( trim == TRIM_NO )
+ auto_trim = false;
+ else
+ auto_trim = si->funcs->get_option( prg, si, 0 );
+
+ word_t len = stream_append_text( prg, sp, parser->input, to_send, auto_trim );
+
+ vm_push_parser( parser );
+
+ if ( !exec->WV )
+ colm_tree_downref( prg, sp, to_send );
+ else {
+ rcode_unit_start( exec );
+ rcode_code( exec, IN_SEND_TEXT_BKT );
+ rcode_word( exec, (word_t) parser );
+ rcode_word( exec, (word_t) to_send );
+ rcode_word( exec, (word_t) len );
+ rcode_unit_term( exec );
+ }
+
+ exec->steps = parser->pda_run->steps;
+ exec->pcr = PCR_START;
+ break;
+ }
+
+ case IN_SEND_TEXT_BKT: {
+ parser_t *parser;
+ tree_t *sent;
+ word_t len;
+ read_parser( parser );
+ read_tree( sent );
+ read_word( len );
+
+ debug( prg, REALM_BYTECODE, "IN_SEND_TEXT_BKT\n" );
+
+ struct input_impl *si = input_to_impl( parser->input );
+ stream_undo_append( prg, sp, si, sent, len );
+
+ colm_tree_downref( prg, sp, sent );
+ break;
+ }
+
+ case IN_SEND_TREE_W: {
+ uchar trim;
+ read_byte( trim );
+
+ debug( prg, REALM_BYTECODE, "IN_SEND_TREE_W %d\n", (int)trim );
+
+ tree_t *to_send = vm_pop_tree();
+ parser_t *parser = vm_pop_parser();
+
+ struct input_impl *si = input_to_impl( parser->input );
+
+ int auto_trim;
+ if ( trim == TRIM_YES )
+ auto_trim = true;
+ else if ( trim == TRIM_NO )
+ auto_trim = false;
+ else
+ auto_trim = si->funcs->get_option( prg, si, 0 );
+
+ if ( auto_trim )
+ to_send = tree_trim( prg, sp, to_send );
+
+ word_t len = stream_append_tree( prg, sp, parser->input, to_send );
+
+ vm_push_parser( parser );
+
+ if ( !exec->WV )
+ colm_tree_downref( prg, sp, to_send );
+ else {
+ rcode_unit_start( exec );
+ rcode_code( exec, IN_SEND_TREE_BKT );
+ rcode_word( exec, (word_t) parser );
+ rcode_word( exec, (word_t) to_send );
+ rcode_word( exec, (word_t) len );
+ rcode_unit_term( exec );
+ }
+
+ exec->steps = parser->pda_run->steps;
+ exec->pcr = PCR_START;
+ break;
+ }
+
+ case IN_SEND_TREE_BKT: {
+ parser_t *parser;
+ tree_t *sent;
+ word_t len;
+ read_parser( parser );
+ read_tree( sent );
+ read_word( len );
+
+ debug( prg, REALM_BYTECODE, "IN_SEND_TREE_BKT\n" );
+
+ struct input_impl *si = input_to_impl( parser->input );
+ stream_undo_append( prg, sp, si, sent, len );
+
+ colm_tree_downref( prg, sp, sent );
+ break;
+ }
+
+ case IN_SEND_NOTHING: {
+ parser_t *parser = vm_pop_parser();
+ vm_push_parser( parser );
+ exec->steps = parser->pda_run->steps;
+ exec->pcr = PCR_START;
+ break;
+ }
+ case IN_SEND_STREAM_W: {
+ debug( prg, REALM_BYTECODE, "IN_SEND_STREAM_W\n" );
+
+ stream_t *to_send = vm_pop_stream();
+ parser_t *parser = vm_pop_parser();
+
+ word_t len = stream_append_stream( prg, sp, parser->input, to_send );
+
+ vm_push_parser( parser );
+
+ if ( exec->WV ) {
+ rcode_unit_start( exec );
+ rcode_code( exec, IN_SEND_STREAM_BKT );
+ rcode_word( exec, (word_t) parser );
+ rcode_word( exec, (word_t) to_send );
+ rcode_word( exec, (word_t) len );
+ rcode_unit_term( exec );
+ }
+
+ exec->steps = parser->pda_run->steps;
+ exec->pcr = PCR_START;
+
+ break;
+ }
+
+ case IN_SEND_STREAM_BKT: {
+ parser_t *parser;
+ tree_t *sent;
+ word_t len;
+ read_parser( parser );
+ read_tree( sent );
+ read_word( len );
+
+ debug( prg, REALM_BYTECODE, "IN_SEND_STREAM_BKT\n" );
+
+ struct input_impl *si = input_to_impl( parser->input );
+ stream_undo_append_stream( prg, sp, si, sent, len );
+ break;
+ }
+
+ case IN_SEND_EOF_W: {
+ struct input_impl *si;
+
+ debug( prg, REALM_BYTECODE, "IN_SEND_EOF_W\n" );
+ parser_t *parser = vm_pop_parser();
+ vm_push_parser( parser );
+
+ si = input_to_impl( parser->input );
+ si->funcs->set_eof_mark( prg, si, true );
+
+ if ( exec->WV ) {
+ rcode_unit_start( exec );
+ rcode_code( exec, IN_SEND_EOF_BKT );
+ rcode_word( exec, (word_t) parser );
+ rcode_unit_term( exec );
+ }
+
+ exec->steps = parser->pda_run->steps;
+ exec->pcr = PCR_START;
+ break;
+ }
+
+ case IN_SEND_EOF_BKT: {
+ parser_t *parser;
+ read_parser( parser );
+
+ debug( prg, REALM_BYTECODE, "IN_SEND_EOF_BKT\n" );
+
+ struct input_impl *si = input_to_impl( parser->input );
+ si->funcs->set_eof_mark( prg, si, false );
+ break;
+ }
+
+ case IN_INPUT_CLOSE_WC: {
+ debug( prg, REALM_BYTECODE, "IN_INPUT_CLOSE_WC\n" );
+
+ stream_t *stream = vm_pop_stream();
+ struct stream_impl *si = stream->impl;
+
+ si->funcs->close_stream( prg, si );
+
+ vm_push_stream( stream );
+ break;
+ }
+ case IN_INPUT_AUTO_TRIM_WC: {
+ debug( prg, REALM_BYTECODE, "IN_INPUT_AUTO_TRIM_WC\n" );
+
+ stream_t *stream = vm_pop_stream();
+ value_t auto_trim = vm_pop_value();
+ struct stream_impl *si = stream->impl;
+
+ si->funcs->set_option( prg, si, 0, (long) auto_trim );
+
+ vm_push_stream( stream );
+ break;
+ }
+ case IN_IINPUT_AUTO_TRIM_WC: {
+ /* Input-object counterpart of IN_INPUT_AUTO_TRIM_WC: pop an input
+ * and a value, store the value as option 0 on the input's
+ * implementation, then push the input back. The trace uses this
+ * opcode's own name so the two variants can be told apart. */
+ debug( prg, REALM_BYTECODE, "IN_IINPUT_AUTO_TRIM_WC\n" );
+
+ input_t *input = vm_pop_input();
+ value_t auto_trim = vm_pop_value();
+ struct input_impl *ii = input->impl;
+
+ ii->funcs->set_option( prg, ii, 0, (long) auto_trim );
+
+ vm_push_input( input );
+ break;
+ }
+
+ case IN_SET_ERROR: {
+ debug( prg, REALM_BYTECODE, "IN_SET_ERROR\n" );
+
+ tree_t *error = vm_pop_tree();
+ colm_tree_downref( prg, sp, prg->error );
+ prg->error = error;
+ break;
+ }
+
+ case IN_GET_ERROR: {
+ debug( prg, REALM_BYTECODE, "IN_GET_ERROR\n" );
+
+ vm_pop_tree();
+ colm_tree_upref( prg, prg->error );
+ vm_push_tree( prg->error );
+ break;
+ }
+
+ /* stream:
+ * Push value and stash current on IN_PCR_CALL. The instructions
+ * executed by a call need access to the stream the parser was called
+ * with. We need to preserve the stream for the caller, so we push
+ * first, then set it to the current stream.
+ * pcr:
+ * Need to preserve the pcr value between pda run invocations. Push
+ * current pcr value and start fresh with a new value on PCR_CALL.
+ * steps:
+ * Init from the PDA run when we start to parse. Need to preserve the
+ * starting steps value from the start of parsing to the moment we
+ * write the backtrack instruction. Start fresh with a private value
+ * on a PCR_CALL by pushing and initializing. */
+
+ case IN_PARSE_INIT_BKT: {
+ debug( prg, REALM_BYTECODE, "IN_PARSE_INIT_BKT\n" );
+
+ parser_t *parser;
+ word_t steps;
+
+ read_parser( parser );
+ read_word( steps );
+
+ vm_push_parser( parser );
+
+ exec->steps = steps;
+ exec->pcr = PCR_START;
+ break;
+ }
+
+ case IN_LOAD_RETVAL: {
+ debug( prg, REALM_BYTECODE, "IN_LOAD_RETVAL\n" );
+ vm_push_tree( exec->ret_val );
+ break;
+ }
+
+ case IN_PCR_RET: {
+ debug( prg, REALM_BYTECODE, "IN_PCR_RET\n" );
+
+ if ( exec->frame_id >= 0 ) {
+ struct frame_info *fi = &prg->rtd->frame_info[exec->frame_id];
+ downref_local_trees( prg, sp, exec, fi->locals, fi->locals_len );
+ debug( prg, REALM_BYTECODE, "RET: %d\n", fi->frame_size );
+
+ vm_popn( fi->frame_size );
+ }
+
+ instr = vm_pop_type(code_t*);
+
+ exec->WV = vm_pop_type(word_t);
+ exec->parser = vm_pop_parser();
+ exec->pcr = vm_pop_type(word_t);
+ exec->steps = vm_pop_type(word_t);
+ exec->frame_id = vm_pop_type(long);
+ exec->iframe_ptr = vm_pop_type(tree_t**);
+ exec->frame_ptr = vm_pop_type(tree_t**);
+
+ assert( instr != 0 );
+ break;
+ }
+
+ case IN_PCR_END_DECK: {
+ debug( prg, REALM_BYTECODE, "IN_PCR_END_DECK\n" );
+ exec->parser->pda_run->on_deck = false;
+ break;
+ }
+
+ case IN_PARSE_FRAG_W: {
+ parser_t *parser = vm_pop_parser();
+ vm_push_parser( parser );
+
+ debug( prg, REALM_BYTECODE, "IN_PARSE_FRAG_W\n" );
+
+ exec->pcr = colm_parse_frag( prg, sp, parser->pda_run,
+ parser->input, exec->pcr );
+
+ /* If done, jump to the terminating instruction, otherwise fall
+ * through to call some code, then jump back here. */
+ if ( exec->pcr != PCR_DONE )
+ instr = pcr_call( prg, exec, &sp, instr, parser );
+ else {
+ if ( exec->WV ) {
+ rcode_unit_start( exec );
+
+ rcode_code( exec, IN_PARSE_INIT_BKT );
+ rcode_word( exec, (word_t)parser );
+ rcode_word( exec, (word_t)exec->steps );
+ rcode_code( exec, IN_PARSE_FRAG_BKT );
+ rcode_unit_term( exec );
+ }
+
+ if ( prg->induce_exit )
+ goto out;
+ }
+ break;
+ }
+
+ case IN_PARSE_FRAG_BKT: {
+ parser_t *parser = vm_pop_parser();
+ vm_push_parser( parser );
+
+ debug( prg, REALM_BYTECODE, "IN_PARSE_FRAG_BKT\n" );
+
+ exec->pcr = colm_parse_undo_frag( prg, sp, parser->pda_run,
+ parser->input, exec->pcr, exec->steps );
+
+ if ( exec->pcr != PCR_DONE )
+ instr = pcr_call( prg, exec, &sp, instr, parser );
+ else {
+ vm_pop_parser();
+ }
+ break;
+ }
+
+ case IN_REDUCE_COMMIT: {
+ parser_t *parser = vm_pop_parser();
+ vm_push_parser( parser );
+
+ debug( prg, REALM_BYTECODE, "IN_REDUCE_COMMIT\n" );
+
+ colm_parse_reduce_commit( prg, sp, parser->pda_run );
+ break;
+ }
+
+
+ case IN_INPUT_PULL_WV: {
+ debug( prg, REALM_BYTECODE, "IN_INPUT_PULL_WV\n" );
+
+ input_t *input = vm_pop_input();
+ tree_t *len = vm_pop_tree();
+ tree_t *string = stream_pull_bc( prg, sp, 0, input, len );
+ colm_tree_upref( prg, string );
+ vm_push_tree( string );
+
+ /* Single unit. */
+ colm_tree_upref( prg, string );
+ rcode_code( exec, IN_INPUT_PULL_BKT );
+ rcode_word( exec, (word_t) string );
+ rcode_unit_term( exec );
+
+ //colm_tree_downref( prg, sp, len );
+ break;
+ }
+
+ case IN_INPUT_PULL_WC: {
+ debug( prg, REALM_BYTECODE, "IN_INPUT_PULL_WC\n" );
+
+ input_t *input = vm_pop_input();
+ tree_t *len = vm_pop_tree();
+ tree_t *string = stream_pull_bc( prg, sp, 0, input, len );
+ colm_tree_upref( prg, string );
+ vm_push_tree( string );
+
+ //colm_tree_downref( prg, sp, len );
+ break;
+ }
+ case IN_INPUT_PULL_BKT: {
+ tree_t *string;
+ read_tree( string );
+
+ input_t *input = vm_pop_input();
+
+ debug( prg, REALM_BYTECODE, "IN_INPUT_PULL_BKT\n" );
+
+ undo_pull( prg, input, string );
+ colm_tree_downref( prg, sp, string );
+ break;
+ }
+ case IN_INPUT_PUSH_WV: {
+ debug( prg, REALM_BYTECODE, "IN_INPUT_PUSH_WV\n" );
+
+ input_t *input = vm_pop_input();
+ tree_t *tree = vm_pop_tree();
+ long len = input_push( prg, sp, input_to_impl( input ), tree, false );
+ vm_push_tree( 0 );
+
+ /* Single unit. */
+ rcode_code( exec, IN_INPUT_PUSH_BKT );
+ rcode_word( exec, len );
+ rcode_unit_term( exec );
+
+ colm_tree_downref( prg, sp, tree );
+ break;
+ }
+ case IN_INPUT_PUSH_IGNORE_WV: {
+ debug( prg, REALM_BYTECODE, "IN_INPUT_PUSH_IGNORE_WV\n" );
+
+ input_t *input = vm_pop_input();
+ tree_t *tree = vm_pop_tree();
+ long len = input_push( prg, sp, input_to_impl( input ), tree, true );
+ vm_push_tree( 0 );
+
+ /* Single unit. */
+ rcode_code( exec, IN_INPUT_PUSH_BKT );
+ rcode_word( exec, len );
+ rcode_unit_term( exec );
+
+ colm_tree_downref( prg, sp, tree );
+ break;
+ }
+ case IN_INPUT_PUSH_BKT: {
+ /* Reverse of an input push: read the length that was recorded in the
+ * reverse code and undo a push of that length on the popped input. */
+ word_t len;
+ read_word( len );
+
+ /* word_t carries pointer-sized values elsewhere in this switch, so it
+ * does not match %d. The forward instructions record a long here, so
+ * print it as one. */
+ debug( prg, REALM_BYTECODE, "IN_INPUT_PUSH_BKT %ld\n", (long)len );
+
+ input_t *input = vm_pop_input();
+ colm_undo_stream_push( prg, sp, input_to_impl( input ), len );
+ break;
+ }
+ case IN_INPUT_PUSH_STREAM_WV: {
+ /* Push a stream onto an input and record reverse code for it. Pops
+ * the input first, then the stream to push. */
+ debug( prg, REALM_BYTECODE, "IN_INPUT_PUSH_STREAM_WV\n" );
+
+ input_t *input = vm_pop_input();
+ stream_t *to_push = vm_pop_stream();
+ long len = input_push_stream( prg, sp, input_to_impl( input ), to_push );
+ /* The instruction leaves a null tree on the stack as its result. */
+ vm_push_tree( 0 );
+
+ /* Single unit.
+ * NOTE(review): the reverse opcode recorded here is IN_INPUT_PUSH_BKT,
+ * not IN_INPUT_PUSH_STREAM_BKT. Both handlers in this switch call
+ * colm_undo_stream_push with the recorded length, so the undo looks
+ * identical either way. Confirm against any other reverse-code
+ * consumers before changing the opcode. */
+ rcode_code( exec, IN_INPUT_PUSH_BKT );
+ rcode_word( exec, len );
+ rcode_unit_term( exec );
+ break;
+ }
+ case IN_INPUT_PUSH_STREAM_BKT: {
+ /* Reverse of a stream push: read the recorded length and undo a push
+ * of that length on the popped input. */
+ word_t len;
+ read_word( len );
+
+ /* word_t carries pointer-sized values elsewhere in this switch, so it
+ * does not match %d. Print it as a long with a matching specifier. */
+ debug( prg, REALM_BYTECODE, "IN_INPUT_PUSH_STREAM_BKT %ld\n", (long)len );
+
+ input_t *input = vm_pop_input();
+ colm_undo_stream_push( prg, sp, input_to_impl( input ), len );
+ break;
+ }
+ case IN_CONS_GENERIC: {
+ half_t generic_id;
+ half_t stop_id;
+ read_half( generic_id );
+ read_half( stop_id );
+
+ debug( prg, REALM_BYTECODE, "IN_CONS_GENERIC %hd %hd\n", generic_id, stop_id );
+
+ struct_t *gen = colm_construct_generic( prg, generic_id, stop_id );
+ vm_push_struct( gen );
+ break;
+ }
+ case IN_CONS_REDUCER: {
+ /* Construct a reducer from the two half-word operands (generic id,
+ * then reducer id) and push the resulting struct. */
+ half_t generic_id;
+ half_t reducer_id;
+ read_half( generic_id );
+ read_half( reducer_id );
+
+ /* Trace both operands, as IN_CONS_GENERIC does for its pair. */
+ debug( prg, REALM_BYTECODE, "IN_CONS_REDUCER %hd %hd\n", generic_id, reducer_id );
+
+ struct_t *gen = colm_construct_reducer( prg, generic_id, reducer_id );
+ vm_push_struct( gen );
+ break;
+ }
+ case IN_CONS_OBJECT: {
+ half_t lang_el_id;
+ read_half( lang_el_id );
+
+ debug( prg, REALM_BYTECODE, "IN_CONS_OBJECT %hd\n", lang_el_id );
+
+ tree_t *repl_tree = colm_construct_object( prg, 0, 0, lang_el_id );
+ vm_push_tree( repl_tree );
+ break;
+ }
+ case IN_CONSTRUCT: {
+ /* Build a tree from a pattern/replacement entry. The half-word
+ * operand selects the entry in pat_repl_info; its bindings are popped
+ * from the stack and the constructed tree is pushed. */
+ half_t pattern_id;
+ read_half( pattern_id );
+
+ debug( prg, REALM_BYTECODE, "IN_CONSTRUCT\n" );
+
+ //struct lang_el_info *lelInfo = prg->rtd->lelInfo;
+ //struct pat_cons_node *nodes = prg->rtd->patReplNodes;
+ int root_node = prg->rtd->pat_repl_info[pattern_id].offset;
+
+ /* Note that bindIds are indexed at one. Add one spot for them. */
+ int num_bindings = prg->rtd->pat_repl_info[pattern_id].num_bindings;
+ tree_t *bindings[1+num_bindings];
+
+ /* Slot zero is never filled by the loop below. Give it a defined
+ * value so the array handed to colm_construct_tree holds no
+ * indeterminate pointer. */
+ bindings[0] = 0;
+
+ int b;
+ for ( b = 1; b <= num_bindings; b++ ) {
+ bindings[b] = vm_pop_tree();
+ assert( bindings[b] != 0 );
+ }
+
+ tree_t *repl_tree = colm_construct_tree( prg, 0, bindings, root_node );
+
+ vm_push_tree( repl_tree );
+ break;
+ }
+ case IN_CONSTRUCT_TERM: {
+ half_t token_id;
+ read_half( token_id );
+
+ debug( prg, REALM_BYTECODE, "IN_CONSTRUCT_TERM\n" );
+
+ /* Pop the string we are constructing the token from. */
+ str_t *str = vm_pop_string();
+ tree_t *res = colm_construct_term( prg, token_id, str->value );
+ colm_tree_upref( prg, res );
+ vm_push_tree( res );
+ break;
+ }
+ case IN_MAKE_TOKEN: {
+ uchar nargs;
+ int i;
+ read_byte( nargs );
+
+ debug( prg, REALM_BYTECODE, "IN_MAKE_TOKEN\n" );
+
+ tree_t *arg[nargs];
+ for ( i = nargs-1; i >= 0; i-- )
+ arg[i] = vm_pop_tree();
+
+ tree_t *result = colm_construct_token( prg, arg, nargs );
+ for ( i = 1; i < nargs; i++ )
+ colm_tree_downref( prg, sp, arg[i] );
+ vm_push_tree( result );
+ break;
+ }
+ case IN_MAKE_TREE: {
+ uchar nargs;
+ int i;
+ read_byte( nargs );
+
+ debug( prg, REALM_BYTECODE, "IN_MAKE_TREE\n" );
+
+ tree_t *arg[nargs];
+ for ( i = nargs-1; i >= 0; i-- )
+ arg[i] = vm_pop_tree();
+
+ tree_t *result = make_tree( prg, arg, nargs );
+ for ( i = 1; i < nargs; i++ )
+ colm_tree_downref( prg, sp, arg[i] );
+
+ vm_push_tree( result );
+ break;
+ }
+ case IN_TREE_CAST: {
+ half_t lang_el_id;
+ read_half( lang_el_id );
+
+ debug( prg, REALM_BYTECODE, "IN_TREE_CAST %hd\n", lang_el_id );
+
+ tree_t *tree = vm_pop_tree();
+ tree_t *res = cast_tree( prg, lang_el_id, tree );
+ colm_tree_upref( prg, res );
+ colm_tree_downref( prg, sp, tree );
+ vm_push_tree( res );
+ break;
+ }
+ case IN_PTR_ACCESS_WV: {
+ debug( prg, REALM_BYTECODE, "IN_PTR_ACCESS_WV\n" );
+
+ struct_t *ptr = vm_pop_struct();
+ vm_push_struct( ptr );
+
+ /* This is an initial global load. Need to reverse execute it. */
+ rcode_unit_start( exec );
+ rcode_code( exec, IN_PTR_ACCESS_BKT );
+ rcode_word( exec, (word_t) ptr );
+ break;
+ }
+ case IN_PTR_ACCESS_BKT: {
+ word_t p;
+ read_word( p );
+
+ debug( prg, REALM_BYTECODE, "IN_PTR_ACCESS_BKT\n" );
+
+ struct_t *ptr = (struct_t*)p;
+ vm_push_type( struct_t *, ptr );
+ break;
+ }
+ case IN_REF_FROM_LOCAL: {
+ short int field;
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_REF_FROM_LOCAL %hd\n", field );
+
+ /* First push the null next pointer, then the kid pointer. */
+ kid_t *kid = (kid_t*)vm_get_plocal(exec, field);
+ vm_contiguous( 2 );
+ vm_push_ref( 0 );
+ vm_push_kid( kid );
+ break;
+ }
+ case IN_REF_FROM_REF: {
+ short int field;
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_REF_FROM_REF %hd\n", field );
+
+ ref_t *ref = (ref_t*)vm_get_plocal(exec, field);
+ vm_contiguous( 2 );
+ vm_push_ref( ref );
+ vm_push_kid( ref->kid );
+ break;
+ }
+ case IN_REF_FROM_QUAL_REF: {
+ short int back;
+ short int field;
+ read_half( back );
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_REF_FROM_QUAL_REF\n" );
+
+ ref_t *ref = (ref_t*)(sp + back);
+
+ tree_t *obj = ref->kid->tree;
+ kid_t *attr_kid = get_field_kid( obj, field );
+
+ vm_contiguous( 2 );
+ vm_push_ref( ref );
+ vm_push_kid( attr_kid );
+ break;
+ }
+ case IN_RHS_REF_FROM_QUAL_REF: {
+ short int back;
+ int i, done = 0;
+ uchar len;
+
+ read_half( back );
+
+ debug( prg, REALM_BYTECODE, "IN_RHS_REF_FROM_QUAL_REF\n" );
+
+ ref_t *ref = (ref_t*)(sp + back);
+
+ tree_t *obj = ref->kid->tree;
+ kid_t *attr_kid = 0;
+
+ read_byte( len );
+ for ( i = 0; i < len; i++ ) {
+ uchar prod_num, child_num;
+ read_byte( prod_num );
+ read_byte( child_num );
+ if ( !done && obj->prod_num == prod_num ) {
+ attr_kid = get_rhs_el_kid( prg, obj, child_num );
+ done = 1;
+ }
+ }
+
+ vm_contiguous( 2 );
+ vm_push_ref( ref );
+ vm_push_kid( attr_kid );
+ break;
+ }
+ case IN_REF_FROM_BACK: {
+ short int back;
+ read_half( back );
+
+ debug( prg, REALM_BYTECODE, "IN_REF_FROM_BACK %hd\n", back );
+
+ kid_t *ptr = (kid_t*)(sp + back);
+
+ vm_contiguous( 2 );
+ vm_push_ref( 0 );
+ vm_push_kid( ptr );
+ break;
+ }
+ case IN_TRITER_REF_FROM_CUR: {
+ short int field;
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_TRITER_REF_FROM_CUR\n" );
+
+ /* Push the next pointer first, then the kid. */
+ tree_iter_t *iter = (tree_iter_t*) vm_get_plocal(exec, field);
+ ref_t *ref = &iter->ref;
+ vm_contiguous( 2 );
+ vm_push_ref( ref );
+ vm_push_kid( iter->ref.kid );
+ break;
+ }
+ case IN_UITER_REF_FROM_CUR: {
+ short int field;
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_UITER_REF_FROM_CUR\n" );
+
+ /* Push the next pointer first, then the kid. */
+ user_iter_t *uiter = (user_iter_t*) vm_get_local(exec, field);
+ vm_contiguous( 2 );
+ vm_push_ref( uiter->ref.next );
+ vm_push_kid( uiter->ref.kid );
+ break;
+ }
+ case IN_GET_TOKEN_DATA_R: {
+ debug( prg, REALM_BYTECODE, "IN_GET_TOKEN_DATA_R\n" );
+
+ tree_t *tree = vm_pop_tree();
+ head_t *data = string_copy( prg, tree->tokdata );
+ tree_t *str = construct_string( prg, data );
+ colm_tree_upref( prg, str );
+ vm_push_tree( str );
+ colm_tree_downref( prg, sp, tree );
+ break;
+ }
+ case IN_SET_TOKEN_DATA_WC: {
+ debug( prg, REALM_BYTECODE, "IN_SET_TOKEN_DATA_WC\n" );
+
+ tree_t *tree = vm_pop_tree();
+ tree_t *val = vm_pop_tree();
+ head_t *head = string_copy( prg, ((str_t*)val)->value );
+ string_free( prg, tree->tokdata );
+ tree->tokdata = head;
+
+ colm_tree_downref( prg, sp, tree );
+ colm_tree_downref( prg, sp, val );
+ break;
+ }
+ case IN_SET_TOKEN_DATA_WV: {
+ /* Replace a tree's token data with a copy of a string value and
+ * record reverse code. Pops the target tree, then the value. */
+ debug( prg, REALM_BYTECODE, "IN_SET_TOKEN_DATA_WV\n" );
+
+ tree_t *tree = vm_pop_tree();
+ tree_t *val = vm_pop_tree();
+
+ /* The old token data is not freed here (unlike the WC variant). It is
+ * handed to the reverse instruction, which reinstalls it. */
+ head_t *oldval = tree->tokdata;
+ head_t *head = string_copy( prg, ((str_t*)val)->value );
+ tree->tokdata = head;
+
+ /* Set up reverse code. It carries one word: the previous token data. */
+ rcode_code( exec, IN_SET_TOKEN_DATA_BKT );
+ rcode_word( exec, (word_t)oldval );
+ rcode_unit_term( exec );
+
+ colm_tree_downref( prg, sp, tree );
+ colm_tree_downref( prg, sp, val );
+ break;
+ }
+ case IN_SET_TOKEN_DATA_BKT: {
+ debug( prg, REALM_BYTECODE, "IN_SET_TOKEN_DATA_BKT \n" );
+
+ word_t oldval;
+ read_word( oldval );
+
+ tree_t *tree = vm_pop_tree();
+ head_t *head = (head_t*)oldval;
+ string_free( prg, tree->tokdata );
+ tree->tokdata = head;
+ colm_tree_downref( prg, sp, tree );
+ break;
+ }
+ case IN_GET_TOKEN_FILE_R: {
+ /* Pop a tree and push a string holding the file name from its token
+ * location, or a null tree when no location is recorded. */
+ debug( prg, REALM_BYTECODE, "IN_GET_TOKEN_FILE_R\n" );
+ tree_t *tree = vm_pop_tree();
+ tree_t *str = 0;
+ /* Check tokdata before reading its location, so a tree without token
+ * data yields the null result instead of a null dereference. */
+ if ( tree->tokdata != 0 && tree->tokdata->location ) {
+ const char *fn = tree->tokdata->location->name;
+ size_t fnlen = strlen( fn );
+ head_t *data = string_alloc_full( prg, fn, fnlen );
+ str = construct_string( prg, data );
+ colm_tree_upref( prg, str );
+ }
+ vm_push_tree( str );
+ colm_tree_downref( prg, sp, tree );
+ break;
+ }
+ case IN_GET_TOKEN_LINE_R: {
+ /* Pop a tree and push the line from its token location, or zero when
+ * no location is recorded. */
+ debug( prg, REALM_BYTECODE, "IN_GET_TOKEN_LINE_R\n" );
+
+ tree_t *tree = vm_pop_tree();
+ value_t integer = 0;
+ /* Same guard as the file accessor: tokdata first, then location. */
+ if ( tree->tokdata != 0 && tree->tokdata->location )
+ integer = tree->tokdata->location->line;
+ vm_push_value( integer );
+ colm_tree_downref( prg, sp, tree );
+ break;
+ }
+ case IN_GET_TOKEN_COL_R: {
+ /* Pop a tree and push the column from its token location, or zero
+ * when no location is recorded. */
+ debug( prg, REALM_BYTECODE, "IN_GET_TOKEN_COL_R\n" );
+
+ tree_t *tree = vm_pop_tree();
+ value_t integer = 0;
+ /* Same guard as the file accessor: tokdata first, then location. */
+ if ( tree->tokdata != 0 && tree->tokdata->location )
+ integer = tree->tokdata->location->column;
+ vm_push_value( integer );
+ colm_tree_downref( prg, sp, tree );
+ break;
+ }
+ case IN_GET_TOKEN_POS_R: {
+ /* Pop a tree and push the byte position from its token location, or
+ * zero when no location is recorded. */
+ debug( prg, REALM_BYTECODE, "IN_GET_TOKEN_POS_R\n" );
+
+ tree_t *tree = vm_pop_tree();
+ value_t integer = 0;
+ /* Same guard as the file accessor: tokdata first, then location. */
+ if ( tree->tokdata != 0 && tree->tokdata->location )
+ integer = tree->tokdata->location->byte;
+ vm_push_value( integer );
+ colm_tree_downref( prg, sp, tree );
+ break;
+ }
+ case IN_GET_MATCH_LENGTH_R: {
+ debug( prg, REALM_BYTECODE, "IN_GET_MATCH_LENGTH_R\n" );
+
+ value_t integer = string_length(exec->parser->pda_run->tokdata);
+ vm_push_value( integer );
+ break;
+ }
+ case IN_GET_MATCH_TEXT_R: {
+ debug( prg, REALM_BYTECODE, "IN_GET_MATCH_TEXT_R\n" );
+
+ head_t *s = string_copy( prg, exec->parser->pda_run->tokdata );
+ tree_t *tree = construct_string( prg, s );
+ colm_tree_upref( prg, tree );
+ vm_push_tree( tree );
+ break;
+ }
+ case IN_LIST_LENGTH: {
+ debug( prg, REALM_BYTECODE, "IN_LIST_LENGTH\n" );
+
+ list_t *list = vm_pop_list();
+ long len = colm_list_length( list );
+ value_t res = len;
+ vm_push_value( res );
+ break;
+ }
+ case IN_GET_LIST_EL_MEM_R: {
+ short gen_id, field;
+ read_half( gen_id );
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_GET_LIST_EL_MEM_R\n" );
+
+ struct_t *s = vm_pop_struct();
+
+ list_el_t *list_el = colm_struct_to_list_el( prg, s, gen_id );
+ struct_t *val = colm_list_el_get( prg, list_el, gen_id, field );
+ vm_push_struct( val );
+ break;
+ }
+ case IN_GET_LIST_MEM_R: {
+ short gen_id, field;
+ read_half( gen_id );
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE,
+ "IN_GET_LIST_MEM_R %hd %hd\n", gen_id, field );
+
+ list_t *list = vm_pop_list();
+ struct_t *val = colm_list_get( prg, list, gen_id, field );
+ vm_push_struct( val );
+ break;
+ }
+ case IN_GET_LIST_MEM_WC: {
+ short field;
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_GET_LIST_MEM_WC\n" );
+
+ tree_t *obj = vm_pop_tree();
+ colm_tree_downref( prg, sp, obj );
+
+ tree_t *val = get_list_mem_split( prg, (list_t*)obj, field );
+ colm_tree_upref( prg, val );
+ vm_push_tree( val );
+ break;
+ }
+ case IN_GET_LIST_MEM_WV: {
+ short field;
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_GET_LIST_MEM_WV\n" );
+
+ tree_t *obj = vm_pop_tree();
+ colm_tree_downref( prg, sp, obj );
+
+ tree_t *val = get_list_mem_split( prg, (list_t*)obj, field );
+ colm_tree_upref( prg, val );
+ vm_push_tree( val );
+
+ /* Set up the reverse instruction. */
+ rcode_code( exec, IN_GET_LIST_MEM_BKT );
+ rcode_half( exec, field );
+ break;
+ }
+ case IN_GET_LIST_MEM_BKT: {
+ short field;
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_GET_LIST_MEM_BKT\n" );
+
+ tree_t *obj = vm_pop_tree();
+ colm_tree_downref( prg, sp, obj );
+
+ tree_t *res = get_list_mem_split( prg, (list_t*)obj, field );
+ colm_tree_upref( prg, res );
+ vm_push_tree( res );
+ break;
+ }
+ case IN_GET_VLIST_MEM_R: {
+ short gen_id, field;
+ read_half( gen_id );
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE,
+ "IN_GET_VLIST_MEM_R %hd %hd\n", gen_id, field );
+
+ list_t *list = vm_pop_list();
+ struct_t *el = colm_list_get( prg, list, gen_id, field );
+
+ value_t val = colm_struct_get_field( el, value_t, 0 );
+ vm_push_value( val );
+ break;
+ }
+ case IN_GET_VLIST_MEM_WC: {
+ short field;
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_GET_VLIST_MEM_WC\n" );
+
+ tree_t *obj = vm_pop_tree();
+ colm_tree_downref( prg, sp, obj );
+
+ tree_t *val = get_list_mem_split( prg, (list_t*)obj, field );
+ colm_tree_upref( prg, val );
+ vm_push_tree( val );
+ break;
+ }
+ case IN_GET_VLIST_MEM_WV: {
+ /* Fetch a list member for writing and record reverse code. The
+ * half-word operand is the field; the list object is popped and the
+ * member value pushed with an added reference. */
+ short field;
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_GET_VLIST_MEM_WV\n" );
+
+ tree_t *obj = vm_pop_tree();
+ colm_tree_downref( prg, sp, obj );
+
+ tree_t *val = get_list_mem_split( prg, (list_t*)obj, field );
+ colm_tree_upref( prg, val );
+ vm_push_tree( val );
+
+ /* Set up the reverse instruction.
+ * NOTE(review): this records IN_GET_LIST_MEM_BKT rather than
+ * IN_GET_VLIST_MEM_BKT. The two handlers in this switch have the same
+ * body, so the undo looks equivalent. Confirm against any other
+ * reverse-code consumers before changing the opcode. */
+ rcode_code( exec, IN_GET_LIST_MEM_BKT );
+ rcode_half( exec, field );
+ break;
+ }
+ case IN_GET_VLIST_MEM_BKT: {
+ short field;
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_GET_VLIST_MEM_BKT\n" );
+
+ tree_t *obj = vm_pop_tree();
+ colm_tree_downref( prg, sp, obj );
+
+ tree_t *res = get_list_mem_split( prg, (list_t*)obj, field );
+ colm_tree_upref( prg, res );
+ vm_push_tree( res );
+ break;
+ }
+ case IN_GET_PARSER_STREAM: {
+ debug( prg, REALM_BYTECODE, "IN_GET_PARSER_STREAM\n" );
+ parser_t *parser = vm_pop_parser();
+ vm_push_input( parser->input );
+ break;
+ }
+ case IN_GET_PARSER_MEM_R: {
+ short field;
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_GET_PARSER_MEM_R %hd\n", field );
+
+ parser_t *parser = vm_pop_parser();
+
+ tree_t *val = get_parser_mem( parser, field );
+
+ colm_tree_upref( prg, val );
+ vm_push_tree( val );
+ break;
+ }
+
+ case IN_GET_MAP_EL_MEM_R: {
+ short gen_id, field;
+ read_half( gen_id );
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_GET_MAP_EL_MEM_R\n" );
+
+ struct_t *strct = vm_pop_struct();
+
+ map_el_t *map_el = colm_struct_to_map_el( prg, strct, gen_id );
+ struct_t *val = colm_map_el_get( prg, map_el, gen_id, field );
+ vm_push_struct( val );
+ break;
+ }
+ case IN_MAP_LENGTH: {
+ debug( prg, REALM_BYTECODE, "IN_MAP_LENGTH\n" );
+
+ tree_t *obj = vm_pop_tree();
+ long len = map_length( (map_t*)obj );
+ value_t res = len;
+ vm_push_value( res );
+ break;
+ }
+ case IN_GET_MAP_MEM_R: {
+ short gen_id, field;
+ read_half( gen_id );
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE,
+ "IN_GET_MAP_MEM_R %hd %hd\n", gen_id, field );
+
+ map_t *map = vm_pop_map();
+ struct_t *val = colm_map_get( prg, map, gen_id, field );
+ vm_push_struct( val );
+ break;
+ }
+ case IN_GET_MAP_MEM_WC: {
+ short field;
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_GET_MAP_MEM_WC\n" );
+
+ tree_t *obj = vm_pop_tree();
+ colm_tree_downref( prg, sp, obj );
+
+ tree_t *val = get_list_mem_split( prg, (list_t*)obj, field );
+ colm_tree_upref( prg, val );
+ vm_push_tree( val );
+ break;
+ }
+ case IN_GET_MAP_MEM_WV: {
+ short field;
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_GET_MAP_MEM_WV\n" );
+
+ tree_t *obj = vm_pop_tree();
+ colm_tree_downref( prg, sp, obj );
+
+ tree_t *val = get_list_mem_split( prg, (list_t*)obj, field );
+ colm_tree_upref( prg, val );
+ vm_push_tree( val );
+
+ /* Set up the reverse instruction. */
+ rcode_code( exec, IN_GET_MAP_MEM_BKT );
+ rcode_half( exec, field );
+ break;
+ }
+ case IN_GET_MAP_MEM_BKT: {
+ short field;
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_GET_MAP_MEM_BKT\n" );
+
+ tree_t *obj = vm_pop_tree();
+ colm_tree_downref( prg, sp, obj );
+
+ tree_t *res = get_list_mem_split( prg, (list_t*)obj, field );
+ colm_tree_upref( prg, res );
+ vm_push_tree( res );
+ break;
+ }
+
+ case IN_STASH_ARG: {
+ /* Move values from the stack into the call-argument area. Operands:
+ * starting slot, then the number of values to pop and store. */
+ half_t pos, size;
+ read_half( pos );
+ read_half( size );
+
+ debug( prg, REALM_BYTECODE, "IN_STASH_ARG %hd %hd\n", pos, size );
+
+ /* Each popped value lands in the next slot, ascending from pos. */
+ value_t *slots = (value_t*)exec->call_args;
+ for ( ; size > 0; size--, pos++ )
+ slots[pos] = vm_pop_value();
+
+ break;
+ }
+
+ case IN_PREP_ARGS: {
+ half_t size;
+ read_half( size );
+
+ debug( prg, REALM_BYTECODE, "IN_PREP_ARGS %hd\n", size );
+
+ vm_push_type( tree_t**, exec->call_args );
+ vm_pushn( size );
+ exec->call_args = vm_ptop();
+ memset( vm_ptop(), 0, sizeof(word_t) * size );
+ break;
+ }
+
+ case IN_CLEAR_ARGS: {
+ half_t size;
+ read_half( size );
+
+ debug( prg, REALM_BYTECODE, "IN_CLEAR_ARGS %hd\n", size );
+
+ vm_popn( size );
+ exec->call_args = vm_pop_type( tree_t** );
+ break;
+ }
+
+ case IN_HOST: {
+ half_t func_id;
+ read_half( func_id );
+
+ debug( prg, REALM_BYTECODE, "IN_HOST %hd\n", func_id );
+
+ sp = prg->rtd->host_call( prg, func_id, sp );
+ break;
+ }
+ case IN_CALL_WV: {
+ half_t func_id;
+ read_half( func_id );
+
+ struct function_info *fi = &prg->rtd->function_info[func_id];
+ struct frame_info *fr = &prg->rtd->frame_info[fi->frame_id];
+
+ debug( prg, REALM_BYTECODE, "IN_CALL_WV %s\n", fr->name );
+
+ vm_contiguous( FR_AA + fi->frame_size );
+
+ vm_push_type( tree_t**, exec->call_args );
+ vm_push_value( 0 ); /* Return value. */
+ vm_push_type( code_t*, instr );
+ vm_push_type( tree_t**, exec->frame_ptr );
+ vm_push_type( long, exec->frame_id );
+
+ instr = fr->codeWV;
+ exec->frame_id = fi->frame_id;
+
+ exec->frame_ptr = vm_ptop();
+ vm_pushn( fr->frame_size );
+ memset( vm_ptop(), 0, sizeof(word_t) * fr->frame_size );
+ break;
+ }
+ case IN_CALL_WC: {
+ half_t func_id;
+ read_half( func_id );
+
+ struct function_info *fi = &prg->rtd->function_info[func_id];
+ struct frame_info *fr = &prg->rtd->frame_info[fi->frame_id];
+
+ debug( prg, REALM_BYTECODE, "IN_CALL_WC %s %d\n", fr->name, fr->frame_size );
+
+ vm_contiguous( FR_AA + fi->frame_size );
+
+ vm_push_type( tree_t**, exec->call_args );
+ vm_push_value( 0 ); /* Return value. */
+ vm_push_type( code_t*, instr );
+ vm_push_type( tree_t**, exec->frame_ptr );
+ vm_push_type( long, exec->frame_id );
+
+ instr = fr->codeWC;
+ exec->frame_id = fi->frame_id;
+
+ exec->frame_ptr = vm_ptop();
+ vm_pushn( fr->frame_size );
+ memset( vm_ptop(), 0, sizeof(word_t) * fr->frame_size );
+ break;
+ }
+ case IN_YIELD: {
+ /* Yield from a user iterator. Pops the kid and then its next ref. If
+ * the kid passes the iterator's search filter, the value is stored in
+ * the iterator and control returns to the caller. Otherwise nothing
+ * is pushed and execution simply continues with the next instruction. */
+ debug( prg, REALM_BYTECODE, "IN_YIELD\n" );
+
+ kid_t *kid = vm_pop_kid();
+ ref_t *next = vm_pop_ref();
+ user_iter_t *uiter = (user_iter_t*) vm_plocal_iframe( IFR_AA );
+
+ /* Accept a null kid or null tree, an id matching the search id, or
+ * any tree when the iterator searches for any_id. */
+ if ( kid == 0 || kid->tree == 0 ||
+ kid->tree->id == uiter->search_id ||
+ uiter->search_id == prg->rtd->any_id )
+ {
+ /* Store the yielded value. */
+ uiter->ref.kid = kid;
+ uiter->ref.next = next;
+ uiter->yield_size = vm_ssize() - uiter->root_size;
+ /* Remember where to resume the iterator body and with which frame. */
+ uiter->resume = instr;
+ uiter->frame = exec->frame_ptr;
+
+ /* Restore the instruction and frame pointer. */
+ instr = (code_t*) vm_local_iframe(IFR_RIN);
+ exec->frame_ptr = (tree_t**) vm_local_iframe(IFR_RFR);
+ exec->iframe_ptr = (tree_t**) vm_local_iframe(IFR_RIF);
+
+ /* Return the yield result on the top of the stack: true when a kid
+ * was yielded, false when the kid is null. */
+ tree_t *result = uiter->ref.kid != 0 ? prg->true_val : prg->false_val;
+ //colm_tree_upref( prg, result );
+ vm_push_tree( result );
+ }
+ break;
+ }
+ case IN_UITER_CREATE_WV: {
+ short field;
+ half_t func_id, search_id;
+ read_half( field );
+ read_half( func_id );
+ read_half( search_id );
+
+ debug( prg, REALM_BYTECODE, "IN_UITER_CREATE_WV\n" );
+
+ struct function_info *fi = prg->rtd->function_info + func_id;
+
+ vm_contiguous( (sizeof(user_iter_t) / sizeof(word_t)) + FR_AA + fi->frame_size );
+
+ user_iter_t *uiter = colm_uiter_create( prg, &sp, fi, search_id );
+ vm_set_local(exec, field, (SW) uiter);
+
+ /* This is a setup similar to as a call, only the frame structure
+ * is slightly different for user iterators. We aren't going to do
+ * the call. We don't need to set up the return ip because the
+ * uiter advance will set it. The frame we need to do because it
+ * is set once for the lifetime of the iterator. */
+ vm_push_type( tree_t**, exec->call_args );
+ vm_push_value( 0 );
+
+ vm_push_type( code_t*, 0 ); /* Return instruction pointer, */
+ vm_push_type( tree_t**, exec->iframe_ptr ); /* Return iframe. */
+ vm_push_type( tree_t**, exec->frame_ptr ); /* Return frame. */
+
+ uiter->frame = vm_ptop();
+ vm_pushn( fi->frame_size );
+ memset( vm_ptop(), 0, sizeof(word_t) * fi->frame_size );
+
+ uiter_init( prg, sp, uiter, fi, true );
+ break;
+ }
+ case IN_UITER_CREATE_WC: {
+ short field;
+ half_t func_id, search_id;
+ read_half( field );
+ read_half( func_id );
+ read_half( search_id );
+
+ debug( prg, REALM_BYTECODE, "IN_UITER_CREATE_WC\n" );
+
+ struct function_info *fi = prg->rtd->function_info + func_id;
+
+ vm_contiguous( (sizeof(user_iter_t) / sizeof(word_t)) + FR_AA + fi->frame_size );
+
+ user_iter_t *uiter = colm_uiter_create( prg, &sp, fi, search_id );
+ vm_set_local(exec, field, (SW) uiter);
+
+ /* This is a setup similar to as a call, only the frame structure
+ * is slightly different for user iterators. We aren't going to do
+ * the call. We don't need to set up the return ip because the
+ * uiter advance will set it. The frame we need to do because it
+ * is set once for the lifetime of the iterator. */
+ vm_push_type( tree_t**, exec->call_args );
+ vm_push_value( 0 );
+
+ vm_push_type( code_t*, 0 ); /* Return instruction pointer, */
+ vm_push_type( tree_t**, exec->iframe_ptr ); /* Return iframe. */
+ vm_push_type( tree_t**, exec->frame_ptr ); /* Return frame. */
+
+ uiter->frame = vm_ptop();
+ vm_pushn( fi->frame_size );
+ memset( vm_ptop(), 0, sizeof(word_t) * fi->frame_size );
+
+ uiter_init( prg, sp, uiter, fi, false );
+ break;
+ }
+ case IN_UITER_DESTROY: {
+ short field;
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_UITER_DESTROY %hd\n", field );
+
+ user_iter_t *uiter = (user_iter_t*) vm_get_local(exec, field);
+ colm_uiter_destroy( prg, &sp, uiter );
+ break;
+ }
+
+ case IN_UITER_UNWIND: {
+ short field;
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_UITER_UNWIND %hd\n", field );
+
+ user_iter_t *uiter = (user_iter_t*) vm_get_local(exec, field);
+ colm_uiter_unwind( prg, &sp, uiter );
+ break;
+ }
+
+ case IN_RET: {
+ /* Return from a function call: release the callee's locals, drop its
+ * frame, and restore the caller state that IN_CALL_WV/IN_CALL_WC
+ * pushed. */
+ struct frame_info *fi = &prg->rtd->frame_info[exec->frame_id];
+ downref_local_trees( prg, sp, exec, fi->locals, fi->locals_len );
+ vm_popn( fi->frame_size );
+
+ /* Pops mirror the call's pushes in reverse: frame id, frame pointer,
+ * return instruction, return-value slot, then the saved call_args
+ * slot, which is discarded. */
+ exec->frame_id = vm_pop_type(long);
+ exec->frame_ptr = vm_pop_type(tree_t**);
+ instr = vm_pop_type(code_t*);
+ exec->ret_val = vm_pop_tree();
+ vm_pop_value();
+ //vm_popn( fi->argSize );
+
+ /* fi now refers to the caller's frame; its name is what gets traced. */
+ fi = &prg->rtd->frame_info[exec->frame_id];
+ debug( prg, REALM_BYTECODE, "IN_RET %s\n", fi->name );
+
+ /* This is for direct calls of functions. */
+ if ( instr == 0 ){
+ //assert( sp == root );
+ return sp;
+ }
+
+ /* Might be some unwind code. The half-word at the return address is
+ * its length; a positive length is skipped over. */
+ {
+ short unwind_len;
+ read_half( unwind_len );
+ if ( unwind_len > 0 ) {
+ instr += unwind_len;
+ debug( prg, REALM_BYTECODE,
+ "skipping unwind code length: %hd\n", unwind_len );
+ }
+ }
+
+ break;
+ }
+ case IN_TO_UPPER: {
+ debug( prg, REALM_BYTECODE, "IN_TO_UPPER\n" );
+
+ tree_t *in = vm_pop_tree();
+ head_t *head = string_to_upper( in->tokdata );
+ tree_t *upper = construct_string( prg, head );
+ colm_tree_upref( prg, upper );
+ vm_push_tree( upper );
+ colm_tree_downref( prg, sp, in );
+ break;
+ }
+ case IN_TO_LOWER: {
+ debug( prg, REALM_BYTECODE, "IN_TO_LOWER\n" );
+
+ tree_t *in = vm_pop_tree();
+ head_t *head = string_to_lower( in->tokdata );
+ tree_t *lower = construct_string( prg, head );
+ colm_tree_upref( prg, lower );
+ vm_push_tree( lower );
+ colm_tree_downref( prg, sp, in );
+ break;
+ }
+ case IN_OPEN_FILE: {
+ debug( prg, REALM_BYTECODE, "IN_OPEN_FILE\n" );
+
+ tree_t *mode = vm_pop_tree();
+ tree_t *name = vm_pop_tree();
+ stream_t *res = colm_stream_open_file( prg, name, mode );
+ vm_push_stream( res );
+ colm_tree_downref( prg, sp, name );
+ colm_tree_downref( prg, sp, mode );
+ break;
+ }
+ case IN_GET_CONST: {
+ /* Load a built-in constant. The half-word operand selects which one.
+ * Each handled constant pops the root object and pushes its value.
+ * Ids not listed below leave the stack untouched. */
+ short constValId;
+ read_half( constValId );
+
+ switch ( constValId ) {
+ case CONST_STDIN: {
+ debug( prg, REALM_BYTECODE, "CONST_STDIN\n" );
+
+ /* Pop the root object. */
+ vm_pop_tree();
+
+ make_stdin( prg );
+
+ vm_push_stream( prg->stdin_val );
+ break;
+ }
+ case CONST_STDOUT: {
+ debug( prg, REALM_BYTECODE, "CONST_STDOUT\n" );
+
+ /* Pop the root object. */
+ vm_pop_tree();
+ make_stdout( prg );
+
+ vm_push_stream( prg->stdout_val );
+ break;
+ }
+ case CONST_STDERR: {
+ debug( prg, REALM_BYTECODE, "CONST_STDERR\n" );
+
+ /* Pop the root object. */
+ vm_pop_tree();
+
+ make_stderr( prg );
+
+ vm_push_stream( prg->stderr_val );
+ break;
+ }
+ case CONST_ARG: {
+ word_t offset;
+ read_word( offset );
+
+ /* word_t carries pointer-sized values elsewhere in this switch, so
+ * it does not match %d. Print it as a long with a matching
+ * specifier. */
+ debug( prg, REALM_BYTECODE, "CONST_ARG %ld\n", (long)offset );
+
+ /* Pop the root object. */
+ vm_pop_tree();
+
+ head_t *lit = make_literal( prg, offset );
+ tree_t *tree = construct_string( prg, lit );
+ colm_tree_upref( prg, tree );
+ vm_push_tree( tree );
+ break;
+ }
+ }
+ break;
+ }
+ case IN_SYSTEM: {
+ /* Run a shell command. Pops one tree that is discarded (presumably
+ * the root object, as in IN_GET_CONST -- confirm), then the command
+ * string. Pushes the command's exit status, or -1 when the command
+ * could not be run at all. */
+ debug( prg, REALM_BYTECODE, "IN_SYSTEM\n" );
+
+ vm_pop_tree();
+ str_t *cmd = vm_pop_string();
+
+ int res = -1;
+
+ /* The string data is length-counted; make a terminated copy for
+ * system(). An allocation failure leaves res at -1 rather than
+ * writing through a null pointer. */
+ char *cmd0 = malloc( cmd->value->length + 1 );
+ if ( cmd0 != 0 ) {
+ memcpy( cmd0, cmd->value->data, cmd->value->length );
+ cmd0[cmd->value->length] = 0;
+
+ res = system( cmd0 );
+
+ free( cmd0 );
+
+#if defined(HAVE_SYS_WAIT_H)
+ /* system() returns -1 when no child could be created. That is not a
+ * wait status, so only decode real statuses. */
+ if ( res != -1 ) {
+ if ( WIFSIGNALED( res ) )
+ raise( WTERMSIG( res ) );
+ res = WEXITSTATUS( res );
+ }
+#else
+ // WARNING: Check result
+#endif
+ }
+
+ colm_tree_downref( prg, sp, (tree_t*)cmd );
+
+ value_t val = res;
+ vm_push_value( val );
+ break;
+ }
+
+ case IN_DONE:
+ return sp;
+
+ case IN_FN: {
+ c = *instr++;
+ switch ( c ) {
+ case FN_STR_ATOI: {
+ debug( prg, REALM_BYTECODE, "FN_STR_ATOI\n" );
+
+ str_t *str = vm_pop_string();
+ word_t res = str_atoi( str->value );
+ value_t integer = res;
+ vm_push_value( integer );
+ colm_tree_downref( prg, sp, (tree_t*)str );
+ break;
+ }
+ case FN_STR_ATOO: {
+ debug( prg, REALM_BYTECODE, "FN_STR_ATOO\n" );
+
+ str_t *str = vm_pop_string();
+ word_t res = str_atoo( str->value );
+ value_t integer = res;
+ vm_push_value( integer );
+ colm_tree_downref( prg, sp, (tree_t*)str );
+ break;
+ }
+ case FN_STR_UORD8: {
+ debug( prg, REALM_BYTECODE, "FN_STR_UORD8\n" );
+
+ str_t *str = vm_pop_string();
+ word_t res = str_uord8( str->value );
+ value_t integer = res;
+ vm_push_value( integer );
+ colm_tree_downref( prg, sp, (tree_t*)str );
+ break;
+ }
+ case FN_STR_UORD16: {
+ debug( prg, REALM_BYTECODE, "FN_STR_UORD16\n" );
+
+ str_t *str = vm_pop_string();
+ word_t res = str_uord16( str->value );
+ value_t integer = res;
+ vm_push_value( integer );
+ colm_tree_downref( prg, sp, (tree_t*)str );
+ break;
+ }
+ case FN_STR_PREFIX: {
+ debug( prg, REALM_BYTECODE, "FN_STR_PREFIX\n" );
+
+ str_t *str = vm_pop_string();
+ value_t len = vm_pop_value();
+
+ str_t *res = string_prefix( prg, str, (long) len );
+ colm_tree_upref( prg, (tree_t*) res );
+ vm_push_string( res );
+ colm_tree_downref( prg, sp, (tree_t*)str );
+ break;
+ }
+ case FN_STR_SUFFIX: {
+ debug( prg, REALM_BYTECODE, "FN_STR_SUFFIX\n" );
+
+ str_t *str = vm_pop_string();
+ value_t pos = vm_pop_value();
+
+ str_t *res = string_suffix( prg, str, (long) pos );
+ colm_tree_upref( prg, (tree_t*) res );
+ vm_push_string( res );
+ colm_tree_downref( prg, sp, (tree_t*)str );
+ break;
+ }
+ case FN_PREFIX: {
+ debug( prg, REALM_BYTECODE, "FN_PREFIX\n" );
+
+ value_t len = vm_pop_value();
+ str_t *str = vm_pop_string();
+
+ str_t *res = string_prefix( prg, str, (long) len );
+ colm_tree_upref( prg, (tree_t*) res );
+ vm_push_string( res );
+ colm_tree_downref( prg, sp, (tree_t*)str );
+ break;
+ }
+ case FN_SUFFIX: {
+ debug( prg, REALM_BYTECODE, "FN_SUFFIX\n" );
+
+ value_t pos = vm_pop_value();
+ str_t *str = vm_pop_string();
+
+ str_t *res = string_suffix( prg, str, (long) pos );
+ colm_tree_upref( prg, (tree_t*) res );
+ vm_push_string( res );
+ colm_tree_downref( prg, sp, (tree_t*)str );
+ break;
+ }
+ case FN_SPRINTF: {
+ debug( prg, REALM_BYTECODE, "FN_SPRINTF\n" );
+
+ vm_pop_tree();
+ value_t integer = vm_pop_value();
+ str_t *format = vm_pop_string();
+ head_t *res = string_sprintf( prg, format, (long)integer );
+ str_t *str = (str_t*)construct_string( prg, res );
+ colm_tree_upref( prg, (tree_t*)str );
+ vm_push_string( str );
+ colm_tree_downref( prg, sp, (tree_t*)format );
+ break;
+ }
+ case FN_LOAD_ARG0: {
+ /* Build the arg0 tree from the program arguments and store it in the
+ * global field named by the operand, releasing the previous value. */
+ half_t field;
+ read_half( field );
+ /* A half_t is promoted to int in a variadic call; cast so the
+ * argument really is the unsigned long that %lu expects. */
+ debug( prg, REALM_BYTECODE, "FN_LOAD_ARG0 %lu\n", (unsigned long)field );
+
+ /* tree_t comes back upreffed. */
+ tree_t *tree = construct_arg0( prg, prg->argc, prg->argv, prg->argl );
+ tree_t *prev = colm_struct_get_field( prg->global, tree_t*, field );
+ colm_tree_downref( prg, sp, prev );
+ colm_struct_set_field( prg->global, tree_t*, field, tree );
+ break;
+ }
+ case FN_LOAD_ARGV: {
+ /* Build the argv list from the program arguments and store it in the
+ * global field named by the operand. */
+ half_t field;
+ read_half( field );
+ /* Cast for %lu, as in FN_LOAD_ARG0. */
+ debug( prg, REALM_BYTECODE, "FN_LOAD_ARGV %lu\n", (unsigned long)field );
+
+ list_t *list = construct_argv( prg, prg->argc, prg->argv, prg->argl );
+ colm_struct_set_field( prg->global, list_t*, field, list );
+ break;
+ }
+ case FN_INIT_STDS: {
+ /* Build the list returned by construct_stds and store it in the
+ * global field named by the operand. */
+ half_t field;
+ read_half( field );
+ /* Cast for %lu, as in FN_LOAD_ARG0. */
+ debug( prg, REALM_BYTECODE, "FN_INIT_STDS %lu\n", (unsigned long)field );
+
+ list_t *list = construct_stds( prg );
+ colm_struct_set_field( prg->global, list_t*, field, list );
+ break;
+ }
+ case FN_STOP: {
+ debug( prg, REALM_BYTECODE, "FN_STOP\n" );
+
+ flush_streams( prg );
+ goto out;
+ }
+
+ case FN_LIST_PUSH_HEAD_WC: {
+ short gen_id;
+ read_half( gen_id );
+
+ debug( prg, REALM_BYTECODE, "FN_LIST_PUSH_HEAD_WC\n" );
+
+ list_t *list = vm_pop_list();
+ struct_t *s = vm_pop_struct();
+
+ list_el_t *list_el = colm_struct_to_list_el( prg, s, gen_id );
+ colm_list_prepend( list, list_el );
+
+ //colm_tree_upref( prg, prg->trueVal );
+ vm_push_tree( prg->true_val );
+ break;
+ }
+ case FN_LIST_PUSH_HEAD_WV: {
+ short gen_id;
+ read_half( gen_id );
+
+ debug( prg, REALM_BYTECODE, "FN_LIST_PUSH_HEAD_WV\n" );
+
+ list_t *list = vm_pop_list();
+ struct_t *s = vm_pop_struct();
+
+ list_el_t *list_el = colm_struct_to_list_el( prg, s, gen_id );
+ colm_list_prepend( list, list_el );
+
+ //colm_tree_upref( prg, prg->trueVal );
+ vm_push_tree( prg->true_val );
+
+ /* Set up reverse code. Needs no args. */
+ rcode_code( exec, IN_FN );
+ rcode_code( exec, FN_LIST_PUSH_HEAD_BKT );
+ rcode_unit_term( exec );
+ break;
+ }
+ case FN_LIST_PUSH_HEAD_BKT: {
+ debug( prg, REALM_BYTECODE, "FN_LIST_PUSH_HEAD_BKT\n" );
+
+ list_t *list = vm_pop_list();
+ colm_list_detach_head( list );
+ break;
+ }
+ case FN_LIST_PUSH_TAIL_WC: {
+ short gen_id;
+ read_half( gen_id );
+
+ debug( prg, REALM_BYTECODE, "FN_LIST_PUSH_TAIL_WC\n" );
+
+ list_t *list = vm_pop_list();
+ struct_t *s = vm_pop_struct();
+
+ list_el_t *list_el = colm_struct_to_list_el( prg, s, gen_id );
+ colm_list_append( list, list_el );
+
+ //colm_tree_upref( prg, prg->trueVal );
+ vm_push_tree( prg->true_val );
+ break;
+ }
+ case FN_LIST_PUSH_TAIL_WV: {
+ short gen_id;
+ read_half( gen_id );
+
+ debug( prg, REALM_BYTECODE, "FN_LIST_PUSH_TAIL_WV\n" );
+
+ list_t *list = vm_pop_list();
+ struct_t *s = vm_pop_struct();
+
+ list_el_t *list_el = colm_struct_to_list_el( prg, s, gen_id );
+ colm_list_append( list, list_el );
+
+ //colm_tree_upref( prg, prg->trueVal );
+ vm_push_tree( prg->true_val );
+
+ /* Set up reverse code. Needs no args. */
+ rcode_code( exec, IN_FN );
+ rcode_code( exec, FN_LIST_PUSH_TAIL_BKT );
+ rcode_unit_term( exec );
+ break;
+ }
+ case FN_LIST_PUSH_TAIL_BKT: {
+ debug( prg, REALM_BYTECODE, "FN_LIST_PUSH_TAIL_BKT\n" );
+
+ list_t *list = vm_pop_list();
+ colm_list_detach_tail( list );
+ break;
+ }
+ case FN_LIST_POP_TAIL_WC: {
+ short gen_id;
+ read_half( gen_id );
+
+ debug( prg, REALM_BYTECODE, "FN_LIST_POP_TAIL_WC\n" );
+
+ list_t *list = vm_pop_list();
+
+ list_el_t *tail = list->tail;
+ colm_list_detach_tail( list );
+ struct_t *s = colm_generic_el_container( prg, tail, gen_id );
+
+ vm_push_struct( s );
+ break;
+ }
+ case FN_LIST_POP_TAIL_WV: {
+ short gen_id;
+ read_half( gen_id );
+
+ debug( prg, REALM_BYTECODE, "FN_LIST_POP_TAIL_WV\n" );
+
+ list_t *list = vm_pop_list();
+
+ list_el_t *tail = list->tail;
+ colm_list_detach_tail( list );
+ struct_t *s = colm_generic_el_container( prg, tail, gen_id );
+
+ vm_push_struct( s );
+
+ /* Set up reverse. */
+ rcode_code( exec, IN_FN );
+ rcode_code( exec, FN_LIST_POP_TAIL_BKT );
+ rcode_half( exec, gen_id );
+ rcode_word( exec, (word_t)s );
+ rcode_unit_term( exec );
+ break;
+ }
+ case FN_LIST_POP_TAIL_BKT: {
+ short gen_id;
+ tree_t *val;
+ read_half( gen_id );
+ read_tree( val );
+
+ debug( prg, REALM_BYTECODE, "FN_LIST_POP_TAIL_BKT\n" );
+
+ list_t *list = vm_pop_list();
+ struct_t *s = (struct_t*) val;
+
+ list_el_t *list_el = colm_struct_to_list_el( prg, s, gen_id );
+
+ colm_list_append( list, list_el );
+ break;
+ }
+ case FN_LIST_POP_HEAD_WC: {
+ short gen_id;
+ read_half( gen_id );
+
+ debug( prg, REALM_BYTECODE, "FN_LIST_POP_HEAD_WC\n" );
+
+ list_t *list = vm_pop_list();
+
+ list_el_t *head = list->head;
+ colm_list_detach_head( list );
+ struct_t *s = colm_generic_el_container( prg, head, gen_id );
+
+ vm_push_struct( s );
+ break;
+ }
+ case FN_LIST_POP_HEAD_WV: {
+ short gen_id;
+ read_half( gen_id );
+
+ debug( prg, REALM_BYTECODE, "FN_LIST_POP_HEAD_WV\n" );
+
+ list_t *list = vm_pop_list();
+
+ list_el_t *head = list->head;
+ colm_list_detach_head( list );
+ struct_t *s = colm_generic_el_container( prg, head, gen_id );
+
+ vm_push_struct( s );
+
+ /* Set up reverse. The result comes off the list downrefed.
+ * Need it up referenced for the reverse code too. */
+ rcode_code( exec, IN_FN );
+ rcode_code( exec, FN_LIST_POP_HEAD_BKT );
+ rcode_half( exec, gen_id );
+ rcode_word( exec, (word_t)s );
+ rcode_unit_term( exec );
+ break;
+ }
+ case FN_LIST_POP_HEAD_BKT: {
+ short gen_id;
+ tree_t *val;
+ read_half( gen_id );
+ read_tree( val );
+
+ debug( prg, REALM_BYTECODE, "FN_LIST_POP_HEAD_BKT\n" );
+
+ list_t *list = vm_pop_list();
+ struct_t *s = (struct_t*) val;
+
+ list_el_t *list_el = colm_struct_to_list_el( prg, s, gen_id );
+
+ colm_list_prepend( list, list_el );
+ break;
+ }
+ case FN_MAP_FIND: {
+ short gen_id;
+ read_half( gen_id );
+
+ debug( prg, REALM_BYTECODE, "FN_MAP_FIND %hd\n", gen_id );
+
+ map_t *map = vm_pop_map();
+ tree_t *key = vm_pop_tree();
+
+ map_el_t *map_el = colm_map_find( prg, map, key );
+
+ struct colm_struct *strct = map_el != 0 ?
+ colm_generic_el_container( prg, map_el, gen_id ) : 0;
+
+ vm_push_struct( strct );
+
+ if ( map->generic_info->key_type == TYPE_TREE )
+ colm_tree_downref( prg, sp, key );
+ break;
+ }
+ case FN_MAP_INSERT_WC: {
+ short gen_id;
+ read_half( gen_id );
+
+ debug( prg, REALM_BYTECODE, "FN_MAP_INSERT_WC %hd\n", gen_id );
+
+ map_t *map = vm_pop_map();
+ struct_t *s = vm_pop_struct();
+
+ map_el_t *map_el = colm_struct_to_map_el( prg, s, gen_id );
+
+ colm_map_insert( prg, map, map_el );
+
+ vm_push_tree( prg->true_val );
+ break;
+ }
+ case FN_MAP_INSERT_WV: {
+ short gen_id;
+ read_half( gen_id );
+
+ debug( prg, REALM_BYTECODE, "FN_MAP_INSERT_WV %hd\n", gen_id );
+
+ map_t *map = vm_pop_map();
+ struct_t *s = vm_pop_struct();
+
+ map_el_t *map_el = colm_struct_to_map_el( prg, s, gen_id );
+
+ map_el_t *inserted = colm_map_insert( prg, map, map_el );
+
+ //colm_tree_upref( prg, prg->trueVal );
+ vm_push_tree( prg->true_val );
+
+ rcode_code( exec, IN_FN );
+ rcode_code( exec, FN_MAP_INSERT_BKT );
+ rcode_half( exec, gen_id );
+ rcode_code( exec, inserted != 0 ? 1 : 0 );
+ rcode_word( exec, (word_t)map_el );
+ rcode_unit_term( exec );
+ break;
+ }
+
+ case FN_MAP_INSERT_BKT: {
+ short gen_id;
+ uchar inserted;
+ word_t wmap_el;
+
+ read_half( gen_id );
+ read_byte( inserted );
+ read_word( wmap_el );
+
+ map_el_t *map_el = (map_el_t*)wmap_el;
+
+ debug( prg, REALM_BYTECODE, "FN_MAP_INSERT_BKT %d\n",
+ (int)inserted );
+
+ map_t *map = vm_pop_map();
+
+ if ( inserted )
+ colm_map_detach( prg, map, map_el );
+ break;
+ }
+ case FN_MAP_DETACH_WC: {
+ short gen_id;
+ read_half( gen_id );
+
+ debug( prg, REALM_BYTECODE, "FN_MAP_DETACH_WC %hd\n", gen_id );
+
+ map_t *map = vm_pop_map();
+ struct_t *s = vm_pop_struct();
+
+ map_el_t *map_el = colm_struct_to_map_el( prg, s, gen_id );
+
+ colm_map_detach( prg, map, map_el );
+
+ //colm_tree_upref( prg, prg->trueVal );
+ vm_push_tree( prg->true_val );
+ break;
+ }
+ case FN_MAP_DETACH_WV: {
+ debug( prg, REALM_BYTECODE, "FN_MAP_DETACH_WV\n" );
+
+ tree_t *obj = vm_pop_tree();
+ tree_t *key = vm_pop_tree();
+ struct tree_pair pair = map_remove( prg, (map_t*)obj, key );
+
+ colm_tree_upref( prg, pair.val );
+ vm_push_tree( pair.val );
+
+ /* Reverse instruction. */
+ rcode_code( exec, IN_FN );
+ rcode_code( exec, FN_MAP_DETACH_BKT );
+ rcode_word( exec, (word_t)pair.key );
+ rcode_word( exec, (word_t)pair.val );
+ rcode_unit_term( exec );
+
+ colm_tree_downref( prg, sp, obj );
+ colm_tree_downref( prg, sp, key );
+ break;
+ }
+ case FN_MAP_DETACH_BKT: {
+ tree_t *key, *val;
+ read_tree( key );
+ read_tree( val );
+
+ debug( prg, REALM_BYTECODE, "FN_MAP_DETACH_BKT\n" );
+
+ /* Either both or neither. */
+ assert( ( key == 0 ) ^ ( val != 0 ) );
+
+ tree_t *obj = vm_pop_tree();
+ #if 0
+ if ( key != 0 )
+ map_unremove( prg, (map_t*)obj, key, val );
+ #endif
+
+ colm_tree_downref( prg, sp, obj );
+ break;
+ }
+ case FN_VMAP_INSERT_WC: {
+ short gen_id;
+ read_half( gen_id );
+
+ debug( prg, REALM_BYTECODE, "FN_VMAP_INSERT_WC %hd\n", gen_id );
+
+ map_t *map = vm_pop_map();
+ struct_t *value = vm_pop_struct();
+ struct_t *key = vm_pop_struct();
+
+ colm_vmap_insert( prg, map, key, value );
+
+ //colm_tree_upref( prg, prg->trueVal );
+ vm_push_tree( prg->true_val );
+ break;
+ }
+ case FN_VMAP_INSERT_WV: {
+ short gen_id;
+ read_half( gen_id );
+
+ debug( prg, REALM_BYTECODE, "FN_VMAP_INSERT_WV %hd\n", gen_id );
+
+ map_t *map = vm_pop_map();
+ struct_t *value = vm_pop_struct();
+ struct_t *key = vm_pop_struct();
+
+ map_el_t *inserted = colm_vmap_insert( prg, map, key, value );
+
+ //colm_tree_upref( prg, prg->trueVal );
+ vm_push_tree( prg->true_val );
+
+ rcode_code( exec, IN_FN );
+ rcode_code( exec, FN_VMAP_INSERT_BKT );
+ rcode_half( exec, gen_id );
+ rcode_code( exec, inserted != 0 ? 1 : 0 );
+ rcode_word( exec, (word_t)inserted );
+ rcode_unit_term( exec );
+ break;
+ }
+ case FN_VMAP_INSERT_BKT: {
+ short gen_id;
+ uchar inserted;
+ word_t wmap_el;
+
+ read_half( gen_id );
+ read_byte( inserted );
+ read_word( wmap_el );
+
+ map_el_t *map_el = (map_el_t*)wmap_el;
+
+ debug( prg, REALM_BYTECODE, "FN_VMAP_INSERT_BKT %d\n",
+ (int)inserted );
+
+ map_t *map = vm_pop_map();
+
+ if ( inserted )
+ colm_map_detach( prg, map, map_el );
+ break;
+ }
+ case FN_VMAP_REMOVE_WC: {
+ short gen_id;
+ read_half( gen_id );
+
+ debug( prg, REALM_BYTECODE, "FN_VMAP_REMOVE_WC %hd\n", gen_id );
+
+ map_t *map = vm_pop_map();
+ tree_t *key = vm_pop_tree();
+
+ colm_vmap_remove( prg, map, key );
+
+ //colm_tree_upref( prg, prg->trueVal );
+ vm_push_tree( prg->true_val );
+ break;
+ }
+ case FN_VMAP_FIND: {
+ short gen_id;
+ read_half( gen_id );
+
+ debug( prg, REALM_BYTECODE, "FN_VMAP_FIND %hd\n", gen_id );
+
+ map_t *map = vm_pop_map();
+ tree_t *key = vm_pop_tree();
+
+ tree_t *result = colm_vmap_find( prg, map, key );
+
+ vm_push_tree( result );
+
+ if ( map->generic_info->key_type == TYPE_TREE )
+ colm_tree_downref( prg, sp, key );
+ break;
+ }
+ case FN_VLIST_PUSH_TAIL_WC: {
+ short gen_id;
+ read_half( gen_id );
+
+ debug( prg, REALM_BYTECODE, "FN_VLIST_PUSH_TAIL_WC %hd\n", gen_id );
+
+ list_t *list = vm_pop_list();
+ value_t value = vm_pop_value();
+
+ colm_vlist_append( prg, list, value );
+
+ vm_push_tree( prg->true_val );
+ break;
+ }
+ case FN_VLIST_PUSH_TAIL_WV: {
+ short gen_id;
+ read_half( gen_id );
+
+ debug( prg, REALM_BYTECODE, "FN_VLIST_PUSH_TAIL_WV %hd\n", gen_id );
+
+ list_t *list = vm_pop_list();
+ value_t value = vm_pop_value();
+
+ colm_vlist_append( prg, list, value );
+
+ vm_push_tree( prg->true_val );
+
+ /* Set up reverse code. Needs no args. */
+ rcode_code( exec, IN_FN );
+ rcode_code( exec, FN_VLIST_PUSH_TAIL_BKT );
+ rcode_unit_term( exec );
+ break;
+ }
+ case FN_VLIST_PUSH_TAIL_BKT: {
+ debug( prg, REALM_BYTECODE, "FN_VLIST_PUSH_TAIL_BKT\n" );
+
+ list_t *list = vm_pop_list();
+ colm_list_detach_tail( list );
+ break;
+ }
+ case FN_VLIST_PUSH_HEAD_WC: {
+ short gen_id;
+ read_half( gen_id );
+
+ debug( prg, REALM_BYTECODE, "FN_VLIST_PUSH_HEAD_WC %hd\n", gen_id );
+
+ list_t *list = vm_pop_list();
+ value_t value = vm_pop_value();
+
+ colm_vlist_prepend( prg, list, value );
+
+ vm_push_tree( prg->true_val );
+ break;
+ }
+ case FN_VLIST_PUSH_HEAD_WV: {
+ short gen_id;
+ read_half( gen_id );
+
+ debug( prg, REALM_BYTECODE, "FN_VLIST_PUSH_HEAD_WV %hd\n", gen_id );
+
+ list_t *list = vm_pop_list();
+ value_t value = vm_pop_value();
+
+ colm_vlist_prepend( prg, list, value );
+
+ vm_push_tree( prg->true_val );
+
+ /* Set up reverse code. Needs no args. */
+ rcode_code( exec, IN_FN );
+ rcode_code( exec, FN_VLIST_PUSH_HEAD_BKT );
+ rcode_unit_term( exec );
+ break;
+ }
+ case FN_VLIST_PUSH_HEAD_BKT: {
+ debug( prg, REALM_BYTECODE, "FN_VLIST_PUSH_HEAD_BKT\n" );
+
+ list_t *list = vm_pop_list();
+ colm_list_detach_head( list );
+ break;
+ }
+ case FN_VLIST_POP_HEAD_WC: {
+ short gen_id;
+ read_half( gen_id );
+
+ debug( prg, REALM_BYTECODE, "FN_VLIST_POP_HEAD_WC %hd\n", gen_id );
+
+ list_t *list = vm_pop_list();
+
+ value_t result = colm_vlist_detach_head( prg, list );
+ vm_push_value( result );
+ break;
+ }
+ case FN_VLIST_POP_HEAD_WV: {
+ short gen_id;
+ read_half( gen_id );
+
+ debug( prg, REALM_BYTECODE, "FN_VLIST_POP_HEAD_WV %hd\n", gen_id );
+
+ list_t *list = vm_pop_list();
+
+ value_t result = colm_vlist_detach_head( prg, list );
+ vm_push_value( result );
+
+ /* Set up reverse. */
+ rcode_code( exec, IN_FN );
+ rcode_code( exec, FN_VLIST_POP_HEAD_BKT );
+ rcode_half( exec, gen_id );
+ rcode_word( exec, (word_t)result );
+ rcode_unit_term( exec );
+ break;
+ }
+ case FN_VLIST_POP_HEAD_BKT: {
+ short gen_id;
+ tree_t *val;
+ read_half( gen_id );
+ read_tree( val );
+
+ debug( prg, REALM_BYTECODE, "FN_VLIST_POP_HEAD_BKT\n" );
+
+ list_t *list = vm_pop_list();
+
+ colm_vlist_prepend( prg, list, (value_t)val );
+ break;
+ }
+ case FN_VLIST_POP_TAIL_WC: {
+ short gen_id;
+ read_half( gen_id );
+
+ debug( prg, REALM_BYTECODE, "FN_VLIST_POP_TAIL_WC %hd\n", gen_id );
+
+ list_t *list = vm_pop_list();
+
+ value_t result = colm_vlist_detach_tail( prg, list );
+ vm_push_value( result );
+ break;
+ }
+ case FN_VLIST_POP_TAIL_WV: {
+ short gen_id;
+ read_half( gen_id );
+
+ debug( prg, REALM_BYTECODE, "FN_VLIST_POP_TAIL_WV %hd\n", gen_id );
+
+ list_t *list = vm_pop_list();
+
+ value_t result = colm_vlist_detach_tail( prg, list );
+ vm_push_value( result );
+
+ /* Set up reverse. */
+ rcode_code( exec, IN_FN );
+ rcode_code( exec, FN_VLIST_POP_TAIL_BKT );
+ rcode_half( exec, gen_id );
+ rcode_word( exec, (word_t)result );
+ rcode_unit_term( exec );
+ break;
+ }
+ case FN_VLIST_POP_TAIL_BKT: {
+ short gen_id;
+ tree_t *val;
+ read_half( gen_id );
+ read_tree( val );
+
+ debug( prg, REALM_BYTECODE, "FN_VLIST_POP_TAIL_BKT\n" );
+
+ list_t *list = vm_pop_list();
+
+ colm_vlist_append( prg, list, (value_t)val );
+ break;
+ }
+
+ case FN_EXIT_HARD: {
+ debug( prg, REALM_BYTECODE, "FN_EXIT\n" );
+
+ vm_pop_tree();
+ prg->exit_status = vm_pop_type(long);
+ prg->induce_exit = 1;
+ exit( prg->exit_status );
+ }
+ case FN_EXIT: {
+ /* The unwind code follows the exit call (exception, see
+ * synthesis). */
+ short unwind_len;
+ read_half( unwind_len );
+
+ debug( prg, REALM_BYTECODE, "FN_EXIT, unwind len: %hd\n", unwind_len );
+
+ vm_pop_tree();
+ prg->exit_status = vm_pop_type(long);
+ prg->induce_exit = 1;
+
+ while ( true ) {
+ /* We stop on the root, leaving the pseudo-call setup on the
+ * stack. Note we exclude the local data. */
+ if ( exec->frame_id == prg->rtd->root_frame_id )
+ break;
+
+ struct frame_info *fi = &prg->rtd->frame_info[exec->frame_id];
+
+ debug( prg, REALM_BYTECODE, "FN_EXIT, popping frame %s, "
+ "unwind-len %hd, arg-size %ld\n",
+ ( fi->name != 0 ? fi->name : "<no-name>" ),
+ unwind_len, fi->arg_size );
+
+ if ( unwind_len > 0 )
+ sp = colm_execute_code( prg, exec, sp, instr );
+
+ downref_locals( prg, &sp, exec, fi->locals, fi->locals_len );
+ vm_popn( fi->frame_size );
+
+ /* Call layout. */
+ exec->frame_id = vm_pop_type(long);
+ exec->frame_ptr = vm_pop_type(tree_t**);
+ instr = vm_pop_type(code_t*);
+
+ tree_t *ret_val = vm_pop_tree();
+ vm_pop_value();
+
+ /* The IN_PREP_ARGS stack data. */
+ vm_popn( fi->arg_size );
+ vm_pop_value();
+
+ if ( fi->ret_tree ) {
+ /* Problem here. */
+ colm_tree_downref( prg, sp, ret_val );
+ }
+
+ read_half( unwind_len );
+ }
+
+ goto out;
+ }
+ default: {
+ fatal( "UNKNOWN FUNCTION: 0x%02x -- something is wrong\n", c );
+ break;
+ }}
+ break;
+ }
+
+ /* Halt is a default instruction given by the compiler when it is
+ * asked to generate an instruction it doesn't have. It is deliberate
+ * and can represent "not implemented" or "compiler error" because a
+ * variable holding instructions was not properly initialized. */
+ case IN_HALT: {
+ fatal( "IN_HALT -- compiler did something wrong\n" );
+ exit(1);
+ break;
+ }
+ default: {
+ fatal( "UNKNOWN INSTRUCTION: 0x%02x -- something is wrong\n", *(instr-1) );
+ assert(false);
+ break;
+ }
+ }
+ goto again;
+
+out:
+ if ( ! prg->induce_exit )
+ assert( sp == root );
+ return sp;
+}
+
+/*
+ * Deleting rcode requires downreffing any trees held by it.
+ *
+ * Walks the reverse-code stream that starts at instr, one instruction at a
+ * time. No list, map or parser operation is performed here: each case only
+ * steps over the operands of its instruction and releases the ones that own
+ * memory. Tree operands are passed to colm_tree_downref() and the saved
+ * token data of IN_SET_TOKEN_DATA_BKT is passed to string_free(). All other
+ * operands are skipped.
+ *
+ * prg:   program, handed through to colm_tree_downref(), string_free() and
+ *        debug().
+ * sp:    handed through to colm_tree_downref().
+ * instr: first opcode of the reverse code to release.
+ *
+ * The walk ends at IN_PCR_RET or IN_PCR_END_DECK. An opcode that has no case
+ * below is reported with fatal().
+ *
+ * NOTE(review): the read_*() and consume_*() helpers are defined outside
+ * this function. They are presumed to advance instr past the operand they
+ * name, which would mean the calls in each case must appear in the same
+ * order the operands were written with rcode_*() -- confirm against their
+ * definitions before reordering anything.
+ */
+static void rcode_downref( program_t *prg, tree_t **sp, code_t *instr )
+{
+again: /* Decode the next opcode. */
+ switch ( *instr++ ) {
+ case IN_PARSE_INIT_BKT: {
+ debug( prg, REALM_BYTECODE, "IN_PARSE_INIT_BKT\n" );
+
+ consume_word(); //( parser );
+ consume_word(); //( steps );
+
+ break;
+ }
+ case IN_SEND_EOF_BKT: {
+ debug( prg, REALM_BYTECODE, "IN_SEND_EOF_BKT\n" );
+ consume_word(); //( parser );
+ break;
+ }
+
+ case IN_LOAD_TREE: {
+ tree_t *w;
+ read_tree( w );
+ debug( prg, REALM_BYTECODE, "IN_LOAD_TREE %p\n", w );
+ colm_tree_downref( prg, sp, w ); /* The tree operand is held by the rcode. */
+ break;
+ }
+ case IN_LOAD_WORD: {
+ consume_word(); /* Plain word operand, nothing to release. */
+ debug( prg, REALM_BYTECODE, "IN_LOAD_WORD\n" );
+ break;
+ }
+ case IN_RESTORE_LHS: {
+ tree_t *restore;
+ read_tree( restore );
+ debug( prg, REALM_BYTECODE, "IN_RESTORE_LHS\n" );
+ colm_tree_downref( prg, sp, restore );
+ break;
+ }
+
+ case IN_PARSE_FRAG_BKT: { /* No operands are consumed for this one. */
+ debug( prg, REALM_BYTECODE, "IN_PARSE_FRAG_BKT\n" );
+ break;
+ }
+ case IN_PCR_RET: {
+ debug( prg, REALM_BYTECODE, "IN_PCR_RET\n" );
+ return; /* Ends the walk. */
+ }
+ case IN_PCR_END_DECK: {
+ debug( prg, REALM_BYTECODE, "IN_PCR_END_DECK\n" );
+ return; /* Ends the walk. */
+ }
+ case IN_SEND_TEXT_BKT: {
+ tree_t *input;
+
+ consume_word(); //( parser );
+ read_tree( input );
+ consume_word(); //( len );
+
+ debug( prg, REALM_BYTECODE, "IN_SEND_TEXT_BKT\n" );
+
+ colm_tree_downref( prg, sp, input ); /* Only the middle operand is a tree. */
+ break;
+ }
+ case IN_SEND_TREE_BKT: {
+ tree_t *input;
+
+ consume_word(); //( parser );
+ read_tree( input );
+ consume_word(); //( len );
+
+ debug( prg, REALM_BYTECODE, "IN_SEND_TREE_BKT\n" );
+
+ colm_tree_downref( prg, sp, input ); /* Only the middle operand is a tree. */
+ break;
+ }
+ case IN_SEND_STREAM_BKT: {
+ consume_word(); //( sptr );
+ consume_word(); //( input ); skipped, not downreffed
+ consume_word(); //( len );
+
+ debug( prg, REALM_BYTECODE, "IN_SEND_STREAM_BKT\n" );
+ break;
+ }
+
+ case IN_INPUT_PULL_BKT: {
+ tree_t *string;
+ read_tree( string );
+
+ debug( prg, REALM_BYTECODE, "IN_INPUT_PULL_BKT\n" );
+
+ colm_tree_downref( prg, sp, string );
+ break;
+ }
+ case IN_INPUT_PUSH_BKT: {
+ consume_word(); //( len );
+
+ debug( prg, REALM_BYTECODE, "IN_INPUT_PUSH_BKT\n" );
+ break;
+ }
+ case IN_LOAD_GLOBAL_BKT: { /* No operands. */
+ debug( prg, REALM_BYTECODE, "IN_LOAD_GLOBAL_BKT\n" );
+ break;
+ }
+ case IN_LOAD_CONTEXT_BKT: { /* No operands. */
+ debug( prg, REALM_BYTECODE, "IN_LOAD_CONTEXT_BKT\n" );
+ break;
+ }
+ case IN_LOAD_INPUT_BKT: {
+ consume_word(); //( input );
+ debug( prg, REALM_BYTECODE, "IN_LOAD_INPUT_BKT\n" );
+ break;
+ }
+ case IN_GET_FIELD_TREE_BKT: {
+ short field; /* Read only so it can be printed by debug(). */
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_GET_FIELD_TREE_BKT %hd\n", field );
+ break;
+ }
+ case IN_SET_FIELD_TREE_BKT: {
+ short field;
+ tree_t *val;
+ read_half( field );
+ read_tree( val );
+
+ debug( prg, REALM_BYTECODE, "IN_SET_FIELD_TREE_BKT %hd\n", field );
+
+ colm_tree_downref( prg, sp, val ); /* The saved field value is a tree. */
+ break;
+ }
+ case IN_SET_STRUCT_BKT: {
+ short field;
+ tree_t *val;
+ read_half( field );
+ read_tree( val );
+
+ debug( prg, REALM_BYTECODE, "IN_SET_STRUCT_BKT %hd\n", field );
+
+ colm_tree_downref( prg, sp, val ); /* The saved field value is a tree. */
+ break;
+ }
+ case IN_SET_STRUCT_VAL_BKT: {
+ consume_half(); //( field );
+ consume_word(); //( val ); skipped, not downreffed
+
+ debug( prg, REALM_BYTECODE, "IN_SET_STRUCT_VAL_BKT\n" );
+ break;
+ }
+ case IN_PTR_ACCESS_BKT: {
+ consume_word(); //( ptr );
+
+ debug( prg, REALM_BYTECODE, "IN_PTR_ACCESS_BKT\n" );
+ break;
+ }
+ case IN_SET_TOKEN_DATA_BKT: {
+ word_t oldval;
+ read_word( oldval );
+
+ debug( prg, REALM_BYTECODE, "IN_SET_TOKEN_DATA_BKT\n" );
+
+ head_t *head = (head_t*)oldval; /* The word operand carries a head_t pointer. */
+ string_free( prg, head );
+ break;
+ }
+ case IN_GET_LIST_MEM_BKT: {
+ short field; /* Read only so it can be printed by debug(). */
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_GET_LIST_MEM_BKT %hd\n", field );
+ break;
+ }
+ case IN_GET_MAP_MEM_BKT: {
+ short field; /* Read only so it can be printed by debug(). */
+ read_half( field );
+
+ debug( prg, REALM_BYTECODE, "IN_GET_MAP_MEM_BKT %hd\n", field );
+ break;
+ }
+ case IN_FN: { /* Prefix opcode: the next byte selects the FN_ function. */
+ switch ( *instr++ ) {
+ case FN_LIST_PUSH_HEAD_BKT: { /* No operands. */
+ debug( prg, REALM_BYTECODE, "FN_LIST_PUSH_HEAD_BKT\n" );
+ break;
+ }
+ case FN_LIST_POP_HEAD_BKT: {
+ consume_half(); //( genId );
+ consume_word(); //( val ); skipped, not downreffed
+
+ debug( prg, REALM_BYTECODE, "FN_LIST_POP_HEAD_BKT\n" );
+
+ break;
+ }
+ case FN_LIST_PUSH_TAIL_BKT: { /* No operands. */
+ debug( prg, REALM_BYTECODE, "FN_LIST_PUSH_TAIL_BKT\n" );
+ break;
+ }
+ case FN_LIST_POP_TAIL_BKT: {
+ consume_half(); //( genId );
+ consume_word(); //( val ); skipped, not downreffed
+
+ debug( prg, REALM_BYTECODE, "FN_LIST_POP_TAIL_BKT\n" );
+
+ break;
+ }
+ case FN_MAP_INSERT_BKT: {
+ #ifdef DEBUG
+ uchar inserted;
+ consume_half(); //( genId );
+ read_byte( inserted );
+ consume_word(); //( wmapEl );
+ #else
+ consume_half(); //( genId );
+ consume_byte(); // inserted
+ consume_word(); //( wmapEl );
+ #endif
+
+ debug( prg, REALM_BYTECODE, "FN_MAP_INSERT_BKT %d\n",
+ (int)inserted ); /* NOTE(review): inserted is declared only under DEBUG; presumably debug() expands to nothing otherwise -- confirm. */
+ break;
+ }
+ case FN_VMAP_INSERT_BKT: {
+ short gen_id; /* Read to step over the operand; the value is not used here. */
+
+ #ifdef DEBUG
+ uchar inserted;
+ read_half( gen_id );
+ read_byte( inserted );
+ consume_word(); //read_word( wmap_el );
+ #else
+ read_half( gen_id );
+ consume_byte();
+ consume_word(); //read_word( wmap_el );
+ #endif
+
+ //map_el_t *map_el = (map_el_t*)wmap_el;
+
+ debug( prg, REALM_BYTECODE, "FN_VMAP_INSERT_BKT %d\n",
+ (int)inserted ); /* NOTE(review): inserted is declared only under DEBUG; presumably debug() expands to nothing otherwise -- confirm. */
+
+ break;
+ }
+ case FN_MAP_DETACH_BKT: {
+ tree_t *key, *val;
+ read_tree( key );
+ read_tree( val );
+
+ debug( prg, REALM_BYTECODE, "FN_MAP_DETACH_BKT\n" );
+
+ colm_tree_downref( prg, sp, key ); /* Both operands are trees. */
+ colm_tree_downref( prg, sp, val );
+ break;
+ }
+
+ case FN_VLIST_PUSH_TAIL_BKT: { /* No operands. */
+ break;
+ }
+
+ case FN_VLIST_PUSH_HEAD_BKT: { /* No operands. */
+ break;
+ }
+
+ case FN_VLIST_POP_HEAD_BKT: {
+ short gen_id; /* Read to step over the operand; the value is not used here. */
+ //word_t result;
+ read_half( gen_id );
+ consume_word(); //read_word( result ); skipped, not downreffed
+ break;
+ }
+
+ case FN_VLIST_POP_TAIL_BKT: {
+ short gen_id; /* Read to step over the operand; the value is not used here. */
+ //word_t result;
+ read_half( gen_id );
+ consume_word(); //read_word( result ); skipped, not downreffed
+ break;
+ }
+
+ default: {
+ fatal( "UNKNOWN FUNCTION 0x%02x: -- reverse code downref\n", *(instr-1));
+ assert(false);
+ }}
+ break;
+ }
+ default: {
+ fatal( "UNKNOWN INSTRUCTION 0x%02x: -- reverse code downref\n", *(instr-1));
+ assert(false);
+ break;
+ }
+ }
+ goto again; /* Every case that did not return continues with the next instruction. */
+}
+
+
diff --git a/src/bytecode.h b/src/bytecode.h
new file mode 100644
index 00000000..02cd78f4
--- /dev/null
+++ b/src/bytecode.h
@@ -0,0 +1,678 @@
+/*
+ * Copyright 2007-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _COLM_BYTECODE_H
+#define _COLM_BYTECODE_H
+
+#include <colm/pdarun.h>
+#include <colm/type.h>
+#include <colm/tree.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if SIZEOF_LONG != 4 && SIZEOF_LONG != 8
+ #error "SIZEOF_LONG contained an unexpected value"
+#endif
+
+typedef unsigned long ulong; /* Shorthand for unsigned long. */
+typedef unsigned char uchar; /* Shorthand for unsigned char; the interpreter uses it for single-byte operands such as the 'inserted' flag of FN_MAP_INSERT_BKT. */
+
+#define IN_NONE 0x00 /* First entry of the IN_ opcode table. Operand notes on the entries below describe what rcode_downref() in bytecode.c steps over. */
+#define IN_LOAD_INT 0x01
+#define IN_LOAD_STR 0x02
+#define IN_LOAD_NIL 0x03
+#define IN_LOAD_TRUE 0x04
+#define IN_LOAD_FALSE 0x05
+#define IN_LOAD_TREE 0x06 /* In rcode: one tree operand, downreffed on delete. */
+#define IN_LOAD_WORD 0x07 /* In rcode: one word operand. */
+
+#define IN_ADD_INT 0x08
+#define IN_SUB_INT 0x09
+#define IN_MULT_INT 0x0a
+#define IN_DIV_INT 0x0b
+
+#define IN_TST_EQL_VAL 0x59
+#define IN_TST_EQL_TREE 0x0c
+#define IN_TST_NOT_EQL_TREE 0x0d
+#define IN_TST_NOT_EQL_VAL 0x5f
+#define IN_TST_LESS_VAL 0x0e
+#define IN_TST_LESS_TREE 0xbd
+#define IN_TST_GRTR_VAL 0x0f
+#define IN_TST_GRTR_TREE 0xbf
+#define IN_TST_LESS_EQL_VAL 0x10
+#define IN_TST_LESS_EQL_TREE 0xc0
+#define IN_TST_GRTR_EQL_VAL 0x11
+#define IN_TST_GRTR_EQL_TREE 0xcd
+#define IN_TST_LOGICAL_AND 0x12
+#define IN_TST_LOGICAL_OR 0x13
+
+#define IN_TST_NZ_TREE 0xd1
+
+#define IN_LOAD_RETVAL 0xd4
+
+#define IN_STASH_ARG 0x20
+#define IN_PREP_ARGS 0xe8
+#define IN_CLEAR_ARGS 0xe9
+
+#define IN_GEN_ITER_FROM_REF 0xd3
+#define IN_GEN_ITER_DESTROY 0xd5
+#define IN_GEN_ITER_UNWIND 0x74
+#define IN_GEN_ITER_GET_CUR_R 0xdf
+#define IN_GEN_VITER_GET_CUR_R 0xe7
+#define IN_LIST_ITER_ADVANCE 0xde
+#define IN_REV_LIST_ITER_ADVANCE 0x77
+#define IN_MAP_ITER_ADVANCE 0xe6
+
+#define IN_NOT_VAL 0x14
+#define IN_NOT_TREE 0xd2
+
+#define IN_JMP 0x15
+#define IN_JMP_FALSE_TREE 0x16
+#define IN_JMP_TRUE_TREE 0x17
+#define IN_JMP_FALSE_VAL 0xb8
+#define IN_JMP_TRUE_VAL 0xed
+
+#define IN_STR_LENGTH 0x19
+#define IN_CONCAT_STR 0x1a
+#define IN_TREE_TRIM 0x1b
+
+#define IN_POP_TREE 0x1d
+#define IN_POP_N_WORDS 0x1e
+#define IN_POP_VAL 0xbe
+#define IN_DUP_VAL 0x1f
+#define IN_DUP_TREE 0xf2
+
+#define IN_REJECT 0x21
+#define IN_MATCH 0x22
+#define IN_PROD_NUM 0x6a
+#define IN_CONSTRUCT 0x23
+#define IN_CONS_OBJECT 0xf0
+#define IN_CONS_GENERIC 0xf1
+#define IN_TREE_CAST 0xe4
+
+#define IN_GET_LOCAL_R 0x25
+#define IN_GET_LOCAL_WC 0x26
+#define IN_SET_LOCAL_WC 0x27
+
+#define IN_GET_LOCAL_REF_R 0x28
+#define IN_GET_LOCAL_REF_WC 0x29
+#define IN_SET_LOCAL_REF_WC 0x2a
+
+#define IN_SAVE_RET 0x2b
+
+#define IN_GET_FIELD_TREE_R 0x2c
+#define IN_GET_FIELD_TREE_WC 0x2d
+#define IN_GET_FIELD_TREE_WV 0x2e
+#define IN_GET_FIELD_TREE_BKT 0x2f /* In rcode: half (field). */
+
+#define IN_SET_FIELD_TREE_WV 0x30
+#define IN_SET_FIELD_TREE_WC 0x31
+#define IN_SET_FIELD_TREE_BKT 0x32 /* In rcode: half (field), tree (val, downreffed on delete). */
+#define IN_SET_FIELD_TREE_LEAVE_WC 0x33
+
+#define IN_GET_FIELD_VAL_R 0x5e
+#define IN_SET_FIELD_VAL_WC 0x60
+
+#define IN_GET_MATCH_LENGTH_R 0x34
+#define IN_GET_MATCH_TEXT_R 0x35
+
+#define IN_GET_TOKEN_DATA_R 0x36
+#define IN_SET_TOKEN_DATA_WC 0x37
+#define IN_SET_TOKEN_DATA_WV 0x38
+#define IN_SET_TOKEN_DATA_BKT 0x39 /* In rcode: word holding a head_t pointer, freed with string_free() on delete. */
+
+#define IN_GET_TOKEN_FILE_R 0x80
+#define IN_GET_TOKEN_LINE_R 0x3b
+#define IN_GET_TOKEN_POS_R 0x3a
+#define IN_GET_TOKEN_COL_R 0x81
+
+#define IN_INIT_RHS_EL 0x3c
+#define IN_INIT_LHS_EL 0x3d
+#define IN_INIT_CAPTURES 0x3e
+#define IN_STORE_LHS_EL 0x3f
+#define IN_RESTORE_LHS 0x40 /* In rcode: one tree operand, downreffed on delete. */
+
+#define IN_TRITER_FROM_REF 0x41
+#define IN_TRITER_ADVANCE 0x42
+#define IN_TRITER_NEXT_CHILD 0x43
+#define IN_TRITER_GET_CUR_R 0x44
+#define IN_TRITER_GET_CUR_WC 0x45
+#define IN_TRITER_SET_CUR_WC 0x46
+#define IN_TRITER_UNWIND 0x73
+#define IN_TRITER_DESTROY 0x47
+#define IN_TRITER_NEXT_REPEAT 0x48
+#define IN_TRITER_PREV_REPEAT 0x49
+
+#define IN_REV_TRITER_FROM_REF 0x4a
+#define IN_REV_TRITER_DESTROY 0x4b
+#define IN_REV_TRITER_UNWIND 0x75
+#define IN_REV_TRITER_PREV_CHILD 0x4c
+
+#define IN_UITER_DESTROY 0x4d
+#define IN_UITER_UNWIND 0x71
+#define IN_UITER_CREATE_WV 0x4e
+#define IN_UITER_CREATE_WC 0x4f
+#define IN_UITER_ADVANCE 0x50
+#define IN_UITER_GET_CUR_R 0x51
+#define IN_UITER_GET_CUR_WC 0x52
+#define IN_UITER_SET_CUR_WC 0x53
+
+#define IN_TREE_SEARCH 0x54
+
+#define IN_LOAD_GLOBAL_R 0x55
+#define IN_LOAD_GLOBAL_WV 0x56
+#define IN_LOAD_GLOBAL_WC 0x57
+#define IN_LOAD_GLOBAL_BKT 0x58 /* In rcode: no operands. */
+
+#define IN_PTR_ACCESS_WV 0x5a
+#define IN_PTR_ACCESS_BKT 0x61 /* In rcode: word (ptr). */
+
+#define IN_REF_FROM_LOCAL 0x62
+#define IN_REF_FROM_REF 0x63
+#define IN_REF_FROM_QUAL_REF 0x64
+#define IN_RHS_REF_FROM_QUAL_REF 0xee
+#define IN_REF_FROM_BACK 0xe3
+#define IN_TRITER_REF_FROM_CUR 0x65
+#define IN_UITER_REF_FROM_CUR 0x66
+
+#define IN_GET_MAP_EL_MEM_R 0x6c
+
+#define IN_MAP_LENGTH 0x67
+
+#define IN_LIST_LENGTH 0x72
+
+#define IN_GET_LIST_MEM_R 0x79
+#define IN_GET_LIST_MEM_WC 0x7a
+#define IN_GET_LIST_MEM_WV 0x7b
+#define IN_GET_LIST_MEM_BKT 0x7c /* In rcode: half (field). */
+
+#define IN_GET_VLIST_MEM_R 0xeb
+#define IN_GET_VLIST_MEM_WC 0xec
+#define IN_GET_VLIST_MEM_WV 0x70
+#define IN_GET_VLIST_MEM_BKT 0x5c
+
+#define IN_CONS_REDUCER 0x76
+#define IN_READ_REDUCE 0x69
+
+#define IN_DONE 0x78
+
+#define IN_GET_LIST_EL_MEM_R 0xf5
+
+#define IN_GET_MAP_MEM_R 0x6d
+#define IN_GET_MAP_MEM_WV 0x7d
+#define IN_GET_MAP_MEM_WC 0x7e
+#define IN_GET_MAP_MEM_BKT 0x7f /* In rcode: half (field). */
+
+#define IN_TREE_TO_STR_XML 0x6e
+#define IN_TREE_TO_STR_XML_AC 0x6f
+#define IN_TREE_TO_STR_POSTFIX 0xb6
+
+#define IN_HOST 0xea
+
+#define IN_CALL_WC 0x8c
+#define IN_CALL_WV 0x8d
+#define IN_RET 0x8e
+#define IN_YIELD 0x8f
+#define IN_HALT 0x8b /* Emitted by the compiler when it has no instruction to generate; the interpreter treats it as fatal. */
+
+#define IN_INT_TO_STR 0x97
+#define IN_TREE_TO_STR 0x98
+#define IN_TREE_TO_STR_TRIM 0x99
+#define IN_TREE_TO_STR_TRIM_A 0x18
+
+#define IN_CREATE_TOKEN 0x9a
+#define IN_MAKE_TOKEN 0x9b
+#define IN_MAKE_TREE 0x9c
+#define IN_CONSTRUCT_TERM 0x9d
+
+#define IN_INPUT_PULL_WV 0x9e
+#define IN_INPUT_PULL_WC 0xe1
+#define IN_INPUT_PULL_BKT 0x9f /* In rcode: one tree operand (string), downreffed on delete. */
+
+#define IN_INPUT_CLOSE_WC 0xef
+#define IN_INPUT_AUTO_TRIM_WC 0x82
+#define IN_IINPUT_AUTO_TRIM_WC 0x83
+
+#define IN_PARSE_FRAG_W 0xa2
+#define IN_PARSE_INIT_BKT 0xa1 /* In rcode: word (parser), word (steps). */
+#define IN_PARSE_FRAG_BKT 0xa6 /* In rcode: no operands. */
+
+#define IN_SEND_NOTHING 0xa0
+#define IN_SEND_TEXT_W 0x89
+#define IN_SEND_TEXT_BKT 0x8a /* In rcode: word (parser), tree (input, downreffed on delete), word (len). */
+
+#define IN_PRINT_TREE 0xa3
+
+#define IN_SEND_TREE_W 0xa9
+#define IN_SEND_TREE_BKT 0xaa /* In rcode: word (parser), tree (input, downreffed on delete), word (len). */
+
+#define IN_REPLACE_STREAM 0x88
+
+#define IN_SEND_STREAM_W 0x90
+#define IN_SEND_STREAM_BKT 0x1c /* In rcode: word (sptr), word (input), word (len). */
+
+#define IN_SEND_EOF_W 0x87
+#define IN_SEND_EOF_BKT 0xa4 /* In rcode: word (parser). */
+
+#define IN_REDUCE_COMMIT 0xa5
+
+#define IN_PCR_RET 0xb2 /* Ends the rcode_downref() walk. */
+#define IN_PCR_END_DECK 0xb3 /* Ends the rcode_downref() walk. */
+
+#define IN_OPEN_FILE 0xb4
+
+#define IN_GET_CONST 0xb5
+
+#define IN_TO_UPPER 0xb9
+#define IN_TO_LOWER 0xba
+
+#define IN_LOAD_INPUT_R 0xc1
+#define IN_LOAD_INPUT_WV 0xc2
+#define IN_LOAD_INPUT_WC 0xc3
+#define IN_LOAD_INPUT_BKT 0xc4 /* In rcode: word (input). */
+
+#define IN_INPUT_PUSH_WV 0xc5
+#define IN_INPUT_PUSH_BKT 0xc6 /* In rcode: word (len). */
+#define IN_INPUT_PUSH_IGNORE_WV 0xc7
+
+#define IN_INPUT_PUSH_STREAM_WV 0xf3
+#define IN_INPUT_PUSH_STREAM_BKT 0xf4
+
+#define IN_LOAD_CONTEXT_R 0xc8
+#define IN_LOAD_CONTEXT_WV 0xc9
+#define IN_LOAD_CONTEXT_WC 0xca
+#define IN_LOAD_CONTEXT_BKT 0xcb /* In rcode: no operands. */
+
+#define IN_SET_PARSER_CONTEXT 0xd0
+#define IN_SET_PARSER_INPUT 0x96
+
+#define IN_GET_RHS_VAL_R 0xd7
+#define IN_GET_RHS_VAL_WC 0xd8
+#define IN_GET_RHS_VAL_WV 0xd9
+#define IN_GET_RHS_VAL_BKT 0xda
+#define IN_SET_RHS_VAL_WC 0xdb
+#define IN_SET_RHS_VAL_WV 0xdc
+#define IN_SET_RHS_VAL_BKT 0xdd
+
+#define IN_GET_PARSER_MEM_R 0x5b
+
+#define IN_GET_STREAM_MEM_R 0xb7
+
+#define IN_GET_PARSER_STREAM 0x6b
+
+#define IN_GET_ERROR 0xcc
+#define IN_SET_ERROR 0xe2
+
+#define IN_SYSTEM 0xe5
+
+#define IN_GET_STRUCT_R 0xf7
+#define IN_GET_STRUCT_WC 0xf8
+#define IN_GET_STRUCT_WV 0xf9
+#define IN_GET_STRUCT_BKT 0xfa
+#define IN_SET_STRUCT_WC 0xfb
+#define IN_SET_STRUCT_WV 0xfc
+#define IN_SET_STRUCT_BKT 0xfd /* In rcode: half (field), tree (val, downreffed on delete). */
+#define IN_GET_STRUCT_VAL_R 0x93
+#define IN_SET_STRUCT_VAL_WV 0x94
+#define IN_SET_STRUCT_VAL_WC 0x95
+#define IN_SET_STRUCT_VAL_BKT 0x5d /* In rcode: half (field), word (val). */
+#define IN_NEW_STRUCT 0xfe
+
+#define IN_GET_LOCAL_VAL_R 0x91
+#define IN_SET_LOCAL_VAL_WC 0x92
+
+#define IN_NEW_STREAM 0x24
+#define IN_GET_COLLECT_STRING 0x68
+
+/*
+ * Const things to get.
+ *
+ * NOTE(review): presumably these are the selector values that accompany
+ * IN_GET_CONST -- confirm against the interpreter; that instruction's
+ * implementation is not visible alongside these definitions.
+ */
+#define CONST_STDIN 0x10
+#define CONST_STDOUT 0x11
+#define CONST_STDERR 0x12
+#define CONST_ARG 0x13
+
+
+
+/*
+ * IN_FN instructions.
+ *
+ * IN_FN is a prefix opcode: the byte that follows it selects one of the FN_
+ * functions below. The interpreter switches on that second byte, and reverse
+ * code for these functions is written as IN_FN followed by the FN_ code.
+ *
+ * Suffix convention, as seen in the list and map cases of the interpreter:
+ *   _WV   performs the operation and appends reverse code for it.
+ *   _WC   performs the same operation without appending reverse code.
+ *   _BKT  is the instruction the reverse code contains; it undoes the _WV.
+ *
+ * The FN_ values form their own numbering, separate from the IN_ opcodes.
+ */
+
+#define IN_FN 0xff
+#define FN_NONE 0x00
+#define FN_STOP 0x0a /* Flushes streams and leaves the interpreter loop. */
+
+#define FN_STR_ATOI 0x1d
+#define FN_STR_ATOO 0x38
+#define FN_STR_UORD8 0x01
+#define FN_STR_SORD8 0x02
+#define FN_STR_UORD16 0x03
+#define FN_STR_SORD16 0x04
+#define FN_STR_UORD32 0x05
+#define FN_STR_SORD32 0x06
+#define FN_STR_PREFIX 0x36
+#define FN_STR_SUFFIX 0x37
+#define FN_SPRINTF 0xd6
+#define FN_LOAD_ARGV 0x07 /* Operand: half (field of the global object to store into). */
+#define FN_LOAD_ARG0 0x08 /* Operand: half (field of the global object to store into). */
+#define FN_INIT_STDS 0x3e /* Operand: half (field of the global object to store into). */
+
+
+#define FN_LIST_PUSH_TAIL_WV 0x11
+#define FN_LIST_PUSH_TAIL_WC 0x12
+#define FN_LIST_PUSH_TAIL_BKT 0x13 /* In rcode: no operands. */
+#define FN_LIST_POP_TAIL_WV 0x14
+#define FN_LIST_POP_TAIL_WC 0x15
+#define FN_LIST_POP_TAIL_BKT 0x16 /* In rcode: half (gen id), word (popped struct). */
+#define FN_LIST_PUSH_HEAD_WV 0x17
+#define FN_LIST_PUSH_HEAD_WC 0x18
+#define FN_LIST_PUSH_HEAD_BKT 0x19 /* In rcode: no operands. */
+#define FN_LIST_POP_HEAD_WV 0x1a
+#define FN_LIST_POP_HEAD_WC 0x1b
+#define FN_LIST_POP_HEAD_BKT 0x1c /* In rcode: half (gen id), word (popped struct). */
+
+#define FN_MAP_FIND 0x24
+#define FN_MAP_INSERT_WV 0x1e
+#define FN_MAP_INSERT_WC 0x1f
+#define FN_MAP_INSERT_BKT 0x20 /* In rcode: half (gen id), byte (inserted flag), word (map element). */
+#define FN_MAP_DETACH_WV 0x21
+#define FN_MAP_DETACH_WC 0x22
+#define FN_MAP_DETACH_BKT 0x23 /* In rcode: word (key), word (val); both are downreffed when the rcode is deleted. */
+
+#define FN_VMAP_FIND 0x29
+#define FN_VMAP_INSERT_WC 0x25
+#define FN_VMAP_INSERT_WV 0x26
+#define FN_VMAP_INSERT_BKT 0x3d /* In rcode: half (gen id), byte (inserted flag), word (map element). */
+#define FN_VMAP_REMOVE_WC 0x27
+#define FN_VMAP_REMOVE_WV 0x28
+
+#define FN_VLIST_PUSH_TAIL_WV 0x2a
+#define FN_VLIST_PUSH_TAIL_WC 0x2b
+#define FN_VLIST_PUSH_TAIL_BKT 0x2c /* In rcode: no operands. */
+#define FN_VLIST_POP_TAIL_WV 0x2d
+#define FN_VLIST_POP_TAIL_WC 0x2e
+#define FN_VLIST_POP_TAIL_BKT 0x2f /* In rcode: half (gen id), word (popped value). */
+#define FN_VLIST_PUSH_HEAD_WV 0x30
+#define FN_VLIST_PUSH_HEAD_WC 0x31
+#define FN_VLIST_PUSH_HEAD_BKT 0x32 /* In rcode: no operands. */
+#define FN_VLIST_POP_HEAD_WV 0x33
+#define FN_VLIST_POP_HEAD_WC 0x34
+#define FN_VLIST_POP_HEAD_BKT 0x35 /* In rcode: half (gen id), word (popped value). */
+#define FN_EXIT 0x39 /* Operand: half (unwind length); unwinds frames up to the root before leaving the interpreter loop. */
+#define FN_EXIT_HARD 0x3a /* Calls exit() with the popped status, without unwinding. */
+#define FN_PREFIX 0x3b
+#define FN_SUFFIX 0x3c
+
+#define TRIM_DEFAULT 0x01
+#define TRIM_YES 0x02
+#define TRIM_NO 0x03
+
+/* Types of Generics. */
+enum GEN {
+ GEN_PARSER = 0x14,
+ GEN_LIST = 0x15,
+ GEN_MAP = 0x16
+};
+
+/* Known language element ids. */
+enum LEL_ID {
+ LEL_ID_PTR = 1,
+ LEL_ID_STR = 2,
+ LEL_ID_IGNORE = 3
+};
+
+/*
+ * Flags
+ */
+
+/* A tree that has been generated by a termDup. */
+#define PF_TERM_DUP 0x0001
+
+/* Has been processed by the commit function. All children have also been
+ * processed. */
+#define PF_COMMITTED 0x0002
+
+/* Created by a token generation action, not made from the input. */
+#define PF_ARTIFICIAL 0x0004
+
+/* Named node from a pattern or constructor. */
+#define PF_NAMED 0x0008
+
+/* There is reverse code associated with this tree node. */
+#define PF_HAS_RCODE 0x0010
+
+#define PF_RIGHT_IGNORE 0x0020
+
+#define PF_LEFT_IL_ATTACHED 0x0400
+#define PF_RIGHT_IL_ATTACHED 0x0800
+
+#define AF_LEFT_IGNORE 0x0100
+#define AF_RIGHT_IGNORE 0x0200
+
+#define AF_SUPPRESS_LEFT 0x4000
+#define AF_SUPPRESS_RIGHT 0x8000
+
+/*
+ * Call stack.
+ */
+
+/* Number of spots in the frame, after the args. */
+#define FR_AA 5
+
+/* Positions relative to the frame pointer. */
+#define FR_CA 4 /* call args */
+#define FR_RV 3 /* return value */
+#define FR_RI 2 /* return instruction */
+#define FR_RFP 1 /* return frame pointer */
+#define FR_RFD 0 /* return frame id. */
+
+/*
+ * Calling Convention:
+ * a1
+ * a2
+ * a3
+ * ...
+ * call args ptr FR_CA
+ * return value FR_RV
+ * return instr FR_RI
+ * return frame ptr FR_RFP
+ * return frame id FR_RFD
+ */
+
+/*
+ * User iterator call stack.
+ * Adds an iframe pointer, removes the return value.
+ */
+
+/* Number of spots in the frame, after the args. */
+#define IFR_AA 5
+
+/* Positions relative to the frame pointer. */
+#define IFR_RIN 2 /* return instruction */
+#define IFR_RIF 1 /* return iframe pointer */
+#define IFR_RFR 0 /* return frame pointer */
+
+/* Push i onto the VM stack, stored as the given type. Expects `sp` and `prg`
+ * to be in scope where the macro is expanded. When sp has reached
+ * prg->sb_beg, vm_bs_add is called first and its result becomes the new sp;
+ * sp is then pre-decremented and the value is written through it. */
+#define vm_push_type(type, i) \
+ ( ( sp == prg->sb_beg ? (sp = vm_bs_add(prg, sp, 1)) : 0 ), (*((type*)(--sp)) = (i)) )
+
+/* Reserve n slots: if moving sp down by n would pass prg->sb_beg, vm_bs_add
+ * supplies a new sp first; sp is then lowered by n. Nothing is written to
+ * the reserved slots. */
+#define vm_pushn(n) \
+ ( ( (sp-(n)) < prg->sb_beg ? (sp = vm_bs_add(prg, sp, n)) : 0 ), (sp -= (n)) )
+
+/* Pop the top slot and yield it cast to the given type. The slot is read
+ * before sp moves; if sp+1 would reach prg->sb_end, vm_bs_pop computes the
+ * new sp, otherwise sp is simply incremented. Written as a GNU statement
+ * expression ( ({ ... }) ), so it needs a compiler that supports them. */
+#define vm_pop_type(type) \
+ ({ SW r = *sp; (sp+1) >= prg->sb_end ? (sp = vm_bs_pop(prg, sp, 1)) : (sp += 1); (type)r; })
+
+/* Typed wrappers around vm_push_type, one per value kind that is pushed on
+ * the VM stack. Each name is defined exactly once. */
+#define vm_push_tree(i) vm_push_type(tree_t*, i)
+#define vm_push_input(i) vm_push_type(input_t*, i)
+#define vm_push_stream(i) vm_push_type(stream_t*, i)
+#define vm_push_struct(i) vm_push_type(struct_t*, i)
+#define vm_push_parser(i) vm_push_type(parser_t*, i)
+#define vm_push_value(i) vm_push_type(value_t, i)
+#define vm_push_string(i) vm_push_type(str_t*, i)
+#define vm_push_kid(i) vm_push_type(kid_t*, i)
+#define vm_push_ref(i) vm_push_type(ref_t*, i)
+#define vm_push_ptree(i) vm_push_type(parse_tree_t*, i)
+
+#define vm_pop_tree() vm_pop_type(tree_t*)
+#define vm_pop_input() vm_pop_type(input_t*)
+#define vm_pop_stream() vm_pop_type(stream_t*)
+#define vm_pop_struct() vm_pop_type(struct_t*)
+#define vm_pop_parser() vm_pop_type(parser_t*)
+#define vm_pop_list() vm_pop_type(list_t*)
+#define vm_pop_map() vm_pop_type(map_t*)
+#define vm_pop_value() vm_pop_type(value_t)
+#define vm_pop_string() vm_pop_type(str_t*)
+#define vm_pop_kid() vm_pop_type(kid_t*)
+#define vm_pop_ref() vm_pop_type(ref_t*)
+#define vm_pop_ptree() vm_pop_type(parse_tree_t*)
+
+/* Drop the top slot without reading it. Same sp adjustment as vm_pop_type:
+ * vm_bs_pop is used when sp+1 would reach prg->sb_end. */
+#define vm_pop_ignore() \
+ ({ (sp+1) >= prg->sb_end ? (sp = vm_bs_pop(prg, sp, 1)) : (sp += 1); })
+
+/* Drop the top n slots, going through vm_bs_pop when sp+n would reach
+ * prg->sb_end. */
+#define vm_popn(n) \
+ ({ (sp+(n)) >= prg->sb_end ? (sp = vm_bs_pop(prg, sp, n)) : (sp += (n)); })
+
+/* If fewer than n slots remain between sp and prg->sb_beg, replace sp with
+ * the result of vm_bs_add; otherwise leave sp alone. Presumably this
+ * guarantees n contiguous slots for the pushes that follow - confirm against
+ * vm_bs_add. */
+#define vm_contiguous(n) \
+ ( ( (sp-(n)) < prg->sb_beg ? (sp = vm_bs_add(prg, sp, n)) : 0 ) )
+
+#define vm_top() (*sp)
+#define vm_ptop() (sp)
+
+#define vm_ssize() ( prg->sb_total + (prg->sb_end - sp) )
+
+#define vm_local_iframe(o) (exec->iframe_ptr[o])
+#define vm_plocal_iframe(o) (&exec->iframe_ptr[o])
+
+void vm_init( struct colm_program * );
+tree_t** vm_bs_add( struct colm_program *, tree_t **, int );
+tree_t** vm_bs_pop( struct colm_program *, tree_t **, int );
+void vm_clear( struct colm_program * );
+
+typedef tree_t *SW;
+typedef tree_t **StackPtr;
+
+/* Can't use sizeof() because we have used types that are bigger than the
+ * serial representation. */
+#define SIZEOF_CODE 1
+#define SIZEOF_HALF 2
+#define SIZEOF_WORD sizeof(word_t)
+
+/* Interpreter state handed to colm_execute and the other execution entry
+ * points declared in this header. */
+typedef struct colm_execution
+{
+ /* Current frame; the vm_*_local helpers index it with the FR_* offsets. */
+ tree_t **frame_ptr;
+ /* Current iterator frame; read through vm_local_iframe/vm_plocal_iframe. */
+ tree_t **iframe_ptr;
+ long frame_id;
+ tree_t **call_args;
+
+ long rcode_unit_len;
+
+ parser_t *parser;
+ long steps;
+ long pcr;
+ tree_t *ret_val;
+ char WV;
+} execution_t;
+
+/* NOTE(review): this forward declaration is redundant, the struct is fully
+ * defined just above. */
+struct colm_execution;
+
+/* Return the address of local slot o. Offsets below FR_AA index the frame
+ * directly; offsets at or above FR_AA address the call arguments, which are
+ * reached through the pointer kept in frame slot FR_CA. */
+static inline tree_t **vm_get_plocal( struct colm_execution *exec, int o )
+{
+    if ( o < FR_AA )
+        return &exec->frame_ptr[o];
+
+    return &((tree_t**)exec->frame_ptr[FR_CA])[o - FR_AA];
+}
+
+/* Read local slot o. Offsets below FR_AA come straight from the frame;
+ * anything else is looked up in the call arguments found via frame slot
+ * FR_CA. */
+static inline tree_t *vm_get_local( struct colm_execution *exec, int o )
+{
+    if ( o < FR_AA )
+        return exec->frame_ptr[o];
+
+    return ((tree_t**)exec->frame_ptr[FR_CA])[o - FR_AA];
+}
+
+/* Store v into local slot o. Offsets below FR_AA are written into the frame;
+ * anything else is written into the call arguments found via frame slot
+ * FR_CA. */
+static inline void vm_set_local( struct colm_execution *exec, int o, tree_t* v )
+{
+    if ( o < FR_AA ) {
+        exec->frame_ptr[o] = v;
+        return;
+    }
+
+    ((tree_t**)exec->frame_ptr[FR_CA])[o - FR_AA] = v;
+}
+
+
+long string_length( head_t *str );
+const char *string_data( head_t *str );
+head_t *init_str_space( long length );
+head_t *string_copy( struct colm_program *prg, head_t *head );
+void string_free( struct colm_program *prg, head_t *head );
+void string_shorten( head_t *tokdata, long newlen );
+head_t *concat_str( head_t *s1, head_t *s2 );
+word_t str_atoi( head_t *str );
+word_t str_atoo( head_t *str );
+word_t str_uord16( head_t *head );
+word_t str_uord8( head_t *head );
+word_t cmp_string( head_t *s1, head_t *s2 );
+head_t *string_to_upper( head_t *s );
+head_t *string_to_lower( head_t *s );
+head_t *string_sprintf( program_t *prg, str_t *format, long integer );
+
+head_t *make_literal( struct colm_program *prg, long litoffset );
+head_t *int_to_str( struct colm_program *prg, word_t i );
+
+void colm_execute( struct colm_program *prg, execution_t *exec, code_t *code );
+void reduction_execution( execution_t *exec, tree_t **sp );
+void generation_execution( execution_t *exec, tree_t **sp );
+void reverse_execution( execution_t *exec, tree_t **sp, struct rt_code_vect *all_rev );
+
+kid_t *alloc_attrs( struct colm_program *prg, long length );
+void free_attrs( struct colm_program *prg, kid_t *attrs );
+kid_t *get_attr_kid( tree_t *tree, long pos );
+
+tree_t *split_tree( struct colm_program *prg, tree_t *t );
+
+void colm_rcode_downref_all( struct colm_program *prg, tree_t **sp, struct rt_code_vect *cv );
+int colm_make_reverse_code( struct pda_run *pda_run );
+void colm_transfer_reverse_code( struct pda_run *pda_run, parse_tree_t *tree );
+
+void split_ref( struct colm_program *prg, tree_t ***sp, ref_t *from_ref );
+
+void alloc_global( struct colm_program *prg );
+tree_t **colm_execute_code( struct colm_program *prg,
+ execution_t *exec, tree_t **sp, code_t *instr );
+code_t *colm_pop_reverse_code( struct rt_code_vect *all_rev );
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _COLM_BYTECODE_H */
+
diff --git a/src/closure.cc b/src/closure.cc
new file mode 100644
index 00000000..066bf12b
--- /dev/null
+++ b/src/closure.cc
@@ -0,0 +1,458 @@
+/*
+ * Copyright 2005-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stdbool.h>
+
+#include <iostream>
+
+#include "compiler.h"
+
+using std::endl;
+using std::cerr;
+
+/* Merge the item represented by prodState into dest, the state being closed.
+ *
+ * dest:       state receiving the item; must differ from prodState.
+ * prodState:  state of a production's fsm; it has at most one out transition.
+ * expandFrom: transition whose closure pulled this item in, or 0 when the
+ *             item is a core item of dest.
+ * prod:       production being expanded; only read when expandFrom carries
+ *             commits and prodState has no out transition.
+ */
+void Compiler::lr0BringInItem( PdaGraph *pdaGraph, PdaState *dest, PdaState *prodState,
+ PdaTrans *expandFrom, Production *prod )
+{
+ /* We use dot sets for finding unique states. In the future, should merge
+ * dots sets with the stateSet pointer (only need one of these). */
+ assert( dest != prodState );
+ dest->dotSet.insert( prodState->dotSet );
+
+ /* Carry over the pending commits of the item. */
+ dest->pendingCommits.insert( prodState->pendingCommits );
+ //if ( prodState->pendingCommits.length() > 0 )
+ // cerr << "THERE ARE PENDING COMMITS DRAWN IN" << endl;
+
+ if ( prodState->transMap.length() > 0 ) {
+ assert( prodState->transMap.length() == 1 );
+ PdaTrans *srcTrans = prodState->transMap[0].value;
+
+ /* Look for the source in the destination. */
+ TransMapEl *destTel = dest->transMap.find( srcTrans->lowKey );
+ if ( destTel == 0 ) {
+ /* Make a new state and transition to it. */
+ PdaState *newState = pdaGraph->addState();
+ PdaTrans *newTrans = new PdaTrans();
+
+ /* Attach the new transition to the new state. */
+ newTrans->lowKey = srcTrans->lowKey;
+ pdaGraph->attachTrans( dest, newState, newTrans );
+ pdaGraph->addInTrans( newTrans, srcTrans );
+
+ /* The transitions we make during lr0 closure are all shifts. */
+ assert( newTrans->isShift );
+ assert( srcTrans->isShift );
+
+ /* The new state must have its state set setup. */
+ newState->stateSet = new PdaStateSet;
+ newState->stateSet->insert( srcTrans->toState );
+
+ /* Insert the transition into the map. Be sure to set destTel, it
+ * is needed below. */
+ dest->transMap.insert( srcTrans->lowKey, newTrans, &destTel );
+
+ /* If the item is a non-term, queue it for closure. */
+ LangEl *langEl = langElIndex[srcTrans->lowKey];
+ if ( langEl != 0 && langEl->type == LangEl::NonTerm ) {
+ pdaGraph->transClosureQueue.append( newTrans );
+ //cerr << "put to trans closure queue" << endl;
+ }
+ }
+ else {
+ /* A transition on this key already exists in dest: extend the state
+ * set of its target and merge the source transition into it. */
+ //cerr << "merging transitions" << endl;
+ destTel->value->toState->stateSet->insert( srcTrans->toState );
+ pdaGraph->addInTrans( destTel->value, srcTrans );
+ }
+
+ /* If this is an expansion then we may need to bring in commits. */
+ if ( expandFrom != 0 && expandFrom->commits.length() > 0 ) {
+ //cerr << "SETTING COMMIT ON CLOSURE ROUND" << endl;
+ destTel->value->commits.insert( expandFrom->commits );
+
+ /* NOTE(review): presumably empty() clears the set so the commits
+ * are transferred only once - confirm against the set class. */
+ expandFrom->commits.empty();
+ }
+ }
+ else {
+ /* ProdState does not have any transitions out. It is at the end of a
+ * production. */
+ if ( expandFrom != 0 && expandFrom->commits.length() > 0 ) {
+ //cerr << "SETTING COMMIT IN PENDING LOOKAHEAD" << endl;
+ /* Record each commit length against the production id in dest. */
+ for ( LongSet::Iter len = expandFrom->commits; len.lte(); len++ )
+ dest->pendingCommits.insert( ProdIdPair( prod->prodId, *len ) );
+
+ expandFrom->commits.empty();
+ }
+ }
+}
+
+/* Close one state. The core items listed in state->stateSet are brought in
+ * first, then the items derived from every non-terminal transition that gets
+ * queued along the way. The closed state is then looked up in the closedMap:
+ * a new dot set keeps the state and queues its targets for closure, while an
+ * already known dot set causes the state to be discarded. */
+void Compiler::lr0InvokeClosure( PdaGraph *pdaGraph, PdaState *state )
+{
+ /* State should not already be closed. */
+ assert( !state->inClosedMap );
+
+ /* This is used each time we invoke closure, it must be cleared. */
+ pdaGraph->transClosureQueue.abandon();
+
+ /* Drag in the core items. */
+ for ( PdaStateSet::Iter ssi = *state->stateSet; ssi.lte(); ssi++ )
+ lr0BringInItem( pdaGraph, state, *ssi, 0, 0 );
+
+ /* Now bring in the derived items. */
+ while ( pdaGraph->transClosureQueue.length() > 0 ) {
+ PdaTrans *toClose = pdaGraph->transClosureQueue.detachFirst();
+ //cerr << "have a transition to derive" << endl;
+
+ /* Get the langEl. Only transitions on non-terminals are queued, so the
+ * index entry is expected to be present. */
+ LangEl *langEl = langElIndex[toClose->lowKey];
+
+ /* Make graphs for all of the productions that the non
+ * terminal goes to that are not already in the state's dotSet. */
+ for ( LelDefList::Iter prod = langEl->defList; prod.lte(); prod++ ) {
+ /* Bring in the start state of the production. */
+ lr0BringInItem( pdaGraph, state, prod->fsm->startState, toClose, prod );
+ }
+ }
+
+ /* Try and insert into the closed dict. */
+ DotSetMapEl *lastFound;
+ if ( pdaGraph->closedMap.insert( state, &lastFound ) ) {
+ /* Insertion into closed dict succeeded. There is no state with the
+ * same dot set. The state is now closed. It is guaranteed a spot in
+ * the closed dict and it will never go away (states never deleted
+ * during closure). */
+ pdaGraph->stateClosedList.append( state );
+ state->inClosedMap = true;
+
+ /* Add all of the states in the out transitions to the closure queue.
+ * The queue is consumed from the front, so this gives us a breadth
+ * first search of the graph. */
+ for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) {
+ /* Get the state the transEl goes to. */
+ PdaState *targ = trans->value->toState;
+
+ /* If the state on this transition has not already been slated
+ * for closure, then add it to the queue. */
+ if ( !targ->onClosureQueue && !targ->inClosedMap ) {
+ pdaGraph->stateClosureQueue.append( targ );
+ targ->onClosureQueue = true;
+ }
+ }
+ }
+ else {
+ /* Insertion into closed dict failed. There is an existing state
+ * with the same dot set, reported through lastFound. Hand this
+ * state's in transitions over to it, then delete this state together
+ * with its out transitions and the states they lead to. */
+ pdaGraph->inTransMove( lastFound, state );
+ for ( TransMap::Iter tel = state->transMap; tel.lte(); tel++ ) {
+ pdaGraph->stateList.detach( tel->value->toState );
+ delete tel->value->toState;
+ delete tel->value;
+ }
+ pdaGraph->stateList.detach( state );
+ delete state;
+ }
+}
+
+/* Invoke closure on the graph. States are taken from the front of the
+ * closure queue while new ones are appended to its tail, which yields a
+ * breadth first search of the tree we build. Note, there are back edges in
+ * this tree. They are the edges made when, upon closure, a dot set exists
+ * already. */
+void Compiler::lr0CloseAllStates( PdaGraph *pdaGraph )
+{
+    /* Closing a state may queue further states, so keep going until the
+     * queue has drained. */
+    while ( pdaGraph->stateClosureQueue.length() > 0 ) {
+        PdaState *next = pdaGraph->stateClosureQueue.detachFirst();
+
+        /* It is off the queue now; mark it before closing it. */
+        next->onClosureQueue = false;
+        lr0InvokeClosure( pdaGraph, next );
+    }
+}
+
+/* Copy onto trans the commit lengths that state holds pending for the
+ * reduction of production prodId. pdaGraph is not used. */
+void Compiler::transferCommits( PdaGraph *pdaGraph, PdaTrans *trans,
+        PdaState *state, long prodId )
+{
+    for ( ProdIdPairSet::Iter pending = state->pendingCommits; pending.lte(); pending++ ) {
+        /* Entries waiting on some other production stay where they are. */
+        if ( pending->onReduce != prodId )
+            continue;
+
+        trans->commits.insert( pending->length );
+    }
+}
+
+/* For every state that trans expands to, add a reduction of the expanding
+ * production on each of the follow keys. followKeys maps a follow key to the
+ * priority recorded with the reduction. Newly created follow transitions are
+ * appended to the transClosureQueue. */
+void Compiler::lalr1AddFollow2( PdaGraph *pdaGraph, PdaTrans *trans, FollowToAdd &followKeys )
+{
+ for ( ExpandToSet::Iter ets = trans->expandTo; ets.lte(); ets++ ) {
+ int prodId = ets->prodId;
+ PdaState *expandTo = ets->state;
+
+ for ( FollowToAdd::Iter fkey = followKeys; fkey.lte(); fkey++ ) {
+ TransMapEl *transEl = expandTo->transMap.find( fkey->key );
+
+ if ( transEl != 0 ) {
+ /* A transition on this key exists already: add the reduction
+ * to it. */
+ PdaTrans *destTrans = transEl->value;
+
+ transferCommits( pdaGraph, destTrans, expandTo, prodId );
+
+ pdaGraph->addInReduction( destTrans, prodId, fkey->value );
+ }
+ else {
+ /* Set up the follow transition. */
+ PdaTrans *followTrans = new PdaTrans;
+ followTrans->lowKey = fkey->key;
+ followTrans->isShift = false;
+ followTrans->reductions.insert( prodId, fkey->value );
+
+ transferCommits( pdaGraph, followTrans, expandTo, prodId );
+
+ /* Reductions all point at the shared actionDestState. */
+ pdaGraph->attachTrans( expandTo, actionDestState, followTrans );
+ expandTo->transMap.insert( followTrans->lowKey, followTrans );
+ pdaGraph->transClosureQueue.append( followTrans );
+ }
+ }
+ }
+}
+
+/* Return the highest priority found on this transition: the shift priority,
+ * when the transition is a shift, and the priority of every reduction are
+ * considered. LONG_MIN comes back when there is neither. */
+long PdaTrans::maxPrior()
+{
+    long best = LONG_MIN;
+
+    if ( isShift && best < shiftPrior )
+        best = shiftPrior;
+
+    for ( ReductionMap::Iter red = reductions; red.lte(); red++ ) {
+        if ( best < red->value )
+            best = red->value;
+    }
+
+    return best;
+}
+
+/* Seed the follow information for one state. For each transition into the
+ * state on a non-terminal, the out transitions of the state whose key is a
+ * terminal (or has no language element at all) are collected with their
+ * highest priority and handed to lalr1AddFollow2. */
+void Compiler::lalr1AddFollow1( PdaGraph *pdaGraph, PdaState *state )
+{
+    for ( PdaTransInList::Iter in = state->inRange; in.lte(); in++ ) {
+        /* Only transitions in on a non-terminal matter here. */
+        long inKey = in->lowKey;
+        LangEl *inEl = langElIndex[inKey];
+        if ( inEl == 0 || inEl->type != LangEl::NonTerm )
+            continue;
+
+        /* Gather the keys that can follow, with their priorities. */
+        FollowToAdd followKeys;
+        for ( TransMap::Iter out = state->transMap; out.lte(); out++ ) {
+            int outKey = out->key;
+            LangEl *outEl = langElIndex[outKey];
+            if ( outEl != 0 && outEl->type != LangEl::Term )
+                continue;
+
+            long prior = out->value->maxPrior();
+            followKeys.insert( outKey, prior );
+        }
+
+        if ( followKeys.length() > 0 )
+            lalr1AddFollow2( pdaGraph, in, followKeys );
+    }
+}
+
+/* Single key variant: for every state that trans expands to, add a reduction
+ * of the expanding production on followKey with the given priority. A follow
+ * transition is created, and queued on the transClosureQueue, when the state
+ * has no transition on followKey yet. */
+void Compiler::lalr1AddFollow2( PdaGraph *pdaGraph, PdaTrans *trans,
+ long followKey, long prior )
+{
+ for ( ExpandToSet::Iter ets = trans->expandTo; ets.lte(); ets++ ) {
+ int prodId = ets->prodId;
+ PdaState *expandTo = ets->state;
+
+ TransMapEl *transEl = expandTo->transMap.find( followKey );
+ if ( transEl != 0 ) {
+ /* Add in the reductions, or in the shift. */
+ PdaTrans *destTrans = transEl->value;
+
+ transferCommits( pdaGraph, destTrans, expandTo, prodId );
+
+ pdaGraph->addInReduction( destTrans, prodId, prior );
+ }
+ else {
+ /* Set up the follow transition. */
+ PdaTrans *followTrans = new PdaTrans;
+ followTrans->lowKey = followKey;
+ followTrans->isShift = false;
+ followTrans->reductions.insert( prodId, prior );
+
+ transferCommits( pdaGraph, followTrans, expandTo, prodId );
+
+ /* Reductions all point at the shared actionDestState. */
+ pdaGraph->attachTrans( expandTo, actionDestState, followTrans );
+ expandTo->transMap.insert( followTrans->lowKey, followTrans );
+ pdaGraph->transClosureQueue.append( followTrans );
+ }
+ }
+}
+
+/* Propagate follow information for a single transition. Nothing happens
+ * unless the key of the transition is a terminal or has no language element.
+ * Otherwise the key, with the highest priority on the transition, is added as
+ * a follow for every non-terminal transition into the transition's from
+ * state. */
+void Compiler::lalr1AddFollow1( PdaGraph *pdaGraph, PdaTrans *trans )
+{
+    int fkey = trans->lowKey;
+    LangEl *followEl = langElIndex[fkey];
+    if ( followEl != 0 && followEl->type != LangEl::Term )
+        return;
+
+    /* Finding non-terminals into the state. */
+    PdaState *from = trans->fromState;
+    for ( PdaTransInList::Iter in = from->inRange; in.lte(); in++ ) {
+        long inKey = in->lowKey;
+        LangEl *inEl = langElIndex[inKey];
+        if ( inEl == 0 || inEl->type != LangEl::NonTerm )
+            continue;
+
+        long prior = trans->maxPrior();
+        lalr1AddFollow2( pdaGraph, in, fkey, prior );
+    }
+}
+
+/* Add follow sets to an LR(0) graph to make it LALR(1). Creates the shared
+ * actionDestState, adds the eof shift behind the start of each parser
+ * element, seeds the follow information from every state and then works
+ * through the transClosureQueue until no new follow transitions appear. */
+void Compiler::lalr1AddFollowSets( PdaGraph *pdaGraph, LangElSet &parserEls )
+{
+ /* Make the state that all reduction actions go to. Since a reduction pops
+ * states off the stack and sets the new target state, this state is
+ * actually never reached. Just here to link the trans to. */
+ actionDestState = pdaGraph->addState();
+ pdaGraph->setFinState( actionDestState );
+
+ for ( LangElSet::Iter pe = parserEls; pe.lte(); pe++ ) {
+ /* Get the entry into the graph and traverse over start. */
+ PdaState *overStart = pdaGraph->followFsm( (*pe)->startState, (*pe)->rootDef->fsm );
+
+ /* Add _eof after the initial _start. */
+ PdaTrans *eofTrans = pdaGraph->insertNewTrans( overStart, actionDestState,
+ (*pe)->eofLel->id, (*pe)->eofLel->id );
+ eofTrans->isShift = true;
+ }
+
+ /* This was used during lr0 table construction. */
+ pdaGraph->transClosureQueue.abandon();
+
+ /* Need to pass over every state initially. */
+ for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ )
+ lalr1AddFollow1( pdaGraph, state );
+
+ /* While the closure queue has items, pop them off and add follow
+ * characters. Processing a transition may queue further ones. */
+ while ( pdaGraph->transClosureQueue.length() > 0 ) {
+ /* Pop the first item off and add Follow for it. */
+ PdaTrans *trans = pdaGraph->transClosureQueue.detachFirst();
+ lalr1AddFollow1( pdaGraph, trans );
+ }
+}
+
+/* Link production expansions to the place they expand to. After numbering
+ * the states, every out transition on a non-terminal gets one expandTo entry
+ * per production of that non-terminal: the state reached by following the
+ * production's fsm from the transition's source state, paired with the
+ * production id. */
+void Compiler::linkExpansions( PdaGraph *pdaGraph )
+{
+    pdaGraph->setStateNumbers();
+
+    for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
+        for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) {
+            /* Only transitions out on non terminals have expansions. */
+            long key = trans->key;
+            LangEl *nonTerm = langElIndex[key];
+            if ( nonTerm == 0 || nonTerm->type != LangEl::NonTerm )
+                continue;
+
+            /* One entry for each production the non terminal expands to. */
+            for ( LelDefList::Iter prod = nonTerm->defList; prod.lte(); prod++ ) {
+                PdaState *landing = pdaGraph->followFsm( state, prod->fsm );
+                trans->value->expandTo.insert( ExpandToEl( landing, prod->prodId ) );
+            }
+        }
+    }
+}
+
+/* Add terminal versions of all nonterminal transitions. For every out
+ * transition on a non-terminal, a shift transition on that non-terminal's
+ * termDup id is added from the same state to the same target state. */
+void Compiler::addDupTerms( PdaGraph *pdaGraph )
+{
+    for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
+        /* The duplicates are collected here and attached in a second pass,
+         * once the walk over the transMap has finished. */
+        PdaTransList newTransitions;
+        for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) {
+            LangEl *lel = langElIndex[trans->value->lowKey];
+
+            /* Keys without a language element have a 0 entry in the index,
+             * as the other lookups in this file allow for. They cannot be
+             * non-terminals, so there is nothing to duplicate. */
+            if ( lel != 0 && lel->type == LangEl::NonTerm ) {
+                PdaTrans *dupTrans = new PdaTrans;
+                dupTrans->lowKey = lel->termDup->id;
+                dupTrans->isShift = true;
+
+                /* Save the target state in to state. In the next loop when we
+                 * attach the transition we must clear this because the
+                 * attaching code requires the transition to be unattached. */
+                dupTrans->toState = trans->value->toState;
+                newTransitions.append( dupTrans );
+            }
+        }
+
+        for ( PdaTrans *dup = newTransitions.head; dup != 0; ) {
+            /* Read the link first; the transition is handed over to the
+             * graph by attachTrans below. */
+            PdaTrans *next = dup->next;
+            PdaState *toState = dup->toState;
+            dup->toState = 0;
+            pdaGraph->attachTrans( state, toState, dup );
+            state->transMap.insert( dup->lowKey, dup );
+            dup = next;
+        }
+    }
+}
+
+/* Generate a LALR(1) graph. An entry state is created for each parser
+ * element and queued for closure; the LR(0) closure is then run, terminal
+ * duplicates of the non-terminal transitions are added, the expansions are
+ * linked and finally the follow sets are added. */
+void Compiler::lalr1GenerateParser( PdaGraph *pdaGraph, LangElSet &parserEls )
+{
+ /* Make the initial graph. */
+ pdaGraph->langElIndex = langElIndex;
+
+ for ( Vector<LangEl*>::Iter r = parserEls; r.lte(); r++ ) {
+ /* Create the entry point. */
+ PdaState *rs = pdaGraph->addState();
+ pdaGraph->entryStateSet.insert( rs );
+
+ /* State set of just one state. */
+ rs->stateSet = new PdaStateSet;
+ rs->stateSet->insert( (*r)->rootDef->fsm->startState );
+
+ /* Queue the start state for closure. */
+ rs->onClosureQueue = true;
+ pdaGraph->stateClosureQueue.append( rs );
+
+ /* Remember the entry state on the language element; it is read again
+ * when the follow sets are added. */
+ (*r)->startState = rs;
+ }
+
+ /* Run the lr0 closure. */
+ lr0CloseAllStates( pdaGraph );
+
+ /* Add terminal versions of all nonterminal transitions. */
+ addDupTerms( pdaGraph );
+
+ /* Link production expansions to the place they expand to. */
+ linkExpansions( pdaGraph );
+
+ /* Walk the graph adding follow sets to the LR(0) graph. */
+ lalr1AddFollowSets( pdaGraph, parserEls );
+
+// /* Set the commit on the final eof shift. */
+// PdaTrans *overStart = pdaGraph->startState->findTrans( rootEl->id );
+// PdaTrans *eofTrans = overStart->toState->findTrans( eofLangEl->id );
+// eofTrans->afterShiftCommits.insert( 2 );
+}
diff --git a/src/codegen.cc b/src/codegen.cc
new file mode 100644
index 00000000..6ff9983f
--- /dev/null
+++ b/src/codegen.cc
@@ -0,0 +1,62 @@
+/*
+ * Copyright 2006-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <iostream>
+#include "fsmcodegen.h"
+
+/* Write the fixed block of #include lines to the output stream: four C
+ * library headers followed by the colm headers, each group ending in a
+ * blank line. */
+void FsmCodeGen::writeIncludes()
+{
+ out <<
+ "#include <stdio.h>\n"
+ "#include <stdlib.h>\n"
+ "#include <string.h>\n"
+ "#include <assert.h>\n"
+ "\n"
+ "#include <colm/pdarun.h>\n"
+ "#include <colm/debug.h>\n"
+ "#include <colm/bytecode.h>\n"
+ "#include <colm/defs.h>\n"
+ "#include <colm/input.h>\n"
+ "#include <colm/tree.h>\n"
+ "#include <colm/program.h>\n"
+ "#include <colm/colm.h>\n"
+ "\n";
+}
+
+/* Write a C main() to the output stream and flush it. The emitted function
+ * creates a program from the object named by objectName, sets its debug
+ * realm to activeRealm, runs it with argc/argv and returns the status that
+ * colm_delete_program reports. */
+void FsmCodeGen::writeMain( long activeRealm )
+{
+ out <<
+ "int main( int argc, const char **argv )\n"
+ "{\n"
+ " struct colm_program *prg;\n"
+ " int exit_status;\n"
+ "\n"
+ " prg = colm_new_program( &" << objectName << " );\n"
+ " colm_set_debug( prg, " << activeRealm << " );\n"
+ " colm_run_program( prg, argc, argv );\n"
+ " exit_status = colm_delete_program( prg );\n"
+ " return exit_status;\n"
+ "}\n"
+ "\n";
+
+ out.flush();
+}
diff --git a/src/codevect.c b/src/codevect.c
new file mode 100644
index 00000000..50b86336
--- /dev/null
+++ b/src/codevect.c
@@ -0,0 +1,183 @@
+/*
+ * Copyright 2010-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <string.h>
+#include <stdlib.h>
+
+#include <colm/rtvector.h>
+#include <colm/pdarun.h>
+
+/* Put vect into the empty state: no data, zero length, zero allocation. */
+void init_rt_code_vect( struct rt_code_vect *vect )
+{
+    vect->alloc_len = 0;
+    vect->tab_len = 0;
+    vect->data = 0;
+}
+
+/* Growth policy: keep the existing allocation when it already holds needed
+ * elements, otherwise ask for twice what is needed. */
+static long new_size_up( long existing, long needed )
+{
+    if ( needed > existing )
+        return needed << 1;
+
+    return existing;
+}
+
+/* Shrink policy: once fewer than a quarter of the existing allocation is
+ * needed, drop to twice what is needed; otherwise keep the allocation. */
+static long new_size_down( long existing, long needed )
+{
+    long quarter = existing >> 2;
+
+    if ( needed < quarter )
+        return needed << 1;
+
+    return existing;
+}
+
+/* Grow the allocation so that it can hold len elements, with new_size_up
+ * choosing the new allocation length. Reads and writes alloc_len. Does not
+ * read or write tab_len. */
+static void up_resize( struct rt_code_vect *vect, long len )
+{
+    long grown = new_size_up( vect->alloc_len, len );
+
+    /* Nothing to do unless the allocation really gets bigger. */
+    if ( grown <= vect->alloc_len )
+        return;
+
+    vect->alloc_len = grown;
+
+    /* realloc on a null pointer acts like malloc, so a single call covers
+     * both creating the table and resizing an existing one. The result is
+     * not checked for allocation failure. */
+    vect->data = (code_t*) realloc( vect->data, sizeof(code_t) * grown );
+}
+
+/* Shrink the allocation for len elements, with new_size_down choosing the
+ * new allocation length. Reads and writes alloc_len. Does not read or write
+ * tab_len. */
+static void down_resize( struct rt_code_vect *vect, long len)
+{
+ /* Ask the resize policy what the new allocation length will be. */
+ long new_len = new_size_down( vect->alloc_len, len );
+
+ /* Did the data shrink? */
+ if ( new_len < vect->alloc_len ) {
+ vect->alloc_len = new_len;
+ if ( new_len == 0 ) {
+ /* Simply free the data. */
+ free( vect->data );
+ vect->data = 0;
+ }
+ else {
+ /* Not shrinking to size zero, realloc it to the smaller size. The
+ * result is not checked for failure. */
+ vect->data = (code_t*) realloc( vect->data, sizeof(code_t) * new_len );
+ //if ( vect->data == 0 )
+ // throw std::bad_alloc();
+ }
+ }
+}
+
+
+/* Release the storage of vect and reset both lengths to zero. A vector that
+ * holds no data is left untouched. */
+void colm_rt_code_vect_empty( struct rt_code_vect *vect )
+{
+    if ( vect->data == 0 )
+        return;
+
+    free( vect->data );
+    vect->data = 0;
+    vect->alloc_len = 0;
+    vect->tab_len = 0;
+}
+
+/* Overwrite len elements of vect, starting at pos, with the elements at val.
+ * A negative pos is taken relative to the current length. When the write
+ * runs past the current end the vector is grown and tab_len is set to the
+ * new end; otherwise tab_len is unchanged. pos is not range checked. */
+void colm_rt_code_vect_replace( struct rt_code_vect *vect, long pos,
+ const code_t *val, long len )
+{
+ long end_pos, i;
+ //code_t *item;
+
+ /* If we are given a negative position to replace at then
+ * treat it as a position relative to the length. */
+ if ( pos < 0 )
+ pos = vect->tab_len + pos;
+
+ /* The end is the one past the last item that we want
+ * to write to. */
+ end_pos = pos + len;
+
+ /* Make sure we have enough space. */
+ if ( end_pos > vect->tab_len ) {
+ up_resize( vect, end_pos );
+
+ /* Delete any objects we need to delete. */
+ //item = vect->data + pos;
+ //for ( i = pos; i < vect->tabLen; i++, item++ )
+ // item->~code_t();
+
+ /* We are extending the vector, set the new data length. */
+ vect->tab_len = end_pos;
+ }
+ else {
+ /* Delete any objects we need to delete. */
+ //item = vect->data + pos;
+ //for ( i = pos; i < endPos; i++, item++ )
+ // item->~code_t();
+ }
+
+ /* Copy the new elements in, one at a time, front to back. dst is taken
+ * after the resize above, so it points into the current table. */
+ code_t *dst = vect->data + pos;
+ const code_t *src = val;
+ for ( i = 0; i < len; i++, dst++, src++ )
+ *dst = *src;
+}
+
+/* Remove len elements from vect starting at pos. A negative pos is taken
+ * relative to the current length. Elements behind the removed range slide
+ * down, the allocation may shrink, and tab_len is reduced by len. Neither
+ * pos nor len is range checked. */
+void colm_rt_code_vect_remove( struct rt_code_vect *vect, long pos, long len )
+{
+ long new_len, len_to_slide_over, end_pos;
+ code_t *dst;//, *item;
+
+ /* If we are given a negative position to remove at then
+ * treat it as a position relative to the length. */
+ if ( pos < 0 )
+ pos = vect->tab_len + pos;
+
+ /* The first position after the last item deleted. */
+ end_pos = pos + len;
+
+ /* The new data length. */
+ new_len = vect->tab_len - len;
+
+ /* The place in the data we are deleting at. */
+ dst = vect->data + pos;
+
+ /* Call Destructors. */
+ //item = dst;
+ //for ( long i = 0; i < len; i += 1, item += 1 )
+ // item->~code_t();
+
+ /* Shift data over if necessary. The ranges can overlap, hence memmove. */
+ len_to_slide_over = vect->tab_len - end_pos;
+ if ( len > 0 && len_to_slide_over > 0 )
+ memmove(dst, dst + len, sizeof(code_t)*len_to_slide_over);
+
+ /* Shrink the data if necessary. */
+ down_resize( vect, new_len );
+
+ /* Set the new data length. */
+ vect->tab_len = new_len;
+}
+
+
diff --git a/src/colm-config.cmake.in b/src/colm-config.cmake.in
new file mode 100644
index 00000000..7e3b8f27
--- /dev/null
+++ b/src/colm-config.cmake.in
@@ -0,0 +1,3 @@
+# @_PACKAGE_NAME@-config.cmake Generated from colm-config.cmake.in by cmake
+
+# Pull in the targets file that sits in the same directory as this config
+# file. @_PACKAGE_NAME@ is a placeholder filled in when this template is
+# turned into the real config file.
+include("${CMAKE_CURRENT_LIST_DIR}/@_PACKAGE_NAME@-targets.cmake")
diff --git a/src/colm-wrap.sh b/src/colm-wrap.sh
new file mode 100644
index 00000000..8140afca
--- /dev/null
+++ b/src/colm-wrap.sh
@@ -0,0 +1,79 @@
+#!/bin/bash
+#
+
+# This wrapper around the colm program (and bootstrap programs) allows us to
+# limit ourselves to one output file per makefile rule. It packs up multiple
+# colm output files into one pack file, which is used as an intermediate file.
+# We can extract the individual files from the pack one at a time, in separate
+# rules.
+#
+# Ultimately this functionality should be rolled into the colm program itself.
+# Until that is complete, this wrapper exists.
+#
+# Two modes, selected by the name of the input file:
+#   - input ends in .pack: extract the member "<output>.pack" from the input
+#     pack and move it to <output>.
+#   - otherwise: run the wrapped command on the input, with the argument of
+#     every -p/-e/-x/-m option rewritten to "<name>.pack", then tar those
+#     files into <output> and remove them.
+#
+# The exit status is that of the first step that failed (wrapped command, tar
+# or mv), or zero if every step succeeded.
+#
+
+unset CMD
+unset ARGS
+unset OUTPUT
+unset PACKS
+
+while getopts "w:p:o:e:x:RcD:I:L:vdlirS:M:vHh?-:sVa:m:b:E:" opt; do
+
+    # For the colm wrapper case.
+    case "$opt" in
+        w)
+            # Which command to wrap.
+            CMD=$OPTARG
+            ;;
+        o)
+            # Pack file name. For wrapping.
+            OUTPUT=$OPTARG
+            ;;
+        [pexm])
+            # Options naming an output file: redirect each one to a .pack
+            # name and remember it so it can be added to the pack.
+            ARGS="$ARGS -$opt $OPTARG.pack"
+            PACKS="$PACKS $OPTARG.pack"
+            ;;
+        [DILSMambE-]) ARGS="$ARGS -$opt $OPTARG" ;;
+        [RcvdlirvHhsV]) ARGS="$ARGS -$opt" ;;
+        ?)
+            exit 1;
+            ;;
+    esac
+done
+
+# Shift over the args.
+shift $((OPTIND - 1));
+
+INPUT="$1"
+if [ -z "$INPUT" ]; then
+    echo colm-wrap: no input file given >&2
+    exit 1
+fi
+
+if [ -z "$OUTPUT" ]; then
+    echo colm-wrap: no output file given >&2
+    exit 1
+fi
+
+if [ "${INPUT%.pack}" != "$INPUT" ]; then
+    # Extraction mode. Only move the member into place when tar succeeded, so
+    # a failed extraction cannot be masked by a stale "$OUTPUT.pack" left over
+    # from an earlier run.
+    tar -xmf "$INPUT" "$OUTPUT.pack" && mv "$OUTPUT.pack" "$OUTPUT"
+    EXIT_STATUS=$?
+else
+    # Default command to colm.
+    CMD=${CMD:=colm}
+
+    # Prefer a binary sitting next to this script when the script was invoked
+    # with a path; otherwise fall back to the installed location.
+    if [ "$(basename "$0")" != "$0" ] && [ -x "$(dirname "$0")/$CMD" ]; then
+        COLM="$(dirname "$0")/$CMD"
+    else
+        COLM="@prefix@/bin/$CMD"
+    fi
+
+    # ARGS and PACKS are space-separated lists built above and are left
+    # unquoted on purpose so that they split back into separate words.
+    "$COLM" $ARGS "$INPUT"
+    EXIT_STATUS=$?
+    if [ "$EXIT_STATUS" -eq 0 ]; then
+        # Report a failure to create the pack instead of exiting 0 without
+        # having produced the output file.
+        tar -cf "$OUTPUT" $PACKS
+        EXIT_STATUS=$?
+    fi
+    rm -f $PACKS
+fi
+
+exit $EXIT_STATUS
+
diff --git a/src/colm.h b/src/colm.h
new file mode 100644
index 00000000..39506cab
--- /dev/null
+++ b/src/colm.h
@@ -0,0 +1,159 @@
+/*
+ * Copyright 2006-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _COLM_COLM_H
+#define _COLM_COLM_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct colm_data;
+struct colm_tree;
+struct colm_kid;
+struct colm_program;
+struct colm_sections;
+struct colm_tree;
+struct colm_location;
+
+/* Indentation state. A pointer to one of these is carried in
+ * struct colm_print_args (member indent). */
+struct indent_impl
+{
+ /* Indentation. */
+ int level;
+ int indent;
+};
+
+extern struct colm_sections colm_object;
+
+typedef unsigned long colm_value_t;
+typedef unsigned char colm_alph_t;
+
+/* Tree node as seen through this public header. */
+struct colm_tree
+{
+ /* First four will be overlaid in other structures. */
+ short id;
+ unsigned short flags;
+ /* NOTE(review): presumably a reference count -- confirm. */
+ long refs;
+ /* Head of this node's list of children. */
+ struct colm_kid *child;
+
+ struct colm_data *tokdata;
+
+ /* FIXME: this needs to go somewhere else. Will do for now. */
+ unsigned short prod_num;
+};
+
+/* Settings and callbacks handed to colm_print_tree_args. The member order is
+ * part of the interface: users may fill this in with a positional
+ * initializer. */
+struct colm_print_args
+{
+ /* Opaque user pointer, available to the callbacks through args->arg. */
+ void *arg;
+ int comm;
+ int attr;
+ int trim;
+ struct indent_impl *indent;
+
+ /* Output sink: receives a buffer of printed data and its length. */
+ void (*out)( struct colm_print_args *args, const char *data, int length );
+ /* Callbacks for opening a tree, printing a terminal and closing a tree.
+ * colm_print_null and colm_print_term_tree, declared in this header, have
+ * matching signatures. */
+ void (*open_tree)( struct colm_program *prg, struct colm_tree **sp,
+ struct colm_print_args *args, struct colm_kid *parent, struct colm_kid *kid );
+ void (*print_term)( struct colm_program *prg, struct colm_tree **sp,
+ struct colm_print_args *args, struct colm_kid *kid );
+ void (*close_tree)( struct colm_program *prg, struct colm_tree **sp,
+ struct colm_print_args *args, struct colm_kid *parent, struct colm_kid *kid );
+};
+
+void colm_print_null( struct colm_program *prg, struct colm_tree **sp,
+ struct colm_print_args *args, struct colm_kid *parent, struct colm_kid *kid );
+void colm_print_term_tree( struct colm_program *prg, struct colm_tree **sp,
+ struct colm_print_args *print_args, struct colm_kid *kid );
+
+struct colm_tree **colm_vm_root( struct colm_program *prg );
+struct colm_tree *colm_return_val( struct colm_program *prg );
+void colm_print_tree_args( struct colm_program *prg, struct colm_tree **sp,
+ struct colm_print_args *print_args, struct colm_tree *tree );
+
+int colm_repeat_end( struct colm_tree *tree );
+int colm_list_last( struct colm_tree *tree );
+
+struct colm_tree *colm_get_rhs_val( struct colm_program *prg, struct colm_tree *tree, int *a );
+struct colm_tree *colm_get_attr( struct colm_tree *tree, long pos );
+struct colm_tree *colm_get_global( struct colm_program *prg, long pos );
+struct colm_tree *colm_get_repeat_next( struct colm_tree *tree );
+struct colm_tree *colm_get_repeat_val( struct colm_tree *tree );
+struct colm_tree *colm_get_left_repeat_next( struct colm_tree *tree );
+struct colm_tree *colm_get_left_repeat_val( struct colm_tree *tree );
+struct colm_location *colm_find_location( struct colm_program *prg, struct colm_tree *tree );
+
+/* View a C string as a pointer to colm alphabet characters. Only the pointer
+ * type changes; no data is copied. */
+static inline const colm_alph_t *colm_alph_from_cstr( const char *cstr )
+{
+    const colm_alph_t *as_alph = (const colm_alph_t*)cstr;
+    return as_alph;
+}
+/* View a pointer to colm alphabet characters as a C string. Only the pointer
+ * type changes; no data is copied. */
+static inline const char *colm_cstr_from_alph( const colm_alph_t *alph )
+{
+    const char *as_cstr = (const char*)alph;
+    return as_cstr;
+}
+
+/* Debug realms. To turn on, pass to colm_set_debug before invocation. */
+#define COLM_DBG_BYTECODE 0x00000001
+#define COLM_DBG_PARSE 0x00000002
+#define COLM_DBG_MATCH 0x00000004
+#define COLM_DBG_COMPILE 0x00000008
+#define COLM_DBG_POOL 0x00000010
+#define COLM_DBG_PRINT 0x00000020
+#define COLM_DBG_INPUT 0x00000040
+#define COLM_DBG_SCAN 0x00000080
+
+#define COLM_RN_NEITHER 0x00
+#define COLM_RN_DATA 0x01
+#define COLM_RN_LOC 0x02
+#define COLM_RN_BOTH 0x03
+
+/*
+ * Primary Interface.
+ */
+
+/* Allocate a program. Takes program static data as arg. Normally this is
+ * &colm_object. */
+struct colm_program *colm_new_program( struct colm_sections *rtd );
+
+/* Enable debug realms for a program. */
+void colm_set_debug( struct colm_program *prg, long active_realm );
+
+/* Run a top-level colm program. */
+void colm_run_program( struct colm_program *prg, int argc, const char **argv );
+
+/* Run a top-level colm program, with argument lengths (allows binary data). */
+void colm_run_program2( struct colm_program *prg, int argc, const char **argv, const int *argl );
+
+/* Run a single exported colm function. */
+struct colm_tree *colm_run_func( struct colm_program *prg, int frame_id,
+ const char **params, int param_count );
+
+/* Delete a colm program. Clears all memory. */
+int colm_delete_program( struct colm_program *prg );
+
+/* Get and set the pointer to the reduce struct used, and set the
+ * reduce_clean value. */
+void *colm_get_reduce_ctx( struct colm_program *prg );
+void colm_set_reduce_ctx( struct colm_program *prg, void *ctx );
+void colm_set_reduce_clean( struct colm_program *prg, unsigned char reduce_clean );
+
+const char *colm_error( struct colm_program *prg, int *length );
+
+const char **colm_extract_fns( struct colm_program *prg );
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _COLM_COLM_H */
+
diff --git a/src/colm.lm b/src/colm.lm
new file mode 100644
index 00000000..c95d0c41
--- /dev/null
+++ b/src/colm.lm
@@ -0,0 +1,910 @@
+# Main region.
+lex
+ token DEF / 'def' /
+ token LEX / 'lex' /
+ token END / 'end' /
+ token TOKEN / 'token' /
+ token RL / 'rl' /
+ token IGNORE / 'ignore' /
+ token PRINT / 'print' /
+ token PRINTS / 'prints' /
+ token PARSE / 'parse' /
+ token REDUCE / 'reduce' /
+ token READ_REDUCE /'read_reduce'/
+ token PARSE_TREE / 'parse_tree' /
+ token PARSE_STOP / 'parse_stop' /
+ token CONS / 'construct' | 'cons' /
+ token MATCH / 'match' /
+ token REQUIRE / 'require' /
+ token SEND / 'send' /
+ token SEND_TREE / 'send_tree' /
+ token NAMESPACE / 'namespace' /
+ token REDUCTION / 'reduction' /
+ token FOR / 'for' /
+ token IF / 'if' /
+ token YIELD / 'yield' /
+ token WHILE / 'while' /
+ token ELSIF / 'elsif' /
+ token ELSE / 'else' /
+ token IN / 'in' /
+ token PARSER / 'parser' | 'accum' /
+ token LIST / 'list' /
+ token LIST_EL / 'list_el' /
+ token MAP / 'map' /
+ token MAP_EL / 'map_el' /
+ token PTR / 'ptr' /
+ token ITER / 'iter' /
+ token REF / 'ref' /
+ token EXPORT / 'export' /
+ token RETURN / 'return' /
+ token BREAK / 'break' /
+ token REJECT / 'reject' /
+ token REDUCEFIRST / 'reducefirst' /
+ token ALIAS / 'alias' /
+ token COMMIT / 'commit' /
+ token NEW / 'new' /
+ token PREEOF / 'preeof' /
+ token GLOBAL / 'global' /
+ token EOS / 'eos' /
+ token CAST / 'cast' /
+ token SWITCH / 'switch' /
+ token CASE / 'case' /
+ token DEFAULT / 'default' /
+ token INT / 'int' /
+ token BOOL / 'bool' /
+ token VOID / 'void' /
+
+ token MAKE_TOKEN / 'make_token' /
+ token MAKE_TREE / 'make_tree' /
+
+ token TYPEID / 'typeid' /
+
+ token LITERAL / 'literal' /
+ token CONTEXT / 'context' /
+ token STRUCT / 'struct' /
+ token NI /'ni'/
+
+ token NIL / 'nil' /
+ token TRUE / 'true' /
+ token FALSE / 'false' /
+
+ token LEFT /'left'/
+ token RIGHT /'right'/
+ token NONASSOC /'nonassoc'/
+
+ token INCLUDE /'include'/
+
+ token id /
+ ( 'a' .. 'z' | 'A' .. 'Z' | '_' ) .
+ ( 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' | '_' )*
+ /
+
+ token number
+ / ( '0' .. '9' ) + /
+
+ token backtick_lit /
+ '`' .
+ ^( ' ' | '\n' | '\t' ) .
+ ^( ' ' | '\n' | '\t' | '`' | ']' )* .
+ ( '`' | '' )
+ /
+
+ token DQ / '\"' / - ni
+ token SQ / '\'' / - ni
+ token TILDE / '~' / - ni
+
+ token SQOPEN /'['/
+ token SQCLOSE /']'/
+ token BAR /'|'/
+ token FSLASH /'/'/
+ token COLON /':'/
+ token DOUBLE_COLON /'::'/
+ token DOT /'.'/
+ token ARROW /'->'/
+ token POPEN /'('/
+ token PCLOSE /')'/
+ token COPEN /'{'/
+ token CCLOSE /'}'/
+ token STAR /'*'/
+ token QUESTION /'?'/
+ token EQUALS /'='/
+ token EQEQ /'=='/
+ token NEQ /'!='/
+ token COMMA /','/
+ token LT /'<'/
+ token GT /'>'/
+ token LTEQ /'<='/
+ token GTEQ /'>='/
+ token BANG /'!'/
+ token DOLLAR /'$'/
+ token CARET /'^'/
+ token AT /'@'/
+ token PERCENT /'%'/
+ token PLUS /'+'/
+ token MINUS /'-'/
+ token AMPAMP /'&&'/
+ token BARBAR /'||'/
+
+ ignore / ( '\n' | '\t' | ' ' )+ /
+ ignore / '#' . ( ^'\n' )* . '\n' /
+end
+
+lex
+ token LIT_DQ / '\"' /
+ token LIT_DQ_NL / '\n' /
+ token LIT_SQOPEN / '[' /
+ token LIT_SQCLOSE / ']' /
+
+ token lit_dq_data
+ / ( ^( '\n' | '\"' | '[' | ']' | '\\' ) | '\\' . any )+ /
+end
+
+lex
+ token CONS_SQ / '\'' /
+ token CONS_SQ_NL / '\n' /
+
+ token sq_cons_data
+ / ( ^( '\n' | '\'' | '\\' ) | '\\' . any )+ /
+end
+
+lex
+ token TILDE_NL / '\n' /
+ token tilde_data
+ / ( ^'\n' )+ /
+end
+
+lex
+ token lex_id /
+ ( 'a' .. 'z' | 'A' .. 'Z' | '_' ) .
+ ( 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' | '_' )*
+ /
+
+ token lex_uint
+ / ( '0' .. '9' )+ /
+
+ token lex_hex
+ / '0x' . ( '0' .. '9' | 'a' .. 'f' | 'A' .. 'F' )+ /
+
+ token lex_lit /
+ '\'' . ( ^( '\'' | '\\' ) | '\\' . any )* . ( '\'' | '\'i' ) |
+ '\"' . ( ^( '\"' | '\\' ) | '\\' . any )* . ( '\"' | '\"i' )
+ /
+
+ token LEX_DOT /'.'/
+ token LEX_BAR /'|'/
+ token LEX_AMP /'&'/
+ token LEX_DASH /'-'/
+ token LEX_POPEN /'('/
+ token LEX_PCLOSE /')'/
+ token LEX_STAR /'*'/
+ token LEX_STARSTAR /'**'/
+ token LEX_QUESTION /'?'/
+ token LEX_PLUS /'+'/
+ token LEX_CARET /'^'/
+ token LEX_DOTDOT /'..'/
+ token LEX_SQOPEN_POS /'['/ - ni
+ token LEX_SQOPEN_NEG /'[^'/ - ni
+ token LEX_FSLASH /'/'/
+
+ token LEX_DASHDASH /'--'/
+ token LEX_COLON_GT /':>'/
+ token LEX_COLON_GTGT /':>>'/
+ token LEX_LT_COLON /'<:'/
+
+ ignore /
+ ( '\n' | '\t' | ' ' ) .
+ ( '\n' | '\t' | ' ' )*
+ /
+
+ ignore / '#' . ( ^'\n' )* . '\n' /
+end
+
+lex
+ token RE_DASH / '-' /
+ token RE_CHAR / ^( '\\' | '-' | ']' ) | '\\' . any /
+ token RE_SQCLOSE / ']' /
+end
+
+def start
+ [RootItemList: root_item<*]
+
+def root_item
+ [rl_def] :Rl commit
+| [literal_def] :Literal commit
+| [token_def] :Token commit
+| [ic_def] :IgnoreCollector commit
+| [ignore_def] :Ignore commit
+| [cfl_def] :Cfl commit
+| [region_def] :Region commit
+| [struct_def] :Struct commit
+| [namespace_def] :Namespace commit
+| [function_def] :Function commit
+| [in_host_def] :InHost commit
+| [iter_def] :Iter commit
+| [statement] :Statement commit
+| [global_def] :Global commit
+| [export_def] :Export commit
+| [pre_eof_def] :PreEof commit
+| [precedence_def] :Precedence commit
+| [alias_def] :Alias commit
+| [_include] :Include commit
+| [reduction_def] :Reduction commit
+
+def _include
+ [INCLUDE SQ SqConsDataList: sq_cons_data<* sq_lit_term]
+
+def precedence_def
+ [pred_type pred_token_list]
+
+def pred_type
+ [LEFT] :Left
+| [RIGHT] :Right
+| [NONASSOC] :NonAssoc
+
+def pred_token_list
+ [pred_token_list COMMA pred_token] :List
+| [pred_token] :Base
+
+def pred_token
+ [region_qual id] :Id
+| [region_qual backtick_lit] :Lit
+
+def pre_eof_def
+ [PREEOF COPEN lang_stmt_list CCLOSE]
+
+def alias_def
+ [ALIAS id type_ref]
+
+def struct_item
+ [struct_var_def] :StructVar commit
+| [literal_def] :Literal commit
+| [rl_def] :Rl commit
+| [token_def] :Token commit
+| [ic_def] :IgnoreCollector commit
+| [ignore_def] :Ignore commit
+| [cfl_def] :Cfl commit
+| [region_def] :Region commit
+| [struct_def] :Struct commit
+| [function_def] :Function commit
+| [in_host_def] :InHost commit
+| [iter_def] :Iter commit
+| [export_def] :Export commit
+| [pre_eof_def] :PreEof commit
+| [precedence_def] :Precedence commit
+| [alias_def] :Alias commit
+
+def export_def
+ [EXPORT var_def opt_def_init]
+
+def global_def
+ [GLOBAL var_def opt_def_init]
+
+def iter_def
+ [ITER id POPEN ParamVarDefList: param_var_def_list PCLOSE
+ COPEN lang_stmt_list CCLOSE]
+
+def reference_type_ref
+ [REF LT type_ref GT]
+
+def param_var_def_seq
+ [param_var_def COMMA param_var_def_seq]
+| [param_var_def]
+
+def param_var_def_list
+ [param_var_def_seq]
+| []
+
+def param_var_def
+ [id COLON type_ref] :Type
+| [id COLON reference_type_ref] :Ref
+
+def opt_export
+ [EXPORT] :Export
+| []
+
+def function_def
+ [opt_export type_ref id
+ POPEN ParamVarDefList: param_var_def_list PCLOSE
+ COPEN lang_stmt_list CCLOSE]
+
+def in_host_def
+ [opt_export type_ref id
+ POPEN ParamVarDefList: param_var_def_list PCLOSE
+ EQUALS HostFunc: id]
+
+def struct_var_def
+ [var_def]
+
+def struct_key
+ [STRUCT] | [CONTEXT]
+
+def struct_def
+ [struct_key id ItemList: struct_item<* END]
+
+def literal_keyword
+ [LITERAL]
+| [TOKEN]
+
+def literal_def
+ [literal_keyword literal_list]
+
+def literal_list
+ [literal_list literal_item] :Item
+| [literal_item] :Base
+
+def literal_item
+ [no_ignore_left backtick_lit no_ignore_right]
+
+def no_ignore_left
+ [NI MINUS] :Ni
+| []
+
+def no_ignore_right
+ [MINUS NI] :Ni
+| []
+
+def reduction_def
+ [REDUCTION id ItemList: reduction_item<* END]
+
+lex
+ token RED_OPEN / '{' /
+ token RED_CLOSE / '}' /
+
+ token red_id /
+ ( 'a' .. 'z' | 'A' .. 'Z' | '_' ) .
+ ( 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' | '_' )*
+ /
+
+ token red_comment /
+ '//' . ( ^'\n' )* . '\n' |
+ '/*' . any* :> '*/'
+ /
+
+ token red_ws /
+ ( '\n' | '\t' | ' ' )+
+ /
+
+ token red_lit /
+ '\'' . ( ^( '\'' | '\\' ) | '\\' . any )* . ( '\'' | '\'i' ) |
+ '\"' . ( ^( '\"' | '\\' ) | '\\' . any )* . ( '\"' | '\"i' )
+ /
+
+ token RED_LHS / '$' . '$' /
+ token RED_RHS_REF / '$' . red_id /
+ token RED_RHS_LOC / '@' . red_id /
+ token RED_TREE_REF / '$*' . red_id /
+
+ token RED_RHS_NREF / '$' . ('1' .. '9') . ('0' .. '9')* /
+ token RED_RHS_NLOC / '@' . ('1' .. '9') . ('0' .. '9')* /
+ token RED_TREE_NREF / '$*' . ('1' .. '9') . ('0' .. '9')* /
+
+ token red_any / any /
+end
+
+def red_nonterm
+ [type_ref RED_OPEN HostItems: host_item<* RED_CLOSE]
+
+def red_action
+ [type_ref COLON id RED_OPEN HostItems: host_item<* RED_CLOSE]
+
+def host_item
+ [red_id]
+| [red_lit]
+| [red_comment]
+| [red_ws]
+| [red_any]
+| [RED_LHS]
+| [RED_RHS_REF]
+| [RED_TREE_REF]
+| [RED_RHS_LOC]
+| [RED_RHS_NREF]
+| [RED_TREE_NREF]
+| [RED_RHS_NLOC]
+| [RED_OPEN HostItems: host_item<* RED_CLOSE]
+
+def reduction_item
+ [red_nonterm] :NonTerm commit
+| [red_action] :Action commit
+
+def namespace_def
+ [NAMESPACE id ItemList: namespace_item<* END]
+
+def namespace_item
+ [rl_def] :Rl commit
+| [literal_def] :Literal commit
+| [token_def] :Token commit
+| [ic_def] :IgnoreCollector commit
+| [ignore_def] :Ignore commit
+| [cfl_def] :Cfl commit
+| [region_def] :Region commit
+| [struct_def] :Struct commit
+| [namespace_def] :Namespace commit
+| [function_def] :Function commit
+| [in_host_def] :InHost commit
+| [iter_def] :Iter commit
+| [pre_eof_def] :PreEof commit
+| [precedence_def] :Precedence commit
+| [alias_def] :Alias commit
+| [_include] :Include commit
+| [global_def] :Global commit
+
+def obj_var_list
+ []
+
+def opt_reduce_first
+ [REDUCEFIRST]
+| []
+
+def cfl_def
+ [DEF id
+ VarDefList: var_def<*
+ opt_reduce_first
+ prod_list]
+
+def region_def
+ [LEX RootItemList: root_item<* END]
+
+def rl_def
+ [RL id LEX_FSLASH lex_expr LEX_FSLASH]
+
+def opt_lex_expr
+ [lex_expr]
+| []
+
+def token_def
+ [TOKEN id VarDefList: var_def<*
+ no_ignore_left
+ LEX_FSLASH opt_lex_expr LEX_FSLASH
+ no_ignore_right
+ opt_translate]
+
+def ic_def
+ [TOKEN id MINUS]
+
+def opt_translate
+ [COPEN lang_stmt_list CCLOSE] :Translate
+| []
+
+def opt_id
+ [id] :Id
+| []
+
+def ignore_def
+ [IGNORE opt_id LEX_FSLASH opt_lex_expr LEX_FSLASH]
+
+def prod_sublist
+ [prod_sublist BAR prod_el_list]
+| [prod_el_list]
+
+def prod_el
+ [opt_prod_el_name region_qual id opt_repeat] :Id
+| [opt_prod_el_name region_qual backtick_lit opt_repeat] :Lit
+| [opt_prod_el_name POPEN prod_sublist PCLOSE opt_repeat ] :SubList
+
+def opt_prod_el_name
+ [id COLON] :Name
+| []
+
+def prod_el_list
+ [prod_el_list prod_el] :List
+| []
+
+def opt_commit
+ [COMMIT] :Commit
+| []
+
+def opt_prod_name
+ [COLON id] :Name
+| []
+
+def prod
+ [SQOPEN prod_el_list SQCLOSE
+ opt_prod_name
+ opt_commit
+ opt_reduce]
+
+def opt_reduce
+ [COPEN lang_stmt_list CCLOSE] :Reduce
+| []
+
+def prod_list
+ [prod_list BAR prod] :List
+| [prod] :Base
+
+def case_clause
+ [CASE pattern block_or_single] :Pattern commit
+| [CASE id block_or_single] :Id commit
+| [CASE id pattern block_or_single] :IdPat commit
+
+def default_clause
+ [DEFAULT block_or_single] commit
+
+def case_clause_list
+ [case_clause case_clause_list] :Recursive
+| [case_clause] :BaseCase
+| [default_clause] :BaseDefault
+
+# Note a commit on switch doesn't work because the default clause in the
+# case_clause follow sets causes a premature commit. We could use a proper list
+# for case clauses, followed by an optional default, but instead we just move
+# the commits to the clauses, which is a better commit strategy anyway. It
+# gives more regular commits.
+
+def statement
+ [print_stmt] :Print commit
+| [var_def opt_def_init] :VarDef commit
+| [FOR id COLON type_ref IN iter_call block_or_single] :For commit
+| [IF code_expr block_or_single elsif_list] :If commit
+| [SWITCH var_ref case_clause_list] :SwitchUnder
+| [SWITCH var_ref COPEN case_clause_list CCLOSE] :SwitchBlock
+| [WHILE code_expr block_or_single] :While commit
+| [var_ref EQUALS code_expr] :LhsVarRef commit
+| [YIELD var_ref] :Yield commit
+| [RETURN code_expr] :Return commit
+| [BREAK] :Break commit
+| [REJECT] :Reject commit
+| [var_ref POPEN call_arg_list PCLOSE] :Call
+| [stmt_or_factor] :StmtOrFactor
+| [accumulate opt_eos] :BareSend
+
+def elsif_list
+ [elsif_clause elsif_list] :Clause
+| [optional_else] :OptElse
+
+def elsif_clause
+ [ELSIF code_expr block_or_single]
+
+def optional_else
+ [ELSE block_or_single] :Else
+| []
+
+def call_arg_seq
+ [code_expr COMMA call_arg_seq]
+| [code_expr]
+
+def call_arg_list
+ [call_arg_seq]
+| []
+
+def iter_call
+ [E1 var_ref POPEN call_arg_list PCLOSE] :Call
+| [E2 id] :Id
+| [E3 code_expr] :Expr
+
+def block_or_single
+ [COPEN lang_stmt_list CCLOSE] :Block
+| [statement] :Single
+
+def require_pattern
+ [REQUIRE var_ref pattern]
+
+def opt_require_stmt
+ [require_pattern lang_stmt_list] :Require
+| [] :Base
+
+def lang_stmt_list
+ [StmtList: statement<* opt_require_stmt]
+
+def opt_def_init
+ [EQUALS code_expr] :Init
+| [] :Base
+
+def var_def
+ [id COLON type_ref]
+
+def print_stmt
+ [PRINT POPEN call_arg_list PCLOSE] :Tree
+| [PRINTS POPEN var_ref COMMA call_arg_list PCLOSE] :PrintStream
+| [PRINT accumulate] :Accum
+
+def expr_stmt
+ [code_expr]
+
+def code_expr
+ [code_expr AMPAMP code_relational] :AmpAmp
+| [code_expr BARBAR code_relational] :BarBar
+| [code_relational] :Base
+
+def code_relational
+ [code_relational EQEQ code_additive] :EqEq
+| [code_relational NEQ code_additive] :Neq
+| [code_relational LT code_additive] :Lt
+| [code_relational GT code_additive] :Gt
+| [code_relational LTEQ code_additive] :LtEq
+| [code_relational GTEQ code_additive] :GtEq
+| [code_additive] :Base
+
+def code_additive
+ [code_additive PLUS code_multiplicitive] :Plus
+| [code_additive MINUS code_multiplicitive] :Minus
+| [code_multiplicitive] :Base
+
+def code_multiplicitive
+ [code_multiplicitive STAR code_unary] :Star
+| [code_multiplicitive FSLASH code_unary] :Fslash
+| [code_unary] :Base
+
+def code_unary
+ [BANG code_factor] :Bang
+| [DOLLAR code_factor] :Dollar
+| [DOLLAR DOLLAR code_factor] :DollarDollar
+| [CARET code_factor] :Caret
+| [AT code_factor] :At
+| [PERCENT code_factor] :Percent
+| [code_factor] :Base
+
+def opt_eos
+ [DOT] :Dot
+| [EOS] :Eos
+| []
+
+def code_factor
+ [number] :Number
+| [var_ref POPEN call_arg_list PCLOSE] :Call
+| [var_ref] :VarRef
+| [NIL] :Nil
+| [TRUE] :True
+| [FALSE] :False
+| [POPEN code_expr PCLOSE] :Paren
+| [string] :String
+| [type_ref IN var_ref] :In
+| [TYPEID LT type_ref GT] :TypeId
+| [CAST LT type_ref GT code_factor] :Cast
+| [stmt_or_factor] :StmtOrFactor
+
+def type_ref
+ [region_qual id opt_repeat] :Id
+| [INT] :Int
+| [BOOL] :Bool
+| [VOID] :Void
+| [PARSER LT type_ref GT] :Parser
+| [LIST LT type_ref GT] :List
+| [MAP LT KeyType: type_ref COMMA ValType: type_ref GT] :Map
+| [LIST_EL LT type_ref GT] :ListEl
+| [MAP_EL LT KeyType: type_ref COMMA ValType: type_ref GT] :MapEl
+
+def region_qual
+ [region_qual id DOUBLE_COLON] :Qual
+| [] :Base
+
+def opt_repeat
+ [STAR] :Star
+| [PLUS] :Plus
+| [QUESTION] :Question
+| [LT STAR] :LeftStar
+| [LT PLUS] :LeftPlus
+| []
+
+def opt_capture
+ [id COLON] :Id
+| []
+
+def opt_field_init
+ [POPEN FieldInitList: field_init<* PCLOSE] :Init
+| [] :Base
+
+def field_init
+ [code_expr]
+
+def stmt_or_factor
+ [PARSE opt_capture type_ref opt_field_init accumulate] :Parse
+| [PARSE_TREE opt_capture type_ref opt_field_init accumulate] :ParseTree
+| [PARSE_STOP opt_capture type_ref opt_field_init accumulate] :ParseStop
+| [REDUCE id type_ref opt_field_init accumulate] :Reduce
+| [READ_REDUCE id type_ref opt_field_init accumulate] :ReadReduce
+| [SEND var_ref accumulate opt_eos] :Send
+| [SEND_TREE var_ref accumulate opt_eos] :SendTree
+| [MAKE_TREE POPEN call_arg_list PCLOSE] :MakeTree
+| [MAKE_TOKEN POPEN call_arg_list PCLOSE] :MakeToken
+| [CONS opt_capture type_ref opt_field_init constructor] :Cons
+| [MATCH var_ref pattern] :Match
+| [NEW opt_capture type_ref POPEN FieldInitList: field_init<* PCLOSE] :New
+
+#
+# Pattern
+#
+
+def opt_label
+ [id COLON] :Id
+| []
+
+def dq_lit_term
+ [LIT_DQ] | [LIT_DQ_NL]
+
+def sq_lit_term
+ [CONS_SQ] | [CONS_SQ_NL]
+
+def opt_tilde_data
+ [tilde_data]
+| []
+
+def pattern_el_lel
+ [region_qual id opt_repeat] :Id
+| [region_qual backtick_lit opt_repeat] :Lit
+
+def pattern_el
+ [opt_label pattern_el_lel] :PatternEl
+| [DQ LitpatElList: litpat_el<* dq_lit_term] :Dq
+| [SQ SqConsDataList: sq_cons_data<* sq_lit_term] :Sq
+| [TILDE opt_tilde_data TILDE_NL] :Tilde
+
+def litpat_el
+ [lit_dq_data] :ConsData
+| [LIT_SQOPEN PatternElList: pattern_el<* LIT_SQCLOSE] :SubList
+
+def pattern_top_el
+ [DQ LitpatElList: litpat_el<* dq_lit_term] :Dq
+| [SQ SqConsDataList: sq_cons_data<* sq_lit_term] :Sq
+| [TILDE opt_tilde_data TILDE_NL] :Tilde
+
+def pattern_list
+ [pattern_top_el pattern_list] :List
+| [pattern_top_el] :Base
+
+def pattern
+ [pattern_list] :TopList
+| [SQOPEN PatternElList: pattern_el<* SQCLOSE] :SubList
+
+#
+# Constructor List
+#
+
+# Empty productions. E1, E2 and E3 appear as the leading element of the
+# alternatives in iter_call, and E1 and E2 in cons_el, accum_el and string_el.
+# NOTE(review): presumably they order or disambiguate those alternatives --
+# confirm. E4 is not referenced anywhere else in this grammar.
+def E1 []
+def E2 []
+def E3 []
+def E4 []
+
+def cons_el
+ [E1 region_qual backtick_lit] :Lit
+| [E1 DQ LitConsElList: lit_cons_el<* dq_lit_term] :Dq
+| [E1 SQ SqConsDataList: sq_cons_data<* sq_lit_term] :Sq
+| [E1 TILDE opt_tilde_data TILDE_NL] :Tilde
+| [E2 code_expr] :CodeExpr
+
+def lit_cons_el
+ [lit_dq_data] :ConsData
+| [LIT_SQOPEN ConsElList: cons_el<* LIT_SQCLOSE] :SubList
+
+def cons_top_el
+ [DQ LitConsElList: lit_cons_el<* dq_lit_term] :Dq
+| [SQ SqConsDataList: sq_cons_data<* sq_lit_term] :Sq
+| [TILDE opt_tilde_data TILDE_NL] :Tilde
+
+def cons_list
+ [cons_top_el cons_list] :List
+| [cons_top_el] :Base
+
+def constructor
+ [cons_list] :TopList
+| [SQOPEN ConsElList: cons_el<* SQCLOSE] :SubList
+
+#
+# Accumulate
+#
+
+def accum_el
+ [E1 DQ LitAccumElList: lit_accum_el<* dq_lit_term] :Dq
+| [E1 SQ SqConsDataList: sq_cons_data<* sq_lit_term] :Sq
+| [E1 TILDE opt_tilde_data TILDE_NL] :Tilde
+| [E2 code_expr] :CodeExpr
+
+def lit_accum_el
+ [lit_dq_data] :ConsData
+| [LIT_SQOPEN AccumElList: accum_el<* LIT_SQCLOSE] :SubList
+
+def accum_top_el
+ [DQ LitAccumElList: lit_accum_el<* dq_lit_term] :Dq
+| [SQ SqConsDataList: sq_cons_data<* sq_lit_term] :Sq
+| [TILDE opt_tilde_data TILDE_NL] :Tilde
+| [SQOPEN AccumElList: accum_el<* SQCLOSE] :SubList
+
+def accum_list
+ [accum_top_el accum_list] :List
+| [accum_top_el] :Base
+
+def accumulate
+ [accum_list]
+
+#
+# String List
+#
+
+def string_el
+ [E1 DQ LitStringElList: lit_string_el<* dq_lit_term] :Dq
+| [E1 SQ SqConsDataList: sq_cons_data<* sq_lit_term] :Sq
+| [E1 TILDE opt_tilde_data TILDE_NL] :Tilde
+| [E2 code_expr] :CodeExpr
+
+def lit_string_el
+ [lit_dq_data] :ConsData
+| [LIT_SQOPEN StringElList: string_el<* LIT_SQCLOSE] :SubList
+
+def string_top_el
+ [DQ LitStringElList: lit_string_el<* dq_lit_term] :Dq
+| [SQ SqConsDataList: sq_cons_data<* sq_lit_term] :Sq
+| [TILDE opt_tilde_data TILDE_NL] :Tilde
+
+def string_list
+ [string_top_el string_list] :List
+| [string_top_el] :Base
+
+def string
+ [string_list] :TopList
+| [SQOPEN StringElList: string_el<* SQCLOSE] :SubList
+
+#
+# Variable References
+#
+
+def var_ref
+ [region_qual qual id]
+
+def qual
+ [qual id DOT] :Dot
+| [qual id ARROW] :Arrow
+| [] :Base
+
+#
+# Lexical analysis.
+#
+
+def lex_expr
+ [lex_expr LEX_BAR lex_term] :Bar
+| [lex_expr LEX_AMP lex_term] :Amp
+| [lex_expr LEX_DASH lex_term] :Dash
+| [lex_expr LEX_DASHDASH lex_term] :DashDash
+| [lex_term] :Base
+
+def opt_lex_dot
+ [LEX_DOT]
+| []
+
+def lex_term
+ [lex_term opt_lex_dot lex_factor_rep] :Dot
+| [lex_term LEX_COLON_GT lex_factor_rep] :ColonGt
+| [lex_term LEX_COLON_GTGT lex_factor_rep] :ColonGtGt
+| [lex_term LEX_LT_COLON lex_factor_rep] :LtColon
+| [lex_factor_rep] :Base
+
+def lex_factor_rep
+ [lex_factor_rep LEX_STAR] :Star
+| [lex_factor_rep LEX_STARSTAR] :StarStar
+| [lex_factor_rep LEX_PLUS] :Plus
+| [lex_factor_rep LEX_QUESTION] :Question
+| [lex_factor_rep COPEN lex_uint CCLOSE ] :Exact
+| [lex_factor_rep COPEN COMMA lex_uint CCLOSE ] :Max
+| [lex_factor_rep COPEN lex_uint COMMA CCLOSE ] :Min
+| [lex_factor_rep COPEN Low: lex_uint COMMA High: lex_uint CCLOSE ] :Range
+| [lex_factor_neg] :Base
+
+def lex_factor_neg
+ [LEX_CARET lex_factor_neg] :Caret
+| [lex_factor] :Base
+
+def lex_range_lit
+ [lex_lit] :Lit
+| [lex_num] :Number
+
+def lex_num
+ [lex_uint]
+| [lex_hex]
+
+#| [LEX_DASH num]
+
+def lex_factor
+ [lex_lit] :Literal
+| [lex_id] :Id
+| [lex_uint] :Number
+| [lex_hex] :Hex
+| [Low: lex_range_lit LEX_DOTDOT High: lex_range_lit] :Range
+| [LEX_SQOPEN_POS reg_or_data RE_SQCLOSE] :PosOrBlock
+| [LEX_SQOPEN_NEG reg_or_data RE_SQCLOSE] :NegOrBlock
+| [LEX_POPEN lex_expr LEX_PCLOSE] :Paren
+
+def reg_or_data
+ [reg_or_data reg_or_char] :Data
+| [] :Base
+
+def reg_or_char
+ [RE_CHAR] :Char
+| [Low: RE_CHAR RE_DASH High: RE_CHAR] :Range
diff --git a/src/colmex.h b/src/colmex.h
new file mode 100644
index 00000000..2abc7b2f
--- /dev/null
+++ b/src/colmex.h
@@ -0,0 +1,109 @@
+#ifndef _COLMEX_H
+#define _COLMEX_H
+
+#include <colm/colm.h>
+#include <colm/tree.h>
+#include <colm/colmex.h>
+#include <string.h>
+#include <string>
+#include <vector>
+
+/* Output callback with the signature of colm_print_args::out. Treats
+ * args->arg as a std::string and appends length characters of data to it. */
+inline void appendString( colm_print_args *args, const char *data, int length )
+{
+    std::string &dest = *static_cast<std::string*>( args->arg );
+    dest.append( data, length );
+}
+
+/* Print a tree into a std::string and return it. The trim flag is passed
+ * through to the print arguments; output is collected with appendString. */
+inline std::string printTreeStr( colm_program *prg, colm_tree *tree, bool trim )
+{
+    std::string text;
+    struct indent_impl indentState = { -1, 0 };
+
+    /* Positional initializer: the order follows struct colm_print_args. */
+    colm_print_args args = {
+        &text,                  /* arg: where appendString writes */
+        1,                      /* comm */
+        0,                      /* attr */
+        trim,                   /* trim */
+        &indentState,           /* indent */
+        &appendString,          /* out */
+        &colm_print_null,       /* open_tree */
+        &colm_print_term_tree,  /* print_term */
+        &colm_print_null        /* close_tree */
+    };
+
+    colm_print_tree_args( prg, colm_vm_root( prg ), &args, tree );
+    return text;
+}
+
+/* Pairs a tree with the program it came from and offers accessors for its
+ * text, location and token data. */
+struct ExportTree
+{
+    ExportTree( colm_program *prg, colm_tree *tree )
+    :
+        __prg( prg ),
+        __tree( tree )
+    {
+    }
+
+    /* Tree text, printed with the trim argument set. */
+    std::string text()
+    {
+        return printTreeStr( __prg, __tree, true );
+    }
+
+    colm_location *loc()
+    {
+        return colm_find_location( __prg, __tree );
+    }
+
+    /* Tree text, printed with the trim argument clear. */
+    std::string text_notrim()
+    {
+        return printTreeStr( __prg, __tree, false );
+    }
+
+    /* Makes the same call as text_notrim(). */
+    std::string text_ws()
+    {
+        return printTreeStr( __prg, __tree, false );
+    }
+
+    colm_data *data()
+    {
+        return __tree->tokdata;
+    }
+
+    operator colm_tree *()
+    {
+        return __tree;
+    }
+
+    colm_program *__prg;
+    colm_tree *__tree;
+};
+
+/* Non-recursive tree iterator. Runs a depth-first traversal, testing each node
+ * before its children, and when it finds a search target it yields it and then
+ * resumes searching at the next sibling. It does not go into what it finds.
+ * This iterator can be used to search lists, regardless if they are
+ * left-recursive or right-recursive.
+ *
+ * SearchType must provide a static ID, which is compared against the id of
+ * each tree, and must be constructible from ( colm_program*, colm_tree* ). */
+template <class SearchType> struct RepeatIter
+{
+ RepeatIter( const ExportTree &root )
+ :
+ prg(root.__prg),
+ search_id(SearchType::ID)
+ {
+ /* We use one code path for the first call to next() and all
+ * subsequent calls. To achieve this we create a sentinel in front of
+ * root called first and point cur to it. On the first next() call
+ * it will be as if we just visited the sentinel.
+ *
+ * Note that we are also creating a kid for root, rather than
+ * jump into root's child list so we can entertain the
+ * possibility that root is exactly the thing we want to visit.
+ */
+
+ memset( &first, 0, sizeof(first) );
+ memset( &kid, 0, sizeof(kid) );
+
+ first.next = &kid;
+ kid.tree = root.__tree;
+ cur = &first;
+ /* Position the iterator on the first match (or on the end). */
+ next();
+ }
+
+ colm_program *prg;
+ /* first is the sentinel, kid wraps the root, cur is the current position
+ * (null once the traversal is finished). */
+ colm_kid first, kid, *cur;
+ int search_id;
+ /* Nodes that were descended into and still have siblings to visit. */
+ std::vector<colm_kid*> stack;
+
+ /* Advance to the next match, or to the end if there is none. */
+ void next()
+ {
+ /* Resume after the node visited last: enter the loop below at
+ * return_to, which moves cur to its next sibling. */
+ goto return_to;
+ recurse:
+
+ /* A match is yielded by returning with cur left on it; its children
+ * are not searched. */
+ if ( cur->tree->id == search_id )
+ return;
+ else {
+ /* No match: remember this node and descend into its children. */
+ stack.push_back( cur );
+ cur = tree_child( prg, cur->tree );
+ while ( cur != 0 ) {
+ goto recurse;
+ return_to: cur = cur->next;
+ }
+ /* Sibling list exhausted. With nothing left on the stack the
+ * traversal is finished; cur becomes null, which end() reports. */
+ if ( stack.size() == 0 ) {
+ cur = 0;
+ return;
+ }
+ /* Otherwise go back up to the parent and continue with its next
+ * sibling. */
+ cur = stack.back();
+ stack.pop_back();
+ goto return_to;
+ }
+ }
+
+ /* True once the traversal has run out of nodes. */
+ bool end()
+ { return cur == 0; }
+
+ /* The current match, wrapped in SearchType. Dereferences cur, so it must
+ * not be called once end() is true. */
+ SearchType value()
+ { return SearchType( prg, cur->tree ); }
+};
+
+#endif
diff --git a/src/commit.c b/src/commit.c
new file mode 100644
index 00000000..28da8c1d
--- /dev/null
+++ b/src/commit.c
@@ -0,0 +1,111 @@
+/*
+ * Copyright 2015-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+
+#include "config.h"
+#include "debug.h"
+#include "pdarun.h"
+#include "bytecode.h"
+#include "tree.h"
+#include "pool.h"
+#include "internal.h"
+
+/* Walk a singly linked list of kids. For each one, downref the tree it holds
+ * and then free the kid itself. The next pointer is read before the kid is
+ * freed. */
+void commit_clear_kid_list( program_t *prg, tree_t **sp, kid_t *kid )
+{
+	kid_t *following;
+	for ( ; kid != 0; kid = following ) {
+		colm_tree_downref( prg, sp, kid->tree );
+		following = kid->next;
+		kid_free( prg, kid );
+	}
+}
+
+/* Free a parse tree node and every node reachable from it through the next,
+ * left_ignore, child and right_ignore links. Does nothing when pt is null.
+ *
+ * The walk is iterative: pending nodes are pushed with vm_push_ptree() and
+ * popped with vm_pop_ptree() until sp is back at the level recorded in top.
+ * NOTE(review): the vm_* helpers are defined elsewhere; presumably they are
+ * macros that move the local sp -- confirm. */
+void commit_clear_parse_tree( program_t *prg, tree_t **sp,
+ struct pda_run *pda_run, parse_tree_t *pt )
+{
+ /* Stack level on entry. The loop is finished when sp returns to it. */
+ tree_t **top = vm_ptop();
+
+ if ( pt == 0 )
+ return;
+
+free_tree:
+ /* Queue every linked node before pt itself is released. */
+ if ( pt->next != 0 ) {
+ vm_push_ptree( pt->next );
+ }
+
+ if ( pt->left_ignore != 0 ) {
+ vm_push_ptree( pt->left_ignore );
+ }
+
+ if ( pt->child != 0 ) {
+ vm_push_ptree( pt->child );
+ }
+
+ if ( pt->right_ignore != 0 ) {
+ vm_push_ptree( pt->right_ignore );
+ }
+
+ /* Only the root level of the stack has tree
+ * shadows and we are below that. */
+ assert( pt->shadow == 0 );
+ parse_tree_free( pda_run, pt );
+
+ /* Any queued parse trees left to free? */
+ if ( sp != top ) {
+ pt = vm_pop_ptree();
+ goto free_tree;
+ }
+}
+
+/* Nonzero when the PF_COMMITTED bit is set in the node's flags. */
+static int been_committed( parse_tree_t *parse_tree )
+{
+	int committed = parse_tree->flags & PF_COMMITTED;
+	return committed;
+}
+
+/* Run the commit-time reduction over the not-yet-committed part of the parse
+ * stack. Starting at pda_run->stack_top, nodes are pushed until the end of
+ * the list or an already committed node is reached. They are then popped one
+ * at a time, handed to the program's commit_reduce_forward hook, detached
+ * from their children and flagged PF_COMMITTED. */
+void commit_reduce( program_t *prg, tree_t **root, struct pda_run *pda_run )
+{
+	tree_t **sp = root;
+	parse_tree_t *pt;
+
+	/* The top level of the stack is linked right to left. Pushing it and
+	 * popping it again gives the traversal order needed for committing. */
+	for ( pt = pda_run->stack_top; pt != 0 && !been_committed( pt ); pt = pt->next ) {
+		vm_push_ptree( pt );
+	}
+
+	/* Drain everything pushed above. */
+	while ( sp != root ) {
+		pt = vm_pop_ptree();
+
+		prg->rtd->commit_reduce_forward( prg, sp, pda_run, pt );
+
+		pt->child = 0;
+		pt->flags |= PF_COMMITTED;
+	}
+}
diff --git a/src/compiler.cc b/src/compiler.cc
new file mode 100644
index 00000000..72f87dac
--- /dev/null
+++ b/src/compiler.cc
@@ -0,0 +1,1263 @@
+/*
+ * Copyright 2006-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "compiler.h"
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdbool.h>
+#include <unistd.h>
+#include <assert.h>
+#include <iostream>
+
+#include "redbuild.h"
+#include "pdacodegen.h"
+#include "fsmcodegen.h"
+#include "colm.h"
+
+using std::ostringstream;
+using std::cout;
+using std::cerr;
+using std::endl;
+
+char machineMain[] = "main";
+exit_object endp;
+/* Streaming an exit_object ends the current line (with a flush) and then
+ * terminates the process with exit status 1. */
+void operator<<( ostream &out, exit_object & )
+{
+	endl( out );
+	exit( 1 );
+}
+
+/* Clean up and minimize a machine after an operation, but only when that
+ * operation was the last in its sequence. Otherwise the graph is untouched. */
+void afterOpMinimize( FsmGraph *fsm, bool lastInSeq )
+{
+	if ( !lastInSeq )
+		return;
+
+	/* Graph operations may leave unreachable states behind, so drop those
+	 * first. Dead end states are not expected here: only the subtract and
+	 * intersection operators can create them and they clean up themselves. */
+	fsm->removeUnreachableStates();
+	fsm->minimizePartition2();
+}
+
+/* Sum the lengths of the out-transition lists of all states in the graph. */
+int countTransitions( FsmGraph *fsm )
+{
+	int total = 0;
+	for ( FsmState *st = fsm->stateList.head; st != 0; st = st->next )
+		total += st->outList.length();
+	return total;
+}
+
+/* Convert a hexadecimal literal into a key.
+ *
+ * str: the literal text, parsed by strtoul with base 16.
+ * loc: source location used when reporting an overflow.
+ * pd:  unused.
+ *
+ * An error is reported when strtoul signals ERANGE or when the value has bits
+ * set beyond the width of the alphabet type. For a signed alphabet narrower
+ * than unsigned long, a value with its top alphabet bit set is sign extended
+ * before being converted to a key. */
+Key makeFsmKeyHex( char *str, const InputLoc &loc, Compiler *pd )
+{
+	/* Reset errno so we can check for overflow. */
+	errno = 0;
+	unsigned int size = keyOps->alphType->size;
+	bool unusedBits = size < sizeof(unsigned long);
+
+	unsigned long ul = strtoul( str, 0, 16 );
+
+	if ( errno == ERANGE || (unusedBits && ul >> (size * 8)) ) {
+		error(loc) << "literal " << str << " overflows the alphabet type" << endl;
+
+		/* The shift must be done in unsigned long: shifting the int constant
+		 * 1 by 32 or more bits is undefined. When the alphabet is as wide as
+		 * unsigned long no shift is possible at all, so saturate instead. */
+		if ( unusedBits )
+			ul = 1UL << (size * 8);
+		else
+			ul = ULONG_MAX;
+	}
+
+	/* Sign extend into the bits above the alphabet width. */
+	if ( keyOps->alphType->isSigned && unusedBits && ul >> (size * 8 - 1) )
+		ul |= (ULONG_MAX >> (size*8 ) ) << (size*8);
+
+	return Key( (long)ul );
+}
+
+/* Convert a decimal literal into a key. Values outside the alphabet type's
+ * minVal..maxVal range, or outside what strtoll can represent, are reported
+ * as an error at loc and clamped to the bound they crossed. pd is unused. */
+Key makeFsmKeyDec( char *str, const InputLoc &loc, Compiler *pd )
+{
+	/* Clear errno first so a range failure in strtoll can be detected. */
+	errno = 0;
+	const long long lowest = keyOps->alphType->minVal;
+	const long long highest = keyOps->alphType->maxVal;
+
+	long long parsed = strtoll( str, 0, 10 );
+	const bool rangeError = ( errno == ERANGE );
+
+	if ( ( rangeError && parsed < 0 ) || parsed < lowest ) {
+		/* Too small. */
+		error(loc) << "literal " << str << " underflows the alphabet type" << endl;
+		parsed = lowest;
+	}
+	else if ( ( rangeError && parsed > 0 ) || parsed > highest ) {
+		/* Too large. */
+		error(loc) << "literal " << str << " overflows the alphabet type" << endl;
+		parsed = highest;
+	}
+
+	return Key( (long)parsed );
+}
+
+/* Turn the text of an alphabet number into a key. A literal that starts with
+ * "0x" is handed to the hex converter, anything else to the decimal one. Both
+ * converters report a literal that does not fit the alphabet type. */
+Key makeFsmKeyNum( char *str, const InputLoc &loc, Compiler *pd )
+{
+	const bool hexLiteral = ( str[0] == '0' && str[1] == 'x' );
+	return hexLiteral ?
+			makeFsmKeyHex( str, loc, pd ) :
+			makeFsmKeyDec( str, loc, pd );
+}
+
+/* Build a key directly from a single char. pd is unused. */
+Key makeFsmKeyChar( char c, Compiler *pd )
+{
+	Key key( c );
+	return key;
+}
+
+/* Fill result[0..len-1] with keys built from the chars data[0..len-1].
+ * The caller provides a result array of at least len elements. pd is unused. */
+void makeFsmKeyArray( Key *result, char *data, int len, Compiler *pd )
+{
+	int pos = 0;
+	while ( pos < len ) {
+		result[pos] = Key( data[pos] );
+		pos += 1;
+	}
+}
+
+/* Like makeFsmKeyArray, except the keys go into a set, so duplicates collapse
+ * and the original ordering is not kept. With caseInsensitive set, each lower
+ * case key also contributes its upper case form and vice versa. pd is unused. */
+void makeFsmUniqueKeyArray( KeySet &result, char *data, int len,
+		bool caseInsensitive, Compiler *pd )
+{
+	for ( int pos = 0; pos < len; pos++ ) {
+		Key key( data[pos] );
+		result.insert( key );
+
+		if ( !caseInsensitive )
+			continue;
+
+		/* Add the other-case twin, if the key has one. */
+		if ( key.isLower() )
+			result.insert( key.toUpper() );
+		else if ( key.isUpper() )
+			result.insert( key.toLower() );
+	}
+}
+
+/* New machine built by rangeFsm over keyOps->minKey .. keyOps->maxKey.
+ * The caller owns the returned graph. pd is unused. */
+FsmGraph *dotFsm( Compiler *pd )
+{
+	FsmGraph *anyKey = new FsmGraph();
+	anyKey->rangeFsm( keyOps->minKey, keyOps->maxKey );
+	return anyKey;
+}
+
+/* New machine built by rangeStarFsm over keyOps->minKey .. keyOps->maxKey.
+ * The caller owns the returned graph. pd is unused. */
+FsmGraph *dotStarFsm( Compiler *pd )
+{
+	FsmGraph *anyKeyStar = new FsmGraph();
+	anyKeyStar->rangeStarFsm( keyOps->minKey, keyOps->maxKey );
+	return anyKeyStar;
+}
+
+/* Make the machine for a builtin type.
+ *
+ * builtin: which builtin to construct.
+ * pd:      only forwarded to dotFsm() for BT_Any.
+ *
+ * Returns a newly allocated graph, or 0 if builtin matches none of the cases.
+ * Builtins made from several ranges are unioned into the first range and
+ * minimized before being returned. */
+FsmGraph *makeBuiltin( BuiltinMachine builtin, Compiler *pd )
+{
+ /* FsmGraph created to return. */
+ FsmGraph *retFsm = 0;
+
+ switch ( builtin ) {
+ case BT_Any: {
+ /* All characters. */
+ retFsm = dotFsm( pd );
+ break;
+ }
+ case BT_Ascii: {
+ /* Ascii characters 0 to 127. */
+ retFsm = new FsmGraph();
+ retFsm->rangeFsm( 0, 127 );
+ break;
+ }
+ case BT_Extend: {
+ /* Ascii extended characters. This is the full byte range, here always
+ * expressed as the signed range -128..127.
+ * NOTE(review): an earlier wording of this comment said the range
+ * depends on the signedness of the alphabet and that a one byte
+ * alphabet should just use dot fsm; the code does neither -- confirm
+ * the fixed signed range is intended. */
+ retFsm = new FsmGraph();
+ retFsm->rangeFsm( -128, 127 );
+ break;
+ }
+ case BT_Alpha: {
+ /* Alpha [A-Za-z]. */
+ FsmGraph *upper = new FsmGraph(), *lower = new FsmGraph();
+ upper->rangeFsm( 'A', 'Z' );
+ lower->rangeFsm( 'a', 'z' );
+ upper->unionOp( lower );
+ upper->minimizePartition2();
+ retFsm = upper;
+ break;
+ }
+ case BT_Digit: {
+ /* Digits [0-9]. */
+ retFsm = new FsmGraph();
+ retFsm->rangeFsm( '0', '9' );
+ break;
+ }
+ case BT_Alnum: {
+ /* Alpha numerics [0-9A-Za-z]. */
+ FsmGraph *digit = new FsmGraph(), *lower = new FsmGraph();
+ FsmGraph *upper = new FsmGraph();
+ digit->rangeFsm( '0', '9' );
+ upper->rangeFsm( 'A', 'Z' );
+ lower->rangeFsm( 'a', 'z' );
+ digit->unionOp( upper );
+ digit->unionOp( lower );
+ digit->minimizePartition2();
+ retFsm = digit;
+ break;
+ }
+ case BT_Lower: {
+ /* Lower case characters. */
+ retFsm = new FsmGraph();
+ retFsm->rangeFsm( 'a', 'z' );
+ break;
+ }
+ case BT_Upper: {
+ /* Upper case characters. */
+ retFsm = new FsmGraph();
+ retFsm->rangeFsm( 'A', 'Z' );
+ break;
+ }
+ case BT_Cntrl: {
+ /* Control characters: 0 to 31, plus 127. */
+ FsmGraph *cntrl = new FsmGraph();
+ FsmGraph *highChar = new FsmGraph();
+ cntrl->rangeFsm( 0, 31 );
+ highChar->concatFsm( 127 );
+ cntrl->unionOp( highChar );
+ cntrl->minimizePartition2();
+ retFsm = cntrl;
+ break;
+ }
+ case BT_Graph: {
+ /* Graphical ascii characters [!-~]. */
+ retFsm = new FsmGraph();
+ retFsm->rangeFsm( '!', '~' );
+ break;
+ }
+ case BT_Print: {
+ /* Printable characters. Same as graph except includes space. */
+ retFsm = new FsmGraph();
+ retFsm->rangeFsm( ' ', '~' );
+ break;
+ }
+ case BT_Punct: {
+ /* Punctuation: the four ranges !-/ :-@ [-` {-~ . */
+ FsmGraph *range1 = new FsmGraph();
+ FsmGraph *range2 = new FsmGraph();
+ FsmGraph *range3 = new FsmGraph();
+ FsmGraph *range4 = new FsmGraph();
+ range1->rangeFsm( '!', '/' );
+ range2->rangeFsm( ':', '@' );
+ range3->rangeFsm( '[', '`' );
+ range4->rangeFsm( '{', '~' );
+ range1->unionOp( range2 );
+ range1->unionOp( range3 );
+ range1->unionOp( range4 );
+ range1->minimizePartition2();
+ retFsm = range1;
+ break;
+ }
+ case BT_Space: {
+ /* Whitespace: [\t\v\f\n\r ]. */
+ FsmGraph *cntrl = new FsmGraph();
+ FsmGraph *space = new FsmGraph();
+ cntrl->rangeFsm( '\t', '\r' );
+ space->concatFsm( ' ' );
+ cntrl->unionOp( space );
+ cntrl->minimizePartition2();
+ retFsm = cntrl;
+ break;
+ }
+ case BT_Xdigit: {
+ /* Hex digits [0-9A-Fa-f]. */
+ FsmGraph *digit = new FsmGraph();
+ FsmGraph *upper = new FsmGraph();
+ FsmGraph *lower = new FsmGraph();
+ digit->rangeFsm( '0', '9' );
+ upper->rangeFsm( 'A', 'F' );
+ lower->rangeFsm( 'a', 'f' );
+ digit->unionOp( upper );
+ digit->unionOp( lower );
+ digit->minimizePartition2();
+ retFsm = digit;
+ break;
+ }
+ case BT_Lambda: {
+ /* Built with lambdaFsm(). */
+ retFsm = new FsmGraph();
+ retFsm->lambdaFsm();
+ break;
+ }
+ case BT_Empty: {
+ /* Built with emptyFsm(). */
+ retFsm = new FsmGraph();
+ retFsm->emptyFsm();
+ break;
+ }}
+
+ return retFsm;
+}
+
+/*
+ * Compiler
+ */
+
+/* Initialize the structure that will collect info during the parse of a
+ * machine. Pointer members start out null and most counters start at zero.
+ * The exceptions visible here: nextTokenId, nextGenericId and nextObjectId
+ * start at 1, and revertOn starts out true. */
+Compiler::Compiler( )
+:
+ nextPriorKey(0),
+ nextNameId(0),
+ alphTypeSet(false),
+ getKeyExpr(0),
+ accessExpr(0),
+ curStateExpr(0),
+ lowerNum(0),
+ upperNum(0),
+ errorCount(0),
+ curActionOrd(0),
+ curPriorOrd(0),
+ nextEpsilonResolvedLink(0),
+ nextTokenId(1),
+ rootCodeBlock(0),
+ mainReturnUT(0),
+ //access(0),
+ //tokenStruct(0),
+
+ /* Language elements filled in later. */
+ ptrLangEl(0),
+ strLangEl(0),
+ anyLangEl(0),
+ rootLangEl(0),
+ noTokenLangEl(0),
+ eofLangEl(0),
+ errorLangEl(0),
+ ignoreLangEl(0),
+
+ firstNonTermId(0),
+ prodIdIndex(0),
+
+ global(0),
+ globalSel(0),
+ globalObjectDef(0),
+ arg0(0),
+ argv(0),
+
+ stream(0),
+ inputSel(0),
+ streamSel(0),
+
+ /* Unique types filled in later. */
+ uniqueTypeNil(0),
+ uniqueTypePtr(0),
+ uniqueTypeBool(0),
+ uniqueTypeInt(0),
+ uniqueTypeStr(0),
+ uniqueTypeIgnore(0),
+ uniqueTypeAny(0),
+ uniqueTypeInput(0),
+ uniqueTypeStream(0),
+ nextPatConsId(0),
+ nextGenericId(1),
+ nextFuncId(0),
+ nextHostId(0),
+ nextObjectId(1), /* 0 is reserved for no object. */
+ nextFrameId(0),
+ nextParserId(0),
+ revertOn(true),
+ predValue(0),
+ nextMatchEndNum(0),
+ argvTypeRef(0),
+ inContiguous(false),
+ contiguousOffset(0),
+ contiguousStretch(0)
+{
+}
+
+/* Release what was collected during a parse: empty the action list, then free
+ * each null-terminated array of stream file names together with the strings
+ * it points to. */
+Compiler::~Compiler()
+{
+	/* Emptying the list deletes its nodes, which in turn lets go of the
+	 * string data that represents the actions. */
+	actionList.empty();
+
+	for ( CharVectVect::Iter fns = streamFileNames; fns.lte(); fns++ ) {
+		/* Free the strings first, then the array holding them. */
+		for ( const char **name = *fns; *name != 0; name += 1 )
+			::free( (void*)*name );
+		free( (void*) *fns );
+	}
+}
+
+/* Write a token's data to the stream and hand the stream back so that calls
+ * can be chained. */
+ostream &operator<<( ostream &out, const Token &token )
+{
+	ostream &sink = out;
+	sink << token.data;
+	return sink;
+}
+
+/* Write out a name reference with its components separated by "::". A null
+ * first component is written as a leading "::" with no name before it.
+ *
+ * Every index is checked against the length, so an empty reference writes
+ * nothing and a reference holding only the null component writes just "::".
+ * Previously both cases read past the end of the reference. Returns out. */
+ostream &operator<<( ostream &out, const NameRef &nameRef )
+{
+	int pos = 0;
+
+	/* Leading null component. */
+	if ( pos < nameRef.length() && nameRef[pos] == 0 ) {
+		out << "::";
+		pos += 1;
+	}
+
+	/* First real component goes out without a separator. */
+	if ( pos < nameRef.length() )
+		out << nameRef[pos++];
+
+	/* Remaining components, each preceded by the separator. */
+	for ( ; pos < nameRef.length(); pos++ )
+		out << "::" << nameRef[pos];
+	return out;
+}
+
+/* Build an array that maps a name id to its NameInst. The array has
+ * nextNameId + 1 zeroed slots, so at least the last slot stays null and
+ * terminates the list. The caller owns the array. */
+NameInst **Compiler::makeNameIndex()
+{
+	NameInst **index = new NameInst*[nextNameId+1];
+	memset( index, 0, sizeof(NameInst*)*(nextNameId+1) );
+
+	/* Drop every known instance into the slot given by its id. */
+	for ( NameInstList::Iter inst = nameInstList; inst.lte(); inst++ )
+		index[inst->id] = inst;
+
+	return index;
+}
+
+/* Register one builtin machine under the given name in the root namespace's
+ * rlMap. The builtin is wrapped as expression, join and definition. */
+void Compiler::createBuiltin( const char *name, BuiltinMachine builtin )
+{
+	LexExpression *expr = LexExpression::cons( builtin );
+	LexJoin *lexJoin = LexJoin::cons( expr );
+	LexDefinition *definition = new LexDefinition( name, lexJoin );
+	GraphDictEl *dictEl = new GraphDictEl( name, definition );
+	rootNamespace->rlMap.insert( dictEl );
+}
+
+/* Populate the graph dict with the builtin machines. Both "null" and "zlen"
+ * name the lambda machine. */
+void Compiler::initGraphDict( )
+{
+	static const struct {
+		const char *name;
+		BuiltinMachine builtin;
+	} builtins[] = {
+		{ "any", BT_Any },
+		{ "ascii", BT_Ascii },
+		{ "extend", BT_Extend },
+		{ "alpha", BT_Alpha },
+		{ "digit", BT_Digit },
+		{ "alnum", BT_Alnum },
+		{ "lower", BT_Lower },
+		{ "upper", BT_Upper },
+		{ "cntrl", BT_Cntrl },
+		{ "graph", BT_Graph },
+		{ "print", BT_Print },
+		{ "punct", BT_Punct },
+		{ "space", BT_Space },
+		{ "xdigit", BT_Xdigit },
+		{ "null", BT_Lambda },
+		{ "zlen", BT_Lambda },
+		{ "empty", BT_Empty },
+	};
+
+	/* Registered in table order. */
+	const unsigned count = sizeof(builtins) / sizeof(builtins[0]);
+	for ( unsigned i = 0; i < count; i++ )
+		createBuiltin( builtins[i].name, builtins[i].builtin );
+}
+
+/* Set up the key operators object that the fsms refer to. The alphabet type
+ * is the user's choice if one was set, otherwise the host language default.
+ * If a lower bound was given, both range bounds are converted to keys and
+ * stored as the min and max key. */
+void Compiler::initKeyOps( )
+{
+	/* Signedness and bounds come from the alphabet type. */
+	const HostType *alphType;
+	if ( alphTypeSet )
+		alphType = userAlphType;
+	else
+		alphType = &hostLang->hostTypes[hostLang->defaultHostType];
+	thisKeyOps.setAlphType( alphType );
+
+	/* No explicit range: nothing more to do. */
+	if ( lowerNum == 0 )
+		return;
+
+	thisKeyOps.minKey = makeFsmKeyNum( lowerNum, rangeLowLoc, this );
+	thisKeyOps.maxKey = makeFsmKeyNum( upperNum, rangeHighLoc, this );
+}
+
+/* Keep only the first occurrence of each action value in the table. Later
+ * entries with the same value are removed in place. */
+void Compiler::removeDups( ActionTable &table )
+{
+	for ( int keep = 0; keep < table.length(); keep++ ) {
+		/* Scan everything after the kept entry. The index only moves on
+		 * when nothing was removed, since a removal shifts the rest down. */
+		int scan = keep + 1;
+		while ( scan < table.length() ) {
+			if ( table[scan].value == table[keep].value )
+				table.vremove( scan );
+			else
+				scan += 1;
+		}
+	}
+}
+
+/* Strip duplicate actions from every action table in the graph: those on each
+ * transition plus the to-state, from-state and eof tables of each state. It
+ * is meant to run once all actions have reached their final place. */
+void Compiler::removeActionDups( FsmGraph *graph )
+{
+	for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
+		/* Transition tables first. */
+		for ( TransList::Iter tr = st->outList; tr.lte(); tr++ ) {
+			removeDups( tr->actionTable );
+		}
+
+		/* Then the per-state tables. */
+		removeDups( st->toStateActionTable );
+		removeDups( st->fromStateActionTable );
+		removeDups( st->eofActionTable );
+	}
+}
+
+/* Create an action with the given name and inline list, append it to the
+ * action list and return it. The action is given a location of line 1,
+ * column 1 with no file name. */
+Action *Compiler::newAction( const String &name, InlineList *inlineList )
+{
+	InputLoc where;
+	where.fileName = 0;
+	where.line = 1;
+	where.col = 1;
+
+	Action *created = Action::cons( where, name, inlineList );
+	actionList.append( created );
+	return created;
+}
+
+/* Create the three actions used by the longest-match machinery and reserve
+ * their action orderings. Does nothing when there are no region sets. */
+void Compiler::initLongestMatchData()
+{
+	if ( regionSetList.length() <= 0 )
+		return;
+
+	/* initActId gives act a default value. */
+	InlineList *initActCode = InlineList::cons();
+	initActCode->append( InlineItem::cons( InputLoc(), InlineItem::LmInitAct ) );
+	initActId = newAction( "initact", initActCode );
+	initActId->isLmAction = true;
+
+	/* setTokStart sets tokstart. */
+	InlineList *tokStartCode = InlineList::cons();
+	tokStartCode->append( InlineItem::cons( InputLoc(), InlineItem::LmSetTokStart ) );
+	setTokStart = newAction( "tokstart", tokStartCode );
+	setTokStart->isLmAction = true;
+
+	/* setTokEnd sets tokend. */
+	InlineList *tokEndCode = InlineList::cons();
+	tokEndCode->append( InlineItem::cons( InputLoc(), InlineItem::LmSetTokEnd ) );
+	setTokEnd = newAction( "tokend", tokEndCode );
+	setTokEnd->isLmAction = true;
+
+	/* The orderings are taken now so that these actions come ahead of all
+	 * user action embeddings. */
+	initActIdOrd = curActionOrd++;
+	setTokStartOrd = curActionOrd++;
+	setTokEndOrd = curActionOrd++;
+}
+
+/* Final passes over a freshly built graph. The order of the steps matters. */
+void Compiler::finishGraphBuild( FsmGraph *graph )
+{
+	/* Labels that still point to several states are referenced only by
+	 * gotos and calls. Turn them into deterministic entry points. */
+	graph->deterministicEntry();
+
+	/* From here on no new states are constructed. */
+
+	/* Move the global error actions into place, state by state. */
+	for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
+		graph->transferErrorActions( st, 0 );
+	}
+
+	removeActionDups( graph );
+
+	/* Drop unreachable states. Dead end states are not expected: only the
+	 * subtract and intersection operators can create them and they clean
+	 * up after themselves. */
+	graph->removeUnreachableStates();
+
+	/* With no more fsm operations to come, action ordering numbers serve no
+	 * purpose and would only get in the way of minimization. */
+	graph->nullActionKeys();
+
+	/* The same goes for transition priorities. */
+	graph->clearAllPriorities();
+
+	/* Minimize again even if every op already did: with the keys cleared a
+	 * smaller machine may now be possible. */
+	graph->minimizePartition2();
+	graph->compressTransitions();
+}
+
+/* Build the name tree by asking every region implementation to add its names.
+ * Name ids restart at 1. The function always returns 0. */
+NameInst *Compiler::makeNameTree()
+{
+	nextNameId = 1;
+
+	for ( RegionImplList::Iter region = regionImplList; region.lte(); region++ )
+		region->makeNameTree( region->loc, this );
+
+	return 0;
+}
+
+/* Build one graph per region implementation and combine them into a single
+ * machine.
+ *
+ * Returns the combined graph with its nameIndex set. With no regions at all
+ * the result is a lambda machine. lmRequiresErrorState is raised on the
+ * result if any region's longest-match switch handles errors. The caller owns
+ * the returned graph. */
+FsmGraph *Compiler::makeAllRegions()
+{
+	/* Build the name tree and supporting data structures. */
+	makeNameTree();
+	NameInst **nameIndex = makeNameIndex();
+
+	int numGraphs = 0;
+	FsmGraph **graphs = new FsmGraph*[regionImplList.length()];
+
+	/* Make all the instantiations. */
+	for ( RegionImplList::Iter rel = regionImplList; rel.lte(); rel++ ) {
+		/* Build the graph from a walk of the parse tree. */
+		FsmGraph *newGraph = rel->walk( this );
+
+		/* Wrap up the construction. */
+		finishGraphBuild( newGraph );
+
+		/* Save off the new graph. */
+		graphs[numGraphs++] = newGraph;
+	}
+
+	/* NOTE: If putting in minimization here we need to include eofTarget
+	 * into the minimization algorithm. It is currently set by the longest
+	 * match operator and not considered anywhere else. */
+
+	FsmGraph *all;
+	if ( numGraphs == 0 ) {
+		all = new FsmGraph;
+		all->lambdaFsm();
+	}
+	else {
+		/* Add all the other graphs into the first. */
+		all = graphs[0];
+		all->globOp( graphs+1, numGraphs-1 );
+	}
+
+	/* The pointer array was allocated on both paths, including the
+	 * zero-length case, so it is released on both. Previously it leaked
+	 * when there were no regions. */
+	delete[] graphs;
+
+	/* Go through all the token regions and check for lmRequiresErrorState. */
+	for ( RegionImplList::Iter reg = regionImplList; reg.lte(); reg++ ) {
+		if ( reg->lmSwitchHandlesError )
+			all->lmRequiresErrorState = true;
+	}
+
+	all->nameIndex = nameIndex;
+
+	return all;
+}
+
+/* Walk an inline list on behalf of an action, recursing into everything
+ * nested in it: the actions of a longest-match switch's token instances, the
+ * action of a single longest-match part, and any child list of an item. */
+void Compiler::analyzeAction( Action *action, InlineList *inlineList )
+{
+	/* FIXME: Actions used as conditions should be very constrained. */
+	for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
+		switch ( item->type ) {
+		case InlineItem::LmSwitch: {
+			/* Every token instance of the region may carry an action. */
+			RegionImpl *region = item->tokenRegion;
+			for ( TokenInstanceListReg::Iter inst = region->tokenInstanceList;
+					inst.lte(); inst++ )
+			{
+				if ( inst->action != 0 )
+					analyzeAction( action, inst->action->inlineList );
+			}
+			break;
+		}
+		case InlineItem::LmOnLast:
+		case InlineItem::LmOnNext:
+		case InlineItem::LmOnLagBehind: {
+			/* A single longest-match part. */
+			TokenInstance *part = item->longestMatchPart;
+			if ( part->action != 0 )
+				analyzeAction( action, part->action->inlineList );
+			break;
+		}
+		default:
+			break;
+		}
+
+		/* Nested items, whatever the item type. */
+		if ( item->children != 0 )
+			analyzeAction( action, item->children );
+	}
+}
+
+/* Analyze every action in the action list, then count how often each action
+ * is referenced from the graph. The counts are kept per kind of reference:
+ * transition, to-state, from-state and eof. */
+void Compiler::analyzeGraph( FsmGraph *graph )
+{
+	for ( ActionList::Iter action = actionList; action.lte(); action++ )
+		analyzeAction( action, action->inlineList );
+
+	for ( StateList::Iter state = graph->stateList; state.lte(); state++ ) {
+		/* References from the transitions leaving this state. */
+		for ( TransList::Iter tr = state->outList; tr.lte(); tr++ ) {
+			for ( ActionTable::Iter ref = tr->actionTable; ref.lte(); ref++ )
+				ref->value->numTransRefs += 1;
+		}
+
+		/* References from the state itself. */
+		for ( ActionTable::Iter ref = state->toStateActionTable; ref.lte(); ref++ )
+			ref->value->numToStateRefs += 1;
+
+		for ( ActionTable::Iter ref = state->fromStateActionTable; ref.lte(); ref++ )
+			ref->value->numFromStateRefs += 1;
+
+		for ( ActionTable::Iter ref = state->eofActionTable; ref.lte(); ref++ )
+			ref->value->numEofRefs += 1;
+	}
+}
+
+/* Build the complete scanner machine. Returns 0 if any errors have been
+ * counted so far, otherwise the analyzed graph with its states ordered and
+ * numbered. */
+FsmGraph *Compiler::makeScanner()
+{
+	/* Make the graph, do minimization. */
+	FsmGraph *scanner = makeAllRegions();
+
+	/* With errors in the input file nothing is to be written. */
+	if ( gblErrorCount > 0 )
+		return 0;
+
+	analyzeGraph( scanner );
+
+	/* An error state is added when the longest match operator requires one
+	 * or when the graph reports error transitions. */
+	const bool needsErrState =
+			scanner->lmRequiresErrorState || scanner->hasErrorTrans();
+	if ( needsErrState )
+		scanner->errState = scanner->addState();
+
+	/* All final states must get a larger state number than all non-final
+	 * states, which is what makes the first_final mechanism work. The order
+	 * should also be predictable. So: depth-first order, then a stable sort
+	 * by final state status, then number from zero. */
+	scanner->depthFirstOrdering();
+	scanner->sortStatesByFinal();
+	scanner->setStateNumbers( 0 );
+
+	return scanner;
+}
+
+/* Create the non-terminal for a repeat of ut, with two productions: one that
+ * pairs an item with the repeat itself (repeat first when left is set, item
+ * first otherwise) and one that is empty. Both productions are appended to
+ * the non-terminal's definitions and to the global production list. Returns
+ * the new non-terminal. */
+LangEl *Compiler::makeRepeatProd( const InputLoc &loc, Namespace *nspace,
+		const String &repeatName, UniqueType *ut, bool left )
+{
+	LangEl *repeatEl = addLangEl( this, nspace, repeatName, LangEl::NonTerm );
+	repeatEl->isRepeat = true;
+	repeatEl->leftRecursive = left;
+
+	ProdElList *recursiveEls = new ProdElList;
+
+	/* Recursive production: reference to the item ... */
+	TypeRef *itemRef = TypeRef::cons( loc, ut );
+	ProdEl *itemEl = new ProdEl( ProdEl::ReferenceType,
+			InputLoc(), 0, false, itemRef, 0 );
+
+	/* ... and reference to the repeat itself. */
+	UniqueType *repeatUT = findUniqueType( TYPE_TREE, repeatEl );
+	TypeRef *selfRef = TypeRef::cons( loc, repeatUT );
+	ProdEl *selfEl = new ProdEl( ProdEl::ReferenceType,
+			InputLoc(), 0, false, selfRef, 0 );
+
+	/* Put the self reference on the side the recursion is on. */
+	if ( left ) {
+		recursiveEls->append( selfEl );
+		recursiveEls->append( itemEl );
+	}
+	else {
+		recursiveEls->append( itemEl );
+		recursiveEls->append( selfEl );
+	}
+
+	Production *recursiveProd = Production::cons( InputLoc(),
+			repeatEl, recursiveEls, String(), false, 0,
+			prodList.length(), repeatEl->defList.length() );
+
+	repeatEl->defList.append( recursiveProd );
+	prodList.append( recursiveProd );
+
+	/* Empty production, which ends the repeat. */
+	ProdElList *emptyEls = new ProdElList;
+
+	Production *emptyProd = Production::cons( InputLoc(),
+			repeatEl, emptyEls, String(), false, 0,
+			prodList.length(), repeatEl->defList.length() );
+
+	repeatEl->defList.append( emptyProd );
+	prodList.append( emptyProd );
+
+	return repeatEl;
+}
+
+/* Create the non-terminal for a list of ut, with two productions: one that
+ * pairs an item with the list itself (list first when left is set, item first
+ * otherwise) and one that holds a single item. Both productions are appended
+ * to the non-terminal's definitions and to the global production list.
+ * Returns the new non-terminal. */
+LangEl *Compiler::makeListProd( const InputLoc &loc, Namespace *nspace,
+		const String &listName, UniqueType *ut, bool left )
+{
+	LangEl *listEl = addLangEl( this, nspace, listName, LangEl::NonTerm );
+	listEl->isList = true;
+	listEl->leftRecursive = left;
+
+	/* Recursive production: reference to the item ... */
+	TypeRef *itemRef = TypeRef::cons( loc, ut );
+	ProdEl *itemEl = new ProdEl( ProdEl::ReferenceType, InputLoc(), 0, false, itemRef, 0 );
+
+	/* ... and reference to the list itself. */
+	UniqueType *listUT = findUniqueType( TYPE_TREE, listEl );
+	TypeRef *selfRef = TypeRef::cons( loc, listUT );
+	ProdEl *selfEl = new ProdEl( ProdEl::ReferenceType, loc, 0, false, selfRef, 0 );
+
+	/* Put the self reference on the side the recursion is on. */
+	ProdElList *recursiveEls = new ProdElList;
+	if ( left ) {
+		recursiveEls->append( selfEl );
+		recursiveEls->append( itemEl );
+	}
+	else {
+		recursiveEls->append( itemEl );
+		recursiveEls->append( selfEl );
+	}
+
+	Production *recursiveProd = Production::cons( loc,
+			listEl, recursiveEls, String(), false, 0,
+			prodList.length(), listEl->defList.length() );
+
+	listEl->defList.append( recursiveProd );
+	prodList.append( recursiveProd );
+
+	/* Base production: exactly one item. */
+	TypeRef *singleRef = TypeRef::cons( loc, ut );
+	ProdEl *singleEl = new ProdEl( ProdEl::ReferenceType, loc, 0, false, singleRef, 0 );
+
+	ProdElList *singleEls = new ProdElList;
+	singleEls->append( singleEl );
+
+	Production *singleProd = Production::cons( loc,
+			listEl, singleEls, String(), false, 0,
+			prodList.length(), listEl->defList.length() );
+
+	listEl->defList.append( singleProd );
+	prodList.append( singleProd );
+
+	return listEl;
+}
+
+/* Create the non-terminal for an optional ut, with two productions: one that
+ * holds a single item and one that is empty. Both productions are appended to
+ * the non-terminal's definitions and to the global production list. Returns
+ * the new non-terminal. */
+LangEl *Compiler::makeOptProd( const InputLoc &loc, Namespace *nspace,
+		const String &optName, UniqueType *ut )
+{
+	LangEl *optEl = addLangEl( this, nspace, optName, LangEl::NonTerm );
+	optEl->isOpt = true;
+
+	ProdElList *presentEls = new ProdElList;
+
+	/* First production of the opt: the item is present. */
+	TypeRef *itemRef = TypeRef::cons( loc, ut );
+	ProdEl *itemEl = new ProdEl( ProdEl::ReferenceType, loc, 0, false, itemRef, 0 );
+	presentEls->append( itemEl );
+
+	Production *presentProd = Production::cons( loc,
+			optEl, presentEls, String(), false, 0,
+			prodList.length(), optEl->defList.length() );
+
+	optEl->defList.append( presentProd );
+	prodList.append( presentProd );
+
+	/* Second production of the opt: the item is absent. */
+	ProdElList *absentEls = new ProdElList;
+
+	Production *absentProd = Production::cons( loc,
+			optEl, absentEls, String(), false, 0,
+			prodList.length(), optEl->defList.length() );
+
+	optEl->defList.append( absentProd );
+	prodList.append( absentProd );
+
+	return optEl;
+}
+
+/* Return the first direct child namespace whose name equals the given one,
+ * or 0 if there is none. */
+Namespace *Namespace::findNamespace( const String &name )
+{
+	NamespaceVect::Iter child = childNamespaces;
+	while ( child.lte() ) {
+		const bool sameName = strcmp( name, (*child)->name ) == 0;
+		if ( sameName )
+			return *child;
+		child++;
+	}
+	return 0;
+}
+
+/* Return the first reduction in this namespace whose name equals the given
+ * one, or 0 if there is none. */
+Reduction *Namespace::findReduction( const String &name )
+{
+	ReductionVect::Iter red = reductions;
+	while ( red.lte() ) {
+		const bool sameName = strcmp( name, (*red)->name ) == 0;
+		if ( sameName )
+			return *red;
+		red++;
+	}
+	return 0;
+}
+
+/* Continue a qualified lookup below an already resolved namespace. Each
+ * remaining part of the qualification must name a direct child of the
+ * namespace reached so far. Returns the namespace reached once the parts run
+ * out, or 0 as soon as a part cannot be found. In that case qualPart is left
+ * on the part that failed. */
+Namespace *NamespaceQual::searchFrom( Namespace *from, StringVect::Iter &qualPart )
+{
+	for ( ; qualPart.lte(); qualPart.increment() ) {
+		from = from->findNamespace( *qualPart );
+		if ( from == 0 )
+			return 0;
+	}
+
+	return from;
+}
+
+/* Resolve this qualification to a namespace. A non-null result is cached and
+ * returned directly on later calls.
+ *
+ * - No qualifying names: the namespace the qualification was declared in.
+ * - First name is "root": search downward from the root namespace using the
+ *   remaining names.
+ * - Otherwise: walk upward from the declaring namespace until one has a child
+ *   with the first name, then search downward from that child.
+ *
+ * Returns 0 when the lookup fails. */
+Namespace *NamespaceQual::getQual( Compiler *pd )
+{
+	/* Do the search only once. */
+	if ( cachedNspaceQual != 0 )
+		return cachedNspaceQual;
+
+	/* Unqualified. */
+	if ( qualNames.length() == 0 ) {
+		cachedNspaceQual = declInNspace;
+		return cachedNspaceQual;
+	}
+
+	StringVect::Iter qualPart = qualNames;
+
+	/* Explicitly rooted: skip "root" and go down from the root namespace. */
+	if ( strcmp( qualNames[0], "root" ) == 0 ) {
+		qualPart.increment();
+		cachedNspaceQual = searchFrom( pd->rootNamespace, qualPart );
+		return cachedNspaceQual;
+	}
+
+	/* Move upwards through the enclosing namespaces looking for the first
+	 * part of the qualification. */
+	for ( Namespace *scope = declInNspace; scope != 0; scope = scope->parentNamespace ) {
+		Namespace *child = scope->findNamespace( *qualPart );
+		if ( child == 0 )
+			continue;
+
+		/* Found the first part. Resolve the rest below it. */
+		qualPart.increment();
+		cachedNspaceQual = searchFrom( child, qualPart );
+		return cachedNspaceQual;
+	}
+
+	/* No enclosing namespace knows the first part. */
+	cachedNspaceQual = 0;
+	return cachedNspaceQual;
+}
+
+/* Give a token region that has no token instances a single default pattern
+ * that matches any character. The region is marked wasEmpty. Nothing happens
+ * for a null region or one that already has token instances. */
+void Compiler::initEmptyScanner( RegionSet *regionSet, TokenRegion *reg )
+{
+	if ( reg == 0 || reg->impl->tokenInstanceList.length() != 0 )
+		return;
+
+	reg->impl->wasEmpty = true;
+
+	/* Counter shared by all calls so that each generated name differs. */
+	static int def = 1;
+	String name( 64, "__%p_DEF_PAT_%d", reg, def++ );
+
+	LexJoin *anyJoin = LexJoin::cons( LexExpression::cons( BT_Any ) );
+
+	TokenDef *defaultDef = TokenDef::cons( name, String(), false, false,
+			anyJoin, 0, internal, nextTokenId++, rootNamespace,
+			regionSet, 0, 0 );
+
+	TokenInstance *defaultInst = TokenInstance::cons( defaultDef,
+			anyJoin, internal, nextTokenId++,
+			rootNamespace, reg );
+
+	reg->impl->tokenInstanceList.append( defaultInst );
+
+	/* These do not go in the namespace, so they cannot get declared in the
+	 * declare pass. The language element is added here instead. */
+	LangEl *termEl = addLangEl( this, rootNamespace, name, LangEl::Term );
+
+	defaultInst->tokenDef->tdLangEl = termEl;
+	termEl->tokenDef = defaultDef;
+}
+
+/* Run initEmptyScanner over the four token regions of every region set. */
+void Compiler::initEmptyScanners()
+{
+	RegionSetList::Iter set = regionSetList;
+	while ( set.lte() ) {
+		initEmptyScanner( set, set->tokenIgnore );
+		initEmptyScanner( set, set->tokenOnly );
+		initEmptyScanner( set, set->ignoreOnly );
+		initEmptyScanner( set, set->collectIgnore );
+		set++;
+	}
+}
+
+/*
+ * Parse one pattern or constructor from sourceStream using the parser
+ * identified by parserId. A new pda_run is allocated and returned to the
+ * caller in all cases. On a parse error a message prefixed with the location
+ * is written to cerr and gblErrorCount is incremented.
+ */
+pda_run *Compiler::parsePattern( program_t *prg, tree_t **sp, const InputLoc &loc,
+		int parserId, struct input_impl *sourceStream )
+{
+	struct pda_run *pdaRun = new pda_run;
+	colm_pda_init( prg, pdaRun, pdaTables, parserId, 0, false, 0, false );
+
+	long pcr = colm_parse_loop( prg, sp, pdaRun, sourceStream, PCR_START );
+	assert( pcr == PCR_DONE );
+
+	/* Successful parse: nothing to report. */
+	if ( ! pdaRun->parse_error )
+		return pdaRun;
+
+	/* Location prefix, falling back to a generic name when there is no file. */
+	const char *fileName = loc.fileName != 0 ? loc.fileName : "<input>";
+	cerr << fileName << ":" << loc.line << ":" << loc.col;
+
+	if ( pdaRun->parse_error_text == 0 ) {
+		cerr << ": parse error";
+	}
+	else {
+		/* Use the error text recorded by the parse run. */
+		colm_data *tokdata = pdaRun->parse_error_text->tokdata;
+		cerr << ": relative error: ";
+		cerr.write( (const char*)tokdata->data, tokdata->length );
+	}
+
+	cerr << endl;
+	gblErrorCount += 1;
+
+	return pdaRun;
+}
+
+/*
+ * Parse every constructor in replList and every pattern in patternList,
+ * storing the resulting pda_run on each. If any parse reported an error the
+ * process exits with status 1; otherwise the patterns are filled in.
+ */
+void Compiler::parsePatterns()
+{
+	program_t *prg = colm_new_program( runtimeData );
+
+	colm_set_debug( prg, gblActiveRealm );
+
+	/* Turn off context-dependent parsing. */
+	prg->ctx_dep_parsing = 0;
+
+	tree_t **sp = prg->stack_root;
+
+	/* Constructors: only those that have a language element get parsed. */
+	for ( ConsList::Iter cons = replList; cons.lte(); cons++ ) {
+		if ( cons->langEl == 0 )
+			continue;
+
+		struct input_impl *input =
+				colm_impl_new_cons( strdup("<internal>"), cons );
+		cons->pdaRun = parsePattern( prg, sp, cons->loc,
+				cons->langEl->parserId, input );
+	}
+
+	/* Patterns. */
+	for ( PatList::Iter pat = patternList; pat.lte(); pat++ ) {
+		struct input_impl *input =
+				colm_impl_new_pat( strdup("<internal>"), pat );
+		pat->pdaRun = parsePattern( prg, sp, pat->loc,
+				pat->langEl->parserId, input );
+	}
+
+	/* Bail on above errors. */
+	if ( gblErrorCount > 0 )
+		exit(1);
+
+	fillInPatterns( prg );
+}
+
+/*
+ * Gather the language elements that need a parser into parserEls. Targets of
+ * patterns and constructors that have no parser id yet are assigned the next
+ * one; afterwards every language element with a non-negative parser id is
+ * added to the set.
+ */
+void Compiler::collectParserEls( BstSet<LangEl*> &parserEls )
+{
+	for ( PatList::Iter pat = patternList; pat.lte(); pat++ ) {
+		LangEl *target = pat->langEl;
+
+		/* We assume the reduction action compilation phase was run before
+		 * pattern parsing and it decorated the pattern with the target type. */
+		assert( target != 0 );
+		if ( target->type != LangEl::NonTerm )
+			error(pat->loc) << "pattern type is not a non-terminal" << endp;
+
+		/* Already has a parser, nothing to assign. */
+		if ( target->parserId >= 0 )
+			continue;
+
+		/* Make a parser for the language element. */
+		parserEls.insert( target );
+		target->parserId = nextParserId++;
+	}
+
+	for ( ConsList::Iter repl = replList; repl.lte(); repl++ ) {
+		LangEl *target = repl->langEl;
+
+		/* We need the language element from the compilation process. */
+		assert( target != 0 );
+
+		if ( target->parserId >= 0 )
+			continue;
+
+		/* Make a parser for the language element. */
+		parserEls.insert( target );
+		target->parserId = nextParserId++;
+	}
+
+	/* Make parsers that we need. */
+	for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
+		if ( lel->parserId < 0 )
+			continue;
+		parserEls.insert( lel );
+	}
+}
+
+/*
+ * Emit the host-call glue to outStream: first a prototype for every function
+ * in inHostList, then the <objectName>_host_call dispatcher, which switches
+ * on the function id, pops the arguments from the VM stack, invokes the host
+ * function and pushes the result back.
+ */
+void Compiler::writeHostCall()
+{
+	ostream &out = *outStream;
+
+	/* Prototypes: one value_t parameter per declared parameter. */
+	for ( FunctionList::Iter hc = inHostList; hc.lte(); hc++ ) {
+		const long numParams = hc->paramList->length();
+
+		out << "value_t " << hc->hostCall << "( program_t *prg, tree_t **sp";
+		for ( long i = 0; i < numParams; i++ )
+			out << ", value_t";
+		out << " );\n";
+	}
+
+	/* Dispatcher header. */
+	out <<
+		"tree_t **" << objectName << "_host_call( program_t *prg, long code, tree_t **sp )\n"
+		"{\n"
+		" value_t rtn = 0;\n"
+		" switch ( code ) {\n";
+
+	/* One case per host function. */
+	for ( FunctionList::Iter hc = inHostList; hc.lte(); hc++ ) {
+		const long numParams = hc->paramList->length();
+
+		out << " case " << hc->funcId << ": {\n";
+
+		/* Pops are emitted for the last parameter first. */
+		for ( long pos = numParams - 1; pos >= 0; pos-- )
+			out << " value_t p" << pos << " = vm_pop_value();\n";
+
+		/* The call passes the parameters in declaration order. */
+		out << " rtn = " << hc->hostCall << "( prg, sp";
+		for ( long pos = 0; pos < numParams; pos++ )
+			out << ", p" << pos;
+
+		out << " );\n"
+			" break;\n"
+			" }\n";
+	}
+
+	/* Dispatcher footer. */
+	out <<
+		" }\n"
+		" vm_push_value( rtn );\n"
+		" return sp;\n"
+		"}\n";
+}
+
+/*
+ * Write the generated program to outStream: includes, runtime definition,
+ * scanner code, parser data, runtime data and the host-call glue. The commit
+ * stub is written when includeCommit is set, and a main function is written
+ * unless building a library (gblLibrary). The stream is flushed at the end.
+ */
+void Compiler::generateOutput( long activeRealm, bool includeCommit )
+{
+	FsmCodeGen *fsmGen = new FsmCodeGen( *outStream, redFsm, fsmTables );
+
+	PdaCodeGen *pdaGen = new PdaCodeGen( *outStream );
+
+	fsmGen->writeIncludes();
+	pdaGen->defineRuntime();
+	fsmGen->writeCode();
+
+	/* Make parsers that we need. */
+	pdaGen->writeParserData( 0, pdaTables );
+
+	/* Write the runtime data. */
+	pdaGen->writeRuntimeData( runtimeData, pdaTables );
+
+	writeHostCall();
+
+	if ( includeCommit )
+		writeCommitStub();
+
+	if ( !gblLibrary )
+		fsmGen->writeMain( activeRealm );
+
+	outStream->flush();
+
+	/* The generators are only needed while writing; release them so that
+	 * repeated calls do not leak. */
+	delete pdaGen;
+	delete fsmGen;
+}
+
+
+/*
+ * Prepare the grammar for parser construction: wrap non-terminals, assign
+ * ids and names, check for undefined language elements, build the id-indexed
+ * language element table, make the production FSMs and allocate the runtime
+ * data structure.
+ */
+void Compiler::prepGrammar()
+{
+	/* This will create language elements. */
+	wrapNonTerminals();
+
+	makeLangElIds();
+	makeStructElIds();
+	makeLangElNames();
+	makeDefinitionNames();
+	noUndefindLangEls();
+
+	/* Put the language elements in an index by language element id. The
+	 * table is created with every slot null, then filled from the list. */
+	const int indexLen = nextLelId + 1;
+	langElIndex = new LangEl*[indexLen]();
+	for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
+		langElIndex[lel->id] = lel;
+	}
+
+	makeProdFsms();
+
+	/* Allocate the Runtime data now. Every PdaTable that we make
+	 * will reference it, but it will be filled in after all the tables are
+	 * built. */
+	runtimeData = new colm_sections;
+}
+
+/*
+ * Top-level compilation driver. Runs the passes in a fixed order: declare and
+ * resolve types, build the scanner graph, prepare the grammar, place objects
+ * and functions, compile bytecode, reduce the scanner, build the parsers and
+ * scanner tables, assemble the runtime data, and finally parse the
+ * constructors and patterns.
+ */
+void Compiler::compile()
+{
+ beginProcessing();
+ initKeyOps();
+
+ /* Declare types. */
+ declarePass();
+
+ /* Resolve type references. */
+ resolvePass();
+
+ makeTerminalWrappers();
+ makeEofElements();
+
+ /*
+ * Parsers
+ */
+
+ /* Init the longest match data */
+ initLongestMatchData();
+ FsmGraph *fsmGraph = makeScanner();
+
+ prepGrammar();
+
+ placeAllLanguageObjects();
+ placeAllStructObjects();
+ placeAllFrameObjects();
+ placeAllFunctions();
+
+ /* Compile bytecode. */
+ compileByteCode();
+
+ /* Make the reduced scanner. */
+ RedFsmBuild reduce( this, fsmGraph );
+ redFsm = reduce.reduceMachine();
+
+ /* Collect the language elements that need parsers, then build them. */
+ BstSet<LangEl*> parserEls;
+ collectParserEls( parserEls );
+
+ makeParser( parserEls );
+
+ /* Make the scanner tables. */
+ fsmTables = redFsm->makeFsmTables();
+
+ /* Now that all parsers are built, make the global runtimeData. */
+ makeRuntimeData();
+
+ /*
+ * All compilation is now complete.
+ */
+
+ /* Parse constructors and patterns. */
+ parsePatterns();
+}
+
diff --git a/src/compiler.h b/src/compiler.h
new file mode 100644
index 00000000..67d5b40e
--- /dev/null
+++ b/src/compiler.h
@@ -0,0 +1,1158 @@
+/*
+ * Copyright 2001-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _COLM_PARSEDATA_H
+#define _COLM_PARSEDATA_H
+
+#include <limits.h>
+
+#include <iostream>
+
+#include <avlmap.h>
+#include <avlset.h>
+#include <bstmap.h>
+#include <vector.h>
+#include <bstset.h>
+#include <dlist.h>
+#include <dlistmel.h>
+#include <fsmgraph.h>
+#include <compare.h>
+
+#include "global.h"
+#include "keyops.h"
+#include "parsetree.h"
+#include "cstring.h"
+#include "pdagraph.h"
+#include "pdarun.h"
+#include "bytecode.h"
+#include "program.h"
+#include "internal.h"
+
+using std::ostream;
+
+struct exit_object { };
+extern exit_object endp;
+void operator<<( std::ostream &out, exit_object & );
+extern const char *objectName;
+extern bool hostAdapters;
+
+/* Forwards. */
+struct RedFsm;
+struct LangEl;
+struct Compiler;
+struct PdaCodeGen;
+struct FsmCodeGen;
+
+/* Action type tags. makeReduceCode() ORs REDUCE_CODE or SHIFT_REDUCE_CODE
+ * into the low two bits of the code it builds; SHIFT_CODE presumably tags
+ * plain shifts in the same way. */
+#define SHIFT_CODE 0x1
+#define REDUCE_CODE 0x2
+#define SHIFT_REDUCE_CODE 0x3
+
+typedef Vector<const char**> CharVectVect;
+
+/* One entry in the stack of files currently being included. The stack is
+ * used to detect a recursive include structure. */
+struct IncludeStackItem
+{
+	IncludeStackItem( const char *name )
+	:
+		fileName( name )
+	{}
+
+	/* Name of the included file. The pointer is stored as given, not copied. */
+	const char *fileName;
+};
+
+typedef Vector<IncludeStackItem> IncludeStack;
+typedef Vector<const char *> ArgsVector;
+
+/* A name/value pair; instances are collected in the defineArgs vector. */
+struct DefineArg
+{
+	DefineArg( String argName, String argValue )
+	:
+		name( argName ),
+		value( argValue )
+	{}
+
+	String name;
+	String value;
+};
+
+typedef Vector<DefineArg> DefineVector;
+
+extern DefineVector defineArgs;
+extern ArgsVector includePaths;
+
+/* Build an action code for a reduction: the reduction number goes in the
+ * bits above the low two, which hold SHIFT_REDUCE_CODE or REDUCE_CODE
+ * depending on isShiftReduce. */
+inline long makeReduceCode( long reduction, bool isShiftReduce )
+{
+	const long kind = isShiftReduce ? SHIFT_REDUCE_CODE : REDUCE_CODE;
+	return kind | ( reduction << 2 );
+}
+
+struct ProdEl;
+struct ProdElList;
+struct PdaLiteral;
+struct Production;
+
+/* A pointer to this is in struct pda_run, but its specification is not known
+ * by the runtime code. The runtime functions that access it are defined in
+ * ctinput.cc and stubbed in fsmcodegen.cc. */
+struct bindings : public Vector<parse_tree_t*> { };
+
+struct DefListEl { Production *prev, *next; };
+struct LelDefListEl { Production *prev, *next; };
+typedef Vector< LangEl* > LangElVect;
+typedef Vector< ProdEl* > FactorVect;
+
+typedef AvlMap<String, long, ColmCmpStr> StringMap;
+typedef AvlMapEl<String, long> StringMapEl;
+
+/* Kind of precedence attached to a language element (see LangEl::predType
+ * and PredDecl::predType). NOTE(review): the names suggest left-associative,
+ * right-associative, non-associative and "no precedence" -- confirm against
+ * the precedence resolution code. */
+enum PredType {
+ PredLeft,
+ PredRight,
+ PredNonassoc,
+ PredNone
+};
+
+/* A precedence declaration: a type reference paired with a precedence value
+ * and type. Instances are linked into a PredDeclList through prev/next. */
+struct PredDecl
+{
+	/* predType is not supplied by the caller; it starts out as PredNone
+	 * rather than being left indeterminate. The list links start out null
+	 * and are set when the declaration is added to a list. */
+	PredDecl( TypeRef *typeRef, long predValue )
+	:
+		typeRef(typeRef),
+		predType(PredNone),
+		predValue(predValue),
+		prev(0),
+		next(0)
+	{}
+
+	TypeRef *typeRef;
+	PredType predType;
+	long predValue;
+
+	/* List links. */
+	PredDecl *prev, *next;
+};
+
+typedef DList<PredDecl> PredDeclList;
+
+/* A single production. It carries two sets of list links (DefListEl and
+ * LelDefListEl) so the same object can sit in a DefList and in a LelDefList
+ * at the same time. */
+struct Production
+	: public DefListEl, public LelDefListEl
+{
+	/* Pointers start out null, counters zero and flags false. */
+	Production()
+	:
+		prodName(0),
+		prodElList(0),
+		prodCommit(false),
+		redBlock(0),
+		prodId(0),
+		prodNum(0),
+		fsm(0),
+		fsmLength(0),
+		uniqueEmptyLeader(0),
+		isLeftRec(false),
+		localFrame(0),
+		lhsField(0),
+		predOf(0)
+	{}
+
+	/* Allocate a production and store the values supplied by the caller.
+	 * Every other field keeps the default set by the constructor. */
+	static Production* cons( const InputLoc &loc, LangEl *prodName, ProdElList *prodElList,
+			String name, bool prodCommit, CodeBlock *redBlock, int prodId, int prodNum )
+	{
+		Production *prod = new Production;
+
+		prod->loc = loc;
+		prod->prodName = prodName;
+		prod->prodElList = prodElList;
+		prod->_name = name;
+		prod->prodCommit = prodCommit;
+		prod->redBlock = redBlock;
+		prod->prodId = prodId;
+		prod->prodNum = prodNum;
+
+		return prod;
+	}
+
+	/* Values given to cons(). */
+	InputLoc loc;
+	LangEl *prodName;
+	ProdElList *prodElList;
+	String _name;
+	bool prodCommit;
+
+	CodeBlock *redBlock;
+
+	int prodId;
+	int prodNum;
+
+	/* Not set by cons(); filled in elsewhere. */
+	PdaGraph *fsm;
+	int fsmLength;
+	String data;
+	LongSet reducesTo;
+
+	LangEl *uniqueEmptyLeader;
+
+	ProdIdSet nonTermFirstSet;
+	AlphSet firstSet;
+	bool isLeftRec;
+
+	ObjectDef *localFrame;
+	ObjectField *lhsField;
+
+	LangEl *predOf;
+
+	UnsignedCharVect copy;
+};
+
+/* Three-way comparison of productions by prodId. */
+struct CmpDefById
+{
+	/* Returns -1, 0 or 1 as d1's prodId is less than, equal to or greater
+	 * than d2's. */
+	static int compare( Production *d1, Production *d2 )
+	{
+		if ( d1->prodId == d2->prodId )
+			return 0;
+		return d1->prodId < d2->prodId ? -1 : 1;
+	}
+};
+
+
+/* Map dotItems to productions. */
+typedef BstMap< int, Production*, CmpOrd<int> > DotItemIndex;
+typedef BstMapEl< int, Production*> DotItemIndexEl;
+
+/* List of productions linked through their DefListEl prev/next pointers. */
+struct DefList : public DListMel<Production, DefListEl> { };
+
+/* List of productions linked through their LelDefListEl prev/next pointers.
+ * Each non-terminal can have many productions. */
+struct LelDefList : public DListMel<Production, LelDefListEl> { };
+
+/* A set of machines made during a closure round. */
+typedef Vector< PdaGraph* > Machines;
+
+/* List of language elements. */
+typedef DList<LangEl> LelList;
+
+typedef Vector< TokenInstance* > TokenInstanceVect;
+
+struct UniqueType;
+
+typedef Vector<LangEl*> LangElVect;
+typedef BstSet<LangEl*> LangElSet;
+
+/* A language element class. Can be a nonTerm or a term. Elements are list
+ * nodes (DListEl) and are kept in a LelList. */
+struct LangEl : public DListEl<LangEl>
+{
+ enum Type { Unknown, Term, NonTerm };
+
+ /* Constructor and destructor are defined out of line. */
+ LangEl( Namespace *nspace, const String &name, Type type );
+ ~LangEl();
+
+ /* The region the language element was defined in. */
+ Namespace *nspace;
+
+ String name;
+ String lit;
+
+ String fullName;
+ String fullLit;
+
+ /* For referencing the type. */
+ String refName;
+
+ /* For declaring things inside the type. */
+ String declName;
+
+ String xmlTag;
+
+ Type type;
+ /* Element id; Compiler::prepGrammar uses it as the index into langElIndex. */
+ long id;
+ String displayString;
+ long numAppearances;
+ bool commit;
+ bool isIgnore;
+ bool reduceFirst;
+ bool isLiteral;
+ bool isRepeat;
+ bool isList;
+ bool isOpt;
+ bool parseStop;
+ bool isEOF;
+
+ /* For a list or a repeat. Defaults to right recursive. */
+ bool leftRecursive;
+
+ LangEl *repeatOf;
+
+ /* Productions from the language element if it is a non-terminal. */
+ LelDefList defList;
+
+ TokenDef *tokenDef;
+ Production *rootDef;
+ LangEl *termDup;
+ LangEl *eofLel;
+
+ PdaGraph *pdaGraph;
+ struct pda_tables *pdaTables;
+
+ PdaState *startState;
+
+ CodeBlock *transBlock;
+
+ ObjectDef *objectDef;
+
+ long thisSize;
+ long ofiOffset;
+
+ /* Id of the parser made for this element. Compiler::collectParserEls
+ * treats a negative value as "no parser assigned yet". */
+ long parserId;
+
+ PredType predType;
+ long predValue;
+
+ StructDef *contextDef;
+ StructDef *contextIn;
+ bool noPreIgnore;
+ bool noPostIgnore;
+ bool isZero;
+ RegionSet *regionSet;
+};
+
+/* One element of a production. Elements are linked into a ProdElList
+ * through prev/next. */
+struct ProdEl
+{
+	/* Language elements a factor node can be. */
+	enum Type {
+		LiteralType,
+		ReferenceType
+	};
+
+	/* Construct with a reference to a var def. The loc argument is accepted
+	 * but not stored. pos and the list links are given defined starting
+	 * values instead of being left indeterminate. */
+	ProdEl( Type type, const InputLoc &loc, ObjectField *captureField,
+			bool commit, TypeRef *typeRef, int priorVal )
+	:
+		type(type),
+		production(0),
+		pos(0),
+		captureField(captureField),
+		rhsElField(0),
+		commit(commit),
+		typeRef(typeRef),
+		langEl(0),
+		priorVal(priorVal),
+		prev(0),
+		next(0)
+	{}
+
+	/* Construct a plain reference element: no capture field, no commit and
+	 * a priority of zero. The loc argument is accepted but not stored. */
+	ProdEl( const InputLoc &loc, TypeRef *typeRef )
+	:
+		type(ReferenceType),
+		production(0),
+		pos(0),
+		captureField(0),
+		rhsElField(0),
+		commit(false),
+		typeRef(typeRef),
+		langEl(0),
+		priorVal(0),
+		prev(0),
+		next(0)
+	{}
+
+	Type type;
+	Production *production;
+	int pos;
+	ObjectField *captureField;
+	ObjectField *rhsElField;
+	bool commit;
+	TypeRef *typeRef;
+	LangEl *langEl;
+	int priorVal;
+
+	/* List links. */
+	ProdEl *prev, *next;
+};
+
+/* A list of production elements. */
+struct ProdElList : public DList<ProdEl>
+{
+ /* Defined out of line. NOTE(review): presumably builds the PdaGraph for
+ * prod by walking the elements of this list -- confirm. */
+ PdaGraph *walk( Compiler *pd, Production *prod );
+};
+
+/* A literal string in a type reference. This should be renamed. */
+struct PdaLiteral
+{
+	/* value starts at zero. */
+	PdaLiteral( const InputLoc &loc, const String &data )
+	:
+		loc(loc),
+		data(data),
+		value(0)
+	{}
+
+	InputLoc loc;
+	String data;
+	long value;
+};
+
+/* Nodes in the tree that use this action. */
+typedef Vector<NameInst*> ActionRefs;
+
+/* Element in list of actions. Contains the string for the code to execute.
+ * An action is both a list element (ActionList) and an AVL tree element
+ * keyed by its name. Instances are created through the cons() functions,
+ * which assign every counter and flag. */
+struct Action
+:
+	public DListEl<Action>,
+	public AvlTreeEl<Action>
+{
+public:
+
+	/* Make a named action with the given location and inline code. It has no
+	 * mark (MarkNone, markId -1), no action id yet (-1) and all reference
+	 * counts at zero. */
+	static Action *cons( const InputLoc &loc, const String &name, InlineList *inlineList )
+	{
+		Action *a = new Action;
+		a->loc = (loc);
+		a->name = (name);
+		a->markType = (MarkNone);
+		a->objField = (0);
+		a->markId = (-1);
+		a->inlineList = (inlineList);
+		a->actionId = (-1);
+		a->numTransRefs = (0);
+		a->numToStateRefs = (0);
+		a->numFromStateRefs = (0);
+		a->numEofRefs = (0);
+		a->numCondRefs = (0);
+		a->anyCall = (false);
+		a->isLmAction = (false);
+		return a;
+	}
+
+	/* Make a mark action: named "mark", with an empty inline list and the
+	 * given mark type and id. The location is left default-constructed. */
+	static Action *cons( MarkType markType, long markId )
+	{
+		Action *a = new Action;
+		a->name = ("mark");
+		a->markType = (markType);
+		a->objField = (0);
+		a->markId = (markId);
+		a->inlineList = (InlineList::cons());
+		a->actionId = (-1);
+		a->numTransRefs = (0);
+		a->numToStateRefs = (0);
+		a->numFromStateRefs = (0);
+		a->numEofRefs = (0);
+		a->numCondRefs = (0);
+		a->anyCall = (false);
+		a->isLmAction = (false);
+		return a;
+	}
+
+	/* Key for action dictionary. */
+	const String &getKey() const { return name; }
+
+	/* Data collected during parse. */
+	InputLoc loc;
+	String name;
+
+	MarkType markType;
+	ObjectField *objField;
+	long markId;
+
+	InlineList *inlineList;
+	int actionId;
+
+	/* Write a label for the action: its name if it has one, otherwise the
+	 * line and column where it was defined. */
+	void actionName( ostream &out )
+	{
+		if ( name != 0 )
+			out << name;
+		else
+			out << loc.line << ":" << loc.col;
+	}
+
+	/* Places in the input text that reference the action. */
+	ActionRefs actionRefs;
+
+	/* Number of references in the final machine: transition, to-state,
+	 * from-state and EOF references. Condition references are not counted.
+	 * Returns the actual count; it previously returned bool, which reduced
+	 * the sum to 0 or 1. Use in a boolean context is unaffected. */
+	int numRefs()
+		{ return numTransRefs + numToStateRefs + numFromStateRefs + numEofRefs; }
+	int numTransRefs;
+	int numToStateRefs;
+	int numFromStateRefs;
+	int numEofRefs;
+	int numCondRefs;
+	bool anyCall;
+
+	bool isLmAction;
+};
+
+/* A list of actions. */
+typedef DList<Action> ActionList;
+
+struct VarDef;
+struct LexJoin;
+struct LexTerm;
+struct FactorAug;
+struct FactorLabel;
+struct FactorRep;
+struct FactorNeg;
+struct Factor;
+struct Literal;
+struct Range;
+struct RegExpr;
+struct ReItem;
+struct ReOrBlock;
+struct ReOrItem;
+struct TokenRegion;
+
+/* Tree of instantiated names. */
+typedef BstMapEl<String, NameInst*> NameMapEl;
+typedef BstMap<String, NameInst*, ColmCmpStr> NameMap;
+typedef Vector<NameInst*> NameVect;
+typedef BstSet<NameInst*> NameSet;
+
+/* Node in the tree of instantiated names. */
+struct NameInst
+{
+	NameInst( int nameId )
+	:
+		id( nameId )
+	{}
+
+	int id;
+
+	/* Pointers for the name search queue. */
+	NameInst *prev, *next;
+};
+
+typedef DList<NameInst> NameInstList;
+
+/* Stack frame used in walking the name tree. NOTE(review): the fields appear
+ * to hold the previous values of Compiler::curNameInst, curNameChild and
+ * localNameScope so they can be restored afterwards -- confirm against the
+ * code that pushes and pops frames. */
+struct NameFrame
+{
+ NameInst *prevNameInst;
+ int prevNameChild;
+ NameInst *prevLocalScope;
+};
+
+/* Class to collect information about the machine during the
+ * parse of input. */
+struct Compiler
+{
+ /* Create a new parse data object. This is done at the beginning of every
+ * fsm specification. */
+ Compiler();
+ ~Compiler();
+
+ /*
+ * Setting up the graph dict.
+ */
+
+ void compileLiteralTokens();
+ void initEmptyScanners();
+ void initEmptyScanner( RegionSet *regionSet, TokenRegion *reg );
+ void initUniqueTypes();
+
+ /* Initialize a graph dict with the basic fsms. */
+ void initGraphDict();
+ void createBuiltin( const char *name, BuiltinMachine builtin );
+
+ /* Make a name id in the current name instantiation scope if it is not
+ * already there. */
+ NameInst *makeJoinNameTree( LexJoin *join );
+ NameInst *makeNameTree();
+ NameInst **makeNameIndex();
+
+ void printNameTree( NameInst *rootName );
+ void printNameIndex( NameInst **nameIndex );
+
+ /* Resolve name references in action code and epsilon transitions. */
+ NameSet resolvePart( NameInst *refFrom, const char *data, bool recLabelsOnly );
+ void resolveFrom( NameSet &result, NameInst *refFrom,
+ const NameRef &nameRef, int namePos );
+
+ /* Set the alphabet type. Returns false if the types are not valid. */
+ bool setAlphType( char *s1, char *s2 );
+ bool setAlphType( char *s1 );
+
+ /* Unique actions. */
+ void removeDups( ActionTable &actionTable );
+ void removeActionDups( FsmGraph *graph );
+
+ /* Dumping the name instantiation tree. */
+ void printNameInst( NameInst *nameInst, int level );
+
+ /* Make the graph from a graph dict node. Does minimization. */
+ void finishGraphBuild( FsmGraph *graph );
+ FsmGraph *makeAllRegions();
+ FsmGraph *makeScanner();
+
+ void analyzeAction( Action *action, InlineList *inlineList );
+ void analyzeGraph( FsmGraph *graph );
+ void resolvePrecedence( PdaGraph *pdaGraph );
+ LangEl *predOf( PdaTrans *trans, long action );
+ bool precedenceSwap( long action1, long action2, LangEl *l1, LangEl *l2 );
+ bool precedenceRemoveBoth( LangEl *l1, LangEl *l2 );
+
+ void placeFrameFields( ObjectDef *localFrame );
+ void placeUserFunction( Function *func, bool isUserIter );
+ void placeAllStructObjects();
+ void placeAllLanguageObjects();
+ void placeAllFrameObjects();
+ void placeAllFunctions();
+
+ void initKeyOps();
+
+ /*
+ * Data collected during the parse.
+ */
+
+ /* List of actions. Will be pasted into a switch statement. */
+ ActionList actionList;
+
+ /* The id of the next priority name and label. */
+ int nextPriorKey, nextNameId;
+
+ /* Alphabet type. */
+ const HostType *userAlphType;
+ bool alphTypeSet;
+
+ /* Element type and get key expression. */
+ InlineList *getKeyExpr;
+ InlineList *accessExpr;
+ InlineList *curStateExpr;
+
+ /* The alphabet range. */
+ char *lowerNum, *upperNum;
+ Key lowKey, highKey;
+ InputLoc rangeLowLoc, rangeHighLoc;
+
+ /* Number of errors encountered parsing the fsm spec. */
+ int errorCount;
+
+ /* Counting the action and priority ordering. */
+ int curActionOrd;
+ int curPriorOrd;
+
+ /* Root of the name tree. */
+ NameInst *curNameInst;
+ int curNameChild;
+ NameInstList nameInstList;
+
+ /* The place where resolved epsilon transitions go. These cannot go into
+ * the parse tree because a single epsilon op can resolve more than once
+ * to different nameInsts if the machine it's in is used more than once. */
+ NameVect epsilonResolvedLinks;
+ int nextEpsilonResolvedLink;
+
+ /* Root of the name tree used for doing local name searches. */
+ NameInst *localNameScope;
+
+ void setLmInRetLoc( InlineList *inlineList );
+ void initLongestMatchData();
+
+ /* Counter for assigning ids to longest match items. */
+ int nextTokenId;
+
+ RegionImplList regionImplList;
+ RegionList regionList;
+ RegionSetList regionSetList;
+
+ NamespaceList namespaceList;
+
+ Action *newAction( const String &name, InlineList *inlineList );
+
+ Action *setTokStart;
+ int setTokStartOrd;
+
+ Action *initActId;
+ int initActIdOrd;
+
+ Action *setTokEnd;
+ int setTokEndOrd;
+
+ CodeBlock *rootCodeBlock;
+
+ void beginProcessing()
+ {
+ ::keyOps = &thisKeyOps;
+ }
+
+ KeyOps thisKeyOps;
+
+ UniqueType *mainReturnUT;
+
+ CharVectVect streamFileNames;
+
+ /* CONTEXT FREE */
+ ProdElList *makeProdElList( LangEl *langEl );
+ void wrapNonTerminals();
+ void makeDefinitionNames();
+ void noUndefindLangEls();
+ void declareBaseLangEls();
+ void makeLangElIds();
+ void makeStructElIds();
+ void makeLangElNames();
+ void makeTerminalWrappers();
+ void makeEofElements();
+ void makeIgnoreCollectors();
+ void resolvePrecedence();
+ void resolveReductionActions();
+ void findReductionActionProds();
+ void resolveReducers();
+
+ Production *findProductionByLabel( LangEl *langEl, String label );
+
+ void declarePass();
+ void resolvePass();
+
+ /* Parser generation. */
+ void advanceReductions( PdaGraph *pdaGraph );
+ void sortActions( PdaGraph *pdaGraph );
+ void addDupTerms( PdaGraph *pdaGraph );
+ void linkExpansions( PdaGraph *pdaGraph );
+ void lalr1FollowEpsilonOp( PdaGraph *pdaGraph );
+
+ void transferCommits( PdaGraph *pdaGraph, PdaTrans *trans, PdaState *state, long prodId );
+
+ void lalr1AddFollow2( PdaGraph *pdaGraph, PdaTrans *trans, FollowToAdd &followKeys );
+ void lalr1AddFollow1( PdaGraph *pdaGraph, PdaState *state );
+
+ void lalr1AddFollow2( PdaGraph *pdaGraph, PdaTrans *trans, long followKey, long prior );
+ void lalr1AddFollow1( PdaGraph *pdaGraph, PdaTrans *trans );
+
+ void lalr1AddFollowSets( PdaGraph *pdaGraph, LangElSet &parserEls );
+
+ void lr0BringInItem( PdaGraph *pdaGraph, PdaState *dest, PdaState *prodState,
+ PdaTrans *expandFrom, Production *prod );
+ void lr0InvokeClosure( PdaGraph *pdaGraph, PdaState *state );
+ void lr0CloseAllStates( PdaGraph *pdaGraph );
+
+ void lalr1GenerateParser( PdaGraph *pdaGraph, LangElSet &parserEls );
+
+ void reduceActions( PdaGraph *pdaGraph );
+
+ bool makeNonTermFirstSetProd( Production *prod, PdaState *state );
+ void makeNonTermFirstSets();
+
+ bool makeFirstSetProd( Production *prod, PdaState *state );
+ void makeFirstSets();
+
+ int findIndexOff( struct pda_tables *pdaTables, PdaGraph *pdaGraph,
+ PdaState *state, int &currLen );
+ void trySetTime( PdaTrans *trans, long code, long &time );
+ void addRegion( PdaState *tabState, PdaTrans *pdaTrans, long pdaKey,
+ bool noPreIgnore, bool noPostIgnore );
+ PdaState *followProd( PdaState *tabState, PdaState *prodState );
+ void findFollow( AlphSet &result, PdaState *overTab,
+ PdaState *overSrc, Production *parentDef );
+ void pdaActionOrder( PdaGraph *pdaGraph, LangElSet &parserEls );
+ void pdaOrderFollow( LangEl *rootEl, PdaState *tabState,
+ PdaTrans *tabTrans, PdaTrans *srcTrans,
+ Production *parentDef, Production *definition, long &time );
+ void pdaOrderProd( LangEl *rootEl, PdaState *tabState,
+ PdaState *srcState, Production *parentDef, long &time );
+ void analyzeMachine( PdaGraph *pdaGraph, LangElSet &parserEls );
+
+ void makeProdFsms();
+ void insertUniqueEmptyProductions();
+ void printNonTermFirstSets();
+ void printFirstSets();
+
+ LangEl *makeRepeatProd( const InputLoc &loc, Namespace *nspace,
+ const String &repeatName, UniqueType *ut, bool left );
+ LangEl *makeListProd( const InputLoc &loc, Namespace *nspace,
+ const String &listName, UniqueType *ut, bool left );
+ LangEl *makeOptProd( const InputLoc &loc, Namespace *nspace,
+ const String &optName, UniqueType *ut );
+ void resolveProdEl( ProdEl *prodEl );
+ void resolveProductionEls();
+
+ void addMatchText( ObjectDef *frame, LangEl *lel );
+ void addMatchLength( ObjectDef *frame, LangEl *lel );
+ void addInput( ObjectDef *frame );
+ void addThis( ObjectDef *frame );
+ void addTransTokVar( ObjectDef *frame, LangEl *lel );
+ void addProdRHSVars( ObjectDef *localFrame, ProdElList *prodElList );
+ void addProdRedObjectVar( ObjectDef *localFrame, LangEl *langEl );
+ void addProdObjects();
+
+ void addProdRHSLoads( Production *prod, CodeVect &code, long &insertPos );
+ void addProdLHSLoad( Production *prod, CodeVect &code, long &insertPos );
+ void addPushBackLHS( Production *prod, CodeVect &code, long &insertPos );
+
+ void prepGrammar();
+ struct pda_run *parsePattern( program_t *prg, tree_t **sp, const InputLoc &loc,
+ int parserId, struct input_impl *sourceStream );
+ void parsePatterns();
+
+ void collectParserEls( LangElSet &parserEls );
+ void makeParser( LangElSet &parserEls );
+ PdaGraph *makePdaGraph( BstSet<LangEl*> &parserEls );
+ struct pda_tables *makePdaTables( PdaGraph *pdaGraph );
+
+ void fillInPatterns( program_t *prg );
+ void makeRuntimeData();
+
+ /* Generate and write out the fsm. */
+ void generateGraphviz();
+
+ void verifyParseStopGrammar( LangEl *langEl, PdaGraph *pdaGraph );
+ void computeAdvanceReductions( LangEl *langEl, PdaGraph *pdaGraph );
+
+ void initListElField( GenericType *gen, const char *name, int offset );
+ void initListFieldEl( GenericType *gen, const char *name, int offset );
+ void initListFieldVal( GenericType *gen, const char *name, int offset );
+
+ void initListFields( GenericType *gen );
+ void initListFunctions( GenericType *gen );
+
+ void initMapElKey( GenericType *gen, const char *name, int offset );
+ void initMapElField( GenericType *gen, const char *name, int offset );
+ void initMapField( GenericType *gen, const char *name, int offset );
+
+ void initMapFields( GenericType *gen );
+ void initMapFunctions( GenericType *gen );
+
+ void initVectorFunctions( GenericType *gen );
+ void initParserField( GenericType *gen, const char *name,
+ int offset, TypeRef *typeRef );
+ void initParserFunctions( GenericType *gen );
+ void initParserFields( GenericType *gen );
+
+ void addStdin();
+ void addStdout();
+ void addStderr();
+ void addArgv();
+ void addStds();
+ void addError();
+ void addDefineArgs();
+ int argvOffset();
+ int arg0Offset();
+ int stdsOffset();
+ void makeDefaultIterators();
+ void addLengthField( ObjectDef *objDef, code_t getLength );
+ ObjectDef *findObject( const String &name );
+ void resolveListElementOf( ObjectDef *container, ObjectDef *obj, ElementOf *elof );
+ void resolveMapElementOf( ObjectDef *container, ObjectDef *obj, ElementOf *elof );
+ void resolveElementOf( ObjectDef *obj );
+ void makeFuncVisible( Function *func, bool isUserIter );
+ void makeInHostVisible( Function *func );
+
+ void declareFunction( Function *func );
+ void declareReductionCode( Production *prod );
+ void declareTranslateBlock( LangEl *langEl );
+ void declarePreEof( TokenRegion *region );
+ void declareRootBlock();
+ void declareByteCode();
+
+ void resolveFunction( Function *func );
+ void resolveInHost( Function *func );
+ void resolvePreEof( TokenRegion *region );
+ void resolveRootBlock();
+ void resolveTranslateBlock( LangEl *langEl );
+ void resolveReductionCode( Production *prod );
+ void resolveParseTree();
+
+ void compileFunction( Function *func, CodeVect &code );
+ void compileFunction( Function *func );
+ void compileUserIter( Function *func, CodeVect &code );
+ void compileUserIter( Function *func );
+ void compilePreEof( TokenRegion *region );
+ void compileRootBlock();
+ void compileTranslateBlock( LangEl *langEl );
+ void findLocals( ObjectDef *localFrame, CodeBlock *block );
+ void makeProdCopies( Production *prod );
+ void compileReductionCode( Production *prod );
+ void removeNonUnparsableRepls();
+ void compileByteCode();
+
+ void resolveUses();
+ void generateOutput( long activeRealm, bool includeCommit );
+ void compile();
+
+ void openNameSpace( ostream &out, Namespace *nspace );
+ void closeNameSpace( ostream &out, Namespace *nspace );
+ void refNameSpace( LangEl *lel, Namespace *nspace );
+ void generateExports();
+ void generateExportsImpl();
+
+ struct local_info *makeLocalInfo( Locals &locals );
+ short *makeTrees( ObjectDef *objectDef, int &numTrees );
+
+ /*
+ * Graphviz Generation
+ */
+ void writeTransList( PdaState *state );
+ void writeDotFile( PdaGraph *graph );
+ void writeDotFile( );
+
+
+ /*
+ * Data collected during the parse.
+ */
+
+ LelList langEls;
+ StructElList structEls;
+ DefList prodList;
+
+ /* Dumping. */
+ DotItemIndex dotItemIndex;
+
+ PredDeclList predDeclList;
+
+ /* The name of the file the fsm is from, and the spec name. */
+ // EXISTS IN RL: char *fileName;
+ String parserName;
+ // EXISTS IN RL: InputLoc sectionLoc;
+
+ /* How to access the instance data. */
+ String access;
+
+ /* The name of the token structure. */
+ String tokenStruct;
+
+ GenericType *anyList;
+ GenericType *anyMap;
+ GenericType *anyVector;
+
+ LangEl *ptrLangEl;
+ LangEl *strLangEl;
+ LangEl *anyLangEl;
+ LangEl *rootLangEl;
+ LangEl *noTokenLangEl;
+ LangEl *eofLangEl;
+ LangEl *errorLangEl;
+ LangEl *ignoreLangEl;
+
+ Namespace *rootNamespace;
+
+ int nextLelId;
+ int firstNonTermId;
+ int firstStructElId;
+ int structInbuiltId;
+ int structInputId;
+ int structStreamId;
+
+ LangEl **langElIndex;
+ PdaState *actionDestState;
+ DefSetSet prodSetSet;
+
+ Production **prodIdIndex;
+ AlphSet literalSet;
+
+ PatList patternList;
+ ConsList replList;
+ ParserTextList parserTextList;
+
+ StructDef *global;
+ StructEl *globalSel;
+ ObjectDef *globalObjectDef;
+ ObjectField *arg0;
+ ObjectField *argv;
+ ObjectField *stds;
+ StructDef *argvEl;
+ StructEl *argvElSel;
+ StructEl *stdsElSel;
+
+ StructDef *input;
+ StructDef *stream;
+ StructEl *inputSel;
+ StructEl *streamSel;
+
+ VectorTypeIdMap vectorTypeIdMap;
+
+ UniqueType *findUniqueType( enum TYPE typeId );
+ UniqueType *findUniqueType( enum TYPE typeId, LangEl *langEl );
+ UniqueType *findUniqueType( enum TYPE typeId, IterDef *iterDef );
+ UniqueType *findUniqueType( enum TYPE typeId, StructEl *structEl );
+ UniqueType *findUniqueType( enum TYPE typeId, GenericType *generic );
+
+ UniqueGeneric *findUniqueGeneric( UniqueGeneric::Type type,
+ UniqueType *utKey, UniqueType *utValue );
+ UniqueGeneric *findUniqueGeneric( UniqueGeneric::Type type,
+ UniqueType *utValue );
+
+ UniqueType *uniqueTypeNil;
+ UniqueType *uniqueTypeVoid;
+ UniqueType *uniqueTypePtr;
+ UniqueType *uniqueTypeBool;
+ UniqueType *uniqueTypeInt;
+ UniqueType *uniqueTypeStr;
+ UniqueType *uniqueTypeIgnore;
+ UniqueType *uniqueTypeAny;
+
+ UniqueType *uniqueTypeInput;
+ UniqueType *uniqueTypeStream;
+
+ UniqueTypeMap uniqeTypeMap;
+ UniqueRepeatMap uniqeRepeatMap;
+ UniqueGenericMap uniqueGenericMap;
+
+ void declareGlobalFields();
+ void declareStrFields();
+
+ void declareInputField( ObjectDef *objDef, code_t getLength );
+ void declareInputFields();
+
+ void declareStreamField( ObjectDef *objDef, code_t getLength );
+ void declareStreamFields();
+
+ void declareIntFields();
+ void declareTokenFields();
+
+ ObjectDef *intObj;
+ ObjectDef *strObj;
+ ObjectDef *inputObj;
+ ObjectDef *streamObj;
+
+ struct fsm_tables *fsmTables;
+ struct colm_sections *runtimeData;
+
+ int nextPatConsId;
+ int nextGenericId;
+
+ FunctionList functionList;
+ FunctionList inHostList;
+ int nextFuncId;
+ int nextHostId;
+
+ enum CompileContext {
+ CompileTranslation,
+ CompileReduction,
+ CompileFunction,
+ CompileRoot
+ };
+
+ CompileContext compileContext;
+ LongVect returnJumps;
+ LongVect breakJumps;
+ Function *curFunction;
+
+ /* For stack unwinding. Used at exits, returns, iterator destroy, etc. */
+ CodeVect unwindCode;
+
+ ObjectField *makeDataEl();
+ ObjectField *makeFileEl();
+ ObjectField *makeLineEl();
+ ObjectField *makeColEl();
+ ObjectField *makePosEl();
+
+ IterDef *findIterDef( IterDef::Type type, GenericType *generic );
+ IterDef *findIterDef( IterDef::Type type, Function *func );
+ IterDef *findIterDef( IterDef::Type type );
+ IterDefSet iterDefSet;
+
+ enum GeneratesType { GenToken, GenIgnore, GenCfl };
+
+ int nextObjectId;
+ GeneratesType generatesType;
+ bool generatesIgnore;
+
+ StringMap literalStrings;
+
+ long nextFrameId;
+ long nextParserId;
+
+ ObjectDef *rootLocalFrame;
+
+ bool revertOn;
+
+ RedFsm *redFsm;
+
+ PdaGraph *pdaGraph;
+ struct pda_tables *pdaTables;
+
+ long predValue;
+ long nextMatchEndNum;
+
+ TypeRef *argvTypeRef;
+ TypeRef *stdsTypeRef;
+
+ bool inContiguous;
+ int contiguousOffset;
+ int contiguousStretch;
+
+ void declareReVars();
+
+ void initReductionNeeds( Reduction *reduction );
+
+ void findRhsRefs( bool &lhsUsed, Vector<ProdEl*> &rhsUsed, Vector<ProdEl*> &treeUsed,
+ Vector<ProdEl*> &locUsed, Reduction *reduction, Production *production,
+ const ReduceTextItemList &list );
+
+ void computeNeeded( Reduction *reduction, Production *production,
+ const ReduceTextItemList &list );
+ void computeNeeded();
+
+ void loadRefs( Reduction *reduction, Production *production,
+ const ReduceTextItemList &list, bool read );
+
+ void writePostfixReduce( Reduction *reduction );
+ void writeParseReduce( Reduction *reduction );
+
+ void writeParseReduce();
+ void writePostfixReduce();
+
+ void writeHostCall();
+ void writeNeeds();
+ void writeCommit();
+ void writeReduceStructs();
+ void writeReduceDispatchers();
+ void writeUnescape();
+
+ void writeLhsRef( Production *production, ReduceTextItem *i );
+ void writeRhsRef( Production *production, ReduceTextItem *i );
+ void writeTreeRef( Production *production, ReduceTextItem *i );
+ void writeRhsLoc( Production *production, ReduceTextItem *i );
+ void writeHostItemList( Production *production, const ReduceTextItemList &list );
+ void writeCommitStub();
+};
+
+/* Free-function prototypes for FSM construction helpers; the definitions are
+ * not in this header. Apart from afterOpMinimize, each takes the owning
+ * Compiler as its last argument (pd). */
+void afterOpMinimize( FsmGraph *fsm, bool lastInSeq = true );
+
+/* Key builders from text. NOTE(review): the Hex/Dec/Num suffixes presumably
+ * name the numeric base of str -- confirm against the definitions. */
+Key makeFsmKeyHex( char *str, const InputLoc &loc, Compiler *pd );
+Key makeFsmKeyDec( char *str, const InputLoc &loc, Compiler *pd );
+Key makeFsmKeyNum( char *str, const InputLoc &loc, Compiler *pd );
+Key makeFsmKeyChar( char c, Compiler *pd );
+
+/* Array forms: the first writes into a caller-supplied Key buffer, the second
+ * fills a KeySet and takes a caseInsensitive flag. */
+void makeFsmKeyArray( Key *result, char *data, int len, Compiler *pd );
+void makeFsmUniqueKeyArray( KeySet &result, char *data, int len,
+ bool caseInsensitive, Compiler *pd );
+
+/* Graph constructors returning a newly built FsmGraph pointer. */
+FsmGraph *makeBuiltin( BuiltinMachine builtin, Compiler *pd );
+FsmGraph *dotFsm( Compiler *pd );
+FsmGraph *dotStarFsm( Compiler *pd );
+
+void errorStateLabels( const NameSet &locations );
+
+struct ColmParser;
+
+typedef AvlMap<String, ColmParser *, ColmCmpStr> ParserDict;
+typedef AvlMapEl<String, ColmParser *> ParserDictEl;
+
+/* Namespace-level declaration helpers. Each takes the Compiler, the target
+ * Namespace and the name (data) of the thing being declared or looked up. */
+
+/* Language elements: both take a name and a LangEl::Type and return a LangEl. */
+LangEl *declareLangEl( Compiler *pd, Namespace *nspace,
+ const String &data, LangEl::Type type );
+LangEl *addLangEl( Compiler *pd, Namespace *nspace,
+ const String &data, LangEl::Type type );
+
+/* Struct declaration; context is the StructDef associated with the new
+ * StructEl. */
+StructEl *declareStruct( Compiler *pd, Namespace *nspace,
+ const String &data, StructDef *context );
+
+/* Type alias declaration: binds the name to the given TypeRef. */
+void declareTypeAlias( Compiler *pd, Namespace *nspace,
+ const String &data, TypeRef *typeRef );
+
+/* Look up a LangEl by name within nspace. */
+LangEl *findType( Compiler *pd, Namespace *nspace, const String &data );
+
+/* initFunction overload family. All variants share retType, obj, type, name,
+ * the two method ids (methIdWV, methIdWC) and isConst, plus the defaulted
+ * trailing parameters useFnInstr (false) and useGeneric (0). They differ in
+ * the number of UniqueType arguments (none, arg1, or arg1 and arg2) and in
+ * whether a Namespace is supplied. */
+
+/* No Namespace, no arguments. */
+ObjectMethod *initFunction( UniqueType *retType, ObjectDef *obj,
+ ObjectMethod::Type type, const String &name, int methIdWV, int methIdWC,
+ bool isConst, bool useFnInstr = false, GenericType *useGeneric = 0 );
+
+/* No Namespace, one argument. */
+ObjectMethod *initFunction( UniqueType *retType, ObjectDef *obj,
+ ObjectMethod::Type type, const String &name, int methIdWV, int methIdWC,
+ UniqueType *arg1, bool isConst, bool useFnInstr = false,
+ GenericType *useGeneric = 0 );
+
+/* No Namespace, two arguments. */
+ObjectMethod *initFunction( UniqueType *retType, ObjectDef *obj,
+ ObjectMethod::Type type, const String &name, int methIdWV, int methIdWC,
+ UniqueType *arg1, UniqueType *arg2, bool isConst,
+ bool useFnInstr = false, GenericType *useGeneric = 0 );
+
+/* With Namespace, no arguments. */
+ObjectMethod *initFunction( UniqueType *retType, Namespace *nspace, ObjectDef *obj,
+ ObjectMethod::Type type, const String &name, int methIdWV, int methIdWC,
+ bool isConst, bool useFnInstr = false, GenericType *useGeneric = 0 );
+
+/* With Namespace, one argument. */
+ObjectMethod *initFunction( UniqueType *retType, Namespace *nspace, ObjectDef *obj,
+ ObjectMethod::Type type, const String &name, int methIdWV, int methIdWC,
+ UniqueType *arg1, bool isConst, bool useFnInstr = false,
+ GenericType *useGeneric = 0 );
+
+/* With Namespace, two arguments. */
+ObjectMethod *initFunction( UniqueType *retType, Namespace *nspace, ObjectDef *obj,
+ ObjectMethod::Type type, const String &name, int methIdWV, int methIdWC,
+ UniqueType *arg1, UniqueType *arg2, bool isConst,
+ bool useFnInstr = false, GenericType *useGeneric = 0 );
+
+extern "C" struct input_impl *colm_impl_new_pat( char *name, struct Pattern *pattern );
+extern "C" struct input_impl *colm_impl_new_cons( char *name, struct Constructor *constructor );
+
+#endif /* _COLM_PARSEDATA_H */
+
diff --git a/src/config.h.cmake.in b/src/config.h.cmake.in
index 12117f4d..455cffe7 100644
--- a/src/config.h.cmake.in
+++ b/src/config.h.cmake.in
@@ -9,7 +9,6 @@
#cmakedefine HAVE_SYS_WAIT_H 1
#cmakedefine HAVE_UNISTD_H 1
-#cmakedefine SIZEOF_INT @SIZEOF_INT@
#cmakedefine SIZEOF_LONG @SIZEOF_LONG@
#endif /* _COLM_CONFIG_H */
diff --git a/src/consinit.cc b/src/consinit.cc
new file mode 100644
index 00000000..4f59b07c
--- /dev/null
+++ b/src/consinit.cc
@@ -0,0 +1,922 @@
+/*
+ * Copyright 2006-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "consinit.h"
+
+#include <iostream>
+
+using std::cout;
+using std::cerr;
+using std::endl;
+
+/* Build a LexTerm wrapping a Range between two string literals. The bounds
+ * are passed as literal source text; the range is then lifted through the
+ * factor, neg, rep and aug wrappers. */
+LexTerm *rangeTerm( const char *low, const char *high )
+{
+ /* The two bounds are built in separate statements so that low is always
+  * constructed before high. */
+ Literal *lower = Literal::cons( internal, String( low ), Literal::LitString );
+ Literal *upper = Literal::cons( internal, String( high ), Literal::LitString );
+
+ LexFactor *rangeFactor = LexFactor::cons( Range::cons( lower, upper ) );
+ LexFactorRep *rep = LexFactorRep::cons( LexFactorNeg::cons( rangeFactor ) );
+ return LexTerm::cons( LexFactorAug::cons( rep ) );
+}
+
+/* Wrap a string literal in a LexFactor and return it as a LexFactorNeg. */
+LexFactorNeg *litFactorNeg( const char *str )
+{
+ LexFactor *litFactor = LexFactor::cons(
+   Literal::cons( internal, String( str ), Literal::LitString ) );
+ return LexFactorNeg::cons( litFactor );
+}
+
+/* Wrap a string literal up to the LexFactorAug level:
+ * literal -> factor -> neg -> rep -> aug. */
+LexFactorAug *litFactorAug( const char *str )
+{
+ LexFactor *litFactor = LexFactor::cons(
+   Literal::cons( internal, String( str ), Literal::LitString ) );
+ LexFactorRep *rep = LexFactorRep::cons( LexFactorNeg::cons( litFactor ) );
+ return LexFactorAug::cons( rep );
+}
+
+/* Wrap a string literal up to the LexTerm level:
+ * literal -> factor -> neg -> rep -> aug -> term. */
+LexTerm *litTerm( const char *str )
+{
+ LexFactor *litFactor = LexFactor::cons(
+   Literal::cons( internal, String( str ), Literal::LitString ) );
+ LexFactorRep *rep = LexFactorRep::cons( LexFactorNeg::cons( litFactor ) );
+ return LexTerm::cons( LexFactorAug::cons( rep ) );
+}
+
+/* Build a LexExpression holding the single term produced by litTerm. */
+LexExpression *litExpr( const char *str )
+{
+ return LexExpression::cons( litTerm( str ) );
+}
+
+/* Left-associated OrType expression over two terms: (term1 | term2). */
+LexExpression *orExpr( LexTerm *term1, LexTerm *term2 )
+{
+ LexExpression *lhs = LexExpression::cons( term1 );
+ LexExpression *both = LexExpression::cons( lhs, term2, LexExpression::OrType );
+ return both;
+}
+
+/* Left-associated OrType expression over three terms:
+ * ((term1 | term2) | term3). */
+LexExpression *orExpr( LexTerm *term1, LexTerm *term2, LexTerm *term3 )
+{
+ LexTerm *rest[] = { term2, term3 };
+
+ /* Seed with the first term, then fold the remaining ones in from the left. */
+ LexExpression *acc = LexExpression::cons( term1 );
+ for ( int i = 0; i < 2; i++ )
+  acc = LexExpression::cons( acc, rest[i], LexExpression::OrType );
+ return acc;
+}
+
+/* Left-associated OrType expression over four terms:
+ * (((term1 | term2) | term3) | term4). */
+LexExpression *orExpr( LexTerm *term1, LexTerm *term2, LexTerm *term3, LexTerm *term4 )
+{
+ LexTerm *rest[] = { term2, term3, term4 };
+
+ /* Seed with the first term, then fold the remaining ones in from the left. */
+ LexExpression *acc = LexExpression::cons( term1 );
+ for ( int i = 0; i < 3; i++ )
+  acc = LexExpression::cons( acc, rest[i], LexExpression::OrType );
+ return acc;
+}
+
+/* Left-associated OrType expression over six terms:
+ * (((((term1 | term2) | term3) | term4) | term5) | term6).
+ *
+ * Every one of the six terms is folded into the result. Previously the chain
+ * stopped after term4, so term5 and term6 were accepted but never became part
+ * of the returned expression. */
+LexExpression *orExpr( LexTerm *term1, LexTerm *term2, LexTerm *term3,
+ LexTerm *term4, LexTerm *term5, LexTerm *term6 )
+{
+ LexTerm *rest[] = { term2, term3, term4, term5, term6 };
+ const int numRest = sizeof(rest) / sizeof(rest[0]);
+
+ /* Seed with the first term, then fold the remaining ones in from the left.
+  * Looping over the array keeps the term count and the fold in sync. */
+ LexExpression *acc = LexExpression::cons( term1 );
+ for ( int i = 0; i < numRest; i++ )
+  acc = LexExpression::cons( acc, rest[i], LexExpression::OrType );
+ return acc;
+}
+
+/* Group an expression (via a LexJoin) and apply a StarType repetition to it,
+ * returning the result at the LexFactorAug level. */
+LexFactorAug *starFactorAug( LexExpression *expr )
+{
+ LexFactor *grouped = LexFactor::cons( LexJoin::cons( expr ) );
+ LexFactorRep *inner = LexFactorRep::cons( LexFactorNeg::cons( grouped ) );
+
+ /* Wrap the plain rep in a second rep node carrying the star type. */
+ LexFactorRep *starred = LexFactorRep::cons( internal,
+   inner, 0, 0, LexFactorRep::StarType );
+ return LexFactorAug::cons( starred );
+}
+
+/* Term convenience form: lift the term to an expression and defer to the
+ * LexExpression overload. */
+LexFactorAug *starFactorAug( LexTerm *term )
+{
+ return starFactorAug( LexExpression::cons( term ) );
+}
+
+/* FactorAug convenience form: lift the factor to a term and defer to the
+ * LexTerm overload. */
+LexFactorAug *starFactorAug( LexFactorAug *factorAug )
+{
+ return starFactorAug( LexTerm::cons( factorAug ) );
+}
+
+/* Group an expression (via a LexJoin) and apply a PlusType repetition to it,
+ * returning the result at the LexFactorAug level. */
+LexFactorAug *plusFactorAug( LexExpression *expr )
+{
+ LexFactor *grouped = LexFactor::cons( LexJoin::cons( expr ) );
+ LexFactorRep *inner = LexFactorRep::cons( LexFactorNeg::cons( grouped ) );
+
+ /* Wrap the plain rep in a second rep node carrying the plus type. */
+ LexFactorRep *plussed = LexFactorRep::cons( internal, inner, 0, 0, LexFactorRep::PlusType );
+ return LexFactorAug::cons( plussed );
+}
+
+/* ConcatType term over two factors: (fa1 . fa2). */
+LexTerm *concatTerm( LexFactorAug *fa1, LexFactorAug *fa2 )
+{
+ LexTerm *head = LexTerm::cons( fa1 );
+ return LexTerm::cons( head, fa2, LexTerm::ConcatType );
+}
+
+/* Left-associated ConcatType term over three factors: ((fa1 . fa2) . fa3). */
+LexTerm *concatTerm( LexFactorAug *fa1, LexFactorAug *fa2, LexFactorAug *fa3 )
+{
+ LexFactorAug *rest[] = { fa2, fa3 };
+
+ /* Seed with the first factor, then append the others in order. */
+ LexTerm *acc = LexTerm::cons( fa1 );
+ for ( int i = 0; i < 2; i++ )
+  acc = LexTerm::cons( acc, rest[i], LexTerm::ConcatType );
+ return acc;
+}
+
+/* Treat an expression as a grouped unit: wrap it in a LexJoin and lift it
+ * through factor, neg and rep to the LexFactorAug level. */
+LexFactorAug *parensFactorAug( LexExpression *expr )
+{
+ LexFactor *grouped = LexFactor::cons( LexJoin::cons( expr ) );
+ LexFactorRep *rep = LexFactorRep::cons( LexFactorNeg::cons( grouped ) );
+ return LexFactorAug::cons( rep );
+}
+
+/* Treat an expression as a grouped unit, stopping at the LexFactorNeg level. */
+LexFactorNeg *parensFactorNeg( LexExpression *expr )
+{
+ LexFactor *grouped = LexFactor::cons( LexJoin::cons( expr ) );
+ return LexFactorNeg::cons( grouped );
+}
+
+/* Term form of parensFactorAug: the term is first lifted to an expression,
+ * then grouped exactly as the LexExpression overload does. */
+LexFactorAug *parensFactorAug( LexTerm *term )
+{
+ LexExpression *lifted = LexExpression::cons( term );
+ LexFactor *grouped = LexFactor::cons( LexJoin::cons( lifted ) );
+ LexFactorRep *rep = LexFactorRep::cons( LexFactorNeg::cons( grouped ) );
+ return LexFactorAug::cons( rep );
+}
+
+/* Group the expression, wrap it in a CharNegateType LexFactorNeg, and lift
+ * the result to the LexFactorAug level. */
+LexFactorAug *charNegFactorAug( LexExpression *expr )
+{
+ LexFactorNeg *negated = LexFactorNeg::cons(
+   parensFactorNeg( expr ), LexFactorNeg::CharNegateType );
+ return LexFactorAug::cons( LexFactorRep::cons( negated ) );
+}
+
+/* Same as charNegFactorAug, returned as a LexTerm. */
+LexTerm *charNegTerm( LexExpression *expr )
+{
+ return LexTerm::cons( charNegFactorAug( expr ) );
+}
+
+/* Same as parensFactorAug( LexExpression* ), returned as a LexTerm. */
+LexTerm *parensTerm( LexExpression *expr )
+{
+ LexFactorAug *grouped = parensFactorAug( expr );
+ LexTerm *asTerm = LexTerm::cons( grouped );
+ return asTerm;
+}
+
+/* Register the unnamed whitespace pattern through defineToken: a
+ * plus-repetition over the alternation that orExpr builds from the six
+ * whitespace literal terms created here. */
+void ConsInit::wsIgnore()
+{
+ ObjectDef *wsObj = ObjectDef::cons( ObjectDef::UserType, String(), pd->nextObjectId++ );
+
+ /* One literal term per whitespace character. */
+ LexTerm *space = litTerm( "' '" );
+ LexTerm *tab = litTerm( "'\t'" );
+ LexTerm *vtab = litTerm( "'\v'" );
+ LexTerm *newline = litTerm( "'\n'" );
+ LexTerm *cr = litTerm( "'\r'" );
+ LexTerm *formFeed = litTerm( "'\f'" );
+
+ LexExpression *anyWs = orExpr( space, tab, vtab, newline, cr, formFeed );
+
+ /* plus-repetition -> term -> expression -> join. */
+ LexTerm *wsTerm = LexTerm::cons( plusFactorAug( anyWs ) );
+ LexJoin *wsJoin = LexJoin::cons( LexExpression::cons( wsTerm ) );
+
+ defineToken( internal, String(), wsJoin, wsObj, 0, true, false, false );
+}
+
+/* Register the unnamed comment pattern through defineToken: a '#', then any
+ * number of characters from the char-negation of newline, then a newline. */
+void ConsInit::commentIgnore()
+{
+ ObjectDef *commentObj = ObjectDef::cons( ObjectDef::UserType, String(), pd->nextObjectId++ );
+
+ LexFactorAug *hash = litFactorAug( "'#'" );
+
+ /* Body: star over the char-negated newline expression. */
+ LexExpression *nlExpr = litExpr( "'\\n'" );
+ LexFactorAug *notNl = charNegFactorAug( nlExpr );
+ LexFactorAug *body = starFactorAug( notNl );
+
+ LexFactorAug *closingNl = litFactorAug( "'\\n'" );
+
+ LexTerm *whole = concatTerm( hash, body, closingNl );
+ LexJoin *commentJoin = LexJoin::cons( LexExpression::cons( whole ) );
+
+ defineToken( internal, String(), commentJoin, commentObj, 0, true, false, false );
+}
+
+/* Define the "id" token: one character from [a-zA-Z_] followed by any number
+ * of characters from [a-zA-Z_0-9]. */
+void ConsInit::idToken()
+{
+ String tokenName( "id" );
+
+ ObjectDef *idObj = ObjectDef::cons( ObjectDef::UserType, tokenName, pd->nextObjectId++ );
+
+ /* Leading character. */
+ LexTerm *headLower = rangeTerm( "'a'", "'z'" );
+ LexTerm *headUpper = rangeTerm( "'A'", "'Z'" );
+ LexTerm *headUnder = litTerm( "'_'" );
+ LexFactorAug *head = parensFactorAug( orExpr( headLower, headUpper, headUnder ) );
+
+ /* Trailing characters: the same set plus digits, starred. */
+ LexTerm *tailLower = rangeTerm( "'a'", "'z'" );
+ LexTerm *tailUpper = rangeTerm( "'A'", "'Z'" );
+ LexTerm *tailUnder = litTerm( "'_'" );
+ LexTerm *tailDigit = rangeTerm( "'0'", "'9'" );
+ LexExpression *tailSet = orExpr( tailLower, tailUpper, tailUnder, tailDigit );
+ LexFactorAug *tail = starFactorAug( tailSet );
+
+ LexTerm *whole = concatTerm( head, tail );
+ LexJoin *idJoin = LexJoin::cons( LexExpression::cons( whole ) );
+
+ defineToken( internal, tokenName, idJoin, idObj, 0, false, false, false );
+}
+
+/* Define the "literal" token: a single quote, then any number of items that
+ * are either a character other than quote/backslash or a backslash followed
+ * by any character, then a closing single quote. */
+void ConsInit::literalToken()
+{
+ String tokenName( "literal" );
+
+ ObjectDef *litObj = ObjectDef::cons( ObjectDef::UserType, tokenName, pd->nextObjectId++ );
+
+ LexFactorAug *openQuote = litFactorAug( "'\\''" );
+
+ /* [^'\\] */
+ LexExpression *quoteOrBackSlash = orExpr(
+   litTerm( "'\\''" ),
+   litTerm( "'\\\\'" ) );
+ LexTerm *plainChar = charNegTerm( quoteOrBackSlash );
+
+ /* '\\' any */
+ LexFactorAug *backSlash = litFactorAug( "'\\\\'" );
+ LexExpression *anyChar = LexExpression::cons( BT_Any );
+ LexTerm *escaped = concatTerm( backSlash, parensFactorAug( anyChar ) );
+
+ /* ( plainChar | escaped )* */
+ LexExpression *item = orExpr( plainChar, escaped );
+ LexFactorAug *body = starFactorAug( item );
+
+ /* NOTE(review): the closing quote is spelled ''' here, whereas the opening
+  * one uses the escaped form '\''. Presumably the literal handling treats
+  * both as a single quote character -- confirm before unifying them. */
+ LexFactorAug *closeQuote = litFactorAug( "'\''" );
+
+ LexTerm *whole = concatTerm( openQuote, body, closeQuote );
+ LexJoin *litJoin = LexJoin::cons( LexExpression::cons( whole ) );
+
+ defineToken( internal, tokenName, litJoin, litObj, 0, false, false, false );
+}
+
+/* Define a named token whose pattern is the single literal lit. */
+void ConsInit::keyword( const String &name, const String &lit )
+{
+ ObjectDef *kwObj = ObjectDef::cons( ObjectDef::UserType, name, pd->nextObjectId++ );
+
+ /* literal term -> expression -> join. */
+ LexExpression *kwExpr = LexExpression::cons( litTerm( lit ) );
+ LexJoin *kwJoin = LexJoin::cons( kwExpr );
+
+ defineToken( internal, name, kwJoin, kwObj, 0, false, false, false );
+}
+
+/* Single-argument form: hand the keyword text straight to literalDef with
+ * both trailing flags off. */
+void ConsInit::keyword( const String &kw )
+{
+ literalDef( internal, kw,
+   false, false );
+}
+
+/* Production element referring to the type called name in the current
+ * namespace; no capture field, no repeat. */
+ProdEl *ConsInit::prodRefName( const String &name )
+{
+ NamespaceQual *qual = NamespaceQual::cons( curNspace() );
+ return prodElName( internal, name, qual, 0, RepeatNone, false );
+}
+
+/* Production element referring to the type called name in the current
+ * namespace, captured into an RhsNameType field called capture; no repeat. */
+ProdEl *ConsInit::prodRefName( const String &capture, const String &name )
+{
+ ObjectField *field = ObjectField::cons( internal,
+   ObjectField::RhsNameType, 0, capture );
+ NamespaceQual *qual = NamespaceQual::cons( curNspace() );
+ return prodElName( internal, name, qual, field, RepeatNone, false );
+}
+
+/* As prodRefName( name ), but the element is marked RepeatLeftRepeat. */
+ProdEl *ConsInit::prodRefNameLeftRepeat( const String &name )
+{
+ NamespaceQual *qual = NamespaceQual::cons( curNspace() );
+ return prodElName( internal, name, qual, 0, RepeatLeftRepeat, false );
+}
+
+/* As prodRefName( capture, name ), but the element is marked
+ * RepeatLeftRepeat. */
+ProdEl *ConsInit::prodRefNameLeftRepeat( const String &capture, const String &name )
+{
+ ObjectField *field = ObjectField::cons( internal,
+   ObjectField::RhsNameType, 0, capture );
+ NamespaceQual *qual = NamespaceQual::cons( curNspace() );
+ return prodElName( internal, name, qual, field, RepeatLeftRepeat, false );
+}
+
+/* Production element referring to the literal lit in the current namespace;
+ * no capture field, no repeat. */
+ProdEl *ConsInit::prodRefLit( const String &lit )
+{
+ NamespaceQual *qual = NamespaceQual::cons( curNspace() );
+ return prodElLiteral( internal, lit, qual, 0, RepeatNone, false );
+}
+
+/* Build a production with an empty right-hand side. */
+Production *ConsInit::production()
+{
+ ProdElList *rhs = new ProdElList;
+ Production *made = BaseParser::production( internal, rhs, String(), false, 0, 0 );
+ return made;
+}
+
+/* Build a production whose right-hand side is the single given element. */
+Production *ConsInit::production( ProdEl *prodEl1 )
+{
+ ProdElList *rhs = new ProdElList;
+ appendProdEl( rhs, prodEl1 );
+
+ Production *made = BaseParser::production( internal, rhs, String(), false, 0, 0 );
+ return made;
+}
+
+/* Build a production from two right-hand-side elements, in argument order. */
+Production *ConsInit::production( ProdEl *prodEl1, ProdEl *prodEl2 )
+{
+ ProdEl *els[] = { prodEl1, prodEl2 };
+
+ ProdElList *rhs = new ProdElList;
+ for ( int i = 0; i < 2; i++ )
+  appendProdEl( rhs, els[i] );
+
+ return BaseParser::production( internal, rhs, String(), false, 0, 0 );
+}
+
+/* Build a production from three right-hand-side elements, in argument order. */
+Production *ConsInit::production( ProdEl *prodEl1, ProdEl *prodEl2,
+ ProdEl *prodEl3 )
+{
+ ProdEl *els[] = { prodEl1, prodEl2, prodEl3 };
+
+ ProdElList *rhs = new ProdElList;
+ for ( int i = 0; i < 3; i++ )
+  appendProdEl( rhs, els[i] );
+
+ return BaseParser::production( internal, rhs, String(), false, 0, 0 );
+}
+
+/* Build a production from four right-hand-side elements, in argument order. */
+Production *ConsInit::production( ProdEl *prodEl1, ProdEl *prodEl2,
+ ProdEl *prodEl3, ProdEl *prodEl4 )
+{
+ ProdEl *els[] = { prodEl1, prodEl2, prodEl3, prodEl4 };
+
+ ProdElList *rhs = new ProdElList;
+ for ( int i = 0; i < 4; i++ )
+  appendProdEl( rhs, els[i] );
+
+ return BaseParser::production( internal, rhs, String(), false, 0, 0 );
+}
+
+/* Build a production from five right-hand-side elements, in argument order. */
+Production *ConsInit::production( ProdEl *prodEl1, ProdEl *prodEl2,
+ ProdEl *prodEl3, ProdEl *prodEl4, ProdEl *prodEl5 )
+{
+ ProdEl *els[] = { prodEl1, prodEl2, prodEl3, prodEl4, prodEl5 };
+
+ ProdElList *rhs = new ProdElList;
+ for ( int i = 0; i < 5; i++ )
+  appendProdEl( rhs, els[i] );
+
+ return BaseParser::production( internal, rhs, String(), false, 0, 0 );
+}
+
+/* Build a production from seven right-hand-side elements, in argument order. */
+Production *ConsInit::production( ProdEl *prodEl1, ProdEl *prodEl2,
+ ProdEl *prodEl3, ProdEl *prodEl4, ProdEl *prodEl5,
+ ProdEl *prodEl6, ProdEl *prodEl7 )
+{
+ ProdEl *els[] = {
+  prodEl1, prodEl2, prodEl3, prodEl4,
+  prodEl5, prodEl6, prodEl7
+ };
+
+ ProdElList *rhs = new ProdElList;
+ for ( int i = 0; i < 7; i++ )
+  appendProdEl( rhs, els[i] );
+
+ return BaseParser::production( internal, rhs, String(), false, 0, 0 );
+}
+
+/* Define the non-terminal name with four alternative productions, appended
+ * in argument order, and hand the result to cflDef. */
+void ConsInit::definition( const String &name, Production *prod1, Production *prod2,
+ Production *prod3, Production *prod4 )
+{
+ Production *alts[] = { prod1, prod2, prod3, prod4 };
+
+ LelDefList *altList = new LelDefList;
+ for ( int i = 0; i < 4; i++ )
+  prodAppend( altList, alts[i] );
+
+ NtDef *nt = NtDef::cons( name, curNspace(), curStruct(), false );
+ ObjectDef *ntObj = ObjectDef::cons( ObjectDef::UserType,
+   name, pd->nextObjectId++ );
+ cflDef( nt, ntObj, altList );
+}
+
+/* Define the non-terminal name with three alternative productions, appended
+ * in argument order, and hand the result to cflDef. */
+void ConsInit::definition( const String &name, Production *prod1,
+ Production *prod2, Production *prod3 )
+{
+ Production *alts[] = { prod1, prod2, prod3 };
+
+ LelDefList *altList = new LelDefList;
+ for ( int i = 0; i < 3; i++ )
+  prodAppend( altList, alts[i] );
+
+ NtDef *nt = NtDef::cons( name, curNspace(), curStruct(), false );
+ ObjectDef *ntObj = ObjectDef::cons( ObjectDef::UserType,
+   name, pd->nextObjectId++ );
+ cflDef( nt, ntObj, altList );
+}
+
+/* Define the non-terminal name with two alternative productions, appended in
+ * argument order, and hand the result to cflDef. */
+void ConsInit::definition( const String &name, Production *prod1, Production *prod2 )
+{
+ Production *alts[] = { prod1, prod2 };
+
+ LelDefList *altList = new LelDefList;
+ for ( int i = 0; i < 2; i++ )
+  prodAppend( altList, alts[i] );
+
+ NtDef *nt = NtDef::cons( name, curNspace(), curStruct(), false );
+ ObjectDef *ntObj = ObjectDef::cons( ObjectDef::UserType,
+   name, pd->nextObjectId++ );
+ cflDef( nt, ntObj, altList );
+}
+
+/* Define the non-terminal name with a single production and hand the result
+ * to cflDef. */
+void ConsInit::definition( const String &name, Production *prod )
+{
+ LelDefList *altList = new LelDefList;
+ prodAppend( altList, prod );
+
+ NtDef *nt = NtDef::cons( name, curNspace(), curStruct(), false );
+ ObjectDef *ntObj = ObjectDef::cons( ObjectDef::UserType,
+   name, pd->nextObjectId++ );
+ cflDef( nt, ntObj, altList );
+}
+
+/* Define "lex_factor" with four alternatives: a literal, a parenthesised
+ * lex_expr, a literal '..' literal range, and an id. The productions are
+ * created in the order literal, id, parens, range, but passed to definition
+ * in the order literal, parens, range, id. */
+void ConsInit::lexFactor()
+{
+ /* Literal: literal */
+ ProdEl *litRef = prodRefName( "Literal", "literal" );
+ Production *litAlt = production( litRef );
+
+ /* Id: id */
+ ProdEl *idRef = prodRefName( "Id", "id" );
+ Production *idAlt = production( idRef );
+
+ /* '(' Expr: lex_expr ')' */
+ ProdEl *openParen = prodRefLit( "'('" );
+ ProdEl *innerExpr = prodRefName( "Expr", "lex_expr" );
+ ProdEl *closeParen = prodRefLit( "')'" );
+ Production *parenAlt = production( openParen, innerExpr, closeParen );
+
+ /* Low: literal '..' High: literal */
+ ProdEl *lowRef = prodRefName( "Low", "literal" );
+ ProdEl *dotDot = prodRefLit( "'..'" );
+ ProdEl *highRef = prodRefName( "High", "literal" );
+ Production *rangeAlt = production( lowRef, dotDot, highRef );
+
+ definition( "lex_factor", litAlt, parenAlt, rangeAlt, idAlt );
+}
+
+/* Define "lex_factor_neg": either '^' followed by a lex_factor_neg, or a
+ * plain lex_factor. */
+void ConsInit::lexFactorNeg()
+{
+ /* '^' FactorNeg: lex_factor_neg */
+ ProdEl *caret = prodRefLit( "'^'" );
+ ProdEl *negRef = prodRefName( "FactorNeg", "lex_factor_neg" );
+ Production *negAlt = production( caret, negRef );
+
+ /* Factor: lex_factor */
+ ProdEl *factorRef = prodRefName( "Factor", "lex_factor" );
+ Production *plainAlt = production( factorRef );
+
+ definition( "lex_factor_neg", negAlt, plainAlt );
+}
+
+/* Define "lex_factor_rep": a lex_factor_rep followed by STAR, a
+ * lex_factor_rep followed by PLUS, or a plain lex_factor_neg. */
+void ConsInit::lexFactorRep()
+{
+ /* FactorRep: lex_factor_rep Star: STAR */
+ ProdEl *starBase = prodRefName( "FactorRep", "lex_factor_rep" );
+ ProdEl *starTok = prodRefName( "Star", "STAR" );
+ Production *starAlt = production( starBase, starTok );
+
+ /* FactorRep: lex_factor_rep Plus: PLUS */
+ ProdEl *plusBase = prodRefName( "FactorRep", "lex_factor_rep" );
+ ProdEl *plusTok = prodRefName( "Plus", "PLUS" );
+ Production *plusAlt = production( plusBase, plusTok );
+
+ /* FactorNeg: lex_factor_neg */
+ ProdEl *negRef = prodRefName( "FactorNeg", "lex_factor_neg" );
+ Production *plainAlt = production( negRef );
+
+ definition( "lex_factor_rep", starAlt, plusAlt, plainAlt );
+}
+
+/* Define "lex_term": a lex_term joined to a lex_factor_rep by DOT or by
+ * COLON_LT, or a lone lex_factor_rep. */
+void ConsInit::lexTerm()
+{
+ /* Term: lex_term Dot: DOT FactorRep: lex_factor_rep */
+ ProdEl *dotLhs = prodRefName( "Term", "lex_term" );
+ ProdEl *dotTok = prodRefName( "Dot", "DOT" );
+ ProdEl *dotRhs = prodRefName( "FactorRep", "lex_factor_rep" );
+ Production *dotAlt = production( dotLhs, dotTok, dotRhs );
+
+ /* Term: lex_term ColonLt: COLON_LT FactorRep: lex_factor_rep */
+ ProdEl *colonLhs = prodRefName( "Term", "lex_term" );
+ ProdEl *colonTok = prodRefName( "ColonLt", "COLON_LT" );
+ ProdEl *colonRhs = prodRefName( "FactorRep", "lex_factor_rep" );
+ Production *colonAlt = production( colonLhs, colonTok, colonRhs );
+
+ /* FactorRep: lex_factor_rep */
+ ProdEl *repRef = prodRefName( "FactorRep", "lex_factor_rep" );
+ Production *plainAlt = production( repRef );
+
+ definition( "lex_term", dotAlt, colonAlt, plainAlt );
+}
+
+/* Define "lex_expr": a lex_expr '|' lex_term, or a lone lex_term. */
+void ConsInit::lexExpr()
+{
+ /* Expr: lex_expr '|' Term: lex_term */
+ ProdEl *orLhs = prodRefName( "Expr", "lex_expr" );
+ ProdEl *bar = prodRefLit( "'|'" );
+ ProdEl *orRhs = prodRefName( "Term", "lex_term" );
+ Production *orAlt = production( orLhs, bar, orRhs );
+
+ /* Term: lex_term */
+ ProdEl *termRef = prodRefName( "Term", "lex_term" );
+ Production *plainAlt = production( termRef );
+
+ definition( "lex_expr", orAlt, plainAlt );
+}
+
+/* Define "token_def" as the seven-element production
+ * 'token' Id: id LeftNi: opt_ni '/' Expr: lex_expr '/' RightNi: opt_ni. */
+void ConsInit::token()
+{
+ ProdEl *tokenKw = prodRefLit( "'token'" );
+ ProdEl *nameRef = prodRefName( "Id", "id" );
+ ProdEl *leftNi = prodRefName( "LeftNi", "opt_ni" );
+ ProdEl *openSlash = prodRefLit( "'/'" );
+ ProdEl *pattern = prodRefName( "Expr", "lex_expr" );
+ ProdEl *closeSlash = prodRefLit( "'/'" );
+ ProdEl *rightNi = prodRefName( "RightNi", "opt_ni" );
+
+ Production *tokenAlt = production( tokenKw, nameRef, leftNi,
+   openSlash, pattern, closeSlash, rightNi );
+ definition( "token_def", tokenAlt );
+}
+
+/* Define "ignore_def" as 'ignore' '/' Expr: lex_expr '/'. */
+void ConsInit::ignore()
+{
+ ProdEl *ignoreKw = prodRefLit( "'ignore'" );
+ ProdEl *openSlash = prodRefLit( "'/'" );
+ ProdEl *pattern = prodRefName( "Expr", "lex_expr" );
+ ProdEl *closeSlash = prodRefLit( "'/'" );
+
+ Production *ignoreAlt = production( ignoreKw, openSlash, pattern, closeSlash );
+ definition( "ignore_def", ignoreAlt );
+}
+
+/* Define "token_list": a token_list followed by a token_def, a token_list
+ * followed by an ignore_def, or nothing. */
+void ConsInit::tokenList()
+{
+ /* TokenList: token_list TokenDef: token_def */
+ ProdEl *listBeforeToken = prodRefName( "TokenList", "token_list" );
+ ProdEl *tokenDefRef = prodRefName( "TokenDef", "token_def" );
+ Production *tokenAlt = production( listBeforeToken, tokenDefRef );
+
+ /* TokenList: token_list IgnoreDef: ignore_def */
+ ProdEl *listBeforeIgnore = prodRefName( "TokenList", "token_list" );
+ ProdEl *ignoreDefRef = prodRefName( "IgnoreDef", "ignore_def" );
+ Production *ignoreAlt = production( listBeforeIgnore, ignoreDefRef );
+
+ /* Empty alternative. */
+ Production *emptyAlt = production();
+
+ definition( "token_list", tokenAlt, ignoreAlt, emptyAlt );
+}
+
+/* Build and return the production 'lex' TokenList: token_list 'end'. The
+ * caller decides which definition it belongs to. */
+Production *ConsInit::prodLex()
+{
+ ProdEl *lexKw = prodRefLit( "'lex'" );
+ ProdEl *tokens = prodRefName( "TokenList", "token_list" );
+ ProdEl *endKw = prodRefLit( "'end'" );
+
+ Production *lexAlt = production( lexKw, tokens, endKw );
+ return lexAlt;
+}
+
+/* Define "opt_prod_el_name": either Name: id followed by ':', or nothing. */
+void ConsInit::optProdElName()
+{
+ /* Name: id ':' */
+ ProdEl *nameRef = prodRefName( "Name", "id" );
+ ProdEl *colon = prodRefLit( "':'" );
+ Production *namedAlt = production( nameRef, colon );
+
+ /* Empty alternative. */
+ Production *emptyAlt = production();
+
+ definition( "opt_prod_el_name", namedAlt, emptyAlt );
+}
+
+/* Define "opt_ni": either '-' followed by Ni: NI, or nothing. */
+void ConsInit::optNi()
+{
+ /* '-' Ni: NI */
+ ProdEl *dash = prodRefLit( "'-'" );
+ ProdEl *niRef = prodRefName( "Ni", "NI" );
+ Production *niAlt = production( dash, niRef );
+
+ /* Empty alternative. */
+ Production *emptyAlt = production();
+
+ definition( "opt_ni", niAlt, emptyAlt );
+}
+
+/* Define "opt_prod_repeat": Star: STAR, LeftStar: LEFT_STAR, or nothing. */
+void ConsInit::optRepeat()
+{
+ /* Star: STAR */
+ ProdEl *starRef = prodRefName( "Star", "STAR" );
+ Production *starAlt = production( starRef );
+
+ /* LeftStar: LEFT_STAR */
+ ProdEl *leftStarRef = prodRefName( "LeftStar", "LEFT_STAR" );
+ Production *leftStarAlt = production( leftStarRef );
+
+ /* Empty alternative. */
+ Production *emptyAlt = production();
+
+ definition( "opt_prod_repeat", starAlt, leftStarAlt, emptyAlt );
+}
+
+/* Define "prod_el" as
+ * OptName: opt_prod_el_name Id: id OptRepeat: opt_prod_repeat. */
+void ConsInit::prodEl()
+{
+ ProdEl *optName = prodRefName( "OptName", "opt_prod_el_name" );
+ ProdEl *typeId = prodRefName( "Id", "id" );
+ ProdEl *optRep = prodRefName( "OptRepeat", "opt_prod_repeat" );
+
+ Production *elAlt = production( optName, typeId, optRep );
+ definition( "prod_el", elAlt );
+}
+
+/* Define "prod_el_list": a prod_el_list followed by a prod_el, or nothing. */
+void ConsInit::prodElList()
+{
+ /* ProdElList: prod_el_list ProdEl: prod_el */
+ ProdEl *listRef = prodRefName( "ProdElList", "prod_el_list" );
+ ProdEl *elRef = prodRefName( "ProdEl", "prod_el" );
+ Production *consAlt = production( listRef, elRef );
+
+ /* Empty alternative. */
+ Production *emptyAlt = production();
+
+ definition( "prod_el_list", consAlt, emptyAlt );
+}
+
+/* Define "opt_commit": either Commit: COMMIT, or nothing. */
+void ConsInit::optCommit()
+{
+ /* Commit: COMMIT */
+ ProdEl *commitRef = prodRefName( "Commit", "COMMIT" );
+ Production *commitAlt = production( commitRef );
+
+ /* Empty alternative. */
+ Production *emptyAlt = production();
+
+ definition( "opt_commit", commitAlt, emptyAlt );
+}
+
+/* Define "opt_prod_name": either ':' followed by Name: id, or nothing. */
+void ConsInit::optProdName()
+{
+ /* ':' Name: id */
+ ProdEl *colon = prodRefLit( "':'" );
+ ProdEl *nameRef = prodRefName( "Name", "id" );
+ Production *namedAlt = production( colon, nameRef );
+
+ /* Empty alternative. */
+ Production *emptyAlt = production();
+
+ definition( "opt_prod_name", namedAlt, emptyAlt );
+}
+
+/* Define "prod_var_def" as Name: id ':' Type: id. */
+void ConsInit::prodVarDef()
+{
+ ProdEl *varName = prodRefName( "Name", "id" );
+ ProdEl *colon = prodRefLit( "':'" );
+ ProdEl *varType = prodRefName( "Type", "id" );
+
+ Production *varAlt = production( varName, colon, varType );
+ definition( "prod_var_def", varAlt );
+}
+
+/* The prod var list is provided in a basic form, just "id: id". It is not
+ * used in bootstrap0 or bootstrap1; it is ignored in the loader for
+ * bootstrap1. We want to use it in bootstrap2 during the rewrite stage. */
+void ConsInit::prodVarList()
+{
+ /* VarDefList: prod_var_list VarDef: prod_var_def */
+ ProdEl *listRef = prodRefName( "VarDefList", "prod_var_list" );
+ ProdEl *defRef = prodRefName( "VarDef", "prod_var_def" );
+ Production *consAlt = production( listRef, defRef );
+
+ /* Empty alternative. */
+ Production *emptyAlt = production();
+
+ definition( "prod_var_list", consAlt, emptyAlt );
+}
+
+/* Define "prod" as
+ * '[' ProdElList: prod_el_list ']' OptName: opt_prod_name OptCommit: opt_commit. */
+void ConsInit::prod()
+{
+ ProdEl *openBracket = prodRefLit( "'['" );
+ ProdEl *elements = prodRefName( "ProdElList", "prod_el_list" );
+ ProdEl *closeBracket = prodRefLit( "']'" );
+ ProdEl *optName = prodRefName( "OptName", "opt_prod_name" );
+ ProdEl *optCommitRef = prodRefName( "OptCommit", "opt_commit" );
+
+ Production *prodAlt = production( openBracket, elements, closeBracket,
+   optName, optCommitRef );
+ definition( "prod", prodAlt );
+}
+
+/* Define "prod_list": a prod_list '|' prod, or a lone prod. */
+void ConsInit::prodList()
+{
+ /* ProdList: prod_list '|' Prod: prod */
+ ProdEl *listRef = prodRefName( "ProdList", "prod_list" );
+ ProdEl *bar = prodRefLit( "'|'" );
+ ProdEl *nextProd = prodRefName( "Prod", "prod" );
+ Production *consAlt = production( listRef, bar, nextProd );
+
+ /* Prod: prod */
+ ProdEl *onlyProd = prodRefName( "Prod", "prod" );
+ Production *singleAlt = production( onlyProd );
+
+ definition( "prod_list", consAlt, singleAlt );
+}
+
+/* Build and return the production
+ * 'def' DefId: id ProdVarList: prod_var_list ProdList: prod_list. The caller
+ * decides which definition it belongs to. */
+Production *ConsInit::prodProd()
+{
+ ProdEl *defKw = prodRefLit( "'def'" );
+ ProdEl *defName = prodRefName( "DefId", "id" );
+ ProdEl *vars = prodRefName( "ProdVarList", "prod_var_list" );
+ ProdEl *alternatives = prodRefName( "ProdList", "prod_list" );
+
+ Production *defAlt = production( defKw, defName, vars, alternatives );
+ return defAlt;
+}
+
+/* Define "item" as either the production from prodLex() or the one from
+ * prodProd(); the lex production is built first. */
+void ConsInit::item()
+{
+ Production *lexAlt = prodLex();
+ Production *defAlt = prodProd();
+
+ definition( "item", lexAlt, defAlt );
+}
+
+/* Define "start" as a single left-repeat reference to item, captured as
+ * ItemList. */
+void ConsInit::startProd()
+{
+ ProdEl *items = prodRefNameLeftRepeat( "ItemList", "item" );
+ Production *startAlt = production( items );
+
+ definition( "start", startAlt );
+}
+
+void ConsInit::parseInput( StmtList *stmtList )
+{
+ /* Pop argv; this yields the file name. */
+ CallArgVect *popArgs = new CallArgVect;
+ QualItemVect *popQual = new QualItemVect;
+ popQual->append( QualItem( QualItem::Arrow, internal, String( "argv" ) ) );
+
+ LangVarRef *popRef = LangVarRef::cons( internal,
+ curNspace(), 0, curLocalFrame()->rootScope,
+ NamespaceQual::cons( curNspace() ), popQual, String("pop") );
+ LangExpr *pop = LangExpr::cons( LangTerm::cons( InputLoc(), popRef, popArgs ) );
+
+ TypeRef *typeRef = TypeRef::cons( internal, pd->uniqueTypeStr );
+ ObjectField *objField = ObjectField::cons( internal,
+ ObjectField::UserLocalType, typeRef, "A" );
+
+ LangStmt *stmt = varDef( objField, pop, LangStmt::AssignType );
+ stmtList->append( stmt );
+
+ /* Construct a literal string 'r', for second arg to open. */
+ ConsItem *modeConsItem = ConsItem::cons( internal,
+ ConsItem::InputText, String("r") );
+ ConsItemList *modeCons = new ConsItemList;
+ modeCons->append( modeConsItem );
+ LangExpr *modeExpr = LangExpr::cons( LangTerm::cons( internal, modeCons ) );
+
+ /* Reference A->value */
+ QualItemVect *qual = new QualItemVect;
+ LangVarRef *varRef = LangVarRef::cons( internal, curNspace(), 0,
+ curLocalFrame()->rootScope, NamespaceQual::cons( curNspace() ),
+ qual, String("A") );
+ LangExpr *Avalue = LangExpr::cons( LangTerm::cons( internal,
+ LangTerm::VarRefType, varRef ) );
+
+ /* Call open. */
+ QualItemVect *openQual = new QualItemVect;
+ LangVarRef *openRef = LangVarRef::cons( internal,
+ 0, 0, curLocalFrame()->rootScope,
+ NamespaceQual::cons( curNspace() ), openQual, String("open") );
+ CallArgVect *openArgs = new CallArgVect;
+ openArgs->append( new CallArg(Avalue) );
+ openArgs->append( new CallArg(modeExpr) );
+ LangExpr *open = LangExpr::cons( LangTerm::cons( InputLoc(), openRef, openArgs ) );
+
+ /* Construct a list containing the open stream. */
+ ConsItem *consItem = ConsItem::cons( internal, ConsItem::ExprType, open, ConsItem::TrimDefault );
+ ConsItemList *list = ConsItemList::cons( consItem );
+
+ /* Will capture the parser to "P" */
+ objField = ObjectField::cons( internal,
+ ObjectField::UserLocalType, 0, String("P") );
+
+ /* Parse the "start" def. */
+ NamespaceQual *nspaceQual = NamespaceQual::cons( curNspace() );
+ typeRef = TypeRef::cons( internal, nspaceQual,
+ String("start"), RepeatNone );
+
+ /* Parse the above list. */
+ LangExpr *parseExpr = parseCmd( internal, false, false, objField,
+ typeRef, 0, list, true, false, false, "" );
+ LangStmt *parseStmt = LangStmt::cons( internal, LangStmt::ExprType, parseExpr );
+ stmtList->append( parseStmt );
+}
+
+/* Append to stmtList an export statement that assigns the local "P" to an
+ * exported field named "ColmTree" whose type is "start". */
+void ConsInit::exportTree( StmtList *stmtList )
+{
+    /* Build an expression referring to P. */
+    QualItemVect *parserQual = new QualItemVect;
+    LangVarRef *parserRef = LangVarRef::cons( internal, curNspace(), 0,
+            curLocalFrame()->rootScope, NamespaceQual::cons( curNspace() ),
+            parserQual, String("P") );
+    LangExpr *parserExpr = LangExpr::cons( LangTerm::cons( internal,
+            LangTerm::VarRefType, parserRef ) );
+
+    /* Describe the exported field: ColmTree, typed as "start". */
+    NamespaceQual *startQual = NamespaceQual::cons( curNspace() );
+    TypeRef *startType = TypeRef::cons( internal, startQual,
+            String("start"), RepeatNone );
+    ObjectField *treeField = ObjectField::cons( internal,
+            ObjectField::StructFieldType, startType, String("ColmTree") );
+
+    /* Assign P to ColmTree. */
+    LangStmt *treeExport = exportStmt( treeField, LangStmt::AssignType, parserExpr );
+    stmtList->append( treeExport );
+}
+
+/* Entry point: build every token and grammar definition, then the root code
+ * block.
+ *
+ * Runs, in order: init(); the ignore/keyword/token registrations inside one
+ * pushRegionSet()/popRegionSet() pair; the lex_* definitions; the production
+ * definitions; parseInput() and exportTree(), which append to a fresh
+ * statement list; finally stores that list in pd->rootCodeBlock.
+ *
+ * activeRealm is not used by this body. */
+void ConsInit::go( long activeRealm )
+{
+ ConsInit::init();
+
+ StmtList *stmtList = new StmtList;
+
+ /* The token region */
+ pushRegionSet( internal );
+
+ wsIgnore();
+ commentIgnore();
+
+ /* Word keywords. The one-argument form passes only the literal; the
+  * two-argument form also supplies a name for it. */
+ keyword( "'def'" );
+ keyword( "'lex'" );
+ keyword( "'end'" );
+ keyword( "'token'" );
+ keyword( "'ignore'" );
+ keyword( "NI", "'ni'" );
+ keyword( "COMMIT", "'commit'" );
+
+ idToken();
+ literalToken();
+
+ /* Operator and punctuation literals. */
+ keyword( "STAR", "'*'");
+ keyword( "PLUS", "'+'");
+ keyword( "LEFT_STAR", "'<*'");
+ keyword( "'['" );
+ keyword( "']'" );
+ keyword( "'|'" );
+ keyword( "'/'" );
+ keyword( "':'" );
+ keyword( "DOT", "'.'" );
+ keyword( "COLON_LT", "':>'" );
+ keyword( "'('" );
+ keyword( "')'" );
+ keyword( "'..'" );
+ keyword( "'^'" );
+ keyword( "'-'" );
+
+ popRegionSet();
+
+ /* Definitions for lexical expressions. */
+ lexFactor();
+ lexFactorNeg();
+ lexFactorRep();
+ lexTerm();
+ lexExpr();
+
+ /* Definitions for productions, tokens and the top-level item list. */
+ optNi();
+ optRepeat();
+ optProdElName();
+ prodEl();
+ prodElList();
+ optCommit();
+ optProdName();
+ prodVarDef();
+ prodVarList();
+ prod();
+ prodList();
+ ignore();
+ token();
+ tokenList();
+ item();
+ startProd();
+
+ /* Statements that parse the input and export the resulting tree. */
+ parseInput( stmtList );
+ exportTree( stmtList );
+
+ pd->rootCodeBlock = CodeBlock::cons( stmtList, 0 );
+}
diff --git a/src/consinit.h b/src/consinit.h
new file mode 100644
index 00000000..614f19d4
--- /dev/null
+++ b/src/consinit.h
@@ -0,0 +1,113 @@
+/*
+ * Copyright 2013-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <iostream>
+
+#include <avltree.h>
+
+#include "compiler.h"
+#include "parser.h"
+
+#ifndef _COLM_CONSINIT_H
+#define _COLM_CONSINIT_H
+
+/* A BaseParser that constructs grammar definitions directly through calls
+ * (prodRef*, production, definition, keyword) rather than from parsed text.
+ * NOTE(review): this looks like the bootstrap grammar used to parse the file
+ * named on the command line — confirm against the callers of go(). */
+struct ConsInit
+:
+ public BaseParser
+{
+ /* Forwards the compiler instance to BaseParser. */
+ ConsInit( Compiler *pd )
+ :
+ BaseParser(pd)
+ {}
+
+ /* Production-element factories. In the two-argument forms the capture
+  * name comes first and the referenced definition name second. */
+ ProdEl *prodRefName( const String &name );
+ ProdEl *prodRefName( const String &capture, const String &name );
+ ProdEl *prodRefNameLeftRepeat( const String &name );
+ ProdEl *prodRefNameLeftRepeat( const String &capture, const String &name );
+ ProdEl *prodRefLit( const String &lit );
+
+ /* Build a production from zero to five, or exactly seven, elements.
+  * There is no six-element overload. */
+ Production *production();
+ Production *production( ProdEl *prodEl1 );
+ Production *production( ProdEl *prodEl1, ProdEl *prodEl2 );
+ Production *production( ProdEl *prodEl1, ProdEl *prodEl2,
+ ProdEl *prodEl3 );
+ Production *production( ProdEl *prodEl1, ProdEl *prodEl2,
+ ProdEl *prodEl3, ProdEl *prodEl4 );
+ Production *production( ProdEl *prodEl1, ProdEl *prodEl2,
+ ProdEl *prodEl3, ProdEl *prodEl4, ProdEl *prodEl5 );
+ Production *production( ProdEl *prodEl1, ProdEl *prodEl2,
+ ProdEl *prodEl3, ProdEl *prodEl4, ProdEl *prodEl5,
+ ProdEl *prodEl6, ProdEl *prodEl7 );
+
+ /* Register a named definition made of one to four productions. */
+ void definition( const String &name, Production *prod );
+ void definition( const String &name, Production *prod1, Production *prod2 );
+ void definition( const String &name, Production *prod1, Production *prod2, Production *prod3 );
+ void definition( const String &name, Production *prod1, Production *prod2,
+ Production *prod3, Production *prod4 );
+
+ /* Register a keyword literal, optionally under an explicit name. */
+ void keyword( const String &name, const String &lit );
+ void keyword( const String &kw );
+
+ /* Definitions are not visible in this part of the file. */
+ void printParseTree( StmtList *stmtList );
+ void printParseTree();
+
+ /* Token and ignore registrations; go() calls these between
+  * pushRegionSet() and popRegionSet(). */
+ void literalToken();
+ void commentIgnore();
+ void wsIgnore();
+ void idToken();
+
+ void token();
+ void ignore();
+ void tokenList();
+
+ /* Definitions for lexical expressions. */
+ void lexFactor();
+ void lexFactorNeg();
+ void lexFactorRep();
+ void lexExpr();
+ void lexTerm();
+
+ /* Production builders that return the production instead of registering
+  * it; item() combines the two results into one definition. */
+ Production *prodProd();
+ Production *prodLex();
+
+ /* Grammar definitions, one method per definition. */
+ void optNi();
+ void optRepeat();
+ void optProdElName();
+ void prodEl();
+ void prodElList();
+ void varDefList();
+ void item();
+ void prodVarDef();
+ void prodVarList();
+ void prodList();
+ void optProdName();
+ void prod();
+ void startProd();
+ void optCommit();
+
+ /* Append the parse and export statements to stmtList. */
+ void parseInput( StmtList *stmtList );
+ void exportTree( StmtList *stmtList );
+
+ /* Build everything and set the compiler's root code block.
+  * NOTE(review): presumably overrides a virtual in BaseParser — confirm. */
+ virtual void go( long activeRealm );
+};
+
+#endif /* _COLM_CONSINIT_H */
+
diff --git a/src/cstring.h b/src/cstring.h
new file mode 100644
index 00000000..3c285153
--- /dev/null
+++ b/src/cstring.h
@@ -0,0 +1,862 @@
+/*
+ * Copyright 2002-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _AAPL_ASTRING_H
+#define _AAPL_ASTRING_H
+
+#include <stdlib.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+#include <new>
+#include <iostream>
+
+#include "tree.h"
+
+struct colm_data;
+
+#ifdef AAPL_NAMESPACE
+namespace Aapl {
+#endif
+
+#ifdef AAPL_DOCUMENTATION
+
+/**
+ * \defgroup astring String
+ * \brief Implicitly shared copy-on-write string.
+ *
+ * @{
+ */
+
+/**
+ * \class String
+ * \brief Implicitly shared copy-on-write string.
+ */
+
+/*@}*/
+
+/* Documentation-only declaration (compiled only under AAPL_DOCUMENTATION).
+ * It mirrors the public interface of StrTmpl<char>, which is what the
+ * String typedef refers to in normal builds. */
+class String
+{
+public:
+    /**
+     * \brief Create a null string. Data points to NULL.
+     */
+    String();
+
+    /**
+     * \brief Construct a string from a c-style string.
+     *
+     * A new buffer is allocated for the c string. Initially, this string will
+     * be the only String class referencing the data.
+     */
+    String( const char *s );
+
+    /**
+     * \brief Construct a string from a c-style string of specific length.
+     *
+     * A new buffer is allocated for the c string. Initially, this string will
+     * be the only String class referencing the data.
+     */
+    String( const char *s, long len );
+
+    /**
+     * \brief Construct a string from another String.
+     *
+     * A reference to the buffer allocated for s is taken. A new buffer is
+     * not allocated.
+     */
+    String( const String &s );
+
+    /**
+     * \brief Construct a string using printf-style formatting.
+     *
+     * lenGuess is an initial estimate of the formatted length (not including
+     * the trailing null). If the formatted output is longer, the buffer is
+     * grown to fit and the formatting is repeated, so the result is not
+     * truncated. Only the length actually written will be used by the new
+     * string. This string will be the only String class referencing the
+     * data.
+     */
+    String( long lenGuess, const char *format, ... );
+
+    /**
+     * \brief Clean up the string.
+     *
+     * If the string is not null, the referenced data is detached. If no other
+     * string references the detached data, it is deleted.
+     */
+    ~String();
+
+    /**
+     * \brief Set the string from a c-style string.
+     *
+     * If this string is not null, the current buffer is dereferenced and
+     * possibly deleted. A new buffer is allocated (or possibly the old buffer
+     * reused) for the string. Initially, this string will be the only String
+     * class referencing the data.
+     *
+     * If s is null, then this string becomes a null ptr.
+     *
+     * \returns A reference to this.
+     */
+    String &operator=( const char *s );
+
+    /**
+     * \brief Set the string from a c-style string of specific length.
+     *
+     * If this string is not null, the current buffer is dereferenced and
+     * possibly deleted. A new buffer is allocated (or possibly the old buffer
+     * reused) for the string. Initially, this string will be the only String
+     * class referencing the data.
+     *
+     * If s is null, then this string becomes a null ptr.
+     */
+    void setAs( const char *s, long len );
+
+    /**
+     * \brief Set the string from a single char.
+     *
+     * The current buffer is dereferenced and possibly deleted. A new buffer
+     * is allocated (or possibly the old buffer reused) for the string.
+     * Initially, this string will be the only String class referencing the
+     * data.
+     *
+     * \returns A reference to this.
+     */
+    String &operator=( const char c );
+
+
+    /**
+     * \brief Set the string from another String.
+     *
+     * If this string is not null, the current buffer is dereferenced and
+     * possibly deleted. A reference to the buffer allocated for s is taken.
+     * A new buffer is not allocated.
+     *
+     * If s is null, then this string becomes a null ptr.
+     *
+     * \returns a reference to this.
+     */
+    String &operator=( const String &s );
+
+    /**
+     * \brief Append a c string to the end of this string.
+     *
+     * If this string shares its allocation with another, a copy is first
+     * taken. The buffer for this string is grown and s is appended to the
+     * end.
+     *
+     * If s is null nothing happens.
+     *
+     * \returns a reference to this.
+     */
+    String &operator+=( const char *s );
+
+    /**
+     * \brief Append a c string of specific length to the end of this string.
+     *
+     * If this string shares its allocation with another, a copy is first
+     * taken. The buffer for this string is grown and s is appended to the
+     * end.
+     *
+     * If s is null nothing happens.
+     */
+    void append( const char *s, long len );
+
+    /**
+     * \brief Append a single char to the end of this string.
+     *
+     * If this string shares its allocation with another, a copy is first
+     * taken. The buffer for this string is grown and c is appended to the
+     * end.
+     *
+     * \returns a reference to this.
+     */
+    String &operator+=( const char c );
+
+    /**
+     * \brief Append a String to the end of this string.
+     *
+     * If this string shares its allocation with another, a copy is first
+     * taken. The buffer for this string is grown and the data of s is
+     * appended to the end.
+     *
+     * If s is null nothing happens.
+     *
+     * \returns a reference to this.
+     */
+    String &operator+=( const String &s );
+
+    /**
+     * \brief Cast to a char star.
+     *
+     * \returns the string data. A null string returns 0.
+     */
+    operator char*() const;
+
+    /**
+     * \brief Get a pointer to the data.
+     *
+     * \returns the string data. A null string returns 0.
+     */
+    char *get() const;
+
+    /**
+     * \brief Get the length of the string
+     *
+     * \returns the length of the string. A null string has length 0.
+     */
+    long length() const;
+
+    /**
+     * \brief Pointer to the data.
+     *
+     * Publicly accessible pointer to the data. Immediately in front of the
+     * string data block is the string header which stores the refcount and
+     * length. Consequently, care should be taken if modifying this pointer.
+     */
+    char *data;
+};
+
+/**
+ * \relates String
+ * \brief Concatenate a String and a c-style string.
+ *
+ * \returns The concatenation of the two strings in a String.
+ */
+String operator+( const String &s1, const char *s2 );
+
+/**
+ * \relates String
+ * \brief Concatenate a c-style string and a String.
+ *
+ * \returns The concatenation of the two strings in a String.
+ */
+String operator+( const char *s1, const String &s2 );
+
+/**
+ * \relates String
+ * \brief Concatenate two String classes.
+ *
+ * \returns The concatenation of the two strings in a String.
+ */
+String operator+( const String &s1, const String &s2 );
+
+#endif
+
+/* Reference-counted string. Several StrTmpl objects may point at one buffer;
+ * a Head holding the reference count and length sits immediately before the
+ * character data, so `data` can be handed out as a plain char pointer.
+ * A null string has data == 0.
+ *
+ * The template parameter T is not used by any member declared here; the
+ * storage is always char. */
+template<class T> class StrTmpl
+{
+public:
+ /* Tag type selecting the overloads that allocate space without copying. */
+ class Fresh {};
+
+ /* Header located just before string data. Keeps the length and a refcount on
+ * the data. */
+ struct Head
+ {
+ long refCount;
+ long length;
+ };
+
+ /**
+ * \brief Create a null string.
+ */
+ StrTmpl() : data(0) { }
+
+ /* Clean up the string. */
+ ~StrTmpl();
+
+ /* Construct a string from a c-style string. */
+ StrTmpl( const char *s );
+
+ /* Construct a string from a c-style string of specific len. */
+ StrTmpl( const char *s, long len );
+
+ /* Allocate len spaces. */
+ StrTmpl( const Fresh &, long len );
+
+ /* Construct a string from another StrTmpl. */
+ StrTmpl( const StrTmpl &s );
+
+ /* Construct a string using printf-style formatting. lenGuess is the
+ * initial buffer size; the buffer grows if the output is longer. */
+ StrTmpl( long lenGuess, const char *format, ... );
+
+ /* Construct a string by copying the bytes of a colm_data. */
+ StrTmpl( const colm_data *cd );
+
+ /* Set the string from a c-style string. */
+ StrTmpl &operator=( const char *s );
+
+ /* Set the string from a c-style string of specific len. */
+ void setAs( const char *s, long len );
+
+ /* Allocate len spaces. */
+ void setAs( const Fresh &, long len );
+
+ /* Shorten the string to len characters and re-terminate it. */
+ void chop( long len );
+
+ /* Set the string using printf-style formatting. */
+ void setAs( long lenGuess, const char *format, ... );
+
+ /* Set the string from a single char. */
+ StrTmpl &operator=( const char c );
+
+ /* Set the string from another StrTmpl. */
+ StrTmpl &operator=( const StrTmpl &s );
+
+ /* Append a c string to the end of this string. */
+ StrTmpl &operator+=( const char *s );
+
+ /* Append a c string of specific len to the end of this string. */
+ void append( const char *s, long len );
+
+ /* Append a single char to the end of this string. */
+ StrTmpl &operator+=( const char c );
+
+ /* Append an StrTmpl to the end of this string. */
+ StrTmpl &operator+=( const StrTmpl &s );
+
+ /* Cast to a char star. */
+ operator char*() const { return data; }
+
+ /* Get a pointer to the data. */
+ char *get() const { return data; }
+
+ /* Return the length of the string. A null data pointer yields 0. */
+ long length() const { return data ? (((Head*)data)-1)->length : 0; }
+
+ /**
+ * \brief Pointer to the data.
+ */
+ char *data;
+
+protected:
+ /* Make space for a string of length len to be appended. */
+ char *appendSpace( long len );
+ /* Allocate a fresh buffer for `length` characters plus the terminator. */
+ void initSpace( long length );
+ /* Detach from the current buffer and make room for `length` characters. */
+ void setSpace( long length );
+
+ /* The concatenation operators build the buffer themselves and need the
+ * private adopting constructor below. */
+ template <class FT> friend StrTmpl<FT> operator+(
+ const StrTmpl<FT> &s1, const char *s2 );
+ template <class FT> friend StrTmpl<FT> operator+(
+ const char *s1, const StrTmpl<FT> &s2 );
+ template <class FT> friend StrTmpl<FT> operator+(
+ const StrTmpl<FT> &s1, const StrTmpl<FT> &s2 );
+
+private:
+ /* A dummy struct solely to make a constructor that will never be
+ * ambiguous with the public constructors. */
+ struct DisAmbig { };
+ /* Adopt an already-prepared data pointer without copying. */
+ StrTmpl( char *data, const DisAmbig & ) : data(data) { }
+};
+
+/* Drop this string's reference. The buffer is freed when the last
+ * reference goes away. */
+template<class T> StrTmpl<T>::~StrTmpl()
+{
+    /* A null string holds no buffer. */
+    if ( data == 0 )
+        return;
+
+    /* The header sits immediately before the character data. */
+    Head *hdr = ((Head*) data) - 1;
+    if ( --hdr->refCount == 0 )
+        free( hdr );
+}
+
+/* Construct from a null-terminated c string. A null pointer produces a
+ * null string. */
+template<class T> StrTmpl<T>::StrTmpl( const char *s )
+{
+    if ( s != 0 ) {
+        /* Allocate for the measured length, then copy the characters
+         * together with the terminator. */
+        const long len = strlen( s );
+        initSpace( len );
+        memcpy( data, s, len+1 );
+    }
+    else {
+        data = 0;
+    }
+}
+
+/* Construct from the first `length` bytes at s. The source need not be
+ * null-terminated; a terminator is written here. A null pointer produces a
+ * null string. */
+template<class T> StrTmpl<T>::StrTmpl( const char *s, long length )
+{
+    if ( s != 0 ) {
+        initSpace( length );
+        memcpy( data, s, length );
+        data[length] = 0;
+    }
+    else {
+        data = 0;
+    }
+}
+
+/* Construct with room for `length` characters. Only the terminator at
+ * data[length] is written; the characters before it are left
+ * uninitialized for the caller to fill in. */
+template<class T> StrTmpl<T>::StrTmpl( const Fresh &, long length )
+{
+    initSpace( length );
+    data[length] = 0;
+}
+
+/* Copy-construct by sharing the source's buffer. No characters are copied;
+ * only the reference count goes up. */
+template<class T> StrTmpl<T>::StrTmpl( const StrTmpl &s )
+{
+    if ( s.data != 0 ) {
+        /* Take a reference on the shared header. */
+        Head *shared = ((Head*)s.data) - 1;
+        shared->refCount += 1;
+    }
+
+    /* Null stays null; otherwise both strings point at the same data. */
+    data = s.data;
+}
+
+/* Construct a string using printf-style formatting.
+ *
+ * lenGuess is the initial buffer size in characters, not counting the
+ * terminator. If the formatted output is longer, the buffer is resized to
+ * the exact length and the formatting is done again. If vsnprintf reports
+ * an error the result is an empty string. */
+template<class T> StrTmpl<T>::StrTmpl( long lenGuess, const char *format, ... )
+{
+    /* Set the string for len. */
+    initSpace( lenGuess );
+
+    va_list args;
+
+    va_start( args, format );
+    long written = vsnprintf( data, lenGuess+1, format, args );
+    va_end( args );
+
+    if ( written > lenGuess ) {
+        /* Did not fit. A va_list cannot be reused once consumed, so it is
+         * restarted for the second pass. */
+        setSpace( written );
+        va_start( args, format );
+        written = vsnprintf( data, written+1, format, args );
+        va_end( args );
+    }
+
+    /* vsnprintf returns a negative value on an output error; a negative
+     * length must not reach chop(), which would write before the buffer. */
+    if ( written < 0 )
+        written = 0;
+    chop( written );
+
+    /* Every va_start above is already paired with its va_end; the extra
+     * trailing va_end in the original has been removed. */
+}
+
+/* Construct by copying cd->length bytes from cd->data and appending a
+ * terminator. A colm_data with a null data pointer produces a null string.
+ * cd itself must not be null. */
+template<class T> StrTmpl<T>::StrTmpl( const colm_data *cd )
+{
+    if ( cd->data != 0 ) {
+        initSpace( cd->length );
+        memcpy( data, cd->data, cd->length );
+        data[cd->length] = 0;
+    }
+    else {
+        data = 0;
+    }
+}
+
+
+
+/* Set the string using printf-style formatting. The old contents are
+ * discarded.
+ *
+ * lenGuess is the initial buffer size in characters, not counting the
+ * terminator. If the formatted output is longer, the buffer is resized to
+ * the exact length and the formatting is done again. If vsnprintf reports
+ * an error the result is an empty string. */
+template<class T> void StrTmpl<T>::setAs( long lenGuess, const char *format, ... )
+{
+    /* Set the string for len. */
+    setSpace( lenGuess );
+
+    va_list args;
+
+    va_start( args, format );
+    long written = vsnprintf( data, lenGuess+1, format, args );
+    va_end( args );
+
+    if ( written > lenGuess ) {
+        /* Did not fit. The first vsnprintf consumed the va_list, so it must
+         * be restarted before it is used again; reusing it as-is is
+         * undefined behaviour. */
+        setSpace( written );
+        va_start( args, format );
+        written = vsnprintf( data, written+1, format, args );
+        va_end( args );
+    }
+
+    /* vsnprintf returns a negative value on an output error; a negative
+     * length must not reach chop(), which would write before the buffer. */
+    if ( written < 0 )
+        written = 0;
+    chop( written );
+}
+
+/* Allocate a new buffer for `length` characters plus the terminator and
+ * point data at it. Any previous value of data is overwritten without
+ * being released, so this is for strings that do not yet own a buffer.
+ * Throws std::bad_alloc when the allocation fails. */
+template<class T> void StrTmpl<T>::initSpace( long length )
+{
+    /* One allocation holds the header followed by the characters. */
+    void *block = malloc( sizeof(Head) + length+1 );
+    if ( block == 0 )
+        throw std::bad_alloc();
+
+    Head *hdr = (Head*) block;
+    hdr->refCount = 1;
+    hdr->length = length;
+
+    /* The character data starts right after the header. */
+    data = (char*) (hdr+1);
+}
+
+
+/* Assign from a null-terminated c string, discarding the old contents.
+ * Assigning a null pointer makes this a null string.
+ * Returns a reference to this. */
+template<class T> StrTmpl<T> &StrTmpl<T>::operator=( const char *s )
+{
+    if ( s != 0 ) {
+        /* Make room for the new length, then copy the characters together
+         * with the terminator. */
+        const long len = strlen( s );
+        setSpace( len );
+        memcpy( data, s, len+1 );
+    }
+    else if ( data != 0 ) {
+        /* Becoming null: release our reference on the current buffer. */
+        Head *hdr = ((Head*)data) - 1;
+        if ( --hdr->refCount == 0 )
+            free( hdr );
+        data = 0;
+    }
+    return *this;
+}
+
+/* Set this string to the first `length` bytes at s, discarding the old
+ * contents. A terminator is written after the copied bytes. A null s makes
+ * this a null string. */
+template<class T> void StrTmpl<T>::setAs( const char *s, long length )
+{
+    if ( s != 0 ) {
+        setSpace( length );
+        memcpy( data, s, length );
+        data[length] = 0;
+    }
+    else if ( data != 0 ) {
+        /* Becoming null: release our reference on the current buffer. */
+        Head *hdr = ((Head*)data) - 1;
+        if ( --hdr->refCount == 0 )
+            free( hdr );
+        data = 0;
+    }
+}
+
+/* Shorten the string to `length` characters and re-terminate it. The
+ * buffer is not reallocated. Asserts that this string is the sole owner of
+ * the buffer and that `length` does not exceed the current length. */
+template<class T> void StrTmpl<T>::chop( long length )
+{
+    Head *hdr = ((Head*)data) - 1;
+
+    assert( hdr->refCount == 1 );
+    assert( length <= hdr->length );
+
+    data[length] = 0;
+    hdr->length = length;
+}
+
+/* Discard the old contents and make room for `length` characters. Only the
+ * terminator at data[length] is written; the characters before it are left
+ * for the caller to fill in. */
+template<class T> void StrTmpl<T>::setAs( const Fresh &, long length )
+{
+    setSpace( length );
+    data[length] = 0;
+}
+
+/* Assign a single character, discarding the old contents. The result has
+ * length 1. Returns a reference to this. */
+template<class T> StrTmpl<T> &StrTmpl<T>::operator=( const char c )
+{
+    /* Room for one character plus the terminator. */
+    setSpace( 1 );
+
+    char *out = data;
+    out[0] = c;
+    out[1] = 0;
+
+    return *this;
+}
+
+/* Assign from another StrTmpl by sharing its buffer. The old contents are
+ * discarded and no characters are copied. Assigning a null string makes
+ * this a null string. Returns a reference to this.
+ *
+ * Self-assignment, and assignment between two strings that already share a
+ * buffer, is a no-op. The original released its own reference first, so
+ * `a = a` on a solely-owned string freed the buffer and then took a
+ * reference on the freed memory. */
+template<class T> StrTmpl<T> &StrTmpl<T>::operator=( const StrTmpl &s )
+{
+    /* Already pointing at the same buffer (or both null): nothing changes. */
+    if ( data == s.data )
+        return *this;
+
+    /* Take the new reference before dropping the old one. */
+    if ( s.data != 0 ) {
+        Head *incoming = ((Head*)s.data) - 1;
+        incoming->refCount += 1;
+    }
+
+    /* Detach from the existing string. */
+    if ( data != 0 ) {
+        Head *outgoing = ((Head*)data) - 1;
+        outgoing->refCount -= 1;
+        if ( outgoing->refCount == 0 )
+            free( outgoing );
+    }
+
+    /* Null stays null; otherwise share the source's data. */
+    data = s.data;
+    return *this;
+}
+
+/* Prepare the string to hold `length` characters plus the terminator,
+ * discarding the old contents.
+ *
+ * If this string is the sole owner of its buffer, the buffer is resized in
+ * place. If it is null or shares its buffer, a new buffer is allocated and
+ * the shared reference (if any) is dropped. The characters are not
+ * initialized; callers write them and the terminator.
+ *
+ * Throws std::bad_alloc on allocation failure. In that case the string
+ * still refers to its previous buffer with the reference count intact; the
+ * original dropped the count before allocating. */
+template<class T> void StrTmpl<T>::setSpace( long length )
+{
+    const size_t bytes = sizeof(Head) + length+1;
+    Head *head = 0;
+
+    if ( data == 0 ) {
+        /* Null string: nothing to detach from. The header address is only
+         * computed when data is non-null. */
+        head = (Head*) malloc( bytes );
+        if ( head == 0 )
+            throw std::bad_alloc();
+    }
+    else {
+        Head *old = ((Head*)data) - 1;
+        if ( old->refCount == 1 ) {
+            /* Sole owner: reuse the space. If realloc fails the old block
+             * is untouched and still owned by this string. */
+            head = (Head*) realloc( old, bytes );
+            if ( head == 0 )
+                throw std::bad_alloc();
+        }
+        else {
+            /* Shared: allocate first, and only drop our reference once the
+             * new space is known to exist. */
+            head = (Head*) malloc( bytes );
+            if ( head == 0 )
+                throw std::bad_alloc();
+            old->refCount -= 1;
+        }
+    }
+
+    /* Init the header. */
+    head->refCount = 1;
+    head->length = length;
+
+    /* Save the pointer to the character data. */
+    data = (char*) (head+1);
+}
+
+
+/* Append a null-terminated c string. A null pointer is ignored.
+ * Returns a reference to this. */
+template<class T> StrTmpl<T> &StrTmpl<T>::operator+=( const char *s )
+{
+    if ( s == 0 )
+        return *this;
+
+    /* Grow by the length of s, then copy it together with its terminator
+     * to the write position. */
+    const long extra = strlen( s );
+    char *tail = appendSpace( extra );
+    memcpy( tail, s, extra+1 );
+
+    return *this;
+}
+
+/* Append the first `length` bytes at s. The source need not be
+ * null-terminated; a terminator is written after the appended bytes.
+ * A null pointer is ignored. */
+template<class T> void StrTmpl<T>::append( const char *s, long length )
+{
+    if ( s == 0 )
+        return;
+
+    /* Make space on the end and copy the bytes in. */
+    char *tail = appendSpace( length );
+    memcpy( tail, s, length );
+    tail[length] = 0;
+}
+
+/* Append one character and re-terminate the string.
+ * Returns a reference to this. */
+template<class T> StrTmpl<T> &StrTmpl<T>::operator+=( const char c )
+{
+    /* Grow by one; appendSpace returns the write position. */
+    char *tail = appendSpace( 1 );
+    tail[0] = c;
+    tail[1] = 0;
+
+    return *this;
+}
+
+
+/* Append another StrTmpl. A null s is ignored.
+ * Returns a reference to this.
+ *
+ * Safe when s is this string itself (a += a). The original copied
+ * addedLen+1 bytes, which in the self-append case overlapped the
+ * destination by one byte and overwrote the terminator before it was read. */
+template<class T> StrTmpl<T> &StrTmpl<T>::operator+=( const StrTmpl &s )
+{
+    if ( s.data != 0 ) {
+        /* Find the length to append, before the buffer is grown. */
+        const long addedLen = (((Head*)s.data) - 1)->length;
+
+        /* Make space on the end to put the string. */
+        char *dest = appendSpace( addedLen );
+
+        /* s.data is read only after appendSpace: if s is *this, the buffer
+         * may have moved. Copy just the characters, so source and
+         * destination never overlap, and write the terminator explicitly. */
+        memcpy( dest, s.data, addedLen );
+        dest[addedLen] = 0;
+    }
+    return *this;
+}
+
+/* Make space for `len` more characters at the end of this string and
+ * return the position where they are to be written. The header length is
+ * updated; the caller writes the characters and the terminator.
+ *
+ * A null string is treated as empty, where the original dereferenced a
+ * header that did not exist. A solely-owned buffer is grown in place; a
+ * shared buffer is copied into a new one first.
+ *
+ * Throws std::bad_alloc on allocation failure, leaving this string and any
+ * strings sharing its buffer unchanged. */
+template<class T> char *StrTmpl<T>::appendSpace( long len )
+{
+    /* Appending to a null string: allocate exactly the requested space. */
+    if ( data == 0 ) {
+        initSpace( len );
+        return data;
+    }
+
+    /* Find the length of this and the string appended. */
+    Head *head = (((Head*)data) - 1);
+    const long thisLen = head->length;
+    const size_t bytes = sizeof(Head) + thisLen + len + 1;
+
+    if ( head->refCount == 1 ) {
+        /* No other string is using the space, grow this space. On failure
+         * the old block is untouched and data still points at it. */
+        Head *grown = (Head*) realloc( head, bytes );
+        if ( grown == 0 )
+            throw std::bad_alloc();
+        data = (char*) (grown+1);
+
+        /* Adjust the length. */
+        grown->length += len;
+    }
+    else {
+        /* Another string is using this space, make new space. Our
+         * reference is dropped only after the allocation has succeeded. */
+        Head *newHead = (Head*) malloc( bytes );
+        if ( newHead == 0 )
+            throw std::bad_alloc();
+        head->refCount -= 1;
+
+        /* Set the new header and data from this. */
+        newHead->refCount = 1;
+        newHead->length = thisLen + len;
+        data = (char*) (newHead+1);
+        memcpy( data, head+1, thisLen );
+    }
+
+    /* Return writing position. */
+    return data + thisLen;
+}
+
+/* Concatenate a StrTmpl and a c-style string into a new string.
+ *
+ * A null s1 or a null s2 is treated as empty; the original dereferenced
+ * them. The result is never a null string. Throws std::bad_alloc when the
+ * allocation fails. */
+template<class T> StrTmpl<T> operator+( const StrTmpl<T> &s1, const char *s2 )
+{
+    typedef typename StrTmpl<T>::Head Head;
+
+    /* length() already returns 0 for a null string. */
+    const long str1Len = s1.length();
+    const long str2Len = s2 != 0 ? (long) strlen( s2 ) : 0;
+
+    Head *head = (Head*) malloc( sizeof(Head) + str1Len + str2Len + 1 );
+    if ( head == 0 )
+        throw std::bad_alloc();
+
+    /* Set up the header. */
+    head->refCount = 1;
+    head->length = str1Len + str2Len;
+
+    /* Copy both parts in. The guards keep null pointers away from memcpy;
+     * the terminator is written explicitly. */
+    char *data = (char*) (head+1);
+    if ( str1Len > 0 )
+        memcpy( data, s1.data, str1Len );
+    if ( str2Len > 0 )
+        memcpy( data + str1Len, s2, str2Len );
+    data[str1Len + str2Len] = 0;
+
+    return StrTmpl<T>( data, typename StrTmpl<T>::DisAmbig() );
+}
+
+/* Concatenate a c-style string and a StrTmpl into a new string.
+ *
+ * A null s1 or a null s2 is treated as empty; the original dereferenced
+ * them. The result is never a null string. Throws std::bad_alloc when the
+ * allocation fails. */
+template<class T> StrTmpl<T> operator+( const char *s1, const StrTmpl<T> &s2 )
+{
+    typedef typename StrTmpl<T>::Head Head;
+
+    /* length() already returns 0 for a null string. */
+    const long str1Len = s1 != 0 ? (long) strlen( s1 ) : 0;
+    const long str2Len = s2.length();
+
+    Head *head = (Head*) malloc( sizeof(Head) + str1Len + str2Len + 1 );
+    if ( head == 0 )
+        throw std::bad_alloc();
+
+    /* Set up the header. */
+    head->refCount = 1;
+    head->length = str1Len + str2Len;
+
+    /* Copy both parts in. The guards keep null pointers away from memcpy;
+     * the terminator is written explicitly. */
+    char *data = (char*) (head+1);
+    if ( str1Len > 0 )
+        memcpy( data, s1, str1Len );
+    if ( str2Len > 0 )
+        memcpy( data + str1Len, s2.data, str2Len );
+    data[str1Len + str2Len] = 0;
+
+    return StrTmpl<T>( data, typename StrTmpl<T>::DisAmbig() );
+}
+
+/* Concatenate two StrTmpl strings into a new string.
+ *
+ * A null operand is treated as empty; the original dereferenced the header
+ * of a null string. The result is never a null string. Throws
+ * std::bad_alloc when the allocation fails. */
+template<class T> StrTmpl<T> operator+( const StrTmpl<T> &s1, const StrTmpl<T> &s2 )
+{
+    typedef typename StrTmpl<T>::Head Head;
+
+    /* length() already returns 0 for a null string. */
+    const long str1Len = s1.length();
+    const long str2Len = s2.length();
+
+    Head *head = (Head*) malloc( sizeof(Head) + str1Len + str2Len + 1 );
+    if ( head == 0 )
+        throw std::bad_alloc();
+
+    /* Set up the header. */
+    head->refCount = 1;
+    head->length = str1Len + str2Len;
+
+    /* Copy both parts in. The guards keep null pointers away from memcpy;
+     * the terminator is written explicitly. */
+    char *data = (char*) (head+1);
+    if ( str1Len > 0 )
+        memcpy( data, s1.data, str1Len );
+    if ( str2Len > 0 )
+        memcpy( data + str1Len, s2.data, str2Len );
+    data[str1Len + str2Len] = 0;
+
+    return StrTmpl<T>( data, typename StrTmpl<T>::DisAmbig() );
+}
+
+/* Stream insertion for StrTmpl. Writes exactly length() bytes with
+ * ostream::write, so the output does not depend on an implicit conversion
+ * to char*. */
+template <class T> inline std::ostream &operator<<( std::ostream &o, const StrTmpl<T> &s )
+{
+    const long count = s.length();
+    return o.write( s.data, count );
+}
+
+typedef StrTmpl<char> String;
+
+/**
+ * \brief Comparison functions for strings.
+ *
+ * The c-string overload is a wrapper for strcmp. The StrTmpl overload
+ * orders by length first and compares bytes only between strings of equal
+ * length, so its ordering is not lexicographic.
+ */
+template<class T> struct CmpStrTmpl
+{
+    /**
+     * \brief Compare two null terminated string types.
+     */
+    static inline long compare( const char *k1, const char *k2 )
+    {
+        return strcmp(k1, k2);
+    }
+
+    /**
+     * \brief Compare two StrTmpl strings: shorter sorts first, equal
+     * lengths are decided by memcmp.
+     */
+    static int compare( const StrTmpl<T> &s1, const StrTmpl<T> &s2 )
+    {
+        const long len1 = s1.length();
+        const long len2 = s2.length();
+
+        if ( len1 != len2 )
+            return len1 < len2 ? -1 : 1;
+
+        return memcmp( s1.data, s2.data, len1 );
+    }
+};
+
+typedef CmpStrTmpl<char> ColmCmpStr;
+
+#ifdef AAPL_NAMESPACE
+}
+#endif
+
+#endif /* _AAPL_ASTRING_H */
+
diff --git a/src/ctinput.cc b/src/ctinput.cc
new file mode 100644
index 00000000..f8267487
--- /dev/null
+++ b/src/ctinput.cc
@@ -0,0 +1,570 @@
+/*
+ * Copyright 2007-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <assert.h>
+
+#include <iostream>
+
+#include "compiler.h"
+#include "pool.h"
+//#include "debug.h"
+
+using std::cerr;
+using std::endl;
+
+DEF_INPUT_FUNCS( input_funcs_ct, input_impl_ct );
+
+extern input_funcs_ct pat_funcs;
+extern input_funcs_ct repl_funcs;
+
+/* State of a compile-time input: a cursor over the items of either a
+ * Pattern or a Constructor. The constructors in this file zero the whole
+ * struct and then fill in the fields they need. */
+struct input_impl_ct
+{
+ /* Function table; set to pat_funcs or repl_funcs by the constructors. */
+ struct input_funcs *funcs;
+
+ /* Location fields, copied into a location_t by ct_transfer_loc_seq. */
+ char *name;
+ long line;
+ long column;
+ long byte;
+
+ /* Pattern being read and the current item in it (pat_* functions). */
+ struct Pattern *pattern;
+ struct PatternItem *pat_item;
+ /* Constructor being read and the current item in it (repl_* functions). */
+ struct Constructor *constructor;
+ struct ConsItem *cons_item;
+
+ /* Flags stored by ct_set_eof_mark and ct_set_eof_sent. */
+ char eof_mark;
+ char eof_sent;
+
+ /* Number of bytes already consumed from the current item's data. */
+ int offset;
+};
+
+/* Destructor slot for compile-time inputs. The body is empty.
+ * NOTE(review): the impl is allocated with malloc in colm_impl_new_pat and
+ * colm_impl_new_cons and is not released here -- confirm who owns it. */
+void ct_destructor( program_t *prg, tree_t **sp, struct input_impl_ct *ss )
+{
+}
+
+/* Return the eof_sent flag stored in the input. prg is unused. */
+char ct_get_eof_sent( struct colm_program *prg, struct input_impl_ct *si )
+{
+ return si->eof_sent;
+}
+
+/* Store the eof_sent flag in the input. prg is unused. */
+void ct_set_eof_sent( struct colm_program *prg, struct input_impl_ct *si, char eof_sent )
+{
+ si->eof_sent = eof_sent;
+}
+
+/*
+ * Pattern
+ */
+
+/* Create a compile-time input that reads the items of a pattern. The impl
+ * starts out zero-filled, positioned on the first pattern item and wired to
+ * the pat_funcs table.
+ * NOTE(review): the name argument is not stored anywhere. */
+struct input_impl *colm_impl_new_pat( char *name, Pattern *pattern )
+{
+	/* calloc returns zero-filled storage, the same state malloc followed by
+	 * memset( 0 ) produces. */
+	struct input_impl_ct *impl =
+			(struct input_impl_ct*) calloc( 1, sizeof(struct input_impl_ct) );
+
+	impl->funcs = (struct input_funcs*) &pat_funcs;
+	impl->pattern = pattern;
+	impl->pat_item = pattern->list->head;
+
+	return (struct input_impl*) impl;
+}
+
+/* Find the next block of pattern text to hand to the parser, without
+ * consuming anything.
+ *
+ * Scanning starts at the current item and offset. Up to *pskip bytes of text
+ * are stepped over first; *pskip is reduced as whole items are skipped and is
+ * zero once data is returned.
+ *
+ * Returns INPUT_EOF when the items run out, INPUT_LANG_EL when a type
+ * reference item is reached, and otherwise INPUT_DATA with *pdp pointing at
+ * the text and *copied holding its length. */
+int pat_get_parse_block( struct colm_program *prg, struct input_impl_ct *ss, int *pskip,
+		alph_t **pdp, int *copied )
+{
+	*copied = 0;
+
+	int offset = ss->offset;
+	for ( PatternItem *item = ss->pat_item; ; item = item->next, offset = 0 ) {
+		if ( item == 0 )
+			return INPUT_EOF;
+
+		if ( item->form == PatternItem::TypeRefForm )
+			return INPUT_LANG_EL;
+
+		assert ( item->form == PatternItem::InputTextForm );
+
+		/* Nothing left in this item: move on to the next one. */
+		int slen = item->data.length() - offset;
+		if ( slen <= 0 )
+			continue;
+
+		/* The unread text of the current item. */
+		alph_t *src = (alph_t*)&item->data[offset];
+
+		/* The skip covers this whole item. */
+		if ( *pskip > 0 && slen <= *pskip ) {
+			*pskip -= slen;
+			continue;
+		}
+
+		/* Skip is zero or shorter than the item: step over it and return
+		 * what remains. */
+		*pdp = src + *pskip;
+		*copied += slen - *pskip;
+		*pskip = 0;
+		return INPUT_DATA;
+	}
+}
+
+
+/* Copy up to length bytes of pattern text into dest without consuming it.
+ * Copying starts at the current item and offset and stops at the end of the
+ * items, at a type reference item, or once length bytes have been copied.
+ * Returns the number of bytes copied. */
+int pat_get_data( struct colm_program *prg, struct input_impl_ct *ss, alph_t *dest, int length )
+{
+	int copied = 0;
+	int offset = ss->offset;
+
+	for ( PatternItem *item = ss->pat_item; item != 0; item = item->next, offset = 0 ) {
+		if ( item->form == PatternItem::TypeRefForm )
+			break;
+
+		assert ( item->form == PatternItem::InputTextForm );
+
+		int avail = item->data.length() - offset;
+		if ( avail > 0 ) {
+			/* Take what this item has, capped at what is still wanted. */
+			char *src = &item->data[offset];
+			int take = avail <= length ? avail : length;
+
+			memcpy( dest+copied, src, take );
+			copied += take;
+			length -= take;
+		}
+
+		if ( length == 0 )
+			break;
+	}
+
+	return copied;
+}
+
+/* Step the pattern cursor back one item. A null cursor (past the end) moves
+ * to the tail of the pattern's item list. */
+void pat_backup( struct input_impl_ct *ss )
+{
+	ss->pat_item = ( ss->pat_item != 0 ) ?
+			ss->pat_item->prev :
+			ss->pattern->list->tail;
+}
+
+/* Consume up to length bytes of pattern text, advancing the item cursor and
+ * offset. Stops early when the items run out. Returns the number of bytes
+ * consumed. loc is not written. */
+int pat_consume_data( struct colm_program *prg, struct input_impl_ct *ss, int length, location_t *loc )
+{
+	int consumed = 0;
+
+	while ( ss->pat_item != 0 ) {
+		int avail = ss->pat_item->data.length() - ss->offset;
+
+		/* The request ends inside this item: just move the offset. */
+		if ( length < avail ) {
+			ss->offset += length;
+			consumed += length;
+			break;
+		}
+
+		/* Take the rest of this item and advance to the next one. */
+		ss->pat_item = ss->pat_item->next;
+		ss->offset = 0;
+
+		length -= avail;
+		consumed += avail;
+
+		if ( length == 0 )
+			break;
+	}
+
+	return consumed;
+}
+
+/* Push length bytes back onto the pattern input by rewinding the offset,
+ * stepping back to earlier items as needed. The data argument is not read.
+ * Always returns the requested length. */
+int pat_undo_consume_data( struct colm_program *prg, struct input_impl_ct *ss, const alph_t *data, int length )
+{
+	const int requested = length;
+
+	/* While the request reaches past the front of the current item, give
+	 * back what this item holds and resume at the end of the previous one. */
+	while ( length > ss->offset ) {
+		length -= ss->offset;
+		pat_backup( ss );
+		ss->offset = ss->pat_item->data.length();
+	}
+
+	/* The remainder fits inside the current item. */
+	ss->offset -= length;
+
+	return requested;
+}
+
+/* Consume the current pattern item as a language element. Reports the
+ * item's bind id through *bindId, sets *data and *length to zero, advances
+ * the cursor to the next item and returns the language element of the
+ * item's production element. */
+LangEl *pat_consume_lang_el( struct colm_program *prg, struct input_impl_ct *ss, long *bindId,
+		alph_t **data, long *length )
+{
+	PatternItem *item = ss->pat_item;
+	LangEl *klangEl = item->prodEl->langEl;
+
+	*bindId = item->bindId;
+	*data = 0;
+	*length = 0;
+
+	/* Move on to the start of the following item. */
+	ss->pat_item = item->next;
+	ss->offset = 0;
+	return klangEl;
+}
+
+/* Undo pat_consume_lang_el: step back one item and set the offset to the
+ * length of that item's data. */
+void pat_undo_consume_lang_el( struct colm_program *prg, struct input_impl_ct *ss )
+{
+ pat_backup( ss );
+ ss->offset = ss->pat_item->data.length();
+}
+
+/* Store the eof_mark flag in the input. prg is unused. */
+void ct_set_eof_mark( struct colm_program *prg, struct input_impl_ct *si, char eof_mark )
+{
+ si->eof_mark = eof_mark;
+}
+
+/* Copy the input's name, line, column and byte fields into loc. */
+void ct_transfer_loc_seq( struct colm_program *prg, location_t *loc, struct input_impl_ct *ss )
+{
+ loc->name = ss->name;
+ loc->line = ss->line;
+ loc->column = ss->column;
+ loc->byte = ss->byte;
+}
+
+/* Function table for pattern inputs. Entries are positional; operations a
+ * pattern input does not provide are left as 0.
+ * NOTE(review): the slot order has to match the layout produced by
+ * DEF_INPUT_FUNCS, which is defined outside this file -- confirm before
+ * reordering. */
+input_funcs_ct pat_funcs =
+{
+ &pat_get_parse_block,
+ &pat_get_data,
+
+ &pat_consume_data,
+ &pat_undo_consume_data,
+
+ 0, /* consume_tree */
+ 0, /* undo_consume_tree */
+
+ &pat_consume_lang_el,
+ &pat_undo_consume_lang_el,
+
+ 0, 0, 0, 0, 0, 0, /* prepend funcs. */
+ 0, 0, 0, 0, 0, 0, /* append funcs */
+
+ &ct_set_eof_mark,
+
+ &ct_transfer_loc_seq,
+ &ct_destructor,
+
+ /* Two trailing slots left unset; their meaning is not visible here. */
+ 0, 0
+};
+
+
+/*
+ * Replacements
+ */
+
+/* Create a compile-time input that reads the items of a constructor. The
+ * impl starts out zero-filled, positioned on the first constructor item and
+ * wired to the repl_funcs table.
+ * NOTE(review): the name argument is not stored anywhere. */
+struct input_impl *colm_impl_new_cons( char *name, Constructor *constructor )
+{
+	/* calloc returns zero-filled storage, the same state malloc followed by
+	 * memset( 0 ) produces. */
+	struct input_impl_ct *impl =
+			(struct input_impl_ct*) calloc( 1, sizeof(struct input_impl_ct) );
+
+	impl->funcs = (struct input_funcs*) &repl_funcs;
+	impl->constructor = constructor;
+	impl->cons_item = constructor->list->head;
+
+	return (struct input_impl*) impl;
+}
+
+/* Consume the current constructor item as a language element.
+ *
+ * The element comes from the item itself for expression items and from the
+ * item's production element otherwise. *bindId receives the item's bind id.
+ * For a literal item whose type reference has a pda literal, the literal
+ * text is prepared into the item's data and returned through *data and
+ * *length; in every other case both are zero. The cursor then advances to
+ * the next item. */
+LangEl *repl_consume_lang_el( struct colm_program *prg, struct input_impl_ct *ss,
+		long *bindId, alph_t **data, long *length )
+{
+	ConsItem *item = ss->cons_item;
+
+	LangEl *klangEl = item->type == ConsItem::ExprType ?
+			item->langEl : item->prodEl->langEl;
+	*bindId = item->bindId;
+
+	*data = 0;
+	*length = 0;
+
+	if ( item->type == ConsItem::LiteralType &&
+			item->prodEl->typeRef->pdaLiteral != 0 )
+	{
+		/* The case-insensitivity result is not needed here. */
+		bool unusedCI;
+		prepareLitString( item->data, unusedCI,
+				item->prodEl->typeRef->pdaLiteral->data,
+				item->prodEl->typeRef->pdaLiteral->loc );
+
+		*data = (alph_t*)item->data.data;
+		*length = item->data.length();
+	}
+
+	ss->cons_item = item->next;
+	ss->offset = 0;
+	return klangEl;
+}
+
+/* Find the next block of constructor text to hand to the parser, without
+ * consuming anything.
+ *
+ * Scanning starts at the current item and offset. Up to *pskip bytes of text
+ * are stepped over first; *pskip is reduced as whole items are skipped and is
+ * zero once data is returned.
+ *
+ * Returns INPUT_EOF when the items run out, INPUT_LANG_EL when an expression
+ * or literal item is reached, and otherwise INPUT_DATA with *pdp pointing at
+ * the text and *copied holding its length. */
+int repl_get_parse_block( struct colm_program *prg, struct input_impl_ct *ss,
+		int *pskip, alph_t **pdp, int *copied )
+{
+	*copied = 0;
+
+	int offset = ss->offset;
+	for ( ConsItem *item = ss->cons_item; ; item = item->next, offset = 0 ) {
+		if ( item == 0 )
+			return INPUT_EOF;
+
+		if ( item->type == ConsItem::ExprType || item->type == ConsItem::LiteralType )
+			return INPUT_LANG_EL;
+
+		assert ( item->type == ConsItem::InputText );
+
+		/* Nothing left in this item: move on to the next one. */
+		int slen = item->data.length() - offset;
+		if ( slen <= 0 )
+			continue;
+
+		/* The unread text of the current item. */
+		alph_t *src = (alph_t*)&item->data[offset];
+
+		/* The skip covers this whole item. */
+		if ( *pskip > 0 && slen <= *pskip ) {
+			*pskip -= slen;
+			continue;
+		}
+
+		/* Skip is zero or shorter than the item: step over it and return
+		 * what remains. */
+		*pdp = src + *pskip;
+		*copied += slen - *pskip;
+		*pskip = 0;
+		return INPUT_DATA;
+	}
+}
+
+/* Copy up to length bytes of constructor text into dest without consuming
+ * it. Copying starts at the current item and offset and stops at the end of
+ * the items, at an expression or literal item, or once length bytes have
+ * been copied. Returns the number of bytes copied. */
+int repl_get_data( struct colm_program *prg, struct input_impl_ct *ss, alph_t *dest, int length )
+{
+	int copied = 0;
+	int offset = ss->offset;
+
+	for ( ConsItem *item = ss->cons_item; item != 0; item = item->next, offset = 0 ) {
+		if ( item->type == ConsItem::ExprType || item->type == ConsItem::LiteralType )
+			break;
+
+		assert ( item->type == ConsItem::InputText );
+
+		int avail = item->data.length() - offset;
+		if ( avail > 0 ) {
+			/* Take what this item has, capped at what is still wanted. */
+			char *src = &item->data[offset];
+			int take = avail <= length ? avail : length;
+
+			memcpy( dest+copied, src, take );
+			copied += take;
+			length -= take;
+		}
+
+		if ( length == 0 )
+			break;
+	}
+
+	return copied;
+}
+
+/* Step the constructor cursor back one item. A null cursor (past the end)
+ * moves to the tail of the constructor's item list. */
+void repl_backup( struct input_impl_ct *ss )
+{
+	ss->cons_item = ( ss->cons_item != 0 ) ?
+			ss->cons_item->prev :
+			ss->constructor->list->tail;
+}
+
+/* Undo repl_consume_lang_el: step back one item and set the offset to the
+ * length of that item's data. */
+void repl_undo_consume_lang_el( struct colm_program *prg, struct input_impl_ct *ss )
+{
+ repl_backup( ss );
+ ss->offset = ss->cons_item->data.length();
+}
+
+
+/* Consume up to length bytes of constructor text, advancing the item cursor
+ * and offset. Stops early when the items run out. Returns the number of
+ * bytes consumed. loc is not written. */
+int repl_consume_data( struct colm_program *prg, struct input_impl_ct *ss, int length, location_t *loc )
+{
+	int consumed = 0;
+
+	while ( ss->cons_item != 0 ) {
+		int avail = ss->cons_item->data.length() - ss->offset;
+
+		/* The request ends inside this item: just move the offset. */
+		if ( length < avail ) {
+			ss->offset += length;
+			consumed += length;
+			break;
+		}
+
+		/* Take the rest of this item and advance to the next one. */
+		ss->cons_item = ss->cons_item->next;
+		ss->offset = 0;
+
+		length -= avail;
+		consumed += avail;
+
+		if ( length == 0 )
+			break;
+	}
+
+	return consumed;
+}
+
+/* Push length bytes back onto the constructor input by rewinding the
+ * offset, stepping back to earlier items as needed. The data argument is
+ * not read. Always returns the requested length. */
+int repl_undo_consume_data( struct colm_program *prg, struct input_impl_ct *ss, const alph_t *data, int length )
+{
+	const int requested = length;
+
+	/* While the request reaches past the front of the current item, give
+	 * back what this item holds and resume at the end of the previous one. */
+	while ( length > ss->offset ) {
+		length -= ss->offset;
+		repl_backup( ss );
+		ss->offset = ss->cons_item->data.length();
+	}
+
+	/* The remainder fits inside the current item. */
+	ss->offset -= length;
+
+	return requested;
+}
+
+/* Function table for constructor (replacement) inputs. Entries are
+ * positional; operations this input does not provide are left as 0.
+ * NOTE(review): the slot order has to match the layout produced by
+ * DEF_INPUT_FUNCS, which is defined outside this file -- confirm before
+ * reordering. */
+input_funcs_ct repl_funcs =
+{
+ &repl_get_parse_block,
+ &repl_get_data,
+
+ &repl_consume_data,
+ &repl_undo_consume_data,
+
+ 0, /* consume_tree */
+ 0, /* undo_consume_tree. */
+
+ &repl_consume_lang_el,
+ &repl_undo_consume_lang_el,
+
+ 0, 0, 0, 0, 0, 0, /* prepend. */
+ 0, 0, 0, 0, 0, 0, /* append. */
+
+ &ct_set_eof_mark,
+
+ &ct_transfer_loc_seq,
+ &ct_destructor,
+
+ /* Two trailing slots left unset; their meaning is not visible here. */
+ 0, 0
+};
+
+/* Push a parse tree onto the run's bindings. */
+void pushBinding( pda_run *pdaRun, parse_tree_t *parseTree )
+{
+ /* Unconditional: the caller decides whether the item is bound (see the
+ * bindId > 0 test in internalSendNamedLangEl). */
+ pdaRun->bindings->push( parseTree );
+}
+
+/* Consume a named language element from the input and install it as the
+ * parser's next input: a token is built for the element (with a copy of its
+ * data, if any), wrapped in a freshly allocated parse tree flagged PF_NAMED,
+ * recorded in the bindings when its bind id is positive, and stored in
+ * pdaRun->parse_input. */
+extern "C" void internalSendNamedLangEl( program_t *prg, tree_t **sp,
+ struct pda_run *pdaRun, struct input_impl *is )
+{
+ /* All three set by consumeLangEl. */
+ long bindId;
+ alph_t *data;
+ long length;
+
+ LangEl *klangEl = is->funcs->consume_lang_el( prg, is, &bindId, &data, &length );
+
+ //cerr << "named langEl: " << prg->rtd->lelInfo[klangEl->id].name << endl;
+
+ /* Copy the token data. Left as 0 when the element carries no data. */
+ head_t *tokdata = 0;
+ if ( data != 0 )
+ tokdata = string_alloc_full( prg, colm_cstr_from_alph( data ), length );
+
+ kid_t *input = make_token_with_data( prg, pdaRun, is, klangEl->id, tokdata );
+
+ colm_increment_steps( pdaRun );
+
+ /* Wrap the token in a parse tree that shadows it. */
+ parse_tree_t *parseTree = parse_tree_allocate( pdaRun );
+ parseTree->id = input->tree->id;
+ parseTree->flags |= PF_NAMED;
+ parseTree->shadow = input;
+
+ /* Only positive bind ids are recorded as bindings. */
+ if ( bindId > 0 )
+ pushBinding( pdaRun, parseTree );
+
+ pdaRun->parse_input = parseTree;
+}
+
+/* Allocate the run's bindings container and seed it with a null entry. */
+extern "C" void internalInitBindings( pda_run *pdaRun )
+{
+ /* Bindings are indexed at 1. Need a no-binding. */
+ pdaRun->bindings = new bindings;
+ pdaRun->bindings->push(0);
+}
+
+/* Pop the most recent binding, but only when it is the given parse tree. */
+extern "C" void internalPopBinding( pda_run *pdaRun, parse_tree_t *parseTree )
+{
+	if ( pdaRun->bindings->top() == parseTree )
+		pdaRun->bindings->pop();
+}
diff --git a/src/debug.c b/src/debug.c
new file mode 100644
index 00000000..1cfd24d7
--- /dev/null
+++ b/src/debug.c
@@ -0,0 +1,82 @@
+/*
+ * Copyright 2010-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <colm/debug.h>
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <colm/program.h>
+
+/* Printable names of the debug realms, indexed by the bit position of the
+ * realm flag (see _debug). Only the first eight entries are given a name;
+ * the rest of the REALMS-sized array is zero-initialized. */
+const char *const colm_realm_names[REALMS] =
+ // @NOTE: keep this in sync with 'main.cc': 'processArgs()' '-D' option
+ {
+ "BYTECODE",
+ "PARSE",
+ "MATCH",
+ "COMPILE",
+ "POOL",
+ "PRINT",
+ "INPUT",
+ "SCAN",
+ };
+
+/*
+ * Print a debug message to stderr when the given realm is active.
+ *
+ * prg:   program whose active_realm mask is tested against realm.
+ * realm: realm flag; the position of its lowest set bit selects the prefix.
+ * fmt:   printf-style format, followed by its arguments.
+ *
+ * The message is prefixed with the realm's name from colm_realm_names. A
+ * realm with no name in the table (or a bit position beyond the table) is
+ * printed as "REALM-<bit>" instead of reading a null or out-of-range entry.
+ *
+ * Returns the result of vfprintf for the message body, or 0 when the realm
+ * is not active.
+ */
+int _debug( struct colm_program *prg, long realm, const char *fmt, ... )
+{
+	int result = 0;
+	if ( prg->active_realm & realm ) {
+		/* Compute the index by shifting. The mask test above guarantees at
+		 * least one bit is set, so the loop terminates. An unsigned copy
+		 * keeps the shift well defined for any input. */
+		unsigned long bits = (unsigned long) realm;
+		int ind = 0;
+		while ( (bits & 0x1) != 0x1 ) {
+			bits >>= 1;
+			ind += 1;
+		}
+
+		/* Only part of the table is populated and a long may have more
+		 * bits than the table has slots: check before indexing. */
+		if ( ind < REALMS && colm_realm_names[ind] != 0 )
+			fprintf( stderr, "%s: ", colm_realm_names[ind] );
+		else
+			fprintf( stderr, "REALM-%d: ", ind );
+
+		va_list args;
+		va_start( args, fmt );
+		result = vfprintf( stderr, fmt, args );
+		va_end( args );
+	}
+
+	return result;
+}
+
+/* Print "fatal: " followed by the printf-style message to stderr, then
+ * terminate the process with exit status 1. No newline is appended. */
+void fatal( const char *fmt, ... )
+{
+ va_list args;
+ fprintf( stderr, "fatal: " );
+ va_start( args, fmt );
+ vfprintf( stderr, fmt, args );
+ va_end( args );
+ exit(1);
+}
+
+/* Print "message: " followed by the printf-style message to stderr. No
+ * newline is appended. */
+void message( const char *fmt, ... )
+{
+ va_list args;
+ fprintf( stderr, "message: " );
+ va_start( args, fmt );
+ vfprintf( stderr, fmt, args );
+ va_end( args );
+}
diff --git a/src/debug.h b/src/debug.h
new file mode 100644
index 00000000..1870f553
--- /dev/null
+++ b/src/debug.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright 2010-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _COLM_DEBUG_H
+#define _COLM_DEBUG_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "colm.h"
+#include "config.h"
+
+/* Print a "fatal: " message to stderr and exit with status 1. */
+void fatal( const char *fmt, ... );
+
+/* debug() and check_realm() expand to real calls only when DEBUG is
+ * defined; otherwise they expand to nothing and their arguments are not
+ * evaluated.
+ * NOTE(review): _check_realm is not declared in this header -- confirm it
+ * exists before using check_realm in a DEBUG build. */
+#ifdef DEBUG
+#define debug( prg, realm, ... ) _debug( prg, realm, __VA_ARGS__ )
+#define check_realm( realm ) _check_realm( realm )
+#else
+#define debug( prg, realm, ... )
+#define check_realm( realm )
+#endif
+
+/* Print a realm-prefixed message to stderr when realm is set in the
+ * program's active_realm mask. Normally reached through debug(). */
+int _debug( struct colm_program *prg, long realm, const char *fmt, ... );
+
+/* Print a "message: " line to stderr. */
+void message( const char *fmt, ... );
+
+/* Short aliases for the COLM_DBG_* realm flags. */
+#define REALM_BYTECODE COLM_DBG_BYTECODE
+#define REALM_PARSE COLM_DBG_PARSE
+#define REALM_MATCH COLM_DBG_MATCH
+#define REALM_COMPILE COLM_DBG_COMPILE
+#define REALM_POOL COLM_DBG_POOL
+#define REALM_PRINT COLM_DBG_PRINT
+#define REALM_INPUT COLM_DBG_INPUT
+#define REALM_SCAN COLM_DBG_SCAN
+
+/* Size of the realm name table. */
+#define REALMS 32
+
+/* Realm names, indexed by the bit position of the realm flag. */
+extern const char *const colm_realm_names[REALMS];
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _COLM_DEBUG_H */
+
diff --git a/src/declare.cc b/src/declare.cc
new file mode 100644
index 00000000..b96092e2
--- /dev/null
+++ b/src/declare.cc
@@ -0,0 +1,1623 @@
+/*
+ * Copyright 2012-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdbool.h>
+#include <assert.h>
+
+#include <iostream>
+
+#include "compiler.h"
+
+/* Create the built-in unique types and register each one in the unique
+ * type map. */
+void Compiler::initUniqueTypes( )
+{
+	/* Primitive types and types backed by the base language elements. */
+	uniqueTypeNil = new UniqueType( TYPE_NIL );
+	uniqueTypeVoid = new UniqueType( TYPE_VOID );
+	uniqueTypePtr = new UniqueType( TYPE_TREE, ptrLangEl );
+	uniqueTypeBool = new UniqueType( TYPE_BOOL );
+	uniqueTypeInt = new UniqueType( TYPE_INT );
+	uniqueTypeStr = new UniqueType( TYPE_TREE, strLangEl );
+	uniqueTypeIgnore = new UniqueType( TYPE_TREE, ignoreLangEl );
+	uniqueTypeAny = new UniqueType( TYPE_TREE, anyLangEl );
+
+	/* Struct types for input and stream. */
+	uniqueTypeInput = new UniqueType( TYPE_STRUCT, inputSel );
+	uniqueTypeStream = new UniqueType( TYPE_STRUCT, streamSel );
+
+	/* Register them in the same order they were created. */
+	UniqueType *builtins[] = {
+		uniqueTypeNil, uniqueTypeVoid, uniqueTypePtr, uniqueTypeBool,
+		uniqueTypeInt, uniqueTypeStr, uniqueTypeIgnore, uniqueTypeAny,
+		uniqueTypeInput, uniqueTypeStream
+	};
+	const int numBuiltins = sizeof(builtins) / sizeof(builtins[0]);
+
+	for ( int i = 0; i < numBuiltins; i++ )
+		uniqeTypeMap.insert( builtins[i] );
+}
+
+/* Create a built-in method and register it under its name.
+ *
+ * The method goes into the namespace's root scope when nspace is given and
+ * into the object's root scope otherwise. When useGeneric is non-null the
+ * method is marked as using a generic id and remembers that generic.
+ * Returns the new method. */
+ObjectMethod *initFunction( UniqueType *retType, Namespace *nspace, ObjectDef *obj,
+		ObjectMethod::Type type, const String &name, int methIdWV, int methIdWC,
+		int nargs, UniqueType **args, bool isConst, bool useFnInstr,
+		GenericType *useGeneric )
+{
+	ObjectMethod *method = new ObjectMethod( retType, name,
+			methIdWV, methIdWC, nargs, args, 0, isConst );
+	method->type = type;
+	method->useFnInstr = useFnInstr;
+
+	/* No namespace means the method belongs to the object. */
+	if ( nspace == 0 )
+		obj->rootScope->methodMap.insert( name, method );
+	else
+		nspace->rootScope->methodMap.insert( name, method );
+
+	if ( useGeneric != 0 ) {
+		method->useGenericId = true;
+		method->generic = useGeneric;
+	}
+
+	return method;
+}
+
+/* Object-scoped method taking no arguments. Forwards to the full form with
+ * a null namespace. */
+ObjectMethod *initFunction( UniqueType *retType, ObjectDef *obj,
+ ObjectMethod::Type type, const String &name, int methIdWV, int methIdWC, bool isConst,
+ bool useFnInstr, GenericType *useGeneric )
+{
+ return initFunction( retType, 0, obj, type, name, methIdWV, methIdWC,
+ 0, 0, isConst, useFnInstr, useGeneric );
+}
+
+/* Object-scoped method taking one argument. */
+ObjectMethod *initFunction( UniqueType *retType, ObjectDef *obj,
+ ObjectMethod::Type type, const String &name, int methIdWV, int methIdWC, UniqueType *arg1,
+ bool isConst, bool useFnInstr, GenericType *useGeneric )
+{
+ UniqueType *args[] = { arg1 };
+ return initFunction( retType, 0, obj, type, name, methIdWV, methIdWC,
+ 1, args, isConst, useFnInstr, useGeneric );
+}
+
+/* Object-scoped method taking two arguments. */
+ObjectMethod *initFunction( UniqueType *retType, ObjectDef *obj,
+ ObjectMethod::Type type, const String &name, int methIdWV, int methIdWC,
+ UniqueType *arg1, UniqueType *arg2,
+ bool isConst, bool useFnInstr, GenericType *useGeneric )
+{
+ UniqueType *args[] = { arg1, arg2 };
+ return initFunction( retType, 0, obj, type, name, methIdWV, methIdWC,
+ 2, args, isConst, useFnInstr, useGeneric );
+}
+
+/*
+ * With namespace supplied. Global functions.
+ *
+ * Each overload packs its arguments into an array and forwards to the full
+ * form, which registers the method in the namespace when one is given.
+ */
+
+/* Namespace-scoped function taking no arguments. */
+ObjectMethod *initFunction( UniqueType *retType, Namespace *nspace, ObjectDef *obj,
+ ObjectMethod::Type type, const String &name, int methIdWV, int methIdWC, bool isConst,
+ bool useFnInstr, GenericType *useGeneric )
+{
+ return initFunction( retType, nspace, obj, type, name, methIdWV, methIdWC,
+ 0, 0, isConst, useFnInstr, useGeneric );
+}
+
+/* Namespace-scoped function taking one argument. */
+ObjectMethod *initFunction( UniqueType *retType, Namespace *nspace, ObjectDef *obj,
+ ObjectMethod::Type type, const String &name, int methIdWV, int methIdWC, UniqueType *arg1,
+ bool isConst, bool useFnInstr, GenericType *useGeneric )
+{
+ UniqueType *args[] = { arg1 };
+ return initFunction( retType, nspace, obj, type, name, methIdWV, methIdWC,
+ 1, args, isConst, useFnInstr, useGeneric );
+}
+
+/* Namespace-scoped function taking two arguments. */
+ObjectMethod *initFunction( UniqueType *retType, Namespace *nspace, ObjectDef *obj,
+ ObjectMethod::Type type, const String &name, int methIdWV, int methIdWC,
+ UniqueType *arg1, UniqueType *arg2,
+ bool isConst, bool useFnInstr, GenericType *useGeneric )
+{
+ UniqueType *args[] = { arg1, arg2 };
+ return initFunction( retType, nspace, obj, type, name, methIdWV, methIdWC,
+ 2, args, isConst, useFnInstr, useGeneric );
+}
+
+/* Look up name in this scope through the owning object. Returns the
+ * existing field, or null when the name is not declared here. */
+ObjectField *NameScope::checkRedecl( const String &name )
+{
+ return owningObj->checkRedecl( this, name );
+}
+
+/* Add a field to this scope by delegating to the owning object. */
+void NameScope::insertField( const String &name, ObjectField *value )
+{
+	owningObj->insertField( this, name, value );
+}
+
+/* Look up name in the field map of the given scope. Returns the field
+ * already declared under that name, or null when there is none. */
+ObjectField *ObjectDef::checkRedecl( NameScope *inScope, const String &name )
+{
+	FieldMapEl *existing = inScope->fieldMap.find( name );
+	return existing != 0 ? existing->value : 0;
+}
+
+/* Register a field: map it by name in the given scope, append it to this
+ * object's field list and record the scope on the field itself. */
+void ObjectDef::insertField( NameScope *inScope, const String &name, ObjectField *value )
+{
+ inScope->fieldMap.insert( name, value );
+ fieldList.append( value );
+ value->scope = inScope;
+}
+
+/* Create a new scope owned by this object, nested inside curScope, and
+ * return it. curScope must be non-null: the new scope is appended to its
+ * children. */
+NameScope *ObjectDef::pushScope( NameScope *curScope )
+{
+ NameScope *newScope = new NameScope;
+
+ newScope->owningObj = this;
+ newScope->parentScope = curScope;
+ curScope->children.append( newScope );
+
+ return newScope;
+}
+
+/* Declare capture variables for the join by descending into its
+ * expression. */
+void LexJoin::varDecl( Compiler *pd, TokenDef *tokenDef )
+{
+ expr->varDecl( pd, tokenDef );
+}
+
+/* Declare capture variables for an expression node. Binary operators
+ * descend into both operands, a term node descends into its term, and
+ * builtins declare nothing. */
+void LexExpression::varDecl( Compiler *pd, TokenDef *tokenDef )
+{
+ switch ( type ) {
+ case OrType: case IntersectType: case SubtractType:
+ case StrongSubtractType:
+ expression->varDecl( pd, tokenDef );
+ term->varDecl( pd, tokenDef );
+ break;
+ case TermType:
+ term->varDecl( pd, tokenDef );
+ break;
+ case BuiltinType:
+ break;
+ }
+}
+
+/* Declare capture variables for a term node. The binary forms descend into
+ * the nested term and then the augmented factor; a plain factor node
+ * descends into the augmented factor only. */
+void LexTerm::varDecl( Compiler *pd, TokenDef *tokenDef )
+{
+ switch ( type ) {
+ case ConcatType:
+ case RightStartType:
+ case RightFinishType:
+ case LeftType:
+ term->varDecl( pd, tokenDef );
+ factorAug->varDecl( pd, tokenDef );
+ break;
+ case FactorAugType:
+ factorAug->varDecl( pd, tokenDef );
+ break;
+ }
+}
+
+/* Declare every capture attached to this factor as a field in the root
+ * scope of the token's object definition, and record it on the token. */
+void LexFactorAug::varDecl( Compiler *pd, TokenDef *tokenDef )
+{
+ for ( ReCaptureVect::Iter re = reCaptureVect; re.lte(); re++ ) {
+ /* A capture may not reuse a name already declared in the scope.
+ * NOTE(review): the insert below still runs unless endp stops
+ * execution -- confirm. */
+ if ( tokenDef->objectDef->rootScope->checkRedecl( re->objField->name ) != 0 ) {
+ error(re->objField->loc) << "label name \"" <<
+ re->objField->name << "\" already in use" << endp;
+ }
+
+ /* Insert it into the map. */
+ tokenDef->objectDef->rootScope->insertField( re->objField->name, re->objField );
+
+ /* Store it in the TokenDef. */
+ tokenDef->reCaptureVect.append( *re );
+ }
+}
+
+/* Declare the regular-expression capture variables of every token
+ * definition, in every namespace, that has a join. */
+void Compiler::declareReVars()
+{
+ for ( NamespaceList::Iter n = namespaceList; n.lte(); n++ ) {
+ for ( TokenDefListNs::Iter tok = n->tokenDefList; tok.lte(); tok++ ) {
+ if ( tok->join != 0 )
+ tok->join->varDecl( this, tok );
+ }
+ }
+
+ /* FIXME: declare RE captures in token generation actions. */
+ /* The block below is compiled out; it is kept as a sketch for the FIXME. */
+#if 0
+ /* Add captures to the local frame. We Depend on these becoming the
+ * first local variables so we can compute their location. */
+
+ /* Make local variables corresponding to the local capture vector. */
+ for ( ReCaptureVect::Iter c = reCaptureVect; c.lte(); c++ )
+ {
+ ObjectField *objField = ObjectField::cons( c->objField->loc,
+ c->objField->typeRef, c->objField->name );
+
+ /* Insert it into the field map. */
+ pd->curLocalFrame->insertField( objField->name, objField );
+ }
+#endif
+}
+
+/* Create a language element named data in the given namespace, map the
+ * name to it in the namespace's type map, append it to the compiler's list
+ * of language elements and return it. */
+LangEl *declareLangEl( Compiler *pd, Namespace *nspace,
+ const String &data, LangEl::Type type )
+{
+ /* The name must not already be present in the namespace's type map. */
+ TypeMapEl *inDict = nspace->typeMap.find( data );
+ if ( inDict != 0 )
+ error() << "language element '" << data << "' already defined as something else" << endp;
+
+ /* Language element not there. Make the new lang el and insert.. */
+ LangEl *langEl = new LangEl( nspace, data, type );
+ TypeMapEl *typeMapEl = new TypeMapEl( TypeMapEl::LangElType, data, langEl );
+ nspace->typeMap.insert( typeMapEl );
+ pd->langEls.append( langEl );
+
+ return langEl;
+}
+
+/* Create the struct element for a struct definition, append it to the
+ * compiler's struct list and link it to the definition. When a namespace is
+ * supplied the name is checked against, and then added to, that namespace's
+ * type map; with a null namespace the struct is left unmapped. */
+StructEl *declareStruct( Compiler *pd, Namespace *inNspace,
+ const String &data, StructDef *structDef )
+{
+ if ( inNspace != 0 ) {
+ TypeMapEl *inDict = inNspace->typeMap.find( data );
+ if ( inDict != 0 )
+ error() << "struct '" << data << "' already defined as something else" << endp;
+ }
+
+ StructEl *structEl = new StructEl( data, structDef );
+ pd->structEls.append( structEl );
+ structDef->structEl = structEl;
+
+ if ( inNspace ) {
+ TypeMapEl *typeMapEl = new TypeMapEl( TypeMapEl::StructType, data, structEl );
+ inNspace->typeMap.insert( typeMapEl );
+ }
+
+ return structEl;
+}
+
+/* Create a language element and append it to the compiler's list. Does not
+ * map the new language element: the namespace's type map is left untouched,
+ * so no duplicate-name check is made. */
+LangEl *addLangEl( Compiler *pd, Namespace *inNspace,
+ const String &data, LangEl::Type type )
+{
+ LangEl *langEl = new LangEl( inNspace, data, type );
+ pd->langEls.append( langEl );
+ return langEl;
+}
+
+/* Map the name data to a type reference in the namespace's type map. */
+void declareTypeAlias( Compiler *pd, Namespace *nspace,
+ const String &data, TypeRef *typeRef )
+{
+ /* The name must not already be present in the namespace's type map. */
+ TypeMapEl *inDict = nspace->typeMap.find( data );
+ if ( inDict != 0 )
+ error() << "alias '" << data << "' already defined as something else" << endp;
+
+ /* Name not taken. Make the alias entry and insert it. */
+ TypeMapEl *typeMapEl = new TypeMapEl( TypeMapEl::AliasType, data, typeRef );
+ nspace->typeMap.insert( typeMapEl );
+}
+
+/* Look up data in the namespace's type map and return the value stored in
+ * its entry. Nothing is inserted; an undeclared name is reported as an
+ * error. */
+LangEl *findType( Compiler *pd, Namespace *nspace, const String &data )
+{
+ TypeMapEl *inDict = nspace->typeMap.find( data );
+
+ /* NOTE(review): the dereference below relies on endp not returning when
+ * the name is missing -- confirm. */
+ if ( inDict == 0 )
+ error() << "'" << data << "' not declared as anything" << endp;
+
+ return inDict->value;
+}
+
+
+/* Declare the built-in language elements in the root namespace. */
+void Compiler::declareBaseLangEls()
+{
+ /* Order here is important because we make assumptions about the inbuilt
+ * language elements in the runtime. Note that tokens have their
+ * identifiers set in an initial pass. */
+
+ /* Make a "_notoken" language element. This element is used when a
+ * generation action fails to generate anything, but there is reverse code
+ * that needs to be associated with a language element. This allows us to
+ * always associate reverse code with the first language element produced
+ * after a generation action. */
+ noTokenLangEl = declareLangEl( this, rootNamespace, "_notoken", LangEl::Term );
+ noTokenLangEl->isIgnore = true;
+
+ ptrLangEl = declareLangEl( this, rootNamespace, "ptr", LangEl::Term );
+ strLangEl = declareLangEl( this, rootNamespace, "str", LangEl::Term );
+ ignoreLangEl = declareLangEl( this, rootNamespace, "il", LangEl::Term );
+
+ /* The EOF language element is not created here; it starts out null. */
+ eofLangEl = 0;
+
+ /* Make the "any" language element */
+ anyLangEl = declareLangEl( this, rootNamespace, "any", LangEl::NonTerm );
+}
+
+
+/* Add the "lhs" field to a reduction's local frame. The field's type is
+ * the tree type of the given nonterminal. */
+void Compiler::addProdRedObjectVar( ObjectDef *localFrame, LangEl *nonTerm )
+{
+ UniqueType *prodNameUT = findUniqueType( TYPE_TREE, nonTerm );
+ TypeRef *typeRef = TypeRef::cons( internal, prodNameUT );
+ ObjectField *el = ObjectField::cons( internal,
+ ObjectField::LhsElType, typeRef, "lhs" );
+
+ localFrame->rootScope->insertField( el->name, el );
+}
+
+/* Add one constant field per reference-type element on a production's
+ * right-hand side to the reduction's local frame. The field for the element
+ * at 1-based position N is named "rN", and the element remembers its field
+ * in rhsElField. Elements of other types get no field but still count
+ * towards the position. */
+void Compiler::addProdRHSVars( ObjectDef *localFrame, ProdElList *prodElList )
+{
+	long position = 1;
+	for ( ProdElList::Iter rhsEl = *prodElList; rhsEl.lte(); rhsEl++, position++ ) {
+		if ( rhsEl->type != ProdEl::ReferenceType )
+			continue;
+
+		/* Use an offset of zero. For frame objects we compute the offset on
+		 * demand. */
+
+		/* position is a long, so it has to be formatted with %ld; passing
+		 * it to %d is an argument-type mismatch. */
+		String name( 8, "r%ld", position );
+		ObjectField *el = ObjectField::cons( InputLoc(),
+				ObjectField::RedRhsType, rhsEl->typeRef, name );
+		rhsEl->rhsElField = el;
+
+		/* Right hand side elements are constant. */
+		el->isConst = true;
+		localFrame->rootScope->insertField( el->name, el );
+	}
+}
+
+/* Resolve the type references of a generic and set up its builtin object
+ * definition with the functions and fields for its kind. The nspace
+ * argument is not used in the body. */
+void GenericType::declare( Compiler *pd, Namespace *nspace )
+{
+ elUt = elTr->resolveType( pd );
+
+ /* Only maps have a key type. */
+ if ( typeId == GEN_MAP )
+ keyUt = keyTr->resolveType( pd );
+
+ /* Maps and lists have a value type. */
+ if ( typeId == GEN_MAP || typeId == GEN_LIST )
+ valueUt = valueTr->resolveType( pd );
+
+ objDef = ObjectDef::cons( ObjectDef::BuiltinType,
+ "generic", pd->nextObjectId++ );
+
+ switch ( typeId ) {
+ case GEN_MAP:
+ pd->initMapFunctions( this );
+ pd->initMapFields( this );
+ break;
+ case GEN_LIST:
+ pd->initListFunctions( this );
+ pd->initListFields( this );
+ break;
+ case GEN_PARSER:
+ /* The parsed language element is assigned the next parser id. */
+ elUt->langEl->parserId = pd->nextParserId++;
+ pd->initParserFunctions( this );
+ pd->initParserFields( this );
+ break;
+ }
+}
+
+/* Declare everything defined in this namespace, then recurse into the
+ * child namespaces. The passes run in this order: generics, literal tokens,
+ * structs, non-literal tokens, nonterminals, type aliases. */
+void Namespace::declare( Compiler *pd )
+{
+ for ( GenericList::Iter g = genericList; g.lte(); g++ )
+ g->declare( pd, this );
+
+ /* First token pass: literals only. */
+ for ( TokenDefListNs::Iter tokenDef = tokenDefList; tokenDef.lte(); tokenDef++ ) {
+ if ( tokenDef->isLiteral ) {
+ if ( tokenDef->isZero ) {
+ /* A zero literal reuses the zero language element already
+ * recorded on the region set's ignore collector. */
+ assert( tokenDef->regionSet->collectIgnore->zeroLel != 0 );
+ tokenDef->tdLangEl = tokenDef->regionSet->collectIgnore->zeroLel;
+ }
+ else {
+ /* Original. Create a token for the literal. */
+ LangEl *litEl = declareLangEl( pd, this, tokenDef->name, LangEl::Term );
+
+ litEl->lit = tokenDef->literal;
+ litEl->isLiteral = true;
+ litEl->tokenDef = tokenDef;
+ litEl->objectDef = tokenDef->objectDef;
+
+ tokenDef->tdLangEl = litEl;
+
+ if ( tokenDef->noPreIgnore )
+ litEl->noPreIgnore = true;
+ if ( tokenDef->noPostIgnore )
+ litEl->noPostIgnore = true;
+ }
+ }
+ }
+
+ for ( StructDefList::Iter s = structDefList; s.lte(); s++ )
+ declareStruct( pd, this, s->name, s );
+
+ /* Second token pass: everything that is not a literal. */
+ for ( TokenDefListNs::Iter tokenDef = tokenDefList; tokenDef.lte(); tokenDef++ ) {
+ /* Literals already taken care of. */
+ if ( ! tokenDef->isLiteral ) {
+ /* Create the token. */
+ LangEl *tokEl = declareLangEl( pd, this, tokenDef->name, LangEl::Term );
+ tokEl->isIgnore = tokenDef->isIgnore;
+ tokEl->transBlock = tokenDef->codeBlock;
+ tokEl->objectDef = tokenDef->objectDef;
+ tokEl->contextIn = tokenDef->contextIn;
+ tokEl->tokenDef = tokenDef;
+
+ if ( tokenDef->noPreIgnore )
+ tokEl->noPreIgnore = true;
+ if ( tokenDef->noPostIgnore )
+ tokEl->noPostIgnore = true;
+
+ tokenDef->tdLangEl = tokEl;
+
+ if ( tokenDef->isZero ) {
+ /* Setting zero lel to newly created tokEl. */
+ tokenDef->regionSet->collectIgnore->zeroLel = tokEl;
+ tokEl->isZero = true;
+ }
+ }
+ }
+
+ for ( NtDefList::Iter n = ntDefList; n.lte(); n++ ) {
+ /* Get the language element. */
+ LangEl *langEl = declareLangEl( pd, this, n->name, LangEl::NonTerm );
+ //$$->langEl = langEl;
+
+ /* Copy the nonterminal's properties and take over its definitions. */
+ langEl->objectDef = n->objectDef;
+ langEl->reduceFirst = n->reduceFirst;
+ langEl->contextIn = n->contextIn;
+ langEl->defList.transfer( *n->defList );
+
+ for ( LelDefList::Iter d = langEl->defList; d.lte(); d++ ) {
+ d->prodName = langEl;
+
+ /* A production with a reduction block gets the lhs and rhs
+ * variables in that block's local frame. */
+ if ( d->redBlock != 0 ) {
+ pd->addProdRedObjectVar( d->redBlock->localFrame, langEl );
+ pd->addProdRHSVars( d->redBlock->localFrame, d->prodElList );
+ }
+
+ /* References to the reduce item. */
+ }
+ }
+
+ for ( TypeAliasList::Iter ta = typeAliasList; ta.lte(); ta++ )
+ declareTypeAlias( pd, this, ta->name, ta->typeRef );
+
+ /* Recurse into child namespaces. */
+ for ( NamespaceVect::Iter c = childNamespaces; c.lte(); c++ )
+ (*c)->declare( pd );
+}
+
+/*
+ * Make sure the ignore collector of every region set has a zero language
+ * element. Region sets that already have one are left alone. For the others
+ * a terminal named "_ign_<pointer>" is created in the root namespace,
+ * appended to langEls and stored as the collector's zeroLel.
+ */
+void Compiler::makeIgnoreCollectors()
+{
+    for ( RegionSetList::Iter rs = regionSetList; rs.lte(); rs++ ) {
+        /* A zero element is already present, nothing to create. */
+        if ( rs->collectIgnore->zeroLel != 0 )
+            continue;
+
+        /* The name is derived from the address of the ignore region. */
+        String ignName( 128, "_ign_%p", rs->tokenIgnore );
+        LangEl *ignLel = new LangEl( rootNamespace, ignName, LangEl::Term );
+        langEls.append( ignLel );
+
+        ignLel->regionSet = rs;
+        ignLel->isZero = true;
+
+        rs->collectIgnore->zeroLel = ignLel;
+    }
+}
+
+/*
+ * Turn an expression-form iterator call into a call of the default iterator
+ * "triter", passing the original expression as the only argument. The call
+ * is marked with wasExpr so the original form is still known afterwards.
+ */
+void LangStmt::chooseDefaultIter( Compiler *pd, IterCall *iterCall ) const
+{
+    /* This is two-part. It gets rewritten before evaluation in synthesis. */
+
+    /* Reference to the iterator by name. */
+    LangVarRef *triterRef = LangVarRef::cons( loc, 0, context, scope, "triter" );
+
+    /* Argument list holding just the original expression. */
+    CallArgVect *args = new CallArgVect;
+    args->append( new CallArg( iterCall->langExpr ) );
+
+    /* Swap the expression for the call term. */
+    iterCall->langTerm = LangTerm::cons( InputLoc(), triterRef, args );
+    iterCall->langExpr = 0;
+    iterCall->form = IterCall::Call;
+    iterCall->wasExpr = true;
+}
+
+/*
+ * Prepare the iterator of a for-iter statement. An iterator call that is not
+ * already in call form is first rewritten to use the default iterator. The
+ * loop variable's type reference is then built from the statement's typeRef
+ * and the iterator call.
+ */
+void LangStmt::declareForIter( Compiler *pd ) const
+{
+    const bool isCallForm = ( iterCall->form == IterCall::Call );
+    if ( !isCallForm ) {
+        chooseDefaultIter( pd, iterCall );
+    }
+
+    objField->typeRef = TypeRef::cons( loc, typeRef, iterCall );
+}
+
+/*
+ * Run the declare pass over this statement. Statements that hold a nested
+ * statement list forward the pass to every nested statement. An if statement
+ * also forwards to its else part, and a for-iter statement sets up its
+ * iterator first. All other statement kinds have nothing to declare.
+ */
+void LangStmt::declare( Compiler *pd ) const
+{
+    switch ( type ) {
+        /* Nothing nested, nothing to do. */
+        case ExprType:
+        case RejectType:
+        case AssignType:
+        case ReturnType:
+        case BreakType:
+        case YieldType:
+            break;
+
+        case IfType: {
+            StmtList::Iter inner( *stmtList );
+            while ( inner.lte() ) {
+                inner->declare( pd );
+                inner++;
+            }
+
+            /* The else part is a statement of its own. */
+            if ( elsePart != 0 ) {
+                elsePart->declare( pd );
+            }
+            break;
+        }
+
+        /* Plain bodies. */
+        case ElseType:
+        case WhileType: {
+            StmtList::Iter inner( *stmtList );
+            while ( inner.lte() ) {
+                inner->declare( pd );
+                inner++;
+            }
+            break;
+        }
+
+        case ForIterType: {
+            /* The iterator is set up before the body is visited. */
+            declareForIter( pd );
+
+            StmtList::Iter inner( *stmtList );
+            while ( inner.lte() ) {
+                inner->declare( pd );
+                inner++;
+            }
+            break;
+        }
+    }
+}
+
+/* Run the declare pass over every statement of the block, in order. */
+void CodeBlock::declare( Compiler *pd ) const
+{
+    StmtList::Iter cur( *stmtList );
+    while ( cur.lte() ) {
+        cur->declare( pd );
+        cur++;
+    }
+}
+
+/* Run the declare pass over the code block of a function. */
+void Compiler::declareFunction( Function *func )
+{
+    func->codeBlock->declare( this );
+}
+
+/* Run the declare pass over the reduction block of a production. The caller
+ * is expected to pass only productions that have one. */
+void Compiler::declareReductionCode( Production *prod )
+{
+    prod->redBlock->declare( this );
+}
+
+/*
+ * Declare the translation block of a language element. The block's local
+ * frame first receives the builtin fields match_length, match_text, input
+ * and this, then the declare pass runs over the block's statements.
+ */
+void Compiler::declareTranslateBlock( LangEl *langEl )
+{
+    CodeBlock *transBlock = langEl->transBlock;
+
+    /* Builtins visible inside the translation code. */
+    addMatchLength( transBlock->localFrame, langEl );
+    addMatchText( transBlock->localFrame, langEl );
+    addInput( transBlock->localFrame );
+    addThis( transBlock->localFrame );
+
+    transBlock->declare( this );
+}
+
+/*
+ * Declare the pre-eof block of a token region. The block's local frame gets
+ * the builtin fields input and this before the declare pass runs over it.
+ */
+void Compiler::declarePreEof( TokenRegion *region )
+{
+    CodeBlock *eofBlock = region->preEofBlock;
+
+    /* Builtins visible inside the pre-eof code. */
+    addInput( eofBlock->localFrame );
+    addThis( eofBlock->localFrame );
+
+    eofBlock->declare( this );
+}
+
+/* Run the declare pass over the root code block. */
+void Compiler::declareRootBlock()
+{
+    rootCodeBlock->declare( this );
+}
+
+/*
+ * Run the declare pass over all code in the program: every function, every
+ * production that has a reduction block, every language element that has a
+ * translation block, every region that has a pre-eof block and finally the
+ * root block.
+ */
+void Compiler::declareByteCode()
+{
+    /* Functions always carry code. */
+    for ( FunctionList::Iter func = functionList; func.lte(); func++ ) {
+        declareFunction( func );
+    }
+
+    /* Reduction blocks are optional. */
+    for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) {
+        if ( prod->redBlock == 0 )
+            continue;
+        declareReductionCode( prod );
+    }
+
+    /* Translation blocks are optional. */
+    for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
+        if ( lel->transBlock == 0 )
+            continue;
+        declareTranslateBlock( lel );
+    }
+
+    /* Pre-eof blocks are optional. */
+    for ( RegionList::Iter region = regionList; region.lte(); region++ ) {
+        if ( region->preEofBlock == 0 )
+            continue;
+        declarePreEof( region );
+    }
+
+    declareRootBlock( );
+}
+
+/*
+ * Register the builtin iterators on the global object: triter, child,
+ * rev_child, repeat, rev_repeat, list_iter, rev_list_iter and map_iter.
+ * Each one is created with initFunction using the "any" type and a reference
+ * to the any language element as parameter, and is then bound to the
+ * iterator definition returned by findIterDef.
+ */
+void Compiler::makeDefaultIterators()
+{
+    /* Tree iterator. */
+    {
+        UniqueType *refAny = findUniqueType( TYPE_REF, anyLangEl );
+        ObjectMethod *method = initFunction( uniqueTypeAny, rootNamespace, globalObjectDef,
+                ObjectMethod::Call, "triter", IN_HALT, IN_HALT, refAny, true );
+
+        method->iterDef = findIterDef( IterDef::Tree );
+    }
+
+    /* Child iterator. */
+    {
+        UniqueType *refAny = findUniqueType( TYPE_REF, anyLangEl );
+        ObjectMethod *method = initFunction( uniqueTypeAny, rootNamespace, globalObjectDef,
+                ObjectMethod::Call, "child", IN_HALT, IN_HALT, refAny, true );
+
+        method->iterDef = findIterDef( IterDef::Child );
+    }
+
+    /* Reverse child iterator. */
+    {
+        UniqueType *refAny = findUniqueType( TYPE_REF, anyLangEl );
+        ObjectMethod *method = initFunction( uniqueTypeAny, rootNamespace, globalObjectDef,
+                ObjectMethod::Call, "rev_child", IN_HALT, IN_HALT, refAny, true );
+
+        method->iterDef = findIterDef( IterDef::RevChild );
+    }
+
+    /* Repeat iterator. */
+    {
+        UniqueType *refAny = findUniqueType( TYPE_REF, anyLangEl );
+        ObjectMethod *method = initFunction( uniqueTypeAny, rootNamespace, globalObjectDef,
+                ObjectMethod::Call, "repeat", IN_HALT, IN_HALT, refAny, true );
+
+        method->iterDef = findIterDef( IterDef::Repeat );
+    }
+
+    /* Reverse repeat iterator. */
+    {
+        UniqueType *refAny = findUniqueType( TYPE_REF, anyLangEl );
+        ObjectMethod *method = initFunction( uniqueTypeAny, rootNamespace, globalObjectDef,
+                ObjectMethod::Call, "rev_repeat", IN_HALT, IN_HALT, refAny, true );
+
+        method->iterDef = findIterDef( IterDef::RevRepeat );
+    }
+
+    /* List element iterator. */
+    {
+        UniqueType *refAny = findUniqueType( TYPE_REF, anyLangEl );
+        ObjectMethod *method = initFunction( uniqueTypeAny, rootNamespace, globalObjectDef,
+                ObjectMethod::Call, "list_iter", IN_HALT, IN_HALT, refAny, true );
+
+        method->iterDef = findIterDef( IterDef::ListEl );
+    }
+
+    /* Reverse list value iterator. */
+    {
+        UniqueType *refAny = findUniqueType( TYPE_REF, anyLangEl );
+        ObjectMethod *method = initFunction( uniqueTypeAny, rootNamespace, globalObjectDef,
+                ObjectMethod::Call, "rev_list_iter", IN_HALT, IN_HALT, refAny, true );
+
+        method->iterDef = findIterDef( IterDef::RevListVal );
+    }
+
+    /* Map element iterator. */
+    {
+        UniqueType *refAny = findUniqueType( TYPE_REF, anyLangEl );
+        ObjectMethod *method = initFunction( uniqueTypeAny, rootNamespace, globalObjectDef,
+                ObjectMethod::Call, "map_iter", IN_HALT, IN_HALT, refAny, true );
+
+        method->iterDef = findIterDef( IterDef::MapEl );
+    }
+}
+
+/*
+ * Add the constant int field "match_length" to the root scope of the given
+ * frame. The lel argument is not used by this function.
+ */
+void Compiler::addMatchLength( ObjectDef *frame, LangEl *lel )
+{
+    /* The field is of type int. */
+    TypeRef *intType = TypeRef::cons( internal, uniqueTypeInt );
+
+    ObjectField *lengthField = ObjectField::cons( InputLoc(),
+            ObjectField::InbuiltFieldType, intType, "match_length" );
+
+    /* Plain and value reads share one opcode. */
+    lengthField->inGetValR = IN_GET_MATCH_LENGTH_R;
+    lengthField->inGetR = IN_GET_MATCH_LENGTH_R;
+    lengthField->isConst = true;
+
+    frame->rootScope->insertField( lengthField->name, lengthField );
+}
+
+/*
+ * Add the constant str field "match_text" to the root scope of the given
+ * frame. The lel argument is not used by this function.
+ */
+void Compiler::addMatchText( ObjectDef *frame, LangEl *lel )
+{
+    /* The field is of type str. */
+    TypeRef *strType = TypeRef::cons( internal, uniqueTypeStr );
+
+    ObjectField *textField = ObjectField::cons( internal,
+            ObjectField::InbuiltFieldType, strType, "match_text" );
+
+    /* Plain and value reads share one opcode. */
+    textField->inGetValR = IN_GET_MATCH_TEXT_R;
+    textField->inGetR = IN_GET_MATCH_TEXT_R;
+    textField->isConst = true;
+
+    frame->rootScope->insertField( textField->name, textField );
+}
+
+/*
+ * Add the builtin object field "input", typed with uniqueTypeInput, to the
+ * root scope of the given frame. All six get opcodes are the IN_LOAD_INPUT
+ * family.
+ */
+void Compiler::addInput( ObjectDef *frame )
+{
+    TypeRef *inputType = TypeRef::cons( internal, uniqueTypeInput );
+
+    ObjectField *inputField = ObjectField::cons( internal,
+            ObjectField::InbuiltObjectType, inputType, "input" );
+
+    /* Read variants. */
+    inputField->inGetR = IN_LOAD_INPUT_R;
+    inputField->inGetValR = IN_LOAD_INPUT_R;
+
+    /* WC variants. */
+    inputField->inGetWC = IN_LOAD_INPUT_WC;
+    inputField->inGetValWC = IN_LOAD_INPUT_WC;
+
+    /* WV variants. */
+    inputField->inGetWV = IN_LOAD_INPUT_WV;
+    inputField->inGetValWV = IN_LOAD_INPUT_WV;
+
+    frame->rootScope->insertField( inputField->name, inputField );
+}
+
+/*
+ * Add the builtin object field "this", typed with uniqueTypeStream, to the
+ * root scope of the given frame. All six get opcodes are the IN_LOAD_CONTEXT
+ * family.
+ */
+void Compiler::addThis( ObjectDef *frame )
+{
+    TypeRef *streamType = TypeRef::cons( internal, uniqueTypeStream );
+
+    ObjectField *thisField = ObjectField::cons( internal,
+            ObjectField::InbuiltObjectType, streamType, "this" );
+
+    /* Read variants. */
+    thisField->inGetR = IN_LOAD_CONTEXT_R;
+    thisField->inGetValR = IN_LOAD_CONTEXT_R;
+
+    /* WC variants. */
+    thisField->inGetWC = IN_LOAD_CONTEXT_WC;
+    thisField->inGetValWC = IN_LOAD_CONTEXT_WC;
+
+    /* WV variants. */
+    thisField->inGetWV = IN_LOAD_CONTEXT_WV;
+    thisField->inGetValWV = IN_LOAD_CONTEXT_WV;
+
+    frame->rootScope->insertField( thisField->name, thisField );
+}
+
+/*
+ * Create the builtin object definition for "int" and give it the to_string
+ * method, which returns a str.
+ */
+void Compiler::declareIntFields( )
+{
+    intObj = ObjectDef::cons( ObjectDef::BuiltinType, "int", nextObjectId++ );
+
+    /* The object is not attached to a language element here:
+     * intLangEl->objectDef = intObj; is disabled. */
+
+    initFunction( uniqueTypeStr, intObj, ObjectMethod::Call,
+            "to_string", IN_INT_TO_STR, IN_INT_TO_STR, true );
+}
+
+/*
+ * Create the builtin object definition for "str", attach it to strLangEl and
+ * register the string methods and the length field on it. The sprintf
+ * function is registered on the global object rather than on strObj.
+ */
+void Compiler::declareStrFields( )
+{
+ strObj = ObjectDef::cons( ObjectDef::BuiltinType, "str", nextObjectId++ );
+ strLangEl->objectDef = strObj;
+
+ /* Conversions returning int and taking no type argument. */
+ initFunction( uniqueTypeInt, strObj, ObjectMethod::Call, "atoi",
+ FN_STR_ATOI, FN_STR_ATOI, true, true );
+
+ initFunction( uniqueTypeInt, strObj, ObjectMethod::Call, "atoo",
+ FN_STR_ATOO, FN_STR_ATOO, true, true );
+
+ initFunction( uniqueTypeInt, strObj, ObjectMethod::Call, "uord8",
+ FN_STR_UORD8, FN_STR_UORD8, true, true );
+
+ initFunction( uniqueTypeInt, strObj, ObjectMethod::Call, "sord8",
+ FN_STR_SORD8, FN_STR_SORD8, true, true );
+
+ initFunction( uniqueTypeInt, strObj, ObjectMethod::Call, "uord16",
+ FN_STR_UORD16, FN_STR_UORD16, true, true );
+
+ initFunction( uniqueTypeInt, strObj, ObjectMethod::Call, "sord16",
+ FN_STR_SORD16, FN_STR_SORD16, true, true );
+
+ initFunction( uniqueTypeInt, strObj, ObjectMethod::Call, "uord32",
+ FN_STR_UORD32, FN_STR_UORD32, true, true );
+
+ initFunction( uniqueTypeInt, strObj, ObjectMethod::Call, "sord32",
+ FN_STR_SORD32, FN_STR_SORD32, true, true );
+
+ /* prefix and suffix return str and take one int type argument. */
+ initFunction( uniqueTypeStr, strObj, ObjectMethod::Call, "prefix",
+ FN_STR_PREFIX, FN_STR_PREFIX, uniqueTypeInt, true, true );
+
+ initFunction( uniqueTypeStr, strObj, ObjectMethod::Call, "suffix",
+ FN_STR_SUFFIX, FN_STR_SUFFIX, uniqueTypeInt, true, true );
+
+ /* sprintf goes on the global object and takes a str and an int type. */
+ initFunction( uniqueTypeStr, rootNamespace, globalObjectDef,
+ ObjectMethod::Call, "sprintf", FN_SPRINTF, FN_SPRINTF,
+ uniqueTypeStr, uniqueTypeInt, true, true );
+
+ /* The constant "length" field, read with IN_STR_LENGTH. */
+ addLengthField( strObj, IN_STR_LENGTH );
+}
+
+/*
+ * Add the constant str field "tree" to the root scope of objDef. It is read
+ * with IN_GET_COLLECT_STRING. The getLength argument is not used by this
+ * function.
+ */
+void Compiler::declareInputField( ObjectDef *objDef, code_t getLength )
+{
+    /* Create the "tree" field. */
+    TypeRef *strType = TypeRef::cons( internal, uniqueTypeStr );
+    ObjectField *treeField = ObjectField::cons( internal,
+            ObjectField::InbuiltFieldType, strType, "tree" );
+
+    /* Plain and value reads share one opcode. */
+    treeField->inGetValR = IN_GET_COLLECT_STRING;
+    treeField->inGetR = IN_GET_COLLECT_STRING;
+    treeField->isConst = true;
+
+    objDef->rootScope->insertField( treeField->name, treeField );
+}
+
+/*
+ * Add the constant str field "tree" to the root scope of objDef. It is read
+ * with IN_GET_COLLECT_STRING. The getLength argument is not used by this
+ * function.
+ */
+void Compiler::declareStreamField( ObjectDef *objDef, code_t getLength )
+{
+    /* Create the "tree" field. */
+    TypeRef *strType = TypeRef::cons( internal, uniqueTypeStr );
+    ObjectField *treeField = ObjectField::cons( internal,
+            ObjectField::InbuiltFieldType, strType, "tree" );
+
+    /* Plain and value reads share one opcode. */
+    treeField->inGetValR = IN_GET_COLLECT_STRING;
+    treeField->inGetR = IN_GET_COLLECT_STRING;
+    treeField->isConst = true;
+
+    objDef->rootScope->insertField( treeField->name, treeField );
+}
+
+/*
+ * Register the methods pull, push, push_ignore, push_stream, close and
+ * auto_trim plus the "tree" field on the input object. The object definition
+ * is taken from the struct definition behind inputSel.
+ */
+void Compiler::declareInputFields( )
+{
+ inputObj = inputSel->structDef->objectDef;
+
+ /* pull is the only method given two different opcodes (WV and WC). */
+ initFunction( uniqueTypeStr, inputObj, ObjectMethod::Call, "pull",
+ IN_INPUT_PULL_WV, IN_INPUT_PULL_WC, uniqueTypeInt, false );
+
+ /* The push family passes the WV opcode in both opcode slots. */
+ initFunction( uniqueTypeStr, inputObj, ObjectMethod::Call, "push",
+ IN_INPUT_PUSH_WV, IN_INPUT_PUSH_WV, uniqueTypeAny, false );
+
+ initFunction( uniqueTypeStr, inputObj, ObjectMethod::Call, "push_ignore",
+ IN_INPUT_PUSH_IGNORE_WV, IN_INPUT_PUSH_IGNORE_WV, uniqueTypeAny, false );
+
+ initFunction( uniqueTypeStr, inputObj, ObjectMethod::Call, "push_stream",
+ IN_INPUT_PUSH_STREAM_WV, IN_INPUT_PUSH_STREAM_WV, uniqueTypeStream, false );
+
+ initFunction( uniqueTypeVoid, inputObj, ObjectMethod::Call, "close",
+ IN_INPUT_CLOSE_WC, IN_INPUT_CLOSE_WC, false );
+
+ /* NOTE(review): this opcode is spelled IN_IINPUT_*, unlike the IN_INPUT_*
+ * opcodes used by the other methods in this function. Confirm it is the
+ * intended opcode. */
+ initFunction( uniqueTypeVoid, inputObj, ObjectMethod::Call, "auto_trim",
+ IN_IINPUT_AUTO_TRIM_WC, IN_IINPUT_AUTO_TRIM_WC, uniqueTypeBool, false );
+
+ /* Adds the "tree" field. The second argument is ignored by the callee. */
+ declareInputField( inputObj, 0 );
+}
+
+/*
+ * Register the methods pull, push, push_ignore, push_stream, close and
+ * auto_trim plus the "tree" field on the stream object. The object definition
+ * is taken from the struct definition behind streamSel.
+ */
+void Compiler::declareStreamFields( )
+{
+ streamObj = streamSel->structDef->objectDef;
+
+ /* pull is the only method given two different opcodes (WV and WC). */
+ initFunction( uniqueTypeStr, streamObj, ObjectMethod::Call, "pull",
+ IN_INPUT_PULL_WV, IN_INPUT_PULL_WC, uniqueTypeInt, false );
+
+ /* The push family passes the WV opcode in both opcode slots. */
+ initFunction( uniqueTypeStr, streamObj, ObjectMethod::Call, "push",
+ IN_INPUT_PUSH_WV, IN_INPUT_PUSH_WV, uniqueTypeAny, false );
+
+ initFunction( uniqueTypeStr, streamObj, ObjectMethod::Call, "push_ignore",
+ IN_INPUT_PUSH_IGNORE_WV, IN_INPUT_PUSH_IGNORE_WV, uniqueTypeAny, false );
+
+ initFunction( uniqueTypeStr, streamObj, ObjectMethod::Call, "push_stream",
+ IN_INPUT_PUSH_STREAM_WV, IN_INPUT_PUSH_STREAM_WV, uniqueTypeStream, false );
+
+ initFunction( uniqueTypeVoid, streamObj, ObjectMethod::Call, "close",
+ IN_INPUT_CLOSE_WC, IN_INPUT_CLOSE_WC, false );
+
+ initFunction( uniqueTypeVoid, streamObj, ObjectMethod::Call, "auto_trim",
+ IN_INPUT_AUTO_TRIM_WC, IN_INPUT_AUTO_TRIM_WC, uniqueTypeBool, false );
+
+ /* Adds the "tree" field. The second argument is ignored by the callee. */
+ declareStreamField( streamObj, 0 );
+}
+
+/*
+ * Build the str field "data" for a token object and return it. The field is
+ * not inserted into any scope here. It gets a read opcode and two set
+ * opcodes and is not marked constant.
+ */
+ObjectField *Compiler::makeDataEl()
+{
+    TypeRef *strType = TypeRef::cons( internal, uniqueTypeStr );
+    ObjectField *dataField = ObjectField::cons( internal,
+            ObjectField::InbuiltFieldType, strType, "data" );
+
+    /* Readable. */
+    dataField->inGetR = IN_GET_TOKEN_DATA_R;
+
+    /* Writable. */
+    dataField->inSetWV = IN_SET_TOKEN_DATA_WV;
+    dataField->inSetWC = IN_SET_TOKEN_DATA_WC;
+
+    return dataField;
+}
+
+/*
+ * Build the constant str field "file" for a token object and return it. The
+ * field is not inserted into any scope here.
+ */
+ObjectField *Compiler::makeFileEl()
+{
+    TypeRef *strType = TypeRef::cons( internal, uniqueTypeStr );
+    ObjectField *fileField = ObjectField::cons( internal,
+            ObjectField::InbuiltFieldType, strType, "file" );
+
+    /* Plain and value reads share one opcode. */
+    fileField->inGetValR = IN_GET_TOKEN_FILE_R;
+    fileField->inGetR = IN_GET_TOKEN_FILE_R;
+    fileField->isConst = true;
+
+    return fileField;
+}
+
+/*
+ * Build the constant int field "line" for a token object and return it. The
+ * field is not inserted into any scope here.
+ */
+ObjectField *Compiler::makeLineEl()
+{
+    TypeRef *intType = TypeRef::cons( internal, uniqueTypeInt );
+    ObjectField *lineField = ObjectField::cons( internal,
+            ObjectField::InbuiltFieldType, intType, "line" );
+
+    /* Plain and value reads share one opcode. */
+    lineField->inGetValR = IN_GET_TOKEN_LINE_R;
+    lineField->inGetR = IN_GET_TOKEN_LINE_R;
+    lineField->isConst = true;
+
+    return lineField;
+}
+
+/*
+ * Build the constant int field "col" for a token object and return it. The
+ * field is not inserted into any scope here.
+ */
+ObjectField *Compiler::makeColEl()
+{
+    TypeRef *intType = TypeRef::cons( internal, uniqueTypeInt );
+    ObjectField *colField = ObjectField::cons( internal,
+            ObjectField::InbuiltFieldType, intType, "col" );
+
+    /* Plain and value reads share one opcode. */
+    colField->inGetValR = IN_GET_TOKEN_COL_R;
+    colField->inGetR = IN_GET_TOKEN_COL_R;
+    colField->isConst = true;
+
+    return colField;
+}
+
+/*
+ * Build the constant int field "pos" for a token object and return it. The
+ * field is not inserted into any scope here.
+ */
+ObjectField *Compiler::makePosEl()
+{
+    /* Create the "pos" field. */
+    TypeRef *intType = TypeRef::cons( internal, uniqueTypeInt );
+    ObjectField *posField = ObjectField::cons( internal,
+            ObjectField::InbuiltFieldType, intType, "pos" );
+
+    /* Plain and value reads share one opcode. */
+    posField->inGetValR = IN_GET_TOKEN_POS_R;
+    posField->inGetR = IN_GET_TOKEN_POS_R;
+    posField->isConst = true;
+
+    return posField;
+}
+
+/*
+ * Insert a constant int field named "length" into the root scope of objDef.
+ * The caller chooses the opcode; it is used for both the plain read and the
+ * value read.
+ */
+void Compiler::addLengthField( ObjectDef *objDef, code_t getLength )
+{
+    TypeRef *intType = TypeRef::cons( internal, uniqueTypeInt );
+    ObjectField *lengthField = ObjectField::cons( internal,
+            ObjectField::InbuiltFieldType, intType, "length" );
+
+    lengthField->inGetValR = getLength;
+    lengthField->inGetR = getLength;
+    lengthField->isConst = true;
+
+    objDef->rootScope->insertField( lengthField->name, lengthField );
+}
+
+/*
+ * Give every terminal that has an object definition the token fields data,
+ * file, line, col and pos, inserted into the definition's root scope in that
+ * order.
+ */
+void Compiler::declareTokenFields( )
+{
+    for ( LelList::Iter tok = langEls; tok.lte(); tok++ ) {
+        /* Skip non-terminals and terminals without an object definition. */
+        if ( tok->type != LangEl::Term || tok->objectDef == 0 )
+            continue;
+
+        /* The "data" field. */
+        ObjectField *data = makeDataEl();
+        tok->objectDef->rootScope->insertField( data->name, data );
+
+        /* The "file" field. */
+        ObjectField *file = makeFileEl();
+        tok->objectDef->rootScope->insertField( file->name, file );
+
+        /* The "line" field. */
+        ObjectField *line = makeLineEl();
+        tok->objectDef->rootScope->insertField( line->name, line );
+
+        /* The "col" field. */
+        ObjectField *col = makeColEl();
+        tok->objectDef->rootScope->insertField( col->name, col );
+
+        /* The "pos" field. */
+        ObjectField *pos = makePosEl();
+        tok->objectDef->rootScope->insertField( pos->name, pos );
+    }
+}
+
+/*
+ * Register the builtin global functions on the global object and then add
+ * the global fields (stdin, stdout, stderr, stds, argv/arg0, error and the
+ * user-supplied define arguments).
+ */
+void Compiler::declareGlobalFields()
+{
+ ObjectMethod *method;
+
+ /* File opening and case conversion. */
+ method = initFunction( uniqueTypeStream, rootNamespace, globalObjectDef, ObjectMethod::Call, "open",
+ IN_OPEN_FILE, IN_OPEN_FILE, uniqueTypeStr, uniqueTypeStr, true );
+ method->useCallObj = false;
+
+ method = initFunction( uniqueTypeStr, rootNamespace, globalObjectDef, ObjectMethod::Call, "tolower",
+ IN_TO_LOWER, IN_TO_LOWER, uniqueTypeStr, true );
+ method->useCallObj = false;
+
+ method = initFunction( uniqueTypeStr, rootNamespace, globalObjectDef, ObjectMethod::Call, "toupper",
+ IN_TO_UPPER, IN_TO_UPPER, uniqueTypeStr, true );
+ method->useCallObj = false;
+
+ /* Global forms of the string functions. They use the same FN_STR_*
+ * opcodes as the str methods but take the string as a parameter. */
+ method = initFunction( uniqueTypeInt, rootNamespace, globalObjectDef, ObjectMethod::Call, "atoi",
+ FN_STR_ATOI, FN_STR_ATOI, uniqueTypeStr, true, true );
+ method->useCallObj = false;
+
+ method = initFunction( uniqueTypeInt, rootNamespace, globalObjectDef, ObjectMethod::Call, "atoo",
+ FN_STR_ATOO, FN_STR_ATOO, uniqueTypeStr, true, true );
+ method->useCallObj = false;
+
+ method = initFunction( uniqueTypeStr, rootNamespace, globalObjectDef, ObjectMethod::Call, "prefix",
+ FN_PREFIX, FN_PREFIX, uniqueTypeStr, uniqueTypeInt, true, true );
+ method->useCallObj = false;
+
+ method = initFunction( uniqueTypeStr, rootNamespace, globalObjectDef, ObjectMethod::Call, "suffix",
+ FN_SUFFIX, FN_SUFFIX, uniqueTypeStr, uniqueTypeInt, true, true );
+ method->useCallObj = false;
+
+
+ method = initFunction( uniqueTypeInt, rootNamespace, globalObjectDef, ObjectMethod::Call, "uord8",
+ FN_STR_UORD8, FN_STR_UORD8, uniqueTypeStr, true, true );
+ method->useCallObj = false;
+
+ method = initFunction( uniqueTypeInt, rootNamespace, globalObjectDef, ObjectMethod::Call, "sord8",
+ FN_STR_SORD8, FN_STR_SORD8, uniqueTypeStr, true, true );
+ method->useCallObj = false;
+
+ method = initFunction( uniqueTypeInt, rootNamespace, globalObjectDef, ObjectMethod::Call, "uord16",
+ FN_STR_UORD16, FN_STR_UORD16, uniqueTypeStr, true, true );
+ method->useCallObj = false;
+
+ method = initFunction( uniqueTypeInt, rootNamespace, globalObjectDef, ObjectMethod::Call, "sord16",
+ FN_STR_SORD16, FN_STR_SORD16, uniqueTypeStr, true, true );
+ method->useCallObj = false;
+
+ method = initFunction( uniqueTypeInt, rootNamespace, globalObjectDef, ObjectMethod::Call, "uord32",
+ FN_STR_UORD32, FN_STR_UORD32, uniqueTypeStr, true, true );
+ method->useCallObj = false;
+
+ method = initFunction( uniqueTypeInt, rootNamespace, globalObjectDef, ObjectMethod::Call, "sord32",
+ FN_STR_SORD32, FN_STR_SORD32, uniqueTypeStr, true, true );
+ method->useCallObj = false;
+
+ /* NOTE(review): exit, exit_hard and system are the only functions here
+ * that leave useCallObj at whatever initFunction set it to. Confirm that
+ * this is intended. */
+ method = initFunction( uniqueTypeInt, rootNamespace, globalObjectDef, ObjectMethod::Call, "exit",
+ FN_EXIT, FN_EXIT, uniqueTypeInt, true, true );
+
+ method = initFunction( uniqueTypeInt, rootNamespace, globalObjectDef, ObjectMethod::Call, "exit_hard",
+ FN_EXIT_HARD, FN_EXIT_HARD, uniqueTypeInt, true, true );
+
+ method = initFunction( uniqueTypeInt, rootNamespace, globalObjectDef, ObjectMethod::Call, "system",
+ IN_SYSTEM, IN_SYSTEM, uniqueTypeStr, true );
+
+ /* Tree to string conversions. */
+ method = initFunction( uniqueTypeStr, rootNamespace, globalObjectDef, ObjectMethod::Call, "xml",
+ IN_TREE_TO_STR_XML, IN_TREE_TO_STR_XML, uniqueTypeAny, true );
+ method->useCallObj = false;
+
+ method = initFunction( uniqueTypeStr, rootNamespace, globalObjectDef, ObjectMethod::Call, "xmlac",
+ IN_TREE_TO_STR_XML_AC, IN_TREE_TO_STR_XML_AC, uniqueTypeAny, true );
+ method->useCallObj = false;
+
+ method = initFunction( uniqueTypeStr, rootNamespace, globalObjectDef, ObjectMethod::Call, "postfix",
+ IN_TREE_TO_STR_POSTFIX, IN_TREE_TO_STR_POSTFIX, uniqueTypeAny, true );
+ method->useCallObj = false;
+
+ /* Global fields. */
+ addStdin();
+ addStdout();
+ addStderr();
+ addStds();
+ addArgv();
+ addError();
+ addDefineArgs();
+}
+
+/*
+ * Add the constant stream field "stdin" to the root scope of the root
+ * namespace. The field is a constant value identified by CONST_STDIN and
+ * every get opcode is IN_GET_CONST.
+ */
+void Compiler::addStdin()
+{
+    TypeRef *streamType = TypeRef::cons( internal, uniqueTypeStream );
+
+    ObjectField *stdinField = ObjectField::cons( internal,
+            ObjectField::InbuiltFieldType, streamType, "stdin" );
+
+    stdinField->isConst = true;
+
+    /* All access modes read the constant. */
+    stdinField->inGetR = IN_GET_CONST;
+    stdinField->inGetValR = IN_GET_CONST;
+    stdinField->inGetWC = IN_GET_CONST;
+    stdinField->inGetValWC = IN_GET_CONST;
+    stdinField->inGetWV = IN_GET_CONST;
+    stdinField->inGetValWV = IN_GET_CONST;
+
+    /* Which constant to read. */
+    stdinField->constValId = CONST_STDIN;
+    stdinField->isConstVal = true;
+
+    rootNamespace->rootScope->insertField( stdinField->name, stdinField );
+}
+
+/*
+ * Add the constant stream field "stdout" to the root scope of the root
+ * namespace. The field is a constant value identified by CONST_STDOUT and
+ * every get opcode is IN_GET_CONST.
+ */
+void Compiler::addStdout()
+{
+    TypeRef *streamType = TypeRef::cons( internal, uniqueTypeStream );
+
+    ObjectField *stdoutField = ObjectField::cons( internal,
+            ObjectField::InbuiltFieldType, streamType, "stdout" );
+
+    stdoutField->isConst = true;
+
+    /* All access modes read the constant. */
+    stdoutField->inGetR = IN_GET_CONST;
+    stdoutField->inGetValR = IN_GET_CONST;
+    stdoutField->inGetWC = IN_GET_CONST;
+    stdoutField->inGetValWC = IN_GET_CONST;
+    stdoutField->inGetWV = IN_GET_CONST;
+    stdoutField->inGetValWV = IN_GET_CONST;
+
+    /* Which constant to read. */
+    stdoutField->constValId = CONST_STDOUT;
+    stdoutField->isConstVal = true;
+
+    rootNamespace->rootScope->insertField( stdoutField->name, stdoutField );
+}
+
+/*
+ * Add the constant stream field "stderr" to the root scope of the root
+ * namespace. The field is a constant value identified by CONST_STDERR and
+ * every get opcode is IN_GET_CONST.
+ */
+void Compiler::addStderr()
+{
+    TypeRef *streamType = TypeRef::cons( internal, uniqueTypeStream );
+
+    ObjectField *stderrField = ObjectField::cons( internal,
+            ObjectField::InbuiltFieldType, streamType, "stderr" );
+
+    stderrField->isConst = true;
+
+    /* All access modes read the constant. */
+    stderrField->inGetR = IN_GET_CONST;
+    stderrField->inGetValR = IN_GET_CONST;
+    stderrField->inGetWC = IN_GET_CONST;
+    stderrField->inGetValWC = IN_GET_CONST;
+    stderrField->inGetWV = IN_GET_CONST;
+    stderrField->inGetValWV = IN_GET_CONST;
+
+    /* Which constant to read. */
+    stderrField->constValId = CONST_STDERR;
+    stderrField->isConstVal = true;
+
+    rootNamespace->rootScope->insertField( stderrField->name, stderrField );
+}
+
+/*
+ * Add the constant struct fields "argv" (typed with argvTypeRef) and "arg0"
+ * (typed str) to the root scope of the root namespace. The created fields
+ * are remembered in the members argv and arg0.
+ */
+void Compiler::addArgv()
+{
+    /* The "argv" field. */
+    ObjectField *argvField = ObjectField::cons( internal,
+            ObjectField::StructFieldType, argvTypeRef, "argv" );
+    argvField->isConst = true;
+    rootNamespace->rootScope->insertField( argvField->name, argvField );
+    argv = argvField;
+
+    /* The "arg0" field. */
+    TypeRef *strType = TypeRef::cons( internal, uniqueTypeStr );
+    ObjectField *arg0Field = ObjectField::cons( internal,
+            ObjectField::StructFieldType, strType, "arg0" );
+    arg0Field->isConst = true;
+    rootNamespace->rootScope->insertField( arg0Field->name, arg0Field );
+    arg0 = arg0Field;
+}
+
+/*
+ * Add the struct field "stds", typed with stdsTypeRef, to the root scope of
+ * the root namespace and remember it in the member stds. The field is not
+ * marked constant.
+ */
+void Compiler::addStds()
+{
+    ObjectField *stdsField = ObjectField::cons( internal,
+            ObjectField::StructFieldType, stdsTypeRef, "stds" );
+
+    rootNamespace->rootScope->insertField( stdsField->name, stdsField );
+    stds = stdsField;
+}
+
+/*
+ * Add the constant str field "error" to the root scope of the root
+ * namespace. Only the three plain get opcodes are assigned, all to
+ * IN_GET_ERROR.
+ */
+void Compiler::addError()
+{
+    TypeRef *strType = TypeRef::cons( internal, uniqueTypeStr );
+
+    ObjectField *errorField = ObjectField::cons( internal,
+            ObjectField::InbuiltFieldType, strType, "error" );
+
+    errorField->inGetWV = IN_GET_ERROR;
+    errorField->inGetWC = IN_GET_ERROR;
+    errorField->inGetR = IN_GET_ERROR;
+    errorField->isConst = true;
+
+    rootNamespace->rootScope->insertField( errorField->name, errorField );
+}
+
+/*
+ * Add one constant str field per entry of defineArgs to the root scope of
+ * the root namespace. Each field is named after the entry, is a constant
+ * value of kind CONST_ARG and carries the entry's value as constValArg.
+ */
+void Compiler::addDefineArgs()
+{
+    for ( DefineVector::Iter def = defineArgs; def.lte(); def++ ) {
+        TypeRef *strType = TypeRef::cons( internal, uniqueTypeStr );
+
+        ObjectField *defField = ObjectField::cons( internal,
+                ObjectField::InbuiltFieldType, strType, def->name );
+
+        defField->isConst = true;
+
+        /* All access modes read the constant. */
+        defField->inGetR = IN_GET_CONST;
+        defField->inGetValR = IN_GET_CONST;
+        defField->inGetWC = IN_GET_CONST;
+        defField->inGetValWC = IN_GET_CONST;
+        defField->inGetWV = IN_GET_CONST;
+        defField->inGetValWV = IN_GET_CONST;
+
+        /* Which constant to read and its value. */
+        defField->constValId = CONST_ARG;
+        defField->constValArg = def->value;
+        defField->isConstVal = true;
+
+        rootNamespace->rootScope->insertField( defField->name, defField );
+    }
+}
+
+/*
+ * Register the methods of a map generic on its object definition. There are
+ * two groups: value functions (find, insert, remove) using the FN_VMAP_*
+ * opcodes, and element functions (find_el, insert_el, detach_el) using the
+ * FN_MAP_* opcodes.
+ */
+void Compiler::initMapFunctions( GenericType *gen )
+{
+ /* Value functions. */
+ initFunction( gen->valueUt, gen->objDef, ObjectMethod::Call, "find",
+ FN_VMAP_FIND, FN_VMAP_FIND, gen->keyUt, true, true, gen );
+
+ initFunction( uniqueTypeInt, gen->objDef, ObjectMethod::Call, "insert",
+ FN_VMAP_INSERT_WV, FN_VMAP_INSERT_WC, gen->keyUt, gen->valueUt,
+ false, true, gen );
+
+ /* NOTE(review): remove is declared with the element type (elUt) as its
+ * return type although it is in the value group. Confirm that this is
+ * intended. */
+ initFunction( gen->elUt, gen->objDef, ObjectMethod::Call, "remove",
+ FN_VMAP_REMOVE_WV, FN_VMAP_REMOVE_WC, gen->keyUt, false, true, gen );
+
+ /*
+ * Element Functions
+ */
+ initFunction( gen->elUt, gen->objDef, ObjectMethod::Call, "find_el",
+ FN_MAP_FIND, FN_MAP_FIND, gen->keyUt, true, true, gen );
+
+ initFunction( uniqueTypeInt, gen->objDef, ObjectMethod::Call, "insert_el",
+ FN_MAP_INSERT_WV, FN_MAP_INSERT_WC, gen->elUt, false, true, gen );
+
+ initFunction( gen->elUt, gen->objDef, ObjectMethod::Call, "detach_el",
+ FN_MAP_DETACH_WV, FN_MAP_DETACH_WC, gen->elUt, false, true, gen );
+}
+
+/*
+ * Add an element-typed field to a map generic's object definition. It is
+ * read with the IN_GET_MAP_MEM opcodes. No set opcodes are assigned. The
+ * field is tied to the generic and gets the given offset.
+ */
+void Compiler::initMapField( GenericType *gen, const char *name, int offset )
+{
+    ObjectField *mapField = ObjectField::cons( internal,
+            ObjectField::InbuiltOffType, gen->elTr, name );
+
+    /* Plain reads. */
+    mapField->inGetR = IN_GET_MAP_MEM_R;
+    mapField->inGetWC = IN_GET_MAP_MEM_WC;
+    mapField->inGetWV = IN_GET_MAP_MEM_WV;
+
+    /* Value reads use the same opcodes. */
+    mapField->inGetValR = IN_GET_MAP_MEM_R;
+    mapField->inGetValWC = IN_GET_MAP_MEM_WC;
+    mapField->inGetValWV = IN_GET_MAP_MEM_WV;
+
+    gen->objDef->rootScope->insertField( mapField->name, mapField );
+
+    mapField->generic = gen;
+    mapField->useGenericId = true;
+
+    /* Zero for head, One for tail. */
+    mapField->offset = offset;
+}
+
+/*
+ * Register the fields of a map generic: "length", "head_el" and "tail_el"
+ * through addLengthField and initMapField, then "key", "prev" and "next"
+ * through initMapElKey and initMapElField.
+ */
+void Compiler::initMapFields( GenericType *gen )
+{
+ /* Constant length, read with IN_MAP_LENGTH. */
+ addLengthField( gen->objDef, IN_MAP_LENGTH );
+
+ /* Offset zero is the head, one is the tail. */
+ initMapField( gen, "head_el", 0 );
+ initMapField( gen, "tail_el", 1 );
+
+ initMapElKey( gen, "key", 0 );
+
+ /* Links of a map element. */
+ initMapElField( gen, "prev", 0 );
+ initMapElField( gen, "next", 1 );
+}
+
+/*
+ * Create the key field of a map element, typed with the generic's key type
+ * reference. It is recorded as mapKeyField on gen->el and inserted into the
+ * root scope of the element struct's object definition. The offset argument
+ * is not used by this function.
+ */
+void Compiler::initMapElKey( GenericType *gen, const char *name, int offset )
+{
+    ObjectField *keyField = ObjectField::cons( internal,
+            ObjectField::GenericDependentType, gen->keyTr, name );
+
+    gen->el->mapKeyField = keyField;
+
+    /* Offset will be computed when the offset of the owning map element field
+     * is computed. */
+
+    gen->elUt->structEl->structDef->objectDef->rootScope->insertField(
+            keyField->name, keyField );
+}
+
+/*
+ * Add an element-typed field to the struct used as the map's element type.
+ * It is read with IN_GET_MAP_EL_MEM_R. Only read opcodes are assigned; no
+ * WC, WV or set opcodes are wired up here.
+ */
+void Compiler::initMapElField( GenericType *gen, const char *name, int offset )
+{
+    ObjectField *linkField = ObjectField::cons( internal,
+            ObjectField::InbuiltOffType, gen->elTr, name );
+
+    /* Plain and value reads share one opcode. */
+    linkField->inGetValR = IN_GET_MAP_EL_MEM_R;
+    linkField->inGetR = IN_GET_MAP_EL_MEM_R;
+
+    linkField->generic = gen;
+    linkField->useGenericId = true;
+
+    /* Zero for head, One for tail. */
+    linkField->offset = offset;
+
+    gen->elUt->structEl->structDef->objectDef->rootScope->insertField(
+            linkField->name, linkField );
+}
+
+/*
+ * Register the methods of a list generic on its object definition. The
+ * value-based methods use the FN_VLIST_* opcodes and work with valueUt. The
+ * *_el methods use the FN_LIST_* opcodes and work with elUt.
+ */
+void Compiler::initListFunctions( GenericType *gen )
+{
+ /* Value pushes. */
+ initFunction( uniqueTypeInt, gen->objDef, ObjectMethod::Call, "push_head",
+ FN_VLIST_PUSH_HEAD_WV, FN_VLIST_PUSH_HEAD_WC, gen->valueUt, false, true, gen );
+
+ initFunction( uniqueTypeInt, gen->objDef, ObjectMethod::Call, "push_tail",
+ FN_VLIST_PUSH_TAIL_WV, FN_VLIST_PUSH_TAIL_WC, gen->valueUt, false, true, gen );
+
+ /* push uses the same opcodes as push_head. */
+ initFunction( uniqueTypeInt, gen->objDef, ObjectMethod::Call, "push",
+ FN_VLIST_PUSH_HEAD_WV, FN_VLIST_PUSH_HEAD_WC, gen->valueUt, false, true, gen );
+
+ /* Value pops. */
+ initFunction( gen->valueUt, gen->objDef, ObjectMethod::Call, "pop_head",
+ FN_VLIST_POP_HEAD_WV, FN_VLIST_POP_HEAD_WC, false, true, gen );
+
+ initFunction( gen->valueUt, gen->objDef, ObjectMethod::Call, "pop_tail",
+ FN_VLIST_POP_TAIL_WV, FN_VLIST_POP_TAIL_WC, false, true, gen );
+
+ /* pop uses the same opcodes as pop_head. */
+ initFunction( gen->valueUt, gen->objDef, ObjectMethod::Call, "pop",
+ FN_VLIST_POP_HEAD_WV, FN_VLIST_POP_HEAD_WC, false, true, gen );
+
+ /* Element pushes. */
+ initFunction( uniqueTypeInt, gen->objDef, ObjectMethod::Call, "push_head_el",
+ FN_LIST_PUSH_HEAD_WV, FN_LIST_PUSH_HEAD_WC, gen->elUt, false, true, gen );
+
+ initFunction( uniqueTypeInt, gen->objDef, ObjectMethod::Call, "push_tail_el",
+ FN_LIST_PUSH_TAIL_WV, FN_LIST_PUSH_TAIL_WC, gen->elUt, false, true, gen );
+
+ /* push_el uses the same opcodes as push_head_el. */
+ initFunction( uniqueTypeInt, gen->objDef, ObjectMethod::Call, "push_el",
+ FN_LIST_PUSH_HEAD_WV, FN_LIST_PUSH_HEAD_WC, gen->elUt, false, true, gen );
+
+ /* Element pops. */
+ initFunction( gen->elUt, gen->objDef, ObjectMethod::Call, "pop_head_el",
+ FN_LIST_POP_HEAD_WV, FN_LIST_POP_HEAD_WC, false, true, gen );
+
+ initFunction( gen->elUt, gen->objDef, ObjectMethod::Call, "pop_tail_el",
+ FN_LIST_POP_TAIL_WV, FN_LIST_POP_TAIL_WC, false, true, gen );
+
+ /* pop_el uses the same opcodes as pop_head_el. */
+ initFunction( gen->elUt, gen->objDef, ObjectMethod::Call, "pop_el",
+ FN_LIST_POP_HEAD_WV, FN_LIST_POP_HEAD_WC, false, true, gen );
+}
+
+/*
+ * Add an element-typed field to the struct used as the list's element type.
+ * It is read with IN_GET_LIST_EL_MEM_R. Only read opcodes are assigned; no
+ * WC, WV or set opcodes are wired up here.
+ */
+void Compiler::initListElField( GenericType *gen, const char *name, int offset )
+{
+    ObjectField *linkField = ObjectField::cons( internal,
+            ObjectField::InbuiltOffType, gen->elTr, name );
+
+    /* Plain and value reads share one opcode. */
+    linkField->inGetValR = IN_GET_LIST_EL_MEM_R;
+    linkField->inGetR = IN_GET_LIST_EL_MEM_R;
+
+    linkField->generic = gen;
+    linkField->useGenericId = true;
+
+    /* Zero for head, One for tail. */
+    linkField->offset = offset;
+
+    gen->elUt->structEl->structDef->objectDef->rootScope->insertField(
+            linkField->name, linkField );
+}
+
+/*
+ * Add an element-typed field to a list generic's object definition. It is
+ * read with the IN_GET_LIST_MEM opcodes. No set opcodes are assigned. The
+ * field is tied to the generic and gets the given offset.
+ */
+void Compiler::initListFieldEl( GenericType *gen, const char *name, int offset )
+{
+    ObjectField *listField = ObjectField::cons( internal,
+            ObjectField::InbuiltOffType, gen->elTr, name );
+
+    /* Plain reads. */
+    listField->inGetR = IN_GET_LIST_MEM_R;
+    listField->inGetWC = IN_GET_LIST_MEM_WC;
+    listField->inGetWV = IN_GET_LIST_MEM_WV;
+
+    /* Value reads use the same opcodes. */
+    listField->inGetValR = IN_GET_LIST_MEM_R;
+    listField->inGetValWC = IN_GET_LIST_MEM_WC;
+    listField->inGetValWV = IN_GET_LIST_MEM_WV;
+
+    gen->objDef->rootScope->insertField( listField->name, listField );
+
+    listField->generic = gen;
+    listField->useGenericId = true;
+
+    /* Zero for head, One for tail. */
+    listField->offset = offset;
+}
+
+/*
+ * Add a built-in field called `name` to the list object of `gen` that is
+ * typed with the generic's value type ref (gen->valueTr). initListFields
+ * passes offset 0 for "head" and "top" and offset 1 for "tail".
+ */
+void Compiler::initListFieldVal( GenericType *gen, const char *name, int offset )
+{
+	ObjectField *field = ObjectField::cons( internal,
+			ObjectField::InbuiltOffType, gen->valueTr, name );
+
+	/* The same instruction serves the plain and the value form of each get.
+	 * The set instructions (IN_SET_VLIST_MEM_WC/WV) are left unset. */
+	field->inGetR = field->inGetValR = IN_GET_VLIST_MEM_R;
+	field->inGetWC = field->inGetValWC = IN_GET_VLIST_MEM_WC;
+	field->inGetWV = field->inGetValWV = IN_GET_VLIST_MEM_WV;
+
+	gen->objDef->rootScope->insertField( field->name, field );
+
+	/* Tie the field to its generic and record which end it selects. */
+	field->useGenericId = true;
+	field->generic = gen;
+	field->offset = offset;
+}
+
+/*
+ * Declare every built-in field of a list generic: the value accessors, the
+ * element accessors, the length field, and the prev/next links of the list
+ * element. The declaration order is: all value fields, all element fields,
+ * length, then the element links.
+ */
+void Compiler::initListFields( GenericType *gen )
+{
+	/* One row per list end: value field name, element field name, offset.
+	 * "top" shares offset 0 with "head". */
+	static const struct {
+		const char *valName;
+		const char *elName;
+		int offset;
+	} ends[] = {
+		{ "head", "head_el", 0 },
+		{ "tail", "tail_el", 1 },
+		{ "top", "top_el", 0 },
+	};
+	const int numEnds = sizeof(ends) / sizeof(ends[0]);
+
+	/* The value fields. */
+	for ( int e = 0; e < numEnds; e++ )
+		initListFieldVal( gen, ends[e].valName, ends[e].offset );
+
+	/* The element fields. */
+	for ( int e = 0; e < numEnds; e++ )
+		initListFieldEl( gen, ends[e].elName, ends[e].offset );
+
+	addLengthField( gen->objDef, IN_LIST_LENGTH );
+
+	/* The link fields of the list element. */
+	initListElField( gen, "prev", 0 );
+	initListElField( gen, "next", 1 );
+}
+
+/*
+ * Declare the built-in methods of a parser generic. "finish" and "eof" are
+ * both ParseFinish methods returning the generic's element type and both use
+ * IN_PARSE_FRAG_W. "gets" is a plain call returning the input type via
+ * IN_GET_PARSER_STREAM.
+ */
+void Compiler::initParserFunctions( GenericType *gen )
+{
+	/* Two names for the same finishing operation. */
+	initFunction( gen->elUt, gen->objDef,
+			ObjectMethod::ParseFinish, "finish",
+			IN_PARSE_FRAG_W, IN_PARSE_FRAG_W, true );
+	initFunction( gen->elUt, gen->objDef,
+			ObjectMethod::ParseFinish, "eof",
+			IN_PARSE_FRAG_W, IN_PARSE_FRAG_W, true );
+
+	/* Access to the parser's stream. */
+	initFunction( uniqueTypeInput, gen->objDef,
+			ObjectMethod::Call, "gets",
+			IN_GET_PARSER_STREAM, IN_GET_PARSER_STREAM, true );
+}
+
+/*
+ * Add a read-only built-in field called `name`, of type `typeRef`, to the
+ * parser object of `gen`. initParserFields passes offset 0 for "tree" and
+ * offset 1 for "error".
+ */
+void Compiler::initParserField( GenericType *gen, const char *name,
+		int offset, TypeRef *typeRef )
+{
+	ObjectField *field = ObjectField::cons( internal,
+			ObjectField::InbuiltOffType, typeRef, name );
+
+	/* Only the read instruction is assigned. The WC/WV get and set variants
+	 * (IN_GET_PARSER_MEM_WC/WV, IN_SET_PARSER_MEM_WC/WV) are left unset. */
+	field->inGetR = IN_GET_PARSER_MEM_R;
+
+	gen->objDef->rootScope->insertField( field->name, field );
+
+	/* Which parser member this field selects. */
+	field->offset = offset;
+}
+
+/*
+ * Declare the built-in fields of a parser generic: "tree" (offset 0, typed
+ * with the generic's element type) and "error" (offset 1, a string).
+ */
+void Compiler::initParserFields( GenericType *gen )
+{
+	TypeRef *treeType = TypeRef::cons( internal, gen->elUt );
+	initParserField( gen, "tree", 0, treeType );
+
+	TypeRef *errorType = TypeRef::cons( internal, uniqueTypeStr );
+	initParserField( gen, "error", 1, errorType );
+}
+
+void Compiler::makeFuncVisible( Function *func, bool isUserIter )
+{ /* Declare a function: register its parameters in its local frame and
+ * insert an ObjectMethod for it into the method map of its namespace.
+ * isUserIter: when true the method is also linked to the user iterator
+ * definition found for this function. */
+ func->localFrame = func->codeBlock->localFrame;
+
+ /* Set up the parameters. Each one becomes a field in the root scope of
+ * the local frame. A name already present there is reported as a
+ * redeclaration. */
+ for ( ParameterList::Iter param = *func->paramList; param.lte(); param++ ) {
+ if ( func->localFrame->rootScope->findField( param->name ) != 0 )
+ error(param->loc) << "parameter " << param->name << " redeclared" << endp;
+
+ func->localFrame->rootScope->insertField( param->name, param );
+ }
+
+ /* Build the method object for the function. It is called with
+ * IN_CALL_WV / IN_CALL_WC and is identified by the function id rather
+ * than by a call object. */
+ ObjectMethod *objMethod = new ObjectMethod( func->typeRef, func->name,
+ IN_CALL_WV, IN_CALL_WC,
+ func->paramList->length(), 0, func->paramList, false );
+ objMethod->funcId = func->funcId;
+ objMethod->useFuncId = true;
+ objMethod->useCallObj = false;
+ objMethod->func = func;
+
+ if ( isUserIter ) {
+ IterDef *uiter = findIterDef( IterDef::User, func );
+ objMethod->iterDef = uiter;
+ }
+
+ NameScope *scope = func->nspace->rootScope; // : globalObjectDef->rootScope;
+
+ if ( !scope->methodMap.insert( func->name, objMethod ) ) /* insert failed: name already taken in this scope */
+ error(func->typeRef->loc) << "function " << func->name << " redeclared" << endp;
+
+ func->objMethod = objMethod;
+}
+
+void Compiler::makeInHostVisible( Function *func )
+{
+ /* Declare an in-host function: register its parameters in its local frame
+ * and insert an ObjectMethod using IN_HOST into the method map of its
+ * namespace.
+ * NOTE(review): func->localFrame is used as-is here; unlike
+ * makeFuncVisible it is not assigned from a code block first. Confirm it
+ * is always set before this runs.
+ *
+ * Set up the parameters. */
+ for ( ParameterList::Iter param = *func->paramList; param.lte(); param++ ) {
+ if ( func->localFrame->rootScope->findField( param->name ) != 0 )
+ error(param->loc) << "parameter " << param->name << " redeclared" << endp;
+
+ func->localFrame->rootScope->insertField( param->name, param );
+ }
+
+ /* Build the method object. Both instruction slots are IN_HOST, and the
+ * method is identified by the function id rather than by a call object. */
+ ObjectMethod *objMethod = new ObjectMethod( func->typeRef, func->name,
+ IN_HOST, IN_HOST,
+ func->paramList->length(), 0, func->paramList, false );
+ objMethod->funcId = func->funcId;
+ objMethod->useFuncId = true;
+ objMethod->useCallObj = false;
+ objMethod->func = func;
+
+ NameScope *scope = func->nspace->rootScope;
+
+ if ( !scope->methodMap.insert( func->name, objMethod ) ) { /* insert failed: name already taken in this scope */
+ error(func->typeRef->loc) << "in-host function " << func->name <<
+ " redeclared" << endp;
+ }
+
+ func->objMethod = objMethod;
+}
+
+/*
+ * Type Declaration Root.
+ *
+ * Runs the declaration steps in a fixed sequence: regular-expression
+ * variables, default iterators, user functions, in-host functions, the
+ * namespace tree, ignore collectors, byte code, the fields of the built-in
+ * types, and finally default tokens for empty scanners.
+ * NOTE(review): the steps presumably depend on this order; confirm before
+ * rearranging any of them.
+ */
+void Compiler::declarePass()
+{
+ declareReVars();
+
+ makeDefaultIterators();
+
+ for ( FunctionList::Iter f = functionList; f.lte(); f++ )
+ makeFuncVisible( f, f->isUserIter );
+
+ for ( FunctionList::Iter f = inHostList; f.lte(); f++ )
+ makeInHostVisible( f );
+
+ rootNamespace->declare( this );
+
+ /* Will fill in zero lels that were not declared. */
+ makeIgnoreCollectors();
+
+ declareByteCode();
+
+ declareIntFields();
+ declareStrFields();
+ declareInputFields();
+ declareStreamFields();
+ declareTokenFields();
+ declareGlobalFields();
+
+ /* Fill any empty scanners with a default token. */
+ initEmptyScanners();
+}
diff --git a/src/defs.h.cmake.in b/src/defs.h.cmake.in
new file mode 100644
index 00000000..c4cf8844
--- /dev/null
+++ b/src/defs.h.cmake.in
@@ -0,0 +1,11 @@
+/* defs.h Generated from defs.h.cmake.in by cmake
+ *
+ * Each #cmakedefine line below is a template placeholder. CMake's
+ * configure_file() turns it into a #define carrying the substituted @VAR@
+ * value when the variable is set, and into a commented-out #undef when it
+ * is not. */
+
+#ifndef _COLM_DEFS_H
+#define _COLM_DEFS_H
+
+#cmakedefine SIZEOF_LONG @SIZEOF_LONG@
+#cmakedefine SIZEOF_UNSIGNED_LONG @SIZEOF_UNSIGNED_LONG@
+#cmakedefine SIZEOF_UNSIGNED_LONG_LONG @SIZEOF_UNSIGNED_LONG_LONG@
+#cmakedefine SIZEOF_VOID_P @SIZEOF_VOID_P@
+
+#endif /* _COLM_DEFS_H */
diff --git a/src/defs.h.in b/src/defs.h.in
new file mode 100644
index 00000000..a91e2ff2
--- /dev/null
+++ b/src/defs.h.in
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2001-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _COLM_DEFS_H
+#define _COLM_DEFS_H
+
+/* Configuration
+ *
+ * This is the template for defs.h. It uses the same include guard and the
+ * same four macro names as defs.h.cmake.in.
+ * NOTE(review): each #undef below is presumably rewritten into a #define by
+ * the configure step (autoconf config-header convention); confirm against
+ * the build scripts. */
+
+/* The size of `long', as computed by sizeof. */
+#undef SIZEOF_LONG
+
+/* The size of `unsigned long', as computed by sizeof. */
+#undef SIZEOF_UNSIGNED_LONG
+
+/* The size of `unsigned long long', as computed by sizeof. */
+#undef SIZEOF_UNSIGNED_LONG_LONG
+
+/* The size of `void *', as computed by sizeof. */
+#undef SIZEOF_VOID_P
+
+#endif /* _COLM_DEFS_H */
diff --git a/src/dotgen.cc b/src/dotgen.cc
new file mode 100644
index 00000000..42f54159
--- /dev/null
+++ b/src/dotgen.cc
@@ -0,0 +1,117 @@
+/*
+ * Copyright 2006-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdbool.h>
+
+#include <iostream>
+
+#include "compiler.h"
+
+using namespace std;
+
+void Compiler::writeTransList( PdaState *state )
+{ /* Write one dot edge per transition leaving `state` to outStream. */
+ ostream &out = *outStream;
+ for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) {
+ /* Write out the from and to states. */
+ out << "\t" << state->stateNum << " -> " << trans->value->toState->stateNum;
+
+ /* Begin the label. The transition key is looked up in langElIndex;
+ * when no language element is found the key is printed as a single
+ * character instead. */
+ out << " [ label = \"";
+ long key = trans->key;
+ LangEl *lel = langElIndex[key];
+ if ( lel != 0 )
+ out << lel->name;
+ else
+ out << (char)key;
+
+ if ( trans->value->actions.length() > 0 ) {
+ out << " / ";
+ for ( ActDataList::Iter act = trans->value->actions; act.lte(); act++ ) {
+ switch ( *act & 0x3 ) { /* low two bits select the action kind; the remaining bits index prodIdIndex */
+ case 1: /* printed as S(ord) -- presumably a shift, TODO confirm */
+ out << "S(" << trans->value->actOrds[act.pos()] << ")";
+ break;
+ case 2: { /* printed as R(prod, ord) -- presumably a reduce, TODO confirm */
+ out << "R(" << prodIdIndex[(*act >> 2)]->data <<
+ ", " << trans->value->actOrds[act.pos()] << ")";
+ break;
+ }
+ case 3: { /* printed as SR(prod, ord) -- presumably shift-reduce, TODO confirm */
+ out << "SR(" << prodIdIndex[(*act >> 2)]->data <<
+ ", " << trans->value->actOrds[act.pos()] << ")";
+ break;
+ }}
+ if ( ! act.last() ) /* comma-separate all but the last action */
+ out << ", ";
+ }
+ }
+
+ out << "\" ];\n";
+ }
+}
+
+void Compiler::writeDotFile( PdaGraph *graph )
+{ /* Write `graph` to outStream as a Graphviz digraph named after parserName. */
+ ostream &out = *outStream;
+ out <<
+ "digraph " << parserName << " {\n"
+ " rankdir=LR;\n"
+ " ranksep=\"0\"\n"
+ " nodesep=\"0.25\"\n"
+ "\n";
+
+ /* Define the pseudo states: one unlabeled point node per entry state.
+ * Transitions will be done after the states have been defined as either
+ * final or not final. */
+ out <<
+ " node [ shape = point ];\n";
+
+ for ( int i = 0; i < graph->entryStateSet.length(); i++ )
+ out << "\tENTRY" << i << " [ label = \"\" ];\n";
+
+ out <<
+ "\n"
+ " node [ shape = circle, fixedsize = true, height = 0.6 ];\n";
+
+ /* Walk the states, declaring one circle node per state labeled with its
+ * state number. */
+ for ( PdaStateList::Iter st = graph->stateList; st.lte(); st++ )
+ out << " " << st->stateNum << " [ label = \"" << st->stateNum << "\" ];\n";
+
+ out << "\n";
+
+ /* Walk the states again, this time writing the edges of each one. */
+ for ( PdaStateList::Iter st = graph->stateList; st.lte(); st++ )
+ writeTransList( st );
+
+ /* Start state and other entry points: connect each ENTRY<n> pseudo state
+ * to the n-th entry state. */
+ for ( PdaStateSet::Iter st = graph->entryStateSet; st.lte(); st++ )
+ out << "\tENTRY" << st.pos() << " -> " << (*st)->stateNum << " [ label = \"\" ];\n";
+
+ out <<
+ "}\n";
+}
+
+void Compiler::writeDotFile()
+{ /* Convenience overload: write the compiler's own pdaGraph. */
+ writeDotFile( pdaGraph );
+}
+
diff --git a/src/dotgen.h b/src/dotgen.h
new file mode 100644
index 00000000..8e8f694b
--- /dev/null
+++ b/src/dotgen.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright 2001-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _COLM_GVDOTGEN_H
+#define _COLM_GVDOTGEN_H
+
+#include <iostream>
+
+#if 0 /* Everything up to the matching #endif is compiled out, so this header currently declares nothing. */
+
+class GraphvizDotGen : public CodeGenData
+{
+public:
+ GraphvizDotGen( ostream &out ) : CodeGenData(out) { }
+
+ /* Print an fsm to out stream. */
+ void writeTransList( RedState *state );
+ void writeDotFile( );
+
+ virtual void finishRagelDef();
+
+private:
+ /* Writing labels and actions. */
+ std::ostream &ONCHAR( Key lowKey, Key highKey );
+ std::ostream &TRANS_ACTION( RedState *fromState, RedTrans *trans );
+ std::ostream &ACTION( RedAction *action );
+ std::ostream &KEY( Key key );
+};
+
+#endif
+
+#endif /* _COLM_GVDOTGEN_H */
+
diff --git a/src/exports.cc b/src/exports.cc
new file mode 100644
index 00000000..988499db
--- /dev/null
+++ b/src/exports.cc
@@ -0,0 +1,260 @@
+/*
+ * Copyright 2006-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdbool.h>
+
+#include <iostream>
+#include <string>
+
+#include "fsmcodegen.h"
+
+using std::ostream;
+using std::ostringstream;
+using std::string;
+using std::cerr;
+using std::endl;
+
+/*
+ * Write the "namespace X { " openers needed to reach `nspace`, outermost
+ * first. The root namespace produces no output.
+ */
+void Compiler::openNameSpace( ostream &out, Namespace *nspace )
+{
+	if ( nspace != rootNamespace ) {
+		/* Ancestors are opened first so the nesting order is correct. */
+		openNameSpace( out, nspace->parentNamespace );
+		out << "namespace " << nspace->name << " { ";
+	}
+}
+
+/*
+ * Write the closing braces that match openNameSpace( out, nspace ): one " }"
+ * for `nspace` and one for each ancestor below the root namespace. The root
+ * namespace produces no output.
+ */
+void Compiler::closeNameSpace( ostream &out, Namespace *nspace )
+{
+	if ( nspace == rootNamespace )
+		return;
+
+	/* Recurse into closeNameSpace, not openNameSpace. Recursing into the
+	 * opener re-emits "namespace parent { " for every ancestor and writes
+	 * only a single closing brace, which leaves the generated header with
+	 * unbalanced braces as soon as a namespace is nested. */
+	closeNameSpace( out, nspace->parentNamespace );
+	out << " }";
+}
+
+void Compiler::generateExports()
+{ /* Write the C++ export header to outStream: a forward declaration and a
+ * struct definition for each language element, accessor declarations
+ * for exported globals, and prototypes for exported functions. */
+ ostream &out = *outStream;
+
+ out <<
+ "#ifndef _EXPORTS_H\n"
+ "#define _EXPORTS_H\n"
+ "\n"
+ "#include <colm/colm.h>\n"
+ "#include <colm/tree.h>\n"
+ "#include <colm/colmex.h>\n"
+ "#include <string>\n"
+ "\n";
+
+ /* Declare. One forward declaration per language element, wrapped in its
+ * namespaces. The EOF elements are skipped. */
+ for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
+ if ( lel->isEOF )
+ continue;
+
+ openNameSpace( out, lel->nspace );
+ out << "struct " << lel->fullName << ";";
+ closeNameSpace( out, lel->nspace );
+ out << "\n";
+ }
+
+ /* Class definitions. Each struct derives from ExportTree and carries the
+ * language element id as a static constant. */
+ for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
+ if ( lel->isEOF )
+ continue;
+
+ openNameSpace( out, lel->nspace );
+ out << "struct " << lel->fullName << "\n";
+ out << " : public ExportTree\n";
+ out << "{\n";
+ out << " static const int ID = " << lel->id << ";\n";
+
+ if ( mainReturnUT != 0 && mainReturnUT->langEl == lel ) { /* the type returned by main also gets a constructor taking only the program */
+ out << " " << lel->fullName <<
+ "( colm_program *prg ) : ExportTree( prg, returnVal(prg) ) {\n";
+ out << " }\n";
+ }
+
+ out << " " << lel->fullName <<
+ "( colm_program *prg, colm_tree *tree ) : ExportTree( prg, tree ) {\n";
+
+ out << "}\n";
+
+ if ( lel->objectDef != 0 ) { /* declare an accessor for every tree-typed field */
+ FieldList &fieldList = lel->objectDef->fieldList;
+ for ( FieldList::Iter ofi = fieldList; ofi.lte(); ofi++ ) {
+ ObjectField *field = ofi->value;
+ if ( ( field->useOffset() && field->typeRef != 0 ) || field->isRhsGet() ) {
+ UniqueType *ut = field->typeRef->resolveType( this );
+
+ if ( ut != 0 && ut->typeId == TYPE_TREE )
+ out << " " << ut->langEl->refName << " " << field->name << "();\n";
+ }
+ }
+ }
+
+ bool prodNames = false; /* set when at least one production of this element has a name */
+ for ( LelDefList::Iter prod = lel->defList; prod.lte(); prod++ ) {
+ if ( prod->_name.length() > 0 )
+ prodNames = true;
+ }
+
+ if ( prodNames ) { /* emit an enum of the named productions plus a prodName() accessor */
+ out << " enum prod_name {\n";
+ for ( LelDefList::Iter prod = lel->defList; prod.lte(); prod++ ) {
+ if ( prod->_name.length() > 0 )
+ out << "\t\t" << prod->_name << " = " << prod->prodNum << ",\n";
+ }
+ out << " };\n";
+ out << " enum prod_name prodName() " <<
+ "{ return (enum prod_name)__tree->prod_num; }\n";
+ }
+
+ out << "};";
+ closeNameSpace( out, lel->nspace );
+ out << "\n";
+ }
+
+ for ( FieldList::Iter of = globalObjectDef->fieldList; of.lte(); of++ ) { /* accessor prototypes for exported, tree-typed globals */
+ ObjectField *field = of->value;
+ if ( field->isExport ) {
+ UniqueType *ut = field->typeRef->resolveType(this);
+ if ( ut != 0 && ut->typeId == TYPE_TREE ) {
+ out << ut->langEl->refName << " " << field->name << "( colm_program *prg );\n";
+ }
+ }
+ }
+
+ out << "\n";
+
+ for ( FunctionList::Iter func = functionList; func.lte(); func++ ) { /* prototypes for exported functions; every parameter is passed as const char* */
+ if ( func->exprt ) {
+ char *refName = func->typeRef->uniqueType->langEl->refName;
+ int paramCount = func->paramList->length();
+ out <<
+ refName << " " << func->name << "( colm_program *prg";
+
+ for ( int p = 0; p < paramCount; p++ )
+ out << ", const char *p" << p;
+
+ out << " );\n";
+ }
+ }
+
+ out << "#endif\n";
+}
+
+/*
+ * Write the implementation file that accompanies the export header to
+ * outStream: an include of the export header (when a header file name is
+ * set), the field accessors of every language element, the accessors for
+ * exported globals, and the wrappers for exported functions.
+ */
+void Compiler::generateExportsImpl()
+{
+	ostream &out = *outStream;
+
+	/* Only touch the header file name once it is known to be set. The name
+	 * is copied into a std::string so nothing is passed to the C string
+	 * functions unchecked and no heap copy is left unfreed. */
+	if ( exportHeaderFn != 0 ) {
+		string headerFn( exportHeaderFn );
+
+		/* Strip a trailing ".pack". This is a true suffix test, so a name
+		 * that also contains ".pack" earlier on is handled too. */
+		const string packSuffix( ".pack" );
+		if ( headerFn.size() >= packSuffix.size() &&
+				headerFn.compare( headerFn.size() - packSuffix.size(),
+					packSuffix.size(), packSuffix ) == 0 )
+		{
+			headerFn.erase( headerFn.size() - packSuffix.size() );
+		}
+
+		out << "#include \"" << headerFn << "\"\n";
+	}
+
+	out << "#include <colm/tree.h>\n";
+	out << "#include <string.h>\n";
+
+	/* Function implementations. */
+	for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
+		if ( lel->objectDef != 0 ) {
+			FieldList &fieldList = lel->objectDef->fieldList;
+			for ( FieldList::Iter ofi = fieldList; ofi.lte(); ofi++ ) {
+				ObjectField *field = ofi->value;
+
+				/* Fields stored at an offset are read with colm_get_attr. */
+				if ( field->useOffset() && field->typeRef != 0 ) {
+					UniqueType *ut = field->typeRef->resolveType( this );
+
+					if ( ut != 0 && ut->typeId == TYPE_TREE ) {
+						out << ut->langEl->refName << " " << lel->declName <<
+							"::" << field->name << "() { return " <<
+							ut->langEl->refName << "( __prg, colm_get_attr( __tree, " <<
+							field->offset << ") ); }\n";
+					}
+				}
+
+				/* Right-hand-side fields are read with colm_get_rhs_val. */
+				if ( field->isRhsGet() ) {
+					UniqueType *ut = field->typeRef->resolveType( this );
+
+					if ( ut != 0 && ut->typeId == TYPE_TREE ) {
+						out << ut->langEl->refName << " " << lel->declName <<
+							"::" << field->name << "() { static int a[] = {";
+
+						/* Need to place the array computing the val: the
+						 * entry count followed by one (production number,
+						 * position) pair per entry. */
+						out << field->rhsVal.length();
+						for ( Vector<RhsVal>::Iter rg = field->rhsVal; rg.lte(); rg++ ) {
+							out << ", " << rg->prodEl->production->prodNum;
+							out << ", " << rg->prodEl->pos;
+						}
+
+						out << "}; return " << ut->langEl->refName <<
+							"( __prg, colm_get_rhs_val( __prg, __tree, a ) ); }\n";
+					}
+				}
+			}
+		}
+	}
+
+	out << "\n";
+
+	/* Accessors for exported, tree-typed globals. */
+	for ( FieldList::Iter of = globalObjectDef->fieldList; of.lte(); of++ ) {
+		ObjectField *field = of->value;
+		if ( field->isExport ) {
+			UniqueType *ut = field->typeRef->resolveType(this);
+			if ( ut != 0 && ut->typeId == TYPE_TREE ) {
+				out <<
+					ut->langEl->refName << " " << field->name << "( colm_program *prg )\n"
+					"{ return " << ut->langEl->refName << "( prg, colm_get_global( prg, " <<
+					field->offset << ") ); }\n";
+			}
+		}
+	}
+
+	out << "\n";
+
+	/* Wrappers for exported functions. The parameters are collected into an
+	 * array and handed to colm_run_func along with the function id. */
+	for ( FunctionList::Iter func = functionList; func.lte(); func++ ) {
+		if ( func->exprt ) {
+			char *refName = func->typeRef->uniqueType->langEl->refName;
+			int paramCount = func->paramList->length();
+			out <<
+				refName << " " << func->name << "( colm_program *prg";
+
+			for ( int p = 0; p < paramCount; p++ )
+				out << ", const char *p" << p;
+
+			out << " )\n"
+				"{\n"
+				" int funcId = " << func->funcId << ";\n"
+				" const char *params[" << paramCount << "];\n";
+
+			for ( int p = 0; p < paramCount; p++ )
+				out << " params[" << p << "] = p" << p << ";\n";
+
+			out <<
+				" return " << refName <<
+				"( prg, colm_run_func( prg, funcId, params, " << paramCount << " ));\n"
+				"}\n";
+		}
+	}
+}
diff --git a/src/fsmap.cc b/src/fsmap.cc
new file mode 100644
index 00000000..3e1ae913
--- /dev/null
+++ b/src/fsmap.cc
@@ -0,0 +1,806 @@
+/*
+ * Copyright 2006-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <iostream>
+
+#include <assert.h>
+
+#include "fsmgraph.h"
+
+using std::cerr;
+using std::endl;
+
+KeyOps *keyOps = 0; /* Global key operations object. Null here; fillGaps reads minKey and maxKey through it, so it must be assigned before that runs. */
+
+/* Insert an action into an action table.
+ * ordering: the key the action is stored under.
+ * action: the action to store. */
+void ActionTable::setAction( int ordering, Action *action )
+{
+ /* Multi-insert in case specific instances of an action appear in a
+ * transition more than once. */
+ insertMulti( ordering, action );
+}
+
+/* Set all the actions from another action table in this table. Every
+ * key/value pair of `other` is multi-inserted, so entries already present
+ * here are kept alongside the new ones. */
+void ActionTable::setActions( const ActionTable &other )
+{
+ for ( ActionTable::Iter action = other; action.lte(); action++ )
+ insertMulti( action->key, action->value );
+}
+
+/* Insert nActs actions given as two parallel arrays: orderings[i] is the key
+ * that actions[i] is stored under. */
+void ActionTable::setActions( int *orderings, Action **actions, int nActs )
+{
+	int *ord = orderings;
+	Action **act = actions;
+	for ( int left = nActs; left > 0; left--, ord++, act++ )
+		insertMulti( *ord, *act );
+}
+
+/* Report whether `action` is stored anywhere in this table, under any
+ * ordering. */
+bool ActionTable::hasAction( Action *action )
+{
+	/* Linear scan that stops at the first entry holding the action. */
+	int pos = 0;
+	while ( pos < length() && data[pos].value != action )
+		pos += 1;
+	return pos < length();
+}
+
+/* Insert a longest-match action into the table.
+ * ordering: the key the action is stored under.
+ * action: the token instance to store. */
+void LmActionTable::setAction( int ordering, TokenInstance *action )
+{
+ /* Multi-insert in case specific instances of an action appear in a
+ * transition more than once. */
+ insertMulti( ordering, action );
+}
+
+/* Set all the actions from another longest-match action table in this
+ * table. Every key/value pair of `other` is multi-inserted. */
+void LmActionTable::setActions( const LmActionTable &other )
+{
+ for ( LmActionTable::Iter action = other; action.lte(); action++ )
+ insertMulti( action->key, action->value );
+}
+
+void ErrActionTable::setAction( int ordering, Action *action, int transferPoint )
+{
+ insertMulti( ErrActionTableEl( action, ordering, transferPoint ) );
+}
+
+void ErrActionTable::setActions( const ErrActionTable &other )
+{
+ for ( ErrActionTable::Iter act = other; act.lte(); act++ )
+ insertMulti( ErrActionTableEl( act->action, act->ordering, act->transferPoint ) );
+}
+
+/* Insert a priority into this priority table. Looks out for priorities on
+ * duplicate keys.
+ * ordering: when the priority was set; a later (or equal) ordering wins.
+ * desc: the priority descriptor to store. */
+void PriorTable::setPrior( int ordering, PriorDesc *desc )
+{
+ PriorEl *lastHit = 0;
+ PriorEl *insed = insert( PriorEl(ordering, desc), &lastHit );
+ if ( insed == 0 ) {
+ /* This already has a priority on the same key as desc. Overwrite the
+ * priority if the ordering is at least as large (the same point in
+ * time or later). */
+ if ( ordering >= lastHit->ordering )
+ *lastHit = PriorEl( ordering, desc );
+ }
+}
+
+/* Set all the priorities from a priorTable in this table. Each entry goes
+ * through setPrior, so its duplicate-key rule applies to every one. */
+void PriorTable::setPriors( const PriorTable &other )
+{
+ /* Loop src priorities once to overwrite duplicates. */
+ PriorTable::Iter priorIt = other;
+ for ( ; priorIt.lte(); priorIt++ )
+ setPrior( priorIt->ordering, priorIt->desc );
+}
+
+/* Set the priority of starting transitions. Isolates the start state so it has
+ * no other entry points, then sets the priorities of all the transitions out
+ * of the start state that have a target state.
+ * NOTE(review): earlier wording here said that the outPrior of a final start
+ * state is also set, so that a machine accepting the null string can still
+ * specify the starting trans prior. This body does not touch outPriorTable;
+ * confirm whether that step was dropped on purpose. */
+void FsmGraph::startFsmPrior( int ordering, PriorDesc *prior )
+{
+ /* Make sure the start state has no other entry points. */
+ isolateStartState();
+
+ /* Walk all transitions out of the start state. Transitions without a
+ * target state are skipped. */
+ for ( TransList::Iter trans = startState->outList; trans.lte(); trans++ ) {
+ if ( trans->toState != 0 )
+ trans->priorTable.setPrior( ordering, prior );
+ }
+}
+
+/* Set the priority of all transitions in a graph. Walks the out list of
+ * every state; transitions without a target state are skipped. */
+void FsmGraph::allTransPrior( int ordering, PriorDesc *prior )
+{
+ /* Walk the list of all states. */
+ for ( StateList::Iter state = stateList; state.lte(); state++ ) {
+ /* Walk the out list of the state. */
+ for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) {
+ if ( trans->toState != 0 )
+ trans->priorTable.setPrior( ordering, prior );
+ }
+ }
+}
+
+/* Set the priority of all transitions that go into a final state. Note that if
+ * any entry states are final, we will not be setting the priority of any
+ * transitions that may go into those states in the future. The graph does not
+ * support pending in transitions in the same way pending out transitions are
+ * supported. Only the transitions currently in the in list of each final
+ * state are updated. */
+void FsmGraph::finishFsmPrior( int ordering, PriorDesc *prior )
+{
+ /* Walk all final states. */
+ for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) {
+ /* Walk all in transitions of the final state. */
+ for ( TransInList::Iter trans = (*state)->inList; trans.lte(); trans++ )
+ trans->priorTable.setPrior( ordering, prior );
+ }
+}
+
+/* Set the priority of any future out transitions that may be made going out of
+ * this state machine. The priority is stored in the outPriorTable of each
+ * final state; no existing transition is modified here. */
+void FsmGraph::leaveFsmPrior( int ordering, PriorDesc *prior )
+{
+ /* Set priority in all final states. */
+ for ( StateSet::Iter state = finStateSet; state.lte(); state++ )
+ (*state)->outPriorTable.setPrior( ordering, prior );
+}
+
+
+/* Set actions to execute on starting transitions. Isolates the start state
+ * so it has no other entry points, then adds to the transition functions
+ * of all the transitions out of the start state that have a target state.
+ * NOTE(review): earlier wording here said that the func is also added to the
+ * out func list of a final start state, so that a machine accepting the null
+ * string can execute a start func when leaving the start/final state. This
+ * body does not touch outActionTable; confirm whether that step was dropped
+ * on purpose. */
+void FsmGraph::startFsmAction( int ordering, Action *action )
+{
+ /* Make sure the start state has no other entry points. */
+ isolateStartState();
+
+ /* Walk the start state's transitions, setting functions. Transitions
+ * without a target state are skipped. */
+ for ( TransList::Iter trans = startState->outList; trans.lte(); trans++ ) {
+ if ( trans->toState != 0 )
+ trans->actionTable.setAction( ordering, action );
+ }
+}
+
+/* Set functions to execute on all transitions. Walks the out lists of all
+ * states; transitions without a target state are skipped. */
+void FsmGraph::allTransAction( int ordering, Action *action )
+{
+ /* Walk all states. */
+ for ( StateList::Iter state = stateList; state.lte(); state++ ) {
+ /* Walk the out list of the state. */
+ for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) {
+ if ( trans->toState != 0 )
+ trans->actionTable.setAction( ordering, action );
+ }
+ }
+}
+
+/* Specify functions to execute upon entering final states. If the start state
+ * is final we can't really specify a function to execute upon entering that
+ * final state the first time. So function really means whenever entering a
+ * final state from within the same fsm. The action is added to every
+ * transition currently in the in list of each final state. */
+void FsmGraph::finishFsmAction( int ordering, Action *action )
+{
+ /* Walk all final states. */
+ for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) {
+ /* Walk the final state's in list. */
+ for ( TransInList::Iter trans = (*state)->inList; trans.lte(); trans++ )
+ trans->actionTable.setAction( ordering, action );
+ }
+}
+
+/* Add functions to any future out transitions that may be made going out of
+ * this state machine. The action is stored in the outActionTable of each
+ * final state; no existing transition is modified here. */
+void FsmGraph::leaveFsmAction( int ordering, Action *action )
+{
+ /* Insert the action in the outActionTable of all final states. */
+ for ( StateSet::Iter state = finStateSet; state.lte(); state++ )
+ (*state)->outActionTable.setAction( ordering, action );
+}
+
+/* Add functions to the longest match action table for constructing scanners.
+ * lmPart is added to the lmActionTable of every transition currently in the
+ * in list of each final state. */
+void FsmGraph::longMatchAction( int ordering, TokenInstance *lmPart )
+{
+ /* Walk all final states. */
+ for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) {
+ /* Walk the final state's in list. */
+ for ( TransInList::Iter trans = (*state)->inList; trans.lte(); trans++ )
+ trans->lmActionTable.setAction( ordering, lmPart );
+ }
+}
+
+void FsmGraph::fillGaps( FsmState *state )
+{ /* Make the out list of `state` cover the whole key range from
+ * keyOps->minKey to keyOps->maxKey. Every uncovered range gets a new
+ * transition with a null target state, which the error-action functions
+ * in this file treat as an error transition.
+ * NOTE(review): assumes the out list is sorted by key and the ranges
+ * do not overlap -- confirm. */
+ if ( state->outList.length() == 0 ) {
+ /* Add the range on the lower and upper bound. */
+ attachNewTrans( state, 0, keyOps->minKey, keyOps->maxKey );
+ }
+ else {
+ TransList srcList; /* the existing transitions are moved here and re-appended one at a time, interleaved with the gap fillers */
+ srcList.transfer( state->outList );
+
+ /* Check for a gap at the beginning. */
+ TransList::Iter trans = srcList, next;
+ if ( keyOps->minKey < trans->lowKey ) {
+ /* Make the high key and append. */
+ Key highKey = trans->lowKey;
+ highKey.decrement();
+
+ attachNewTrans( state, 0, keyOps->minKey, highKey );
+ }
+
+ /* Write the transition. The successor is fetched before the append
+ * moves the transition back into the state's out list. */
+ next = trans.next();
+ state->outList.append( trans );
+
+ /* Keep the last high end. */
+ Key lastHigh = trans->highKey;
+
+ /* Loop each source range. */
+ for ( trans = next; trans.lte(); trans = next ) {
+ /* Make the next key following the last range. */
+ Key nextKey = lastHigh;
+ nextKey.increment();
+
+ /* Check for a gap from last up to here. */
+ if ( nextKey < trans->lowKey ) {
+ /* Make the high end of the range that fills the gap. */
+ Key highKey = trans->lowKey;
+ highKey.decrement();
+
+ attachNewTrans( state, 0, nextKey, highKey );
+ }
+
+ /* Move the existing transition back into the out list. The
+ * successor is fetched first, as above. */
+ next = trans.next();
+ state->outList.append( trans );
+
+ /* Keep the last high end. */
+ lastHigh = trans->highKey;
+ }
+
+ /* Now check for a gap on the end to fill. */
+ if ( lastHigh < keyOps->maxKey ) {
+ /* The filler starts at the key just past the last covered one. */
+ lastHigh.increment();
+
+ attachNewTrans( state, 0, lastHigh, keyOps->maxKey );
+ }
+ }
+}
+
+void FsmGraph::setErrorAction( FsmState *state, int ordering, Action *action )
+{
+ /* Attach `action` to every error transition (null target state) leaving
+ * `state`.
+ *
+ * Fill any gaps in the out list with an error transition. */
+ fillGaps( state );
+
+ /* Set error transitions in the transitions that go to error. */
+ for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) {
+ if ( trans->toState == 0 )
+ trans->actionTable.setAction( ordering, action );
+ }
+}
+
+void FsmGraph::setErrorActions( FsmState *state, const ActionTable &other )
+{
+ /* Attach every action in `other` to every error transition (null target
+ * state) leaving `state`.
+ *
+ * Fill any gaps in the out list with an error transition. */
+ fillGaps( state );
+
+ /* Set error transitions in the transitions that go to error. */
+ for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) {
+ if ( trans->toState == 0 )
+ trans->actionTable.setActions( other );
+ }
+}
+
+
+/* Give a target state for error transitions. */
+void FsmGraph::setErrorTarget( FsmState *state, FsmState *target, int *orderings,
+ Action **actions, int nActs )
+{
+ /* Fill any gaps in the out list with an error transition. */
+ fillGaps( state );
+
+ /* Set error target in the transitions that go to error. */
+ for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) {
+ if ( trans->toState == 0 ) {
+ /* The trans goes to error, redirect it. */
+ redirectErrorTrans( trans->fromState, target, trans );
+ trans->actionTable.setActions( orderings, actions, nActs );
+ }
+ }
+}
+
+void FsmGraph::transferErrorActions( FsmState *state, int transferPoint )
+{
+ for ( int i = 0; i < state->errActionTable.length(); ) {
+ ErrActionTableEl *act = state->errActionTable.data + i;
+ if ( act->transferPoint == transferPoint ) {
+ /* Transfer the error action and remove it. */
+ setErrorAction( state, act->ordering, act->action );
+ state->errActionTable.vremove( i );
+ }
+ else {
+ /* Not transfering and deleting, skip over the item. */
+ i += 1;
+ }
+ }
+}
+
+/* Set error actions in the start state. */
+void FsmGraph::startErrorAction( int ordering, Action *action, int transferPoint )
+{
+ /* Make sure the start state has no other entry points. */
+ isolateStartState();
+
+ /* Add the actions. */
+ startState->errActionTable.setAction( ordering, action, transferPoint );
+}
+
+/* Set error actions in all states where there is a transition out. */
+void FsmGraph::allErrorAction( int ordering, Action *action, int transferPoint )
+{
+ /* Insert actions in the error action table of all states. */
+ for ( StateList::Iter state = stateList; state.lte(); state++ )
+ state->errActionTable.setAction( ordering, action, transferPoint );
+}
+
+/* Set error actions in final states. */
+void FsmGraph::finalErrorAction( int ordering, Action *action, int transferPoint )
+{
+ /* Add the action to the error table of final states. */
+ for ( StateSet::Iter state = finStateSet; state.lte(); state++ )
+ (*state)->errActionTable.setAction( ordering, action, transferPoint );
+}
+
+void FsmGraph::notStartErrorAction( int ordering, Action *action, int transferPoint )
+{
+ for ( StateList::Iter state = stateList; state.lte(); state++ ) {
+ if ( state != startState )
+ state->errActionTable.setAction( ordering, action, transferPoint );
+ }
+}
+
+void FsmGraph::notFinalErrorAction( int ordering, Action *action, int transferPoint )
+{
+ for ( StateList::Iter state = stateList; state.lte(); state++ ) {
+ if ( ! state->isFinState() )
+ state->errActionTable.setAction( ordering, action, transferPoint );
+ }
+}
+
+/* Set error actions in the states that have transitions into a final state. */
+void FsmGraph::middleErrorAction( int ordering, Action *action, int transferPoint )
+{
+ /* Isolate the start state in case it is reachable from in inside the
+ * machine, in which case we don't want it set. */
+ for ( StateList::Iter state = stateList; state.lte(); state++ ) {
+ if ( state != startState && ! state->isFinState() )
+ state->errActionTable.setAction( ordering, action, transferPoint );
+ }
+}
+
+/* Record an EOF action in the start state's EOF action table. The start
+ * state is isolated beforehand so that it has no other entry points. */
+void FsmGraph::startEOFAction( int ordering, Action *action )
+{
+    /* Isolation must happen before the table is modified. */
+    isolateStartState();
+
+    startState->eofActionTable.setAction( ordering, action );
+}
+
+/* Record an EOF action in the EOF action table of every state in
+ * stateList. */
+void FsmGraph::allEOFAction( int ordering, Action *action )
+{
+    StateList::Iter st = stateList;
+    while ( st.lte() ) {
+        st->eofActionTable.setAction( ordering, action );
+        st++;
+    }
+}
+
+/* Record an EOF action in the EOF action table of each final state. */
+void FsmGraph::finalEOFAction( int ordering, Action *action )
+{
+    /* finStateSet stores state pointers, hence the extra dereference. */
+    StateSet::Iter fin = finStateSet;
+    while ( fin.lte() ) {
+        (*fin)->eofActionTable.setAction( ordering, action );
+        fin++;
+    }
+}
+
+/* Record an EOF action in the EOF action table of every state except the
+ * start state. */
+void FsmGraph::notStartEOFAction( int ordering, Action *action )
+{
+    for ( StateList::Iter st = stateList; st.lte(); st++ ) {
+        /* The start state is the only one left out. */
+        if ( st == startState )
+            continue;
+        st->eofActionTable.setAction( ordering, action );
+    }
+}
+
+/* Record an EOF action in the EOF action table of every state that is not a
+ * final state. */
+void FsmGraph::notFinalEOFAction( int ordering, Action *action )
+{
+    for ( StateList::Iter st = stateList; st.lte(); st++ ) {
+        /* Final states are left out. */
+        if ( st->isFinState() )
+            continue;
+        st->eofActionTable.setAction( ordering, action );
+    }
+}
+
+/* Record an EOF action in the EOF action table of every "middle" state: one
+ * that is neither the start state nor a final state. */
+void FsmGraph::middleEOFAction( int ordering, Action *action )
+{
+    for ( StateList::Iter st = stateList; st.lte(); st++ ) {
+        /* Leave out the start state and all final states. */
+        if ( st == startState || st->isFinState() )
+            continue;
+        st->eofActionTable.setAction( ordering, action );
+    }
+}
+
+/*
+ * Set To State Actions.
+ */
+
+/* Record a to-state action in the start state's toStateActionTable. The
+ * start state is isolated beforehand so that it has no other entry points. */
+void FsmGraph::startToStateAction( int ordering, Action *action )
+{
+    /* Isolation must happen before the table is modified. */
+    isolateStartState();
+
+    startState->toStateActionTable.setAction( ordering, action );
+}
+
+/* Record a to-state action in the toStateActionTable of every state in
+ * stateList. */
+void FsmGraph::allToStateAction( int ordering, Action *action )
+{
+    StateList::Iter st = stateList;
+    while ( st.lte() ) {
+        st->toStateActionTable.setAction( ordering, action );
+        st++;
+    }
+}
+
+/* Record a to-state action in the toStateActionTable of each final state. */
+void FsmGraph::finalToStateAction( int ordering, Action *action )
+{
+    /* finStateSet stores state pointers, hence the extra dereference. */
+    StateSet::Iter fin = finStateSet;
+    while ( fin.lte() ) {
+        (*fin)->toStateActionTable.setAction( ordering, action );
+        fin++;
+    }
+}
+
+/* Record a to-state action in the toStateActionTable of every state except
+ * the start state. */
+void FsmGraph::notStartToStateAction( int ordering, Action *action )
+{
+    for ( StateList::Iter st = stateList; st.lte(); st++ ) {
+        /* The start state is the only one left out. */
+        if ( st == startState )
+            continue;
+        st->toStateActionTable.setAction( ordering, action );
+    }
+}
+
+/* Record a to-state action in the toStateActionTable of every state that is
+ * not a final state. */
+void FsmGraph::notFinalToStateAction( int ordering, Action *action )
+{
+    for ( StateList::Iter st = stateList; st.lte(); st++ ) {
+        /* Final states are left out. */
+        if ( st->isFinState() )
+            continue;
+        st->toStateActionTable.setAction( ordering, action );
+    }
+}
+
+/* Record a to-state action in the toStateActionTable of every "middle"
+ * state: one that is neither the start state nor a final state. */
+void FsmGraph::middleToStateAction( int ordering, Action *action )
+{
+    for ( StateList::Iter st = stateList; st.lte(); st++ ) {
+        /* Leave out the start state and all final states. */
+        if ( st == startState || st->isFinState() )
+            continue;
+        st->toStateActionTable.setAction( ordering, action );
+    }
+}
+
+/*
+ * Set From State Actions.
+ */
+
+/* Record a from-state action in the start state's fromStateActionTable. The
+ * start state is isolated beforehand so that it has no other entry points. */
+void FsmGraph::startFromStateAction( int ordering, Action *action )
+{
+    /* Isolation must happen before the table is modified. */
+    isolateStartState();
+
+    startState->fromStateActionTable.setAction( ordering, action );
+}
+
+/* Record a from-state action in the fromStateActionTable of every state in
+ * stateList. */
+void FsmGraph::allFromStateAction( int ordering, Action *action )
+{
+    StateList::Iter st = stateList;
+    while ( st.lte() ) {
+        st->fromStateActionTable.setAction( ordering, action );
+        st++;
+    }
+}
+
+/* Record a from-state action in the fromStateActionTable of each final
+ * state. */
+void FsmGraph::finalFromStateAction( int ordering, Action *action )
+{
+    /* finStateSet stores state pointers, hence the extra dereference. */
+    StateSet::Iter fin = finStateSet;
+    while ( fin.lte() ) {
+        (*fin)->fromStateActionTable.setAction( ordering, action );
+        fin++;
+    }
+}
+
+/* Record a from-state action in the fromStateActionTable of every state
+ * except the start state. */
+void FsmGraph::notStartFromStateAction( int ordering, Action *action )
+{
+    for ( StateList::Iter st = stateList; st.lte(); st++ ) {
+        /* The start state is the only one left out. */
+        if ( st == startState )
+            continue;
+        st->fromStateActionTable.setAction( ordering, action );
+    }
+}
+
+/* Record a from-state action in the fromStateActionTable of every state
+ * that is not a final state. */
+void FsmGraph::notFinalFromStateAction( int ordering, Action *action )
+{
+    for ( StateList::Iter st = stateList; st.lte(); st++ ) {
+        /* Final states are left out. */
+        if ( st->isFinState() )
+            continue;
+        st->fromStateActionTable.setAction( ordering, action );
+    }
+}
+
+/* Record a from-state action in the fromStateActionTable of every "middle"
+ * state: one that is neither the start state nor a final state. */
+void FsmGraph::middleFromStateAction( int ordering, Action *action )
+{
+    for ( StateList::Iter st = stateList; st.lte(); st++ ) {
+        /* Leave out the start state and all final states. */
+        if ( st == startState || st->isFinState() )
+            continue;
+        st->fromStateActionTable.setAction( ordering, action );
+    }
+}
+
+/* Renumber the action ordering keys on each transition leaving the start
+ * state so that they run fromOrder, fromOrder+1, ... in table order. The
+ * numbering restarts at fromOrder for every transition. Returns the largest
+ * count of ordering numbers consumed by any single transition (0 when the
+ * start state has no actions on its out transitions). */
+int FsmGraph::shiftStartActionOrder( int fromOrder )
+{
+    int widest = 0;
+
+    for ( TransList::Iter tr = startState->outList; tr.lte(); tr++ ) {
+        /* Hand out consecutive keys to this transition's actions. */
+        int nextOrder = fromOrder;
+        for ( ActionTable::Iter act = tr->actionTable; act.lte(); act++ )
+            act->key = nextOrder++;
+
+        /* Remember the longest run seen so far. */
+        int used = nextOrder - fromOrder;
+        if ( used > widest )
+            widest = used;
+    }
+
+    return widest;
+}
+
+/* Drop every priority in the graph: the out priority table of each state
+ * and the priority table of each of its out transitions. */
+void FsmGraph::clearAllPriorities()
+{
+    StateList::Iter st = stateList;
+    while ( st.lte() ) {
+        /* Priorities pending on the state itself. */
+        st->outPriorTable.empty();
+
+        /* Priorities embedded on the transitions leaving the state. */
+        for ( TransList::Iter tr = st->outList; tr.lte(); tr++ )
+            tr->priorTable.empty();
+
+        st++;
+    }
+}
+
+/* Reset every action ordering key in the graph to zero. Intended to be
+ * called before minimization, once no further fsm operations will be done:
+ * states then no longer differ merely by action ordering, which allows a
+ * greater reduction. */
+void FsmGraph::nullActionKeys( )
+{
+    for ( StateList::Iter st = stateList; st.lte(); st++ ) {
+        /* Tables carried by the transitions leaving the state. */
+        for ( TransList::Iter tr = st->outList; tr.lte(); tr++ ) {
+            /* Regular transition actions. */
+            for ( ActionTable::Iter act = tr->actionTable;
+                    act.lte(); act++ )
+                act->key = 0;
+
+            /* Longest-match actions. */
+            for ( LmActionTable::Iter lmAct = tr->lmActionTable;
+                    lmAct.lte(); lmAct++ )
+                lmAct->key = 0;
+        }
+
+        /* To-state actions. */
+        for ( ActionTable::Iter act = st->toStateActionTable;
+                act.lte(); act++ )
+            act->key = 0;
+
+        /* From-state actions. */
+        for ( ActionTable::Iter act = st->fromStateActionTable;
+                act.lte(); act++ )
+            act->key = 0;
+
+        /* Out (leaving) actions. */
+        for ( ActionTable::Iter act = st->outActionTable;
+                act.lte(); act++ )
+            act->key = 0;
+
+        /* Error actions keep their order in 'ordering' rather than 'key'. */
+        for ( ErrActionTable::Iter errAct = st->errActionTable;
+                errAct.lte(); errAct++ )
+            errAct->ordering = 0;
+
+        /* EOF actions. */
+        for ( ActionTable::Iter act = st->eofActionTable;
+                act.lte(); act++ )
+            act->key = 0;
+    }
+}
+
+/* Debug consistency check over stateList. Asserts that states without the
+ * SB_ISFINAL bit carry no out actions, out conditions or out priorities,
+ * that the SB_BOTH bits are clear on every state, and that every state has
+ * at least one foreign in transition. The checks are plain asserts. */
+void FsmGraph::verifyStates()
+{
+    for ( StateList::Iter state = stateList; state.lte(); state++ ) {
+        /* Leaving data is only allowed on final states. */
+        if ( ( state->stateBits & SB_ISFINAL ) == 0 ) {
+            assert( state->outActionTable.length() == 0 );
+            assert( state->outCondSet.length() == 0 );
+            assert( state->outPriorTable.length() == 0 );
+        }
+
+        /* Scratch bits used by algorithms must have been reset. */
+        assert( (state->stateBits & SB_BOTH) == 0 );
+
+        /* Every state on the main list needs a way in from another state. */
+        assert( state->foreignInTrans > 0 );
+    }
+}
+
+/* Compare two priority tables. The tables are scanned side by side; entries
+ * whose descriptor keys match are compared by priority and the first
+ * difference decides: -1 if table 1 has the lower priority, 1 if it has the
+ * higher. Keys present in only one table are skipped. Returns 0 when no
+ * shared key has differing priorities. */
+int FsmGraph::comparePrior( const PriorTable &priorTable1, const PriorTable &priorTable2 )
+{
+    PriorTable::Iter left = priorTable1;
+    PriorTable::Iter right = priorTable2;
+
+    while ( left.lte() && right.lte() ) {
+        /* Unmatched key on the left side, move past it. */
+        if ( left->desc->key < right->desc->key ) {
+            left.increment();
+            continue;
+        }
+
+        /* Unmatched key on the right side, move past it. */
+        if ( left->desc->key > right->desc->key ) {
+            right.increment();
+            continue;
+        }
+
+        /* Same key: a priority difference settles the comparison. */
+        if ( left->desc->priority < right->desc->priority )
+            return -1;
+        if ( left->desc->priority > right->desc->priority )
+            return 1;
+
+        /* Same key, same priority: step both sides. */
+        left.increment();
+        right.increment();
+    }
+
+    /* Ran off the end of a table without finding a difference. */
+    return 0;
+}
+
+/* Order two transitions by their embedded data only: priority table first,
+ * then longest-match action table, then action table. The from and to
+ * states are not considered. Neither pointer may be null. Returns the result
+ * of the first comparison that is non-zero, or 0 if all three are equal. */
+int FsmGraph::compareTransData( FsmTrans *trans1, FsmTrans *trans2 )
+{
+    /* Priorities. */
+    int res = CmpPriorTable::compare( trans1->priorTable,
+            trans2->priorTable );
+
+    /* Longest-match actions, only consulted on a tie so far. */
+    if ( res == 0 ) {
+        res = CmpLmActionTable::compare(trans1->lmActionTable,
+                trans2->lmActionTable);
+    }
+
+    /* Regular actions, only consulted on a tie so far. */
+    if ( res == 0 ) {
+        res = CmpActionTable::compare(trans1->actionTable,
+                trans2->actionTable);
+    }
+
+    return res;
+}
+
+/* Merge the embedded data of srcTrans into destTrans. Called once for each
+ * transition that a new or combined transition is to represent during
+ * merging. srcTrans may be destTrans itself; that case is handled by merging
+ * from temporary copies of the tables. */
+void FsmGraph::addInTrans( FsmTrans *destTrans, FsmTrans *srcTrans )
+{
+    if ( srcTrans != destTrans ) {
+        /* Distinct transitions: pull in the actions and the priorities. */
+        destTrans->lmActionTable.setActions( srcTrans->lmActionTable );
+        destTrans->actionTable.setActions( srcTrans->actionTable );
+        destTrans->priorTable.setPriors( srcTrans->priorTable );
+        return;
+    }
+
+    /* Adding a transition into itself. The tables are copied first so a
+     * table is never merged while it is also the source. Priorities are
+     * skipped because merging them into themselves would have no effect. */
+    destTrans->lmActionTable.setActions( LmActionTable(srcTrans->lmActionTable) );
+    destTrans->actionTable.setActions( ActionTable(srcTrans->actionTable) );
+}
+
+/* Order two states by the data users embed in them. Compared in turn: out
+ * priorities, to-state actions, from-state actions, out actions, out
+ * condition set, error actions and finally EOF actions. Returns the result
+ * of the first comparison that is non-zero, or 0 if every table matches. */
+int FsmGraph::compareStateData( const FsmState *state1, const FsmState *state2 )
+{
+    /* Out priorities. */
+    int res = CmpPriorTable::
+            compare( state1->outPriorTable, state2->outPriorTable );
+
+    /* Each later table is consulted only while the result is still a tie. */
+    if ( res == 0 ) {
+        res = CmpActionTable::compare( state1->toStateActionTable,
+                state2->toStateActionTable );
+    }
+
+    if ( res == 0 ) {
+        res = CmpActionTable::compare( state1->fromStateActionTable,
+                state2->fromStateActionTable );
+    }
+
+    if ( res == 0 ) {
+        res = CmpActionTable::compare( state1->outActionTable,
+                state2->outActionTable );
+    }
+
+    if ( res == 0 ) {
+        res = CmpActionSet::compare( state1->outCondSet,
+                state2->outCondSet );
+    }
+
+    if ( res == 0 ) {
+        res = CmpErrActionTable::compare( state1->errActionTable,
+                state2->errActionTable );
+    }
+
+    if ( res == 0 ) {
+        res = CmpActionTable::compare( state1->eofActionTable,
+                state2->eofActionTable );
+    }
+
+    return res;
+}
+
+/* Discard the leaving-transition data of a state: its out actions, out
+ * conditions and out priorities. Invoked when a state loses its final state
+ * status. */
+void FsmGraph::clearOutData( FsmState *state )
+{
+    state->outActionTable.empty();
+    state->outCondSet.empty();
+    state->outPriorTable.empty();
+}
+
+/* Report whether the state carries any leaving-transition data, i.e. whether
+ * its out action table, out condition set or out priority table is
+ * non-empty. */
+bool FsmGraph::hasOutData( FsmState *state )
+{
+    if ( state->outActionTable.length() > 0 )
+        return true;
+    if ( state->outCondSet.length() > 0 )
+        return true;
+    return state->outPriorTable.length() > 0;
+}
diff --git a/src/fsmattach.cc b/src/fsmattach.cc
new file mode 100644
index 00000000..bc8571b2
--- /dev/null
+++ b/src/fsmattach.cc
@@ -0,0 +1,427 @@
+/*
+ * Copyright 2006-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+
+#include <iostream>
+
+#include "fsmgraph.h"
+
+using namespace std;
+
+/* Push trans onto the front of the in-transition list whose head pointer is
+ * supplied by the caller (head is updated to point at trans).
+ *
+ * from, to: the states the transition runs between. When they differ the
+ * transition is "foreign" to its target and to->foreignInTrans is
+ * incremented. With misfitAccounting enabled, a target that is about to
+ * receive its first foreign in transition is moved from misfitList to
+ * stateList. */
+void FsmGraph::attachToInList( FsmState *from, FsmState *to,
+        FsmTrans *&head, FsmTrans *trans )
+{
+    /* New element goes in front of the current head. */
+    trans->ilnext = head;
+    trans->ilprev = 0;
+
+    /* If the list is not empty, back-link the old head to trans. */
+    if ( head != 0 )
+        head->ilprev = trans;
+
+    /* Now insert ourselves at the front of the list. */
+    head = trans;
+
+    /* Self loops do not count as foreign in transitions. */
+    if ( from != to ) {
+        if ( misfitAccounting ) {
+            /* The count is about to go from 0 to 1: the state is no longer
+             * a misfit, move it back onto the main list. This must be tested
+             * before the increment below. */
+            if ( to->foreignInTrans == 0 )
+                stateList.append( misfitList.detach( to ) );
+        }
+
+        to->foreignInTrans += 1;
+    }
+}
+
+/* Unlink trans from the in-transition list whose head pointer is supplied by
+ * the caller. When from and to differ, to->foreignInTrans is decremented;
+ * with misfitAccounting enabled, a target whose count reaches zero is moved
+ * from stateList to misfitList. */
+void FsmGraph::detachFromInList( FsmState *from, FsmState *to,
+        FsmTrans *&head, FsmTrans *trans )
+{
+    /* Fix the forward link of the predecessor, or the head if there is no
+     * predecessor. */
+    if ( trans->ilprev != 0 )
+        trans->ilprev->ilnext = trans->ilnext;
+    else
+        head = trans->ilnext;
+
+    /* Fix the back link of the successor, if any. */
+    if ( trans->ilnext != 0 )
+        trans->ilnext->ilprev = trans->ilprev;
+
+    /* Self loops were never counted as foreign in transitions. */
+    if ( from == to )
+        return;
+
+    to->foreignInTrans -= 1;
+
+    /* No way in from another state any more: the state becomes a misfit. */
+    if ( misfitAccounting && to->foreignInTrans == 0 )
+        misfitList.append( stateList.detach( to ) );
+}
+
+/* Allocate a new transition from 'from' to 'to' over [lowKey, highKey],
+ * append it to from's out list and, when 'to' is non-null, link it into to's
+ * in list. A null 'to' yields an error transition that is in no in list.
+ * Returns the new transition. No check is made here for an existing
+ * transition over the same keys. */
+FsmTrans *FsmGraph::attachNewTrans( FsmState *from, FsmState *to, Key lowKey, Key highKey )
+{
+    FsmTrans *fresh = new FsmTrans();
+
+    /* Record both ends of the transition. */
+    fresh->fromState = from;
+    fresh->toState = to;
+
+    /* Appended at the end of the source state's out list. */
+    from->outList.append( fresh );
+
+    /* Key range covered by the transition. */
+    fresh->lowKey = lowKey;
+    fresh->highKey = highKey;
+
+    /* Error transitions have no target in list to join. */
+    if ( to == 0 )
+        return fresh;
+
+    attachToInList( from, to, to->inList.head, fresh );
+    return fresh;
+}
+
+/* Attach an already allocated, currently unattached transition between from
+ * and to. Both ends of trans must be null on entry (asserted). The
+ * transition is linked into to's in list when 'to' is non-null; placing it
+ * in from's out list is left to the caller. */
+void FsmGraph::attachTrans( FsmState *from, FsmState *to, FsmTrans *trans )
+{
+    assert( trans->fromState == 0 && trans->toState == 0 );
+
+    trans->fromState = from;
+    trans->toState = to;
+
+    /* An error transition (null target) joins no in list. */
+    if ( to == 0 )
+        return;
+
+    attachToInList( from, to, to->inList.head, trans );
+}
+
+/* Redirect a transition away from error and towards some state. This is just
+ * like attachTrans except it requires fromState to be set and does not touch
+ * it. */
+void FsmGraph::redirectErrorTrans( FsmState *from, FsmState *to, FsmTrans *trans )
+{
+ assert( trans->fromState != 0 && trans->toState == 0 );
+ trans->toState = to;
+
+ if ( to != 0 ) {
+ /* Attach using the inList pointer as the head pointer. */
+ attachToInList( from, to, to->inList.head, trans );
+ }
+}
+
+/* Detach trans from the states it connects. from and to must match the
+ * transition's current ends (asserted). Both ends are reset to null and,
+ * when 'to' is non-null, trans is unlinked from to's in list. The
+ * transition is neither removed from from's out list nor deleted. */
+void FsmGraph::detachTrans( FsmState *from, FsmState *to, FsmTrans *trans )
+{
+    assert( trans->fromState == from && trans->toState == to );
+
+    trans->fromState = 0;
+    trans->toState = 0;
+
+    /* An error transition was never in an in list. */
+    if ( to == 0 )
+        return;
+
+    detachFromInList( from, to, to->inList.head, trans );
+}
+
+
+/* Detach a state from the graph. Detaches and deletes transitions in and out
+ * of the state. Empties inList and outList. Removes the state's entry points
+ * and removes the state from the final state set. This function does not
+ * take the state off stateList, clear its SB_ISFINAL bit, or delete it; a
+ * detached state becomes useless and should be deleted by the caller. */
+void FsmGraph::detachState( FsmState *state )
+{
+ /* Detach the in transitions from the inList list of transitions. Each
+ * pass removes the current head, so the loop ends when the list is
+ * empty. */
+ while ( state->inList.head != 0 ) {
+ /* Get pointers to the trans and the state. */
+ FsmTrans *trans = state->inList.head;
+ FsmState *fromState = trans->fromState;
+
+ /* Detach the transitions from the source state. This unlinks trans
+ * from state's in list and nulls its fromState, which is why the
+ * source state was saved above. */
+ detachTrans( fromState, state, trans );
+
+ /* Ok to delete the transition. It is taken out of the source state's
+ * out list first (for a self loop that is state's own out list). */
+ fromState->outList.detach( trans );
+ delete trans;
+ }
+
+ /* Remove the entry points in on the machine. The loop relies on
+ * unsetEntry removing the id from state->entryIds. */
+ while ( state->entryIds.length() > 0 )
+ unsetEntry( state->entryIds[0], state );
+
+ /* Detach out range transitions. The successor is fetched before the
+ * current transition is deleted. */
+ for ( TransList::Iter trans = state->outList; trans.lte(); ) {
+ TransList::Iter next = trans.next();
+ detachTrans( state, trans->toState, trans );
+ delete trans;
+ trans = next;
+ }
+
+ /* Every element was deleted above, so just forget the stale list
+ * pointers. */
+ state->outList.abandon();
+
+ /* Unset final stateness before detaching from graph. */
+ if ( state->stateBits & SB_ISFINAL )
+ finStateSet.remove( state );
+}
+
+
+/* Duplicate a transition. The copy runs from 'from' to the same target as
+ * srcTrans and takes on srcTrans' embedded data through addInTrans. It is
+ * meant for dropping a transition into a free spot where nothing needs to be
+ * crossed. The copy is not placed in any out list and its keys are not set;
+ * both are left to the caller. Returns the new transition. */
+FsmTrans *FsmGraph::dupTrans( FsmState *from, FsmTrans *srcTrans )
+{
+    FsmTrans *copy = new FsmTrans();
+
+    /* A freshly made transition is unattached, so it can be attached. */
+    attachTrans( from, srcTrans->toState, copy );
+
+    /* Draw in the data of the original through the merge callback. */
+    addInTrans( copy, srcTrans );
+
+    return copy;
+}
+
+/* In crossing, src trans and dest trans both go to existing states. Make one
+ * state from the sets of states that src and dest trans go to. destTrans is
+ * redirected to that combined state (created and queued on md's fill list
+ * if it does not exist yet) and srcTrans' data is added into it. Both
+ * transitions are dereferenced as having non-null targets. Always returns
+ * destTrans. */
+FsmTrans *FsmGraph::fsmAttachStates( MergeData &md, FsmState *from,
+ FsmTrans *destTrans, FsmTrans *srcTrans )
+{
+ /* The priorities are equal. We must merge the transitions. Does the
+ * existing trans go to the state we are to attach to? ie, are we to
+ * simply double up the transition? */
+ FsmState *toState = srcTrans->toState;
+ FsmState *existingState = destTrans->toState;
+
+ if ( existingState == toState ) {
+ /* The transition is a double up to the same state. Copy the src
+ * trans into itself. We don't need to merge in the from out trans
+ * data, that was done already. */
+ addInTrans( destTrans, srcTrans );
+ }
+ else {
+ /* The trans is not a double up. Dest trans cannot be the same as src
+ * trans. Set up the state set. */
+ StateSet stateSet;
+
+ /* We go to all the states the existing trans goes to, plus... A state
+ * with a stateDictEl stands for the set recorded in that element;
+ * otherwise it stands for itself. */
+ if ( existingState->stateDictEl == 0 )
+ stateSet.insert( existingState );
+ else
+ stateSet.insert( existingState->stateDictEl->stateSet );
+
+ /* ... all the states that we have been told to go to. */
+ if ( toState->stateDictEl == 0 )
+ stateSet.insert( toState );
+ else
+ stateSet.insert( toState->stateDictEl->stateSet );
+
+ /* Look for the state. If it is not there already, make it. The code
+ * below relies on lastFound being set whether or not the insert made
+ * a new element. */
+ StateDictEl *lastFound;
+ if ( md.stateDict.insert( stateSet, &lastFound ) ) {
+ /* Make a new state representing the combination of states in
+ * stateSet. It gets added to the fill list. This means that we
+ * need to fill in its transitions sometime in the future. We
+ * don't do that now (ie, do not recurse). */
+ FsmState *combinState = addState();
+
+ /* Link up the dict element and the state. */
+ lastFound->targState = combinState;
+ combinState->stateDictEl = lastFound;
+
+ /* Add to the fill list. */
+ md.fillListAppend( combinState );
+ }
+
+ /* Get the state that was inserted or found. */
+ FsmState *targ = lastFound->targState;
+
+ /* Detach the state from existing state. */
+ detachTrans( from, existingState, destTrans );
+
+ /* Re-attach to the new target. */
+ attachTrans( from, targ, destTrans );
+
+ /* Add in src trans to the existing transition that we redirected to
+ * the new state. We don't need to merge in the from out trans data,
+ * that was done already. */
+ addInTrans( destTrans, srcTrans );
+ }
+
+ return destTrans;
+}
+
+/* Merge srcTrans into destTrans when the two are to be crossed, taking care
+ * of either one being an error transition (null toState). The result is
+ * always destTrans, possibly redirected to a different target. */
+FsmTrans *FsmGraph::mergeTrans( MergeData &md, FsmState *from,
+        FsmTrans *destTrans, FsmTrans *srcTrans )
+{
+    /* Src goes to error. Whether dest goes to error or to a real state, its
+     * target stays as it is and only src's data is added in. */
+    if ( srcTrans->toState == 0 ) {
+        addInTrans( destTrans, srcTrans );
+        return destTrans;
+    }
+
+    /* Src goes somewhere but dest goes to error: dest takes over src's
+     * target, which requires a detach and a reattach. */
+    if ( destTrans->toState == 0 ) {
+        detachTrans( from, destTrans->toState, destTrans );
+        attachTrans( from, srcTrans->toState, destTrans );
+        addInTrans( destTrans, srcTrans );
+        return destTrans;
+    }
+
+    /* Both go somewhere, run the actual cross. */
+    return fsmAttachStates( md, from, destTrans, srcTrans );
+}
+
+/* Cross destTrans with srcTrans according to priority. If dest has the
+ * higher priority it is returned unchanged and src is ignored. If the
+ * priorities are equal the two are merged into dest. If src has the higher
+ * priority, dest is detached and a fresh copy of src is returned in its
+ * place.
+ * NOTE(review): in the last case destTrans is detached but not deleted here,
+ * and the caller receives a different pointer; confirm who frees it. */
+FsmTrans *FsmGraph::crossTransitions( MergeData &md, FsmState *from,
+        FsmTrans *destTrans, FsmTrans *srcTrans )
+{
+    /* Negative: src wins. Positive: dest wins. Zero: equal priority. */
+    const int order = comparePrior( destTrans->priorTable, srcTrans->priorTable );
+
+    /* Dest has the higher priority, keep it as it is. */
+    if ( order > 0 )
+        return destTrans;
+
+    /* Same priority, the transitions must be merged. */
+    if ( order == 0 )
+        return mergeTrans( md, from, destTrans, srcTrans );
+
+    /* Src has the higher priority and overwrites dest. */
+    detachTrans( from, destTrans->toState, destTrans );
+    return dupTrans( from, srcTrans );
+}
+
+/* Copy the transitions in srcList to the outlist of dest. The srcList should
+ * not be the outList of dest, otherwise you would be copying the contents of
+ * srcList into itself as it's iterated: bad news. The two lists are walked
+ * together with a PairIter, a replacement out list is built range by range,
+ * and dest's out list is replaced by it at the end. */
+void FsmGraph::outTransCopy( MergeData &md, FsmState *dest, FsmTrans *srcList )
+{
+ /* The destination list. */
+ TransList destList;
+
+ /* Set up an iterator to stop at breaks. s1 is dest's current out list,
+ * s2 is the source list. */
+ PairIter<FsmTrans> outPair( dest->outList.head, srcList );
+ for ( ; !outPair.end(); outPair++ ) {
+ switch ( outPair.userState ) {
+ case RangeInS1: {
+ /* Only dest covers this range; its transition is kept. The pair
+ * iter is the authority on the keys. It may have needed to break
+ * the dest range. */
+ FsmTrans *destTrans = outPair.s1Tel.trans;
+ destTrans->lowKey = outPair.s1Tel.lowKey;
+ destTrans->highKey = outPair.s1Tel.highKey;
+ destList.append( destTrans );
+ break;
+ }
+ case RangeInS2: {
+ /* Only the source covers this range; duplicate the source
+ * transition onto dest. */
+ FsmTrans *newTrans = dupTrans( dest, outPair.s2Tel.trans );
+
+ /* Set up the transition's keys and append to the dest list. */
+ newTrans->lowKey = outPair.s2Tel.lowKey;
+ newTrans->highKey = outPair.s2Tel.highKey;
+ destList.append( newTrans );
+ break;
+ }
+ case RangeOverlap: {
+ /* Exact overlap, cross them. The result is not necessarily the
+ * dest transition that was passed in. */
+ FsmTrans *newTrans = crossTransitions( md, dest,
+ outPair.s1Tel.trans, outPair.s2Tel.trans );
+
+ /* Set up the transition's keys and append to the dest list. */
+ newTrans->lowKey = outPair.s1Tel.lowKey;
+ newTrans->highKey = outPair.s1Tel.highKey;
+ destList.append( newTrans );
+ break;
+ }
+ case BreakS1: {
+ /* Since we are always writing to the dest trans, the dest needs
+ * to be copied when it is broken. The copy goes into the first
+ * half of the break to "break it off". */
+ outPair.s1Tel.trans = dupTrans( dest, outPair.s1Tel.trans );
+ break;
+ }
+ case BreakS2:
+ /* The source is only read, so a break in it needs no copy. */
+ break;
+ }
+ }
+
+ /* Abandon the old outList and transfer destList into it. */
+ dest->outList.transfer( destList );
+}
+
+
+/* Retarget everything that leads into src so that it leads into dest: the
+ * start state designation, the entry points and all in transitions. dest
+ * and src must be different states (asserted). */
+void FsmGraph::inTransMove( FsmState *dest, FsmState *src )
+{
+    /* Do not try to move in trans to and from the same state. */
+    assert( dest != src );
+
+    /* The start state role follows the move. */
+    if ( src == startState ) {
+        unsetStartState();
+        setStartState( dest );
+    }
+
+    /* Repoint each entry id of src at dest. */
+    for ( EntryIdSet::Iter entry = src->entryIds; entry.lte(); entry++ )
+        changeEntry( *entry, dest, src );
+
+    /* Drain src's in list. Each detach removes the current head, so the
+     * loop ends once nothing leads into src any more. */
+    while ( src->inList.head != 0 ) {
+        FsmTrans *moving = src->inList.head;
+
+        /* Saved up front because detachTrans nulls trans->fromState. */
+        FsmState *origin = moving->fromState;
+
+        detachTrans( origin, src, moving );
+        attachTrans( origin, dest, moving );
+    }
+}
diff --git a/src/fsmbase.cc b/src/fsmbase.cc
new file mode 100644
index 00000000..52698a1a
--- /dev/null
+++ b/src/fsmbase.cc
@@ -0,0 +1,603 @@
+/*
+ * Copyright 2006-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+
+#include "fsmgraph.h"
+
+/* Append a state to the tail of the singly linked fill list, which is
+ * threaded through alg.next. */
+void MergeData::fillListAppend( FsmState *state )
+{
+	/* The new element is always the last one. */
+	state->alg.next = 0;
+
+	/* Non-empty list: hook the state on behind the current tail. Empty
+	 * list: the state is also the head. */
+	if ( stfillHead != 0 )
+		stfillTail->alg.next = state;
+	else
+		stfillHead = state;
+
+	/* Either way the state is the new tail. */
+	stfillTail = state;
+}
+
+/* Graph constructor. */
+FsmGraph::FsmGraph()
+:
+ /* No start state. */
+ startState(0),
+ errState(0),
+
+ /* Misfit accounting is a switch, turned on only at specific times. It
+ * controls what happens when states have no way in from the outside
+ * world.. */
+ misfitAccounting(false),
+
+ lmRequiresErrorState(false)
+{
+}
+
+/* Copy all graph data including transitions. Every state of the source graph
+ * is duplicated; the duplicate is recorded in the source state's
+ * alg.stateMap and used to re-target transitions, entry points, the start
+ * state and the final state set. The error state pointer is not carried
+ * over. */
+FsmGraph::FsmGraph( const FsmGraph &graph )
+:
+	/* Lists start empty. Will be filled by copy. */
+	stateList(),
+	misfitList(),
+
+	/* Copy in the entry points,
+	 * pointers will be resolved later. */
+	entryPoints(graph.entryPoints),
+	startState(graph.startState),
+	errState(0),
+
+	/* Will be filled by copy. */
+	finStateSet(),
+
+	/* Misfit accounting is only on during merging. */
+	misfitAccounting(false),
+
+	lmRequiresErrorState(graph.lmRequiresErrorState)
+{
+	/* Create the states and record their map in the original state. */
+	StateList::Iter origState = graph.stateList;
+	for ( ; origState.lte(); origState++ ) {
+		/* Make the new state. */
+		FsmState *newState = new FsmState( *origState );
+
+		/* Add the state to the list. */
+		stateList.append( newState );
+
+		/* Set the mapsTo item of the old state. */
+		origState->alg.stateMap = newState;
+	}
+
+	/* Dereference all the state maps. */
+	for ( StateList::Iter state = stateList; state.lte(); state++ ) {
+		for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) {
+			/* The copied transition still points at the original target in
+			 * the source machine. The target's duplicate is in the
+			 * state map. */
+			FsmState *toState = trans->toState != 0 ? trans->toState->alg.stateMap : 0;
+
+			/* Attach the transition to the duplicate. */
+			trans->toState = 0;
+			attachTrans( state, toState, trans );
+		}
+	}
+
+	/* Fix the state pointers in the entry points array. */
+	EntryMapEl *eel = entryPoints.data;
+	for ( int e = 0; e < entryPoints.length(); e++, eel++ ) {
+		/* Get the duplicate of the state. */
+		eel->value = eel->value->alg.stateMap;
+
+		/* Foreign in transitions must be built up when duping machines so
+		 * increment it here. */
+		eel->value->foreignInTrans += 1;
+	}
+
+	/* Fix the start state pointer and the new start state's count of in
+	 * transitions. A graph that has no start state (for example one that
+	 * was only default constructed) has nothing to remap. */
+	if ( startState != 0 ) {
+		startState = startState->alg.stateMap;
+		startState->foreignInTrans += 1;
+	}
+
+	/* Build the final state set. */
+	StateSet::Iter st = graph.finStateSet;
+	for ( ; st.lte(); st++ )
+		finStateSet.insert((*st)->alg.stateMap);
+}
+
+/* Tear the graph down: first empty every state's out-transition list, then
+ * empty the state list itself. */
+FsmGraph::~FsmGraph()
+{
+	/* Transitions go first, one state at a time. */
+	for ( StateList::Iter st = stateList; st.lte(); st++ )
+		st->outList.empty();
+
+	/* Now the states. */
+	stateList.empty();
+}
+
+/* Mark a state as final: set its SB_ISFINAL bit and add it to finStateSet.
+ * Does nothing when the state is already final. */
+void FsmGraph::setFinState( FsmState *state )
+{
+	bool alreadyFinal = ( state->stateBits & SB_ISFINAL ) != 0;
+	if ( ! alreadyFinal ) {
+		state->stateBits |= SB_ISFINAL;
+		finStateSet.insert( state );
+	}
+}
+
+/* Make a state non-final: clear its SB_ISFINAL bit and drop it from the
+ * final state set. Does nothing when the state is not final. */
+void FsmGraph::unsetFinState( FsmState *state )
+{
+	bool isFinal = ( state->stateBits & SB_ISFINAL ) != 0;
+	if ( isFinal ) {
+		/* A state that loses its final status must give up the out data
+		 * that only final states may carry. */
+		clearOutData( state );
+
+		state->stateBits &= ~ SB_ISFINAL;
+		finStateSet.remove( state );
+	}
+}
+
+/* Install state as the start state. There must be no start state currently
+ * set. The start-state role counts as one foreign in-transition. */
+void FsmGraph::setStartState( FsmState *state )
+{
+	/* Should change from unset to set. */
+	assert( startState == 0 );
+	startState = state;
+
+	/* With misfit accounting on, a state whose foreign-in count is about to
+	 * rise from zero moves from the misfit list to the main state list. */
+	if ( misfitAccounting && state->foreignInTrans == 0 )
+		stateList.append( misfitList.detach( state ) );
+
+	/* Count the start-state reference. */
+	state->foreignInTrans += 1;
+}
+
+/* Remove the start-state role from the current start state. A start state
+ * must currently be set. */
+void FsmGraph::unsetStartState()
+{
+	/* Should change from set to unset. */
+	assert( startState != 0 );
+
+	FsmState *former = startState;
+
+	/* The start-state reference no longer counts as a foreign entry. */
+	former->foreignInTrans -= 1;
+
+	/* With misfit accounting on, a state whose foreign-in count just fell
+	 * to zero leaves the main list for the misfit list. */
+	if ( misfitAccounting && former->foreignInTrans == 0 )
+		misfitList.append( stateList.detach( former ) );
+
+	startState = 0;
+}
+
+/* Label state with entry id, making it a named entry point. Has no effect
+ * when the state already carries that id. */
+void FsmGraph::setEntry( int id, FsmState *state )
+{
+	/* Record the id on the state. A failed insert means the state is
+	 * already labelled with it and there is nothing more to do. */
+	if ( ! state->entryIds.insert( id ) )
+		return;
+
+	/* Ids are not unique across states, hence the multi insert. */
+	entryPoints.insertMulti( id, state );
+
+	/* With misfit accounting on, a state whose foreign-in count is about to
+	 * rise from zero moves from the misfit list to the main state list. */
+	if ( misfitAccounting && state->foreignInTrans == 0 )
+		stateList.append( misfitList.detach( state ) );
+
+	/* The entry point counts as one foreign in-transition. */
+	state->foreignInTrans += 1;
+}
+
+/* Break the link between entry id and state. The state loses that entry
+ * point. The caller must guarantee that id really is mapped to state; the
+ * search below does not stop otherwise. */
+void FsmGraph::unsetEntry( int id, FsmState *state )
+{
+	/* Locate the run of map records stored under id. */
+	EntryMapEl *enLow = 0, *enHigh = 0;
+	entryPoints.findMulti( id, enLow, enHigh );
+
+	/* Step through the run to the record that names this state. */
+	EntryMapEl *record = enLow;
+	while ( record->value != state )
+		record += 1;
+
+	/* Drop the record from the map. */
+	entryPoints.remove( record );
+
+	/* Drop the state's side of the link. */
+	state->entryIds.remove( id );
+	state->foreignInTrans -= 1;
+
+	/* With misfit accounting on, a state whose foreign-in count just fell
+	 * to zero leaves the main list for the misfit list. */
+	if ( misfitAccounting && state->foreignInTrans == 0 )
+		misfitList.append( stateList.detach( state ) );
+}
+
+/* Remove all association of an id with states. Every state labelled with id
+ * loses that entry point and the matching records are taken out of the entry
+ * point map. An id that is not mapped to any state is ignored. */
+void FsmGraph::unsetEntry( int id )
+{
+	/* Find the entry point in on id. */
+	EntryMapEl *enLow = 0, *enHigh = 0;
+	entryPoints.findMulti( id, enLow, enHigh );
+
+	/* Both bounds still being null means nothing is stored under id.
+	 * Without this check the loop below would run once with a null record
+	 * and removeMulti would be handed null bounds. */
+	if ( enLow == 0 || enHigh == 0 )
+		return;
+
+	for ( EntryMapEl *mel = enLow; mel <= enHigh; mel++ ) {
+		/* Remove the state's sense of the link. */
+		mel->value->entryIds.remove( id );
+		mel->value->foreignInTrans -= 1;
+		if ( misfitAccounting ) {
+			/* If the number of foreign in transitions just went down to 0
+			 * then take it off the main list and put it on the misfit list. */
+			if ( mel->value->foreignInTrans == 0 )
+				misfitList.append( stateList.detach( mel->value ) );
+		}
+	}
+
+	/* Remove the records from the entry points map. */
+	entryPoints.removeMulti( enLow, enHigh );
+}
+
+
+/* Re-target entry point id from one state to another. The map record for
+ * (id, from) is rewritten to point at 'to'. The caller must guarantee that
+ * id is mapped to 'from'; the search below does not stop otherwise. */
+void FsmGraph::changeEntry( int id, FsmState *to, FsmState *from )
+{
+	/* Locate the run of map records stored under id. */
+	EntryMapEl *enLow = 0, *enHigh = 0;
+	entryPoints.findMulti( id, enLow, enHigh );
+
+	/* Step to the record naming 'from' and redirect it. */
+	EntryMapEl *record = enLow;
+	while ( record->value != from )
+		record += 1;
+	record->value = to;
+
+	/* 'from' gives up the id and the foreign reference it stood for. */
+	from->entryIds.remove( id );
+	from->foreignInTrans -= 1;
+
+	/* With misfit accounting on, a count that just fell to zero sends
+	 * 'from' to the misfit list. */
+	if ( misfitAccounting && from->foreignInTrans == 0 )
+		misfitList.append( stateList.detach( from ) );
+
+	/* 'to' takes the id. If it already carried the id, its count stays as
+	 * it is. */
+	if ( to->entryIds.insert( id ) != 0 ) {
+		/* With misfit accounting on, a count about to rise from zero
+		 * brings 'to' back onto the main list. */
+		if ( misfitAccounting && to->foreignInTrans == 0 )
+			stateList.append( misfitList.detach( to ) );
+
+		to->foreignInTrans += 1;
+	}
+}
+
+
+/* Strip every named entry point from the machine. */
+void FsmGraph::unsetAllEntryPoints()
+{
+	for ( EntryMap::Iter en = entryPoints; en.lte(); en++ ) {
+		FsmState *target = en->value;
+
+		/* A state holding several ids shows up several times in the map.
+		 * It is handled in full on its first appearance and skipped
+		 * afterwards because its id set is then empty. */
+		if ( target->entryIds.length() <= 0 )
+			continue;
+
+		/* All of the state's entry references go away together. */
+		target->foreignInTrans -= target->entryIds.length();
+
+		/* With misfit accounting on, a count that just fell to zero sends
+		 * the state to the misfit list. */
+		if ( misfitAccounting && target->foreignInTrans == 0 )
+			misfitList.append( stateList.detach( target ) );
+
+		/* Forget all of the state's ids in one go. */
+		target->entryIds.empty();
+	}
+
+	/* Finally wipe the map itself. */
+	entryPoints.empty();
+}
+
+/* Append epsilon transition id to the epsilon list of every final state. */
+void FsmGraph::epsilonTrans( int id )
+{
+	for ( StateSet::Iter fin = finStateSet; fin.lte(); fin++ ) {
+		FsmState *finalState = *fin;
+		finalState->epsilonTrans.append( id );
+	}
+}
+
+/* Set SB_ISMARKED on state and on everything reachable from it by following
+ * out-transitions forward. Used for removing states that have no path into
+ * them. */
+void FsmGraph::markReachableFromHere( FsmState *state )
+{
+	/* An already marked state ends the recursion. */
+	if ( state->stateBits & SB_ISMARKED )
+		return;
+
+	/* Mark before descending so cycles terminate. */
+	state->stateBits |= SB_ISMARKED;
+
+	/* Visit the target of every out-transition that has one. */
+	for ( TransList::Iter edge = state->outList; edge.lte(); edge++ ) {
+		FsmState *target = edge->toState;
+		if ( target != 0 )
+			markReachableFromHere( target );
+	}
+}
+
+/* Like markReachableFromHere(), but the walk does not descend into final
+ * states: a final target is neither marked nor followed. */
+void FsmGraph::markReachableFromHereStopFinal( FsmState *state )
+{
+	/* An already marked state ends the recursion. */
+	if ( state->stateBits & SB_ISMARKED )
+		return;
+
+	/* Mark before descending so cycles terminate. */
+	state->stateBits |= SB_ISMARKED;
+
+	/* Visit each non-final target. */
+	for ( TransList::Iter edge = state->outList; edge.lte(); edge++ ) {
+		FsmState *target = edge->toState;
+		if ( target == 0 || target->isFinState() )
+			continue;
+		markReachableFromHereStopFinal( target );
+	}
+}
+
+/* Set SB_ISMARKED on state and on everything that can reach it, walking the
+ * in-transition lists backwards. Used for removing dead end paths in
+ * graphs. */
+void FsmGraph::markReachableFromHereReverse( FsmState *state )
+{
+	/* An already marked state ends the recursion. */
+	if ( state->stateBits & SB_ISMARKED )
+		return;
+
+	/* Mark before descending so cycles terminate. */
+	state->stateBits |= SB_ISMARKED;
+
+	/* Visit the source of every transition that enters this state. */
+	for ( TransInList::Iter edge = state->inList; edge.lte(); edge++ ) {
+		FsmState *source = edge->fromState;
+		markReachableFromHereReverse( source );
+	}
+}
+
+/* Report whether the start state can only be entered as the start state.
+ * It is isolated when it has no in-transitions and no named entry
+ * points. */
+bool FsmGraph::isStartStateIsolated()
+{
+	/* Any in-transition means the state is not isolated. */
+	bool hasInTrans = startState->inList.head != 0;
+
+	/* Any named entry point also means the state is not isolated. The
+	 * second test is only evaluated when the first did not already
+	 * decide. */
+	return ! hasInTrans && ! ( startState->entryIds.length() > 0 );
+}
+
+/* Add every (id, state) record of other's entry point map to this graph's
+ * map. Only the map records are copied here. */
+void FsmGraph::copyInEntryPoints( FsmGraph *other )
+{
+	/* Ids are not unique, so a multi insert is required. */
+	for ( EntryMap::Iter rec = other->entryPoints; rec.lte(); rec++ ) {
+		entryPoints.insertMulti( rec->key, rec->value );
+	}
+}
+
+
+/* Clear the SB_ISFINAL bit on every final state and empty the final state
+ * set. Unlike unsetFinState(), this does not call clearOutData(). */
+void FsmGraph::unsetAllFinStates()
+{
+	for ( StateSet::Iter fin = finStateSet; fin.lte(); fin++ ) {
+		FsmState *finalState = *fin;
+		finalState->stateBits &= ~ SB_ISFINAL;
+	}
+
+	finStateSet.empty();
+}
+
+/* OR the given bits into stateBits of every state in the final state set. */
+void FsmGraph::setFinBits( int finStateBits )
+{
+	const int numFinal = finStateSet.length();
+	for ( int idx = 0; idx < numFinal; idx++ ) {
+		FsmState *finalState = finStateSet.data[idx];
+		finalState->stateBits |= finStateBits;
+	}
+}
+
+
+/* Debug check: each transition on a state's out list must name that state as
+ * fromState, and each transition on its in list must name it as toState. */
+void FsmGraph::verifyIntegrity()
+{
+	for ( StateList::Iter st = stateList; st.lte(); st++ ) {
+		/* Out list: fromState has to be this state. */
+		for ( TransList::Iter outgoing = st->outList; outgoing.lte(); outgoing++ ) {
+			assert( outgoing->fromState == st );
+		}
+
+		/* In list: toState has to be this state. */
+		for ( TransInList::Iter incoming = st->inList; incoming.lte(); incoming++ ) {
+			assert( incoming->toState == st );
+		}
+	}
+}
+
+/* Debug check: every state on the state list must be reachable from the
+ * start state or from one of the named entry points. Marks are cleared
+ * again before returning. */
+void FsmGraph::verifyReachability()
+{
+	/* Mark outward from the start state and each entry point. */
+	markReachableFromHere( startState );
+	for ( EntryMap::Iter rec = entryPoints; rec.lte(); rec++ ) {
+		markReachableFromHere( rec->value );
+	}
+
+	/* Every state must now be marked; clear the mark as we go. */
+	for ( StateList::Iter cur = stateList; cur.lte(); cur++ ) {
+		assert( cur->stateBits & SB_ISMARKED );
+		cur->stateBits &= ~ SB_ISMARKED;
+	}
+}
+
+/* Debug check: every state on the state list must have a path to some final
+ * state. The start state is exempt. Marks are cleared again before
+ * returning. */
+void FsmGraph::verifyNoDeadEndStates()
+{
+	/* Walk backwards from each final state, marking what can reach it. */
+	for ( StateSet::Iter fin = finStateSet; fin.lte(); fin++ ) {
+		markReachableFromHereReverse( *fin );
+	}
+
+	/* The start state is marked unconditionally. This has to come after
+	 * the walks above, otherwise a walk would stop short at it. */
+	startState->stateBits |= SB_ISMARKED;
+
+	/* Every state must now be marked; clear the mark as we go. */
+	for ( StateList::Iter cur = stateList; cur.lte(); cur++ ) {
+		assert( cur->stateBits & SB_ISMARKED );
+		cur->stateBits &= ~ SB_ISMARKED;
+	}
+}
+
+/* Recursive step of the depth-first ordering: append state to the state list
+ * unless it is already flagged SB_ONLIST, then descend into the targets of
+ * its out-transitions. */
+void FsmGraph::depthFirstOrdering( FsmState *state )
+{
+	/* Already placed on the list, nothing to do. */
+	if ( state->stateBits & SB_ONLIST )
+		return;
+
+	/* Place the state before its successors. */
+	state->stateBits |= SB_ONLIST;
+	stateList.append( state );
+
+	/* Descend into every transition target that exists. */
+	for ( TransList::Iter edge = state->outList; edge.lte(); edge++ ) {
+		FsmState *target = edge->toState;
+		if ( target != 0 )
+			depthFirstOrdering( target );
+	}
+}
+
+/* Ordering states by transition connections. */
+void FsmGraph::depthFirstOrdering()
+{
+ /* Init on state list flags. */
+ for ( StateList::Iter st = stateList; st.lte(); st++ )
+ st->stateBits &= ~SB_ONLIST;
+
+ /* Clear out the state list, we will rebuild it. */
+ int stateListLen = stateList.length();
+ stateList.abandon();
+
+ /* Add back to the state list from the start state and all other entry
+ * points. */
+ if ( errState != 0 )
+ depthFirstOrdering( errState );
+ depthFirstOrdering( startState );
+ for ( EntryMap::Iter en = entryPoints; en.lte(); en++ )
+ depthFirstOrdering( en->value );
+
+ /* Make sure we put everything back on. */
+ assert( stateListLen == stateList.length() );
+}
+
+/* Stable partition of the state list by final status: non-final states keep
+ * their order at the front, final states keep their order at the back. */
+void FsmGraph::sortStatesByFinal()
+{
+	/* One pass over the elements that were on the list at the start.
+	 * 'stop' is the original tail, so states re-appended behind it during
+	 * the pass are never revisited. */
+	FsmState *cur = 0;
+	FsmState *upcoming = stateList.head;
+	FsmState *stop = stateList.tail;
+
+	while ( cur != stop ) {
+		/* Advance, and fetch the successor before the list is touched. */
+		cur = upcoming;
+		upcoming = cur->next;
+
+		/* Final states are moved to the end of the list. */
+		if ( cur->isFinState() ) {
+			stateList.detach( cur );
+			stateList.append( cur );
+		}
+	}
+}
+
+/* Number the states consecutively in list order, starting at base. The
+ * number is stored in alg.stateNum. */
+void FsmGraph::setStateNumbers( int base )
+{
+	int nextNum = base;
+	for ( StateList::Iter cur = stateList; cur.lte(); cur++ ) {
+		cur->alg.stateNum = nextNum;
+		nextNum += 1;
+	}
+}
+
+
+bool FsmGraph::checkErrTrans( FsmState *state, FsmTrans *trans )
+{
+ /* Might go directly to error state. */
+ if ( trans->toState == 0 )
+ return true;
+
+ if ( trans->prev == 0 ) {
+ /* If this is the first transition. */
+ if ( keyOps->minKey < trans->lowKey )
+ return true;
+ }
+ else {
+ /* Not the first transition. Compare against the prev. */
+ FsmTrans *prev = trans->prev;
+ Key nextKey = prev->highKey;
+ nextKey.increment();
+ if ( nextKey < trans->lowKey )
+ return true;
+ }
+ return false;
+}
+
+/* Report whether state has an uncovered key range at the top end: true when
+ * it has no out-transitions at all, or when its last transition stops short
+ * of the largest key of the alphabet. */
+bool FsmGraph::checkErrTransFinish( FsmState *state )
+{
+	/* No ranges at all: everything is a gap. */
+	if ( state->outList.length() == 0 )
+		return true;
+
+	/* Look for a gap above the last range. */
+	FsmTrans *finalTrans = state->outList.tail;
+	if ( finalTrans->highKey < keyOps->maxKey )
+		return true;
+
+	return false;
+}
+
+bool FsmGraph::hasErrorTrans()
+{
+ bool result;
+ for ( StateList::Iter st = stateList; st.lte(); st++ ) {
+ for ( TransList::Iter tr = st->outList; tr.lte(); tr++ ) {
+ result = checkErrTrans( st, tr );
+ if ( result )
+ return true;
+ }
+ result = checkErrTransFinish( st );
+ if ( result )
+ return true;
+ }
+ return false;
+}
diff --git a/src/fsmcodegen.cc b/src/fsmcodegen.cc
new file mode 100644
index 00000000..5d63c079
--- /dev/null
+++ b/src/fsmcodegen.cc
@@ -0,0 +1,918 @@
+/*
+ * Copyright 2006-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <string.h>
+#include <stdbool.h>
+
+#include <sstream>
+#include <iostream>
+
+#include "fsmcodegen.h"
+
+using std::ostream;
+using std::ostringstream;
+using std::string;
+using std::cerr;
+using std::endl;
+
+/* Set up a code generator that writes to 'out' for the given reduced machine
+ * and fsm tables. All write switches start on; no skip_tokpref label has
+ * been requested yet. */
+FsmCodeGen::FsmCodeGen( ostream &out,
+		RedFsm *redFsm, fsm_tables *fsmTables )
+:
+	out(out),
+	redFsm(redFsm),
+	fsmTables(fsmTables),
+
+	/* Counters and switches. */
+	codeGenErrCount(0),
+	dataPrefix(true),
+	writeFirstFinal(true),
+	writeErr(true),
+
+	/* Set to true once generated code jumps to skip_tokpref. */
+	skipTokprefLabelNeeded(false)
+{
+}
+
+/* Name of the generated machine; always "parser". */
+string FsmCodeGen::FSM_NAME()
+{
+	string name( "parser" );
+	return name;
+}
+
+/* Return the id of the start state of the reduced machine, formatted as a
+ * decimal integer. */
+string FsmCodeGen::START_STATE_ID()
+{
+	ostringstream ret;
+	ret << redFsm->startState->id;
+	return ret.str();
+}
+
+/* Write the body of the actions array: a leading 0, then for each action
+ * table its length followed by the ids of its actions. A line break is
+ * inserted after every eighth value. */
+std::ostream &FsmCodeGen::ACTIONS_ARRAY()
+{
+	out << "\t0, ";
+
+	/* Number of values written so far; the leading 0 is the first. */
+	int written = 1;
+
+	for ( GenActionTableMap::Iter table = redFsm->actionMap; table.lte(); table++ ) {
+		/* The length is always followed by a separator. */
+		out << table->key.length() << ", ";
+		if ( written++ % 8 == 7 )
+			out << "\n\t";
+
+		for ( GenActionTable::Iter el = table->key; el.lte(); el++ ) {
+			out << el->value->actionId;
+
+			/* Only the very last value of the whole array goes without
+			 * a separator. */
+			bool veryLast = table.last() && el.last();
+			if ( ! veryLast )
+				out << ", ";
+
+			if ( written++ % 8 == 7 )
+				out << "\n\t";
+		}
+	}
+
+	out << "\n";
+	return out;
+}
+
+
+/* Expression text for the fsm_cs variable: ACCESS() followed by
+ * "fsm_cs". */
+string FsmCodeGen::CS()
+{
+	ostringstream expr;
+	expr << ACCESS();
+	expr << "fsm_cs";
+	return expr.str();
+}
+
+/* Expression for the wide key; identical to GET_KEY() here. */
+string FsmCodeGen::GET_WIDE_KEY()
+{
+	string keyExpr = GET_KEY();
+	return keyExpr;
+}
+
+/* Expression for the wide key in a given state. The state argument is not
+ * consulted; the result is the same as GET_KEY(). */
+string FsmCodeGen::GET_WIDE_KEY( RedState *state )
+{
+	string keyExpr = GET_KEY();
+	return keyExpr;
+}
+
+/* Expression that reads the current key: a plain dereference of P(). */
+string FsmCodeGen::GET_KEY()
+{
+	ostringstream expr;
+	expr << "(*";
+	expr << P();
+	expr << ")";
+	return expr.str();
+}
+
+/* Return a string of 'level' tab characters, or an empty string when level
+ * is zero or negative. Used to indent the nested binary search. */
+string FsmCodeGen::TABS( int level )
+{
+	string indent;
+	for ( int depth = 0; depth < level; depth++ )
+		indent += "\t";
+	return indent;
+}
+
+/* Format a key for the generated code by streaming key.getVal() into a
+ * string. */
+string FsmCodeGen::KEY( Key key )
+{
+	ostringstream text;
+	text << key.getVal();
+	string formatted = text.str();
+	return formatted;
+}
+
+/* Emit an assignment of the item's longest-match id to ACT(). */
+void FsmCodeGen::SET_ACT( ostream &ret, InlineItem *item )
+{
+	ret << ACT() << " = ";
+	ret << item->longestMatchPart->longestMatchId << ";";
+}
+
+/* Emit the statement that sets TOKEND() to TOKPREF() plus the offset of P()
+ * from BLOCK_START(), plus one. The item argument is not used. */
+void FsmCodeGen::SET_TOKEND( ostream &ret, InlineItem *item )
+{
+	ret << "{ " << TOKEND() << " = " << TOKPREF()
+		<< " + ( " << P() << " - " << BLOCK_START()
+		<< " ) + 1; }";
+}
+
+/* Emit the statement that sets TOKEND() to TOKPREF() plus the offset of P()
+ * from BLOCK_START(), without the extra one that SET_TOKEND() adds. The item
+ * argument is not used. */
+void FsmCodeGen::SET_TOKEND_0( ostream &ret, InlineItem *item )
+{
+	ret << "{ " << TOKEND() << " = " << TOKPREF()
+		<< " + ( " << P() << " - " << BLOCK_START()
+		<< " ); }";
+}
+
+/* Emit an assignment of 0 to TOKSTART(). The item argument is not used. */
+void FsmCodeGen::INIT_TOKSTART( ostream &ret, InlineItem *item )
+{
+	ret << TOKSTART();
+	ret << " = 0;";
+}
+
+/* Emit an assignment of 0 to ACT(). The item argument is not used. */
+void FsmCodeGen::INIT_ACT( ostream &ret, InlineItem *item )
+{
+	ret << ACT();
+	ret << " = 0;";
+}
+
+/* Emit an assignment of P() to TOKSTART(). The item argument is not used. */
+void FsmCodeGen::SET_TOKSTART( ostream &ret, InlineItem *item )
+{
+	ret << TOKSTART() << " = ";
+	ret << P() << ";";
+}
+
+/* Emit an assignment of the token's id to MATCHED_TOKEN(). */
+void FsmCodeGen::EMIT_TOKEN( ostream &ret, LangEl *token )
+{
+	ret << " " << MATCHED_TOKEN();
+	ret << " = " << token->id << ";\n";
+}
+
+/* Emit the switch over ACT() that picks the matched token among the token
+ * instances flagged inLmSelect, followed by a jump to skip_tokpref.
+ * targState and inFinish are not used. */
+void FsmCodeGen::LM_SWITCH( ostream &ret, InlineItem *item,
+		int targState, int inFinish )
+{
+	ret << " switch( " << ACT() << " ) {\n";
+
+	/* When the switch handles the error case, the error state has been
+	 * forced to exist, so redFsm->errState can be dereferenced. */
+	if ( item->tokenRegion->lmSwitchHandlesError ) {
+		ret << " case 0: goto st" << redFsm->errState->id << ";\n";
+	}
+
+	/* One case per token instance that takes part in the selection. */
+	for ( TokenInstanceListReg::Iter inst = item->tokenRegion->tokenInstanceList;
+			inst.lte(); inst++ )
+	{
+		if ( ! inst->inLmSelect )
+			continue;
+
+		assert( inst->tokenDef->tdLangEl != 0 );
+		ret << " case " << inst->longestMatchId << ":\n";
+		EMIT_TOKEN( ret, inst->tokenDef->tdLangEl );
+		ret << " break;\n";
+	}
+
+	ret << " }\n";
+	ret << "\t";
+	ret << " goto skip_tokpref;\n";
+
+	/* Remember that the label has to be generated. */
+	skipTokprefLabelNeeded = true;
+}
+
+/* Emit code that advances P() by one, sets the token end, records the
+ * matched token and jumps to out. */
+void FsmCodeGen::LM_ON_LAST( ostream &ret, InlineItem *item )
+{
+	LangEl *matched = item->longestMatchPart->tokenDef->tdLangEl;
+	assert( matched != 0 );
+
+	ret << " " << P() << " += 1;\n";
+	SET_TOKEND_0( ret, 0 );
+	EMIT_TOKEN( ret, matched );
+	ret << " goto out;\n";
+}
+
+/* Emit code that sets the token end at the current position, records the
+ * matched token and jumps to out. P() is not advanced. */
+void FsmCodeGen::LM_ON_NEXT( ostream &ret, InlineItem *item )
+{
+	LangEl *matched = item->longestMatchPart->tokenDef->tdLangEl;
+	assert( matched != 0 );
+
+	SET_TOKEND_0( ret, 0 );
+	EMIT_TOKEN( ret, matched );
+	ret << " goto out;\n";
+}
+
+/* Emit code that records the matched token and jumps to skip_tokpref. */
+void FsmCodeGen::LM_ON_LAG_BEHIND( ostream &ret, InlineItem *item )
+{
+	LangEl *matched = item->longestMatchPart->tokenDef->tdLangEl;
+	assert( matched != 0 );
+
+	EMIT_TOKEN( ret, matched );
+	ret << " goto skip_tokpref;\n";
+
+	/* Remember that the label has to be generated. */
+	skipTokprefLabelNeeded = true;
+}
+
+
+/* Walk an inline list and emit code for each item by dispatching on its
+ * type. Text and LmInitTokStart items are not expected here and trip an
+ * assertion. */
+void FsmCodeGen::INLINE_LIST( ostream &ret, InlineList *inlineList,
+		int targState, bool inFinish )
+{
+	for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
+		switch ( item->type ) {
+		/* Item kinds that must never reach the code generator. */
+		case InlineItem::Text:
+		case InlineItem::LmInitTokStart:
+			assert( false );
+			break;
+
+		/* Longest-match bookkeeping. */
+		case InlineItem::LmSetActId:
+			SET_ACT( ret, item );
+			break;
+		case InlineItem::LmSetTokEnd:
+			SET_TOKEND( ret, item );
+			break;
+		case InlineItem::LmInitAct:
+			INIT_ACT( ret, item );
+			break;
+		case InlineItem::LmSetTokStart:
+			SET_TOKSTART( ret, item );
+			break;
+
+		/* Longest-match token selection. */
+		case InlineItem::LmSwitch:
+			LM_SWITCH( ret, item, targState, inFinish );
+			break;
+		case InlineItem::LmOnLast:
+			LM_ON_LAST( ret, item );
+			break;
+		case InlineItem::LmOnNext:
+			LM_ON_NEXT( ret, item );
+			break;
+		case InlineItem::LmOnLagBehind:
+			LM_ON_LAG_BEHIND( ret, item );
+			break;
+		}
+	}
+}
+
+/* Return a copy of path suitable for a line directive: every backslash is
+ * doubled, all other characters are copied unchanged. */
+string FsmCodeGen::LDIR_PATH( char *path )
+{
+	ostringstream escaped;
+	char *cursor = path;
+	while ( *cursor != 0 ) {
+		if ( *cursor != '\\' )
+			escaped << *cursor;
+		else
+			escaped << "\\\\";
+		cursor++;
+	}
+	return escaped.str();
+}
+
+/* Emit one action as a braced block: the code for its inline list and, when
+ * the action has a mark id, an assignment of P() to the matching mark[]
+ * slot (index markId-1). */
+void FsmCodeGen::ACTION( ostream &ret, GenAction *action, int targState, bool inFinish )
+{
+	/* Open the block and fill in the body. */
+	ret << "\t{";
+	INLINE_LIST( ret, action->inlineList, targState, inFinish );
+
+	/* Record the mark position when the action carries one. */
+	if ( action->markId > 0 ) {
+		ret << "mark[" << action->markId-1 << "] = ";
+		ret << P() << ";\n";
+	}
+
+	/* Close the block. */
+	ret << "}\n";
+}
+
+/* Emit a condition: a newline followed by the code for its inline list,
+ * generated with target state 0 and inFinish false. */
+void FsmCodeGen::CONDITION( ostream &ret, GenAction *condition )
+{
+	InlineList *body = condition->inlineList;
+	ret << "\n";
+	INLINE_LIST( ret, body, 0, false );
+}
+
+string FsmCodeGen::ERROR_STATE()
+{
+ ostringstream ret;
+ if ( redFsm->errState != 0 )
+ ret << redFsm->errState->id;
+ else
+ ret << "-1";
+ return ret.str();
+}
+
+/* Return the id of the first final state as text. When firstFinState is not
+ * set, redFsm->nextStateId is written instead. */
+string FsmCodeGen::FIRST_FINAL_STATE()
+{
+	ostringstream text;
+	if ( redFsm->firstFinState == 0 )
+		text << redFsm->nextStateId;
+	else
+		text << redFsm->firstFinState->id;
+	return text.str();
+}
+
+/* Return the prefix for generated data names: FSM_NAME() followed by an
+ * underscore when dataPrefix is on, otherwise an empty string. */
+string FsmCodeGen::DATA_PREFIX()
+{
+	string prefix;
+	if ( dataPrefix ) {
+		prefix = FSM_NAME();
+		prefix += "_";
+	}
+	return prefix;
+}
+
+/* Return the name of the alphabet data type. The name is data1 of the
+ * alphabet type, followed by a space and data2 when data2 is set. */
+string FsmCodeGen::ALPH_TYPE()
+{
+	string ret = keyOps->alphType->data1;
+	if ( keyOps->alphType->data2 != 0 ) {
+		ret += " ";
+		/* Plain append of the second word. The original line had a stray
+		 * unary plus in front of the pointer. */
+		ret += keyOps->alphType->data2;
+	}
+	return ret;
+}
+
+/* Return the name of the wide alphabet data type; identical to ALPH_TYPE()
+ * here. */
+string FsmCodeGen::WIDE_ALPH_TYPE()
+{
+	string typeName = ALPH_TYPE();
+	return typeName;
+}
+
+
+/* Qualifier text placed in front of pointer types: "const " including the
+ * trailing space. */
+string FsmCodeGen::PTR_CONST()
+{
+	string qualifier( "const " );
+	return qualifier;
+}
+
+/* Write the opening of a static const array definition with the given
+ * element type and name. */
+std::ostream &FsmCodeGen::OPEN_ARRAY( string type, string name )
+{
+	out << "static const " << type;
+	out << " " << name << "[] = {\n";
+	return out;
+}
+
+/* Write the closing brace and semicolon of an array definition. */
+std::ostream &FsmCodeGen::CLOSE_ARRAY()
+{
+	out << "};\n";
+	return out;
+}
+
+/* Write the start of a static const variable declaration: type and name,
+ * with no initializer and no terminating semicolon. */
+std::ostream &FsmCodeGen::STATIC_VAR( string type, string name )
+{
+	out << "static const " << type;
+	out << " " << name;
+	return out;
+}
+
+/* Name of the unsigned integer type used in generated code. */
+string FsmCodeGen::UINT( )
+{
+	string typeName( "unsigned int" );
+	return typeName;
+}
+
+/* Build the expression "ptr + offset" from its two operand texts. */
+string FsmCodeGen::ARR_OFF( string ptr, string offset )
+{
+	string expr( ptr );
+	expr += " + ";
+	expr += offset;
+	return expr;
+}
+
+/* Build a C-style cast prefix: the type name wrapped in parentheses. */
+string FsmCodeGen::CAST( string type )
+{
+	string expr( "(" );
+	expr += type;
+	expr += ")";
+	return expr;
+}
+
+/* Write one switch case per action that is referenced as a to-state
+ * action. */
+std::ostream &FsmCodeGen::TO_STATE_ACTION_SWITCH()
+{
+	for ( GenActionList::Iter action = redFsm->genActionList; action.lte(); action++ ) {
+		/* Unreferenced actions produce no code. */
+		if ( ! ( action->numToStateRefs > 0 ) )
+			continue;
+
+		/* Case label, action body, break. */
+		out << "\tcase " << action->actionId << ":\n";
+		ACTION( out, action, 0, false );
+		out << "\tbreak;\n";
+	}
+
+	return out;
+}
+
+/* Write one switch case per action that is referenced as a from-state
+ * action. */
+std::ostream &FsmCodeGen::FROM_STATE_ACTION_SWITCH()
+{
+	for ( GenActionList::Iter action = redFsm->genActionList; action.lte(); action++ ) {
+		/* Unreferenced actions produce no code. */
+		if ( ! ( action->numFromStateRefs > 0 ) )
+			continue;
+
+		/* Case label, action body, break. */
+		out << "\tcase " << action->actionId << ":\n";
+		ACTION( out, action, 0, false );
+		out << "\tbreak;\n";
+	}
+
+	return out;
+}
+
+/* Write one switch case per action that is referenced from a transition. */
+std::ostream &FsmCodeGen::ACTION_SWITCH()
+{
+	for ( GenActionList::Iter action = redFsm->genActionList; action.lte(); action++ ) {
+		/* Unreferenced actions produce no code. */
+		if ( ! ( action->numTransRefs > 0 ) )
+			continue;
+
+		/* Case label, action body, break. */
+		out << "\tcase " << action->actionId << ":\n";
+		ACTION( out, action, 0, false );
+		out << "\tbreak;\n";
+	}
+
+	return out;
+}
+
+/* Emit the test for a state's single-key transitions: an if for exactly one
+ * key, a switch for several, and nothing when there are none. */
+void FsmCodeGen::emitSingleSwitch( RedState *state )
+{
+	/* Fetch the single-key transitions. */
+	RedTransEl *singles = state->outSingle.data;
+	int count = state->outSingle.length();
+
+	/* No single keys, no output. */
+	if ( count < 1 )
+		return;
+
+	if ( count == 1 ) {
+		/* A lone key is tested with an if. */
+		out << "\tif ( " << GET_WIDE_KEY(state) << " == " <<
+				KEY(singles[0].lowKey) << " )\n\t\t";
+
+		/* TRANS_GOTO writes the jump for the transition. */
+		TRANS_GOTO(singles[0].value, 0) << "\n";
+		return;
+	}
+
+	/* Several keys go into a switch, one case per key. */
+	out << "\tswitch( " << GET_WIDE_KEY(state) << " ) {\n";
+
+	for ( int idx = 0; idx < count; idx++ ) {
+		out << "\t\tcase " << KEY(singles[idx].lowKey) << ": ";
+		TRANS_GOTO(singles[idx].value, 0) << "\n";
+	}
+
+	/* Close the switch. */
+	out << "\t}\n";
+}
+
+/* Emit a nested if/else binary search over the range transitions
+ * state->outRange.data[low..high] (both ends inclusive). The middle range is
+ * tested at this level and the lower and upper halves are emitted
+ * recursively one level deeper. 'level' is the number of tabs written in
+ * front of each emitted line. Comparisons against the limits of the
+ * alphabet (keyOps->minKey and keyOps->maxKey) are left out. */
+void FsmCodeGen::emitRangeBSearch( RedState *state, int level, int low, int high )
+{
+ /* Get the mid position, staying on the lower end of the range. */
+ int mid = (low + high) >> 1;
+ RedTransEl *data = state->outRange.data;
+
+ /* Determine if we need to look higher or lower. */
+ bool anyLower = mid > low;
+ bool anyHigher = mid < high;
+
+ /* Determine if the keys at mid are the limits of the alphabet. */
+ bool limitLow = data[mid].lowKey == keyOps->minKey;
+ bool limitHigh = data[mid].highKey == keyOps->maxKey;
+
+ if ( anyLower && anyHigher ) {
+ /* Can go lower and higher than mid. */
+ out << TABS(level) << "if ( " << GET_WIDE_KEY(state) << " < " <<
+ KEY(data[mid].lowKey) << " ) {\n";
+ emitRangeBSearch( state, level+1, low, mid-1 );
+ out << TABS(level) << "} else if ( " << GET_WIDE_KEY(state) << " > " <<
+ KEY(data[mid].highKey) << " ) {\n";
+ emitRangeBSearch( state, level+1, mid+1, high );
+ out << TABS(level) << "} else\n";
+ TRANS_GOTO(data[mid].value, level+1) << "\n";
+ }
+ else if ( anyLower && !anyHigher ) {
+ /* Can go lower than mid but not higher. */
+ out << TABS(level) << "if ( " << GET_WIDE_KEY(state) << " < " <<
+ KEY(data[mid].lowKey) << " ) {\n";
+ emitRangeBSearch( state, level+1, low, mid-1 );
+
+ /* if the higher is the highest in the alphabet then there is no
+ * sense testing it. */
+ if ( limitHigh ) {
+ out << TABS(level) << "} else\n";
+ TRANS_GOTO(data[mid].value, level+1) << "\n";
+ }
+ else {
+ out << TABS(level) << "} else if ( " << GET_WIDE_KEY(state) << " <= " <<
+ KEY(data[mid].highKey) << " )\n";
+ TRANS_GOTO(data[mid].value, level+1) << "\n";
+ }
+ }
+ else if ( !anyLower && anyHigher ) {
+ /* Can go higher than mid but not lower. */
+ out << TABS(level) << "if ( " << GET_WIDE_KEY(state) << " > " <<
+ KEY(data[mid].highKey) << " ) {\n";
+ emitRangeBSearch( state, level+1, mid+1, high );
+
+ /* If the lower end is the lowest in the alphabet then there is no
+ * sense testing it. */
+ if ( limitLow ) {
+ out << TABS(level) << "} else\n";
+ TRANS_GOTO(data[mid].value, level+1) << "\n";
+ }
+ else {
+ out << TABS(level) << "} else if ( " << GET_WIDE_KEY(state) << " >= " <<
+ KEY(data[mid].lowKey) << " )\n";
+ TRANS_GOTO(data[mid].value, level+1) << "\n";
+ }
+ }
+ else {
+ /* Cannot go higher or lower than mid. It's mid or bust. What
+ * tests to do depends on limits of alphabet. */
+ if ( !limitLow && !limitHigh ) {
+ out << TABS(level) << "if ( " << KEY(data[mid].lowKey) << " <= " <<
+ GET_WIDE_KEY(state) << " && " << GET_WIDE_KEY(state) << " <= " <<
+ KEY(data[mid].highKey) << " )\n";
+ TRANS_GOTO(data[mid].value, level+1) << "\n";
+ }
+ else if ( limitLow && !limitHigh ) {
+ out << TABS(level) << "if ( " << GET_WIDE_KEY(state) << " <= " <<
+ KEY(data[mid].highKey) << " )\n";
+ TRANS_GOTO(data[mid].value, level+1) << "\n";
+ }
+ else if ( !limitLow && limitHigh ) {
+ out << TABS(level) << "if ( " << KEY(data[mid].lowKey) << " <= " <<
+ GET_WIDE_KEY(state) << " )\n";
+ TRANS_GOTO(data[mid].value, level+1) << "\n";
+ }
+ else {
+ /* Both high and low are at the limit. No tests to do. */
+ TRANS_GOTO(data[mid].value, level+1) << "\n";
+ }
+ }
+}
+
+std::ostream &FsmCodeGen::STATE_GOTOS()
+{
+ for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+ if ( st == redFsm->errState )
+ STATE_GOTO_ERROR();
+ else {
+ /* Writing code above state gotos. */
+ GOTO_HEADER( st );
+
+ /* Try singles. */
+ if ( st->outSingle.length() > 0 )
+ emitSingleSwitch( st );
+
+ /* Default case is to binary search for the ranges, if that fails then */
+ if ( st->outRange.length() > 0 )
+ emitRangeBSearch( st, 1, 0, st->outRange.length() - 1 );
+
+ /* Write the default transition. */
+ TRANS_GOTO( st->defTrans, 1 ) << "\n";
+ }
+ }
+ return out;
+}
+
+/* Map a state's to-state action onto its table value: zero when the state
+ * has no to-state action, otherwise the action's location plus one. */
+unsigned int FsmCodeGen::TO_STATE_ACTION( RedState *state )
+{
+    const int tableValue = ( state->toStateAction != 0 ) ?
+            state->toStateAction->location + 1 : 0;
+    return tableValue;
+}
+
+/* Map a state's from-state action onto its table value: zero when the state
+ * has no from-state action, otherwise the action's location plus one. */
+unsigned int FsmCodeGen::FROM_STATE_ACTION( RedState *state )
+{
+    const int tableValue = ( state->fromStateAction != 0 ) ?
+            state->fromStateAction->location + 1 : 0;
+    return tableValue;
+}
+
+/* Emit the to-state action table as a comma separated list of integers,
+ * IALL values per line, one entry for each state id in [0, nextStateId).
+ * An entry is zero for a state without a to-state action, otherwise it is
+ * the value computed by TO_STATE_ACTION. Returns the output stream. */
+std::ostream &FsmCodeGen::TO_STATE_ACTIONS()
+{
+    /* The table is written by state id and read back over [0, nextStateId).
+     * Size it to cover both the state count and the id range so neither
+     * loop can step outside the allocation if the two ever differ. */
+    int numStates = redFsm->stateList.length();
+    int numIds = redFsm->nextStateId;
+    int tableLen = numIds > numStates ? numIds : numStates;
+
+    unsigned int *vals = new unsigned int[tableLen];
+    memset( vals, 0, sizeof(unsigned int)*tableLen );
+
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ )
+        vals[st->id] = TO_STATE_ACTION(st);
+
+    out << "\t";
+    for ( int st = 0; st < numIds; st++ ) {
+        /* Write the to-state action value for this id. */
+        out << vals[st];
+
+        /* Separate on the same bound the loop runs to, so every entry but
+         * the last one is followed by a comma. */
+        if ( st < numIds-1 ) {
+            out << ", ";
+            if ( (st+1) % IALL == 0 )
+                out << "\n\t";
+        }
+    }
+    out << "\n";
+    delete[] vals;
+    return out;
+}
+
+/* Emit the from-state action table as a comma separated list of integers,
+ * IALL values per line, one entry for each state id in [0, nextStateId).
+ * An entry is zero for a state without a from-state action, otherwise it is
+ * the value computed by FROM_STATE_ACTION. Returns the output stream. */
+std::ostream &FsmCodeGen::FROM_STATE_ACTIONS()
+{
+    /* The table is written by state id and read back over [0, nextStateId).
+     * Size it to cover both the state count and the id range so neither
+     * loop can step outside the allocation if the two ever differ. */
+    int numStates = redFsm->stateList.length();
+    int numIds = redFsm->nextStateId;
+    int tableLen = numIds > numStates ? numIds : numStates;
+
+    unsigned int *vals = new unsigned int[tableLen];
+    memset( vals, 0, sizeof(unsigned int)*tableLen );
+
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ )
+        vals[st->id] = FROM_STATE_ACTION(st);
+
+    out << "\t";
+    for ( int st = 0; st < numIds; st++ ) {
+        /* Write the from-state action value for this id. */
+        out << vals[st];
+
+        /* Separate on the same bound the loop runs to, so every entry but
+         * the last one is followed by a comma. */
+        if ( st < numIds-1 ) {
+            out << ", ";
+            if ( (st+1) % IALL == 0 )
+                out << "\n\t";
+        }
+    }
+    out << "\n";
+    delete[] vals;
+    return out;
+}
+
+/* Emit a "trN:" block for every transition into the given state that both
+ * carries an action and needs a label. Each block runs the transition's
+ * actions and then jumps to the target state's label. Always returns
+ * false. */
+bool FsmCodeGen::IN_TRANS_ACTIONS( RedState *state )
+{
+    for ( int idx = 0; idx < state->numInTrans; idx++ ) {
+        RedTrans *inTrans = state->inTrans[idx];
+
+        /* Transitions without an action, or that nothing jumps to by
+         * label, need no block of their own. */
+        if ( inTrans->action == 0 || !inTrans->labelNeeded )
+            continue;
+
+        /* The label the transition is reached through. */
+        out << "tr" << inTrans->id << ":\n";
+
+        /* An action containing a next statement may or may not set the
+         * current state, so load the target state before running it. */
+        if ( inTrans->action->anyNextStmt() )
+            out << " " << CS() << " = " << inTrans->targ->id << ";\n";
+
+        /* Run the actions in list order. */
+        for ( GenActionTable::Iter act = inTrans->action->key; act.lte(); act++ )
+            ACTION( out, act->value, inTrans->targ->id, false );
+
+        /* Continue in the target state. */
+        out << "\tgoto st" << inTrans->targ->id << ";\n";
+    }
+
+    return false;
+}
+
+/* Write everything that precedes a state's transition tests. In order: the
+ * blocks for incoming transitions with actions, the state's label (when
+ * needed), its to-state actions, its switch case, the buffer advance with
+ * the end-of-buffer test, its from-state actions and, when the state
+ * requires it, the recording of the previous state. Called from
+ * FsmCodeGen::STATE_GOTOS once for each non-error state. */
+void FsmCodeGen::GOTO_HEADER( RedState *state )
+{
+    IN_TRANS_ACTIONS( state );
+
+    /* Jump target for transitions that go straight to this state. */
+    if ( state->labelNeeded )
+        out << "st" << state->id << ":\n";
+
+    /* To-state actions sit between the label and the case, so they run on
+     * a jump to the label but not on entry through the switch. */
+    if ( state->toStateAction != 0 ) {
+        for ( GenActionTable::Iter act = state->toStateAction->key; act.lte(); act++ )
+            ACTION( out, act->value, state->id, false );
+    }
+
+    /* Entry point from the switch on the current state. */
+    out << "case " << state->id << ":\n";
+
+    /* Move to the next character, leaving through this state's exit label
+     * when the buffer is exhausted. */
+    out << " if ( ++" << P() << " == " << PE() << " )\n";
+    out << " goto out" << state->id << ";\n";
+
+    /* From-state actions run once a character is available. */
+    if ( state->fromStateAction != 0 ) {
+        for ( GenActionTable::Iter act = state->fromStateAction->key; act.lte(); act++ )
+            ACTION( out, act->value, state->id, false );
+    }
+
+    /* Keep the previous state around if something in this state reads it. */
+    if ( state->anyRegCurStateRef() )
+        out << " _ps = " << state->id << ";\n";
+}
+
+void FsmCodeGen::STATE_GOTO_ERROR()
+{
+ /* In the error state we need to emit some stuff that usually goes into
+ * the header. */
+ RedState *state = redFsm->errState;
+ IN_TRANS_ACTIONS( state );
+
+ if ( state->labelNeeded )
+ out << "st" << state->id << ":\n";
+
+ /* We do not need a case label here because the the error state is checked
+ * at the head of the loop. */
+
+ /* Break out here. */
+ out << " goto out" << state->id << ";\n";
+}
+
+
+/* Write the goto statement for a transition, indented by TABS(level) and
+ * without a trailing newline. A transition with an action jumps to its own
+ * "trN" label, which in turn goes on to the target state. A transition
+ * without one jumps straight to the target's "stN" label. Returns the
+ * output stream so the caller can append to the line. */
+std::ostream &FsmCodeGen::TRANS_GOTO( RedTrans *trans, int level )
+{
+    out << TABS(level);
+
+    if ( trans->action == 0 )
+        out << "goto st" << trans->targ->id << ";";
+    else
+        out << "goto tr" << trans->id << ";";
+
+    return out;
+}
+
+/* Emit one exit case per state. Each case carries the state's "outN" label,
+ * takes the state's eof transition when it has one and the eof flag is set,
+ * and otherwise stores the state id as the current state and jumps to the
+ * common "out" label. Returns the output stream. */
+std::ostream &FsmCodeGen::EXIT_STATES()
+{
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+        /* The case and the label share one line. */
+        out << " case " << st->id << ": ";
+        out << "out" << st->id << ": ";
+
+        /* Guarded jump to the eof transition, if the state has one. */
+        if ( st->eofTrans != 0 ) {
+            out << "if ( " << DATA_EOF() << " ) {";
+            TRANS_GOTO( st->eofTrans, 0 ) << "\n}";
+        }
+
+        /* Record where the machine stopped and leave. */
+        out << CS() << " = " << st->id << "; goto out; \n";
+    }
+
+    return out;
+}
+
+/* Work out which states need an "stN" label in the generated code, then
+ * copy the result into each state's outNeeded flag. */
+void FsmCodeGen::setLabelsNeeded()
+{
+    /* Start from a clean slate: no state has a label. */
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ )
+        st->labelNeeded = false;
+
+    /* The error state is labelled when a longest-match switch can send the
+     * machine there. */
+    if ( redFsm->errState != 0 && redFsm->anyLmSwitchError() )
+        redFsm->errState->labelNeeded = true;
+
+    for ( RedTransSet::Iter trans = redFsm->transSet; trans.lte(); trans++ ) {
+        /* A transition whose action contains a next statement does not
+         * mark its target. Every other transition does. */
+        if ( trans->action != 0 && trans->action->anyNextStmt() )
+            continue;
+
+        trans->targ->labelNeeded = true;
+    }
+
+    /* The out flag mirrors the label flag. */
+    for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ )
+        st->outNeeded = st->labelNeeded;
+}
+
+/* Emit the static data of the generated scanner: the #defines for the
+ * start, first-final and error states plus false/true, the entry-by-region
+ * array taken from fsmTables->entry_by_region, and a positionally
+ * initialized "fsmTables_start" struct of type fsm_tables. The struct is
+ * filled with zeros except for the entry-by-region pointer, the region
+ * count and the three state constants.
+ *
+ * NOTE(review): the array is written with fsmTables->num_regions entries
+ * while the count stored in the struct is redFsm->regionToEntry.length().
+ * Presumably these are always equal; confirm against the code that builds
+ * both. */
+void FsmCodeGen::writeData()
+{
+ out << "#define " << START() << " " << START_STATE_ID() << "\n";
+ out << "#define " << FIRST_FINAL() << " " << FIRST_FINAL_STATE() << "\n";
+ out << "#define " << ERROR() << " " << ERROR_STATE() << "\n";
+ out << "#define false 0\n";
+ out << "#define true 1\n";
+ out << "\n";
+
+ /* The entry state of each region, eight values per line. */
+ out << "static long " << ENTRY_BY_REGION() << "[] = {\n\t";
+ for ( int i = 0; i < fsmTables->num_regions; i++ ) {
+ out << fsmTables->entry_by_region[i];
+
+ if ( i < fsmTables->num_regions-1 ) {
+ out << ", ";
+ if ( (i+1) % 8 == 0 )
+ out << "\n\t";
+ }
+ }
+ out << "\n};\n\n";
+
+ /* The initializer is positional: the order of the values below has to
+ * follow the field order named in the trailing comments. */
+ out <<
+ "static struct fsm_tables fsmTables_start =\n"
+ "{\n"
+ " 0, " /* actions */
+ " 0, " /* keyOffsets */
+ " 0, " /* transKeys */
+ " 0, " /* singleLengths */
+ " 0, " /* rangeLengths */
+ " 0, " /* indexOffsets */
+ " 0, " /* transTargsWI */
+ " 0, " /* transActionsWI */
+ " 0, " /* toStateActions */
+ " 0, " /* fromStateActions */
+ " 0, " /* eofActions */
+ " 0,\n" /* eofTargs */
+ " " << ENTRY_BY_REGION() << ",\n"
+
+ "\n"
+ " 0, " /* numStates */
+ " 0, " /* numActions */
+ " 0, " /* numTransKeys */
+ " 0, " /* numSingleLengths */
+ " 0, " /* numRangeLengths */
+ " 0, " /* numIndexOffsets */
+ " 0, " /* numTransTargsWI */
+ " 0,\n" /* numTransActionsWI */
+ " " << redFsm->regionToEntry.length() << ",\n"
+ "\n"
+ " " << START() << ",\n"
+ " " << FIRST_FINAL() << ",\n"
+ " " << ERROR() << ",\n"
+ "\n"
+ " 0,\n" /* actionSwitch */
+ " 0\n" /* numActionSwitch */
+ "};\n"
+ "\n";
+}
+
+/* Emit the statements that initialize the scanner's run-time variables:
+ * the current state is set to the start state, the stack top is zeroed when
+ * the machine has call or return actions, and the token start, token end
+ * and act variables are zeroed. */
+void FsmCodeGen::writeInit()
+{
+    out << " " << CS() << " = " << START() << ";\n";
+
+    /* The stack top only exists to serve calls and returns. */
+    const bool usesStack = redFsm->anyActionCalls() || redFsm->anyActionRets();
+    if ( usesStack )
+        out << "\t" << TOP() << " = 0;\n";
+
+    out << " " << TOKSTART() << " = 0;\n";
+    out << " " << TOKEND() << " = 0;\n";
+    out << " " << ACT() << " = 0;\n";
+
+    out << "\n";
+}
+
+/* Emit the fsm_execute() function of the generated scanner. The function
+ * records the block start, leaves at once when the machine is in the error
+ * state, dispatches on the current state into the code written by
+ * STATE_GOTOS, and ends with the exit switch written by EXIT_STATES
+ * followed by the token-prefix update under the "out" label. */
+void FsmCodeGen::writeExec()
+{
+    /* The label flags must be settled before any state code is written. */
+    setLabelsNeeded();
+
+    /* Function header and the start-of-block bookkeeping. */
+    out << "static void fsm_execute( struct pda_run *pdaRun, struct input_impl *inputStream )\n";
+    out << "{\n";
+    out << " " << BLOCK_START() << " = pdaRun->p;\n";
+    out << "/*_resume:*/\n";
+
+    /* Nothing to run once the machine is in the error state. */
+    if ( redFsm->errState != 0 ) {
+        out << " if ( " << CS() << " == " << redFsm->errState->id << " )\n";
+        out << " goto out;\n";
+    }
+
+    /* With an empty buffer go straight to the exit switch. Otherwise step
+     * back one position, since every state advances before it reads. */
+    out << " if ( " << P() << " == " << PE() << " )\n";
+    out << " goto out_switch;\n";
+    out << " --" << P() << ";\n";
+    out << "\n";
+
+    /* Main dispatch on the current state. */
+    out << " switch ( " << CS() << " )\n {\n";
+    STATE_GOTOS();
+    out << " }\n";
+
+    /* Exit dispatch, reached when the buffer was empty on entry. */
+    out << "out_switch:\n";
+    out << " switch ( " << CS() << " )\n {\n";
+    EXIT_STATES();
+    out << " }\n";
+
+    /* Account for what was consumed in this block. */
+    out << "out:\n";
+    out << " if ( " << P() << " != 0 )\n";
+    out << " " << TOKPREF() << " += " << P() << " - " << BLOCK_START() << ";\n";
+
+    /* Only written when the flag says the label is wanted. */
+    if ( skipTokprefLabelNeeded ) {
+        out << "skip_tokpref:\n";
+        out << " {}\n";
+    }
+
+    out << "}\n";
+    out << "\n";
+}
+
+/* Top-level entry point of the generator: order the states depth first,
+ * write the data tables and the execute function, then append empty stubs
+ * for three functions. */
+void FsmCodeGen::writeCode()
+{
+    redFsm->depthFirstOrdering();
+
+    writeData();
+    writeExec();
+
+    /* These are referenced by the runtime library but only do real work in
+     * the compiler. The preprocessor would probably be a better way to
+     * make them go away. */
+    out << "static void sendNamedLangEl( struct colm_program *prg, tree_t **tree,\n";
+    out << " struct pda_run *pda_run, struct input_impl *input ) { }\n";
+    out << "static void initBindings( struct pda_run *pdaRun ) {}\n";
+    out << "static void popBinding( struct pda_run *pdaRun, parse_tree_t *tree ) {}\n";
+    out << "\n";
+    out << "\n";
+}
+
+
diff --git a/src/fsmcodegen.h b/src/fsmcodegen.h
new file mode 100644
index 00000000..c8f66c9c
--- /dev/null
+++ b/src/fsmcodegen.h
@@ -0,0 +1,211 @@
+/*
+ * Copyright 2001-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _COLM_FSMCODEGEN_H
+#define _COLM_FSMCODEGEN_H
+
+#include <stdio.h>
+
+#include <string>
+#include <iostream>
+
+#include "keyops.h"
+#include "compiler.h"
+#include "redfsm.h"
+
+using std::string;
+using std::ostream;
+
+/* Integer array line length. */
+#define IALL 8
+
+/* Forwards. */
+struct RedFsm;
+struct RedState;
+struct GenAction;
+struct NameInst;
+struct RedAction;
+struct LongestMatch;
+struct TokenInstance;
+struct InlineList;
+struct InlineItem;
+struct NameInst;
+struct FsmCodeGen;
+
+typedef unsigned long ulong;
+typedef unsigned char uchar;
+
+
+/*
+ * The interface to the parser
+ */
+
+std::ostream *openOutput( char *inputFile );
+
+/* Format an integer as its decimal string representation.
+ *
+ * i: the value to format.
+ * Returns the decimal text of i, with a leading '-' for negative values.
+ *
+ * The write is bounded by the size of the local buffer, so it cannot run
+ * past the buffer whatever the width of int is on the target. */
+inline string itoa( int i )
+{
+    char buf[16];
+    snprintf( buf, sizeof(buf), "%i", i );
+    return buf;
+}
+
+/*
+ * class FsmCodeGen
+ *
+ * Writes the C code for a scanner to an output stream, from a reduced
+ * state machine (RedFsm) and a set of fsm_tables. The upper-case members
+ * return or emit fragments of the generated code. The write* members emit
+ * whole sections.
+ */
+struct FsmCodeGen
+{
+public:
+ FsmCodeGen( ostream &out, RedFsm *redFsm, fsm_tables *fsmTables );
+
+protected:
+
+ string FSM_NAME();
+ string START_STATE_ID();
+ ostream &ACTIONS_ARRAY();
+ string GET_WIDE_KEY();
+ string GET_WIDE_KEY( RedState *state );
+ string TABS( int level );
+ string KEY( Key key );
+ string LDIR_PATH( char *path );
+ void ACTION( ostream &ret, GenAction *action, int targState, bool inFinish );
+ void CONDITION( ostream &ret, GenAction *condition );
+ string ALPH_TYPE();
+ string WIDE_ALPH_TYPE();
+ string ARRAY_TYPE( unsigned long maxVal );
+
+ string ARR_OFF( string ptr, string offset );
+ string CAST( string type );
+ string UINT();
+ string GET_KEY();
+
+ /* Prefix placed in front of every run-time variable in the generated
+ * code. */
+ string ACCESS() { return "pdaRun->"; }
+
+ /* Names of the run-time variables, each built on ACCESS(). */
+ string P() { return ACCESS() + "p"; }
+ string PE() { return ACCESS() + "pe"; }
+ string DATA_EOF() { return ACCESS() + "scan_eof"; }
+
+ string CS();
+ string TOP() { return ACCESS() + "top"; }
+ string TOKSTART() { return ACCESS() + "tokstart"; }
+ string TOKEND() { return ACCESS() + "tokend"; }
+ string BLOCK_START() { return ACCESS() + "start"; }
+ string TOKPREF() { return ACCESS() + "tokpref"; }
+ string ACT() { return ACCESS() + "act"; }
+ string MATCHED_TOKEN() { return ACCESS() + "matched_token"; }
+
+ string DATA_PREFIX();
+
+ /* Names of the generated constants and data, each built on
+ * DATA_PREFIX(). */
+ string START() { return DATA_PREFIX() + "start"; }
+ string ERROR() { return DATA_PREFIX() + "error"; }
+ string FIRST_FINAL() { return DATA_PREFIX() + "first_final"; }
+
+ string ENTRY_BY_REGION() { return DATA_PREFIX() + "entry_by_region"; }
+
+
+ /* Emitters for inline items. Each one writes to the stream passed in as
+ * ret. */
+ void INLINE_LIST( ostream &ret, InlineList *inlineList,
+ int targState, bool inFinish );
+ void EXEC_TOKEND( ostream &ret, InlineItem *item, int targState, int inFinish );
+ void EXECTE( ostream &ret, InlineItem *item, int targState, int inFinish );
+ void LM_SWITCH( ostream &ret, InlineItem *item, int targState, int inFinish );
+ void SET_ACT( ostream &ret, InlineItem *item );
+ void INIT_TOKSTART( ostream &ret, InlineItem *item );
+ void INIT_ACT( ostream &ret, InlineItem *item );
+ void SET_TOKSTART( ostream &ret, InlineItem *item );
+ void SET_TOKEND( ostream &ret, InlineItem *item );
+ void SET_TOKEND_0( ostream &ret, InlineItem *item );
+ void GET_TOKEND( ostream &ret, InlineItem *item );
+ void SUB_ACTION( ostream &ret, InlineItem *item, int targState, bool inFinish );
+ void LM_ON_LAST( ostream &ret, InlineItem *item );
+ void LM_ON_NEXT( ostream &ret, InlineItem *item );
+ void LM_ON_LAG_BEHIND( ostream &ret, InlineItem *item );
+ void EXEC_TOKEND( ostream &ret );
+ void EMIT_TOKEN( ostream &ret, LangEl *token );
+
+ string ERROR_STATE();
+ string FIRST_FINAL_STATE();
+
+ string PTR_CONST();
+ ostream &OPEN_ARRAY( string type, string name );
+ ostream &CLOSE_ARRAY();
+ ostream &STATIC_VAR( string type, string name );
+
+ string CTRL_FLOW();
+
+ unsigned int arrayTypeSize( unsigned long maxVal );
+
+public:
+ /* Stream the generated code is written to. */
+ ostream &out;
+ /* The reduced machine and the tables the code is generated from. */
+ RedFsm *redFsm;
+ fsm_tables *fsmTables;
+ int codeGenErrCount;
+
+ /* Write options. */
+ bool dataPrefix;
+ bool writeFirstFinal;
+ bool writeErr;
+ bool skipTokprefLabelNeeded;
+
+ std::ostream &TO_STATE_ACTION_SWITCH();
+ std::ostream &FROM_STATE_ACTION_SWITCH();
+ std::ostream &ACTION_SWITCH();
+ std::ostream &STATE_GOTOS();
+ std::ostream &TRANSITIONS();
+ std::ostream &EXEC_FUNCS();
+
+ unsigned int TO_STATE_ACTION( RedState *state );
+ unsigned int FROM_STATE_ACTION( RedState *state );
+
+ std::ostream &TO_STATE_ACTIONS();
+ std::ostream &FROM_STATE_ACTIONS();
+
+ void emitCondBSearch( RedState *state, int level, int low, int high );
+ void STATE_CONDS( RedState *state, bool genDefault );
+
+ void emitSingleSwitch( RedState *state );
+ void emitRangeBSearch( RedState *state, int level, int low, int high );
+
+ std::ostream &EXIT_STATES();
+ std::ostream &TRANS_GOTO( RedTrans *trans, int level );
+ std::ostream &FINISH_CASES();
+
+ /* Section writers. */
+ void writeIncludes();
+ void writeData();
+ void writeInit();
+ void writeExec();
+ void writeCode();
+ void writeMain( long activeRealm );
+
+protected:
+ bool useAgainLabel();
+
+ /* Called from FsmCodeGen::STATE_GOTOS just before writing the gotos for
+ * each state. */
+ bool IN_TRANS_ACTIONS( RedState *state );
+ void GOTO_HEADER( RedState *state );
+ void STATE_GOTO_ERROR();
+
+ /* Set up labelNeeded flag for each state. */
+ void setLabelsNeeded();
+};
+
+#endif /* _COLM_FSMCODEGEN_H */
+
diff --git a/src/fsmexec.cc b/src/fsmexec.cc
new file mode 100644
index 00000000..8aa4a072
--- /dev/null
+++ b/src/fsmexec.cc
@@ -0,0 +1,220 @@
+/*
+ * Copyright 2007-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdbool.h>
+
+#include <assert.h>
+
+#include "redfsm.h"
+#include "compiler.h"
+
+/* Run one generated action against the scanner state in pdaRun. Every
+ * inline item of the action is applied in list order. Each item type
+ * updates a fixed set of pdaRun fields, and the Text and LmInitTokStart
+ * items are not expected here and trip an assert. After the items, an
+ * action whose markType is MarkMark stores the current position in
+ * pdaRun->mark[markId-1].
+ *
+ * pdaRun: the run state that is read and updated.
+ * genAction: the action to run. */
+void execAction( struct pda_run *pdaRun, GenAction *genAction )
+{
+ for ( InlineList::Iter item = *genAction->inlineList; item.lte(); item++ ) {
+ switch ( item->type ) {
+ case InlineItem::Text:
+ assert(false);
+ break;
+ case InlineItem::LmSetActId:
+ pdaRun->act = item->longestMatchPart->longestMatchId;
+ break;
+ case InlineItem::LmSetTokEnd:
+ /* Token end is the prefix length plus the offset within the current
+ * block, counting the character at p. */
+ pdaRun->tokend = pdaRun->tokpref + ( pdaRun->p - pdaRun->start ) + 1;
+ break;
+ case InlineItem::LmInitTokStart:
+ assert(false);
+ break;
+ case InlineItem::LmInitAct:
+ pdaRun->act = 0;
+ break;
+ case InlineItem::LmSetTokStart:
+ pdaRun->tokstart = pdaRun->p;
+ break;
+ case InlineItem::LmSwitch:
+ /* If the switch handles error then we also forced the error state. It
+ * will exist. */
+ if ( item->tokenRegion->lmSwitchHandlesError && pdaRun->act == 0 ) {
+ pdaRun->fsm_cs = pdaRun->fsm_tables->error_state;
+ }
+ else {
+ /* Pick the token whose longest-match id equals the recorded act. */
+ for ( TokenInstanceListReg::Iter lmi = item->tokenRegion->tokenInstanceList;
+ lmi.lte(); lmi++ )
+ {
+ if ( lmi->inLmSelect && pdaRun->act == lmi->longestMatchId )
+ pdaRun->matched_token = lmi->tokenDef->tdLangEl->id;
+ }
+ }
+ pdaRun->return_result = true;
+ pdaRun->skip_tokpref = true;
+ break;
+ case InlineItem::LmOnLast:
+ /* The character at p belongs to the token: step past it first. */
+ pdaRun->p += 1;
+ pdaRun->tokend = pdaRun->tokpref + ( pdaRun->p - pdaRun->start );
+ pdaRun->matched_token = item->longestMatchPart->tokenDef->tdLangEl->id;
+ pdaRun->return_result = true;
+ break;
+ case InlineItem::LmOnNext:
+ pdaRun->tokend = pdaRun->tokpref + ( pdaRun->p - pdaRun->start );
+ pdaRun->matched_token = item->longestMatchPart->tokenDef->tdLangEl->id;
+ pdaRun->return_result = true;
+ break;
+ case InlineItem::LmOnLagBehind:
+ pdaRun->matched_token = item->longestMatchPart->tokenDef->tdLangEl->id;
+ pdaRun->return_result = true;
+ pdaRun->skip_tokpref = true;
+ break;
+ }
+ }
+
+ /* markId is one-based here: slot markId-1 receives the position. */
+ if ( genAction->markType == MarkMark )
+ pdaRun->mark[genAction->markId-1] = pdaRun->p;
+}
+
+/* Table-driven scanner loop. Starting from pdaRun->fsm_cs it consumes
+ * characters from pdaRun->p up to pdaRun->pe. For each character it runs
+ * the from-state actions, looks the character up in the state's single
+ * keys and then its range keys, takes the selected transition, runs the
+ * transition's actions and finally the to-state actions of the new state.
+ * The loop stops on the error state, at the end of the buffer, or as soon
+ * as an action sets pdaRun->return_result. At the end of the buffer, when
+ * pdaRun->scan_eof is set, the eof actions of the current state are run.
+ * Unless an action asked to skip it, pdaRun->tokpref is advanced by the
+ * number of characters consumed.
+ *
+ * pdaRun: the run state, including the tables in pdaRun->fsm_tables.
+ * inputStream: not referenced in this function. */
+extern "C" void internalFsmExecute( struct pda_run *pdaRun, struct input_impl *inputStream )
+{
+ int _klen;
+ unsigned int _trans;
+ const long *_acts;
+ unsigned int _nacts;
+ const char *_keys;
+
+ pdaRun->start = pdaRun->p;
+
+ /* Init the token match to nothing (the sentinel). */
+ pdaRun->matched_token = 0;
+
+/*_resume:*/
+ if ( pdaRun->fsm_cs == pdaRun->fsm_tables->error_state )
+ goto out;
+
+ if ( pdaRun->p == pdaRun->pe )
+ goto out;
+
+_loop_head:
+ /* From-state actions: the entry at the state's offset is a count,
+ * followed by that many indices into action_switch. */
+ _acts = pdaRun->fsm_tables->actions + pdaRun->fsm_tables->from_state_actions[pdaRun->fsm_cs];
+ _nacts = (unsigned int) *_acts++;
+ while ( _nacts-- > 0 )
+ execAction( pdaRun, pdaRun->fsm_tables->action_switch[*_acts++] );
+
+ _keys = pdaRun->fsm_tables->trans_keys + pdaRun->fsm_tables->key_offsets[pdaRun->fsm_cs];
+ _trans = pdaRun->fsm_tables->index_offsets[pdaRun->fsm_cs];
+
+ /* Binary search the single keys of the state. */
+ _klen = pdaRun->fsm_tables->single_lengths[pdaRun->fsm_cs];
+ if ( _klen > 0 ) {
+ const char *_lower = _keys;
+ const char *_mid;
+ const char *_upper = _keys + _klen - 1;
+ while (1) {
+ if ( _upper < _lower )
+ break;
+
+ _mid = _lower + ((_upper-_lower) >> 1);
+ if ( (*pdaRun->p) < *_mid )
+ _upper = _mid - 1;
+ else if ( (*pdaRun->p) > *_mid )
+ _lower = _mid + 1;
+ else {
+ _trans += (_mid - _keys);
+ goto _match;
+ }
+ }
+ /* No single key matched: skip the singles in both tables. */
+ _keys += _klen;
+ _trans += _klen;
+ }
+
+ /* Binary search the range keys, which are laid out as (low, high) pairs.
+ * The mask keeps _mid on the first element of a pair. */
+ _klen = pdaRun->fsm_tables->range_lengths[pdaRun->fsm_cs];
+ if ( _klen > 0 ) {
+ const char *_lower = _keys;
+ const char *_mid;
+ const char *_upper = _keys + (_klen<<1) - 2;
+ while (1) {
+ if ( _upper < _lower )
+ break;
+
+ _mid = _lower + (((_upper-_lower) >> 1) & ~1);
+ if ( (*pdaRun->p) < _mid[0] )
+ _upper = _mid - 2;
+ else if ( (*pdaRun->p) > _mid[1] )
+ _lower = _mid + 2;
+ else {
+ _trans += ((_mid - _keys)>>1);
+ goto _match;
+ }
+ }
+ /* No range matched either: _trans moves to the slot after the ranges,
+ * presumably the state's default transition. */
+ _trans += _klen;
+ }
+
+_match:
+ pdaRun->fsm_cs = pdaRun->fsm_tables->transTargsWI[_trans];
+
+ /* A zero entry means the transition has no actions. */
+ if ( pdaRun->fsm_tables->transActionsWI[_trans] == 0 )
+ goto _again;
+
+ pdaRun->return_result = false;
+ pdaRun->skip_tokpref = false;
+ _acts = pdaRun->fsm_tables->actions + pdaRun->fsm_tables->transActionsWI[_trans];
+ _nacts = (unsigned int) *_acts++;
+ while ( _nacts-- > 0 )
+ execAction( pdaRun, pdaRun->fsm_tables->action_switch[*_acts++] );
+ /* An action asked to return: leave, with or without the prefix update. */
+ if ( pdaRun->return_result ) {
+ if ( pdaRun->skip_tokpref )
+ goto skip_tokpref;
+ goto final;
+ }
+
+_again:
+ _acts = pdaRun->fsm_tables->actions + pdaRun->fsm_tables->to_state_actions[pdaRun->fsm_cs];
+ _nacts = (unsigned int) *_acts++;
+ while ( _nacts-- > 0 )
+ execAction( pdaRun, pdaRun->fsm_tables->action_switch[*_acts++] );
+
+ if ( pdaRun->fsm_cs == pdaRun->fsm_tables->error_state )
+ goto out;
+
+ if ( ++pdaRun->p != pdaRun->pe )
+ goto _loop_head;
+out:
+ if ( pdaRun->scan_eof ) {
+ pdaRun->return_result = false;
+ pdaRun->skip_tokpref = false;
+ /* The eof actions are looked up for the state we are leaving, before
+ * the eof target replaces the current state. */
+ _acts = pdaRun->fsm_tables->actions + pdaRun->fsm_tables->eof_actions[pdaRun->fsm_cs];
+ _nacts = (unsigned int) *_acts++;
+
+ if ( pdaRun->fsm_tables->eof_targs[pdaRun->fsm_cs] >= 0 )
+ pdaRun->fsm_cs = pdaRun->fsm_tables->eof_targs[pdaRun->fsm_cs];
+
+ while ( _nacts-- > 0 )
+ execAction( pdaRun, pdaRun->fsm_tables->action_switch[*_acts++] );
+ if ( pdaRun->return_result ) {
+ if ( pdaRun->skip_tokpref )
+ goto skip_tokpref;
+ goto final;
+ }
+ }
+
+final:
+
+ /* Add what was consumed in this call to the token prefix length. */
+ if ( pdaRun->p != 0 )
+ pdaRun->tokpref += pdaRun->p - pdaRun->start;
+skip_tokpref:
+ {}
+}
diff --git a/src/fsmgraph.cc b/src/fsmgraph.cc
new file mode 100644
index 00000000..8cbfe29c
--- /dev/null
+++ b/src/fsmgraph.cc
@@ -0,0 +1,981 @@
+/*
+ * Copyright 2006-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "fsmgraph.h"
+
+#include <assert.h>
+
+#include <iostream>
+
+using std::cerr;
+using std::endl;
+
+/* Allocate a fresh state and register it with this graph. While misfit
+ * accounting is on, the state is appended to the misfit list (a brand new
+ * state has no foreign in-transitions); otherwise it is appended to the
+ * regular state list. The new state is returned. */
+FsmState *FsmGraph::addState()
+{
+    FsmState *created = new FsmState();
+
+    /* Choose the owning list according to the accounting mode. */
+    if ( !misfitAccounting )
+        stateList.append( created );
+    else
+        misfitList.append( created );
+
+    return created;
+}
+
+/* Build a machine that matches the key sequence str[0..len-1]. The result is
+ * a chain of len+1 states: a start state, then one new state per key, each
+ * reached from its predecessor by a single-key transition. The last state in
+ * the chain is made final. */
+void FsmGraph::concatFsm( Key *str, int len )
+{
+    /* The chain begins at a fresh start state. */
+    FsmState *tail = addState();
+    setStartState( tail );
+
+    /* Grow the chain by one state for every key. */
+    int pos = 0;
+    while ( pos < len ) {
+        FsmState *next = addState();
+        attachNewTrans( tail, next, str[pos], str[pos] );
+        tail = next;
+        pos += 1;
+    }
+
+    /* Whatever state ends the chain accepts. */
+    setFinState( tail );
+}
+
+/* Case insensitive version of concatFsm. Builds a chain of len+1 states where
+ * step i can be taken on str[i] itself, on its upper-case form when str[i] is
+ * lower case, and on its lower-case form when str[i] is upper case. The last
+ * state of the chain is made final.
+ *
+ * str: the keys to match, in order.
+ * len: the number of keys in str. */
+void FsmGraph::concatFsmCI( Key *str, int len )
+{
+    /* Make the first state and set it as the start state. */
+    FsmState *last = addState();
+    setStartState( last );
+
+    /* Attach subsequent states. */
+    for ( int i = 0; i < len; i++ ) {
+        FsmState *newState = addState();
+
+        /* Collect every key that is allowed at this position. */
+        KeySet keySet;
+        if ( str[i].isLower() )
+            keySet.insert( str[i].toUpper() );
+        if ( str[i].isUpper() )
+            keySet.insert( str[i].toLower() );
+        keySet.insert( str[i] );
+
+        /* One single-key transition per collected key. The index has its own
+         * name so that it does not shadow the position counter i. */
+        for ( int k = 0; k < keySet.length(); k++ )
+            attachNewTrans( last, newState, keySet[k], keySet[k] );
+
+        last = newState;
+    }
+
+    /* Make the last state the final state. */
+    setFinState( last );
+}
+
+/* Build a machine that matches exactly one key: a start state, a final state
+ * and a single transition on chr between them. */
+void FsmGraph::concatFsm( Key chr )
+{
+    /* First state is the start state. */
+    FsmState *begin = addState();
+    setStartState( begin );
+
+    /* Second state is the accepting state. */
+    FsmState *end = addState();
+    setFinState( end );
+
+    /* Join the two on the given key. */
+    attachNewTrans( startState, end, chr, chr );
+}
+
+/* Build a machine that matches any single key from set. The machine has a
+ * start state, a final state and len single-key transitions between them.
+ * The keys must be strictly increasing (ordered and free of duplicates); this
+ * is checked with assert. */
+void FsmGraph::orFsm( Key *set, int len )
+{
+    /* Start state first, then the accepting state. */
+    FsmState *begin = addState();
+    setStartState( begin );
+
+    FsmState *end = addState();
+    setFinState( end );
+
+    /* Verify the ordering of the whole set before attaching anything. */
+    int check = 1;
+    while ( check < len ) {
+        assert( set[check-1] < set[check] );
+        check += 1;
+    }
+
+    /* One transition from start to end for each key. */
+    int pos = 0;
+    while ( pos < len ) {
+        attachNewTrans( startState, end, set[pos], set[pos] );
+        pos += 1;
+    }
+}
+
+/* Build a machine that matches one key in the range low..high. The machine
+ * has a start state, a final state and a single range transition between
+ * them. Callers are expected to pass low <= high; nothing here checks it. */
+void FsmGraph::rangeFsm( Key low, Key high )
+{
+    /* Start state first, then the accepting state. */
+    FsmState *begin = addState();
+    setStartState( begin );
+
+    FsmState *end = addState();
+    setFinState( end );
+
+    /* A single transition covers the whole range. */
+    attachNewTrans( startState, end, low, high );
+}
+
+/* Build a machine that matches any number of keys from the range low..high:
+ * one state that is both start and final, looping to itself on the range. */
+void FsmGraph::rangeStarFsm( Key low, Key high)
+{
+    /* The only state is the start state and it accepts. */
+    FsmState *only = addState();
+    setStartState( only );
+    setFinState( startState );
+
+    /* Self loop on the range. */
+    attachNewTrans( startState, startState, low, high );
+}
+
+/* Build a machine that matches only the empty string: a single state with no
+ * transitions that is both the start state and a final state. */
+void FsmGraph::lambdaFsm( )
+{
+    FsmState *only = addState();
+    setStartState( only );
+
+    /* The start state itself accepts. */
+    setFinState( startState );
+}
+
+/* Build a machine that matches nothing at all: a single start state with no
+ * transitions, which is not made final. */
+void FsmGraph::emptyFsm( )
+{
+    /* One state, start only. No final state is set. */
+    FsmState *only = addState();
+    setStartState( only );
+}
+
+/* Apply srcState's out action table and out priority table to every
+ * transition leaving destState that has a target state. */
+void FsmGraph::transferOutData( FsmState *destState, FsmState *srcState )
+{
+    for ( TransList::Iter trans = destState->outList; trans.lte(); trans++ ) {
+        /* Transitions without a target are left untouched. */
+        if ( trans->toState == 0 )
+            continue;
+
+        /* Actions first, then priorities. */
+        trans->actionTable.setActions( srcState->outActionTable );
+        trans->priorTable.setPriors( srcState->outPriorTable );
+    }
+}
+
+/* Kleene star operator. Makes this machine the kleene star of itself. The
+ * transitions that go out of a final state and back into the start of the
+ * machine are drawn in with mergeStatesLeaving, so a final state that has out
+ * data gets that data transferred onto the transitions it picks up.
+ *
+ * Outline: a new start state is created and the previous start state is
+ * merged into it, the new start state is merged into every final state, the
+ * new start state is made final, then pending merges are filled in and the
+ * misfits are removed. */
+void FsmGraph::starOp( )
+{
+ /* For the merging process. */
+ MergeData md;
+
+ /* Turn on misfit accounting to possibly catch the old start state. */
+ setMisfitAccounting( true );
+
+ /* Create the new start state. It will be set final after the merging
+ * of the final states with the start state is complete. */
+ FsmState *prevStartState = startState;
+ unsetStartState();
+ setStartState( addState() );
+
+ /* Merge the new start state with the old one to isolate it. */
+ mergeStates( md, startState, prevStartState );
+
+ /* Merge the start state into all final states. Except the start state on
+ * the first pass. If the start state is set final we will be doubling up
+ * its transitions, which will get transferred to any final states that
+ * follow it in the final state set. This will be determined by the order
+ * of items in the final state set. To prevent this we just merge with the
+ * start on a second pass. */
+ for ( StateSet::Iter st = finStateSet; st.lte(); st++ ) {
+ if ( *st != startState )
+ mergeStatesLeaving( md, *st, startState );
+ }
+
+ /* Now it is safe to merge the start state with itself (provided it
+ * is set final). */
+ if ( startState->isFinState() )
+ mergeStatesLeaving( md, startState, startState );
+
+ /* Now ensure the new start state is a final state. */
+ setFinState( startState );
+
+ /* Fill in any states that were newed up as combinations of others. */
+ fillInStates( md );
+
+ /* Remove the misfits and turn off misfit accounting. */
+ removeMisfits();
+ setMisfitAccounting( false );
+}
+
+/* Turn this machine into exactly `times` concatenated copies of itself.
+ * times must be at least 1 (asserted); a count of 1 leaves the machine as it
+ * is. */
+void FsmGraph::repeatOp( int times )
+{
+    /* Zero would need the null machine, which cannot be produced in place. */
+    assert( times > 0 );
+
+    if ( times != 1 ) {
+        /* Pristine copy that every duplicate is cloned from. */
+        FsmGraph *copyFrom = new FsmGraph( *this );
+
+        /* Copies two through times-1 are fresh duplicates of the template. */
+        for ( int copy = 2; copy < times; copy++ ) {
+            FsmGraph *dup = new FsmGraph( *copyFrom );
+            doConcat( dup, 0, false );
+        }
+
+        /* The template itself serves as the last copy. */
+        doConcat( copyFrom, 0, false );
+    }
+}
+
+/* Turn this machine into between zero and `times` concatenated copies of
+ * itself. times must be at least 1 (asserted). The start state is made final
+ * so that zero copies are accepted, and every concatenation is done in the
+ * optional style so the final states of earlier copies stay final. */
+void FsmGraph::optionalRepeatOp( int times )
+{
+    /* Must be 1 and up. 0 produces null machine and requires deleting this. */
+    assert( times > 0 );
+
+    /* A repeat of one optional merely allows zero string. */
+    if ( times == 1 ) {
+        setFinState( startState );
+        return;
+    }
+
+    /* Make a machine to make copies from. */
+    FsmGraph *copyFrom = new FsmGraph( *this );
+
+    /* The state set used in the from end of the concatenation. Starts with
+     * the initial final state set, then after each concatenation, gets set to
+     * the final states that come from the duplicate. */
+    StateSet lastFinSet( finStateSet );
+
+    /* Set the initial state to final to allow zero copies. */
+    setFinState( startState );
+
+    /* Concatenate duplicates onto the end up until before the last. */
+    for ( int i = 1; i < times-1; i++ ) {
+        /* Make a duplicate for concating and set the fin bits to graph 2 so we
+         * can pick out its final states after the optional style concat. */
+        FsmGraph *dup = new FsmGraph( *copyFrom );
+        dup->setFinBits( SB_GRAPH2 );
+        doConcat( dup, &lastFinSet, true );
+
+        /* Clear the last final state set and make the new one by taking only
+         * the final states that come from graph 2. The index has its own name
+         * so that it does not shadow the repeat counter i. */
+        lastFinSet.empty();
+        for ( int f = 0; f < finStateSet.length(); f++ ) {
+            /* If the state came from graph 2, add it to the last set and clear
+             * the bits. */
+            FsmState *fs = finStateSet[f];
+            if ( fs->stateBits & SB_GRAPH2 ) {
+                lastFinSet.insert( fs );
+                fs->stateBits &= ~SB_GRAPH2;
+            }
+        }
+    }
+
+    /* Now use the copyFrom on the end, no bits set, no bits to clear. */
+    doConcat( copyFrom, &lastFinSet, true );
+}
+
+
+/* Fsm concatenation worker. Appends other to this machine and deletes other.
+ *
+ * other: the machine to append. Its entry points, states and final states
+ *     are moved into this machine before it is deleted.
+ * fromStates: the states of this machine that other's start state is merged
+ *     into. When null, a copy of this machine's final state set taken before
+ *     it is modified is used instead.
+ * optional: when true the final states of this machine are left final; when
+ *     false they are all unset before other's final states are brought in. */
+void FsmGraph::doConcat( FsmGraph *other, StateSet *fromStates, bool optional )
+{
+    /* For the merging process. */
+    StateSet finStateSetCopy;
+    MergeData md;
+
+    /* Turn on misfit accounting for both graphs. */
+    setMisfitAccounting( true );
+    other->setMisfitAccounting( true );
+
+    /* Get the other's start state. */
+    FsmState *otherStartState = other->startState;
+
+    /* Unset other's start state before bringing in the entry points. */
+    other->unsetStartState();
+
+    /* Bring in the rest of other's entry points. */
+    copyInEntryPoints( other );
+    other->entryPoints.empty();
+
+    /* Bring in other's states into our state lists. */
+    stateList.append( other->stateList );
+    misfitList.append( other->misfitList );
+
+    /* If from states is not set, then get a copy of our final state set before
+     * we clobber it and use it instead. */
+    if ( fromStates == 0 ) {
+        finStateSetCopy = finStateSet;
+        fromStates = &finStateSetCopy;
+    }
+
+    /* Unset all of our final states and get the final states from other. */
+    if ( !optional )
+        unsetAllFinStates();
+    finStateSet.insert( other->finStateSet );
+
+    /* Other's states now live in our lists, so the fsm can be deleted here.
+     * otherStartState is still used below. */
+    delete other;
+
+    /* Merge our former final states with the start state of other. */
+    for ( int i = 0; i < fromStates->length(); i++ ) {
+        FsmState *state = fromStates->data[i];
+
+        /* Merge the former final state with other's start state. */
+        mergeStatesLeaving( md, state, otherStartState );
+
+        /* If the former final state was not reset final then we must clear
+         * the state's out trans data. If it got reset final then it gets to
+         * keep its out trans data. This must be done before fillInStates gets
+         * called to prevent the data from being sourced. */
+        if ( ! state->isFinState() )
+            clearOutData( state );
+    }
+
+    /* Fill in any new states made from merging. */
+    fillInStates( md );
+
+    /* Remove the misfits and turn off misfit accounting. */
+    removeMisfits();
+    setMisfitAccounting( false );
+}
+
+/* Concatenate other onto the end of this machine. Other is deleted. This is
+ * the plain (non-optional) form of doConcat: the current final states of this
+ * machine are the states that other's start state gets merged into. */
+void FsmGraph::concatOp( FsmGraph *other )
+{
+    /* Null selects this machine's own final state set as the from states. */
+    StateSet *const fromOwnFinals = 0;
+
+    /* Not optional: the former final states do not stay final. */
+    const bool keepFinals = false;
+
+    doConcat( other, fromOwnFinals, keepFinals );
+}
+
+
+void FsmGraph::doOr( FsmGraph *other )
+{
+ /* For the merging process. This routine is the worker called by unionOp,
+ * intersectOp and subtractOp: it moves everything from other into this
+ * machine, deletes other, and merges both original start states into a
+ * newly created start state. It does not switch misfit accounting on or
+ * off itself. */
+ MergeData md;
+
+ /* Build a state set consisting of both start states */
+ StateSet startStateSet;
+ startStateSet.insert( startState );
+ startStateSet.insert( other->startState );
+
+ /* Both of the original start states lose their start state status. */
+ unsetStartState();
+ other->unsetStartState();
+
+ /* Bring in the rest of other's entry points. */
+ copyInEntryPoints( other );
+ other->entryPoints.empty();
+
+ /* Merge the lists. This will move all the states from other
+ * into this. No states will be deleted. */
+ stateList.append( other->stateList );
+ misfitList.append( other->misfitList );
+
+ /* Move the final set data from other into this. */
+ finStateSet.insert(other->finStateSet);
+ other->finStateSet.empty();
+
+ /* Since other's list is empty, we can delete the fsm without
+ * affecting any states. */
+ delete other;
+
+ /* Create a new start state. */
+ setStartState( addState() );
+
+ /* Merge the start states. */
+ mergeStates( md, startState, startStateSet.data, startStateSet.length() );
+
+ /* Fill in any new states made from merging. */
+ fillInStates( md );
+}
+
+/* Unions other with this machine. Other is deleted. The merging itself is
+ * done by doOr; this wrapper only brackets it with misfit accounting. */
+void FsmGraph::unionOp( FsmGraph *other )
+{
+ /* Turn on misfit accounting for both graphs. */
+ setMisfitAccounting( true );
+ other->setMisfitAccounting( true );
+
+ /* Call Worker routine. */
+ doOr( other );
+
+ /* Remove the misfits and turn off misfit accounting. */
+ removeMisfits();
+ setMisfitAccounting( false );
+}
+
+/* Intersects other with this machine. Other is deleted. The final states of
+ * this machine are tagged SB_GRAPH1 and those of other SB_GRAPH2 before the
+ * two are or'ed together; afterwards unsetIncompleteFinals unsets every final
+ * state that carries only one of the two tags. */
+void FsmGraph::intersectOp( FsmGraph *other )
+{
+ /* Turn on misfit accounting for both graphs. */
+ setMisfitAccounting( true );
+ other->setMisfitAccounting( true );
+
+ /* Set the fin bits on this and other to want each other. */
+ setFinBits( SB_GRAPH1 );
+ other->setFinBits( SB_GRAPH2 );
+
+ /* Call worker Or routine. */
+ doOr( other );
+
+ /* Unset any final states that are no longer to
+ * be final due to final bits. */
+ unsetIncompleteFinals();
+
+ /* Remove the misfits and turn off misfit accounting. */
+ removeMisfits();
+ setMisfitAccounting( false );
+
+ /* Remove states that have no path to a final state. */
+ removeDeadEndStates();
+}
+
+/* Set subtracts other machine from this machine. Other is deleted. The final
+ * states of other are tagged SB_GRAPH1 before the two machines are or'ed
+ * together; afterwards unsetKilledFinals unsets every final state that
+ * carries that tag. */
+void FsmGraph::subtractOp( FsmGraph *other )
+{
+ /* Turn on misfit accounting for both graphs. */
+ setMisfitAccounting( true );
+ other->setMisfitAccounting( true );
+
+ /* Set the fin bits of other to be killers. */
+ other->setFinBits( SB_GRAPH1 );
+
+ /* Call worker Or routine. */
+ doOr( other );
+
+ /* Unset any final states that are no longer to
+ * be final due to final bits. */
+ unsetKilledFinals();
+
+ /* Remove the misfits and turn off misfit accounting. */
+ removeMisfits();
+ setMisfitAccounting( false );
+
+ /* Remove states that have no path to a final state. */
+ removeDeadEndStates();
+}
+
+/* Report whether state is the target of some element of eptVect. A null
+ * vector contains nothing, so the answer is false. */
+bool FsmGraph::inEptVect( EptVect *eptVect, FsmState *state )
+{
+    /* No vector, no match. */
+    if ( eptVect == 0 )
+        return false;
+
+    /* Linear scan over the recorded targets. */
+    int idx = 0;
+    while ( idx < eptVect->length() ) {
+        if ( eptVect->data[idx].targ == state )
+            return true;
+        idx += 1;
+    }
+
+    return false;
+}
+
+/* Fill epsilon vectors in a root state from a given starting point. Employs
+ * a depth first search through the graph of epsilon transitions.
+ *
+ * root: the state whose eptVect is being filled. The vector is allocated on
+ *     first use.
+ * from: the state whose epsilon transitions are walked on this call.
+ * parentLeaving: true when a step already taken on the way from root to
+ *     from was recorded as leaving; every target found is then recorded as
+ *     leaving too. */
+void FsmGraph::epsilonFillEptVectFrom( FsmState *root, FsmState *from, bool parentLeaving )
+{
+ /* Walk the epsilon transitions out of the state. */
+ for ( EpsilonTrans::Iter ep = from->epsilonTrans; ep.lte(); ep++ ) {
+ /* Find the entry point, if it does not resolve, ignore it. */
+ EntryMapEl *enLow, *enHigh;
+ if ( entryPoints.findMulti( *ep, enLow, enHigh ) ) {
+ /* Loop the targets. */
+ for ( EntryMapEl *en = enLow; en <= enHigh; en++ ) {
+ /* Skip a target that is the state being walked (from) and
+ * targets that are already in root's eptVect. */
+ FsmState *targ = en->value;
+ if ( targ != from && !inEptVect(root->eptVect, targ) ) {
+ /* Maybe need to create the eptVect. */
+ if ( root->eptVect == 0 )
+ root->eptVect = new EptVect();
+
+ /* If moving to a different graph or if any parent is
+ * leaving then we are leaving. */
+ bool leaving = parentLeaving ||
+ root->owningGraph != targ->owningGraph;
+
+ /* All ok, add the target epsilon and recurse. */
+ root->eptVect->append( EptVectEl(targ, leaving) );
+ epsilonFillEptVectFrom( root, targ, leaving );
+ }
+ }
+ }
+ }
+}
+
+void FsmGraph::shadowReadWriteStates( MergeData &md )
+{
+ /* Init isolatedShadow algorithm data. Overall this routine looks for
+ * states that appear as a target in some state's eptVect (they will be
+ * read from) and that also have an eptVect of their own (they will be
+ * written to). Each such state gets a shadow: a new state it is merged
+ * into, which then replaces it in the eptVect entries that named it. */
+ for ( StateList::Iter st = stateList; st.lte(); st++ )
+ st->isolatedShadow = 0;
+
+ /* Any states that may be both read from and written to must
+ * be shadowed. */
+ for ( StateList::Iter st = stateList; st.lte(); st++ ) {
+ /* Find such states by looping through the eptVect lists, which give us
+ * the states that will be read from. May cause us to visit the states
+ * that we are interested in more than once. */
+ if ( st->eptVect != 0 ) {
+ /* For all states that will be read from. */
+ for ( EptVect::Iter ept = *st->eptVect; ept.lte(); ept++ ) {
+ /* Check for read and write to the same state. */
+ FsmState *targ = ept->targ;
+ if ( targ->eptVect != 0 ) {
+ /* State is to be written to, if the shadow is not already
+ * there, create it. */
+ if ( targ->isolatedShadow == 0 ) {
+ FsmState *shadow = addState();
+ mergeStates( md, shadow, targ );
+ targ->isolatedShadow = shadow;
+ }
+
+ /* Write shadow into the state vector so that it is the
+ * state that the epsilon transition will read from. */
+ ept->targ = targ->isolatedShadow;
+ }
+ }
+ }
+ }
+}
+
+void FsmGraph::resolveEpsilonTrans( MergeData &md )
+{
+ /* Walk the state list and invoke recursive worker on each state. This
+ * is the first of three passes: build each state's eptVect, shadow the
+ * states that would be both read from and written to, then merge every
+ * eptVect target into its state and discard the epsilon data. */
+ for ( StateList::Iter st = stateList; st.lte(); st++ )
+ epsilonFillEptVectFrom( st, st, false );
+
+ /* Prevent reading from and writing to of the same state. */
+ shadowReadWriteStates( md );
+
+ /* For all states that have epsilon transitions out, draw the transitions,
+ * clear the epsilon transitions. */
+ for ( StateList::Iter st = stateList; st.lte(); st++ ) {
+ /* If there is a state vector, then create the pre-merge state. */
+ if ( st->eptVect != 0 ) {
+ /* Merge all the epsilon targets into the state. Targets recorded
+ * as leaving go through mergeStatesLeaving. */
+ for ( EptVect::Iter ept = *st->eptVect; ept.lte(); ept++ ) {
+ if ( ept->leaving )
+ mergeStatesLeaving( md, st, ept->targ );
+ else
+ mergeStates( md, st, ept->targ );
+ }
+
+ /* Clean up the target list. */
+ delete st->eptVect;
+ st->eptVect = 0;
+ }
+
+ /* Clear the epsilon transitions vector. */
+ st->epsilonTrans.empty();
+ }
+}
+
+/* Resolve the epsilon transitions of this machine on its own. Every state is
+ * given owning graph zero first, then the epsilon targets are merged in, the
+ * pending merges are filled in and the misfits are removed. */
+void FsmGraph::epsilonOp()
+{
+    /* Scratch data for the merges. */
+    MergeData md;
+
+    setMisfitAccounting( true );
+
+    /* All states belong to the same owning graph. */
+    StateList::Iter st = stateList;
+    while ( st.lte() ) {
+        st->owningGraph = 0;
+        st++;
+    }
+
+    /* Draw in the epsilon targets. */
+    resolveEpsilonTrans( md );
+
+    /* Complete any combination states queued by the merges. */
+    fillInStates( md );
+
+    /* Drop the misfits, then switch the accounting back off. */
+    removeMisfits();
+    setMisfitAccounting( false );
+}
+
+/* Make a new machine by joining together a bunch of machines without making
+ * any transitions between them. A negative finalId results in there being no
+ * final id.
+ *
+ * startId: entry id whose entry points are merged into the new start state.
+ *     If no entry point has this id the new start state is left empty.
+ * finalId: when non-negative, a new final state is created and given an
+ *     entry on this id.
+ * others: the machines to bring into this one. Each is deleted.
+ * numOthers: the number of machines in others. */
+void FsmGraph::joinOp( int startId, int finalId, FsmGraph **others, int numOthers )
+{
+ /* For the merging process. */
+ MergeData md;
+
+ /* Set the owning machines. Start at one. Zero is reserved for the start
+ * and final states. */
+ for ( StateList::Iter st = stateList; st.lte(); st++ )
+ st->owningGraph = 1;
+ for ( int m = 0; m < numOthers; m++ ) {
+ for ( StateList::Iter st = others[m]->stateList; st.lte(); st++ )
+ st->owningGraph = 2+m;
+ }
+
+ /* All machines lose start state status. */
+ unsetStartState();
+ for ( int m = 0; m < numOthers; m++ )
+ others[m]->unsetStartState();
+
+ /* Bring the other machines into this. */
+ for ( int m = 0; m < numOthers; m++ ) {
+ /* Bring in the rest of other's entry points. */
+ copyInEntryPoints( others[m] );
+ others[m]->entryPoints.empty();
+
+ /* Merge the lists. This will move all the states from other into
+ * this. No states will be deleted. */
+ stateList.append( others[m]->stateList );
+ assert( others[m]->misfitList.length() == 0 );
+
+ /* Move the final set data from other into this. */
+ finStateSet.insert( others[m]->finStateSet );
+ others[m]->finStateSet.empty();
+
+ /* Since other's list is empty, we can delete the fsm without
+ * affecting any states. */
+ delete others[m];
+ }
+
+ /* Look up the start entry point. */
+ EntryMapEl *enLow = 0, *enHigh = 0;
+ bool findRes = entryPoints.findMulti( startId, enLow, enHigh );
+ if ( ! findRes ) {
+ /* No start state. Set a default one and proceed with the join. Note
+ * that the result of the join will be a very uninteresting machine. */
+ setStartState( addState() );
+ }
+ else {
+ /* There is at least one start state, create a state that will become
+ * the new start state. */
+ FsmState *newStart = addState();
+ setStartState( newStart );
+
+ /* The start state is in an owning machine class all its own. */
+ newStart->owningGraph = 0;
+
+ /* Create the set of states to merge from. */
+ StateSet stateSet;
+ for ( EntryMapEl *en = enLow; en <= enHigh; en++ )
+ stateSet.insert( en->value );
+
+ /* Merge in the set of start states into the new start state. */
+ mergeStates( md, newStart, stateSet.data, stateSet.length() );
+ }
+
+ /* Take a copy of the final state set, before unsetting them all. This
+ * will allow us to call clearOutData on the states that don't get
+ * final state status back. */
+ StateSet finStateSetCopy = finStateSet;
+
+ /* Now all final states are unset. */
+ unsetAllFinStates();
+
+ if ( finalId >= 0 ) {
+ /* Create the implicit final state. */
+ FsmState *finState = addState();
+ setFinState( finState );
+
+ /* Assign an entry into the final state on the final state entry id. Note
+ * that there may already be an entry on this id. That's ok. Also set the
+ * final state owning machine id. It's in a class all its own. */
+ setEntry( finalId, finState );
+ finState->owningGraph = 0;
+ }
+
+ /* Hand over to workers for resolving epsilon trans. This will merge states
+ * with the targets of their epsilon transitions. */
+ resolveEpsilonTrans( md );
+
+ /* Clear the out data of any former final states that did not get
+ * final state status back. */
+ for ( StateSet::Iter st = finStateSetCopy; st.lte(); st++ ) {
+ if ( !((*st)->stateBits & SB_ISFINAL) )
+ clearOutData( *st );
+ }
+
+ /* Fill in any new states made from merging. */
+ fillInStates( md );
+
+ /* Joining can be messy. Instead of having misfit accounting on (which is
+ * tricky here) do a full cleaning. */
+ removeUnreachableStates();
+}
+
+/* Move the contents of the other machines into this one without connecting
+ * anything. Every other machine loses its start state, has its entry points,
+ * states and final states brought into this machine, and is then deleted. */
+void FsmGraph::globOp( FsmGraph **others, int numOthers )
+{
+    /* First pass: none of the others keeps a start state. */
+    int idx = 0;
+    while ( idx < numOthers ) {
+        others[idx]->unsetStartState();
+        idx += 1;
+    }
+
+    /* Second pass: absorb the others one at a time. */
+    for ( int m = 0; m < numOthers; m++ ) {
+        FsmGraph *other = others[m];
+
+        /* Entry points come over first. */
+        copyInEntryPoints( other );
+        other->entryPoints.empty();
+
+        /* The states are moved, not deleted. Other must have no misfits. */
+        stateList.append( other->stateList );
+        assert( other->misfitList.length() == 0 );
+
+        /* Final states come over as well. */
+        finStateSet.insert( other->finStateSet );
+        other->finStateSet.empty();
+
+        /* Other has been emptied, so deleting it affects no states. */
+        delete other;
+    }
+}
+
+void FsmGraph::deterministicEntry()
+{
+ /* For the merging process. This routine rewrites the entry map so that
+ * each entry key has a single target: a key that had several targets
+ * gets a new state that all of those targets are merged into.
+ * NOTE(review): md is passed to mergeStates below but fillInStates( md )
+ * is never called in this function, unlike starOp, doConcat, doOr,
+ * epsilonOp and joinOp. Confirm whether that is intended. */
+ MergeData md;
+
+ /* States may lose their entry points, turn on misfit accounting. */
+ setMisfitAccounting( true );
+
+ /* Get a copy of the entry map then clear all the entry points. As we
+ * iterate the old entry map finding duplicates we will add the entry
+ * points for the new states that we create. */
+ EntryMap prevEntry = entryPoints;
+ unsetAllEntryPoints();
+
+ for ( int enId = 0; enId < prevEntry.length(); ) {
+ /* Count the number of states on this entry key. */
+ int highId = enId;
+ while ( highId < prevEntry.length() && prevEntry[enId].key == prevEntry[highId].key )
+ highId += 1;
+
+ int numIds = highId - enId;
+ if ( numIds == 1 ) {
+ /* Only a single entry point, just set the entry. */
+ setEntry( prevEntry[enId].key, prevEntry[enId].value );
+ }
+ else {
+ /* Multiple entry points, need to create a new state and merge in
+ * all the targets of entry points. */
+ FsmState *newEntry = addState();
+ for ( int en = enId; en < highId; en++ )
+ mergeStates( md, newEntry, prevEntry[en].value );
+
+ /* Add the new state as the single entry point. */
+ setEntry( prevEntry[enId].key, newEntry );
+ }
+
+ enId += numIds;
+ }
+
+ /* The old start state may be unreachable. Remove the misfits and turn off
+ * misfit accounting. */
+ removeMisfits();
+ setMisfitAccounting( false );
+}
+
+/* Unset every final state that carries the SB_GRAPH1 bit, then clear that
+ * bit on all states that were final when the routine was entered. */
+void FsmGraph::unsetKilledFinals()
+{
+    /* Work from a snapshot, since unsetting changes the real set. */
+    StateSet snapshot( finStateSet );
+
+    for ( int idx = 0; idx < snapshot.length(); idx++ ) {
+        FsmState *candidate = snapshot.data[idx];
+
+        /* A state with the killing bit stops being final. */
+        const bool killed = ( candidate->stateBits & SB_GRAPH1 ) != 0;
+        if ( killed )
+            unsetFinState( candidate );
+
+        /* The bit is cleared whether or not the state was killed. */
+        candidate->stateBits &= ~SB_GRAPH1;
+    }
+}
+
+/* Unset every final state that carries exactly one of the SB_GRAPH1 and
+ * SB_GRAPH2 bits, then clear both bits on all states that were final when the
+ * routine was entered. States with both bits or neither bit stay final. */
+void FsmGraph::unsetIncompleteFinals()
+{
+    /* Work from a snapshot, since unsetting changes the real set. */
+    StateSet snapshot( finStateSet );
+
+    for ( int idx = 0; idx < snapshot.length(); idx++ ) {
+        FsmState *candidate = snapshot.data[idx];
+
+        /* Some wanting bit is set, but not the complete pair. */
+        if ( (candidate->stateBits & SB_BOTH) != 0 &&
+                (candidate->stateBits & SB_BOTH) != SB_BOTH )
+        {
+            unsetFinState( candidate );
+        }
+
+        /* The wanting bits are cleared in every case. */
+        candidate->stateBits &= ~SB_BOTH;
+    }
+}
+
+/* Make sure the start state has no way in other than being the start state.
+ * If isStartStateIsolated reports that this already holds, nothing is done.
+ * Otherwise a new start state is created and the previous start state is
+ * merged into it, so that later changes to the start transitions do not
+ * affect anything that enters the previous start state by another route. */
+void FsmGraph::isolateStartState( )
+{
+    /* Scratch data for the merge. */
+    MergeData md;
+
+    if ( !isStartStateIsolated() ) {
+        /* The previous start state may end up a misfit. */
+        setMisfitAccounting( true );
+
+        /* Swap in a brand new start state. */
+        FsmState *oldStart = startState;
+        unsetStartState();
+        FsmState *freshStart = addState();
+        setStartState( freshStart );
+
+        /* Give the new start state everything the old one had. */
+        mergeStates( md, startState, oldStart );
+
+        /* Merging into an empty state cannot produce conflicting
+         * transitions, so nothing may be waiting to be filled in. */
+        assert( md.stateDict.treeSize == 0 );
+        assert( md.stfillHead == 0 );
+
+        /* Drop the misfits, then switch the accounting back off. */
+        removeMisfits();
+        setMisfitAccounting( false );
+    }
+}
+
+/* Merge srcState into destState as a leaving merge. When destState has out
+ * data, srcState is first merged into a new temporary state, destState's out
+ * data is transferred onto the temporary's transitions, and the temporary is
+ * merged into destState. When destState has no out data this is an ordinary
+ * mergeStates. */
+void FsmGraph::mergeStatesLeaving( MergeData &md, FsmState *destState, FsmState *srcState )
+{
+    if ( hasOutData( destState ) ) {
+        /* Duplicate the source so that it can be modified. */
+        FsmState *scratch = addState();
+        mergeStates( md, scratch, srcState );
+
+        /* Put the out data on the duplicate, then merge the duplicate in. */
+        transferOutData( scratch, destState );
+        mergeStates( md, destState, scratch );
+        return;
+    }
+
+    /* Nothing to transfer, merge directly. */
+    mergeStates( md, destState, srcState );
+}
+
+/* Merge each of the numSrc states in srcStates into destState, in array
+ * order. */
+void FsmGraph::mergeStates( MergeData &md, FsmState *destState,
+        FsmState **srcStates, int numSrc )
+{
+    FsmState **cursor = srcStates;
+    for ( int remaining = numSrc; remaining > 0; remaining-- ) {
+        mergeStates( md, destState, *cursor );
+        cursor += 1;
+    }
+}
+
+void FsmGraph::mergeStates( MergeData &md, FsmState *destState, FsmState *srcState )
+{
+ outTransCopy( md, destState, srcState->outList.head );
+
+ /* Get its bits and final state status. SB_ISFINAL is masked out of the
+ * bits that are or'ed in; finality is carried over by calling setFinState
+ * on the destination instead. */
+ destState->stateBits |= ( srcState->stateBits & ~SB_ISFINAL );
+ if ( srcState->isFinState() )
+ setFinState( destState );
+
+ /* Draw in any properties of srcState into destState. A state merged
+ * with itself is handled separately from the general case. */
+ if ( srcState == destState ) {
+ /* Duplicate the list to protect against write to source. The
+ * priority sets are not copied in because that would have no
+ * effect. */
+ destState->epsilonTrans.append( EpsilonTrans( srcState->epsilonTrans ) );
+
+ /* Get all actions, duplicating to protect against write to source. */
+ destState->toStateActionTable.setActions(
+ ActionTable( srcState->toStateActionTable ) );
+ destState->fromStateActionTable.setActions(
+ ActionTable( srcState->fromStateActionTable ) );
+ destState->outActionTable.setActions( ActionTable( srcState->outActionTable ) );
+ destState->outCondSet.insert( ActionSet( srcState->outCondSet ) );
+ destState->errActionTable.setActions( ErrActionTable( srcState->errActionTable ) );
+ destState->eofActionTable.setActions( ActionTable( srcState->eofActionTable ) );
+ }
+ else {
+ /* Get the epsilons, out priorities. */
+ destState->epsilonTrans.append( srcState->epsilonTrans );
+ destState->outPriorTable.setPriors( srcState->outPriorTable );
+
+ /* Get all actions. */
+ destState->toStateActionTable.setActions( srcState->toStateActionTable );
+ destState->fromStateActionTable.setActions( srcState->fromStateActionTable );
+ destState->outActionTable.setActions( srcState->outActionTable );
+ destState->outCondSet.insert( srcState->outCondSet );
+ destState->errActionTable.setActions( srcState->errActionTable );
+ destState->eofActionTable.setActions( srcState->eofActionTable );
+ }
+}
+
+/* Complete the states that are waiting on md's fill list. Each listed state
+ * has the states in its stateDictEl's state set merged into it; afterwards
+ * the stateDictEl of every listed state is deleted and its pointer reset. */
+void FsmGraph::fillInStates( MergeData &md )
+{
+    /* First walk: perform the pending merges. The next link is read only
+     * after the merge, so states queued by a merge are visited as well. */
+    for ( FsmState *pending = md.stfillHead; pending != 0;
+            pending = pending->alg.next )
+    {
+        StateSet *sources = &pending->stateDictEl->stateSet;
+        mergeStates( md, pending, sources->data, sources->length() );
+    }
+
+    /* Second walk: release the dictionary element of every listed state. */
+    for ( FsmState *filled = md.stfillHead; filled != 0;
+            filled = filled->alg.next )
+    {
+        delete filled->stateDictEl;
+        filled->stateDictEl = 0;
+    }
+
+    /* md.stateDict is not touched here even though all of its elements have
+     * just been deleted. */
+}
diff --git a/src/fsmgraph.h b/src/fsmgraph.h
new file mode 100644
index 00000000..5b357499
--- /dev/null
+++ b/src/fsmgraph.h
@@ -0,0 +1,1321 @@
+/*
+ * Copyright 2001-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _COLM_FSMGRAPH_H
+#define _COLM_FSMGRAPH_H
+
+#include <assert.h>
+
+#include <avltree.h>
+#include <avlmap.h>
+#include <avlset.h>
+#include <bstmap.h>
+#include <vector.h>
+#include <sbstmap.h>
+#include <sbstset.h>
+#include <sbsttable.h>
+#include <bstset.h>
+#include <compare.h>
+#include <dlist.h>
+
+#include "keyops.h"
+
+/* Flags that control merging. */
+#define SB_GRAPH1 0x01
+#define SB_GRAPH2 0x02
+#define SB_BOTH 0x03
+#define SB_ISFINAL 0x04
+#define SB_ISMARKED 0x08
+#define SB_ONLIST 0x10
+
+struct FsmTrans;
+struct FsmState;
+struct FsmGraph;
+struct Action;
+struct TokenInstance;
+struct NameInst;
+
+/* Pair of state links, giving a state list unambiguous access to its own
+ * prev/next pointers. */
+struct FsmListEl
+{
+	FsmState *prev;
+	FsmState *next;
+};
+
+/* Marked-pair index used in minimization. For a pair of states it keeps
+ * track of whether or not that pair has been marked. The member functions
+ * are only declared here. */
+struct MarkIndex
+{
+	MarkIndex( int states );
+	~MarkIndex();
+
+	/* Mark a state pair / ask whether a state pair is marked. */
+	void markPair( int state1, int state2 );
+	bool isPairMarked( int state1, int state2 );
+
+private:
+	int numStates;
+	bool *array;
+};
+
+extern KeyOps *keyOps;
+
+/* Transition Action Element. */
+typedef SBstMapEl< int, Action* > ActionTableEl;
+
+/* Transition action table: maps an ordering (int) to an Action pointer,
+ * with orderings compared by CmpOrd<int>. */
+struct ActionTable
+	: public SBstMap< int, Action*, CmpOrd<int> >
+{
+	/* Set a single action under the given ordering. */
+	void setAction( int ordering, Action *action );
+
+	/* Set several actions, either from parallel arrays of nActs entries or
+	 * from another action table. */
+	void setActions( int *orderings, Action **actions, int nActs );
+	void setActions( const ActionTable &other );
+
+	bool hasAction( Action *action );
+};
+
+typedef SBstSet< Action*, CmpOrd<Action*> > ActionSet;
+typedef CmpSTable< Action*, CmpOrd<Action*> > CmpActionSet;
+
+/* Transition Action Element (TokenInstance variant). */
+typedef SBstMapEl< int, TokenInstance* > LmActionTableEl;
+
+/* Transition action table whose values are TokenInstance pointers: maps an
+ * ordering (int) to a TokenInstance, with orderings compared by CmpOrd<int>. */
+struct LmActionTable
+	: public SBstMap< int, TokenInstance*, CmpOrd<int> >
+{
+	/* Set a single entry under the given ordering. */
+	void setAction( int ordering, TokenInstance *action );
+
+	/* Set entries from another table. */
+	void setActions( const LmActionTable &other );
+};
+
+/* Three-way compare of whole action table elements: the key (ordering) is
+ * compared first and the value (action pointer) breaks ties. */
+struct CmpActionTableEl
+{
+	static int compare( const ActionTableEl &action1,
+			const ActionTableEl &action2 )
+	{
+		/* Primary field: the ordering. */
+		if ( action1.key < action2.key )
+			return -1;
+		if ( action1.key > action2.key )
+			return 1;
+
+		/* Orderings are equal, fall back to the action pointer. */
+		if ( action1.value < action2.value )
+			return -1;
+		if ( action1.value > action2.value )
+			return 1;
+
+		return 0;
+	}
+};
+
+/* Compare for ActionTable. */
+typedef CmpSTable< ActionTableEl, CmpActionTableEl > CmpActionTable;
+
+/* Three-way compare of whole lm action table elements: the key (ordering)
+ * is compared first and the value (TokenInstance pointer) breaks ties. */
+struct CmpLmActionTableEl
+{
+	static int compare( const LmActionTableEl &lmAction1,
+			const LmActionTableEl &lmAction2 )
+	{
+		/* Primary field: the ordering. */
+		if ( lmAction1.key < lmAction2.key )
+			return -1;
+		if ( lmAction1.key > lmAction2.key )
+			return 1;
+
+		/* Orderings are equal, fall back to the value pointer. */
+		if ( lmAction1.value < lmAction2.value )
+			return -1;
+		if ( lmAction1.value > lmAction2.value )
+			return 1;
+
+		return 0;
+	}
+};
+
+/* Compare for LmActionTable. */
+typedef CmpSTable< LmActionTableEl, CmpLmActionTableEl > CmpLmActionTable;
+
+/* Action table element for error action tables. Adds the encoding of transfer
+ * point. */
+struct ErrActionTableEl
+{
+ ErrActionTableEl( Action *action, int ordering, int transferPoint )
+ : ordering(ordering), action(action), transferPoint(transferPoint) { }
+
+ /* Ordering and id of the action embedding. */
+ int ordering;
+ Action *action;
+
+ /* Id of point of transfere from Error action table to transtions and
+ * eofActionTable. */
+ int transferPoint;
+
+ int getKey() const { return ordering; }
+};
+
+/* Table of ErrActionTableEl, keyed on the int returned by getKey() and
+ * compared with CmpOrd<int>. */
+struct ErrActionTable
+	: public SBstTable< ErrActionTableEl, int, CmpOrd<int> >
+{
+	/* Set a single error action with its transfer point. */
+	void setAction( int ordering, Action *action, int transferPoint );
+
+	/* Set error actions from another table. */
+	void setActions( const ErrActionTable &other );
+};
+
+/* Compare of an error action table element (key & value). */
+struct CmpErrActionTableEl
+{
+ static int compare( const ErrActionTableEl &action1,
+ const ErrActionTableEl &action2 )
+ {
+ if ( action1.ordering < action2.ordering )
+ return -1;
+ else if ( action1.ordering > action2.ordering )
+ return 1;
+ else if ( action1.action < action2.action )
+ return -1;
+ else if ( action1.action > action2.action )
+ return 1;
+ else if ( action1.transferPoint < action2.transferPoint )
+ return -1;
+ else if ( action1.transferPoint > action2.transferPoint )
+ return 1;
+ return 0;
+ }
+};
+
+/* Compare for ErrActionTable. */
+typedef CmpSTable< ErrActionTableEl, CmpErrActionTableEl > CmpErrActionTable;
+
+
+/* Describes a priority, shared among PriorEls.
+ * Holds the priority key and the priority value. */
+struct PriorDesc
+{
+ int key;
+ int priority;
+};
+
+/* Element of the priority tables kept on transitions and states (see
+ * FsmTrans::priorTable and FsmState::outPriorTable). Ordering is
+ * unique among instantiations of machines, desc is shared. */
+struct PriorEl
+{
+ PriorEl( int ordering, PriorDesc *desc )
+ : ordering(ordering), desc(desc) { }
+
+ int ordering;
+ PriorDesc *desc;
+};
+
+/* Three-way compare of priority elements. Only the key of the priority
+ * descriptor takes part; the ordering is ignored. */
+struct PriorElCmp
+{
+	static inline int compare( const PriorEl &pel1, const PriorEl &pel2 )
+	{
+		const int key1 = pel1.desc->key;
+		const int key2 = pel2.desc->key;
+
+		if ( key1 < key2 )
+			return -1;
+		return key1 > key2 ? 1 : 0;
+	}
+};
+
+
+/* Priority table: a set of PriorEl ordered by PriorElCmp, i.e. by the key of
+ * the priority descriptor. */
+struct PriorTable
+	: public SBstSet< PriorEl, PriorElCmp >
+{
+	/* Set a single priority. */
+	void setPrior( int ordering, PriorDesc *desc );
+
+	/* Set priorities from another table. */
+	void setPriors( const PriorTable &other );
+};
+
+/* Three-way compare of prior table elements for distinguishing state data.
+ * The descriptor pointer is compared first, then the ordering. */
+struct CmpPriorEl
+{
+	static inline int compare( const PriorEl &pel1, const PriorEl &pel2 )
+	{
+		/* Descriptors are compared by address, not by content. */
+		if ( pel1.desc < pel2.desc )
+			return -1;
+		if ( pel1.desc > pel2.desc )
+			return 1;
+
+		if ( pel1.ordering < pel2.ordering )
+			return -1;
+		if ( pel1.ordering > pel2.ordering )
+			return 1;
+
+		return 0;
+	}
+};
+
+/* Compare of PriorTable distinguishing state data. Using a compare of the
+ * pointers is a little more strict than it needs be. It requires that
+ * priority tables have the exact same set of priority assignment operators
+ * (from the input lang) to be considered equal.
+ *
+ * Really only key-value pairs need be tested and ordering be merged. However
+ * this would require that in the fusing of states, priority descriptors be
+ * chosen for the new fused state based on priority. Since the out transition
+ * lists and ranges aren't necessarily going to line up, this is more work for
+ * little gain. Final compression resets all priorities first, so this would
+ * only be useful for compression at every operator, which is only an
+ * undocumented test feature.
+ */
+typedef CmpSTable<PriorEl, CmpPriorEl> CmpPriorTable;
+
+/* Plain action list that imposes no ordering. */
+typedef Vector<int> TransFuncList;
+
+/* Comparison for TransFuncList. */
+typedef CmpTable< int, CmpOrd<int> > TransFuncListCompare;
+
+/* Transition class that implements actions and priorities. A transition
+ * covers the key range lowKey..highKey, links the states fromState and
+ * toState, and is threaded onto an out list (prev/next) and an in-list
+ * (ilprev/ilnext). */
+struct FsmTrans
+{
+	/* A fresh transition is attached to nothing: the state pointers and all
+	 * four list links start out null, so the iterator tests on
+	 * ilprev/ilnext never read an indeterminate pointer. */
+	FsmTrans()
+	:
+		fromState(0), toState(0),
+		prev(0), next(0),
+		ilprev(0), ilnext(0)
+	{}
+
+	/* Copy the key range, action table and priority table of other. The
+	 * copy is detached: it takes neither the states nor the list links of
+	 * the original. The lm action table is not copied, so the source is
+	 * required to have an empty one. */
+	FsmTrans( const FsmTrans &other )
+	:
+		lowKey(other.lowKey),
+		highKey(other.highKey),
+		fromState(0), toState(0),
+		prev(0), next(0),
+		ilprev(0), ilnext(0),
+		actionTable(other.actionTable),
+		priorTable(other.priorTable)
+	{
+		assert( lmActionTable.length() == 0 && other.lmActionTable.length() == 0 );
+	}
+
+	Key lowKey, highKey;
+	FsmState *fromState;
+	FsmState *toState;
+
+	/* Pointers for outlist. */
+	FsmTrans *prev, *next;
+
+	/* Pointers for in-list. */
+	FsmTrans *ilprev, *ilnext;
+
+	/* The function table and priority for the transition. */
+	ActionTable actionTable;
+	PriorTable priorTable;
+
+	LmActionTable lmActionTable;
+};
+
+/* In transition list. Like DList, but it holds a head pointer only, which
+ * is all that is required. Insertion and deletion are handled by the graph;
+ * this class just provides the iterator over a single list. */
+struct TransInList
+{
+	TransInList() : head(0) { }
+
+	FsmTrans *head;
+
+	struct Iter
+	{
+		/* Default construct: not positioned on anything. */
+		Iter() : ptr(0) { }
+
+		/* Construct from, or assign, the head of a list. */
+		Iter( const TransInList &il ) : ptr(il.head) { }
+		Iter &operator=( const TransInList &dl )
+		{
+			ptr = dl.head;
+			return *this;
+		}
+
+		/* lte() holds while the iterator is on an element, end() once it
+		 * has run off the list. */
+		bool lte() const { return ptr != 0; }
+		bool end() const { return ptr == 0; }
+
+		/* On the first / last element of the in-list. */
+		bool first() const { return ptr != 0 && ptr->ilprev == 0; }
+		bool last() const { return ptr != 0 && ptr->ilnext == 0; }
+
+		/* Cast, dereference, arrow ops. */
+		operator FsmTrans*() const { return ptr; }
+		FsmTrans &operator *() const { return *ptr; }
+		FsmTrans *operator->() const { return ptr; }
+
+		/* Step forward / backward along the in-list links. */
+		void operator++(int) { ptr = ptr->ilnext; }
+		void operator--(int) { ptr = ptr->ilprev; }
+
+		/* The iterator is simply a pointer. */
+		FsmTrans *ptr;
+	};
+};
+
+typedef DList<FsmTrans> TransList;
+
+/* Set of states, list of states. */
+typedef BstSet<FsmState*> StateSet;
+typedef DList<FsmState> StateList;
+
+/* An element in a state dict. The key is a set of states; targState is the
+ * target state associated with that set. */
+struct StateDictEl
+:
+	public AvlTreeEl<StateDictEl>
+{
+	/* Copies the state set. targState starts out null rather than
+	 * indeterminate; it is assigned after construction. */
+	StateDictEl(const StateSet &stateSet)
+		: stateSet(stateSet), targState(0) { }
+
+	/* Key used by the dictionary. */
+	const StateSet &getKey() { return stateSet; }
+	StateSet stateSet;
+	FsmState *targState;
+};
+
+/* Dictionary mapping a set of states to a target state. */
+typedef AvlTree< StateDictEl, StateSet, CmpTable<FsmState*> > StateDict;
+
+/* Data needed for a merge operation: the state dictionary plus the head and
+ * tail of the list of states waiting to be filled in. */
+struct MergeData
+{
+	/* The fill list starts out empty. */
+	MergeData()
+	:
+		stfillHead(0),
+		stfillTail(0)
+	{ }
+
+	StateDict stateDict;
+
+	FsmState *stfillHead;
+	FsmState *stfillTail;
+
+	/* Append a state to the fill list. */
+	void fillListAppend( FsmState *state );
+};
+
+/* A key range together with the transition stored for it. */
+struct TransEl
+{
+	/* Constructors. Whenever no transition is supplied, value is set to
+	 * null instead of being left indeterminate. */
+	TransEl()
+		: value(0) { }
+	TransEl( Key lowKey, Key highKey )
+		: lowKey(lowKey), highKey(highKey), value(0) { }
+	TransEl( Key lowKey, Key highKey, FsmTrans *value )
+		: lowKey(lowKey), highKey(highKey), value(value) { }
+
+	Key lowKey, highKey;
+	FsmTrans *value;
+};
+
+/* Three-way compare of keys, built on Key's own < and > operators. */
+struct CmpKey
+{
+	static int compare( const Key key1, const Key key2 )
+	{
+		if ( key1 < key2 )
+			return -1;
+		if ( key1 > key2 )
+			return 1;
+		return 0;
+	}
+};
+
+/* Vector based set of key items. */
+typedef BstSet<Key, CmpKey> KeySet;
+
+/* A partition of states (see FsmState::alg.partition, used when minimizing
+ * by partitioning). Partitions are themselves list elements. */
+struct MinPartition
+{
+	/* Starts inactive and unlinked: the list links are null until the
+	 * partition is put on a list, rather than indeterminate. */
+	MinPartition() : active(false), prev(0), next(0) { }
+
+	StateList list;
+	bool active;
+
+	MinPartition *prev, *next;
+};
+
+/* Epsilon transition stored in a state. Specifies the target */
+typedef Vector<int> EpsilonTrans;
+
+/* Entry in the list of states that are to be drawn into a state: the target
+ * state plus a flag saying whether it is a leaving entry. */
+struct EptVectEl
+{
+	EptVectEl( FsmState *targ, bool leaving )
+	:
+		targ(targ),
+		leaving(leaving)
+	{ }
+
+	FsmState *targ;
+	bool leaving;
+};
+typedef Vector<EptVectEl> EptVect;
+
+/* Set of entry ids that go into this state. */
+typedef BstSet<int> EntryIdSet;
+
+/* Set of longest match items that may be active in a given state. */
+typedef BstSet<TokenInstance*> LmItemSet;
+
+/* Conditions. */
+typedef BstSet< Action*, CmpOrd<Action*> > CondSet;
+typedef CmpTable< Action*, CmpOrd<Action*> > CmpCondSet;
+
+/* Tree element keyed on a set of conditions. baseKey and condSpaceId are
+ * not set by the constructor. */
+struct CondSpace
+	: public AvlTreeEl<CondSpace>
+{
+	CondSpace( const CondSet &condSet )
+	:
+		condSet(condSet)
+	{}
+
+	/* The condition set is the key. */
+	const CondSet &getKey() { return condSet; }
+
+	CondSet condSet;
+	Key baseKey;
+	long condSpaceId;
+};
+
+typedef Vector<CondSpace*> CondSpaceVect;
+
+typedef AvlTree<CondSpace, CondSet, CmpCondSet> CondSpaceMap;
+
+/* A key range of a state together with the condition space that applies to
+ * it. Elements are linked into a StateCondList. */
+struct StateCond
+{
+	/* Only the key range is supplied. The condition space and the list
+	 * links start out null instead of indeterminate. */
+	StateCond( Key lowKey, Key highKey ) :
+		lowKey(lowKey), highKey(highKey),
+		condSpace(0), prev(0), next(0) {}
+
+	Key lowKey;
+	Key highKey;
+	CondSpace *condSpace;
+
+	StateCond *prev, *next;
+};
+
+typedef DList<StateCond> StateCondList;
+typedef Vector<long> LongVect;
+
+/* State class that implements actions and priorities. */
+struct FsmState
+{
+ FsmState();
+ FsmState(const FsmState &other);
+ ~FsmState();
+
+ /* Is the state final? True when the SB_ISFINAL bit is set in stateBits. */
+ bool isFinState() { return stateBits & SB_ISFINAL; }
+
+ /* Out transition list. */
+ TransList outList;
+
+ /* In transition Lists. */
+ TransInList inList;
+
+ /* Entry points into the state. */
+ EntryIdSet entryIds;
+
+ /* Epsilon transitions. */
+ EpsilonTrans epsilonTrans;
+
+ /* Condition info. */
+ StateCondList stateCondList;
+
+ /* Number of in transitions from states other than ourselves. */
+ int foreignInTrans;
+
+ /* Temporary data for various algorithms. The members share storage, so
+ * only one of them is meaningful at a time. */
+ union {
+ /* When duplicating the fsm we need to map each
+ * state to the new state representing it. */
+ FsmState *stateMap;
+
+ /* When minimizing machines by partitioning, this maps to the group
+ * the state is in. */
+ MinPartition *partition;
+
+ /* When merging states (state machine operations) this next pointer is
+ * used for the list of states that need to be filled in. */
+ FsmState *next;
+
+ /* Identification for printing and stable minimization. */
+ int stateNum;
+
+ } alg;
+
+ /* Data used in epsilon operation, maybe fit into alg? */
+ FsmState *isolatedShadow;
+ int owningGraph;
+
+ /* A pointer to a dict element that contains the set of states this state
+ * represents. This cannot go into alg, because alg.next is used during
+ * the merging process. */
+ StateDictEl *stateDictEl;
+
+ /* When drawing epsilon transitions, holds the list of states to merge
+ * with. */
+ EptVect *eptVect;
+
+ /* Bits controlling the behaviour of the state during collapsing to dfa
+ * (the SB_* flags). */
+ int stateBits;
+
+ /* State list elements. */
+ FsmState *next, *prev;
+
+ /*
+ * Priority and Action data.
+ */
+
+ /* Out priorities transferred to out transitions. */
+ PriorTable outPriorTable;
+
+ /* The following two action tables are distinguished as follows. The
+ * toState actions are executed immediately after the transition actions of
+ * incoming transitions, and the current character is the same as the
+ * one available then. The fromState actions are executed immediately
+ * before the transition actions of outgoing transitions, and the current
+ * character is the same as the one available then. */
+
+ /* Actions to execute upon entering into a state. */
+ ActionTable toStateActionTable;
+
+ /* Actions to execute when going from the state to the transition. */
+ ActionTable fromStateActionTable;
+
+ /* Actions to add to any future transitions that leave via this state. */
+ ActionTable outActionTable;
+
+ /* Conditions to add to any future transitions that leave via this state. */
+ ActionSet outCondSet;
+
+ /* Error action tables. */
+ ErrActionTable errActionTable;
+
+ /* Actions to execute on eof. */
+ ActionTable eofActionTable;
+
+ /* Set of longest match items that may be active in this state. */
+ LmItemSet lmItemSet;
+
+ FsmState *eofTarget;
+};
+
+/* Cursor over a list of items that carry lowKey/highKey/next. It caches the
+ * key range of the current item and a pointer to its successor. */
+template <class ListItem> struct NextTrans
+{
+	Key lowKey, highKey;
+	ListItem *trans;
+	ListItem *next;
+
+	/* Refresh the cached data from trans. With a null trans only next is
+	 * cleared; the cached keys are left as they were. */
+	void load() {
+		if ( trans == 0 ) {
+			next = 0;
+			return;
+		}
+
+		next = trans->next;
+		lowKey = trans->lowKey;
+		highKey = trans->highKey;
+	}
+
+	/* Position the cursor on t. */
+	void set( ListItem *t ) {
+		trans = t;
+		load();
+	}
+
+	/* Move the cursor to the cached successor. */
+	void increment() {
+		trans = next;
+		load();
+	}
+};
+
+
+/* Encodes the different states that are meaningful to the user of the
+ * iterator: a range present in only one of the two lists, a range present in
+ * both, or a notification that a range of one list is about to be broken. */
+enum PairIterUserState
+{
+ RangeInS1, RangeInS2,
+ RangeOverlap,
+ BreakS1, BreakS2
+};
+
+/* Iterator that walks two range lists in parallel and reports, step by step,
+ * which key ranges are in the first list only, in the second list only, or
+ * in both. The work is done by the findNext() co-routine. */
+template <class ListItem1, class ListItem2 = ListItem1> struct PairIter
+{
+	/* Encodes the different states that an fsm iterator can be in. Each
+	 * value names a resume point inside findNext(). */
+	enum IterState {
+		Begin,
+		ConsumeS1Range,
+		ConsumeS2Range,
+		OnlyInS1Range,
+		OnlyInS2Range,
+		S1SticksOut,
+		S1SticksOutBreak,
+		S2SticksOut,
+		S2SticksOutBreak,
+		S1DragsBehind,
+		S1DragsBehindBreak,
+		S2DragsBehind,
+		S2DragsBehindBreak,
+		ExactOverlap,
+		End
+	};
+
+	PairIter( ListItem1 *list1, ListItem2 *list2 );
+
+	/* Query iterator: end() once the End state is reached, lte() before. */
+	bool end() { return itState == End; }
+	bool lte() { return !end(); }
+
+	/* Both increment forms advance to the next range and return nothing. */
+	void operator++(int) { findNext(); }
+	void operator++() { findNext(); }
+
+	/* Iterator state. */
+	ListItem1 *list1;
+	ListItem2 *list2;
+	IterState itState;
+	PairIterUserState userState;
+
+	/* Cursors over the two lists, and the saved remainder of a range that
+	 * has been broken in two. */
+	NextTrans<ListItem1> s1Tel;
+	NextTrans<ListItem2> s2Tel;
+	Key bottomLow, bottomHigh;
+	ListItem1 *bottomTrans1;
+	ListItem2 *bottomTrans2;
+
+private:
+	void findNext();
+};
+
+/* Init the iterator by advancing to the first item. The iterator starts in
+ * the Begin state and findNext() is run once, so that lte()/end() can be
+ * queried right after construction. */
+template <class ListItem1, class ListItem2> PairIter<ListItem1, ListItem2>::PairIter(
+ ListItem1 *list1, ListItem2 *list2 )
+:
+ list1(list1),
+ list2(list2),
+ itState(Begin)
+{
+ findNext();
+}
+
+/* Return and re-entry for the co-routine iterators: records label in
+ * itState, returns, and defines the label entry##label at which the routine
+ * is re-entered. It expands to several statements, so this should ALWAYS be
+ * used inside of a block. */
+#define CO_RETURN(label) \
+ itState = label; \
+ return; \
+ entry##label: {}
+
+/* Return and re-entry for the co-routine iterators. Same as CO_RETURN, but
+ * also stores uState in userState before returning. This should ALWAYS be
+ * used inside of a block. */
+#define CO_RETURN2(label, uState) \
+ itState = label; \
+ userState = uState; \
+ return; \
+ entry##label: {}
+
+/* Advance to the next range. On return, itState and userState describe the
+ * range that was found and s1Tel/s2Tel hold the transition(s) involved,
+ * unless there are no more, in which case end() returns true. Each call
+ * resumes right after the CO_RETURN that ended the previous call. */
+template <class ListItem1, class ListItem2> void PairIter<ListItem1, ListItem2>::findNext()
+{
+ /* Jump into the iterator routine based on the iterator state. */
+ switch ( itState ) {
+ case Begin: goto entryBegin;
+ case ConsumeS1Range: goto entryConsumeS1Range;
+ case ConsumeS2Range: goto entryConsumeS2Range;
+ case OnlyInS1Range: goto entryOnlyInS1Range;
+ case OnlyInS2Range: goto entryOnlyInS2Range;
+ case S1SticksOut: goto entryS1SticksOut;
+ case S1SticksOutBreak: goto entryS1SticksOutBreak;
+ case S2SticksOut: goto entryS2SticksOut;
+ case S2SticksOutBreak: goto entryS2SticksOutBreak;
+ case S1DragsBehind: goto entryS1DragsBehind;
+ case S1DragsBehindBreak: goto entryS1DragsBehindBreak;
+ case S2DragsBehind: goto entryS2DragsBehind;
+ case S2DragsBehindBreak: goto entryS2DragsBehindBreak;
+ case ExactOverlap: goto entryExactOverlap;
+ case End: goto entryEnd;
+ }
+
+entryBegin:
+ /* Set up the next structs at the head of the transition lists. */
+ s1Tel.set( list1 );
+ s2Tel.set( list2 );
+
+ /* Concurrently scan both out ranges. */
+ while ( true ) {
+ if ( s1Tel.trans == 0 ) {
+ /* We are at the end of state1's ranges. Process the rest of
+ * state2's ranges. */
+ while ( s2Tel.trans != 0 ) {
+ /* Range is only in s2. */
+ CO_RETURN2( ConsumeS2Range, RangeInS2 );
+ s2Tel.increment();
+ }
+ break;
+ }
+ else if ( s2Tel.trans == 0 ) {
+ /* We are at the end of state2's ranges. Process the rest of
+ * state1's ranges. */
+ while ( s1Tel.trans != 0 ) {
+ /* Range is only in s1. */
+ CO_RETURN2( ConsumeS1Range, RangeInS1 );
+ s1Tel.increment();
+ }
+ break;
+ }
+ /* Both state1's and state2's transition elements are good.
+ * The signature of no overlap is a back key being in front of a
+ * front key. */
+ else if ( s1Tel.highKey < s2Tel.lowKey ) {
+ /* A range exists in state1 that does not overlap with state2. */
+ CO_RETURN2( OnlyInS1Range, RangeInS1 );
+ s1Tel.increment();
+ }
+ else if ( s2Tel.highKey < s1Tel.lowKey ) {
+ /* A range exists in state2 that does not overlap with state1. */
+ CO_RETURN2( OnlyInS2Range, RangeInS2 );
+ s2Tel.increment();
+ }
+ /* There is overlap, must mix the ranges in some way. */
+ else if ( s1Tel.lowKey < s2Tel.lowKey ) {
+ /* Range from state1 sticks out front. Must break it into
+ * non-overlapping and overlapping segments. */
+ bottomLow = s2Tel.lowKey;
+ bottomHigh = s1Tel.highKey;
+ s1Tel.highKey = s2Tel.lowKey;
+ s1Tel.highKey.decrement();
+ bottomTrans1 = s1Tel.trans;
+
+ /* Notify the caller that we are breaking s1. This gives them a
+ * chance to duplicate the s1 transition if they need to. */
+ CO_RETURN2( S1SticksOutBreak, BreakS1 );
+
+ /* Broken off range is only in s1. */
+ CO_RETURN2( S1SticksOut, RangeInS1 );
+
+ /* Advance over the part sticking out front. */
+ s1Tel.lowKey = bottomLow;
+ s1Tel.highKey = bottomHigh;
+ s1Tel.trans = bottomTrans1;
+ }
+ else if ( s2Tel.lowKey < s1Tel.lowKey ) {
+ /* Range from state2 sticks out front. Must break it into
+ * non-overlapping and overlapping segments. */
+ bottomLow = s1Tel.lowKey;
+ bottomHigh = s2Tel.highKey;
+ s2Tel.highKey = s1Tel.lowKey;
+ s2Tel.highKey.decrement();
+ bottomTrans2 = s2Tel.trans;
+
+ /* Notify the caller that we are breaking s2. This gives them a
+ * chance to duplicate the s2 transition if they need to. */
+ CO_RETURN2( S2SticksOutBreak, BreakS2 );
+
+ /* Broken off range is only in s2. */
+ CO_RETURN2( S2SticksOut, RangeInS2 );
+
+ /* Advance over the part sticking out front. */
+ s2Tel.lowKey = bottomLow;
+ s2Tel.highKey = bottomHigh;
+ s2Tel.trans = bottomTrans2;
+ }
+ /* Low ends are even. Are the high ends even? */
+ else if ( s1Tel.highKey < s2Tel.highKey ) {
+ /* Range from state2 goes longer than the range from state1. We
+ * must break the range from state2 into an evenly overlapping
+ * segment. */
+ bottomLow = s1Tel.highKey;
+ bottomLow.increment();
+ bottomHigh = s2Tel.highKey;
+ s2Tel.highKey = s1Tel.highKey;
+ bottomTrans2 = s2Tel.trans;
+
+ /* Notify the caller that we are breaking s2. This gives them a
+ * chance to duplicate the s2 transition if they need to. */
+ CO_RETURN2( S2DragsBehindBreak, BreakS2 );
+
+ /* Breaking s2 produces exact overlap. */
+ CO_RETURN2( S2DragsBehind, RangeOverlap );
+
+ /* Advance over the front we just broke off of range 2. */
+ s2Tel.lowKey = bottomLow;
+ s2Tel.highKey = bottomHigh;
+ s2Tel.trans = bottomTrans2;
+
+ /* Advance over the entire s1Tel. We have consumed it. */
+ s1Tel.increment();
+ }
+ else if ( s2Tel.highKey < s1Tel.highKey ) {
+ /* Range from state1 goes longer than the range from state2. We
+ * must break the range from state1 into an evenly overlapping
+ * segment. */
+ bottomLow = s2Tel.highKey;
+ bottomLow.increment();
+ bottomHigh = s1Tel.highKey;
+ s1Tel.highKey = s2Tel.highKey;
+ bottomTrans1 = s1Tel.trans;
+
+ /* Notify the caller that we are breaking s1. This gives them a
+ * chance to duplicate the s1 transition if they need to. */
+ CO_RETURN2( S1DragsBehindBreak, BreakS1 );
+
+ /* Breaking s1 produces exact overlap. */
+ CO_RETURN2( S1DragsBehind, RangeOverlap );
+
+ /* Advance over the front we just broke off of range 1. */
+ s1Tel.lowKey = bottomLow;
+ s1Tel.highKey = bottomHigh;
+ s1Tel.trans = bottomTrans1;
+
+ /* Advance over the entire s2Tel. We have consumed it. */
+ s2Tel.increment();
+ }
+ else {
+ /* There is an exact overlap. */
+ CO_RETURN2( ExactOverlap, RangeOverlap );
+
+ s1Tel.increment();
+ s2Tel.increment();
+ }
+ }
+
+ /* Done, go into end state. Any further call re-enters at entryEnd and
+ * returns immediately. */
+ CO_RETURN( End );
+}
+
+
+/* Compare lists of epsilon transitions. Entries are name ids of targets. */
+typedef CmpTable< int, CmpOrd<int> > CmpEpsilonTrans;
+
+/* Compare class for the approximate minimization. compare() is only
+ * declared here. */
+class ApproxCompare
+{
+public:
+ ApproxCompare() { }
+ int compare( const FsmState *pState1, const FsmState *pState2 );
+};
+
+/* Compare class for the initial partitioning of a partition minimization.
+ * compare() is only declared here. */
+class InitPartitionCompare
+{
+public:
+ InitPartitionCompare() { }
+ int compare( const FsmState *pState1, const FsmState *pState2 );
+};
+
+/* Compare class for the regular partitioning of a partition minimization.
+ * compare() is only declared here. */
+class PartitionCompare
+{
+public:
+ PartitionCompare() { }
+ int compare( const FsmState *pState1, const FsmState *pState2 );
+};
+
+/* Compare class for a minimization that marks pairs. Provides the shouldMark
+ * routine, which is given the mark index and the two states of a pair. */
+class MarkCompare
+{
+public:
+ MarkCompare() { }
+ bool shouldMark( MarkIndex &markIndex, const FsmState *pState1,
+ const FsmState *pState2 );
+};
+
+/* List of partitions. */
+typedef DList< MinPartition > PartitionList;
+
+/* List of transitions out of a state. */
+typedef Vector<TransEl> TransListVect;
+
+/* Entry point map used for keeping track of entry points in a machine. */
+typedef BstSet< int > EntryIdSet;
+typedef BstMapEl< int, FsmState* > EntryMapEl;
+typedef BstMap< int, FsmState* > EntryMap;
+typedef Vector<EntryMapEl> EntryMapBase;
+
+/* Graph class that implements actions and priorities. */
+struct FsmGraph
+{
+ /* Constructors/Destructors. */
+ FsmGraph( );
+ FsmGraph( const FsmGraph &graph );
+ ~FsmGraph();
+
+ /* The list of states. */
+ StateList stateList;
+ StateList misfitList;
+
+ /* The map of entry points. */
+ EntryMap entryPoints;
+
+ /* The start state. */
+ FsmState *startState;
+
+ /* Error state, possibly created only when the final machine has been
+ * created and the XML machine is about to be written. No transitions
+ * point to this state. */
+ FsmState *errState;
+
+ /* The set of final states. */
+ StateSet finStateSet;
+
+ /* Misfit Accounting. Are misfits put on a separate list. */
+ bool misfitAccounting;
+
+ bool lmRequiresErrorState;
+ NameInst **nameIndex;
+
+ /*
+ * Transition actions and priorities.
+ */
+
+ /* Set priorities on transtions. */
+ void startFsmPrior( int ordering, PriorDesc *prior );
+ void allTransPrior( int ordering, PriorDesc *prior );
+ void finishFsmPrior( int ordering, PriorDesc *prior );
+ void leaveFsmPrior( int ordering, PriorDesc *prior );
+
+ /* Action setting support. */
+ void transferErrorActions( FsmState *state, int transferPoint );
+ void setErrorAction( FsmState *state, int ordering, Action *action );
+ void setErrorActions( FsmState *state, const ActionTable &other );
+
+ /* Fill all spaces in a transition list with an error transition. */
+ void fillGaps( FsmState *state );
+
+ /* Similar to setErrorAction, instead gives a state to go to on error. */
+ void setErrorTarget( FsmState *state, FsmState *target, int *orderings,
+ Action **actions, int nActs );
+
+ /* Set actions to execute. */
+ void startFsmAction( int ordering, Action *action );
+ void allTransAction( int ordering, Action *action );
+ void finishFsmAction( int ordering, Action *action );
+ void leaveFsmAction( int ordering, Action *action );
+ void longMatchAction( int ordering, TokenInstance *lmPart );
+
+ /* Set error actions to execute. */
+ void startErrorAction( int ordering, Action *action, int transferPoint );
+ void allErrorAction( int ordering, Action *action, int transferPoint );
+ void finalErrorAction( int ordering, Action *action, int transferPoint );
+ void notStartErrorAction( int ordering, Action *action, int transferPoint );
+ void notFinalErrorAction( int ordering, Action *action, int transferPoint );
+ void middleErrorAction( int ordering, Action *action, int transferPoint );
+
+ /* Set EOF actions. */
+ void startEOFAction( int ordering, Action *action );
+ void allEOFAction( int ordering, Action *action );
+ void finalEOFAction( int ordering, Action *action );
+ void notStartEOFAction( int ordering, Action *action );
+ void notFinalEOFAction( int ordering, Action *action );
+ void middleEOFAction( int ordering, Action *action );
+
+ /* Set To State actions. */
+ void startToStateAction( int ordering, Action *action );
+ void allToStateAction( int ordering, Action *action );
+ void finalToStateAction( int ordering, Action *action );
+ void notStartToStateAction( int ordering, Action *action );
+ void notFinalToStateAction( int ordering, Action *action );
+ void middleToStateAction( int ordering, Action *action );
+
+ /* Set From State actions. */
+ void startFromStateAction( int ordering, Action *action );
+ void allFromStateAction( int ordering, Action *action );
+ void finalFromStateAction( int ordering, Action *action );
+ void notStartFromStateAction( int ordering, Action *action );
+ void notFinalFromStateAction( int ordering, Action *action );
+ void middleFromStateAction( int ordering, Action *action );
+
+ /* Shift the action ordering of the start transitions to start at
+ * fromOrder and increase in units of 1. Useful before kleene star
+ * operation. */
+ int shiftStartActionOrder( int fromOrder );
+
+ /* Clear all priorities from the fsm to so they won't affcet minimization
+ * of the final fsm. */
+ void clearAllPriorities();
+
+ /* Zero out all the function keys. */
+ void nullActionKeys();
+
+ /* Walk the list of states and verify state properties. */
+ void verifyStates();
+
+ /* Misfit Accounting. Are misfits put on a separate list. */
+ void setMisfitAccounting( bool val )
+ { misfitAccounting = val; }
+
+ /* Set and Unset a state as final. */
+ void setFinState( FsmState *state );
+ void unsetFinState( FsmState *state );
+
+ void setStartState( FsmState *state );
+ void unsetStartState( );
+
+ /* Set and unset a state as an entry point. */
+ void setEntry( int id, FsmState *state );
+ void changeEntry( int id, FsmState *to, FsmState *from );
+ void unsetEntry( int id, FsmState *state );
+ void unsetEntry( int id );
+ void unsetAllEntryPoints();
+
+ /* Epsilon transitions. */
+ void epsilonTrans( int id );
+ void shadowReadWriteStates( MergeData &md );
+
+ /*
+ * Basic attaching and detaching.
+ */
+
+ /* Common to attaching/detaching list and default. */
+ void attachToInList( FsmState *from, FsmState *to, FsmTrans *&head, FsmTrans *trans );
+ void detachFromInList( FsmState *from, FsmState *to, FsmTrans *&head, FsmTrans *trans );
+
+ /* Attach with a new transition. */
+ FsmTrans *attachNewTrans( FsmState *from, FsmState *to,
+ Key onChar1, Key onChar2 );
+
+ /* Attach with an existing transition that already in an out list. */
+ void attachTrans( FsmState *from, FsmState *to, FsmTrans *trans );
+
+ /* Redirect a transition away from error and towards some state. */
+ void redirectErrorTrans( FsmState *from, FsmState *to, FsmTrans *trans );
+
+ /* Detach a transition from a target state. */
+ void detachTrans( FsmState *from, FsmState *to, FsmTrans *trans );
+
+ /* Detach a state from the graph. */
+ void detachState( FsmState *state );
+
+ /*
+ * NFA to DFA conversion routines.
+ */
+
+ /* Duplicate a transition that will dropin to a free spot. */
+ FsmTrans *dupTrans( FsmState *from, FsmTrans *srcTrans );
+
+ /* In crossing, two transitions both go to real states. */
+ FsmTrans *fsmAttachStates( MergeData &md, FsmState *from,
+ FsmTrans *destTrans, FsmTrans *srcTrans );
+
+ /* Two transitions are to be crossed, handle the possibility of either
+ * going to the error state. */
+ FsmTrans *mergeTrans( MergeData &md, FsmState *from,
+ FsmTrans *destTrans, FsmTrans *srcTrans );
+
+ /* Compare deterimne relative priorities of two transition tables. */
+ int comparePrior( const PriorTable &priorTable1, const PriorTable &priorTable2 );
+
+ /* Cross a src transition with one that is already occupying a spot. */
+ FsmTrans *crossTransitions( MergeData &md, FsmState *from,
+ FsmTrans *destTrans, FsmTrans *srcTrans );
+
+ void outTransCopy( MergeData &md, FsmState *dest, FsmTrans *srcList );
+ void mergeStateConds( FsmState *destState, FsmState *srcState );
+
+ /* Merge a set of states into newState. */
+ void mergeStates( MergeData &md, FsmState *destState,
+ FsmState **srcStates, int numSrc );
+ void mergeStatesLeaving( MergeData &md, FsmState *destState, FsmState *srcState );
+ void mergeStates( MergeData &md, FsmState *destState, FsmState *srcState );
+
+ /* Make all states that are combinations of other states and that
+ * have not yet had their out transitions filled in. This will
+ * empty out stateDict and stFil. */
+ void fillInStates( MergeData &md );
+
+ /*
+ * Transition Comparison.
+ */
+
+ /* Compare transition data. Either of the pointers may be null. */
+ static inline int compareDataPtr( FsmTrans *trans1, FsmTrans *trans2 );
+
+ /* Compare target state and transition data. Either pointer may be null. */
+ static inline int compareFullPtr( FsmTrans *trans1, FsmTrans *trans2 );
+
+ /* Compare target partitions. Either pointer may be null. */
+ static inline int comparePartPtr( FsmTrans *trans1, FsmTrans *trans2 );
+
+ /* Check marked status of target states. Either pointer may be null. */
+ static inline bool shouldMarkPtr( MarkIndex &markIndex,
+ FsmTrans *trans1, FsmTrans *trans2 );
+
+ /*
+ * Callbacks.
+ */
+
+ /* Compare priority and function table of transitions. */
+ static int compareTransData( FsmTrans *trans1, FsmTrans *trans2 );
+
+ /* Add in the properties of srcTrans into this. */
+ void addInTrans( FsmTrans *destTrans, FsmTrans *srcTrans );
+
+ /* Compare states on data stored in the states. */
+ static int compareStateData( const FsmState *state1, const FsmState *state2 );
+
+ /* Out transition data. */
+ void clearOutData( FsmState *state );
+ bool hasOutData( FsmState *state );
+ void transferOutData( FsmState *destState, FsmState *srcState );
+
+ /*
+ * Allocation.
+ */
+
+ /* New up a state and add it to the graph. */
+ FsmState *addState();
+
+ /*
+ * Building basic machines
+ */
+
+ void concatFsm( Key c );
+ void concatFsm( Key *str, int len );
+ void concatFsmCI( Key *str, int len );
+ void orFsm( Key *set, int len );
+ void rangeFsm( Key low, Key high );
+ void rangeStarFsm( Key low, Key high );
+ void emptyFsm( );
+ void lambdaFsm( );
+
+ /*
+ * Fsm operators.
+ */
+
+ void starOp( );
+ void repeatOp( int times );
+ void optionalRepeatOp( int times );
+ void concatOp( FsmGraph *other );
+ void unionOp( FsmGraph *other );
+ void intersectOp( FsmGraph *other );
+ void subtractOp( FsmGraph *other );
+ void epsilonOp();
+ void joinOp( int startId, int finalId, FsmGraph **others, int numOthers );
+ void globOp( FsmGraph **others, int numOthers );
+ void deterministicEntry();
+
+ /*
+ * Operator workers
+ */
+
+ /* Determine if there are any entry points into a start state other than
+ * the start state. */
+ bool isStartStateIsolated();
+
+ /* Make a new start state that has no entry points. Will not change the
+ * identity of the fsm. */
+ void isolateStartState();
+
+ /* Workers for resolving epsilon transitions. */
+ bool inEptVect( EptVect *eptVect, FsmState *targ );
+ void epsilonFillEptVectFrom( FsmState *root, FsmState *from, bool parentLeaving );
+ void resolveEpsilonTrans( MergeData &md );
+
+ /* Workers for concatenation and union. */
+ void doConcat( FsmGraph *other, StateSet *fromStates, bool optional );
+ void doOr( FsmGraph *other );
+
+ /*
+ * Final states
+ */
+
+ /* Unset any final states that are no longer to be final
+ * due to final bits. */
+ void unsetIncompleteFinals();
+ void unsetKilledFinals();
+
+ /* Bring in other's entry points. Assumes others states are going to be
+ * copied into this machine. */
+ void copyInEntryPoints( FsmGraph *other );
+
+ /* Ordering states. */
+ void depthFirstOrdering( FsmState *state );
+ void depthFirstOrdering();
+ void sortStatesByFinal();
+
+ /* Set sequential state numbers starting at 0. */
+ void setStateNumbers( int base );
+
+ /* Unset all final states. */
+ void unsetAllFinStates();
+
+ /* Set the bits of final states and clear the bits of non final states. */
+ void setFinBits( int finStateBits );
+
+ /*
+ * Self-consistency checks.
+ */
+
+ /* Run a sanity check on the machine. */
+ void verifyIntegrity();
+
+ /* Verify that there are no unreachable states, or dead end states. */
+ void verifyReachability();
+ void verifyNoDeadEndStates();
+
+ /*
+ * Path pruning
+ */
+
+ /* Mark all states that have a path to state (walks the graph in reverse). */
+ void markReachableFromHereReverse( FsmState *state );
+
+ /* Mark all states reachable from state. */
+ void markReachableFromHere( FsmState *state );
+ void markReachableFromHereStopFinal( FsmState *state );
+
+ /* Removes states that do not lead to a final state and are
+ * thus wasted silicon. */
+ void removeDeadEndStates();
+
+ /* Removes states that cannot be reached by any path in the fsm and are
+ * thus wasted silicon. */
+ void removeUnreachableStates();
+
+ /* Remove error actions from states on which the error transition will
+ * never be taken. */
+ bool outListCovers( FsmState *state );
+ bool anyErrorRange( FsmState *state );
+
+ /* Remove states that are on the misfit list. */
+ void removeMisfits();
+
+ /*
+ * FSM Minimization
+ */
+
+ /* Minimization by partitioning. */
+ void minimizePartition1();
+ void minimizePartition2();
+
+ /* Minimize the final state machine. The result is the minimal fsm. Slow
+ * but stable, correct minimization. Uses n^2 space (lookout) and average
+ * n^2 time. Worst case n^3 time, but that is a very rare case. */
+ void minimizeStable();
+
+ /* Minimize the final state machine. Does not find the minimal fsm, but a
+ * pretty good approximation. Does not use any extra space. Average n^2
+ * time. Worst case n^3 time, but that is a very rare case. */
+ void minimizeApproximate();
+
+ /* This is the worker for the minimize approximate solution. It merges
+ * states that have identical out transitions. */
+ bool minimizeRound( );
+
+ /* Given an initial partitioning of states, split partitions that have out
+ * trans to differing partitions. */
+ int partitionRound( FsmState **statePtrs, MinPartition *parts, int numParts );
+
+ /* Split partitions that have a transition to a previously split partition, until
+ * there are no more partitions to split. */
+ int splitCandidates( FsmState **statePtrs, MinPartition *parts, int numParts );
+
+ /* Fuse together states in the same partition. */
+ void fusePartitions( MinPartition *parts, int numParts );
+
+ /* Mark pairs where out final stateness differs, out trans data differs,
+ * trans pairs go to a marked pair or trans data differs. Should get
+ * a lot of pairs. */
+ void initialMarkRound( MarkIndex &markIndex );
+
+ /* One marking round on all state pairs. Considers if trans pairs go
+ * to a marked state only. Returns whether or not a pair was marked. */
+ bool markRound( MarkIndex &markIndex );
+
+ /* Move the in trans into src into dest. */
+ void inTransMove(FsmState *dest, FsmState *src);
+
+ /* Make state src and dest the same state. */
+ void fuseEquivStates(FsmState *dest, FsmState *src);
+
+ /* Find any states that didn't get marked by the marking algorithm and
+ * merge them into the primary states of their equivalence class. */
+ void fuseUnmarkedPairs( MarkIndex &markIndex );
+
+ /* Merge neighboring transitions that go to the same state and have the
+ * same transition data. */
+ void compressTransitions();
+
+ /* Returns true if there is a transition (either explicit or by a gap) to
+ * the error state. */
+ bool checkErrTrans( FsmState *state, FsmTrans *trans );
+ bool checkErrTransFinish( FsmState *state );
+ bool hasErrorTrans();
+};
+
+
+#endif /* _COLM_FSMGRAPH_H */
+
diff --git a/src/fsmmin.cc b/src/fsmmin.cc
new file mode 100644
index 00000000..f47500bd
--- /dev/null
+++ b/src/fsmmin.cc
@@ -0,0 +1,737 @@
+/*
+ * Copyright 2006-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdbool.h>
+#include <assert.h>
+
+#include <mergesort.h>
+
+#include "fsmgraph.h"
+
+/* Run one splitting pass over the partitions in parts[0..numParts).
+ *
+ * The states of each partition are copied into statePtrs and sorted with
+ * PartitionCompare. Wherever two neighbours in the sorted order compare as
+ * different, a new partition is opened in the next free slot of parts and the
+ * following states are moved into it. Because numParts grows inside the loop,
+ * partitions created during this pass are themselves visited before it ends.
+ *
+ * statePtrs is scratch space and must be able to hold the largest partition;
+ * parts must have room for every partition that may be created. Returns the
+ * new number of partitions, which equals the numParts passed in when nothing
+ * was split. */
+int FsmGraph::partitionRound( FsmState **statePtrs, MinPartition *parts, int numParts )
+{
+ /* Need a mergesort object and a single partition compare. */
+ MergeSort<FsmState*, PartitionCompare> mergeSort;
+ PartitionCompare partCompare;
+
+ /* For each partition. */
+ for ( int p = 0; p < numParts; p++ ) {
+ /* Fill the pointer array with the states in the partition. */
+ StateList::Iter state = parts[p].list;
+ for ( int s = 0; state.lte(); state++, s++ )
+ statePtrs[s] = state;
+
+ /* Sort the states using the partitioning compare. */
+ int numStates = parts[p].list.length();
+ mergeSort.sort( statePtrs, numStates );
+
+ /* Assign the states into partitions based on the results of the sort.
+ * The first state (index 0) always stays in partition p, so the walk
+ * starts at index 1. firstNewPart remembers where the partitions created
+ * by this split begin. */
+ int destPart = p, firstNewPart = numParts;
+ for ( int s = 1; s < numStates; s++ ) {
+ /* If this state differs from the last then move to the next partition.
+ * NOTE(review): relies on the sort being ascending, so that unequal
+ * neighbours compare < 0 -- confirm against MergeSort. */
+ if ( partCompare.compare( statePtrs[s-1], statePtrs[s] ) < 0 ) {
+ /* The new partition is the next avail spot. */
+ destPart = numParts;
+ numParts += 1;
+ }
+
+ /* If the state is not staying in the first partition, then
+ * transfer it to its destination partition. */
+ if ( destPart != p ) {
+ FsmState *state = parts[p].list.detach( statePtrs[s] );
+ parts[destPart].list.append( state );
+ }
+ }
+
+ /* Fix the partition pointer for all the states that got moved to a new
+ * partition. This must be done after the states are transferred so the
+ * result of the sort is not altered. */
+ for ( int newPart = firstNewPart; newPart < numParts; newPart++ ) {
+ StateList::Iter state = parts[newPart].list;
+ for ( ; state.lte(); state++ )
+ state->alg.partition = &parts[newPart];
+ }
+ }
+
+ return numParts;
+}
+
+/**
+ * \brief Minimize by partitioning version 1.
+ *
+ * Builds an initial partitioning with InitPartitionCompare, then calls
+ * partitionRound() over all partitions until a round creates no new
+ * partition, and finally fuses the states of every partition into one state.
+ * Produces the most minimal FSM possible.
+ */
+void FsmGraph::minimizePartition1()
+{
+	/* Sorter and comparator used to build the initial partitioning. */
+	MergeSort<FsmState*, InitPartitionCompare> mergeSort;
+	InitPartitionCompare initPartCompare;
+
+	/* A graph without states needs no work. */
+	const int numStates = stateList.length();
+	if ( numStates == 0 )
+		return;
+
+	/* Gather the states into a flat array so that they can be sorted. */
+	FsmState **statePtrs = new FsmState*[numStates];
+	int fill = 0;
+	for ( StateList::Iter st = stateList; st.lte(); st++ )
+		statePtrs[fill++] = st;
+
+	/* Order the states so that equal ones end up next to each other. */
+	mergeSort.sort( statePtrs, numStates );
+
+	/* The partitions are lists of states; one slot per state is allocated. */
+	MinPartition *parts = new MinPartition[numStates];
+
+	/* Walk the sorted array and open a new partition whenever a state
+	 * compares different from its predecessor. */
+	int lastPart = 0;
+	for ( int s = 0; s < numStates; s++ ) {
+		FsmState *cur = statePtrs[s];
+		if ( s > 0 && initPartCompare.compare( statePtrs[s-1], cur ) < 0 )
+			lastPart += 1;
+
+		cur->alg.partition = &parts[lastPart];
+		parts[lastPart].list.append( cur );
+	}
+
+	/* The states now sit on partition lists but were never taken off the
+	 * main list, so simply drop the main list's contents. */
+	stateList.abandon();
+
+	/* Keep splitting until a whole round leaves the partition count alone. */
+	int numParts = lastPart + 1;
+	for ( ;; ) {
+		int afterRound = partitionRound( statePtrs, parts, numParts );
+		if ( afterRound == numParts )
+			break;
+		numParts = afterRound;
+	}
+
+	/* Collapse every partition into one state. This also puts the states
+	 * back onto the main list. */
+	fusePartitions( parts, numParts );
+
+	/* Release the scratch arrays. */
+	delete[] statePtrs;
+	delete[] parts;
+}
+
+/* Split partitions that need splitting, decide which partitions might need
+ * to be split as a result, continue until there are no more that might need
+ * to be split.
+ *
+ * statePtrs is scratch space used for sorting the states of one partition at
+ * a time. parts holds the numParts existing partitions and must have room for
+ * the partitions created here. Returns the new number of partitions. */
+int FsmGraph::splitCandidates( FsmState **statePtrs, MinPartition *parts, int numParts )
+{
+ /* Need a mergesort and a partition compare. */
+ MergeSort<FsmState*, PartitionCompare> mergeSort;
+ PartitionCompare partCompare;
+
+ /* The lists of unsplittable (partList) and splittable partitions.
+ * Only partitions in the splittable list are checked for needing splitting.
+ * A partition's active flag is true exactly while it is on the splittable
+ * list. */
+ PartitionList partList, splittable;
+
+ /* Initially, all partitions are born from a split (the initial
+ * partitioning) and can cause other partitions to be split. So any
+ * partition with a state with a transition out to another partition is a
+ * candidate for splitting. This will make every partition except possibly
+ * partitions of final states split candidates. */
+ for ( int p = 0; p < numParts; p++ ) {
+ /* Assume not active. */
+ parts[p].active = false;
+
+ /* Look for a trans out of any state in the partition. */
+ for ( StateList::Iter state = parts[p].list; state.lte(); state++ ) {
+ /* If there is at least one transition out to another state then
+ * the partition becomes splittable. */
+ if ( state->outList.length() > 0 ) {
+ parts[p].active = true;
+ break;
+ }
+ }
+
+ /* If it was found active then it goes on the splittable list. */
+ if ( parts[p].active )
+ splittable.append( &parts[p] );
+ else
+ partList.append( &parts[p] );
+ }
+
+ /* While there are partitions that are splittable, pull one off and try
+ * to split it. If it splits, determine which partitions may now be split
+ * as a result of the newly split partition. */
+ while ( splittable.length() > 0 ) {
+ MinPartition *partition = splittable.detachFirst();
+
+ /* Fill the pointer array with the states in the partition. */
+ StateList::Iter state = partition->list;
+ for ( int s = 0; state.lte(); state++, s++ )
+ statePtrs[s] = state;
+
+ /* Sort the states using the partitioning compare. */
+ int numStates = partition->list.length();
+ mergeSort.sort( statePtrs, numStates );
+
+ /* Assign the states into partitions based on the results of the sort.
+ * The first state always stays in the original partition, so the walk
+ * starts at index 1. */
+ MinPartition *destPart = partition;
+ int firstNewPart = numParts;
+ for ( int s = 1; s < numStates; s++ ) {
+ /* If this state differs from the last then move to the next partition. */
+ if ( partCompare.compare( statePtrs[s-1], statePtrs[s] ) < 0 ) {
+ /* The new partition is the next avail spot. */
+ destPart = &parts[numParts];
+ numParts += 1;
+ }
+
+ /* If the state is not staying in the first partition, then
+ * transfer it to its destination partition. */
+ if ( destPart != partition ) {
+ FsmState *state = partition->list.detach( statePtrs[s] );
+ destPart->list.append( state );
+ }
+ }
+
+ /* Fix the partition pointer for all the states that got moved to a new
+ * partition. This must be done after the states are transferred so the
+ * result of the sort is not altered. */
+ int newPart;
+ for ( newPart = firstNewPart; newPart < numParts; newPart++ ) {
+ StateList::Iter state = parts[newPart].list;
+ for ( ; state.lte(); state++ )
+ state->alg.partition = &parts[newPart];
+ }
+
+ /* Put the partition we just split and any new partitions that came out
+ * of the split onto the inactive list. */
+ partition->active = false;
+ partList.append( partition );
+ for ( newPart = firstNewPart; newPart < numParts; newPart++ ) {
+ parts[newPart].active = false;
+ partList.append( &parts[newPart] );
+ }
+
+ /* destPart still pointing at the original partition means no new
+ * partition was opened, so nothing else can have become splittable. */
+ if ( destPart == partition )
+ continue;
+
+ /* Now determine which partitions are splittable as a result of
+ * splitting partition by walking the in lists of the states in
+ * partitions that got split. Partition is the faked first item in the
+ * loop: newPart starts one below firstNewPart so that the first pass
+ * handles partition itself and the following passes handle
+ * parts[firstNewPart] up to parts[numParts-1]. */
+ MinPartition *causalPart = partition;
+ newPart = firstNewPart - 1;
+ while ( newPart < numParts ) {
+ /* Loop all states in the causal partition. */
+ StateList::Iter state = causalPart->list;
+ for ( ; state.lte(); state++ ) {
+ /* Walk all transitions into the state and put the partition
+ * that the from state is in onto the splittable list. */
+ for ( TransInList::Iter trans = state->inList; trans.lte(); trans++ ) {
+ MinPartition *fromPart = trans->fromState->alg.partition;
+ if ( ! fromPart->active ) {
+ fromPart->active = true;
+ partList.detach( fromPart );
+ splittable.append( fromPart );
+ }
+ }
+ }
+
+ /* Advance to the next partition produced by the split. On the last
+ * pass this only forms the address of parts[numParts]; the loop
+ * condition ends the walk before it is used. */
+ newPart += 1;
+ causalPart = &parts[newPart];
+ }
+ }
+ return numParts;
+}
+
+
+/**
+ * \brief Minimize by partitioning version 2 (best alg).
+ *
+ * Builds the same initial partitioning as version 1, but then hands the
+ * partitions to splitCandidates(), which only revisits partitions that may
+ * have become splittable. Runs faster than version 1. Produces the most
+ * minimal fsm possible.
+ */
+void FsmGraph::minimizePartition2()
+{
+	/* Sorter and comparator used to build the initial partitioning. */
+	MergeSort<FsmState*, InitPartitionCompare> mergeSort;
+	InitPartitionCompare initPartCompare;
+
+	/* A graph without states needs no work. */
+	const int numStates = stateList.length();
+	if ( numStates == 0 )
+		return;
+
+	/* Gather the states into a flat array so that they can be sorted. */
+	FsmState **statePtrs = new FsmState*[numStates];
+	int fill = 0;
+	for ( StateList::Iter st = stateList; st.lte(); st++ )
+		statePtrs[fill++] = st;
+
+	/* Order the states so that equal ones end up next to each other. */
+	mergeSort.sort( statePtrs, numStates );
+
+	/* The partitions are lists of states; one slot per state is allocated. */
+	MinPartition *parts = new MinPartition[numStates];
+
+	/* Walk the sorted array and open a new partition whenever a state
+	 * compares different from its predecessor. */
+	int lastPart = 0;
+	for ( int s = 0; s < numStates; s++ ) {
+		FsmState *cur = statePtrs[s];
+		if ( s > 0 && initPartCompare.compare( statePtrs[s-1], cur ) < 0 )
+			lastPart += 1;
+
+		cur->alg.partition = &parts[lastPart];
+		parts[lastPart].list.append( cur );
+	}
+
+	/* The states now sit on partition lists but were never taken off the
+	 * main list, so simply drop the main list's contents. */
+	stateList.abandon();
+
+	/* Refine the initial partitioning until nothing more can be split. */
+	const int numParts = splitCandidates( statePtrs, parts, lastPart + 1 );
+
+	/* Collapse every partition into one state. This also puts the states
+	 * back onto the main list. */
+	fusePartitions( parts, numParts );
+
+	/* Release the scratch arrays. */
+	delete[] statePtrs;
+	delete[] parts;
+}
+
+/* First marking round of the pair-marking minimization: mark every pair of
+ * states that InitPartitionCompare reports as different. */
+void FsmGraph::initialMarkRound( MarkIndex &markIndex )
+{
+	/* Comparator that decides whether two states differ at the outset. */
+	InitPartitionCompare initPartCompare;
+
+	/* The inner walk restarts at the head of the list and stops just before
+	 * reaching p, so every unordered pair (p, q) with p != q is seen once. */
+	for ( FsmState *p = stateList.head; p != 0; p = p->next ) {
+		for ( FsmState *q = stateList.head; q != p; q = q->next ) {
+			/* States that compare unequal are separated right away. */
+			if ( initPartCompare.compare( p, q ) != 0 )
+				markIndex.markPair( p->alg.stateNum, q->alg.stateNum );
+		}
+	}
+}
+
+/* One marking round over all state pairs. Every pair that is not yet marked
+ * is offered to MarkCompare::shouldMark() and marked if it says so. Returns
+ * true if at least one pair was newly marked in this round. */
+bool FsmGraph::markRound( MarkIndex &markIndex )
+{
+	/* Comparator that decides whether an unmarked pair must be marked. */
+	MarkCompare markCompare;
+	bool pairWasMarked = false;
+
+	/* The inner walk restarts at the head of the list and stops just before
+	 * reaching p, so every unordered pair (p, q) with p != q is seen once. */
+	for ( FsmState *p = stateList.head; p != 0; p = p->next ) {
+		for ( FsmState *q = stateList.head; q != p; q = q->next ) {
+			/* Only pairs that are still unmarked are candidates. */
+			if ( !markIndex.isPairMarked( p->alg.stateNum, q->alg.stateNum ) &&
+					markCompare.shouldMark( markIndex, p, q ) )
+			{
+				markIndex.markPair( p->alg.stateNum, q->alg.stateNum );
+				pairWasMarked = true;
+			}
+		}
+	}
+
+	return pairWasMarked;
+}
+
+
+/**
+ * \brief Minimize by pair marking.
+ *
+ * Decides if each pair of states is distinct or not. Uses O(n^2) memory and
+ * should only be used on small graphs. Produces the most minimal FSM
+ * possible.
+ */
+void FsmGraph::minimizeStable()
+{
+	/* Number the states from zero; the numbers index into the mark table. */
+	setStateNumbers( 0 );
+
+	/* Table recording which pairs of states have been marked. */
+	MarkIndex markIndex( stateList.length() );
+
+	/* Seed the table with the pairs that differ at the outset. */
+	initialMarkRound( markIndex );
+
+	/* Repeat marking rounds until a round marks nothing new. */
+	while ( markRound( markIndex ) ) {
+	}
+
+	/* Whatever is still unmarked is equivalent and gets fused. */
+	fuseUnmarkedPairs( markIndex );
+}
+
+/* One round of the approximate minimization.
+ *
+ * Sorts all states with ApproxCompare and fuses every run of neighbouring
+ * states that compare equal into the first state of the run. Returns true if
+ * at least one state was fused away, false if nothing changed or the graph
+ * has no states. */
+bool FsmGraph::minimizeRound()
+{
+	/* Nothing to do if there are no states. */
+	if ( stateList.length() == 0 )
+		return false;
+
+	/* Need a mergesort on approx compare and an approx compare. */
+	MergeSort<FsmState*, ApproxCompare> mergeSort;
+	ApproxCompare approxCompare;
+
+	/* Take the state count once. fuseEquivStates() detaches the fused state
+	 * from stateList, so stateList.length() shrinks while the array is being
+	 * walked; using it as the loop bound would leave the tail of the sorted
+	 * array unexamined in this round. */
+	const int numStates = stateList.length();
+
+	/* Fill up an array of pointers to the states. */
+	FsmState **statePtrs = new FsmState*[numStates];
+	StateList::Iter state = stateList;
+	for ( int s = 0; state.lte(); state++, s++ )
+		statePtrs[s] = state;
+
+	bool modified = false;
+
+	/* Sort the list. */
+	mergeSort.sort( statePtrs, numStates );
+
+	/* Walk the array looking for duplicates next to each other and merge in
+	 * any duplicates. pLast always refers to a live state: it is the
+	 * destination of every fuse and only advances onto states that were
+	 * not fused away. */
+	FsmState **pLast = statePtrs;
+	for ( int i = 1; i < numStates; i++ ) {
+		FsmState **pState = statePtrs + i;
+		if ( approxCompare.compare( *pLast, *pState ) == 0 ) {
+			/* Last and pState are the same, so fuse together. Move forward
+			 * with pState but not with pLast, so that any further identical
+			 * states are fused into the same survivor. */
+			fuseEquivStates( *pLast, *pState );
+			modified = true;
+		}
+		else {
+			/* Last and this are different, do not merge them. Move pLast
+			 * to the current (it may be way behind from merging many
+			 * states) so the next pair is compared against it. */
+			pLast = pState;
+		}
+	}
+	delete[] statePtrs;
+	return modified;
+}
+
+/**
+ * \brief Minimize by an approximation.
+ *
+ * Repeatedly tries to find states with transitions out to the same set of
+ * states on the same set of keys until no more identical states can be found.
+ * Does not produce the most minimal FSM possible.
+ */
+void FsmGraph::minimizeApproximate()
+{
+	/* Run rounds for as long as the previous one managed to fuse states. */
+	while ( minimizeRound() ) {
+	}
+}
+
+
+/* Delete every state that cannot be reached from the start state or from one
+ * of the entry points. Reachable states are marked first; afterwards the
+ * unmarked states are deleted and the marks are cleared again. */
+void FsmGraph::removeUnreachableStates()
+{
+	/* Misfit accounting must be switched off and the misfit list empty. */
+	assert( !misfitAccounting && misfitList.length() == 0 );
+
+	/* Mark everything reachable from the start state ... */
+	markReachableFromHere( startState );
+
+	/* ... and from each of the entry points. */
+	for ( EntryMap::Iter en = entryPoints; en.lte(); en++ )
+		markReachableFromHere( en->value );
+
+	/* Sweep the state list. The successor is fetched before a state may be
+	 * deleted. */
+	for ( FsmState *state = stateList.head, *following; state != 0; state = following ) {
+		following = state->next;
+
+		if ( ( state->stateBits & SB_ISMARKED ) == 0 ) {
+			/* Never marked: unhook it from the graph and delete it. */
+			detachState( state );
+			stateList.detach( state );
+			delete state;
+		}
+		else {
+			/* Reachable: keep it and clear the mark. */
+			state->stateBits &= ~ SB_ISMARKED;
+		}
+	}
+}
+
+/* Returns true if the out transitions of state cover the whole key range
+ * from keyOps->minKey to keyOps->maxKey without leaving a gap. */
+bool FsmGraph::outListCovers( FsmState *state )
+{
+	/* A state without out transitions covers nothing. */
+	if ( state->outList.length() == 0 )
+		return false;
+
+	/* The first range has to begin at the lower bound. */
+	TransList::Iter cur = state->outList.first();
+	if ( keyOps->minKey < cur->lowKey )
+		return false;
+
+	/* From the second range on, each range must start directly after the
+	 * high end of the range before it. */
+	for ( cur.increment(); cur.lte(); cur++ ) {
+		Key justBelow = cur->lowKey;
+		justBelow.decrement();
+		if ( cur->prev->highKey < justBelow )
+			return false;
+	}
+
+	/* The last range has to reach the upper bound. */
+	TransList::Iter lastTrans = state->outList.last();
+	return !( lastTrans->highKey < keyOps->maxKey );
+}
+
+/* Delete every state that does not lead to a final state. The graph is walked
+ * in reverse from each final state, marking the states seen; afterwards the
+ * unmarked states are deleted and the marks are cleared again. */
+void FsmGraph::removeDeadEndStates()
+{
+	/* Misfit accounting must be switched off and the misfit list empty. */
+	assert( !misfitAccounting && misfitList.length() == 0 );
+
+	/* Mark all states that have a path to one of the final states. */
+	FsmState **finals = finStateSet.data;
+	const int numFinals = finStateSet.length();
+	for ( int i = 0; i < numFinals; i++ )
+		markReachableFromHereReverse( finals[i] );
+
+	/* The start state is kept even if the machine accepts nothing. It is
+	 * marked only now, after the traversals, so that a traversal arriving at
+	 * the start state does not skip its in transitions. */
+	startState->stateBits |= SB_ISMARKED;
+
+	/* Sweep the state list. The successor is fetched before a state may be
+	 * deleted. */
+	for ( FsmState *state = stateList.head, *following; state != 0; state = following ) {
+		following = state->next;
+
+		if ( ( state->stateBits & SB_ISMARKED ) == 0 ) {
+			/* Never marked: unhook it from the graph and delete it. */
+			detachState( state );
+			stateList.detach( state );
+			delete state;
+		}
+		else {
+			/* Leads to a final state: keep it and clear the mark. */
+			state->stateBits &= ~ SB_ISMARKED;
+		}
+	}
+}
+
+/* Drain the misfit list, detaching and deleting every state on it. Misfit
+ * accounting should be on when this is called: detaching a state will likely
+ * cause another misfit to be collected, which is then removed by a later
+ * pass of the loop. */
+void FsmGraph::removeMisfits()
+{
+	for ( ; misfitList.length() > 0; ) {
+		/* Always take whatever is at the head of the list right now. */
+		FsmState *misfit = misfitList.head;
+
+		/* Unhook the state from the graph. */
+		detachState( misfit );
+
+		/* Detaching can only remove in transitions, so the state is still on
+		 * the misfit list at this point and has to be taken off it here. */
+		misfitList.detach( misfit );
+		delete misfit;
+	}
+}
+
+/* Fuse src into dest because they have been deemed equivalent states.
+ * Involves moving transitions into src to go into dest and invoking
+ * callbacks. Src is detached from the graph and deleted, so the caller must
+ * not use src afterwards. Src must be on stateList when this is called. */
+void FsmGraph::fuseEquivStates( FsmState *dest, FsmState *src )
+{
+ /* This would get ugly. */
+ assert( dest != src );
+
+ /* Src is a duplicate of dest. Redirect everything that goes into src so
+ * that it goes into dest instead. */
+ inTransMove( dest, src );
+
+ /* Unhook src from the graph, take it off the main state list and free it. */
+ detachState( src );
+ stateList.detach( src );
+ delete src;
+}
+
+/* Fuse every state into the primary state of its equivalence class, using the
+ * pairs that the marking rounds left unmarked. */
+void FsmGraph::fuseUnmarkedPairs( MarkIndex &markIndex )
+{
+	/* Definition: the primary state of an equivalence class is the first
+	 * state encountered that belongs to the class. Every class has a primary
+	 * state, including classes that consist of a single state. */
+
+	/* For each unmarked pair p is merged into q and p is deleted. q is always
+	 * the primary state of its equivalence class; we would not have landed on
+	 * it here otherwise, because it would already have been deleted.
+	 *
+	 * Proof that q is the primary state of its class: assume it is not. Then
+	 * q would have been merged into some state that came before it, and p
+	 * would be equivalent to that earlier state. But q is the first state
+	 * that p is equivalent to, which is a contradiction. */
+
+	/* The inner walk restarts at the head of the list and stops just before
+	 * reaching p, so every unordered pair (p, q) with p != q is seen. The
+	 * successor of p is fetched up front because p may be deleted. */
+	for ( FsmState *p = stateList.head, *nextP; p != 0; p = nextP ) {
+		nextP = p->next;
+
+		for ( FsmState *q = stateList.head; q != p; q = q->next ) {
+			/* An unmarked pair is equivalent: fold p into q and stop
+			 * looking, since p no longer exists. */
+			if ( ! markIndex.isPairMarked( p->alg.stateNum, q->alg.stateNum ) ) {
+				fuseEquivStates( q, p );
+				break;
+			}
+		}
+	}
+}
+
+/* Collapse each of the numParts partitions in parts into a single state: the
+ * first state of a partition survives and all other states of the partition
+ * are fused into it. The surviving states end up on the main state list and
+ * the partition lists are left empty. */
+void FsmGraph::fusePartitions( MinPartition *parts, int numParts )
+{
+ /* For each partition, fuse state 2, 3, ... into state 1. */
+ for ( int p = 0; p < numParts; p++ ) {
+ /* Assume that there will always be at least one state. The second state
+ * is picked up here, before the append below touches the first state. */
+ FsmState *first = parts[p].list.head, *toFuse = first->next;
+
+ /* Put the first state back onto the main state list. Don't bother
+ * removing it from the partition list first. */
+ stateList.append( first );
+
+ /* Fuse the rest of the states into the first. */
+ while ( toFuse != 0 ) {
+ /* Save the next pointer now. It gets trashed below, before we need
+ * it to advance. */
+ FsmState *next = toFuse->next;
+
+ /* Put the state to be fused into the first back onto the main
+ * list before it is fused. The state needs to be on the main list
+ * for the detach from the graph to work. Don't bother removing the
+ * state from the partition list first. We need not maintain it. */
+ stateList.append( toFuse );
+
+ /* Now fuse to the first. */
+ fuseEquivStates( first, toFuse );
+
+ /* Go to the next that we saved before trashing the next pointer. */
+ toFuse = next;
+ }
+
+ /* We transferred the states from the partition list into the main list
+ * without removing the states from the partition list first. Clean it up. */
+ parts[p].list.abandon();
+ }
+}
+
+
+/* Merge neighboring transitions that go to the same state and have the same
+ * transition data. Two neighbours are merged when the second starts directly
+ * after the first ends, both have the same target state and their action
+ * tables compare equal. */
+void FsmGraph::compressTransitions()
+{
+ for ( StateList::Iter st = stateList; st.lte(); st++ ) {
+ /* A single transition has no neighbour to merge with. */
+ if ( st->outList.length() > 1 ) {
+ /* trans and next walk the out list as a pair of neighbours. */
+ for ( TransList::Iter trans = st->outList, next = trans.next(); next.lte(); ) {
+ /* The key just below next's low end; the ranges are adjacent when
+ * trans ends exactly there. */
+ Key nextLow = next->lowKey;
+ nextLow.decrement();
+ if ( trans->highKey == nextLow && trans->toState == next->toState &&
+ CmpActionTable::compare( trans->actionTable, next->actionTable ) == 0 )
+ {
+ /* Extend trans over next's range, then remove next from the out
+ * list, unhook it from its target state and free it. */
+ trans->highKey = next->highKey;
+ st->outList.detach( next );
+ detachTrans( next->fromState, next->toState, next );
+ delete next;
+ /* trans stays put; re-derive next from it so the widened range is
+ * compared against its new neighbour. */
+ next = trans.next();
+ }
+ else {
+ /* Not mergeable: slide the pair forward by one. */
+ trans.increment();
+ next.increment();
+ }
+ }
+ }
+ }
+}
diff --git a/src/fsmstate.cc b/src/fsmstate.cc
new file mode 100644
index 00000000..b3d1c313
--- /dev/null
+++ b/src/fsmstate.cc
@@ -0,0 +1,441 @@
+/*
+ * Copyright 2006-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <string.h>
+#include <assert.h>
+#include <stdbool.h>
+
+#include <iostream>
+
+#include "fsmgraph.h"
+
+using namespace std;
+
+/* Construct a mark index for a specified number of states. Allocates an
+ * array of states^2 flags, one per ordered pair of state numbers, and clears
+ * all of them. */
+MarkIndex::MarkIndex( int states ) : numStates(states)
+{
+ /* Total pairs is states^2. Actually only use half of these, but we allocate
+ * them all to make indexing into the array easier.
+ * NOTE(review): the product is computed in int; presumably the state count
+ * is small enough that it cannot overflow -- confirm. */
+ int total = states * states;
+
+ /* Allocate the flags and zero the memory manually so that every pair
+ * starts out unmarked. */
+ array = new bool[total];
+ memset( array, 0, sizeof(bool) * total );
+}
+
+/* Free the array of pair flags allocated by the constructor. */
+MarkIndex::~MarkIndex()
+{
+ delete[] array;
+}
+
+/* Record the pair (state1, state2) as marked. States are given by number and
+ * the order of the two arguments does not matter: the larger number always
+ * selects the row of the flag array and the smaller one the column. */
+void MarkIndex::markPair(int state1, int state2)
+{
+	int row = state1, col = state2;
+	if ( state1 < state2 ) {
+		row = state2;
+		col = state1;
+	}
+
+	array[row * numStates + col] = true;
+}
+
+/* Report whether the pair (state1, state2) has been marked. The order of the
+ * arguments does not matter; the slot is located the same way markPair
+ * locates it (larger number is the row, smaller number is the column). */
+bool MarkIndex::isPairMarked(int state1, int state2)
+{
+	int row = state1, col = state2;
+	if ( state1 < state2 ) {
+		row = state2;
+		col = state1;
+	}
+
+	return array[row * numStates + col];
+}
+
+/* Create a new, empty fsm state: no out or in transitions, no entry points
+ * or epsilon transitions, no state bits, empty priority and action tables
+ * and no eof target. */
+FsmState::FsmState()
+:
+ /* No out or in transitions. */
+ outList(),
+ inList(),
+
+ /* No entry points, or epsilon trans. */
+ entryIds(),
+ epsilonTrans(),
+
+ /* No transitions in from other states. */
+ foreignInTrans(0),
+
+ /* Only used during merging. Normally null. */
+ stateDictEl(0),
+ eptVect(0),
+
+ /* No state identification bits. */
+ stateBits(0),
+
+ /* No Priority data. */
+ outPriorTable(),
+
+ /* No Action data. */
+ toStateActionTable(),
+ fromStateActionTable(),
+ outActionTable(),
+ outCondSet(),
+ errActionTable(),
+ eofActionTable(),
+
+ /* No eof target. */
+ eofTarget(0)
+{
+}
+
+/* Copy a state. Entry ids, epsilon transitions, state bits, priority data
+ * and action data are copied. The in list, the foreign in-transition count,
+ * the merge-only fields and the eof target start out empty/null. The out
+ * transitions are duplicated, but the copies still point at the targets of
+ * the original; fixing them up is left to the FsmGraph copy constructor. */
+FsmState::FsmState(const FsmState &other)
+:
+ /* All lists are cleared. They will be filled in when the
+ * individual transitions are duplicated and attached. */
+ outList(),
+ inList(),
+
+ /* Duplicate the entry id set and epsilon transitions. These
+ * are sets of integers and as such need no fixing. */
+ entryIds(other.entryIds),
+ epsilonTrans(other.epsilonTrans),
+
+ /* No transitions in from other states. */
+ foreignInTrans(0),
+
+ /* This is only used during merging. Normally null. */
+ stateDictEl(0),
+ eptVect(0),
+
+ /* Fsm state data. */
+ stateBits(other.stateBits),
+
+ /* Copy in priority data. */
+ outPriorTable(other.outPriorTable),
+
+ /* Copy in action data. */
+ toStateActionTable(other.toStateActionTable),
+ fromStateActionTable(other.fromStateActionTable),
+ outActionTable(other.outActionTable),
+ outCondSet(other.outCondSet),
+ errActionTable(other.errActionTable),
+ eofActionTable(other.eofActionTable),
+
+ /* The eof target is not carried over from the original. */
+ eofTarget(0)
+{
+ /* Duplicate all the transitions. */
+ for ( TransList::Iter trans = other.outList; trans.lte(); trans++ ) {
+ /* Duplicate and store the original target in the transition. This will
+ * be corrected once all the states have been created.
+ * NOTE(review): the explicit toState assignment suggests the FsmTrans
+ * copy constructor does not copy the target itself -- confirm. */
+ FsmTrans *newTrans = new FsmTrans(*trans);
+ newTrans->toState = trans->toState;
+ outList.append( newTrans );
+ }
+}
+
+/* Release the state dict element when one is attached. Everything else the
+ * state refers to is left to the FsmGraph destructor. */
+FsmState::~FsmState()
+{
+	/* Deleting a null pointer is a no-op, so no guard is needed. */
+	delete stateDictEl;
+}
+
+/* Order two states for the approximate compare. The idea is that states
+ * which compare equal here can be merged immediately. Checked in turn: final
+ * state status, epsilon transition sets, state data, and then every pair of
+ * out transitions using the full compare (target state plus transition
+ * data). */
+int ApproxCompare::compare( const FsmState *state1 , const FsmState *state2 )
+{
+	/* A final state orders ahead of a non-final state. */
+	const bool final1 = ( state1->stateBits & SB_ISFINAL ) != 0;
+	const bool final2 = ( state2->stateBits & SB_ISFINAL ) != 0;
+	if ( final1 != final2 )
+		return final1 ? -1 : 1;
+
+	/* Epsilon transition sets. */
+	int res = CmpEpsilonTrans::compare( state1->epsilonTrans, state2->epsilonTrans );
+	if ( res != 0 )
+		return res;
+
+	/* Data attached to the states themselves. */
+	res = FsmGraph::compareStateData( state1, state2 );
+	if ( res != 0 )
+		return res;
+
+	/* Walk both out lists in step and compare each range. */
+	for ( PairIter<FsmTrans> outPair( state1->outList.head, state2->outList.head );
+			!outPair.end(); outPair++ )
+	{
+		res = 0;
+		switch ( outPair.userState ) {
+			case RangeInS1:
+				res = FsmGraph::compareFullPtr( outPair.s1Tel.trans, 0 );
+				break;
+			case RangeInS2:
+				res = FsmGraph::compareFullPtr( 0, outPair.s2Tel.trans );
+				break;
+			case RangeOverlap:
+				res = FsmGraph::compareFullPtr(
+						outPair.s1Tel.trans, outPair.s2Tel.trans );
+				break;
+			case BreakS1:
+			case BreakS2:
+				break;
+		}
+
+		if ( res != 0 )
+			return res;
+	}
+
+	/* No difference found anywhere, deem the states equal. */
+	return 0;
+}
+
+/* Ordering for the sort that produces the initial partition. Checked in
+ * turn: final state status, epsilon transition sets, state data, and then
+ * every pair of out transitions using the data compare, which ignores the
+ * target states. */
+int InitPartitionCompare::compare( const FsmState *state1 , const FsmState *state2 )
+{
+	/* A final state orders ahead of a non-final state. */
+	const bool final1 = ( state1->stateBits & SB_ISFINAL ) != 0;
+	const bool final2 = ( state2->stateBits & SB_ISFINAL ) != 0;
+	if ( final1 != final2 )
+		return final1 ? -1 : 1;
+
+	/* Epsilon transition sets. */
+	int res = CmpEpsilonTrans::compare( state1->epsilonTrans, state2->epsilonTrans );
+	if ( res != 0 )
+		return res;
+
+	/* Data attached to the states themselves. */
+	res = FsmGraph::compareStateData( state1, state2 );
+	if ( res != 0 )
+		return res;
+
+	/* Walk both out lists in step and compare the data on each range. */
+	for ( PairIter<FsmTrans> outPair( state1->outList.head, state2->outList.head );
+			!outPair.end(); outPair++ )
+	{
+		res = 0;
+		switch ( outPair.userState ) {
+			case RangeInS1:
+				res = FsmGraph::compareDataPtr( outPair.s1Tel.trans, 0 );
+				break;
+			case RangeInS2:
+				res = FsmGraph::compareDataPtr( 0, outPair.s2Tel.trans );
+				break;
+			case RangeOverlap:
+				res = FsmGraph::compareDataPtr(
+						outPair.s1Tel.trans, outPair.s2Tel.trans );
+				break;
+			case BreakS1:
+			case BreakS2:
+				break;
+		}
+
+		if ( res != 0 )
+			return res;
+	}
+
+	return 0;
+}
+
+/* Ordering for the sort that refines the partitions. Only the out
+ * transitions are examined, pair by pair, using the partition compare. */
+int PartitionCompare::compare( const FsmState *state1, const FsmState *state2 )
+{
+	/* Walk both out lists in step and compare each range. */
+	for ( PairIter<FsmTrans> outPair( state1->outList.head, state2->outList.head );
+			!outPair.end(); outPair++ )
+	{
+		int res = 0;
+		switch ( outPair.userState ) {
+			case RangeInS1:
+				res = FsmGraph::comparePartPtr( outPair.s1Tel.trans, 0 );
+				break;
+			case RangeInS2:
+				res = FsmGraph::comparePartPtr( 0, outPair.s2Tel.trans );
+				break;
+			case RangeOverlap:
+				res = FsmGraph::comparePartPtr(
+						outPair.s1Tel.trans, outPair.s2Tel.trans );
+				break;
+			case BreakS1:
+			case BreakS2:
+				break;
+		}
+
+		if ( res != 0 )
+			return res;
+	}
+
+	return 0;
+}
+
+/* Decide whether a pair of states should be marked. The out transitions of
+ * the two states are walked in step; the answer is true as soon as
+ * FsmGraph::shouldMarkPtr says so for any range, and false otherwise. */
+bool MarkCompare::shouldMark( MarkIndex &markIndex, const FsmState *state1,
+		const FsmState *state2 )
+{
+	for ( PairIter<FsmTrans> outPair( state1->outList.head, state2->outList.head );
+			!outPair.end(); outPair++ )
+	{
+		bool mark = false;
+		switch ( outPair.userState ) {
+			case RangeInS1:
+				mark = FsmGraph::shouldMarkPtr( markIndex, outPair.s1Tel.trans, 0 );
+				break;
+			case RangeInS2:
+				mark = FsmGraph::shouldMarkPtr( markIndex, 0, outPair.s2Tel.trans );
+				break;
+			case RangeOverlap:
+				mark = FsmGraph::shouldMarkPtr( markIndex,
+						outPair.s1Tel.trans, outPair.s2Tel.trans );
+				break;
+			case BreakS1:
+			case BreakS2:
+				break;
+		}
+
+		if ( mark )
+			return true;
+	}
+
+	return false;
+}
+
+/*
+ * Transition Comparison.
+ */
+
+/* Compare the partitions that two transitions lead to.
+ *
+ * Returns 0 when trans1 is null. Otherwise a transition without a target
+ * orders ahead of one with a target, and when both have targets the result
+ * is the ordering of the targets' partition pointers. When trans1 is set,
+ * trans2 must be set too. */
+int FsmGraph::comparePartPtr( FsmTrans *trans1, FsmTrans *trans2 )
+{
+	if ( trans1 != 0 ) {
+		/* If trans1 is set then so should trans2. The initial partitioning
+		 * is relied upon to guarantee this. Make the assumption explicit so
+		 * that a violation stops here instead of dereferencing null. */
+		assert( trans2 != 0 );
+
+		if ( trans1->toState == 0 && trans2->toState != 0 )
+			return -1;
+		else if ( trans1->toState != 0 && trans2->toState == 0 )
+			return 1;
+		else if ( trans1->toState != 0 ) {
+			/* Both of targets are set. */
+			return CmpOrd< MinPartition* >::compare(
+				trans1->toState->alg.partition, trans2->toState->alg.partition );
+		}
+	}
+	return 0;
+}
+
+
+/* Order two transition pointers by their transition data only; the from and
+ * to states are not looked at. Either pointer may be null. A null pointer
+ * orders ahead of a set one and two null pointers are equal. */
+int FsmGraph::compareDataPtr( FsmTrans *trans1, FsmTrans *trans2 )
+{
+	if ( trans1 == 0 )
+		return trans2 != 0 ? -1 : 0;
+
+	if ( trans2 == 0 )
+		return 1;
+
+	/* Both transitions are set, the data decides. */
+	return compareTransData( trans1, trans2 );
+}
+
+/* Order two transition pointers by target state and then by transition
+ * data. The from state is not looked at. Either pointer may be null: when
+ * exactly one is set, the set one orders first; two null pointers are
+ * equal. */
+int FsmGraph::compareFullPtr( FsmTrans *trans1, FsmTrans *trans2 )
+{
+	const bool set1 = trans1 != 0;
+	const bool set2 = trans2 != 0;
+
+	/* Exactly one of the transitions is set. */
+	if ( set1 != set2 )
+		return set1 ? -1 : 1;
+
+	/* Neither is set. */
+	if ( !set1 )
+		return 0;
+
+	/* Both are set. The target state pointers decide first. */
+	if ( trans1->toState < trans2->toState )
+		return -1;
+	if ( trans1->toState > trans2->toState )
+		return 1;
+
+	/* Same target. Without a target the transition data is not compared. */
+	if ( trans1->toState == 0 )
+		return 0;
+
+	return compareTransData( trans1, trans2 );
+}
+
+
+/* Decide whether the pair of states owning trans1 and trans2 should be
+ * marked on account of these two transitions. When both transitions are set
+ * the answer is whether the pair made of their target states is already
+ * marked in markIndex. When neither is set the answer is false. */
+bool FsmGraph::shouldMarkPtr( MarkIndex &markIndex, FsmTrans *trans1,
+		FsmTrans *trans2 )
+{
+	/* Neither of the transitions is set. */
+	if ( trans1 == 0 && trans2 == 0 )
+		return false;
+
+	if ( trans1 == 0 || trans2 == 0 ) {
+		/* Exactly one of the transitions is set. The initial mark round
+		 * should rule out this case. */
+		assert( false );
+		return false;
+	}
+
+	/* Both of the transitions are set. If the target pair is marked, then
+	 * the pair we are considering gets marked. */
+	return markIndex.isPairMarked( trans1->toState->alg.stateNum,
+			trans2->toState->alg.stateNum );
+}
+
+
diff --git a/src/global.h b/src/global.h
new file mode 100644
index 00000000..58b98077
--- /dev/null
+++ b/src/global.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright 2006-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _COLM_GLOBAL_H
+#define _COLM_GLOBAL_H
+
+#include <stdio.h>
+
+#include <iostream>
+#include <fstream>
+#include <fstream>
+#include <string>
+
+#include <avltree.h>
+
+#include "defs.h"
+#include "keyops.h"
+
+#define PROGNAME "colm"
+
+/* Global flags, counters and names. All of these are only declared here;
+ * each name is declared exactly once. */
+extern bool genGraphviz;
+extern int gblErrorCount;
+
+extern std::ostream *outStream;
+extern bool generateGraphviz;
+extern bool branchPointInfo;
+extern bool verbose, logging;
+extern bool addUniqueEmptyProductions;
+
+extern char startDefName[];
+
+extern bool printStatistics;
+
+extern bool gblLibrary;
+extern long gblActiveRealm;
+extern char machineMain[];
+extern const char *exportHeaderFn;
+extern bool rangeCrossesZero;
+
+/* Error reporting. Each function returns the stream the message text is to
+ * be written to. */
+std::ostream &error();
+std::ostream &error( int first_line, int first_column );
+std::ostream &warning( );
+std::ostream &warning( int first_line, int first_column );
+
+struct colm_location;
+
+/* Location in an input file: a file name plus a line and a column. */
+struct InputLoc
+{
+	/* Build from a colm_location. Defined elsewhere. */
+	InputLoc( colm_location *pcloc );
+
+	/* The default location has no file name and line and column of -1. */
+	InputLoc()
+		: fileName(0), line(-1), col(-1)
+	{}
+
+	/* Member-wise copy. */
+	InputLoc( const InputLoc &loc )
+		: fileName(loc.fileName), line(loc.line), col(loc.col)
+	{}
+
+	const char *fileName;
+	int line;
+	int col;
+};
+
+extern InputLoc internal;
+
+/* Error reporting. */
+std::ostream &error();
+std::ostream &error( const InputLoc &loc );
+std::ostream &warning( const InputLoc &loc );
+
+void scan( char *fileName, std::istream &input, std::ostream &output );
+void terminateAllParsers( );
+void checkMachines( );
+
+void xmlEscapeHost( std::ostream &out, char *data, int len );
+void openOutput();
+void escapeLiteralString( std::ostream &out, const char *data );
+bool readCheck( const char *fn );
+
+#endif /* _COLM_GLOBAL_H */
+
diff --git a/src/input.c b/src/input.c
new file mode 100644
index 00000000..043791f2
--- /dev/null
+++ b/src/input.c
@@ -0,0 +1,759 @@
+/*
+ * Copyright 2007-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <colm/input.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <unistd.h>
+#include <stdbool.h>
+
+#include <colm/pdarun.h>
+#include <colm/debug.h>
+#include <colm/program.h>
+#include <colm/tree.h>
+#include <colm/bytecode.h>
+#include <colm/pool.h>
+#include <colm/struct.h>
+
+DEF_INPUT_FUNCS( input_funcs_seq, input_impl_seq );
+extern struct input_funcs_seq input_funcs;
+
+/* True when the buffer is of type SB_TOKEN or SB_IGNORE, the two types that
+ * carry a tree. */
+static bool is_tree( struct seq_buf *b )
+{
+ return b->type == SB_TOKEN || b->type == SB_IGNORE;
+}
+
+/* True when the buffer is of type SB_SOURCE or SB_ACCUM, the two types that
+ * wrap a stream implementation. */
+static bool is_stream( struct seq_buf *b )
+{
+ return b->type == SB_SOURCE || b->type == SB_ACCUM;
+}
+
+/* Look up a file name in the program's null-terminated table of stream file
+ * names, adding a copy of it when it is not there yet. Returns the pointer
+ * stored in the table, so equal names always yield the same pointer.
+ * NOTE(review): assumes prg->stream_fns is never null and always ends with a
+ * null entry -- confirm against the code that sets it up. */
+char *colm_filename_add( program_t *prg, const char *fn )
+{
+ /* Search for it. */
+ const char **ptr = prg->stream_fns;
+ while ( *ptr != 0 ) {
+ if ( strcmp( *ptr, fn ) == 0 )
+ return (char*)*ptr;
+ ptr += 1;
+ }
+
+ /* Not present, append it. ptr stopped on the terminator, so its distance
+ * from the start is the number of names stored. */
+ int items = ptr - prg->stream_fns;
+
+ /* Make room for the new name plus the terminator.
+ * NOTE(review): the results of realloc and strdup are not checked. */
+ prg->stream_fns = realloc( prg->stream_fns, sizeof(char*) * ( items + 2 ) );
+ prg->stream_fns[items] = strdup( fn );
+ prg->stream_fns[items+1] = 0;
+
+ return (char*)prg->stream_fns[items];
+}
+
+/* Allocate a zero-filled seq_buf. The caller fills in the type and payload.
+ * NOTE(review): the malloc result is not checked before the memset. */
+static struct seq_buf *new_seq_buf()
+{
+ struct seq_buf *rb = (struct seq_buf*) malloc( sizeof(struct seq_buf) );
+ memset( rb, 0, sizeof(struct seq_buf) );
+ return rb;
+}
+
+/* transfer_loc entry of the function table. Intentionally does nothing for
+ * the sequence input. */
+static void input_transfer_loc( struct colm_program *prg, location_t *loc,
+ struct input_impl_seq *ss )
+{
+}
+
+/* True when the buffer wraps a stream and has own_si set, in which case the
+ * stream's destructor is run when the input is destroyed. */
+static bool call_destructor( struct seq_buf *buf )
+{
+ return is_stream( buf ) && buf->own_si;
+}
+
+/* Destructor installed on input structs by colm_input_new_struct. Forwards
+ * to the destructor found in the function table of the attached
+ * implementation. */
+static void colm_input_destroy( program_t *prg, tree_t **sp, struct_t *s )
+{
+ input_t *input = (input_t*) s;
+ struct input_impl *si = input->impl;
+ si->funcs->destructor( prg, sp, si );
+}
+
+/* Push a buffer onto the front of the stash, a singly linked list chained
+ * through the next pointers. Only next is rewritten; prev is left alone. */
+static void input_stream_stash_head( struct colm_program *prg,
+ struct input_impl_seq *si, struct seq_buf *seq_buf )
+{
+ debug( prg, REALM_INPUT, "stash_head: stream %p buf %p\n", si, seq_buf );
+ seq_buf->next = si->stash;
+ si->stash = seq_buf;
+}
+
+/* Remove and return the buffer most recently pushed onto the stash. The
+ * stash must not be empty: si->stash is dereferenced without a check. */
+static struct seq_buf *input_stream_pop_stash( struct colm_program *prg, struct input_impl_seq *si )
+{
+ struct seq_buf *seq_buf = si->stash;
+ si->stash = si->stash->next;
+
+ debug( prg, REALM_INPUT, "pop_stash: stream %p buf %p\n", si, seq_buf );
+
+ return seq_buf;
+}
+
+/* Give the stream at the head of the queue, if there is one, the chance to
+ * split off a part of itself through its split_consumed function. When it
+ * hands back a stream, that stream is wrapped in a new SB_ACCUM buffer that
+ * owns it, and the buffer is pushed onto the stash. */
+static void maybe_split( struct colm_program *prg, struct input_impl_seq *iis )
+{
+	struct seq_buf *front = iis->queue.head;
+	if ( front == 0 || !is_stream( front ) )
+		return;
+
+	/* Maybe the stream will split itself off. */
+	struct stream_impl *consumed = front->si->funcs->split_consumed( prg, front->si );
+	if ( consumed == 0 )
+		return;
+
+	debug( prg, REALM_INPUT, "maybe split: consumed is > 0, splitting\n" );
+
+	struct seq_buf *holder = new_seq_buf();
+	holder->type = SB_ACCUM;
+	holder->si = consumed;
+	holder->own_si = 1;
+
+	input_stream_stash_head( prg, iis, holder );
+}
+
+
+/*
+ * StreamImpl struct, this wraps the list of input streams.
+ */
+
+/* Initialize a sequence input: zero the whole struct and set the type tag to
+ * 'S'. The name argument is currently unused; the only assignment that would
+ * use it is commented out below. */
+void init_input_impl_seq( struct input_impl_seq *is, char *name )
+{
+ memset( is, 0, sizeof(struct input_impl_seq) );
+
+ is->type = 'S';
+ //is->name = name;
+ //is->line = 1;
+ //is->column = 1;
+ //is->byte = 0;
+}
+
+/* Unlink and return the first buffer of the queue. The queue must not be
+ * empty: the head is dereferenced without a check. The links of the returned
+ * buffer are left as they were. */
+static struct seq_buf *input_stream_seq_pop_head( struct input_impl_seq *is )
+{
+ struct seq_buf *ret = is->queue.head;
+ is->queue.head = is->queue.head->next;
+ if ( is->queue.head == 0 )
+ is->queue.tail = 0;
+ else
+ is->queue.head->prev = 0;
+ return ret;
+}
+
+/* Link a buffer in at the tail of the queue. */
+static void input_stream_seq_append( struct input_impl_seq *is, struct seq_buf *seq_buf )
+{
+ if ( is->queue.head == 0 ) {
+ /* Empty queue: the buffer becomes both head and tail. */
+ seq_buf->prev = seq_buf->next = 0;
+ is->queue.head = is->queue.tail = seq_buf;
+ }
+ else {
+ is->queue.tail->next = seq_buf;
+ seq_buf->prev = is->queue.tail;
+ seq_buf->next = 0;
+ is->queue.tail = seq_buf;
+ }
+}
+
+/* Unlink and return the last buffer of the queue. The queue must not be
+ * empty: the tail is dereferenced without a check. The links of the returned
+ * buffer are left as they were. */
+static struct seq_buf *input_stream_seq_pop_tail( struct input_impl_seq *is )
+{
+ struct seq_buf *ret = is->queue.tail;
+ is->queue.tail = is->queue.tail->prev;
+ if ( is->queue.tail == 0 )
+ is->queue.head = 0;
+ else
+ is->queue.tail->next = 0;
+ return ret;
+}
+
+/* Link a buffer in at the head of the queue. */
+static void input_stream_seq_prepend( struct input_impl_seq *is, struct seq_buf *seq_buf )
+{
+ if ( is->queue.head == 0 ) {
+ /* Empty queue: the buffer becomes both head and tail. */
+ seq_buf->prev = seq_buf->next = 0;
+ is->queue.head = is->queue.tail = seq_buf;
+ }
+ else {
+ is->queue.head->prev = seq_buf;
+ seq_buf->prev = 0;
+ seq_buf->next = is->queue.head;
+ is->queue.head = seq_buf;
+ }
+}
+
+/* Store the eof mark. input_get_parse_block reports INPUT_EOF instead of
+ * INPUT_EOD at the end of the queue when the mark is non-zero. */
+void input_set_eof_mark( struct colm_program *prg, struct input_impl_seq *si, char eof_mark )
+{
+ si->eof_mark = eof_mark;
+}
+
+/* Destroy a sequence input: release every buffer on the queue and on the
+ * stash, then free the input itself. */
+static void input_destructor( program_t *prg, tree_t **sp, struct input_impl_seq *si )
+{
+ /* Queue: drop the reference on held trees, destroy owned streams. */
+ struct seq_buf *buf = si->queue.head;
+ while ( buf != 0 ) {
+ if ( is_tree( buf ) )
+ colm_tree_downref( prg, sp, buf->tree );
+
+ if ( call_destructor( buf ) )
+ buf->si->funcs->destructor( prg, sp, buf->si );
+
+ struct seq_buf *next = buf->next;
+ free( buf );
+ buf = next;
+ }
+
+ /* Stash: only owned streams are destroyed. Trees held by stashed buffers
+ * are not downref'd here.
+ * NOTE(review): presumably they have been handed to the consumer --
+ * confirm. */
+ buf = si->stash;
+ while ( buf != 0 ) {
+ struct seq_buf *next = buf->next;
+ if ( call_destructor( buf ) )
+ buf->si->funcs->destructor( prg, sp, buf->si );
+
+ free( buf );
+ buf = next;
+ }
+
+ si->queue.head = 0;
+
+ /* FIXME: Need to leak this for now. Until we can return strings to a
+ * program loader and free them at a later date (after the colm program is
+ * deleted). */
+ // if ( stream->impl->name != 0 )
+ // free( stream->impl->name );
+
+ free( si );
+}
+
+/* Return the auto_trim setting. The option argument is ignored; auto_trim
+ * is the only value this function ever reports. */
+static int input_get_option( struct colm_program *prg, struct input_impl_seq *ii,
+ int option )
+{
+ return ii->auto_trim;
+}
+
+/* Set auto_trim to 1 when value is non-zero and to 0 otherwise. The option
+ * argument is ignored. */
+static void input_set_option( struct colm_program *prg, struct input_impl_seq *ii,
+ int option, int value )
+{
+ ii->auto_trim = value ? 1 : 0;
+}
+
+
+/* Find out what the parser gets next. The queue is scanned from the head
+ * without being modified. Streams that report INPUT_EOD or INPUT_EOF are
+ * passed over. The first stream reporting anything else determines the
+ * result, with pskip, pdp and copied filled in by that stream. A token
+ * buffer gives INPUT_TREE and an ignore buffer gives INPUT_IGNORE. When the
+ * end of the queue is reached the result is INPUT_EOF if the eof mark is
+ * set and INPUT_EOD otherwise. */
+static int input_get_parse_block( struct colm_program *prg, struct input_impl_seq *is,
+ int *pskip, alph_t **pdp, int *copied )
+{
+ int ret = 0;
+ *copied = 0;
+
+ /* Scan the buffers, starting at the head of the queue. */
+ struct seq_buf *buf = is->queue.head;
+ while ( true ) {
+ if ( buf == 0 ) {
+ /* Got through the in-mem buffers without copying anything. */
+ ret = is->eof_mark ? INPUT_EOF : INPUT_EOD;
+ break;
+ }
+
+ if ( is_stream( buf ) ) {
+ struct stream_impl *si = buf->si;
+ int type = si->funcs->get_parse_block( prg, si, pskip, pdp, copied );
+
+ /* This stream has nothing to offer, try the next buffer. */
+ if ( type == INPUT_EOD || type == INPUT_EOF ) {
+ buf = buf->next;
+ continue;
+ }
+
+ ret = type;
+ break;
+ }
+
+ if ( buf->type == SB_TOKEN ) {
+ ret = INPUT_TREE;
+ break;
+ }
+
+ if ( buf->type == SB_IGNORE ) {
+ ret = INPUT_IGNORE;
+ break;
+ }
+
+ buf = buf->next;
+ }
+
+#if DEBUG
+ /* Log what is being returned. */
+ switch ( ret ) {
+ case INPUT_DATA:
+ if ( *pdp != 0 ) {
+ debug( prg, REALM_INPUT, "get parse block: DATA: %d %.*s\n",
+ *copied, (int)(*copied), *pdp );
+ }
+ else {
+ debug( prg, REALM_INPUT, "get parse block: DATA: %d\n", *copied );
+ }
+ break;
+ case INPUT_EOD:
+ debug( prg, REALM_INPUT, "get parse block: EOD\n" );
+ break;
+ case INPUT_EOF:
+ debug( prg, REALM_INPUT, "get parse block: EOF\n" );
+ break;
+ case INPUT_TREE:
+ debug( prg, REALM_INPUT, "get parse block: TREE\n" );
+ break;
+ case INPUT_IGNORE:
+ debug( prg, REALM_INPUT, "get parse block: IGNORE\n" );
+ break;
+ case INPUT_LANG_EL:
+ debug( prg, REALM_INPUT, "get parse block: LANG_EL\n" );
+ break;
+ }
+#endif
+
+ return ret;
+}
+
+/* Copy up to length units into dest without consuming anything. The queue
+ * is walked from the head and each stream is asked for data in turn. The
+ * walk ends at the first token or ignore buffer, at the end of the queue, or
+ * once length units have been gathered. Returns the number of units
+ * copied. */
+static int input_get_data( struct colm_program *prg, struct input_impl_seq *is,
+ alph_t *dest, int length )
+{
+ int copied = 0;
+
+ /* Walk the buffers, starting at the head of the queue. */
+ struct seq_buf *buf = is->queue.head;
+ while ( true ) {
+ if ( buf == 0 ) {
+ /* Reached the end of the queue. */
+ break;
+ }
+
+ if ( is_stream( buf ) ) {
+ struct stream_impl *si = buf->si;
+ int glen = si->funcs->get_data( prg, si, dest+copied, length );
+
+ /* This stream gave nothing, move on to the next buffer. */
+ if ( glen == 0 ) {
+ //debug( REALM_INPUT, "skipping over input\n" );
+ buf = buf->next;
+ continue;
+ }
+
+ copied += glen;
+ length -= glen;
+ }
+ else if ( buf->type == SB_TOKEN )
+ break;
+ else if ( buf->type == SB_IGNORE )
+ break;
+
+ /* The request has been filled. */
+ if ( length == 0 ) {
+ //debug( REALM_INPUT, "exiting get data\n", length );
+ break;
+ }
+
+ buf = buf->next;
+ }
+
+ return copied;
+}
+
+/*
+ * Consume
+ */
+
+/* Consume up to length units from the front of the input. The stream at the
+ * head of the queue is asked to consume; when that does not satisfy the
+ * request, the head buffer is moved to the stash and the next one is tried.
+ * Consumption stops at a token or ignore buffer or at the end of the queue.
+ * loc is handed through to the streams. Returns the number of units
+ * consumed. */
+static int input_consume_data( struct colm_program *prg, struct input_impl_seq *si,
+ int length, location_t *loc )
+{
+ debug( prg, REALM_INPUT, "input_consume_data: stream %p consuming %d bytes\n", si, length );
+
+ int consumed = 0;
+
+ /* Work on the head of the queue until the request is filled. */
+ while ( true ) {
+ struct seq_buf *buf = si->queue.head;
+
+ if ( buf == 0 )
+ break;
+
+ if ( is_stream( buf ) ) {
+ struct stream_impl *sub = buf->si;
+ int slen = sub->funcs->consume_data( prg, sub, length, loc );
+ //debug( REALM_INPUT, " got %d bytes from source\n", slen );
+
+ consumed += slen;
+ length -= slen;
+ }
+ else if ( buf->type == SB_TOKEN )
+ break;
+ else if ( buf->type == SB_IGNORE )
+ break;
+ else {
+ assert(false);
+ }
+
+ /* Done. The head buffer stays on the queue in this case. */
+ if ( length == 0 ) {
+ //debug( REALM_INPUT, "exiting consume\n", length );
+ break;
+ }
+
+ /* The head could not satisfy the request. Move it to the stash, from
+ * where an undo can bring it back. */
+ struct seq_buf *seq_buf = input_stream_seq_pop_head( si );
+ input_stream_stash_head( prg, si, seq_buf );
+ }
+
+ return consumed;
+}
+
+/* Undo the consumption of length units of data.
+ *
+ * The data is pushed back from its end towards its start: the stream at the
+ * head of the queue takes back as much as it can, and every time that is
+ * not enough the most recently stashed buffer is returned to the head of
+ * the queue and asked next. The same data pointer is passed each time with
+ * the count reduced, so each stream sees the part that is still to be
+ * pushed back.
+ *
+ * prg:    the running program.
+ * si:     the sequence input to push back into.
+ * data:   the units that were consumed.
+ * length: how many units to push back; must be greater than zero.
+ *
+ * Returns length. */
+static int input_undo_consume_data( struct colm_program *prg, struct input_impl_seq *si,
+		const alph_t *data, int length )
+{
+	debug( prg, REALM_INPUT, "input_undo_consume_data: stream %p undoing consume of %d bytes\n", si, length );
+
+	assert( length > 0 );
+	int remaining = length;
+
+	while ( true ) {
+		/* The queue can be empty here when a consume moved every buffer to
+		 * the stash, so test the head before looking at its type. */
+		struct seq_buf *head = si->queue.head;
+		if ( head != 0 && is_stream( head ) ) {
+			struct stream_impl *sub = head->si;
+			remaining -= sub->funcs->undo_consume_data( prg, sub, data, remaining );
+
+			if ( remaining == 0 )
+				break;
+		}
+
+		/* More to push back. Bring the most recently stashed buffer back to
+		 * the head of the queue. There has to be one, otherwise more is
+		 * being undone than was consumed. */
+		assert( si->stash != 0 );
+		struct seq_buf *b = input_stream_pop_stash( prg, si );
+		input_stream_seq_prepend( si, b );
+	}
+
+	return length;
+}
+
+/* Consume the tree at the front of the input. Stream buffers ahead of it
+ * are moved to the stash first. The buffer holding the tree is moved to the
+ * stash as well and its tree is returned. */
+static tree_t *input_consume_tree( struct colm_program *prg, struct input_impl_seq *si )
+{
+	debug( prg, REALM_INPUT, "input_consume_tree: stream %p\n", si );
+
+	/* Get the streams at the front out of the way. */
+	for ( ; si->queue.head != 0 && is_stream( si->queue.head ); ) {
+		debug( prg, REALM_INPUT, "  stream %p consume: clearing source type\n", si );
+		struct seq_buf *skipped = input_stream_seq_pop_head( si );
+		input_stream_stash_head( prg, si, skipped );
+	}
+
+	/* What is left at the head has to be a token or an ignore. */
+	assert( si->queue.head != 0 && ( si->queue.head->type == SB_TOKEN ||
+			si->queue.head->type == SB_IGNORE ) );
+
+	struct seq_buf *holder = input_stream_seq_pop_head( si );
+	input_stream_stash_head( prg, si, holder );
+
+	tree_t *result = holder->tree;
+	debug( prg, REALM_INPUT, "  stream %p consume: tree: %p\n", si, result );
+	return result;
+}
+
+
+/* Undo the consumption of a tree. Buffers are moved from the stash back to
+ * the head of the queue until a tree buffer has been moved; that buffer is
+ * asserted to hold a tree with the same id as the one given. The ignore
+ * argument is not used.
+ * NOTE(review): the debug text inside the loop says "consume" although
+ * buffers are being restored here. */
+static void input_undo_consume_tree( struct colm_program *prg, struct input_impl_seq *si,
+ tree_t *tree, int ignore )
+{
+ debug( prg, REALM_INPUT, "input_undo_consume_tree: stream %p undo "
+ "consume tree %p\n", si, tree );
+
+ while ( true ) {
+ debug( prg, REALM_INPUT, " stream %p consume: clearing source type\n", si );
+
+ struct seq_buf *b = input_stream_pop_stash( prg, si );
+ input_stream_seq_prepend( si, b );
+
+ /* Stop once the tree buffer is back on the queue. */
+ if ( is_tree( b ) ) {
+ assert( b->tree->id == tree->id );
+ break;
+ }
+ }
+}
+
+/*
+ * Prepend
+ */
+/* Put a block of text at the front of the input.
+ *
+ * The stream currently at the head of the queue is first given the chance
+ * to split itself (see maybe_split). The text is then wrapped in a new text
+ * stream, held by a new SB_ACCUM buffer that owns it, and the buffer is
+ * linked in at the head of the queue.
+ *
+ * prg:    the running program.
+ * si:     the sequence input to prepend to.
+ * loc:    where the text came from, may be null. The stream is named after
+ *         loc->name, or "<text1>" when loc is null.
+ * data:   the text.
+ * length: number of units in data. */
+static void input_prepend_data( struct colm_program *prg, struct input_impl_seq *si,
+		struct colm_location *loc, const alph_t *data, long length )
+{
+	/* length is a long, so it needs the l length modifier. */
+	debug( prg, REALM_INPUT, "input_prepend_data: stream %p prepend data length %ld\n", si, length );
+
+	maybe_split( prg, si );
+
+	char *name = loc != 0 ? (char*)loc->name : "<text1>";
+	struct stream_impl *sub_si = colm_impl_new_text( name, loc, data, length );
+
+	struct seq_buf *new_buf = new_seq_buf();
+	new_buf->type = SB_ACCUM;
+	new_buf->si = sub_si;
+	new_buf->own_si = 1;
+
+	input_stream_seq_prepend( si, new_buf );
+}
+
+/* Undo a data prepend: unlink the buffer at the head of the queue and free
+ * it. length is only used for the debug message. Always returns 0.
+ * NOTE(review): only the seq_buf is freed. input_prepend_data creates the
+ * buffer with own_si set, and the stream it holds is not destroyed here --
+ * confirm whether that is released elsewhere.
+ * NOTE(review): the debug text says "append" although this undoes a
+ * prepend. */
+static int input_undo_prepend_data( struct colm_program *prg, struct input_impl_seq *si, int length )
+{
+ debug( prg, REALM_INPUT, "input_undo_prepend_data: stream %p undo "
+ "append data length %d\n", si, length );
+
+ struct seq_buf *seq_buf = input_stream_seq_pop_head( si );
+ free( seq_buf );
+
+ return 0;
+}
+
+/* Put a tree at the front of the input. The stream currently at the head of
+ * the queue is first given the chance to split itself (see maybe_split). The
+ * tree is held by a new buffer of type SB_IGNORE when ignore is non-zero and
+ * SB_TOKEN otherwise. No reference is taken on the tree here. */
+static void input_prepend_tree( struct colm_program *prg, struct input_impl_seq *si,
+ tree_t *tree, int ignore )
+{
+ debug( prg, REALM_INPUT, "input_prepend_tree: stream %p prepend tree %p\n", si, tree );
+
+ maybe_split( prg, si );
+
+ /* Create a new buffer to hold the tree and link it in at the head of
+ * the queue. */
+ struct seq_buf *new_buf = new_seq_buf();
+ new_buf->type = ignore ? SB_IGNORE : SB_TOKEN;
+ new_buf->tree = tree;
+ input_stream_seq_prepend( si, new_buf );
+}
+
+/* Undo a tree prepend: unlink the buffer at the head of the queue, which is
+ * asserted to be a token or ignore buffer, free it and return the tree it
+ * held. */
+static tree_t *input_undo_prepend_tree( struct colm_program *prg, struct input_impl_seq *si )
+{
+ debug( prg, REALM_INPUT, "input_undo_prepend_tree: stream %p undo prepend tree\n", si );
+
+ assert( si->queue.head != 0 && ( si->queue.head->type == SB_TOKEN ||
+ si->queue.head->type == SB_IGNORE ) );
+
+ struct seq_buf *seq_buf = input_stream_seq_pop_head( si );
+
+ /* Save the tree before the buffer that holds it goes away. */
+ tree_t *tree = seq_buf->tree;
+ free(seq_buf);
+
+ debug( prg, REALM_INPUT, " stream %p tree %p\n", si, tree );
+
+ return tree;
+}
+
+
+/* Put a stream at the front of the input. The stream currently at the head
+ * of the queue is first given the chance to split itself (see maybe_split).
+ * The new SB_SOURCE buffer refers to the implementation of the given stream;
+ * own_si is left at zero, so the input will not destroy it. */
+static void input_prepend_stream( struct colm_program *prg, struct input_impl_seq *si,
+ struct colm_stream *stream )
+{
+ maybe_split( prg, si );
+
+ /* Create a new buffer that refers to the stream and link it in at the
+ * head of the queue. */
+ struct seq_buf *new_buf = new_seq_buf();
+ new_buf->type = SB_SOURCE;
+ new_buf->si = stream_to_impl( stream );
+ input_stream_seq_prepend( si, new_buf );
+
+ /* Only implementations tagged 'D' are expected here. */
+ assert( ((struct stream_impl_data*)new_buf->si)->type == 'D' );
+}
+
+/* Undo a stream prepend: unlink the buffer at the head of the queue and free
+ * it. The stream it referred to is left alone. Always returns 0. */
+static tree_t *input_undo_prepend_stream( struct colm_program *prg, struct input_impl_seq *is )
+{
+ struct seq_buf *seq_buf = input_stream_seq_pop_head( is );
+ free( seq_buf );
+ return 0;
+}
+
+/* Add a block of text to the end of the input.
+ *
+ * When the queue is empty or its last buffer is not of type SB_ACCUM, a new
+ * accumulator stream named "<text2>" is created, held by a new SB_ACCUM
+ * buffer that owns it, and the buffer is linked in at the tail. The text is
+ * then handed to the append_data function of the stream in the tail buffer.
+ *
+ * prg:    the running program.
+ * si:     the sequence input to append to.
+ * data:   the text.
+ * length: number of units in data. */
+static void input_append_data( struct colm_program *prg, struct input_impl_seq *si,
+		const alph_t *data, long length )
+{
+	/* length is a long, so it needs the l length modifier. */
+	debug( prg, REALM_INPUT, "input_append_data: stream %p append data length %ld\n", si, length );
+
+	if ( si->queue.tail == 0 || si->queue.tail->type != SB_ACCUM ) {
+		debug( prg, REALM_INPUT, "input_append_data: creating accum\n" );
+
+		struct stream_impl *sub_si = colm_impl_new_accum( "<text2>" );
+
+		struct seq_buf *new_buf = new_seq_buf();
+		new_buf->type = SB_ACCUM;
+		new_buf->si = sub_si;
+		new_buf->own_si = 1;
+
+		input_stream_seq_append( si, new_buf );
+	}
+
+	si->queue.tail->si->funcs->append_data( prg, si->queue.tail->si, data, length );
+}
+
+/* Undo the appending of length units of data. The stream in the tail buffer
+ * is asked to undo; when that does not cover the whole length, the tail
+ * buffer is unlinked and freed and the buffer before it is asked next. The
+ * loop stops at a token or ignore buffer or when the queue is empty. Always
+ * returns 0.
+ * NOTE(review): only the seq_buf is freed when a tail buffer is dropped. A
+ * stream the buffer owns is not destroyed here -- confirm whether that is
+ * released elsewhere. */
+static tree_t *input_undo_append_data( struct colm_program *prg, struct input_impl_seq *si, int length )
+{
+ debug( prg, REALM_INPUT, "input_undo_append_data: stream %p undo append data length %d\n", si, length );
+
+ while ( true ) {
+ struct seq_buf *buf = si->queue.tail;
+
+ if ( buf == 0 )
+ break;
+
+ if ( is_stream( buf ) ) {
+ struct stream_impl *sub = buf->si;
+ int slen = sub->funcs->undo_append_data( prg, sub, length );
+ //debug( REALM_INPUT, " got %d bytes from source\n", slen );
+ //consumed += slen;
+ length -= slen;
+ }
+ else if ( buf->type == SB_TOKEN )
+ break;
+ else if ( buf->type == SB_IGNORE )
+ break;
+ else {
+ assert(false);
+ }
+
+ /* Everything has been undone. The tail buffer stays on the queue. */
+ if ( length == 0 ) {
+ //debug( REALM_INPUT, "exiting consume\n", length );
+ break;
+ }
+
+ /* The tail did not cover the request. Drop it and go on with the
+ * buffer before it. */
+ struct seq_buf *seq_buf = input_stream_seq_pop_tail( si );
+ free( seq_buf );
+ }
+ return 0;
+}
+
+/* Add a tree to the end of the input, held by a new SB_TOKEN buffer. No
+ * reference is taken on the tree here. */
+static void input_append_tree( struct colm_program *prg, struct input_impl_seq *si, tree_t *tree )
+{
+ debug( prg, REALM_INPUT, "input_append_tree: stream %p append tree %p\n", si, tree );
+
+ struct seq_buf *ad = new_seq_buf();
+
+ input_stream_seq_append( si, ad );
+
+ /* The buffer is filled in after it has been linked in. */
+ ad->type = SB_TOKEN;
+ ad->tree = tree;
+}
+
+/* Undo a tree append: unlink the buffer at the tail of the queue, free it
+ * and return the tree it held. The type of the buffer is not checked. */
+static tree_t *input_undo_append_tree( struct colm_program *prg, struct input_impl_seq *si )
+{
+ debug( prg, REALM_INPUT, "input_undo_append_tree: stream %p undo append tree\n", si );
+
+ struct seq_buf *seq_buf = input_stream_seq_pop_tail( si );
+ tree_t *tree = seq_buf->tree;
+ free( seq_buf );
+ return tree;
+}
+
+/* Add a stream to the end of the input. The new SB_SOURCE buffer refers to
+ * the implementation of the given stream; own_si is left at zero, so the
+ * input will not destroy it. */
+static void input_append_stream( struct colm_program *prg, struct input_impl_seq *si,
+ struct colm_stream *stream )
+{
+ debug( prg, REALM_INPUT, "input_append_stream: stream %p append stream %p\n", si, stream );
+
+ struct seq_buf *ad = new_seq_buf();
+
+ input_stream_seq_append( si, ad );
+
+ /* The buffer is filled in after it has been linked in. */
+ ad->type = SB_SOURCE;
+ ad->si = stream_to_impl( stream );
+
+ /* Only implementations tagged 'D' are expected here. */
+ assert( ((struct stream_impl_data*)ad->si)->type == 'D' );
+}
+
+/* Undo a stream append: unlink the buffer at the tail of the queue and free
+ * it. The stream it referred to is left alone. Always returns 0. */
+static tree_t *input_undo_append_stream( struct colm_program *prg, struct input_impl_seq *si )
+{
+ debug( prg, REALM_INPUT, "input_undo_append_stream: stream %p undo append stream\n", si );
+
+ struct seq_buf *seq_buf = input_stream_seq_pop_tail( si );
+ free( seq_buf );
+ return 0;
+}
+
+/* The function table of the sequence input. The initializers are
+ * positional, so their order has to follow the member order of the struct
+ * produced by DEF_INPUT_FUNCS. colm_impl_new_generic installs this table in
+ * every input it creates. */
+struct input_funcs_seq input_funcs =
+{
+ &input_get_parse_block,
+ &input_get_data,
+
+ /* Consume. */
+ &input_consume_data,
+ &input_undo_consume_data,
+
+ &input_consume_tree,
+ &input_undo_consume_tree,
+
+ 0, /* consume_lang_el */
+ 0, /* undo_consume_lang_el */
+
+ /* Prepend */
+ &input_prepend_data,
+ &input_undo_prepend_data,
+
+ &input_prepend_tree,
+ &input_undo_prepend_tree,
+
+ &input_prepend_stream,
+ &input_undo_prepend_stream,
+
+ /* Append */
+ &input_append_data,
+ &input_undo_append_data,
+
+ &input_append_tree,
+ &input_undo_append_tree,
+
+ &input_append_stream,
+ &input_undo_append_stream,
+
+ /* EOF */
+ &input_set_eof_mark,
+
+ &input_transfer_loc,
+ &input_destructor,
+
+ /* Trimming */
+ &input_get_option,
+ &input_set_option,
+};
+
+/* Allocate a sequence input, initialize it and install the sequence
+ * function table. The result is returned as a generic input_impl pointer.
+ * name is passed on to init_input_impl_seq, which currently does not store
+ * it.
+ * NOTE(review): the malloc result is not checked. */
+struct input_impl *colm_impl_new_generic( char *name )
+{
+ struct input_impl_seq *ss = (struct input_impl_seq*)malloc(sizeof(struct input_impl_seq));
+ init_input_impl_seq( ss, name );
+ ss->funcs = (struct input_funcs*)&input_funcs;
+ return (struct input_impl*)ss;
+}
+
+/* Allocate a zero-filled input struct, register it with the program through
+ * colm_struct_add, and set its id and destructor. The impl field is left
+ * null for the caller to fill in.
+ * NOTE(review): the malloc result is not checked before the memset. */
+input_t *colm_input_new_struct( program_t *prg )
+{
+ size_t memsize = sizeof(struct colm_input);
+ struct colm_input *input = (struct colm_input*) malloc( memsize );
+ memset( input, 0, memsize );
+ colm_struct_add( prg, (struct colm_struct *)input );
+ input->id = prg->rtd->struct_input_id;
+ input->destructor = &colm_input_destroy;
+ return input;
+}
+
+/* Create a complete input: a sequence implementation created under the
+ * file name "<internal>" (entered into the program's file name table),
+ * attached to a freshly registered input struct. */
+input_t *colm_input_new( program_t *prg )
+{
+ struct input_impl *impl = colm_impl_new_generic( colm_filename_add( prg, "<internal>" ) );
+ struct colm_input *input = colm_input_new_struct( prg );
+ input->impl = impl;
+ return input;
+}
+
+/* Return the implementation attached to an input struct. */
+struct input_impl *input_to_impl( input_t *ptr )
+{
+ return ptr->impl;
+}
diff --git a/src/input.h b/src/input.h
new file mode 100644
index 00000000..8cb20088
--- /dev/null
+++ b/src/input.h
@@ -0,0 +1,232 @@
+/*
+ * Copyright 2007-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _COLM_INPUT_H
+#define _COLM_INPUT_H
+
+#include <stdio.h>
+#include "colm.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Number of alph_t elements in the inline data array of struct run_buf. */
+#define FSM_BUFSIZE 8192
+//#define FSM_BUFSIZE 8
+
+/* NOTE(review): these look like the result codes of the get_parse_block /
+ * get_data callbacks -- confirm against the implementations. */
+#define INPUT_DATA 1
+/* This is for data sources to return, not for the wrapper. */
+#define INPUT_EOD 2
+#define INPUT_EOF 3
+#define INPUT_EOS 4
+#define INPUT_LANG_EL 5
+#define INPUT_TREE 6
+#define INPUT_IGNORE 7
+
+/* Forward declarations of types that are defined elsewhere and only used by
+ * pointer in the declarations below. */
+struct LangEl;
+struct colm_tree;
+struct colm_stream;
+struct colm_location;
+struct colm_program;
+struct colm_struct;
+struct colm_str;
+struct colm_str_collect;
+
+struct input_impl;
+struct stream_impl;
+
+/* Element type of the data buffers passed through the input and stream
+ * callbacks. */
+typedef colm_alph_t alph_t;
+
+/*
+ * Declare a struct, named by the first macro argument, that holds the
+ * callback table of an input implementation. Every callback takes the program
+ * and a pointer to the struct named by the second argument, so the same
+ * member layout can be generated for the base type and for a concrete
+ * implementation type. Tables are filled positionally, so members must not
+ * be reordered without updating the initializers.
+ */
+#define DEF_INPUT_FUNCS( input_funcs, _input_impl ) \
+struct input_funcs \
+{ \
+ int (*get_parse_block)( struct colm_program *prg, struct _input_impl *si, int *pskip, alph_t **pdp, int *copied ); \
+ int (*get_data)( struct colm_program *prg, struct _input_impl *si, alph_t *dest, int length ); \
+ int (*consume_data)( struct colm_program *prg, struct _input_impl *si, int length, struct colm_location *loc ); \
+ int (*undo_consume_data)( struct colm_program *prg, struct _input_impl *si, const alph_t *data, int length ); \
+ struct colm_tree *(*consume_tree)( struct colm_program *prg, struct _input_impl *si ); \
+ void (*undo_consume_tree)( struct colm_program *prg, struct _input_impl *si, struct colm_tree *tree, int ignore ); \
+ struct LangEl *(*consume_lang_el)( struct colm_program *prg, struct _input_impl *si, long *bind_id, alph_t **data, long *length ); \
+ void (*undo_consume_lang_el)( struct colm_program *prg, struct _input_impl *si ); \
+ void (*prepend_data)( struct colm_program *prg, struct _input_impl *si, struct colm_location *loc, const alph_t *data, long len ); \
+ int (*undo_prepend_data)( struct colm_program *prg, struct _input_impl *si, int length ); \
+ void (*prepend_tree)( struct colm_program *prg, struct _input_impl *si, struct colm_tree *tree, int ignore ); \
+ struct colm_tree *(*undo_prepend_tree)( struct colm_program *prg, struct _input_impl *si ); \
+ void (*prepend_stream)( struct colm_program *prg, struct _input_impl *si, struct colm_stream *stream ); \
+ struct colm_tree *(*undo_prepend_stream)( struct colm_program *prg, struct _input_impl *si ); \
+ void (*append_data)( struct colm_program *prg, struct _input_impl *si, const alph_t *data, long length ); \
+ struct colm_tree *(*undo_append_data)( struct colm_program *prg, struct _input_impl *si, int length ); \
+ void (*append_tree)( struct colm_program *prg, struct _input_impl *si, struct colm_tree *tree ); \
+ struct colm_tree *(*undo_append_tree)( struct colm_program *prg, struct _input_impl *si ); \
+ void (*append_stream)( struct colm_program *prg, struct _input_impl *si, struct colm_stream *stream ); \
+ struct colm_tree *(*undo_append_stream)( struct colm_program *prg, struct _input_impl *si ); \
+ void (*set_eof_mark)( struct colm_program *prg, struct _input_impl *si, char eof_mark ); \
+ void (*transfer_loc)( struct colm_program *prg, struct colm_location *loc, struct _input_impl *si ); \
+ void (*destructor)( struct colm_program *prg, struct colm_tree **sp, struct _input_impl *si ); \
+ int (*get_option)( struct colm_program *prg, struct _input_impl *si, int option ); \
+ void (*set_option)( struct colm_program *prg, struct _input_impl *si, int option, int value ); \
+}
+
+/*
+ * Declare a struct, named by the first macro argument, that holds the
+ * callback table of a stream implementation. Every callback takes the program
+ * and a pointer to the struct named by the second argument. Note that
+ * split_consumed always returns the base struct stream_impl type.
+ */
+#define DEF_STREAM_FUNCS( stream_funcs, _stream_impl ) \
+struct stream_funcs \
+{ \
+ int (*get_parse_block)( struct colm_program *prg, struct _stream_impl *si, int *pskip, alph_t **pdp, int *copied ); \
+ int (*get_data)( struct colm_program *prg, struct _stream_impl *si, alph_t *dest, int length ); \
+ int (*get_data_source)( struct colm_program *prg, struct _stream_impl *si, alph_t *dest, int length ); \
+ int (*consume_data)( struct colm_program *prg, struct _stream_impl *si, int length, struct colm_location *loc ); \
+ int (*undo_consume_data)( struct colm_program *prg, struct _stream_impl *si, const alph_t *data, int length ); \
+ void (*transfer_loc)( struct colm_program *prg, struct colm_location *loc, struct _stream_impl *si ); \
+ struct colm_str_collect *(*get_collect)( struct colm_program *prg, struct _stream_impl *si ); \
+ void (*flush_stream)( struct colm_program *prg, struct _stream_impl *si ); \
+ void (*close_stream)( struct colm_program *prg, struct _stream_impl *si ); \
+ void (*print_tree)( struct colm_program *prg, struct colm_tree **sp, \
+ struct _stream_impl *impl, struct colm_tree *tree, int trim ); \
+ struct stream_impl *(*split_consumed)( struct colm_program *prg, struct _stream_impl *si ); \
+ int (*append_data)( struct colm_program *prg, struct _stream_impl *si, const alph_t *data, int len ); \
+ int (*undo_append_data)( struct colm_program *prg, struct _stream_impl *si, int length ); \
+ void (*destructor)( struct colm_program *prg, struct colm_tree **sp, struct _stream_impl *si ); \
+ int (*get_option)( struct colm_program *prg, struct _stream_impl *si, int option ); \
+ void (*set_option)( struct colm_program *prg, struct _stream_impl *si, int option, int value ); \
+}
+
+/* Generate the base callback tables, struct input_funcs and struct
+ * stream_funcs, whose callbacks take the base implementation types. */
+DEF_INPUT_FUNCS( input_funcs, input_impl );
+DEF_STREAM_FUNCS( stream_funcs, stream_impl );
+
+/* Base view of an input implementation: only the callback table. Concrete
+ * types such as input_impl_seq start with the same funcs member and are
+ * passed around as this type by pointer cast. */
+struct input_impl
+{
+ struct input_funcs *funcs;
+};
+
+/* Base view of a stream implementation: only the callback table.
+ * stream_impl_data starts with the same funcs member; presumably concrete
+ * streams are passed around as this type by pointer cast -- confirm. */
+struct stream_impl
+{
+ struct stream_funcs *funcs;
+};
+
+/* Kind tag stored in seq_buf.type. Values start at 1, so a zeroed seq_buf
+ * carries none of these kinds.
+ * NOTE(review): the names suggest token tree / ignore tree / source stream /
+ * accumulated data entries -- confirm against input.c. */
+enum seq_buf_type {
+ SB_TOKEN = 1,
+ SB_IGNORE,
+ SB_SOURCE,
+ SB_ACCUM
+};
+
+/* One entry of an input_impl_seq queue, linked in both directions through
+ * next/prev. An entry can reference a tree and/or a stream implementation.
+ * NOTE(review): own_si presumably records whether this entry owns si and
+ * must release it -- confirm. */
+struct seq_buf
+{
+ enum seq_buf_type type;
+ char own_si;
+ struct colm_tree *tree;
+ struct stream_impl *si;
+ struct seq_buf *next, *prev;
+};
+
+/* Generic input implementation: a queue of seq_buf entries. The funcs member
+ * comes first so that a pointer to this struct can be used as a
+ * struct input_impl pointer.
+ * NOTE(review): presumably the queue is what allows sources to be
+ * pushed/popped, as the original comment said -- confirm. */
+struct input_impl_seq
+{
+ struct input_funcs *funcs;
+ char type;
+
+ char eof_mark;
+ char eof_sent;
+
+ /* Doubly linked list of entries, tracked by head and tail. */
+ struct {
+ struct seq_buf *head;
+ struct seq_buf *tail;
+ } queue;
+
+ /* NOTE(review): presumably entries taken off the queue that an undo may
+ * restore -- confirm. */
+ struct seq_buf *stash;
+
+ int consumed;
+ int auto_trim;
+};
+
+/* A block of buffered input data, linked in both directions through
+ * next/prev. The data array holds FSM_BUFSIZE elements by default and stays
+ * the last member so a larger allocation can extend it. */
+struct run_buf
+{
+ long length;
+ long offset;
+ struct run_buf *next, *prev;
+
+ /* Must be at the end. We will grow this struct to add data if the input
+ * demands it. */
+ alph_t data[FSM_BUFSIZE];
+};
+
+/* NOTE(review): sz is presumably the requested data capacity -- confirm
+ * against the definition. */
+struct run_buf *new_run_buf( int sz );
+
+/* Data-backed stream implementation. The funcs member comes first, matching
+ * the layout of struct stream_impl.
+ * NOTE(review): struct indent_impl is embedded by value, so its full
+ * definition must be visible before this header is parsed (presumably via
+ * colm.h) -- confirm. */
+struct stream_impl_data
+{
+ struct stream_funcs *funcs;
+ char type;
+
+ /* Doubly linked list of run buffers, tracked by head and tail. */
+ struct {
+ struct run_buf *head;
+ struct run_buf *tail;
+ } queue;
+
+ const alph_t *data;
+ long dlen;
+ int offset;
+
+ /* Position counters. */
+ long line;
+ long column;
+ long byte;
+
+ char *name;
+ FILE *file;
+
+ struct colm_str_collect *collect;
+
+ int consumed;
+
+ struct indent_impl indent;
+
+ /* NOTE(review): presumably a growable array of line lengths with its
+ * allocated and used counts, maintained by stream_impl_push_line() and
+ * stream_impl_pop_line() -- confirm. */
+ int *line_len;
+ int lines_alloc;
+ int lines_cur;
+
+ int auto_trim;
+};
+
+/* Line bookkeeping on a data stream. */
+void stream_impl_push_line( struct stream_impl_data *ss, int ll );
+int stream_impl_pop_line( struct stream_impl_data *ss );
+
+/* Create a generic (input_impl_seq based) input implementation. */
+struct input_impl *colm_impl_new_generic( char *name );
+
+/* NOTE(review): these take const char* while the callback tables use
+ * alph_t -- confirm the two are meant to be the same type. */
+void update_position( struct stream_impl *input_stream, const char *data, long length );
+void undo_position( struct stream_impl *input_stream, const char *data, long length );
+
+struct stream_impl *colm_stream_impl( struct colm_struct *s );
+
+struct colm_str *collect_string( struct colm_program *prg, struct colm_stream *s );
+struct colm_stream *colm_stream_open_collect( struct colm_program *prg );
+
+/* Stream implementation constructors and file name registration. */
+char *colm_filename_add( struct colm_program *prg, const char *fn );
+struct stream_impl *colm_impl_new_accum( char *name );
+struct stream_impl *colm_impl_consumed( char *name, int len );
+struct stream_impl *colm_impl_new_text( char *name, struct colm_location *loc, const alph_t *data, int len );
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _COLM_INPUT_H */
+
diff --git a/src/internal.h b/src/internal.h
new file mode 100644
index 00000000..e6e1fa7e
--- /dev/null
+++ b/src/internal.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2007-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _COLM_INTERNAL_H
+#define _COLM_INTERNAL_H
+
+#include "colm.h"
+
+/* Short aliases used inside the runtime sources. */
+typedef struct colm_struct struct_t;
+typedef struct colm_program program_t;
+/* Untyped machine word value. */
+typedef unsigned long value_t;
+
+#endif /* _COLM_INTERNAL_H */
+
diff --git a/src/iter.c b/src/iter.c
new file mode 100644
index 00000000..66974f4a
--- /dev/null
+++ b/src/iter.c
@@ -0,0 +1,648 @@
+/*
+ * Copyright 2007-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stdbool.h>
+
+#include <colm/tree.h>
+#include <colm/bytecode.h>
+#include <colm/program.h>
+
+#include "internal.h"
+
+/*
+ * Prepare a generic iterator for its first advance: record the root
+ * reference, the stack bookkeeping values and the generic id, and clear the
+ * current position.
+ */
+void colm_init_list_iter( generic_iter_t *list_iter, tree_t **stack_root,
+		long arg_size, long root_size, const ref_t *root_ref, int generic_id )
+{
+	/* Identity. Generic iterators carry the IT_Tree tag. */
+	list_iter->type = IT_Tree;
+
+	/* Where iteration starts. */
+	list_iter->root_ref = *root_ref;
+
+	/* No current element yet. */
+	list_iter->ref.kid = 0;
+	list_iter->ref.next = 0;
+
+	/* Stack accounting. */
+	list_iter->stack_root = stack_root;
+	list_iter->root_size = root_size;
+	list_iter->arg_size = arg_size;
+	list_iter->yield_size = 0;
+
+	list_iter->generic_id = generic_id;
+}
+
+/*
+ * Tear down a generic iterator: pop whatever it yielded onto the stack, pop
+ * its arguments as plain values, and mark it destroyed by zeroing its type.
+ * An iterator whose type is already zero is left untouched.
+ */
+void colm_list_iter_destroy( program_t *prg, tree_t ***psp, generic_iter_t *iter )
+{
+	tree_t **sp;
+	long cur_stack_size;
+	int i;
+
+	if ( (int)iter->type == 0 )
+		return;
+
+	sp = *psp;
+	cur_stack_size = vm_ssize() - iter->root_size;
+	assert( iter->yield_size == cur_stack_size );
+
+	vm_popn( iter->yield_size );
+
+	/* The arguments are discarded without a downref. */
+	i = 0;
+	while ( i < iter->arg_size ) {
+		vm_pop_value();
+		i += 1;
+	}
+
+	iter->type = 0;
+	*psp = sp;
+}
+
+/*
+ * Move a list iterator forward by one element. The first call starts at the
+ * head of the list found through the root reference; later calls follow
+ * list_next. Returns the program's true value while an element is current
+ * and its false value once the end is passed.
+ */
+tree_t *colm_list_iter_advance( program_t *prg, tree_t ***psp, generic_iter_t *iter )
+{
+	tree_t **sp = *psp;
+	kid_t *target;
+
+	assert( iter->yield_size == (vm_ssize() - iter->root_size) );
+
+	if ( iter->ref.kid != 0 ) {
+		/* Step from the current element to its successor. */
+		list_el_t *cur = (list_el_t*)iter->ref.kid;
+		target = (kid_t*)cur->list_next;
+	}
+	else {
+		/* Nothing current: begin at the head of the list. */
+		list_t *list = *((list_t**)iter->root_ref.kid);
+		target = (kid_t*)list->head;
+	}
+
+	iter->ref.kid = target;
+	iter->ref.next = 0;
+
+	sp = *psp;
+	iter->yield_size = vm_ssize() - iter->root_size;
+
+	if ( iter->ref.kid )
+		return prg->true_val;
+	return prg->false_val;
+}
+
+/*
+ * Move a list iterator backward by one element. The first call starts at the
+ * tail of the list found through the root reference; later calls follow
+ * list_prev. Returns the program's true value while an element is current
+ * and its false value once the front is passed.
+ */
+tree_t *colm_rev_list_iter_advance( program_t *prg, tree_t ***psp, generic_iter_t *iter )
+{
+	tree_t **sp = *psp;
+	kid_t *target;
+
+	assert( iter->yield_size == (vm_ssize() - iter->root_size) );
+
+	if ( iter->ref.kid != 0 ) {
+		/* Step from the current element to its predecessor. */
+		list_el_t *cur = (list_el_t*)iter->ref.kid;
+		target = (kid_t*)cur->list_prev;
+	}
+	else {
+		/* Nothing current: begin at the tail of the list. */
+		list_t *list = *((list_t**)iter->root_ref.kid);
+		target = (kid_t*)list->tail;
+	}
+
+	iter->ref.kid = target;
+	iter->ref.next = 0;
+
+	sp = *psp;
+	iter->yield_size = vm_ssize() - iter->root_size;
+
+	if ( iter->ref.kid )
+		return prg->true_val;
+	return prg->false_val;
+}
+
+/*
+ * Move a map iterator forward by one element. The first call starts at the
+ * head of the map found through the root reference; later calls follow the
+ * element's next link. Returns the program's true value while an element is
+ * current and its false value once the end is passed.
+ */
+tree_t *colm_map_iter_advance( program_t *prg, tree_t ***psp, generic_iter_t *iter )
+{
+	tree_t **sp = *psp;
+	kid_t *target;
+
+	assert( iter->yield_size == (vm_ssize() - iter->root_size) );
+
+	if ( iter->ref.kid != 0 ) {
+		/* Step from the current element to its successor. */
+		map_el_t *cur = (map_el_t*)iter->ref.kid;
+		target = (kid_t*)cur->next;
+	}
+	else {
+		/* Nothing current: begin at the head of the map. */
+		map_t *map = *((map_t**)iter->root_ref.kid);
+		target = (kid_t*)map->head;
+	}
+
+	iter->ref.kid = target;
+	iter->ref.next = 0;
+
+	sp = *psp;
+	iter->yield_size = vm_ssize() - iter->root_size;
+
+	if ( iter->ref.kid )
+		return prg->true_val;
+	return prg->false_val;
+}
+
+/*
+ * Return the struct that contains the iterator's current list element, or
+ * null when there is no current element. The element's position inside the
+ * struct comes from the el_offset of the iterator's generic info.
+ */
+tree_t *colm_list_iter_deref_cur( program_t *prg, generic_iter_t *iter )
+{
+	struct generic_info *info = &prg->rtd->generic_info[iter->generic_id];
+	list_el_t *cur = (list_el_t*)iter->ref.kid;
+	struct colm_struct *owner = 0;
+
+	if ( cur != 0 )
+		owner = colm_struct_container( cur, info->el_offset );
+
+	return (tree_t*)owner;
+}
+
+/*
+ * Return field 0 of the struct that contains the iterator's current element.
+ * When the generic's value type is TYPE_TREE the value is a tree and its
+ * reference count is raised before it is handed out.
+ *
+ * Returns 0 when the iterator has no current element; the container pointer
+ * is null in that case and must not be read through.
+ */
+value_t colm_viter_deref_cur( program_t *prg, generic_iter_t *iter )
+{
+	struct generic_info *gi = &prg->rtd->generic_info[iter->generic_id];
+	list_el_t *el = (list_el_t*)iter->ref.kid;
+	struct colm_struct *s;
+	value_t value;
+
+	/* No current element: there is no containing struct to read from. */
+	if ( el == 0 )
+		return 0;
+
+	s = colm_struct_container( el, gi->el_offset );
+
+	value = colm_struct_get_field( s, value_t, 0 );
+	if ( gi->value_type == TYPE_TREE )
+		colm_tree_upref( prg, (tree_t*)value );
+
+	return value;
+}
+
+/*
+ * Prepare a tree iterator for its first advance: record the root reference,
+ * the id to search for and the stack bookkeeping values, and clear the
+ * current position.
+ */
+void colm_init_tree_iter( tree_iter_t *tree_iter, tree_t **stack_root,
+		long arg_size, long root_size,
+		const ref_t *root_ref, int search_id )
+{
+	/* Identity and search target. */
+	tree_iter->type = IT_Tree;
+	tree_iter->root_ref = *root_ref;
+	tree_iter->search_id = search_id;
+
+	/* No current kid yet. */
+	tree_iter->ref.kid = 0;
+	tree_iter->ref.next = 0;
+
+	/* Stack accounting. */
+	tree_iter->stack_root = stack_root;
+	tree_iter->root_size = root_size;
+	tree_iter->arg_size = arg_size;
+	tree_iter->yield_size = 0;
+}
+
+/*
+ * Prepare a reverse tree iterator. In addition to the usual bookkeeping it
+ * records the number of children; yield_size starts at that same count.
+ */
+void colm_init_rev_tree_iter( rev_tree_iter_t *rev_triter, tree_t **stack_root,
+		long arg_size, long root_size,
+		const ref_t *root_ref, int search_id, int children )
+{
+	/* Identity and search target. */
+	rev_triter->type = IT_RevTree;
+	rev_triter->root_ref = *root_ref;
+	rev_triter->search_id = search_id;
+
+	/* No current kid yet, and nothing remembered from a previous yield. */
+	rev_triter->ref.kid = 0;
+	rev_triter->ref.next = 0;
+	rev_triter->kid_at_yield = 0;
+
+	/* Stack accounting. */
+	rev_triter->stack_root = stack_root;
+	rev_triter->root_size = root_size;
+	rev_triter->arg_size = arg_size;
+	rev_triter->children = children;
+	rev_triter->yield_size = children;
+}
+
+/*
+ * Fill in a user iterator: record the stack bookkeeping values and search
+ * id, and clear the resume point, frame and current position.
+ */
+void init_user_iter( user_iter_t *user_iter, tree_t **stack_root, long root_size,
+		long arg_size, long search_id )
+{
+	user_iter->type = IT_User;
+	user_iter->search_id = search_id;
+
+	/* Stack accounting. */
+	user_iter->stack_root = stack_root;
+	user_iter->root_size = root_size;
+	user_iter->arg_size = arg_size;
+	user_iter->yield_size = 0;
+
+	/* Nothing to resume and no frame yet. */
+	user_iter->resume = 0;
+	user_iter->frame = 0;
+
+	/* No current kid yet. */
+	user_iter->ref.kid = 0;
+	user_iter->ref.next = 0;
+}
+
+
+/*
+ * Create a user iterator in place on the VM stack. Enough stack words to hold
+ * a user_iter_t are pushed and the iterator is placed at the resulting stack
+ * top; the same position and the stack size at that moment become the
+ * iterator's stack_root and root_size. The updated stack pointer is written
+ * back through psp.
+ * NOTE(review): the vm_* helpers presumably operate on the locals sp and
+ * prg, which is why sp is copied in and out -- confirm.
+ */
+user_iter_t *colm_uiter_create( program_t *prg, tree_t ***psp, struct function_info *fi, long search_id )
+{
+ tree_t **sp = *psp;
+
+ /* Reserve the stack words that back the iterator object. */
+ vm_pushn( sizeof(user_iter_t) / sizeof(word_t) );
+ void *mem = vm_ptop();
+ user_iter_t *uiter = mem;
+
+ /* Taken after the reservation, so both include the iterator itself. */
+ tree_t **stack_root = vm_ptop();
+ long root_size = vm_ssize();
+
+ init_user_iter( uiter, stack_root, root_size, fi->arg_size, search_id );
+
+ *psp = sp;
+ return uiter;
+}
+
+/*
+ * Arm a user iterator for its first resume: clear the current kid, record
+ * the current yield size and choose the code to resume into from the
+ * function's frame info.
+ */
+void uiter_init( program_t *prg, tree_t **sp, user_iter_t *uiter,
+		struct function_info *fi, int revert_on )
+{
+	/* Set up the first yield so that resuming starts at the beginning. */
+	uiter->ref.kid = 0;
+	uiter->yield_size = vm_ssize() - uiter->root_size;
+
+	/* codeWV is used when reverting is on, codeWC otherwise. */
+	uiter->resume = revert_on ?
+			prg->rtd->frame_info[fi->frame_id].codeWV :
+			prg->rtd->frame_info[fi->frame_id].codeWC;
+}
+
+
+/*
+ * Tear down a tree iterator: pop whatever it yielded onto the stack, pop and
+ * downref each of its arguments, and mark it destroyed by zeroing its type.
+ * An iterator whose type is already zero is left untouched.
+ */
+void colm_tree_iter_destroy( program_t *prg, tree_t ***psp, tree_iter_t *iter )
+{
+	tree_t **sp;
+	long cur_stack_size;
+	int i;
+
+	if ( (int)iter->type == 0 )
+		return;
+
+	sp = *psp;
+	cur_stack_size = vm_ssize() - iter->root_size;
+	assert( iter->yield_size == cur_stack_size );
+
+	vm_popn( iter->yield_size );
+
+	i = 0;
+	while ( i < iter->arg_size ) {
+		colm_tree_downref( prg, sp, vm_pop_tree() );
+		i += 1;
+	}
+
+	iter->type = 0;
+	*psp = sp;
+}
+
+/*
+ * Tear down a reverse tree iterator: pop whatever it yielded onto the stack,
+ * pop and downref each of its arguments, and mark it destroyed by zeroing
+ * its type. An iterator whose type is already zero is left untouched.
+ */
+void colm_rev_tree_iter_destroy( struct colm_program *prg, tree_t ***psp, rev_tree_iter_t *riter )
+{
+	tree_t **sp;
+	long cur_stack_size;
+	int i;
+
+	if ( (int)riter->type == 0 )
+		return;
+
+	sp = *psp;
+	cur_stack_size = vm_ssize() - riter->root_size;
+	assert( riter->yield_size == cur_stack_size );
+
+	vm_popn( riter->yield_size );
+
+	i = 0;
+	while ( i < riter->arg_size ) {
+		colm_tree_downref( prg, sp, vm_pop_tree() );
+		i += 1;
+	}
+
+	riter->type = 0;
+	*psp = sp;
+}
+
+/*
+ * Tear down a user iterator: pop what it yielded and then the stack words
+ * that back the iterator object itself, and mark it destroyed. A null
+ * iterator, or one whose type is already zero, is left untouched.
+ */
+void colm_uiter_destroy( program_t *prg, tree_t ***psp, user_iter_t *uiter )
+{
+	tree_t **sp;
+	long cur_stack_size;
+
+	if ( uiter == 0 || (int)uiter->type == 0 )
+		return;
+
+	sp = *psp;
+
+	/* This is always reached from a yield, so the stack size recorded in the
+	 * iterator has to agree with the live stack. */
+	cur_stack_size = vm_ssize() - uiter->root_size;
+	assert( uiter->yield_size == cur_stack_size );
+
+	vm_popn( uiter->yield_size );
+	vm_popn( sizeof(user_iter_t) / sizeof(word_t) );
+
+	uiter->type = 0;
+
+	*psp = sp;
+}
+
+/*
+ * Unwind a user iterator: like colm_uiter_destroy(), but also pops the
+ * iterator's arguments and one further value that were placed on the stack
+ * ahead of it. A null iterator, or one whose type is already zero, is left
+ * untouched.
+ */
+void colm_uiter_unwind( program_t *prg, tree_t ***psp, user_iter_t *uiter )
+{
+	tree_t **sp;
+	long cur_stack_size;
+	long arg_size;
+
+	if ( uiter == 0 || (int)uiter->type == 0 )
+		return;
+
+	sp = *psp;
+
+	/* This is always reached from a yield, so the stack size recorded in the
+	 * iterator has to agree with the live stack. */
+	cur_stack_size = vm_ssize() - uiter->root_size;
+	assert( uiter->yield_size == cur_stack_size );
+
+	/* Read the argument count before the iterator's words are popped. */
+	arg_size = uiter->arg_size;
+
+	vm_popn( uiter->yield_size );
+	vm_popn( sizeof(user_iter_t) / sizeof(word_t) );
+
+	/* The IN_PREP_ARGS stack data. */
+	vm_popn( arg_size );
+	vm_pop_value();
+
+	uiter->type = 0;
+
+	*psp = sp;
+}
+
+/* Return the tree of the iterator's current kid, or null when the iterator
+ * has no current kid. */
+tree_t *tree_iter_deref_cur( tree_iter_t *iter )
+{
+	if ( iter->ref.kid != 0 )
+		return iter->ref.kid->tree;
+
+	return 0;
+}
+
+/* Replace the tree held by the iterator's current kid. There is no null
+ * check here: the iterator must have a current kid. */
+void set_triter_cur( program_t *prg, tree_iter_t *iter, tree_t *tree )
+{
+ iter->ref.kid->tree = tree;
+}
+
+/* Replace the tree held by the user iterator's current kid. There is no null
+ * check here: the iterator must have a current kid. */
+void set_uiter_cur( program_t *prg, user_iter_t *uiter, tree_t *tree )
+{
+ uiter->ref.kid->tree = tree;
+}
+
+/* Run split_ref() on the iterator's current reference. Does nothing when the
+ * iterator has no current kid. */
+void split_iter_cur( program_t *prg, tree_t ***psp, tree_iter_t *iter )
+{
+	if ( iter->ref.kid != 0 )
+		split_ref( prg, psp, &iter->ref );
+}
+
+/*
+ * Depth-first search for the next kid whose tree id equals iter->search_id
+ * (every tree matches when search_id is the program's any_id). Recursion is
+ * written with gotos: before descending into children the current ref is
+ * saved on the VM stack, and rec_call / rec_return act as the call and return
+ * points of one level. On a match iter->ref is left on the matching kid; when
+ * nothing is left to search iter->ref.kid is set to 0. try_first selects
+ * whether the kid iter->ref currently points at is itself tested before its
+ * children are searched. The stack pointer is written back through psp on
+ * both exits.
+ */
+void iter_find( program_t *prg, tree_t ***psp, tree_iter_t *iter, int try_first )
+{
+ int any_tree = iter->search_id == prg->rtd->any_id;
+ tree_t **top = iter->stack_root;
+ kid_t *child;
+ tree_t **sp = *psp;
+
+rec_call:
+ if ( try_first && ( iter->ref.kid->tree->id == iter->search_id || any_tree ) ) {
+ /* Match: leave iter->ref here and publish the stack pointer. */
+ *psp = sp;
+ return;
+ }
+ else {
+ child = tree_child( prg, iter->ref.kid->tree );
+ if ( child != 0 ) {
+ /* Save the current ref so it can be restored once the children
+ * have been searched. */
+ vm_contiguous( 2 );
+ vm_push_ref( iter->ref.next );
+ vm_push_kid( iter->ref.kid );
+ iter->ref.kid = child;
+ iter->ref.next = (ref_t*)vm_ptop();
+ while ( iter->ref.kid != 0 ) {
+ try_first = true;
+ goto rec_call;
+ rec_return:
+ iter->ref.kid = iter->ref.kid->next;
+ }
+ /* Children exhausted: restore the saved ref. */
+ iter->ref.kid = vm_pop_kid();
+ iter->ref.next = vm_pop_ref();
+ }
+ }
+
+ /* Saved levels remain above the iterator's stack root: return into the
+ * enclosing level's sibling loop. */
+ if ( top != vm_ptop() )
+ goto rec_return;
+
+ /* Everything searched without a match. */
+ iter->ref.kid = 0;
+ *psp = sp;
+}
+
+/*
+ * Advance a tree iterator to the next match found by iter_find(). Without a
+ * current kid the search restarts at the root, which is itself a candidate;
+ * otherwise it continues past the current kid. Returns the program's true
+ * value when a match is current and its false value otherwise.
+ */
+tree_t *tree_iter_advance( program_t *prg, tree_t ***psp, tree_iter_t *iter )
+{
+	tree_t **sp = *psp;
+	int from_root;
+
+	assert( iter->yield_size == (vm_ssize() - iter->root_size) );
+
+	from_root = ( iter->ref.kid == 0 );
+	if ( from_root )
+		iter->ref = iter->root_ref;
+	iter_find( prg, psp, iter, from_root );
+
+	sp = *psp;
+	iter->yield_size = vm_ssize() - iter->root_size;
+
+	if ( iter->ref.kid )
+		return prg->true_val;
+	return prg->false_val;
+}
+
+/*
+ * Advance an iterator over the direct children of the root tree. The first
+ * call starts at the root's first child and, when there is one, saves the
+ * root ref on the VM stack so iter->ref.next can point at it. Later calls
+ * move to the next sibling. Unless search_id is the program's any_id,
+ * siblings with a different id are skipped. Returns the program's true value
+ * when a child is current and its false value otherwise.
+ */
+tree_t *tree_iter_next_child( program_t *prg, tree_t ***psp, tree_iter_t *iter )
+{
+ tree_t **sp = *psp;
+ assert( iter->yield_size == (vm_ssize() - iter->root_size) );
+ kid_t *kid = 0;
+
+ if ( iter->ref.kid == 0 ) {
+ /* kid_t is zero, start from the first child. */
+ kid_t *child = tree_child( prg, iter->root_ref.kid->tree );
+
+ if ( child == 0 )
+ iter->ref.next = 0;
+ else {
+ /* Make a reference to the root. */
+ vm_contiguous( 2 );
+ vm_push_ref( iter->root_ref.next );
+ vm_push_kid( iter->root_ref.kid );
+ iter->ref.next = (ref_t*)vm_ptop();
+
+ kid = child;
+ }
+ }
+ else {
+ /* Start at next. */
+ kid = iter->ref.kid->next;
+ }
+
+ if ( iter->search_id != prg->rtd->any_id ) {
+ /* Filtered search: skip siblings until one has the requested id. */
+ while ( kid != 0 && kid->tree->id != iter->search_id )
+ kid = kid->next;
+ }
+
+ iter->ref.kid = kid;
+ iter->yield_size = vm_ssize() - iter->root_size;
+ *psp = sp;
+ return ( iter->ref.kid ? prg->true_val : prg->false_val );
+}
+
+/*
+ * Step a reverse child iterator. The children still to be visited are kept
+ * as kids on the VM stack, with iter->children counting them; the kid on top
+ * of the stack is the current one. Each call pops the previously yielded kid
+ * and, unless search_id is the program's any_id, keeps popping until the top
+ * kid has the requested id. Returns the program's true value when a child is
+ * current and its false value once the count reaches zero.
+ */
+tree_t *tree_rev_iter_prev_child( program_t *prg, tree_t ***psp, rev_tree_iter_t *iter )
+{
+ tree_t **sp = *psp;
+ assert( iter->yield_size == ( vm_ssize() - iter->root_size ) );
+
+ if ( iter->kid_at_yield != iter->ref.kid ) {
+ /* Need to reload the kids. */
+ vm_popn( iter->children );
+
+ /* NOTE(review): assumes the root still has at least iter->children
+ * kids; kid is not checked for null inside the loop -- confirm. */
+ int c;
+ kid_t *kid = tree_child( prg, iter->root_ref.kid->tree );
+ for ( c = 0; c < iter->children; c++ ) {
+ vm_push_kid( kid );
+ kid = kid->next;
+ }
+ }
+
+ /* Discard the kid that was yielded last time. */
+ if ( iter->ref.kid != 0 ) {
+ vm_pop_ignore();
+ iter->children -= 1;
+ }
+
+ if ( iter->search_id != prg->rtd->any_id ) {
+ /* Filtered search: drop stacked kids until the top one has the
+ * requested id. */
+ while ( iter->children > 0 && ((kid_t*)(vm_top()))->tree->id != iter->search_id ) {
+ iter->children -= 1;
+ vm_pop_ignore();
+ }
+ }
+
+ if ( iter->children == 0 ) {
+ iter->ref.next = 0;
+ iter->ref.kid = 0;
+ }
+ else {
+ iter->ref.next = &iter->root_ref;
+ iter->ref.kid = (kid_t*)vm_top();
+ }
+
+ /* We will use this to detect a split above the iterated tree. */
+ iter->kid_at_yield = iter->ref.kid;
+
+ iter->yield_size = vm_ssize() - iter->root_size;
+
+ *psp = sp;
+
+ return (iter->ref.kid ? prg->true_val : prg->false_val );
+}
+
+/*
+ * Search used by the repeat iterator. It has the same goto-based recursion
+ * as iter_find(): the current ref is saved on the VM stack before descending
+ * and rec_call / rec_return act as the call and return points of one level.
+ * The difference is that children are only entered for the root of the
+ * iteration or for a kid that has no next sibling. On a match iter->ref is
+ * left on the matching kid; otherwise iter->ref.kid ends up 0. The stack
+ * pointer is written back through psp on both exits.
+ */
+void iter_find_repeat( program_t *prg, tree_t ***psp, tree_iter_t *iter, int try_first )
+{
+ tree_t **sp = *psp;
+ int any_tree = iter->search_id == prg->rtd->any_id;
+ tree_t **top = iter->stack_root;
+ kid_t *child;
+
+rec_call:
+ if ( try_first && ( iter->ref.kid->tree->id == iter->search_id || any_tree ) ) {
+ *psp = sp;
+ return;
+ }
+ else {
+ /* The repeat iterator is just like the normal top-down-left-right,
+ * except it only goes into the children of a node if the node is the
+ * root of the iteration, or if it does not have any neighbours to the
+ * right. */
+ if ( top == vm_ptop() || iter->ref.kid->next == 0 ) {
+ child = tree_child( prg, iter->ref.kid->tree );
+ if ( child != 0 ) {
+ /* Save the current ref so it can be restored once the
+ * children have been searched. */
+ vm_contiguous( 2 );
+ vm_push_ref( iter->ref.next );
+ vm_push_kid( iter->ref.kid );
+ iter->ref.kid = child;
+ iter->ref.next = (ref_t*)vm_ptop();
+ while ( iter->ref.kid != 0 ) {
+ try_first = true;
+ goto rec_call;
+ rec_return:
+ iter->ref.kid = iter->ref.kid->next;
+ }
+ /* Children exhausted: restore the saved ref. */
+ iter->ref.kid = vm_pop_kid();
+ iter->ref.next = vm_pop_ref();
+ }
+ }
+ }
+
+ /* Saved levels remain above the iterator's stack root: return into the
+ * enclosing level's sibling loop. */
+ if ( top != vm_ptop() )
+ goto rec_return;
+
+ iter->ref.kid = 0;
+ *psp = sp;
+}
+
+/*
+ * Advance a repeat iterator to the next match found by iter_find_repeat().
+ * Without a current kid the search restarts at the root, which is itself a
+ * candidate; otherwise it continues past the current kid. Returns the
+ * program's true value when a match is current and its false value otherwise.
+ */
+tree_t *tree_iter_next_repeat( program_t *prg, tree_t ***psp, tree_iter_t *iter )
+{
+	tree_t **sp = *psp;
+	int from_root;
+
+	assert( iter->yield_size == ( vm_ssize() - iter->root_size ) );
+
+	from_root = ( iter->ref.kid == 0 );
+	if ( from_root )
+		iter->ref = iter->root_ref;
+	iter_find_repeat( prg, psp, iter, from_root );
+
+	sp = *psp;
+	iter->yield_size = vm_ssize() - iter->root_size;
+
+	if ( iter->ref.kid )
+		return prg->true_val;
+	return prg->false_val;
+}
+
+/*
+ * Search used by the reverse repeat iterator. With try_first set it first
+ * walks down from the current ref, saving each ref it descends from on the
+ * VM stack, until it reaches a kid without children, and tests that kid.
+ * Otherwise it moves back towards the root and tests the kid it lands on.
+ * On a match iter->ref is left on the matching kid; when the stack is back
+ * at the iterator's stack root iter->ref.kid is set to 0.
+ *
+ * The stack pointer is written back through psp on every exit, including
+ * exhaustion, because the loop may have popped saved refs by then.
+ */
+void iter_find_rev_repeat( program_t *prg, tree_t ***psp, tree_iter_t *iter, int try_first )
+{
+	tree_t **sp = *psp;
+	int any_tree = iter->search_id == prg->rtd->any_id;
+	tree_t **top = iter->stack_root;
+	kid_t *child;
+
+	if ( try_first ) {
+		while ( true ) {
+			if ( top == vm_ptop() || iter->ref.kid->next == 0 ) {
+				child = tree_child( prg, iter->ref.kid->tree );
+
+				if ( child == 0 )
+					break;
+				vm_contiguous( 2 );
+				vm_push_ref( iter->ref.next );
+				vm_push_kid( iter->ref.kid );
+				iter->ref.kid = child;
+				iter->ref.next = (ref_t*)vm_ptop();
+			}
+			else {
+				/* Not at the top and there is a next sibling: move over
+				 * to it. */
+				iter->ref.kid = iter->ref.kid->next;
+			}
+		}
+
+		goto first;
+	}
+
+	while ( true ) {
+		if ( top == vm_ptop() ) {
+			/* Back at the iterator's stack root: nothing left. Publish the
+			 * stack pointer, as the other exits do, so the caller does not
+			 * keep a value from before the pops. */
+			iter->ref.kid = 0;
+			*psp = sp;
+			return;
+		}
+
+		if ( iter->ref.kid->next == 0 ) {
+			/* Go up one and then down. Remember we can't use iter->ref.next
+			 * because the chain may have been split, setting it null (to
+			 * prevent repeated walks up). */
+			ref_t *ref = (ref_t*)vm_ptop();
+			iter->ref.kid = tree_child( prg, ref->kid->tree );
+		}
+		else {
+			iter->ref.kid = vm_pop_kid();
+			iter->ref.next = vm_pop_ref();
+		}
+first:
+		if ( iter->ref.kid->tree->id == iter->search_id || any_tree ) {
+			*psp = sp;
+			return;
+		}
+	}
+
+	/* Not reached: the loop above only leaves through return. */
+}
+
+
+/*
+ * Advance a reverse repeat iterator to the next match found by
+ * iter_find_rev_repeat(). Without a current kid the search restarts at the
+ * root; otherwise it continues from the current kid. Returns the program's
+ * true value when a match is current and its false value otherwise.
+ */
+tree_t *tree_iter_prev_repeat( program_t *prg, tree_t ***psp, tree_iter_t *iter )
+{
+	tree_t **sp = *psp;
+	int from_root;
+
+	assert( iter->yield_size == (vm_ssize() - iter->root_size) );
+
+	from_root = ( iter->ref.kid == 0 );
+	if ( from_root )
+		iter->ref = iter->root_ref;
+	iter_find_rev_repeat( prg, psp, iter, from_root );
+
+	sp = *psp;
+	iter->yield_size = vm_ssize() - iter->root_size;
+
+	if ( iter->ref.kid )
+		return prg->true_val;
+	return prg->false_val;
+}
+
+
+
diff --git a/src/keyops.h b/src/keyops.h
new file mode 100644
index 00000000..ed58db8d
--- /dev/null
+++ b/src/keyops.h
@@ -0,0 +1,196 @@
+/*
+ * Copyright 2001-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+
+#ifndef _COLM_KEYOPS_H
+#define _COLM_KEYOPS_H
+
+#include <fstream>
+#include <climits>
+
+/* Two-state marker flag: MarkNone is 0, MarkMark is 1. */
+enum MarkType
+{
+ MarkNone = 0,
+ MarkMark
+};
+
+/* Unsigned count of keys; returned by KeyOps::span() and alphSize(). */
+typedef unsigned long long Size;
+
+/*
+ * A single alphabet key, stored as a signed long. This key struct does not
+ * implement unsigned. Comparison and arithmetic are provided by the friend
+ * operators declared here and defined later in this header.
+ */
+struct Key
+{
+private:
+	long key;
+
+public:
+	friend inline Key operator+(const Key key1, const Key key2);
+	friend inline Key operator-(const Key key1, const Key key2);
+
+	friend inline bool operator<( const Key key1, const Key key2 );
+	friend inline bool operator<=( const Key key1, const Key key2 );
+	friend inline bool operator>( const Key key1, const Key key2 );
+	friend inline bool operator>=( const Key key1, const Key key2 );
+	friend inline bool operator==( const Key key1, const Key key2 );
+	friend inline bool operator!=( const Key key1, const Key key2 );
+
+	friend struct KeyOps;
+
+	/* A default-constructed key is zero, so it never holds an indeterminate
+	 * value (KeyOps default-constructs its minKey and maxKey). */
+	Key( ) : key(0) {}
+	Key( const Key &key ) : key(key.key) {}
+	Key( long key ) : key(key) {}
+
+	long getVal() const { return key; }
+
+	long long getLongLong() const;
+
+	/* ASCII classification of the key value. */
+	bool isUpper() const { return ( 'A' <= key && key <= 'Z' ); }
+	bool isLower() const { return ( 'a' <= key && key <= 'z' ); }
+	bool isPrintable() const { return ( 32 <= key && key < 127 ); }
+
+	/* Case conversion by fixed offset. The value is not checked: toUpper()
+	 * is only meaningful when isLower() holds, toLower() when isUpper()
+	 * holds. */
+	Key toUpper() const
+		{ return Key( 'A' + ( key - 'a' ) ); }
+	Key toLower() const
+		{ return Key( 'a' + ( key - 'A' ) ); }
+
+	void operator+=( const Key other )
+		{ key += other.key; }
+
+	void operator-=( const Key other )
+		{ key -= other.key; }
+
+	void operator|=( const Key other )
+		{ key |= other.key; }
+
+	/* Decrement. Needed only for ranges. */
+	inline void decrement();
+	/* Increment. Needed only for ranges. */
+	inline void increment();
+};
+
+/* Description of one alphabet type of the host language: its value range,
+ * signedness and size.
+ * NOTE(review): data1/data2 are presumably the words of the type name (for
+ * example "unsigned" and "char"), and size is presumably in bytes --
+ * confirm. */
+struct HostType
+{
+ const char *data1;
+ const char *data2;
+ bool isSigned;
+ long long minVal;
+ long long maxVal;
+ unsigned int size;
+};
+
+/* A host language: an array of numHostTypes host types.
+ * NOTE(review): defaultHostType is presumably an index into hostTypes --
+ * confirm. */
+struct HostLang
+{
+ HostType *hostTypes;
+ int numHostTypes;
+ int defaultHostType;
+};
+
+/* Defined elsewhere: the host language in use and the C host language. */
+extern HostLang *hostLang;
+extern HostLang hostLangC;
+
+/* An abstraction of the key operators that manages key operations such as
+ * comparison and increment according the signedness of the key. */
+struct KeyOps
+{
+ /* Starts with no alphabet type. minKey and maxKey only become meaningful
+ * after setAlphType() has been called. */
+ KeyOps() : alphType(0) {}
+
+ Key minKey, maxKey;
+ const HostType *alphType;
+
+ /* Select the alphabet type and take its value range as the key range.
+ * alphType must not be null; the range is narrowed from long long to
+ * long. */
+ void setAlphType( const HostType *alphType )
+ {
+ this->alphType = alphType;
+ minKey = (long) alphType->minVal;
+ maxKey = (long) alphType->maxVal;
+ }
+
+ /* Number of keys in the inclusive range key1..key2. */
+ Size span( Key key1, Key key2 )
+ {
+ return (unsigned long long)( (long long)key2.key - (long long)key1.key + 1) ;
+ }
+
+ /* Number of keys in the whole alphabet, minKey..maxKey. */
+ Size alphSize()
+ { return span( minKey, maxKey ); }
+};
+
+/* Relational and equality operators. All of them compare the underlying
+ * signed long values directly. */
+inline bool operator<( const Key key1, const Key key2 )
+{
+ return key1.key < key2.key;
+}
+
+inline bool operator<=( const Key key1, const Key key2 )
+{
+ return key1.key <= key2.key;
+}
+
+inline bool operator>( const Key key1, const Key key2 )
+{
+ return key1.key > key2.key;
+}
+
+inline bool operator>=( const Key key1, const Key key2 )
+{
+ return key1.key >= key2.key;
+}
+
+inline bool operator==( const Key key1, const Key key2 )
+{
+ return key1.key == key2.key;
+}
+
+inline bool operator!=( const Key key1, const Key key2 )
+{
+ return key1.key != key2.key;
+}
+
+/* Decrement. Needed only for ranges. No bounds check is made. */
+inline void Key::decrement()
+{
+ key = key - 1;
+}
+
+/* Increment. Needed only for ranges. No bounds check is made. */
+inline void Key::increment()
+{
+ key = key + 1;
+}
+
+/* The key value widened to long long. */
+inline long long Key::getLongLong() const
+{
+ return (long long) key;
+}
+
+/* Sum of two keys, computed on the underlying long values. */
+inline Key operator+(const Key key1, const Key key2)
+{
+ return Key( key1.key + key2.key );
+}
+
+/* Difference of two keys, computed on the underlying long values. */
+inline Key operator-(const Key key1, const Key key2)
+{
+ return Key( key1.key - key2.key );
+}
+
+/* File name helpers, defined elsewhere.
+ * NOTE(review): fileNameFromStem() returns a non-const char*, so the caller
+ * presumably owns the result -- confirm against the definition. */
+const char *findFileExtension( const char *stemFile );
+char *fileNameFromStem( const char *stemFile, const char *suffix );
+
+#endif /* _COLM_KEYOPS_H */
+
diff --git a/src/list.c b/src/list.c
new file mode 100644
index 00000000..2003674a
--- /dev/null
+++ b/src/list.c
@@ -0,0 +1,255 @@
+/*
+ * Copyright 2007-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+
+#include <colm/pdarun.h>
+#include <colm/program.h>
+#include <colm/struct.h>
+#include <colm/bytecode.h>
+
+static void colm_list_add_after( list_t *list, list_el_t *prev_el, list_el_t *new_el );
+static void colm_list_add_before( list_t *list, list_el_t *next_el, list_el_t *new_el);
+list_el_t *colm_list_detach( list_t *list, list_el_t *el );
+
+/* Insert new_el in front of the current head of the list. */
+void colm_list_prepend( list_t *list, list_el_t *new_el )
+{
+	list_el_t *first = list->head;
+	colm_list_add_before( list, first, new_el );
+}
+
+/* Insert new_el behind the current tail of the list. */
+void colm_list_append( list_t *list, list_el_t *new_el )
+{
+	list_el_t *last = list->tail;
+	colm_list_add_after( list, last, new_el );
+}
+
+/* Unlink the head element and return it. */
+list_el_t *colm_list_detach_head( list_t *list )
+{
+	list_el_t *first = list->head;
+	return colm_list_detach( list, first );
+}
+
+/* Unlink the tail element and return it. */
+list_el_t *colm_list_detach_tail( list_t *list )
+{
+	list_el_t *last = list->tail;
+	return colm_list_detach( list, last );
+}
+
+/* Number of elements, as tracked in list_len. */
+long colm_list_length( list_t *list )
+{
+	long count = list->list_len;
+	return count;
+}
+
+/* Wrap value in a freshly allocated element struct (stored in field 0) and
+ * link that struct's embedded list element at the tail of the list. */
+void colm_vlist_append( struct colm_program *prg, list_t *list, value_t value )
+{
+	struct colm_struct *el_struct =
+			colm_struct_new( prg, list->generic_info->el_struct_id );
+	list_el_t *link;
+
+	colm_struct_set_field( el_struct, value_t, 0, value );
+
+	/* The list linkage lives inside the struct at el_offset. */
+	link = colm_struct_get_addr( el_struct, list_el_t*, list->generic_info->el_offset );
+
+	colm_list_append( list, link );
+}
+
+/* Wrap value in a freshly allocated element struct (stored in field 0) and
+ * link that struct's embedded list element at the head of the list. */
+void colm_vlist_prepend( struct colm_program *prg, list_t *list, value_t value )
+{
+	struct colm_struct *el_struct =
+			colm_struct_new( prg, list->generic_info->el_struct_id );
+	list_el_t *link;
+
+	colm_struct_set_field( el_struct, value_t, 0, value );
+
+	/* The list linkage lives inside the struct at el_offset. */
+	link = colm_struct_get_addr( el_struct, list_el_t*, list->generic_info->el_offset );
+
+	colm_list_prepend( list, link );
+}
+
+/* Unlink the tail element of a value list and return the value held in
+ * field 0 of its containing struct. When the list's value type is TYPE_TREE
+ * the returned tree gets an extra reference.
+ *
+ * Returns 0 when the list is empty; previously an empty list led to a null
+ * pointer dereference. */
+value_t colm_vlist_detach_tail( struct colm_program *prg, list_t *list )
+{
+	list_el_t *list_el = list->tail;
+
+	/* Nothing to detach from an empty list. */
+	if ( list_el == 0 )
+		return 0;
+
+	colm_list_detach( list, list_el );
+
+	/* The generic id is the index of this list's generic_info entry within
+	 * the runtime data table. */
+	struct colm_struct *s = colm_generic_el_container( prg, list_el,
+			(list->generic_info - prg->rtd->generic_info) );
+
+	value_t val = colm_struct_get_field( s, value_t, 0 );
+
+	if ( list->generic_info->value_type == TYPE_TREE )
+		colm_tree_upref( prg, (tree_t*)val );
+
+	return val;
+}
+
+/* Unlink the head element of a value list and return the value held in
+ * field 0 of its containing struct. When the list's value type is TYPE_TREE
+ * the returned tree gets an extra reference.
+ *
+ * Returns 0 when the list is empty; previously an empty list led to a null
+ * pointer dereference. */
+value_t colm_vlist_detach_head( struct colm_program *prg, list_t *list )
+{
+	list_el_t *list_el = list->head;
+
+	/* Nothing to detach from an empty list. */
+	if ( list_el == 0 )
+		return 0;
+
+	colm_list_detach( list, list_el );
+
+	/* The generic id is the index of this list's generic_info entry within
+	 * the runtime data table. */
+	struct colm_struct *s = colm_generic_el_container( prg, list_el,
+			(list->generic_info - prg->rtd->generic_info) );
+
+	value_t val = colm_struct_get_field( s, value_t, 0 );
+
+	if ( list->generic_info->value_type == TYPE_TREE )
+		colm_tree_upref( prg, (tree_t*) val );
+
+	return val;
+}
+
+
+/* Link new_el into the list directly after prev_el. A null prev_el means
+ * "insert at the head". Head, tail and length are kept up to date. */
+static void colm_list_add_after( list_t *list, list_el_t *prev_el, list_el_t *new_el )
+{
+	/* The back link of the new element is prev_el in every case. */
+	new_el->list_prev = prev_el;
+
+	/* Forward links. */
+	if ( prev_el != 0 ) {
+		/* Splice in between prev_el and whatever followed it. */
+		new_el->list_next = prev_el->list_next;
+		prev_el->list_next = new_el;
+	}
+	else {
+		/* No predecessor: the new element becomes the head. */
+		new_el->list_next = list->head;
+		list->head = new_el;
+	}
+
+	/* Backward links. */
+	if ( new_el->list_next != 0 ) {
+		/* The follower now points back at the new element. */
+		new_el->list_next->list_prev = new_el;
+	}
+	else {
+		/* Nothing follows: the new element becomes the tail. */
+		list->tail = new_el;
+	}
+
+	list->list_len += 1;
+}
+
+/* Link new_el into the list directly before next_el. A null next_el means
+ * "insert at the tail". Head, tail and length are kept up to date. */
+static void colm_list_add_before( list_t *list, list_el_t *next_el, list_el_t *new_el)
+{
+	/* The forward link of the new element is next_el in every case. */
+	new_el->list_next = next_el;
+
+	/* Backward links. */
+	if ( next_el != 0 ) {
+		/* Splice in between next_el and whatever preceded it. */
+		new_el->list_prev = next_el->list_prev;
+		next_el->list_prev = new_el;
+	}
+	else {
+		/* No successor: the new element becomes the tail. */
+		new_el->list_prev = list->tail;
+		list->tail = new_el;
+	}
+
+	/* Forward links. */
+	if ( new_el->list_prev != 0 ) {
+		/* The predecessor now points forward at the new element. */
+		new_el->list_prev->list_next = new_el;
+	}
+	else {
+		/* Nothing precedes: the new element becomes the head. */
+		list->head = new_el;
+	}
+
+	list->list_len += 1;
+}
+
+/* Unlink el from the list and return it. The neighbours (or the list's
+ * head/tail when el is at an end) are relinked to skip over el, and the
+ * length is decremented. The links stored in el itself are left untouched.
+ *
+ * A null el is tolerated and yields null with the list unchanged. This is
+ * what colm_list_detach_head/tail pass when the list is empty; previously it
+ * was dereferenced. */
+list_el_t *colm_list_detach( list_t *list, list_el_t *el )
+{
+	if ( el == 0 )
+		return 0;
+
+	/* Set forward pointers to skip over el. */
+	if ( el->list_prev == 0 )
+		list->head = el->list_next;
+	else
+		el->list_prev->list_next = el->list_next;
+
+	/* Set reverse pointers to skip over el. */
+	if ( el->list_next == 0 )
+		list->tail = el->list_prev;
+	else
+		el->list_next->list_prev = el->list_prev;
+
+	/* Update list length and return the element we detached. */
+	list->list_len--;
+	return el;
+}
+
+/* Destructor hook installed on lists by colm_list_new(). It performs no
+ * work; none of its arguments are used. */
+void colm_list_destroy( struct colm_program *prg, tree_t **sp, struct colm_struct *s )
+{
+	(void) prg;
+	(void) sp;
+	(void) s;
+}
+
+/* Allocate a zero-filled list, register it with the program through
+ * colm_struct_add(), and tag it with the inbuilt struct id and the list
+ * destructor. */
+list_t *colm_list_new( struct colm_program *prg )
+{
+	struct colm_list *list =
+			(struct colm_list*) calloc( 1, sizeof(struct colm_list) );
+
+	colm_struct_add( prg, (struct colm_struct *)list );
+
+	list->destructor = &colm_list_destroy;
+	list->id = prg->rtd->struct_inbuilt_id;
+	return list;
+}
+
+/* Fetch an end of the list as its containing struct: field 0 selects the
+ * head, field 1 the tail. Any other field trips an assertion. Returns null
+ * when the selected end is empty. */
+struct colm_struct *colm_list_get( struct colm_program *prg,
+		list_t *list, word_t gen_id, word_t field )
+{
+	struct generic_info *gi = &prg->rtd->generic_info[gen_id];
+	list_el_t *el = 0;
+
+	if ( field == 0 )
+		el = list->head;
+	else if ( field == 1 )
+		el = list->tail;
+	else
+		assert( 0 );
+
+	if ( el == 0 )
+		return 0;
+
+	/* Step back from the embedded list element to the struct holding it. */
+	return colm_struct_container( el, gi->el_offset );
+}
+
+/* Fetch a neighbour of a list element as its containing struct: field 0
+ * selects the previous element, field 1 the next. Any other field trips an
+ * assertion. Returns null when there is no such neighbour. */
+struct colm_struct *colm_list_el_get( struct colm_program *prg,
+		list_el_t *list_el, word_t gen_id, word_t field )
+{
+	struct generic_info *gi = &prg->rtd->generic_info[gen_id];
+	list_el_t *neighbour = 0;
+
+	if ( field == 0 )
+		neighbour = list_el->list_prev;
+	else if ( field == 1 )
+		neighbour = list_el->list_next;
+	else
+		assert( 0 );
+
+	if ( neighbour == 0 )
+		return 0;
+
+	/* Step back from the embedded list element to the struct holding it. */
+	return colm_struct_container( neighbour, gi->el_offset );
+}
diff --git a/src/lmparse.kh b/src/lmparse.kh
new file mode 100644
index 00000000..13977a9e
--- /dev/null
+++ b/src/lmparse.kh
@@ -0,0 +1,86 @@
+/*
+ * Copyright 2001-2007, 2013 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef LMPARSE_H
+#define LMPARSE_H
+
+#include <iostream>
+#include "avltree.h"
+#include "parsedata.h"
+#include "parser.h"
+
+/* Parser for colm source, derived from BaseParser. The embedded %%{ }%%
+ * section declares the parser name and the token set; the grammar itself is
+ * in lmparse.kl, which includes this file. */
+struct ColmParser
+:
+ public BaseParser
+{
+ /* Hands the compiler object on to the BaseParser constructor. */
+ ColmParser( Compiler *pd )
+ : BaseParser( pd )
+ {}
+
+ %%{
+ parser ColmParser;
+
+ # Use a class for tokens.
+ token uses class Token;
+
+ # Atoms.
+ token TK_Word, TK_Literal, TK_SingleLit, TK_DoubleLit, TK_Number, TK_UInt,
+ TK_Hex, KW_Nil, KW_True, KW_False;
+
+ # General tokens.
+ token TK_DotDot, TK_ColonGt, TK_ColonGtGt, TK_LtColon,
+ TK_DoubleArrow, TK_StarStar, TK_NameSep, TK_DashDash, TK_DoubleEql,
+ TK_NotEql, TK_DoubleColon, TK_LessEql, TK_GrtrEql, TK_RightArrow,
+ TK_LitPat, TK_AmpAmp, TK_BarBar, TK_SqOpen, TK_SqOpenNeg, TK_SqClose,
+ TK_Dash, TK_ReChar, TK_LtLt;
+
+ # Defining things.
+ token KW_Rl, KW_Def, KW_Lex, KW_Context, KW_Ignore, KW_Token, KW_Commit, KW_Namespace, KW_End,
+ KW_Literal, KW_ReduceFirst, KW_Map, KW_List, KW_Vector, KW_Parser, KW_Global, KW_Export,
+ KW_Iter, KW_Reject, KW_Ptr, KW_Ref, KW_Deref;
+
+ # Language.
+ token KW_If, KW_While, KW_Else, KW_Elsif, KW_For, KW_Return, KW_Yield, KW_In,
+ KW_Break, KW_PrintXMLAC, KW_PrintXML, KW_Print, KW_PrintStream, KW_Require;
+
+ # Patterns.
+ token KW_Match, KW_Construct, KW_Parse, KW_ParseStop, KW_New, KW_MakeToken,
+ KW_MakeTree, KW_TypeId, KW_Alias, KW_Send, KW_Ni;
+
+ # Include and pre-eof keywords.
+ token KW_Include, KW_Preeof;
+
+ # Precedence keywords.
+ token KW_Left, KW_Right, KW_Nonassoc, KW_Prec;
+
+ }%%
+
+ %% write instance_data;
+
+ /* Report an error encountered by the parser. */
+ ostream &parse_error( int tokId, Token &token );
+ /* The remaining members are only declared here; their definitions are not
+ * part of this header. */
+ void init();
+ int parseLangEl( int type, const Token *token );
+ int token( InputLoc &loc, int tokId, char *tokstart, int toklen );
+};
+
+%% write token_defs;
+
+#endif
diff --git a/src/lmparse.kl b/src/lmparse.kl
new file mode 100644
index 00000000..b64bd344
--- /dev/null
+++ b/src/lmparse.kl
@@ -0,0 +1,2139 @@
+/*
+ * Copyright 2006-2012 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <iostream>
+#include <errno.h>
+
+#include "config.h"
+#include "lmparse.h"
+#include "global.h"
+#include "input.h"
+
+using std::cout;
+using std::cerr;
+using std::endl;
+
+%%{
+
+parser ColmParser;
+
+include "lmparse.kh";
+
+# Grammar entry point. The statement list collected from the root items
+# becomes the program's root code block.
+start: root_item_list
+ final {
+ pd->rootCodeBlock = CodeBlock::cons( $1->stmtList, 0 );
+ };
+
+nonterm root_item_list uses lang_stmt_list;
+
+# Left-recursive list: each root item's statement is handed to
+# appendStatement() together with the list built so far.
+root_item_list: root_item_list root_item
+ final {
+ $$->stmtList = appendStatement( $1->stmtList, $2->stmt );
+ };
+
+# Empty list: start from a fresh StmtList.
+root_item_list:
+ final {
+ $$->stmtList = new StmtList;
+ };
+
+nonterm root_item uses statement;
+
+# One top-level item followed by a commit. Only global_def and statement
+# forward a statement; every other kind of item yields a null stmt.
+root_item: literal_def commit final { $$->stmt = 0; };
+root_item: rl_def commit final { $$->stmt = 0; };
+root_item: token_def commit final { $$->stmt = 0; };
+root_item: cfl_def commit final { $$->stmt = 0; };
+root_item: region_def commit final { $$->stmt = 0; };
+root_item: context_def commit final { $$->stmt = 0; };
+root_item: namespace_def commit final { $$->stmt = 0; };
+root_item: function_def commit final { $$->stmt = 0; };
+root_item: iter_def commit final { $$->stmt = 0; };
+root_item: global_def commit final { $$->stmt = $1->stmt; };
+root_item: export_def commit final { $$->stmt = 0; };
+root_item: statement commit final { $$->stmt = $1->stmt; };
+root_item: pre_eof commit final { $$->stmt = 0; };
+root_item: precedence commit final { $$->stmt = 0; };
+root_item: typedef commit final { $$->stmt = 0; };
+
+# Opening brace of a code block. Carries the local frame returned by
+# blockOpen().
+nonterm block_open
+{
+ ObjectDef *localFrame;
+};
+
+block_open: '{'
+ final {
+ $$->localFrame = blockOpen();
+ };
+
+# Closing brace of a code block; calls blockClose().
+block_close: '}'
+ final {
+ blockClose();
+ };
+
+
+# Iterator definition: 'iter' name '(' params ')' '{' statements '}'.
+# Passes the body, local frame, parameter list and name to iterDef().
+iter_def:
+ KW_Iter TK_Word '(' opt_param_list ')' block_open lang_stmt_list block_close
+ final {
+ iterDef( $7->stmtList, $6->localFrame, $4->paramList, $2->data );
+ };
+
+# Function definition: return-type name '(' params ')' '{' statements '}'.
+# Same as iter_def, with the return type ref passed along as well.
+function_def:
+ type_ref TK_Word '(' opt_param_list ')' block_open lang_stmt_list block_close
+ final {
+ functionDef( $7->stmtList, $6->localFrame, $4->paramList, $1->typeRef, $2->data );
+ };
+
+nonterm opt_param_list uses param_list;
+
+# Optional parameter list: either the parsed list or a fresh empty one.
+opt_param_list: param_list
+ final {
+ $$->paramList = $1->paramList;
+ };
+
+opt_param_list:
+ final {
+ $$->paramList = new ParameterList;
+ };
+
+nonterm param_list
+{
+ ParameterList *paramList;
+};
+
+# One or more parameters, accumulated through appendParam(). No separator
+# token appears between parameters in these productions.
+param_list: param_list param_var_def
+ final {
+ $$->paramList = appendParam( $1->paramList, $2->objField );
+ };
+
+param_list: param_var_def
+ final {
+ $$->paramList = appendParam( new ParameterList, $1->objField );
+ };
+
+nonterm param_var_def uses var_def;
+
+# A single parameter: name ':' type, where the type is either a plain type
+# ref or a 'ref' type ref. Both alternatives go through addParam().
+param_var_def: TK_Word ':' type_ref
+ final {
+ $$->objField = addParam( $1->loc, $3->typeRef, $1->data );
+ };
+param_var_def: TK_Word ':' reference_type_ref
+ final {
+ $$->objField = addParam( $1->loc, $3->typeRef, $1->data );
+ };
+
+nonterm reference_type_ref uses type_ref;
+
+# 'ref' type: wraps the inner type ref in a TypeRef::Ref located at the
+# keyword.
+reference_type_ref: KW_Ref type_ref
+ final {
+ $$->typeRef = TypeRef::cons( $1->loc, TypeRef::Ref, $2->typeRef );
+ };
+
+nonterm global_def uses statement;
+
+# 'global' variable definition with optional initializer; the statement is
+# produced by globalDef().
+global_def: KW_Global var_def opt_def_init
+ final {
+ $$->stmt = globalDef( $2->objField, $3->expr, $3->assignType );
+ };
+
+nonterm export_def uses statement;
+
+# 'export' variable definition with optional initializer. Note that
+# exportStmt() takes the assign type before the expression, the reverse of
+# globalDef().
+export_def: KW_Export var_def opt_def_init
+ final {
+ $$->stmt = exportStmt( $2->objField, $3->assignType, $3->expr );
+ };
+
+# Precedence declaration: an associativity keyword followed by a
+# comma-separated list of tokens, passed to precedenceStmt().
+precedence:
+ pred_type pred_token_list
+ final {
+ precedenceStmt( $1->predType, $2->predDeclList );
+ };
+
+nonterm pred_type
+{
+ PredType predType;
+};
+
+# Associativity keyword mapped to its PredType value.
+pred_type: KW_Left final { $$->predType = PredLeft; };
+pred_type: KW_Right final { $$->predType = PredRight; };
+pred_type: KW_Nonassoc final { $$->predType = PredNonassoc; };
+
+nonterm pred_token_list
+{
+ PredDeclList *predDeclList;
+};
+
+# Comma-separated tokens; declarations are appended in source order.
+pred_token_list:
+ pred_token_list ',' pred_token
+ final {
+ $$->predDeclList = $1->predDeclList;
+ $$->predDeclList->append( $3->predDecl );
+ };
+
+pred_token_list:
+ pred_token
+ final {
+ $$->predDeclList = new PredDeclList;
+ $$->predDeclList->append( $1->predDecl );
+ };
+
+nonterm pred_token
+{
+ PredDecl *predDecl;
+};
+
+# A token in a precedence list: an optionally qualified name ...
+pred_token:
+ region_qual TK_Word
+ final {
+ $$->predDecl = predTokenName( $2->loc, $1->nspaceQual, $2->data );
+ };
+
+# ... or an optionally qualified literal.
+pred_token:
+ region_qual TK_Literal
+ final {
+ $$->predDecl = predTokenLit( $2->loc, $2->data, $1->nspaceQual );
+ };
+
+# Type alias: 'alias' name type, registered through alias().
+typedef:
+ KW_Alias TK_Word type_ref
+ final {
+ alias( $1->loc, $2->data, $3->typeRef );
+ };
+
+# Nonterminal definition: 'def' name, object variables, optional
+# 'reducefirst', then the productions. The object definition takes the
+# nonterminal's name, and the NtDef is built against the current namespace
+# and context.
+cfl_def:
+ cfl_def_head obj_var_list opt_reduce_first cfl_prod_list
+ final {
+ $2->objectDef->name = $1->name;
+ NtDef *ntDef = NtDef::cons( $1->name, namespaceStack.top(),
+ contextStack.top(), $3->reduceFirst );
+
+ cflDef( ntDef, $2->objectDef, $4->defList );
+ };
+
+nonterm class cfl_def_head
+{
+ String name;
+};
+
+cfl_def_head: KW_Def TK_Word
+ final {
+ $$->name = $2->data;
+ };
+
+nonterm cfl_prod_list
+{
+ LelDefList *defList;
+};
+
+# Productions separated by '|', accumulated through prodAppend().
+cfl_prod_list: cfl_prod_list '|' define_prod
+ final {
+ $$->defList = prodAppend( $1->defList, $3->definition );
+ };
+cfl_prod_list: define_prod
+ final {
+ $$->defList = prodAppend( new LelDefList, $1->definition );
+ };
+
+nonterm opt_reduce_first
+{
+ bool reduceFirst;
+};
+
+# Optional 'reducefirst' keyword, recorded as a flag.
+opt_reduce_first:
+ KW_ReduceFirst
+ final {
+ $$->reduceFirst = true;
+ };
+opt_reduce_first:
+ final {
+ $$->reduceFirst = false;
+ };
+
+# Optional precedence override on a production.
+nonterm opt_prec
+{
+	LangEl *predOf;
+};
+
+# No 'prec' clause: predOf is null.
+opt_prec:
+	final {
+		$$->predOf = 0;
+	};
+
+# Explicit 'prec' clause. This is not implemented and asserts. predOf is
+# still given a defined (null) value so that builds with assertions
+# compiled out do not pass an uninitialised pointer on to define_prod.
+opt_prec:
+	KW_Prec pred_token
+	final {
+		//$$->predOf = $2->factor->langEl;
+		assert(false);
+		$$->predOf = 0;
+	};
+
+nonterm define_prod
+{
+ Production *definition;
+};
+
+# A single production: '[' elements ']' followed by optional commit,
+# optional reduction code and optional precedence. The location is that of
+# the opening bracket.
+define_prod: '[' prod_el_list ']' opt_commit opt_reduce_code opt_prec
+ final {
+ $$->definition = production( $1->loc, $2->list, $4->commit,
+ $5->codeBlock, $6->predOf );
+ };
+
+nonterm obj_var_list
+{
+ ObjectDef *objectDef;
+};
+
+# Zero or more variable definitions collected into one object definition.
+# Each var_def is added through objVarDef().
+obj_var_list: obj_var_list var_def
+ final {
+ objVarDef( $1->objectDef, $2->objField );
+ $$->objectDef = $1->objectDef;
+ };
+
+# Empty list: create the user-type ObjectDef with an empty name and the
+# next object id. Users of this nonterminal assign the name afterwards.
+obj_var_list:
+ final {
+ $$->objectDef = ObjectDef::cons( ObjectDef::UserType,
+ String(), pd->nextObjectId++ );
+ };
+
+
+nonterm type_ref
+{
+ TypeRef *typeRef;
+};
+
+# A type reference: a basic (named) type or one of the generic forms
+# map<K V>, list<T>, vector<T>, parser<T>. The generic forms are located at
+# their keyword.
+type_ref: basic_type_ref
+ final {
+ $$->typeRef = $1->typeRef;
+ };
+
+type_ref: KW_Map '<' type_ref type_ref '>'
+ final {
+ $$->typeRef = TypeRef::cons( $1->loc, TypeRef::Map,
+ 0, $3->typeRef, $4->typeRef );
+ };
+
+type_ref: KW_List '<' type_ref '>'
+ final {
+ $$->typeRef = TypeRef::cons( $1->loc, TypeRef::List,
+ 0, $3->typeRef, 0 );
+ };
+type_ref: KW_Vector '<' type_ref '>'
+ final {
+ $$->typeRef = TypeRef::cons( $1->loc, TypeRef::Vector,
+ 0, $3->typeRef, 0 );
+ };
+type_ref: KW_Parser '<' type_ref '>'
+ final {
+ $$->typeRef = TypeRef::cons( $1->loc, TypeRef::Parser,
+ 0, $3->typeRef, 0 );
+ };
+
+nonterm basic_type_ref uses type_ref;
+
+# Optionally qualified type name with an optional repeat operator.
+basic_type_ref: region_qual TK_Word opt_repeat
+ final {
+ $$->typeRef = TypeRef::cons( $2->loc, $1->nspaceQual, $2->data, $3->repeatType );
+ };
+
+# 'ptr' form: the named type is built first, then wrapped in a TypeRef::Ptr.
+# Both type refs use the location of the 'ptr' keyword.
+basic_type_ref: KW_Ptr region_qual TK_Word opt_repeat
+ final {
+ TypeRef *inner = TypeRef::cons( $1->loc, $2->nspaceQual, $3->data, $4->repeatType );
+ $$->typeRef = TypeRef::cons( $1->loc, TypeRef::Ptr, inner );
+ };
+
+
+# Variable definition: name ':' type. Carries the location of the name and
+# the object field built for it.
+nonterm var_def
+{
+	InputLoc loc;
+	ObjectField *objField;
+};
+
+var_def: TK_Word ':' type_ref
+	final {
+		/* Record where the variable was named. context_var_def reads this
+		 * field; it was previously never assigned. */
+		$$->loc = $1->loc;
+
+		/* Return an object field object. The user of this nonterminal must
+		 * load it into the appropriate map and do error checking. */
+		$$->objField = ObjectField::cons( $1->loc, $3->typeRef, $1->data );
+	};
+
+# Lexical region: 'lex' items 'end'. The region set pushed by region_head is
+# popped once the closing 'end' has been seen.
+region_def:
+ region_head root_item_list KW_End
+ final {
+ popRegionSet();
+ };
+
+region_head:
+ KW_Lex
+ final {
+ pushRegionSet( $1->loc );
+ };
+
+# Namespace: 'namespace' name items 'end'. The namespace stack is popped at
+# the closing 'end'.
+namespace_def:
+ namespace_head root_item_list KW_End
+ final {
+ namespaceStack.pop();
+ };
+
+
+namespace_head:
+ KW_Namespace TK_Word
+ final {
+ /* Make the new namespace. */
+ createNamespace( $1->loc, $2->data );
+ };
+
+# A variable declared directly inside a context; registered through
+# contextVarDef() using the location and field carried by var_def.
+context_var_def:
+ var_def
+ final {
+ contextVarDef( $1->loc, $1->objField );
+ };
+
+
+# Items allowed inside a context, each followed by a commit. None of these
+# alternatives has an action of its own.
+context_item: context_var_def commit;
+context_item: literal_def commit;
+context_item: rl_def commit;
+context_item: token_def commit;
+context_item: cfl_def commit;
+context_item: region_def commit;
+context_item: context_def commit;
+context_item: function_def commit;
+context_item: iter_def commit;
+context_item: export_def commit;
+context_item: pre_eof commit;
+context_item: precedence commit;
+
+# Zero or more context items.
+context_item_list:
+ context_item_list context_item;
+context_item_list:
+ ;
+
+# Context: 'context' name items 'end'. Both the context stack and the
+# namespace stack are popped at the closing 'end'.
+context_def:
+ context_head context_item_list KW_End
+ final {
+ contextStack.pop();
+ namespaceStack.pop();
+ };
+
+context_head:
+ KW_Context TK_Word
+ final {
+ contextHead( $1->loc, $2->data );
+ };
+
+#
+# Pattern
+#
+
+# A pattern is carried as a PatternItemList. The loc field is declared but
+# not assigned by any production shown here.
+nonterm pattern
+{
+ PatternItemList *list;
+ InputLoc loc;
+};
+
+pattern:
+ pattern_list
+ final {
+ $$->list = $1->list;
+ };
+
+nonterm pattern_list uses pattern;
+
+# One or more top-level pattern elements, concatenated left to right.
+pattern_list: pattern_list pattern_top_el
+ final {
+ $$->list = patListConcat( $1->list, $2->list );
+ };
+pattern_list: pattern_top_el
+ final {
+ $$->list = $1->list;
+ };
+
+nonterm pattern_top_el uses pattern;
+
+# A top-level element is a double-quoted literal pattern or a bracketed
+# element list.
+pattern_top_el: '"' litpat_el_list '"'
+ final {
+ $$->list = $2->list;
+ };
+pattern_top_el: '[' pattern_el_list ']'
+ final {
+ $$->list = $2->list;
+ };
+
+nonterm litpat_el_list uses pattern;
+
+# Contents of a quoted literal pattern: zero or more literal elements.
+litpat_el_list: litpat_el_list litpat_el
+ final {
+ $$->list = patListConcat( $1->list, $2->list );
+ };
+litpat_el_list:
+ final {
+ $$->list = new PatternItemList;
+ };
+
+nonterm litpat_el uses pattern;
+
+# Literal text becomes a single InputText pattern item.
+litpat_el: TK_LitPat
+ final {
+ PatternItem *patternItem = PatternItem::cons( $1->loc, $1->data,
+ PatternItem::InputText );
+ $$->list = PatternItemList::cons( patternItem );
+ };
+
+# A bracketed element list embedded inside a quoted literal.
+litpat_el: '[' pattern_el_list ']'
+ final {
+ $$->list = $2->list;
+ };
+
+nonterm pattern_el_list uses pattern;
+
+# Contents of a bracketed pattern: zero or more elements.
+pattern_el_list:
+ pattern_el_list pattern_el
+ final {
+ $$->list = patListConcat( $1->list, $2->list );
+ };
+pattern_el_list:
+ final {
+ $$->list = new PatternItemList;
+ };
+
+# A single element inside a bracketed pattern. The nonterminal is declared
+# once here; a second, identical declaration that followed the first
+# production has been dropped.
+nonterm pattern_el uses pattern;
+
+# Optionally labelled type or literal reference.
+pattern_el:
+	opt_label pattern_el_type_or_lit
+	final {
+		$$->list = patternEl( $1->varRef, $2->list );
+	};
+
+# A quoted literal pattern nested inside a bracketed pattern.
+pattern_el: '"' litpat_el_list '"'
+	final {
+		$$->list = $2->list;
+	};
+
+# '?' name: not implemented, asserts. The list is still given a defined
+# (empty) value so that builds with assertions compiled out do not hand an
+# uninitialised pointer to patListConcat().
+pattern_el: '?' TK_Word
+	final {
+		/* FIXME: Implement */
+		assert(false);
+		$$->list = new PatternItemList;
+	};
+
+nonterm pattern_el_type_or_lit uses pattern;
+
+# Optionally qualified name with optional repeat: handled by
+# patternElNamed().
+pattern_el_type_or_lit:
+ region_qual TK_Word opt_repeat
+ final {
+ $$->list = patternElNamed( $2->loc, $1->nspaceQual, $2->data, $3->repeatType );
+ };
+
+# Optionally qualified literal with optional repeat: handled by
+# patternElType().
+pattern_el_type_or_lit:
+ region_qual TK_Literal opt_repeat
+ final {
+ $$->list = patternElType( $2->loc, $1->nspaceQual, $2->data, $3->repeatType );
+ };
+
+nonterm opt_label
+{
+ /* Variable reference. */
+ LangVarRef *varRef;
+};
+
+# Optional 'name:' label in front of a pattern element; null when absent.
+opt_label: TK_Word ':'
+ final {
+ $$->varRef = LangVarRef::cons( $1->loc, $1->data );
+ };
+opt_label:
+ final {
+ $$->varRef = 0;
+ };
+
+#
+# Constructor List (constructor)
+#
+
+# A constructor is carried as a ConsItemList.
+nonterm constructor
+{
+ ConsItemList *list;
+};
+
+constructor: cons_list
+ final {
+ $$->list = $1->list;
+ };
+
+nonterm cons_list uses constructor;
+
+# One or more top-level elements. This list is right-recursive; the item
+# lists are still concatenated in source order.
+cons_list: cons_top_el cons_list
+ final {
+ $$->list = consListConcat( $1->list, $2->list );
+ };
+cons_list: cons_top_el
+ final {
+ $$->list = $1->list;
+ };
+
+nonterm cons_top_el uses constructor;
+
+# A top-level element is a double-quoted literal or a bracketed element list.
+cons_top_el: '"' lit_cons_el_list '"'
+ final {
+ $$->list = $2->list;
+ };
+cons_top_el: '[' cons_el_list ']'
+ final {
+ $$->list = $2->list;
+ };
+
+nonterm lit_cons_el_list uses constructor;
+
+# Contents of a quoted literal: zero or more literal elements.
+lit_cons_el_list: lit_cons_el_list lit_cons_el
+ final {
+ $$->list = consListConcat( $1->list, $2->list );
+ };
+lit_cons_el_list:
+ final {
+ $$->list = new ConsItemList;
+ };
+
+nonterm lit_cons_el uses constructor;
+
+# Literal text becomes a single InputText item.
+lit_cons_el: TK_LitPat
+ final {
+ ConsItem *consItem = ConsItem::cons( $1->loc, ConsItem::InputText, $1->data );
+ $$->list = ConsItemList::cons( consItem );
+ };
+
+# A bracketed element list embedded inside a quoted literal.
+lit_cons_el: '[' cons_el_list ']'
+ final {
+ $$->list = $2->list;
+ };
+
+nonterm cons_el_list uses constructor;
+
+# Contents of a bracketed constructor: zero or more elements.
+cons_el_list: cons_el_list cons_el
+ final {
+ $$->list = consListConcat( $1->list, $2->list );
+ };
+cons_el_list:
+ final {
+ $$->list = new ConsItemList;
+ };
+
+nonterm cons_el uses constructor;
+
+# An element is an optionally qualified literal, a nested quoted literal, or
+# a code expression (wrapped as an ExprType item at the expression's
+# location).
+cons_el: region_qual TK_Literal
+ final {
+ $$->list = consElLiteral( $2->loc, $2->data, $1->nspaceQual );
+ };
+
+cons_el: '"' lit_cons_el_list '"'
+ final {
+ $$->list = $2->list;
+ };
+
+cons_el: code_expr
+ final {
+ ConsItem *consItem = ConsItem::cons( $1->expr->loc, ConsItem::ExprType, $1->expr );
+ $$->list = ConsItemList::cons( consItem );
+ };
+
+#
+# Accumulate List
+#
+
+# An accumulate list is carried as a ConsItemList. The structure mirrors the
+# constructor productions, except that an element is only a code expression
+# or a nested quoted literal.
+nonterm accumulate
+{
+ ConsItemList *list;
+};
+
+accumulate:
+ accum_list
+ final {
+ $$->list = $1->list;
+ };
+
+nonterm accum_list uses accumulate;
+
+# One or more top-level elements (right-recursive, concatenated in source
+# order).
+accum_list: accum_top_el accum_list
+ final {
+ $$->list = consListConcat( $1->list, $2->list );
+ };
+
+accum_list: accum_top_el
+ final {
+ $$->list = $1->list;
+ };
+
+nonterm accum_top_el uses accumulate;
+
+# A top-level element is a double-quoted literal or a bracketed element list.
+accum_top_el: '"' lit_accum_el_list '"'
+ final {
+ $$->list = $2->list;
+ };
+
+accum_top_el: '[' accum_el_list ']'
+ final {
+ $$->list = $2->list;
+ };
+
+nonterm lit_accum_el_list uses accumulate;
+
+# Contents of a quoted literal: zero or more literal elements.
+lit_accum_el_list:
+ lit_accum_el_list lit_accum_el
+ final {
+ $$->list = consListConcat( $1->list, $2->list );
+ };
+
+lit_accum_el_list:
+ final {
+ $$->list = new ConsItemList;
+ };
+
+nonterm lit_accum_el uses accumulate;
+
+# Literal text becomes a single InputText item.
+lit_accum_el: TK_LitPat
+ final {
+ ConsItem *consItem = ConsItem::cons( $1->loc, ConsItem::InputText, $1->data );
+ $$->list = ConsItemList::cons( consItem );
+ };
+
+# A bracketed element list embedded inside a quoted literal.
+lit_accum_el: '[' accum_el_list ']'
+ final {
+ $$->list = $2->list;
+ };
+
+nonterm accum_el_list uses accumulate;
+
+# Contents of a bracketed list: zero or more elements.
+accum_el_list: accum_el_list accum_el
+ final {
+ $$->list = consListConcat( $1->list, $2->list );
+ };
+
+accum_el_list:
+ final {
+ $$->list = new ConsItemList;
+ };
+
+nonterm accum_el uses accumulate;
+
+# An element is a code expression (wrapped as an ExprType item) or a nested
+# quoted literal.
+accum_el: code_expr
+ final {
+ ConsItem *consItem = ConsItem::cons( $1->expr->loc, ConsItem::ExprType, $1->expr );
+ $$->list = ConsItemList::cons( consItem );
+ };
+
+accum_el: '"' lit_accum_el_list '"'
+ final {
+ $$->list = $2->list;
+ };
+
+
+#
+# String List
+#
+
+# A string list is carried as a ConsItemList. The structure mirrors the
+# accumulate productions.
+nonterm string
+{
+ ConsItemList *list;
+};
+
+string: string_list
+ final {
+ $$->list = $1->list;
+ };
+
+nonterm string_list uses string;
+
+# One or more top-level elements (right-recursive, concatenated in source
+# order).
+string_list: string_top_el string_list
+ final {
+ $$->list = consListConcat( $1->list, $2->list );
+ };
+string_list: string_top_el
+ final {
+ $$->list = $1->list;
+ };
+
+nonterm string_top_el uses string;
+
+# A top-level element is a double-quoted literal or a bracketed element list.
+string_top_el: '"' lit_string_el_list '"'
+ final {
+ $$->list = $2->list;
+ };
+string_top_el: '[' string_el_list ']'
+ final {
+ $$->list = $2->list;
+ };
+
+nonterm lit_string_el_list uses string;
+
+# Contents of a quoted literal: zero or more literal elements.
+lit_string_el_list: lit_string_el_list lit_string_el
+ final {
+ $$->list = consListConcat( $1->list, $2->list );
+ };
+lit_string_el_list:
+ final {
+ $$->list = new ConsItemList;
+ };
+
+nonterm lit_string_el uses string;
+
+# Literal text becomes a single InputText item.
+lit_string_el: TK_LitPat
+ final {
+ ConsItem *consItem = ConsItem::cons( $1->loc, ConsItem::InputText, $1->data );
+ $$->list = ConsItemList::cons( consItem );
+ };
+
+# A bracketed element list embedded inside a quoted literal.
+lit_string_el: '[' string_el_list ']'
+ final {
+ $$->list = $2->list;
+ };
+
+nonterm string_el_list uses string;
+
+# Contents of a bracketed list: zero or more elements.
+string_el_list: string_el_list string_el
+ final {
+ $$->list = consListConcat( $1->list, $2->list );
+ };
+string_el_list:
+ final {
+ $$->list = new ConsItemList;
+ };
+
+nonterm string_el uses string;
+
+# An element is a code expression (wrapped as an ExprType item) or a nested
+# quoted literal.
+string_el: code_expr
+ final {
+ ConsItem *consItem = ConsItem::cons( $1->expr->loc, ConsItem::ExprType, $1->expr );
+ $$->list = ConsItemList::cons( consItem );
+ };
+
+string_el: '"' lit_string_el_list '"'
+ final {
+ $$->list = $2->list;
+ };
+
+#
+# Production Lists.
+#
+
+nonterm prod_el_list
+{
+ ProdElList *list;
+};
+
+# Zero or more production elements, accumulated through appendProdEl().
+prod_el_list:
+ prod_el_list prod_el
+ final {
+ $$->list = appendProdEl( $1->list, $2->prodEl );
+ };
+
+prod_el_list:
+ final {
+ $$->list = new ProdElList;
+ };
+
+# Optional 'ni' keyword, recorded as the noIgnore flag.
+nonterm opt_no_ignore { bool noIgnore; };
+
+opt_no_ignore: KW_Ni final { $$->noIgnore = true; };
+opt_no_ignore: final { $$->noIgnore = false; };
+
+nonterm prod_el
+{
+ ProdEl *prodEl;
+};
+
+# A production element: optional capture, optional commit, then an
+# optionally qualified name or literal with an optional repeat operator.
+prod_el:
+ opt_capture opt_commit region_qual TK_Word opt_repeat
+ final {
+ $$->prodEl = prodElName( $4->loc, $4->data, $3->nspaceQual,
+ $1->objField, $5->repeatType, $2->commit );
+ };
+
+prod_el:
+ opt_capture opt_commit region_qual TK_Literal opt_repeat
+ final {
+ $$->prodEl = prodElLiteral( $4->loc, $4->data, $3->nspaceQual,
+ $1->objField, $5->repeatType, $2->commit );
+ };
+
+nonterm opt_repeat
+{
+ bool opt;
+ bool repeat;
+ RepeatType repeatType;
+};
+
+# Repeat operator: '*' -> RepeatRepeat, '+' -> RepeatList, '?' -> RepeatOpt,
+# nothing -> RepeatNone. The opt and repeat flags are set alongside.
+opt_repeat: '*' final { $$->opt = false; $$->repeat = true; $$->repeatType = RepeatRepeat; };
+opt_repeat: '+' final { $$->opt = false; $$->repeat = false; $$->repeatType = RepeatList; };
+opt_repeat: '?' final { $$->opt = true; $$->repeat = false; $$->repeatType = RepeatOpt; };
+opt_repeat: final { $$->opt = false; $$->repeat = false; $$->repeatType = RepeatNone; };
+
+nonterm region_qual
+{
+ NamespaceQual *nspaceQual;
+};
+
+# Namespace qualification: zero or more 'name::' prefixes. The qualifier is
+# created against the current namespace and each prefix name is appended.
+region_qual: region_qual TK_Word TK_DoubleColon
+ final {
+ $$->nspaceQual = $1->nspaceQual;
+ $$->nspaceQual->qualNames.append( $2->data );
+ };
+
+region_qual:
+ final {
+ $$->nspaceQual = NamespaceQual::cons( namespaceStack.top() );
+ };
+
+# 'literal' followed by a comma-separated list of literal items.
+literal_def: KW_Literal literal_list;
+
+literal_list: literal_list ',' literal_item;
+literal_list: literal_item;
+
+# One literal with optional 'ni' before and after it. The empty literal ''
+# is routed to zeroDef(); anything else goes to literalDef().
+literal_item: opt_no_ignore TK_Literal opt_no_ignore
+ final {
+ if ( strcmp( $2->data, "''" ) == 0 )
+ zeroDef( $2->loc, $2->data, $1->noIgnore, $3->noIgnore );
+ else
+ literalDef( $2->loc, $2->data, $1->noIgnore, $3->noIgnore );
+ };
+
+
+# These two productions are responsible for setting and unsetting the Regular
+# language scanning context.
+# Empty production that sets the enterRl flag; the undo action clears it
+# again.
+enter_rl:
+ try {
+ enterRl = true;
+ }
+ undo {
+ enterRl = false;
+ };
+# Empty production that clears the enterRl flag; the undo action sets it
+# again.
+leave_rl:
+ try {
+ enterRl = false;
+ }
+ undo {
+ enterRl = true;
+ };
+
+# Token or ignore definition. Element positions used by the action:
+# $1 token/ignore keyword, $2 name, $3 object variables, $5 leading 'ni',
+# $7 the lexical join between the slashes, $10 trailing 'ni',
+# $11 optional translation block.
+token_def:
+ token_or_ignore token_def_name obj_var_list
+ enter_rl opt_no_ignore '/' opt_lex_join leave_rl '/' opt_no_ignore
+ opt_translate
+ final {
+ $3->objectDef->name = $2->name;
+ defineToken( $1->loc, $2->name, $7->join, $3->objectDef,
+ $11->transBlock, $1->ignore, $5->noIgnore, $10->noIgnore );
+ };
+
+nonterm token_or_ignore
+{
+ InputLoc loc;
+ bool ignore;
+};
+
+# Records the keyword location and whether this is an 'ignore' definition.
+token_or_ignore: KW_Token
+ final { $$->loc = $1->loc; $$->ignore = false; };
+
+token_or_ignore: KW_Ignore
+ final { $$->loc = $1->loc; $$->ignore = true; };
+
+nonterm class token_def_name
+{
+ String name;
+};
+
+token_def_name:
+ opt_name
+ final {
+ $$->name = $1->name;
+ };
+
+nonterm class opt_name
+{
+ String name;
+};
+
+# Optional name. The empty alternative has no action and does not assign
+# the name.
+opt_name: TK_Word final { $$->name = $1->data; };
+opt_name: ;
+
+nonterm opt_translate
+{
+ CodeBlock *transBlock;
+};
+
+# Optional translation block; when present it is tied to the current
+# context, otherwise transBlock is null.
+opt_translate:
+ block_open lang_stmt_list block_close
+ final {
+ $$->transBlock = CodeBlock::cons( $2->stmtList, $1->localFrame );
+ $$->transBlock->context = contextStack.top();
+ };
+
+opt_translate:
+ final {
+ $$->transBlock = 0;
+ };
+
+# 'preeof' block, handed to preEof() with its statements and local frame.
+pre_eof:
+ KW_Preeof block_open lang_stmt_list block_close
+ final {
+ preEof( $1->loc, $3->stmtList, $2->localFrame );
+ };
+
+# Regular language definition: 'rl' name '/' join '/'. The join is scanned
+# between enter_rl and leave_rl.
+rl_def:
+ KW_Rl machine_name enter_rl '/' lex_join leave_rl '/'
+ final {
+ /* Generic creation of machine for instantiation and assignment. */
+ addRegularDef( $2->loc, namespaceStack.top(), $2->data, $5->join );
+ };
+
+# Shared value type holding a token's location and text.
+type class token_data
+{
+ InputLoc loc;
+ String data;
+};
+
+nonterm machine_name uses token_data;
+
+# Copies the location and text of the name token.
+machine_name:
+ TK_Word
+ final {
+ $$->loc = $1->loc;
+ $$->data = $1->data;
+ };
+
+#
+# Reduce statements
+#
+
+nonterm opt_reduce_code
+{
+ CodeBlock *codeBlock;
+};
+
+# Optional reduction code on a production; null when absent, otherwise a
+# code block tied to the current context.
+opt_reduce_code:
+ final { $$->codeBlock = 0; };
+
+opt_reduce_code:
+ start_reduce lang_stmt_list block_close
+ final {
+ $$->codeBlock = CodeBlock::cons( $2->stmtList, $1->localFrame );
+ $$->codeBlock->context = contextStack.top();
+ };
+
+nonterm start_reduce uses block_open;
+
+# Forwards the local frame opened by block_open.
+start_reduce:
+ block_open
+ final {
+ $$->localFrame = $1->localFrame;
+ };
+
+nonterm lang_stmt_list
+{
+ StmtList *stmtList;
+};
+
+# Statements followed by an optional require statement, which is appended
+# to the same list when present.
+lang_stmt_list: rec_stmt_list opt_require_stmt
+ final {
+ $$->stmtList = $1->stmtList;
+ if ( $2->stmt != 0 )
+ $$->stmtList->append( $2->stmt );
+ };
+
+nonterm rec_stmt_list uses lang_stmt_list;
+
+# Zero or more statements; null statements are skipped.
+rec_stmt_list: rec_stmt_list statement
+ final {
+ $$->stmtList = $1->stmtList;
+
+ /* Maybe a statement was generated. */
+ if ( $2->stmt != 0 )
+ $$->stmtList->append( $2->stmt );
+ };
+
+rec_stmt_list:
+ final {
+ $$->stmtList = new StmtList;
+ };
+
+# Optional initializer on a variable definition.
+nonterm opt_def_init
+{
+	LangExpr *expr;
+	LangStmt::Type assignType;
+};
+
+# '=' expression: a plain assignment initializer.
+opt_def_init: '=' code_expr
+	final {
+		$$->expr = $2->expr;
+		$$->assignType = LangStmt::AssignType;
+	};
+
+# No initializer: expr is null. assignType is still set, because users of
+# this nonterminal pass it on unconditionally and it was previously left
+# uninitialised here.
+opt_def_init:
+	final {
+		$$->expr = 0;
+		$$->assignType = LangStmt::AssignType;
+	};
+
+scope_push:
+ final {
+ pd->curLocalFrame->pushScope();
+ };
+
+scope_pop:
+ final {
+ pd->curLocalFrame->popScope();
+ };
+
+nonterm statement
+{
+ LangStmt *stmt;
+};
+nonterm for_scope uses statement;
+
+statement: var_def opt_def_init
+ final {
+ $$->stmt = varDef( $1->objField, $2->expr, $2->assignType );
+ };
+statement: var_ref '=' code_expr
+ final {
+ $$->stmt = LangStmt::cons( $2->loc, LangStmt::AssignType, $1->varRef, $3->expr );
+ };
+statement: KW_Print '(' code_expr_list ')'
+ final {
+ $$->stmt = LangStmt::cons( $1->loc, LangStmt::PrintType, $3->exprVect );
+ };
+statement: KW_PrintXMLAC '(' code_expr_list ')'
+ final {
+ $$->stmt = LangStmt::cons( $1->loc, LangStmt::PrintXMLACType, $3->exprVect );
+ };
+statement: KW_PrintXML '(' code_expr_list ')'
+ final {
+ $$->stmt = LangStmt::cons( $1->loc, LangStmt::PrintXMLType, $3->exprVect );
+ };
+statement: KW_PrintStream '(' code_expr_list ')'
+ final {
+ $$->stmt = LangStmt::cons( $1->loc, LangStmt::PrintStreamType, $3->exprVect );
+ };
+statement: code_expr
+ final {
+ $$->stmt = LangStmt::cons( InputLoc(), LangStmt::ExprType, $1->expr );
+ };
+statement: if_stmt
+ final {
+ $$->stmt = $1->stmt;
+ };
+statement: KW_Reject
+ final {
+ $$->stmt = LangStmt::cons( $1->loc, LangStmt::RejectType );
+ };
+statement: KW_While scope_push code_expr block_or_single scope_pop
+ final {
+ $$->stmt = LangStmt::cons( LangStmt::WhileType, $3->expr, $4->stmtList );
+ };
+
+for_scope: TK_Word ':' type_ref KW_In iter_call block_or_single
+ final {
+ $$->stmt = forScope( $1->loc, $1->data, $3->typeRef, $5->langTerm, $6->stmtList );
+ };
+
+statement: KW_For scope_push for_scope scope_pop
+ final {
+ $$->stmt = $3->stmt;
+ };
+
+statement: KW_Return code_expr
+ final {
+ $$->stmt = LangStmt::cons( $1->loc, LangStmt::ReturnType, $2->expr );
+ };
+statement: KW_Break
+ final {
+ $$->stmt = LangStmt::cons( LangStmt::BreakType );
+ };
+statement: KW_Yield var_ref
+ final {
+ $$->stmt = LangStmt::cons( LangStmt::YieldType, $2->varRef );
+ };
+
+nonterm opt_require_stmt uses statement;
+
+opt_require_stmt:
+ scope_push require_pattern lang_stmt_list scope_pop
+ final {
+ $$->stmt = LangStmt::cons( LangStmt::IfType, $2->expr, $3->stmtList, 0 );
+ };
+opt_require_stmt:
+ final {
+ $$->stmt = 0;
+ };
+
+nonterm require_pattern uses code_expr;
+
+require_pattern:
+ KW_Require var_ref pattern
+ final {
+ $$->expr = require( $1->loc, $2->varRef, $3->list );
+ };
+
+nonterm block_or_single uses lang_stmt_list;
+
+block_or_single: '{' lang_stmt_list '}'
+ final {
+ $$->stmtList = $2->stmtList;
+ };
+block_or_single: statement
+ final {
+ $$->stmtList = new StmtList;
+ $$->stmtList->append( $1->stmt );
+ };
+
+nonterm iter_call
+{
+ LangTerm *langTerm;
+};
+
+iter_call: var_ref '(' opt_code_expr_list ')'
+ final {
+ $$->langTerm = LangTerm::cons( InputLoc(), $1->varRef, $3->exprVect );
+ };
+iter_call: TK_Word
+ final {
+ $$->langTerm = LangTerm::cons( InputLoc(), LangTerm::VarRefType,
+ LangVarRef::cons( $1->loc, $1->data ) );
+ };
+
+#
+# If Statements
+#
+
+nonterm if_stmt uses statement;
+
+if_stmt: KW_If scope_push code_expr block_or_single scope_pop elsif_list
+ final {
+ $$->stmt = LangStmt::cons( LangStmt::IfType, $3->expr, $4->stmtList, $6->stmt );
+ };
+
+# The tail of an if statement: zero or more elsif clauses followed by an
+# optional else. The result is the statement to use as the preceding
+# clause's else part (0 when there is nothing).
+nonterm elsif_list
+{
+ LangStmt *stmt;
+};
+
+elsif_list:
+ elsif_clause elsif_list
+ final {
+ /* Put any of the following elsif parts, an else, or null into the elsePart. */
+ $$->stmt = $1->stmt;
+ $$->stmt->elsePart = $2->stmt;
+ };
+elsif_list:
+ optional_else
+ final {
+ $$->stmt = $1->stmt;
+ };
+
+nonterm elsif_clause
+{
+ LangStmt *stmt;
+};
+
+elsif_clause:
+ KW_Elsif scope_push code_expr block_or_single scope_pop
+ final {
+ $$->stmt = LangStmt::cons( LangStmt::IfType, $3->expr, $4->stmtList, 0 );
+ };
+
+nonterm optional_else
+{
+ LangStmt *stmt;
+};
+
+optional_else:
+ KW_Else scope_push block_or_single scope_pop
+ final {
+ $$->stmt = LangStmt::cons( LangStmt::ElseType, $3->stmtList );
+ };
+
+optional_else:
+ final {
+ $$->stmt = 0;
+ };
+
+#
+# Code LexExpression Lists.
+#
+nonterm code_expr_list
+{
+ ExprVect *exprVect;
+};
+
+code_expr_list:
+ code_expr_list code_expr
+ final {
+ $$->exprVect = $1->exprVect;
+ $$->exprVect->append( $2->expr );
+ };
+code_expr_list:
+ code_expr
+ final {
+ $$->exprVect = new ExprVect;
+ $$->exprVect->append( $1->expr );
+ };
+
+nonterm opt_code_expr_list uses code_expr_list;
+
+opt_code_expr_list:
+ code_expr_list
+ final {
+ $$->exprVect = $1->exprVect;
+ };
+
+opt_code_expr_list:
+ final {
+ $$->exprVect = 0;
+ };
+
+#
+# Type list
+#
+
+nonterm type_list
+{
+ TypeRefVect *typeRefVect;
+};
+
+type_list: type_list ',' type_ref
+ final {
+ $$->typeRefVect = $1->typeRefVect;
+ $$->typeRefVect->append( $3->typeRef );
+ };
+type_list: type_ref
+ final {
+ $$->typeRefVect = new TypeRefVect;
+ $$->typeRefVect->append( $1->typeRef );
+ };
+
+nonterm opt_type_list uses type_list;
+
+opt_type_list: type_list
+ final {
+ $$->typeRefVect = $1->typeRefVect;
+ };
+
+opt_type_list:
+ final {
+ $$->typeRefVect = 0;
+ };
+
+
+#
+# Variable reference
+#
+
+nonterm var_ref
+{
+ LangVarRef *varRef;
+};
+
+var_ref: qual TK_Word
+ final {
+ $$->varRef = LangVarRef::cons( $2->loc, $1->qual, $2->data );
+ };
+
+nonterm qual
+{
+ QualItemVect *qual;
+};
+
+qual: qual TK_Word '.'
+ final {
+ $$->qual = $1->qual;
+ $$->qual->append( QualItem( $2->loc, $2->data, QualItem::Dot ) );
+ };
+qual: qual TK_Word TK_RightArrow
+ final {
+ $$->qual = $1->qual;
+ $$->qual->append( QualItem( $2->loc, $2->data, QualItem::Arrow ) );
+ };
+qual:
+ final {
+ $$->qual = new QualItemVect;
+ };
+
+#
+# Code expression
+#
+
+nonterm code_expr
+{
+ LangExpr *expr;
+};
+
+code_expr: code_expr TK_AmpAmp code_relational
+ final {
+ $$->expr = LangExpr::cons( $2->loc, $1->expr, OP_LogicalAnd, $3->expr );
+ };
+
+code_expr: code_expr TK_BarBar code_relational
+ final {
+ $$->expr = LangExpr::cons( $2->loc, $1->expr, OP_LogicalOr, $3->expr );
+ };
+
+code_expr: code_relational
+ final {
+ $$->expr = $1->expr;
+ };
+
+nonterm code_relational uses code_expr;
+
+code_relational: code_relational TK_DoubleEql code_additive
+ final {
+ $$->expr = LangExpr::cons( $2->loc, $1->expr, OP_DoubleEql, $3->expr );
+ };
+
+code_relational: code_relational TK_NotEql code_additive
+ final {
+ $$->expr = LangExpr::cons( $2->loc, $1->expr, OP_NotEql, $3->expr );
+ };
+
+code_relational: code_relational '<' code_additive
+ final {
+ $$->expr = LangExpr::cons( $2->loc, $1->expr, '<', $3->expr );
+ };
+
+code_relational: code_relational '>' code_additive
+ final {
+ $$->expr = LangExpr::cons( $2->loc, $1->expr, '>', $3->expr );
+ };
+
+code_relational: code_relational TK_LessEql code_additive
+ final {
+ $$->expr = LangExpr::cons( $2->loc, $1->expr, OP_LessEql, $3->expr );
+ };
+
+code_relational: code_relational TK_GrtrEql code_additive
+ final {
+ $$->expr = LangExpr::cons( $2->loc, $1->expr, OP_GrtrEql, $3->expr );
+ };
+
+
+code_relational: code_additive
+ final {
+ $$->expr = $1->expr;
+ };
+
+nonterm code_additive uses code_expr;
+
+code_additive: code_additive '+' code_multiplicitive
+ final {
+ $$->expr = LangExpr::cons( $2->loc, $1->expr, '+', $3->expr );
+ };
+
+code_additive: code_additive '-' code_multiplicitive
+ final {
+ $$->expr = LangExpr::cons( $2->loc, $1->expr, '-', $3->expr );
+ };
+
+code_additive: code_multiplicitive
+ final {
+ $$->expr = $1->expr;
+ };
+
+nonterm code_multiplicitive uses code_expr;
+
+code_multiplicitive: code_multiplicitive '*' code_unary
+ final {
+ $$->expr = LangExpr::cons( $2->loc, $1->expr, '*', $3->expr );
+ };
+
+code_multiplicitive: code_multiplicitive '/' code_unary
+ final {
+ $$->expr = LangExpr::cons( $2->loc, $1->expr, '/', $3->expr );
+ };
+
+code_multiplicitive: code_unary
+ final {
+ $$->expr = $1->expr;
+ };
+
+nonterm code_unary uses code_expr;
+code_unary: '!' code_factor
+ final {
+ $$->expr = LangExpr::cons( $1->loc, '!', $2->expr );
+ };
+code_unary: '$' code_factor
+ final {
+ $$->expr = LangExpr::cons( $1->loc, '$', $2->expr );
+ };
+code_unary: '^' code_factor
+ final {
+ $$->expr = LangExpr::cons( $1->loc, '^', $2->expr );
+ };
+code_unary: '%' code_factor
+ final {
+ $$->expr = LangExpr::cons( $1->loc, '%', $2->expr );
+ };
+code_unary: code_factor
+ final {
+ $$->expr = $1->expr;
+ };
+
+nonterm opt_capture uses var_def;
+
+opt_capture: TK_Word ':'
+ final {
+ $$->objField = ObjectField::cons( $1->loc, 0, $1->data );
+ };
+opt_capture:
+ final {
+ $$->objField = 0;
+ };
+
+nonterm parse_cmd
+{
+ bool stop;
+ InputLoc loc;
+};
+
+parse_cmd:
+ KW_Parse
+ final {
+ $$->stop = false;
+ $$->loc = $1->loc;
+ };
+
+parse_cmd:
+ KW_ParseStop
+ final {
+ $$->stop = true;
+ $$->loc = $1->loc;
+ };
+
+nonterm code_factor uses code_expr;
+
+code_factor: TK_Number
+ final {
+ $$->expr = LangExpr::cons( LangTerm::cons( InputLoc(), LangTerm::NumberType, $1->data ) );
+ };
+code_factor: TK_Literal
+ final {
+ $$->expr = LangExpr::cons( LangTerm::cons( InputLoc(), LangTerm::StringType, $1->data ) );
+ };
+code_factor: var_ref '(' opt_code_expr_list ')'
+ final {
+ $$->expr = LangExpr::cons( LangTerm::cons( InputLoc(), $1->varRef, $3->exprVect ) );
+ };
+code_factor: var_ref
+ final {
+ $$->expr = LangExpr::cons( LangTerm::cons( InputLoc(), LangTerm::VarRefType, $1->varRef ) );
+ };
+code_factor: KW_Match var_ref pattern
+ final {
+ $$->expr = match( $1->loc, $2->varRef, $3->list );
+ };
+code_factor: KW_New code_factor
+ final {
+ $$->expr = LangExpr::cons( LangTerm::cons( InputLoc(), LangTerm::NewType, $2->expr ) );
+ };
+code_factor:
+ KW_Construct opt_capture type_ref opt_field_init constructor
+ final {
+ $$->expr = construct( $1->loc, $2->objField, $5->list,
+ $3->typeRef, $4->fieldInitVect );
+ };
+
+code_factor:
+ parse_cmd opt_capture type_ref opt_field_init accumulate
+ final {
+ $$->expr = parseCmd( $1->loc, $1->stop, $2->objField,
+ $3->typeRef, $4->fieldInitVect, $5->list );
+ };
+code_factor:
+ var_ref TK_LtLt accumulate
+ final {
+ $$->expr = send( $2->loc, $1->varRef, $3->list );
+ };
+code_factor:
+ KW_Send var_ref accumulate
+ final {
+ $$->expr = send( $1->loc, $2->varRef, $3->list );
+ };
+code_factor: KW_TypeId '<' type_ref '>'
+ final {
+ $$->expr = LangExpr::cons( LangTerm::cons( $1->loc,
+ LangTerm::TypeIdType, $3->typeRef ) );
+ };
+code_factor: type_ref KW_In var_ref
+ final {
+ $$->expr = LangExpr::cons( LangTerm::cons( $2->loc,
+ LangTerm::SearchType, $1->typeRef, $3->varRef ) );
+ };
+code_factor: KW_Nil
+ final {
+ $$->expr = LangExpr::cons( LangTerm::cons( $1->loc,
+ LangTerm::NilType ) );
+ };
+code_factor: KW_True
+ final {
+ $$->expr = LangExpr::cons( LangTerm::cons( $1->loc,
+ LangTerm::TrueType ) );
+ };
+code_factor: KW_False
+ final {
+ $$->expr = LangExpr::cons( LangTerm::cons( $1->loc,
+ LangTerm::FalseType ) );
+ };
+code_factor: '(' code_expr ')'
+ final {
+ $$->expr = $2->expr;
+ };
+code_factor: KW_MakeTree '(' opt_code_expr_list ')'
+ final {
+ $$->expr = LangExpr::cons( LangTerm::cons( $1->loc,
+ LangTerm::MakeTreeType, $3->exprVect ) );
+ };
+code_factor: KW_MakeToken '(' opt_code_expr_list ')'
+ final {
+ $$->expr = LangExpr::cons( LangTerm::cons( $1->loc,
+ LangTerm::MakeTokenType, $3->exprVect ) );
+ };
+code_factor: KW_Deref code_expr
+ final {
+ $$->expr = LangExpr::cons( $1->loc, OP_Deref, $2->expr );
+ };
+code_factor: string
+ final {
+ $$->expr = LangExpr::cons( LangTerm::cons( InputLoc(), $1->list ) );
+ };
+
+nonterm opt_field_init uses field_init_list;
+
+opt_field_init: '(' opt_field_init_list ')'
+ final {
+ $$->fieldInitVect = $2->fieldInitVect;
+ };
+opt_field_init:
+ final {
+ $$->fieldInitVect = 0;
+ };
+
+nonterm opt_field_init_list uses field_init_list;
+
+opt_field_init_list: field_init_list
+ final {
+ $$->fieldInitVect = $1->fieldInitVect;
+ };
+opt_field_init_list:
+ final {
+ $$->fieldInitVect = 0;
+ };
+
+nonterm field_init_list
+{
+ FieldInitVect *fieldInitVect;
+};
+
+field_init_list: field_init_list field_init
+ final {
+ $$->fieldInitVect = $1->fieldInitVect;
+ $$->fieldInitVect->append( $2->fieldInit );
+ };
+field_init_list: field_init
+ final {
+ $$->fieldInitVect = new FieldInitVect;
+ $$->fieldInitVect->append( $1->fieldInit );
+ };
+
+nonterm field_init
+{
+ FieldInit *fieldInit;
+};
+
+field_init: code_expr
+ final {
+ $$->fieldInit = FieldInit::cons( InputLoc(), "_name", $1->expr );
+ };
+
+#
+# Regular Expressions
+#
+
+nonterm opt_lex_join
+{
+ LexJoin *join;
+};
+
+opt_lex_join:
+ lex_join opt_context
+ final {
+ $$->join = lexOptJoin( $1->join, $2->context );
+ };
+
+opt_lex_join:
+ final {
+ $$->join = 0;
+ };
+
+nonterm lex_join
+{
+ LexJoin *join;
+};
+
+lex_join:
+ lex_expr
+ final {
+ $$->join = LexJoin::cons( $1->expression );
+ };
+
+nonterm opt_context
+{
+ LexJoin *context;
+};
+
+opt_context:
+ '@' lex_join
+ final
+ {
+ $$->context = $2->join;
+ };
+
+opt_context:
+ final {
+ $$->context = 0;
+ };
+
+nonterm lex_expr
+{
+ LexExpression *expression;
+};
+
+lex_expr:
+ lex_expr '|' lex_term_short
+ final {
+ $$->expression = LexExpression::cons( $1->expression,
+ $3->term, LexExpression::OrType );
+ };
+lex_expr:
+ lex_expr '&' lex_term_short
+ final {
+ $$->expression = LexExpression::cons( $1->expression,
+ $3->term, LexExpression::IntersectType );
+ };
+# This priority specification overrides the innermost parsing strategy which
+# results in an ordered choice interpretation of the grammar.
+lex_expr:
+ lex_expr '-' lex_term_short
+ final {
+ $$->expression = LexExpression::cons( $1->expression,
+ $3->term, LexExpression::SubtractType );
+ };
+lex_expr:
+ lex_expr TK_DashDash lex_term_short
+ final {
+ $$->expression = LexExpression::cons( $1->expression,
+ $3->term, LexExpression::StrongSubtractType );
+ };
+lex_expr:
+ lex_term_short
+ final {
+ $$->expression = LexExpression::cons( $1->term );
+ };
+
+nonterm lex_term_short
+{
+ LexTerm *term;
+};
+
+shortest lex_term_short;
+
+lex_term_short: lex_term
+ final { $$->term = $1->term; };
+
+nonterm lex_term
+{
+ LexTerm *term;
+};
+
+lex_term:
+ lex_term lex_factor_label
+ final {
+ $$->term = LexTerm::cons( $1->term, $2->factorAug );
+ };
+lex_term:
+ lex_term '.' lex_factor_label
+ final {
+ $$->term = LexTerm::cons( $1->term, $3->factorAug );
+ };
+lex_term:
+ lex_term TK_ColonGt lex_factor_label
+ final {
+ $$->term = LexTerm::cons( $1->term, $3->factorAug, LexTerm::RightStartType );
+ };
+lex_term:
+ lex_term TK_ColonGtGt lex_factor_label
+ final {
+ $$->term = LexTerm::cons( $1->term, $3->factorAug, LexTerm::RightFinishType );
+ };
+lex_term:
+ lex_term TK_LtColon lex_factor_label
+ final {
+ $$->term = LexTerm::cons( $1->term,
+ $3->factorAug, LexTerm::LeftType );
+ };
+lex_term:
+ lex_factor_label
+ final {
+ $$->term = LexTerm::cons( $1->factorAug );
+ };
+
+nonterm lex_factor_label
+{
+ LexFactorAug *factorAug;
+};
+
+lex_factor_label:
+ factor_ep
+ final {
+ $$->factorAug = $1->factorAug;
+ };
+
+lex_factor_label:
+ TK_Word ':' lex_factor_label
+ final {
+ $$->factorAug = lexFactorLabel( $1->loc, $1->data, $3->factorAug );
+ };
+
+nonterm factor_ep
+{
+ LexFactorAug *factorAug;
+};
+
+factor_ep:
+ factor_aug
+ final {
+ $$->factorAug = $1->factorAug;
+ };
+
+nonterm factor_aug
+{
+ LexFactorAug *factorAug;
+};
+
+factor_aug:
+ lex_factor_rep
+ final {
+ $$->factorAug = LexFactorAug::cons( $1->factorRep );
+ };
+
+
+# The fourth level of precedence. These are the trailing unary operators that
+# allow for repetition.
+
+nonterm lex_factor_rep
+{
+ LexFactorRep *factorRep;
+};
+
+lex_factor_rep:
+ lex_factor_rep '*'
+ final {
+ $$->factorRep = LexFactorRep::cons( $2->loc, $1->factorRep,
+ 0, 0, LexFactorRep::StarType );
+ };
+lex_factor_rep:
+ lex_factor_rep TK_StarStar
+ final {
+ $$->factorRep = LexFactorRep::cons( $2->loc, $1->factorRep,
+ 0, 0, LexFactorRep::StarStarType );
+ };
+lex_factor_rep:
+ lex_factor_rep '?'
+ final {
+ $$->factorRep = LexFactorRep::cons( $2->loc, $1->factorRep,
+ 0, 0, LexFactorRep::OptionalType );
+ };
+lex_factor_rep:
+ lex_factor_rep '+'
+ final {
+ $$->factorRep = LexFactorRep::cons( $2->loc, $1->factorRep,
+ 0, 0, LexFactorRep::PlusType );
+ };
+lex_factor_rep:
+ lex_factor_rep '{' lex_factor_rep_num '}'
+ final {
+ $$->factorRep = LexFactorRep::cons( $2->loc, $1->factorRep,
+ $3->rep, 0, LexFactorRep::ExactType );
+ };
+lex_factor_rep:
+ lex_factor_rep '{' ',' lex_factor_rep_num '}'
+ final {
+ $$->factorRep = LexFactorRep::cons( $2->loc, $1->factorRep,
+ 0, $4->rep, LexFactorRep::MaxType );
+ };
+lex_factor_rep:
+ lex_factor_rep '{' lex_factor_rep_num ',' '}'
+ final {
+ $$->factorRep = LexFactorRep::cons( $2->loc, $1->factorRep,
+ $3->rep, 0, LexFactorRep::MinType );
+ };
+lex_factor_rep:
+ lex_factor_rep '{' lex_factor_rep_num ',' lex_factor_rep_num '}'
+ final {
+ $$->factorRep = LexFactorRep::cons( $2->loc, $1->factorRep,
+ $3->rep, $5->rep, LexFactorRep::RangeType );
+ };
+lex_factor_rep:
+ lex_factor_neg
+ final {
+ $$->factorRep = LexFactorRep::cons(
+ $1->factorNeg->loc, $1->factorNeg );
+ };
+
+nonterm lex_factor_rep_num
+{
+ int rep;
+};
+
+lex_factor_rep_num:
+ TK_UInt
+ final {
+ $$->rep = lexFactorRepNum( $1->loc, $1->data );
+ };
+
+
+#
+# The fifth level up in precedence. Negation.
+#
+
+nonterm lex_factor_neg
+{
+ LexFactorNeg *factorNeg;
+};
+
+lex_factor_neg:
+ '!' lex_factor_neg
+ final {
+ $$->factorNeg = LexFactorNeg::cons( $1->loc,
+ $2->factorNeg, LexFactorNeg::NegateType );
+ };
+lex_factor_neg:
+ '^' lex_factor_neg
+ final {
+ $$->factorNeg = LexFactorNeg::cons( $1->loc,
+ $2->factorNeg, LexFactorNeg::CharNegateType );
+ };
+lex_factor_neg:
+ lex_rl_factor
+ final {
+ $$->factorNeg = LexFactorNeg::cons( $1->factor->loc, $1->factor );
+ };
+
+nonterm lex_rl_factor
+{
+ LexFactor *factor;
+};
+
+lex_rl_factor:
+ TK_Literal
+ final {
+ /* Create a new factor node going to a concat literal. */
+ $$->factor = LexFactor::cons( Literal::cons( $1->loc,
+ $1->data, Literal::LitString ) );
+ };
+lex_rl_factor:
+ lex_alphabet_num
+ final {
+ /* Create a new factor node going to a literal number. */
+ $$->factor = LexFactor::cons( Literal::cons( $1->loc,
+ $1->data, Literal::Number ) );
+ };
+lex_rl_factor:
+ TK_Word
+ final {
+ $$->factor = lexRlFactorName( $1->data, $1->loc );
+ };
+lex_rl_factor:
+ TK_SqOpen lex_regular_expr_or_data TK_SqClose
+ final {
+ /* Create a new factor node going to an OR expression. */
+ $$->factor = LexFactor::cons( ReItem::cons( $1->loc, $2->reOrBlock, ReItem::OrBlock ) );
+ };
+lex_rl_factor:
+ TK_SqOpenNeg lex_regular_expr_or_data TK_SqClose
+ final {
+ /* Create a new factor node going to a negated OR expression. */
+ $$->factor = LexFactor::cons( ReItem::cons( $1->loc, $2->reOrBlock, ReItem::NegOrBlock ) );
+ };
+lex_rl_factor:
+ lex_range_lit TK_DotDot lex_range_lit
+ final {
+ /* Create a new factor node going to a range. */
+ $$->factor = LexFactor::cons( Range::cons( $1->literal, $3->literal ) );
+ };
+lex_rl_factor:
+ '(' lex_join ')'
+ final {
+ /* Create a new factor going to a parenthesized join. */
+ $$->factor = LexFactor::cons( $2->join );
+ };
+
+# One end point of a `lit .. lit` range; carries the Literal built for it.
+nonterm lex_range_lit
+{
+ Literal *literal;
+};
+
+# Literals which can be the end points of ranges.
+lex_range_lit:
+ TK_Literal
+ final {
+ /* Range literals must have only one char. We restrict this in the parse tree. */
+ $$->literal = Literal::cons( $1->loc, $1->data, Literal::LitString );
+ };
+lex_range_lit:
+ lex_alphabet_num
+ final {
+ /* Create a new literal number. */
+ $$->literal = Literal::cons( $1->loc, $1->data, Literal::Number );
+ };
+
+nonterm lex_alphabet_num uses token_data;
+
+# Any form of a number that can be used as a basic machine. The result
+# carries the location of the first token and the number's text.
+lex_alphabet_num:
+	TK_UInt
+	final {
+		$$->loc = $1->loc;
+		$$->data = $1->data;
+	};
+lex_alphabet_num:
+	'-' TK_UInt
+	final {
+		/* Negative number: keep the sign that was written in front of
+		 * the digits so the text still reads as a negative value. */
+		$$->loc = $1->loc;
+		$$->data = '-';
+		$$->data += $2->data;
+	};
+lex_alphabet_num:
+	TK_Hex
+	final {
+		$$->loc = $1->loc;
+		$$->data = $1->data;
+	};
+
+#
+# Regular Expressions.
+#
+
+
+# The data inside of a [] expression in a regular expression. Accepts any
+# number of characters or ranges.
+nonterm lex_regular_expr_or_data
+{
+ ReOrBlock *reOrBlock;
+};
+
+lex_regular_expr_or_data:
+ lex_regular_expr_or_data lex_regular_expr_or_char
+ final {
+ $$->reOrBlock = lexRegularExprData( $1->reOrBlock, $2->reOrItem );
+ };
+lex_regular_expr_or_data:
+ final {
+ $$->reOrBlock = ReOrBlock::cons();
+ };
+
+# A single character inside of an or expression. Can either be a character or a
+# set of characters.
+nonterm lex_regular_expr_or_char
+{
+ ReOrItem *reOrItem;
+};
+
+lex_regular_expr_or_char:
+ TK_ReChar
+ final {
+ $$->reOrItem = ReOrItem::cons( $1->loc, $1->data );
+ };
+
+lex_regular_expr_or_char:
+ TK_ReChar TK_Dash TK_ReChar
+ final {
+ $$->reOrItem = ReOrItem::cons( $2->loc, $1->data[0], $3->data[0] );
+ };
+
+nonterm opt_commit
+{
+ bool commit;
+};
+
+opt_commit:
+ final {
+ $$->commit = false;
+ };
+
+opt_commit:
+ KW_Commit
+ final {
+ $$->commit = true;
+ };
+
+#
+# Grammar Finished
+#
+
+ write types;
+ write data;
+}%%
+
+/* Initialize the base parser, then run whatever the `write init` directive
+ * expands to for this grammar. */
+void ColmParser::init()
+{
+ BaseParser::init();
+ %% write init;
+}
+
+/*
+ * Run the code produced by the `write exec` directive for one language
+ * element (the expansion is not visible here; presumably it consumes `type`
+ * and `token`). Returns 0 when errCount is zero afterwards, -1 otherwise.
+ */
+int ColmParser::parseLangEl( int type, const Token *token )
+{
+	%% write exec;
+
+	if ( errCount != 0 )
+		return -1;
+	return 0;
+}
+
+/*
+ * Wrap a raw token (id, text, location) in a Token and hand it to
+ * parseLangEl(). On a negative result a "parse error" message is written
+ * through parse_error() and the process exits with status 1; otherwise the
+ * result of parseLangEl() is returned.
+ */
+int ColmParser::token( InputLoc &loc, int tokId, char *tokstart, int toklen )
+{
+	Token tok;
+	tok.loc = loc;
+
+	/* Only tokens with text carry data; the rest keep the default. */
+	if ( toklen > 0 )
+		tok.data.setAs( tokstart, toklen );
+
+	const int status = parseLangEl( tokId, &tok );
+	if ( status < 0 ) {
+		parse_error( tokId, tok ) << "parse error" << endl;
+		exit(1);
+	}
+
+	return status;
+}
+
+/*
+ * Count a parse error and write the message prefix
+ * "file:line:col: at token NAME[ with data "TEXT"]: " to cerr. The caller
+ * appends the message itself to the returned stream.
+ */
+ostream &ColmParser::parse_error( int tokId, Token &token )
+{
+	/* Maintain the error count. */
+	gblErrorCount += 1;
+
+	cerr << token.loc.fileName << ":" << token.loc.line << ":" << token.loc.col << ": "
+		<< "at token ";
+
+	/* Token ids below 128 have their name wrapped in double quotes. */
+	const char *quote = ( tokId < 128 ) ? "\"" : "";
+	cerr << quote << ColmParser_lelNames[tokId] << quote;
+
+	if ( token.data != 0 )
+		cerr << " with data \"" << token.data << "\"";
+
+	cerr << ": ";
+	return cerr;
+}
+
diff --git a/src/lmscan.h b/src/lmscan.h
new file mode 100644
index 00000000..ff3de0ad
--- /dev/null
+++ b/src/lmscan.h
@@ -0,0 +1,104 @@
+/*
+ * Copyright 2007-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _RLSCAN_H
+#define _RLSCAN_H
+
+#include <iostream>
+#include <fstream>
+#include <string.h>
+
+#include "global.h"
+#include "lmparse.h"
+#include "compiler.h"
+#include "avltree.h"
+#include "vector.h"
+#include "buffer.h"
+
+using std::ifstream;
+using std::istream;
+using std::ostream;
+using std::cout;
+using std::cerr;
+using std::endl;
+
+extern char *Parser_lelNames[];
+
+
+/*
+ * Scanner for one input stream. Tokens it produces are passed through the
+ * section_parse machine (see token()), which forwards them to the parser
+ * and handles include statements by running a nested ColmScanner.
+ */
+struct ColmScanner
+{
+ /* Starts at line 1, column 1 with no pending newline marker. The members
+ * cs, word, lit, word_len, lit_len, ts and te are not initialized here. */
+ ColmScanner( const char *fileName, istream &input,
+ ColmParser *parser, int includeDepth )
+ :
+ fileName(fileName), input(input),
+ includeDepth(includeDepth),
+ line(1), column(1), lastnl(0),
+ parser(parser),
+ parserExistsError(false),
+ whitespaceOn(true)
+ {
+ }
+
+ /* Opens the first path in the null-terminated pathChecks array that can
+ * be opened; found receives its index, or -1 (and 0 is returned) when
+ * none can be opened. */
+ ifstream *tryOpenInclude( char **pathChecks, long &found );
+ /* Builds the null-terminated array of candidate paths for an include. */
+ char **makeIncludePathChecks( const char *thisFileName, const char *fileName );
+ /* True when inclFileName is already on includeStack. */
+ bool recursiveInclude( const char *inclFileName );
+
+ void sectionParseInit();
+ /* Send a token with text [start, end), a single character, or no text. */
+ void token( int type, char *start, char *end );
+ void token( int type, char c );
+ void token( int type );
+ /* Advance column past the current token and clear lastnl. */
+ void updateCol();
+ void endSection();
+ void scan();
+ void eof();
+ /* Counts an error and returns cerr after writing "file:line:col: ". */
+ ostream &scan_error();
+
+ const char *fileName;
+ istream &input;
+ int includeDepth;
+
+ int cs;
+ /* Current line; incremented by the scanner's newline action. */
+ int line;
+ /* Text saved by the section_parse actions (lit holds the include target). */
+ char *word, *lit;
+ int word_len, lit_len;
+ InputLoc sectionLoc;
+ /* Start and end of the current token. */
+ char *ts, *te;
+ /* Current column; reset to 0 on newline and advanced by updateCol(). */
+ int column;
+ /* Position of the last newline seen since the previous updateCol(), or 0. */
+ char *lastnl;
+
+ /* Set by machine statements, these persist from section to section
+ * allowing for unnamed sections. */
+ ColmParser *parser;
+ IncludeStack includeStack;
+
+ /* This is set if ragel has already emitted an error stating that
+ * no section name has been seen and thus no parser exists. */
+ bool parserExistsError;
+
+ /* This is for inline code. By default it is on. It goes off for
+ * statements and values in inline blocks which are parsed. */
+ bool whitespaceOn;
+
+ Buffer litBuf;
+};
+
+#endif /* _RLSCAN_H */
diff --git a/src/lmscan.rl b/src/lmscan.rl
new file mode 100644
index 00000000..231e2689
--- /dev/null
+++ b/src/lmscan.rl
@@ -0,0 +1,637 @@
+/*
+ * Copyright 2006-2012 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <iostream>
+#include <fstream>
+#include <string.h>
+
+#include "global.h"
+#include "lmscan.h"
+#include "lmparse.h"
+#include "parsedata.h"
+#include "avltree.h"
+#include "vector.h"
+
+//#define PRINT_TOKENS
+
+using std::ifstream;
+using std::istream;
+using std::ostream;
+using std::cout;
+using std::cerr;
+using std::endl;
+
+%%{
+ machine section_parse;
+ alphtype int;
+ write data;
+}%%
+
+/* Run the initialization code that the `write init` directive generates
+ * for the section_parse machine. */
+void ColmScanner::sectionParseInit()
+{
+ %% write init;
+}
+
+/*
+ * Count a scanner error and return cerr after writing the
+ * "file:line:col: " prefix; the caller appends the message text.
+ */
+ostream &ColmScanner::scan_error()
+{
+	gblErrorCount += 1;
+
+	ostream &out = cerr;
+	out << fileName << ":";
+	out << line << ":";
+	out << column << ": ";
+	return out;
+}
+
+/*
+ * Report whether inclFileName matches (by exact string comparison) the
+ * file name of any entry currently on includeStack.
+ */
+bool ColmScanner::recursiveInclude( const char *inclFileName )
+{
+	bool onStack = false;
+
+	for ( IncludeStack::Iter item = includeStack; item.lte(); item++ ) {
+		if ( strcmp( item->fileName, inclFileName ) == 0 ) {
+			onStack = true;
+			break;
+		}
+	}
+
+	return onStack;
+}
+
+/*
+ * Advance column to the end of the current token. The distance is measured
+ * from lastnl when a newline position was recorded, otherwise from the
+ * token start (ts). The newline marker is cleared afterwards.
+ */
+void ColmScanner::updateCol()
+{
+	char *origin = ( lastnl != 0 ) ? lastnl : ts;
+	column += te - origin;
+	lastnl = 0;
+}
+
+/* Send a token whose text is the single character c. The address of the
+ * by-value parameter is used as a one-byte [start, end) range. */
+void ColmScanner::token( int type, char c )
+{
+ token( type, &c, &c + 1 );
+}
+
+/* Send a token that carries no text (null start and end). */
+void ColmScanner::token( int type )
+{
+ token( type, 0, 0 );
+}
+
+/* A path counts as absolute when its first character is '/'. */
+bool isAbsolutePath( const char *path )
+{
+	return *path == '/';
+}
+
+/*
+ * Try each path in the null-terminated pathChecks array in order and return
+ * a heap-allocated stream for the first one that opens, storing its index
+ * in found. When none opens, found is set to -1 and 0 is returned. The
+ * caller owns the returned stream.
+ */
+ifstream *ColmScanner::tryOpenInclude( char **pathChecks, long &found )
+{
+	ifstream *stream = new ifstream;
+
+	for ( long idx = 0; pathChecks[idx] != 0; idx++ ) {
+		stream->open( pathChecks[idx] );
+		if ( stream->is_open() ) {
+			found = idx;
+			return stream;
+		}
+	}
+
+	/* Nothing could be opened. */
+	delete stream;
+	found = -1;
+	return 0;
+}
+
+/*
+ * Build the null-terminated array of candidate paths for including fileName
+ * from within thisFileName.
+ *
+ * An absolute fileName yields a single candidate: a copy of fileName.
+ * Otherwise the first candidate is fileName placed in the directory of
+ * thisFileName (or a plain copy of fileName when thisFileName contains no
+ * '/'), followed by one "<includePath>/<fileName>" candidate per entry of
+ * includePaths, in order.
+ *
+ * The array itself is allocated with new[]. Candidates are allocated with
+ * strdup when they are plain copies and with new[] when they are assembled
+ * here. Nothing in this function frees them.
+ */
+char **ColmScanner::makeIncludePathChecks( const char *thisFileName, const char *fileName )
+{
+	char **checks = 0;
+	long nextCheck = 0;
+	long length = strlen(fileName);
+
+	/* Absolute path? */
+	if ( isAbsolutePath( fileName ) ) {
+		checks = new char*[2];
+		checks[nextCheck++] = strdup(fileName);
+	}
+	else {
+		/* Search from the location of the current file. One slot for that
+		 * candidate, one per include path, one for the terminator. */
+		checks = new char *[2 + includePaths.length()];
+		const char *lastSlash = strrchr( thisFileName, '/' );
+		if ( lastSlash == 0 ) {
+			/* Current file has no directory part: use the name as given.
+			 * The copy is made only here, where it is actually stored, so
+			 * no unused duplicate is left behind on the other path. */
+			checks[nextCheck++] = strdup(fileName);
+		}
+		else {
+			/* Directory prefix of the current file, including the slash. */
+			long givenPathLen = (lastSlash - thisFileName) + 1;
+			long checklen = givenPathLen + length;
+			char *check = new char[checklen+1];
+			memcpy( check, thisFileName, givenPathLen );
+			memcpy( check+givenPathLen, fileName, length );
+			check[checklen] = 0;
+			checks[nextCheck++] = check;
+		}
+
+		/* Search from the include paths given on the command line. */
+		for ( ArgsVector::Iter incp = includePaths; incp.lte(); incp++ ) {
+			long pathLen = strlen( *incp );
+			long checkLen = pathLen + 1 + length;
+			char *check = new char[checkLen+1];
+			memcpy( check, *incp, pathLen );
+			check[pathLen] = '/';
+			memcpy( check+pathLen+1, fileName, length );
+			check[checkLen] = 0;
+			checks[nextCheck++] = check;
+		}
+	}
+
+	checks[nextCheck] = 0;
+	return checks;
+}
+
+
+%%{
+ machine section_parse;
+ import "lmparse.h";
+
+ action clear_words { word = lit = 0; word_len = lit_len = 0; }
+ action store_lit { lit = tokdata; lit_len = toklen; }
+
+ action mach_err { scan_error() << "bad machine statement" << endl; }
+ action incl_err { scan_error() << "bad include statement" << endl; }
+ action write_err { scan_error() << "bad write statement" << endl; }
+
+ # Runs once KW_Include followed by a TK_Literal has been seen (see
+ # include_stmt). Resolves the literal saved in lit/lit_len to a file,
+ # then scans that file with a nested ColmScanner feeding the same parser.
+ action handle_include
+ {
+ String src( lit, lit_len );
+ String fileName;
+ bool unused;
+
+ /* Need a location. */
+ /* NOTE(review): the local fileName declared above shadows the member and
+ * has not been assigned yet when it is copied into here.fileName --
+ * confirm whether this->fileName was intended. */
+ InputLoc here;
+ here.fileName = fileName;
+ here.line = line;
+ here.col = column;
+
+ /* Convert the literal's source text into the file name to include. */
+ prepareLitString( fileName, unused, src, here );
+ char **checks = makeIncludePathChecks( this->fileName, fileName );
+
+ /* Open the input file for reading. */
+ long found = 0;
+ ifstream *inFile = tryOpenInclude( checks, found );
+ if ( inFile == 0 ) {
+ scan_error() << "include: could not open " <<
+ fileName << " for reading" << endl;
+ }
+ else {
+ /* Only proceed with the include if it was found. */
+ /* NOTE(review): a recursive include is reported here, but the code
+ * below still goes on to scan the file -- confirm this is intended. */
+ if ( recursiveInclude( checks[found] ) )
+ scan_error() << "include: this is a recursive include operation" << endl;
+
+ /* Check for a recursive include structure. Add the current file/section
+ * name then check if what we are including is already in the stack. */
+ includeStack.append( IncludeStackItem( checks[found] ) );
+
+ ColmScanner *scanner = new ColmScanner( fileName, *inFile, parser, includeDepth+1 );
+ scanner->scan();
+ delete inFile;
+
+ /* Remove the last element (len-1) */
+ includeStack.remove( -1 );
+
+ delete scanner;
+ }
+ /* NOTE(review): the checks array and its entries are not released in
+ * this action -- confirm whether that is deliberate. */
+ }
+
+ include_target =
+ TK_Literal >clear_words @store_lit;
+
+ include_stmt =
+ ( KW_Include include_target ) @handle_include
+ <>err incl_err <>eof incl_err;
+
+	# Forward the current token to the parser, tagged with the scanner's
+	# file name, line and column. Uses type, tokdata and toklen from the
+	# enclosing ColmScanner::token() call.
+	action handle_token
+	{
+		InputLoc loc;
+
+		#ifdef PRINT_TOKENS
+		cerr << "scanner:" << line << ":" << column <<
+				": sending token to the parser " << Parser_lelNames[*p];
+		cerr << " " << toklen;
+		if ( tokdata != 0 )
+			cerr << " " << tokdata;
+		cerr << endl;
+		#endif
+
+		loc.fileName = fileName;
+		loc.line = line;
+		loc.col = column;
+
+		/* A token whose text ends in a newline is reported one line
+		 * earlier (presumably because the line counter has already been
+		 * advanced for that newline). The toklen check keeps an empty,
+		 * non-null token from reading the byte before the buffer. */
+		if ( tokdata != 0 && toklen > 0 && tokdata[toklen-1] == '\n' )
+			loc.line -= 1;
+
+		parser->token( loc, type, tokdata, toklen );
+	}
+
+ # Catch everything else.
+ everything_else = ^( KW_Include ) @handle_token;
+
+ main := (
+ include_stmt |
+ everything_else
+ )*;
+}%%
+
+/*
+ * Pass one token through the section_parse machine and then advance the
+ * column. The machine's input is the single int `type`; its actions read
+ * the locals tokdata and toklen declared here.
+ */
+void ColmScanner::token( int type, char *start, char *end )
+{
+ char *tokdata = 0;
+ int toklen = 0;
+ /* Input range for the generated exec code: exactly one element, the type. */
+ int *p = &type;
+ int *pe = &type + 1;
+ int *eof = 0;
+
+ if ( start != 0 ) {
+ /* Null-terminated copy of the token text [start, end). It is not freed
+ * in this function; the store_lit action may keep the pointer in lit. */
+ toklen = end-start;
+ tokdata = new char[toklen+1];
+ memcpy( tokdata, start, toklen );
+ tokdata[toklen] = 0;
+ }
+
+ %%{
+ machine section_parse;
+ write exec;
+ }%%
+
+ updateCol();
+}
+
+/* Placeholder: this function currently does nothing. The notes in the body
+ * record what it is intended to do. */
+void ColmScanner::endSection( )
+{
+ /* Execute the eof actions for the section parser. */
+ /* Probably use: token( -1 ); */
+}
+
+%%{
+ machine lmscan;
+
+ # This is sent by the driver code.
+ EOF = 0;
+
+ # Newline bookkeeping: remember where the newline was seen, reset the
+ # column counter and advance the line counter.
+ action inc_nl {
+ lastnl = p;
+ column = 0;
+ line++;
+ }
+ NL = '\n' @inc_nl;
+
+ # Identifiers, numbers, comments, and other common things.
+ ident = ( alpha | '_' ) ( alpha |digit |'_' )*;
+ number = digit+;
+ hex_number = '0x' [0-9a-fA-F]+;
+
+ # These literal forms are common to C-like host code and ragel.
+ # Both forms accept embedded newlines and treat a backslash as escaping
+ # the character that follows it.
+ s_literal = "'" ([^'\\] | NL | '\\' (any | NL))* "'";
+ d_literal = '"' ([^"\\] | NL | '\\' (any | NL))* '"';
+
+ whitespace = [ \t] | NL;
+ # A pound sign up to and including the end of the line.
+ pound_comment = '#' [^\n]* NL;
+
+ # Scanner for the inside of a bracketed OR expression. It is entered with
+ # fcall from regular_type and returns with fret on the closing bracket.
+ or_literal := |*
+ # Escape sequences in OR expressions.
+ '\\0' => { token( TK_ReChar, '\0' ); };
+ '\\a' => { token( TK_ReChar, '\a' ); };
+ '\\b' => { token( TK_ReChar, '\b' ); };
+ '\\t' => { token( TK_ReChar, '\t' ); };
+ '\\n' => { token( TK_ReChar, '\n' ); };
+ '\\v' => { token( TK_ReChar, '\v' ); };
+ '\\f' => { token( TK_ReChar, '\f' ); };
+ '\\r' => { token( TK_ReChar, '\r' ); };
+ # A backslash followed by a newline produces no token.
+ '\\\n' => { updateCol(); };
+ # Any other escaped character stands for itself.
+ '\\' any => { token( TK_ReChar, ts+1, te ); };
+
+ # Range dash in an OR expression.
+ '-' => { token( TK_Dash, 0, 0 ); };
+
+ # Terminate an OR expression.
+ ']' => { token( TK_SqClose ); fret; };
+
+ # End of input inside the expression is reported as an error.
+ EOF => {
+ scan_error() << "unterminated OR literal" << endl;
+ };
+
+ # Characters in an OR expression.
+ [^\]] => { token( TK_ReChar, ts, te ); };
+
+ *|;
+
+ # Scanner for regular-language syntax. It is entered with fcall from main
+ # after a slash when parser->enterRl is set, and returns with fret on the
+ # next slash.
+ regular_type := |*
+ # Identifiers.
+ ident => { token( TK_Word, ts, te ); } ;
+
+ # Numbers
+ number => { token( TK_UInt, ts, te ); };
+ hex_number => { token( TK_Hex, ts, te ); };
+
+ # Literals, with optionals.
+ ( s_literal | d_literal ) [i]?
+ => { token( TK_Literal, ts, te ); };
+
+ # Opening bracket forms hand control to the or_literal scanner.
+ '[' => { token( TK_SqOpen ); fcall or_literal; };
+ '[^' => { token( TK_SqOpenNeg ); fcall or_literal; };
+
+ # A slash ends the regular-language section.
+ '/' => { token( '/'); fret; };
+
+ # Ignore.
+ pound_comment => { updateCol(); };
+
+ '..' => { token( TK_DotDot ); };
+ '**' => { token( TK_StarStar ); };
+ '--' => { token( TK_DashDash ); };
+
+ ':>' => { token( TK_ColonGt ); };
+ ':>>' => { token( TK_ColonGtGt ); };
+ '<:' => { token( TK_LtColon ); };
+
+ # Whitespace other than newline.
+ [ \t\r]+ => { updateCol(); };
+
+ # If we are in a single line machine then newline may end the spec.
+ NL => { updateCol(); };
+
+ # Consume eof.
+ EOF;
+
+ # Any other single character is sent as a token whose id is the
+ # character itself.
+ any => { token( *ts ); } ;
+ *|;
+
+ # Scanner for the body of a double-quoted literal pattern. It is entered
+ # with fcall from main on an opening double quote. Text is accumulated in
+ # litBuf and flushed as a TK_LitPat token.
+ literal_pattern := |*
+ # Recognised escape sequences append the character they denote.
+ '\\' '0' { litBuf.append( '\0' ); };
+ '\\' 'a' { litBuf.append( '\a' ); };
+ '\\' 'b' { litBuf.append( '\b' ); };
+ '\\' 't' { litBuf.append( '\t' ); };
+ '\\' 'n' { litBuf.append( '\n' ); };
+ '\\' 'v' { litBuf.append( '\v' ); };
+ '\\' 'f' { litBuf.append( '\f' ); };
+ '\\' 'r' { litBuf.append( '\r' ); };
+
+ # Any other escaped character is appended as-is.
+ '\\' any {
+ litBuf.append( ts[1] );
+ };
+ # Closing quote: flush pending text (if any), emit the quote token and
+ # return to the calling scanner.
+ '"' => {
+ if ( litBuf.length > 0 ) {
+ token( TK_LitPat, litBuf.data, litBuf.data+litBuf.length );
+ litBuf.clear();
+ }
+ token( '"' );
+ fret;
+ };
+ # A newline also ends the literal: the newline is kept as part of the
+ # text, the text is flushed, and a closing quote token is emitted.
+ NL => {
+ litBuf.append( '\n' );
+ token( TK_LitPat, litBuf.data, litBuf.data+litBuf.length );
+ litBuf.clear();
+ token( '"' );
+ fret;
+ };
+ # An opening bracket flushes pending text and calls back into main.
+ '[' => {
+ if ( litBuf.length > 0 ) {
+ token( TK_LitPat, litBuf.data, litBuf.data+litBuf.length );
+ litBuf.clear();
+ }
+ token( '[' );
+ fcall main;
+ };
+ # Everything else is literal text.
+ any => {
+ litBuf.append( *ts );
+ };
+ *|;
+
+ # Top-level scanner for colm source text. The keyword patterns are listed
+ # ahead of the generic identifier pattern; when two patterns match the same
+ # text the earlier one is the one taken. Each keyword is listed once.
+ main := |*
+     # Keywords. cons maps to the same token as construct, and accum maps to
+     # the same token as parser.
+     'lex' => { token( KW_Lex ); };
+     'commit' => { token( KW_Commit ); };
+     'token' => { token( KW_Token ); };
+     'literal' => { token( KW_Literal ); };
+     'rl' => { token( KW_Rl ); };
+     'def' => { token( KW_Def ); };
+     'ignore' => { token( KW_Ignore ); };
+     'construct' => { token( KW_Construct ); };
+     'cons' => { token( KW_Construct ); };
+     'new' => { token( KW_New ); };
+     'if' => { token( KW_If ); };
+     'reject' => { token( KW_Reject ); };
+     'while' => { token( KW_While ); };
+     'else' => { token( KW_Else ); };
+     'elsif' => { token( KW_Elsif ); };
+     'match' => { token( KW_Match ); };
+     'for' => { token( KW_For ); };
+     'iter' => { token( KW_Iter ); };
+     'prints' => { token( KW_PrintStream ); };
+     'print' => { token( KW_Print ); };
+     'print_xml_ac' => { token( KW_PrintXMLAC ); };
+     'print_xml' => { token( KW_PrintXML ); };
+     'namespace' => { token( KW_Namespace ); };
+     'end' => { token( KW_End ); };
+     'map' => { token( KW_Map ); };
+     'list' => { token( KW_List ); };
+     'vector' => { token( KW_Vector ); };
+     'accum' => { token( KW_Parser ); };
+     'parser' => { token( KW_Parser ); };
+     'return' => { token( KW_Return ); };
+     'break' => { token( KW_Break ); };
+     'yield' => { token( KW_Yield ); };
+     'typeid' => { token( KW_TypeId ); };
+     'make_token' => { token( KW_MakeToken ); };
+     'make_tree' => { token( KW_MakeTree ); };
+     'reducefirst' => { token( KW_ReduceFirst ); };
+     'in' => { token( KW_In ); };
+     'nil' => { token( KW_Nil ); };
+     'true' => { token( KW_True ); };
+     'false' => { token( KW_False ); };
+     'parse' => { token( KW_Parse ); };
+     'parse_stop' => { token( KW_ParseStop ); };
+     'global' => { token( KW_Global ); };
+     'export' => { token( KW_Export ); };
+     'ptr' => { token( KW_Ptr ); };
+     'ref' => { token( KW_Ref ); };
+     'deref' => { token( KW_Deref ); };
+     'require' => { token( KW_Require ); };
+     'preeof' => { token( KW_Preeof ); };
+     'left' => { token( KW_Left ); };
+     'right' => { token( KW_Right ); };
+     'nonassoc' => { token( KW_Nonassoc ); };
+     'prec' => { token( KW_Prec ); };
+     'include' => { token( KW_Include ); };
+     'context' => { token( KW_Context ); };
+     'alias' => { token( KW_Alias ); };
+     'send' => { token( KW_Send ); };
+     'ni' => { token( KW_Ni ); };
+
+     # Identifiers.
+     ident => { token( TK_Word, ts, te ); } ;
+
+     number => { token( TK_Number, ts, te ); };
+
+     # A slash is always sent as a token. When the parser has asked for it
+     # (enterRl), scanning continues in regular_type.
+     '/' => {
+         token( '/' );
+         if ( parser->enterRl )
+             fcall regular_type;
+     };
+
+     # Tilde literal: the rest of the line, newline included, is sent as a
+     # TK_LitPat wrapped in a pair of double-quote tokens.
+     "~" [^\n]* NL => {
+         token( '"' );
+         token( TK_LitPat, ts+1, te );
+         token( '"' );
+     };
+
+     # Single-quoted literal, ended by a closing quote or by a newline.
+     "'" ([^'\\\n] | '\\' (any | NL))* ( "'" | NL ) => {
+         token( TK_Literal, ts, te );
+     };
+
+     # Double quote: start collecting a literal pattern.
+     '"' => {
+         token( '"' );
+         litBuf.clear();
+         fcall literal_pattern;
+     };
+     # Opening bracket: scan the bracketed region with a nested call to main.
+     '[' => {
+         token( '[' );
+         fcall main;
+     };
+
+     # Closing bracket: return from a nested call, if there is one.
+     ']' => {
+         token( ']' );
+         if ( top > 0 )
+             fret;
+     };
+
+     # Ignore.
+     pound_comment => { updateCol(); };
+
+     '=>' => { token( TK_DoubleArrow ); };
+     '==' => { token( TK_DoubleEql ); };
+     '!=' => { token( TK_NotEql ); };
+     '::' => { token( TK_DoubleColon ); };
+     '<=' => { token( TK_LessEql ); };
+     '>=' => { token( TK_GrtrEql ); };
+     '->' => { token( TK_RightArrow ); };
+     '&&' => { token( TK_AmpAmp ); };
+     '||' => { token( TK_BarBar ); };
+     '<<' => { token( TK_LtLt ); };
+
+     ( '+' | '-' | '*' | '/' | '(' | ')' | '@' | '$' | '^' ) => { token( *ts ); };
+
+
+     # Whitespace other than newline.
+     [ \t\r]+ => { updateCol(); };
+     NL => { updateCol(); };
+
+     # Consume eof.
+     EOF;
+
+     # Any other single character is sent as a token whose id is the
+     # character itself.
+     any => { token( *ts ); } ;
+ *|;
+}%%
+
+%% write data;
+
+/* Drive the lmscan machine over the whole input stream.
+ *
+ * Input is read in chunks into a heap buffer that starts at 8 bytes and
+ * doubles whenever a token in progress fills it. After each run of the
+ * machine, the text of any partially matched token (from ts onward) is moved
+ * to the front of the buffer so the next read appends to it. When a read
+ * returns no data a single zero byte is fed to the machine as the EOF marker
+ * and the loop ends.
+ *
+ * ts and te are not declared in this function; presumably they are members
+ * of ColmScanner -- TODO confirm. */
+void ColmScanner::scan()
+{
+ int bufsize = 8;
+ char *buf = new char[bufsize];
+ const char last_char = 0;
+ int cs, act, have = 0;
+ /* NOTE(review): the call stack used by fcall/fret has a fixed 32 entries and
+ * no bounds check is visible here; deeply nested brackets may exceed it --
+ * confirm whether a guard exists elsewhere. */
+ int top, stack[32];
+ bool execute = true;
+
+ sectionParseInit();
+ %% write init;
+
+ while ( execute ) {
+ /* New data goes after the bytes carried over from the last round. */
+ char *p = buf + have;
+ int space = bufsize - have;
+
+ if ( space == 0 ) {
+ /* We filled up the buffer trying to scan a token. Grow it. */
+ bufsize = bufsize * 2;
+ char *newbuf = new char[bufsize];
+
+ /* Recompute p and space. */
+ p = newbuf + have;
+ space = bufsize - have;
+
+ /* Patch up pointers possibly in use. Only the ts assignment is
+ * guarded by the null check; te is rebased unconditionally. */
+ if ( ts != 0 )
+ ts = newbuf + ( ts - buf );
+ te = newbuf + ( te - buf );
+
+ /* Copy the new buffer in. */
+ memcpy( newbuf, buf, have );
+ delete[] buf;
+ buf = newbuf;
+ }
+
+ input.read( p, space );
+ int len = input.gcount();
+
+ /* If we see eof then append the EOF char. */
+ if ( len == 0 ) {
+ p[0] = last_char, len = 1;
+ execute = false;
+ }
+
+ char *pe = p + len;
+ /* eof stays null; end of input is signalled by the zero byte above. */
+ char *eof = 0;
+ %% write exec;
+
+ /* Check if we failed. */
+ if ( cs == lmscan_error ) {
+ /* Machine failed before finding a token. I'm not yet sure if this
+ * is reachable. */
+ scan_error() << "colm scanner error (metalanguage)" << endl;
+ exit(1);
+ }
+
+ /* Decide if we need to preserve anything. */
+ char *preserve = ts;
+
+ /* Now set up the prefix. */
+ if ( preserve == 0 )
+ have = 0;
+ else {
+ /* There is data that needs to be shifted over. */
+ have = pe - preserve;
+ memmove( buf, preserve, have );
+ /* Rebase the token pointers by the distance the data moved. As above,
+ * only the ts adjustment is guarded by the null check. */
+ unsigned int shiftback = preserve - buf;
+ if ( ts != 0 )
+ ts -= shiftback;
+ te -= shiftback;
+
+ preserve = buf;
+ }
+ }
+ delete[] buf;
+}
+
+/* Send the end-of-file token to the parser. The location carries the
+ * placeholder file name "<EOF>", the current line and column 1. */
+void ColmScanner::eof()
+{
+    InputLoc eofLoc;
+    eofLoc.col = 1;
+    eofLoc.line = line;
+    eofLoc.fileName = "<EOF>";
+
+    parser->token( eofLoc, ColmParser_tk_eof, 0, 0 );
+}
diff --git a/src/loadboot2.cc b/src/loadboot2.cc
new file mode 100644
index 00000000..b2553739
--- /dev/null
+++ b/src/loadboot2.cc
@@ -0,0 +1,3 @@
+#include "gen/if2.h"
+#include "loadfinal.cc"
+
diff --git a/src/loadcolm.cc b/src/loadcolm.cc
new file mode 100644
index 00000000..f16779be
--- /dev/null
+++ b/src/loadcolm.cc
@@ -0,0 +1,2 @@
+#include "gen/if3.h"
+#include "loadfinal.cc"
diff --git a/src/loadfinal.cc b/src/loadfinal.cc
new file mode 100644
index 00000000..0fddd360
--- /dev/null
+++ b/src/loadfinal.cc
@@ -0,0 +1,2978 @@
+/*
+ * Copyright 2006-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* This file is not compiled directly, but rather included in sources. It is
+ * used for both bootstrap2 and colm, but these programs each use separate
+ * gen/if[23].h headers, so this file is included by distinct source files that
+ * each bring in the appropriate dependency, then include this file. */
+
+#include <stdbool.h>
+#include <string.h>
+#include <iostream>
+
+#include "loadfinal.h"
+
+extern colm_sections colm_object;
+
+/* Build an InputLoc from a runtime location record. A null record yields a
+ * location with a null file name and -1 for both line and column. */
+InputLoc::InputLoc( colm_location *pcloc )
+{
+    if ( pcloc == 0 ) {
+        /* No location available. */
+        fileName = 0;
+        line = -1;
+        col = -1;
+        return;
+    }
+
+    fileName = pcloc->name;
+    line = pcloc->line;
+    col = pcloc->column;
+}
+
+/* Return a copy of s with backslash escapes resolved.
+ *
+ * The sequences \0 \a \b \t \n \v \f \r become the corresponding control
+ * character. A backslash followed by any other character yields that
+ * character. A lone backslash at the very end of the string has nothing to
+ * escape and is copied through unchanged, so the character after the last
+ * one is never read.
+ *
+ * The result is never longer than the input, so the output is allocated at
+ * the input length and trimmed afterwards. */
+String unescape( const String &s )
+{
+    String out( String::Fresh(), s.length() );
+    char *d = out.data;
+
+    for ( int i = 0; i < s.length(); ) {
+        /* Ordinary character, or a trailing backslash with nothing after
+         * it: copy as-is. */
+        if ( s[i] != '\\' || i + 1 >= s.length() ) {
+            *d++ = s[i];
+            i += 1;
+            continue;
+        }
+
+        switch ( s[i+1] ) {
+            case '0': *d++ = '\0'; break;
+            case 'a': *d++ = '\a'; break;
+            case 'b': *d++ = '\b'; break;
+            case 't': *d++ = '\t'; break;
+            case 'n': *d++ = '\n'; break;
+            case 'v': *d++ = '\v'; break;
+            case 'f': *d++ = '\f'; break;
+            case 'r': *d++ = '\r'; break;
+            default: *d++ = s[i+1]; break;
+        }
+        i += 2;
+    }
+
+    /* Trim to the number of characters actually written. */
+    out.chop( d - out.data );
+    return out;
+}
+
+struct LoadColm
+:
+ public BaseParser
+{
+ /* Construct a loader. pd is passed on to the BaseParser base class.
+ * inputFileName is stored as a pointer, not copied, so the string must
+ * outlive this object. */
+ LoadColm( Compiler *pd, const char *inputFileName )
+ :
+ BaseParser( pd ),
+ inputFileName( inputFileName )
+ {}
+
+ /* The file name given at construction. */
+ const char *inputFileName;
+
+ /* Tracks the position of the first literal in a run of literals and warns
+  * when a later one starts to the left of it. */
+ struct Alignment
+ {
+     Alignment()
+         : firstLine(0), lastLine(0), firstColumn(0)
+     {}
+
+     /* firstLine doubles as the "nothing recorded yet" flag while it is 0. */
+     int firstLine;
+     int lastLine;
+     int firstColumn;
+
+     /* Record loc. The first call stores the reference position. Later
+      * calls emit a warning, labelled with type, if loc starts in a column
+      * left of the reference column. */
+     void check( const char *type, colm_location *loc )
+     {
+         if ( firstLine == 0 ) {
+             /* First literal seen: remember where it started. */
+             firstColumn = loc->column;
+             lastLine = loc->line;
+             firstLine = lastLine;
+             return;
+         }
+
+         /* Indenting further is fine, and so is coming back out to the
+          * first column. Going left of the first column is suspicious. */
+         const bool outdented = loc->column < firstColumn;
+         if ( outdented ) {
+             warning( loc ) << type << " literal outdented beyond first at " <<
+                     firstLine << ":" << firstColumn <<
+                     ", possible unintended concatenation" << std::endl;
+         }
+
+         lastLine = loc->line;
+     }
+ };
+
+
+
+ /* Build a Literal for one end of a lexical range, from either a string
+  * literal or a number. Returns 0 for any other production. */
+ Literal *walkLexRangeLit( lex_range_lit lexRangeLit )
+ {
+     switch ( lexRangeLit.prodName() ) {
+     case lex_range_lit::Lit: {
+         String text = lexRangeLit.lex_lit().data();
+         return Literal::cons( lexRangeLit.lex_lit().loc(), text, Literal::LitString );
+     }
+     case lex_range_lit::Number: {
+         String text = lexRangeLit.lex_num().text().c_str();
+         return Literal::cons( lexRangeLit.lex_num().loc(), text, Literal::Number );
+     }}
+     return 0;
+ }
+
+ /* Convert a lex_factor node into a LexFactor. Each production is handled
+  * by its own case; 0 is returned for a production not listed here. */
+ LexFactor *walkLexFactor( lex_factor lexFactor )
+ {
+     switch ( lexFactor.prodName() ) {
+     case lex_factor::Literal: {
+         String text = lexFactor.lex_lit().data();
+         Literal *lit = Literal::cons( lexFactor.lex_lit().loc(),
+                 text, Literal::LitString );
+         return LexFactor::cons( lit );
+     }
+     case lex_factor::Id: {
+         String name = lexFactor.lex_id().data();
+         return lexRlFactorName( name, lexFactor.lex_id().loc() );
+     }
+     case lex_factor::Range: {
+         /* Lower bound is walked before the upper bound. */
+         Literal *lower = walkLexRangeLit( lexFactor.Low() );
+         Literal *upper = walkLexRangeLit( lexFactor.High() );
+
+         Range *span = Range::cons( lower, upper );
+         return LexFactor::cons( span );
+     }
+     case lex_factor::PosOrBlock: {
+         ReOrBlock *orBlock = walkRegOrData( lexFactor.reg_or_data() );
+         return LexFactor::cons( ReItem::cons( orBlock, ReItem::OrBlock ) );
+     }
+     case lex_factor::NegOrBlock: {
+         ReOrBlock *orBlock = walkRegOrData( lexFactor.reg_or_data() );
+         return LexFactor::cons( ReItem::cons( orBlock, ReItem::NegOrBlock ) );
+     }
+     case lex_factor::Number: {
+         String digits = lexFactor.lex_uint().text().c_str();
+         return LexFactor::cons( Literal::cons( lexFactor.lex_uint().loc(),
+                 digits, Literal::Number ) );
+     }
+     case lex_factor::Hex: {
+         /* Hex numbers are also constructed as Literal::Number. */
+         String digits = lexFactor.lex_hex().text().c_str();
+         return LexFactor::cons( Literal::cons( lexFactor.lex_hex().loc(),
+                 digits, Literal::Number ) );
+     }
+     case lex_factor::Paren: {
+         lex_expr inner = lexFactor.lex_expr();
+         LexExpression *innerExpr = walkLexExpr( inner );
+         return LexFactor::cons( LexJoin::cons( innerExpr ) );
+     }}
+     return 0;
+ }
+
+ /* Walk a factor-with-repetition node and wrap the result in a
+  * LexFactorAug. */
+ LexFactorAug *walkLexFactorAug( lex_factor_rep LexFactorRepTree )
+ {
+     return LexFactorAug::cons( walkLexFactorRep( LexFactorRepTree ) );
+ }
+
+ /* Convert a code_expr node into a LangExpr. The && and || productions
+  * build a binary expression; the base production passes the used flag on
+  * to the relational level. Returns 0 for any other production. */
+ LangExpr *walkCodeExpr( code_expr codeExpr, bool used = true )
+ {
+     switch ( codeExpr.prodName() ) {
+     case code_expr::AmpAmp: {
+         /* The right operand is walked before the left one. */
+         LangExpr *rhs = walkCodeRelational( codeExpr.code_relational() );
+         LangExpr *lhs = walkCodeExpr( codeExpr._code_expr() );
+
+         InputLoc opLoc = codeExpr.AMPAMP().loc();
+         return LangExpr::cons( opLoc, lhs, OP_LogicalAnd, rhs );
+     }
+     case code_expr::BarBar: {
+         /* The right operand is walked before the left one. */
+         LangExpr *rhs = walkCodeRelational( codeExpr.code_relational() );
+         LangExpr *lhs = walkCodeExpr( codeExpr._code_expr() );
+
+         InputLoc opLoc = codeExpr.BARBAR().loc();
+         return LangExpr::cons( opLoc, lhs, OP_LogicalOr, rhs );
+     }
+     case code_expr::Base:
+         return walkCodeRelational( codeExpr.code_relational(), used );
+     }
+     return 0;
+ }
+
+ /* Convert one statement node into a LangStmt. Each production is handled
+ * by its own case. Returns 0 when the production matches none of them.
+ * Several cases bracket their work with pushScope()/popScope(); the order
+ * of the calls inside those brackets is part of the behaviour. */
+ LangStmt *walkStatement( statement Statement )
+ {
+ LangStmt *stmt = 0;
+ switch ( Statement.prodName() ) {
+ case statement::Print: {
+ print_stmt printStmt = Statement.print_stmt();
+ stmt = walkPrintStmt( printStmt );
+ break;
+ }
+ case statement::VarDef: {
+ /* Local variable definition with an optional initializer. */
+ ObjectField *objField = walkVarDef( Statement.var_def(),
+ ObjectField::UserLocalType );
+ LangExpr *expr = walkOptDefInit( Statement.opt_def_init() );
+ stmt = varDef( objField, expr, LangStmt::AssignType );
+ break;
+ }
+ case statement::For: {
+ pushScope();
+
+ /* The loop body is walked before the iterator call. */
+ String forDecl = Statement.id().text().c_str();
+ TypeRef *typeRef = walkTypeRef( Statement.type_ref() );
+ StmtList *stmtList = walkBlockOrSingle( Statement.block_or_single() );
+
+ IterCall *iterCall = walkIterCall( Statement.iter_call() );
+
+ stmt = forScope( Statement.id().loc(), forDecl,
+ curScope(), typeRef, iterCall, stmtList );
+
+ popScope();
+ break;
+ }
+ case statement::If: {
+ pushScope();
+
+ LangExpr *expr = walkCodeExpr( Statement.code_expr() );
+ StmtList *stmtList = walkBlockOrSingle( Statement.block_or_single() );
+
+ popScope();
+
+ /* The elsif list is walked after the scope has been popped. */
+ LangStmt *elsifList = walkElsifList( Statement.elsif_list() );
+ stmt = LangStmt::cons( LangStmt::IfType, expr, stmtList, elsifList );
+ break;
+ }
+ /* Both switch productions are handled the same way. */
+ case statement::SwitchUnder:
+ case statement::SwitchBlock: {
+ pushScope();
+ stmt = walkCaseClauseList( Statement.case_clause_list(), Statement.var_ref() );
+ popScope();
+ break;
+ }
+ case statement::While: {
+ pushScope();
+ LangExpr *expr = walkCodeExpr( Statement.code_expr() );
+ StmtList *stmtList = walkBlockOrSingle( Statement.block_or_single() );
+ stmt = LangStmt::cons( LangStmt::WhileType, expr, stmtList );
+ popScope();
+ break;
+ }
+ case statement::LhsVarRef: {
+ /* Assignment to a variable reference. */
+ LangVarRef *varRef = walkVarRef( Statement.var_ref() );
+ LangExpr *expr = walkCodeExpr( Statement.code_expr() );
+ stmt = LangStmt::cons( varRef->loc, LangStmt::AssignType, varRef, expr );
+ break;
+ }
+ case statement::Yield: {
+ LangVarRef *varRef = walkVarRef( Statement.var_ref() );
+ stmt = LangStmt::cons( LangStmt::YieldType, varRef );
+ break;
+ }
+ case statement::Return: {
+ LangExpr *expr = walkCodeExpr( Statement.code_expr() );
+ stmt = LangStmt::cons( Statement.loc(), LangStmt::ReturnType, expr );
+ break;
+ }
+ case statement::Break: {
+ stmt = LangStmt::cons( LangStmt::BreakType );
+ break;
+ }
+ case statement::Reject: {
+ stmt = LangStmt::cons( Statement.REJECT().loc(), LangStmt::RejectType );
+ break;
+ }
+ case statement::Call: {
+ /* A call used as a statement: wrap the call term in an expression
+ * statement. */
+ LangVarRef *langVarRef = walkVarRef( Statement.var_ref() );
+ CallArgVect *exprVect = walkCallArgList( Statement.call_arg_list() );
+ LangTerm *term = LangTerm::cons( langVarRef->loc, langVarRef, exprVect );
+ LangExpr *expr = LangExpr::cons( term );
+ stmt = LangStmt::cons( expr->loc, LangStmt::ExprType, expr );
+ break;
+ }
+ case statement::StmtOrFactor: {
+ LangExpr *expr = walkStmtOrFactor( Statement.stmt_or_factor() );
+ stmt = LangStmt::cons( expr->loc, LangStmt::ExprType, expr );
+ break;
+ }
+ case statement::BareSend: {
+ /* A send with no explicit target: the target is a variable reference
+ * named "_" built with a default location, the current namespace and
+ * an empty qualifier list. */
+ NamespaceQual *nspaceQual = NamespaceQual::cons( curNspace() );
+ QualItemVect *qualItemVect = new QualItemVect;
+
+ LangVarRef *varRef = LangVarRef::cons( InputLoc(),
+ curNspace(), curStruct(), curScope(), nspaceQual,
+ qualItemVect, String("_") );
+
+ ConsItemList *list = walkAccumulate( Statement.accumulate() );
+ bool eof = walkOptEos( Statement.opt_eos() );
+ LangExpr *expr = send( InputLoc(), varRef, list, eof );
+ stmt = LangStmt::cons( expr->loc, LangStmt::ExprType, expr );
+ break;
+ }
+ }
+ return stmt;
+ }
+
+ /* Convert a statement list node into a StmtList. Statements that walk to
+  * a null result are dropped. If the list ends in a require clause, the
+  * clause becomes a trailing if-statement whose condition is the pattern
+  * match and whose body is the remaining statements. */
+ StmtList *walkLangStmtList( lang_stmt_list langStmtList )
+ {
+     StmtList *stmts = new StmtList;
+
+     /* The plain statements, in order. */
+     for ( RepeatIter<statement> it( langStmtList.StmtList() ); !it.end(); it.next() ) {
+         statement cur = it.value();
+         LangStmt *walked = walkStatement( cur );
+         if ( walked != 0 )
+             stmts->append( walked );
+     }
+
+     /* The optional trailing require clause. */
+     require_pattern require = langStmtList.opt_require_stmt().require_pattern();
+     if ( require != 0 ) {
+         pushScope();
+
+         LangVarRef *subject = walkVarRef( require.var_ref() );
+         PatternItemList *patItems = walkPattern( require.pattern(), subject );
+         LangExpr *cond = match( require.REQUIRE().loc(), subject, patItems );
+
+         StmtList *body = walkLangStmtList(
+                 langStmtList.opt_require_stmt().lang_stmt_list() );
+
+         LangStmt *ifStmt = LangStmt::cons( LangStmt::IfType, cond, body, 0 );
+
+         popScope();
+
+         stmts->append( ifStmt );
+     }
+
+     return stmts;
+ }
+
+ /* Register a token definition: its name, no-ignore flags, attribute
+  * object, optional lexical expression and optional translate block. */
+ void walkTokenDef( token_def TokenDef )
+ {
+     String tokenName = TokenDef.id().data();
+
+     bool noIgnLeft = walkNoIgnoreLeft( TokenDef.no_ignore_left() );
+     bool noIgnRight = walkNoIgnoreRight( TokenDef.no_ignore_right() );
+
+     ObjectDef *attrs = walkVarDefList( TokenDef.VarDefList() );
+     attrs->name = tokenName;
+
+     /* The lexical expression is optional; join stays null without one. */
+     LexJoin *join = 0;
+     if ( TokenDef.opt_lex_expr().lex_expr() != 0 )
+         join = LexJoin::cons( walkLexExpr( TokenDef.opt_lex_expr().lex_expr() ) );
+
+     CodeBlock *translate = walkOptTranslate( TokenDef.opt_translate() );
+
+     defineToken( TokenDef.id().loc(), tokenName, join, attrs,
+             translate, false, noIgnLeft, noIgnRight );
+ }
+
+ /* Register an ignore collector under its identifier via zeroDef(). */
+ void walkIgnoreCollector( ic_def IgnoreCollector )
+ {
+     String name = IgnoreCollector.id().data();
+     zeroDef( IgnoreCollector.id().loc(), name );
+ }
+
+ /* Return the identifier text of an optional id, or a default-constructed
+  * String when the id is absent. */
+ String walkOptId( opt_id optId )
+ {
+     String name;
+     const bool present = ( optId.prodName() == opt_id::Id );
+     if ( !present )
+         return name;
+
+     name = optId.id().data();
+     return name;
+ }
+
+ /* Create a new, unnamed user-type ObjectDef (consuming the next object id)
+  * and add one user field to it for every variable definition in the
+  * list. */
+ ObjectDef *walkVarDefList( _lrepeat_var_def varDefList )
+ {
+     ObjectDef *obj = ObjectDef::cons( ObjectDef::UserType,
+             String(), pd->nextObjectId++ );
+
+     for ( RepeatIter<var_def> it( varDefList ); !it.end(); it.next() ) {
+         ObjectField *field = walkVarDef( it.value(),
+                 ObjectField::UserFieldType );
+         objVarDef( obj, field );
+     }
+
+     return obj;
+ }
+
+ /* Register a pre-eof block: open a block, walk its statements, hand the
+  * statements and the local frame to preEof(), then close the block. */
+ void walkPreEof( pre_eof_def PreEofDef )
+ {
+     ObjectDef *frame = blockOpen();
+     StmtList *body = walkLangStmtList( PreEofDef.lang_stmt_list() );
+     preEof( PreEofDef.PREEOF().loc(), body, frame );
+     blockClose();
+ }
+
+ /* Register an ignore definition. The name is optional, and so is the
+  * lexical expression. The definition is passed to defineToken() with the
+  * ignore flag set and no translate block. */
+ void walkIgnoreDef( ignore_def IgnoreDef )
+ {
+     String ignName = walkOptId( IgnoreDef.opt_id() );
+     ObjectDef *attrs = ObjectDef::cons( ObjectDef::UserType,
+             ignName, pd->nextObjectId++ );
+
+     /* join stays null when no expression was given. */
+     LexJoin *join = 0;
+     if ( IgnoreDef.opt_lex_expr().lex_expr() != 0 )
+         join = LexJoin::cons( walkLexExpr( IgnoreDef.opt_lex_expr().lex_expr() ) );
+
+     defineToken( IgnoreDef.IGNORE().loc(), ignName, join, attrs,
+             0, true, false, false );
+ }
+
+ /* Convert a multiplicative expression node into a LangExpr. The * and /
+  * productions build a binary expression; the base production passes the
+  * used flag on to the unary level. Returns 0 for any other production. */
+ LangExpr *walkCodeMultiplicitive( code_multiplicitive mult, bool used = true )
+ {
+     switch ( mult.prodName() ) {
+     case code_multiplicitive::Star: {
+         /* The right operand is walked before the left one. */
+         LangExpr *rhs = walkCodeUnary( mult.code_unary() );
+         LangExpr *lhs = walkCodeMultiplicitive( mult._code_multiplicitive() );
+         return LangExpr::cons( mult.STAR().loc(), lhs, '*', rhs );
+     }
+     case code_multiplicitive::Fslash: {
+         /* The right operand is walked before the left one. */
+         LangExpr *rhs = walkCodeUnary( mult.code_unary() );
+         LangExpr *lhs = walkCodeMultiplicitive( mult._code_multiplicitive() );
+         return LangExpr::cons( mult.FSLASH().loc(), lhs, '/', rhs );
+     }
+     case code_multiplicitive::Base:
+         return walkCodeUnary( mult.code_unary(), used );
+     }
+     return 0;
+ }
+
+ /* Build the pattern items for a pattern element that names either a type
+  * (by identifier) or a backtick literal. The region qualifier and the
+  * repeat marker are walked first and apply to both forms. Returns 0 for
+  * any other production. */
+ PatternItemList *walkPatternElTypeOrLit( pattern_el_lel typeOrLit,
+         LangVarRef *patternVarRef )
+ {
+     NamespaceQual *qual = walkRegionQual( typeOrLit.region_qual() );
+     RepeatType repeat = walkOptRepeat( typeOrLit.opt_repeat() );
+
+     switch ( typeOrLit.prodName() ) {
+     case pattern_el_lel::Id: {
+         String name = typeOrLit.id().data();
+         return patternElNamed( typeOrLit.id().loc(), patternVarRef,
+                 qual, name, repeat );
+     }
+     case pattern_el_lel::Lit: {
+         String text = typeOrLit.backtick_lit().data();
+         return patternElType( typeOrLit.backtick_lit().loc(), patternVarRef,
+                 qual, text, repeat );
+     }}
+
+     return 0;
+ }
+
+ /* Build a variable reference for an optional label, resolved against the
+  * current namespace, struct and scope. Returns 0 when no label is given. */
+ LangVarRef *walkOptLabel( opt_label optLabel )
+ {
+     if ( optLabel.prodName() != opt_label::Id )
+         return 0;
+
+     String label = optLabel.id().data();
+     return LangVarRef::cons( optLabel.id().loc(),
+             curNspace(), curStruct(), curScope(), label );
+ }
+
+ /* Build the pattern items for one pattern element: a double-quoted
+  * literal, a single-quoted literal, a tilde line, or a typed/literal
+  * element with an optional label. Returns 0 for any other production. */
+ PatternItemList *walkPatternEl( pattern_el patternEl, LangVarRef *patternVarRef )
+ {
+     switch ( patternEl.prodName() ) {
+     case pattern_el::Dq:
+         return walkLitpatElList( patternEl.LitpatElList(),
+                 patternEl.dq_lit_term().LIT_DQ_NL(), patternVarRef );
+
+     case pattern_el::Sq:
+         return walkPatSqConsDataList( patternEl.SqConsDataList(),
+                 patternEl.sq_lit_term().CONS_SQ_NL() );
+
+     case pattern_el::Tilde: {
+         /* The tilde text gets a newline appended. */
+         String text = patternEl.opt_tilde_data().text().c_str();
+         text += '\n';
+         PatternItem *item = PatternItem::cons( PatternItem::InputTextForm,
+                 patternEl.opt_tilde_data().loc(), text );
+         return PatternItemList::cons( item );
+     }
+     case pattern_el::PatternEl: {
+         /* The element is walked before its label. */
+         PatternItemList *element = walkPatternElTypeOrLit(
+                 patternEl.pattern_el_lel(), patternVarRef );
+         LangVarRef *label = walkOptLabel( patternEl.opt_label() );
+         return consPatternEl( label, element );
+     }}
+     return 0;
+ }
+
+ /* Build the pattern items for one piece of a double-quoted literal
+  * pattern: either a run of text (unescaped) or an embedded sub-list of
+  * pattern elements. Returns 0 for any other production. */
+ PatternItemList *walkLitpatEl( litpat_el litpatEl, LangVarRef *patternVarRef )
+ {
+     switch ( litpatEl.prodName() ) {
+     case litpat_el::ConsData: {
+         String text = unescape( litpatEl.lit_dq_data().text().c_str() );
+         PatternItem *item = PatternItem::cons( PatternItem::InputTextForm,
+                 litpatEl.lit_dq_data().loc(), text );
+         return PatternItemList::cons( item );
+     }
+     case litpat_el::SubList:
+         return walkPatternElList( litpatEl.PatternElList(), patternVarRef );
+     }
+     return 0;
+ }
+
+ /* Build pattern items from the pieces of a single-quoted literal. Every
+  * piece is unescaped and becomes one input-text item. If the literal was
+  * terminated by a newline token (Nl is non-null), that text is appended
+  * as a final item. */
+ PatternItemList *walkPatSqConsDataList( _lrepeat_sq_cons_data sqConsDataList, CONS_SQ_NL Nl )
+ {
+     PatternItemList *items = new PatternItemList;
+
+     for ( RepeatIter<sq_cons_data> it( sqConsDataList ); !it.end(); it.next() ) {
+         String text = unescape( it.value().text().c_str() );
+         PatternItem *item = PatternItem::cons( PatternItem::InputTextForm,
+                 it.value().loc(), text );
+         items = patListConcat( items, PatternItemList::cons( item ) );
+     }
+
+     if ( Nl != 0 ) {
+         String text = unescape( Nl.data() );
+         PatternItem *item = PatternItem::cons( PatternItem::InputTextForm,
+                 Nl.loc(), text );
+         items = patListConcat( items, PatternItemList::cons( item ) );
+     }
+
+     return items;
+ }
+
+ /* Build constructor items from the pieces of a single-quoted literal.
+  * Every piece is unescaped and becomes one input-text item. If the
+  * literal was terminated by a newline token (Nl is non-null), that text
+  * is appended as a final item. */
+ ConsItemList *walkConsSqConsDataList( _lrepeat_sq_cons_data sqConsDataList, CONS_SQ_NL Nl )
+ {
+     ConsItemList *items = new ConsItemList;
+
+     for ( RepeatIter<sq_cons_data> it( sqConsDataList ); !it.end(); it.next() ) {
+         String text = unescape( it.value().text().c_str() );
+         ConsItem *item = ConsItem::cons(
+                 it.value().loc(), ConsItem::InputText, text );
+         items = consListConcat( items, ConsItemList::cons( item ) );
+     }
+
+     if ( Nl != 0 ) {
+         String text = unescape( Nl.data() );
+         ConsItem *item = ConsItem::cons(
+                 Nl.loc(), ConsItem::InputText, text );
+         items = consListConcat( items, ConsItemList::cons( item ) );
+     }
+
+     return items;
+ }
+
+ /* Build pattern items from the pieces of a double-quoted literal pattern.
+  * Each piece is walked and concatenated in order. If the literal was
+  * terminated by a newline token (Nl is non-null), that text is appended
+  * as a final input-text item. */
+ PatternItemList *walkLitpatElList( _lrepeat_litpat_el litpatElList, LIT_DQ_NL Nl,
+         LangVarRef *patternVarRef )
+ {
+     PatternItemList *items = new PatternItemList;
+
+     for ( RepeatIter<litpat_el> it( litpatElList ); !it.end(); it.next() ) {
+         PatternItemList *piece = walkLitpatEl( it.value(), patternVarRef );
+         items = patListConcat( items, piece );
+     }
+
+     if ( Nl != 0 ) {
+         String text = unescape( Nl.data() );
+         PatternItem *item = PatternItem::cons( PatternItem::InputTextForm,
+                 Nl.loc(), text );
+         items = patListConcat( items, PatternItemList::cons( item ) );
+     }
+
+     return items;
+ }
+
+ /* Walk every pattern element in the list and concatenate the resulting
+  * items in order. */
+ PatternItemList *walkPatternElList( _lrepeat_pattern_el patternElList,
+         LangVarRef *patternVarRef )
+ {
+     PatternItemList *items = new PatternItemList;
+
+     for ( RepeatIter<pattern_el> it( patternElList ); !it.end(); it.next() ) {
+         PatternItemList *piece = walkPatternEl( it.value(), patternVarRef );
+         items = patListConcat( items, piece );
+     }
+     return items;
+ }
+
+ /* Build the pattern items for one top-level pattern element: a
+  * double-quoted literal, a single-quoted literal or a tilde line. Returns
+  * 0 for any other production. */
+ PatternItemList *walkPattternTopEl( pattern_top_el patternTopEl,
+         LangVarRef *patternVarRef )
+ {
+     switch ( patternTopEl.prodName() ) {
+     case pattern_top_el::Dq:
+         return walkLitpatElList( patternTopEl.LitpatElList(),
+                 patternTopEl.dq_lit_term().LIT_DQ_NL(), patternVarRef );
+
+     case pattern_top_el::Sq:
+         return walkPatSqConsDataList( patternTopEl.SqConsDataList(),
+                 patternTopEl.sq_lit_term().CONS_SQ_NL() );
+
+     case pattern_top_el::Tilde: {
+         /* The tilde text gets a newline appended. */
+         String text = patternTopEl.opt_tilde_data().text().c_str();
+         text += '\n';
+         PatternItem *item = PatternItem::cons( PatternItem::InputTextForm,
+                 patternTopEl.opt_tilde_data().loc(), text );
+         return PatternItemList::cons( item );
+     }}
+     return 0;
+ }
+
+ /* Walk a list of top-level pattern elements and concatenate their items.
+  * Each element location is first run through an Alignment check labelled
+  * "pattern". */
+ PatternItemList *walkPatternList( pattern_list patternList, LangVarRef *patternVarRef )
+ {
+     Alignment alignment;
+     PatternItemList *items = new PatternItemList;
+
+     for ( RepeatIter<pattern_top_el> it( patternList ); !it.end(); it.next() ) {
+         pattern_top_el el = it.value();
+         alignment.check( "pattern", el.loc() );
+
+         PatternItemList *piece = walkPattternTopEl( el, patternVarRef );
+         items = patListConcat( items, piece );
+     }
+     return items;
+ }
+
+ /* Walk a pattern. The TopList production is a list of top-level
+  * elements; any other production is treated as a plain element list. */
+ PatternItemList *walkPattern( pattern Pattern, LangVarRef *patternVarRef )
+ {
+     const bool isTopList = ( Pattern.prodName() == pattern::TopList );
+     if ( !isTopList )
+         return walkPatternElList( Pattern.PatternElList(), patternVarRef );
+
+     return walkPatternList( Pattern.pattern_list(), patternVarRef );
+ }
+
+ /* Walk an optional initializer. Returns the initializer expression, or 0
+  * when none was given. */
+ LangExpr *walkOptDefInit( opt_def_init optDefInit )
+ {
+     if ( optDefInit.prodName() != opt_def_init::Init )
+         return 0;
+
+     return walkCodeExpr( optDefInit.code_expr() );
+ }
+
+ /* Build the statement for an export definition: a struct-field variable
+  * with an optional initializer. */
+ LangStmt *walkExportDef( export_def exportDef )
+ {
+     ObjectField *field = walkVarDef( exportDef.var_def(),
+             ObjectField::StructFieldType );
+     LangExpr *init = walkOptDefInit( exportDef.opt_def_init() );
+
+     return exportStmt( field, LangStmt::AssignType, init );
+ }
+
+ /* Build the statement for a global definition: a struct-field variable
+  * with an optional initializer. */
+ LangStmt *walkGlobalDef( global_def GlobalDef )
+ {
+     ObjectField *field = walkVarDef( GlobalDef.var_def(),
+             ObjectField::StructFieldType );
+     LangExpr *init = walkOptDefInit( GlobalDef.opt_def_init() );
+
+     return globalDef( field, init, LangStmt::AssignType );
+ }
+
+ /* Register a type alias: the identifier is bound to the walked type
+  * reference via alias(). */
+ void walkAliasDef( alias_def aliasDef )
+ {
+     String name = aliasDef.id().data();
+     TypeRef *target = walkTypeRef( aliasDef.type_ref() );
+     alias( aliasDef.id().loc(), name, target );
+ }
+
+ /* Walk an optional translate block. When present, its statements are
+  * walked inside an opened block and wrapped in a CodeBlock whose context
+  * is the current struct. Returns 0 when no translate block is given. */
+ CodeBlock *walkOptTranslate( opt_translate optTranslate )
+ {
+     if ( optTranslate.prodName() != opt_translate::Translate )
+         return 0;
+
+     ObjectDef *frame = blockOpen();
+     StmtList *body = walkLangStmtList( optTranslate.lang_stmt_list() );
+     CodeBlock *code = CodeBlock::cons( body, frame );
+     code->context = curStruct();
+     blockClose();
+
+     return code;
+ }
+
+ /* Build a precedence declaration for one token, named either by
+  * identifier or by backtick literal. The region qualifier is walked
+  * first. Returns 0 for any other production. */
+ PredDecl *walkPredToken( pred_token predToken )
+ {
+     NamespaceQual *qual = walkRegionQual( predToken.region_qual() );
+
+     switch ( predToken.prodName() ) {
+     case pred_token::Id: {
+         String name = predToken.id().data();
+         return predTokenName( predToken.id().loc(), qual, name );
+     }
+     case pred_token::Lit: {
+         String text = predToken.backtick_lit().data();
+         return predTokenLit( predToken.backtick_lit().loc(), text, qual );
+     }}
+     return 0;
+ }
+
+ /* Walk a left-recursive list of precedence tokens into a PredDeclList.
+  * The nested list is walked before the token at this level, so the
+  * declarations are appended in source order. */
+ PredDeclList *walkPredTokenList( pred_token_list predTokenList )
+ {
+     PredDeclList *decls = 0;
+     switch ( predTokenList.prodName() ) {
+         case pred_token_list::Base: {
+             PredDecl *only = walkPredToken( predTokenList.pred_token() );
+             decls = new PredDeclList;
+             decls->append( only );
+             break;
+         }
+         case pred_token_list::List: {
+             decls = walkPredTokenList( predTokenList._pred_token_list() );
+             PredDecl *last = walkPredToken( predTokenList.pred_token() );
+             decls->append( last );
+             break;
+         }
+     }
+     return decls;
+ }
+
+ /* Map a precedence-type parse node onto the PredType enumeration.
+  * PredLeft is the result for Left and for any unlisted production. */
+ PredType walkPredType( pred_type predType )
+ {
+     switch ( predType.prodName() ) {
+         case pred_type::Right:
+             return PredRight;
+         case pred_type::NonAssoc:
+             return PredNonassoc;
+         case pred_type::Left:
+             break;
+     }
+     return PredLeft;
+ }
+
+ /* Walk a precedence definition: the associativity is walked first, then
+  * the token list, and both are handed to precedenceStmt(). */
+ void walkPrecedenceDef( precedence_def precedenceDef )
+ {
+     PredType assoc = walkPredType( precedenceDef.pred_type() );
+     PredDeclList *tokens =
+             walkPredTokenList( precedenceDef.pred_token_list() );
+
+     precedenceStmt( assoc, tokens );
+ }
+
+ /*
+  * Walk an include statement.
+  *
+  * The file name is assembled from the pieces of the single-quoted literal
+  * and unescaped. If it cannot be read as given, each entry of includePaths
+  * is tried as a directory prefix and the first readable candidate wins. A
+  * colm program built from colm_object is then run on the file and the
+  * root item list of the resulting tree is walked.
+  *
+  * Returns the walked statement list, or null if the included file did not
+  * parse. The program is deleted on both paths.
+  */
+ StmtList *walkInclude( _include Include )
+ {
+     /* Join the data pieces of the literal into one string. */
+     String lit = "";
+     _lrepeat_sq_cons_data sqConsDataList = Include.SqConsDataList();
+
+     RepeatIter<sq_cons_data> sqConsDataIter( sqConsDataList );
+
+     while ( !sqConsDataIter.end() ) {
+         colm_data *data = sqConsDataIter.value().data();
+         lit.append( data->data, data->length );
+         sqConsDataIter.next();
+     }
+
+     String file = unescape( lit );
+
+     /* Check if we can open the input file for reading. */
+     if ( ! readCheck( file.data ) ) {
+
+         /* Not readable as given; search the include paths in order. */
+         bool found = false;
+         for ( ArgsVector::Iter av = includePaths; av.lte(); av++ ) {
+             String path = String( *av ) + "/" + file;
+             if ( readCheck( path.data ) ) {
+                 found = true;
+                 file = path;
+                 break;
+             }
+         }
+
+         if ( !found )
+             error() << "could not open " << file.data << " for reading" << endp;
+     }
+
+     const char *argv[3];
+     argv[0] = "load-include";
+     argv[1] = file.data;
+     argv[2] = 0;
+
+     colm_program *program = colm_new_program( &colm_object );
+     colm_run_program( program, 2, argv );
+
+     /* Extract the parse tree. */
+     start Start = ColmTree( program );
+     str Error = ColmError( program );
+
+     if ( Start == 0 ) {
+         gblErrorCount += 1;
+         InputLoc loc = Error.loc();
+         error(loc) << file.data << ": parse error: " << Error.text() << std::endl;
+
+         /* The error has been reported and nothing from the program is
+          * used past this point, so release it here rather than leaking
+          * it on the failure path. */
+         colm_delete_program( program );
+         return 0;
+     }
+
+     StmtList *stmtList = walkRootItemList( Start.RootItemList() );
+
+     /* Take the stream file names out of the program before deleting it. */
+     pd->streamFileNames.append( colm_extract_fns( program ) );
+     colm_delete_program( program );
+     return stmtList;
+ }
+
+
+ /* Walk a region qualification. The Base production starts a qualifier at
+  * the current namespace; each Qual level appends its identifier after the
+  * nested qualification has been walked. */
+ NamespaceQual *walkRegionQual( region_qual regionQual )
+ {
+     switch ( regionQual.prodName() ) {
+         case region_qual::Base:
+             return NamespaceQual::cons( curNspace() );
+         case region_qual::Qual: {
+             NamespaceQual *outer = walkRegionQual( regionQual._region_qual() );
+             outer->qualNames.append( String( regionQual.id().data() ) );
+             return outer;
+         }
+     }
+     return 0;
+ }
+
+ /* Map an optional repeat operator onto the RepeatType enumeration.
+  * Productions not listed here yield RepeatNone. */
+ RepeatType walkOptRepeat( opt_repeat OptRepeat )
+ {
+     switch ( OptRepeat.prodName() ) {
+         case opt_repeat::Star:
+             return RepeatRepeat;
+         case opt_repeat::Plus:
+             return RepeatList;
+         case opt_repeat::Question:
+             return RepeatOpt;
+         case opt_repeat::LeftStar:
+             return RepeatLeftRepeat;
+         case opt_repeat::LeftPlus:
+             return RepeatLeftList;
+     }
+     return RepeatNone;
+ }
+
+ /* Build a List type reference: the value type is walked, a ListEl
+  * reference is built around it, and both are passed to the List
+  * constructor. */
+ TypeRef *walkValueList( type_ref typeRef )
+ {
+     TypeRef *valueType = walkTypeRef( typeRef._type_ref() );
+     TypeRef *elementType =
+             TypeRef::cons( typeRef.loc(), TypeRef::ListEl, valueType );
+
+     return TypeRef::cons( typeRef.loc(), TypeRef::List, 0,
+             elementType, valueType );
+ }
+
+ /* Build a ListEl type reference around the walked value type. */
+ TypeRef *walkListEl( type_ref typeRef )
+ {
+     TypeRef *valueType = walkTypeRef( typeRef._type_ref() );
+
+     return TypeRef::cons( typeRef.loc(), TypeRef::ListEl, valueType );
+ }
+
+ /* Build a Map type reference. The key type is walked before the value
+  * type; a MapEl reference is built from the pair and all three are passed
+  * to the Map constructor. */
+ TypeRef *walkValueMap( type_ref typeRef )
+ {
+     TypeRef *key = walkTypeRef( typeRef.KeyType() );
+     TypeRef *value = walkTypeRef( typeRef.ValType() );
+
+     TypeRef *element = TypeRef::cons( typeRef.loc(),
+             TypeRef::MapEl, 0, key, value );
+
+     return TypeRef::cons( typeRef.loc(), TypeRef::Map, 0,
+             key, element, value );
+ }
+
+ /* Build a MapEl type reference from the walked key and value types (key
+  * walked first). */
+ TypeRef *walkMapEl( type_ref typeRef )
+ {
+     TypeRef *key = walkTypeRef( typeRef.KeyType() );
+     TypeRef *value = walkTypeRef( typeRef.ValType() );
+     return TypeRef::cons( typeRef.loc(), TypeRef::MapEl, 0, key, value );
+ }
+
+ /* Walk a type reference and build the matching TypeRef. Named types carry
+  * a namespace qualification and a repeat type; int, bool and void map to
+  * the program's unique types; the parser, list and map forms are built
+  * from their nested type references. Returns null for an unlisted
+  * production. */
+ TypeRef *walkTypeRef( type_ref typeRef )
+ {
+     switch ( typeRef.prodName() ) {
+         case type_ref::Id: {
+             NamespaceQual *qual = walkRegionQual( typeRef.region_qual() );
+             String name = typeRef.id().data();
+             RepeatType repeat = walkOptRepeat( typeRef.opt_repeat() );
+             return TypeRef::cons( typeRef.id().loc(), qual, name, repeat );
+         }
+         case type_ref::Int:
+             return TypeRef::cons( internal, pd->uniqueTypeInt );
+         case type_ref::Bool:
+             return TypeRef::cons( internal, pd->uniqueTypeBool );
+         case type_ref::Void:
+             return TypeRef::cons( internal, pd->uniqueTypeVoid );
+         case type_ref::Parser: {
+             TypeRef *parsed = walkTypeRef( typeRef._type_ref() );
+             return TypeRef::cons( typeRef.loc(), TypeRef::Parser, 0, parsed, 0 );
+         }
+         case type_ref::List:
+             return walkValueList( typeRef );
+         case type_ref::Map:
+             return walkValueMap( typeRef );
+         case type_ref::ListEl:
+             return walkListEl( typeRef );
+         case type_ref::MapEl:
+             return walkMapEl( typeRef );
+     }
+     return 0;
+ }
+
+ /* Walk either a single statement or a statement block into a StmtList.
+  * A single statement is wrapped in a new one-element list. */
+ StmtList *walkBlockOrSingle( block_or_single blockOrSingle )
+ {
+     switch ( blockOrSingle.prodName() ) {
+         case block_or_single::Block:
+             return walkLangStmtList( blockOrSingle.lang_stmt_list() );
+         case block_or_single::Single: {
+             StmtList *single = new StmtList;
+             LangStmt *only = walkStatement( blockOrSingle.statement() );
+             single->append( only );
+             return single;
+         }
+     }
+     return 0;
+ }
+
+ /*
+  * Walk one production element and append it to list.
+  *
+  * The capture field is named explicitly when the element carries a name.
+  * Otherwise, for an Id element, the name is derived from the identifier:
+  * a prefix reflecting the repeat operator is added, or a single underscore
+  * when there is no repeat operator and the identifier equals defName.
+  * Literal elements without an explicit name get no capture field.
+  */
+ void walkProdEl( const String &defName, ProdElList *list, prod_el El )
+ {
+     ObjectField *capture = 0;
+     if ( El.opt_prod_el_name().prodName() == opt_prod_el_name::Name ) {
+         String given = El.opt_prod_el_name().id().data();
+         capture = ObjectField::cons( El.opt_prod_el_name().id().loc(),
+                 ObjectField::RhsNameType, 0, given );
+     }
+     else if ( El.prodName() == prod_el::Id ) {
+         /* default the prod name. */
+         String derived = El.id().data();
+         switch ( El.opt_repeat().prodName() ) {
+             case opt_repeat::Star:
+                 derived = "_repeat_" + derived;
+                 break;
+             case opt_repeat::LeftStar:
+                 derived = "_lrepeat_" + derived;
+                 break;
+             case opt_repeat::Plus:
+                 derived = "_list_" + derived;
+                 break;
+             case opt_repeat::LeftPlus:
+                 derived = "_llist_" + derived;
+                 break;
+             case opt_repeat::Question:
+                 derived = "_opt_" + derived;
+                 break;
+             default:
+                 if ( strcmp( derived, defName ) == 0 )
+                     derived = "_" + derived;
+                 break;
+         }
+         capture = ObjectField::cons( El.id().loc(),
+                 ObjectField::RhsNameType, 0, derived );
+     }
+
+     RepeatType repeat = walkOptRepeat( El.opt_repeat() );
+     switch ( El.prodName() ) {
+         case prod_el::Id: {
+             NamespaceQual *qual = walkRegionQual( El.region_qual() );
+
+             String typeName = El.id().data();
+             ProdEl *named = prodElName( El.id().loc(), typeName,
+                     qual, capture, repeat, false );
+             appendProdEl( list, named );
+             break;
+         }
+         case prod_el::Lit: {
+             NamespaceQual *qual = walkRegionQual( El.region_qual() );
+
+             String text = El.backtick_lit().data();
+             ProdEl *literal = prodElLiteral( El.backtick_lit().loc(), text,
+                     qual, capture, repeat, false );
+             appendProdEl( list, literal );
+             break;
+         }
+         case prod_el::SubList: {
+             error( El.POPEN().loc() ) << "production sublist is implemented as a "
+                     "colm transformation, it is not accepted at this stage" << endp;
+         }
+     }
+ }
+
+ /* Walk a production element list. The nested list is walked before the
+  * element at this level; non-List productions contribute nothing. */
+ void walkProdElList( const String &defName, ProdElList *list, prod_el_list ProdElList )
+ {
+     if ( ProdElList.prodName() != prod_el_list::List )
+         return;
+
+     prod_el_list nested = ProdElList._prod_el_list();
+     walkProdElList( defName, list, nested );
+     walkProdEl( defName, list, ProdElList.prod_el() );
+ }
+
+ /* Walk an optional reduce block. For the Reduce production the statement
+  * list is walked inside a freshly opened block and wrapped in a CodeBlock
+  * whose context is the current struct; otherwise null. */
+ CodeBlock *walkOptReduce( opt_reduce OptReduce )
+ {
+     if ( OptReduce.prodName() != opt_reduce::Reduce )
+         return 0;
+
+     ObjectDef *frame = blockOpen();
+     StmtList *stmts = walkLangStmtList( OptReduce.lang_stmt_list() );
+
+     CodeBlock *reduce = CodeBlock::cons( stmts, frame );
+     reduce->context = curStruct();
+
+     blockClose();
+     return reduce;
+ }
+
+ /* Walk a single production of definition defName and append it to
+  * lelDefList. Collects the element list, the optional production name,
+  * the optional reduce block and the commit flag, in that order. */
+ void walkProdudction( const String &defName, LelDefList *lelDefList, prod Prod )
+ {
+     ProdElList *elements = new ProdElList;
+     walkProdElList( defName, elements, Prod.prod_el_list() );
+
+     /* The production name stays empty unless one was given. */
+     String prodName;
+     if ( Prod.opt_prod_name().prodName() == opt_prod_name::Name )
+         prodName = Prod.opt_prod_name().id().data();
+
+     CodeBlock *reduceBlock = walkOptReduce( Prod.opt_reduce() );
+     bool isCommit = Prod.opt_commit().prodName() == opt_commit::Commit;
+
+     Production *built = BaseParser::production( Prod.SQOPEN().loc(),
+             elements, prodName, isCommit, reduceBlock, 0 );
+     prodAppend( lelDefList, built );
+ }
+
+ /* Walk a left-recursive production list. The nested list is handled
+  * first, then the production at this level is walked. */
+ void walkProdList( const String &name, LelDefList *lelDefList, prod_list ProdList )
+ {
+     bool hasNested = ProdList.prodName() == prod_list::List;
+     if ( hasNested )
+         walkProdList( name, lelDefList, ProdList._prod_list() );
+
+     walkProdudction( name, lelDefList, ProdList.prod() );
+ }
+
+ /* Walk one item of a regular-expression or-block: either a single
+  * (unescaped) character sequence or a range built from the first
+  * characters of the unescaped low and high bounds. */
+ ReOrItem *walkRegOrChar( reg_or_char regOrChar )
+ {
+     switch ( regOrChar.prodName() ) {
+         case reg_or_char::Char: {
+             String chars = unescape( regOrChar.RE_CHAR().data() );
+             return ReOrItem::cons( regOrChar.RE_CHAR().loc(), chars );
+         }
+         case reg_or_char::Range: {
+             String lower = unescape( regOrChar.Low().data() );
+             String upper = unescape( regOrChar.High().data() );
+             return ReOrItem::cons( regOrChar.Low().loc(), lower[0], upper[0] );
+         }
+     }
+     return 0;
+ }
+
+ /* Walk the contents of a regular-expression or-block. The Base production
+  * yields an empty block; each Data level combines the nested block with
+  * the item at this level (nested block walked first). */
+ ReOrBlock *walkRegOrData( reg_or_data regOrData )
+ {
+     switch ( regOrData.prodName() ) {
+         case reg_or_data::Base:
+             return ReOrBlock::cons();
+         case reg_or_data::Data: {
+             ReOrBlock *prefix = walkRegOrData( regOrData._reg_or_data() );
+             ReOrItem *item = walkRegOrChar( regOrData.reg_or_char() );
+             return lexRegularExprData( prefix, item );
+         }
+     }
+     return 0;
+ }
+
+ /* Walk a possibly negated lexical factor. Each Caret level wraps the
+  * nested result with CharNegateType; the Base production wraps the plain
+  * factor. */
+ LexFactorNeg *walkLexFactorNeg( lex_factor_neg lexFactorNeg )
+ {
+     switch ( lexFactorNeg.prodName() ) {
+         case lex_factor_neg::Base: {
+             LexFactor *plain = walkLexFactor( lexFactorNeg.lex_factor() );
+             return LexFactorNeg::cons( plain );
+         }
+         case lex_factor_neg::Caret: {
+             LexFactorNeg *inner =
+                     walkLexFactorNeg( lexFactorNeg._lex_factor_neg() );
+             return LexFactorNeg::cons( inner, LexFactorNeg::CharNegateType );
+         }
+     }
+     return 0;
+ }
+
+ /*
+  * Walk a lexical factor with an optional repetition operator.
+  *
+  * Every production other than Base wraps a nested repetition, which is
+  * walked first. The Exact, Max, Min and Range productions carry numeric
+  * counts taken from the token text.
+  *
+  * The count text is copied into a String before conversion. atoi() needs a
+  * NUL-terminated string, while token data is handled elsewhere in this
+  * file as a pointer plus explicit length; converting through String avoids
+  * depending on what follows the token in its buffer.
+  * NOTE(review): assumes String's conversion from token data terminates the
+  * copy, as its use for C-string arguments elsewhere suggests -- confirm.
+  */
+ LexFactorRep *walkLexFactorRep( lex_factor_rep lexFactorRep )
+ {
+     LexFactorRep *factorRep = 0;
+     LexFactorRep *recRep = 0;
+     lex_factor_rep::prod_name pn = lexFactorRep.prodName();
+
+     if ( pn != lex_factor_rep::Base )
+         recRep = walkLexFactorRep( lexFactorRep._lex_factor_rep() );
+
+     switch ( pn ) {
+         case lex_factor_rep::Star: {
+             factorRep = LexFactorRep::cons( lexFactorRep.LEX_STAR().loc(),
+                     recRep, 0, 0, LexFactorRep::StarType );
+             break;
+         }
+         case lex_factor_rep::StarStar: {
+             factorRep = LexFactorRep::cons( lexFactorRep.LEX_STARSTAR().loc(),
+                     recRep, 0, 0, LexFactorRep::StarStarType );
+             break;
+         }
+         case lex_factor_rep::Plus: {
+             factorRep = LexFactorRep::cons( lexFactorRep.LEX_PLUS().loc(),
+                     recRep, 0, 0, LexFactorRep::PlusType );
+             break;
+         }
+         case lex_factor_rep::Question: {
+             factorRep = LexFactorRep::cons( lexFactorRep.LEX_QUESTION().loc(),
+                     recRep, 0, 0, LexFactorRep::OptionalType );
+             break;
+         }
+         case lex_factor_rep::Exact: {
+             String lowText = lexFactorRep.lex_uint().data();
+             int low = atoi( lowText.data );
+             factorRep = LexFactorRep::cons( lexFactorRep.lex_uint().loc(),
+                     recRep, low, 0, LexFactorRep::ExactType );
+             break;
+         }
+         case lex_factor_rep::Max: {
+             String highText = lexFactorRep.lex_uint().data();
+             int high = atoi( highText.data );
+             factorRep = LexFactorRep::cons( lexFactorRep.lex_uint().loc(),
+                     recRep, 0, high, LexFactorRep::MaxType );
+             break;
+         }
+         case lex_factor_rep::Min: {
+             String lowText = lexFactorRep.lex_uint().data();
+             int low = atoi( lowText.data );
+             factorRep = LexFactorRep::cons( lexFactorRep.lex_uint().loc(),
+                     recRep, low, 0, LexFactorRep::MinType );
+             break;
+         }
+         case lex_factor_rep::Range: {
+             String lowText = lexFactorRep.Low().data();
+             String highText = lexFactorRep.High().data();
+             int low = atoi( lowText.data );
+             int high = atoi( highText.data );
+             factorRep = LexFactorRep::cons( lexFactorRep.Low().loc(),
+                     recRep, low, high, LexFactorRep::RangeType );
+             break;
+         }
+         case lex_factor_rep::Base: {
+             LexFactorNeg *factorNeg = walkLexFactorNeg( lexFactorRep.lex_factor_neg() );
+             factorRep = LexFactorRep::cons( factorNeg );
+             break;
+         }
+     }
+
+     return factorRep;
+ }
+
+ /* Walk a lexical term. For every production except Base the left-hand
+  * term is walked first; the factor at this level is then joined to it
+  * with the operator-specific term type. Base (and any unlisted production)
+  * yields a term holding the factor alone. */
+ LexTerm *walkLexTerm( lex_term lexTerm )
+ {
+     lex_term::prod_name pn = lexTerm.prodName();
+
+     LexTerm *left = 0;
+     if ( pn != lex_term::Base )
+         left = walkLexTerm( lexTerm._lex_term() );
+
+     LexFactorAug *aug = walkLexFactorAug( lexTerm.lex_factor_rep() );
+
+     switch ( pn ) {
+         case lex_term::Dot:
+             return LexTerm::cons( left, aug, LexTerm::ConcatType );
+         case lex_term::ColonGt:
+             return LexTerm::cons( left, aug, LexTerm::RightStartType );
+         case lex_term::ColonGtGt:
+             return LexTerm::cons( left, aug, LexTerm::RightFinishType );
+         case lex_term::LtColon:
+             return LexTerm::cons( left, aug, LexTerm::LeftType );
+         default:
+             break;
+     }
+     return LexTerm::cons( aug );
+ }
+
+ /* Walk a lexical expression. For every production except Base the
+  * left-hand expression is walked first; the term at this level is then
+  * combined with it using the operator-specific expression type. Returns
+  * null for an unlisted production. */
+ LexExpression *walkLexExpr( lex_expr lexExpr )
+ {
+     lex_expr::prod_name pn = lexExpr.prodName();
+
+     LexExpression *left = 0;
+     if ( pn != lex_expr::Base )
+         left = walkLexExpr( lexExpr._lex_expr() );
+
+     LexTerm *rhs = walkLexTerm( lexExpr.lex_term() );
+
+     switch ( pn ) {
+         case lex_expr::Base:
+             return LexExpression::cons( rhs );
+         case lex_expr::Bar:
+             return LexExpression::cons( left, rhs, LexExpression::OrType );
+         case lex_expr::Amp:
+             return LexExpression::cons( left, rhs, LexExpression::IntersectType );
+         case lex_expr::Dash:
+             return LexExpression::cons( left, rhs, LexExpression::SubtractType );
+         case lex_expr::DashDash:
+             return LexExpression::cons( left, rhs, LexExpression::StrongSubtractType );
+     }
+     return 0;
+ }
+
+
+ /* Walk a regular-language definition: the lexical expression is walked,
+  * wrapped in a LexJoin and registered under the definition's identifier
+  * in the current namespace. */
+ void walkRlDef( rl_def rlDef )
+ {
+     String defName = rlDef.id().data();
+
+     LexExpression *body = walkLexExpr( rlDef.lex_expr() );
+     LexJoin *joined = LexJoin::cons( body );
+
+     addRegularDef( rlDef.id().loc(), curNspace(), defName, joined );
+ }
+
+ /* Walk a lexical region: its root items are walked between a
+  * pushRegionSet() and the matching popRegionSet(). The statement list
+  * returned by the walk is discarded. */
+ void walkLexRegion( region_def regionDef )
+ {
+     pushRegionSet( regionDef.loc() );
+
+     walkRootItemList( regionDef.RootItemList() );
+
+     popRegionSet();
+ }
+
+ /* Walk a context-free language definition. The variable definitions are
+  * walked into an object definition named after the nonterminal, the
+  * productions are collected, and the result is registered through
+  * BaseParser::cflDef(). */
+ void walkCflDef( cfl_def cflDef )
+ {
+     String ntName = cflDef.id().data();
+
+     ObjectDef *fields = walkVarDefList( cflDef.VarDefList() );
+     fields->name = ntName;
+
+     LelDefList *productions = new LelDefList;
+     walkProdList( ntName, productions, cflDef.prod_list() );
+
+     /* The flag is set when the REDUCEFIRST token is present. */
+     bool reduceFirst = cflDef.opt_reduce_first().REDUCEFIRST() != 0;
+
+     NtDef *nonTerm = NtDef::cons( ntName, curNspace(),
+             curStruct(), reduceFirst );
+
+     BaseParser::cflDef( nonTerm, fields, productions );
+ }
+
+ /* Walk a call argument sequence into a new CallArgVect, one CallArg per
+  * code expression, following the sequence until it is null. */
+ CallArgVect *walkCallArgSeq( call_arg_seq callArgSeq )
+ {
+     CallArgVect *args = new CallArgVect;
+
+     for ( ; callArgSeq != 0; callArgSeq = callArgSeq._call_arg_seq() ) {
+         LangExpr *argExpr = walkCodeExpr( callArgSeq.code_expr() );
+         args->append( new CallArg(argExpr) );
+     }
+
+     return args;
+ }
+
+ /* Walk a call argument list by walking the sequence it contains. */
+ CallArgVect *walkCallArgList( call_arg_list callArgList )
+ {
+     return walkCallArgSeq( callArgList.call_arg_seq() );
+ }
+
+ /* Strip a leading unary '^' or '@' from expr and record it in trim:
+  * '^' sets TrimYes, '@' sets TrimNo. In both cases the operand is
+  * returned. Any other expression is returned as is and trim is left
+  * untouched. */
+ LangExpr *liftTrim( LangExpr *expr, ConsItem::Trim &trim )
+ {
+     if ( expr->type != LangExpr::UnaryType )
+         return expr;
+
+     if ( expr->op == '^' ) {
+         trim = ConsItem::TrimYes;
+         return expr->right;
+     }
+
+     if ( expr->op == '@' ) {
+         trim = ConsItem::TrimNo;
+         return expr->right;
+     }
+
+     return expr;
+ }
+
+ /* Walk a call argument sequence into a ConsItemList. Each code expression
+  * becomes an ExprType item with the default trim setting, located at the
+  * expression. */
+ ConsItemList *walkCallArgSeqAccum( call_arg_seq callArgSeq )
+ {
+     ConsItemList *items = new ConsItemList;
+
+     for ( ; callArgSeq != 0; callArgSeq = callArgSeq._call_arg_seq() ) {
+         code_expr argTree = callArgSeq.code_expr();
+
+         ConsItem::Trim trim = ConsItem::TrimDefault;
+         LangExpr *argExpr = walkCodeExpr( argTree );
+
+         ConsItem *item = ConsItem::cons( argExpr->loc,
+                 ConsItem::ExprType, argExpr, trim );
+         items->append( item );
+     }
+
+     return items;
+ }
+
+ /* Walk a call argument list into a ConsItemList by walking the sequence
+  * it contains. */
+ ConsItemList *walkCallArgListAccum( call_arg_list callArgList )
+ {
+     ConsItemList *items = walkCallArgSeqAccum( callArgList.call_arg_seq() );
+     return items;
+ }
+
+ /*
+  * Walk a print statement into an expression statement that sends a list
+  * of items to a stream variable.
+  *
+  * The Accum and Tree forms target a variable reference named "stdout"
+  * built in the current namespace; they differ only in where the items come
+  * from (an accumulate node or a call argument list). The PrintStream form
+  * targets the variable reference given in the statement. The eof flag of
+  * the send is always false. Returns null for an unlisted production.
+  */
+ LangStmt *walkPrintStmt( print_stmt &printStmt )
+ {
+     switch ( printStmt.prodName() ) {
+         case print_stmt::Accum: {
+             InputLoc loc = printStmt.PRINT().loc();
+
+             NamespaceQual *qual = NamespaceQual::cons( curNspace() );
+             QualItemVect *qualItems = new QualItemVect;
+             LangVarRef *target = LangVarRef::cons( loc, curNspace(), curStruct(),
+                     curScope(), qual, qualItems, String("stdout") );
+
+             ConsItemList *items = walkAccumulate( printStmt.accumulate() );
+
+             bool eof = false;
+             LangExpr *sendExpr = send( loc, target, items, eof );
+             return LangStmt::cons( loc, LangStmt::ExprType, sendExpr );
+         }
+         case print_stmt::Tree: {
+             InputLoc loc = printStmt.PRINT().loc();
+
+             NamespaceQual *qual = NamespaceQual::cons( curNspace() );
+             QualItemVect *qualItems = new QualItemVect;
+             LangVarRef *target = LangVarRef::cons( loc, curNspace(), curStruct(),
+                     curScope(), qual, qualItems, String("stdout") );
+
+             ConsItemList *items = walkCallArgListAccum( printStmt.call_arg_list() );
+
+             bool eof = false;
+             LangExpr *sendExpr = send( loc, target, items, eof );
+             return LangStmt::cons( loc, LangStmt::ExprType, sendExpr );
+         }
+         case print_stmt::PrintStream: {
+             LangVarRef *target = walkVarRef( printStmt.var_ref() );
+
+             ConsItemList *items = walkCallArgListAccum( printStmt.call_arg_list() );
+
+             InputLoc loc = printStmt.PRINTS().loc();
+
+             bool eof = false;
+             LangExpr *sendExpr = send( loc, target, items, eof );
+             return LangStmt::cons( loc, LangStmt::ExprType, sendExpr );
+         }
+     }
+     return 0;
+ }
+
+ /* Walk a variable qualification into a QualItemVect. The Base production
+  * yields an empty vector; each Dot or Arrow level appends its identifier
+  * after the nested qualification has been walked. The item's form is Dot
+  * when the DOT token is present and Arrow otherwise. */
+ QualItemVect *walkQual( qual &Qual )
+ {
+     qual nested = Qual._qual();
+
+     switch ( Qual.prodName() ) {
+         case qual::Base:
+             return new QualItemVect;
+         case qual::Dot:
+         case qual::Arrow: {
+             QualItemVect *items = walkQual( nested );
+             String name = Qual.id().data();
+             QualItem::Form form = QualItem::Arrow;
+             if ( Qual.DOT() != 0 )
+                 form = QualItem::Dot;
+             items->append( QualItem( form, Qual.id().loc(), name ) );
+             return items;
+         }
+     }
+     return 0;
+ }
+
+ /* Walk a variable reference: the region qualification, then the member
+  * qualification, then the identifier, combined with the current
+  * namespace, struct and scope into a LangVarRef. */
+ LangVarRef *walkVarRef( var_ref varRef )
+ {
+     NamespaceQual *regionQual = walkRegionQual( varRef.region_qual() );
+
+     qual memberQual = varRef.qual();
+     QualItemVect *qualItems = walkQual( memberQual );
+
+     String name = varRef.id().data();
+     return LangVarRef::cons( varRef.id().loc(),
+             curNspace(), curStruct(), curScope(), regionQual, qualItems, name );
+ }
+
+ /* Walk an optional capture. For the Id production a UserLocalType object
+  * field named after the identifier is built; otherwise null. */
+ ObjectField *walkOptCapture( opt_capture optCapture )
+ {
+     if ( optCapture.prodName() != opt_capture::Id )
+         return 0;
+
+     String name = optCapture.id().data();
+     return ObjectField::cons( optCapture.id().loc(),
+             ObjectField::UserLocalType, 0, name );
+ }
+
+ /*
+ * Constructor
+ */
+
+ /* Walk one element of a double-quoted constructor literal: either a piece
+  * of literal data (unescaped into an InputText item) or an embedded
+  * constructor element list. */
+ ConsItemList *walkLitConsEl( lit_cons_el litConsEl, TypeRef *consTypeRef )
+ {
+     switch ( litConsEl.prodName() ) {
+         case lit_cons_el::ConsData: {
+             String text = unescape( litConsEl.lit_dq_data().text().c_str() );
+             ConsItem *item = ConsItem::cons( litConsEl.lit_dq_data().loc(),
+                     ConsItem::InputText, text );
+             return ConsItemList::cons( item );
+         }
+         case lit_cons_el::SubList:
+             return walkConsElList( litConsEl.ConsElList(), consTypeRef );
+     }
+     return 0;
+ }
+
+ /* Walk the elements of a double-quoted constructor literal, concatenating
+  * their items in order. If the literal's terminator carries a newline
+  * token (Nl non-null), its unescaped text is appended as a final
+  * InputText item. */
+ ConsItemList *walkLitConsElList( _lrepeat_lit_cons_el litConsElList,
+         LIT_DQ_NL Nl, TypeRef *consTypeRef )
+ {
+     ConsItemList *items = new ConsItemList;
+
+     for ( RepeatIter<lit_cons_el> el( litConsElList ); !el.end(); el.next() ) {
+         ConsItemList *piece = walkLitConsEl( el.value(), consTypeRef );
+         items = consListConcat( items, piece );
+     }
+
+     if ( Nl != 0 ) {
+         String nlText = unescape( Nl.data() );
+         ConsItem *nlItem = ConsItem::cons( Nl.loc(), ConsItem::InputText, nlText );
+         items = consListConcat( items, ConsItemList::cons( nlItem ) );
+     }
+
+     return items;
+ }
+
+ /* Walk one constructor element into a ConsItemList. Handles backtick
+  * literals, tilde data (text plus a trailing newline), single-quoted
+  * data, embedded code expressions (default trim) and double-quoted
+  * literals. Returns null for an unlisted production. */
+ ConsItemList *walkConsEl( cons_el consEl, TypeRef *consTypeRef )
+ {
+     switch ( consEl.prodName() ) {
+         case cons_el::Lit: {
+             NamespaceQual *qual = walkRegionQual( consEl.region_qual() );
+             String text = consEl.backtick_lit().data();
+             return consElLiteral( consEl.backtick_lit().loc(), consTypeRef, text, qual );
+         }
+         case cons_el::Tilde: {
+             String line = consEl.opt_tilde_data().text().c_str();
+             line += '\n';
+             ConsItem *item = ConsItem::cons( consEl.opt_tilde_data().loc(),
+                     ConsItem::InputText, line );
+             return ConsItemList::cons( item );
+         }
+         case cons_el::Sq:
+             return walkConsSqConsDataList( consEl.SqConsDataList(),
+                     consEl.sq_lit_term().CONS_SQ_NL() );
+         case cons_el::CodeExpr: {
+             ConsItem::Trim trim = ConsItem::TrimDefault;
+             LangExpr *embedded = walkCodeExpr( consEl.code_expr() );
+             ConsItem *item = ConsItem::cons( embedded->loc,
+                     ConsItem::ExprType, embedded, trim );
+             return ConsItemList::cons( item );
+         }
+         case cons_el::Dq:
+             return walkLitConsElList( consEl.LitConsElList(),
+                     consEl.dq_lit_term().LIT_DQ_NL(), consTypeRef );
+     }
+     return 0;
+ }
+
+ /* Walk a repeat of constructor elements, concatenating the items of each
+  * element in order. */
+ ConsItemList *walkConsElList( _lrepeat_cons_el consElList, TypeRef *consTypeRef )
+ {
+     ConsItemList *items = new ConsItemList;
+
+     for ( RepeatIter<cons_el> el( consElList ); !el.end(); el.next() ) {
+         ConsItemList *piece = walkConsEl( el.value(), consTypeRef );
+         items = consListConcat( items, piece );
+     }
+
+     return items;
+ }
+
+ /* Walk one top-level constructor element: a double-quoted literal, a
+  * single-quoted literal or tilde data (text plus a trailing newline).
+  * Returns null for an unlisted production. */
+ ConsItemList *walkConsTopEl( cons_top_el consTopEl, TypeRef *consTypeRef )
+ {
+     switch ( consTopEl.prodName() ) {
+         case cons_top_el::Dq:
+             return walkLitConsElList( consTopEl.LitConsElList(),
+                     consTopEl.dq_lit_term().LIT_DQ_NL(), consTypeRef );
+         case cons_top_el::Sq:
+             return walkConsSqConsDataList( consTopEl.SqConsDataList(),
+                     consTopEl.sq_lit_term().CONS_SQ_NL() );
+         case cons_top_el::Tilde: {
+             String line = consTopEl.opt_tilde_data().text().c_str();
+             line += '\n';
+             ConsItem *item = ConsItem::cons( consTopEl.opt_tilde_data().loc(),
+                     ConsItem::InputText, line );
+             return ConsItemList::cons( item );
+         }
+     }
+     return 0;
+ }
+
+ /* Walk a list of top-level constructor elements. Each element's location
+  * is passed to an alignment check (tagged "constructor") before the
+  * element is walked and its items concatenated. */
+ ConsItemList *walkConsList( cons_list consList, TypeRef *consTypeRef )
+ {
+     Alignment alignment;
+     ConsItemList *items = new ConsItemList;
+
+     for ( RepeatIter<cons_top_el> el( consList ); !el.end(); el.next() ) {
+         cons_top_el top = el.value();
+         alignment.check( "constructor", top.loc() );
+
+         ConsItemList *piece = walkConsTopEl( top, consTypeRef );
+         items = consListConcat( items, piece );
+     }
+
+     return items;
+ }
+
+ /* Walk a constructor: the TopList form goes through walkConsList(),
+  * every other form through walkConsElList(). */
+ ConsItemList *walkConstructor( constructor Constructor, TypeRef *consTypeRef )
+ {
+     if ( Constructor.prodName() != constructor::TopList )
+         return walkConsElList( Constructor.ConsElList(), consTypeRef );
+
+     return walkConsList( Constructor.cons_list(), consTypeRef );
+ }
+
+ /*
+ * String
+ */
+
+ /* Walk one element of a double-quoted string literal: either a piece of
+  * literal data (unescaped into an InputText item) or an embedded string
+  * element list. */
+ ConsItemList *walkLitStringEl( lit_string_el litStringEl )
+ {
+     switch ( litStringEl.prodName() ) {
+         case lit_string_el::ConsData: {
+             String text = unescape( litStringEl.lit_dq_data().text().c_str() );
+             ConsItem *item = ConsItem::cons( litStringEl.lit_dq_data().loc(),
+                     ConsItem::InputText, text );
+             return ConsItemList::cons( item );
+         }
+         case lit_string_el::SubList:
+             return walkStringElList( litStringEl.StringElList() );
+     }
+     return 0;
+ }
+
+ /* Walk the elements of a double-quoted string literal, concatenating
+  * their items in order. If the literal's terminator carries a newline
+  * token (Nl non-null), its unescaped text is appended as a final
+  * InputText item. */
+ ConsItemList *walkLitStringElList( _lrepeat_lit_string_el litStringElList, LIT_DQ_NL Nl )
+ {
+     ConsItemList *items = new ConsItemList;
+
+     for ( RepeatIter<lit_string_el> el( litStringElList ); !el.end(); el.next() ) {
+         ConsItemList *piece = walkLitStringEl( el.value() );
+         items = consListConcat( items, piece );
+     }
+
+     if ( Nl != 0 ) {
+         String nlText = unescape( Nl.data() );
+         ConsItem *nlItem = ConsItem::cons( Nl.loc(),
+                 ConsItem::InputText, nlText );
+         items = consListConcat( items, ConsItemList::cons( nlItem ) );
+     }
+
+     return items;
+ }
+
+ /* Walk one string element into a ConsItemList. Handles double-quoted
+  * literals, single-quoted data, tilde data (text plus a trailing newline)
+  * and embedded code expressions, whose leading trim operator, if any, is
+  * lifted into the item's trim setting. Returns null for an unlisted
+  * production. */
+ ConsItemList *walkStringEl( string_el stringEl )
+ {
+     switch ( stringEl.prodName() ) {
+         case string_el::Dq:
+             return walkLitStringElList( stringEl.LitStringElList(),
+                     stringEl.dq_lit_term().LIT_DQ_NL() );
+         case string_el::Sq:
+             return walkConsSqConsDataList( stringEl.SqConsDataList(),
+                     stringEl.sq_lit_term().CONS_SQ_NL() );
+         case string_el::Tilde: {
+             String line = stringEl.opt_tilde_data().text().c_str();
+             line += '\n';
+             ConsItem *item = ConsItem::cons( stringEl.opt_tilde_data().loc(),
+                     ConsItem::InputText, line );
+             return ConsItemList::cons( item );
+         }
+         case string_el::CodeExpr: {
+             ConsItem::Trim trim = ConsItem::TrimDefault;
+             LangExpr *embedded = walkCodeExpr( stringEl.code_expr() );
+             embedded = liftTrim( embedded, trim );
+             ConsItem *item = ConsItem::cons( embedded->loc,
+                     ConsItem::ExprType, embedded, trim );
+             return ConsItemList::cons( item );
+         }
+     }
+     return 0;
+ }
+
+ /* Walk a repeat of string elements, concatenating the items of each
+  * element in order. */
+ ConsItemList *walkStringElList( _lrepeat_string_el stringElList )
+ {
+     ConsItemList *items = new ConsItemList;
+
+     for ( RepeatIter<string_el> el( stringElList ); !el.end(); el.next() ) {
+         ConsItemList *piece = walkStringEl( el.value() );
+         items = consListConcat( items, piece );
+     }
+
+     return items;
+ }
+
+ /* Walk one top-level string element: a double-quoted literal, a
+  * single-quoted literal or tilde data (text plus a trailing newline).
+  * Returns null for an unlisted production. */
+ ConsItemList *walkStringTopEl( string_top_el stringTopEl )
+ {
+     switch ( stringTopEl.prodName() ) {
+         case string_top_el::Dq:
+             return walkLitStringElList( stringTopEl.LitStringElList(),
+                     stringTopEl.dq_lit_term().LIT_DQ_NL() );
+         case string_top_el::Sq:
+             return walkConsSqConsDataList( stringTopEl.SqConsDataList(),
+                     stringTopEl.sq_lit_term().CONS_SQ_NL() );
+         case string_top_el::Tilde: {
+             String line = stringTopEl.opt_tilde_data().text().c_str();
+             line += '\n';
+             ConsItem *item = ConsItem::cons( stringTopEl.opt_tilde_data().loc(),
+                     ConsItem::InputText, line );
+             return ConsItemList::cons( item );
+         }
+     }
+     return 0;
+ }
+
+ /* Walk a list of top-level string elements. Each element's location is
+  * passed to an alignment check (tagged "string") before the element is
+  * walked and its items concatenated. */
+ ConsItemList *walkStringList( string_list stringList )
+ {
+     Alignment alignment;
+     ConsItemList *items = new ConsItemList;
+
+     for ( RepeatIter<string_top_el> el( stringList ); !el.end(); el.next() ) {
+         string_top_el top = el.value();
+         alignment.check( "string", top.loc() );
+
+         ConsItemList *piece = walkStringTopEl( top );
+         items = consListConcat( items, piece );
+     }
+
+     return items;
+ }
+
+ /* Walk a string: the TopList form goes through walkStringList(), every
+  * other form through walkStringElList(). */
+ ConsItemList *walkString( string String )
+ {
+     if ( String.prodName() != string::TopList )
+         return walkStringElList( String.StringElList() );
+
+     return walkStringList( String.string_list() );
+ }
+
+ /*
+ * Accum
+ */
+
+ /* Walk one element of a double-quoted accumulate literal: either a piece
+  * of literal data (unescaped into an InputText item) or an embedded
+  * accumulate element list. */
+ ConsItemList *walkLitAccumEl( lit_accum_el litAccumEl )
+ {
+     switch ( litAccumEl.prodName() ) {
+         case lit_accum_el::ConsData: {
+             String text = unescape( litAccumEl.lit_dq_data().text().c_str() );
+             ConsItem *item = ConsItem::cons( litAccumEl.lit_dq_data().loc(),
+                     ConsItem::InputText, text );
+             return ConsItemList::cons( item );
+         }
+         case lit_accum_el::SubList:
+             return walkAccumElList( litAccumEl.AccumElList() );
+     }
+     return 0;
+ }
+
+ /* Walk the elements of a double-quoted accumulate literal, concatenating
+  * their items in order. If the literal's terminator carries a newline
+  * token (Nl non-null), its unescaped text is appended as a final
+  * InputText item. */
+ ConsItemList *walkLitAccumElList( _lrepeat_lit_accum_el litAccumElList, LIT_DQ_NL Nl )
+ {
+     ConsItemList *items = new ConsItemList;
+
+     for ( RepeatIter<lit_accum_el> el( litAccumElList ); !el.end(); el.next() ) {
+         ConsItemList *piece = walkLitAccumEl( el.value() );
+         items = consListConcat( items, piece );
+     }
+
+     if ( Nl != 0 ) {
+         String nlText = unescape( Nl.data() );
+         ConsItem *nlItem = ConsItem::cons( Nl.loc(), ConsItem::InputText, nlText );
+         items = consListConcat( items, ConsItemList::cons( nlItem ) );
+     }
+
+     return items;
+ }
+
+ /* Walk one accumulate element into a ConsItemList. Handles double-quoted
+  * literals, single-quoted data, tilde data (text plus a trailing newline)
+  * and embedded code expressions, whose leading trim operator, if any, is
+  * lifted into the item's trim setting. Returns null for an unlisted
+  * production. */
+ ConsItemList *walkAccumEl( accum_el accumEl )
+ {
+     switch ( accumEl.prodName() ) {
+         case accum_el::Dq:
+             return walkLitAccumElList( accumEl.LitAccumElList(),
+                     accumEl.dq_lit_term().LIT_DQ_NL() );
+         case accum_el::Sq:
+             return walkConsSqConsDataList( accumEl.SqConsDataList(),
+                     accumEl.sq_lit_term().CONS_SQ_NL() );
+         case accum_el::Tilde: {
+             String line = accumEl.opt_tilde_data().text().c_str();
+             line += '\n';
+             ConsItem *item = ConsItem::cons( accumEl.opt_tilde_data().loc(),
+                     ConsItem::InputText, line );
+             return ConsItemList::cons( item );
+         }
+         case accum_el::CodeExpr: {
+             ConsItem::Trim trim = ConsItem::TrimDefault;
+             LangExpr *embedded = walkCodeExpr( accumEl.code_expr() );
+             embedded = liftTrim( embedded, trim );
+             ConsItem *item = ConsItem::cons( embedded->loc,
+                     ConsItem::ExprType, embedded, trim );
+             return ConsItemList::cons( item );
+         }
+     }
+     return 0;
+ }
+
+ /* Walk a repeat of accumulate elements, concatenating the items of each
+  * element in order. */
+ ConsItemList *walkAccumElList( _lrepeat_accum_el accumElList )
+ {
+     ConsItemList *items = new ConsItemList;
+
+     for ( RepeatIter<accum_el> el( accumElList ); !el.end(); el.next() ) {
+         ConsItemList *piece = walkAccumEl( el.value() );
+         items = consListConcat( items, piece );
+     }
+
+     return items;
+ }
+
+ /* Walk one top-level accumulate element: a double-quoted literal, a
+  * single-quoted literal, tilde data (text plus a trailing newline) or an
+  * embedded accumulate element list. Returns null for an unlisted
+  * production. */
+ ConsItemList *walkAccumTopEl( accum_top_el accumTopEl )
+ {
+     switch ( accumTopEl.prodName() ) {
+         case accum_top_el::Dq:
+             return walkLitAccumElList( accumTopEl.LitAccumElList(),
+                     accumTopEl.dq_lit_term().LIT_DQ_NL() );
+         case accum_top_el::Sq:
+             return walkConsSqConsDataList( accumTopEl.SqConsDataList(),
+                     accumTopEl.sq_lit_term().CONS_SQ_NL() );
+         case accum_top_el::Tilde: {
+             String line = accumTopEl.opt_tilde_data().text().c_str();
+             line += '\n';
+             ConsItem *item = ConsItem::cons( accumTopEl.opt_tilde_data().loc(),
+                     ConsItem::InputText, line );
+             return ConsItemList::cons( item );
+         }
+         case accum_top_el::SubList:
+             return walkAccumElList( accumTopEl.AccumElList() );
+     }
+     return 0;
+ }
+
+ /* Walk a right-recursive accumulate list. The element at this level is
+  * alignment-checked (tagged "accumulator") and walked first; for the List
+  * production the rest of the list is walked afterwards and concatenated
+  * behind it. */
+ ConsItemList *walkAccumList( Alignment &alignment, accum_list accumList )
+ {
+     accum_top_el head = accumList.accum_top_el();
+     alignment.check( "accumulator", head.loc() );
+
+     ConsItemList *items = walkAccumTopEl( head );
+     if ( accumList.prodName() != accum_list::List )
+         return items;
+
+     ConsItemList *rest = walkAccumList( alignment, accumList._accum_list() );
+     return consListConcat( items, rest );
+ }
+
+ /* Walk an accumulate node: its list is walked with a fresh Alignment
+  * checker shared across all of its top-level elements. */
+ ConsItemList *walkAccumulate( accumulate Accumulate )
+ {
+     Alignment alignment;
+     return walkAccumList( alignment, Accumulate.accum_list() );
+ }
+
+ /* Walk one field initializer expression and append it to list. Every entry
+  * is constructed with the fixed name "_name". */
+ void walkFieldInit( FieldInitVect *list, field_init fieldInit )
+ {
+     LangExpr *value = walkCodeExpr( fieldInit.code_expr() );
+     list->append( FieldInit::cons( value->loc, "_name", value ) );
+ }
+
+ /* Walk a repeat of field initializers into a newly allocated vector,
+  * preserving their order. */
+ FieldInitVect *walkFieldInit( _lrepeat_field_init fieldInitList )
+ {
+     FieldInitVect *inits = new FieldInitVect;
+
+     for ( RepeatIter<field_init> it( fieldInitList ); !it.end(); it.next() )
+         walkFieldInit( inits, it.value() );
+
+     return inits;
+ }
+ /* Walk an optional field-init list. Returns 0 unless the Init production
+  * is present. */
+ FieldInitVect *walkOptFieldInit( opt_field_init optFieldInit )
+ {
+     if ( optFieldInit.prodName() != opt_field_init::Init )
+         return 0;
+
+     return walkFieldInit( optFieldInit.FieldInitList() );
+ }
+
+ /*
+  * Build the expression for a stmt_or_factor node by dispatching on its
+  * production. Returns the resulting LangExpr, or 0 if no case matches.
+  *
+  * Parse, ParseTree, ParseStop, Reduce and ReadReduce all end in a call to
+  * parseCmd(); they differ in the keyword token supplying the location, in
+  * the boolean flags, in whether a capture field is walked, and in the
+  * reducer name. The meaning of each flag is defined by parseCmd(), which is
+  * not visible in this part of the file.
+  */
+ LangExpr *walkStmtOrFactor( stmt_or_factor StmtOrFactor )
+ {
+ LangExpr *expr = 0;
+ switch ( StmtOrFactor.prodName() ) {
+ case stmt_or_factor::Parse: {
+ /* The type we are parsing. */
+ type_ref typeRefTree = StmtOrFactor.type_ref();
+ TypeRef *typeRef = walkTypeRef( typeRefTree );
+ ObjectField *objField = walkOptCapture( StmtOrFactor.opt_capture() );
+ FieldInitVect *init = walkOptFieldInit( StmtOrFactor.opt_field_init() );
+ ConsItemList *list = walkAccumulate( StmtOrFactor.accumulate() );
+
+ expr = parseCmd( StmtOrFactor.PARSE().loc(), false, false, objField,
+ typeRef, init, list, true, false, false, "" );
+ break;
+ }
+ case stmt_or_factor::ParseTree: {
+ /* The type we are parsing. */
+ type_ref typeRefTree = StmtOrFactor.type_ref();
+ TypeRef *typeRef = walkTypeRef( typeRefTree );
+ ObjectField *objField = walkOptCapture( StmtOrFactor.opt_capture() );
+ FieldInitVect *init = walkOptFieldInit( StmtOrFactor.opt_field_init() );
+ ConsItemList *list = walkAccumulate( StmtOrFactor.accumulate() );
+
+ /* Same as Parse except the first boolean flag is true. */
+ expr = parseCmd( StmtOrFactor.PARSE_TREE().loc(), true, false, objField,
+ typeRef, init, list, true, false, false, "" );
+ break;
+ }
+ case stmt_or_factor::ParseStop: {
+ /* The type we are parsing. */
+ type_ref typeRefTree = StmtOrFactor.type_ref();
+ TypeRef *typeRef = walkTypeRef( typeRefTree );
+ ObjectField *objField = walkOptCapture( StmtOrFactor.opt_capture() );
+ FieldInitVect *init = walkOptFieldInit( StmtOrFactor.opt_field_init() );
+ ConsItemList *list = walkAccumulate( StmtOrFactor.accumulate() );
+
+ /* Same as Parse except the second boolean flag is true. */
+ expr = parseCmd( StmtOrFactor.PARSE_STOP().loc(), false, true, objField,
+ typeRef, init, list, true, false, false, "" );
+ break;
+ }
+ case stmt_or_factor::Reduce: {
+ /* The reducer name. */
+ String reducer = StmtOrFactor.id().data();
+
+ /* The type we are parsing. */
+ type_ref typeRefTree = StmtOrFactor.type_ref();
+ TypeRef *typeRef = walkTypeRef( typeRefTree );
+ FieldInitVect *init = walkOptFieldInit( StmtOrFactor.opt_field_init() );
+ ConsItemList *list = walkAccumulate( StmtOrFactor.accumulate() );
+
+ /* No capture is walked here; 0 is passed in the capture-field slot. */
+ expr = parseCmd( StmtOrFactor.REDUCE().loc(), false, false, 0,
+ typeRef, init, list, true, true, false, reducer );
+ break;
+ }
+ case stmt_or_factor::ReadReduce: {
+ /* The reducer name. */
+ String reducer = StmtOrFactor.id().data();
+
+ /* The type we are parsing. */
+ type_ref typeRefTree = StmtOrFactor.type_ref();
+ TypeRef *typeRef = walkTypeRef( typeRefTree );
+ FieldInitVect *init = walkOptFieldInit( StmtOrFactor.opt_field_init() );
+ ConsItemList *list = walkAccumulate( StmtOrFactor.accumulate() );
+
+ /* Same as Reduce except the last boolean flag is true. */
+ expr = parseCmd( StmtOrFactor.READ_REDUCE().loc(), false, false, 0,
+ typeRef, init, list, true, true, true, reducer );
+ break;
+ }
+ case stmt_or_factor::Send: {
+ /* Target variable, the data to send, and whether an end-of-stream
+  * marker was given (see walkOptEos). */
+ LangVarRef *varRef = walkVarRef( StmtOrFactor.var_ref() );
+ ConsItemList *list = walkAccumulate( StmtOrFactor.accumulate() );
+ bool eof = walkOptEos( StmtOrFactor.opt_eos() );
+ expr = send( StmtOrFactor.SEND().loc(), varRef, list, eof );
+ break;
+ }
+ case stmt_or_factor::SendTree: {
+ /* Same inputs as Send, handed to sendTree() instead. */
+ LangVarRef *varRef = walkVarRef( StmtOrFactor.var_ref() );
+ ConsItemList *list = walkAccumulate( StmtOrFactor.accumulate() );
+ bool eof = walkOptEos( StmtOrFactor.opt_eos() );
+ expr = sendTree( StmtOrFactor.SEND_TREE().loc(), varRef, list, eof );
+ break;
+ }
+ case stmt_or_factor::MakeTree: {
+ CallArgVect *exprList = walkCallArgList( StmtOrFactor.call_arg_list() );
+ expr = LangExpr::cons( LangTerm::cons( StmtOrFactor.loc(),
+ LangTerm::MakeTreeType, exprList ) );
+ break;
+ }
+ case stmt_or_factor::MakeToken: {
+ CallArgVect *exprList = walkCallArgList( StmtOrFactor.call_arg_list() );
+ expr = LangExpr::cons( LangTerm::cons( StmtOrFactor.loc(),
+ LangTerm::MakeTokenType, exprList ) );
+ break;
+ }
+ case stmt_or_factor::Cons: {
+ /* The type we are parsing. */
+ type_ref typeRefTree = StmtOrFactor.type_ref();
+ TypeRef *typeRef = walkTypeRef( typeRefTree );
+ ObjectField *objField = walkOptCapture( StmtOrFactor.opt_capture() );
+ /* Note the constructor is walked before the field initializers. */
+ ConsItemList *list = walkConstructor( StmtOrFactor.constructor(), typeRef );
+ FieldInitVect *init = walkOptFieldInit( StmtOrFactor.opt_field_init() );
+
+ expr = construct( StmtOrFactor.CONS().loc(), objField, list, typeRef, init );
+ break;
+ }
+ case stmt_or_factor::Match: {
+ LangVarRef *varRef = walkVarRef( StmtOrFactor.var_ref() );
+ PatternItemList *list = walkPattern( StmtOrFactor.pattern(), varRef );
+ expr = match( StmtOrFactor.loc(), varRef, list );
+ break;
+ }
+ case stmt_or_factor::New: {
+ TypeRef *typeRef = walkTypeRef( StmtOrFactor.type_ref() );
+
+ ObjectField *captureField = walkOptCapture( StmtOrFactor.opt_capture() );
+ FieldInitVect *init = walkFieldInit( StmtOrFactor.FieldInitList() );
+
+ /* A reference to the capture variable is built only when a capture
+  * was given; otherwise 0 is passed to consNew. */
+ LangVarRef *captureVarRef = 0;
+ if ( captureField != 0 ) {
+ captureVarRef = LangVarRef::cons( captureField->loc,
+ curNspace(), curStruct(), curScope(), captureField->name );
+ }
+
+ expr = LangExpr::cons( LangTerm::consNew(
+ StmtOrFactor.loc(), typeRef, captureVarRef, init ) );
+
+ /* Check for redeclaration. */
+ if ( captureField != 0 ) {
+ if ( curScope()->checkRedecl( captureField->name ) != 0 ) {
+ error( captureField->loc ) << "variable " <<
+ captureField->name << " redeclared" << endp;
+ }
+
+ /* Insert it into the field map. */
+ captureField->typeRef = typeRef;
+ curScope()->insertField( captureField->name, captureField );
+ }
+ break;
+ }}
+ return expr;
+ }
+
+ /*
+  * Build the expression for a code_factor node, dispatching on its
+  * production. Returns 0 when no production matches.
+  *
+  * The `used` argument is accepted for signature parity with the other
+  * expression walkers but is not consulted by this function.
+  */
+ LangExpr *walkCodeFactor( code_factor codeFactor, bool used = true )
+ {
+     switch ( codeFactor.prodName() ) {
+     case code_factor::VarRef: {
+         LangVarRef *ref = walkVarRef( codeFactor.var_ref() );
+         LangTerm *refTerm = LangTerm::cons( ref->loc,
+                 LangTerm::VarRefType, ref );
+         return LangExpr::cons( refTerm );
+     }
+
+     case code_factor::Call: {
+         LangVarRef *callee = walkVarRef( codeFactor.var_ref() );
+         CallArgVect *args = walkCallArgList( codeFactor.call_arg_list() );
+         LangTerm *callTerm = LangTerm::cons( callee->loc, callee, args );
+         return LangExpr::cons( callTerm );
+     }
+
+     case code_factor::Number: {
+         String digits = codeFactor.number().text().c_str();
+         LangTerm *numTerm = LangTerm::cons( codeFactor.number().loc(),
+                 LangTerm::NumberType, digits );
+         return LangExpr::cons( numTerm );
+     }
+
+     case code_factor::StmtOrFactor:
+         return walkStmtOrFactor( codeFactor.stmt_or_factor() );
+
+     case code_factor::Nil:
+         return LangExpr::cons( LangTerm::cons( codeFactor.NIL().loc(),
+                 LangTerm::NilType ) );
+
+     case code_factor::True:
+         return LangExpr::cons( LangTerm::cons( codeFactor.TRUE().loc(),
+                 LangTerm::TrueType ) );
+
+     case code_factor::False:
+         return LangExpr::cons( LangTerm::cons( codeFactor.FALSE().loc(),
+                 LangTerm::FalseType ) );
+
+     case code_factor::Paren:
+         /* Parentheses add no node of their own. */
+         return walkCodeExpr( codeFactor.code_expr() );
+
+     case code_factor::String: {
+         ConsItemList *parts = walkString( codeFactor.string() );
+         return LangExpr::cons( LangTerm::cons( codeFactor.string().loc(), parts ) );
+     }
+
+     case code_factor::In: {
+         TypeRef *searchType = walkTypeRef( codeFactor.type_ref() );
+         LangVarRef *subject = walkVarRef( codeFactor.var_ref() );
+         return LangExpr::cons( LangTerm::cons( searchType->loc,
+                 LangTerm::SearchType, searchType, subject ) );
+     }
+
+     case code_factor::TypeId: {
+         TypeRef *idType = walkTypeRef( codeFactor.type_ref() );
+         return LangExpr::cons( LangTerm::cons( codeFactor.loc(),
+                 LangTerm::TypeIdType, idType ) );
+     }
+
+     case code_factor::Cast: {
+         TypeRef *targetType = walkTypeRef( codeFactor.type_ref() );
+         LangExpr *operand = walkCodeFactor( codeFactor._code_factor() );
+         return LangExpr::cons( LangTerm::cons( codeFactor.loc(),
+                 LangTerm::CastType, targetType, operand ) );
+     }
+     }
+
+     return 0;
+ }
+
+ /* Walk an additive expression. The Base production forwards `used` to the
+  * multiplicative walker; Plus and Minus walk the left operand, then the
+  * right, and combine them with '+' or '-'. Returns 0 if nothing matches. */
+ LangExpr *walkCodeAdditive( code_additive additive, bool used = true )
+ {
+     switch ( additive.prodName() ) {
+     case code_additive::Base:
+         return walkCodeMultiplicitive( additive.code_multiplicitive(), used );
+
+     case code_additive::Plus: {
+         LangExpr *lhs = walkCodeAdditive( additive._code_additive() );
+         LangExpr *rhs = walkCodeMultiplicitive( additive.code_multiplicitive() );
+         return LangExpr::cons( additive.PLUS().loc(), lhs, '+', rhs );
+     }
+
+     case code_additive::Minus: {
+         LangExpr *lhs = walkCodeAdditive( additive._code_additive() );
+         LangExpr *rhs = walkCodeMultiplicitive( additive.code_multiplicitive() );
+         return LangExpr::cons( additive.MINUS().loc(), lhs, '-', rhs );
+     }
+     }
+
+     return 0;
+ }
+
+ /*
+  * Walk a unary expression. Each prefix production walks its factor and
+  * wraps it with the operator's character code; the Base production returns
+  * the factor itself, forwarding `used`. Returns 0 if nothing matches.
+  */
+ LangExpr *walkCodeUnary( code_unary unary, bool used = true )
+ {
+     switch ( unary.prodName() ) {
+     case code_unary::Base:
+         return walkCodeFactor( unary.code_factor(), used );
+
+     case code_unary::Bang: {
+         LangExpr *operand = walkCodeFactor( unary.code_factor() );
+         return LangExpr::cons( unary.BANG().loc(), '!', operand );
+     }
+
+     case code_unary::Dollar: {
+         LangExpr *operand = walkCodeFactor( unary.code_factor() );
+         return LangExpr::cons( unary.DOLLAR().loc(), '$', operand );
+     }
+
+     case code_unary::DollarDollar: {
+         /* NOTE(review): the location is taken from DOLLAR() and the
+          * operator code is 'S' -- confirm both are intended. */
+         LangExpr *operand = walkCodeFactor( unary.code_factor() );
+         return LangExpr::cons( unary.DOLLAR().loc(), 'S', operand );
+     }
+
+     case code_unary::Caret: {
+         LangExpr *operand = walkCodeFactor( unary.code_factor() );
+         return LangExpr::cons( unary.CARET().loc(), '^', operand );
+     }
+
+     case code_unary::At: {
+         LangExpr *operand = walkCodeFactor( unary.code_factor() );
+         return LangExpr::cons( unary.AT().loc(), '@', operand );
+     }
+
+     case code_unary::Percent: {
+         LangExpr *operand = walkCodeFactor( unary.code_factor() );
+         return LangExpr::cons( unary.PERCENT().loc(), '%', operand );
+     }
+     }
+
+     return 0;
+ }
+
+ /*
+  * Walk a relational expression. The Base production forwards `used` to the
+  * additive walker and returns its result. Every other production walks the
+  * left relational operand first, then the additive right operand (always
+  * with used == true), and joins them with the production's operator.
+  */
+ LangExpr *walkCodeRelational( code_relational codeRelational, bool used = true )
+ {
+     if ( codeRelational.prodName() == code_relational::Base )
+         return walkCodeAdditive( codeRelational.code_additive(), used );
+
+     LangExpr *lhs = walkCodeRelational( codeRelational._code_relational() );
+     LangExpr *rhs = walkCodeAdditive( codeRelational.code_additive(), true );
+
+     switch ( codeRelational.prodName() ) {
+     case code_relational::EqEq:
+         return LangExpr::cons( codeRelational.loc(), lhs, OP_DoubleEql, rhs );
+     case code_relational::Neq:
+         return LangExpr::cons( codeRelational.loc(), lhs, OP_NotEql, rhs );
+     case code_relational::Lt:
+         return LangExpr::cons( codeRelational.loc(), lhs, '<', rhs );
+     case code_relational::Gt:
+         return LangExpr::cons( codeRelational.loc(), lhs, '>', rhs );
+     case code_relational::LtEq:
+         return LangExpr::cons( codeRelational.loc(), lhs, OP_LessEql, rhs );
+     case code_relational::GtEq:
+         return LangExpr::cons( codeRelational.loc(), lhs, OP_GrtrEql, rhs );
+     case code_relational::Base:
+         /* Handled by the early return above. */
+         break;
+     }
+
+     return 0;
+ }
+
+ /* Wrap an expression statement. The expression is walked with used ==
+  * false and the statement takes the expression's location. */
+ LangStmt *walkExprStmt( expr_stmt exprStmt )
+ {
+     LangExpr *body = walkCodeExpr( exprStmt.code_expr(), false );
+     return LangStmt::cons( body->loc, LangStmt::ExprType, body );
+ }
+
+ /* Construct an ObjectField of the requested kind from a var_def: the name
+  * and location come from its id, the type from its type_ref. */
+ ObjectField *walkVarDef( var_def varDef, ObjectField::Type type )
+ {
+     String name = varDef.id().data();
+     TypeRef *declType = walkTypeRef( varDef.type_ref() );
+
+     ObjectField *field = ObjectField::cons( varDef.id().loc(), type, declType, name );
+     return field;
+ }
+
+ /* Build an IterCall from an iter_call node: a call term for the Call
+  * production, or an expression for the Id and Expr productions. Returns 0
+  * if no production matches. */
+ IterCall *walkIterCall( iter_call Tree )
+ {
+     switch ( Tree.prodName() ) {
+     case iter_call::Call: {
+         LangVarRef *callee = walkVarRef( Tree.var_ref() );
+         CallArgVect *args = walkCallArgList( Tree.call_arg_list() );
+         LangTerm *callTerm = LangTerm::cons( callee->loc, callee, args );
+         return IterCall::cons( IterCall::Call, callTerm );
+     }
+
+     case iter_call::Id: {
+         /* A bare identifier is wrapped as a variable reference resolved
+          * against the current namespace, struct and scope. */
+         String name = Tree.id().data();
+         LangVarRef *ref = LangVarRef::cons( Tree.id().loc(),
+                 curNspace(), curStruct(), curScope(), name );
+         LangTerm *refTerm = LangTerm::cons( Tree.id().loc(),
+                 LangTerm::VarRefType, ref );
+         LangExpr *refExpr = LangExpr::cons( refTerm );
+         return IterCall::cons( IterCall::Expr, refExpr );
+     }
+
+     case iter_call::Expr: {
+         LangExpr *value = walkCodeExpr( Tree.code_expr() );
+         return IterCall::cons( IterCall::Expr, value );
+     }
+     }
+
+     return 0;
+ }
+
+ /* Walk one elsif clause inside its own scope, producing an IfType
+  * statement with a null else part. */
+ LangStmt *walkElsifClause( elsif_clause elsifClause )
+ {
+     pushScope();
+
+     LangExpr *cond = walkCodeExpr( elsifClause.code_expr() );
+     StmtList *body = walkBlockOrSingle( elsifClause.block_or_single() );
+     LangStmt *clause = LangStmt::cons( LangStmt::IfType, cond, body, 0 );
+
+     popScope();
+     return clause;
+ }
+
+ /* Walk an optional else. Returns 0 when there is no Else production;
+  * otherwise the body is walked in its own scope and wrapped as an ElseType
+  * statement. */
+ LangStmt *walkOptionalElse( optional_else optionalElse )
+ {
+     if ( optionalElse.prodName() != optional_else::Else )
+         return 0;
+
+     pushScope();
+     StmtList *body = walkBlockOrSingle( optionalElse.block_or_single() );
+     LangStmt *elseStmt = LangStmt::cons( LangStmt::ElseType, body );
+     popScope();
+
+     return elseStmt;
+ }
+
+ /* Walk a chain of elsif clauses. Each Clause becomes a statement whose
+  * elsePart is the result of walking the rest of the chain; the chain ends
+  * with the optional else. Returns 0 if no production matches. */
+ LangStmt *walkElsifList( elsif_list elsifList )
+ {
+     switch ( elsifList.prodName() ) {
+     case elsif_list::Clause: {
+         LangStmt *clause = walkElsifClause( elsifList.elsif_clause() );
+         clause->elsePart = walkElsifList( elsifList._elsif_list() );
+         return clause;
+     }
+     case elsif_list::OptElse:
+         return walkOptionalElse( elsifList.optional_else() );
+     }
+
+     return 0;
+ }
+
+ /*
+  * Walk one case clause of a switch over VarRef. The clause is processed in
+  * its own scope, with the walked variable reference recorded on scopeTop.
+  * The clause's test (a pattern match, a production-name comparison, or
+  * both) becomes the condition of an IfType statement whose body is the
+  * clause's block.
+  */
+ LangStmt *walkCaseClause( case_clause CaseClause, var_ref VarRef )
+ {
+     pushScope();
+
+     LangVarRef *subject = walkVarRef( VarRef );
+     scopeTop->caseClauseVarRef = subject;
+
+     LangExpr *cond = 0;
+     switch ( CaseClause.prodName() ) {
+     case case_clause::Pattern: {
+         /* A match pattern. */
+         PatternItemList *pattern = walkPattern( CaseClause.pattern(), subject );
+         cond = match( CaseClause.loc(), subject, pattern );
+         break;
+     }
+     case case_clause::Id: {
+         /* An identifier to be interpreted as a production name. */
+         String prodId = CaseClause.id().text().c_str();
+         cond = prodCompare( CaseClause.loc(), subject, prodId, 0 );
+         break;
+     }
+     case case_clause::IdPat: {
+         /* Production name plus a pattern: the match expression is handed
+          * to prodCompare as its last argument. */
+         String prodId = CaseClause.id().text().c_str();
+         PatternItemList *pattern = walkPattern( CaseClause.pattern(), subject );
+         LangExpr *patMatch = match( CaseClause.loc(), subject, pattern );
+         cond = prodCompare( CaseClause.loc(), subject, prodId, patMatch );
+         break;
+     }
+     }
+
+     StmtList *body = walkBlockOrSingle( CaseClause.block_or_single() );
+
+     popScope();
+
+     return LangStmt::cons( LangStmt::IfType, cond, body );
+ }
+
+ /* Walk a list of case clauses into a chain of statements: each clause's
+  * else part is set to the walk of the remaining clauses, and a trailing
+  * default clause becomes an ElseType statement. Returns 0 if no production
+  * matches. */
+ LangStmt *walkCaseClauseList( case_clause_list CaseClauseList, var_ref VarRef )
+ {
+     switch ( CaseClauseList.prodName() ) {
+     case case_clause_list::Recursive: {
+         LangStmt *head = walkCaseClause( CaseClauseList.case_clause(), VarRef );
+         LangStmt *rest = walkCaseClauseList(
+                 CaseClauseList._case_clause_list(), VarRef );
+         head->setElsePart( rest );
+         return head;
+     }
+
+     case case_clause_list::BaseCase:
+         return walkCaseClause( CaseClauseList.case_clause(), VarRef );
+
+     case case_clause_list::BaseDefault: {
+         /* The default body gets its own scope. */
+         pushScope();
+         StmtList *body = walkBlockOrSingle(
+                 CaseClauseList.default_clause().block_or_single() );
+         popScope();
+         return LangStmt::cons( LangStmt::ElseType, body );
+     }
+     }
+
+     return 0;
+ }
+
+ /* Walk a struct member variable definition as a StructFieldType field and
+  * register it through structVarDef(). */
+ void walkStructVarDef( struct_var_def StructVarDef )
+ {
+     ObjectField *field = walkVarDef( StructVarDef.var_def(),
+             ObjectField::StructFieldType );
+
+     structVarDef( field->loc, field );
+ }
+
+ /* Wrap the referenced type in a TypeRef::Ref, located at the REF token. */
+ TypeRef *walkReferenceTypeRef( reference_type_ref ReferenceTypeRef )
+ {
+     TypeRef *target = walkTypeRef( ReferenceTypeRef.type_ref() );
+     TypeRef *refType = TypeRef::cons( ReferenceTypeRef.REF().loc(), TypeRef::Ref, target );
+     return refType;
+ }
+
+ /*
+  * Walk one parameter definition and register it via addParam().
+  *
+  * The Type production yields a by-value parameter (ParamValType); the Ref
+  * production yields a reference parameter (ParamRefType) whose type is
+  * wrapped by walkReferenceTypeRef(). Returns whatever addParam() returns.
+  */
+ ObjectField *walkParamVarDef( param_var_def paramVarDef )
+ {
+     String id = paramVarDef.id().data();
+     TypeRef *typeRef = 0;
+
+     /* Start from a defined value so the variable is never read
+      * uninitialized if the switch below matches no production. */
+     ObjectField::Type type = ObjectField::ParamValType;
+
+     switch ( paramVarDef.prodName() ) {
+     case param_var_def::Type:
+         typeRef = walkTypeRef( paramVarDef.type_ref() );
+         type = ObjectField::ParamValType;
+         break;
+     case param_var_def::Ref:
+         typeRef = walkReferenceTypeRef( paramVarDef.reference_type_ref() );
+         type = ObjectField::ParamRefType;
+         break;
+     }
+
+     return addParam( paramVarDef.id().loc(), type, typeRef, id );
+ }
+
+ /* Follow the param_var_def_seq chain until it compares equal to 0,
+  * appending each walked parameter to a newly allocated ParameterList. */
+ ParameterList *walkParamVarDefSeq( param_var_def_seq paramVarDefSeq )
+ {
+     ParameterList *params = new ParameterList;
+
+     for ( ; paramVarDefSeq != 0;
+             paramVarDefSeq = paramVarDefSeq._param_var_def_seq() )
+     {
+         ObjectField *field = walkParamVarDef( paramVarDefSeq.param_var_def() );
+         appendParam( params, field );
+     }
+
+     return params;
+ }
+
+ /* Walk a parameter list by delegating to its contained sequence. */
+ ParameterList *walkParamVarDefList( param_var_def_list paramVarDefList )
+ {
+     return walkParamVarDefSeq( paramVarDefList.param_var_def_seq() );
+ }
+
+ /* True exactly when the optional export marker is the Export production. */
+ bool walkOptExport( opt_export OptExport )
+ {
+     const bool exported = ( OptExport.prodName() == opt_export::Export );
+     return exported;
+ }
+
+ /* Walk a function definition between blockOpen() and blockClose(): export
+  * flag, return type, name, parameters and body are gathered in that order
+  * and handed to functionDef(). */
+ void walkFunctionDef( function_def FunctionDef )
+ {
+     ObjectDef *frame = blockOpen();
+
+     bool exported = walkOptExport( FunctionDef.opt_export() );
+     TypeRef *returnType = walkTypeRef( FunctionDef.type_ref() );
+     String name = FunctionDef.id().data();
+     ParameterList *params = walkParamVarDefList( FunctionDef.ParamVarDefList() );
+     StmtList *body = walkLangStmtList( FunctionDef.lang_stmt_list() );
+
+     functionDef( body, frame, params, returnType, name, exported );
+
+     blockClose();
+ }
+
+ /* Walk an in-host function definition between blockOpen() and
+  * blockClose(). The HostFunc token data, type, name and parameters are
+  * passed to inHostDef(); its final argument is always false. */
+ void walkInHostDef( in_host_def InHostDef )
+ {
+     ObjectDef *frame = blockOpen();
+
+     TypeRef *returnType = walkTypeRef( InHostDef.type_ref() );
+     String name = InHostDef.id().data();
+     ParameterList *params = walkParamVarDefList( InHostDef.ParamVarDefList() );
+
+     inHostDef( InHostDef.HostFunc().data(), frame, params, returnType, name, false );
+
+     blockClose();
+ }
+
+ /* Walk an iterator definition between blockOpen() and blockClose(): name,
+  * parameters and body are gathered and handed to iterDef(). */
+ void walkIterDef( iter_def IterDef )
+ {
+     ObjectDef *frame = blockOpen();
+
+     String name = IterDef.id().data();
+     ParameterList *params = walkParamVarDefList( IterDef.ParamVarDefList() );
+     StmtList *body = walkLangStmtList( IterDef.lang_stmt_list() );
+
+     iterDef( body, frame, params, name );
+
+     blockClose();
+ }
+
+ /*
+  * Dispatch a single item found inside a struct body to the walk function
+  * for its production. Nothing is returned; any value a walk function
+  * returns (walkExportDef returns a statement that walkRootItem uses) is
+  * discarded here.
+  */
+ void walkStructItem( struct_item structItem )
+ {
+ switch ( structItem.prodName() ) {
+ case struct_item::Rl:
+ walkRlDef( structItem.rl_def() );
+ break;
+ case struct_item::StructVar:
+ walkStructVarDef( structItem.struct_var_def() );
+ break;
+ case struct_item::Token:
+ walkTokenDef( structItem.token_def() );
+ break;
+ case struct_item::IgnoreCollector:
+ walkIgnoreCollector( structItem.ic_def() );
+ break;
+ case struct_item::Ignore:
+ walkIgnoreDef( structItem.ignore_def() );
+ break;
+ case struct_item::Literal:
+ walkLiteralDef( structItem.literal_def() );
+ break;
+ case struct_item::Cfl:
+ walkCflDef( structItem.cfl_def() );
+ break;
+ case struct_item::Region:
+ walkLexRegion( structItem.region_def() );
+ break;
+ case struct_item::Struct:
+ /* Nested struct definition. */
+ walkStructDef( structItem.struct_def() );
+ break;
+ case struct_item::Function:
+ walkFunctionDef( structItem.function_def() );
+ break;
+ case struct_item::InHost:
+ walkInHostDef( structItem.in_host_def() );
+ break;
+ case struct_item::Iter:
+ walkIterDef( structItem.iter_def() );
+ break;
+ case struct_item::PreEof:
+ walkPreEof( structItem.pre_eof_def() );
+ break;
+ case struct_item::Export:
+ walkExportDef( structItem.export_def() );
+ break;
+ case struct_item::Precedence:
+ walkPrecedenceDef( structItem.precedence_def() );
+ break;
+ /* The ListEl and MapEl productions below are disabled (commented out);
+  * such items currently fall through the switch without any action. */
+// case struct_item::ListEl:
+// listElDef( structItem.list_el_def().id().data() );
+// break;
+// case struct_item::MapEl: {
+// map_el_def Def = structItem.map_el_def();
+// TypeRef *keyTr = walkTypeRef( Def.type_ref() );
+// mapElDef( Def.id().data(), keyTr );
+// break;
+// }
+ case struct_item::Alias:
+ walkAliasDef( structItem.alias_def() );
+ break;
+ }
+ }
+
+ /* Walk a struct definition: open it with structHead(), walk each contained
+  * item, then pop both the struct stack and the namespace stack. */
+ void walkStructDef( struct_def structDef )
+ {
+     String structName = structDef.id().data();
+     structHead( structDef.id().loc(), curNspace(), structName, ObjectDef::StructType );
+
+     _lrepeat_struct_item items = structDef.ItemList();
+     for ( RepeatIter<struct_item> it( items ); !it.end(); it.next() )
+         walkStructItem( it.value() );
+
+     /* Presumably these undo pushes made by structHead() -- confirm there. */
+     structStack.pop();
+     namespaceStack.pop();
+ }
+
+ /* Walk a namespace definition: create the namespace, walk its items (any
+  * statements they produce go to stmtList), then pop the namespace stack. */
+ void walkNamespaceDef( namespace_def NamespaceDef, StmtList *stmtList )
+ {
+     String nspaceName = NamespaceDef.id().data();
+
+     createNamespace( NamespaceDef.id().loc(), nspaceName );
+     walkNamespaceItemList( NamespaceDef.ItemList(), stmtList );
+     namespaceStack.pop();
+ }
+
+ /*
+  * Translate one host-code item of a reduction body into ReduceTextItem
+  * entries appended to list.
+  *
+  * Reference tokens become LhsRef / RhsRef / TreeRef / RhsLoc items carrying
+  * either the token text (txt) or a number parsed from it (n). A nested
+  * block is emitted as "{", its recursively walked contents, then "}". Any
+  * other item is plain text, which is merged into the preceding item when
+  * that item is also plain text.
+  */
+ void walkRedItem( host_item item, ReduceTextItemList &list )
+ {
+     if ( item.RED_LHS() != 0 ) {
+         ReduceTextItem *lhs = new ReduceTextItem;
+         lhs->type = ReduceTextItem::LhsRef;
+         list.append( lhs );
+         return;
+     }
+
+     if ( item.RED_RHS_REF() != 0 ) {
+         ReduceTextItem *ref = new ReduceTextItem;
+         ref->type = ReduceTextItem::RhsRef;
+         ref->txt = item.RED_RHS_REF().text().c_str();
+         list.append( ref );
+         return;
+     }
+
+     if ( item.RED_TREE_REF() != 0 ) {
+         ReduceTextItem *ref = new ReduceTextItem;
+         ref->type = ReduceTextItem::TreeRef;
+         ref->txt = item.RED_TREE_REF().text().c_str();
+         list.append( ref );
+         return;
+     }
+
+     if ( item.RED_RHS_LOC() != 0 ) {
+         ReduceTextItem *ref = new ReduceTextItem;
+         ref->type = ReduceTextItem::RhsLoc;
+         ref->txt = item.RED_RHS_LOC().text().c_str();
+         list.append( ref );
+         return;
+     }
+
+     /* Numbered forms: the number is parsed after skipping a fixed-length
+      * prefix of the token text (one character, or two for tree refs). */
+     if ( item.RED_RHS_NREF() != 0 ) {
+         ReduceTextItem *ref = new ReduceTextItem;
+         ref->type = ReduceTextItem::RhsRef;
+         ref->n = atoi( item.RED_RHS_NREF().text().c_str() + 1 );
+         list.append( ref );
+         return;
+     }
+
+     if ( item.RED_TREE_NREF() != 0 ) {
+         ReduceTextItem *ref = new ReduceTextItem;
+         ref->type = ReduceTextItem::TreeRef;
+         ref->n = atoi( item.RED_TREE_NREF().text().c_str() + 2 );
+         list.append( ref );
+         return;
+     }
+
+     if ( item.RED_RHS_NLOC() != 0 ) {
+         ReduceTextItem *ref = new ReduceTextItem;
+         ref->type = ReduceTextItem::RhsLoc;
+         ref->n = atoi( item.RED_RHS_NLOC().text().c_str() + 1 );
+         list.append( ref );
+         return;
+     }
+
+     if ( item.RED_OPEN() != 0 ) {
+         ReduceTextItem *openBrace = new ReduceTextItem;
+         openBrace->type = ReduceTextItem::Txt;
+         openBrace->txt = "{";
+         list.append( openBrace );
+
+         walkRedItemList( item.HostItems(), list );
+
+         ReduceTextItem *closeBrace = new ReduceTextItem;
+         closeBrace->type = ReduceTextItem::Txt;
+         closeBrace->txt = "}";
+         list.append( closeBrace );
+         return;
+     }
+
+     /* Plain text: extend the previous text item if there is one. */
+     if ( list.length() > 0 && list.tail->type == ReduceTextItem::Txt ) {
+         std::string more = item.text();
+         list.tail->txt.append( more.c_str(), more.size() );
+         return;
+     }
+
+     ReduceTextItem *plain = new ReduceTextItem;
+     plain->type = ReduceTextItem::Txt;
+     plain->txt = item.text().c_str();
+     list.append( plain );
+ }
+
+ /* Walk each host item of the repeat in order, appending to list. */
+ void walkRedItemList( _lrepeat_host_item itemList, ReduceTextItemList &list )
+ {
+     for ( RepeatIter<host_item> it( itemList ); !it.end(); it.next() )
+         walkRedItem( it.value(), list );
+ }
+
+ /* Create a ReduceNonTerm for the given type, located at the RED_OPEN
+  * token, fill its item list from the host items, and append it to the
+  * current reduction's reduceNonTerms. */
+ void walkRedNonTerm( red_nonterm RN )
+ {
+     InputLoc openLoc = RN.RED_OPEN().loc();
+     TypeRef *nonTermType = walkTypeRef( RN.type_ref() );
+
+     ReduceNonTerm *nonTerm = new ReduceNonTerm( openLoc, nonTermType );
+     walkRedItemList( RN.HostItems(), nonTerm->itemList );
+
+     curReduction()->reduceNonTerms.append( nonTerm );
+ }
+
+ /*
+  * Create a ReduceAction for the given type and production id, located at
+  * the RED_OPEN token, fill its item list from the host items, and append
+  * it to the current reduction's reduceActions.
+  *
+  * The host items are consumed only through walkRedItemList(); the previous
+  * flattened-text local was never read and has been removed.
+  */
+ void walkRedAction( red_action RA )
+ {
+     InputLoc loc = RA.RED_OPEN().loc();
+
+     TypeRef *typeRef = walkTypeRef( RA.type_ref() );
+
+     ReduceAction *ra = new ReduceAction( loc, typeRef, RA.id().data() );
+
+     walkRedItemList( RA.HostItems(), ra->itemList );
+
+     curReduction()->reduceActions.append( ra );
+ }
+
+ /* Dispatch a reduction item to the nonterminal or action walker. Other
+  * productions are ignored. */
+ void walkReductionItem( reduction_item reductionItem )
+ {
+     switch ( reductionItem.prodName() ) {
+     case reduction_item::NonTerm:
+         walkRedNonTerm( reductionItem.red_nonterm() );
+         return;
+     case reduction_item::Action:
+         walkRedAction( reductionItem.red_action() );
+         return;
+     }
+ }
+
+ /* Walk every reduction item of the repeat in order. */
+ void walkReductionList( _lrepeat_reduction_item itemList )
+ {
+     for ( RepeatIter<reduction_item> it( itemList ); !it.end(); it.next() )
+         walkReductionItem( it.value() );
+ }
+
+ /*
+  * Dispatch one root-level item to the walk function for its production.
+  *
+  * Most productions are definitions handed to their walker with no result
+  * used. Statement, Export and Global yield a LangStmt that is appended to
+  * stmtList when non-null; Include yields a whole StmtList that is appended
+  * when non-null; Namespace passes stmtList down so nested items can append
+  * to it.
+  */
+ void walkRootItem( root_item rootItem, StmtList *stmtList )
+ {
+ switch ( rootItem.prodName() ) {
+ case root_item::Rl:
+ walkRlDef( rootItem.rl_def() );
+ break;
+ case root_item::Token:
+ walkTokenDef( rootItem.token_def() );
+ break;
+ case root_item::IgnoreCollector:
+ walkIgnoreCollector( rootItem.ic_def() );
+ break;
+ case root_item::Ignore:
+ walkIgnoreDef( rootItem.ignore_def() );
+ break;
+ case root_item::Literal:
+ walkLiteralDef( rootItem.literal_def() );
+ break;
+ case root_item::Cfl:
+ walkCflDef( rootItem.cfl_def() );
+ break;
+ case root_item::Region:
+ walkLexRegion( rootItem.region_def() );
+ break;
+ case root_item::Statement: {
+ /* A null statement is skipped rather than appended. */
+ LangStmt *stmt = walkStatement( rootItem.statement() );
+ if ( stmt != 0 )
+ stmtList->append( stmt );
+ break;
+ }
+ case root_item::Struct:
+ walkStructDef( rootItem.struct_def() );
+ break;
+ case root_item::Namespace:
+ walkNamespaceDef( rootItem.namespace_def(), stmtList );
+ break;
+ case root_item::Function:
+ walkFunctionDef( rootItem.function_def() );
+ break;
+ case root_item::InHost:
+ walkInHostDef( rootItem.in_host_def() );
+ break;
+ case root_item::Iter:
+ walkIterDef( rootItem.iter_def() );
+ break;
+ case root_item::PreEof:
+ walkPreEof( rootItem.pre_eof_def() );
+ break;
+ case root_item::Export: {
+ LangStmt *stmt = walkExportDef( rootItem.export_def() );
+ if ( stmt != 0 )
+ stmtList->append( stmt );
+ break;
+ }
+ case root_item::Alias:
+ walkAliasDef( rootItem.alias_def() );
+ break;
+ case root_item::Precedence:
+ walkPrecedenceDef( rootItem.precedence_def() );
+ break;
+ case root_item::Include: {
+ /* walkInclude may yield no list; only append when one came back. */
+ StmtList *includeList = walkInclude( rootItem._include() );
+ if ( includeList )
+ stmtList->append( *includeList );
+ break;
+ }
+ case root_item::Global: {
+ LangStmt *stmt = walkGlobalDef( rootItem.global_def() );
+ if ( stmt != 0 )
+ stmtList->append( stmt );
+ break;
+ }
+ case root_item::Reduction: {
+ /* Create the reduction, walk its items (which are added to the
+  * current reduction), then pop the reduction stack. */
+ reduction_def RD = rootItem.reduction_def();
+
+ InputLoc loc = RD.REDUCTION().loc();
+ String id = RD.id().data();
+
+ createReduction( loc, id );
+
+ walkReductionList( RD.ItemList() );
+
+ reductionStack.pop();
+ break;
+ }}
+ }
+
+ /*
+  * Dispatch one item found inside a namespace to the walk function for its
+  * production.
+  *
+  * Definitions are handed to their walker. Global yields a LangStmt that is
+  * appended to stmtList when non-null; Include yields a StmtList that is
+  * appended when non-null; a nested Namespace receives stmtList so its own
+  * items can append to it.
+  */
+ void walkNamespaceItem( namespace_item item, StmtList *stmtList )
+ {
+     switch ( item.prodName() ) {
+     case namespace_item::Rl:
+         walkRlDef( item.rl_def() );
+         break;
+     case namespace_item::Token:
+         walkTokenDef( item.token_def() );
+         break;
+     case namespace_item::IgnoreCollector:
+         walkIgnoreCollector( item.ic_def() );
+         break;
+     case namespace_item::Ignore:
+         walkIgnoreDef( item.ignore_def() );
+         break;
+     case namespace_item::Literal:
+         walkLiteralDef( item.literal_def() );
+         break;
+     case namespace_item::Cfl:
+         walkCflDef( item.cfl_def() );
+         break;
+     case namespace_item::Region:
+         walkLexRegion( item.region_def() );
+         break;
+     case namespace_item::Struct:
+         walkStructDef( item.struct_def() );
+         break;
+     case namespace_item::Namespace:
+         walkNamespaceDef( item.namespace_def(), stmtList );
+         break;
+     case namespace_item::Function:
+         walkFunctionDef( item.function_def() );
+         break;
+     case namespace_item::InHost:
+         walkInHostDef( item.in_host_def() );
+         break;
+     case namespace_item::Iter:
+         walkIterDef( item.iter_def() );
+         break;
+     case namespace_item::PreEof:
+         walkPreEof( item.pre_eof_def() );
+         break;
+     case namespace_item::Alias:
+         walkAliasDef( item.alias_def() );
+         break;
+     case namespace_item::Precedence:
+         walkPrecedenceDef( item.precedence_def() );
+         break;
+     case namespace_item::Include: {
+         /* Guard against a null list before dereferencing, matching the
+          * handling of includes in walkRootItem. */
+         StmtList *includeList = walkInclude( item._include() );
+         if ( includeList != 0 )
+             stmtList->append( *includeList );
+         break;
+     }
+     case namespace_item::Global: {
+         LangStmt *stmt = walkGlobalDef( item.global_def() );
+         if ( stmt != 0 )
+             stmtList->append( stmt );
+         break;
+     }}
+ }
+
+ /* True exactly when the left no-ignore marker is the Ni production. */
+ bool walkNoIgnoreLeft( no_ignore_left OptNoIngore )
+ {
+     const bool isNi = ( OptNoIngore.prodName() == no_ignore_left::Ni );
+     return isNi;
+ }
+
+ /* True exactly when the right no-ignore marker is the Ni production. */
+ bool walkNoIgnoreRight( no_ignore_right OptNoIngore )
+ {
+     const bool isNi = ( OptNoIngore.prodName() == no_ignore_right::Ni );
+     return isNi;
+ }
+
+ /* True when the optional end-of-stream marker is present in either of its
+  * two spellings (the Dot or the Eos production). */
+ bool walkOptEos( opt_eos OptEos )
+ {
+     switch ( OptEos.prodName() ) {
+     case opt_eos::Dot:
+     case opt_eos::Eos:
+         return true;
+     default:
+         return false;
+     }
+ }
+
+ /* Define one backtick literal, passing along its left and right no-ignore
+  * flags. */
+ void walkLiteralItem( literal_item literalItem )
+ {
+     bool noIgnoreLeft = walkNoIgnoreLeft( literalItem.no_ignore_left() );
+     bool noIgnoreRight = walkNoIgnoreRight( literalItem.no_ignore_right() );
+     String text = literalItem.backtick_lit().data();
+
+     literalDef( literalItem.backtick_lit().loc(), text, noIgnoreLeft, noIgnoreRight );
+ }
+
+ /* Walk a literal list. For the Item production the nested list is walked
+  * first, and this node's own item is handled after it. */
+ void walkLiteralList( literal_list literalList )
+ {
+     const bool hasNested = ( literalList.prodName() == literal_list::Item );
+     if ( hasNested ) {
+         walkLiteralList( literalList._literal_list() );
+     }
+
+     walkLiteralItem( literalList.literal_item() );
+ }
+
+ /* Walk a literal definition by walking the list of literals it holds. */
+ void walkLiteralDef( literal_def literalDef )
+ {
+     literal_list literals = literalDef.literal_list();
+     walkLiteralList( literals );
+ }
+
+ /* Walk each namespace item in order; statements they yield are appended
+  * to stmtList. */
+ void walkNamespaceItemList( _lrepeat_namespace_item itemList, StmtList *stmtList )
+ {
+     for ( RepeatIter<namespace_item> it( itemList ); !it.end(); it.next() )
+         walkNamespaceItem( it.value(), stmtList );
+ }
+
+ /* Walk every root item in order, collecting the statements they yield in
+  * a newly allocated StmtList, which is returned. */
+ StmtList *walkRootItemList( _lrepeat_root_item rootItemList )
+ {
+     StmtList *collected = new StmtList;
+
+     for ( RepeatIter<root_item> it( rootItemList ); !it.end(); it.next() )
+         walkRootItem( it.value(), collected );
+
+     return collected;
+ }
+
+ virtual void go( long activeRealm );
+};
+
+/*
+ * Run the colm parsing program over inputFileName and, when a parse tree
+ * comes back, walk it into the compiler's root code block.
+ *
+ * activeRealm is forwarded to colm_set_debug(). On a parse failure the
+ * error is reported, gblErrorCount is incremented and pd->rootCodeBlock is
+ * left unset. The colm program is released on both paths.
+ */
+void LoadColm::go( long activeRealm )
+{
+    LoadColm::init();
+
+    /* Null-terminated argument vector; two real arguments are passed. */
+    const char *argv[3];
+    argv[0] = "load-colm";
+    argv[1] = inputFileName;
+    argv[2] = 0;
+
+    colm_program *program = colm_new_program( &colm_object );
+    colm_set_debug( program, activeRealm );
+    colm_run_program( program, 2, argv );
+
+    /* Extract the parse tree. */
+    start Start = ColmTree( program );
+    str Error = ColmError( program );
+
+    if ( Start == 0 ) {
+        gblErrorCount += 1;
+        InputLoc loc = Error.loc();
+        error(loc) << inputFileName << ": parse error: " << Error.text() << std::endl;
+
+        /* Error was obtained from the program, so release the program only
+         * after the message is written. Without this the failure path
+         * leaks the program. */
+        colm_delete_program( program );
+        return;
+    }
+
+    StmtList *stmtList = walkRootItemList( Start.RootItemList() );
+    pd->streamFileNames.append( colm_extract_fns( program ) );
+    colm_delete_program( program );
+
+    pd->rootCodeBlock = CodeBlock::cons( stmtList, 0 );
+}
+
+/* Factory: allocate a LoadColm for the given compiler and input file and
+ * hand it back through the BaseParser interface. */
+BaseParser *consLoadColm( Compiler *pd, const char *inputFileName )
+{
+    BaseParser *parser = new LoadColm( pd, inputFileName );
+    return parser;
+}
diff --git a/src/loadfinal.h b/src/loadfinal.h
new file mode 100644
index 00000000..0c888f9a
--- /dev/null
+++ b/src/loadfinal.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2013-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _COLM_LOADCOLM_H
+#define _COLM_LOADCOLM_H
+
+#include "parser.h"
+
+BaseParser *consLoadColm( Compiler *pd, const char *inputFileName );
+
+#endif /* _COLM_LOADCOLM_H */
+
diff --git a/src/loadinit.cc b/src/loadinit.cc
new file mode 100644
index 00000000..f5281da3
--- /dev/null
+++ b/src/loadinit.cc
@@ -0,0 +1,416 @@
+/*
+ * Copyright 2006-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "loadinit.h"
+
+#include <string.h>
+
+#include <iostream>
+
+#include "gen/if1.h"
+
+using std::string;
+
+extern colm_sections colm_object;
+
+/* Converts a prod_el_list parse tree into ProdEl entries appended to list.
+ * The nested list is walked before this node's own element. defName is the
+ * name of the definition being built; it is used to avoid a capture name
+ * that equals the definition name. */
+void LoadInit::walkProdElList( String defName, ProdElList *list, prod_el_list &prodElList )
+{
+ /* Handle the nested list first. */
+ if ( prodElList.ProdElList() != 0 ) {
+ prod_el_list nested = prodElList.ProdElList();
+ walkProdElList( defName, list, nested );
+ }
+
+ /* Nothing further to do when this node carries no element. */
+ if ( prodElList.ProdEl() == 0 )
+ return;
+
+ prod_el element = prodElList.ProdEl();
+ String typeName = element.Id().text().c_str();
+
+ /* Choose the capture name: the explicit one when present, otherwise the
+ * type name, prefixed with an underscore if it equals the def name. */
+ String captureName;
+ if ( element.OptName().Name() != 0 ) {
+ captureName = element.OptName().Name().text().c_str();
+ }
+ else {
+ captureName = typeName;
+ if ( strcmp( captureName, defName ) == 0 )
+ captureName = "_" + defName;
+ }
+
+ ObjectField *captureField = ObjectField::cons( internal,
+ ObjectField::RhsNameType, 0, captureName );
+
+ /* LeftStar is tested last, so it wins if both are present. */
+ RepeatType repeatType = RepeatNone;
+ if ( element.OptRepeat().Star() != 0 )
+ repeatType = RepeatRepeat;
+ if ( element.OptRepeat().LeftStar() != 0 )
+ repeatType = RepeatLeftRepeat;
+
+ appendProdEl( list, prodElName( internal, typeName,
+ NamespaceQual::cons( curNspace() ),
+ captureField, repeatType, false ) );
+}
+
+/* Converts a prod_list parse tree into Production objects appended to
+ * outProdList. The nested list is walked before this node's production. */
+void LoadInit::walkProdList( String defName, LelDefList *outProdList, prod_list &prodList )
+{
+ if ( prodList.ProdList() != 0 ) {
+ prod_list nested = prodList.ProdList();
+ walkProdList( defName, outProdList, nested );
+ }
+
+ /* Build the element list of this production. */
+ ProdElList *elements = new ProdElList;
+ prod_el_list elementTree = prodList.Prod().ProdElList();
+ walkProdElList( defName, elements, elementTree );
+
+ /* The production name is optional; it stays empty when absent. */
+ String prodName;
+ if ( prodList.Prod().OptName().Name() != 0 )
+ prodName = prodList.Prod().OptName().Name().text().c_str();
+
+ const bool hasCommit = prodList.Prod().OptCommit().Commit() != 0;
+
+ prodAppend( outProdList,
+ BaseParser::production( internal, elements, prodName, hasCommit, 0, 0 ) );
+}
+
+/* Converts a lex_factor parse tree into a LexFactor.
+ *
+ * Exactly one form is converted, tested in this order: a literal string, a
+ * named reference, a parenthesised sub-expression, or a low..high range.
+ * Returns 0 when none of the forms is present. */
+LexFactor *LoadInit::walkLexFactor( lex_factor &lexFactor )
+{
+ LexFactor *factor = 0;
+ if ( lexFactor.Literal() != 0 ) {
+ String litString = lexFactor.Literal().text().c_str();
+ Literal *literal = Literal::cons( internal, litString, Literal::LitString );
+ factor = LexFactor::cons( literal );
+ }
+ /* This test was a plain "if", which let the branches below run after a
+ * literal had already been converted. Chaining it keeps the alternatives
+ * mutually exclusive so the first match always wins. */
+ else if ( lexFactor.Id() != 0 ) {
+ String id = lexFactor.Id().text().c_str();
+ factor = lexRlFactorName( id, internal );
+ }
+ else if ( lexFactor.Expr() != 0 ) {
+ lex_expr LexExpr = lexFactor.Expr();
+ LexExpression *expr = walkLexExpr( LexExpr );
+ LexJoin *join = LexJoin::cons( expr );
+ factor = LexFactor::cons( join );
+ }
+ else if ( lexFactor.Low() != 0 ) {
+ String low = lexFactor.Low().text().c_str();
+ Literal *lowLit = Literal::cons( internal, low, Literal::LitString );
+
+ String high = lexFactor.High().text().c_str();
+ Literal *highLit = Literal::cons( internal, high, Literal::LitString );
+
+ Range *range = Range::cons( lowLit, highLit );
+ factor = LexFactor::cons( range );
+ }
+ return factor;
+}
+
+/* Converts a lex_factor_neg parse tree. A node with a nested FactorNeg
+ * becomes a CharNegateType wrapper around the converted nested node;
+ * otherwise the node wraps its plain factor. */
+LexFactorNeg *LoadInit::walkLexFactorNeg( lex_factor_neg &lexFactorNeg )
+{
+ /* Base case: no nested negation, just a factor. */
+ if ( lexFactorNeg.FactorNeg() == 0 ) {
+ lex_factor plain = lexFactorNeg.Factor();
+ LexFactor *converted = walkLexFactor( plain );
+ return LexFactorNeg::cons( converted );
+ }
+
+ /* Recursive case: convert the nested node, then negate it. */
+ lex_factor_neg inner = lexFactorNeg.FactorNeg();
+ LexFactorNeg *innerNeg = walkLexFactorNeg( inner );
+ return LexFactorNeg::cons( innerNeg, LexFactorNeg::CharNegateType );
+}
+
+/* Converts a lex_factor_rep parse tree. Star and Plus nodes wrap the
+ * converted nested repetition with StarType or PlusType respectively (Star
+ * is tested first); any other node wraps its FactorNeg. */
+LexFactorRep *LoadInit::walkLexFactorRep( lex_factor_rep &lexFactorRep )
+{
+ const bool isStar = lexFactorRep.Star() != 0;
+ if ( isStar || lexFactorRep.Plus() != 0 ) {
+ lex_factor_rep inner = lexFactorRep.FactorRep();
+ LexFactorRep *operand = walkLexFactorRep( inner );
+ if ( isStar )
+ return LexFactorRep::cons( internal, operand, 0, 0, LexFactorRep::StarType );
+ return LexFactorRep::cons( internal, operand, 0, 0, LexFactorRep::PlusType );
+ }
+
+ lex_factor_neg negTree = lexFactorRep.FactorNeg();
+ LexFactorNeg *neg = walkLexFactorNeg( negTree );
+ return LexFactorRep::cons( neg );
+}
+
+/* Converts the repetition tree and wraps the result in a LexFactorAug. */
+LexFactorAug *LoadInit::walkLexFactorAug( lex_factor_rep &lexFactorRep )
+{
+ return LexFactorAug::cons( walkLexFactorRep( lexFactorRep ) );
+}
+
+/* Converts a lex_term parse tree. A node with a nested Term joins the
+ * converted left term with this node's factor, using ConcatType when a Dot
+ * is present and RightFinishType otherwise. A node without a nested Term
+ * wraps its single factor. */
+LexTerm *LoadInit::walkLexTerm( lex_term &lexTerm )
+{
+ /* Base case: a lone factor. */
+ if ( lexTerm.Term() == 0 ) {
+ lex_factor_rep only = lexTerm.FactorRep();
+ LexFactorAug *onlyAug = walkLexFactorAug( only );
+ return LexTerm::cons( onlyAug );
+ }
+
+ /* Left operand first, then the right-hand factor. */
+ lex_term left = lexTerm.Term();
+ LexTerm *leftTerm = walkLexTerm( left );
+
+ lex_factor_rep right = lexTerm.FactorRep();
+ LexFactorAug *rightAug = walkLexFactorAug( right );
+
+ LexTerm::Type joinType = LexTerm::RightFinishType;
+ if ( lexTerm.Dot() != 0 )
+ joinType = LexTerm::ConcatType;
+
+ return LexTerm::cons( leftTerm, rightAug, joinType );
+}
+
+/* Converts a lex_expr parse tree. A node with a nested Expr becomes an
+ * OrType expression of the converted left expression and this node's term;
+ * otherwise the node wraps its single term. */
+LexExpression *LoadInit::walkLexExpr( lex_expr &LexExprTree )
+{
+ /* Base case: a lone term. */
+ if ( LexExprTree.Expr() == 0 ) {
+ lex_term only = LexExprTree.Term();
+ LexTerm *onlyTerm = walkLexTerm( only );
+ return LexExpression::cons( onlyTerm );
+ }
+
+ /* Left operand first, then the right-hand term. */
+ lex_expr left = LexExprTree.Expr();
+ LexExpression *leftExpr = walkLexExpr( left );
+
+ lex_term right = LexExprTree.Term();
+ LexTerm *rightTerm = walkLexTerm( right );
+
+ return LexExpression::cons( leftExpr, rightTerm, LexExpression::OrType );
+}
+
+/* True when the optional no-ignore marker is present in the tree. */
+bool walkNoIgnore( opt_ni OptNi )
+{
+ if ( OptNi.Ni() != 0 )
+ return true;
+ return false;
+}
+
+/* Defines the tokens and ignore patterns found in a token_list parse tree.
+ * The nested list is processed before this node's own definitions. Each
+ * definition consumes one id from pd->nextObjectId. */
+void LoadInit::walkTokenList( token_list &tokenList )
+{
+ if ( tokenList.TokenList() != 0 ) {
+ token_list nested = tokenList.TokenList();
+ walkTokenList( nested );
+ }
+
+ /* A named token. */
+ if ( tokenList.TokenDef() != 0 ) {
+ token_def tokDef = tokenList.TokenDef();
+ String tokenName = tokDef.Id().text().c_str();
+
+ ObjectDef *tokenObj = ObjectDef::cons( ObjectDef::UserType,
+ tokenName, pd->nextObjectId++ );
+
+ lex_expr pattern = tokDef.Expr();
+ LexExpression *patternExpr = walkLexExpr( pattern );
+ LexJoin *patternJoin = LexJoin::cons( patternExpr );
+
+ const bool leftNi = walkNoIgnore( tokDef.LeftNi() );
+ const bool rightNi = walkNoIgnore( tokDef.RightNi() );
+
+ defineToken( internal, tokenName, patternJoin, tokenObj, 0, false, leftNi, rightNi );
+ }
+
+ /* An ignore pattern: defined with an empty name and the ignore flag set. */
+ if ( tokenList.IgnoreDef() != 0 ) {
+ ignore_def ignDef = tokenList.IgnoreDef();
+
+ ObjectDef *ignoreObj = ObjectDef::cons( ObjectDef::UserType,
+ String(), pd->nextObjectId++ );
+
+ lex_expr pattern = ignDef.Expr();
+ LexExpression *patternExpr = walkLexExpr( pattern );
+ LexJoin *patternJoin = LexJoin::cons( patternExpr );
+
+ defineToken( internal, String(), patternJoin, ignoreObj, 0, true, false, false );
+ }
+}
+
+/* Walks the token list of a lexical region item between a
+ * pushRegionSet / popRegionSet pair. */
+void LoadInit::walkLexRegion( item &LexRegion )
+{
+ pushRegionSet( internal );
+
+ token_list regionTokens = LexRegion.TokenList();
+ walkTokenList( regionTokens );
+
+ popRegionSet();
+}
+
+/* Builds a definition from an item: converts its production list, then
+ * registers the result through cflDef under the item's DefId name. */
+void LoadInit::walkDefinition( item &define )
+{
+ String defName = define.DefId().text().c_str();
+ prod_list productions = define.ProdList();
+
+ /* Convert the productions before allocating the object id. */
+ LelDefList *prodDefs = new LelDefList;
+ walkProdList( defName, prodDefs, productions );
+
+ NtDef *ntDef = NtDef::cons( defName, curNspace(), curStruct(), false );
+ ObjectDef *defObj = ObjectDef::cons( ObjectDef::UserType,
+ defName, pd->nextObjectId++ );
+
+ cflDef( ntDef, defObj, prodDefs );
+}
+
+/* Appends two statements to stmtList:
+ *   1. a variable definition assigning the result of a "pop" call,
+ *      qualified through "argv", to a local named "A";
+ *   2. an expression statement that parses type "start" from a list holding
+ *      the result of calling open with A and the literal "r", capturing the
+ *      parser into a local named "P".
+ * NOTE(review): the meaning of the boolean flags passed to parseCmd is not
+ * visible from here -- confirm against its declaration before changing. */
+void LoadInit::consParseStmt( StmtList *stmtList )
+{
+ /* Pop argv, this yields the file name. */
+ CallArgVect *popArgs = new CallArgVect;
+ QualItemVect *popQual = new QualItemVect;
+ popQual->append( QualItem( QualItem::Arrow, internal, String( "argv" ) ) );
+
+ LangVarRef *popRef = LangVarRef::cons( internal, curNspace(), 0,
+ curLocalFrame()->rootScope, NamespaceQual::cons( curNspace() ),
+ popQual, String("pop") );
+ LangExpr *pop = LangExpr::cons( LangTerm::cons( InputLoc(), popRef, popArgs ) );
+
+ /* Local "A", typed with pd->uniqueTypeStr, receives the popped value. */
+ TypeRef *typeRef = TypeRef::cons( internal, pd->uniqueTypeStr );
+ ObjectField *objField = ObjectField::cons( internal,
+ ObjectField::UserLocalType, typeRef, "A" );
+
+ LangStmt *stmt = varDef( objField, pop, LangStmt::AssignType );
+ stmtList->append( stmt );
+
+ /* Construct a literal string 'r', for second arg to open. */
+ ConsItem *modeConsItem = ConsItem::cons( internal,
+ ConsItem::InputText, String("r") );
+ ConsItemList *modeCons = new ConsItemList;
+ modeCons->append( modeConsItem );
+ LangExpr *modeExpr = LangExpr::cons( LangTerm::cons( internal, modeCons ) );
+
+ /* Reference the local A. */
+ LangVarRef *varRef = LangVarRef::cons( internal, curNspace(), 0,
+ curLocalFrame()->rootScope, String("A") );
+ LangExpr *Avalue = LangExpr::cons( LangTerm::cons( internal,
+ LangTerm::VarRefType, varRef ) );
+
+ /* Call open. */
+ LangVarRef *openRef = LangVarRef::cons( internal,
+ curNspace(), 0, curLocalFrame()->rootScope, String("open") );
+ CallArgVect *openArgs = new CallArgVect;
+ openArgs->append( new CallArg(Avalue) );
+ openArgs->append( new CallArg(modeExpr) );
+ LangExpr *open = LangExpr::cons( LangTerm::cons( InputLoc(), openRef, openArgs ) );
+
+ /* Construct a list containing the open stream. */
+ ConsItem *consItem = ConsItem::cons( internal, ConsItem::ExprType, open, ConsItem::TrimDefault );
+ ConsItemList *list = ConsItemList::cons( consItem );
+
+ /* Will capture the parser to "P" */
+ objField = ObjectField::cons( internal,
+ ObjectField::UserLocalType, 0, String("P") );
+
+ /* Ref the start def. */
+ NamespaceQual *nspaceQual = NamespaceQual::cons( curNspace() );
+ typeRef = TypeRef::cons( internal, nspaceQual,
+ String("start"), RepeatNone );
+
+ /* Parse the above list. */
+ LangExpr *parseExpr = parseCmd( internal, false, false, objField,
+ typeRef, 0, list, true, false, false, "" );
+ LangStmt *parseStmt = LangStmt::cons( internal, LangStmt::ExprType, parseExpr );
+ stmtList->append( parseStmt );
+}
+
+/* Appends an export statement that assigns the local "P" to an exported
+ * field named "ColmTree" of type "start". */
+void LoadInit::consExportTree( StmtList *stmtList )
+{
+ /* Right-hand side: a reference to P. */
+ LangVarRef *parserRef = LangVarRef::cons( internal, curNspace(), 0,
+ curLocalFrame()->rootScope, String("P") );
+ LangExpr *parserExpr = LangExpr::cons(
+ LangTerm::cons( internal, LangTerm::VarRefType, parserRef ) );
+
+ /* Left-hand side: the exported field. */
+ NamespaceQual *qual = NamespaceQual::cons( curNspace() );
+ TypeRef *treeType = TypeRef::cons( internal, qual, String("start"), RepeatNone );
+ ObjectField *treeField = ObjectField::cons( internal,
+ ObjectField::StructFieldType, treeType, String("ColmTree") );
+
+ stmtList->append( exportStmt( treeField, LangStmt::AssignType, parserExpr ) );
+}
+
+void LoadInit::consExportError( StmtList *stmtList )
+{
+ LangVarRef *varRef = LangVarRef::cons( internal, curNspace(), 0,
+ curLocalFrame()->rootScope, String("error") );
+ LangExpr *expr = LangExpr::cons( LangTerm::cons( internal,
+ LangTerm::VarRefType, varRef ) );
+
+ NamespaceQual *nspaceQual = NamespaceQual::cons( curNspace() );
+ TypeRef *typeRef = TypeRef::cons( internal, nspaceQual, String("str"), RepeatNone );
+ ObjectField *program = ObjectField::cons( internal,
+ ObjectField::StructFieldType, typeRef, String("ColmError") );
+ LangStmt *programExport = exportStmt( program, LangStmt::AssignType, expr );
+ stmtList->append( programExport );
+}
+
+/* Runs the linked-in colm program over inputFileName, converts the parsed
+ * items into definitions and lexical regions, then builds the root code
+ * block from the parse and export statements.
+ *
+ * activeRealm is accepted for interface compatibility; this loader always
+ * passes 0 to colm_set_debug. On a parse failure the global error count is
+ * incremented, a message is written to std::cerr, the colm program is
+ * released, and pd->rootCodeBlock is not assigned. */
+void LoadInit::go( long activeRealm )
+{
+ LoadInit::init();
+
+ const char *argv[3];
+ argv[0] = "load-init";
+ argv[1] = inputFileName;
+ argv[2] = 0;
+
+ colm_program *program = colm_new_program( &colm_object );
+ colm_set_debug( program, 0 );
+ colm_run_program( program, 2, argv );
+
+ /* Extract the parse tree. */
+ start Start = ColmTree( program );
+
+ if ( Start == 0 ) {
+ gblErrorCount += 1;
+ std::cerr << inputFileName << ": parse error" << std::endl;
+
+ /* Release the program here as well; the failure path used to leak it. */
+ colm_delete_program( program );
+ return;
+ }
+
+ /* Walk the list of items. */
+ _lrepeat_item ItemList = Start.ItemList();
+ RepeatIter<item> itemIter( ItemList );
+ while ( !itemIter.end() ) {
+
+ item Item = itemIter.value();
+ if ( Item.DefId() != 0 )
+ walkDefinition( Item );
+ else if ( Item.TokenList() != 0 )
+ walkLexRegion( Item );
+ itemIter.next();
+ }
+
+ pd->streamFileNames.append( colm_extract_fns( program ) );
+ colm_delete_program( program );
+
+ /* Allocated only once parsing has succeeded, so the early return above
+ * no longer leaks the list. */
+ StmtList *stmtList = new StmtList;
+
+ consParseStmt( stmtList );
+ consExportTree( stmtList );
+ consExportError( stmtList );
+
+ pd->rootCodeBlock = CodeBlock::cons( stmtList, 0 );
+}
diff --git a/src/loadinit.h b/src/loadinit.h
new file mode 100644
index 00000000..93a18444
--- /dev/null
+++ b/src/loadinit.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright 2013-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _COLM_LOADINIT_H
+#define _COLM_LOADINIT_H
+
+#include <iostream>
+
+#include <avltree.h>
+
+#include "compiler.h"
+#include "parser.h"
+
+struct lex_factor;
+struct lex_factor_neg;
+struct lex_factor_rep;
+struct lex_term;
+struct lex_expr;
+struct token_list;
+struct prod_el_list;
+struct prod_list;
+struct item;
+
+/* Parser front end that runs a linked-in colm program over an input file
+ * and converts the resulting parse tree into the compiler's data
+ * structures. Entry point is go(). */
+struct LoadInit
+:
+ public BaseParser
+{
+ LoadInit( Compiler *pd, const char *inputFileName )
+ :
+ BaseParser(pd),
+ inputFileName(inputFileName)
+ {}
+
+ /* Path of the file to load. The pointer is stored as given, not copied. */
+ const char *inputFileName;
+
+ /* Constructing the colm language data structures from the parse tree. */
+ LexFactor *walkLexFactor( lex_factor &LexFactorTree );
+ LexFactorNeg *walkLexFactorNeg( lex_factor_neg &LexFactorNegTree );
+ LexFactorRep *walkLexFactorRep( lex_factor_rep &LexFactorRepTree );
+ LexFactorAug *walkLexFactorAug( lex_factor_rep &LexFactorRepTree );
+ LexTerm *walkLexTerm( lex_term &LexTerm );
+ LexExpression *walkLexExpr( lex_expr &LexExpr );
+ void walkTokenList( token_list &TokenList );
+ void walkLexRegion( item &LexRegion );
+ void walkProdElList( String defName, ProdElList *list, prod_el_list &prodElList );
+ void walkProdList( String defName, LelDefList *list, prod_list &prodList );
+ void walkDefinition( item &define );
+
+ /* Constructing statements needed to parse and export the input. */
+ void consParseStmt( StmtList *stmtList );
+ void consExportTree( StmtList *stmtList );
+ void consExportError( StmtList *stmtList );
+
+ /* Runs the load; see the definition in loadinit.cc. */
+ virtual void go( long activeRealm );
+};
+
+#endif /* _COLM_LOADINIT_H */
+
diff --git a/src/lookup.cc b/src/lookup.cc
new file mode 100644
index 00000000..cb243dc6
--- /dev/null
+++ b/src/lookup.cc
@@ -0,0 +1,323 @@
+/*
+ * Copyright 2007-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+
+#include <assert.h>
+#include <iostream>
+#include "compiler.h"
+
+/*
+ * Variable Lookup
+ */
+
+using std::cout;
+using std::cerr;
+using std::endl;
+
+/* Returns the object definition behind this type: the language element's
+ * for tree and ref types, the struct definition's for struct types, and the
+ * generic's for generic types.
+ *
+ * Any other type id is a caller error and trips the assert. In builds with
+ * asserts disabled the function returns 0 instead. */
+ObjectDef *UniqueType::objectDef()
+{
+ if ( typeId == TYPE_TREE || typeId == TYPE_REF ) {
+ return langEl->objectDef;
+ }
+ else if ( typeId == TYPE_STRUCT ) {
+ return structEl->structDef->objectDef;
+ }
+ else if ( typeId == TYPE_GENERIC ) {
+ return generic->objDef;
+ }
+
+ /* This should have generated a compiler error. */
+ assert( false );
+
+ /* With NDEBUG the assert vanishes and control used to run off the end of
+ * a non-void function, which is undefined behaviour. Return a null
+ * definition so callers that test for 0 can report the problem. */
+ return 0;
+}
+
+/* Finds a field by name, searching inScope first and then each enclosing
+ * parent scope in turn. Returns 0 when no scope in the chain has it. */
+ObjectField *ObjectDef::findFieldInScope( const NameScope *inScope,
+ const String &name ) const
+{
+ const NameScope *cur = inScope;
+ while ( true ) {
+ FieldMapEl *hit = cur->fieldMap.find( name );
+ if ( hit != 0 )
+ return hit->value;
+
+ /* Reached the outermost scope without a match. */
+ if ( cur->parentScope == 0 )
+ return 0;
+
+ cur = cur->parentScope;
+ }
+}
+
+/* Finds a field visible from this scope by delegating to the owning
+ * object's scope search, starting at this scope. */
+ObjectField *NameScope::findField( const String &name ) const
+{
+ ObjectField *found = owningObj->findFieldInScope( this, name );
+ return found;
+}
+
+/* Finds a method by name, searching this scope first and then each
+ * enclosing parent scope. Returns 0 when no scope in the chain has it. */
+ObjectMethod *NameScope::findMethod( const String &name ) const
+{
+ const NameScope *cur = this;
+ do {
+ MethodMapEl *hit = cur->methodMap.find( name );
+ if ( hit != 0 )
+ return hit->value;
+ cur = cur->parentScope;
+ } while ( cur != 0 );
+
+ return 0;
+}
+
+/* Resolves the qualification chain of this reference, starting the search
+ * in rootScope and moving into each qualifier's object scope in turn.
+ *
+ * The returned VarRefLookup carries the position of the last qualifier
+ * whose type is a pointer (-1 if none), the position of the first const
+ * qualifier (-1 if none), and the final scope together with its owning
+ * object.
+ * NOTE(review): the code dereferences values right after reporting them as
+ * null through error(...) << endp, so endp presumably does not return --
+ * confirm. */
+VarRefLookup LangVarRef::lookupQualification( Compiler *pd, NameScope *rootScope ) const
+{
+ int lastPtrInQual = -1;
+ NameScope *searchScope = rootScope;
+ int firstConstPart = -1;
+
+ for ( QualItemVect::Iter qi = *qual; qi.lte(); qi++ ) {
+ /* Lookup the field in the current qualification. */
+ ObjectField *el = searchScope->findField( qi->data );
+ if ( el == 0 )
+ error(qi->loc) << "cannot resolve qualification " << qi->data << endp;
+
+ /* Lookup the type of the field. */
+ el->typeRef->resolveType( pd );
+ UniqueType *qualUT = el->typeRef->uniqueType;
+
+ /* If we are dealing with an iterator then dereference it. */
+ if ( qualUT->typeId == TYPE_ITER )
+ qualUT = el->typeRef->searchUniqueType;
+
+ /* Is it const? */
+ if ( firstConstPart < 0 && el->isConst )
+ firstConstPart = qi.pos();
+
+ /* Check for references. When loop is done we will have the last one
+ * present, if any. */
+ if ( qualUT->ptr() )
+ lastPtrInQual = qi.pos();
+
+ if ( qi->form == QualItem::Dot ) {
+ /* Cannot dot a reference. Iterator yes (access of the iterator
+ * not the current) */
+ if ( qualUT->ptr() )
+ error(loc) << "dot cannot be used to access a pointer" << endp;
+ }
+ else if ( qi->form == QualItem::Arrow ) {
+ /* Same iterator dereference as above, repeated for the arrow form. */
+ if ( qualUT->typeId == TYPE_ITER )
+ qualUT = el->typeRef->searchUniqueType;
+ }
+
+ /* Continue the search inside the object this qualifier names. */
+ ObjectDef *searchObjDef = qualUT->objectDef();
+ if ( searchObjDef == 0 )
+ error(qi->loc) << "left hand side of qual has no object defintion" << endp;
+ searchScope = searchObjDef->rootScope;
+ }
+
+ return VarRefLookup( lastPtrInQual, firstConstPart, searchScope->owningObj, searchScope );
+}
+
+/* True when this reference resolves in the local scope. With a
+ * qualification only its first item is looked up, as a field; without one
+ * the name itself is looked up as a field and then as a method. */
+bool LangVarRef::isLocalRef() const
+{
+ if ( qual->length() > 0 )
+ return scope->findField( qual->data[0].data ) != 0;
+
+ return scope->findField( name ) != 0 ||
+ scope->findMethod( name ) != 0;
+}
+
+/* For accessing production RHS values inside a switch case that limits our
+ * search to a particular production. True when the scope has a case-clause
+ * variable and this reference resolves in that variable's object scope. */
+bool LangVarRef::isProdRef( Compiler *pd ) const
+{
+ if ( scope->caseClauseVarRef == 0 )
+ return false;
+
+ UniqueType *caseUt = scope->caseClauseVarRef->lookup( pd );
+ ObjectDef *caseObjDef = caseUt->objectDef();
+ NameScope *caseScope = caseObjDef->rootScope;
+
+ /* With a qualification only its first item is considered. */
+ if ( qual->length() > 0 )
+ return caseScope->findField( qual->data[0].data ) != 0;
+
+ return caseScope->findField( name ) != 0 ||
+ caseScope->findMethod( name ) != 0;
+}
+
+/* True when this reference has a struct definition and resolves in that
+ * struct's root scope. With a qualification only its first item is looked
+ * up, as a field; otherwise the name is tried as a field, then a method. */
+bool LangVarRef::isStructRef() const
+{
+ if ( structDef == 0 )
+ return false;
+
+ NameScope *structScope = structDef->objectDef->rootScope;
+
+ if ( qual->length() > 0 )
+ return structScope->findField( qual->data[0].data ) != 0;
+
+ return structScope->findField( name ) != 0 ||
+ structScope->findMethod( name ) != 0;
+}
+
+/* True when the field this reference starts from -- the first qualifier if
+ * there is one, otherwise the name itself -- exists in the local scope and
+ * reports itself as an inbuilt object. */
+bool LangVarRef::isInbuiltObject() const
+{
+ ObjectField *head = 0;
+ if ( qual->length() > 0 )
+ head = scope->findField( qual->data[0].data );
+ else
+ head = scope->findField( name );
+
+ return head != 0 && head->isInbuiltObject();
+}
+
+/* Picks the scope in which this reference's qualification starts, then
+ * resolves the qualification from there.
+ *
+ * Candidates are tried in this order: an explicit namespace qualifier, the
+ * local scope, the object of an enclosing case-clause variable, the struct
+ * definition, and finally the reference's namespace or, failing that, the
+ * root namespace. An explicit namespace qualifier that cannot be found is
+ * reported as an error. */
+VarRefLookup LangVarRef::lookupObj( Compiler *pd ) const
+{
+ NameScope *rootScope;
+
+ if ( nspaceQual != 0 && nspaceQual->qualNames.length() > 0 ) {
+ /* Named so that it does not shadow the nspace member used below. */
+ Namespace *qualNspace = pd->rootNamespace->findNamespace( nspaceQual->qualNames[0] );
+
+ /* An unknown namespace used to be dereferenced unchecked. Report it
+ * the same way an unresolved qualification is reported. */
+ if ( qualNspace == 0 )
+ error(loc) << "cannot resolve namespace " << nspaceQual->qualNames[0] << endp;
+ rootScope = qualNspace->rootScope;
+ }
+ else if ( isLocalRef() )
+ rootScope = scope;
+ else if ( isProdRef( pd ) ) {
+ UniqueType *varUt = scope->caseClauseVarRef->lookup( pd );
+ ObjectDef *searchObjDef = varUt->objectDef();
+ rootScope = searchObjDef->rootScope;
+ }
+ else if ( isStructRef() )
+ rootScope = structDef->objectDef->rootScope;
+ else
+ rootScope = nspace != 0 ? nspace->rootScope : pd->rootNamespace->rootScope;
+
+ return lookupQualification( pd, rootScope );
+}
+
+/* Picks the scope in which a method call's qualification starts, then
+ * resolves the qualification from there.
+ *
+ * Candidates are tried in this order: an explicit namespace qualifier, the
+ * local scope, the struct definition, and finally the reference's namespace
+ * or, failing that, the root namespace. Unlike lookupObj, case-clause
+ * production scopes are not considered. An explicit namespace qualifier
+ * that cannot be found is reported as an error. */
+VarRefLookup LangVarRef::lookupMethodObj( Compiler *pd ) const
+{
+ NameScope *rootScope;
+
+ if ( nspaceQual != 0 && nspaceQual->qualNames.length() > 0 ) {
+ /* Named so that it does not shadow the nspace member used below. */
+ Namespace *qualNspace = pd->rootNamespace->findNamespace( nspaceQual->qualNames[0] );
+
+ /* An unknown namespace used to be dereferenced unchecked. Report it
+ * the same way an unresolved qualification is reported. */
+ if ( qualNspace == 0 )
+ error(loc) << "cannot resolve namespace " << nspaceQual->qualNames[0] << endp;
+ rootScope = qualNspace->rootScope;
+ }
+ else if ( isLocalRef() )
+ rootScope = scope;
+ else if ( isStructRef() )
+ rootScope = structDef->objectDef->rootScope;
+ else
+ rootScope = nspace != 0 ? nspace->rootScope : pd->rootNamespace->rootScope;
+
+ return lookupQualification( pd, rootScope );
+}
+
+
+/* Resolves this reference to a field. The containing object is located
+ * first, then the field is looked up by name in that object's scope; a
+ * missing field is reported as an error. The result records the field, its
+ * unique type, and its search type when it has one. */
+VarRefLookup LangVarRef::lookupField( Compiler *pd ) const
+{
+ /* Locate the object holding the field. */
+ VarRefLookup result = lookupObj( pd );
+
+ ObjectField *found = result.inScope->findField( name );
+ if ( found == 0 )
+ error(loc) << "cannot find name " << name << " in object" << endp;
+
+ TypeRef *foundType = found->typeRef;
+ result.objField = found;
+ result.uniqueType = foundType->uniqueType;
+
+ /* Only overwrite the iterator search type when one is available. */
+ if ( foundType->searchUniqueType != 0 )
+ result.iterSearchUT = foundType->searchUniqueType;
+
+ return result;
+}
+
+/* Returns the resolved type of the field this reference names. For a
+ * field of iterator type, the iterator's search type is returned instead. */
+UniqueType *LangVarRef::lookup( Compiler *pd ) const
+{
+ VarRefLookup fieldLookup = lookupField( pd );
+ ObjectField *field = fieldLookup.objField;
+
+ UniqueType *resolved = field->typeRef->resolveType( pd );
+ if ( resolved->typeId != TYPE_ITER )
+ return resolved;
+
+ /* Deref iterators. */
+ return field->typeRef->searchUniqueType;
+}
+
+/* Resolves this reference to a method. The name is first looked up as a
+ * method of the containing object. If that fails, the name is appended to
+ * the qualification as a dot item and the method "finish" is looked up on
+ * the object that results; failing that too is reported as an error.
+ *
+ * Note that the fallback appends to *qual even though this member function
+ * is const, so the reference is modified by the call.
+ * NOTE(review): the VarRefLookup declared inside the fallback block shadows
+ * the outer one. The value returned is the outer lookup from
+ * lookupMethodObj with the fallback's method stored in it. Confirm whether
+ * the inner lookup was meant to be returned instead. */
+VarRefLookup LangVarRef::lookupMethod( Compiler *pd ) const
+{
+ /* Lookup the object that the field is in. */
+ VarRefLookup lookup = lookupMethodObj( pd );
+
+ /* Find the method. */
+ ObjectMethod *method = lookup.inScope->findMethod( name );
+ if ( method == 0 ) {
+ /* Not found as a method, try it as an object on which we will call a
+ * default function. */
+ qual->append( QualItem( QualItem::Dot, loc, name ) );
+
+ /* Lookup the object that the field is in. */
+ VarRefLookup lookup = lookupObj( pd );
+
+ /* Find the method. */
+ method = lookup.inScope->findMethod( "finish" );
+ if ( method == 0 )
+ error(loc) << "cannot find " << name << "(...) in object" << endp;
+ }
+
+ lookup.objMethod = method;
+ lookup.uniqueType = method->returnUT;
+
+ return lookup;
+}
+
+/* Resolves this reference to the method used by an iterator call. Same
+ * procedure as lookupMethod, except that the containing object is located
+ * with lookupObj rather than lookupMethodObj.
+ *
+ * Note that the fallback appends to *qual even though this member function
+ * is const, so the reference is modified by the call.
+ * NOTE(review): the VarRefLookup declared inside the fallback block shadows
+ * the outer one. The value returned is the outer lookup with the fallback's
+ * method stored in it. Confirm whether the inner lookup was meant to be
+ * returned instead. */
+VarRefLookup LangVarRef::lookupIterCall( Compiler *pd ) const
+{
+ /* Lookup the object that the field is in. */
+ VarRefLookup lookup = lookupObj( pd );
+
+ /* Find the method. */
+ ObjectMethod *method = lookup.inScope->findMethod( name );
+ if ( method == 0 ) {
+ /* Not found as a method, try it as an object on which we will call a
+ * default function. */
+ qual->append( QualItem( QualItem::Dot, loc, name ) );
+
+ /* Lookup the object that the field is in. */
+ VarRefLookup lookup = lookupObj( pd );
+
+ /* Find the method. */
+ method = lookup.inScope->findMethod( "finish" );
+ if ( method == 0 )
+ error(loc) << "cannot find " << name << "(...) in object" << endp;
+ }
+
+ lookup.objMethod = method;
+ lookup.uniqueType = method->returnUT;
+
+ return lookup;
+}
diff --git a/src/main.cc b/src/main.cc
new file mode 100644
index 00000000..a3a7d2c9
--- /dev/null
+++ b/src/main.cc
@@ -0,0 +1,836 @@
+/*
+ * Copyright 2006-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <iostream>
+
+#include "debug.h"
+#include "pcheck.h"
+#include "version.h"
+#include "compiler.h"
+
+#if defined(CONS_INIT)
+#include "consinit.h"
+#elif defined(LOAD_INIT)
+#include "loadinit.h"
+#else
+#include "loadfinal.h"
+#endif
+
+/* Standard stream names used unqualified throughout this file. */
+using std::istream;
+using std::ifstream;
+using std::ostream;
+using std::ofstream;
+using std::ios;
+using std::cin;
+using std::cout;
+using std::cerr;
+using std::endl;
+
+/* Graphviz dot file generation. */
+bool genGraphviz = false;
+
+InputLoc internal;
+
+/* Io globals. */
+/* outStream is pointed at a freshly opened file (or cout) by the open*()
+ * functions below. */
+istream *inStream = 0;
+ostream *outStream = 0;
+/* Input file: the non-option argument seen by processArgs(). */
+const char *inputFn = 0;
+/* Output file given with -o or -p. */
+const char *outputFn = 0;
+/* Intermediate C file and final binary; both chosen in openOutputCompiled(). */
+const char *intermedFn = 0;
+const char *binaryFn = 0;
+/* Files given with -e, -x and -m respectively. */
+const char *exportHeaderFn = 0;
+const char *exportCodeFn = 0;
+const char *commitCodeFn = 0;
+/* Object name; replaced by the argument of -b. */
+const char *objectName = "colm_object";
+bool exportCode = false;
+/* Cleared when -b is given. */
+bool hostAdapters = true;
+
+/* Option flags: -V, -d, -l, -i and -r respectively. */
+bool generateGraphviz = false;
+bool verbose = false;
+bool logging = false;
+bool branchPointInfo = false;
+bool run = false;
+bool addUniqueEmptyProductions = false;
+/* Set by -c: write the C output only, do not compile a binary. */
+bool gblLibrary = false;
+/* Bit mask of REALM_* values collected from -D options; main() sets every
+ * bit when -d is given. */
+long gblActiveRealm = 0;
+/* Set when the output file was named with -p rather than -o. */
+bool outputSpecifiedWithDashP = false;
+
+/* Arguments collected from -I, -L, -E and -a respectively. */
+ArgsVector includePaths;
+ArgsVector libraryPaths;
+DefineVector defineArgs;
+ArgsVector additionalCodeFiles;
+
+/* Print version information. */
+void version();
+
+/* Total error count. Incremented by both error() overloads; main() exits
+ * with status 1 when it is non-zero. */
+int gblErrorCount = 0;
+
+/*
+ * Alphabet Type for the parsing machinery. The trees/strings of parsed data
+ * all use char type. Currently we can support signed char, unsigned char or
+ * char. If changing this, the colm_alph_t type needs to change as well.
+ * Currently, this is a compile time change only. A colm binary currently
+ * cannot be made to work with multiple alphabet types.
+ */
+
+/* The only host type offered: "unsigned char". The entry also carries 0,
+ * UCHAR_MAX and sizeof(unsigned char), presumably the value range and size.
+ * NOTE(review): the field meanings are defined by HostType, which is declared
+ * elsewhere -- confirm there. */
+HostType hostTypesC[] =
+{
+ { "unsigned", "char", false, 0, UCHAR_MAX, sizeof(unsigned char) },
+};
+
+
+/* Host language description built on the one-entry table above; hostLang
+ * points at it. */
+HostLang hostLangC = { hostTypesC, 1, 0 };
+HostLang *hostLang = &hostLangC;
+
+/* Count an error located in the input, write the "file:line:col: " opening
+ * to cerr and return cerr so the caller can append the message. */
+ostream &error( const InputLoc &loc )
+{
+	/* Keep the error count. */
+	gblErrorCount += 1;
+
+	/* File part: a placeholder is used when the location carries no name. */
+	cerr << ( loc.fileName != 0 ? loc.fileName : "<input>" ) << ":";
+
+	/* Position part: a line of -1 is printed as "INT". */
+	if ( loc.line != -1 )
+		cerr << loc.line << ":" << loc.col << ": ";
+	else
+		cerr << "INT: ";
+
+	return cerr;
+}
+
+/* Count a program error, write the "error: <program>: " opening to cerr and
+ * return cerr so the caller can append the message. */
+ostream &error()
+{
+	++gblErrorCount;
+	return cerr << "error: " PROGNAME ": ";
+}
+
+
+/* Print the opening to a warning, then return the error ostream. The opening
+ * names the input file, or "<input>" when no input file name has been set. */
+ostream &warning( )
+{
+	/* Streaming a null char pointer is undefined behaviour, so fall back to
+	 * the same placeholder that error( loc ) prints for a missing name. */
+	cerr << "warning: " << ( inputFn != 0 ? inputFn : "<input>" ) << ": ";
+	return cerr;
+}
+
+/* Write the "warning: file:line:col: " opening for a warning located in the
+ * input to cerr and return cerr. The file printed is inputFn, which must be
+ * set. */
+ostream &warning( const InputLoc &loc )
+{
+	assert( inputFn != 0 );
+
+	cerr << "warning: " << inputFn << ":";
+	cerr << loc.line << ":" << loc.col << ": ";
+	return cerr;
+}
+
+/* Write path to out with every backslash doubled; all other characters are
+ * copied unchanged. */
+void escapeLineDirectivePath( std::ostream &out, char *path )
+{
+	const char *cur = path;
+	while ( *cur != 0 ) {
+		const char ch = *cur++;
+		if ( ch != '\\' )
+			out << ch;
+		else
+			out << "\\\\";
+	}
+}
+
+/* NOTE(review): redundant re-declaration; the function is defined just above. */
+void escapeLineDirectivePath( std::ostream &out, char *path );
+/* Declared here; no definition is visible in this part of the file. */
+void scan( char *fileName, istream &input );
+
+/* Set by the -s option in processArgs(). */
+bool printStatistics = false;
+
+/* Print a summary of the options to standard output. The -D entry is only
+ * listed when the program was built with DEBUG set. */
+void usage()
+{
+	cout <<
+"usage: colm [options] file\n"
+"general:\n"
+" -h, -H, -?, --help print this usage and exit\n"
+" -v --version print version information and exit\n"
+" -b <ident> use <ident> as name of C object encapsulating the program\n"
+" -o <file> if -c given, write C parse object to <file>,\n"
+" otherwise write binary to <file>\n"
+" -p <file> write C parse object to <file>\n"
+" -e <file> write C++ export header to <file>\n"
+" -x <file> write C++ export code to <file>\n"
+" -m <file> write C++ commit code to <file>\n"
+" -a <file> additional code file to include in output program\n"
+" -E N=V set a string value available in the program\n"
+" -I <path> additional include path for the compiler\n"
+" -i activate branchpoint information\n"
+" -L <path> additional library path for the linker\n"
+" -l activate logging\n"
+" -r run output program and replace process\n"
+" -c compile only (don't produce binary)\n"
+" -V print dot format (graphviz)\n"
+" -d print verbose debug information\n"
+#if DEBUG
+" -D <tag> print more information about <tag>\n"
+" (BYTECODE|PARSE|MATCH|COMPILE|POOL|PRINT|INPUT|SCAN)\n"
+#endif
+	;
+}
+
+/* Print the version line and the copyright line to standard output. */
+void version()
+{
+	cout << "Colm version " COLM_VERSION << " " COLM_PUBDATE << endl;
+	cout << "Copyright (c) 2007-2019 by Adrian D. Thurston" << endl;
+}
+
+/* Scans a string looking for the file extension. If there is a file
+ * extension then the pointer returned points at its dot, inside the string
+ * passed in. Otherwise returns null.
+ *
+ * The scan runs backwards from the end and stops at the first '.' or '/'.
+ * A '/' seen first means the last path component has no extension. A dot in
+ * the very first position is never reported as an extension, and neither the
+ * empty string nor a one-character string has one. */
+const char *findFileExtension( const char *stemFile )
+{
+	/* Start one past the last character and step back before each test, so
+	 * no pointer in front of the string is ever formed, even for "". */
+	const char *ppos = stemFile + strlen( stemFile );
+
+	while ( ppos > stemFile + 1 ) {
+		ppos--;
+
+		/* The first dot from the end starts the extension. */
+		if ( *ppos == '.' )
+			return ppos;
+
+		/* If we hit a / then there is no extension. Done. */
+		if ( *ppos == '/' )
+			return 0;
+	}
+
+	/* Got to the front of the string without finding an extension. */
+	return 0;
+}
+
+/* Build a file name from stemFile by cutting off its extension (as found by
+ * findFileExtension) and appending suffix, which may be null for no suffix.
+ * The result is allocated with new[]. stemFile must not be empty. */
+char *fileNameFromStem( const char *stemFile, const char *suffix )
+{
+	int stemLen = strlen( stemFile );
+	assert( stemLen > 0 );
+
+	/* Keep only the part in front of the extension, if there is one. */
+	const char *ext = findFileExtension( stemFile );
+	if ( ext != 0 )
+		stemLen = ext - stemFile;
+
+	const int suffixLen = ( suffix == 0 ) ? 0 : strlen( suffix );
+
+	/* Stem, then suffix, then the terminator. */
+	char *fileName = new char[ stemLen + suffixLen + 1 ];
+	memcpy( fileName, stemFile, stemLen );
+	if ( suffix != 0 )
+		memcpy( fileName + stemLen, suffix, suffixLen );
+	fileName[ stemLen + suffixLen ] = 0;
+
+	return fileName;
+}
+
+/* Choose the names of the binary and of the intermediate C file, then open
+ * the intermediate file as outStream.
+ *
+ * The binary name is the -o argument, or the input name with its extension
+ * removed. The intermediate name, unless already set, is the binary name with
+ * its extension replaced by ".c". The program exits with status 1 when no
+ * name can be derived, when either name equals the input file name, or when
+ * the intermediate file cannot be opened. */
+void openOutputCompiled()
+{
+	/* Start with the fn given by -o option. */
+	binaryFn = outputFn;
+
+	if ( binaryFn == 0 ) {
+		/* Without -o the name has to come from the input file name. */
+		if ( inputFn == 0 ) {
+			error() << "no output file name given and no input file name "
+					"to derive one from" << endl;
+			exit(1);
+		}
+		binaryFn = fileNameFromStem( inputFn, 0 );
+	}
+
+	if ( intermedFn == 0 )
+		intermedFn = fileNameFromStem( binaryFn, ".c" );
+
+	/* Never write over the input file: stop here rather than only
+	 * reporting the clash and then opening the file anyway. */
+	if ( inputFn != 0 && strcmp( inputFn, binaryFn ) == 0 ) {
+		error() << "output file \"" << binaryFn <<
+				"\" is the same as the input file" << endl;
+		exit(1);
+	}
+
+	if ( inputFn != 0 && strcmp( inputFn, intermedFn ) == 0 ) {
+		error() << "intermediate file \"" << intermedFn <<
+				"\" is the same as the input file" << endl;
+		exit(1);
+	}
+
+	/* intermedFn is always set at this point, so the output always goes to
+	 * a file. */
+	ofstream *outFStream = new ofstream( intermedFn );
+
+	if ( !outFStream->is_open() ) {
+		error() << "error opening " << intermedFn << " for writing" << endl;
+		exit(1);
+	}
+
+	outStream = outFStream;
+}
+
+/* Open the C output file for a -c (compile only) run as outStream.
+ *
+ * The file name is the -o/-p argument, or the input name with its extension
+ * replaced by ".c". The program exits with status 1 when no name can be
+ * derived, when the name equals the input file name, or when the file cannot
+ * be opened. */
+void openOutputLibrary()
+{
+	if ( outputFn == 0 ) {
+		/* Without -o the name has to come from the input file name. */
+		if ( inputFn == 0 ) {
+			error() << "no output file name given and no input file name "
+					"to derive one from" << endl;
+			exit(1);
+		}
+		outputFn = fileNameFromStem( inputFn, ".c" );
+	}
+
+	/* Never write over the input file: stop here rather than only
+	 * reporting the clash and then opening the file anyway. */
+	if ( inputFn != 0 && strcmp( inputFn, outputFn ) == 0 ) {
+		error() << "output file \"" << outputFn <<
+				"\" is the same as the input file" << endl;
+		exit(1);
+	}
+
+	/* outputFn is always set at this point, so the output always goes to a
+	 * file. */
+	ofstream *outFStream = new ofstream( outputFn );
+
+	if ( !outFStream->is_open() ) {
+		error() << "error opening " << outputFn << " for writing" << endl;
+		exit(1);
+	}
+
+	outStream = outFStream;
+}
+
+/* Open the export header file (-e) as outStream, or use standard output when
+ * no export header name is set. The program exits with status 1 when the
+ * name equals the input file name or the file cannot be opened. */
+void openExports( )
+{
+	/* Never write over the input file: stop here rather than only
+	 * reporting the clash and then truncating the input. */
+	if ( inputFn != 0 && exportHeaderFn != 0 && strcmp( inputFn, exportHeaderFn ) == 0 ) {
+		error() << "output file \"" << exportHeaderFn <<
+				"\" is the same as the input file" << endl;
+		exit(1);
+	}
+
+	if ( exportHeaderFn != 0 ) {
+		/* Open the output file. */
+		ofstream *outFStream = new ofstream( exportHeaderFn );
+
+		if ( !outFStream->is_open() ) {
+			error() << "error opening " << exportHeaderFn << " for writing" << endl;
+			exit(1);
+		}
+
+		outStream = outFStream;
+	}
+	else {
+		/* Writing out to std out. */
+		outStream = &cout;
+	}
+}
+
+/* Open the export code file (-x) as outStream, or use standard output when no
+ * export code name is set. The program exits with status 1 when the name
+ * equals the input file name or the file cannot be opened. */
+void openExportsImpl( )
+{
+	/* Never write over the input file: stop here rather than only
+	 * reporting the clash and then truncating the input. */
+	if ( inputFn != 0 && exportCodeFn != 0 && strcmp( inputFn, exportCodeFn ) == 0 ) {
+		error() << "output file \"" << exportCodeFn <<
+				"\" is the same as the input file" << endl;
+		exit(1);
+	}
+
+	if ( exportCodeFn != 0 ) {
+		/* Open the output file. */
+		ofstream *outFStream = new ofstream( exportCodeFn );
+
+		if ( !outFStream->is_open() ) {
+			error() << "error opening " << exportCodeFn << " for writing" << endl;
+			exit(1);
+		}
+
+		outStream = outFStream;
+	}
+	else {
+		/* Writing out to std out. */
+		outStream = &cout;
+	}
+}
+
+/* Open the commit code file (-m) as outStream, or use standard output when no
+ * commit code name is set. The program exits with status 1 when the name
+ * equals the input file name or the file cannot be opened. */
+void openCommit( )
+{
+	/* Never write over the input file: stop here rather than only
+	 * reporting the clash and then truncating the input. */
+	if ( inputFn != 0 && commitCodeFn != 0 && strcmp( inputFn, commitCodeFn ) == 0 ) {
+		error() << "output file \"" << commitCodeFn <<
+				"\" is the same as the input file" << endl;
+		exit(1);
+	}
+
+	if ( commitCodeFn != 0 ) {
+		/* Open the output file. */
+		ofstream *outFStream = new ofstream( commitCodeFn );
+
+		if ( !outFStream->is_open() ) {
+			error() << "error opening " << commitCodeFn << " for writing" << endl;
+			exit(1);
+		}
+
+		outStream = outFStream;
+	}
+	else {
+		/* Writing out to std out. */
+		outStream = &cout;
+	}
+}
+
+/* Run command through system() and return its result. The command is echoed
+ * first when verbose is set, and a non-zero result is reported as an error. */
+int compileOutputCommand( const char *command )
+{
+	if ( verbose )
+		cout << "compiling with: '" << command << "'" << endl;
+
+	const int status = system( command );
+	if ( status == 0 )
+		return status;
+
+	error() << "there was a problem compiling the output" << endl;
+	return status;
+}
+
+/* Replace the current process with the compiled program (binaryFn). On
+ * success this call does not return. If the program cannot be executed an
+ * error is reported and control returns to the caller. */
+void runOutputProgram()
+{
+	if ( verbose )
+		cout << "running output: '" << binaryFn << "'" << endl;
+
+	/* The argument list of a variadic exec call has to end with a null
+	 * pointer of type char*, which a bare NULL does not guarantee. */
+	execl( binaryFn, binaryFn, (char*)0 );
+
+	/* execl only returns when it failed. */
+	error() << "could not execute " << binaryFn << endl;
+}
+
+/* Compile the intermediate C file (intermedFn) into the binary (binaryFn) by
+ * running gcc through compileOutputCommand(), then run the result when -r
+ * was given and the compile succeeded.
+ *
+ * argv0       - how this program was invoked; only used when inSource is set,
+ *               in which case it must contain a '/'.
+ * inSource    - true when running from the source tree: headers and library
+ *               are then taken from srcLocation and from the directory of
+ *               argv0 instead of from the install PREFIX.
+ * srcLocation - source directory; only used when inSource is set.
+ *
+ * Additional code files (-a), include paths (-I) and library paths (-L) are
+ * appended to the command, followed by -lcolm. */
+void compileOutput( const char *argv0, const bool inSource, char *srcLocation )
+{
+#define COMPILE_COMMAND_STRING "gcc -Wall -Wwrite-strings" \
+	" -g" \
+	" -o %s" \
+	" %s"
+#define COMPILE_IN_SOURCE_STRING COMPILE_COMMAND_STRING \
+	" -I%s/../aapl" \
+	" -I%s/include" \
+	" -L%s" \
+	" -Wl,-rpath,%s"
+#define COMPILE_INSTALLED_STRING COMPILE_COMMAND_STRING \
+	" -I" PREFIX "/include" \
+	" -L" PREFIX "/lib" \
+	" -Wl,-rpath," PREFIX "/lib"
+
+	/* Only the in-source command refers to the directory of the running
+	 * executable: cut the program name off a private copy of argv0. */
+	char *location = 0;
+	if ( inSource ) {
+		location = strdup( argv0 );
+		assert( location != 0 );
+		char *last = strrchr( location, '/' );
+		assert( last != 0 );
+		last[0] = 0;
+	}
+
+	/* Let snprintf measure the formatted part, so the buffer size follows
+	 * the real lengths of every path that goes into the command. */
+	int fixedLen = inSource ?
+		snprintf( 0, 0, COMPILE_IN_SOURCE_STRING,
+				binaryFn, intermedFn, srcLocation,
+				srcLocation, location, location ) :
+		snprintf( 0, 0, COMPILE_INSTALLED_STRING,
+				binaryFn, intermedFn );
+	assert( fixedLen >= 0 );
+
+	/* Add the pieces appended below and the terminator. */
+	size_t length = fixedLen + strlen( " -lcolm" ) + 1;
+	for ( ArgsVector::Iter af = additionalCodeFiles; af.lte(); af++ )
+		length += strlen( " " ) + strlen( *af );
+	for ( ArgsVector::Iter ip = includePaths; ip.lte(); ip++ )
+		length += strlen( " -I" ) + strlen( *ip );
+	for ( ArgsVector::Iter lp = libraryPaths; lp.lte(); lp++ )
+		length += strlen( " -L" ) + strlen( *lp );
+
+	char *command = new char[length];
+	if ( inSource ) {
+		snprintf( command, length, COMPILE_IN_SOURCE_STRING,
+				binaryFn, intermedFn, srcLocation,
+				srcLocation, location, location );
+	}
+	else {
+		snprintf( command, length, COMPILE_INSTALLED_STRING,
+				binaryFn, intermedFn );
+	}
+
+#undef COMPILE_INSTALLED_STRING
+#undef COMPILE_IN_SOURCE_STRING
+#undef COMPILE_COMMAND_STRING
+
+	for ( ArgsVector::Iter af = additionalCodeFiles; af.lte(); af++ ) {
+		strcat( command, " " );
+		strcat( command, *af );
+	}
+	for ( ArgsVector::Iter ip = includePaths; ip.lte(); ip++ ) {
+		strcat( command, " -I" );
+		strcat( command, *ip );
+	}
+	for ( ArgsVector::Iter lp = libraryPaths; lp.lte(); lp++ ) {
+		strcat( command, " -L" );
+		strcat( command, *lp );
+	}
+	strcat( command, " -lcolm" );
+
+	/* compileOutputCommand returns zero on success. */
+	if( !compileOutputCommand( command ) && run )
+		runOutputProgram();
+
+	delete[] command;
+	free( location );
+}
+
+/* Decide whether the program is running from its source tree. The directory
+ * part of argv0, with a trailing "/.libs" component removed, counts as the
+ * source tree when it contains a regular file called main.cc.
+ *
+ * On success, location is set to that directory (allocated with new[]) and
+ * true is returned. Otherwise location is left alone and false is returned. */
+bool inSourceTree( const char *argv0, char *&location )
+{
+	/* Without a directory part there is nothing to examine. */
+	const char *nameSep = strrchr( argv0, '/' );
+	if ( nameSep == 0 )
+		return false;
+
+	/* Copy the directory part, leaving room for the "/main.cc" probe. */
+	int dirLen = nameSep - argv0;
+	char *dir = new char[dirLen + 16];
+	memcpy( dir, argv0, dirLen );
+	dir[dirLen] = 0;
+
+	/* Drop a last component that is exactly ".libs". */
+	const char *tail = strrchr( dir, '/' );
+	if ( tail != 0 && strcmp( tail, "/.libs" ) == 0 ) {
+		dirLen = tail - dir;
+		dir[dirLen] = 0;
+	}
+
+	/* Probe for main.cc, then cut the probe off again on success. */
+	strcpy( dir + dirLen, "/main.cc" );
+
+	struct stat sb;
+	if ( stat( dir, &sb ) == 0 && S_ISREG( sb.st_mode ) ) {
+		dir[dirLen] = 0;
+		location = dir;
+		return true;
+	}
+
+	delete[] dir;
+	return false;
+}
+
+/* Walk the command line with ParamCheck and store what it finds in the
+ * option globals declared at the top of this file. Problems are reported
+ * through error(), which only counts them; the caller is expected to test
+ * gblErrorCount afterwards. The -E and -D problems go through fatal() instead.
+ * -v, -h/-H/-? and the long options --help and --version print and exit
+ * immediately. */
+void processArgs( int argc, const char **argv )
+{
+ /* NOTE(review): the option string looks getopt-style (a letter followed by
+ * ':' takes an argument) -- confirm against ParamCheck. 'v' is listed twice,
+ * and 'S' and 'M' are listed but have no case in the switch below. */
+ ParamCheck pc( "p:cD:e:x:I:L:vdliro:S:M:vHh?-:sVa:m:b:E:", argc, argv );
+
+ while ( pc.check() ) {
+ switch ( pc.state ) {
+ case ParamCheck::match:
+ switch ( pc.parameter ) {
+ case 'I':
+ includePaths.append( pc.parameterArg );
+ break;
+ case 'v':
+ version();
+ exit(0);
+ break;
+ case 'd':
+ verbose = true;
+ break;
+ case 'l':
+ logging = true;
+ break;
+ case 'L':
+ libraryPaths.append( pc.parameterArg );
+ break;
+ case 'i':
+ branchPointInfo = true;
+ break;
+ case 'r':
+ run = true;
+ break;
+ case 'p':
+ /* -p names the output file like -o does; main() rejects it unless -c
+ * is given too. */
+ outputSpecifiedWithDashP = true;
+ /* fallthrough */
+ case 'o':
+ /* Output. */
+ if ( *pc.parameterArg == 0 )
+ error() << "a zero length output file name was given" << endl;
+ else if ( outputFn != 0 )
+ error() << "more than one output file name was given" << endl;
+ else {
+ /* Ok, remember the output file name. */
+ outputFn = pc.parameterArg;
+ }
+ break;
+
+ case 'b':
+ /* object name. */
+ if ( *pc.parameterArg == 0 )
+ error() << "a zero length object name was given" << endl;
+ else {
+ /* Ok, remember the object name. Naming the object also turns the
+ * host adapters off. */
+ objectName = pc.parameterArg;
+ hostAdapters = false;
+ }
+ break;
+
+ /* No break needed: exit() does not return. */
+ case 'H': case 'h': case '?':
+ usage();
+ exit(0);
+ case 's':
+ printStatistics = true;
+ break;
+ case 'V':
+ generateGraphviz = true;
+ break;
+ case '-':
+ /* Long options; the name is compared without regard to case. */
+ if ( strcasecmp(pc.parameterArg, "help") == 0 ) {
+ usage();
+ exit(0);
+ }
+ else if ( strcasecmp(pc.parameterArg, "version") == 0 ) {
+ version();
+ exit(0);
+ }
+ else {
+ error() << "--" << pc.parameterArg <<
+ " is an invalid argument" << endl;
+ }
+ break;
+ case 'c':
+ gblLibrary = true;
+ break;
+ case 'e':
+ exportHeaderFn = pc.parameterArg;
+ break;
+ case 'x':
+ exportCodeFn = pc.parameterArg;
+ break;
+ case 'a':
+ additionalCodeFiles.append( pc.parameterArg );
+ break;
+ case 'm':
+ commitCodeFn = pc.parameterArg;
+ break;
+
+ case 'E': {
+ /* NAME=VALUE: split at the first '=' and require a non-empty name. */
+ const char *eq = strchr( pc.parameterArg, '=' );
+ if ( eq == 0 )
+ fatal( "-E option argument must contain =" );
+ if ( eq == pc.parameterArg )
+ fatal( "-E variable name is of zero length" );
+
+ defineArgs.append( DefineArg(
+ String( pc.parameterArg, eq-pc.parameterArg ),
+ String( eq + 1 ) ) );
+
+ break;
+ }
+
+ case 'D':
+ /* Each -D adds one realm bit to gblActiveRealm; the tag is matched by
+ * position in colm_realm_names. */
+#if DEBUG
+ // @NOTE: keep this in sync with 'debug.c': 'colm_realm_names'
+ if ( strcmp( pc.parameterArg, colm_realm_names[0] ) == 0 )
+ gblActiveRealm |= REALM_BYTECODE;
+ else if ( strcmp( pc.parameterArg, colm_realm_names[1] ) == 0 )
+ gblActiveRealm |= REALM_PARSE;
+ else if ( strcmp( pc.parameterArg, colm_realm_names[2] ) == 0 )
+ gblActiveRealm |= REALM_MATCH;
+ else if ( strcmp( pc.parameterArg, colm_realm_names[3] ) == 0 )
+ gblActiveRealm |= REALM_COMPILE;
+ else if ( strcmp( pc.parameterArg, colm_realm_names[4] ) == 0 )
+ gblActiveRealm |= REALM_POOL;
+ else if ( strcmp( pc.parameterArg, colm_realm_names[5] ) == 0 )
+ gblActiveRealm |= REALM_PRINT;
+ else if ( strcmp( pc.parameterArg, colm_realm_names[6] ) == 0 )
+ gblActiveRealm |= REALM_INPUT;
+ else if ( strcmp( pc.parameterArg, colm_realm_names[7] ) == 0 )
+ gblActiveRealm |= REALM_SCAN;
+ else
+ fatal( "unknown argument to -D %s\n", pc.parameterArg );
+#else
+ fatal( "-D option specified but debugging messsages not compiled in\n" );
+#endif
+ break;
+
+ }
+ break;
+
+ case ParamCheck::invalid:
+ error() << "-" << pc.parameter << " is an invalid argument" << endl;
+ break;
+
+ case ParamCheck::noparam:
+ /* It is interpreted as an input file. */
+ if ( *pc.curArg == 0 )
+ error() << "a zero length input file name was given" << endl;
+ else if ( inputFn != 0 )
+ error() << "more than one input file name was given" << endl;
+ else {
+ /* OK, Remember the filename. */
+ inputFn = pc.curArg;
+ }
+ break;
+ }
+ }
+}
+
+/* Report whether the file fn can be opened for reading. The file is closed
+ * again before returning. */
+bool readCheck( const char *fn )
+{
+	std::ifstream probe( fn );
+	return probe.is_open();
+}
+
+/* Main: process the arguments, parse the input with the parser selected at
+ * build time, compile it and write the requested outputs. Returns 0 on
+ * success; exits or returns 1 as soon as a stage has reported errors. */
+int main(int argc, const char **argv)
+{
+ processArgs( argc, argv );
+
+ /* -d switches every debug realm on. */
+ if ( verbose )
+ gblActiveRealm = 0xffffffff;
+
+ /* Bail on above errors. */
+ if ( gblErrorCount > 0 )
+ exit(1);
+
+ /* Make sure we are not writing to the same file as the input file. */
+ if ( inputFn != 0 && outputFn != 0 &&
+ strcmp( inputFn, outputFn ) == 0 )
+ {
+ error() << "output file \"" << outputFn <<
+ "\" is the same as the input file" << endl;
+ }
+
+ /* An input file is only required in builds that define LOAD_INIT or
+ * LOAD_COLM. */
+#if defined(LOAD_INIT) || defined(LOAD_COLM)
+ /* Open the input file for reading. */
+ if ( inputFn == 0 ) {
+ error() << "colm: no input file given" << endl;
+ }
+ else {
+ /* Check if we can open the input file for reading. */
+ if ( ! readCheck( inputFn ) )
+ error() << "could not open " << inputFn << " for reading" << endl;
+ }
+#endif
+
+ if ( !gblLibrary && outputSpecifiedWithDashP ) {
+ error() << "-p option must be used with -c" << endl;
+ }
+
+
+ /* Bail on above errors. */
+ if ( gblErrorCount > 0 )
+ exit(1);
+
+ Compiler *pd = new Compiler;
+
+ /* Which parser feeds the compiler is chosen at build time. */
+#if defined(CONS_INIT)
+ BaseParser *parser = new ConsInit( pd );
+#elif defined(LOAD_INIT)
+ BaseParser *parser = new LoadInit( pd, inputFn );
+#else
+ BaseParser *parser = consLoadColm( pd, inputFn );
+#endif
+
+ parser->go( gblActiveRealm );
+
+ /* Parsing complete, check for errors.. */
+ if ( gblErrorCount > 0 )
+ return 1;
+
+ /* Initiate a compile following a parse. */
+ pd->compile();
+
+ /*
+ * Write output.
+ */
+ if ( generateGraphviz ) {
+ /* -V: only the dot file is written, to standard output. */
+ outStream = &cout;
+ pd->writeDotFile();
+ }
+ else {
+ if ( gblLibrary )
+ openOutputLibrary();
+ else
+ openOutputCompiled();
+
+ pd->generateOutput( gblActiveRealm, ( commitCodeFn == 0 ) );
+ /* Deleting the stream closes the generated file before it is compiled. */
+ if ( outStream != 0 )
+ delete outStream;
+
+ /* Without -c the generated C file is compiled into a binary. */
+ if ( !gblLibrary ) {
+ char *location = 0;
+ bool inSource = inSourceTree( argv[0], location );
+ compileOutput( argv[0], inSource, location );
+ }
+
+ /* Optional extra outputs; each reuses outStream for its own file. */
+ if ( exportHeaderFn != 0 ) {
+ openExports();
+ pd->generateExports();
+ delete outStream;
+ }
+ if ( exportCodeFn != 0 ) {
+ openExportsImpl();
+ pd->generateExportsImpl();
+ delete outStream;
+ }
+ if ( commitCodeFn != 0 ) {
+ openCommit();
+ pd->writeCommit();
+ delete outStream;
+ }
+ }
+
+ delete parser;
+ delete pd;
+
+ /* Bail on above errors. */
+ if ( gblErrorCount > 0 )
+ exit(1);
+
+ return 0;
+}
diff --git a/src/map.c b/src/map.c
new file mode 100644
index 00000000..052e5445
--- /dev/null
+++ b/src/map.c
@@ -0,0 +1,876 @@
+/*
+ * Copyright 2010-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <colm/map.h>
+
+#include <assert.h>
+#include <stdbool.h>
+
+#include <colm/pdarun.h>
+#include <colm/pool.h>
+#include <colm/bytecode.h>
+
+/* Fetch a neighbour of a map element: field 0 selects prev, field 1 selects
+ * next. Returns the struct that contains the neighbour, located through the
+ * el_offset of generic gen_id, or null when there is no such neighbour. */
+struct colm_struct *colm_map_el_get( struct colm_program *prg,
+		map_el_t *map_el, word_t gen_id, word_t field )
+{
+	struct generic_info *gi = &prg->rtd->generic_info[gen_id];
+	map_el_t *neighbour = 0;
+
+	if ( field == 0 )
+		neighbour = map_el->prev;
+	else if ( field == 1 )
+		neighbour = map_el->next;
+	else
+		assert( 0 );
+
+	if ( neighbour == 0 )
+		return 0;
+
+	return colm_struct_container( neighbour, gi->el_offset );
+}
+
+/* Fetch an end of a map's element list: field 0 selects head, field 1
+ * selects tail. Returns the struct that contains that element, located
+ * through the el_offset of generic gen_id, or null when the end is empty. */
+struct colm_struct *colm_map_get( struct colm_program *prg,
+		map_t *map, word_t gen_id, word_t field )
+{
+	struct generic_info *gi = &prg->rtd->generic_info[gen_id];
+	map_el_t *end = 0;
+
+	if ( field == 0 )
+		end = map->head;
+	else if ( field == 1 )
+		end = map->tail;
+	else
+		assert( 0 );
+
+	if ( end == 0 )
+		return 0;
+
+	return colm_struct_container( end, gi->el_offset );
+}
+
+/* Forget the element list: both ends are cleared, the elements themselves
+ * are not touched. */
+void map_list_abandon( map_t *map )
+{
+	map->tail = 0;
+	map->head = 0;
+}
+
+/* Link new_el into the map's element list directly in front of next_el. A
+ * null next_el appends new_el at the tail. */
+void map_list_add_before( map_t *map, map_el_t *next_el, map_el_t *new_el )
+{
+	/* The successor of the new element is next_el in every case. */
+	new_el->next = next_el;
+
+	/* Backward links. */
+	if ( next_el != 0 ) {
+		/* Take over next_el's predecessor and become its predecessor. */
+		new_el->prev = next_el->prev;
+		next_el->prev = new_el;
+	}
+	else {
+		/* Nothing follows: the new element becomes the tail. */
+		new_el->prev = map->tail;
+		map->tail = new_el;
+	}
+
+	/* Forward links. */
+	if ( new_el->prev != 0 ) {
+		/* Hook the predecessor up to the new element. */
+		new_el->prev->next = new_el;
+	}
+	else {
+		/* Nothing precedes: the new element becomes the head. */
+		map->head = new_el;
+	}
+}
+
+/* Link new_el into the map's element list directly behind prev_el. A null
+ * prev_el inserts new_el at the head. */
+void map_list_add_after( map_t *map, map_el_t *prev_el, map_el_t *new_el )
+{
+	/* The predecessor of the new element is prev_el in every case. */
+	new_el->prev = prev_el;
+
+	/* Forward links. */
+	if ( prev_el != 0 ) {
+		/* Take over prev_el's successor and become its successor. */
+		new_el->next = prev_el->next;
+		prev_el->next = new_el;
+	}
+	else {
+		/* Nothing precedes: the new element becomes the head. */
+		new_el->next = map->head;
+		map->head = new_el;
+	}
+
+	/* Backward links. */
+	if ( new_el->next != 0 ) {
+		/* Hook the successor up to the new element. */
+		new_el->next->prev = new_el;
+	}
+	else {
+		/* Nothing follows: the new element becomes the tail. */
+		map->tail = new_el;
+	}
+}
+
+
+/* Unlink el from the map's element list and return it. el's own prev and
+ * next pointers are left as they were. */
+map_el_t *map_list_detach( map_t *map, map_el_t *el )
+{
+	/* Make whatever pointed forward at el skip over it. */
+	if ( el->prev != 0 )
+		el->prev->next = el->next;
+	else
+		map->head = el->next;
+
+	/* Make whatever pointed backward at el skip over it. */
+	if ( el->next != 0 )
+		el->next->prev = el->prev;
+	else
+		map->tail = el->prev;
+
+	return el;
+}
+
+
+/* Attach element to the tree at an insertion position that has already been
+ * found, then restore the height balance.
+ *
+ * parent_el is the element to hang the new one under, or null when it becomes
+ * the root. When last_less equals parent_el the new element becomes the left
+ * child, otherwise the right child. The element list is kept in step with
+ * the tree. */
+void map_attach_rebal( map_t *map, map_el_t *element, map_el_t *parent_el, map_el_t *last_less )
+{
+	map_el_t *unbal;
+
+	/* One more element in the tree. */
+	map->tree_size += 1;
+
+	/* A newly attached element is a leaf of height 1 under parent_el. */
+	element->parent = parent_el;
+	element->left = 0;
+	element->right = 0;
+	element->height = 1;
+
+	if ( parent_el == 0 ) {
+		/* No parent element so we are inserting the root. */
+		map->root = element;
+		map_list_add_after( map, map->tail, element );
+	}
+	else if ( last_less == parent_el ) {
+		/* The last step of the search went left. */
+		parent_el->left = element;
+		map_list_add_before( map, parent_el, element );
+	}
+	else {
+		/* The last step of the search went right. */
+		parent_el->right = element;
+		map_list_add_after( map, parent_el, element );
+	}
+
+	/* Recalculate the heights above the new leaf. */
+	map_recalc_heights( map, parent_el );
+
+	/* Rebalance at the first element whose grandparent is unbalanced, if
+	 * there is one. */
+	unbal = mapFindFirstUnbalGP( map, element );
+	if ( unbal != 0 )
+		map_rebalance( map, unbal );
+}
+
+/* NOTE(review): everything down to the matching #endif is compiled out. It
+ * uses the C++ `delete` operator, which this C file could not compile if the
+ * block were re-enabled. */
+#if 0
+/* Recursively delete all the children of a element. */
+void map_delete_children_of( map_t *map, map_el_t *element )
+{
+ /* Recurse left. */
+ if ( element->left ) {
+ map_delete_children_of( map, element->left );
+
+ /* Delete left element. */
+ delete element->left;
+ element->left = 0;
+ }
+
+ /* Recurse right. */
+ if ( element->right ) {
+ map_delete_children_of( map, element->right );
+
+ /* Delete right element. */
+ delete element->right;
+ /* NOTE(review): this clears left a second time; right was probably meant. */
+ element->left = 0;
+ }
+}
+
+/* Delete every element of the map and reset root, tree_size and the list. */
+void map_empty( map_t *map )
+{
+ if ( map->root ) {
+ /* Recursively delete from the tree structure. */
+ map_delete_children_of( map, map->root );
+ delete map->root;
+ map->root = 0;
+ map->tree_size = 0;
+
+ map_list_abandon( map );
+ }
+}
+#endif
+
+/* Rebalance from an element whose grandparent is unbalanced. Only
+ * call on an element that has a grandparent.
+ *
+ * n, its parent p and its grandparent gp are renamed a, b and c according to
+ * their shape, and their four outer subtrees t1..t4 are collected from left
+ * to right. The three elements are then relinked with b on top, a as its left
+ * child and c as its right child, and t1..t4 are hung back underneath in the
+ * same order. b takes gp's place under the great grandparent (or as the
+ * root). Returns the great grandparent, which may be null. */
+map_el_t *map_rebalance( map_t *map, map_el_t *n )
+{
+ long lheight, rheight;
+ map_el_t *a, *b, *c;
+ map_el_t *t1, *t2, *t3, *t4;
+
+ map_el_t *p = n->parent; /* Parent (non-NULL). */
+ map_el_t *gp = p->parent; /* Grand-parent (Non-NULL). */
+ map_el_t *ggp = gp->parent; /* Great grand-parent (may be NULL). */
+
+ if (gp->right == p)
+ {
+ /* gp
+  *   \
+  *    p
+  */
+ if (p->right == n)
+ {
+ /* gp
+  *   \
+  *    p
+  *     \
+  *      n
+  */
+ a = gp;
+ b = p;
+ c = n;
+ t1 = gp->left;
+ t2 = p->left;
+ t3 = n->left;
+ t4 = n->right;
+ }
+ else
+ {
+ /* gp
+  *   \
+  *    p
+  *   /
+  *  n
+  */
+ a = gp;
+ b = n;
+ c = p;
+ t1 = gp->left;
+ t2 = n->left;
+ t3 = n->right;
+ t4 = p->right;
+ }
+ }
+ else
+ {
+ /*    gp
+  *   /
+  *  p
+  */
+ if (p->right == n)
+ {
+ /*    gp
+  *   /
+  *  p
+  *   \
+  *    n
+  */
+ a = p;
+ b = n;
+ c = gp;
+ t1 = p->left;
+ t2 = n->left;
+ t3 = n->right;
+ t4 = gp->right;
+ }
+ else
+ {
+ /*      gp
+  *     /
+  *    p
+  *   /
+  *  n
+  */
+ a = n;
+ b = p;
+ c = gp;
+ t1 = n->left;
+ t2 = n->right;
+ t3 = p->right;
+ t4 = gp->right;
+ }
+ }
+
+ /* Perform rotation.
+ */
+
+ /* Tie b to the great grandparent. */
+ if ( ggp == 0 )
+ map->root = b;
+ else if ( ggp->left == gp )
+ ggp->left = b;
+ else
+ ggp->right = b;
+ b->parent = ggp;
+
+ /* Tie a as a leftchild of b. */
+ b->left = a;
+ a->parent = b;
+
+ /* Tie c as a rightchild of b. */
+ b->right = c;
+ c->parent = b;
+
+ /* Tie t1 as a leftchild of a. */
+ a->left = t1;
+ if ( t1 != 0 ) t1->parent = a;
+
+ /* Tie t2 as a rightchild of a. */
+ a->right = t2;
+ if ( t2 != 0 ) t2->parent = a;
+
+ /* Tie t3 as a leftchild of c. */
+ c->left = t3;
+ if ( t3 != 0 ) t3->parent = c;
+
+ /* Tie t4 as a rightchild of c. */
+ c->right = t4;
+ if ( t4 != 0 ) t4->parent = c;
+
+ /* The heights of a, c and b are recalculated by hand and the great
+ * grand-parent is passed to map_recalc_heights() to ensure
+ * the heights are correct up the tree.
+ *
+ * Note that map_recalc_heights() cuts out when it comes across
+ * a height that hasn't changed.
+ */
+
+ /* Fix height of a. */
+ lheight = a->left ? a->left->height : 0;
+ rheight = a->right ? a->right->height : 0;
+ a->height = (lheight > rheight ? lheight : rheight) + 1;
+
+ /* Fix height of c. */
+ lheight = c->left ? c->left->height : 0;
+ rheight = c->right ? c->right->height : 0;
+ c->height = (lheight > rheight ? lheight : rheight) + 1;
+
+ /* Fix height of b. Its children are a and c, whose heights are now final. */
+ lheight = a->height;
+ rheight = c->height;
+ b->height = (lheight > rheight ? lheight : rheight) + 1;
+
+ /* Fix height of b's parents. */
+ map_recalc_heights( map, ggp );
+ return ggp;
+}
+
+/* Recompute the height of element and of its ancestors, walking upward and
+ * stopping at the first element whose height turns out to be unchanged. */
+void map_recalc_heights( map_t *map, map_el_t *element )
+{
+	for ( ; element != 0; element = element->parent ) {
+		long left_h = element->left ? element->left->height : 0;
+		long right_h = element->right ? element->right->height : 0;
+		long height = ( left_h > right_h ? left_h : right_h ) + 1;
+
+		/* An unchanged height cannot change any height further up. */
+		if ( height == element->height )
+			break;
+
+		element->height = height;
+	}
+}
+
+/* Walking upward from element, find the first element whose grandparent is
+ * unbalanced, i.e. whose grandparent's subtree heights differ by more than
+ * one. Returns null when there is no such element. */
+map_el_t *mapFindFirstUnbalGP( map_t *map, map_el_t *element )
+{
+	map_el_t *gp;
+
+	/* Without a parent there can be no grandparent. */
+	if ( element == 0 || element->parent == 0 )
+		return 0;
+
+	/* element always stays two levels below gp. */
+	for ( gp = element->parent->parent; gp != 0; gp = gp->parent ) {
+		long left_h = gp->left ? gp->left->height : 0;
+		long right_h = gp->right ? gp->right->height : 0;
+		long balance = left_h - right_h;
+
+		if ( balance < -1 || balance > 1 )
+			return element;
+
+		element = element->parent;
+	}
+
+	return 0;
+}
+
+
+
+/* Walking upward from element, find the first element that is itself
+ * unbalanced, i.e. whose subtree heights differ by more than one. Returns
+ * null when the walk reaches the top without finding one. */
+map_el_t *map_find_first_unbal_el( map_t *map, map_el_t *element )
+{
+	for ( ; element != 0; element = element->parent ) {
+		long left_h = element->left ? element->left->height : 0;
+		long right_h = element->right ? element->right->height : 0;
+		long balance = left_h - right_h;
+
+		if ( balance < -1 || balance > 1 )
+			return element;
+	}
+
+	return 0;
+}
+
+/* Put replacement, an element that is not in the tree, into the tree position
+ * of element. replacement takes over element's children, parent link and
+ * height; element's own pointers are left as they were. */
+void map_replace_el( map_t *map, map_el_t *element, map_el_t *replacement )
+{
+	map_el_t *up = element->parent;
+	map_el_t *lchild = element->left;
+	map_el_t *rchild = element->right;
+
+	/* Adopt both subtrees. */
+	replacement->left = lchild;
+	if ( lchild != 0 )
+		lchild->parent = replacement;
+	replacement->right = rchild;
+	if ( rchild != 0 )
+		rchild->parent = replacement;
+
+	/* Take element's place under its parent, or as the root. */
+	replacement->parent = up;
+	if ( up == 0 )
+		map->root = replacement;
+	else if ( up->left == element )
+		up->left = replacement;
+	else
+		up->right = replacement;
+
+	replacement->height = element->height;
+}
+
+
+/* Unlinks element from the tree and puts filler in its place. Filler should
+ * be null or a child of element. Only the link from above (parent child
+ * pointer or map root) and filler's parent pointer are rewritten; element's
+ * own fields are not touched. */
+void map_remove_el( map_t *map, map_el_t *element, map_el_t *filler )
+{
+	map_el_t *up = element->parent;
+	map_el_t **down_link;
+
+	if ( up == 0 )
+		down_link = &map->root;
+	else if ( up->left == element )
+		down_link = &up->left;
+	else
+		down_link = &up->right;
+	*down_link = filler;
+
+	if ( filler != 0 )
+		filler->parent = up;
+}
+
+#if 0
+/* Recursive worker for tree copying. Compiled out by the surrounding #if 0.
+ *
+ * NOTE(review): as written this allocates new_el and then tests and recurses
+ * on new_el->left / new_el->right rather than el->left / el->right, and it
+ * never copies any field from el into new_el. If this code is ever re-enabled
+ * that looks like it needs revisiting -- confirm against the intended copy
+ * semantics before use. */
+map_el_t *map_copy_branch( program_t *prg, map_t *map, map_el_t *el, kid_t *old_next_down, kid_t **new_next_down )
+{
+ /* Duplicate element. Either the base element's copy constructor or default
+ * constructor will get called. Both will suffice for initting the
+ * pointers to null when they need to be. */
+ map_el_t *new_el = map_el_allocate( prg );
+
+ /* Report the copy that corresponds to old_next_down back to the caller. */
+ if ( (kid_t*)el == old_next_down )
+ *new_next_down = (kid_t*)new_el;
+
+ /* If the left tree is there, copy it. */
+ if ( new_el->left ) {
+ new_el->left = map_copy_branch( prg, map, new_el->left, old_next_down, new_next_down );
+ new_el->left->parent = new_el;
+ }
+
+ /* Append to the ordered list between the two subtree copies, which yields
+ * in-order list placement. */
+ map_list_add_after( map, map->tail, new_el );
+
+ /* If the right tree is there, copy it. */
+ if ( new_el->right ) {
+ new_el->right = map_copy_branch( prg, map, new_el->right, old_next_down, new_next_down );
+ new_el->right->parent = new_el;
+ }
+
+ return new_el;
+}
+#endif
+
+/* Three-way key comparison used by the map routines. When the map's key type
+ * is TYPE_TREE the keys are compared with colm_cmp_tree; otherwise the two
+ * pointer values are cast to long and compared numerically, giving -1, 0
+ * or 1. */
+static long map_cmp( program_t *prg, map_t *map, const tree_t *tree1, const tree_t *tree2 )
+{
+	long lhs, rhs;
+
+	if ( map->generic_info->key_type == TYPE_TREE )
+		return colm_cmp_tree( prg, tree1, tree2 );
+
+	lhs = (long)tree1;
+	rhs = (long)tree2;
+	if ( lhs != rhs )
+		return lhs < rhs ? -1 : 1;
+	return 0;
+}
+
+/* Inserts element into the map, keyed by element->key.
+ *
+ * Returns element when it was inserted, or null when an element with an equal
+ * key (per map_cmp) is already present, in which case nothing is changed.
+ * When last_found is non-null it receives the inserted element on success or
+ * the already-present element on failure. */
+map_el_t *map_insert_el( program_t *prg, map_t *map, map_el_t *element, map_el_t **last_found )
+{
+	map_el_t *cursor = map->root;
+	map_el_t *attach_to = 0;
+	map_el_t *last_less = 0;
+
+	/* Descend until we either meet an equal key or fall off the tree. */
+	while ( cursor != 0 ) {
+		long rel = map_cmp( prg, map, element->key, cursor->key );
+
+		if ( rel == 0 ) {
+			/* Key already present. */
+			if ( last_found != 0 )
+				*last_found = cursor;
+			return 0;
+		}
+
+		attach_to = cursor;
+		if ( rel < 0 ) {
+			/* Going left: remember the most recent element the new key
+			 * is less than. */
+			last_less = cursor;
+			cursor = cursor->left;
+		}
+		else {
+			cursor = cursor->right;
+		}
+	}
+
+	/* Reached an empty slot without finding the key. Attach underneath the
+	 * last element visited and rebalance. */
+	map_attach_rebal( map, element, attach_to, last_less );
+
+	if ( last_found != 0 )
+		*last_found = element;
+	return element;
+}
+
+#if 0
+/* Compiled out by the surrounding #if 0. Same search as map_insert_el, but
+ * takes a bare key and allocates the element itself with map_el_allocate when
+ * the key is not already present. Returns the new element, or null if the key
+ * exists; last_found (when non-null) receives the new or existing element. */
+map_el_t *map_insert_key( program_t *prg, map_t *map, tree_t *key, map_el_t **last_found )
+{
+ long key_relation;
+ map_el_t *cur_el = map->root, *parent_el = 0;
+ map_el_t *last_less = 0;
+
+ while ( true ) {
+ if ( cur_el == 0 ) {
+ /* We are at an external element and did not find the key we were
+ * looking for. Create the new element, attach it underneath the leaf
+ * and rebalance. */
+ map_el_t *element = map_el_allocate( prg );
+ element->key = key;
+ map_attach_rebal( map, element, parent_el, last_less );
+
+ if ( last_found != 0 )
+ *last_found = element;
+ return element;
+ }
+
+ key_relation = map_cmp( prg, map, key, cur_el->key );
+
+ /* Do we go left? */
+ if ( key_relation < 0 ) {
+ parent_el = last_less = cur_el;
+ cur_el = cur_el->left;
+ }
+ /* Do we go right? */
+ else if ( key_relation > 0 ) {
+ parent_el = cur_el;
+ cur_el = cur_el->right;
+ }
+ /* We have hit the target. */
+ else {
+ if ( last_found != 0 )
+ *last_found = cur_el;
+ return 0;
+ }
+ }
+}
+#endif
+
+/* Inserts map_el into map by forwarding to map_insert_el with no last_found
+ * out-parameter. Returns map_el on success, or null if an equal key is
+ * already present. */
+map_el_t *colm_map_insert( program_t *prg, map_t *map, map_el_t *map_el )
+{
+ return map_insert_el( prg, map, map_el, 0 );
+}
+
+/* Creates a new struct of the map's element struct type, stores key and value
+ * in it, and inserts the embedded map element into map.
+ *
+ * Returns the result of colm_map_insert: the inserted element, or null when
+ * an equal key already exists.
+ * NOTE(review): on the null path the freshly created struct is not linked
+ * into the map; what happens to it depends on colm_struct_new's ownership
+ * rules, which are not visible here -- confirm. */
+map_el_t *colm_vmap_insert( program_t *prg, map_t *map, struct_t *key, struct_t *value )
+{
+ struct colm_struct *s = colm_struct_new( prg, map->generic_info->el_struct_id );
+
+ /* The key is written at el_offset, the same offset from which the map_el_t
+ * address is taken below; the value goes in field 0. */
+ colm_struct_set_field( s, struct_t*, map->generic_info->el_offset, key );
+ colm_struct_set_field( s, struct_t*, 0, value );
+
+ map_el_t *map_el = colm_struct_get_addr( s, map_el_t*, map->generic_info->el_offset );
+
+ return colm_map_insert( prg, map, map_el );
+}
+
+/* Looks up key and, if an element is found, detaches it from the map. The
+ * element is not freed here.
+ *
+ * NOTE(review): this always returns null, even when an element was found and
+ * detached, so callers cannot distinguish the two outcomes or obtain the
+ * detached element. Possibly intentional -- confirm against the callers
+ * before changing. */
+map_el_t *colm_vmap_remove( program_t *prg, map_t *map, tree_t *key )
+{
+ map_el_t *map_el = colm_map_find( prg, map, key );
+ if ( map_el != 0 )
+ colm_map_detach( prg, map, map_el );
+ return 0;
+}
+
+/* Looks up key in map and returns the value stored in field 0 of the struct
+ * containing the matching element, or null when the key is absent. When the
+ * map's value type is TYPE_TREE the value is upref'd before it is returned. */
+tree_t *colm_vmap_find( program_t *prg, map_t *map, tree_t *key )
+{
+	struct_t *container;
+	tree_t *val;
+	map_el_t *found = colm_map_find( prg, map, key );
+
+	if ( found == 0 )
+		return 0;
+
+	/* The generic id passed here is the index of this map's generic_info
+	 * entry within the runtime data's generic_info array. */
+	container = colm_generic_el_container( prg, found,
+			map->generic_info - prg->rtd->generic_info );
+	val = colm_struct_get_field( container, tree_t*, 0 );
+
+	if ( map->generic_info->value_type == TYPE_TREE )
+		colm_tree_upref( prg, val );
+
+	return val;
+}
+
+/* Detaches map_el from map by forwarding to map_detach; the element returned
+ * by map_detach is discarded. */
+void colm_map_detach( program_t *prg, map_t *map, map_el_t *map_el )
+{
+ map_detach( prg, map, map_el );
+}
+
+/* Returns the element in map whose key compares equal to key, or null if
+ * there is none. Forwards to map_impl_find. */
+map_el_t *colm_map_find( program_t *prg, map_t *map, tree_t *key )
+{
+ return map_impl_find( prg, map, key );
+}
+
+/**
+ * \brief Search the tree for an element with the given key.
+ *
+ * Keys are ordered with map_cmp.
+ *
+ * \returns The matching element, or null if the key is not present.
+ */
+map_el_t *map_impl_find( program_t *prg, map_t *map, tree_t *key )
+{
+	map_el_t *cursor = map->root;
+
+	while ( cursor != 0 ) {
+		long rel = map_cmp( prg, map, key, cursor->key );
+
+		/* Equal keys: this is the target. */
+		if ( rel == 0 )
+			return cursor;
+
+		/* Smaller keys live to the left, larger ones to the right. */
+		cursor = rel < 0 ? cursor->left : cursor->right;
+	}
+	return 0;
+}
+
+
+/**
+ * \brief Look up an element by key and detach it from the tree.
+ *
+ * The element is not deleted.
+ *
+ * \returns The detached element if the key is found, otherwise null.
+ */
+map_el_t *map_detach_by_key( program_t *prg, map_t *map, tree_t *key )
+{
+	map_el_t *hit = map_impl_find( prg, map, key );
+
+	if ( hit == 0 )
+		return 0;
+
+	map_detach( prg, map, hit );
+	return hit;
+}
+
+/**
+ * \brief Detach an element from the tree.
+ *
+ * The element is removed from the ordered list, the tree size is decremented,
+ * the element is unlinked from the tree (substituting a neighbouring element
+ * from one of its subtrees when it has children), and heights and balance are
+ * then repaired from the point of change upward. The element itself is not
+ * freed and its own link fields are not cleared.
+ *
+ * If the element is not in the tree then undefined behaviour results.
+ *
+ * \returns The element given.
+ */
+map_el_t *map_detach( program_t *prg, map_t *map, map_el_t *element )
+{
+ map_el_t *replacement, *fixfrom;
+ long lheight, rheight;
+
+ /* Remove the element from the ordered list. */
+ map_list_detach( map, element );
+
+ /* Update the tree size. */
+ map->tree_size--;
+
+ /* Find a replacement element. */
+ if (element->right)
+ {
+ /* Find the leftmost element of the right subtree. */
+ replacement = element->right;
+ while (replacement->left)
+ replacement = replacement->left;
+
+ /* If replacing the element with its own child then we need to start
+ * fixing at the replacement, otherwise we start fixing at the
+ * parent of the replacement. */
+ if (replacement->parent == element)
+ fixfrom = replacement;
+ else
+ fixfrom = replacement->parent;
+
+ /* Unlink the replacement (its right child fills the gap), then put
+ * it where the element was. */
+ map_remove_el( map, replacement, replacement->right );
+ map_replace_el( map, element, replacement );
+ }
+ else if (element->left)
+ {
+ /* Find the rightmost element of the left subtree. */
+ replacement = element->left;
+ while (replacement->right)
+ replacement = replacement->right;
+
+ /* If replacing the element with its own child then we need to start
+ * fixing at the replacement, otherwise we start fixing at the
+ * parent of the replacement. */
+ if (replacement->parent == element)
+ fixfrom = replacement;
+ else
+ fixfrom = replacement->parent;
+
+ /* Unlink the replacement (its left child fills the gap), then put
+ * it where the element was. */
+ map_remove_el( map, replacement, replacement->left );
+ map_replace_el( map, element, replacement );
+ }
+ else
+ {
+ /* We need to start fixing at the parent of the element. */
+ fixfrom = element->parent;
+
+ /* The element we are deleting is a leaf element. */
+ map_remove_el( map, element, 0 );
+ }
+
+ /* If fixfrom is null it means we just deleted
+ * the root of the tree. */
+ if ( fixfrom == 0 )
+ return element;
+
+ /* Fix the heights after the deletion. */
+ map_recalc_heights( map, fixfrom );
+
+ /* Fix every unbalanced element going up in the tree. */
+ map_el_t *ub = map_find_first_unbal_el( map, fixfrom );
+ while ( ub )
+ {
+ /* Find the element to rebalance by moving down from the first unbalanced
+ * element 2 levels in the direction of the greatest heights. On the
+ * second move down, the heights may be equal ( but not on the first ).
+ * In which case go in the direction of the first move. */
+ lheight = ub->left ? ub->left->height : 0;
+ rheight = ub->right ? ub->right->height : 0;
+ assert( lheight != rheight );
+ if (rheight > lheight)
+ {
+ ub = ub->right;
+ lheight = ub->left ?
+ ub->left->height : 0;
+ rheight = ub->right ?
+ ub->right->height : 0;
+ if (rheight > lheight)
+ ub = ub->right;
+ else if (rheight < lheight)
+ ub = ub->left;
+ else
+ ub = ub->right;
+ }
+ else
+ {
+ ub = ub->left;
+ lheight = ub->left ?
+ ub->left->height : 0;
+ rheight = ub->right ?
+ ub->right->height : 0;
+ if (rheight > lheight)
+ ub = ub->right;
+ else if (rheight < lheight)
+ ub = ub->left;
+ else
+ ub = ub->left;
+ }
+
+
+ /* map_rebalance returns the grandparent of the subtree formed
+ * by the elements that were rebalanced.
+ * We must continue upward from there rebalancing. */
+ fixfrom = map_rebalance( map, ub );
+
+ /* Find the next unbalanced element. */
+ ub = map_find_first_unbal_el( map, fixfrom );
+ }
+
+ return element;
+}
+
+
+
diff --git a/src/map.cc b/src/map.cc
new file mode 100644
index 00000000..4d3bd090
--- /dev/null
+++ b/src/map.cc
@@ -0,0 +1,27 @@
+/*
+ * Copyright 2008-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "pdarun.h"
+#include <assert.h>
+
+
+
diff --git a/src/map.h b/src/map.h
new file mode 100644
index 00000000..1d6db2d7
--- /dev/null
+++ b/src/map.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright 2010-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _COLM_MAP_H
+#define _COLM_MAP_H
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#include <colm/program.h>
+#include <colm/struct.h>
+
+#include "internal.h"
+
+/* Ordered-list maintenance for the elements of a map. */
+void map_list_abandon( map_t *map );
+
+void map_list_add_before( map_t *map, map_el_t *next_el, map_el_t *new_el );
+void map_list_add_after( map_t *map, map_el_t *prev_el, map_el_t *new_el );
+map_el_t *map_list_detach( map_t *map, map_el_t *el );
+
+/* Tree structure, height and balance maintenance. */
+void map_attach_rebal( map_t *map, map_el_t *element, map_el_t *parent_el, map_el_t *last_less );
+void map_delete_children_of( map_t *map, map_el_t *element );
+void map_empty( map_t *map );
+map_el_t *map_rebalance( map_t *map, map_el_t *n );
+void map_recalc_heights( map_t *map, map_el_t *element );
+map_el_t *mapFindFirstUnbalGP( map_t *map, map_el_t *element );
+map_el_t *map_find_first_unbal_el( map_t *map, map_el_t *element );
+void map_remove_el( map_t *map, map_el_t *element, map_el_t *filler );
+void map_replace_el( map_t *map, map_el_t *element, map_el_t *replacement );
+
+/* Insert, find and detach.
+ * NOTE(review): the definitions of map_insert_key and map_copy_branch that
+ * appear earlier in this change are wrapped in #if 0, so these two
+ * declarations appear to have no compiled definition -- confirm before
+ * calling them. */
+map_el_t *map_insert_el( program_t *prg, map_t *map, map_el_t *element, map_el_t **last_found );
+map_el_t *map_insert_key( program_t *prg, map_t *map, tree_t *key, map_el_t **last_found );
+map_el_t *map_impl_find( program_t *prg, map_t *map, tree_t *key );
+map_el_t *map_detach_by_key( program_t *prg, map_t *map, tree_t *key );
+map_el_t *map_detach( program_t *prg, map_t *map, map_el_t *element );
+map_el_t *map_copy_branch( program_t *prg, map_t *map, map_el_t *el,
+ kid_t *old_next_down, kid_t **new_next_down );
+
+struct tree_pair map_remove( program_t *prg, map_t *map, tree_t *key );
+
+long cmp_tree( program_t *prg, const tree_t *tree1, const tree_t *tree2 );
+
+void map_impl_remove_el( program_t *prg, map_t *map, map_el_t *element );
+int map_impl_remove_key( program_t *prg, map_t *map, tree_t *key );
+
+tree_t *map_find( program_t *prg, map_t *map, tree_t *key );
+long map_length( map_t *map );
+tree_t *map_unstore( program_t *prg, map_t *map, tree_t *key, tree_t *existing );
+int map_insert( program_t *prg, map_t *map, tree_t *key, tree_t *element );
+void map_unremove( program_t *prg, map_t *map, tree_t *key, tree_t *element );
+tree_t *map_uninsert( program_t *prg, map_t *map, tree_t *key );
+tree_t *map_store( program_t *prg, map_t *map, tree_t *key, tree_t *element );
+
+/* Element-based map entry points (colm_map_*). */
+map_el_t *colm_map_insert( program_t *prg, map_t *map, map_el_t *map_el );
+void colm_map_detach( program_t *prg, map_t *map, map_el_t *map_el );
+map_el_t *colm_map_find( program_t *prg, map_t *map, tree_t *key );
+
+/* Key/value map entry points (colm_vmap_*) and the map iterator advance. */
+map_el_t *colm_vmap_insert( program_t *prg, map_t *map, struct_t *key, struct_t *value );
+map_el_t *colm_vmap_remove( program_t *prg, map_t *map, tree_t *key );
+tree_t *colm_map_iter_advance( program_t *prg, tree_t ***psp, generic_iter_t *iter );
+tree_t *colm_vmap_find( program_t *prg, map_t *map, tree_t *key );
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif /* _COLM_MAP_H */
+
diff --git a/src/parser.cc b/src/parser.cc
new file mode 100644
index 00000000..a41288b1
--- /dev/null
+++ b/src/parser.cc
@@ -0,0 +1,1128 @@
+/*
+ * Copyright 2006-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "parser.h"
+
+#include <stdbool.h>
+#include <stdlib.h>
+#include <errno.h>
+
+#include <iostream>
+
+using std::endl;
+
+/* Declares a list-element member called name inside the struct currently
+ * being parsed. Two things are created: a non-terminal named name with a
+ * single empty production, and a struct field of generic-element type whose
+ * type reference is a ListPtrs over the enclosing struct. */
+void BaseParser::listElDef( String name )
+{
+	/*
+	 * First the unique type: a definition with one empty form.
+	 */
+	ObjectDef *elObject = ObjectDef::cons( ObjectDef::UserType,
+			name, pd->nextObjectId++ );
+
+	LelDefList *forms = new LelDefList;
+	Production *emptyForm = BaseParser::production( InputLoc(),
+			new ProdElList, String(), false, 0, 0 );
+	prodAppend( forms, emptyForm );
+
+	NtDef *elNt = NtDef::cons( name, curNspace(), curStruct(), false );
+	BaseParser::cflDef( elNt, elObject, forms );
+
+	/*
+	 * Then the field. Its type refers back to the containing struct by name.
+	 */
+	NamespaceQual *qual = NamespaceQual::cons( curNspace() );
+	String ownerName = curStruct()->objectDef->name;
+	TypeRef *ownerRef = TypeRef::cons( InputLoc(), qual, ownerName, RepeatNone );
+	TypeRef *listPtrsRef = TypeRef::cons( InputLoc(), TypeRef::ListPtrs, 0, ownerRef, 0 );
+
+	ObjectField *field = ObjectField::cons( InputLoc(),
+			ObjectField::GenericElementType, listPtrsRef, name );
+
+	structVarDef( InputLoc(), field );
+}
+
+/* Declares a map-element member called name inside the struct currently being
+ * parsed. Mirrors listElDef, except that the field's type reference is a
+ * MapPtrs over the enclosing struct and carries keyType. */
+void BaseParser::mapElDef( String name, TypeRef *keyType )
+{
+	/*
+	 * First the unique type: a definition with one empty form.
+	 */
+	ObjectDef *elObject = ObjectDef::cons( ObjectDef::UserType,
+			name, pd->nextObjectId++ );
+
+	LelDefList *forms = new LelDefList;
+	Production *emptyForm = BaseParser::production( InputLoc(),
+			new ProdElList, String(), false, 0, 0 );
+	prodAppend( forms, emptyForm );
+
+	NtDef *elNt = NtDef::cons( name, curNspace(), curStruct(), false );
+	BaseParser::cflDef( elNt, elObject, forms );
+
+	/*
+	 * Then the field. Its type refers back to the containing struct by name.
+	 */
+	NamespaceQual *qual = NamespaceQual::cons( curNspace() );
+	String ownerName = curStruct()->objectDef->name;
+	TypeRef *ownerRef = TypeRef::cons( InputLoc(), qual, ownerName, RepeatNone );
+	TypeRef *mapPtrsRef = TypeRef::cons( InputLoc(), TypeRef::MapPtrs, 0, ownerRef, keyType );
+
+	ObjectField *field = ObjectField::cons( InputLoc(),
+			ObjectField::GenericElementType, mapPtrsRef, name );
+	structVarDef( InputLoc(), field );
+}
+
+#if 0
+/* Compiled out by the surrounding #if 0; the only visible call site, in
+ * init(), is commented out as well. Would declare an internal "argv_el"
+ * struct with a str field "value" and a list element "el", recording the
+ * struct in pd->argvEl. */
+void BaseParser::argvDecl()
+{
+ String structName = "argv_el";
+ structHead( internal, pd->rootNamespace, structName, ObjectDef::StructType );
+
+ /* First the argv value. */
+ String name = "value";
+ String type = "str";
+ NamespaceQual *nspaceQual = NamespaceQual::cons( curNspace() );
+ TypeRef *typeRef = TypeRef::cons( internal, nspaceQual, type, RepeatNone );
+ ObjectField *objField = ObjectField::cons( internal,
+ ObjectField::StructFieldType, typeRef, name );
+ structVarDef( objField->loc, objField );
+
+ pd->argvEl = objField->context;
+
+ /* Now the list element. */
+ listElDef( "el" );
+
+ /* Undo the pushes presumably made by structHead -- TODO confirm. */
+ structStack.pop();
+ namespaceStack.pop();
+}
+#endif
+
+/* One-time parser setup: creates the root namespace, the global, _input and
+ * stream struct definitions, the graph dictionary, the root local frame, the
+ * base language elements and unique types, and finally the type references
+ * for the argv and stds lists. */
+void BaseParser::init()
+{
+	/* Root namespace first; the declarations below attach to it. */
+	pd->rootNamespace = createRootNamespace();
+
+	/* The global object. It owns the root namespace's root scope. */
+	String globalName = "global";
+	pd->globalObjectDef = ObjectDef::cons( ObjectDef::UserType,
+			globalName, pd->nextObjectId++ );
+
+	pd->rootNamespace->rootScope->owningObj = pd->globalObjectDef;
+
+	pd->global = new StructDef( internal, globalName, pd->globalObjectDef );
+	pd->globalSel = declareStruct( pd, 0, globalName, pd->global );
+
+	/* The input object. */
+	String inputName = "_input";
+	ObjectDef *inputObj = ObjectDef::cons( ObjectDef::BuiltinType,
+			inputName, pd->nextObjectId++ );
+
+	pd->input = new StructDef( internal, inputName, inputObj );
+	pd->inputSel = declareStruct( pd, pd->rootNamespace,
+			pd->input->name, pd->input );
+
+	/* The stream object. */
+	String streamName = "stream";
+	ObjectDef *streamObj = ObjectDef::cons( ObjectDef::BuiltinType,
+			streamName, pd->nextObjectId++ );
+
+	pd->stream = new StructDef( internal, streamName, streamObj );
+	pd->streamSel = declareStruct( pd, pd->rootNamespace,
+			pd->stream->name, pd->stream );
+
+	/* Initialize the dictionary of graphs. This is our symbol table. The
+	 * initialization needs to be done on construction which happens at the
+	 * beginning of a machine spec so any assignment operators can reference
+	 * the builtins. */
+	pd->initGraphDict();
+
+	/* The root local frame doubles as the current frame and scope until a
+	 * block is opened. */
+	pd->rootLocalFrame = ObjectDef::cons( ObjectDef::FrameType,
+			"local", pd->nextObjectId++ );
+	localFrameTop = pd->rootLocalFrame;
+	scopeTop = pd->rootLocalFrame->rootScope;
+
+	/* Declarations of internal types. They must be declared now because we
+	 * use them directly, rather than via type lookup. */
+	pd->declareBaseLangEls();
+	pd->initUniqueTypes();
+
+	//argvDecl();
+
+	/* Internal variables. */
+	addArgvList();
+	addStdsList();
+}
+
+/* Registers join under name in the namespace's regular-language map. If the
+ * name is already taken an error is reported and the existing entry is left
+ * untouched. */
+void BaseParser::addRegularDef( const InputLoc &loc, Namespace *nspace,
+		const String &name, LexJoin *join )
+{
+	GraphDictEl *dictEl = nspace->rlMap.insert( name );
+
+	if ( dictEl == 0 ) {
+		// Insert failed: recover by ignoring the duplicate.
+		error(loc) << "regular definition \"" << name << "\" already exists" << endl;
+		return;
+	}
+
+	/* Fresh entry in the dict, fill it in. */
+	dictEl->value = new LexDefinition( name, join );
+	dictEl->isInstance = false;
+	dictEl->loc = loc;
+}
+
+/* Allocates a token region backed by impl, using the current length of
+ * pd->regionList as its id, appends it to that list and returns it. */
+TokenRegion *BaseParser::createRegion( const InputLoc &loc, RegionImpl *impl )
+{
+	TokenRegion *region = new TokenRegion( loc, pd->regionList.length(), impl );
+	pd->regionList.append( region );
+	return region;
+}
+
+/* Creates a new region set and pushes it on the region stack. Three region
+ * implementations (token+ignore, token-only, ignore-only) and four token
+ * regions are created; the collect-ignore region shares the ignore-only
+ * implementation. The set is also recorded in pd->regionSetList. */
+void BaseParser::pushRegionSet( const InputLoc &loc )
+{
+ RegionImpl *implTokenIgnore = new RegionImpl;
+ RegionImpl *implTokenOnly = new RegionImpl;
+ RegionImpl *implIgnoreOnly = new RegionImpl;
+
+ pd->regionImplList.append( implTokenIgnore );
+ pd->regionImplList.append( implTokenOnly );
+ pd->regionImplList.append( implIgnoreOnly );
+
+ TokenRegion *tokenIgnore = createRegion( loc, implTokenIgnore );
+ TokenRegion *tokenOnly = createRegion( loc, implTokenOnly );
+ TokenRegion *ignoreOnly = createRegion( loc, implIgnoreOnly );
+ TokenRegion *collectIgnore = createRegion( loc, implIgnoreOnly );
+
+ /* NOTE(review): implTokenIgnore is passed as both the first and the second
+ * argument and implTokenOnly is never passed. If the second constructor
+ * parameter is the token-only implementation this looks like a copy-paste
+ * slip -- confirm against the RegionSet constructor. */
+ RegionSet *regionSet = new RegionSet(
+ implTokenIgnore, implTokenIgnore, implIgnoreOnly,
+ tokenIgnore, tokenOnly, ignoreOnly, collectIgnore );
+
+ /* Let the collect-ignore region reach its ignore-only counterpart. */
+ collectIgnore->ignoreOnly = ignoreOnly;
+
+ pd->regionSetList.append( regionSet );
+ regionStack.push( regionSet );
+}
+
+/* Pops the top region set off the region stack. The set itself stays in
+ * pd->regionSetList, where pushRegionSet recorded it. */
+void BaseParser::popRegionSet()
+{
+ regionStack.pop();
+}
+
+/* Builds the root namespace (id zero, default name, no parent), records it in
+ * pd->namespaceList, pushes it on the namespace stack and returns it.
+ *
+ * The root scope's owner is set from pd->globalObjectDef as it stands at the
+ * time of the call; init() calls this before it creates the global object and
+ * assigns the owner again afterwards. */
+Namespace *BaseParser::createRootNamespace()
+{
+	Namespace *root = new Namespace( internal,
+			String("___ROOT_NAMESPACE"), 0, 0 );
+
+	root->rootScope->owningObj = pd->globalObjectDef;
+
+	pd->namespaceList.append( root );
+	namespaceStack.push( root );
+
+	return root;
+}
+
+/* Enters the namespace called name beneath the namespace on top of the stack.
+ * An existing child of that name is reused; otherwise a new one is created,
+ * scoped under the parent and registered with both the parent and
+ * pd->namespaceList. Either way the namespace is pushed on the stack and
+ * returned. */
+Namespace *BaseParser::createNamespace( const InputLoc &loc, const String &name )
+{
+	Namespace *enclosing = namespaceStack.top();
+
+	/* Reuse a namespace of this name if the parent already has one. */
+	Namespace *target = enclosing->findNamespace( name );
+
+	if ( target == 0 ) {
+		target = new Namespace( loc, name,
+				pd->namespaceList.length(), enclosing );
+
+		/* Chain the new root scope to the parent's so lookups fall through. */
+		target->rootScope->parentScope = enclosing->rootScope;
+		target->rootScope->owningObj = pd->globalObjectDef;
+
+		enclosing->childNamespaces.append( target );
+		pd->namespaceList.append( target );
+	}
+
+	namespaceStack.push( target );
+
+	return target;
+}
+
+/* Enters the reduction called name in the namespace on top of the stack. An
+ * existing reduction of that name is reused; otherwise one is created and
+ * added to the namespace. The reduction is pushed on the reduction stack and
+ * returned. */
+Reduction *BaseParser::createReduction( const InputLoc loc, const String &name )
+{
+	Namespace *owner = namespaceStack.top();
+	Reduction *target = owner->findReduction( name );
+
+	if ( target == 0 ) {
+		target = new Reduction( loc, name );
+		owner->reductions.append( target );
+	}
+
+	reductionStack.push( target );
+
+	return target;
+}
+
+/* Wraps a string literal in the full chain of lexical parse-tree nodes
+ * (factor, neg, rep, aug, term, expression) and returns the resulting join. */
+LexJoin *BaseParser::literalJoin( const InputLoc &loc, const String &data )
+{
+	Literal *lit = Literal::cons( loc, data, Literal::LitString );
+
+	/* Each layer simply wraps the one before it. */
+	LexFactor *fact = LexFactor::cons( lit );
+	LexFactorNeg *neg = LexFactorNeg::cons( fact );
+	LexFactorRep *rep = LexFactorRep::cons( neg );
+	LexFactorAug *aug = LexFactorAug::cons( rep );
+	LexTerm *lexTerm = LexTerm::cons( aug );
+	LexExpression *lexExpr = LexExpression::cons( lexTerm );
+
+	return LexJoin::cons( lexExpr );
+}
+
+/* Defines a token or an ignore in the current namespace and region set.
+ *
+ * loc          - location reported in errors and stored on the definitions.
+ * name         - token name; may be empty only for ignores, which then get a
+ *                generated "_ignore_XXXX" name.
+ * join         - the pattern; when non-null the token is also registered as a
+ *                regular definition so other tokens can refer to it.
+ * objectDef    - object definition attached to the token.
+ * transBlock   - translation code block attached to the token.
+ * ignore       - true to define an ignore rather than a token.
+ * noPreIgnore,
+ * noPostIgnore - copied onto the token definition.
+ *
+ * A token defined outside any scanner gets a region set of its own, which is
+ * pushed for the duration of the call; an ignore outside a scanner is an
+ * error. Two token instances are always created: one in the token+ignore
+ * region, and a duplicate in either the ignore-only or the token-only region
+ * depending on the ignore flag. */
+void BaseParser::defineToken( const InputLoc &loc, String name, LexJoin *join,
+		ObjectDef *objectDef, CodeBlock *transBlock, bool ignore,
+		bool noPreIgnore, bool noPostIgnore )
+{
+	bool pushedRegion = false;
+	if ( !insideRegion() ) {
+		if ( ignore )
+			error(loc) << "ignore tokens can only appear inside scanners" << endp;
+
+		pushedRegion = true;
+		pushRegionSet( internal );
+	}
+
+	/* Check the name if this is a token. */
+	if ( !ignore && name == 0 )
+		error(loc) << "tokens must have a name" << endp;
+
+	/* Give a default name to ignores. */
+	if ( name == 0 )
+		name.setAs( 32, "_ignore_%.4x", pd->nextTokenId );
+
+	Namespace *nspace = curNspace();
+	RegionSet *regionSet = regionStack.top();
+
+	TokenDef *tokenDef = TokenDef::cons( name, String(), false, ignore, join,
+			transBlock, loc, 0, nspace, regionSet, objectDef, curStruct() );
+
+	regionSet->tokenDefList.append( tokenDef );
+	nspace->tokenDefList.append( tokenDef );
+
+	/* Set once, before any instance is created. */
+	tokenDef->noPreIgnore = noPreIgnore;
+	tokenDef->noPostIgnore = noPostIgnore;
+
+	/* The instance for the token/ignore region. */
+	TokenInstance *tokenInstance = TokenInstance::cons( tokenDef,
+			join, loc, pd->nextTokenId++, nspace,
+			regionSet->tokenIgnore );
+
+	regionSet->tokenIgnore->impl->tokenInstanceList.append( tokenInstance );
+
+	/* The duplicate instance: ignores go to the ignore-only region, tokens
+	 * go to the token-only region. */
+	TokenRegion *dupRegion = ignore ?
+			regionSet->ignoreOnly : regionSet->tokenOnly;
+
+	TokenInstance *dupInstance = TokenInstance::cons( tokenDef,
+			join, loc, pd->nextTokenId++, nspace, dupRegion );
+
+	dupInstance->dupOf = tokenInstance;
+
+	dupRegion->impl->tokenInstanceList.append( dupInstance );
+
+	/* Drop the region set again if it was pushed at the top of this
+	 * function. */
+	if ( pushedRegion )
+		popRegionSet();
+
+	if ( join != 0 ) {
+		/* Create a regular language definition so the token can be used to
+		 * make other tokens */
+		addRegularDef( loc, curNspace(), name, join );
+	}
+}
+
+/* Defines a zero token called name in the current region set and namespace.
+ * Must be used inside a region. The definition is given a one-character
+ * literal pattern and marked isZero; it is added to the token definition
+ * lists but no token instance is made for it. */
+void BaseParser::zeroDef( const InputLoc &loc, const String &name )
+{
+	if ( !insideRegion() )
+		error(loc) << "zero token should be inside token" << endp;
+
+	RegionSet *regionSet = regionStack.top();
+	Namespace *nspace = curNspace();
+
+	LexJoin *pattern = literalJoin( loc, String("`") );
+
+	TokenDef *zeroTok = TokenDef::cons( name, String(), false, false, pattern,
+			0, loc, 0, nspace, regionSet, 0, curStruct() );
+	zeroTok->isZero = true;
+
+	regionSet->tokenDefList.append( zeroTok );
+	nspace->tokenDefList.append( zeroTok );
+
+	/* Deliberately no token instance. */
+}
+
+/* Defines a literal token for the string data in the current namespace.
+ *
+ * The token gets a generated "_literal_XXXX" name and its own object
+ * definition. It is an error if the interpreted literal is already in the
+ * namespace's literal dictionary. Two instances are created, one in the
+ * token+ignore region (this is the one recorded in the dictionary) and a
+ * duplicate in the token-only region. When called outside a region a
+ * temporary region set is pushed for the duration of the call. */
+void BaseParser::literalDef( const InputLoc &loc, const String &data,
+		bool noPreIgnore, bool noPostIgnore )
+{
+	/* Generated name, derived from the next token id. */
+	String name( 32, "_literal_%.4x", pd->nextTokenId );
+
+	bool pushedRegion = false;
+	if ( !insideRegion() ) {
+		pushRegionSet( loc );
+		pushedRegion = true;
+	}
+
+	/* The dictionary is keyed by the interpreted form of the literal. */
+	bool unusedCI;
+	String interp;
+	prepareLitString( interp, unusedCI, data, loc );
+
+	Namespace *nspace = curNspace();
+	RegionSet *regionSet = regionStack.top();
+
+	if ( nspace->literalDict.find( interp ) != 0 )
+		error( loc ) << "literal already defined in this namespace" << endp;
+
+	LexJoin *join = literalJoin( loc, data );
+
+	ObjectDef *objectDef = ObjectDef::cons( ObjectDef::UserType,
+			name, pd->nextObjectId++ );
+
+	/* The token definition. */
+	TokenDef *tokenDef = TokenDef::cons( name, data, true, false, join,
+			0, loc, 0, nspace, regionSet, objectDef, 0 );
+
+	regionSet->tokenDefList.append( tokenDef );
+	nspace->tokenDefList.append( tokenDef );
+
+	/* Primary instance, in the token/ignore region. */
+	TokenInstance *primary = TokenInstance::cons( tokenDef, join,
+			loc, pd->nextTokenId++, nspace, regionSet->tokenIgnore );
+
+	regionSet->tokenIgnore->impl->tokenInstanceList.append( primary );
+
+	nspace->literalDict.insert( interp, primary );
+
+	/* Record the ignore flags on the definition. */
+	tokenDef->noPreIgnore = noPreIgnore;
+	tokenDef->noPostIgnore = noPostIgnore;
+
+	/* Duplicate instance, in the token-only region. */
+	TokenInstance *duplicate = TokenInstance::cons( tokenDef,
+			join, loc, pd->nextTokenId++, nspace,
+			regionSet->tokenOnly );
+
+	duplicate->dupOf = primary;
+
+	regionSet->tokenOnly->impl->tokenInstanceList.append( duplicate );
+
+	if ( pushedRegion )
+		popRegionSet();
+}
+
+/* Builds the type reference for the argv list, a List whose value type is the
+ * unique str type, and stores it in pd->argvTypeRef. */
+void BaseParser::addArgvList()
+{
+	TypeRef *strRef = TypeRef::cons( internal, pd->uniqueTypeStr );
+	TypeRef *listElRef = TypeRef::cons( internal, TypeRef::ListEl, strRef );
+
+	pd->argvTypeRef = TypeRef::cons( internal, TypeRef::List, 0, listElRef, strRef );
+}
+
+/* Builds the type reference for the stds list, a List whose value type is the
+ * unique stream type, and stores it in pd->stdsTypeRef. */
+void BaseParser::addStdsList()
+{
+	TypeRef *streamRef = TypeRef::cons( internal, pd->uniqueTypeStream );
+	TypeRef *listElRef = TypeRef::cons( internal, TypeRef::ListEl, streamRef );
+
+	pd->stdsTypeRef = TypeRef::cons( internal, TypeRef::List, 0, listElRef, streamRef );
+}
+
+/* Starts a new code block: creates a fresh "local" frame object, makes it and
+ * its root scope the current frame and scope, and returns it. */
+ObjectDef *BaseParser::blockOpen()
+{
+	ObjectDef *newFrame = ObjectDef::cons( ObjectDef::FrameType,
+			"local", pd->nextObjectId++ );
+
+	scopeTop = newFrame->rootScope;
+	localFrameTop = newFrame;
+
+	return newFrame;
+}
+
+/* Ends a code block by pointing the current frame and scope back at the root
+ * local frame. Note this always resets to the root frame rather than to
+ * whatever frame was current before the matching blockOpen. */
+void BaseParser::blockClose()
+{
+ localFrameTop = pd->rootLocalFrame;
+ scopeTop = pd->rootLocalFrame->rootScope;
+}
+
+/* Creates a function from a statement list and its local frame, assigns it
+ * the next function id, records it in pd->functionList and ties it to the
+ * struct currently being parsed (if any). */
+void BaseParser::functionDef( StmtList *stmtList, ObjectDef *localFrame,
+		ParameterList *paramList, TypeRef *typeRef, const String &name, bool exprt )
+{
+	CodeBlock *body = CodeBlock::cons( stmtList, localFrame );
+
+	Function *func = Function::cons( curNspace(), typeRef, name,
+			paramList, body, pd->nextFuncId++, false, exprt );
+
+	pd->functionList.append( func );
+	func->inContext = curStruct();
+}
+
+/* Creates a function that has no code block of its own and is marked inHost,
+ * with hostCall recorded on it. It takes the next host id, is recorded in
+ * pd->inHostList and is tied to the struct currently being parsed (if any). */
+void BaseParser::inHostDef( const String &hostCall, ObjectDef *localFrame,
+		ParameterList *paramList, TypeRef *typeRef, const String &name, bool exprt )
+{
+	Function *hostFunc = Function::cons( curNspace(), typeRef, name,
+			paramList, 0, pd->nextHostId++, false, exprt );
+
+	hostFunc->hostCall = hostCall;
+	hostFunc->localFrame = localFrame;
+	hostFunc->inHost = true;
+
+	pd->inHostList.append( hostFunc );
+	hostFunc->inContext = curStruct();
+}
+
+/* Creates an iterator: a function with no return type reference, built with
+ * the flag arguments (true, false) where functionDef passes (false, exprt).
+ * It is recorded in pd->functionList. Unlike functionDef, inContext is not
+ * set here. */
+void BaseParser::iterDef( StmtList *stmtList, ObjectDef *localFrame,
+		ParameterList *paramList, const String &name )
+{
+	CodeBlock *body = CodeBlock::cons( stmtList, localFrame );
+
+	Function *iter = Function::cons( curNspace(), 0, name,
+			paramList, body, pd->nextFuncId++, true, false );
+
+	pd->functionList.append( iter );
+}
+
+/* Declares objField as a field of the global object, in the root scope of the
+ * current namespace. A name that is already declared in that scope is
+ * reported as an error.
+ *
+ * Returns an assignment statement of kind assignType that initialises the new
+ * field from expr, or null when expr is null. */
+LangStmt *BaseParser::globalDef( ObjectField *objField, LangExpr *expr,
+		LangStmt::Type assignType )
+{
+	Namespace *nspace = curNspace(); //pd->rootNamespace;
+
+	if ( nspace->rootScope->checkRedecl( objField->name ) != 0 )
+		error(objField->loc) << "object field renamed" << endp;
+
+	pd->globalObjectDef->insertField( nspace->rootScope, objField->name, objField );
+
+	/* No initialiser, no statement. */
+	if ( expr == 0 )
+		return 0;
+
+	LangVarRef *target = LangVarRef::cons( objField->loc,
+			curNspace(), curStruct(), curScope(), objField->name );
+
+	return LangStmt::cons( objField->loc, assignType, target, expr );
+}
+
+LangStmt *BaseParser::exportStmt( ObjectField *objField,
+        LangStmt::Type assignType, LangExpr *expr )
+{
+    Namespace *ns = curNspace();
+    ObjectDef *globals = pd->globalObjectDef;
+
+    /* Exports are rejected while a struct is open. */
+    if ( curStruct() != 0 )
+        error(objField->loc) << "cannot export parser context variables" << endp;
+
+    /* The name must not already be present in the namespace's root scope. */
+    if ( ns->rootScope->checkRedecl( objField->name ) != 0 )
+        error(objField->loc) << "object field renamed" << endp;
+
+    /* Store the field in the global object and flag it as exported. */
+    globals->insertField( ns->rootScope, objField->name, objField );
+    objField->isExport = true;
+
+    /* A declaration without an initializer produces no statement. */
+    if ( expr == 0 )
+        return 0;
+
+    /* The variable reference is built with a null struct argument. */
+    LangVarRef *target = LangVarRef::cons( objField->loc,
+            curNspace(), 0, curScope(), objField->name );
+    return LangStmt::cons( objField->loc, assignType, target, expr );
+}
+
+
+void BaseParser::cflDef( NtDef *ntDef, ObjectDef *objectDef, LelDefList *defList )
+{
+    /* Attach the object and the productions to the nonterminal definition
+     * and record it in the current namespace. */
+    ntDef->objectDef = objectDef;
+    ntDef->defList = defList;
+    curNspace()->ntDefList.append( ntDef );
+
+    /* Walk every element of every production and declare a field in the
+     * object for each capture found. */
+    for ( LelDefList::Iter prod = *defList; prod.lte(); prod++ ) {
+        for ( ProdElList::Iter pel = *prod->prodElList; pel.lte(); pel++ ) {
+            /* Elements without a capture need nothing. */
+            if ( pel->captureField == 0 )
+                continue;
+
+            /* Reuse a field of the same name if one was already declared.
+             * FIXME: in that case, check the types are the same. */
+            ObjectField *field = objectDef->rootScope->checkRedecl(
+                    pel->captureField->name );
+
+            if ( field == 0 ) {
+                /* First sighting: the capture itself becomes the field,
+                 * typed by the element's type reference. */
+                field = pel->captureField;
+                field->typeRef = pel->typeRef;
+                objectDef->rootScope->insertField( field->name, field );
+            }
+
+            /* Either way, note this element as a right-hand-side value. */
+            field->rhsVal.append( RhsVal( pel ) );
+        }
+    }
+}
+
+ReOrBlock *BaseParser::lexRegularExprData( ReOrBlock *reOrBlock, ReOrItem *reOrItem )
+{
+    /* Tree-size optimization: when the new item is plain data and the block
+     * on the left is a recurse-item node whose own item is plain data too,
+     * the two data strings can be joined instead of growing the tree. */
+    const bool canMerge =
+            reOrItem->type == ReOrItem::Data &&
+            reOrBlock->type == ReOrBlock::RecurseItem &&
+            reOrBlock->item->type == ReOrItem::Data;
+
+    /* No merge possible: put the left block and the new item under a new node. */
+    if ( !canMerge )
+        return ReOrBlock::cons( reOrBlock, reOrItem );
+
+    /* Append the new data to the left block's item and discard the now
+     * redundant item. The left block is the result. */
+    reOrBlock->item->data += reOrItem->data;
+    delete reOrItem;
+    return reOrBlock;
+}
+
+LexFactor *BaseParser::lexRlFactorName( const String &data, const InputLoc &loc )
+{
+    /* Look the name up in the current namespace first, then in each parent
+     * namespace in turn. The first namespace that knows the name decides. */
+    for ( Namespace *ns = curNspace(); ns != 0; ns = ns->parentNamespace ) {
+        GraphDictEl *entry = ns->rlMap.find( data );
+        if ( entry == 0 )
+            continue;
+
+        if ( entry->isInstance ) {
+            /* Recover by returning null as the factor node. */
+            error(loc) << "references to graph instantiations not allowed "
+                    "in expressions" << endl;
+            return 0;
+        }
+
+        /* Create a factor node that is a lookup of an expression. */
+        return LexFactor::cons( loc, entry->value );
+    }
+
+    /* No namespace in the chain defines the name. Recover by returning null
+     * as the factor node. */
+    error(loc) << "graph lookup of \"" << data << "\" failed" << endl;
+    return 0;
+}
+
+int BaseParser::lexFactorRepNum( const InputLoc &loc, const String &data )
+{
+    /* Convert the repetition count text to a number. errno is cleared first
+     * so that an ERANGE seen afterwards is known to come from this call. */
+    errno = 0;
+    long rep = strtol( data, 0, 10 );
+
+    /* The count is handed back as an int. Besides strtol's own overflow
+     * report, reject any value that does not fit in an int: where long is
+     * wider than int the conversion on return would otherwise silently
+     * change the count. */
+    if ( ( errno == ERANGE && rep == LONG_MAX ) || rep > INT_MAX ) {
+        /* Repetition too large. Recover by returning repetition 1. */
+        error(loc) << "repetition number " << data << " overflows" << endl;
+        rep = 1;
+    }
+    return rep;
+}
+
+LexFactorAug *BaseParser::lexFactorLabel( const InputLoc &loc,
+        const String &data, LexFactorAug *factorAug )
+{
+    /* The label becomes an object field of the lexical-substring kind, typed
+     * with the compiler's unique string type. */
+    ObjectField *labelField = ObjectField::cons( loc,
+            ObjectField::LexSubstrType,
+            TypeRef::cons( loc, pd->uniqueTypeStr ), data );
+
+    /* Two mark actions delimit the labelled substring: one for entering and
+     * one for leaving. Each takes its own match-end number, entering first. */
+    Action *onEnter = Action::cons( MarkMark, pd->nextMatchEndNum++ );
+    Action *onLeave = Action::cons( MarkMark, pd->nextMatchEndNum++ );
+    pd->actionList.append( onEnter );
+    pd->actionList.append( onLeave );
+
+    /* Hook the actions onto the factor at its start and leave points. */
+    factorAug->actions.append( ParserAction( loc, at_start, 0, onEnter ) );
+    factorAug->actions.append( ParserAction( loc, at_leave, 0, onLeave ) );
+
+    /* Record the capture so the field is tied to its pair of marks. */
+    factorAug->reCaptureVect.append( ReCapture( onEnter, onLeave, labelField ) );
+
+    return factorAug;
+}
+
+LexJoin *BaseParser::lexOptJoin( LexJoin *join, LexJoin *context )
+{
+    /* Without a context there is nothing to attach. */
+    if ( context == 0 )
+        return join;
+
+    /* Create a single mark action, register it with the compiler and store
+     * it on the join together with the context. */
+    Action *markAction = Action::cons( MarkMark, pd->nextMatchEndNum++ );
+    pd->actionList.append( markAction );
+
+    join->context = context;
+    join->mark = markAction;
+
+    return join;
+}
+
+LangExpr *BaseParser::send( const InputLoc &loc, LangVarRef *varRef,
+        ConsItemList *list, bool eof )
+{
+    /* Wrap the items in a parser text (flags: true, false, false; empty
+     * reducer name) and register it with the compiler. */
+    ParserText *text = ParserText::cons( loc,
+            curNspace(), list, true, false, false, "" );
+    pd->parserTextList.append( text );
+
+    /* The expression is a single send term. */
+    LangTerm *term = LangTerm::consSend( loc, varRef, text, eof );
+    return LangExpr::cons( term );
+}
+
+LangExpr *BaseParser::sendTree( const InputLoc &loc, LangVarRef *varRef,
+        ConsItemList *list, bool eof )
+{
+    /* Wrap the items in a parser text (flags: true, false, false; empty
+     * reducer name) and register it with the compiler. */
+    ParserText *text = ParserText::cons( loc,
+            curNspace(), list, true, false, false, "" );
+    pd->parserTextList.append( text );
+
+    /* The expression is a single send-tree term. */
+    LangTerm *term = LangTerm::consSendTree( loc, varRef, text, eof );
+    return LangExpr::cons( term );
+}
+
+LangExpr *BaseParser::parseCmd( const InputLoc &loc, bool tree, bool stop,
+        ObjectField *objField, TypeRef *typeRef, FieldInitVect *fieldInitVect,
+        ConsItemList *list, bool used, bool reduce, bool read, const String &reducer )
+{
+    const bool hasField = objField != 0;
+
+    /* When the result is bound to a field, build a reference to it. A bound
+     * result also forces the 'used' flag on. */
+    LangVarRef *parserRef = 0;
+    if ( hasField ) {
+        parserRef = LangVarRef::cons( objField->loc,
+                curNspace(), curStruct(), curScope(), objField->name );
+        used = true;
+    }
+
+    /* The type reference for the parser itself, wrapping the parsed type. */
+    TypeRef *parserTypeRef = TypeRef::cons( loc,
+            TypeRef::Parser, 0, typeRef, 0 );
+
+    /* The text being parsed, registered with the compiler. */
+    ParserText *text = ParserText::cons( loc, curNspace(),
+            list, used, reduce, read, reducer );
+    pd->parserTextList.append( text );
+
+    /* 'stop' takes priority over 'tree' when choosing the term type. */
+    LangTerm::Type termType;
+    if ( stop )
+        termType = LangTerm::ParseStopType;
+    else if ( tree )
+        termType = LangTerm::ParseTreeType;
+    else
+        termType = LangTerm::ParseType;
+
+    /* The term is given a fresh, empty item list for what is sent to the
+     * parser. */
+    ConsItemList *sendItems = new ConsItemList;
+
+    LangExpr *result = LangExpr::cons( LangTerm::cons( loc, termType,
+            parserRef, objField, parserTypeRef, fieldInitVect, sendItems,
+            text ) );
+
+    if ( hasField ) {
+        /* Check for redeclaration. */
+        if ( curScope()->checkRedecl( objField->name ) != 0 ) {
+            error( objField->loc ) << "variable " << objField->name <<
+                    " redeclared" << endp;
+        }
+
+        /* Type the field and insert it into the current scope. */
+        objField->typeRef = typeRef;
+        curScope()->insertField( objField->name, objField );
+    }
+
+    return result;
+}
+
+PatternItemList *BaseParser::consPatternEl( LangVarRef *varRef, PatternItemList *list )
+{
+    /* Store the variable reference (possibly null) on the first pattern item. */
+    list->head->varRef = varRef;
+
+    /* No variable bound: nothing to declare. */
+    if ( varRef == 0 )
+        return list;
+
+    if ( curScope()->checkRedecl( varRef->name ) != 0 ) {
+        error( varRef->loc ) << "variable " << varRef->name <<
+                " redeclared" << endp;
+    }
+
+    /* Declare a user-local field with the type of the first item's
+     * production element. The field is created with a default-constructed
+     * location. */
+    ObjectField *boundField = ObjectField::cons( InputLoc(),
+            ObjectField::UserLocalType, list->head->prodEl->typeRef,
+            varRef->name );
+
+    /* Insert it into the field map of the current scope. */
+    curScope()->insertField( varRef->name, boundField );
+
+    return list;
+}
+
+PatternItemList *BaseParser::patternElNamed( const InputLoc &loc,
+        LangVarRef *parsedVarRef, NamespaceQual *nspaceQual, const String &data,
+        RepeatType repeatType )
+{
+    /* Type reference built from the qualified name in 'data'. */
+    TypeRef *nameRef = TypeRef::cons( loc, parsedVarRef, nspaceQual, data, repeatType );
+
+    /* Wrap it: production element -> pattern item -> one-element list. */
+    ProdEl *element = new ProdEl( ProdEl::ReferenceType, loc, 0, false, nameRef, 0 );
+    return PatternItemList::cons(
+            PatternItem::cons( PatternItem::TypeRefForm, loc, element ) );
+}
+
+PatternItemList *BaseParser::patternElType( const InputLoc &loc,
+        LangVarRef *parsedVarRef, NamespaceQual *nspaceQual, const String &data,
+        RepeatType repeatType )
+{
+    /* Here 'data' is turned into a literal, and the type reference is built
+     * from that literal. */
+    PdaLiteral *lit = new PdaLiteral( loc, data );
+    TypeRef *litRef = TypeRef::cons( loc, parsedVarRef, nspaceQual, lit, repeatType );
+
+    /* Wrap it: production element -> pattern item -> one-element list. */
+    ProdEl *element = new ProdEl( ProdEl::ReferenceType, loc, 0, false, litRef, 0 );
+    return PatternItemList::cons(
+            PatternItem::cons( PatternItem::TypeRefForm, loc, element ) );
+}
+
+ProdElList *BaseParser::appendProdEl( ProdElList *prodElList, ProdEl *prodEl )
+{
+    /* The element's position is the list length before it is added, so the
+     * assignment has to come ahead of the append. */
+    prodEl->pos = prodElList->length();
+    prodElList->append( prodEl );
+
+    /* Hand the same list back to the caller. */
+    return prodElList;
+}
+
+PatternItemList *BaseParser::patListConcat( PatternItemList *list1,
+        PatternItemList *list2 )
+{
+    /* A null first list is replaced by a newly allocated empty one. */
+    PatternItemList *dest = ( list1 != 0 ) ? list1 : new PatternItemList();
+
+    /* Append the second list to the destination, then free the second list
+     * object itself. */
+    dest->append( *list2 );
+    delete list2;
+
+    return dest;
+}
+
+ConsItemList *BaseParser::consListConcat( ConsItemList *list1,
+        ConsItemList *list2 )
+{
+    /* A null first list is replaced by a newly allocated empty one. */
+    ConsItemList *dest = ( list1 != 0 ) ? list1 : new ConsItemList();
+
+    /* Append the second list to the destination, then free the second list
+     * object itself. */
+    dest->append( *list2 );
+    delete list2;
+
+    return dest;
+}
+
+LangStmt *BaseParser::forScope( const InputLoc &loc, const String &data,
+        NameScope *scope, TypeRef *typeRef, IterCall *iterCall, StmtList *stmtList )
+{
+    /* The loop variable must not clash with a name in the current scope. */
+    if ( curScope()->checkRedecl( data ) != 0 )
+        error( loc ) << "variable " << data << " redeclared" << endp;
+
+    /* The loop variable is declared with a null type reference. Its type
+     * depends on the result of the iter_call lookup, since it must refer to
+     * the iterator being called; that lookup is done at compile time. */
+    ObjectField *loopVar = ObjectField::cons( loc,
+            ObjectField::UserLocalType, static_cast<TypeRef*>(0), data );
+    curScope()->insertField( data, loopVar );
+
+    return LangStmt::cons( loc, LangStmt::ForIterType,
+            loopVar, typeRef, iterCall, stmtList, curStruct(), scope );
+}
+
+void BaseParser::preEof( const InputLoc &loc, StmtList *stmtList, ObjectDef *localFrame )
+{
+    /* A preeof block is attached to the region set on top of the region
+     * stack, so one must be open. The error below is reported with endl and
+     * does not stop execution, so return explicitly: carrying on would take
+     * top() of an empty region stack. */
+    if ( !insideRegion() ) {
+        error(loc) << "preeof must be used inside an existing region" << endl;
+        return;
+    }
+
+    /* Build the code block and record the enclosing struct (null if none). */
+    CodeBlock *codeBlock = CodeBlock::cons( stmtList, localFrame );
+    codeBlock->context = curStruct();
+
+    /* Store the block on the token-ignore region of the current region set. */
+    RegionSet *regionSet = regionStack.top();
+    regionSet->tokenIgnore->preEofBlock = codeBlock;
+}
+
+ProdEl *BaseParser::prodElName( const InputLoc &loc, const String &data,
+        NamespaceQual *nspaceQual, ObjectField *objField,
+        RepeatType repeatType, bool commit )
+{
+    /* A reference-type production element whose type is looked up by the
+     * qualified name in 'data'. */
+    TypeRef *nameRef = TypeRef::cons( loc, nspaceQual, data, repeatType );
+    return new ProdEl( ProdEl::ReferenceType, loc, objField, commit, nameRef, 0 );
+}
+
+ProdEl *BaseParser::prodElLiteral( const InputLoc &loc, const String &data,
+        NamespaceQual *nspaceQual, ObjectField *objField, RepeatType repeatType,
+        bool commit )
+{
+    /* A literal-type production element: 'data' becomes a literal and the
+     * type reference is built from that literal. */
+    PdaLiteral *lit = new PdaLiteral( loc, data );
+    TypeRef *litRef = TypeRef::cons( loc, nspaceQual, lit, repeatType );
+    return new ProdEl( ProdEl::LiteralType, loc, objField, commit, litRef, 0 );
+}
+
+ConsItemList *BaseParser::consElLiteral( const InputLoc &loc,
+        TypeRef *consTypeRef, const String &data, NamespaceQual *nspaceQual )
+{
+    /* Turn 'data' into a literal and build a type reference from it, in the
+     * context of the constructor's type reference. */
+    PdaLiteral *lit = new PdaLiteral( loc, data );
+    TypeRef *litRef = TypeRef::cons( loc, consTypeRef, nspaceQual, lit );
+
+    /* Wrap it: production element -> constructor item -> one-element list. */
+    ProdEl *element = new ProdEl( ProdEl::LiteralType, loc, 0, false, litRef, 0 );
+    return ConsItemList::cons(
+            ConsItem::cons( loc, ConsItem::LiteralType, element ) );
+}
+
+Production *BaseParser::production( const InputLoc &loc, ProdElList *prodElList,
+        String name, bool commit, CodeBlock *codeBlock, LangEl *predOf )
+{
+    /* The production's id is the number of productions registered so far. */
+    Production *newProd = Production::cons( loc, 0, prodElList,
+            name, commit, codeBlock, pd->prodList.length(), 0 );
+    newProd->predOf = predOf;
+
+    /* Link the production elements back to the production. */
+    ProdEl *el = prodElList->head;
+    while ( el != 0 ) {
+        el->production = newProd;
+        el = el->next;
+    }
+
+    /* Register the production with the compiler. */
+    pd->prodList.append( newProd );
+
+    return newProd;
+}
+
+void BaseParser::objVarDef( ObjectDef *objectDef, ObjectField *objField )
+{
+    /* The field name must be new in the object's root scope. Report the
+     * clash at the field's own location, as the other field-definition
+     * routines in this file do, so the diagnostic points at the offending
+     * declaration. */
+    if ( objectDef->rootScope->checkRedecl( objField->name ) != 0 )
+        error(objField->loc) << "object field renamed" << endp;
+
+    /* Add the field to the object's root scope. */
+    objectDef->rootScope->insertField( objField->name, objField );
+}
+
+LelDefList *BaseParser::prodAppend( LelDefList *defList, Production *definition )
+{
+    /* The production number is the list length before the definition is
+     * added, so the assignment has to come ahead of the append. */
+    definition->prodNum = defList->length();
+    defList->append( definition );
+
+    /* Hand the same list back to the caller. */
+    return defList;
+}
+
+LangExpr *BaseParser::construct( const InputLoc &loc, ObjectField *objField,
+        ConsItemList *list, TypeRef *typeRef, FieldInitVect *fieldInitVect )
+{
+    const bool hasField = objField != 0;
+
+    /* Create the constructor with the next pattern/constructor id and
+     * register it with the compiler. */
+    Constructor *consNode = Constructor::cons( loc, curNspace(),
+            list, pd->nextPatConsId++ );
+    pd->replList.append( consNode );
+
+    /* When the result is bound to a field, build a reference to it. */
+    LangVarRef *target = 0;
+    if ( hasField ) {
+        target = LangVarRef::cons( objField->loc,
+                curNspace(), curStruct(), curScope(), objField->name );
+    }
+
+    LangExpr *result = LangExpr::cons( LangTerm::cons( loc, LangTerm::ConstructType,
+            target, objField, typeRef, fieldInitVect, consNode ) );
+
+    if ( hasField ) {
+        /* Check for redeclaration. */
+        if ( curScope()->checkRedecl( objField->name ) != 0 ) {
+            error( objField->loc ) << "variable " << objField->name <<
+                    " redeclared" << endp;
+        }
+
+        /* Type the field and insert it into the current scope. */
+        objField->typeRef = typeRef;
+        curScope()->insertField( objField->name, objField );
+    }
+
+    return result;
+}
+
+LangExpr *BaseParser::match( const InputLoc &loc, LangVarRef *varRef,
+        PatternItemList *list )
+{
+    /* Create the pattern with the next pattern/constructor id and register
+     * it with the compiler. */
+    Pattern *pat = Pattern::cons( loc, curNspace(),
+            list, pd->nextPatConsId++ );
+    pd->patternList.append( pat );
+
+    /* The match term is built with a default-constructed location rather
+     * than 'loc'. */
+    LangTerm *term = LangTerm::consMatch( InputLoc(), varRef, pat );
+    return LangExpr::cons( term );
+}
+
+LangExpr *BaseParser::prodCompare( const InputLoc &loc, LangVarRef *varRef,
+        const String &prod, LangExpr *matchExpr )
+{
+    /* The comparison term is built with a default-constructed location; the
+     * 'loc' argument is not used here. */
+    LangTerm *term = LangTerm::consProdCompare(
+            InputLoc(), varRef, prod, matchExpr );
+    return LangExpr::cons( term );
+}
+
+LangStmt *BaseParser::varDef( ObjectField *objField,
+        LangExpr *expr, LangStmt::Type assignType )
+{
+    /* Check for redeclaration in the current scope. */
+    if ( curScope()->checkRedecl( objField->name ) != 0 ) {
+        error( objField->loc ) << "variable " << objField->name <<
+                " redeclared" << endp;
+    }
+
+    /* Insert it into the field map. */
+    curScope()->insertField( objField->name, objField );
+
+    /* A declaration without an initializer produces no statement. */
+    if ( expr == 0 )
+        return 0;
+
+    /* Otherwise produce the assignment of the initializer to the variable. */
+    LangVarRef *target = LangVarRef::cons( objField->loc,
+            curNspace(), curStruct(), curScope(), objField->name );
+    return LangStmt::cons( objField->loc, assignType, target, expr );
+}
+
+LangExpr *BaseParser::require( const InputLoc &loc,
+        LangVarRef *varRef, PatternItemList *list )
+{
+    /* A require expression is built exactly like a match expression: a new
+     * pattern is registered with the compiler and wrapped in a match term.
+     * Delegate to match() rather than repeat those steps. */
+    return match( loc, varRef, list );
+}
+
+void BaseParser::structVarDef( const InputLoc &loc, ObjectField *objField )
+{
+    /* A struct must be open for a struct variable to be defined. */
+    StructDef *owner = curStruct();
+    if ( owner == 0 )
+        error(loc) << "internal error: no context stack items found" << endp;
+
+    ObjectDef *ownerObject = owner->objectDef;
+
+    /* The field name must be new in the struct object's root scope. */
+    if ( ownerObject->rootScope->checkRedecl( objField->name ) != 0 )
+        error(objField->loc) << "object field renamed" << endp;
+
+    ownerObject->rootScope->insertField( objField->name, objField );
+}
+
+void BaseParser::structHead( const InputLoc &loc, Namespace *inNspace,
+        const String &data, ObjectDef::Type objectType )
+{
+    /* The object behind the struct takes the next object id. */
+    ObjectDef *structObject = ObjectDef::cons( objectType,
+            data, pd->nextObjectId++ );
+
+    /* Create the struct definition, make it the current struct and record
+     * it in the namespace it is declared in. */
+    StructDef *newStruct = new StructDef( loc, data, structObject );
+    structStack.push( newStruct );
+    inNspace->structDefList.append( newStruct );
+
+    /* Make the namespace for the struct. */
+    createNamespace( loc, data );
+}
+
+StmtList *BaseParser::appendStatement( StmtList *stmtList, LangStmt *stmt )
+{
+    /* Null statements are skipped; the list is returned unchanged. */
+    if ( stmt == 0 )
+        return stmtList;
+
+    stmtList->append( stmt );
+    return stmtList;
+}
+
+ParameterList *BaseParser::appendParam( ParameterList *paramList, ObjectField *objField )
+{
+    /* Add the parameter at the end and hand the same list back. */
+    paramList->append( objField );
+
+    return paramList;
+}
+
+ObjectField *BaseParser::addParam( const InputLoc &loc,
+        ObjectField::Type type, TypeRef *typeRef, const String &name )
+{
+    /* Only creates the field; it is not inserted into any list or scope here. */
+    return ObjectField::cons( loc, type, typeRef, name );
+}
+
+PredDecl *BaseParser::predTokenName( const InputLoc &loc, NamespaceQual *qual,
+        const String &data )
+{
+    /* Precedence declaration for a token given by qualified name, carrying
+     * the compiler's current precedence value. */
+    TypeRef *tokenRef = TypeRef::cons( loc, qual, data );
+    return new PredDecl( tokenRef, pd->predValue );
+}
+
+PredDecl *BaseParser::predTokenLit( const InputLoc &loc, const String &data,
+        NamespaceQual *nspaceQual )
+{
+    /* Precedence declaration for a token given as a literal, carrying the
+     * compiler's current precedence value. */
+    PdaLiteral *lit = new PdaLiteral( loc, data );
+    TypeRef *tokenRef = TypeRef::cons( loc, nspaceQual, lit );
+    return new PredDecl( tokenRef, pd->predValue );
+}
+
+void BaseParser::alias( const InputLoc &loc, const String &data, TypeRef *typeRef )
+{
+    /* The alias belongs to, and is recorded in, the current namespace. */
+    Namespace *owner = curNspace();
+    owner->typeAliasList.append( new TypeAlias( loc, owner, data, typeRef ) );
+}
+
+void BaseParser::precedenceStmt( PredType predType, PredDeclList *predDeclList )
+{
+    /* Move every declaration, front to back, from the statement's list to
+     * the compiler's list, stamping each with the statement's type. */
+    while ( predDeclList->length() > 0 ) {
+        PredDecl *decl = predDeclList->detachFirst();
+        decl->predType = predType;
+        pd->predDeclList.append( decl );
+    }
+
+    /* The precedence value advances once per statement, even when the list
+     * was empty. */
+    pd->predValue++;
+}
+
+void BaseParser::pushScope()
+{
+    /* Ask the current local frame for a scope pushed on top of the current
+     * one, and make the result the current scope. */
+    scopeTop = localFrameTop->pushScope( scopeTop );
+}
+
+void BaseParser::popScope()
+{
+    /* The parent of the current scope becomes the current scope. */
+    scopeTop = scopeTop->parentScope;
+}
diff --git a/src/parser.h b/src/parser.h
new file mode 100644
index 00000000..aafa3f2b
--- /dev/null
+++ b/src/parser.h
@@ -0,0 +1,197 @@
+/*
+ * Copyright 2013-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _COLM_PARSER_H
+#define _COLM_PARSER_H
+
+#include <iostream>
+
+#include <avltree.h>
+
+#include "compiler.h"
+#include "parser.h"
+
+#define PROPERTY_REDUCE_FIRST 0x1
+
+/* Base of the parser front ends. Holds the parse-time state (region,
+ * namespace, reduction and struct stacks, current local frame and scope) and
+ * the helper routines that build the parse tree in the compiler. A derived
+ * class supplies go(). */
+struct BaseParser
+{
+    /* Bind the parser to its compiler. The current local frame and scope
+     * start out null so that curLocalFrame() and curScope() give a defined
+     * value before the first blockOpen() or blockClose() sets them. */
+    BaseParser( Compiler *pd )
+        : pd(pd), localFrameTop(0), scopeTop(0), enterRl(false)
+    {}
+
+    virtual ~BaseParser() {}
+
+    /* The compiler that receives everything the parser builds. */
+    Compiler *pd;
+
+    /* Stacks tracking what is currently open during parsing. */
+    RegionSetVect regionStack;
+    NamespaceVect namespaceStack;
+    ReductionVect reductionStack;
+    StructStack structStack;
+
+    /* The current local frame and the current name scope. */
+    ObjectDef *localFrameTop;
+    NameScope *scopeTop;
+
+    bool enterRl;
+
+    /* True when at least one region set is open. */
+    bool insideRegion()
+        { return regionStack.length() > 0; }
+
+    /* The innermost open struct, or null when none is open. */
+    StructDef *curStruct()
+        { return structStack.length() == 0 ? 0 : structStack.top(); }
+
+    /* The innermost open namespace. Takes top() without an emptiness check,
+     * so the namespace stack must not be empty. */
+    Namespace *curNspace()
+        { return namespaceStack.top(); }
+
+    /* The current name scope. */
+    NameScope *curScope()
+        { return scopeTop; }
+
+    /* The current local frame. */
+    ObjectDef *curLocalFrame()
+        { return localFrameTop; }
+
+    /* The innermost open reduction. Takes top() without an emptiness check,
+     * so the reduction stack must not be empty. */
+    Reduction *curReduction()
+        { return reductionStack.top(); }
+
+    /* Lexical feedback. */
+
+    void listElDef( String name );
+    void mapElDef( String name, TypeRef *keyType );
+
+    void argvDecl();
+    void init();
+    void addRegularDef( const InputLoc &loc, Namespace *nspace,
+            const String &name, LexJoin *join );
+    TokenRegion *createRegion( const InputLoc &loc, RegionImpl *impl );
+    Namespace *createRootNamespace();
+    Namespace *createNamespace( const InputLoc &loc, const String &name );
+    void pushRegionSet( const InputLoc &loc );
+    void popRegionSet();
+    void addProduction( const InputLoc &loc, const String &name,
+            ProdElList *prodElList, bool commit,
+            CodeBlock *redBlock, LangEl *predOf );
+    void addArgvList();
+    void addStdsList();
+    LexJoin *literalJoin( const InputLoc &loc, const String &data );
+
+    Reduction *createReduction( const InputLoc loc, const String &name );
+
+    void defineToken( const InputLoc &loc, String name, LexJoin *join,
+            ObjectDef *objectDef, CodeBlock *transBlock,
+            bool ignore, bool noPreIgnore, bool noPostIgnore );
+
+    void zeroDef( const InputLoc &loc, const String &name );
+    void literalDef( const InputLoc &loc, const String &data,
+            bool noPreIgnore, bool noPostIgnore );
+
+    /* Enter and leave a code block: switch the current local frame and scope. */
+    ObjectDef *blockOpen();
+    void blockClose();
+
+    /* Function, in-host function and iterator definitions. */
+    void inHostDef( const String &hostCall, ObjectDef *localFrame,
+            ParameterList *paramList, TypeRef *typeRef,
+            const String &name, bool exprt );
+    void functionDef( StmtList *stmtList, ObjectDef *localFrame,
+            ParameterList *paramList, TypeRef *typeRef,
+            const String &name, bool exprt );
+
+    void iterDef( StmtList *stmtList, ObjectDef *localFrame,
+            ParameterList *paramList, const String &name );
+    LangStmt *globalDef( ObjectField *objField, LangExpr *expr,
+            LangStmt::Type assignType );
+    void cflDef( NtDef *ntDef, ObjectDef *objectDef, LelDefList *defList );
+    ReOrBlock *lexRegularExprData( ReOrBlock *reOrBlock, ReOrItem *reOrItem );
+
+    int lexFactorRepNum( const InputLoc &loc, const String &data );
+    LexFactor *lexRlFactorName( const String &data, const InputLoc &loc );
+    LexFactorAug *lexFactorLabel( const InputLoc &loc, const String &data,
+            LexFactorAug *factorAug );
+    LexJoin *lexOptJoin( LexJoin *join, LexJoin *context );
+    LangExpr *send( const InputLoc &loc, LangVarRef *varRef,
+            ConsItemList *list, bool eof );
+    LangExpr *sendTree( const InputLoc &loc, LangVarRef *varRef,
+            ConsItemList *list, bool eof );
+    LangExpr *parseCmd( const InputLoc &loc, bool tree, bool stop, ObjectField *objField,
+            TypeRef *typeRef, FieldInitVect *fieldInitVect, ConsItemList *list,
+            bool used, bool reduce, bool read, const String &reducer );
+    PatternItemList *consPatternEl( LangVarRef *varRef, PatternItemList *list );
+    PatternItemList *patternElNamed( const InputLoc &loc, LangVarRef *varRef,
+            NamespaceQual *nspaceQual, const String &data, RepeatType repeatType );
+    PatternItemList *patternElType( const InputLoc &loc, LangVarRef *varRef,
+            NamespaceQual *nspaceQual, const String &data, RepeatType repeatType );
+    PatternItemList *patListConcat( PatternItemList *list1, PatternItemList *list2 );
+    ConsItemList *consListConcat( ConsItemList *list1, ConsItemList *list2 );
+    LangStmt *forScope( const InputLoc &loc, const String &data,
+            NameScope *scope, TypeRef *typeRef, IterCall *iterCall, StmtList *stmtList );
+    void preEof( const InputLoc &loc, StmtList *stmtList, ObjectDef *localFrame );
+
+    ProdEl *prodElName( const InputLoc &loc, const String &data,
+            NamespaceQual *nspaceQual, ObjectField *objField, RepeatType repeatType,
+            bool commit );
+    ProdEl *prodElLiteral( const InputLoc &loc, const String &data,
+            NamespaceQual *nspaceQual, ObjectField *objField, RepeatType repeatType,
+            bool commit );
+    ConsItemList *consElLiteral( const InputLoc &loc, TypeRef *consTypeRef,
+            const String &data, NamespaceQual *nspaceQual );
+    Production *production( const InputLoc &loc, ProdElList *prodElList,
+            String name, bool commit, CodeBlock *codeBlock, LangEl *predOf );
+    void objVarDef( ObjectDef *objectDef, ObjectField *objField );
+    LelDefList *prodAppend( LelDefList *defList, Production *definition );
+
+    LangExpr *construct( const InputLoc &loc, ObjectField *objField,
+            ConsItemList *list, TypeRef *typeRef, FieldInitVect *fieldInitVect );
+    LangExpr *match( const InputLoc &loc, LangVarRef *varRef,
+            PatternItemList *list );
+    LangExpr *prodCompare( const InputLoc &loc, LangVarRef *varRef,
+            const String &prod, LangExpr *matchExpr );
+    LangStmt *varDef( ObjectField *objField,
+            LangExpr *expr, LangStmt::Type assignType );
+    LangStmt *exportStmt( ObjectField *objField, LangStmt::Type assignType, LangExpr *expr );
+
+
+    LangExpr *require( const InputLoc &loc, LangVarRef *varRef, PatternItemList *list );
+    void structVarDef( const InputLoc &loc, ObjectField *objField );
+    void structHead( const InputLoc &loc, Namespace *inNspace,
+            const String &data, ObjectDef::Type objectType );
+    StmtList *appendStatement( StmtList *stmtList, LangStmt *stmt );
+    ParameterList *appendParam( ParameterList *paramList, ObjectField *objField );
+    ObjectField *addParam( const InputLoc &loc,
+            ObjectField::Type type, TypeRef *typeRef, const String &name );
+    PredDecl *predTokenName( const InputLoc &loc, NamespaceQual *qual, const String &data );
+    PredDecl *predTokenLit( const InputLoc &loc, const String &data,
+            NamespaceQual *nspaceQual );
+    void alias( const InputLoc &loc, const String &data, TypeRef *typeRef );
+    void precedenceStmt( PredType predType, PredDeclList *predDeclList );
+    ProdElList *appendProdEl( ProdElList *prodElList, ProdEl *prodEl );
+
+    /* Open and close a nested name scope within the current local frame. */
+    void pushScope();
+    void popScope();
+
+    /* Entry point implemented by the concrete parser. */
+    virtual void go( long activeRealm ) = 0;
+
+    BstSet<String, ColmCmpStr> genericElDefined;
+
+    /* A namespace qualification built from the current namespace alone. */
+    NamespaceQual *emptyNspaceQual()
+    {
+        return NamespaceQual::cons( curNspace() );
+    }
+
+};
+
+#endif /* _COLM_PARSER_H */
+
diff --git a/src/parsetree.cc b/src/parsetree.cc
new file mode 100644
index 00000000..572f0610
--- /dev/null
+++ b/src/parsetree.cc
@@ -0,0 +1,1495 @@
+/*
+ * Copyright 2006-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdio.h>
+
+#include <iostream>
+
+#include "fsmgraph.h"
+#include "compiler.h"
+#include "parsetree.h"
+
+using namespace std;
+ostream &operator<<( ostream &out, const NameRef &nameRef );
+ostream &operator<<( ostream &out, const NameInst &nameInst );
+ostream &operator<<( ostream &out, const Token &token );
+
+/* Convert the literal string which comes in from the scanner into an array of
+ * characters with escapes and options interpreted. Also null terminates the
+ * string. Though this null termination should not be relied on for
+ * interpreting literals in the parser because the string may contain a
+ * literal string with \0.
+ *
+ *  result          - receives the interpreted characters.
+ *  caseInsensitive - set to true when the trailing 'i' option is present on
+ *                    a quoted literal, false otherwise.
+ *  srcString       - the raw literal text, including its opening delimiter.
+ *  loc             - location used when reporting an unsupported option.
+ *
+ * A literal opening with a backtick is copied verbatim (no escape
+ * processing); a single closing backtick is dropped when present. Any other
+ * literal has its trailing option characters stripped and its backslash
+ * escapes interpreted. */
+void prepareLitString( String &result, bool &caseInsensitive,
+        const String &srcString, const InputLoc &loc )
+{
+    result.setAs( String::Fresh(), srcString.length() );
+    caseInsensitive = false;
+
+    /* Nothing to interpret. Bail out before looking at srcString[0]. */
+    if ( srcString.length() == 0 )
+        return;
+
+    char *src = srcString.data + 1;
+    char *end = 0;
+    bool backtick = srcString[0] == '`';
+
+    if ( !backtick ) {
+        end = srcString.data + srcString.length() - 1;
+
+        /* Walk backwards over the option characters that follow the closing
+         * delimiter. Never step in front of the start of the buffer, even if
+         * no delimiter is found. */
+        while ( end > srcString.data &&
+                *end != '\'' && *end != '\"' && *end != '\n' )
+        {
+            if ( *end == 'i' )
+                caseInsensitive = true;
+            else {
+                error( loc ) << "literal string '" << *end <<
+                        "' option not supported" << endl;
+            }
+            end -= 1;
+        }
+
+        /* A literal terminated by a newline keeps the newline as content. */
+        if ( *end == '\n' )
+            end++;
+    }
+    else {
+        end = srcString.data + srcString.length();
+        if ( srcString.length() > 2 && *(end-1) == '`' )
+            end -= 1;
+    }
+
+    char *dest = result.data;
+    int len = 0;
+
+    /* Use an ordering comparison so a one-character literal (where src
+     * starts beyond end) produces the empty string rather than running off
+     * the buffer. */
+    while ( src < end ) {
+        /* Only treat the backslash as an escape when the escaped character
+         * is itself inside the literal; a trailing backslash is copied
+         * literally instead of consuming the terminator. */
+        if ( !backtick && *src == '\\' && src + 1 < end ) {
+            switch ( src[1] ) {
+                case '0': dest[len++] = '\0'; break;
+                case 'a': dest[len++] = '\a'; break;
+                case 'b': dest[len++] = '\b'; break;
+                case 't': dest[len++] = '\t'; break;
+                case 'n': dest[len++] = '\n'; break;
+                case 'v': dest[len++] = '\v'; break;
+                case 'f': dest[len++] = '\f'; break;
+                case 'r': dest[len++] = '\r'; break;
+                /* Backslash-newline is a line continuation: emit nothing. */
+                case '\n': break;
+                default: dest[len++] = src[1]; break;
+            }
+            src += 2;
+        }
+        else {
+            dest[len++] = *src++;
+        }
+    }
+
+    result.chop( len );
+}
+
+/* Three-way ordering of unique types: first by type id, then by the payload
+ * pointer that is relevant for that type id. Returns -1, 0 or 1. */
+int CmpUniqueType::compare( const UniqueType &ut1, const UniqueType &ut2 )
+{
+    if ( ut1.typeId > ut2.typeId )
+        return 1;
+    if ( ut1.typeId < ut2.typeId )
+        return -1;
+
+    /* Same type id: order by the member that identifies the type. */
+    switch ( ut1.typeId ) {
+        case TYPE_TREE:
+        case TYPE_REF: {
+            if ( ut1.langEl > ut2.langEl )
+                return 1;
+            if ( ut1.langEl < ut2.langEl )
+                return -1;
+            break;
+        }
+        case TYPE_ITER: {
+            if ( ut1.iterDef > ut2.iterDef )
+                return 1;
+            if ( ut1.iterDef < ut2.iterDef )
+                return -1;
+            break;
+        }
+        case TYPE_STRUCT: {
+            if ( ut1.structEl > ut2.structEl )
+                return 1;
+            if ( ut1.structEl < ut2.structEl )
+                return -1;
+            break;
+        }
+        case TYPE_GENERIC: {
+            if ( ut1.generic > ut2.generic )
+                return 1;
+            if ( ut1.generic < ut2.generic )
+                return -1;
+            break;
+        }
+
+        /* These carry no payload to compare. */
+        case TYPE_NOTYPE:
+        case TYPE_NIL:
+        case TYPE_INT:
+        case TYPE_BOOL:
+        case TYPE_LIST_PTRS:
+        case TYPE_MAP_PTRS:
+        case TYPE_VOID:
+            break;
+    }
+
+    return 0;
+}
+
+/* Three-way ordering of unique repeats: by repeat type, then by language
+ * element pointer. Returns -1, 0 or 1. */
+int CmpUniqueRepeat::compare( const UniqueRepeat &ut1, const UniqueRepeat &ut2 )
+{
+    if ( ut1.repeatType > ut2.repeatType )
+        return 1;
+    if ( ut1.repeatType < ut2.repeatType )
+        return -1;
+
+    if ( ut1.langEl > ut2.langEl )
+        return 1;
+    if ( ut1.langEl < ut2.langEl )
+        return -1;
+
+    return 0;
+}
+
+/* Three-way ordering of unique generics: by generic type, then by value, and
+ * for map-like generics also by key. Returns -1, 0 or 1. */
+int CmpUniqueGeneric::compare( const UniqueGeneric &ut1, const UniqueGeneric &ut2 )
+{
+    if ( ut1.type > ut2.type )
+        return 1;
+    if ( ut1.type < ut2.type )
+        return -1;
+
+    if ( ut1.value > ut2.value )
+        return 1;
+    if ( ut1.value < ut2.value )
+        return -1;
+
+    switch ( ut1.type ) {
+        /* Only the map flavours have a key to compare. */
+        case UniqueGeneric::Map:
+        case UniqueGeneric::MapEl: {
+            if ( ut1.key > ut2.key )
+                return 1;
+            if ( ut1.key < ut2.key )
+                return -1;
+            break;
+        }
+
+        case UniqueGeneric::List:
+        case UniqueGeneric::ListEl:
+        case UniqueGeneric::Parser:
+            break;
+    }
+
+    return 0;
+}
+
+/* Build the machine for this definition by walking its join. */
+FsmGraph *LexDefinition::walk( Compiler *pd )
+{
+    FsmGraph *graph = join->walk( pd );
+
+    /* A join with multiple expressions has already had its epsilon
+     * transitions resolved. When the join carries a single expression the
+     * epsilon op is run here instead. */
+    if ( join->expr != 0 )
+        graph->epsilonOp();
+
+    return graph;
+}
+
+/* Allocate the name instance for this region, register it with the compiler,
+ * create the region's actions and remember the instance on the region. The
+ * loc argument is not used. */
+void RegionImpl::makeNameTree( const InputLoc &loc, Compiler *pd )
+{
+    NameInst *inst = new NameInst( pd->nextNameId++ );
+    pd->nameInstList.append( inst );
+
+    /* The region's actions are created at this point as well. */
+    makeActions( pd );
+
+    /* A token region is referenced only once (near the root of the name
+     * tree), so it can have at most one name inst. Storing it on the region
+     * is therefore legal, and it must not have been set already. */
+    assert( regionNameInst == 0 );
+    regionNameInst = inst;
+}
+
+/* Location of this token instance: the action's location when an action is
+ * attached, otherwise the location of the terminating semicolon. */
+InputLoc TokenInstance::getLoc()
+{
+    if ( action != 0 )
+        return action->loc;
+    return semiLoc;
+}
+
+/*
+ * If there are any LMs then all of the following entry points must reset
+ * tokstart:
+ *
+ * 1. fentry(StateRef)
+ * 2. fgoto(StateRef), fcall(StateRef), fnext(StateRef)
+ * 3. target of any transition that has an fcall (the return loc).
+ * 4. start state of all longest match routines.
+ */
+
+/* Create an action, append it to the compiler's action list and flag it as a
+ * longest-match action. Returns the new action. */
+Action *RegionImpl::newAction( Compiler *pd, const InputLoc &loc,
+        const String &name, InlineList *inlineList )
+{
+    Action *created = Action::cons( loc, name, inlineList );
+    pd->actionList.append( created );
+    created->isLmAction = true;
+    return created;
+}
+
+/* Create the longest-match actions for every token instance in this region,
+ * plus the region-wide select action.
+ *
+ * For each token instance four actions are made, in four separate passes so
+ * that the order in which they are appended to the compiler's action list is
+ * unchanged: setActId ("store<N>"), actOnLast ("imm<N>"), actOnNext
+ * ("lagh<N>") and actLagBehind ("lag<N>"), where <N> is the instance's
+ * longestMatchId. Finally lmActSelect ("lagsel") is created. */
+void RegionImpl::makeActions( Compiler *pd )
+{
+    /* Scratch space for formatting action names. newAction() receives the
+     * name as a const String&, so a heap buffer allocated per action would
+     * never be released; a reusable stack buffer is used instead.
+     * NOTE(review): assumes String's const char* constructor copies the
+     * characters -- confirm against cstring.h. */
+    char actName[50];
+
+    /* Make actions that set the action id. */
+    for ( TokenInstanceListReg::Iter lmi = tokenInstanceList; lmi.lte(); lmi++ ) {
+        /* The action must exist so that it will go into the actionIndex. */
+        InlineList *inlineList = InlineList::cons();
+        inlineList->append( InlineItem::cons( lmi->getLoc(), this, lmi,
+                InlineItem::LmSetActId ) );
+        snprintf( actName, sizeof(actName), "store%i", lmi->longestMatchId );
+        lmi->setActId = newAction( pd, lmi->getLoc(), actName, inlineList );
+    }
+
+    /* Make actions that execute the user action and restart on the last
+     * character. */
+    for ( TokenInstanceListReg::Iter lmi = tokenInstanceList; lmi.lte(); lmi++ ) {
+        InlineList *inlineList = InlineList::cons();
+        inlineList->append( InlineItem::cons( lmi->getLoc(), this, lmi,
+                InlineItem::LmOnLast ) );
+        snprintf( actName, sizeof(actName), "imm%i", lmi->longestMatchId );
+        lmi->actOnLast = newAction( pd, lmi->getLoc(), actName, inlineList );
+    }
+
+    /* Make actions that execute the user action and restart on the next
+     * character. These actions will set tokend themselves (it is the current
+     * char). */
+    for ( TokenInstanceListReg::Iter lmi = tokenInstanceList; lmi.lte(); lmi++ ) {
+        InlineList *inlineList = InlineList::cons();
+        inlineList->append( InlineItem::cons( lmi->getLoc(), this, lmi,
+                InlineItem::LmOnNext ) );
+        snprintf( actName, sizeof(actName), "lagh%i", lmi->longestMatchId );
+        lmi->actOnNext = newAction( pd, lmi->getLoc(), actName, inlineList );
+    }
+
+    /* Make actions that execute the user action and restart at tokend. These
+     * actions execute some time after matching the last char. */
+    for ( TokenInstanceListReg::Iter lmi = tokenInstanceList; lmi.lte(); lmi++ ) {
+        InlineList *inlineList = InlineList::cons();
+        inlineList->append( InlineItem::cons( lmi->getLoc(), this, lmi,
+                InlineItem::LmOnLagBehind ) );
+        snprintf( actName, sizeof(actName), "lag%i", lmi->longestMatchId );
+        lmi->actLagBehind = newAction( pd, lmi->getLoc(), actName, inlineList );
+    }
+
+    /* The select action is not tied to any token instance, so it is given a
+     * fixed location of line 1, column 1. */
+    InputLoc loc;
+    loc.line = 1;
+    loc.col = 1;
+
+    /* Create the action that switches on the recorded action id. */
+    InlineList *il6 = InlineList::cons();
+    il6->append( InlineItem::cons( loc, this, 0, InlineItem::LmSwitch ) );
+    lmActSelect = newAction( pd, loc, "lagsel", il6 );
+}
+
+/* Redirect a transition so that it targets the graph's start state. The
+ * transition is detached from its current target and re-attached, keeping
+ * the same source state. */
+void RegionImpl::restart( FsmGraph *graph, FsmTrans *trans )
+{
+    FsmState *origin = trans->fromState;
+    FsmState *oldTarget = trans->toState;
+
+    graph->detachTrans( origin, oldTarget, trans );
+    graph->attachTrans( origin, graph->startState, trans );
+}
+
+/* Turn the union of the region's token machines into a longest-match scanner.
+ *
+ * The steps, in order:
+ *  1. Fill each state's lmItemSet with the longest-match items that can have
+ *     matched on the way to that state (a null entry marks "nothing matched
+ *     yet", seeded from the start state).
+ *  2. Isolate the start state and attach the token-start action (and the
+ *     act-id initialisation when some state can hold more than one item).
+ *  3. Attach, per transition carrying a longest-match action, either the
+ *     immediate "on last" action or the tokend / act-id bookkeeping actions.
+ *  4. Redirect the immediate-match transitions to the start state.
+ *  5. Install error and EOF actions that emit the pending token and restart.
+ *  6. Mark the start state final. */
+void RegionImpl::runLongestMatch( Compiler *pd, FsmGraph *graph )
+{
+ /* Seed: every state reachable from the start state (stopping at final
+  * states) gets the null item. The mark bits are cleared as they are
+  * consumed so the next marking pass starts clean. */
+ graph->markReachableFromHereStopFinal( graph->startState );
+ for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
+ if ( ms->stateBits & SB_ISMARKED ) {
+ ms->lmItemSet.insert( 0 );
+ ms->stateBits &= ~ SB_ISMARKED;
+ }
+ }
+
+ /* Transfer the first item of non-empty lmAction tables to the item sets
+ * of the states that follow. Exclude states that have no transitions out.
+ * This must happen on a separate pass so that on each iteration of the
+ * next pass we have the item set entries from all lmAction tables. */
+ for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
+ for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
+ if ( trans->lmActionTable.length() > 0 ) {
+ LmActionTableEl *lmAct = trans->lmActionTable.data;
+ FsmState *toState = trans->toState;
+ assert( toState );
+
+ /* Check if there are transitions out, this may be a very
+ * close approximation? Out transitions going nowhere?
+ * FIXME: Check. */
+ if ( toState->outList.length() > 0 ) {
+ /* Fill the item sets. */
+ graph->markReachableFromHereStopFinal( toState );
+ for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
+ if ( ms->stateBits & SB_ISMARKED ) {
+ ms->lmItemSet.insert( lmAct->value );
+ ms->stateBits &= ~ SB_ISMARKED;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ /* The lmItem sets are now filled, telling us which longest match rules
+ * can succeed in which states. First determine if we need to make sure
+ * act is defaulted to zero. */
+ int maxItemSetLength = 0;
+ graph->markReachableFromHereStopFinal( graph->startState );
+ for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
+ if ( ms->stateBits & SB_ISMARKED ) {
+ if ( ms->lmItemSet.length() > maxItemSetLength )
+ maxItemSetLength = ms->lmItemSet.length();
+ ms->stateBits &= ~ SB_ISMARKED;
+ }
+ }
+
+ /* The actions executed on starting to match a token. */
+ graph->isolateStartState();
+ graph->startState->fromStateActionTable.setAction( pd->setTokStartOrd, pd->setTokStart );
+ if ( maxItemSetLength > 1 ) {
+ /* The longest match action switch may be called when tokens are
+ * matched, in which case act must be initialized, there must be a
+ * case to handle the error, and the generated machine will require an
+ * error state. */
+ lmSwitchHandlesError = true;
+ graph->startState->toStateActionTable.setAction( pd->initActIdOrd, pd->initActId );
+ }
+
+ /* The place to store transitions to restart. It may be possible for the
+ * restarting to affect the searching through the graph that follows. For
+ * now take the safe route and save the list of transitions to restart
+ * until after all searching is done. */
+ Vector<FsmTrans*> restartTrans;
+
+ /* Set actions that do immediate token recognition, set the longest match part
+ * id and set the token ending. */
+ for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
+ for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
+ if ( trans->lmActionTable.length() > 0 ) {
+ LmActionTableEl *lmAct = trans->lmActionTable.data;
+ FsmState *toState = trans->toState;
+ assert( toState );
+
+ /* Check if there are transitions out, this may be a very
+ * close approximation? Out transitions going nowhere?
+ * FIXME: Check. */
+ if ( toState->outList.length() == 0 ) {
+ /* Can execute the immediate action for the longest match
+ * part. Redirect the action to the start state. */
+ trans->actionTable.setAction( lmAct->key,
+ lmAct->value->actOnLast );
+ restartTrans.append( trans );
+ }
+ else {
+ /* Look for non final states that have a non-empty item
+ * set. If these are present then we need to record the
+ * end of the token. Also find the highest item set
+ * length reachable from here (excluding at transitions to
+ * final states). */
+ bool nonFinalNonEmptyItemSet = false;
+ maxItemSetLength = 0;
+ graph->markReachableFromHereStopFinal( toState );
+ for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
+ if ( ms->stateBits & SB_ISMARKED ) {
+ if ( ms->lmItemSet.length() > 0 && !ms->isFinState() )
+ nonFinalNonEmptyItemSet = true;
+ if ( ms->lmItemSet.length() > maxItemSetLength )
+ maxItemSetLength = ms->lmItemSet.length();
+ ms->stateBits &= ~ SB_ISMARKED;
+ }
+ }
+
+ /* If there are reachable states that are not final and
+ * have non empty item sets or that have an item set
+ * length greater than one then we need to set tokend
+ * because the error action that matches the token will
+ * require it. */
+ if ( nonFinalNonEmptyItemSet || maxItemSetLength > 1 )
+ trans->actionTable.setAction( pd->setTokEndOrd, pd->setTokEnd );
+
+ /* Some states may not know which longest match item to
+ * execute, must set it. */
+ if ( maxItemSetLength > 1 ) {
+ /* There are transitions out, another match may come. */
+ trans->actionTable.setAction( lmAct->key,
+ lmAct->value->setActId );
+ }
+ }
+ }
+ }
+ }
+
+ /* Now that all graph searching is done it is certainly safe to set the
+ * restarting. It may be safe above, however this must be verified. */
+ for ( Vector<FsmTrans*>::Iter rs = restartTrans; rs.lte(); rs++ )
+ restart( graph, *rs );
+
+ /* One ordering value shared by all the error/EOF actions set below. */
+ int lmErrActionOrd = pd->curActionOrd++;
+
+ /* Embed the error for recognizing a char. */
+ for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
+ /* Exactly one real (non-null) item: the token to emit is known
+  * statically, so no select is needed. */
+ if ( st->lmItemSet.length() == 1 && st->lmItemSet[0] != 0 ) {
+ if ( st->isFinState() ) {
+ /* On error execute the actOnNext action, which knows that
+ * the last character of the token was one back and restart. */
+ graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
+ &st->lmItemSet[0]->actOnNext, 1 );
+ st->eofActionTable.setAction( lmErrActionOrd,
+ st->lmItemSet[0]->actOnNext );
+ st->eofTarget = graph->startState;
+ }
+ else {
+ /* Not final: use the lag-behind action for the item, on both
+  * error and EOF. */
+ graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
+ &st->lmItemSet[0]->actLagBehind, 1 );
+ st->eofActionTable.setAction( lmErrActionOrd,
+ st->lmItemSet[0]->actLagBehind );
+ st->eofTarget = graph->startState;
+ }
+ }
+ else if ( st->lmItemSet.length() > 1 ) {
+ /* Need to use the select. Take note of which items the select
+ * is needed for so only the necessary actions are included. */
+ for ( LmItemSet::Iter plmi = st->lmItemSet; plmi.lte(); plmi++ ) {
+ if ( *plmi != 0 )
+ (*plmi)->inLmSelect = true;
+ }
+ /* On error, execute the action select and go to the start state. */
+ graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
+ &lmActSelect, 1 );
+ st->eofActionTable.setAction( lmErrActionOrd, lmActSelect );
+ st->eofTarget = graph->startState;
+ }
+ }
+
+ /* Finally, the start state should be made final. */
+ graph->setFinState( graph->startState );
+}
+
+/* For every state that has leaving (out) actions, install those actions as
+ * the state's error actions. */
+void RegionImpl::transferScannerLeavingActions( FsmGraph *graph )
+{
+    for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
+        /* Nothing to transfer for states without leaving actions. */
+        if ( st->outActionTable.length() <= 0 )
+            continue;
+
+        graph->setErrorActions( st, st->outActionTable );
+    }
+}
+
+/* Build the scanner machine for this region.
+ *
+ * Each token instance that has a pattern is walked and tagged with its
+ * longest-match action. The resulting machines are unioned and handed to
+ * runLongestMatch(). A region with no patterns yields the lambda machine.
+ * In all cases the region's entry point is set on the returned graph.
+ *
+ * The first token found whose machine accepts the zero-length word is
+ * recorded in defaultTokenInstance; it is an error for that to be an ignore
+ * token. */
+FsmGraph *RegionImpl::walk( Compiler *pd )
+{
+    /* Make each part of the longest match. */
+    int numParts = 0;
+    FsmGraph **parts = new FsmGraph*[tokenInstanceList.length()];
+    for ( TokenInstanceListReg::Iter lmi = tokenInstanceList; lmi.lte(); lmi++ ) {
+        /* Watch out for patternless tokens. */
+        if ( lmi->join != 0 ) {
+            /* Create the machine and embed the setting of the longest match id. */
+            parts[numParts] = lmi->join->walk( pd );
+            parts[numParts]->longMatchAction( pd->curActionOrd++, lmi );
+
+            /* Look for tokens that accept the zero length-word. The first one found
+             * will be used as the default token. */
+            if ( defaultTokenInstance == 0 && parts[numParts]->startState->isFinState() )
+                defaultTokenInstance = lmi;
+
+            numParts += 1;
+        }
+    }
+
+    if ( defaultTokenInstance != 0 && defaultTokenInstance->tokenDef->tdLangEl->isIgnore )
+        error() << "ignore token cannot be a scanner's zero-length token" << endp;
+
+    /* parts[0] is only read once we know at least one part exists; reading it
+     * up front would touch an uninitialised (or zero-length) array. */
+    FsmGraph *retFsm = 0;
+
+    if ( numParts == 0 ) {
+        /* The region is empty. Return the empty set. */
+        retFsm = new FsmGraph();
+        retFsm->lambdaFsm();
+    }
+    else {
+        /* Before we union the patterns we need to deal with leaving actions. They
+         * are transferred to error transitions out of the final states (like local
+         * error actions) and to eof actions. In the scanner we need to forbid
+         * on_last for any final state that has a leaving action. */
+        for ( int i = 0; i < numParts; i++ )
+            transferScannerLeavingActions( parts[i] );
+
+        /* Union machines one and up with machine zero. */
+        retFsm = parts[0];
+        for ( int i = 1; i < numParts; i++ ) {
+            retFsm->unionOp( parts[i] );
+            afterOpMinimize( retFsm );
+        }
+
+        runLongestMatch( pd, retFsm );
+    }
+
+    /* The array of part pointers is scratch space on both paths. */
+    delete[] parts;
+
+    /* Need the entry point for the region. */
+    retFsm->setEntry( regionNameInst->id, retFsm->startState );
+
+    return retFsm;
+}
+
+/* Walk a join node: build the expression's machine and, when a trailing
+ * context is present, mark the leaving point and concatenate the context. */
+FsmGraph *LexJoin::walk( Compiler *pd )
+{
+    FsmGraph *graph = expr->walk( pd );
+
+    /* No context: the expression's machine is the result. */
+    if ( context == 0 )
+        return graph;
+
+    /* Embed the mark action on leaving the expression, then append the
+     * machine for the context. */
+    graph->leaveFsmAction( pd->curActionOrd++, mark );
+    FsmGraph *contextFsm = context->walk( pd );
+    graph->concatOp( contextFsm );
+
+    return graph;
+}
+
+/* Release the children owned by an expression node. Binary operators own
+ * both an expression and a term, a plain term node owns only the term, and
+ * a builtin owns nothing. */
+LexExpression::~LexExpression()
+{
+    switch ( type ) {
+        case OrType:
+        case IntersectType:
+        case SubtractType:
+        case StrongSubtractType:
+            delete expression;
+            /* Fall through: binary nodes own a term as well. */
+        case TermType:
+            delete term;
+            break;
+        case BuiltinType:
+            break;
+    }
+}
+
+/* Evaluate a single expression node and return the resulting machine.
+ * lastInSeq is forwarded to afterOpMinimize() for the binary operators. */
+FsmGraph *LexExpression::walk( Compiler *pd, bool lastInSeq )
+{
+    /* Nodes without an operator just hand back a machine. */
+    if ( type == TermType )
+        return term->walk( pd );
+    if ( type == BuiltinType )
+        return makeBuiltin( builtin, pd );
+
+    FsmGraph *result = 0;
+    switch ( type ) {
+        case OrType: {
+            /* Left side first, then the term, then union. The left side of
+             * a union is explicitly walked with lastInSeq == false. */
+            result = expression->walk( pd, false );
+            FsmGraph *rhs = term->walk( pd );
+            result->unionOp( rhs );
+            afterOpMinimize( result, lastInSeq );
+            break;
+        }
+        case IntersectType:
+        case SubtractType: {
+            /* Same evaluation order for both; only the operation differs. */
+            result = expression->walk( pd );
+            FsmGraph *rhs = term->walk( pd );
+            if ( type == IntersectType )
+                result->intersectOp( rhs );
+            else
+                result->subtractOp( rhs );
+            afterOpMinimize( result, lastInSeq );
+            break;
+        }
+        case StrongSubtractType: {
+            result = expression->walk( pd );
+
+            /* Pad the term on both sides with any* machines. */
+            FsmGraph *padded = dotStarFsm( pd );
+            FsmGraph *termFsm = term->walk( pd );
+            FsmGraph *trailing = dotStarFsm( pd );
+            padded->concatOp( termFsm );
+            padded->concatOp( trailing );
+
+            /* Subtract the padded term. */
+            result->subtractOp( padded );
+            afterOpMinimize( result, lastInSeq );
+            break;
+        }
+        default:
+            break;
+    }
+
+    return result;
+}
+
+/* Release the children owned by a term node. The concatenation flavours own
+ * a term and a factor-with-augmentation; a bare factorAug node owns only the
+ * latter. */
+LexTerm::~LexTerm()
+{
+    switch ( type ) {
+        case ConcatType:
+        case RightStartType:
+        case RightFinishType:
+        case LeftType:
+            delete term;
+            /* Fall through: these also own a factorAug. */
+        case FactorAugType:
+            delete factorAug;
+            break;
+    }
+}
+
+/* Evaluate a term node and return the resulting machine.
+ *
+ * All concatenation flavours walk the left term first and the factorAug
+ * second, then concatenate. The priority-guarded flavours additionally draw
+ * one fresh priority key and two priority orderings from the compiler and
+ * store the descriptors in this node's priorDescs. lastInSeq is forwarded to
+ * afterOpMinimize(). */
+FsmGraph *LexTerm::walk( Compiler *pd, bool lastInSeq )
+{
+ FsmGraph *rtnVal = 0;
+ switch ( type ) {
+ case ConcatType: {
+ /* Evaluate the Term. */
+ rtnVal = term->walk( pd, false );
+ /* Evaluate the LexFactorRep. */
+ FsmGraph *rhs = factorAug->walk( pd );
+ /* Perform concatenation. */
+ rtnVal->concatOp( rhs );
+ afterOpMinimize( rtnVal, lastInSeq );
+ break;
+ }
+ case RightStartType: {
+ /* Evaluate the Term. */
+ rtnVal = term->walk( pd );
+
+ /* Evaluate the LexFactorRep. */
+ FsmGraph *rhs = factorAug->walk( pd );
+
+ /* Set up the priority descriptors. The left machine gets the
+ * lower priority whereas the right gets the higher start priority. */
+ priorDescs[0].key = pd->nextPriorKey++;
+ priorDescs[0].priority = 0;
+ rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
+
+ /* The start transitions of the right machine get the higher
+ * priority. Use the same unique key. */
+ priorDescs[1].key = priorDescs[0].key;
+ priorDescs[1].priority = 1;
+ rhs->startFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
+
+ /* Perform concatenation. */
+ rtnVal->concatOp( rhs );
+ afterOpMinimize( rtnVal, lastInSeq );
+ break;
+ }
+ case RightFinishType: {
+ /* Evaluate the Term. */
+ rtnVal = term->walk( pd );
+
+ /* Evaluate the LexFactorRep. */
+ FsmGraph *rhs = factorAug->walk( pd );
+
+ /* Set up the priority descriptors. The left machine gets the
+ * lower priority whereas the finishing transitions to the right
+ * get the higher priority. */
+ priorDescs[0].key = pd->nextPriorKey++;
+ priorDescs[0].priority = 0;
+ rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
+
+ /* The finishing transitions of the right machine get the higher
+ * priority. Use the same unique key. */
+ priorDescs[1].key = priorDescs[0].key;
+ priorDescs[1].priority = 1;
+ rhs->finishFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
+
+ /* Perform concatenation. */
+ rtnVal->concatOp( rhs );
+ afterOpMinimize( rtnVal, lastInSeq );
+ break;
+ }
+ case LeftType: {
+ /* Evaluate the Term. */
+ rtnVal = term->walk( pd );
+
+ /* Evaluate the LexFactorRep. */
+ FsmGraph *rhs = factorAug->walk( pd );
+
+ /* Set up the priority descriptors. The left machine gets the
+ * higher priority. */
+ priorDescs[0].key = pd->nextPriorKey++;
+ priorDescs[0].priority = 1;
+ rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
+
+ /* The right machine gets the lower priority. Since
+ * startTransPrior might unnecessarily increase the number of
+ * states during the state machine construction process (due to
+ * isolation), we use allTransPrior instead, which has the same
+ * effect. */
+ priorDescs[1].key = priorDescs[0].key;
+ priorDescs[1].priority = 0;
+ rhs->allTransPrior( pd->curPriorOrd++, &priorDescs[1] );
+
+ /* Perform concatenation. */
+ rtnVal->concatOp( rhs );
+ afterOpMinimize( rtnVal, lastInSeq );
+ break;
+ }
+ case FactorAugType: {
+ /* No operator: the factorAug's machine is the result. */
+ rtnVal = factorAug->walk( pd );
+ break;
+ }
+ }
+ return rtnVal;
+}
+
+/* Clean up after a factor with augmentation node: it owns its factorRep. */
+LexFactorAug::~LexFactorAug()
+{
+ delete factorRep;
+}
+
+/* Embed this node's actions into graph. actionOrd[i] is the ordering to use
+ * for actions[i]. Starting actions are followed by a minimization; leaving
+ * actions are not. */
+void LexFactorAug::assignActions( Compiler *pd, FsmGraph *graph, int *actionOrd )
+{
+    for ( int i = 0; i < actions.length(); i++ ) {
+        if ( actions[i].type == at_start ) {
+            graph->startFsmAction( actionOrd[i], actions[i].action );
+            afterOpMinimize( graph );
+        }
+        else if ( actions[i].type == at_leave ) {
+            graph->leaveFsmAction( actionOrd[i], actions[i].action );
+        }
+    }
+}
+
+/* Evaluate a factor with augmentation node.
+ *
+ * Orderings for starting actions are drawn before the factor is walked and
+ * orderings for all other actions afterwards, so that starting actions order
+ * ahead of anything embedded while walking the factor. */
+FsmGraph *LexFactorAug::walk( Compiler *pd )
+{
+    /* One ordering slot per action; no array at all when there are none. */
+    int *orderings = 0;
+    if ( actions.length() > 0 )
+        orderings = new int[actions.length()];
+
+    /* Starting actions get their orderings first. */
+    for ( int i = 0; i < actions.length(); i++ ) {
+        if ( actions[i].type == at_start )
+            orderings[i] = pd->curActionOrd++;
+    }
+
+    /* Evaluate the factor with repetition. */
+    FsmGraph *graph = factorRep->walk( pd );
+
+    /* Everything that is not a starting action is ordered after the walk. */
+    for ( int i = 0; i < actions.length(); i++ ) {
+        if ( actions[i].type != at_start )
+            orderings[i] = pd->curActionOrd++;
+    }
+
+    assignActions( pd, graph, orderings );
+
+    /* delete[] on a null pointer is a no-op, so no guard is required. */
+    delete[] orderings;
+    return graph;
+}
+
+
+/* Release the child owned by a factor with repetition node: a nested
+ * factorRep for every repetition operator, a factorNeg otherwise. */
+LexFactorRep::~LexFactorRep()
+{
+    switch ( type ) {
+        case FactorNegType:
+            delete factorNeg;
+            break;
+
+        case StarType:
+        case StarStarType:
+        case OptionalType:
+        case PlusType:
+        case ExactType:
+        case MaxType:
+        case MinType:
+        case RangeType:
+            delete factorRep;
+            break;
+    }
+}
+
+/* Evaluate a factor with repetition node. */
+FsmGraph *LexFactorRep::walk( Compiler *pd )
+{
+ FsmGraph *retFsm = 0;
+
+ switch ( type ) {
+ case StarType: {
+ /* Evaluate the LexFactorRep. */
+ retFsm = factorRep->walk( pd );
+ if ( retFsm->startState->isFinState() ) {
+ warning(loc) << "applying kleene star to a machine that "
+ "accepts zero length word" << endl;
+ }
+
+ /* Shift over the start action orders then do the kleene star. */
+ pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
+ retFsm->starOp( );
+ afterOpMinimize( retFsm );
+ break;
+ }
+ case StarStarType: {
+ /* Evaluate the LexFactorRep. */
+ retFsm = factorRep->walk( pd );
+ if ( retFsm->startState->isFinState() ) {
+ warning(loc) << "applying kleene star to a machine that "
+ "accepts zero length word" << endl;
+ }
+
+ /* Set up the prior descs. All gets priority one, whereas leaving gets
+ * priority zero. Make a unique key so that these priorities don't
+ * interfere with any priorities set by the user. */
+ priorDescs[0].key = pd->nextPriorKey++;
+ priorDescs[0].priority = 1;
+ retFsm->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
+
+ /* Leaveing gets priority 0. Use same unique key. */
+ priorDescs[1].key = priorDescs[0].key;
+ priorDescs[1].priority = 0;
+ retFsm->leaveFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
+
+ /* Shift over the start action orders then do the kleene star. */
+ pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
+ retFsm->starOp( );
+ afterOpMinimize( retFsm );
+ break;
+ }
+ case OptionalType: {
+ /* Make the null fsm. */
+ FsmGraph *nu = new FsmGraph();
+ nu->lambdaFsm( );
+
+ /* Evaluate the LexFactorRep. */
+ retFsm = factorRep->walk( pd );
+
+ /* Perform the question operator. */
+ retFsm->unionOp( nu );
+ afterOpMinimize( retFsm );
+ break;
+ }
+ case PlusType: {
+ /* Evaluate the LexFactorRep. */
+ retFsm = factorRep->walk( pd );
+ if ( retFsm->startState->isFinState() ) {
+ warning(loc) << "applying plus operator to a machine that "
+ "accpets zero length word" << endl;
+ }
+
+ /* Need a duplicated for the star end. */
+ FsmGraph *dup = new FsmGraph( *retFsm );
+
+ /* The start func orders need to be shifted before doing the star. */
+ pd->curActionOrd += dup->shiftStartActionOrder( pd->curActionOrd );
+
+ /* Star the duplicate. */
+ dup->starOp( );
+ afterOpMinimize( dup );
+
+ retFsm->concatOp( dup );
+ afterOpMinimize( retFsm );
+ break;
+ }
+ case ExactType: {
+ /* Get an int from the repetition amount. */
+ if ( lowerRep == 0 ) {
+ /* No copies. Don't need to evaluate the factorRep.
+ * This Defeats the purpose so give a warning. */
+ warning(loc) << "exactly zero repetitions results "
+ "in the null machine" << endl;
+
+ retFsm = new FsmGraph();
+ retFsm->lambdaFsm();
+ }
+ else {
+ /* Evaluate the first LexFactorRep. */
+ retFsm = factorRep->walk( pd );
+ if ( retFsm->startState->isFinState() ) {
+ warning(loc) << "applying repetition to a machine that "
+ "accepts zero length word" << endl;
+ }
+
+ /* The start func orders need to be shifted before doing the
+ * repetition. */
+ pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
+
+ /* Do the repetition on the machine. Already guarded against n == 0 */
+ retFsm->repeatOp( lowerRep );
+ afterOpMinimize( retFsm );
+ }
+ break;
+ }
+ case MaxType: {
+ /* Get an int from the repetition amount. */
+ if ( upperRep == 0 ) {
+ /* No copies. Don't need to evaluate the factorRep.
+ * This Defeats the purpose so give a warning. */
+ warning(loc) << "max zero repetitions results "
+ "in the null machine" << endl;
+
+ retFsm = new FsmGraph();
+ retFsm->lambdaFsm();
+ }
+ else {
+ /* Evaluate the first LexFactorRep. */
+ retFsm = factorRep->walk( pd );
+ if ( retFsm->startState->isFinState() ) {
+ warning(loc) << "applying max repetition to a machine that "
+ "accepts zero length word" << endl;
+ }
+
+ /* The start func orders need to be shifted before doing the
+ * repetition. */
+ pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
+
+ /* Do the repetition on the machine. Already guarded against n == 0 */
+ retFsm->optionalRepeatOp( upperRep );
+ afterOpMinimize( retFsm );
+ }
+ break;
+ }
+ case MinType: {
+ /* Evaluate the repeated machine. */
+ retFsm = factorRep->walk( pd );
+ if ( retFsm->startState->isFinState() ) {
+ warning(loc) << "applying min repetition to a machine that "
+ "accepts zero length word" << endl;
+ }
+
+ /* The start func orders need to be shifted before doing the repetition
+ * and the kleene star. */
+ pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
+
+ if ( lowerRep == 0 ) {
+ /* Acts just like a star op on the machine to return. */
+ retFsm->starOp( );
+ afterOpMinimize( retFsm );
+ }
+ else {
+ /* Take a duplicate for the plus. */
+ FsmGraph *dup = new FsmGraph( *retFsm );
+
+ /* Do repetition on the first half. */
+ retFsm->repeatOp( lowerRep );
+ afterOpMinimize( retFsm );
+
+ /* Star the duplicate. */
+ dup->starOp( );
+ afterOpMinimize( dup );
+
+ /* Tak on the kleene star. */
+ retFsm->concatOp( dup );
+ afterOpMinimize( retFsm );
+ }
+ break;
+ }
+ case RangeType: {
+ /* Check for bogus range. */
+ if ( upperRep - lowerRep < 0 ) {
+ error(loc) << "invalid range repetition" << endl;
+
+ /* Return null machine as recovery. */
+ retFsm = new FsmGraph();
+ retFsm->lambdaFsm();
+ }
+ else if ( lowerRep == 0 && upperRep == 0 ) {
+ /* No copies. Don't need to evaluate the factorRep. This
+ * defeats the purpose so give a warning. */
+ warning(loc) << "zero to zero repetitions results "
+ "in the null machine" << endl;
+
+ retFsm = new FsmGraph();
+ retFsm->lambdaFsm();
+ }
+ else {
+ /* Now need to evaluate the repeated machine. */
+ retFsm = factorRep->walk( pd );
+ if ( retFsm->startState->isFinState() ) {
+ warning(loc) << "applying range repetition to a machine that "
+ "accepts zero length word" << endl;
+ }
+
+ /* The start func orders need to be shifted before doing both kinds
+ * of repetition. */
+ pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
+
+ if ( lowerRep == 0 ) {
+ /* Just doing max repetition. Already guarded against n == 0. */
+ retFsm->optionalRepeatOp( upperRep );
+ afterOpMinimize( retFsm );
+ }
+ else if ( lowerRep == upperRep ) {
+ /* Just doing exact repetition. Already guarded against n == 0. */
+ retFsm->repeatOp( lowerRep );
+ afterOpMinimize( retFsm );
+ }
+ else {
+ /* This is the case that 0 < lowerRep < upperRep. Take a
+ * duplicate for the optional repeat. */
+ FsmGraph *dup = new FsmGraph( *retFsm );
+
+ /* Do repetition on the first half. */
+ retFsm->repeatOp( lowerRep );
+ afterOpMinimize( retFsm );
+
+ /* Do optional repetition on the second half. */
+ dup->optionalRepeatOp( upperRep - lowerRep );
+ afterOpMinimize( dup );
+
+ /* Tack on the duplicate machine. */
+ retFsm->concatOp( dup );
+ afterOpMinimize( retFsm );
+ }
+ }
+ break;
+ }
+ case FactorNegType: {
+ /* Evaluate the Factor. Pass it up. */
+ retFsm = factorNeg->walk( pd );
+ break;
+ }}
+ return retFsm;
+}
+
+
+/* Release the child node held by a factor-with-negation node. */
+LexFactorNeg::~LexFactorNeg()
+{
+    /* Both negation kinds hold their operand in factorNeg; the plain
+     * pass-through kind holds it in factor. Any other type frees nothing. */
+    if ( type == NegateType || type == CharNegateType )
+        delete factorNeg;
+    else if ( type == FactorType )
+        delete factor;
+}
+
+/* Build the machine for a factor-with-negation node. */
+FsmGraph *LexFactorNeg::walk( Compiler *pd )
+{
+    /* Plain factor: nothing to negate, hand the child's machine up. */
+    if ( type == FactorType )
+        return factor->walk( pd );
+
+    /* Unknown node type: no machine. */
+    if ( type != NegateType && type != CharNegateType )
+        return 0;
+
+    /* Evaluate the operand first, then build the machine to subtract it
+     * from: dot-star for full negation, a single dot for character
+     * negation. */
+    FsmGraph *operand = factorNeg->walk( pd );
+    FsmGraph *result = ( type == NegateType ) ? dotStarFsm( pd ) : dotFsm( pd );
+
+    result->subtractOp( operand );
+    afterOpMinimize( result );
+    return result;
+}
+
+/* Release whichever child a factor node holds, selected by its type. */
+LexFactor::~LexFactor()
+{
+    if ( type == LiteralType )
+        delete literal;
+    else if ( type == RangeType )
+        delete range;
+    else if ( type == OrExprType )
+        delete reItem;
+    else if ( type == RegExprType )
+        delete regExp;
+    else if ( type == ParenType )
+        delete join;
+    /* ReferenceType deletes nothing here. */
+}
+
+/* Build the machine for a factor node by delegating to the child that
+ * matches its type. Returns 0 when the type matches none of the cases. */
+FsmGraph *LexFactor::walk( Compiler *pd )
+{
+    switch ( type ) {
+    case LiteralType:
+        return literal->walk( pd );
+    case RangeType:
+        return range->walk( pd );
+    case OrExprType:
+        /* A null root tells the item it has no enclosing regex. */
+        return reItem->walk( pd, 0 );
+    case RegExprType:
+        /* A null root makes the regex treat itself as the root. */
+        return regExp->walk( pd, 0 );
+    case ReferenceType:
+        return varDef->walk( pd );
+    case ParenType:
+        return join->walk( pd );
+    }
+
+    return 0;
+}
+
+
+/* Destroy a range node: free the literals for the lower and upper ends,
+ * lower first. */
+Range::~Range()
+{
+    delete lowerLit;
+    delete upperLit;
+}
+
+/* Check that a machine built from a range endpoint has the shape of a
+ * single-character literal: two states, a non-final start state, exactly
+ * one final state with no transitions out, and exactly one transition out
+ * of the start state that covers a single key. */
+bool Range::verifyRangeFsm( FsmGraph *rangeEnd )
+{
+    /* Short-circuit evaluation keeps the checks in their original order, so
+     * finStateSet[0] is only touched once the set is known to have exactly
+     * one element. */
+    const bool shapeOk =
+        rangeEnd->stateList.length() == 2 &&
+        !rangeEnd->startState->isFinState() &&
+        rangeEnd->finStateSet.length() == 1 &&
+        rangeEnd->finStateSet[0]->outList.length() == 0 &&
+        rangeEnd->startState->outList.length() == 1;
+    if ( !shapeOk )
+        return false;
+
+    /* The single transition out of the start state must not be a range. */
+    FsmTrans *onlyTrans = rangeEnd->startState->outList.head;
+    return !( onlyTrans->lowKey != onlyTrans->highKey );
+}
+
+/* Evaluate a range. Gets the lower and upper keys from the two endpoint
+ * literals and makes an fsm range.
+ *
+ * Each endpoint must be a single-character machine (see verifyRangeFsm).
+ * If either endpoint fails verification an error is reported and a lambda
+ * machine is returned as recovery. The keys are not read in that case,
+ * because an endpoint whose start state has no transitions has a null
+ * outList.head. */
+FsmGraph *Range::walk( Compiler *pd )
+{
+    /* Construct and verify the suitability of the lower end of the range. */
+    FsmGraph *lowerFsm = lowerLit->walk( pd );
+    const bool lowerOk = verifyRangeFsm( lowerFsm );
+    if ( !lowerOk ) {
+        error(lowerLit->loc) <<
+            "bad range lower end, must be a single character" << endl;
+    }
+
+    /* Construct and verify the upper end. Done even when the lower end is
+     * bad so that both problems are reported. */
+    FsmGraph *upperFsm = upperLit->walk( pd );
+    const bool upperOk = verifyRangeFsm( upperFsm );
+    if ( !upperOk ) {
+        error(upperLit->loc) <<
+            "bad range upper end, must be a single character" << endl;
+    }
+
+    if ( !lowerOk || !upperOk ) {
+        /* The endpoints cannot be trusted to have a transition out of the
+         * start state. Return the null machine as recovery, as the
+         * repetition code in this file does. */
+        delete lowerFsm;
+        delete upperFsm;
+
+        FsmGraph *recovery = new FsmGraph();
+        recovery->lambdaFsm();
+        return recovery;
+    }
+
+    /* Grab the keys from the machines, then delete them. */
+    Key lowKey = lowerFsm->startState->outList.head->lowKey;
+    Key highKey = upperFsm->startState->outList.head->lowKey;
+    delete lowerFsm;
+    delete upperFsm;
+
+    /* Validate the range. */
+    if ( lowKey > highKey ) {
+        /* Recover by setting upper to lower; */
+        error(lowerLit->loc) << "lower end of range is greater then upper end" << endl;
+        highKey = lowKey;
+    }
+
+    /* Return the range now that it is validated. */
+    FsmGraph *retFsm = new FsmGraph();
+    retFsm->rangeFsm( lowKey, highKey );
+    return retFsm;
+}
+
+/* Build the machine for a literal: a single key for a number, or a
+ * concatenation of keys for a string. Returns 0 for any other type. */
+FsmGraph *Literal::walk( Compiler *pd )
+{
+    if ( type == Number ) {
+        /* Convert the numeric text to a key and make a one-key machine. */
+        Key fsmKey = makeFsmKeyNum( literal.data, loc, pd );
+        FsmGraph *numFsm = new FsmGraph();
+        numFsm->concatFsm( fsmKey );
+        return numFsm;
+    }
+
+    if ( type == LitString ) {
+        /* Interpret the string text, then convert it to an array of keys. */
+        String interp;
+        bool caseInsensitive;
+        prepareLitString( interp, caseInsensitive, literal, loc );
+
+        Key *keys = new Key[interp.length()];
+        makeFsmKeyArray( keys, interp.data, interp.length(), pd );
+
+        /* Concatenate the keys, honouring the case-insensitive flag. */
+        FsmGraph *strFsm = new FsmGraph();
+        if ( caseInsensitive )
+            strFsm->concatFsmCI( keys, interp.length() );
+        else
+            strFsm->concatFsm( keys, interp.length() );
+
+        delete[] keys;
+        return strFsm;
+    }
+
+    return 0;
+}
+
+/* Destroy a regular expression node. Only the recursive form holds
+ * children (the preceding expression and the trailing item); the empty
+ * form has nothing to free. */
+RegExpr::~RegExpr()
+{
+    if ( type == RecurseItem ) {
+        delete regExp;
+        delete item;
+    }
+}
+
+/* Evaluate a regular expression object.
+ *
+ * rootRegex is the outermost RegExpr of the expression being walked; pass 0
+ * when calling on the root itself. Nested calls receive the root so that
+ * items can consult its caseInsensitive flag.
+ *
+ * Nested calls may return 0 for an empty (sub)expression; the RecurseItem
+ * case relies on that to skip a needless concatenation. The root call never
+ * returns 0: an entirely empty regular expression yields the lambda machine
+ * so that callers are not handed a null machine. */
+FsmGraph *RegExpr::walk( Compiler *pd, RegExpr *rootRegex )
+{
+    /* This is the root regex, pass down a pointer to this. */
+    const bool isRoot = ( rootRegex == 0 );
+    if ( isRoot )
+        rootRegex = this;
+
+    FsmGraph *rtnVal = 0;
+    switch ( type ) {
+        case RecurseItem: {
+            /* Walk both items. */
+            FsmGraph *fsm1 = regExp->walk( pd, rootRegex );
+            FsmGraph *fsm2 = item->walk( pd, rootRegex );
+            if ( fsm1 == 0 )
+                rtnVal = fsm2;
+            else {
+                fsm1->concatOp( fsm2 );
+                rtnVal = fsm1;
+            }
+            break;
+        }
+        case Empty: {
+            /* Nothing to build at this level. Resolved below for the root. */
+            rtnVal = 0;
+            break;
+        }
+    }
+
+    /* An empty root expression matches only the zero-length word. */
+    if ( isRoot && rtnVal == 0 ) {
+        rtnVal = new FsmGraph();
+        rtnVal->lambdaFsm();
+    }
+
+    return rtnVal;
+}
+
+/* Destroy a regular expression item. Only the two or-block forms hold a
+ * child to free; data and dot items free nothing here. */
+ReItem::~ReItem()
+{
+    if ( type == OrBlock || type == NegOrBlock )
+        delete orBlock;
+}
+
+/* Evaluate one item of a regular expression.
+ *
+ * rootRegex may be 0; when it is non-null its caseInsensitive flag selects
+ * case-insensitive concatenation for data items. Returns a new machine for
+ * each of the four item types, or 0 for an unrecognised type. */
+FsmGraph *ReItem::walk( Compiler *pd, RegExpr *rootRegex )
+{
+    /* The fsm to return, is the alphabet signed? */
+    FsmGraph *rtnVal = 0;
+
+    switch ( type ) {
+        case Data: {
+            /* Move the data into an integer array and make a concat fsm. */
+            Key *arr = new Key[data.length()];
+            makeFsmKeyArray( arr, data.data, data.length(), pd );
+
+            /* Make the concat fsm. */
+            rtnVal = new FsmGraph();
+            if ( rootRegex != 0 && rootRegex->caseInsensitive )
+                rtnVal->concatFsmCI( arr, data.length() );
+            else
+                rtnVal->concatFsm( arr, data.length() );
+            delete[] arr;
+            break;
+        }
+        case Dot: {
+            /* Make the dot fsm. */
+            rtnVal = dotFsm( pd );
+            break;
+        }
+        case OrBlock: {
+            /* Get the or block and minimize it. An empty or block walks to
+             * a null machine, which is replaced with lambda. */
+            rtnVal = orBlock->walk( pd, rootRegex );
+            if ( rtnVal == 0 ) {
+                rtnVal = new FsmGraph();
+                rtnVal->lambdaFsm();
+            }
+            rtnVal->minimizePartition2();
+            break;
+        }
+        case NegOrBlock: {
+            /* Get the or block and minimize it. As in the OrBlock case, an
+             * empty or block walks to a null machine; substitute lambda so
+             * the machine is never dereferenced while null. */
+            FsmGraph *fsm = orBlock->walk( pd, rootRegex );
+            if ( fsm == 0 ) {
+                fsm = new FsmGraph();
+                fsm->lambdaFsm();
+            }
+            fsm->minimizePartition2();
+
+            /* Make a dot fsm and subtract from it. */
+            rtnVal = dotFsm( pd );
+            rtnVal->subtractOp( fsm );
+            rtnVal->minimizePartition2();
+            break;
+        }
+    }
+
+    return rtnVal;
+}
+
+/* Destroy an or block. The recursive form holds the preceding block and
+ * the trailing item; the empty form holds nothing. */
+ReOrBlock::~ReOrBlock()
+{
+    if ( type == RecurseItem ) {
+        delete orBlock;
+        delete item;
+    }
+}
+
+
+/* Build the machine for an or block by unioning the machines of its items.
+ * Returns 0 for an empty block (and for any unrecognised type). */
+FsmGraph *ReOrBlock::walk( Compiler *pd, RegExpr *rootRegex )
+{
+    if ( type != RecurseItem )
+        return 0;
+
+    /* Walk the preceding block first, then the trailing item. */
+    FsmGraph *soFar = orBlock->walk( pd, rootRegex );
+    FsmGraph *last = item->walk( pd, rootRegex );
+
+    /* Nothing accumulated yet: the item's machine is the whole result. */
+    if ( soFar == 0 )
+        return last;
+
+    soFar->unionOp( last );
+    return soFar;
+}
+
+/* Evaluate an or block item of a regular expression.
+ *
+ * A Data item becomes an or-machine over the unique keys in its text. A
+ * Range item becomes a range machine; when the root regex is case
+ * insensitive, the part of the range that overlaps 'A'..'Z' is mirrored
+ * into lower case and the part that overlaps 'a'..'z' is mirrored into
+ * upper case. The two overlaps are tested independently, so a range that
+ * touches both (for example X-c) gets both mirrored ranges.
+ *
+ * rootRegex may be 0, in which case matching is case sensitive. Returns 0
+ * for an unrecognised type. */
+FsmGraph *ReOrItem::walk( Compiler *pd, RegExpr *rootRegex )
+{
+    /* The return value, is the alphabet signed? */
+    FsmGraph *rtnVal = 0;
+    switch ( type ) {
+    case Data: {
+        /* Make the or machine. */
+        rtnVal = new FsmGraph();
+
+        /* Put the or data into an array of ints. Note that we find unique
+         * keys. Duplicates are silently ignored. The alternative would be to
+         * issue warning or an error but since we can't with [a0-9a] or 'a' |
+         * 'a' don't bother here. */
+        KeySet keySet;
+        makeFsmUniqueKeyArray( keySet, data.data, data.length(),
+            rootRegex != 0 ? rootRegex->caseInsensitive : false, pd );
+
+        /* Run the or operator. */
+        rtnVal->orFsm( keySet.data, keySet.length() );
+        break;
+    }
+    case Range: {
+        /* Make the upper and lower keys. */
+        Key lowKey = makeFsmKeyChar( lower, pd );
+        Key highKey = makeFsmKeyChar( upper, pd );
+
+        /* Validate the range. */
+        if ( lowKey > highKey ) {
+            /* Recover by setting upper to lower; */
+            error(loc) << "lower end of range is greater then upper end" << endl;
+            highKey = lowKey;
+        }
+
+        /* Make the range machine. */
+        rtnVal = new FsmGraph();
+        rtnVal->rangeFsm( lowKey, highKey );
+
+        if ( rootRegex != 0 && rootRegex->caseInsensitive ) {
+            /* Mirror the upper-case portion of the range into lower case. */
+            if ( lowKey <= 'Z' && 'A' <= highKey ) {
+                Key otherLow = lowKey < 'A' ? Key('A') : lowKey;
+                Key otherHigh = 'Z' < highKey ? Key('Z') : highKey;
+
+                otherLow = 'a' + ( otherLow - 'A' );
+                otherHigh = 'a' + ( otherHigh - 'A' );
+
+                FsmGraph *otherRange = new FsmGraph();
+                otherRange->rangeFsm( otherLow, otherHigh );
+                rtnVal->unionOp( otherRange );
+                rtnVal->minimizePartition2();
+            }
+
+            /* Mirror the lower-case portion of the range into upper case.
+             * This is deliberately not an else-branch: lowKey and highKey
+             * still describe the range as written, which may overlap both
+             * letter cases. */
+            if ( lowKey <= 'z' && 'a' <= highKey ) {
+                Key otherLow = lowKey < 'a' ? Key('a') : lowKey;
+                Key otherHigh = 'z' < highKey ? Key('z') : highKey;
+
+                otherLow = 'A' + ( otherLow - 'a' );
+                otherHigh = 'A' + ( otherHigh - 'a' );
+
+                FsmGraph *otherRange = new FsmGraph();
+                otherRange->rangeFsm( otherLow, otherHigh );
+                rtnVal->unionOp( otherRange );
+                rtnVal->minimizePartition2();
+            }
+        }
+
+        break;
+    }}
+    return rtnVal;
+}
diff --git a/src/parsetree.h b/src/parsetree.h
new file mode 100644
index 00000000..f2d94226
--- /dev/null
+++ b/src/parsetree.h
@@ -0,0 +1,3607 @@
+/*
+ * Copyright 2006-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _COLM_PARSETREE_H
+#define _COLM_PARSETREE_H
+
+#include <iostream>
+#include <string.h>
+#include <string>
+
+#include <avlbasic.h>
+#include <avlmap.h>
+#include <bstmap.h>
+#include <bstset.h>
+#include <vector.h>
+#include <dlist.h>
+#include <dlistval.h>
+#include <dlistmel.h>
+
+#include "global.h"
+#include "cstring.h"
+#include "bytecode.h"
+
+
+/* Operators that are represented with single symbol characters. */
+#define OP_DoubleEql 'e'
+#define OP_NotEql 'q'
+#define OP_LessEql 'l'
+#define OP_GrtrEql 'g'
+#define OP_LogicalAnd 'a'
+#define OP_LogicalOr 'o'
+#define OP_Deref 'd'
+
+#if SIZEOF_LONG != 4 && SIZEOF_LONG != 8
+ #error "SIZEOF_LONG contained an unexpected value"
+#endif
+
+struct NameInst;
+struct FsmGraph;
+struct RedFsm;
+struct ObjectDef;
+struct ElementOf;
+struct UniqueType;
+struct ObjectField;
+struct TransBlock;
+struct CodeBlock;
+struct PdaLiteral;
+struct TypeAlias;
+struct RegionSet;
+struct NameScope;
+struct IterCall;
+struct TemplateType;
+struct ObjectMethod;
+struct Reduction;
+struct Production;
+struct LangVarRef;
+
+/*
+ * Code Vector
+ *
+ * A Vector of code_t with helpers that write half_t and word_t values one
+ * byte at a time, least-significant byte first.
+ */
+struct CodeVect : public Vector<code_t>
+{
+ /* Append the two low bytes of half, low byte first. */
+ void appendHalf( half_t half )
+ {
+ /* not optimal. */
+ append( half & 0xff );
+ append( (half>>8) & 0xff );
+ }
+
+ /* Append word low byte first: four bytes, or eight when SIZEOF_LONG is 8. */
+ void appendWord( word_t word )
+ {
+ /* not optimal. */
+ append( word & 0xff );
+ append( (word>>8) & 0xff );
+ append( (word>>16) & 0xff );
+ append( (word>>24) & 0xff );
+ #if SIZEOF_LONG == 8
+ append( (word>>32) & 0xff );
+ append( (word>>40) & 0xff );
+ append( (word>>48) & 0xff );
+ append( (word>>56) & 0xff );
+ #endif
+ }
+
+ /* Overwrite the two bytes at pos and pos+1 with half, low byte first.
+ * No bounds check is made here. */
+ void setHalf( long pos, half_t half )
+ {
+ /* not optimal. */
+ data[pos] = half & 0xff;
+ data[pos+1] = (half>>8) & 0xff;
+ }
+
+ /* Insert the two bytes of half at pos, low byte first. */
+ void insertHalf( long pos, half_t half )
+ {
+ /* not optimal. */
+ insert( pos, half & 0xff );
+ insert( pos+1, (half>>8) & 0xff );
+ }
+
+ /* Insert word at pos low byte first: four bytes, or eight when
+ * SIZEOF_LONG is 8. Each byte is a separate insert call. */
+ void insertWord( long pos, word_t word )
+ {
+ /* not at all optimal. */
+ insert( pos, word & 0xff );
+ insert( pos+1, (word>>8) & 0xff );
+ insert( pos+2, (word>>16) & 0xff );
+ insert( pos+3, (word>>24) & 0xff );
+ #if SIZEOF_LONG == 8
+ insert( pos+4, (word>>32) & 0xff );
+ insert( pos+5, (word>>40) & 0xff );
+ insert( pos+6, (word>>48) & 0xff );
+ insert( pos+7, (word>>56) & 0xff );
+ #endif
+ }
+
+ /* Insert the pointer value of tree, cast to word_t, at pos. */
+ void insertTree( long pos, tree_t *tree )
+ { insertWord( pos, (word_t) tree ); }
+};
+
+
+
+/* Types of builtin machines. A LexExpression constructed from one of these
+ * values has type BuiltinType and stores the value in its builtin field. */
+enum BuiltinMachine
+{
+ BT_Any,
+ BT_Ascii,
+ BT_Extend,
+ BT_Alpha,
+ BT_Digit,
+ BT_Alnum,
+ BT_Lower,
+ BT_Upper,
+ BT_Cntrl,
+ BT_Graph,
+ BT_Print,
+ BT_Punct,
+ BT_Space,
+ BT_Xdigit,
+ BT_Lambda,
+ BT_Empty
+};
+
+/* Kind of a local variable slot. Must match the LI defines in pdarun.h.
+ * Numbering starts at 1 and the remaining enumerators follow consecutively,
+ * so do not reorder them. */
+enum LocalType
+{
+ LT_Tree = 1,
+ LT_Iter,
+ LT_RevIter,
+ LT_UserIter
+};
+
+/* Describes one local: its kind, the scope number it belongs to, and an
+ * offset. Note the constructor takes the type first while the members are
+ * declared (and therefore initialised) scope first. */
+struct LocalLoc
+{
+ LocalLoc( LocalType type, int scope, int offset )
+ : scope(scope), type(type), offset(offset) {}
+
+ int scope;
+ LocalType type;
+ int offset;
+};
+
+/* A list of LocalLoc entries. append() keeps the entries in non-decreasing
+ * scope order, provided every insertion goes through it. */
+struct Locals
+{
+    Vector<LocalLoc> locals;
+
+    /* Insert ll just before the first entry with a strictly greater scope
+     * (or at the end if there is none), so entries with an equal scope keep
+     * their insertion order. */
+    void append( const LocalLoc &ll )
+    {
+        int at;
+        for ( at = 0; at < locals.length(); at++ ) {
+            if ( ll.scope < locals[at].scope )
+                break;
+        }
+        locals.insert( at, ll );
+    }
+};
+
+typedef BstSet<char> CharSet;
+typedef Vector<unsigned char> UnsignedCharVect;
+
+struct Compiler;
+struct TypeRef;
+
+/* Leaf type. */
+struct Literal;
+
+/* tree_t nodes. */
+
+struct LexTerm;
+struct LexFactorAug;
+struct LexFactorRep;
+struct LexFactorNeg;
+struct LexFactor;
+struct LexExpression;
+struct LexJoin;
+struct JoinOrLm;
+struct RegionJoinOrLm;
+struct TokenRegion;
+struct Namespace;
+struct StructDef;
+struct TokenDef;
+struct TokenDefListReg;
+struct TokenDefListNs;
+struct TokenInstance;
+struct TokenInstanceListReg;
+struct Range;
+struct LangEl;
+
+/* Kind of augmentation, stored by ParserAction and PriorityAug. */
+enum AugType
+{
+ at_start,
+ at_leave
+};
+
+struct Action;
+struct PriorDesc;
+struct RegExpr;
+struct ReItem;
+struct ReOrBlock;
+struct ReOrItem;
+struct ExplicitMachine;
+struct InlineItem;
+struct InlineList;
+
+/* Reference to a named state. */
+typedef Vector<String> NameRef;
+typedef Vector<NameRef*> NameRefList;
+typedef Vector<NameInst*> NameTargList;
+
+/* Structure for storing the location of epsilon transitions. Holds copies
+ * of the source location and of the target name reference. */
+struct EpsilonLink
+{
+ EpsilonLink( const InputLoc &loc, NameRef &target )
+ : loc(loc), target(target) { }
+
+ InputLoc loc;
+ NameRef target;
+};
+
+/* A label: its source location, its text, and a pointer to an associated
+ * object field. The loc and data are copied; objField is stored as given. */
+struct Label
+{
+ Label( const InputLoc &loc, const String &data, ObjectField *objField )
+ : loc(loc), data(data), objField(objField) { }
+
+ InputLoc loc;
+ String data;
+ ObjectField *objField;
+};
+
+/* Structure represents an action assigned to some LexFactorAug node. The
+ * factor with aug will keep an array of these. Each entry records where the
+ * action was written, the augmentation kind, a local error key and the
+ * action itself (stored as a plain pointer). */
+struct ParserAction
+{
+ ParserAction( const InputLoc &loc, AugType type, int localErrKey, Action *action )
+ : loc(loc), type(type), localErrKey(localErrKey), action(action) { }
+
+ InputLoc loc;
+ AugType type;
+ int localErrKey;
+ Action *action;
+};
+
+/* A piece of text together with the input location it came from. An
+ * operator<< for printing a Token to a std::ostream is declared below. */
+struct Token
+{
+ String data;
+ InputLoc loc;
+};
+
+void prepareLitString( String &result, bool &caseInsensitive,
+ const String &srcString, const InputLoc &loc );
+
+std::ostream &operator<<(std::ostream &out, const Token &token );
+
+typedef AvlMap< String, TokenInstance*, ColmCmpStr > LiteralDict;
+typedef AvlMapEl< String, TokenInstance* > LiteralDictEl;
+
+/* Store the value and type of a priority augmentation: the augmentation
+ * kind, the priority key and the priority value assigned under that key. */
+struct PriorityAug
+{
+ PriorityAug( AugType type, int priorKey, int priorValue ) :
+ type(type), priorKey(priorKey), priorValue(priorValue) { }
+
+ AugType type;
+ int priorKey;
+ int priorValue;
+};
+
+/*
+ * A Variable Definition
+ *
+ * Pairs a name with the LexJoin that defines it. The join pointer is stored
+ * as given; no destructor is declared here.
+ */
+struct LexDefinition
+{
+ LexDefinition( const String &name, LexJoin *join )
+ : name(name), join(join) { }
+
+ /* Parse tree traversal. */
+ FsmGraph *walk( Compiler *pd );
+ void makeNameTree( const InputLoc &loc, Compiler *pd );
+
+ String name;
+ LexJoin *join;
+};
+
+typedef Vector<String> StringVect;
+typedef CmpTable<String, ColmCmpStr> CmpStrVect;
+
+/* A namespace qualification: the namespace a reference was declared in plus
+ * the list of qualifying names written in front of it. */
+struct NamespaceQual
+{
+ NamespaceQual()
+ :
+ cachedNspaceQual(0),
+ declInNspace(0)
+ {}
+
+ /* Allocate a qualification with no qualifying names, declared in the
+ * given namespace. */
+ static NamespaceQual *cons( Namespace *declInNspace )
+ {
+ NamespaceQual *nsq = new NamespaceQual;
+ nsq->declInNspace = declInNspace;
+ return nsq;
+ }
+
+ /* Starts out null. NOTE(review): presumably filled in by getQual() to
+ * cache the resolved namespace -- confirm in the implementation. */
+ Namespace *cachedNspaceQual;
+ Namespace *declInNspace;
+
+ StringVect qualNames;
+
+ Namespace *searchFrom( Namespace *from, StringVect::Iter &qualPart );
+ Namespace *getQual( Compiler *pd );
+
+ /* True when at least one qualifying name is present. */
+ bool thisOnly()
+ { return qualNames.length() != 0; }
+};
+
+/* One capture within a regular expression: the actions that mark entering
+ * and leaving the captured region, and the object field associated with the
+ * capture. All three are stored as plain pointers. */
+struct ReCapture
+{
+ ReCapture( Action *markEnter, Action *markLeave, ObjectField *objField )
+ : markEnter(markEnter), markLeave(markLeave), objField(objField) {}
+
+ Action *markEnter;
+ Action *markLeave;
+ ObjectField *objField;
+};
+
+
+typedef Vector<ReCapture> ReCaptureVect;
+
+/* First pair of list links for TokenDef. TokenDefListReg links tokens
+ * through this base. */
+struct TokenDefPtr1
+{
+ TokenDef *prev, *next;
+};
+
+/* Second pair of list links for TokenDef. TokenDefListNs links tokens
+ * through this base, so one TokenDef can be on both lists at once. */
+struct TokenDefPtr2
+{
+ TokenDef *prev, *next;
+};
+
+/* Definition of a token (or ignore, or literal). Inherits two independent
+ * pairs of list links so it can sit on a TokenDefListReg and a
+ * TokenDefListNs at the same time.
+ *
+ * The default constructor puts every pointer and scalar member into a
+ * known state (null, false or zero) so that nothing is left indeterminate,
+ * including the four action pointers that cons() does not assign. The
+ * inherited prev/next links are not initialised here. */
+struct TokenDef
+:
+    public TokenDefPtr1,
+    public TokenDefPtr2
+{
+    /* Initialisers are listed in member declaration order. */
+    TokenDef()
+    :
+        isLiteral(false), isIgnore(false), join(0),
+        action(0), codeBlock(0), tdLangEl(0),
+        setActId(0), actOnLast(0), actOnNext(0), actLagBehind(0),
+        longestMatchId(0), inLmSelect(false),
+        nspace(0), regionSet(0), objectDef(0), contextIn(0),
+        dupOf(0),
+        noPostIgnore(false), noPreIgnore(false), isZero(false)
+    {}
+
+    /* Allocate a TokenDef and fill in the supplied properties. Everything
+     * not passed in keeps the default-constructed null/false/zero value. */
+    static TokenDef *cons( const String &name, const String &literal,
+        bool isLiteral, bool isIgnore, LexJoin *join, CodeBlock *codeBlock,
+        const InputLoc &semiLoc, int longestMatchId, Namespace *nspace,
+        RegionSet *regionSet, ObjectDef *objectDef, StructDef *contextIn )
+    {
+        TokenDef *t = new TokenDef;
+
+        t->name = name;
+        t->literal = literal;
+        t->isLiteral = isLiteral;
+        t->isIgnore = isIgnore;
+        t->join = join;
+        t->action = 0;
+        t->codeBlock = codeBlock;
+        t->tdLangEl = 0;
+        t->semiLoc = semiLoc;
+        t->longestMatchId = longestMatchId;
+        t->inLmSelect = false;
+        t->nspace = nspace;
+        t->regionSet = regionSet;
+        t->objectDef = objectDef;
+        t->contextIn = contextIn;
+        t->dupOf = 0;
+        t->noPostIgnore = false;
+        t->noPreIgnore = false;
+        t->isZero = false;
+
+        return t;
+    }
+
+    InputLoc getLoc();
+
+    String name;
+    String literal;
+    bool isLiteral;
+    bool isIgnore;
+    LexJoin *join;
+    Action *action;
+    CodeBlock *codeBlock;
+    LangEl *tdLangEl;
+    InputLoc semiLoc;
+
+    /* Action pointers; null until assigned elsewhere. */
+    Action *setActId;
+    Action *actOnLast;
+    Action *actOnNext;
+    Action *actLagBehind;
+    int longestMatchId;
+    bool inLmSelect;
+    Namespace *nspace;
+    RegionSet *regionSet;
+    ReCaptureVect reCaptureVect;
+    ObjectDef *objectDef;
+    StructDef *contextIn;
+
+    TokenDef *dupOf;
+    bool noPostIgnore;
+    bool noPreIgnore;
+    bool isZero;
+};
+
+/* List links for TokenInstance. TokenInstanceListReg links instances
+ * through this base. */
+struct TokenInstancePtr
+{
+ TokenInstance *prev, *next;
+};
+
+/* An instance of a token definition within a token region. Inherits the
+ * list links used by TokenInstanceListReg.
+ *
+ * The default constructor puts every pointer and scalar member into a
+ * known state (null, false or zero) so that nothing is left indeterminate,
+ * including the four action pointers that cons() does not assign. The
+ * inherited prev/next links are not initialised here. */
+struct TokenInstance
+:
+    public TokenInstancePtr
+{
+    /* Initialisers are listed in member declaration order. */
+    TokenInstance()
+    :
+        tokenDef(0), join(0),
+        action(0),
+        setActId(0), actOnLast(0), actOnNext(0), actLagBehind(0),
+        longestMatchId(0),
+        inLmSelect(false),
+        nspace(0), tokenRegion(0),
+        dupOf(0)
+    {}
+
+    /* Allocate a TokenInstance and fill in the supplied properties.
+     * Everything not passed in keeps the default-constructed value. */
+    static TokenInstance *cons( TokenDef *tokenDef,
+        LexJoin *join, const InputLoc &semiLoc,
+        int longestMatchId, Namespace *nspace, TokenRegion *tokenRegion )
+    {
+        TokenInstance *t = new TokenInstance;
+
+        t->tokenDef = tokenDef;
+        t->join = join;
+        t->action = 0;
+        t->semiLoc = semiLoc;
+        t->longestMatchId = longestMatchId;
+        t->inLmSelect = false;
+        t->nspace = nspace;
+        t->tokenRegion = tokenRegion;
+        t->dupOf = 0;
+
+        return t;
+    }
+
+    InputLoc getLoc();
+
+    TokenDef *tokenDef;
+    LexJoin *join;
+    Action *action;
+    InputLoc semiLoc;
+
+    /* Action pointers; null until assigned elsewhere. */
+    Action *setActId;
+    Action *actOnLast;
+    Action *actOnNext;
+    Action *actLagBehind;
+    int longestMatchId;
+    bool inLmSelect;
+    Namespace *nspace;
+    TokenRegion *tokenRegion;
+
+    TokenInstance *dupOf;
+};
+
+struct LelDefList;
+
+/* Definition of a nonterminal. Instances are created through the cons()
+ * factories; the prev/next list links are left for the owning list to set. */
+struct NtDef
+{
+    /* Allocate a nonterminal definition with every property supplied. */
+    static NtDef *cons( const String &name, Namespace *nspace,
+        LelDefList *defList, ObjectDef *objectDef,
+        StructDef *contextIn, bool reduceFirst )
+    {
+        NtDef *def = new NtDef;
+
+        def->name = name;
+        def->nspace = nspace;
+        def->defList = defList;
+        def->objectDef = objectDef;
+        def->contextIn = contextIn;
+        def->reduceFirst = reduceFirst;
+
+        return def;
+    }
+
+    /* Allocate a nonterminal definition with no definition list and no
+     * object definition (both null). */
+    static NtDef *cons( const String &name, Namespace *nspace,
+        StructDef *contextIn, bool reduceFirst )
+    {
+        return cons( name, nspace, (LelDefList*)0, (ObjectDef*)0,
+            contextIn, reduceFirst );
+    }
+
+    String name;
+    Namespace *nspace;
+    LelDefList *defList;
+    ObjectDef *objectDef;
+    StructDef *contextIn;
+    bool reduceFirst;
+
+    NtDef *prev, *next;
+};
+
+/* List of nonterminal definitions, linked through NtDef's own prev/next. */
+struct NtDefList : DList<NtDef> {};
+
+/* Declare a new type so that ptreetypes.h need not include dlist.h.
+ * Instances are linked through the TokenInstancePtr base. */
+struct TokenInstanceListReg : DListMel<TokenInstance, TokenInstancePtr> {};
+
+/* Declare a new type so that ptreetypes.h need not include dlist.h.
+ * The two lists use different link bases (TokenDefPtr1 and TokenDefPtr2),
+ * so a TokenDef can be on one list of each kind at the same time. */
+struct TokenDefListReg : DListMel<TokenDef, TokenDefPtr1> {};
+struct TokenDefListNs : DListMel<TokenDef, TokenDefPtr2> {};
+
+/* A stack of StructDef pointers whose top() is safe to call when empty. */
+struct StructStack
+    : public Vector<StructDef*>
+{
+    /* Return the top element, or null when the stack is empty. */
+    StructDef *top()
+    {
+        if ( length() > 0 )
+            return Vector<StructDef*>::top();
+        return 0;
+    }
+};
+
+struct StructEl;
+
+/* Definition of a struct: where it was declared, its name and its object
+ * definition. structEl starts out null. The prev/next links are not set by
+ * the constructor. */
+struct StructDef
+{
+ StructDef( const InputLoc &loc, const String &name, ObjectDef *objectDef )
+ :
+ loc(loc),
+ name(name),
+ objectDef(objectDef),
+ structEl(0)
+ {}
+
+ InputLoc loc;
+ String name;
+ ObjectDef *objectDef;
+ StructEl *structEl;
+
+ StructDef *prev, *next;
+};
+
+/* A named struct element referring back to its StructDef. The id starts at
+ * -1 until one is assigned. The prev/next links are not set by the
+ * constructor. */
+struct StructEl
+{
+ StructEl( const String &name, StructDef *structDef )
+ :
+ name(name),
+ structDef(structDef),
+ id(-1)
+ {}
+
+ String name;
+ StructDef *structDef;
+ int id;
+
+ StructEl *prev, *next;
+};
+
+typedef DList<StructEl> StructElList;
+struct StructDefList : DList<StructDef> {};
+
+/* An entry in a TypeMap, keyed by name. Each constructor fills exactly one
+ * of typeRef, value or structEl and nulls the other two; the caller passes
+ * the Type tag separately, so the tag and the filled pointer are only
+ * consistent if the caller makes them so. */
+struct TypeMapEl
+ : public AvlTreeEl<TypeMapEl>
+{
+ enum Type
+ {
+ AliasType = 1,
+ LangElType,
+ StructType
+ };
+
+ /* Key accessor used by the AVL tree. */
+ const String &getKey() { return key; }
+
+ /* Entry carrying a type reference. */
+ TypeMapEl( Type type, const String &key, TypeRef *typeRef )
+ : type(type), key(key), value(0), typeRef(typeRef), structEl(0) {}
+
+ /* Entry carrying a language element. */
+ TypeMapEl( Type type, const String &key, LangEl *value )
+ : type(type), key(key), value(value), typeRef(0), structEl(0) {}
+
+ /* Entry carrying a struct element. */
+ TypeMapEl( Type type, const String &key, StructEl *structEl )
+ : type(type), key(key), value(0), typeRef(0), structEl(structEl) {}
+
+ Type type;
+ String key;
+ LangEl *value;
+ TypeRef *typeRef;
+ StructEl *structEl;
+
+ TypeMapEl *prev, *next;
+};
+
+/* Symbol Map. */
+typedef AvlTree< TypeMapEl, String, ColmCmpStr > TypeMap;
+
+typedef Vector<TokenRegion*> RegionVect;
+
+/* Implementation of a scanner region: the list of token instances it
+ * contains and the state used while building its machine. The constructor
+ * nulls the pointers and clears the flags; loc and the prev/next links are
+ * not set by it. */
+struct RegionImpl
+{
+ RegionImpl()
+ :
+ regionNameInst(0),
+ lmActSelect(0),
+ lmSwitchHandlesError(false),
+ defaultTokenInstance(0),
+ wasEmpty(false)
+ {}
+
+ InputLoc loc;
+
+ /* This gets saved off during the name walk. Can save it off because token
+ * regions are referenced once only. */
+ NameInst *regionNameInst;
+
+ TokenInstanceListReg tokenInstanceList;
+ Action *lmActSelect;
+ bool lmSwitchHandlesError;
+ TokenInstance *defaultTokenInstance;
+
+ /* We always init empty scanners with a single token. If we had to do this
+ * then wasEmpty is true. */
+ bool wasEmpty;
+
+ RegionImpl *prev, *next;
+
+ void runLongestMatch( Compiler *pd, FsmGraph *graph );
+ void transferScannerLeavingActions( FsmGraph *graph );
+ FsmGraph *walk( Compiler *pd );
+
+ void restart( FsmGraph *graph, FsmTrans *trans );
+ void makeNameTree( const InputLoc &loc, Compiler *pd );
+ void makeActions( Compiler *pd );
+ Action *newAction( Compiler *pd, const InputLoc &loc,
+ const String &name, InlineList *inlineList );
+};
+
+/* A token region: an id and source location paired with the RegionImpl
+ * that implements it. */
+struct TokenRegion
+{
+ /* Construct from a location, an id and the implementing region. The
+ * pre-eof block, zero language element and ignore-only region start out
+ * null; the next/prev links are not set. */
+ TokenRegion( const InputLoc &loc, int id, RegionImpl *impl )
+ :
+ loc(loc),
+ id(id),
+ preEofBlock(0),
+ zeroLel(0),
+ ignoreOnly(0),
+ impl(impl)
+ { }
+
+ InputLoc loc;
+ int id;
+
+ CodeBlock *preEofBlock;
+
+ LangEl *zeroLel;
+ TokenRegion *ignoreOnly;
+
+ RegionImpl *impl;
+
+ TokenRegion *next, *prev;
+
+ /* Tree traversal. */
+ void findName( Compiler *pd );
+};
+
+/* Groups the region implementations and token regions that belong
+ * together, along with the list of token definitions for the set. The
+ * constructor only stores the pointers it is given; the next/prev links
+ * are not set. */
+struct RegionSet
+{
+ RegionSet( RegionImpl *implTokenIgnore, RegionImpl *implTokenOnly,
+ RegionImpl *implIgnoreOnly, TokenRegion *tokenIgnore,
+ TokenRegion *tokenOnly, TokenRegion *ignoreOnly,
+ TokenRegion *collectIgnore )
+ :
+ implTokenIgnore(implTokenIgnore),
+ implTokenOnly(implTokenOnly),
+ implIgnoreOnly(implIgnoreOnly),
+
+ tokenIgnore(tokenIgnore),
+ tokenOnly(tokenOnly),
+ ignoreOnly(ignoreOnly),
+ collectIgnore(collectIgnore)
+ {}
+
+ /* Provides the scanner state machines. We reuse ignore-only. */
+ RegionImpl *implTokenIgnore;
+ RegionImpl *implTokenOnly;
+ RegionImpl *implIgnoreOnly;
+
+ TokenRegion *tokenIgnore;
+ TokenRegion *tokenOnly;
+ TokenRegion *ignoreOnly;
+ TokenRegion *collectIgnore;
+
+ /* Token definitions in this set, linked through TokenDefPtr1. */
+ TokenDefListReg tokenDefList;
+
+ RegionSet *next, *prev;
+};
+
+typedef Vector<RegionSet*> RegionSetVect;
+
+typedef DList<RegionSet> RegionSetList;
+typedef DList<TokenRegion> RegionList;
+typedef DList<RegionImpl> RegionImplList;
+
+typedef Vector<Namespace*> NamespaceVect;
+typedef Vector<Reduction*> ReductionVect;
+
+/* Generics have runtime-representations, so we must track them as unique
+ * types. This gives the runtimes some idea of what is contained in the
+ * structures.
+ *
+ * The constructor stores the type references for the element, key and
+ * value; the matching unique types, the object definition and the element
+ * offset start out null/zero. */
+struct GenericType
+ : public DListEl<GenericType>
+{
+ GenericType( long typeId, long id, TypeRef *elTr,
+ TypeRef *keyTr, TypeRef *valueTr, ObjectField *el )
+ :
+ typeId(typeId), id(id),
+ elTr(elTr), keyTr(keyTr), valueTr(valueTr),
+ elUt(0), keyUt(0), valueUt(0),
+ objDef(0), el(el), elOffset(0)
+ {}
+
+ void declare( Compiler *pd, Namespace *nspace );
+
+ long typeId;
+ long id;
+
+ /* Type references as supplied to the constructor. */
+ TypeRef *elTr;
+ TypeRef *keyTr;
+ TypeRef *valueTr;
+
+ /* Unique types; null after construction. */
+ UniqueType *elUt;
+ UniqueType *keyUt;
+ UniqueType *valueUt;
+
+ ObjectDef *objDef;
+ ObjectField *el;
+ long elOffset;
+};
+
+typedef DList<GenericType> GenericList;
+
+/* Graph dictionary. An element maps a name to a LexDefinition and can be
+ * held in both an AVL tree (GraphDict) and a doubly linked list
+ * (GraphList). */
+struct GraphDictEl
+:
+ public AvlTreeEl<GraphDictEl>,
+ public DListEl<GraphDictEl>
+{
+ /* Element with a key only; the value is null. */
+ GraphDictEl( const String &key )
+ : key(key), value(0), isInstance(false) { }
+
+ /* Element with a key and its definition. */
+ GraphDictEl( const String &key, LexDefinition *value )
+ : key(key), value(value), isInstance(false) { }
+
+ /* Key accessor used by the AVL tree. */
+ const String &getKey() { return key; }
+
+ String key;
+ LexDefinition *value;
+ bool isInstance;
+
+ /* Location info of graph definition. Points to variable name of assignment.
+ * Not set by either constructor. */
+ InputLoc loc;
+};
+
+typedef AvlTree<GraphDictEl, String, ColmCmpStr> GraphDict;
+typedef DList<GraphDictEl> GraphList;
+
+/* A type alias: the alias name, the namespace it was declared in, the type
+ * it refers to and where it was declared. The prev/next links are not set
+ * by the constructor. */
+struct TypeAlias
+{
+ TypeAlias( const InputLoc &loc, Namespace *nspace,
+ const String &name, TypeRef *typeRef )
+ :
+ loc(loc),
+ nspace(nspace),
+ name(name),
+ typeRef(typeRef)
+ {}
+
+ InputLoc loc;
+ Namespace *nspace;
+ String name;
+ TypeRef *typeRef;
+
+ TypeAlias *prev, *next;
+};
+
+typedef DList<TypeAlias> TypeAliasList;
+
+typedef AvlMap<String, ObjectField*, ColmCmpStr> FieldMap;
+typedef AvlMapEl<String, ObjectField*> FieldMapEl;
+
+typedef AvlMap<String, ObjectMethod*, ColmCmpStr> MethodMap;
+typedef AvlMapEl<String, ObjectMethod*> MethodMapEl;
+
+/* Tree of name scopes for an object def. All of the object fields inside
+ * this tree live in one object def. This is used for scoping names in
+ * functions. */
+struct NameScope
+{
+    /* All pointers start out null; the prev/next links are not set. */
+    NameScope()
+    :
+        owningObj(0),
+        parentScope(0),
+        childIter(0),
+        caseClauseVarRef(0)
+    {}
+
+    ObjectDef *owningObj;
+    FieldMap fieldMap;
+    MethodMap methodMap;
+
+    NameScope *parentScope;
+    DList<NameScope> children;
+
+    /* For iteration after declaration. */
+    NameScope *childIter;
+    LangVarRef *caseClauseVarRef;
+
+    NameScope *prev, *next;
+
+    /* Number of scopes on the path from this scope up to the root,
+     * counting this scope itself. A scope with no parent has depth 1. */
+    int depth()
+    {
+        int levels = 0;
+        for ( NameScope *cur = this; cur != 0; cur = cur->parentScope )
+            levels += 1;
+        return levels;
+    }
+
+    ObjectField *findField( const String &name ) const;
+    ObjectMethod *findMethod( const String &name ) const;
+
+    ObjectField *checkRedecl( const String &name );
+    void insertField( const String &name, ObjectField *value );
+
+};
+
+
+/* A namespace: the containers for everything declared inside it, a link to
+ * its parent and a list of its children. */
+struct Namespace
+{
+ /* Construct from a location, name, id and parent namespace. Allocates a
+ * fresh root NameScope; no destructor is declared here to free it. The
+ * next/prev links are not set. */
+ Namespace( const InputLoc &loc, const String &name, int id,
+ Namespace *parentNamespace ) :
+ loc(loc), name(name), id(id),
+ parentNamespace(parentNamespace)
+ {
+ rootScope = new NameScope;
+ }
+
+ /* Tree traversal. */
+ Namespace *findNamespace( const String &name );
+ Reduction *findReduction( const String &name );
+
+ InputLoc loc;
+ String name;
+ int id;
+
+ /* Literal patterns and the dictionary mapping literals to the underlying
+ * tokens. */
+ LiteralDict literalDict;
+
+ /* List of tokens defs in the namespace. */
+ TokenDefListNs tokenDefList;
+
+ /* List of nonterminal defs in the namespace. */
+ NtDefList ntDefList;
+
+ /* List of struct defs in the namespace. */
+ StructDefList structDefList;
+
+ /* Dictionary of symbols within the region. */
+ TypeMap typeMap;
+ GenericList genericList;
+
+ /* regular language definitions. */
+ GraphDict rlMap;
+
+ TypeAliasList typeAliasList;
+
+ Namespace *parentNamespace;
+ NamespaceVect childNamespaces;
+
+ ReductionVect reductions;
+
+ /* Root of the name scope tree; allocated in the constructor. */
+ NameScope *rootScope;
+
+ Namespace *next, *prev;
+
+ void declare( Compiler *pd );
+};
+
+typedef DList<Namespace> NamespaceList;
+typedef BstSet< Namespace*, CmpOrd<Namespace*> > NamespaceSet;
+
+/* One item in the text of a reduce action or reduce nonterminal: either a
+ * reference of some kind or a piece of literal text, selected by type. */
+struct ReduceTextItem
+{
+ enum Type {
+ LhsRef,
+ RhsRef,
+ TreeRef,
+ RhsLoc,
+ Txt
+ };
+
+ /* Only n is initialised; type must be assigned by whoever creates the
+ * item, and the prev/next links are left for the owning list. */
+ ReduceTextItem() : n(0) {}
+
+ Type type;
+ String txt;
+ int n;
+
+ ReduceTextItem *prev, *next;
+};
+
+typedef DList<ReduceTextItem> ReduceTextItemList;
+
+/* A reduction entry attached to a nonterminal: the nonterminal's type
+ * reference and the list of text items that make up its body. The
+ * prev/next links are not set by the constructor. */
+struct ReduceNonTerm
+{
+ ReduceNonTerm( const InputLoc &loc, TypeRef *nonTerm )
+ :
+ loc(loc),
+ nonTerm(nonTerm)
+ {}
+
+ InputLoc loc;
+ TypeRef *nonTerm;
+ ReduceTextItemList itemList;
+
+ ReduceNonTerm *prev, *next;
+};
+
+/* A reduction entry attached to one production of a nonterminal. The
+ * production is identified by name (prod); the production pointer starts
+ * out null. The prev/next links are not set by the constructor. */
+struct ReduceAction
+{
+ ReduceAction( const InputLoc &loc, TypeRef *nonTerm,
+ const String &prod )
+ :
+ loc(loc), nonTerm(nonTerm),
+ prod(prod),
+ production(0)
+ {}
+
+ InputLoc loc;
+ TypeRef *nonTerm;
+ String prod;
+ ReduceTextItemList itemList;
+
+ Production *production;
+
+ ReduceAction *prev, *next;
+};
+
+typedef DList<ReduceAction> ReduceActionList;
+typedef DList<ReduceNonTerm> ReduceNonTermList;
+
+typedef Vector<ReduceAction*> ReduceActionVect;
+
+/* A named reduction: a set of reduce actions and reduce nonterminals.
+ *
+ * Each Reduction gets a process-wide sequential id, starting at 1, taken
+ * from a function-local static counter. var is a copy of the name with its
+ * first character lower-cased. */
+struct Reduction
+{
+    Reduction( const InputLoc &loc, String name )
+    :
+        loc(loc), name(name),
+        needData(0), needLoc(0),
+        postfixBased(false),
+        parserBased(false)
+    {
+        static int nextId = 1;
+        id = nextId++;
+
+        /* Build var from the raw characters so it gets its own storage
+         * rather than sharing the name's. */
+        var = name.data;
+
+        /* Lower-case the first character. Skipped for an empty name, where
+         * there is no first character to write. The cast is required
+         * because passing a negative char to tolower is undefined. */
+        if ( var.length() > 0 )
+            var.data[0] = tolower( (unsigned char) var.data[0] );
+    }
+
+    InputLoc loc;
+    String name;
+    String var;
+    int id;
+
+    /* Both start out null. */
+    bool *needData;
+    bool *needLoc;
+
+    bool postfixBased;
+    bool parserBased;
+
+    ReduceActionList reduceActions;
+    ReduceNonTermList reduceNonTerms;
+};
+
+/*
+ * LexJoin
+ *
+ * Wraps a LexExpression, with an optional context join and mark action.
+ * All three pointers start out null; cons() sets only the expression.
+ */
+struct LexJoin
+{
+ LexJoin()
+ :
+ expr(0),
+ context(0),
+ mark(0)
+ {}
+
+ /* Allocate a join around the given expression. */
+ static LexJoin *cons( LexExpression *expr )
+ {
+ LexJoin *j = new LexJoin;
+ j->expr = expr;
+ return j;
+ }
+
+ /* Tree traversal. */
+ FsmGraph *walk( Compiler *pd );
+ void makeNameTree( Compiler *pd );
+ void varDecl( Compiler *pd, TokenDef *tokenDef );
+
+ /* Data. */
+ LexExpression *expr;
+ LexJoin *context;
+ Action *mark;
+};
+
+/*
+ * LexExpression
+ */
+struct LexExpression
+{
+ enum Type {
+ OrType,
+ IntersectType,
+ SubtractType,
+ StrongSubtractType,
+ TermType,
+ BuiltinType
+ };
+
+ /* Empty node: no children, no valid builtin or type yet, and prev/next
+ * pointing back at the node itself. */
+ LexExpression()
+ :
+ expression(0),
+ term(0),
+ builtin((BuiltinMachine)-1),
+ type((Type)-1),
+ prev(this),
+ next(this)
+ {}
+
+ /* Binary node: an expression on the left and a term on the right, joined
+ * by the operator named in 'type'. */
+ static LexExpression *cons( LexExpression *expression, LexTerm *term, Type type )
+ {
+ LexExpression *node = new LexExpression;
+ node->expression = expression;
+ node->term = term;
+ node->type = type;
+ return node;
+ }
+
+ /* Leaf node holding only a term. */
+ static LexExpression *cons( LexTerm *term )
+ {
+ LexExpression *node = new LexExpression;
+ node->term = term;
+ node->type = TermType;
+ return node;
+ }
+
+ /* Leaf node holding a builtin machine. */
+ static LexExpression *cons( BuiltinMachine builtin )
+ {
+ LexExpression *node = new LexExpression;
+ node->builtin = builtin;
+ node->type = BuiltinType;
+ return node;
+ }
+
+ ~LexExpression();
+
+ /* Tree traversal. */
+ FsmGraph *walk( Compiler *pd, bool lastInSeq = true );
+ void makeNameTree( Compiler *pd );
+ void varDecl( Compiler *pd, TokenDef *tokenDef );
+
+ /* Node data. */
+ LexExpression *expression;
+ LexTerm *term;
+ BuiltinMachine builtin;
+ Type type;
+
+ LexExpression *prev, *next;
+};
+
+/*
+ * LexTerm
+ */
+struct LexTerm
+{
+ enum Type {
+ ConcatType,
+ RightStartType,
+ RightFinishType,
+ LeftType,
+ FactorAugType
+ };
+
+ /* Empty node: no children and no valid type yet. */
+ LexTerm()
+ :
+ term(0),
+ factorAug(0),
+ type((Type)-1)
+ {}
+
+ /* Term followed by an augmented factor; the operator is concatenation. */
+ static LexTerm *cons( LexTerm *term, LexFactorAug *factorAug )
+ {
+ return cons( term, factorAug, ConcatType );
+ }
+
+ /* Term followed by an augmented factor, joined by the given operator. */
+ static LexTerm *cons( LexTerm *term, LexFactorAug *factorAug, Type type )
+ {
+ LexTerm *node = new LexTerm;
+ node->term = term;
+ node->factorAug = factorAug;
+ node->type = type;
+ return node;
+ }
+
+ /* Leaf node holding only an augmented factor. The term stays null, as
+ * set by the constructor. */
+ static LexTerm *cons( LexFactorAug *factorAug )
+ {
+ LexTerm *node = new LexTerm;
+ node->factorAug = factorAug;
+ node->type = FactorAugType;
+ return node;
+ }
+
+ ~LexTerm();
+
+ /* Tree traversal. */
+ FsmGraph *walk( Compiler *pd, bool lastInSeq = true );
+ void makeNameTree( Compiler *pd );
+ void varDecl( Compiler *pd, TokenDef *tokenDef );
+
+ LexTerm *term;
+ LexFactorAug *factorAug;
+ Type type;
+
+ /* Priority descriptor for RightFinish type. */
+ PriorDesc priorDescs[2];
+};
+
+
+/* Third level of precedence. Augmenting nodes with actions and priorities. */
+struct LexFactorAug
+{
+ /* Starts with no repetition node attached. */
+ LexFactorAug()
+ : factorRep(0)
+ {}
+
+ /* Allocate an augmentation node around the given repetition node. */
+ static LexFactorAug *cons( LexFactorRep *factorRep )
+ {
+ LexFactorAug *aug = new LexFactorAug;
+ aug->factorRep = factorRep;
+ return aug;
+ }
+
+ ~LexFactorAug();
+
+ /* Tree traversal. */
+ FsmGraph *walk( Compiler *pd );
+ void makeNameTree( Compiler *pd );
+ void varDecl( Compiler *pd, TokenDef *tokenDef );
+
+ void assignActions( Compiler *pd, FsmGraph *graph, int *actionOrd );
+
+ /* Actions and priorities assigned to the factor node. */
+ Vector<ParserAction> actions;
+ ReCaptureVect reCaptureVect;
+
+ LexFactorRep *factorRep;
+};
+
+/* Fourth level of precedence. Trailing unary operators. Provide Kleene star,
+ * optional and plus. */
+struct LexFactorRep
+{
+ enum Type {
+ StarType,
+ StarStarType,
+ OptionalType,
+ PlusType,
+ ExactType,
+ MaxType,
+ MinType,
+ RangeType,
+ FactorNegType
+ };
+
+ /* Empty node: no children, zero repetition bounds, no valid type yet. */
+ LexFactorRep()
+ :
+ factorRep(0), factorNeg(0),
+ lowerRep(0), upperRep(0),
+ type((Type)-1)
+ {}
+
+ /* Repetition node: applies the operator named by 'type' to 'factorRep',
+ * recording the lower and upper repetition counts. */
+ static LexFactorRep *cons( const InputLoc &loc, LexFactorRep *factorRep,
+ int lowerRep, int upperRep, Type type )
+ {
+ LexFactorRep *rep = new LexFactorRep;
+ rep->loc = loc;
+ rep->type = type;
+ rep->factorNeg = 0;
+ rep->factorRep = factorRep;
+ rep->upperRep = upperRep;
+ rep->lowerRep = lowerRep;
+ return rep;
+ }
+
+ /* Leaf node that just passes through to a negation-level node. */
+ static LexFactorRep *cons( LexFactorNeg *factorNeg )
+ {
+ LexFactorRep *rep = new LexFactorRep;
+ rep->factorNeg = factorNeg;
+ rep->type = FactorNegType;
+ return rep;
+ }
+
+ ~LexFactorRep();
+
+ /* Tree traversal. */
+ FsmGraph *walk( Compiler *pd );
+ void makeNameTree( Compiler *pd );
+
+ InputLoc loc;
+ LexFactorRep *factorRep;
+ LexFactorNeg *factorNeg;
+ int lowerRep, upperRep;
+ Type type;
+
+ /* Priority descriptor for StarStar type. */
+ PriorDesc priorDescs[2];
+};
+
+/* Fifth level of precedence. Provides Negation. */
+struct LexFactorNeg
+{
+ enum Type {
+ NegateType,
+ CharNegateType,
+ FactorType
+ };
+
+ /* Empty node: no children and no valid type yet. */
+ LexFactorNeg()
+ :
+ factorNeg(0), factor(0),
+ type((Type)-1)
+ {}
+
+ /* Negation node: applies the operator named by 'type' to 'factorNeg'. */
+ static LexFactorNeg *cons( LexFactorNeg *factorNeg, Type type )
+ {
+ LexFactorNeg *neg = new LexFactorNeg;
+ neg->factor = 0;
+ neg->factorNeg = factorNeg;
+ neg->type = type;
+ return neg;
+ }
+
+ /* Leaf node that just passes through to a factor. */
+ static LexFactorNeg *cons( LexFactor *factor )
+ {
+ LexFactorNeg *neg = new LexFactorNeg;
+ neg->factor = factor;
+ neg->factorNeg = 0;
+ neg->type = FactorType;
+ return neg;
+ }
+
+ ~LexFactorNeg();
+
+ /* Tree traversal. */
+ FsmGraph *walk( Compiler *pd );
+ void makeNameTree( Compiler *pd );
+
+ LexFactorNeg *factorNeg;
+ LexFactor *factor;
+ Type type;
+};
+
+/*
+ * LexFactor
+ */
+struct LexFactor
+{
+ /* Language elements a factor node can be. */
+ enum Type {
+ LiteralType,
+ RangeType,
+ OrExprType,
+ RegExprType,
+ ReferenceType,
+ ParenType
+ };
+
+ /* Empty factor: every payload pointer null, bounds zero, no valid type
+ * yet. Each factory below fills in exactly one payload. */
+ LexFactor()
+ :
+ literal(0), range(0), reItem(0), regExp(0),
+ varDef(0), join(0),
+ lower(0), upper(0),
+ type((Type)-1)
+ {}
+
+ /* Factor that is a literal machine. */
+ static LexFactor *cons( Literal *literal )
+ {
+ LexFactor *factor = new LexFactor;
+ factor->literal = literal;
+ factor->type = LiteralType;
+ return factor;
+ }
+
+ /* Factor that is a range. */
+ static LexFactor *cons( Range *range )
+ {
+ LexFactor *factor = new LexFactor;
+ factor->range = range;
+ factor->type = RangeType;
+ return factor;
+ }
+
+ /* Factor that is the or part of a regular expression. */
+ static LexFactor *cons( ReItem *reItem )
+ {
+ LexFactor *factor = new LexFactor;
+ factor->reItem = reItem;
+ factor->type = OrExprType;
+ return factor;
+ }
+
+ /* Factor that is a regular expression. */
+ static LexFactor *cons( RegExpr *regExp )
+ {
+ LexFactor *factor = new LexFactor;
+ factor->regExp = regExp;
+ factor->type = RegExprType;
+ return factor;
+ }
+
+ /* Factor that refers to a var def; the only factory that records a
+ * location. */
+ static LexFactor *cons( const InputLoc &loc, LexDefinition *varDef )
+ {
+ LexFactor *factor = new LexFactor;
+ factor->loc = loc;
+ factor->varDef = varDef;
+ factor->type = ReferenceType;
+ return factor;
+ }
+
+ /* Factor that is a parenthesized join. */
+ static LexFactor *cons( LexJoin *join )
+ {
+ LexFactor *factor = new LexFactor;
+ factor->join = join;
+ factor->type = ParenType;
+ return factor;
+ }
+
+ /* Cleanup. */
+ ~LexFactor();
+
+ /* Tree traversal. */
+ FsmGraph *walk( Compiler *pd );
+ void makeNameTree( Compiler *pd );
+
+ InputLoc loc;
+ Literal *literal;
+ Range *range;
+ ReItem *reItem;
+ RegExpr *regExp;
+ LexDefinition *varDef;
+ LexJoin *join;
+ int lower, upper;
+ Type type;
+};
+
+/* A range machine. Only ever composed of two literals. */
+struct Range
+{
+ /* Allocate a range bounded by the two given literals. */
+ static Range *cons( Literal *lowerLit, Literal *upperLit )
+ {
+ Range *range = new Range;
+ range->upperLit = upperLit;
+ range->lowerLit = lowerLit;
+ return range;
+ }
+
+ ~Range();
+ FsmGraph *walk( Compiler *pd );
+ bool verifyRangeFsm( FsmGraph *rangeEnd );
+
+ /* The two ends of the range. */
+ Literal *lowerLit;
+ Literal *upperLit;
+};
+
+/* Some literal machine. Can be a number or literal string. */
+struct Literal
+{
+ enum LiteralType { Number, LitString };
+
+ /* Allocate a literal of the given kind, keeping its source text and the
+ * location it was read from. */
+ static Literal *cons( const InputLoc &loc, const String &literal, LiteralType type )
+ {
+ Literal *lit = new Literal;
+ lit->type = type;
+ lit->literal = literal;
+ lit->loc = loc;
+ return lit;
+ }
+
+ FsmGraph *walk( Compiler *pd );
+
+ InputLoc loc;
+ String literal;
+ LiteralType type;
+};
+
+/* Regular expression. */
+/* A regular expression, built up as a left-recursive chain: either Empty, or
+ * a preceding RegExpr followed by one more item (RecurseItem). */
+struct RegExpr
+{
+ enum RegExpType { RecurseItem, Empty };
+
+ /* Constructors. */
+
+ /* Empty expression. Both child pointers are set to null explicitly: the
+ * struct has no constructor, so 'new RegExpr' would otherwise leave them
+ * indeterminate. */
+ static RegExpr *cons()
+ {
+ RegExpr *r = new RegExpr;
+ r->regExp = 0;
+ r->item = 0;
+ r->type = Empty;
+ r->caseInsensitive = false;
+ return r;
+ }
+
+ /* Expression made of an existing expression followed by one more item. */
+ static RegExpr *cons( RegExpr *regExp, ReItem *item )
+ {
+ RegExpr *r = new RegExpr;
+ r->regExp = regExp;
+ r->item = item;
+ r->type = RecurseItem;
+ r->caseInsensitive = false;
+ return r;
+ }
+
+ ~RegExpr();
+ FsmGraph *walk( Compiler *pd, RegExpr *rootRegex );
+
+ RegExpr *regExp;
+ ReItem *item;
+ RegExpType type;
+
+ /* Always false after construction; set elsewhere if at all. */
+ bool caseInsensitive;
+};
+
+/* An item in a regular expression. */
+/* An item in a regular expression: literal data, a dot, or a (possibly
+ * negated) or block. */
+struct ReItem
+{
+ enum ReItemType { Data, Dot, OrBlock, NegOrBlock };
+
+ /* Item holding literal data. The or-block pointer is nulled explicitly:
+ * the struct has no constructor, so it would otherwise be indeterminate. */
+ static ReItem *cons( const String &data )
+ {
+ ReItem *r = new ReItem;
+ r->data = data;
+ r->orBlock = 0;
+ r->type = Data;
+ return r;
+ }
+
+ /* Item with only a kind and no payload. The or-block pointer is nulled
+ * for the same reason as above. */
+ static ReItem *cons( ReItemType type )
+ {
+ ReItem *r = new ReItem;
+ r->orBlock = 0;
+ r->type = type;
+ return r;
+ }
+
+ /* Item wrapping an or block; 'type' says whether it is negated. */
+ static ReItem *cons( ReOrBlock *orBlock, ReItemType type )
+ {
+ ReItem *r = new ReItem;
+ r->orBlock = orBlock;
+ r->type = type;
+ return r;
+ }
+
+ ~ReItem();
+ FsmGraph *walk( Compiler *pd, RegExpr *rootRegex );
+
+ String data;
+ ReOrBlock *orBlock;
+ ReItemType type;
+};
+
+/* An or block item. */
+/* An or block, built up as a left-recursive chain: either Empty, or a
+ * preceding block followed by one more item (RecurseItem). */
+struct ReOrBlock
+{
+ enum ReOrBlockType { RecurseItem, Empty };
+
+ /* Constructors. */
+
+ /* Empty block. Both child pointers are set to null explicitly: the struct
+ * has no constructor, so 'new ReOrBlock' would otherwise leave them
+ * indeterminate. */
+ static ReOrBlock *cons()
+ {
+ ReOrBlock *r = new ReOrBlock;
+ r->orBlock = 0;
+ r->item = 0;
+ r->type = Empty;
+ return r;
+ }
+
+ /* Block made of an existing block followed by one more item. */
+ static ReOrBlock *cons( ReOrBlock *orBlock, ReOrItem *item )
+ {
+ ReOrBlock *r = new ReOrBlock;
+ r->orBlock = orBlock;
+ r->item = item;
+ r->type = RecurseItem;
+ return r;
+ }
+
+ ~ReOrBlock();
+ FsmGraph *walk( Compiler *pd, RegExpr *rootRegex );
+
+ ReOrBlock *orBlock;
+ ReOrItem *item;
+ ReOrBlockType type;
+};
+
+/* An item in an or block. */
+/* An item in an or block: either literal data or a character range. */
+struct ReOrItem
+{
+ enum ReOrItemType { Data, Range };
+
+ /* Item holding literal data. The range bounds are zeroed explicitly: the
+ * struct has no constructor, so they would otherwise be indeterminate. */
+ static ReOrItem *cons( const InputLoc &loc, const String &data )
+ {
+ ReOrItem *r = new ReOrItem;
+ r->loc = loc;
+ r->data = data;
+ r->lower = 0;
+ r->upper = 0;
+ r->type = Data;
+ return r;
+ }
+
+ /* Item holding a character range from 'lower' to 'upper'. */
+ static ReOrItem *cons( const InputLoc &loc, char lower, char upper )
+ {
+ ReOrItem *r = new ReOrItem;
+ r->loc = loc;
+ r->lower = lower;
+ r->upper = upper;
+ r->type = Range;
+ return r;
+ }
+
+ FsmGraph *walk( Compiler *pd, RegExpr *rootRegex );
+
+ InputLoc loc;
+ String data;
+ char lower;
+ char upper;
+ ReOrItemType type;
+};
+
+
+/*
+ * Inline code tree
+ */
+struct InlineList;
+/* One node of the inline code tree. Depending on its Type a node carries
+ * text, a name reference, a name target, or a token region with a token
+ * instance. The struct has no constructor, so every factory below sets all
+ * payload pointers itself; the ones it does not use are set to null rather
+ * than left indeterminate. The prev and next pointers are not touched. */
+struct InlineItem
+{
+ enum Type
+ {
+ Text,
+ LmSwitch,
+ LmSetActId,
+ LmSetTokEnd,
+ LmOnLast,
+ LmOnNext,
+ LmOnLagBehind,
+ LmInitAct,
+ LmInitTokStart,
+ LmSetTokStart
+ };
+
+ /* Item carrying a piece of text. */
+ static InlineItem *cons( const InputLoc &loc, const String &data, Type type )
+ {
+ InlineItem *i = new InlineItem;
+ i->loc = loc;
+ i->data = data;
+ i->nameRef = 0;
+ i->nameTarg = 0;
+ i->children = 0;
+ i->tokenRegion = 0;
+ i->longestMatchPart = 0;
+ i->type = type;
+ return i;
+ }
+
+ /* Item carrying a name reference. */
+ static InlineItem *cons( const InputLoc &loc, NameRef *nameRef, Type type )
+ {
+ InlineItem *i = new InlineItem;
+ i->loc = loc;
+ i->nameRef = nameRef;
+ i->nameTarg = 0;
+ i->children = 0;
+ i->tokenRegion = 0;
+ i->longestMatchPart = 0;
+ i->type = type;
+ return i;
+ }
+
+ /* Item carrying a token region and a token instance within it. */
+ static InlineItem *cons( const InputLoc &loc, RegionImpl *tokenRegion,
+ TokenInstance *longestMatchPart, Type type )
+ {
+ InlineItem *i = new InlineItem;
+ i->loc = loc;
+ i->nameRef = 0;
+ i->nameTarg = 0;
+ i->children = 0;
+ i->tokenRegion = tokenRegion;
+ i->longestMatchPart = longestMatchPart;
+ i->type = type;
+ return i;
+ }
+
+ /* Item carrying a name target. */
+ static InlineItem *cons( const InputLoc &loc, NameInst *nameTarg, Type type )
+ {
+ InlineItem *i = new InlineItem;
+ i->loc = loc;
+ i->nameRef = 0;
+ i->nameTarg = nameTarg;
+ i->children = 0;
+ i->tokenRegion = 0;
+ i->longestMatchPart = 0;
+ i->type = type;
+ return i;
+ }
+
+ /* Item with only a location and a kind. */
+ static InlineItem *cons( const InputLoc &loc, Type type )
+ {
+ InlineItem *i = new InlineItem;
+ i->loc = loc;
+ i->nameRef = 0;
+ i->nameTarg = 0;
+ i->children = 0;
+ i->tokenRegion = 0;
+ i->longestMatchPart = 0;
+ i->type = type;
+ return i;
+ }
+
+ InputLoc loc;
+ String data;
+ NameRef *nameRef;
+ NameInst *nameTarg;
+ InlineList *children;
+ RegionImpl *tokenRegion;
+ TokenInstance *longestMatchPart;
+ Type type;
+
+ /* Neighbour pointers; presumably maintained by the owning InlineList. */
+ InlineItem *prev, *next;
+};
+
+/* A doubly linked list of InlineItem. Instances are obtained through cons(). */
+struct InlineList
+:
+ public DList<InlineItem>
+{
+ /* The int argument is ignored. NOTE(review): presumably it exists only so
+ * the list cannot be default-constructed by accident -- confirm. */
+ InlineList( int i ) {}
+
+ /* Allocate an empty list. */
+ static InlineList *cons()
+ {
+ return new InlineList( 0 );
+ }
+};
+
+
+struct ProdEl;
+struct LangVarRef;
+struct ObjectField;
+
+struct PatternItem
+{
+ enum Form {
+ TypeRefForm,
+ InputTextForm
+ };
+
+ /* Item carrying text. The production element, region and variable
+ * reference are null and the bind id is zero. */
+ static PatternItem *cons( Form form, const InputLoc &loc, const String &data )
+ {
+ PatternItem *item = new PatternItem;
+ item->form = form;
+ item->loc = loc;
+ item->data = data;
+ item->prodEl = 0;
+ item->region = 0;
+ item->varRef = 0;
+ item->bindId = 0;
+ return item;
+ }
+
+ /* Item carrying a production element. The region and variable reference
+ * are null and the bind id is zero. */
+ static PatternItem *cons( Form form, const InputLoc &loc, ProdEl *prodEl )
+ {
+ PatternItem *item = new PatternItem;
+ item->form = form;
+ item->loc = loc;
+ item->prodEl = prodEl;
+ item->region = 0;
+ item->varRef = 0;
+ item->bindId = 0;
+ return item;
+ }
+
+ Form form;
+ InputLoc loc;
+ ProdEl *prodEl;
+ String data;
+ TokenRegion *region;
+ LangVarRef *varRef;
+ long bindId;
+ PatternItem *prev, *next;
+};
+
+struct LangExpr;
+
+struct PatternItemList
+ : public DList<PatternItem>
+{
+ /* Allocate a list that holds the given item as its only element. */
+ static PatternItemList *cons( PatternItem *patternItem )
+ {
+ PatternItemList *items = new PatternItemList;
+ items->append( patternItem );
+ return items;
+ }
+};
+
+struct ConsItem
+{
+ enum Trim {
+ TrimYes,
+ TrimNo,
+ TrimDefault
+ };
+
+ enum Type {
+ InputText,
+ ExprType,
+ LiteralType
+ };
+
+ /* Empty item: no valid type, all pointers null, bind id -1 and default
+ * trimming. The factories below start from this state. */
+ ConsItem()
+ :
+ type((Type)-1),
+ expr(0), langEl(0), prodEl(0),
+ bindId(-1),
+ trim(TrimDefault)
+ {}
+
+ /* Item carrying text. */
+ static ConsItem *cons( const InputLoc &loc, Type type, const String &data )
+ {
+ ConsItem *item = new ConsItem;
+ item->data = data;
+ item->type = type;
+ item->loc = loc;
+ return item;
+ }
+
+ /* Item carrying an expression, with an explicit trim setting. */
+ static ConsItem *cons( const InputLoc &loc, Type type, LangExpr *expr, Trim trim )
+ {
+ ConsItem *item = new ConsItem;
+ item->trim = trim;
+ item->expr = expr;
+ item->type = type;
+ item->loc = loc;
+ return item;
+ }
+
+ /* Item carrying a production element. */
+ static ConsItem *cons( const InputLoc &loc, Type type, ProdEl *prodEl )
+ {
+ ConsItem *item = new ConsItem;
+ item->prodEl = prodEl;
+ item->expr = 0;
+ item->type = type;
+ item->loc = loc;
+ return item;
+ }
+
+ InputLoc loc;
+ Type type;
+ String data;
+ LangExpr *expr;
+ LangEl *langEl;
+ ProdEl *prodEl;
+ long bindId;
+ Trim trim;
+
+ ConsItem *prev, *next;
+};
+
+struct ConsItemList
+:
+ public DList<ConsItem>
+{
+ /* Allocate a list that holds the given item as its only element. */
+ static ConsItemList *cons( ConsItem *ci )
+ {
+ ConsItemList *items = new ConsItemList;
+ items->append( ci );
+ return items;
+ }
+
+ /* Allocate an empty list. */
+ static ConsItemList *cons()
+ {
+ ConsItemList *items = new ConsItemList;
+ return items;
+ }
+
+ void resolve( Compiler *pd );
+ void evaluateSendStream( Compiler *pd, CodeVect &code );
+};
+
+struct Pattern
+{
+ /* Empty pattern: all pointers null, pattern/replacement id zero, and bind
+ * ids starting from 1. */
+ Pattern()
+ :
+ nspace(0), list(0),
+ patRepId(0),
+ langEl(0), pdaRun(0),
+ nextBindId(1)
+ {}
+
+ /* Allocate a pattern over the given item list, recording the namespace it
+ * appears in and its pattern/replacement id. */
+ static Pattern *cons( const InputLoc &loc, Namespace *nspace,
+ PatternItemList *list, int patRepId )
+ {
+ Pattern *pat = new Pattern;
+ pat->patRepId = patRepId;
+ pat->list = list;
+ pat->nspace = nspace;
+ pat->loc = loc;
+ return pat;
+ }
+
+ InputLoc loc;
+ Namespace *nspace;
+ PatternItemList *list;
+ long patRepId;
+ LangEl *langEl;
+ struct pda_run *pdaRun;
+ long nextBindId;
+ Pattern *prev, *next;
+};
+
+typedef DList<Pattern> PatList;
+
+struct Constructor
+{
+ /* Allocate a constructor over the given item list. The language element
+ * and pda run start out null, bind ids start from 1, and the parse flag is
+ * set. */
+ static Constructor *cons( const InputLoc &loc, Namespace *nspace,
+ ConsItemList *list, int patRepId )
+ {
+ Constructor *con = new Constructor;
+
+ /* Values supplied by the caller. */
+ con->loc = loc;
+ con->nspace = nspace;
+ con->list = list;
+ con->patRepId = patRepId;
+
+ /* Fixed initial state. */
+ con->langEl = 0;
+ con->pdaRun = 0;
+ con->nextBindId = 1;
+ con->parse = true;
+
+ return con;
+ }
+
+ InputLoc loc;
+ Namespace *nspace;
+ ConsItemList *list;
+ int patRepId;
+ LangEl *langEl;
+ struct pda_run *pdaRun;
+ long nextBindId;
+ bool parse;
+
+ Constructor *prev, *next;
+};
+
+typedef DList<Constructor> ConsList;
+
+struct ParserText
+{
+ /* Allocate a parser text over the given item list. The language element
+ * and pda run start out null, bind ids start from 1, the parse flag is
+ * set and the reducer id is -1. */
+ static ParserText *cons( const InputLoc &loc,
+ Namespace *nspace, ConsItemList *list,
+ bool used, bool reduce, bool read,
+ const String &reducer )
+ {
+ ParserText *text = new ParserText;
+
+ /* Values supplied by the caller. */
+ text->loc = loc;
+ text->nspace = nspace;
+ text->list = list;
+ text->used = used;
+ text->reduce = reduce;
+ text->read = read;
+ text->reducer = reducer;
+
+ /* Fixed initial state. */
+ text->langEl = 0;
+ text->pdaRun = 0;
+ text->nextBindId = 1;
+ text->parse = true;
+ text->reducerId = -1;
+
+ return text;
+ }
+
+ InputLoc loc;
+ Namespace *nspace;
+ ConsItemList *list;
+ LangEl *langEl;
+ struct pda_run *pdaRun;
+ long nextBindId;
+ bool parse;
+ bool used;
+ bool reduce;
+ bool read;
+ String reducer;
+ int reducerId;
+
+ ParserText *prev, *next;
+};
+
+typedef DList<ParserText> ParserTextList;
+
+struct Function;
+
+/* Identifies an iterator definition by its kind and, optionally, a function.
+ * Used as the element type of IterDefSet, ordered by CmpIterDef. */
+struct IterDef
+{
+ /* Kind of iterator. */
+ enum Type { Tree, Child, RevChild, Repeat,
+ RevRepeat, User, ListEl,
+ RevListVal, MapEl };
+
+ /* Both constructors are defined elsewhere. */
+ IterDef( Type type, Function *func );
+ IterDef( Type type );
+
+ Type type;
+
+ /* CmpIterDef only compares this field when type is User. */
+ Function *func;
+};
+
+/* Implementation details of an iterator: its kind, an optional function, a
+ * few flags, and one code_t value per iterator operation. */
+struct IterImpl
+{
+ /* Kind of iterator. Has two more members (ListVal, MapVal) than
+ * IterDef::Type. */
+ enum Type { Tree, Child, RevChild, Repeat,
+ RevRepeat, User, ListEl, ListVal,
+ RevListVal, MapEl, MapVal };
+
+ /* Both constructors are defined elsewhere. */
+ IterImpl( Type type, Function *func );
+ IterImpl( Type type );
+
+ Type type;
+
+ Function *func;
+ bool useFuncId;
+ bool useSearchUT;
+ bool useGenericId;
+
+ /* NOTE(review): presumably the instructions emitted to create (WV/WC
+ * variants), unwind, destroy and advance the iterator -- confirm against
+ * the constructors. */
+ code_t inCreateWV;
+ code_t inCreateWC;
+ code_t inUnwind;
+ code_t inDestroy;
+ code_t inAdvance;
+
+ /* NOTE(review): presumably the instructions for reading and writing the
+ * current value -- confirm. */
+ code_t inGetCurR;
+ code_t inGetCurWC;
+ code_t inSetCurWC;
+
+ code_t inRefFromCur;
+};
+
+struct CmpIterDef
+{
+ /* Three-way comparison of iterator definitions. Orders by type first.
+ * Definitions of the same type compare equal, except for User iterators,
+ * which are further ordered by their function pointer. */
+ static int compare( const IterDef &id1, const IterDef &id2 )
+ {
+ if ( id1.type != id2.type )
+ return id1.type < id2.type ? -1 : 1;
+
+ /* Same type from here on. */
+ if ( id1.type != IterDef::User || id1.func == id2.func )
+ return 0;
+
+ return id1.func < id2.func ? -1 : 1;
+ }
+};
+
+typedef AvlSet<IterDef, CmpIterDef> IterDefSet;
+typedef AvlSetEl<IterDef> IterDefSetEl;
+
+
+/*
+ * Unique Types.
+ */
+
+/*
+ * type_ref -> qualified_name
+ * type_ref -> '*' type_ref
+ * type_ref -> '&' type_ref
+ * type_ref -> list type_ref type_ref
+ * type_ref -> map type_ref type_ref
+ * type_ref -> vector type_ref
+ * type_ref -> parser type_ref
+ * type_ref -> iter_tree type_ref
+ * type_ref -> iter_child type_ref
+ * type_ref -> iter_revchild type_ref
+ * type_ref -> iter_repeat type_ref
+ * type_ref -> iter_revrepeat type_ref
+ * type_ref -> iter_user type_ref
+ *
+ * type -> nil
+ * type -> def term
+ * type -> def nonterm
+ * type -> '*' type
+ * type -> '&' type
+ * type -> list type
+ * type -> map type type
+ * type -> vector type
+ * type -> parser type
+ * type -> iter_tree type
+ * type -> iter_child type
+ * type -> iter_revchild type
+ * type -> iter_repeat type
+ * type -> iter_revrepeat type
+ * type -> iter_user type
+ */
+
+struct UniqueType : public AvlTreeEl<UniqueType>
+{
+ /* Type identified by its type id alone. */
+ UniqueType( enum TYPE typeId )
+ :
+ typeId(typeId),
+ langEl(0), iterDef(0), structEl(0), generic(0)
+ {}
+
+ /* Type qualified by a language element. */
+ UniqueType( enum TYPE typeId, LangEl *langEl )
+ :
+ typeId(typeId),
+ langEl(langEl), iterDef(0), structEl(0), generic(0)
+ {}
+
+ /* Type qualified by an iterator definition. */
+ UniqueType( enum TYPE typeId, IterDef *iterDef )
+ :
+ typeId(typeId),
+ langEl(0), iterDef(iterDef), structEl(0), generic(0)
+ {}
+
+ /* Type qualified by a struct element. */
+ UniqueType( enum TYPE typeId, StructEl *structEl )
+ :
+ typeId(typeId),
+ langEl(0), iterDef(0), structEl(structEl), generic(0)
+ {}
+
+ /* Type qualified by a generic type. */
+ UniqueType( enum TYPE typeId, GenericType *generic )
+ :
+ typeId(typeId),
+ langEl(0), iterDef(0), structEl(0), generic(generic)
+ {}
+
+ enum TYPE typeId;
+ LangEl *langEl;
+ IterDef *iterDef;
+ StructEl *structEl;
+ GenericType *generic;
+
+ ObjectDef *objectDef();
+
+ /* True for tree types. */
+ bool tree()
+ {
+ return typeId == TYPE_TREE;
+ }
+
+ /* True for the parser generic. */
+ bool parser()
+ {
+ if ( typeId != TYPE_GENERIC )
+ return false;
+ return generic->typeId == GEN_PARSER;
+ }
+
+ /* True for struct and generic types. */
+ bool ptr()
+ {
+ return typeId == TYPE_STRUCT || typeId == TYPE_GENERIC;
+ }
+
+ /* True for a list generic whose value type is exactly 'ut'. */
+ bool listOf( UniqueType *ut )
+ {
+ if ( typeId != TYPE_GENERIC )
+ return false;
+ return generic->typeId == GEN_LIST && generic->valueUt == ut;
+ }
+
+ /* True for struct, generic, int and bool types. */
+ bool val()
+ {
+ return ptr() || typeId == TYPE_INT || typeId == TYPE_BOOL;
+ }
+};
+
+/* Comparison policy for UniqueType, used to order UniqueTypeMap. The compare
+ * function is defined elsewhere. */
+struct CmpUniqueType
+{
+ static int compare( const UniqueType &ut1, const UniqueType &ut2 );
+};
+
+typedef AvlBasic< UniqueType, CmpUniqueType > UniqueTypeMap;
+
+/* Repetition applied to a type reference. Values start at 1, so zero is not
+ * a valid RepeatType. */
+enum RepeatType {
+ RepeatNone = 1,
+ RepeatRepeat,
+ RepeatList,
+ RepeatOpt,
+ RepeatLeftRepeat,
+ RepeatLeftList,
+};
+
+/*
+ * Repeat types.
+ */
+
+/* Element of UniqueRepeatMap: pairs a repeat type with a language element. */
+struct UniqueRepeat
+ : public AvlTreeEl<UniqueRepeat>
+{
+ /* Stores the repeat type and language element; declLangEl starts out
+ * null. */
+ UniqueRepeat( RepeatType repeatType, LangEl *langEl ) :
+ repeatType(repeatType),
+ langEl(langEl), declLangEl(0) {}
+
+ RepeatType repeatType;
+ LangEl *langEl;
+
+ /* Null after construction. NOTE(review): presumably the language element
+ * declared for this repeat -- confirm. */
+ LangEl *declLangEl;
+};
+
+/* Comparison policy for UniqueRepeat, used to order UniqueRepeatMap. The
+ * compare function is defined elsewhere. */
+struct CmpUniqueRepeat
+{
+ static int compare( const UniqueRepeat &ut1, const UniqueRepeat &ut2 );
+};
+
+typedef AvlBasic< UniqueRepeat, CmpUniqueRepeat > UniqueRepeatMap;
+
+/*
+ * Unique generics. Allows us to do singleton declarations of generic types and
+ * supporting structures. For example, the list type, but also the list element
+ * struct created for the list type.
+ */
+
+/* Element of UniqueGenericMap: identifies a generic by its kind and its key
+ * and value types, and holds the structures created for it. */
+struct UniqueGeneric
+ : public AvlTreeEl<UniqueGeneric>
+{
+ /* Kind of generic. */
+ enum Type
+ {
+ List,
+ ListEl,
+ Map,
+ MapEl,
+ Parser
+ };
+
+ /* Generic with a value type only; the key is null. */
+ UniqueGeneric( Type type, UniqueType *value )
+ :
+ type(type),
+ key(0),
+ value(value),
+ generic(0),
+ structEl(0)
+ {}
+
+ /* Generic with both a key and a value type. */
+ UniqueGeneric( Type type, UniqueType *key, UniqueType *value )
+ :
+ type(type),
+ key(key),
+ value(value),
+ generic(0),
+ structEl(0)
+ {}
+
+ Type type;
+ UniqueType *key;
+ UniqueType *value;
+
+ /* Both null after construction; filled in elsewhere. */
+ GenericType *generic;
+ StructEl *structEl;
+};
+
+/* Comparison policy for UniqueGeneric, used to order UniqueGenericMap. The
+ * compare function is defined elsewhere. */
+struct CmpUniqueGeneric
+{
+ static int compare( const UniqueGeneric &ut1,
+ const UniqueGeneric &ut2 );
+};
+
+typedef AvlBasic< UniqueGeneric, CmpUniqueGeneric > UniqueGenericMap;
+
+/*
+ *
+ */
+
+typedef AvlMap< StringVect, int, CmpStrVect > VectorTypeIdMap;
+typedef AvlMapEl< StringVect, int > VectorTypeIdMapEl;
+
+typedef Vector<TypeRef*> TypeRefVect;
+
+/* A reference to a type as written in the source. A TypeRef is created in an
+ * unresolved form by one of the cons() factories and resolved to a UniqueType
+ * by the resolve* member functions, which are defined elsewhere. */
+struct TypeRef
+{
+ /* Form of the reference. */
+ enum Type
+ {
+ Unspecified,
+ Name,
+ Literal,
+ Iterator,
+ List,
+ ListPtrs,
+ ListEl,
+ Map,
+ MapEl,
+ MapPtrs,
+ Parser,
+ Ref
+ };
+
+ /* Empty reference: no valid type, every pointer null, no repetition. The
+ * factories below start from this state. */
+ TypeRef()
+ :
+ type((Type)-1),
+ nspaceQual(0),
+ pdaLiteral(0),
+ iterCall(0),
+ iterDef(0),
+ typeRef1(0),
+ typeRef2(0),
+ typeRef3(0),
+ repeatType(RepeatNone),
+ parsedVarRef(0),
+ parsedTypeRef(0),
+ nspace(0),
+ uniqueType(0),
+ searchUniqueType(0),
+ generic(0),
+ searchTypeRef(0)
+ {}
+
+ /* Qualification and a type name. These require lookup. */
+ static TypeRef *cons( const InputLoc &loc, NamespaceQual *nspaceQual,
+ const String &typeName )
+ {
+ TypeRef *t = new TypeRef;
+ t->type = Name;
+ t->loc = loc;
+ t->nspaceQual = nspaceQual;
+ t->typeName = typeName;
+ t->repeatType = RepeatNone;
+ return t;
+ }
+
+ /* Qualification, a type name and a repeat type. These require lookup. */
+ static TypeRef *cons( const InputLoc &loc, NamespaceQual *nspaceQual,
+ String typeName, RepeatType repeatType )
+ {
+ TypeRef *t = cons( loc, nspaceQual, typeName );
+ t->repeatType = repeatType;
+ return t;
+ }
+
+ /* As above, additionally recording a parsed variable reference. */
+ static TypeRef *cons( const InputLoc &loc, LangVarRef *parsedVarRef,
+ NamespaceQual *nspaceQual, String typeName, RepeatType repeatType )
+ {
+ TypeRef *t = cons( loc, nspaceQual, typeName );
+ t->parsedVarRef = parsedVarRef;
+ t->repeatType = repeatType;
+ return t;
+ }
+
+ /* As above, additionally recording a parsed type reference. */
+ static TypeRef *cons( const InputLoc &loc, TypeRef *parsedTypeRef,
+ NamespaceQual *nspaceQual, String typeName, RepeatType repeatType )
+ {
+ TypeRef *t = cons( loc, nspaceQual, typeName );
+ t->parsedTypeRef = parsedTypeRef;
+ t->repeatType = repeatType;
+ return t;
+ }
+
+ /* Qualification and a literal. These require lookup. */
+ static TypeRef *cons( const InputLoc &loc, NamespaceQual *nspaceQual,
+ PdaLiteral *pdaLiteral )
+ {
+ TypeRef *t = new TypeRef;
+ t->type = Literal;
+ t->loc = loc;
+ t->nspaceQual = nspaceQual;
+ t->pdaLiteral = pdaLiteral;
+ t->repeatType = RepeatNone;
+ return t;
+ }
+
+ /* Literal reference that also records a parsed type reference. */
+ static TypeRef *cons( const InputLoc &loc, TypeRef *parsedTypeRef,
+ NamespaceQual *nspaceQual, PdaLiteral *pdaLiteral )
+ {
+ TypeRef *t = cons( loc, nspaceQual, pdaLiteral );
+ t->parsedTypeRef = parsedTypeRef;
+ return t;
+ }
+
+ /* Qualification, a literal and a repeat type. These require lookup. */
+ static TypeRef *cons( const InputLoc &loc, NamespaceQual *nspaceQual,
+ PdaLiteral *pdaLiteral, RepeatType repeatType )
+ {
+ TypeRef *t = cons( loc, nspaceQual, pdaLiteral );
+ t->repeatType = repeatType;
+ return t;
+ }
+
+ /* As above, additionally recording a parsed variable reference. */
+ static TypeRef *cons( const InputLoc &loc, LangVarRef *parsedVarRef,
+ NamespaceQual *nspaceQual, PdaLiteral *pdaLiteral, RepeatType repeatType )
+ {
+ TypeRef *t = cons( loc, nspaceQual, pdaLiteral );
+ t->parsedVarRef = parsedVarRef;
+ t->repeatType = repeatType;
+ return t;
+ }
+
+ /* As above, additionally recording a parsed type reference. */
+ static TypeRef *cons( const InputLoc &loc, TypeRef *parsedTypeRef,
+ NamespaceQual *nspaceQual, PdaLiteral *pdaLiteral, RepeatType repeatType )
+ {
+ TypeRef *t = cons( loc, nspaceQual, pdaLiteral );
+ t->parsedTypeRef = parsedTypeRef;
+ t->repeatType = repeatType;
+ return t;
+ }
+
+ /* Generics. The form is given by the caller, along with two type
+ * arguments. */
+ static TypeRef *cons( const InputLoc &loc, Type type,
+ NamespaceQual *nspaceQual, TypeRef *typeRef1, TypeRef *typeRef2 )
+ {
+ TypeRef *t = new TypeRef;
+ t->type = type;
+ t->loc = loc;
+ t->nspaceQual = nspaceQual;
+ t->typeRef1 = typeRef1;
+ t->typeRef2 = typeRef2;
+ t->repeatType = RepeatNone;
+ return t;
+ }
+
+ /* Generics with three type arguments. */
+ static TypeRef *cons( const InputLoc &loc, Type type,
+ NamespaceQual *nspaceQual, TypeRef *typeRef1,
+ TypeRef *typeRef2, TypeRef *typeRef3 )
+ {
+ TypeRef *t = new TypeRef;
+ t->type = type;
+ t->loc = loc;
+ t->nspaceQual = nspaceQual;
+ t->typeRef1 = typeRef1;
+ t->typeRef2 = typeRef2;
+ t->typeRef3 = typeRef3;
+ t->repeatType = RepeatNone;
+ return t;
+ }
+
+ /* Pointers and Refs. No namespace qualification is recorded. */
+ static TypeRef *cons( const InputLoc &loc, Type type, TypeRef *typeRef1 )
+ {
+ TypeRef *t = new TypeRef;
+ t->type = type;
+ t->loc = loc;
+ t->typeRef1 = typeRef1;
+ t->repeatType = RepeatNone;
+ return t;
+ }
+
+ /* Resolution not needed. */
+
+ /* Iterator definition. The given type reference is stored as the search
+ * type reference. */
+ static TypeRef *cons( const InputLoc &loc, TypeRef *typeRef, IterCall *iterCall )
+ {
+ TypeRef *t = new TypeRef;
+ t->type = Iterator;
+ t->loc = loc;
+ t->repeatType = RepeatNone;
+ t->iterCall = iterCall;
+ t->searchTypeRef = typeRef;
+ return t;
+ }
+
+ /* Unique type is given directly. */
+ static TypeRef *cons( const InputLoc &loc, UniqueType *uniqueType )
+ {
+ TypeRef *t = new TypeRef;
+ t->type = Unspecified;
+ t->loc = loc;
+ t->repeatType = RepeatNone;
+ t->uniqueType = uniqueType;
+ return t;
+ }
+
+ /* Resolution entry points; all defined elsewhere. */
+ void resolveRepeat( Compiler *pd );
+
+ Namespace *resolveNspace( Compiler *pd );
+ UniqueType *resolveIterator( Compiler *pd );
+ UniqueType *resolveTypeName( Compiler *pd );
+ UniqueType *resolveTypeLiteral( Compiler *pd );
+ UniqueType *resolveTypeList( Compiler *pd );
+ UniqueType *resolveTypeListEl( Compiler *pd );
+ UniqueType *resolveTypeMap( Compiler *pd );
+ UniqueType *resolveTypeMapEl( Compiler *pd );
+ UniqueType *resolveTypeParser( Compiler *pd );
+ UniqueType *resolveType( Compiler *pd );
+ UniqueType *resolveTypeRef( Compiler *pd );
+
+ bool uniqueGeneric( UniqueGeneric *&inMap,
+ Compiler *pd, const UniqueGeneric &searchKey );
+
+ StructEl *declareMapElStruct( Compiler *pd, TypeRef *keyType, TypeRef *valType );
+ StructEl *declareListEl( Compiler *pd, TypeRef *valType );
+
+ /* As written in the source. */
+ Type type;
+ InputLoc loc;
+ NamespaceQual *nspaceQual;
+ String typeName;
+ PdaLiteral *pdaLiteral;
+ IterCall *iterCall;
+ IterDef *iterDef;
+ TypeRef *typeRef1;
+ TypeRef *typeRef2;
+ TypeRef *typeRef3;
+ RepeatType repeatType;
+
+ /* For pattern and constructor context. */
+ LangVarRef *parsedVarRef;
+ TypeRef *parsedTypeRef;
+
+ /* Resolved. */
+ Namespace *nspace;
+ UniqueType *uniqueType;
+ UniqueType *searchUniqueType;
+ GenericType *generic;
+ TypeRef *searchTypeRef;
+};
+
+typedef DList<ObjectField> ParameterList;
+
+/* Describes a method: its name, return type, parameters, the two opcodes
+ * attached to it, and a set of flags and pointers that are all cleared at
+ * construction (except useCallObj, which starts out true). */
+struct ObjectMethod
+{
+ enum Type
+ {
+ Call,
+ ParseFinish
+ };
+
+ /* Method whose return type is given as a type reference. The 'types'
+ * argument is not used by this overload. paramUTs is initialised to null
+ * so that it is never read in an indeterminate state. */
+ ObjectMethod( TypeRef *returnTypeRef, String name,
+ int opcodeWV, int opcodeWC, int numParams,
+ UniqueType **types, ParameterList *paramList, bool isConst )
+ :
+ type(Call),
+ returnUT(0),
+ returnTypeRef(returnTypeRef),
+ returnTypeId(0),
+ name(name),
+ opcodeWV(opcodeWV),
+ opcodeWC(opcodeWC),
+ numParams(numParams),
+ paramUTs(0),
+ paramList(paramList),
+ isConst(isConst),
+ funcId(0),
+ useFuncId(false),
+ useCallObj(true),
+ func(0),
+ iterDef(0),
+ useFnInstr(false),
+ useGenericId(false),
+ generic(0)
+ {
+ }
+
+ /* Method whose return type is given as a unique type. The first
+ * 'numParams' entries of 'types' are copied into a newly allocated
+ * array. */
+ ObjectMethod( UniqueType *returnUT, String name,
+ int opcodeWV, int opcodeWC, int numParams,
+ UniqueType **types, ParameterList *paramList,
+ bool isConst )
+ :
+ type(Call),
+ returnUT(returnUT),
+ returnTypeRef(0),
+ returnTypeId(0),
+ name(name),
+ opcodeWV(opcodeWV),
+ opcodeWC(opcodeWC),
+ numParams(numParams),
+ paramUTs(0),
+ paramList(paramList),
+ isConst(isConst),
+ funcId(0),
+ useFuncId(false),
+ useCallObj(true),
+ func(0),
+ iterDef(0),
+ useFnInstr(false),
+ useGenericId(false),
+ generic(0)
+ {
+ this->paramUTs = new UniqueType*[numParams];
+
+ /* Skip the copy when there is nothing to copy: memcpy must not be
+ * given a null source, even with a zero length. */
+ if ( numParams > 0 )
+ memcpy( this->paramUTs, types, sizeof(UniqueType*)*numParams );
+ }
+
+ Type type;
+ UniqueType *returnUT;
+ TypeRef *returnTypeRef;
+ long returnTypeId;
+ String name;
+ long opcodeWV;
+ long opcodeWC;
+ long numParams;
+
+ /* Null, or an array allocated with new[]. It is not released by this
+ * struct. */
+ UniqueType **paramUTs;
+ ParameterList *paramList;
+ bool isConst;
+ long funcId;
+ bool useFuncId;
+ bool useCallObj;
+ Function *func;
+ IterDef *iterDef;
+ bool useFnInstr;
+
+ bool useGenericId;
+ GenericType *generic;
+};
+
+/* Thin wrapper around a production element pointer. ObjectField keeps a
+ * Vector of these. */
+struct RhsVal
+{
+ RhsVal( ProdEl *prodEl )
+ :
+ prodEl(prodEl)
+ {}
+
+ ProdEl *prodEl;
+};
+
+/* A field of an object: a named, typed slot together with its kind, its
+ * offset, a set of flags, and the code_t values used to get and set it. */
+struct ObjectField
+{
+ /* Kind of field. Values start at 1. */
+ enum Type
+ {
+ UserLocalType = 1,
+ UserFieldType,
+ StructFieldType,
+ LhsElType,
+ RedRhsType,
+ InbuiltFieldType,
+ InbuiltOffType,
+ InbuiltObjectType,
+ RhsNameType,
+ ParamValType,
+ ParamRefType,
+ LexSubstrType,
+ GenericElementType,
+ GenericDependentType
+ };
+
+ /* Clears the pointers and flags and sets every get/set code to IN_HALT.
+ * Note that type and constValId are not initialised here; cons() assigns
+ * type. */
+ ObjectField()
+ :
+ typeRef(0),
+ scope(0),
+ offset(0),
+ beenReferenced(false),
+ isConst(false),
+ refActive(false),
+ isExport(false),
+ isConstVal(false),
+ useGenericId(false),
+ generic(0),
+ mapKeyField(0),
+ dirtyTree(false),
+ inGetR( IN_HALT ),
+ inGetWC( IN_HALT ),
+ inGetWV( IN_HALT ),
+ inSetWC( IN_HALT ),
+ inSetWV( IN_HALT ),
+ inGetValR( IN_HALT ),
+ inGetValWC( IN_HALT ),
+ inGetValWV( IN_HALT ),
+ inSetValWC( IN_HALT ),
+ inSetValWV( IN_HALT ),
+ iterImpl( 0 )
+ {}
+
+ /* Allocate a field of the given kind, type and name, then run
+ * initField() on it. */
+ static ObjectField *cons( const InputLoc &loc,
+ Type type, TypeRef *typeRef, const String &name )
+ {
+ ObjectField *c = new ObjectField;
+ c->loc = loc;
+ c->type = type;
+ c->typeRef = typeRef;
+ c->name = name;
+ c->initField( );
+ return c;
+ }
+
+ /* Defined elsewhere. */
+ void initField();
+
+ /* True for by-value and by-reference parameters. */
+ bool isParam()
+ { return type == ParamValType || type == ParamRefType; }
+
+ bool isLhsEl()
+ { return type == LhsElType; }
+
+ bool isRhsGet()
+ { return type == RhsNameType; }
+
+ /* True for every kind except RhsNameType, InbuiltFieldType and
+ * InbuiltObjectType. */
+ bool useOffset()
+ {
+ return type != RhsNameType &&
+ type != InbuiltFieldType &&
+ type != InbuiltObjectType;
+ }
+
+ bool isInbuiltObject()
+ { return type == InbuiltObjectType; }
+
+ /* True for the six kinds listed in the switch, false for all others. */
+ bool exists()
+ {
+ switch ( type ) {
+ case ObjectField::LhsElType:
+ case ObjectField::UserLocalType:
+ case ObjectField::RedRhsType:
+ case ObjectField::UserFieldType:
+ case ObjectField::StructFieldType:
+ case ObjectField::GenericDependentType:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ InputLoc loc;
+ Type type;
+ TypeRef *typeRef;
+ String name;
+ NameScope *scope;
+ long offset;
+ bool beenReferenced;
+ /* Declared const. */
+ bool isConst;
+ bool refActive;
+ bool isExport;
+
+ /* True when the value is a constant that is retrieved by the runtime.
+ * Requires a const val id. */
+ bool isConstVal;
+ int constValId;
+ String constValArg;
+
+ bool useGenericId;
+ GenericType *generic;
+
+ ObjectField *mapKeyField;
+
+ /* True if some aspect of the tree has possibly been written to. This does
+ * not include attributes. This is here so we can optimize the storage of
+ * old lhs vars. If only a lhs attribute changes we don't need to preserve
+ * the original for backtracking. */
+ bool dirtyTree;
+
+ Vector<RhsVal> rhsVal;
+
+ /* Get/set codes; all IN_HALT after construction. */
+ code_t inGetR;
+ code_t inGetWC;
+ code_t inGetWV;
+ code_t inSetWC;
+ code_t inSetWV;
+ code_t inGetValR;
+ code_t inGetValWC;
+ code_t inGetValWV;
+ code_t inSetValWC;
+ code_t inSetValWV;
+
+ IterImpl *iterImpl;
+
+ ObjectField *prev, *next;
+};
+
+typedef DListVal<ObjectField*> FieldList;
+
+typedef DList<ObjectField> ParameterList;
+
+
+struct ObjectDef /* A typed, named definition that owns a field list and a root scope. */
+{
+ enum Type {
+ UserType,
+ FrameType,
+ IterType,
+ BuiltinType,
+ StructType
+ };
+ /* Both counters start at zero; type, name, id and rootScope are assigned by cons(). */
+ ObjectDef()
+ :
+ nextOffset(0),
+ firstNonTree(0)
+ {}
+ /* Allocate a definition and give it a new root scope whose owningObj points back at it. */
+ static ObjectDef *cons( Type type, String name, int id )
+ {
+ ObjectDef *o = new ObjectDef;
+
+ o->type = type;
+ o->name = name;
+ o->id = id;
+
+ o->rootScope = new NameScope;
+ o->rootScope->owningObj = o;
+
+ return o;
+ }
+
+ Type type;
+ String name;
+ FieldList fieldList;
+
+ NameScope *rootScope;
+
+ NameScope *pushScope( NameScope *curScope );
+
+ long id;
+ long nextOffset;
+ long firstNonTree;
+
+ void referenceField( Compiler *pd, ObjectField *field );
+ void placeField( Compiler *pd, ObjectField *field );
+ void createCode( Compiler *pd, CodeVect &code );
+ ObjectField *findFieldInScope( const NameScope *scope, const String &name ) const;
+ ObjectField *checkRedecl( NameScope *inScope, const String &name );
+ void insertField( NameScope *inScope, const String &name, ObjectField *value );
+ void resolve( Compiler *pd );
+ ObjectField *findFieldNum( long offset );
+ ObjectField *findFieldType( Compiler *pd, UniqueType *ut );
+ /* size() reports nextOffset; sizeTrees() reports firstNonTree. */
+ long size() { return nextOffset; }
+ long sizeTrees() { return firstNonTree; }
+};
+
+struct CallArg
+{
+ CallArg( LangExpr *expr ) /* Stores expr; exprUT starts null, both offsets start at -1. */
+ : expr(expr), exprUT(0), offTmp(-1), offQualRef(-1) {}
+
+ LangExpr *expr;
+ UniqueType *exprUT;
+ int offTmp;
+ int offQualRef;
+};
+
+typedef Vector<LangExpr*> ExprVect;
+typedef Vector<CallArg*> CallArgVect;
+typedef Vector<String> StringVect;
+
+struct FieldInit /* Pairs a name with the expression assigned to it. */
+{
+ static FieldInit *cons( const InputLoc &loc, String name, LangExpr *expr )
+ {
+ FieldInit *fi = new FieldInit;
+ fi->loc = loc;
+ fi->name = name;
+ fi->expr = expr;
+ fi->exprUT = 0; /* Never leave the pointer indeterminate (CallArg nulls its exprUT too). */
+ return fi;
+ }
+ /* loc, name and expr come from cons(); exprUT stays null until something assigns it. */
+ InputLoc loc;
+ String name;
+ LangExpr *expr;
+ UniqueType *exprUT;
+};
+
+typedef Vector<FieldInit*> FieldInitVect;
+
+struct VarRefLookup
+{
+ VarRefLookup( int lastPtrInQual, int firstConstPart,
+ ObjectDef *inObject, NameScope *inScope )
+ :
+ lastPtrInQual(lastPtrInQual),
+ firstConstPart(firstConstPart),
+ inObject(inObject),
+ inScope(inScope),
+ objField(0),
+ objMethod(0),
+ uniqueType(0),
+ iterSearchUT(0)
+ {}
+ /* The first four members come from the constructor arguments; the last four start null. */
+ int lastPtrInQual;
+ int firstConstPart;
+ ObjectDef *inObject;
+ NameScope *inScope;
+ ObjectField *objField;
+ ObjectMethod *objMethod;
+ UniqueType *uniqueType;
+ UniqueType *iterSearchUT;
+};
+
+struct QualItem
+{
+ enum Form { Dot, Arrow };
+ /* Stores form, loc and data exactly as given. */
+ QualItem( Form form, const InputLoc &loc, const String &data )
+ : form(form), loc(loc), data(data) {}
+
+ Form form;
+ InputLoc loc;
+ String data;
+};
+
+typedef Vector<QualItem> QualItemVect;
+
+struct LangVarRef /* Holds a name, its qualification list and the namespace/struct/scope it was built with. */
+{
+ static LangVarRef *cons( const InputLoc &loc, Namespace *nspace,
+ StructDef *structDef, NameScope *scope,
+ NamespaceQual *nspaceQual, QualItemVect *qual,
+ const String &name )
+ {
+ LangVarRef *l = new LangVarRef;
+ l->loc = loc;
+ l->nspace = nspace;
+ l->structDef = structDef;
+ l->scope = scope;
+ l->nspaceQual = nspaceQual;
+ l->qual = qual;
+ l->name = name;
+ return l;
+ }
+ /* Shorter overload: qualification is NamespaceQual::cons( nspace ) plus a newly allocated QualItemVect. */
+ static LangVarRef *cons( const InputLoc &loc, Namespace *nspace,
+ StructDef *structDef, NameScope *scope, const String &name )
+ {
+ return cons( loc, nspace, structDef, scope,
+ NamespaceQual::cons( nspace ), new QualItemVect, name );
+ }
+
+ void resolve( Compiler *pd ) const;
+ UniqueType *lookup( Compiler *pd ) const;
+
+ UniqueType *loadField( Compiler *pd, CodeVect &code, ObjectDef *inObject,
+ ObjectField *el, bool forWriting, bool revert ) const;
+
+ VarRefLookup lookupIterCall( Compiler *pd ) const;
+ VarRefLookup lookupMethod( Compiler *pd ) const;
+ VarRefLookup lookupField( Compiler *pd ) const;
+
+ VarRefLookup lookupQualification( Compiler *pd, NameScope *rootScope ) const;
+ VarRefLookup lookupObj( Compiler *pd ) const;
+ VarRefLookup lookupMethodObj( Compiler *pd ) const;
+
+ bool isInbuiltObject() const;
+ bool isLocalRef() const;
+ bool isProdRef( Compiler *pd ) const;
+ bool isStructRef() const;
+ void loadQualification( Compiler *pd, CodeVect &code, NameScope *rootScope,
+ int lastPtrInQual, bool forWriting, bool revert ) const;
+ void loadInbuiltObject( Compiler *pd, CodeVect &code,
+ int lastPtrInQual, bool forWriting ) const;
+ void loadLocalObj( Compiler *pd, CodeVect &code,
+ int lastPtrInQual, bool forWriting ) const;
+ void loadContextObj( Compiler *pd, CodeVect &code,
+ int lastPtrInQual, bool forWriting ) const;
+ void loadGlobalObj( Compiler *pd, CodeVect &code,
+ int lastPtrInQual, bool forWriting ) const;
+ void loadObj( Compiler *pd, CodeVect &code, int lastPtrInQual, bool forWriting ) const;
+ void loadScopedObj( Compiler *pd, CodeVect &code,
+ NameScope *scope, int lastPtrInQual, bool forWriting ) const;
+
+ void verifyRefPossible( Compiler *pd, VarRefLookup &lookup ) const;
+ bool canTakeRef( Compiler *pd, VarRefLookup &lookup ) const;
+
+ void setFieldIter( Compiler *pd, CodeVect &code, ObjectDef *inObject,
+ ObjectField *objField, UniqueType *objUT, UniqueType *exprType,
+ bool revert ) const;
+ void setFieldSearch( Compiler *pd, CodeVect &code,
+ ObjectDef *inObject, UniqueType *exprType ) const;
+ void setField( Compiler *pd, CodeVect &code, ObjectDef *inObject,
+ ObjectField *el, UniqueType *exprUT, bool revert ) const;
+
+ void assignValue( Compiler *pd, CodeVect &code, UniqueType *exprUT ) const;
+
+ IterImpl *chooseTriterCall( Compiler *pd, UniqueType *searchUT, CallArgVect *args );
+
+ /* The deref generics value is for iterator calls with lists and maps as args. */
+ ObjectField **evaluateArgs( Compiler *pd, CodeVect &code,
+ VarRefLookup &lookup, CallArgVect *args );
+
+ void callOperation( Compiler *pd, CodeVect &code, VarRefLookup &lookup ) const;
+ UniqueType *evaluateCall( Compiler *pd, CodeVect &code, CallArgVect *args );
+ UniqueType *evaluate( Compiler *pd, CodeVect &code, bool forWriting = false ) const;
+ ObjectField *evaluateRef( Compiler *pd, CodeVect &code, long pushCount ) const;
+ ObjectField *preEvaluateRef( Compiler *pd, CodeVect &code ) const;
+ void resetActiveRefs( Compiler *pd, VarRefLookup &lookup, ObjectField **paramRefs ) const;
+ long loadQualificationRefs( Compiler *pd, CodeVect &code, NameScope *rootScope ) const;
+ void popRefQuals( Compiler *pd, CodeVect &code,
+ VarRefLookup &lookup, CallArgVect *args, bool temps ) const;
+
+ bool isFinishCall( VarRefLookup &lookup ) const;
+ /* NOTE(review): neither cons() overload assigns argSize -- confirm it is written before it is read. */
+ InputLoc loc;
+ Namespace *nspace;
+ StructDef *structDef;
+ NameScope *scope;
+ NamespaceQual *nspaceQual;
+ QualItemVect *qual;
+ String name;
+ long argSize;
+};
+
+struct LangTerm /* One term node; type records which of the forms below it was built as. */
+{
+ enum Type {
+ VarRefType,
+ MethodCallType,
+ NumberType,
+ StringType,
+ MatchType,
+ ProdCompareType,
+ NewType,
+ ConstructType,
+ TypeIdType,
+ SearchType,
+ NilType,
+ TrueType,
+ FalseType,
+ ParseType,
+ ParseTreeType,
+ ParseStopType,
+ SendType,
+ SendTreeType,
+ MakeTreeType,
+ MakeTokenType,
+ EmbedStringType,
+ CastType
+ };
+ /* Null every pointer member and clear eof, so a member that a given cons() skips is never indeterminate. */
+ LangTerm()
+ :
+ varRef(0), args(0), nspaceQual(0), objField(0),
+ typeRef(0), pattern(0), fieldInitArgs(0), generic(0),
+ constructor(0), consItemList(0), parserText(0),
+ expr(0), eof(false)
+ {}
+
+ static LangTerm *cons( const InputLoc &loc, Type type, LangVarRef *varRef )
+ {
+ LangTerm *t = new LangTerm;
+ t->loc = loc;
+ t->type = type;
+ t->varRef = varRef;
+ return t;
+ }
+
+ static LangTerm *cons( const InputLoc &loc, LangVarRef *varRef, CallArgVect *args )
+ {
+ LangTerm *t = new LangTerm;
+ t->loc = loc;
+ t->type = MethodCallType;
+ t->varRef = varRef;
+ t->args = args;
+ return t;
+ }
+
+ static LangTerm *cons( const InputLoc &loc, Type type, CallArgVect *args )
+ {
+ LangTerm *t = new LangTerm;
+ t->loc = loc;
+ t->type = type;
+ t->args = args;
+ return t;
+ }
+
+ static LangTerm *cons( const InputLoc &loc, Type type, String data )
+ {
+ LangTerm *t = new LangTerm;
+ t->loc = loc;
+ t->type = type;
+ t->varRef = 0;
+ t->data = data;
+ return t;
+ }
+
+ static LangTerm *cons( const InputLoc &loc, Type type )
+ {
+ LangTerm *t = new LangTerm;
+ t->loc = loc;
+ t->type = type;
+ t->varRef = 0;
+ t->typeRef = 0;
+ return t;
+ }
+
+ static LangTerm *cons( const InputLoc &loc, Type type, TypeRef *typeRef )
+ {
+ LangTerm *t = new LangTerm;
+ t->loc = loc;
+ t->type = type;
+ t->varRef = 0;
+ t->typeRef = typeRef;
+ return t;
+ }
+
+ static LangTerm *cons( const InputLoc &loc, Type type, TypeRef *typeRef,
+ LangExpr *langExpr )
+ {
+ LangTerm *t = new LangTerm;
+ t->loc = loc;
+ t->type = type;
+ t->varRef = 0;
+ t->typeRef = typeRef;
+ t->expr = langExpr;
+ return t;
+ }
+
+ static LangTerm *consMatch( const InputLoc &loc,
+ LangVarRef *varRef, Pattern *pattern )
+ {
+ LangTerm *t = new LangTerm;
+ t->type = MatchType;
+ t->loc = loc;
+ t->varRef = varRef;
+ t->pattern = pattern;
+ return t;
+ }
+
+ static LangTerm *consProdCompare( const InputLoc &loc,
+ LangVarRef *varRef, const String &prod, LangExpr *matchExpr )
+ {
+ LangTerm *t = new LangTerm;
+ t->type = ProdCompareType;
+ t->loc = loc;
+ t->varRef = varRef;
+ t->prod = prod;
+ t->expr = matchExpr;
+ return t;
+ }
+
+ static LangTerm *cons( const InputLoc &loc, Type type, LangVarRef *varRef,
+ Pattern *pattern )
+ {
+ LangTerm *t = new LangTerm;
+ t->loc = loc;
+ t->type = type;
+ t->varRef = varRef;
+ t->pattern = pattern;
+ return t;
+ }
+
+ static LangTerm *cons( const InputLoc &loc, Type type, TypeRef *typeRef,
+ LangVarRef *varRef )
+ {
+ LangTerm *t = new LangTerm;
+ t->loc = loc;
+ t->type = type;
+ t->varRef = varRef;
+ t->typeRef = typeRef;
+ return t;
+ }
+
+ static LangTerm *cons( const InputLoc &loc, Type type, LangVarRef *varRef,
+ ObjectField *objField, TypeRef *typeRef, FieldInitVect *fieldInitArgs,
+ Constructor *constructor )
+ {
+ LangTerm *t = new LangTerm;
+ t->loc = loc;
+ t->type = type;
+ t->varRef = varRef;
+ t->objField = objField;
+ t->typeRef = typeRef;
+ t->fieldInitArgs = fieldInitArgs;
+ t->constructor = constructor;
+ return t;
+ }
+
+ static LangTerm *cons( const InputLoc &loc, Type type, LangVarRef *varRef,
+ ObjectField *objField, TypeRef *typeRef, FieldInitVect *fieldInitArgs,
+ ConsItemList *consItemList, ParserText *parserText )
+ {
+ LangTerm *t = new LangTerm;
+ t->loc = loc;
+ t->type = type;
+ t->varRef = varRef;
+ t->objField = objField;
+ t->typeRef = typeRef;
+ t->fieldInitArgs = fieldInitArgs;
+ t->consItemList = consItemList;
+ t->parserText = parserText;
+ return t;
+ }
+
+ static LangTerm *cons( const InputLoc &loc, Type type, LangExpr *expr )
+ {
+ LangTerm *t = new LangTerm;
+ t->loc = loc;
+ t->type = type;
+ t->expr = expr;
+ return t;
+ }
+
+ static LangTerm *cons( const InputLoc &loc, ConsItemList *consItemList )
+ {
+ LangTerm *t = new LangTerm;
+ t->loc = loc;
+ t->type = EmbedStringType;
+ t->consItemList = consItemList;
+ return t;
+ }
+
+ static LangTerm *cons( const InputLoc &loc, Type type, LangVarRef *varRef,
+ ParserText *parserText )
+ {
+ LangTerm *s = new LangTerm;
+ s->loc = loc;
+ s->type = type;
+ s->varRef = varRef;
+ s->parserText = parserText;
+ return s;
+ }
+
+ static LangTerm *consSend( const InputLoc &loc, LangVarRef *varRef,
+ ParserText *parserText, bool eof )
+ {
+ LangTerm *s = new LangTerm;
+ s->loc = loc;
+ s->type = SendType;
+ s->varRef = varRef;
+ s->parserText = parserText;
+ s->eof = eof;
+ return s;
+ }
+
+ static LangTerm *consSendTree( const InputLoc &loc, LangVarRef *varRef,
+ ParserText *parserText, bool eof )
+ {
+ LangTerm *s = new LangTerm;
+ s->loc = loc;
+ s->type = SendTreeType;
+ s->varRef = varRef;
+ s->parserText = parserText;
+ s->eof = eof;
+ return s;
+ }
+
+ static LangTerm *consNew( const InputLoc &loc, TypeRef *typeRef,
+ LangVarRef *captureVarRef, FieldInitVect *fieldInitArgs )
+ {
+ LangTerm *s = new LangTerm;
+ s->type = NewType;
+ s->loc = loc;
+ s->typeRef = typeRef;
+ s->varRef = captureVarRef;
+ s->fieldInitArgs = fieldInitArgs;
+ return s;
+ }
+
+ void resolveFieldArgs( Compiler *pd );
+ void resolve( Compiler *pd );
+
+ void evaluateCapture( Compiler *pd, CodeVect &code, UniqueType *valUt ) const;
+ void evaluateCapture( Compiler *pd, CodeVect &code, bool isTree ) const;
+ UniqueType *evaluateNew( Compiler *pd, CodeVect &code ) const;
+ UniqueType *evaluateConstruct( Compiler *pd, CodeVect &code ) const;
+
+ static void parseFrag( Compiler *pd, CodeVect &code, int stopId );
+
+ UniqueType *evaluateParse( Compiler *pd, CodeVect &code, bool tree, bool stop ) const;
+ UniqueType *evaluateReadReduce( Compiler *pd, CodeVect &code ) const;
+ void evaluateSendStream( Compiler *pd, CodeVect &code ) const;
+ void evaluateSendParser( Compiler *pd, CodeVect &code, bool strings ) const;
+ UniqueType *evaluateSend( Compiler *pd, CodeVect &code ) const;
+ UniqueType *evaluateSendTree( Compiler *pd, CodeVect &code ) const;
+ UniqueType *evaluateMatch( Compiler *pd, CodeVect &code ) const;
+ UniqueType *evaluateProdCompare( Compiler *pd, CodeVect &code ) const;
+ UniqueType *evaluate( Compiler *pd, CodeVect &code ) const;
+ void assignFieldArgs( Compiler *pd, CodeVect &code, UniqueType *replUT ) const;
+ UniqueType *evaluateMakeToken( Compiler *pd, CodeVect &code ) const;
+ UniqueType *evaluateMakeTree( Compiler *pd, CodeVect &code ) const;
+ UniqueType *evaluateEmbedString( Compiler *pd, CodeVect &code ) const;
+ UniqueType *evaluateSearch( Compiler *pd, CodeVect &code ) const;
+ UniqueType *evaluateCast( Compiler *pd, CodeVect &code ) const;
+ void resolveFieldArgs( Compiler *pd ) const;
+ /* NOTE(review): resolveFieldArgs is declared twice in this struct, const and non-const -- confirm both are defined. */
+ InputLoc loc;
+ Type type;
+ LangVarRef *varRef;
+ CallArgVect *args;
+ NamespaceQual *nspaceQual;
+ String data;
+ ObjectField *objField;
+ TypeRef *typeRef;
+ Pattern *pattern;
+ String prod;
+ FieldInitVect *fieldInitArgs;
+ GenericType *generic;
+ Constructor *constructor;
+ ConsItemList *consItemList;
+ ParserText *parserText;
+ LangExpr *expr;
+ bool eof;
+};
+
+struct LangExpr /* A binary, unary or term expression node; type records which cons() built it. */
+{
+ enum Type {
+ BinaryType,
+ UnaryType,
+ TermType
+ };
+
+ LangExpr() : left(0), op(0), right(0), term(0) {} /* No operand member starts indeterminate. */
+
+ static LangExpr *cons( const InputLoc &loc, LangExpr *left, char op, LangExpr *right )
+ {
+ LangExpr *e = new LangExpr;
+ e->loc = loc;
+ e->type = BinaryType;
+ e->left = left;
+ e->op = op;
+ e->right = right;
+ return e;
+ }
+
+ static LangExpr *cons( const InputLoc &loc, char op, LangExpr *right )
+ {
+ LangExpr *e = new LangExpr;
+ e->loc = loc;
+ e->type = UnaryType;
+ e->op = op; /* left stays null from the constructor. */
+ e->right = right;
+ return e;
+ }
+
+ static LangExpr *cons( LangTerm *term )
+ {
+ LangExpr *e = new LangExpr;
+ e->type = TermType;
+ e->term = term;
+ return e;
+ }
+
+ void resolve( Compiler *pd ) const;
+
+ UniqueType *evaluate( Compiler *pd, CodeVect &code ) const;
+ bool canTakeRef( Compiler *pd ) const;
+
+ InputLoc loc;
+ Type type;
+ LangExpr *left;
+ char op;
+ LangExpr *right;
+ LangTerm *term;
+};
+
+struct LangStmt;
+typedef DList<LangStmt> StmtList;
+
+struct IterCall /* Carries either a LangTerm or a LangExpr, tagged by form. */
+{
+ enum Form {
+ Call,
+ Expr
+ };
+ /* Both payload pointers start null and wasExpr false; form is assigned by cons(). */
+ IterCall()
+ :
+ langTerm(0),
+ langExpr(0),
+ wasExpr(false)
+ {}
+ /* Term flavour: stores langTerm and leaves langExpr null. */
+ static IterCall *cons( Form form, LangTerm *langTerm )
+ {
+ IterCall *iterCall = new IterCall;
+ iterCall->form = form;
+ iterCall->langTerm = langTerm;
+ return iterCall;
+ }
+ /* Expression flavour: stores langExpr and leaves langTerm null. */
+ static IterCall *cons( Form form, LangExpr *langExpr )
+ {
+ IterCall *iterCall = new IterCall;
+ iterCall->form = form;
+ iterCall->langExpr = langExpr;
+ return iterCall;
+ }
+
+ void resolve( Compiler *pd ) const;
+
+ Form form;
+ LangTerm *langTerm;
+ LangExpr *langExpr;
+ bool wasExpr;
+};
+
+struct LangStmt
+{
+ enum Type {
+ AssignType,
+ ExprType,
+ IfType,
+ ElseType,
+ RejectType,
+ WhileType,
+ ReturnType,
+ YieldType,
+ ForIterType,
+ BreakType
+ };
+
+ LangStmt()
+ :
+ type((Type)-1),
+ varRef(0),
+ langTerm(0),
+ objField(0),
+ typeRef(0),
+ expr(0),
+ constructor(0),
+ parserText(0),
+ exprPtrVect(0),
+ fieldInitVect(0),
+ stmtList(0),
+ elsePart(0),
+ iterCall(0),
+ context(0),
+ scope(0),
+ consItemList(0),
+
+ /* List links usually need no initial value. Here the next pointer
+ * doubles as a way to hand back two statements through a single
+ * LangStmt pointer, so both links are cleared explicitly.
+ */
+ prev(0),
+ next(0)
+ {}
+
+ static LangStmt *cons( const InputLoc &loc, Type type, FieldInitVect *fieldInitVect )
+ {
+ LangStmt *stmt = new LangStmt;
+ stmt->loc = loc;
+ stmt->type = type;
+ stmt->fieldInitVect = fieldInitVect;
+ return stmt;
+ }
+
+ static LangStmt *cons( const InputLoc &loc, Type type, CallArgVect *exprPtrVect )
+ {
+ LangStmt *stmt = new LangStmt;
+ stmt->loc = loc;
+ stmt->type = type;
+ stmt->exprPtrVect = exprPtrVect;
+ return stmt;
+ }
+
+ static LangStmt *cons( const InputLoc &loc, Type type, LangExpr *expr )
+ {
+ LangStmt *stmt = new LangStmt;
+ stmt->loc = loc;
+ stmt->type = type;
+ stmt->expr = expr;
+ return stmt;
+ }
+
+ static LangStmt *cons( Type type, LangVarRef *varRef )
+ {
+ LangStmt *stmt = new LangStmt;
+ stmt->type = type;
+ stmt->varRef = varRef;
+ return stmt;
+ }
+
+ static LangStmt *cons( const InputLoc &loc, Type type, ObjectField *objField )
+ {
+ LangStmt *stmt = new LangStmt;
+ stmt->loc = loc;
+ stmt->type = type;
+ stmt->objField = objField;
+ return stmt;
+ }
+
+ static LangStmt *cons( const InputLoc &loc, Type type, LangVarRef *varRef, LangExpr *expr )
+ {
+ LangStmt *stmt = new LangStmt;
+ stmt->loc = loc;
+ stmt->type = type;
+ stmt->varRef = varRef;
+ stmt->expr = expr;
+ return stmt;
+ }
+
+ static LangStmt *cons( Type type, LangExpr *expr, StmtList *stmtList )
+ {
+ LangStmt *stmt = new LangStmt;
+ stmt->type = type;
+ stmt->expr = expr;
+ stmt->stmtList = stmtList;
+ return stmt;
+ }
+
+ static LangStmt *cons( Type type, LangExpr *expr, StmtList *stmtList, LangStmt *elsePart )
+ {
+ LangStmt *stmt = new LangStmt;
+ stmt->type = type;
+ stmt->expr = expr;
+ stmt->stmtList = stmtList;
+ stmt->elsePart = elsePart;
+ return stmt;
+ }
+
+ void setElsePart( LangStmt *elsePart )
+ {
+ this->elsePart = elsePart; /* Replaces whatever else branch was stored before. */
+ }
+
+ static LangStmt *cons( Type type, StmtList *stmtList )
+ {
+ LangStmt *stmt = new LangStmt;
+ stmt->type = type;
+ stmt->stmtList = stmtList;
+ return stmt;
+ }
+
+
+ static LangStmt *cons( const InputLoc &loc, Type type )
+ {
+ LangStmt *stmt = new LangStmt;
+ stmt->loc = loc;
+ stmt->type = type;
+ return stmt;
+ }
+
+ static LangStmt *cons( Type type, LangVarRef *varRef, Constructor *constructor )
+ {
+ LangStmt *stmt = new LangStmt;
+ stmt->type = type;
+ stmt->varRef = varRef;
+ stmt->constructor = constructor;
+ return stmt;
+ }
+
+ static LangStmt *cons( const InputLoc &loc, Type type, ObjectField *objField,
+ TypeRef *typeRef, LangTerm *langTerm, StmtList *stmtList )
+ {
+ LangStmt *stmt = new LangStmt;
+ stmt->loc = loc;
+ stmt->type = type;
+ stmt->langTerm = langTerm;
+ stmt->objField = objField;
+ stmt->typeRef = typeRef;
+ stmt->stmtList = stmtList;
+ return stmt;
+ }
+
+ static LangStmt *cons( const InputLoc &loc, Type type, ObjectField *objField,
+ TypeRef *typeRef, IterCall *iterCall, StmtList *stmtList,
+ StructDef *context, NameScope *scope )
+ {
+ LangStmt *stmt = new LangStmt;
+ stmt->loc = loc;
+ stmt->type = type;
+ stmt->objField = objField;
+ stmt->typeRef = typeRef;
+ stmt->iterCall = iterCall;
+ stmt->stmtList = stmtList;
+ stmt->context = context;
+ stmt->scope = scope;
+ return stmt;
+ }
+
+ static LangStmt *cons( const InputLoc &loc, Type type, ConsItemList *consItemList )
+ {
+ LangStmt *stmt = new LangStmt;
+ stmt->loc = loc;
+ stmt->type = type;
+ stmt->consItemList = consItemList;
+ return stmt;
+ }
+
+ static LangStmt *cons( Type type )
+ {
+ LangStmt *stmt = new LangStmt;
+ stmt->type = type;
+ return stmt;
+ }
+
+ void declareForIter( Compiler *pd ) const;
+
+ void declare( Compiler *pd ) const;
+
+ void resolveForIter( Compiler *pd ) const;
+ void resolve( Compiler *pd ) const;
+ void resolveParserItems( Compiler *pd ) const;
+
+ void chooseDefaultIter( Compiler *pd, IterCall *iterCall ) const;
+ void compileWhile( Compiler *pd, CodeVect &code ) const;
+ void compileForIterBody( Compiler *pd, CodeVect &code, UniqueType *iterUT ) const;
+ void compileForIter( Compiler *pd, CodeVect &code ) const;
+ void compile( Compiler *pd, CodeVect &code ) const;
+
+ InputLoc loc;
+ Type type;
+ LangVarRef *varRef;
+ LangTerm *langTerm;
+ ObjectField *objField;
+ TypeRef *typeRef;
+ LangExpr *expr;
+ Constructor *constructor;
+ ParserText *parserText;
+ CallArgVect *exprPtrVect;
+ FieldInitVect *fieldInitVect;
+ StmtList *stmtList;
+ /* Holds the next if in a chain, or the final else. */
+ LangStmt *elsePart;
+ String name;
+ IterCall *iterCall;
+ StructDef *context;
+ NameScope *scope;
+ ConsItemList *consItemList;
+
+ /* List links. They are cleared in the constructor because next is also
+ * used to return a pair of statements through one LangStmt pointer,
+ * which only works if it starts out null. */
+ LangStmt *prev, *next;
+};
+
+struct CodeBlock
+{
+ CodeBlock()
+ :
+ frameId(-1),
+ context(0)
+ {}
+
+ static CodeBlock *cons( StmtList *stmtList, ObjectDef *localFrame )
+ {
+ CodeBlock *block = new CodeBlock;
+ block->localFrame = localFrame;
+ block->stmtList = stmtList;
+ return block;
+ }
+
+ void declare( Compiler *pd ) const;
+ void resolve( Compiler *pd ) const;
+ void compile( Compiler *pd, CodeVect &code ) const;
+
+ long frameId;
+ StmtList *stmtList;
+ ObjectDef *localFrame;
+ Locals locals;
+ StructDef *context;
+
+ /* Two code vectors are kept per frame:
+ * one for revert (WV), one for commit (WC). */
+ CodeVect codeWV, codeWC;
+};
+
+struct Function /* One function record: signature pieces, code block and bookkeeping ids. */
+{
+ Function()
+ :
+ nspace(0),
+ transBlock(0), /* Not assigned by cons(); start null rather than indeterminate. */
+ localFrame(0), /* Likewise not assigned by cons(). */
+ paramListSize(0),
+ paramUTs(0),
+ inContext(0),
+ objMethod(0),
+ inHost(false)
+ {}
+ /* Allocate a Function and copy in the eight caller-supplied values. */
+ static Function *cons( Namespace *nspace, TypeRef *typeRef, const String &name,
+ ParameterList *paramList, CodeBlock *codeBlock,
+ int funcId, bool isUserIter, bool exprt )
+ {
+ Function *f = new Function;
+ f->nspace = nspace;
+ f->typeRef = typeRef;
+ f->name = name;
+ f->paramList = paramList;
+ f->codeBlock = codeBlock;
+ f->funcId = funcId;
+ f->isUserIter = isUserIter;
+ f->exprt = exprt;
+ return f;
+ }
+
+ Namespace *nspace;
+ TransBlock *transBlock;
+ TypeRef *typeRef;
+ String name;
+ String hostCall;
+ ParameterList *paramList;
+ CodeBlock *codeBlock;
+ ObjectDef *localFrame;
+ long funcId;
+ bool isUserIter;
+ long paramListSize;
+ UniqueType **paramUTs;
+ StructDef *inContext;
+ bool exprt;
+ ObjectMethod *objMethod;
+ bool inHost;
+
+ Function *prev, *next;
+};
+
+typedef DList<Function> FunctionList;
+
+#endif /* _COLM_PARSETREE_H */
+
diff --git a/src/pcheck.cc b/src/pcheck.cc
new file mode 100644
index 00000000..6f41a7ce
--- /dev/null
+++ b/src/pcheck.cc
@@ -0,0 +1,156 @@
+/*
+ * Copyright 2006-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "pcheck.h"
+
+#include <stdbool.h>
+
+/* Construct a parameter checker for paramSpec; scanning starts at argv[1]. */
+ParamCheck::ParamCheck( const char *paramSpec, int argc, const char **argv )
+:
+ parameterArg(0), parameter(0), /* Result members: defined even before check() runs. */
+ state(noparam),
+ argOffset(0),
+ curArg(0),
+ iCurArg(1),
+ paramSpec(paramSpec),
+ argc(argc),
+ argv(argv)
+{}
+
+/* Check a single option. Returns false once the argument list is exhausted,
+ * true otherwise. On true, state says what was seen (match, invalid, noparam)
+ * and parameter / parameterArg hold the option char and its argument, or 0. */
+bool ParamCheck::check()
+{
+ bool requiresParam;
+
+ if ( iCurArg >= argc ) { /* Off the end of the arg list. */
+ state = noparam;
+ return false;
+ }
+
+ if ( argOffset != 0 && *argOffset == 0 ) {
+ /* We are at the end of an arg string. */
+ iCurArg += 1;
+ if ( iCurArg >= argc ) {
+ state = noparam;
+ return false;
+ }
+ argOffset = 0;
+ }
+
+ if ( argOffset == 0 ) {
+ /* Set the current arg. */
+ curArg = argv[iCurArg];
+
+ /* We are at the beginning of an arg string. */
+ if ( argv[iCurArg] == 0 || /* Argv[iCurArg] is null. */
+ argv[iCurArg][0] != '-' || /* Not a param. */
+ argv[iCurArg][1] == 0 ) { /* Only a dash. */
+ parameter = 0;
+ parameterArg = 0;
+
+ iCurArg += 1;
+ state = noparam;
+ return true;
+ }
+ argOffset = argv[iCurArg] + 1;
+ }
+
+ /* Get the arg char. */
+ char argChar = *argOffset;
+
+ /* Loop over all the parms and look for a match. */
+ const char *pSpec = paramSpec;
+ while ( *pSpec != 0 ) {
+ char pSpecChar = *pSpec;
+
+ /* If there is a ':' following the char then
+ * it requires a parm. If a parm is required
+ * then move ahead two in the parmspec. Otherwise
+ * move ahead one in the parm spec. */
+ if ( pSpec[1] == ':' ) {
+ requiresParam = true;
+ pSpec += 2;
+ }
+ else {
+ requiresParam = false;
+ pSpec += 1;
+ }
+
+ /* Do we have a match. */
+ if ( argChar == pSpecChar ) {
+ if ( requiresParam ) {
+ if ( argOffset[1] == 0 ) {
+ /* The param must follow. */
+ if ( iCurArg + 1 == argc ) {
+ /* We are the last arg so there
+ * cannot be a parameter to it. */
+ parameter = argChar;
+ parameterArg = 0;
+ iCurArg += 1;
+ argOffset = 0;
+ state = invalid;
+ return true;
+ }
+ else {
+ /* the parameter to the arg is the next arg. */
+ parameter = pSpecChar;
+ parameterArg = argv[iCurArg + 1];
+ iCurArg += 2; /* Skip both the option and the arg it consumed. */
+ argOffset = 0;
+ state = match;
+ return true;
+ }
+ }
+ else {
+ /* The param for the arg is built in. */
+ parameter = pSpecChar;
+ parameterArg = argOffset + 1;
+ iCurArg += 1;
+ argOffset = 0;
+ state = match;
+ return true;
+ }
+ }
+ else {
+ /* Good, we matched the parm and no
+ * arg is required. */
+ parameter = pSpecChar;
+ parameterArg = 0;
+ argOffset += 1; /* Stay inside this arg; the next call reads the following char. */
+ state = match;
+ return true;
+ }
+ }
+ }
+
+ /* We did not find a match. Bad Argument. */
+ parameter = argChar;
+ parameterArg = 0;
+ argOffset += 1;
+ state = invalid;
+ return true;
+}
+
+
diff --git a/src/pcheck.h b/src/pcheck.h
new file mode 100644
index 00000000..96746470
--- /dev/null
+++ b/src/pcheck.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright 2001-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _COLM_PCHECK_H
+#define _COLM_PCHECK_H
+
+class ParamCheck
+{
+public:
+ ParamCheck( const char *paramSpec, int argc, const char **argv );
+ /* Examine the next option; the outcome is left in the public members below. */
+ bool check();
+
+ const char *parameterArg; /* The argument to the parameter. */
+ char parameter; /* The parameter matched. */
+ enum { match, invalid, noparam } state;
+
+ const char *argOffset; /* If we are reading params inside an
+ * arg this points to the offset. */
+
+ const char *curArg; /* Pointer to the current arg. */
+ int iCurArg; /* Index to the current arg. */
+
+private:
+ const char *paramSpec; /* Parameter spec supplied by the coder. */
+ int argc; /* Argument data from the command line. */
+ const char **argv;
+};
+
+#endif /* _COLM_PCHECK_H */
+
diff --git a/src/pdabuild.cc b/src/pdabuild.cc
new file mode 100644
index 00000000..27cd9616
--- /dev/null
+++ b/src/pdabuild.cc
@@ -0,0 +1,2205 @@
+/*
+ * Copyright 2006-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define EOF_REGION 0
+
+#include <stdio.h>
+#include <string.h>
+#include <stdbool.h>
+#include <assert.h>
+
+#include <iostream>
+
+/* Dumping the fsm. */
+#include <mergesort.h>
+
+/* Parsing. */
+#include "compiler.h"
+#include "pdacodegen.h"
+
+using std::endl;
+using std::cerr;
+using std::cout;
+
+char startDefName[] = "start";
+
+extern "C" tree_t **internal_host_call( program_t *prg, long code, tree_t **sp )
+{ /* Stub: ignores all of its arguments and always returns a null pointer. */
+ return 0;
+}
+
+extern "C" void internal_commit_reduce_forward( program_t *prg, tree_t **root,
+ struct pda_run *pda_run, parse_tree_t *pt )
+{ /* Forwards to commit_clear_parse_tree(), passing the child of pt. */
+ commit_clear_parse_tree( prg, root, pda_run, pt->child );
+}
+
+extern "C" long internal_commit_union_sz( int reducer )
+{ /* Stub: always reports a size of 0, whatever reducer is given. */
+ return 0;
+}
+
+extern "C" void internal_init_need()
+{ /* No-op: there is nothing to initialise in this build. */
+}
+
+extern "C" int internal_reducer_need_tok( program_t *prg, struct pda_run *, int id )
+{ /* Always returns 3; prg, the pda_run and id are ignored.
+ * NOTE(review): the meaning of the value 3 is defined by the caller and is
+ * not visible in this file -- confirm. */
+ return 3;
+}
+
+extern "C" int internal_reducer_need_ign( program_t *prg, struct pda_run * )
+{ /* Always returns 3; both arguments are ignored.
+ * NOTE(review): the meaning of the value 3 is defined by the caller and is
+ * not visible in this file -- confirm. */
+ return 3;
+}
+
+/* Add up the number of transitions leaving each state in the fsm's state
+ * list and return the total. */
+int countTransitions( PdaGraph *fsm )
+{
+	int total = 0;
+	for ( PdaState *st = fsm->stateList.head; st != 0; st = st->next )
+		total += st->transMap.length();
+	return total;
+}
+
+LangEl::LangEl( Namespace *nspace, const String &name, Type type )
+: /* Stores the three arguments and gives every other member listed below a
+ * fixed default (false, 0, -1 or PredNone). */
+ nspace(nspace),
+ name(name),
+ lit(name), /* The literal form starts out identical to the name. */
+ type(type),
+ id(-1), /* Negative: not yet assigned. makeLangElIds() fills in ids < 0. */
+ numAppearances(0),
+ commit(false),
+ isIgnore(false),
+ reduceFirst(false),
+ isLiteral(false),
+ isRepeat(false),
+ isList(false),
+ isOpt(false),
+ parseStop(false),
+ isEOF(false),
+ leftRecursive(false),
+ repeatOf(0),
+ tokenDef(0),
+ rootDef(0),
+ termDup(0),
+ eofLel(0),
+ pdaGraph(0),
+ pdaTables(0),
+ transBlock(0),
+ objectDef(0),
+ thisSize(0),
+ ofiOffset(0),
+ parserId(-1),
+ predType(PredNone),
+ predValue(0),
+ contextDef(0),
+ contextIn(0),
+ noPreIgnore(false),
+ noPostIgnore(false),
+ isZero(false)
+{ /* Nothing to do: all initialisation happens in the list above. */
+}
+
+PdaGraph *ProdElList::walk( Compiler *pd, Production *prod )
+{ /* Builds a linear PdaGraph for this element list: a start state followed
+ * by one new state and one shift transition per element, keyed on the id of
+ * the element's language element. The last state created becomes the final
+ * state. The pd and prod arguments are not read by the live code. Returns
+ * the graph, which is allocated with new. */
+ PdaGraph *prodFsm = new PdaGraph();
+ PdaState *last = prodFsm->addState();
+ prodFsm->setStartState( last );
+
+ int prodLength = 0; /* Number of elements walked so far. */
+ for ( Iter prodEl = first(); prodEl.lte(); prodEl++, prodLength++ ) {
+ //PdaGraph *itemFsm = prodEl->walk( pd );
+ long value = prodEl->langEl->id; /* Used as both the low and the high key. */
+
+ PdaState *newState = prodFsm->addState();
+ PdaTrans *newTrans = prodFsm->appendNewTrans( last, newState, value, value );
+
+ newTrans->isShift = true;
+ newTrans->shiftPrior = prodEl->priorVal;
+ //cerr << "PRIOR VAL: " << newTrans->shiftPrior << endl;
+
+ if ( prodEl->commit ) {
+ //cout << "COMMIT: inserting commit of length: " << pd->prodLength << endl;
+ /* Insert the commit into transitions out of last. The value
+ * recorded is the number of elements that precede this one. */
+ for ( TransMap::Iter trans = last->transMap; trans.lte(); trans++ )
+ trans->value->commits.insert( prodLength );
+ }
+
+ last = newState;
+ }
+
+ /* Make the last state the final state. */
+ prodFsm->setFinState( last );
+ return prodFsm;
+}
+
+
+/* Build a one-element production element list that refers to langEl through
+ * a TYPE_TREE unique type. The element's langEl is set directly via the list
+ * tail. Returns the new list, allocated with new. */
+ProdElList *Compiler::makeProdElList( LangEl *langEl )
+{
+	ProdElList *list = new ProdElList();
+
+	TypeRef *treeRef = TypeRef::cons( internal,
+			findUniqueType( TYPE_TREE, langEl ) );
+	ProdEl *only = new ProdEl( internal, treeRef );
+
+	list->append( only );
+	list->tail->langEl = langEl;
+	return list;
+}
+
+/* Give every definition of every language element a name of the form
+ * "<element name>-<n>", where n counts the element's definitions from 1. */
+void Compiler::makeDefinitionNames()
+{
+	for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
+		int ordinal = 0;
+		for ( LelDefList::Iter def = lel->defList; def.lte(); def++ ) {
+			ordinal += 1;
+
+			/* The 32 extra bytes leave room for the dash and the number. */
+			def->data.setAs( lel->name.length() + 32, "%s-%i",
+					lel->name.data, ordinal );
+		}
+	}
+}
+
+/* Make sure that there are no language elements whose type is unknown. This
+ * can happen when an id is used on the rhs of a definition but is not defined
+ * as anything. An error is reported for each such element. */
+void Compiler::noUndefindLangEls()
+{
+	for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
+		if ( lel->type != LangEl::Unknown )
+			continue;
+
+		error() << "'" << lel->name << "' was not defined as anything" << endp;
+	}
+}
+
+void Compiler::makeLangElIds()
+{
+ /* Assign a numeric id to every language element. Ordinary terminals are
+ * numbered first, then the EOF elements, the no-token element and (if
+ * present) the error element. firstNonTermId is recorded at that point and
+ * every element that is still unassigned is numbered after it.
+ *
+ * The first id 0 is reserved for the stack sentinel. A negative id means
+ * error to the parsing function, inducing backtracking. */
+ nextLelId = 1;
+
+ /* First pass assigns to the user terminals. */
+ for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
+ /* Must be a term, and not any of the special reserved terminals
+ * (the EOF, error and no-token elements get their ids further down). */
+ if ( lel->type == LangEl::Term &&
+ !lel->isEOF &&
+ lel != errorLangEl &&
+ lel != noTokenLangEl )
+ {
+ lel->id = nextLelId++;
+ }
+ }
+
+ //eofLangEl->id = nextLelId++;
+ for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
+ /* Second pass: the EOF elements are numbered directly after the
+ * ordinary terminals. */
+ if ( lel->isEOF )
+ lel->id = nextLelId++;
+ }
+
+ /* Next assign to the no-token element, which we always create. */
+ noTokenLangEl->id = nextLelId++;
+
+ /* Possibly assign to the error language element. */
+ if ( errorLangEl != 0 )
+ errorLangEl->id = nextLelId++;
+
+ /* Save this for the code generation. */
+ firstNonTermId = nextLelId;
+
+ /* A third and final pass assigns to everything else. */
+ for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
+ /* Anything else not yet assigned gets assigned now. */
+ if ( lel->id < 0 )
+ lel->id = nextLelId++;
+ }
+
+ assert( ptrLangEl->id == LEL_ID_PTR ); /* The ptr, str and ignore elements must have landed on their fixed LEL_ID_* values. */
+ assert( strLangEl->id == LEL_ID_STR );
+ assert( ignoreLangEl->id == LEL_ID_IGNORE );
+}
+
+/* Number the struct elements, and then the three built-in struct kinds
+ * (inbuilt, input, stream), continuing from the next language element id. */
+void Compiler::makeStructElIds()
+{
+	firstStructElId = nextLelId;
+
+	/* Struct ids pick up where the lang el ids stop. Keeping the two ranges
+	 * disjoint lets us check whether something is a tree or a struct, since
+	 * the ID field sits at the same offset in both. */
+	int id = nextLelId;
+	for ( StructElList::Iter sel = structEls; sel.lte(); sel++ ) {
+		sel->id = id;
+		id += 1;
+	}
+
+	structInbuiltId = id;
+	structInputId = id + 1;
+	structStreamId = id + 2;
+}
+
+/* Qualify the names of lel with its enclosing namespaces. Walking from
+ * nspace up to (but excluding) the root namespace, each namespace name is
+ * prepended with "::" to refName, declName and xmlTag. At the root only
+ * refName receives a leading "::". */
+void Compiler::refNameSpace( LangEl *lel, Namespace *nspace )
+{
+	for ( ; nspace != rootNamespace; nspace = nspace->parentNamespace ) {
+		lel->refName = nspace->name + "::" + lel->refName;
+		lel->declName = nspace->name + "::" + lel->declName;
+		lel->xmlTag = nspace->name + "::" + lel->xmlTag;
+	}
+
+	lel->refName = "::" + lel->refName;
+}
+
+/* Derive the various name forms (fullName, fullLit, refName, declName,
+ * xmlTag) of every language element and qualify them by namespace. */
+void Compiler::makeLangElNames()
+{
+	for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
+		/* Start from the plain name and the literal. */
+		lel->fullName = lel->name;
+		lel->xmlTag = lel->name;
+		lel->fullLit = lel->lit;
+		lel->refName = lel->lit;
+		lel->declName = lel->lit;
+
+		/* A namespace with the same name may sit next to the type. Naming a
+		 * C++ class the same as a namespace in the same scope is not
+		 * convenient, and we don't want to restrict colm, so as a workaround
+		 * for this least-common case the type gets a t_ prefix. */
+		const bool sharesNameWithNamespace =
+				lel->nspace->findNamespace( lel->name ) != 0;
+		if ( sharesNameWithNamespace ) {
+			lel->refName = "t_" + lel->refName;
+			lel->fullName = "t_" + lel->fullName;
+			lel->declName = "t_" + lel->declName;
+			lel->xmlTag = "t_" + lel->xmlTag;
+		}
+
+		refNameSpace( lel, lel->nspace );
+	}
+}
+
+/* Set up dot sets, shift info, and prod sets. Builds an fsm for every
+ * production with ProdElList::walk, computes the first sets, then for each
+ * production records its fsm length, its low/high entries in dotItemIndex,
+ * one dot item number per fsm state, and (when prodCommit is set) a pending
+ * commit on each final state. Finally fills prodIdIndex, indexed by prodId. */
+void Compiler::makeProdFsms()
+{
+ /* There are two items in the index for each production (high and low). */
+ int indexLen = prodList.length() * 2;
+ dotItemIndex.setAsNew( indexLen );
+ int dsiLow = 0, indexPos = 0;
+
+ /* Build FSMs for all production language elements. */
+ for ( DefList::Iter prod = prodList; prod.lte(); prod++ )
+ prod->fsm = prod->prodElList->walk( this, prod );
+
+ makeNonTermFirstSets();
+ makeFirstSets();
+
+ /* Second pass over the productions: lengths, dot item numbering and
+ * pending commits. */
+ for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) {
+ if ( addUniqueEmptyProductions ) {
+ /* This must be re-implemented. */
+ assert( false );
+ //if ( !prod->isLeftRec && prod->uniqueEmptyLeader != 0 ) {
+ // PdaGraph *emptyLeader = prod->uniqueEmptyLeader->walk( this );
+ // emptyLeader->concatOp( prod->fsm );
+ // prod->fsm = emptyLeader;
+ //}
+ }
+
+ /* Compute the machine's length. */
+ prod->fsmLength = prod->fsm->fsmLength( );
+
+ /* Productions have a unique production id for each final state.
+ * This lets us use a production length specific to each final state.
+ * Start states are always isolated therefore if the start state is
+ * final then reductions from it will always have a fixed production
+ * length. This is a simple method for determining the length
+ * of zero-length derivations when reducing. */
+
+ /* Number of dot items reserved for the production: one more than the
+ * number of states in its fsm.
+ * NOTE(review): the original comment said "elements + 1" (a dot before
+ * the first and after the last element), but walk() already creates
+ * elements + 1 states, so this reserves one slot beyond what the state
+ * loop below consumes -- confirm whether that is intended. */
+ int numForProd = prod->fsm->stateList.length() + 1;
+
+ /* Set up the low and high values in the index for this production. */
+ dotItemIndex.data[indexPos].key = dsiLow;
+ dotItemIndex.data[indexPos].value = prod;
+ dotItemIndex.data[indexPos+1].key = dsiLow + numForProd - 1;
+ dotItemIndex.data[indexPos+1].value = prod;
+
+ int dsi = dsiLow; /* Dot item number given to the next state. */
+ for ( PdaStateList::Iter state = prod->fsm->stateList; state.lte(); state++, dsi++ ) {
+ /* All transitions are shifts. */
+ for ( TransMap::Iter out = state->transMap; out.lte(); out++ )
+ assert( out->value->isShift );
+
+ state->dotSet.insert( dsi );
+ }
+
+ /* Move over the production. */
+ dsiLow += numForProd;
+ indexPos += 2;
+
+ if ( prod->prodCommit ) {
+ for ( PdaStateSet::Iter fin = prod->fsm->finStateSet; fin.lte(); fin++ ) {
+ int length = prod->fsmLength;
+ //cerr << "PENDING COMMIT IN FINAL STATE of " << prod->prodId <<
+ // " with len: " << length << endl;
+ (*fin)->pendingCommits.insert( ProdIdPair( prod->prodId, length ) );
+ }
+ }
+ }
+
+ /* Make the final state specific prod id to prod id mapping. The array is
+ * indexed directly by prodId, which assumes every prodId is below
+ * prodList.length() -- TODO confirm against where prodIds are assigned. */
+ prodIdIndex = new Production*[prodList.length()];
+ for ( DefList::Iter prod = prodList; prod.lte(); prod++ )
+ prodIdIndex[prod->prodId] = prod;
+}
+
+/* Want the first set of over src. If the first set contains epsilon, go over
+ * it and over tab. If overSrc is the end of the production, find the follow
+ * from the table, taking only the characters on which the parent is reduced.
+ *
+ * result receives the keys found. overTab is the parser-table state and
+ * overSrc the production-fsm state, walked in parallel. parentDef is the
+ * production whose reduce code is searched for at the end of the production.
+ * */
+void Compiler::findFollow( AlphSet &result, PdaState *overTab,
+ PdaState *overSrc, Production *parentDef )
+{
+ if ( overSrc->isFinState() ) {
+ assert( overSrc->transMap.length() == 0 );
+
+ /* At the end of the production. Turn to the table: collect every key
+ * in overTab whose actions include the reduce of parentDef. */
+ long redCode = makeReduceCode( parentDef->prodId, false );
+ for ( TransMap::Iter tabTrans = overTab->transMap; tabTrans.lte(); tabTrans++ ) {
+ for ( ActDataList::Iter adl = tabTrans->value->actions; adl.lte(); adl++ ) {
+ if ( *adl == redCode )
+ result.insert( tabTrans->key );
+ }
+ }
+ }
+ else {
+ /* Get the first set of the item. If the first set contains epsilon
+ * then move over overSrc and overTab and recurse. */
+ assert( overSrc->transMap.length() == 1 );
+ TransMap::Iter pastTrans = overSrc->transMap;
+
+ LangEl *langEl = langElIndex[pastTrans->key];
+ if ( langEl != 0 && langEl->type == LangEl::NonTerm ) {
+ bool hasEpsilon = false;
+ for ( LelDefList::Iter def = langEl->defList; def.lte(); def++ ) {
+ result.insert( def->firstSet );
+
+ if ( def->firstSet.find( -1 ) ) /* -1 in a first set stands for epsilon. */
+ hasEpsilon = true;
+ }
+
+ /* Find the equivalent state in the parser.
+ * NOTE(review): the result of findTrans is dereferenced without a
+ * null check; this relies on the table having a transition on
+ * every key the production fsm has. */
+ if ( hasEpsilon ) {
+ PdaTrans *tabTrans = overTab->findTrans( pastTrans->key );
+ findFollow( result, tabTrans->toState,
+ pastTrans->value->toState, parentDef );
+ }
+
+ /* Now possibly the dup. */
+ if ( langEl->termDup != 0 )
+ result.insert( langEl->termDup->id );
+ }
+ else {
+ result.insert( pastTrans->key ); /* Not a nonterminal: the key itself is the follow. */
+ }
+ }
+}
+
+/* Walk the production fsm from prodState for as long as the current state
+ * has exactly one outgoing transition, taking the transition on the same key
+ * in the parser table each time. Returns the table state reached. */
+PdaState *Compiler::followProd( PdaState *tabState, PdaState *prodState )
+{
+	for (;;) {
+		if ( prodState->transMap.length() != 1 )
+			break;
+
+		TransMap::Iter step = prodState->transMap;
+		tabState = tabState->findTrans( step->key )->toState;
+		prodState = step->value->toState;
+	}
+	return tabState;
+}
+
+/* Give the first action on trans that equals code an ordering time, unless
+ * it already has one (a non-zero entry in actOrds). time is advanced only
+ * when an ordering is actually assigned. */
+void Compiler::trySetTime( PdaTrans *trans, long code, long &time )
+{
+	for ( ActDataList::Iter adl = trans->actions; adl.lte(); adl++ ) {
+		if ( *adl != code )
+			continue;
+
+		/* Found the item. Only an unset (zero) time gets written. */
+		if ( trans->actOrds[adl.pos()] == 0 )
+			trans->actOrds[adl.pos()] = time++;
+		return;
+	}
+}
+
+/* Go down a definition and then handle the follow actions. Computes the
+ * follow set over the item being expanded (just the root element's EOF id
+ * when parentDef is the root definition), follows `definition` from tabState
+ * to the table state it expands to, and there gives an ordering time to the
+ * reduce of `definition` on every transition whose key is in the follow set. */
+void Compiler::pdaOrderFollow( LangEl *rootEl, PdaState *tabState,
+ PdaTrans *tabTrans, PdaTrans *srcTrans, Production *parentDef,
+ Production *definition, long &time )
+{
+ /* We need the follow from tabState/srcState over the definition we are
+ * currently processing. */
+ PdaState *overTab = tabTrans->toState;
+ PdaState *overSrc = srcTrans->toState;
+
+ AlphSet alphSet;
+ if ( parentDef == rootEl->rootDef )
+ alphSet.insert( rootEl->eofLel->id );
+ else
+ findFollow( alphSet, overTab, overSrc, parentDef );
+
+ /* Now follow the production to find out where it expands to. */
+ PdaState *expandToState = followProd( tabState, definition->fsm->startState );
+
+ /* Find the reduce item. */
+ long redCode = makeReduceCode( definition->prodId, false );
+
+ for ( TransMap::Iter tt = expandToState->transMap; tt.lte(); tt++ ) {
+ if ( alphSet.find( tt->key ) ) {
+ trySetTime( tt->value, redCode, time );
+
+ /* If the items token region is not recorded in the state, do it now. */
+ addRegion( expandToState, tt->value, tt->key,
+ tt->value->noPreIgnore, tt->value->noPostIgnore );
+ }
+ }
+}
+
+/* Report whether region is already an element of regVect (compared by
+ * pointer). */
+bool regionVectHas( RegionVect &regVect, TokenRegion *region )
+{
+	bool found = false;
+	for ( RegionVect::Iter it = regVect; !found && it.lte(); it++ )
+		found = ( *it == region );
+	return found;
+}
+
+void Compiler::addRegion( PdaState *tabState, PdaTrans *tabTrans,
+ long pdaKey, bool noPreIgnore, bool noPostIgnore )
+{ /* Records, for a transition on terminal pdaKey, the token region that
+ * tabState scans with and, unless suppressed, the ignore-only region as a
+ * pre-region of the transition's target state. Does nothing for keys that
+ * are not terminals or that yield no region.
+ * NOTE(review): the noPreIgnore and noPostIgnore parameters are never
+ * read; the flags of the same name on langEl are used instead. */
+ LangEl *langEl = langElIndex[pdaKey];
+ if ( langEl != 0 && langEl->type == LangEl::Term ) {
+ TokenRegion *region = 0;
+ RegionSet *regionSet = 0;
+
+ /* An isZero element uses the collectIgnore region of its token
+ * definition's region set. Any other non-EOF element that has a token
+ * definition uses tokenIgnore. Otherwise region stays null. */
+ if ( langEl->isZero ) {
+ region = langEl->tokenDef->regionSet->collectIgnore;
+ regionSet = langEl->tokenDef->regionSet;
+ }
+ else if ( !langEl->isEOF && langEl->tokenDef != 0 ) {
+ region = langEl->tokenDef->regionSet->tokenIgnore;
+ regionSet = langEl->tokenDef->regionSet;
+ }
+
+ if ( region != 0 ) {
+ /* The region to record in tabState; narrowed to tokenOnly when
+ * the element has noPreIgnore set. */
+ TokenRegion *scanRegion = region;
+
+ if ( langEl->noPreIgnore )
+ scanRegion = regionSet->tokenOnly;
+
+ if ( !regionVectHas( tabState->regions, scanRegion ) )
+ tabState->regions.append( scanRegion );
+
+ /* Pre-region of to state */
+ PdaState *toState = tabTrans->toState;
+ if ( !langEl->noPostIgnore &&
+ regionSet->ignoreOnly != 0 &&
+ !regionVectHas( toState->preRegions, regionSet->ignoreOnly ) )
+ {
+ toState->preRegions.append( regionSet->ignoreOnly );
+ }
+ }
+ }
+}
+
+#if 0
+ orderState( tabState, prodState, time ):
+ if not tabState.dotSet.find( prodState.dotID )
+ tabState.dotSet.insert( prodState.dotID )
+ tabTrans = tabState.findMatchingTransition( prodState.getTransition() )
+
+ if tabTrans is NonTerminal:
+ for production in tabTrans.nonTerm.prodList:
+ orderState( tabState, production.startState, time )
+
+ for all expandToState in tabTrans.expandToStates:
+ for all followTrans in expandToState.transList
+ reduceAction = findAction( production.reduction )
+ if reduceAction.time is unset:
+ reduceAction.time = time++
+ end
+ end
+ end
+ end
+ end
+
+ shiftAction = tabTrans.findAction( shift )
+ if shiftAction.time is unset:
+ shiftAction.time = time++
+ end
+
+ orderState( tabTrans.toState, prodTrans.toState, time )
+ end
+ end
+
+ orderState( parseTable.startState, startProduction.startState, 1 )
+#endif
+
+void Compiler::pdaOrderProd( LangEl *rootEl, PdaState *tabState,
+ PdaState *srcState, Production *parentDef, long &time )
+{ /* Assigns ordering times to the actions met while walking production
+ * state srcState in parallel with parser-table state tabState. dotSet2 of
+ * tabState records which production dot items have already been visited
+ * from that table state; a repeat visit returns at once, which is what
+ * stops the recursion. */
+ assert( srcState->dotSet.length() == 1 );
+ if ( tabState->dotSet2.find( srcState->dotSet[0] ) )
+ return;
+ tabState->dotSet2.insert( srcState->dotSet[0] );
+
+ assert( srcState->transMap.length() == 0 || srcState->transMap.length() == 1 );
+
+ if ( srcState->transMap.length() == 1 ) {
+ TransMap::Iter srcTrans = srcState->transMap;
+
+ /* Find the equivalent state in the parser.
+ * NOTE(review): tabTrans is used below without a null check. */
+ PdaTrans *tabTrans = tabState->findTrans( srcTrans->key );
+
+ /* Recurse into the definitions of the element on the transition. For
+ * an element with an empty defList the loops below do nothing. */
+ LangEl *langEl = langElIndex[srcTrans->key];
+ if ( langEl != 0 ) {
+ if ( langEl->reduceFirst ) {
+ /* Use a shortest match ordering for the contents of this
+ * nonterminal. Does follows for all productions first, then
+ * goes down the productions. */
+ for ( LelDefList::Iter expDef = langEl->defList; expDef.lte(); expDef++ ) {
+ pdaOrderFollow( rootEl, tabState, tabTrans, srcTrans->value,
+ parentDef, expDef, time );
+ }
+ for ( LelDefList::Iter expDef = langEl->defList; expDef.lte(); expDef++ )
+ pdaOrderProd( rootEl, tabState, expDef->fsm->startState, expDef, time );
+
+ }
+ else {
+ /* The default action ordering. For each prod, goes down the
+ * prod then sets the follow before going to the next prod. */
+ for ( LelDefList::Iter expDef = langEl->defList; expDef.lte(); expDef++ ) {
+ pdaOrderProd( rootEl, tabState, expDef->fsm->startState, expDef, time );
+
+ pdaOrderFollow( rootEl, tabState, tabTrans, srcTrans->value,
+ parentDef, expDef, time );
+ }
+ }
+ }
+
+ trySetTime( tabTrans, SHIFT_CODE, time ); /* The shift is ordered after everything reached through the expansion. */
+
+ /* Now possibly for the dup. */
+ if ( langEl != 0 && langEl->termDup != 0 ) {
+ PdaTrans *dupTrans = tabState->findTrans( langEl->termDup->id );
+ trySetTime( dupTrans, SHIFT_CODE, time );
+ }
+
+ /* If the items token region is not recorded in the state, do it now. */
+ addRegion( tabState, tabTrans, srcTrans->key,
+ srcTrans->value->noPreIgnore, srcTrans->value->noPostIgnore );
+
+ /* Go over one in the production. */
+ pdaOrderProd( rootEl, tabTrans->toState,
+ srcTrans->value->toState, parentDef, time );
+ }
+}
+
+void Compiler::pdaActionOrder( PdaGraph *pdaGraph, LangElSet &parserEls )
+{ /* Computes an ordering time for every action in the graph. Each parser
+ * element's root production is walked from its start state with
+ * pdaOrderProd; states left without a token region but with an EOF
+ * transition get the EOF region; any action still unordered afterwards is
+ * given the next time value. */
+ for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
+ assert( (state->stateBits & SB_ISMARKED) == 0 );
+
+ /* Sanity check: the keys in each state's transition map must be
+ * strictly increasing. */
+ long last = 0;
+ for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) {
+ if ( ! trans.first() )
+ assert( last < trans->key );
+ last = trans->key;
+ }
+ }
+
+ /* Compute the action orderings, record the max value. */
+ long time = 1;
+ for ( LangElSet::Iter pe = parserEls; pe.lte(); pe++ ) {
+ PdaState *startState = (*pe)->rootDef->fsm->startState;
+ pdaOrderProd( *pe, (*pe)->startState, startState, (*pe)->rootDef, time );
+
+ /* Walk over the start lang el and set the time for shift of
+ * the eof action that completes the parse. */
+ PdaTrans *overStart = (*pe)->startState->findTrans( (*pe)->id );
+ PdaTrans *eofTrans = overStart->toState->findTrans( (*pe)->eofLel->id );
+ eofTrans->actOrds[0] = time++;
+ }
+
+ for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
+ if ( state->regions.length() == 0 ) {
+ for ( TransMap::Iter tel = state->transMap; tel.lte(); tel++ ) {
+ /* There are no regions and EOF leaves the state. Add the eof
+ * token region (EOF_REGION is defined as 0 at the top of this
+ * file). The append runs once per EOF transition found. */
+ PdaTrans *trans = tel->value;
+ LangEl *lel = langElIndex[trans->lowKey];
+ if ( lel != 0 && lel->isEOF )
+ state->regions.append( EOF_REGION );
+ }
+ }
+ }
+
+ ///* Warn about states with empty token region lists. */
+ //for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
+ // if ( state->regions.length() == 0 ) {
+ // warning() << "state has an empty token region, state: " <<
+ // state->stateNum << endl;
+ // }
+ //}
+
+ /* Some actions may not have an ordering. I believe these to be actions
+ * that result in a parse error and they arise because the state tables
+ * are LALR(1) but the action ordering is LR(1). LALR(1) causes some
+ * reductions that lead nowhere. */
+ for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
+ assert( CmpDotSet::compare( state->dotSet, state->dotSet2 ) == 0 );
+ for ( TransMap::Iter tel = state->transMap; tel.lte(); tel++ ) {
+ PdaTrans *trans = tel->value;
+ /* Check every action has an ordering; a zero entry means unset. */
+ for ( ActDataList::Iter adl = trans->actOrds; adl.lte(); adl++ ) {
+ if ( *adl == 0 )
+ *adl = time++;
+ }
+ }
+ }
+}
+
+void Compiler::advanceReductions( PdaGraph *pdaGraph )
+{
+ /* Loop all states. For a state flagged advanceReductions whose out
+ * transitions hold no shift and exactly one distinct reduction, and whose
+ * in transitions include a shift and hold no reductions, the reduction is
+ * moved onto the in transitions and the state's out transitions are
+ * removed. Unreachable states are dropped at the end. */
+ for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
+ if ( !state->advanceReductions )
+ continue;
+
+ bool outHasShift = false;
+ ReductionMap outReds;
+ LongSet outCommits;
+ for ( TransMap::Iter out = state->transMap; out.lte(); out++ ) {
+ /* Gather shift presence, reductions and commits over all out
+ * transitions. */
+ if ( out->value->isShift )
+ outHasShift = true;
+ outReds.insert( out->value->reductions );
+ outCommits.insert( out->value->commits );
+ }
+
+ bool inHasShift = false;
+ ReductionMap inReds;
+ for ( PdaTransInList::Iter in = state->inRange; in.lte(); in++ ) {
+ /* Gather the same information over all in transitions. */
+ if ( in->isShift )
+ inHasShift = true;
+ inReds.insert( in->reductions );
+ }
+
+ if ( !outHasShift && outReds.length() == 1 &&
+ inHasShift && inReds.length() == 0 )
+ {
+ //cerr << "moving reduction to shift" << endl;
+
+ /* Move the reduction to all in transitions. Each must hold just a
+ * shift, which is replaced by a reduce code built with the second
+ * argument true (presumably the shift-reduce form -- confirm
+ * against makeReduceCode). */
+ for ( PdaTransInList::Iter in = state->inRange; in.lte(); in++ ) {
+ assert( in->actions.length() == 1 );
+ assert( in->actions[0] == SHIFT_CODE );
+ in->actions[0] = makeReduceCode( outReds[0].key, true );
+ in->afterShiftCommits.insert( outCommits );
+ }
+
+ /*
+ * Remove all transitions out of the state.
+ */
+
+ /* Detach out range transitions and free them; the map itself is
+ * emptied afterwards. */
+ for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) {
+ pdaGraph->detachTrans( state, trans->value->toState, trans->value );
+ delete trans->value;
+ }
+ state->transMap.empty();
+
+ /* Redirect all the in transitions to the actionDestState. */
+ pdaGraph->inTransMove( actionDestState, state );
+ }
+ }
+
+ pdaGraph->removeUnreachableStates();
+}
+
+void Compiler::sortActions( PdaGraph *pdaGraph )
+{
+ /* Sort the actions of every transition: highest priority first and,
+ * within equal priority, lowest ordering time first. Optionally prints
+ * branch point information, and reports transitions on nonterminals that
+ * are not a single plain shift or that carry commits. */
+ for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
+ assert( CmpDotSet::compare( state->dotSet, state->dotSet2 ) == 0 );
+ for ( TransMap::Iter tel = state->transMap; tel.lte(); tel++ ) {
+ PdaTrans *trans = tel->value;
+
+ /* Sort by the action ords. Work on copies of the three parallel
+ * lists and rebuild the originals in sorted order. */
+ ActDataList actions( trans->actions );
+ ActDataList actOrds( trans->actOrds );
+ ActDataList actPriors( trans->actPriors );
+ trans->actions.empty();
+ trans->actOrds.empty();
+ trans->actPriors.empty();
+ while ( actOrds.length() > 0 ) {
+ int min = 0; /* Index of the best remaining entry. */
+ for ( int i = 1; i < actOrds.length(); i++ ) {
+ if ( actPriors[i] > actPriors[min] ||
+ (actPriors[i] == actPriors[min] &&
+ actOrds[i] < actOrds[min] ) )
+ {
+ min = i;
+ }
+ }
+ trans->actions.append( actions[min] );
+ trans->actOrds.append( actOrds[min] );
+ trans->actPriors.append( actPriors[min] );
+ actions.remove(min);
+ actOrds.remove(min);
+ actPriors.remove(min);
+ }
+
+ if ( branchPointInfo && trans->actions.length() > 1 ) {
+ cerr << "info: branch point"
+ << " state: " << state->stateNum
+ << " trans: ";
+ LangEl *lel = langElIndex[trans->lowKey];
+ if ( lel == 0 )
+ cerr << (char)trans->lowKey << endl;
+ else
+ cerr << lel->lit << endl;
+
+ for ( ActDataList::Iter act = trans->actions; act.lte(); act++ ) {
+ switch ( *act & 0x3 ) { /* The low two bits hold the action kind. */
+ case 1:
+ cerr << " shift" << endl;
+ break;
+ case 2:
+ cerr << " reduce " <<
+ prodIdIndex[(*act >> 2)]->data << endl; /* The remaining bits index prodIdIndex. */
+ break;
+ case 3:
+ cerr << " shift-reduce" << endl;
+ break;
+ }
+ }
+ }
+
+ /* Verify that shifts of nonterminals don't have any branch
+ * points or commits.
+ * NOTE(review): these errors end with endl whereas other errors in
+ * this file end with endp -- confirm that is intended. */
+ if ( trans->lowKey >= firstNonTermId ) {
+ if ( trans->actions.length() != 1 ||
+ (trans->actions[0] & 0x3) != 1 )
+ {
+ error() << "TRANS ON NONTERMINAL is something "
+ "other than a shift" << endl;
+ }
+ if ( trans->commits.length() > 0 )
+ error() << "TRANS ON NONTERMINAL has a commit" << endl;
+ }
+
+ /* TODO: Shift-reduces are optimizations. Verify that
+ * shift-reduces exist only if they don't entail a conflict. */
+ }
+ }
+}
+
+void Compiler::reduceActions( PdaGraph *pdaGraph )
+{
+ /* Reduce the actions: every transition's (target state number, action
+ * list, commit length) triple is inserted into the graph's action set and
+ * the transition is pointed at the resulting set element. */
+ for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
+ for ( TransMap::Iter tel = state->transMap; tel.lte(); tel++ ) {
+ PdaTrans *trans = tel->value;
+ PdaActionSetEl *inSet;
+
+ int commitLen = trans->commits.length() > 0 ?
+ trans->commits[trans->commits.length()-1] : 0; /* Last element of commits, or 0 if there are none. */
+
+ if ( trans->afterShiftCommits.length() > 0 ) {
+ int afterShiftCommit = trans->afterShiftCommits[
+ trans->afterShiftCommits.length()-1];
+
+ if ( commitLen > 0 && commitLen+1 > afterShiftCommit ) /* Use whichever of commitLen+1 and the after-shift commit is larger. */
+ commitLen = ( commitLen + 1 );
+ else
+ commitLen = afterShiftCommit;
+ }
+ else {
+ commitLen = commitLen * -1; /* Without after-shift commits the length is stored negated. NOTE(review): the sign is presumably interpreted by the runtime -- confirm. */
+ }
+
+ //if ( commitLen != 0 ) {
+ // cerr << "FINAL ACTION COMMIT LEN: " << commitLen << endl;
+ //}
+
+ pdaGraph->actionSet.insert( ActionData( trans->toState->stateNum,
+ trans->actions, commitLen ), &inSet );
+ trans->actionSetEl = inSet;
+ }
+ }
+}
+
+/* Flag the states whose reductions should be advanced for langEl: every
+ * state other than the one reached by following the root definition from the
+ * start state that has a transition on langEl's EOF element. */
+void Compiler::computeAdvanceReductions( LangEl *langEl, PdaGraph *pdaGraph )
+{
+	/* Enter the graph and traverse over the root. The state this lands on
+	 * is allowed to have eof, no other state is. */
+	PdaState *overStart = pdaGraph->followFsm(
+			langEl->startState, langEl->rootDef->fsm );
+
+	/* The graph has to reduce to root all on its own, without needing
+	 * EOF. */
+	for ( PdaStateList::Iter st = pdaGraph->stateList; st.lte(); st++ ) {
+		if ( !( st == overStart ) ) {
+			for ( TransMap::Iter tr = st->transMap; tr.lte(); tr++ ) {
+				if ( tr->value->lowKey == langEl->eofLel->id )
+					st->advanceReductions = true;
+			}
+		}
+	}
+}
+
+/* Check that langEl can be parsed with parse_stop: no state other than the
+ * one reached by following the root definition from the start state may have
+ * a transition on langEl's EOF element. An error is reported otherwise. */
+void Compiler::verifyParseStopGrammar( LangEl *langEl, PdaGraph *pdaGraph )
+{
+	/* Enter the graph and traverse over the root. The state this lands on
+	 * is allowed to have eof, no other state is. */
+	PdaState *overStart = pdaGraph->followFsm(
+			langEl->startState, langEl->rootDef->fsm );
+
+	/* The graph has to reduce to root all on its own, without needing
+	 * EOF. */
+	for ( PdaStateList::Iter st = pdaGraph->stateList; st.lte(); st++ ) {
+		if ( !( st == overStart ) ) {
+			for ( TransMap::Iter tr = st->transMap; tr.lte(); tr++ ) {
+				if ( tr->value->lowKey != langEl->eofLel->id )
+					continue;
+
+				/* This needs a better error message. Appears to be voodoo. */
+				error() << "grammar is not usable with parse_stop" << endp;
+			}
+		}
+	}
+}
+
+/* Return the language element that supplies the precedence of an action on
+ * trans: for a shift, the element the transition is keyed on; for any other
+ * action, the predOf element of the production encoded in the action. */
+LangEl *Compiler::predOf( PdaTrans *trans, long action )
+{
+	if ( action == SHIFT_CODE )
+		return langElIndex[trans->lowKey];
+
+	/* The production id sits above the low two bits of the action code. */
+	return prodIdIndex[action >> 2]->predOf;
+}
+
+
+/* Decide whether two conflicting actions should trade places. They swap
+ * when l2 has the higher precedence value; on equal values they swap when l1
+ * is left associative and action1 is the shift, or when l1 is right
+ * associative and action2 is the shift. */
+bool Compiler::precedenceSwap( long action1, long action2, LangEl *l1, LangEl *l2 )
+{
+	if ( l2->predValue > l1->predValue )
+		return true;
+
+	if ( !( l1->predValue == l2->predValue ) )
+		return false;
+
+	/* Equal precedence: associativity of l1 decides. */
+	if ( l1->predType == PredLeft && action1 == SHIFT_CODE )
+		return true;
+	return l1->predType == PredRight && action2 == SHIFT_CODE;
+}
+
+/* Both conflicting actions are dropped when the two elements have the same
+ * precedence value and l1 is non-associative. */
+bool Compiler::precedenceRemoveBoth( LangEl *l1, LangEl *l2 )
+{
+	return l1->predValue == l2->predValue && l1->predType == PredNonassoc;
+}
+
+/* Resolve conflicts between actions that carry precedence. On each
+ * transition, the first pair of actions whose elements (see predOf) both
+ * have a precedence type is examined: precedenceSwap decides which of the
+ * two is kept in the earlier slot, the other is removed, and
+ * precedenceRemoveBoth may drop the survivor as well. The scan restarts
+ * after every removal until no such pair remains. A transition left with no
+ * actions is removed from the state's transition map. */
+void Compiler::resolvePrecedence( PdaGraph *pdaGraph )
+{
+	for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
+		assert( CmpDotSet::compare( state->dotSet, state->dotSet2 ) == 0 );
+
+		for ( long t = 0; t < state->transMap.length(); /* increment at end */ ) {
+			PdaTrans *trans = state->transMap[t].value;
+
+again:
+			/* Find action with precedence. */
+			for ( int i = 0; i < trans->actions.length(); i++ ) {
+				LangEl *li = predOf( trans, trans->actions[i] );
+
+				if ( li != 0 && li->predType != PredNone ) {
+					/* Find another action with precedence. */
+					for ( int j = i+1; j < trans->actions.length(); j++ ) {
+						LangEl *lj = predOf( trans, trans->actions[j] );
+
+						if ( lj != 0 && lj->predType != PredNone ) {
+							/* Conflict to check. */
+							bool swap = precedenceSwap( trans->actions[i],
+									trans->actions[j], li, lj );
+
+							if ( swap ) {
+								/* Named so that it does not shadow the
+								 * transition index t of the enclosing loop. */
+								long heldAction = trans->actions[i];
+								trans->actions[i] = trans->actions[j];
+								trans->actions[j] = heldAction;
+							}
+
+							/* The action now in slot j lost. On a nonassoc
+							 * tie the one in slot i goes too. */
+							trans->actions.remove( j );
+							if ( precedenceRemoveBoth( li, lj ) )
+								trans->actions.remove( i );
+
+							/* The list changed under the loops; rescan. */
+							goto again;
+						}
+					}
+				}
+			}
+
+			/* If there are still actions then move to the next one. If not,
+			 * (due to nonassoc) then remove the transition. */
+			if ( trans->actions.length() > 0 )
+				t += 1;
+			else
+				state->transMap.vremove( t );
+		}
+	}
+}
+
+void Compiler::analyzeMachine( PdaGraph *pdaGraph, LangElSet &parserEls )
+{ /* Turns the shift/reduction information on the graph's transitions into
+ * ordered action lists and derives the table limits. Steps, in order: build
+ * the action lists, order and sort them, resolve precedence, advance
+ * reductions for parse_stop elements, number the states, build the shared
+ * action set, then run consistency checks. */
+ pdaGraph->maxState = pdaGraph->stateList.length() - 1;
+ pdaGraph->maxLelId = nextLelId - 1;
+ pdaGraph->maxOffset = pdaGraph->stateList.length() * pdaGraph->maxLelId;
+
+ for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
+ for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) {
+ if ( trans->value->isShift ) {
+ trans->value->actions.append( SHIFT_CODE );
+ trans->value->actPriors.append( trans->value->shiftPrior );
+ }
+ for ( ReductionMap::Iter red = trans->value->reductions; red.lte(); red++ ) {
+ trans->value->actions.append( makeReduceCode( red->key, false ) );
+ trans->value->actPriors.append( red->value );
+ }
+ trans->value->actOrds.appendDup( 0, trans->value->actions.length() ); /* One zero (unset) ordering per action. */
+ }
+ }
+
+ pdaActionOrder( pdaGraph, parserEls );
+ sortActions( pdaGraph );
+ resolvePrecedence( pdaGraph );
+
+ /* For every type we parse_stop, flag the states whose reductions are to
+ * be advanced. */
+ for ( LangElSet::Iter pe = parserEls; pe.lte(); pe++ ) {
+ LangEl *lel = *pe;
+ if ( lel->parseStop )
+ computeAdvanceReductions(lel , pdaGraph);
+ }
+
+ advanceReductions( pdaGraph );
+ pdaGraph->setStateNumbers();
+ reduceActions( pdaGraph );
+
+ /* Set the action ids. */
+ int actionSetId = 0;
+ for ( PdaActionSet::Iter asi = pdaGraph->actionSet; asi.lte(); asi++ )
+ asi->key.id = actionSetId++;
+
+ /* Get the max index. */
+ pdaGraph->maxIndex = actionSetId - 1;
+
+ /* Compute the max prod length. */
+ pdaGraph->maxProdLen = 0;
+ for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) {
+ if ( (unsigned)prod->fsmLength > pdaGraph->maxProdLen )
+ pdaGraph->maxProdLen = prod->fsmLength;
+ }
+
+ /* Asserts that any transition with a nonterminal has a single action
+ * which is either a shift or a shift-reduce. */
+ for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
+ for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) {
+ LangEl *langEl = langElIndex[trans->value->lowKey];
+ if ( langEl != 0 && langEl->type == LangEl::NonTerm ) {
+ assert( trans->value->actions.length() == 1 );
+ assert( trans->value->actions[0] == SHIFT_CODE ||
+ (trans->value->actions[0] & 0x3) == SHIFT_REDUCE_CODE );
+ }
+ }
+ }
+
+ /* Assert that shift reduces always appear on their own. */
+ for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
+ for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) {
+ for ( ActDataList::Iter act = trans->value->actions; act.lte(); act++ ) {
+ if ( (*act & 0x3) == SHIFT_REDUCE_CODE )
+ assert( trans->value->actions.length() == 1 );
+ }
+ }
+ }
+
+ /* Verify that any type we parse_stop can actually be parsed that way. */
+ for ( LangElSet::Iter pe = parserEls; pe.lte(); pe++ ) {
+ LangEl *lel = *pe;
+ if ( lel->parseStop )
+ verifyParseStopGrammar(lel , pdaGraph);
+ }
+}
+
+ /*
+  * Declare the "_root" non-terminal and, for every language element, make one
+  * production under it. These root productions are what allow a parser to be
+  * rooted at any language element (the start symbol included).
+  */
+ void Compiler::wrapNonTerminals()
+ {
+     rootLangEl = declareLangEl( this, rootNamespace, "_root", LangEl::NonTerm );
+
+     LelList::Iter lel = langEls;
+     while ( lel.lte() ) {
+         /* Right-hand side of the production used when this element is a root. */
+         ProdElList *rhs = makeProdElList( lel );
+
+         /* The current lengths of prodList and of the root's defList are
+          * passed through to the constructor before either list grows. */
+         Production *rootDef = Production::cons( InputLoc(), rootLangEl,
+                 rhs, String(), false, 0,
+                 prodList.length(), rootLangEl->defList.length() );
+
+         lel->rootDef = rootDef;
+         prodList.append( rootDef );
+         rootLangEl->defList.append( rootDef );
+
+         /* First resolve. */
+         for ( ProdElList::Iter el = *rhs; el.lte(); el++ )
+             resolveProdEl( el );
+
+         lel++;
+     }
+ }
+
+ /*
+  * One propagation step of the non-terminal first-set computation for `prod`,
+  * looking at the transitions leaving `state` of the production's machine.
+  *
+  * For every transition on a non-terminal key the key itself is added, then
+  * everything (except the epsilon marker -1) found in the nonTermFirstSet of
+  * each definition of that non-terminal. If any of those sets contained -1 the
+  * walk continues into the transition's target state, and -1 is added to
+  * prod's set when that target is a final state.
+  *
+  * Returns true if prod->nonTermFirstSet grew.
+  */
+ bool Compiler::makeNonTermFirstSetProd( Production *prod, PdaState *state )
+ {
+     bool changed = false;
+
+     for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) {
+         /* Only transitions on non-terminals are of interest here. */
+         if ( trans->key < firstNonTermId )
+             continue;
+
+         if ( prod->nonTermFirstSet.insert( trans->key ) != 0 )
+             changed = true;
+
+         /* Pull in what is already known about the non-terminal. */
+         bool sawEpsilon = false;
+         LangEl *nonTerm = langElIndex[trans->key];
+         for ( LelDefList::Iter def = nonTerm->defList; def.lte(); def++ ) {
+             for ( ProdIdSet::Iter id = def->nonTermFirstSet; id.lte(); id++ ) {
+                 if ( *id == -1 ) {
+                     sawEpsilon = true;
+                     continue;
+                 }
+
+                 if ( prod->nonTermFirstSet.insert( *id ) != 0 )
+                     changed = true;
+             }
+         }
+
+         if ( !sawEpsilon )
+             continue;
+
+         /* The non-terminal can be skipped over: look past it. */
+         PdaState *target = trans->value->toState;
+         if ( target->isFinState() ) {
+             if ( prod->nonTermFirstSet.insert( -1 ) != 0 )
+                 changed = true;
+         }
+
+         if ( makeNonTermFirstSetProd( prod, target ) )
+             changed = true;
+     }
+
+     return changed;
+ }
+
+
+ /*
+  * Compute nonTermFirstSet for every production by repeating the propagation
+  * step over all productions until a full pass adds nothing, then flag as
+  * left-recursive every production whose set contains the id of its own
+  * left-hand side.
+  */
+ void Compiler::makeNonTermFirstSets()
+ {
+     bool changed;
+     do {
+         changed = false;
+
+         for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) {
+             /* A machine whose start state is final is recorded with the
+              * epsilon marker, -1. */
+             if ( prod->fsm->startState->isFinState() ) {
+                 if ( prod->nonTermFirstSet.insert( -1 ) != 0 )
+                     changed = true;
+             }
+
+             if ( makeNonTermFirstSetProd( prod, prod->fsm->startState ) )
+                 changed = true;
+         }
+     } while ( changed );
+
+     for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) {
+         if ( prod->nonTermFirstSet.find( prod->prodName->id ) )
+             prod->isLeftRec = true;
+     }
+ }
+
+ /*
+  * Debugging aid: write each production's nonTermFirstSet to cerr on one line,
+  * followed by a notice when the production has been marked left-recursive.
+  */
+ void Compiler::printNonTermFirstSets()
+ {
+     for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) {
+         cerr << prod->data << ": ";
+
+         for ( ProdIdSet::Iter id = prod->nonTermFirstSet; id.lte(); id++ ) {
+             /* Negative entries are the epsilon marker. */
+             if ( *id < 0 ) {
+                 cerr << " <EPSILON>";
+                 continue;
+             }
+
+             cerr << " " << langElIndex[*id]->name;
+         }
+         cerr << endl;
+
+         if ( prod->isLeftRec )
+             cerr << "PROD IS LEFT REC: " << prod->data << endl;
+     }
+ }
+
+ /*
+  * One propagation step of the first-set computation for `prod`, looking at
+  * the transitions leaving `state` of the production's machine.
+  *
+  * Every transition key is added to prod->firstSet. For keys at or above
+  * firstNonTermId the language element's termDup id (when it has one) is added
+  * too, along with every non-epsilon entry in the firstSet of each of the
+  * element's definitions. If any of those sets held the epsilon marker (-1)
+  * the walk continues into the transition's target state, and -1 is added to
+  * prod's set when that target is a final state.
+  *
+  * Returns true if prod->firstSet grew.
+  */
+ bool Compiler::makeFirstSetProd( Production *prod, PdaState *state )
+ {
+     bool changed = false;
+
+     for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) {
+         /* The key goes in regardless of which side of the boundary it is on. */
+         if ( prod->firstSet.insert( trans->key ) != 0 )
+             changed = true;
+
+         if ( trans->key < firstNonTermId )
+             continue;
+
+         LangEl *lel = langElIndex[trans->key];
+         if ( lel != 0 && lel->termDup != 0 ) {
+             if ( prod->firstSet.insert( lel->termDup->id ) != 0 )
+                 changed = true;
+         }
+
+         /* Merge the first sets already computed for the element. */
+         bool sawEpsilon = false;
+         for ( LelDefList::Iter def = lel->defList; def.lte(); def++ ) {
+             for ( ProdIdSet::Iter id = def->firstSet; id.lte(); id++ ) {
+                 if ( *id == -1 ) {
+                     sawEpsilon = true;
+                     continue;
+                 }
+
+                 if ( prod->firstSet.insert( *id ) != 0 )
+                     changed = true;
+             }
+         }
+
+         if ( !sawEpsilon )
+             continue;
+
+         /* The element can match nothing: look at what follows it. */
+         PdaState *target = trans->value->toState;
+         if ( target->isFinState() ) {
+             if ( prod->firstSet.insert( -1 ) != 0 )
+                 changed = true;
+         }
+
+         if ( makeFirstSetProd( prod, target ) )
+             changed = true;
+     }
+
+     return changed;
+ }
+
+
+ /*
+  * Compute firstSet for every production: repeat the propagation step over the
+  * whole production list until a complete pass adds nothing.
+  */
+ void Compiler::makeFirstSets()
+ {
+     bool changed;
+     do {
+         changed = false;
+
+         for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) {
+             /* A machine whose start state is final is recorded with the
+              * epsilon marker, -1. */
+             if ( prod->fsm->startState->isFinState() ) {
+                 if ( prod->firstSet.insert( -1 ) != 0 )
+                     changed = true;
+             }
+
+             if ( makeFirstSetProd( prod, prod->fsm->startState ) )
+                 changed = true;
+         }
+     } while ( changed );
+ }
+
+ /*
+  * Debugging aid: write each production's firstSet to cerr. Every non-epsilon
+  * entry goes on its own line, by name when the id maps to a language element
+  * and as the raw id otherwise.
+  */
+ void Compiler::printFirstSets()
+ {
+     for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) {
+         cerr << prod->data << ": ";
+
+         for ( ProdIdSet::Iter id = prod->firstSet; id.lte(); id++ ) {
+             /* Negative entries are the epsilon marker. */
+             if ( *id < 0 ) {
+                 cerr << " <EPSILON>";
+                 continue;
+             }
+
+             LangEl *lel = langElIndex[*id];
+             if ( lel == 0 )
+                 cerr << endl << " " << *id;
+             else
+                 cerr << endl << " " << lel->name;
+         }
+         cerr << endl;
+     }
+ }
+
+ /*
+  * For every production that exists when this is called, add a fresh
+  * non-terminal named "U<n>" (n being the length of prodList at the moment of
+  * creation) with a single production that has no elements, and record that
+  * non-terminal as the production's uniqueEmptyLeader.
+  *
+  * New productions are appended to prodList while it is being walked; the walk
+  * stops on reaching the first production whose prodId equals the list length
+  * sampled on entry, so only the pre-existing productions are processed.
+  */
+ void Compiler::insertUniqueEmptyProductions()
+ {
+     int limit = prodList.length();
+     for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) {
+         if ( prod->prodId == limit )
+             break;
+
+         /* Get a language element. The name is formatted with a bounded
+          * write: "U" plus a 64-bit long can need 21 bytes including the
+          * terminator, one more than the previous 20-byte buffer held. The
+          * cast makes the argument match the %li conversion whatever integer
+          * type length() returns. */
+         char name[32];
+         snprintf( name, sizeof(name), "U%li", (long)prodList.length() );
+         LangEl *prodName = addLangEl( this, rootNamespace, name, LangEl::NonTerm );
+         Production *newDef = Production::cons( InputLoc(), prodName,
+                 0, String(), false, 0, prodList.length(), prodName->defList.length() );
+         prodName->defList.append( newDef );
+         prodList.append( newDef );
+
+         prod->uniqueEmptyLeader = prodName;
+     }
+ }
+
+ /*
+  * Allocate an array with one local_info per entry of locals.locals and copy
+  * each entry's type and offset into it. The array is zero-filled first and is
+  * returned to the caller, who takes ownership of the new[] allocation.
+  */
+ struct local_info *Compiler::makeLocalInfo( Locals &locals )
+ {
+     struct local_info *table = new local_info[locals.locals.length()];
+     memset( table, 0, sizeof(struct local_info) * locals.locals.length() );
+
+     Vector<LocalLoc>::Iter loc = locals.locals;
+     while ( loc.lte() ) {
+         struct local_info &slot = table[loc.pos()];
+         slot.type = (int) loc->type;
+         slot.offset = loc->offset;
+         loc++;
+     }
+
+     return table;
+ }
+
+ /*
+  * Collect the offsets of the fields of objectDef that exist and whose type
+  * resolves to TYPE_TREE.
+  *
+  * numTrees receives the number of such fields; the return value is a new[]
+  * array of that many offsets, in field-list order, owned by the caller.
+  */
+ short *Compiler::makeTrees( ObjectDef *objectDef, int &numTrees )
+ {
+     /* Pass one: count, so the array can be sized exactly. */
+     numTrees = 0;
+     for ( FieldList::Iter field = objectDef->fieldList; field.lte(); field++ ) {
+         if ( !field->value->exists() )
+             continue;
+
+         if ( field->value->typeRef->resolveType( this )->typeId == TYPE_TREE )
+             numTrees += 1;
+     }
+
+     short *offsets = new short[numTrees];
+     memset( offsets, 0, sizeof(short) * numTrees );
+
+     /* Pass two: record the offset of each tree-typed field. */
+     short next = 0;
+     for ( FieldList::Iter field = objectDef->fieldList; field.lte(); field++ ) {
+         if ( !field->value->exists() )
+             continue;
+
+         if ( field->value->typeRef->resolveType( this )->typeId == TYPE_TREE ) {
+             offsets[next] = field->value->offset;
+             next += 1;
+         }
+     }
+
+     return offsets;
+ }
+
+
+ /*
+  * Fill in runtimeData from the compiler's data structures: frame, production,
+  * region, language-element, struct-element, function, generic and literal
+  * tables, the FSM and PDA tables, the per-parser start information, the ids
+  * of the special language elements, and the callback pointers.
+  *
+  * The pattern/constructor info array is allocated and zeroed here;
+  * pat_repl_nodes is left null and its count zero.
+  *
+  * All arrays are allocated with new[] and handed over to runtimeData.
+  */
+ void Compiler::makeRuntimeData()
+ {
+     long count = 0;
+
+     /*
+      * ProdLengths
+      * ProdLhsIs
+      * ProdNames
+      * ProdCodeBlocks
+      * ProdCodeBlockLens
+      */
+
+     /* One frame_info per frame id, zero-filled so frames that no code block
+      * claims below are left with null/zero fields. */
+     runtimeData->frame_info = new frame_info[nextFrameId];
+     runtimeData->num_frames = nextFrameId;
+     memset( runtimeData->frame_info, 0, sizeof(struct frame_info) * nextFrameId );
+
+     /*
+      * Init code block.
+      */
+     if ( rootCodeBlock == 0 ) {
+         runtimeData->root_code = 0;
+         runtimeData->root_code_len = 0;
+         runtimeData->root_frame_id = 0;
+     }
+     else {
+         runtimeData->root_code = rootCodeBlock->codeWC.data;
+         runtimeData->root_code_len = rootCodeBlock->codeWC.length();
+         runtimeData->root_frame_id = rootCodeBlock->frameId;
+
+         /* The root frame is described only when there is a root code block.
+          * This used to run unconditionally and dereferenced the null
+          * rootCodeBlock that the test above allows for. */
+         struct frame_info &rootFrame = runtimeData->frame_info[rootCodeBlock->frameId];
+
+         rootFrame.codeWV = 0;
+         rootFrame.codeLenWV = 0;
+
+         rootFrame.locals = makeLocalInfo( rootCodeBlock->locals );
+         rootFrame.locals_len = rootCodeBlock->locals.locals.length();
+
+         rootFrame.frame_size = rootLocalFrame->size();
+         rootFrame.arg_size = 0;
+         rootFrame.ret_tree = false;
+     }
+
+     /*
+      * prodInfo
+      */
+     count = prodList.length();
+     runtimeData->prod_info = new prod_info[count];
+     runtimeData->num_prods = count;
+
+     /* count is reused as the running index into prod_info. */
+     count = 0;
+     for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) {
+         runtimeData->prod_info[count].lhs_id = prod->prodName->id;
+         runtimeData->prod_info[count].prod_num = prod->prodNum;
+         runtimeData->prod_info[count].length = prod->fsmLength;
+         runtimeData->prod_info[count].name = prod->data;
+         runtimeData->prod_info[count].frame_id = -1;
+
+         /* A production with a reduction block owns a frame. */
+         CodeBlock *block = prod->redBlock;
+         if ( block != 0 ) {
+             runtimeData->prod_info[count].frame_id = block->frameId;
+             runtimeData->frame_info[block->frameId].codeWV = block->codeWV.data;
+             runtimeData->frame_info[block->frameId].codeLenWV = block->codeWV.length();
+
+             runtimeData->frame_info[block->frameId].locals = makeLocalInfo( block->locals );
+             runtimeData->frame_info[block->frameId].locals_len = block->locals.locals.length();
+
+             runtimeData->frame_info[block->frameId].frame_size = block->localFrame->size();
+             runtimeData->frame_info[block->frameId].arg_size = 0;
+             runtimeData->frame_info[block->frameId].ret_tree = false;
+         }
+
+         runtimeData->prod_info[count].lhs_upref = true;
+         runtimeData->prod_info[count].copy = prod->copy.data;
+         /* NOTE(review): the halving suggests prod->copy stores entries in
+          * pairs -- confirm against where copy is built. */
+         runtimeData->prod_info[count].copy_len = prod->copy.length() / 2;
+         count += 1;
+     }
+
+     /*
+      * regionInfo
+      *
+      * Entry 0 is a placeholder; region ids are shifted up by one.
+      */
+     runtimeData->num_regions = regionList.length()+1;
+     runtimeData->region_info = new region_info[runtimeData->num_regions];
+     memset( runtimeData->region_info, 0,
+             sizeof(struct region_info) * runtimeData->num_regions );
+
+     runtimeData->region_info[0].default_token = -1;
+     runtimeData->region_info[0].eof_frame_id = -1;
+     runtimeData->region_info[0].ci_lel_id = 0;
+
+     for ( RegionList::Iter reg = regionList; reg.lte(); reg++ ) {
+         long regId = reg->id+1;
+         runtimeData->region_info[regId].default_token =
+                 reg->impl->defaultTokenInstance == 0 ?
+                 -1 :
+                 reg->impl->defaultTokenInstance->tokenDef->tdLangEl->id;
+         runtimeData->region_info[regId].eof_frame_id = -1;
+         runtimeData->region_info[regId].ci_lel_id = reg->zeroLel != 0 ? reg->zeroLel->id : 0;
+
+         /* A region with a pre-eof block owns a frame. */
+         CodeBlock *block = reg->preEofBlock;
+         if ( block != 0 ) {
+             runtimeData->region_info[regId].eof_frame_id = block->frameId;
+             runtimeData->frame_info[block->frameId].codeWV = block->codeWV.data;
+             runtimeData->frame_info[block->frameId].codeLenWV = block->codeWV.length();
+
+             runtimeData->frame_info[block->frameId].locals = makeLocalInfo( block->locals );
+             runtimeData->frame_info[block->frameId].locals_len = block->locals.locals.length();
+
+             runtimeData->frame_info[block->frameId].frame_size = block->localFrame->size();
+             runtimeData->frame_info[block->frameId].arg_size = 0;
+             runtimeData->frame_info[block->frameId].ret_tree = false;
+         }
+     }
+
+     /*
+      * lelInfo
+      */
+
+     count = nextLelId;
+     runtimeData->lel_info = new lang_el_info[count];
+     runtimeData->num_lang_els = count;
+     memset( runtimeData->lel_info, 0, sizeof(struct lang_el_info)*count );
+
+     for ( int i = 0; i < nextLelId; i++ ) {
+         LangEl *lel = langElIndex[i];
+         if ( lel != 0 ) {
+             runtimeData->lel_info[i].name = lel->fullLit;
+             runtimeData->lel_info[i].xml_tag = lel->xmlTag;
+             runtimeData->lel_info[i].repeat = lel->isRepeat;
+             runtimeData->lel_info[i].list = lel->isList;
+             runtimeData->lel_info[i].literal = lel->isLiteral;
+             runtimeData->lel_info[i].ignore = lel->isIgnore;
+             runtimeData->lel_info[i].frame_id = -1;
+
+             /* A language element with a translate block owns a frame. */
+             CodeBlock *block = lel->transBlock;
+             if ( block != 0 ) {
+                 runtimeData->lel_info[i].frame_id = block->frameId;
+                 runtimeData->frame_info[block->frameId].codeWV = block->codeWV.data;
+                 runtimeData->frame_info[block->frameId].codeLenWV = block->codeWV.length();
+
+                 runtimeData->frame_info[block->frameId].locals = makeLocalInfo( block->locals );
+                 runtimeData->frame_info[block->frameId].locals_len = block->locals.locals.length();
+
+                 runtimeData->frame_info[block->frameId].frame_size = block->localFrame->size();
+                 runtimeData->frame_info[block->frameId].arg_size = 0;
+                 runtimeData->frame_info[block->frameId].ret_tree = false;
+             }
+
+             runtimeData->lel_info[i].object_type_id =
+                     lel->objectDef == 0 ? 0 : lel->objectDef->id;
+             runtimeData->lel_info[i].ofi_offset = lel->ofiOffset;
+             runtimeData->lel_info[i].object_length =
+                     lel->objectDef != 0 ? lel->objectDef->size() : 0;
+
+ //          runtimeData->lelInfo[i].contextTypeId = 0;
+ //                  lel->context == 0 ? 0 : lel->context->contextObjDef->id;
+ //          runtimeData->lelInfo[i].contextLength = 0; //lel->context == 0 ? 0 :
+ //                  lel->context->contextObjDef->size();
+ //          if ( lel->context != 0 ) {
+ //              cout << "type: " << runtimeData->lelInfo[i].contextTypeId << " length: " <<
+ //                      runtimeData->lelInfo[i].contextLength << endl;
+ //          }
+
+             runtimeData->lel_info[i].term_dup_id = lel->termDup == 0 ? 0 : lel->termDup->id;
+
+             /* NOTE(review): the guard tests join->context but the value
+              * read is join->mark -- confirm that a non-null context always
+              * implies a non-null mark. */
+             if ( lel->tokenDef != 0 && lel->tokenDef->join != 0 &&
+                     lel->tokenDef->join->context != 0 )
+                 runtimeData->lel_info[i].mark_id = lel->tokenDef->join->mark->markId;
+             else
+                 runtimeData->lel_info[i].mark_id = -1;
+
+             runtimeData->lel_info[i].num_capture_attr = 0;
+         }
+         else {
+             /* Ids with no language element get a recognisable placeholder. */
+             memset(&runtimeData->lel_info[i], 0, sizeof(struct lang_el_info) );
+             runtimeData->lel_info[i].name = "__UNUSED";
+             runtimeData->lel_info[i].xml_tag = "__UNUSED";
+             runtimeData->lel_info[i].frame_id = -1;
+         }
+     }
+
+     /*
+      * struct_el_info
+      */
+
+     count = structEls.length();
+     runtimeData->sel_info = new struct_el_info[count];
+     runtimeData->num_struct_els = count;
+     memset( runtimeData->sel_info, 0, sizeof(struct struct_el_info)*count );
+     StructElList::Iter sel = structEls;
+     for ( int i = 0; i < count; i++, sel++ ) {
+         int treesLen;
+         runtimeData->sel_info[i].size = sel->structDef->objectDef->size();
+         runtimeData->sel_info[i].trees = makeTrees( sel->structDef->objectDef, treesLen );
+         runtimeData->sel_info[i].trees_len = treesLen;
+     }
+
+     /*
+      * function_info
+      */
+     count = functionList.length();
+
+     runtimeData->function_info = new function_info[count];
+     runtimeData->num_functions = count;
+     memset( runtimeData->function_info, 0, sizeof(struct function_info)*count );
+     for ( FunctionList::Iter func = functionList; func.lte(); func++ ) {
+
+         runtimeData->function_info[func->funcId].frame_id = -1;
+
+         CodeBlock *block = func->codeBlock;
+         if ( block != 0 ) {
+             runtimeData->function_info[func->funcId].frame_id = block->frameId;
+
+             /* Name. */
+             runtimeData->frame_info[block->frameId].name = func->name;
+
+             /* Code. Function frames carry both the WV and the WC code. */
+             runtimeData->frame_info[block->frameId].codeWV = block->codeWV.data;
+             runtimeData->frame_info[block->frameId].codeLenWV = block->codeWV.length();
+             runtimeData->frame_info[block->frameId].codeWC = block->codeWC.data;
+             runtimeData->frame_info[block->frameId].codeLenWC = block->codeWC.length();
+
+             /* Locals. */
+             runtimeData->frame_info[block->frameId].locals = makeLocalInfo( block->locals );
+             runtimeData->frame_info[block->frameId].locals_len = block->locals.locals.length();
+
+             /* Meta. */
+             runtimeData->frame_info[block->frameId].frame_size = func->localFrame->size();
+             runtimeData->frame_info[block->frameId].arg_size = func->paramListSize;
+
+             bool retTree = false;
+             if ( func->typeRef ) {
+                 UniqueType *ut = func->typeRef->resolveType( this );
+                 retTree = ut->tree();
+             }
+             runtimeData->frame_info[block->frameId].ret_tree = retTree;
+         }
+
+         runtimeData->function_info[func->funcId].frame_size = func->localFrame->size();
+         runtimeData->function_info[func->funcId].arg_size = func->paramListSize;
+     }
+
+     /*
+      * pat_cons_info
+      */
+
+     /* Filled in later after patterns are parsed. */
+     runtimeData->pat_repl_info = new pat_cons_info[nextPatConsId];
+     memset( runtimeData->pat_repl_info, 0, sizeof(struct pat_cons_info) * nextPatConsId );
+     runtimeData->num_patterns = nextPatConsId;
+     runtimeData->pat_repl_nodes = 0;
+     runtimeData->num_pattern_nodes = 0;
+
+
+     /*
+      * generic_info
+      *
+      * The count starts at one: entry 0 is zeroed and the remaining entries
+      * are indexed by each generic's own id.
+      */
+     count = 1;
+     for ( NamespaceList::Iter nspace = namespaceList; nspace.lte(); nspace++ )
+         count += nspace->genericList.length();
+     assert( count == nextGenericId );
+
+     runtimeData->generic_info = new generic_info[count];
+     runtimeData->num_generics = count;
+     memset( &runtimeData->generic_info[0], 0, sizeof(struct generic_info) );
+     for ( NamespaceList::Iter nspace = namespaceList; nspace.lte(); nspace++ ) {
+         for ( GenericList::Iter gen = nspace->genericList; gen.lte(); gen++ ) {
+             runtimeData->generic_info[gen->id].type = gen->typeId;
+
+             runtimeData->generic_info[gen->id].el_struct_id =
+                     ( gen->typeId == GEN_MAP || gen->typeId == GEN_LIST ) ?
+                     gen->elUt->structEl->id : -1;
+             runtimeData->generic_info[gen->id].el_offset =
+                     gen->el != 0 ? gen->el->offset : -1;
+
+             runtimeData->generic_info[gen->id].key_type =
+                     gen->keyUt != 0 ? gen->keyUt->typeId : TYPE_NOTYPE;
+             runtimeData->generic_info[gen->id].key_offset = 0;
+
+             runtimeData->generic_info[gen->id].value_type =
+                     gen->valueUt != 0 ? gen->valueUt->typeId : TYPE_NOTYPE;
+             runtimeData->generic_info[gen->id].value_offset = 0;
+
+             runtimeData->generic_info[gen->id].parser_id =
+                     gen->typeId == GEN_PARSER ? gen->elUt->langEl->parserId : -1;
+         }
+     }
+
+     runtimeData->argv_generic_id = argvTypeRef->generic->id;
+     runtimeData->stds_generic_id = stdsTypeRef->generic->id;
+
+     /*
+      * Literals
+      *
+      * Each literal is copied into its own NUL-terminated buffer and stored at
+      * the index given by the map's value.
+      */
+     runtimeData->num_literals = literalStrings.length();
+     runtimeData->litdata = new const char *[literalStrings.length()];
+     runtimeData->litlen = new long [literalStrings.length()];
+     runtimeData->literals = 0;
+     for ( StringMap::Iter el = literalStrings; el.lte(); el++ ) {
+         /* Data. */
+         char *data = new char[el->key.length()+1];
+         memcpy( data, el->key.data, el->key.length() );
+         data[el->key.length()] = 0;
+         runtimeData->litdata[el->value] = data;
+
+         /* Length. */
+         runtimeData->litlen[el->value] = el->key.length();
+     }
+
+     /* Captured attributes. Loop over tokens and count first. The counting
+      * and filling code is disabled, so the table is always empty. */
+     long numCapturedAttr = 0;
+ //  for ( RegionList::Iter reg = regionList; reg.lte(); reg++ ) {
+ //      for ( TokenInstanceListReg::Iter td = reg->tokenInstanceList; td.lte(); td++ )
+ //          numCapturedAttr += td->reCaptureVect.length();
+ //  }
+     runtimeData->capture_attr = new CaptureAttr[numCapturedAttr];
+     runtimeData->num_captured_attr = numCapturedAttr;
+     memset( runtimeData->capture_attr, 0, sizeof( CaptureAttr ) * numCapturedAttr );
+
+     count = 0;
+ //  for ( RegionList::Iter reg = regionList; reg.lte(); reg++ ) {
+ //      for ( TokenInstanceListReg::Iter td = reg->tokenInstanceList; td.lte(); td++ ) {
+ //          runtimeData->lelInfo[td->token->id].captureAttr = count;
+ //          runtimeData->lelInfo[td->token->id].numCaptureAttr = td->reCaptureVect.length();
+ //          for ( ReCaptureVect::Iter c = td->reCaptureVect; c.lte(); c++ ) {
+ //              runtimeData->captureAttr[count].mark_enter = c->markEnter->markId;
+ //              runtimeData->captureAttr[count].mark_leave = c->markLeave->markId;
+ //              runtimeData->captureAttr[count].offset = c->objField->offset;
+ //
+ //              count += 1;
+ //          }
+ //      }
+ //  }
+
+     runtimeData->fsm_tables = fsmTables;
+     runtimeData->pda_tables = pdaTables;
+
+     /* FIXME: need a parser descriptor. */
+     runtimeData->start_states = new int[nextParserId];
+     runtimeData->eof_lel_ids = new int[nextParserId];
+     runtimeData->parser_lel_ids = new int[nextParserId];
+     runtimeData->num_parsers = nextParserId;
+     for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
+         /* Only language elements that were given a parser id take part. */
+         if ( lel->parserId >= 0 ) {
+             runtimeData->start_states[lel->parserId] = lel->startState->stateNum;
+             runtimeData->eof_lel_ids[lel->parserId] = lel->eofLel->id;
+             runtimeData->parser_lel_ids[lel->parserId] = lel->id;
+         }
+     }
+
+     runtimeData->global_size = globalObjectDef->size();
+
+     /*
+      * Boundary between terms and non-terms.
+      */
+     runtimeData->first_non_term_id = firstNonTermId;
+
+     /*
+      * Boundary between trees and structs
+      */
+     runtimeData->first_struct_el_id = firstStructElId;
+
+     /* Special trees. */
+     runtimeData->integer_id = -1; //intLangEl->id;
+     runtimeData->string_id = strLangEl->id;
+     runtimeData->any_id = anyLangEl->id;
+     runtimeData->eof_id = 0; //eofLangEl->id;
+     runtimeData->no_token_id = noTokenLangEl->id;
+     runtimeData->global_id = globalSel->id;
+     runtimeData->argv_el_id = argvElSel->id;
+     runtimeData->stds_el_id = stdsElSel->id;
+     runtimeData->struct_inbuilt_id = structInbuiltId;
+     runtimeData->struct_stream_id = structStreamId;
+     runtimeData->struct_input_id = structInputId;
+
+     /* Callbacks into the compiler-internal implementations. */
+     runtimeData->fsm_execute = &internalFsmExecute;
+     runtimeData->send_named_lang_el = &internalSendNamedLangEl;
+     runtimeData->init_bindings = &internalInitBindings;
+     runtimeData->pop_binding = &internalPopBinding;
+
+     runtimeData->host_call = &internal_host_call;
+     runtimeData->commit_reduce_forward = &internal_commit_reduce_forward;
+     runtimeData->commit_union_sz = &internal_commit_union_sz;
+     runtimeData->init_need = &internal_init_need;
+     runtimeData->reducer_need_tok = &internal_reducer_need_tok;
+     runtimeData->reducer_need_ign = &internal_reducer_need_ign;
+ }
+
+/* Borrow alg->state for mapsTo. */
+ /*
+  * Add to `count` the number of pattern nodes needed for the kid list starting
+  * at `kid`, walked in step with the parse-tree list starting at `parseTree`.
+  *
+  * Each kid contributes one node plus one per entry in its left and right
+  * ignore lists. Children are descended into only when the parse-tree node is
+  * neither PF_NAMED nor PF_ARTIFICIAL and the tree actually has a child.
+  */
+ void countNodes( program_t *prg, int &count, parse_tree_t *parseTree, kid_t *kid )
+ {
+     if ( kid == 0 )
+         return;
+
+     /* The kid itself. */
+     count += 1;
+
+     /* Ignore lists are flat: no recursion is needed for them. */
+     tree_t *leftIgnore = tree_left_ignore( prg, kid->tree );
+     if ( leftIgnore != 0 ) {
+         for ( kid_t *ig = leftIgnore->child; ig != 0; ig = ig->next )
+             count += 1;
+     }
+
+     tree_t *rightIgnore = tree_right_ignore( prg, kid->tree );
+     if ( rightIgnore != 0 ) {
+         for ( kid_t *ig = rightIgnore->child; ig != 0; ig = ig->next )
+             count += 1;
+     }
+
+     //count += prg->rtd->lelInfo[kid->tree->id].numCaptureAttr;
+
+     /* Down into the children, when they are part of the pattern. */
+     const bool named = ( parseTree->flags & PF_NAMED ) != 0;
+     const bool artificial = ( parseTree->flags & PF_ARTIFICIAL ) != 0;
+     if ( !named && !artificial && tree_child( prg, kid->tree ) != 0 )
+         countNodes( prg, count, parseTree->child, tree_child( prg, kid->tree ) );
+
+     /* Across to the next sibling. */
+     countNodes( prg, count, parseTree->next, kid->next );
+ }
+
+ /*
+  * Fill nodes[ind] from `kid` / `parseTree`, then recurse into the child list
+  * and the sibling list.
+  *
+  *   nextAvail  next unused slot in `nodes`; advanced for every slot claimed
+  *              (ignore entries, the child, the next sibling).
+  *   bindings   binding list compared against parseTree after the children
+  *              have been processed.
+  *   bindId     index of the next binding to match; advanced on a match.
+  *   nodes      output array.
+  *   ind        slot the caller reserved for this kid. Not read when kid is
+  *              null, in which case the call does nothing.
+  *
+  * Slots are handed out in a fixed order (left ignores, right ignores, child,
+  * child's subtree, next sibling), so the statement order below matters.
+  */
+ void fillNodes( program_t *prg, int &nextAvail, struct bindings *bindings, long &bindId,
+ struct pat_cons_node *nodes, parse_tree_t *parseTree, kid_t *kid, int ind )
+ {
+ if ( kid != 0 ) {
+ struct pat_cons_node &node = nodes[ind];
+
+ /* The child kid and the child parse tree are taken only when the parse
+ * tree node is neither named nor artificial and the tree has a child;
+ * the same condition is used by countNodes when sizing the array. */
+ kid_t *child =
+ !( parseTree->flags & PF_NAMED ) &&
+ !( parseTree->flags & PF_ARTIFICIAL ) &&
+ tree_child( prg, kid->tree ) != 0
+ ?
+ tree_child( prg, kid->tree ) : 0;
+
+ parse_tree_t *ptChild =
+ !( parseTree->flags & PF_NAMED ) &&
+ !( parseTree->flags & PF_ARTIFICIAL ) &&
+ tree_child( prg, kid->tree ) != 0
+ ?
+ parseTree->child : 0;
+
+ /* Set up the fields. */
+ node.id = kid->tree->id;
+ node.prod_num = kid->tree->prod_num;
+ node.length = string_length( kid->tree->tokdata );
+ node.data = string_data( kid->tree->tokdata );
+
+ /* Ignore items. */
+ /* Left ignores: left_ignore is the slot of the first entry, or -1 when
+ * there are none. */
+ tree_t *ignoreList = tree_left_ignore( prg, kid->tree );
+ kid_t *ignore = ignoreList == 0 ? 0 : ignoreList->child;
+ node.left_ignore = ignore == 0 ? -1 : nextAvail;
+
+ while ( ignore != 0 ) {
+ /* This inner `node` shadows the outer one: it is the slot for the
+ * ignore entry. Entries take consecutive slots and are chained through
+ * `next`, with -1 ending the chain. */
+ struct pat_cons_node &node = nodes[nextAvail++];
+
+ memset( &node, 0, sizeof(struct pat_cons_node) );
+ node.id = ignore->tree->id;
+ node.prod_num = ignore->tree->prod_num;
+ node.next = ignore->next == 0 ? -1 : nextAvail;
+
+ node.length = string_length( ignore->tree->tokdata );
+ node.data = string_data( ignore->tree->tokdata );
+
+ ignore = ignore->next;
+ }
+
+ /* Ignore items. */
+ /* Right ignores, laid out the same way as the left ones. */
+ ignoreList = tree_right_ignore( prg, kid->tree );
+ ignore = ignoreList == 0 ? 0 : ignoreList->child;
+ node.right_ignore = ignore == 0 ? -1 : nextAvail;
+
+ while ( ignore != 0 ) {
+ /* Shadows the outer `node` again; see the left-ignore loop. */
+ struct pat_cons_node &node = nodes[nextAvail++];
+
+ memset( &node, 0, sizeof(struct pat_cons_node) );
+ node.id = ignore->tree->id;
+ node.prod_num = ignore->tree->prod_num;
+ node.next = ignore->next == 0 ? -1 : nextAvail;
+
+ node.length = string_length( ignore->tree->tokdata );
+ node.data = string_data( ignore->tree->tokdata );
+
+ ignore = ignore->next;
+ }
+
+ ///* The captured attributes. */
+ //for ( int i = 0; i < prg->rtd->lelInfo[kid->tree->id].numCaptureAttr; i++ ) {
+ // CaptureAttr *cap = prg->rtd->captureAttr +
+ // prg->rtd->lelInfo[kid->tree->id].captureAttr + i;
+ //
+ // tree_t *attr = colm_get_attr( kid->tree, cap->offset );
+ //
+ // struct pat_cons_node &node = nodes[nextAvail++];
+ // memset( &node, 0, sizeof(struct pat_cons_node) );
+ //
+ // node.id = attr->id;
+ // node.prodNum = attr->prodNum;
+ // node.length = stringLength( attr->tokdata );
+ // node.data = stringData( attr->tokdata );
+ //}
+
+ /* stop holds the PF_TERM_DUP bit of the parse tree flags. */
+ node.stop = parseTree->flags & PF_TERM_DUP;
+
+ /* Reserve the child's slot before recursing, so the child comes directly
+ * after this kid's ignore entries; -1 means no child. */
+ node.child = child == 0 ? -1 : nextAvail++;
+
+ /* Recurse. */
+ fillNodes( prg, nextAvail, bindings, bindId, nodes, ptChild, child, node.child );
+
+ /* Since the parser is bottom up the bindings are in a bottom up
+ * traversal order. Check after recursing. */
+ node.bind_id = 0;
+ if ( bindId < bindings->length() && bindings->data[bindId] == parseTree ) {
+ /* Remember that binding ids are indexed from one. */
+ node.bind_id = bindId++;
+
+ //cout << "binding match in " << __PRETTY_FUNCTION__ << endl;
+ //cout << "bindId: " << node.bindId << endl;
+ }
+
+ /* Reserve the next sibling's slot; -1 ends the sibling chain. */
+ node.next = kid->next == 0 ? -1 : nextAvail++;
+
+ /* Move to the next child. */
+ fillNodes( prg, nextAvail, bindings, bindId, nodes, parseTree->next, kid->next, node.next );
+ }
+ }
+
+ /*
+  * Build runtimeData->pat_repl_nodes from the parsed patterns and
+  * constructors.
+  *
+  * A first pass over patternList and replList counts the nodes required; the
+  * array is then allocated and a second pass fills it, recording for each
+  * pattern/constructor the offset of its first node and its number of
+  * bindings. The number of slots consumed must equal the count.
+  */
+ void Compiler::fillInPatterns( program_t *prg )
+ {
+     /*
+      * patReplNodes
+      */
+
+     /* Sizing pass. */
+     int total = 0;
+     for ( PatList::Iter pat = patternList; pat.lte(); pat++ ) {
+         parse_tree_t *root = pat->pdaRun->stack_top->next;
+         countNodes( prg, total, root, root->shadow );
+     }
+
+     for ( ConsList::Iter cons = replList; cons.lte(); cons++ ) {
+         parse_tree_t *root = cons->pdaRun->stack_top->next;
+         countNodes( prg, total, root, root->shadow );
+     }
+
+     runtimeData->pat_repl_nodes = new pat_cons_node[total];
+     runtimeData->num_pattern_nodes = total;
+
+     /* Filling pass. */
+     int nextSlot = 0;
+
+     for ( PatList::Iter pat = patternList; pat.lte(); pat++ ) {
+         const int first = nextSlot++;
+         pat_cons_info &info = runtimeData->pat_repl_info[pat->patRepId];
+         info.offset = first;
+
+         /* BindIds are indexed base one. */
+         info.num_bindings = pat->pdaRun->bindings->length() - 1;
+
+         long bindId = 1;
+         parse_tree_t *root = pat->pdaRun->stack_top->next;
+         fillNodes( prg, nextSlot, pat->pdaRun->bindings, bindId,
+                 runtimeData->pat_repl_nodes, root, root->shadow, first );
+     }
+
+     for ( ConsList::Iter cons = replList; cons.lte(); cons++ ) {
+         const int first = nextSlot++;
+         pat_cons_info &info = runtimeData->pat_repl_info[cons->patRepId];
+         info.offset = first;
+
+         /* BindIds are indexed base one. */
+         info.num_bindings = cons->pdaRun->bindings->length() - 1;
+
+         long bindId = 1;
+         parse_tree_t *root = cons->pdaRun->stack_top->next;
+         fillNodes( prg, nextSlot, cons->pdaRun->bindings, bindId,
+                 runtimeData->pat_repl_nodes, root, root->shadow, first );
+     }
+
+     assert( nextSlot == total );
+ }
+
+
+ /*
+  * Find the lowest offset in the first curLen entries of pdaTables->owners at
+  * which the transitions of `state` can be placed without landing on a slot
+  * that already has an owner (any value other than -1). Transitions are laid
+  * out by key, so gaps between keys leave slots unchecked and unclaimed.
+  *
+  * Returns that offset, or curLen when no position below curLen is free.
+  */
+ int Compiler::findIndexOff( struct pda_tables *pdaTables, PdaGraph *pdaGraph, PdaState *state, int &curLen )
+ {
+     int candidate = 0;
+     while ( candidate < curLen ) {
+         bool fits = true;
+         int slot = candidate;
+
+         for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) {
+             if ( pdaTables->owners[slot] != -1 ) {
+                 /* Taken: this candidate is out. */
+                 fits = false;
+                 break;
+             }
+
+             /* Step to the slot of the next transition, skipping the gap
+              * between the two keys. */
+             slot += 1;
+             if ( !trans.last() ) {
+                 TransMap::Iter following = trans.next();
+                 slot += following->key - trans->key - 1;
+             }
+         }
+
+         /* Got through the whole list without a conflict. */
+         if ( fits )
+             return candidate;
+
+         candidate += 1;
+     }
+
+     return curLen;
+ }
+
+ /*
+  * Comparison policy ordering states by the span of their transition keys
+  * (last key minus first key, zero for a state without transitions), widest
+  * span first.
+  */
+ struct CmpSpan
+ {
+     /* Distance between the first and last transition key of a state. */
+     static int keySpan( PdaState *state )
+     {
+         if ( state->transMap.length() <= 0 )
+             return 0;
+
+         TransMap::Iter lowest = state->transMap.first();
+         TransMap::Iter highest = state->transMap.last();
+         return highest->key - lowest->key;
+     }
+
+     /* Returns 1 when state1 has the smaller span, -1 when it has the larger
+      * one, and 0 when the spans are equal. */
+     static int compare( PdaState *state1, PdaState *state2 )
+     {
+         const int span1 = keySpan( state1 );
+         const int span2 = keySpan( state2 );
+
+         if ( span1 == span2 )
+             return 0;
+         return span1 < span2 ? 1 : -1;
+     }
+ };
+
+ /*
+  * Build the parser graph for the given set of parser root elements: generate
+  * the LALR(1) machine, number its states, then run the machine analysis.
+  * Returns the new graph, which the caller owns.
+  */
+ PdaGraph *Compiler::makePdaGraph( LangElSet &parserEls )
+ {
+     /* Debugging aid: list the productions.
+      *   for ( DefList::Iter prod = prodList; prod.lte(); prod++ )
+      *       cerr << prod->prodId << " " << prod->data << endl;
+      */
+
+     PdaGraph *graph = new PdaGraph();
+
+     lalr1GenerateParser( graph, parserEls );
+     graph->setStateNumbers();
+     analyzeMachine( graph, parserEls );
+
+     /* Debugging aid:
+      *   cerr << "NUMBER OF STATES: " << pdaGraph->stateList.length() << endl;
+      */
+
+     return graph;
+ }
+
+ /*
+  * Flatten a PdaGraph into the array form held by struct pda_tables.
+  *
+  * Tables produced, in order: indices/owners (transition slots shared between
+  * states, located through offsets), keys (lowest and highest key per state),
+  * targs, act_inds, actions, commit_len (all per action-set entry), and
+  * token_region_inds, token_regions, token_pre_regions (per state).
+  *
+  * Returns a newly allocated pda_tables; every array in it is allocated with
+  * new[] here.
+  */
+ struct pda_tables *Compiler::makePdaTables( PdaGraph *pdaGraph )
+ {
+ int count, pos;
+ struct pda_tables *pdaTables = new pda_tables;
+
+ /*
+ * Counting max indices.
+ */
+ /* Upper bound on the slots needed: each state's transitions occupy a run
+ * as wide as its key span, and here no two runs are assumed to overlap. */
+ count = 0;
+ for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
+ for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) {
+ count++;
+ if ( ! trans.last() ) {
+ TransMap::Iter next = trans.next();
+ count += next->key - trans->key - 1;
+ }
+ }
+ }
+
+
+ /* Allocate indices and owners. */
+ /* -1 marks a free slot in both arrays. */
+ pdaTables->num_indices = count;
+ pdaTables->indices = new int[count];
+ pdaTables->owners = new int[count];
+ for ( long i = 0; i < count; i++ ) {
+ pdaTables->indices[i] = -1;
+ pdaTables->owners[i] = -1;
+ }
+
+ /* Allocate offsets. */
+ int numStates = pdaGraph->stateList.length();
+ pdaTables->offsets = new unsigned int[numStates];
+ pdaTables->num_states = numStates;
+
+ /* Place transitions into indices/owners */
+ /* The states are first copied into a plain array, in list order. */
+ PdaState **states = new PdaState*[numStates];
+ long ds = 0;
+ for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ )
+ states[ds++] = state;
+
+ /* Sorting based on span length. Gives an improvement, but incurs a
+ * cost. Off for now. */
+ //MergeSort< PdaState*, CmpSpan > mergeSort;
+ //mergeSort.sort( states, numStates );
+
+ /* indLen is the used length of indices/owners so far. For each state the
+ * lowest conflict-free offset below indLen is looked up (indLen itself when
+ * there is none) and the state's transitions are written from there,
+ * leaving the gaps between keys free for other states. */
+ int indLen = 0;
+ for ( int s = 0; s < numStates; s++ ) {
+ PdaState *state = states[s];
+
+ int indOff = findIndexOff( pdaTables, pdaGraph, state, indLen );
+ pdaTables->offsets[state->stateNum] = indOff;
+
+ for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) {
+ pdaTables->indices[indOff] = trans->value->actionSetEl->key.id;
+ pdaTables->owners[indOff] = state->stateNum;
+ indOff++;
+
+ if ( ! trans.last() ) {
+ TransMap::Iter next = trans.next();
+ indOff += next->key - trans->key - 1;
+ }
+ }
+
+ if ( indOff > indLen )
+ indLen = indOff;
+ }
+
+ /* We allocated the max, but compression gives us less. */
+ pdaTables->num_indices = indLen;
+ delete[] states;
+
+
+ /*
+ * Keys
+ */
+ /* Two entries per state: the first and last transition key, or 0 and 0
+ * for a state without transitions. */
+ count = pdaGraph->stateList.length() * 2;;
+ pdaTables->keys = new int[count];
+ pdaTables->num_keys = count;
+
+ count = 0;
+ for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
+ if ( state->transMap.length() == 0 ) {
+ pdaTables->keys[count+0] = 0;
+ pdaTables->keys[count+1] = 0;
+ }
+ else {
+ TransMap::Iter first = state->transMap.first();
+ TransMap::Iter last = state->transMap.last();
+ pdaTables->keys[count+0] = first->key;
+ pdaTables->keys[count+1] = last->key;
+ }
+ count += 2;
+ }
+
+ /*
+ * Targs
+ */
+ /* One target per action-set entry, in action-set iteration order. */
+ count = pdaGraph->actionSet.length();
+ pdaTables->targs = new unsigned int[count];
+ pdaTables->num_targs = count;
+
+ count = 0;
+ for ( PdaActionSet::Iter asi = pdaGraph->actionSet; asi.lte(); asi++ )
+ pdaTables->targs[count++] = asi->key.targ;
+
+ /*
+ * ActInds
+ */
+ /* Start position of each entry's action run inside the actions table.
+ * Every run is followed by one terminator, hence the +1. */
+ count = pdaGraph->actionSet.length();
+ pdaTables->act_inds = new unsigned int[count];
+ pdaTables->num_act_inds = count;
+
+ count = pos = 0;
+ for ( PdaActionSet::Iter asi = pdaGraph->actionSet; asi.lte(); asi++ ) {
+ pdaTables->act_inds[count++] = pos;
+ pos += asi->key.actions.length() + 1;
+ }
+
+ /*
+ * Actions
+ */
+ /* All action runs back to back, each closed by a 0. */
+ count = 0;
+ for ( PdaActionSet::Iter asi = pdaGraph->actionSet; asi.lte(); asi++ )
+ count += asi->key.actions.length() + 1;
+
+ pdaTables->actions = new unsigned int[count];
+ pdaTables->num_actions = count;
+
+ count = 0;
+ for ( PdaActionSet::Iter asi = pdaGraph->actionSet; asi.lte(); asi++ ) {
+ for ( ActDataList::Iter ali = asi->key.actions; ali.lte(); ali++ )
+ pdaTables->actions[count++] = *ali;
+
+ pdaTables->actions[count++] = 0;
+ }
+
+ /*
+ * CommitLen
+ */
+ count = pdaGraph->actionSet.length();
+ pdaTables->commit_len = new int[count];
+ pdaTables->num_commit_len = count;
+
+ count = 0;
+ for ( PdaActionSet::Iter asi = pdaGraph->actionSet; asi.lte(); asi++ )
+ pdaTables->commit_len[count++] = asi->key.commitLen;
+
+ /*
+ * tokenRegionInds. Start at one so region index 0 is null (unset).
+ */
+ /* Start position of each state's run in token_regions (and, since the
+ * layout is the same, in token_pre_regions). */
+ count = 0;
+ pos = 1;
+ pdaTables->token_region_inds = new int[pdaTables->num_states];
+ for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
+ pdaTables->token_region_inds[count++] = pos;
+ pos += state->regions.length() + 1;
+ }
+
+
+ /*
+ * tokenRegions. Build in a null at the beginning.
+ */
+
+ count = 1;
+ for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ )
+ count += state->regions.length() + 1;
+
+ pdaTables->num_region_items = count;
+ pdaTables->token_regions = new int[pdaTables->num_region_items];
+
+ /* Region ids are stored shifted up by one; EOF_REGION is stored as 0, as
+ * is the terminator that closes each state's run. */
+ count = 0;
+ pdaTables->token_regions[count++] = 0;
+ for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
+ for ( RegionVect::Iter reg = state->regions; reg.lte(); reg++ ) {
+ int id = ( *reg == EOF_REGION ) ? 0 : (*reg)->id + 1;
+ pdaTables->token_regions[count++] = id;
+ }
+
+ pdaTables->token_regions[count++] = 0;
+ }
+
+ /*
+ * tokenPreRegions. Build in a null at the beginning.
+ */
+
+ count = 1;
+ for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ )
+ count += state->regions.length() + 1;
+
+ pdaTables->num_pre_region_items = count;
+ pdaTables->token_pre_regions = new int[pdaTables->num_pre_region_items];
+
+ /* Same shape as token_regions: one entry per region of the state, then a
+ * 0. Every entry of a state holds the same value: the id (plus one) of the
+ * state's single pre-region, or -1 when the state has none or its
+ * pre-region's impl is flagged wasEmpty. */
+ count = 0;
+ pdaTables->token_pre_regions[count++] = 0;
+ for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
+ for ( RegionVect::Iter reg = state->regions; reg.lte(); reg++ ) {
+ assert( state->preRegions.length() <= 1 );
+ if ( state->preRegions.length() == 0 || state->preRegions[0]->impl->wasEmpty )
+ pdaTables->token_pre_regions[count++] = -1;
+ else
+ pdaTables->token_pre_regions[count++] = state->preRegions[0]->id + 1;
+ }
+
+ pdaTables->token_pre_regions[count++] = 0;
+ }
+
+
+ return pdaTables;
+ }
+
+ /*
+  * Build the parser for the given root elements: the graph first, then the
+  * tables derived from it. Both are stored on the compiler.
+  */
+ void Compiler::makeParser( LangElSet &parserEls )
+ {
+     PdaGraph *graph = makePdaGraph( parserEls );
+     pdaGraph = graph;
+
+     pdaTables = makePdaTables( graph );
+ }
+
diff --git a/src/pdacodegen.cc b/src/pdacodegen.cc
new file mode 100644
index 00000000..d6435ea9
--- /dev/null
+++ b/src/pdacodegen.cc
@@ -0,0 +1,698 @@
+/*
+ * Copyright 2006-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <string.h>
+
+#include <iostream>
+
+#include "compiler.h"
+#include "pdacodegen.h"
+
+using std::cerr;
+using std::endl;
+
+#define FRESH_BLOCK 8128
+#define act_sb "0x1"
+#define act_rb "0x2"
+#define lower "0x0000ffff"
+#define upper "0xffff0000"
+
+/* Write length bytes starting at path to out, escaped so the result can sit
+ * between double quotes in generated C source.
+ *
+ * Backslash, double quote and the control characters that have a mnemonic C
+ * escape are written using that escape. Any other ASCII control byte
+ * (including NUL and DEL) is written as a three-digit octal escape: a raw NUL
+ * or control byte inside a source string literal is not portable, and using
+ * exactly three digits keeps a following digit character from being absorbed
+ * into the escape. All remaining bytes, including those >= 0x80, are passed
+ * through unchanged. */
+void escapeLiteralString( std::ostream &out, const char *path, int length )
+{
+	for ( const char *pc = path, *end = path+length; pc != end; pc++ ) {
+		switch ( *pc ) {
+			case '\\': out << "\\\\"; break;
+			case '"': out << "\\\""; break;
+			case '\a': out << "\\a"; break;
+			case '\b': out << "\\b"; break;
+			case '\t': out << "\\t"; break;
+			case '\n': out << "\\n"; break;
+			case '\v': out << "\\v"; break;
+			case '\f': out << "\\f"; break;
+			case '\r': out << "\\r"; break;
+			default: {
+				const unsigned char uc = (unsigned char)*pc;
+				if ( uc < 0x20 || uc == 0x7f ) {
+					/* Control byte without a mnemonic escape. */
+					out << '\\'
+						<< (char)( '0' + ( ( uc >> 6 ) & 0x7 ) )
+						<< (char)( '0' + ( ( uc >> 3 ) & 0x7 ) )
+						<< (char)( '0' + ( uc & 0x7 ) );
+				}
+				else {
+					out << *pc;
+				}
+				break;
+			}
+		}
+	}
+}
+
+/* Convenience overload for NUL-terminated input: the length is taken with
+ * strlen and the work is delegated to the three-argument overload. */
+void escapeLiteralString( std::ostream &out, const char *path )
+{
+	const int length = strlen( path );
+	escapeLiteralString( out, path, length );
+}
+
+/* Emit an extern declaration of the colm_sections object named by
+ * objectName, followed by one blank line. */
+void PdaCodeGen::defineRuntime()
+{
+	out << "extern struct colm_sections " << objectName << ";\n\n";
+}
+
+/* Write the contents of runtimeData to out as C source.
+ *
+ * The output consists of a series of static arrays (per-frame code blocks and
+ * locals, per-production copy blocks, root code, language element info,
+ * struct element info, frame/production/pattern/function/region/generic
+ * info, literal tables, parser start states and capture attributes),
+ * prototypes for the <objectName>_* hooks, and finally the initializer of
+ * the colm_sections object named by objectName, which refers to all of the
+ * arrays above. The initializer is positional, so the order in which values
+ * are written below has to follow the field order of struct colm_sections.
+ *
+ * The pdaTables parameter is not read in this function; the initializer
+ * refers to the generated symbol pid_0_pdaTables by name instead. */
+void PdaCodeGen::writeRuntimeData( colm_sections *runtimeData, struct pda_tables *pdaTables )
+{
+ /*
+ * Blocks of code in frames.
+ */
+ for ( int i = 0; i < runtimeData->num_frames; i++ ) {
+ /* FIXME: horrible code cloning going on here. */
+ if ( runtimeData->frame_info[i].codeLenWV > 0 ) {
+ out << "static code_t code_" << i << "_wv[] = {\n\t";
+
+ code_t *block = runtimeData->frame_info[i].codeWV;
+ for ( int j = 0; j < runtimeData->frame_info[i].codeLenWV; j++ ) {
+ out << (unsigned long) block[j];
+
+ /* Comma between elements, line break after every eighth. */
+ if ( j < runtimeData->frame_info[i].codeLenWV-1 ) {
+ out << ", ";
+ if ( (j+1) % 8 == 0 )
+ out << "\n\t";
+ }
+ }
+ out << "\n};\n\n";
+ }
+
+ if ( runtimeData->frame_info[i].codeLenWC > 0 ) {
+ out << "static code_t code_" << i << "_wc[] = {\n\t";
+
+ code_t *block = runtimeData->frame_info[i].codeWC;
+ for ( int j = 0; j < runtimeData->frame_info[i].codeLenWC; j++ ) {
+ out << (unsigned long) block[j];
+
+ if ( j < runtimeData->frame_info[i].codeLenWC-1 ) {
+ out << ", ";
+ if ( (j+1) % 8 == 0 )
+ out << "\n\t";
+ }
+ }
+ out << "\n};\n\n";
+ }
+
+ if ( runtimeData->frame_info[i].locals_len > 0 ) {
+ out << "static struct local_info locals_" << i << "[] = {\n\t";
+
+ struct local_info *li = runtimeData->frame_info[i].locals;
+ for ( int j = 0; j < runtimeData->frame_info[i].locals_len; j++ ) {
+ out << "{ " << (int)li[j].type << ", " << li[j].offset << " }";
+
+ if ( j < runtimeData->frame_info[i].locals_len-1 ) {
+ out << ", ";
+ if ( (j+1) % 8 == 0 )
+ out << "\n\t";
+ }
+ }
+ out << "\n};\n\n";
+ }
+ }
+
+ /*
+ * Blocks in production info.
+ */
+ for ( int i = 0; i < runtimeData->num_prods; i++ ) {
+ if ( runtimeData->prod_info[i].copy_len > 0 ) {
+ out << "static unsigned char copy_" << i << "[] = {\n\t";
+
+ /* Two bytes are written per copy entry, so the block is read as
+ * copy_len pairs. */
+ unsigned char *block = runtimeData->prod_info[i].copy;
+ for ( int j = 0; j < runtimeData->prod_info[i].copy_len; j++ ) {
+ out << (long) block[j*2] << ", " << (long) block[j*2+1];
+
+ if ( j < runtimeData->prod_info[i].copy_len-1 ) {
+ out << ", ";
+ if ( (j+1) % 8 == 0 )
+ out << "\n\t";
+ }
+ }
+ out << "\n};\n\n";
+ }
+ }
+
+ /*
+ * Init code.
+ */
+ out << "static code_t " << rootCode() << "[] = {\n\t";
+ code_t *block = runtimeData->root_code ;
+ for ( int j = 0; j < runtimeData->root_code_len; j++ ) {
+ /* NOTE(review): the frame code blocks above cast to unsigned long,
+ * this one casts to unsigned int. */
+ out << (unsigned int) block[j];
+
+ if ( j < runtimeData->root_code_len-1 ) {
+ out << ", ";
+ if ( (j+1) % 8 == 0 )
+ out << "\n\t";
+ }
+ }
+ out << "\n};\n\n";
+
+ /*
+ * lelInfo
+ */
+ out << "static struct lang_el_info " << lelInfo() << "[] = {\n";
+ for ( int i = 0; i < runtimeData->num_lang_els; i++ ) {
+ struct lang_el_info *el = &runtimeData->lel_info[i];
+ out << "\t{";
+
+ /* Name. */
+ out << " \"";
+ escapeLiteralString( out, el->name );
+ out << "\", ";
+
+ /* XML tag. */
+ out << " \"";
+ escapeLiteralString( out, el->xml_tag );
+ out << "\", ";
+
+ /* Repeat, list, literal and ignore flags, followed by the remaining
+ * numeric fields. */
+ out << (int)el->repeat << ", ";
+ out << (int)el->list << ", ";
+ out << (int)el->literal << ", ";
+ out << (int)el->ignore << ", ";
+ out << el->frame_id << ", ";
+ out << el->object_type_id << ", ";
+ out << el->ofi_offset << ", ";
+ out << el->object_length << ", ";
+ out << el->term_dup_id << ", ";
+ out << el->mark_id << ", ";
+ out << el->capture_attr << ", ";
+ out << el->num_capture_attr;
+
+ out << " }";
+
+ if ( i < runtimeData->num_lang_els-1 )
+ out << ",\n";
+ }
+ out << "\n};\n\n";
+
+
+ /* One struct_trees_<i> array for every struct element that has trees. */
+ for ( int i = 0; i < runtimeData->num_struct_els; i++ ) {
+ struct struct_el_info *el = &runtimeData->sel_info[i];
+ if ( el->trees_len > 0 ) {
+ out << "static short struct_trees_" << i << "[] = {\n\t";
+
+ short *ti = el->trees;
+ for ( int j = 0; j < el->trees_len; j++ )
+ out << ti[j] << ", ";
+ out << "\n};\n\n";
+ }
+ }
+
+ /*
+ * selInfo
+ */
+ out << "static struct struct_el_info " << selInfo() << "[] = {\n";
+ for ( int i = 0; i < runtimeData->num_struct_els; i++ ) {
+ struct struct_el_info *el = &runtimeData->sel_info[i];
+ out << "\t{ ";
+ out << el->size << ", ";
+
+ /* trees. */
+ if ( el->trees_len > 0 )
+ out << "struct_trees_" << i << ", ";
+ else
+ out << "0, ";
+ out << el->trees_len << ", ";
+
+ out << " },\n";
+ }
+ out << "\n};\n\n";
+
+ /*
+ * frameInfo
+ */
+ out << "static struct frame_info " << frameInfo() << "[] = {\n";
+ for ( int i = 0; i < runtimeData->num_frames; i++ ) {
+ out << "\t{ ";
+
+ /* The Name. Written without escaping, unlike the language element
+ * names above. */
+ if ( runtimeData->frame_info[i].name )
+ out << "\"" << runtimeData->frame_info[i].name << "\", ";
+ else
+ out << "\"\", ";
+
+ if ( runtimeData->frame_info[i].codeLenWV > 0 )
+ out << "code_" << i << "_wv, ";
+ else
+ out << "0, ";
+ out << runtimeData->frame_info[i].codeLenWV << ", ";
+
+ if ( runtimeData->frame_info[i].codeLenWC > 0 )
+ out << "code_" << i << "_wc, ";
+ else
+ out << "0, ";
+ out << runtimeData->frame_info[i].codeLenWC << ", ";
+
+ /* locals. */
+ if ( runtimeData->frame_info[i].locals_len > 0 )
+ out << "locals_" << i << ", ";
+ else
+ out << "0, ";
+
+ out << runtimeData->frame_info[i].locals_len << ", ";
+
+ out <<
+ runtimeData->frame_info[i].arg_size << ", " <<
+ runtimeData->frame_info[i].frame_size;
+
+ out << " }";
+
+ if ( i < runtimeData->num_frames-1 )
+ out << ",\n";
+ }
+ out << "\n};\n\n";
+
+
+ /*
+ * prodInfo
+ */
+ out << "static struct prod_info " << prodInfo() << "[] = {\n";
+ for ( int i = 0; i < runtimeData->num_prods; i++ ) {
+ out << "\t{ ";
+
+ out << runtimeData->prod_info[i].lhs_id << ", ";
+ out << runtimeData->prod_info[i].prod_num << ", ";
+ out << runtimeData->prod_info[i].length << ", ";
+
+ /* The production name is written without escaping. */
+ out <<
+ '"' << runtimeData->prod_info[i].name << "\", " <<
+ runtimeData->prod_info[i].frame_id << ", " <<
+ (int)runtimeData->prod_info[i].lhs_upref << ", ";
+
+ if ( runtimeData->prod_info[i].copy_len > 0 )
+ out << "copy_" << i << ", ";
+ else
+ out << "0, ";
+
+ out << runtimeData->prod_info[i].copy_len << ", ";
+
+
+ out << " }";
+
+ if ( i < runtimeData->num_prods-1 )
+ out << ",\n";
+ }
+ out << "\n};\n\n";
+
+ /*
+ * patReplInfo
+ */
+ out << "static struct pat_cons_info " << patReplInfo() << "[] = {\n";
+ for ( int i = 0; i < runtimeData->num_patterns; i++ ) {
+ out << " { " << runtimeData->pat_repl_info[i].offset << ", " <<
+ runtimeData->pat_repl_info[i].num_bindings << " },\n";
+ }
+ out << "};\n\n";
+
+ /*
+ * patReplNodes
+ */
+ out << "static struct pat_cons_node " << patReplNodes() << "[] = {\n";
+ for ( int i = 0; i < runtimeData->num_pattern_nodes; i++ ) {
+ struct pat_cons_node &node = runtimeData->pat_repl_nodes[i];
+ out << " { " << node.id << ", " <<
+ node.prod_num << ", " << node.next << ", " <<
+ node.child << ", " << node.bind_id << ", ";
+ /* Node data is length-delimited; a null pointer is written as 0. */
+ if ( node.data == 0 )
+ out << "0";
+ else {
+ out << '\"';
+ escapeLiteralString( out, node.data, node.length );
+ out << '\"';
+ }
+ out << ", " << node.length << ", ";
+
+ out << node.left_ignore << ", ";
+ out << node.right_ignore << ", ";
+
+ out << (int)node.stop << " },\n";
+ }
+ out << "};\n\n";
+
+ /*
+ * functionInfo
+ */
+ out << "static struct function_info " << functionInfo() << "[] = {\n";
+ for ( int i = 0; i < runtimeData->num_functions; i++ ) {
+ out << "\t{ " <<
+ runtimeData->function_info[i].frame_id << ", " <<
+ runtimeData->function_info[i].arg_size << ", " <<
+ runtimeData->function_info[i].frame_size;
+ out << " }";
+
+ if ( i < runtimeData->num_functions-1 )
+ out << ",\n";
+ }
+ out << "\n};\n\n";
+
+ /*
+ * regionInfo
+ */
+ out << "static struct region_info " << regionInfo() << "[] = {\n";
+ for ( int i = 0; i < runtimeData->num_regions; i++ ) {
+ out << "\t{ " << runtimeData->region_info[i].default_token <<
+ ", " << runtimeData->region_info[i].eof_frame_id <<
+ ", " << runtimeData->region_info[i].ci_lel_id <<
+ " }";
+
+ if ( i < runtimeData->num_regions-1 )
+ out << ",\n";
+ }
+ out << "\n};\n\n";
+
+ /*
+ * genericInfo
+ */
+ out << "static struct generic_info " << genericInfo() << "[] = {\n";
+ for ( int i = 0; i < runtimeData->num_generics; i++ ) {
+ out << "\t{ " <<
+ runtimeData->generic_info[i].type << ", " <<
+ runtimeData->generic_info[i].el_struct_id << ", " <<
+ runtimeData->generic_info[i].el_offset << ", " <<
+ runtimeData->generic_info[i].key_type << ", " <<
+ runtimeData->generic_info[i].key_offset << ", " <<
+ runtimeData->generic_info[i].value_type << ", " <<
+ runtimeData->generic_info[i].value_offset << ", " <<
+ runtimeData->generic_info[i].parser_id;
+ out << " },\n";
+ }
+ out << "};\n\n";
+
+ /*
+ * literals
+ */
+ out << "static const char *" << litdata() << "[] = {\n";
+ for ( int i = 0; i < runtimeData->num_literals; i++ ) {
+ out << "\t\"";
+ escapeLiteralString( out, runtimeData->litdata[i], runtimeData->litlen[i] );
+ out << "\",\n";
+ }
+ out << "};\n\n";
+
+ out << "static long " << litlen() << "[] = {\n\t";
+ for ( int i = 0; i < runtimeData->num_literals; i++ )
+ out << runtimeData->litlen[i] << ", ";
+ out << "};\n\n";
+
+ /* One null head_t pointer per literal. */
+ out << "static head_t *" << literals() << "[] = {\n\t";
+ for ( int i = 0; i < runtimeData->num_literals; i++ )
+ out << "0, ";
+ out << "};\n\n";
+
+ out << "static int startStates[] = {\n\t";
+ for ( long i = 0; i < runtimeData->num_parsers; i++ ) {
+ out << runtimeData->start_states[i] << ", ";
+ }
+ out << "};\n\n";
+
+ out << "static int eofLelIds[] = {\n\t";
+ for ( long i = 0; i < runtimeData->num_parsers; i++ ) {
+ out << runtimeData->eof_lel_ids[i] << ", ";
+ }
+ out << "};\n\n";
+
+ out << "static int parserLelIds[] = {\n\t";
+ for ( long i = 0; i < runtimeData->num_parsers; i++ ) {
+ out << runtimeData->parser_lel_ids[i] << ", ";
+ }
+ out << "};\n\n";
+
+ out << "static CaptureAttr captureAttr[] = {\n";
+ for ( long i = 0; i < runtimeData->num_captured_attr; i++ ) {
+ out << "\t{ " <<
+ runtimeData->capture_attr[i].mark_enter << ", " <<
+ runtimeData->capture_attr[i].mark_leave << ", " <<
+ runtimeData->capture_attr[i].offset << " },\n";
+ }
+
+ out << "};\n\n";
+
+ /* Prototypes for the per-object hooks whose addresses are stored at the
+ * end of the colm_sections initializer below. */
+ out <<
+ "tree_t **" << objectName << "_host_call( program_t *prg, long code, tree_t **sp );\n"
+ "void " << objectName << "_commit_reduce_forward( program_t *prg, tree_t **root,\n"
+ " struct pda_run *pda_run, parse_tree_t *pt );\n"
+ "long " << objectName << "_commit_union_sz( int reducer );\n"
+ "void " << objectName << "_init_need();\n"
+ "int " << objectName << "_reducer_need_tok( program_t *prg, "
+ "struct pda_run *pda_run, int id );\n"
+ "int " << objectName << "_reducer_need_ign( program_t *prg, "
+ "struct pda_run *pda_run );\n"
+ "void " << objectName << "_read_reduce( program_t *prg, int reducer, input_t *stream );\n"
+ "\n";
+
+ /* The colm_sections object itself. Values are positional.
+ *
+ * NOTE(review): the parser tables are always taken from pid_0_pdaTables,
+ * regardless of num_parsers.
+ *
+ * NOTE(review): struct_inbuilt_id is written twice in a row below. If
+ * struct colm_sections has a distinct field in the second position, that
+ * field receives the wrong value -- confirm against the struct
+ * definition. */
+ out <<
+ "struct colm_sections " << objectName << " = \n"
+ "{\n"
+ " " << lelInfo() << ",\n"
+ " " << runtimeData->num_lang_els << ",\n"
+ "\n"
+ " " << selInfo() << ",\n"
+ " " << runtimeData->num_struct_els << ",\n"
+ "\n"
+ " " << prodInfo() << ",\n"
+ " " << runtimeData->num_prods << ",\n"
+ "\n"
+ " " << regionInfo() << ",\n"
+ " " << runtimeData->num_regions << ",\n"
+ "\n"
+ " " << rootCode() << ",\n"
+ " " << runtimeData->root_code_len << ",\n"
+ " " << runtimeData->root_frame_id << ",\n"
+ "\n"
+ " " << frameInfo() << ",\n"
+ " " << runtimeData->num_frames << ",\n"
+ "\n"
+ " " << functionInfo() << ",\n"
+ " " << runtimeData->num_functions << ",\n"
+ "\n"
+ " " << patReplInfo() << ",\n"
+ " " << runtimeData->num_patterns << ",\n"
+ "\n"
+ " " << patReplNodes() << ",\n"
+ " " << runtimeData->num_pattern_nodes << ",\n"
+ "\n"
+ " " << genericInfo() << ",\n"
+ " " << runtimeData->num_generics << ",\n"
+ " " << runtimeData->argv_generic_id << ",\n"
+ " " << runtimeData->stds_generic_id << ",\n"
+ "\n"
+ " " << litdata() << ",\n"
+ " " << litlen() << ",\n"
+ " " << literals() << ",\n"
+ " " << runtimeData->num_literals << ",\n"
+ "\n"
+ " captureAttr,\n"
+ " " << runtimeData->num_captured_attr << ",\n"
+ "\n"
+ " &fsmTables_start,\n"
+ " &pid_0_pdaTables,\n"
+ " startStates, eofLelIds, parserLelIds, " << runtimeData->num_parsers << ",\n"
+ "\n"
+ " " << runtimeData->global_size << ",\n"
+ "\n"
+ " " << runtimeData->first_non_term_id << ",\n"
+ " " << runtimeData->first_struct_el_id << ",\n"
+ " " << runtimeData->integer_id << ",\n"
+ " " << runtimeData->string_id << ",\n"
+ " " << runtimeData->any_id << ",\n"
+ " " << runtimeData->eof_id << ",\n"
+ " " << runtimeData->no_token_id << ",\n"
+ " " << runtimeData->global_id << ",\n"
+ " " << runtimeData->argv_el_id << ",\n"
+ " " << runtimeData->stds_el_id << ",\n"
+ " " << runtimeData->struct_inbuilt_id << ",\n"
+ " " << runtimeData->struct_inbuilt_id << ",\n"
+ " " << runtimeData->struct_stream_id << ",\n"
+ " &fsm_execute,\n"
+ " &sendNamedLangEl,\n"
+ " &initBindings,\n"
+ " &popBinding,\n"
+ " &" << objectName << "_host_call,\n"
+ " &" << objectName << "_commit_reduce_forward,\n"
+ " &" << objectName << "_commit_union_sz,\n"
+ " &" << objectName << "_init_need,\n"
+ " &" << objectName << "_reducer_need_tok,\n"
+ " &" << objectName << "_reducer_need_ign,\n"
+ " &" << objectName << "_read_reduce,\n"
+ "};\n"
+ "\n";
+}
+
+/* Write one static C array definition of the form
+ *
+ *   static <ctype> <prefix><name>[] = { e0, e1, ... };
+ *
+ * Elements are separated by ", " with a line break after every eighth
+ * element and no trailing comma. A length of zero produces an empty
+ * initializer. Used by writeParserData for each of the PDA tables. */
+template <class T> static void writeParserArray( std::ostream &out, const char *ctype,
+		String prefix, String name, const T *data, long len )
+{
+	out << "static " << ctype << " " << prefix << name << "[] = {\n\t";
+	for ( long i = 0; i < len; i++ ) {
+		out << data[i];
+
+		if ( i < len-1 ) {
+			out << ", ";
+			if ( (i+1) % 8 == 0 )
+				out << "\n\t";
+		}
+	}
+	out << "\n};\n\n";
+}
+
+/* Write the PDA tables of one parser as C source.
+ *
+ * id selects the symbol prefix ("pid_<id>_"); tables supplies the data. Each
+ * table is written as its own static array, followed by a static
+ * struct pda_tables named <prefix>pdaTables whose initializer refers to the
+ * arrays and then lists the table lengths. The initializer is positional. */
+void PdaCodeGen::writeParserData( long id, struct pda_tables *tables )
+{
+	String prefix = "pid_" + String(0, "%ld", id) + "_";
+
+	/* indices and owners are both num_indices long. */
+	writeParserArray( out, "int", prefix, indices(),
+			tables->indices, tables->num_indices );
+	writeParserArray( out, "int", prefix, owners(),
+			tables->owners, tables->num_indices );
+	writeParserArray( out, "int", prefix, keys(),
+			tables->keys, tables->num_keys );
+
+	/* offsets has one entry per state. */
+	writeParserArray( out, "unsigned int", prefix, offsets(),
+			tables->offsets, tables->num_states );
+	writeParserArray( out, "unsigned int", prefix, targs(),
+			tables->targs, tables->num_targs );
+	writeParserArray( out, "unsigned int", prefix, actInds(),
+			tables->act_inds, tables->num_act_inds );
+	writeParserArray( out, "unsigned int", prefix, actions(),
+			tables->actions, tables->num_actions );
+	writeParserArray( out, "int", prefix, commitLen(),
+			tables->commit_len, tables->num_commit_len );
+
+	/* tokenRegionInds also has one entry per state. */
+	writeParserArray( out, "int", prefix, tokenRegionInds(),
+			tables->token_region_inds, tables->num_states );
+	writeParserArray( out, "int", prefix, tokenRegions(),
+			tables->token_regions, tables->num_region_items );
+	writeParserArray( out, "int", prefix, tokenPreRegions(),
+			tables->token_pre_regions, tables->num_pre_region_items );
+
+	out <<
+		"static struct pda_tables " << prefix << "pdaTables =\n"
+		"{\n"
+		" " << prefix << indices() << ",\n"
+		" " << prefix << owners() << ",\n"
+		" " << prefix << keys() << ",\n"
+		" " << prefix << offsets() << ",\n"
+		" " << prefix << targs() << ",\n"
+		" " << prefix << actInds() << ",\n"
+		" " << prefix << actions() << ",\n"
+		" " << prefix << commitLen() << ",\n"
+
+		" " << prefix << tokenRegionInds() << ",\n"
+		" " << prefix << tokenRegions() << ",\n"
+		" " << prefix << tokenPreRegions() << ",\n"
+		"\n"
+		" " << tables->num_indices << ",\n"
+		" " << tables->num_keys << ",\n"
+		" " << tables->num_states << ",\n"
+		" " << tables->num_targs << ",\n"
+		" " << tables->num_act_inds << ",\n"
+		" " << tables->num_actions << ",\n"
+		" " << tables->num_commit_len << ",\n"
+		" " << tables->num_region_items << ",\n"
+		" " << tables->num_pre_region_items << "\n"
+		"};\n"
+		"\n";
+}
+
diff --git a/src/pdacodegen.h b/src/pdacodegen.h
new file mode 100644
index 00000000..759dd6e0
--- /dev/null
+++ b/src/pdacodegen.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright 2007-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _COLM_PDACODEGEN_H
+#define _COLM_PDACODEGEN_H
+
+struct Compiler;
+
+/* Code generator that writes parser data to an output stream.
+ *
+ * NOTE(review): this header uses ostream, String, Production, PdaState,
+ * PdaGraph and colm_sections without including or declaring them, so it
+ * presumably has to be included after the headers that provide them --
+ * confirm. */
+struct PdaCodeGen
+{
+ /* The generator keeps a reference to out; it does not own the stream. */
+ PdaCodeGen( ostream &out )
+ :
+ out(out)
+ {}
+
+ /*
+ * Code Generation.
+ */
+ void startCodeGen();
+ void endCodeGen( int endLine );
+
+ void writeReference( Production *prod, char *data );
+ void writeUndoReference( Production *prod, char *data );
+ void writeFinalReference( Production *prod, char *data );
+ void writeFirstLocate( Production *prod );
+ void writeRhsLocate( Production *prod );
+
+ void defineRuntime();
+ void writeRuntimeData( colm_sections *runtimeData, struct pda_tables *pdaTables );
+ void writeParserData( long id, struct pda_tables *tables );
+
+ /* Common prefix of every generated symbol name returned below. */
+ String PARSER() { return "parser_"; }
+
+ /* Names of generated symbols: each is PARSER() plus a fixed suffix. */
+ String startState() { return PARSER() + "startState"; }
+ String indices() { return PARSER() + "indices"; }
+ String owners() { return PARSER() + "owners"; }
+ String keys() { return PARSER() + "keys"; }
+ String offsets() { return PARSER() + "offsets"; }
+ String targs() { return PARSER() + "targs"; }
+ String actInds() { return PARSER() + "actInds"; }
+ String actions() { return PARSER() + "actions"; }
+ String commitLen() { return PARSER() + "commitLen"; }
+ String fssProdIdIndex() { return PARSER() + "fssProdIdIndex"; }
+ String prodLengths() { return PARSER() + "prodLengths"; }
+ String prodLhsIds() { return PARSER() + "prodLhsIds"; }
+ String prodNames() { return PARSER() + "prodNames"; }
+ String lelInfo() { return PARSER() + "lelInfo"; }
+ String selInfo() { return PARSER() + "selInfo"; }
+ String prodInfo() { return PARSER() + "prodInfo"; }
+ String tokenRegionInds() { return PARSER() + "tokenRegionInds"; }
+ String tokenRegions() { return PARSER() + "tokenRegions"; }
+ String tokenPreRegions() { return PARSER() + "tokenPreRegions"; }
+ String prodCodeBlocks() { return PARSER() + "prodCodeBlocks"; }
+ String prodCodeBlockLens() { return PARSER() + "prodCodeBlockLens"; }
+ String rootCode() { return PARSER() + "rootCode"; }
+ String frameInfo() { return PARSER() + "frameInfo"; }
+ String functionInfo() { return PARSER() + "functionInfo"; }
+ String objFieldInfo() { return PARSER() + "objFieldInfo"; }
+ String patReplInfo() { return PARSER() + "patReplInfo"; }
+ String patReplNodes() { return PARSER() + "patReplNodes"; }
+ String regionInfo() { return PARSER() + "regionInfo"; }
+ String genericInfo() { return PARSER() + "genericInfo"; }
+ String litdata() { return PARSER() + "litdata"; }
+ String litlen() { return PARSER() + "litlen"; }
+ String literals() { return PARSER() + "literals"; }
+ String fsmTables() { return PARSER() + "fsmTables"; }
+
+ /*
+ * Graphviz Generation
+ */
+ void writeTransList( PdaState *state );
+ void writeDotFile( PdaGraph *graph );
+ void writeDotFile( );
+
+ /* Destination of all generated text. */
+ ostream &out;
+};
+
+/* Functions declared with C linkage. Their definitions are not in this
+ * header.
+ * NOTE(review): presumably the compiler's built-in counterparts of the
+ * fsm_execute / sendNamedLangEl / initBindings / popBinding symbols that
+ * generated programs reference -- confirm against the definitions. */
+extern "C"
+{
+ void internalFsmExecute( struct pda_run *pdaRun, struct input_impl *inputStream );
+ void internalSendNamedLangEl( program_t *prg, tree_t **sp,
+ struct pda_run *pdaRun, struct input_impl *is );
+ void internalInitBindings( struct pda_run *pdaRun );
+ void internalPopBinding( struct pda_run *pdaRun, parse_tree_t *parseTree );
+}
+
+#endif /* _COLM_PDACODEGEN_H */
+
diff --git a/src/pdagraph.cc b/src/pdagraph.cc
new file mode 100644
index 00000000..c18c61e1
--- /dev/null
+++ b/src/pdagraph.cc
@@ -0,0 +1,533 @@
+/*
+ * Copyright 2006-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "pdagraph.h"
+
+#include <assert.h>
+#include <stdbool.h>
+
+#include <iostream>
+
+using std::cerr;
+using std::endl;
+
+/* Create a new, empty state: no in transitions, no pending commits, no state
+ * bits and all bookkeeping flags cleared. The out transition map is default
+ * constructed and the state number is not assigned here. */
+PdaState::PdaState()
+:
+ /* No in transitions. */
+ inRange(),
+
+ /* No pending commits. */
+ pendingCommits(),
+
+ stateSet(0),
+
+ /* Only used during merging. Normally null. */
+ stateDictEl(0),
+
+ /* No state identification bits. */
+ stateBits(0),
+
+ /* Bookkeeping flags all start out cleared. */
+ onClosureQueue(false),
+ inClosedMap(false),
+ followMarked(false),
+
+ advanceReductions(false)
+{
+}
+
+/* Copy a state. The pending commits, state bits, dot set and the
+ * advanceReductions flag are copied from other. The in-transition list, the
+ * state set, the state dict element and the closure/follow bookkeeping flags
+ * start out empty or cleared.
+ *
+ * Out transitions are duplicated, but each duplicate still points at the
+ * target state of the original. Re-targeting and attaching them is left to
+ * the PdaGraph copy constructor. */
+PdaState::PdaState(const PdaState &other)
+:
+	inRange(),
+
+	/* Plain data, needs no fixing up after the copy. */
+	pendingCommits(other.pendingCommits),
+
+	stateSet(0),
+
+	/* This is only used during merging. Normally null. */
+	stateDictEl(0),
+
+	/* Fsm state data. */
+	stateBits(other.stateBits),
+
+	dotSet(other.dotSet),
+	onClosureQueue(false),
+	inClosedMap(false),
+	followMarked(false),
+
+	/* The default constructor initializes this flag, so the copy must as
+	 * well; otherwise it is left with an indeterminate value. */
+	advanceReductions(other.advanceReductions),
+
+	transMap()
+{
+	/* Duplicate all the transitions. */
+	for ( TransMap::Iter trans = other.transMap; trans.lte(); trans++ ) {
+		/* Duplicate and store the original target in the transition. This
+		 * will be corrected once all the states have been created. */
+		PdaTrans *newTrans = new PdaTrans(*trans->value);
+		newTrans->toState = trans->value->toState;
+		transMap.append( TransMapEl( newTrans->lowKey, newTrans ) );
+	}
+}
+
+/* Release the state dict element, if any. Transitions are not deleted here;
+ * the PdaGraph destructor takes care of those. */
+PdaState::~PdaState()
+{
+	/* Deleting a null pointer has no effect, so no test is needed. */
+	delete stateDictEl;
+}
+
+/* Create an empty graph. The start state is unset (null) until
+ * setStartState is called. */
+PdaGraph::PdaGraph()
+:
+ /* No start state. */
+ startState(0)
+{
+}
+
+/* Copy all graph data including transitions.
+ *
+ * Works in three steps: duplicate every state and record the duplicate in
+ * the stateMap field of its original; re-target every duplicated transition
+ * through stateMap and attach it; translate the start state and the final
+ * state set through stateMap.
+ *
+ * Note that the stateMap fields of the states in graph are written even
+ * though graph is taken by const reference. */
+PdaGraph::PdaGraph( const PdaGraph &graph )
+:
+	/* Lists start empty. Will be filled by copy. */
+	stateList(),
+	misfitList(),
+
+	/* Copy in the entry points,
+	 * pointers will be resolved later. */
+	startState(graph.startState),
+
+	/* Will be filled by copy. */
+	finStateSet()
+{
+	/* Create the states and record their map in the original state. */
+	PdaStateList::Iter origState = graph.stateList;
+	for ( ; origState.lte(); origState++ ) {
+		/* Make the new state. */
+		PdaState *newState = new PdaState( *origState );
+
+		/* Add the state to the list. */
+		stateList.append( newState );
+
+		/* Set the mapsTo item of the old state. */
+		origState->stateMap = newState;
+	}
+
+	/* Dereference all the state maps. */
+	for ( PdaStateList::Iter state = stateList; state.lte(); state++ ) {
+		for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) {
+			/* The transition still points to the original target in the
+			 * source machine. The target's duplicate is in its stateMap. */
+			PdaState *toState = trans->value->toState != 0 ?
+					trans->value->toState->stateMap : 0;
+
+			/* Attach the transition to the duplicate. */
+			trans->value->toState = 0;
+			attachTrans( state, toState, trans->value );
+		}
+	}
+
+	/* Fix the start state pointer. A graph that never had a start state
+	 * set (see the default constructor) has nothing to translate. */
+	if ( startState != 0 )
+		startState = startState->stateMap;
+
+	/* Build the final state set. */
+	PdaStateSet::Iter st = graph.finStateSet;
+	for ( ; st.lte(); st++ )
+		finStateSet.insert((*st)->stateMap);
+}
+
+/* Destroy the graph: free every transition held in the out transition maps,
+ * then empty the state list. */
+PdaGraph::~PdaGraph()
+{
+	/* Transitions first, they are reached through the states. */
+	for ( PdaStateList::Iter st = stateList; st.lte(); st++ ) {
+		for ( TransMap::Iter el = st->transMap; el.lte(); el++ )
+			delete el->value;
+	}
+
+	/* Now the states themselves. */
+	stateList.empty();
+}
+
+/* Mark a state as final: set its SB_ISFINAL bit and add it to finStateSet.
+ * Does nothing if the bit is already set. */
+void PdaGraph::setFinState( PdaState *state )
+{
+	const bool alreadyFinal = ( state->stateBits & SB_ISFINAL ) != 0;
+	if ( !alreadyFinal ) {
+		state->stateBits |= SB_ISFINAL;
+		finStateSet.insert( state );
+	}
+}
+
+/* Clear the SB_ISFINAL bit of every state in finStateSet, then empty the
+ * set. */
+void PdaGraph::unsetAllFinStates( )
+{
+	for ( PdaStateSet::Iter fin = finStateSet; fin.lte(); fin++ )
+		(*fin)->stateBits &= ~ SB_ISFINAL;
+
+	finStateSet.empty();
+}
+
+/* Install state as the start state. A start state must not already be set;
+ * this is checked with an assertion. */
+void PdaGraph::setStartState( PdaState *state )
+{
+	/* Should change from unset to set. */
+	assert( !startState );
+	startState = state;
+}
+
+/* Set SB_ISMARKED on state and, recursively, on every state that can be
+ * reached from it by following out transitions. States that already carry
+ * the mark are not visited again, which terminates the recursion on
+ * cycles. */
+void PdaGraph::markReachableFromHere( PdaState *state )
+{
+	/* Already visited: nothing to do. */
+	if ( state->stateBits & SB_ISMARKED )
+		return;
+
+	/* Mark before descending so cycles lead back to a marked state. */
+	state->stateBits |= SB_ISMARKED;
+
+	for ( TransMap::Iter el = state->transMap; el.lte(); el++ ) {
+		PdaState *target = el->value->toState;
+		if ( target == 0 )
+			continue;
+
+		markReachableFromHere( target );
+	}
+}
+
+/* Number the states consecutively from zero, in state list order. */
+void PdaGraph::setStateNumbers()
+{
+	int nextNum = 0;
+	for ( PdaStateList::Iter st = stateList; st.lte(); st++ ) {
+		st->stateNum = nextNum;
+		nextNum += 1;
+	}
+}
+
+/* Push trans onto the front of the doubly linked in-transition list whose
+ * head pointer is head. The from and to states are not used here. */
+void PdaGraph::attachToInList( PdaState *from, PdaState *to,
+		PdaTrans *&head, PdaTrans *trans )
+{
+	trans->ilnext = head;
+	trans->ilprev = 0;
+
+	/* If the list is not empty, the old front element gets trans as its
+	 * predecessor. */
+	if ( head != 0 )
+		head->ilprev = trans;
+
+	/* Now insert ourselves at the front of the list. */
+	head = trans;
+}
+
+/* Unlink trans from the doubly linked in-transition list whose head pointer
+ * is head. The ilprev/ilnext fields of trans itself are left as they
+ * were. */
+void PdaGraph::detachFromInList( PdaState *from, PdaState *to,
+		PdaTrans *&head, PdaTrans *trans )
+{
+	PdaTrans *before = trans->ilprev;
+	PdaTrans *after = trans->ilnext;
+
+	/* Bypass trans going forward; at the front that means moving the head. */
+	if ( before != 0 )
+		before->ilnext = after;
+	else
+		head = after;
+
+	/* Bypass trans going backward. */
+	if ( after != 0 )
+		after->ilprev = before;
+}
+
+/* Allocate a transition from one state to another on lowKey, append it to
+ * the out transition map of from and link it into the in-transition list of
+ * to. The second key argument is not used. Returns the new transition.
+ *
+ * The entry is appended to the map, not inserted by key. */
+PdaTrans *PdaGraph::appendNewTrans( PdaState *from, PdaState *to, long lowKey, long )
+{
+	PdaTrans *created = new PdaTrans();
+
+	/* Record both ends of the transition. */
+	created->fromState = from;
+	created->toState = to;
+
+	/* Entry in the out transition map of the source state. */
+	from->transMap.append( TransMapEl( lowKey, created ) );
+
+	/* The transition carries its own copy of the key. */
+	created->lowKey = lowKey;
+
+	/* The in-transition list of the target is headed by inRange. */
+	attachToInList( from, to, to->inRange.head, created );
+
+	return created;
+}
+
+/* Allocate a transition from one state to another on lowKey, insert it into
+ * the out transition map of from under that key and link it into the
+ * in-transition list of to. The second key argument is not used. Returns
+ * the new transition. */
+PdaTrans *PdaGraph::insertNewTrans( PdaState *from, PdaState *to, long lowKey, long )
+{
+	PdaTrans *created = new PdaTrans();
+
+	/* Record both ends of the transition. */
+	created->fromState = from;
+	created->toState = to;
+
+	/* Entry in the out transition map of the source state. */
+	from->transMap.insert( lowKey, created );
+
+	/* The transition carries its own copy of the key. */
+	created->lowKey = lowKey;
+
+	/* The in-transition list of the target is headed by inRange. */
+	attachToInList( from, to, to->inRange.head, created );
+
+	return created;
+}
+
+/* Attach an already-allocated, currently unattached transition so that it
+ * runs from 'from' to 'to'. Only the endpoint pointers and the target's
+ * inRange list are updated; the source state's transMap is not touched. */
+void PdaGraph::attachTrans( PdaState *from, PdaState *to, PdaTrans *trans )
+{
+	/* The transition must not be attached anywhere yet. */
+	assert( trans->fromState == 0 && trans->toState == 0 );
+
+	trans->toState = to;
+	trans->fromState = from;
+
+	/* Link into the target's in-list. */
+	attachToInList( from, to, to->inRange.head, trans );
+}
+
+/* Detach a transition that currently runs from 'from' to 'to'. Clears the
+ * endpoint pointers and unlinks the transition from to's inRange list. The
+ * source state's transMap is not touched and the transition is not freed. */
+void PdaGraph::detachTrans( PdaState *from, PdaState *to, PdaTrans *trans )
+{
+	/* The caller must name the actual endpoints. */
+	assert( trans->fromState == from && trans->toState == to );
+
+	trans->toState = 0;
+	trans->fromState = 0;
+
+	/* Unlink from the target's in-list. */
+	detachFromInList( from, to, to->inRange.head, trans );
+}
+
+
+/* Detach a state from the graph. Every transition into the state and every
+ * transition out of it is detached and deleted, the state's transMap is
+ * emptied, and the state is removed from finStateSet if it carries the
+ * SB_ISFINAL bit. The state itself is neither unlinked from the state list
+ * nor deleted; that is left to the caller. */
+void PdaGraph::detachState( PdaState *state )
+{
+	/* Incoming transitions: keep taking the head of the in-list until the
+	 * list is empty. Each one is also removed from its source's transMap. */
+	for ( PdaTrans *in = state->inRange.head; in != 0; in = state->inRange.head ) {
+		PdaState *source = in->fromState;
+
+		detachTrans( source, state, in );
+
+		source->transMap.remove( in->lowKey );
+		delete in;
+	}
+
+	/* Outgoing transitions: unlink each from its target and free it. */
+	for ( TransMap::Iter out = state->transMap; out.lte(); out++ ) {
+		PdaTrans *outTrans = out->value;
+		detachTrans( state, outTrans->toState, outTrans );
+		delete outTrans;
+	}
+
+	/* The map now holds only stale pointers, drop them all. */
+	state->transMap.empty();
+
+	/* Final states are tracked in a set, take this one out of it. */
+	if ( state->stateBits & SB_ISFINAL )
+		finStateSet.remove( state );
+}
+
+/* Redirect every transition that goes into src so that it goes into dest
+ * instead. Src must differ from dest and must not be the start state; both
+ * conditions are asserted. */
+void PdaGraph::inTransMove( PdaState *dest, PdaState *src )
+{
+	/* Moving a state's in-transitions onto itself makes no sense. */
+	assert( dest != src );
+
+	/* The start state is not a permitted source. */
+	assert( src != startState );
+
+	/* Keep taking the head of src's in-list until the list is empty. */
+	for ( PdaTrans *moving = src->inRange.head; moving != 0;
+			moving = src->inRange.head )
+	{
+		PdaState *origin = moving->fromState;
+
+		detachTrans( origin, src, moving );
+		attachTrans( origin, dest, moving );
+	}
+}
+
+/* Record a reduction by prodId on the transition dest. If the production is
+ * not yet present it is inserted with the given priority, otherwise the
+ * larger of the stored and the given priority is kept. */
+void PdaGraph::addInReduction( PdaTrans *dest, long prodId, long prior )
+{
+	ReductionMapEl *existing = dest->reductions.find( prodId );
+
+	if ( existing != 0 ) {
+		/* Already there, only ever raise the priority. */
+		if ( existing->value < prior )
+			existing->value = prior;
+	}
+	else {
+		dest->reductions.insert( prodId, prior );
+	}
+}
+
+/* Merge the properties of srcTrans into destTrans. Called during merging for
+ * each transition that destTrans is to represent. Draws in the shift flag and
+ * shift priority (maximum wins), the reductions, the commit points, the
+ * target state's advanceReductions flag and the two no-ignore flags. Adding a
+ * transition into itself is a no-op. */
+void PdaGraph::addInTrans( PdaTrans *destTrans, PdaTrans *srcTrans )
+{
+	/* Nothing to draw in from ourselves. */
+	if ( srcTrans == destTrans )
+		return;
+
+	/* Shift handling. Only a shifting source has anything to contribute:
+	 * dest takes the source priority when dest was not a shift, or when the
+	 * source priority is the higher one. Dest is then a shift as well. */
+	if ( srcTrans->isShift ) {
+		if ( !destTrans->isShift || srcTrans->shiftPrior > destTrans->shiftPrior )
+			destTrans->shiftPrior = srcTrans->shiftPrior;
+		destTrans->isShift = true;
+	}
+
+	/* Reductions, merged one by one so priorities are maxed. */
+	ReductionMap::Iter red = srcTrans->reductions;
+	while ( red.lte() ) {
+		addInReduction( destTrans, red->key, red->value );
+		red++;
+	}
+
+	/* Commit points. */
+	destTrans->commits.insert( srcTrans->commits );
+
+	/* Flags are sticky: once set by any source they stay set. */
+	if ( srcTrans->toState->advanceReductions )
+		destTrans->toState->advanceReductions = true;
+
+	if ( srcTrans->noPreIgnore )
+		destTrans->noPreIgnore = true;
+
+	if ( srcTrans->noPostIgnore )
+		destTrans->noPostIgnore = true;
+}
+
+/* NO LONGER USED.
+ * Merge the properties of srcState into destState: pending commits, the dot
+ * set, and closure-queue membership. Adding a state into itself is a no-op. */
+void PdaGraph::addInState( PdaState *destState, PdaState *srcState )
+{
+	if ( srcState == destState )
+		return;
+
+	/* Pending commits. Drawing any in is reported on cerr. */
+	destState->pendingCommits.insert( srcState->pendingCommits );
+	if ( srcState->pendingCommits.length() > 0 )
+		cerr << "THERE ARE PENDING COMMITS DRAWN IN" << endl;
+
+	/* Parser generation data. */
+	destState->dotSet.insert( srcState->dotSet );
+
+	/* If the source is waiting for closure, the dest must be queued too. */
+	const bool needsQueue = srcState->onClosureQueue && !destState->onClosureQueue;
+	if ( needsQueue ) {
+		stateClosureQueue.append( destState );
+		destState->onClosureQueue = true;
+	}
+}
+
+/* Allocate a new state, append it to the graph's state list and return it. */
+PdaState *PdaGraph::addState()
+{
+	PdaState *created = new PdaState();
+	stateList.append( created );
+	return created;
+}
+
+
+/* Walk srcFsm from its start state to a final state and, starting at 'from',
+ * take the transition on the same key at every step. Each srcFsm state on the
+ * way must have exactly one transition and a matching transition must exist
+ * in this graph; both are asserted. Returns the state reached. */
+PdaState *PdaGraph::followFsm( PdaState *from, PdaGraph *srcFsm )
+{
+	PdaState *reached = from;
+
+	for ( PdaState *srcState = srcFsm->startState; ! srcState->isFinState(); ) {
+		/* The source machine is expected to be a simple chain. */
+		assert( srcState->transMap.length() == 1 );
+		PdaTrans *srcTrans = srcState->transMap[0].value;
+
+		/* Take the same key in this graph. */
+		PdaTrans *match = reached->findTrans( srcTrans->lowKey );
+		assert( match != 0 );
+
+		reached = match->toState;
+		srcState = srcTrans->toState;
+	}
+
+	return reached;
+}
+
+/* Count the transitions taken from the start state to the first final state
+ * reached, always following the first entry of each state's transMap. A
+ * non-final state on the path must have at least one transition; this is
+ * asserted, as followFsm does, instead of silently indexing an empty map. */
+int PdaGraph::fsmLength( )
+{
+	int length = 0;
+	PdaState *state = startState;
+	while ( ! state->isFinState() ) {
+		/* Guard the transMap[0] access below. */
+		assert( state->transMap.length() > 0 );
+
+		length += 1;
+		state = state->transMap[0].value->toState;
+	}
+	return length;
+}
+
+/* Remove the states that cannot be reached from the start state or from any
+ * state in entryStateSet. Reachable states are marked first; then the state
+ * list is swept, deleting unmarked states and clearing the mark on the rest. */
+void PdaGraph::removeUnreachableStates()
+{
+	/* Mark phase: the start state, if set, and every entry state. */
+	if ( startState != 0 )
+		markReachableFromHere( startState );
+
+	for ( PdaStateSet::Iter entry = entryStateSet; entry.lte(); entry++ )
+		markReachableFromHere( *entry );
+
+	/* Sweep phase. The successor is saved up front because the current
+	 * state may be deleted. */
+	PdaState *following = 0;
+	for ( PdaState *cur = stateList.head; cur != 0; cur = following ) {
+		following = cur->next;
+
+		if ( ( cur->stateBits & SB_ISMARKED ) == 0 ) {
+			/* Never marked: unreachable, take it out of the graph. */
+			detachState( cur );
+			stateList.detach( cur );
+			delete cur;
+		}
+		else {
+			/* Reachable: just clear the mark for next time. */
+			cur->stateBits &= ~ SB_ISMARKED;
+		}
+	}
+}
diff --git a/src/pdagraph.h b/src/pdagraph.h
new file mode 100644
index 00000000..5cfc2a76
--- /dev/null
+++ b/src/pdagraph.h
@@ -0,0 +1,517 @@
+/*
+ * Copyright 2006-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _COLM_PDAGRAPH_H
+#define _COLM_PDAGRAPH_H
+
+#include <assert.h>
+
+#include <avltree.h>
+#include <bstmap.h>
+#include <vector.h>
+#include <sbstmap.h>
+#include <sbstset.h>
+#include <sbsttable.h>
+#include <bstset.h>
+#include <compare.h>
+#include <avltree.h>
+#include <dlist.h>
+#include <avlset.h>
+#include <dlistmel.h>
+
+/* Flags for states, stored in PdaState::stateBits. SB_ISFINAL is what
+ * PdaState::isFinState() tests. */
+#define SB_ISFINAL 0x04
+#define SB_ISMARKED 0x08
+#define SB_ISSTART 0x10
+
+/* Flags for transitions. */
+#define TB_ISMARKED 0x01
+
+/* Forward declarations. The first three are defined later in this header;
+ * the others are only referred to here through pointers. */
+struct PdaTrans;
+struct PdaState;
+struct PdaGraph;
+struct TokenInstance;
+struct Production;
+struct LangEl;
+struct TokenRegion;
+
+/* Vector of token region pointers. */
+typedef Vector<TokenRegion*> RegionVect;
+
+/* Vector of long values describing actions. */
+typedef Vector<long> ActDataList;
+
+/* One entry of the action set: a target, a commit length, an id and a list
+ * of action values. The id is always initialized to zero by the constructor
+ * and the action list is copied from the argument. */
+struct ActionData
+{
+ ActionData( int targ, ActDataList &actions, int commitLen )
+ : targ(targ), commitLen(commitLen), id(0), actions(actions) { }
+
+ int targ;
+ int commitLen;
+ int id;
+
+ ActDataList actions;
+};
+
+
+/* Three-way ordering for ActionData. Compares targ, then commitLen, then id,
+ * and finally the action lists. Returns a negative, zero or positive value. */
+struct CmpActionData
+{
+	static int compare( const ActionData &ap1, const ActionData &ap2 )
+	{
+		if ( ap1.targ != ap2.targ )
+			return ap1.targ < ap2.targ ? -1 : 1;
+
+		if ( ap1.commitLen != ap2.commitLen )
+			return ap1.commitLen < ap2.commitLen ? -1 : 1;
+
+		if ( ap1.id != ap2.id )
+			return ap1.id < ap2.id ? -1 : 1;
+
+		/* All scalar fields tie, the action lists decide. */
+		return CmpTable< long, CmpOrd<long> >::
+				compare( ap1.actions, ap2.actions );
+	}
+};
+
+/* Set of distinct ActionData values, ordered by CmpActionData, and its
+ * element type. */
+typedef AvlSet<ActionData, CmpActionData> PdaActionSet;
+typedef AvlSetEl<ActionData> PdaActionSetEl;
+
+/* List pointers for the closure queue. PdaState inherits from this struct so
+ * that a state can be linked into a StateClosureQueue. */
+struct ClosureQueueListEl { PdaState *prev, *next; };
+
+/* Queue of states, transitions to be closed. */
+typedef DListMel< PdaState, ClosureQueueListEl > StateClosureQueue;
+typedef DList<PdaTrans> TransClosureQueue;
+
+/* Sets of productions, the matching comparison, and sets of such sets. */
+typedef BstSet< Production*, CmpOrd<Production*> > DefSet;
+typedef CmpTable< Production*, CmpOrd<Production*> > CmpDefSet;
+typedef BstSet< DefSet, CmpDefSet > DefSetSet;
+
+/* Vector of productions and a set of long values. */
+typedef Vector< Production* > DefVect;
+typedef BstSet< long, CmpOrd<long> > AlphSet;
+
+/* A (state, production id) pair. Used as the element type of ExpandToSet. */
+struct ExpandToEl
+{
+ ExpandToEl( PdaState *state, int prodId )
+ : state(state), prodId(prodId) { }
+
+ PdaState *state;
+ int prodId;
+};
+
+/* Three-way ordering for ExpandToEl. Compares the state pointers first and
+ * the production ids second. Returns -1, 0 or 1. */
+struct CmpExpandToEl
+{
+	static inline int compare( const ExpandToEl &etel1, const ExpandToEl &etel2 )
+	{
+		if ( etel1.state != etel2.state )
+			return etel1.state < etel2.state ? -1 : 1;
+
+		if ( etel1.prodId != etel2.prodId )
+			return etel1.prodId < etel2.prodId ? -1 : 1;
+
+		return 0;
+	}
+};
+
+/* Set of (state, production id) pairs. */
+typedef BstSet<ExpandToEl, CmpExpandToEl> ExpandToSet;
+
+/* Sets of int and the comparison for whole sets. */
+typedef BstSet< int, CmpOrd<int> > IntSet;
+typedef CmpTable< int, CmpOrd<int> > CmpIntSet;
+
+/* Sets of long and the comparison for whole sets. */
+typedef BstSet< long, CmpOrd<long> > LongSet;
+typedef CmpTable< long, CmpOrd<long> > CmpLongSet;
+
+/* Map from long to long and its element type. */
+typedef BstMap< long, long, CmpOrd<long> > LongMap;
+typedef BstMapEl< long, long > LongMapEl;
+
+/* Production id sets are plain long sets. */
+typedef LongSet ProdIdSet;
+typedef CmpLongSet CmpProdIdSet;
+
+/* Set of states, list of states. */
+typedef BstSet<PdaState*> PdaStateSet;
+typedef Vector<PdaState*> StateVect;
+typedef DList<PdaState> PdaStateList;
+
+/* Aliases of LongMap. PdaGraph::addInReduction stores a ReductionMap entry
+ * with the production id as key and the priority as value. */
+typedef LongMap FollowToAdd;
+typedef LongMap ReductionMap;
+typedef LongMapEl ReductionMapEl;
+
+/* An (onReduce, length) pair of ints. Used as the element type of
+ * ProdIdPairSet, which is the type of PdaState::pendingCommits. */
+struct ProdIdPair
+{
+ ProdIdPair( int onReduce, int length )
+ : onReduce(onReduce), length(length) {}
+
+ int onReduce;
+ int length;
+};
+
+/* Three-way ordering for ProdIdPair. Compares onReduce first and length
+ * second. Returns -1, 0 or 1. */
+struct CmpProdIdPair
+{
+	static inline int compare( const ProdIdPair &pair1, const ProdIdPair &pair2 )
+	{
+		if ( pair1.onReduce != pair2.onReduce )
+			return pair1.onReduce < pair2.onReduce ? -1 : 1;
+
+		if ( pair1.length != pair2.length )
+			return pair1.length < pair2.length ? -1 : 1;
+
+		return 0;
+	}
+};
+
+typedef BstSet< ProdIdPair, CmpProdIdPair > ProdIdPairSet;
+
+/* Transition class that implements actions and priorities. A transition is
+ * keyed on lowKey, runs from fromState to toState, and carries the data used
+ * for parse table construction. */
+struct PdaTrans
+{
+	/* Construct an unattached transition. Every scalar and pointer member is
+	 * given a defined value so that no field is ever read uninitialized. */
+	PdaTrans() :
+		lowKey(0),
+		fromState(0),
+		toState(0),
+		prev(0), next(0),
+		ilprev(0), ilnext(0),
+		isShift(false),
+		isShiftReduce(false),
+		shiftPrior(0),
+		actionSetEl(0),
+		noPreIgnore(false),
+		noPostIgnore(false)
+	{ }
+
+	/* Copy the key, the shift data, the reductions and the commits of other.
+	 * The copy is unattached: the state pointers and all list pointers are
+	 * null. The action lists, expandTo, actionSetEl and afterShiftCommits
+	 * are not copied.
+	 * NOTE(review): noPreIgnore and noPostIgnore are reset to false rather
+	 * than copied from other; confirm that this is intended. */
+	PdaTrans( const PdaTrans &other ) :
+		lowKey(other.lowKey),
+		fromState(0), toState(0),
+		prev(0), next(0),
+		ilprev(0), ilnext(0),
+		isShift(other.isShift),
+		isShiftReduce(other.isShiftReduce),
+		shiftPrior(other.shiftPrior),
+		reductions(other.reductions),
+		actionSetEl(0),
+		commits(other.commits),
+		noPreIgnore(false),
+		noPostIgnore(false)
+	{ }
+
+	/* Key the transition is taken on, and the states it connects. */
+	long lowKey;
+	PdaState *fromState;
+	PdaState *toState;
+
+	/* Pointers for outlist. */
+	PdaTrans *prev, *next;
+
+	/* Pointers for in-list. Maintained by PdaGraph::attachToInList and
+	 * PdaGraph::detachFromInList. */
+	PdaTrans *ilprev, *ilnext;
+
+	long maxPrior();
+
+	/* Parse Table construction data. */
+	bool isShift, isShiftReduce;
+	int shiftPrior;
+	ReductionMap reductions;
+	ActDataList actions;
+	ActDataList actOrds;
+	ActDataList actPriors;
+
+	ExpandToSet expandTo;
+
+	/* Element of the graph's action set for this transition, null until
+	 * one is assigned. */
+	PdaActionSetEl *actionSetEl;
+
+	LongSet commits;
+	LongSet afterShiftCommits;
+
+	bool noPreIgnore;
+	bool noPostIgnore;
+};
+
+/* List of the transitions going into a state. Unlike DList it stores only a
+ * head pointer; the links live in PdaTrans::ilprev and PdaTrans::ilnext, and
+ * insertion and removal are done by the graph. This class supplies the head
+ * and an iterator over one list. */
+struct PdaTransInList
+{
+	PdaTransInList() : head(0) { }
+
+	/* First transition on the list, null when the list is empty. */
+	PdaTrans *head;
+
+	/* Iterator over an in-list. It is nothing more than a pointer to the
+	 * current transition, null once the end has been passed. */
+	struct Iter
+	{
+		/* Default construct at the end position. */
+		Iter() : ptr(0) { }
+
+		/* Construct or assign from a list: start at its head. */
+		Iter( const PdaTransInList &il ) : ptr(il.head) { }
+		Iter &operator=( const PdaTransInList &il )
+		{
+			ptr = il.head;
+			return *this;
+		}
+
+		/* lte: still on an element. end: walked off the list. */
+		bool lte() const { return ptr != 0; }
+		bool end() const { return ptr == 0; }
+
+		/* On the first or the last element of the list. */
+		bool first() const { return ptr != 0 && ptr->ilprev == 0; }
+		bool last() const { return ptr != 0 && ptr->ilnext == 0; }
+
+		/* Cast, dereference, arrow ops. */
+		operator PdaTrans*() const { return ptr; }
+		PdaTrans &operator *() const { return *ptr; }
+		PdaTrans *operator->() const { return ptr; }
+
+		/* Step forward or backward along the in-list links. */
+		void operator++(int) { ptr = ptr->ilnext; }
+		void operator--(int) { ptr = ptr->ilprev; }
+
+		/* The current transition. */
+		PdaTrans *ptr;
+	};
+};
+
+typedef DList<PdaTrans> PdaTransList;
+
+/* An element in a state dict. The key is a set of states; targState is the
+ * state associated with that set. */
+struct PdaStateDictEl
+:
+	public AvlTreeEl<PdaStateDictEl>
+{
+	/* The target starts out null instead of holding an indeterminate
+	 * value; it is assigned by whoever creates the element. */
+	PdaStateDictEl(const PdaStateSet &stateSet)
+		: stateSet(stateSet), targState(0) { }
+
+	/* Key accessor used by the AVL tree. */
+	const PdaStateSet &getKey() { return stateSet; }
+	PdaStateSet stateSet;
+	PdaState *targState;
+};
+
+/* Dictionary mapping a set of states to a target state. */
+typedef AvlTree< PdaStateDictEl, PdaStateSet, CmpTable<PdaState*> > PdaStateDict;
+
+/* The items that a particular state encompasses, and the comparison for
+ * whole dot sets. */
+typedef BstSet< long, CmpOrd<long> > DotSet;
+typedef CmpTable< long, CmpOrd<long> > CmpDotSet;
+
+/* Map of dot sets to states. The states themselves are the tree elements,
+ * keyed through PdaState::getKey(). */
+typedef AvlTree< PdaState, DotSet, CmpDotSet > DotSetMap;
+typedef PdaState DotSetMapEl;
+
+/* Out transitions of a state, keyed on the transition key, and the element
+ * type of that map. */
+typedef BstMap< long, PdaTrans* > TransMap;
+typedef BstMapEl< long, PdaTrans* > TransMapEl;
+
+/* State class that implements actions and priorities. A state is at once a
+ * closure queue element (ClosureQueueListEl), a DotSetMap element
+ * (AvlTreeEl) and, through its own next/prev members, a state list element. */
+struct PdaState
+:
+ public ClosureQueueListEl,
+ public AvlTreeEl< PdaState >
+{
+ PdaState();
+ PdaState(const PdaState &other);
+ ~PdaState();
+
+ /* Is the state final? True when the SB_ISFINAL bit is set in stateBits. */
+ bool isFinState() { return stateBits & SB_ISFINAL; }
+
+ /* Look up the out transition on key. Returns null when the state has no
+ * transition on that key. */
+ PdaTrans *findTrans( long key )
+ {
+ TransMapEl *transMapEl = transMap.find( key );
+ if ( transMapEl == 0 )
+ return 0;
+ return transMapEl->value;
+ }
+
+ /* In transition list. */
+ PdaTransInList inRange;
+
+ ProdIdPairSet pendingCommits;
+
+ /* When duplicating the fsm we need to map each
+ * state to the new state representing it. */
+ PdaState *stateMap;
+
+ /* When merging states (state machine operations) this next pointer is
+ * used for the list of states that need to be filled in. */
+ PdaState *alg_next;
+
+ PdaStateSet *stateSet;
+
+ /* Identification for printing and stable minimization. Assigned by
+ * PdaGraph::setStateNumbers(). */
+ int stateNum;
+
+ /* A pointer to a dict element that contains the set of states this state
+ * represents. This cannot go into alg, because alg.next is used during
+ * the merging process. */
+ PdaStateDictEl *stateDictEl;
+
+ /* Bits controlling the behaviour of the state during collapsing to dfa.
+ * Holds the SB_* flags. */
+ int stateBits;
+
+ /* State list elements. These have the same names as the prev and next
+ * members inherited from ClosureQueueListEl and hide them for unqualified
+ * access. */
+ PdaState *next, *prev;
+
+ /* For dotset map. */
+ DotSet &getKey() { return dotSet; }
+
+ /* Closure management. */
+ DotSet dotSet;
+ DotSet dotSet2;
+ bool onClosureQueue;
+ bool inClosedMap;
+ bool followMarked;
+ bool onStateList;
+
+ /* Out transitions, keyed on the transition key. */
+ TransMap transMap;
+
+ RegionVect regions;
+ RegionVect preRegions;
+
+ /* Set on a target state by PdaGraph::addInTrans when any merged source
+ * transition's target has it set. */
+ bool advanceReductions;
+};
+
+/* Compare lists of epsilon transitions. Entries are name ids of targets. */
+typedef CmpTable< int, CmpOrd<int> > CmpEpsilonTrans;
+
+/* Compare sets of context values. */
+typedef CmpTable< int, CmpOrd<int> > CmpContextSets;
+
+/* Graph class that implements actions and priorities. Owns the list of
+ * states and provides the operations for attaching and detaching
+ * transitions, merging properties and pruning unreachable states. */
+struct PdaGraph
+{
+ /* Constructors/Destructors. */
+ PdaGraph();
+ PdaGraph( const PdaGraph &graph );
+ ~PdaGraph();
+
+ /* The list of states. */
+ PdaStateList stateList;
+ PdaStateList misfitList;
+
+ /* The start state and the set of additional entry states. Both are used
+ * as roots by removeUnreachableStates(). */
+ PdaState *startState;
+ PdaStateSet entryStateSet;
+
+ /* The set of final states. */
+ PdaStateSet finStateSet;
+
+ /* Closure queues and maps. */
+ DotSetMap closedMap;
+ StateClosureQueue stateClosureQueue;
+ StateClosureQueue stateClosedList;
+
+ TransClosureQueue transClosureQueue;
+ PdaState *stateClosureHead;
+
+ LangEl **langElIndex;
+
+ void setStartState( PdaState *state );
+ void unsetStartState( );
+
+ /*
+ * Basic attaching and detaching.
+ */
+
+ /* Link a transition into, or unlink it from, the in-list whose head is
+ * supplied by reference. */
+ void attachToInList( PdaState *from, PdaState *to, PdaTrans *&head, PdaTrans *trans );
+ void detachFromInList( PdaState *from, PdaState *to, PdaTrans *&head, PdaTrans *trans );
+
+ /* Attach with a new transition. The first long is the transition key, the
+ * second long is ignored by both functions. */
+ PdaTrans *appendNewTrans( PdaState *from, PdaState *to, long onChar1, long );
+ PdaTrans *insertNewTrans( PdaState *from, PdaState *to, long lowKey, long );
+
+ /* Attach with an existing transition that is already in an out list. */
+ void attachTrans( PdaState *from, PdaState *to, PdaTrans *trans );
+
+ /* Detach a transition from a target state. */
+ void detachTrans( PdaState *from, PdaState *to, PdaTrans *trans );
+
+ /* Detach a state from the graph. */
+ void detachState( PdaState *state );
+
+ /*
+ * Callbacks.
+ */
+
+ /* Add the properties of a source transition or state into a destination
+ * transition or state. */
+ void addInReduction( PdaTrans *dest, long prodId, long prior );
+ void addInTrans( PdaTrans *destTrans, PdaTrans *srcTrans );
+ void addInState( PdaState *destState, PdaState *srcState );
+
+ /*
+ * Allocation.
+ */
+
+ /* New up a state and add it to the graph. */
+ PdaState *addState();
+
+ /*
+ * Fsm operators.
+ */
+
+ /* Follow to the fin state of src fsm. */
+ PdaState *followFsm( PdaState *from, PdaGraph *srcFsm );
+
+ /*
+ * Final states
+ */
+
+ /* Set and Unset a state as final. */
+ void setFinState( PdaState *state );
+ void unsetFinState( PdaState *state );
+ void unsetAllFinStates( );
+
+ /* Set State numbers starting at 0. */
+ void setStateNumbers();
+
+ /*
+ * Path pruning
+ */
+
+ /* Mark all states reachable from state. */
+ void markReachableFromHere( PdaState *state );
+
+ /* Removes states that cannot be reached by any path in the fsm and are
+ * thus wasted silicon. */
+ void removeUnreachableStates();
+
+ /* NOTE(review): the original comment here spoke of removing error
+ * actions, which does not fit a bool query named outListCovers; the
+ * definition is not visible from this header, confirm the contract. */
+ bool outListCovers( PdaState *state );
+
+ /* Remove states that are on the misfit list. */
+ void removeMisfits();
+
+
+ /*
+ * Other
+ */
+
+ /* Move the transitions going into src so that they go into dest. */
+ void inTransMove(PdaState *dest, PdaState *src);
+
+ /* Number of transitions from the start state to a final state, following
+ * the first transition of every state. */
+ int fsmLength( );
+
+ /* Collected machine information. */
+ unsigned long long maxState;
+ unsigned long long maxAction;
+ unsigned long long maxLelId;
+ unsigned long long maxOffset;
+ unsigned long long maxIndex;
+ unsigned long long maxProdLen;
+
+ PdaActionSet actionSet;
+};
+
+#endif /* _COLM_PDAGRAPH_H */
+
diff --git a/src/pdarun.c b/src/pdarun.c
new file mode 100644
index 00000000..f1885ec6
--- /dev/null
+++ b/src/pdarun.c
@@ -0,0 +1,2265 @@
+/*
+ * Copyright 2007-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "pdarun.h"
+
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <assert.h>
+
+#include "config.h"
+#include "debug.h"
+#include "bytecode.h"
+#include "tree.h"
+#include "pool.h"
+#include "internal.h"
+
+#define act_sb 0x1
+#define act_rb 0x2
+
+/* Assemble a word_t into i from the four bytes at p, least significant byte
+ * first. Both macro parameters are parenthesized so that expression
+ * arguments expand as intended. */
+#define read_word_p( i, p ) do { \
+	(i) = ((word_t) (p)[0]); \
+	(i) |= ((word_t) (p)[1]) << 8; \
+	(i) |= ((word_t) (p)[2]) << 16; \
+	(i) |= ((word_t) (p)[3]) << 24; \
+} while(0)
+
+/* Assemble a tree_t pointer into i from the four bytes at p, least
+ * significant byte first. The temporary has a macro-specific name so that it
+ * cannot capture an argument that happens to be called w.
+ * NOTE(review): only four bytes are read; confirm how this is meant to work
+ * where pointers are wider than 32 bits. */
+#define read_tree_p( i, p ) do { \
+	word_t read_tree_p_word; \
+	read_tree_p_word = ((word_t) (p)[0]); \
+	read_tree_p_word |= ((word_t) (p)[1]) << 8; \
+	read_tree_p_word |= ((word_t) (p)[2]) << 16; \
+	read_tree_p_word |= ((word_t) (p)[3]) << 24; \
+	(i) = (tree_t*)read_tree_p_word; \
+} while(0)
+
+/* bit 0: data needed. bit 1: loc needed */
+#define RN_NONE 0x0
+#define RN_DATA 0x1
+#define RN_LOC 0x2
+#define RN_BOTH 0x3
+
+
+/* Put the scanner part of a pda_run into its initial state: take the fsm
+ * tables from the program, start with no consume buffer, no data window, no
+ * token prefix, eof not seen, and no pre-region (-1). */
+static void init_fsm_run( program_t *prg, struct pda_run *pda_run )
+{
+	pda_run->fsm_tables = prg->rtd->fsm_tables;
+
+	/* No buffers allocated yet. */
+	pda_run->consume_buf = 0;
+
+	/* Empty data window. */
+	pda_run->pe = 0;
+	pda_run->p = 0;
+
+	pda_run->scan_eof = 0;
+	pda_run->tokpref = 0;
+
+	pda_run->pre_region = -1;
+}
+
+/* Release the scanner buffers of a pda_run. The whole consume_buf list is
+ * spliced onto the front of the program's alloc_run_buf list. The pda_run's
+ * own pointer is then cleared: the buffers now belong to the program, and a
+ * repeated call must not splice the same list a second time. */
+static void clear_fsm_run( program_t *prg, struct pda_run *pda_run )
+{
+	if ( pda_run->consume_buf != 0 ) {
+		/* Transfer the run buf list to the program */
+		struct run_buf *head = pda_run->consume_buf;
+		struct run_buf *tail = head;
+		while ( tail->next != 0 )
+			tail = tail->next;
+
+		tail->next = prg->alloc_run_buf;
+		prg->alloc_run_buf = head;
+
+		/* Ownership has moved, do not keep a second reference. */
+		pda_run->consume_buf = 0;
+	}
+}
+
+/* Advance the step counter of the pda_run by one. */
+void colm_increment_steps( struct pda_run *pda_run )
+{
+	pda_run->steps++;
+}
+
+/* Take the step counter of the pda_run back by one. */
+void colm_decrement_steps( struct pda_run *pda_run )
+{
+	pda_run->steps--;
+}
+
+/* Pull length items of data from the input and return them as a string head
+ * with a freshly allocated location attached.
+ *
+ * With a pda_run the data is stored in the pda_run's consume buffer and the
+ * returned head points into that buffer; the scanner's data window and token
+ * prefix are reset. Without a pda_run the data is copied into string space
+ * allocated for the head itself. */
+head_t *colm_stream_pull( program_t *prg, tree_t **sp, struct pda_run *pda_run,
+		struct input_impl *is, long length )
+{
+	if ( pda_run != 0 ) {
+		/* A new buffer is needed when there is none yet (init_fsm_run
+		 * starts with consume_buf == 0) or when the current one cannot
+		 * take length more items.
+		 * NOTE(review): a buffer is requested with size argument 0 even
+		 * when length is large; confirm that new_run_buf( 0 ) yields room
+		 * for requests exceeding FSM_BUFSIZE. */
+		struct run_buf *run_buf = pda_run->consume_buf;
+		if ( run_buf == 0 || length > ( FSM_BUFSIZE - run_buf->length ) ) {
+			run_buf = new_run_buf( 0 );
+			run_buf->next = pda_run->consume_buf;
+			pda_run->consume_buf = run_buf;
+		}
+
+		/* Write after whatever the buffer already holds. */
+		alph_t *dest = run_buf->data + run_buf->length;
+
+		is->funcs->get_data( prg, is, dest, length );
+		location_t *loc = location_allocate( prg );
+		is->funcs->consume_data( prg, is, length, loc );
+
+		run_buf->length += length;
+
+		/* The input moved underneath the scanner, drop its window. */
+		pda_run->p = pda_run->pe = 0;
+		pda_run->tokpref = 0;
+
+		head_t *tokdata = colm_string_alloc_pointer( prg, colm_cstr_from_alph( dest ), length );
+		tokdata->location = loc;
+
+		return tokdata;
+	}
+	else {
+		/* No parser involved, the head owns its own space. */
+		head_t *head = init_str_space( length );
+		alph_t *dest = (alph_t*)head->data;
+
+		is->funcs->get_data( prg, is, dest, length );
+		location_t *loc = location_allocate( prg );
+		is->funcs->consume_data( prg, is, length, loc );
+		head->location = loc;
+
+		return head;
+	}
+}
+
+/* Return previously consumed text to the input. Only whole tokens or ignores
+ * are expected to be sent back, so a send back should never cross a buffer
+ * boundary. A zero length is a no-op. */
+static void send_back_text( struct colm_program *prg, struct input_impl *is, const alph_t *data, long length )
+{
+	if ( length != 0 )
+		is->funcs->undo_consume_data( prg, is, data, length );
+}
+
+/* Return a previously consumed tree to the input by calling the input's
+ * undo_consume_tree function, with false as its last argument. */
+static void send_back_tree( struct colm_program *prg, struct input_impl *is, tree_t *tree )
+{
+ is->funcs->undo_consume_tree( prg, is, tree, false );
+}
+
+/*
+ * Send one ignore item back to the input. Artificial items go back as trees,
+ * all others as text. The step counter is decremented, pending reverse code
+ * puts the pda_run on deck, and the parse is told to stop once the step
+ * counter meets the target.
+ *
+ * Stops on:
+ * PCR_REVERSE
+ */
+static void send_back_ignore( program_t *prg, tree_t **sp,
+		struct pda_run *pda_run, struct input_impl *is, parse_tree_t *parse_tree )
+{
+	#ifdef DEBUG
+	struct lang_el_info *lel_info = prg->rtd->lel_info;
+	debug( prg, REALM_PARSE, "sending back: %s%s\n",
+		lel_info[parse_tree->shadow->tree->id].name,
+		parse_tree->flags & PF_ARTIFICIAL ? " (artificial)" : "" );
+	#endif
+
+	tree_t *tree = parse_tree->shadow->tree;
+	head_t *head = tree->tokdata;
+
+	/* Only items that carry token data have anything to send back. */
+	if ( head != 0 ) {
+		if ( parse_tree->flags & PF_ARTIFICIAL ) {
+			send_back_tree( prg, is, tree );
+		}
+		else {
+			send_back_text( prg, is,
+					colm_alph_from_cstr( string_data( head ) ), head->length );
+		}
+	}
+
+	colm_decrement_steps( pda_run );
+
+	/* Reverse code attached to the item has to run, flag it and make sure
+	 * it is not picked up twice. */
+	if ( parse_tree->flags & PF_HAS_RCODE ) {
+		pda_run->on_deck = true;
+		parse_tree->flags &= ~PF_HAS_RCODE;
+	}
+
+	/* Backed up as far as was asked for? */
+	if ( pda_run->steps == pda_run->target_steps ) {
+		debug( prg, REALM_PARSE, "trigger parse stop, steps = "
+				"target = %d\n", pda_run->target_steps );
+		pda_run->stop = true;
+	}
+}
+
+/* If a token was started but never finished for lack of data, back up over
+ * it: the data window, the token prefix and the eof flag are cleared. Nothing
+ * happens when no token is started. */
+static void reset_token( struct pda_run *pda_run )
+{
+	if ( pda_run->tokstart == 0 )
+		return;
+
+	pda_run->pe = 0;
+	pda_run->p = 0;
+	pda_run->tokpref = 0;
+	pda_run->scan_eof = 0;
+}
+
+/* Send one parsed item back to the input and free it. Artificial items go
+ * back as trees, all others as text. The step counter is decremented,
+ * pending reverse code puts the pda_run on deck, and the parse is told to
+ * stop once the step counter meets the target.
+ *
+ * Stops on:
+ * PCR_REVERSE
+ */
+
+static void send_back( program_t *prg, tree_t **sp, struct pda_run *pda_run,
+		struct input_impl *is, parse_tree_t *parse_tree )
+{
+	debug( prg, REALM_PARSE, "sending back: %s\n",
+			prg->rtd->lel_info[parse_tree->id].name );
+
+	/* A named lang el goes back first, any leading whitespace follows
+	 * separately. */
+	if ( parse_tree->flags & PF_NAMED )
+		is->funcs->undo_consume_lang_el( prg, is );
+
+	colm_decrement_steps( pda_run );
+
+	/* Artificial items were not parsed, they were sent in as items. */
+	const int artificial = ( parse_tree->flags & PF_ARTIFICIAL ) != 0;
+
+	/* Reverse code is handled the same way for both kinds of item, and
+	 * before anything is pushed back. */
+	if ( parse_tree->flags & PF_HAS_RCODE ) {
+		debug( prg, REALM_PARSE, "tree has rcode, setting on deck\n" );
+		pda_run->on_deck = true;
+		parse_tree->flags &= ~PF_HAS_RCODE;
+	}
+
+	if ( artificial ) {
+		/* The input receives its own reference to the tree. */
+		colm_tree_upref( prg, parse_tree->shadow->tree );
+
+		send_back_tree( prg, is, parse_tree->shadow->tree );
+	}
+	else {
+		/* Push back the token data. */
+		head_t *tokdata = parse_tree->shadow->tree->tokdata;
+		send_back_text( prg, is, colm_alph_from_cstr( string_data( tokdata ) ),
+				string_length( tokdata ) );
+
+		/* An eof that was sent back will have to be sent again. */
+		if ( parse_tree->id == prg->rtd->eof_lel_ids[pda_run->parser_id] )
+			pda_run->eof_term_recvd = false;
+
+		/* If the item is bound, remove it from the bindings array. */
+		prg->rtd->pop_binding( pda_run, parse_tree );
+	}
+
+	/* Backed up as far as was asked for? */
+	if ( pda_run->steps == pda_run->target_steps ) {
+		debug( prg, REALM_PARSE, "trigger parse stop, "
+				"steps = target = %d\n", pda_run->target_steps );
+		pda_run->stop = true;
+	}
+
+	/* Downref the tree that was sent back and free the kid. */
+	colm_tree_downref( prg, sp, parse_tree->shadow->tree );
+	kid_free( prg, parse_tree->shadow );
+	parse_tree_free( pda_run, parse_tree );
+}
+
+/* When the ignore list was empty before the current item was added, record
+ * the region index in use on the item, and count a retry if the token_regions
+ * entry following that index is non-zero. Otherwise do nothing. */
+static void set_region( struct pda_run *pda_run, int empty_ignore, parse_tree_t *tree )
+{
+	if ( !empty_ignore )
+		return;
+
+	/* Recording the next region. */
+	tree->retry_region = pda_run->next_region_ind;
+
+	if ( pda_run->pda_tables->token_regions[tree->retry_region+1] != 0 )
+		pda_run->num_retry += 1;
+}
+
+/* Wrap tree in a new parse tree item and push it onto the front of the
+ * pda_run's accumulated ignore list. Counts a step, transfers reverse code to
+ * the item, flags it as a right ignore when a pre-region is active, and
+ * records the region if the list was empty beforehand. */
+static void ignore_tree( program_t *prg, struct pda_run *pda_run, tree_t *tree )
+{
+	/* Sampled before the push below changes it. */
+	const int was_empty = ( pda_run->accum_ignore == 0 );
+
+	colm_increment_steps( pda_run );
+
+	parse_tree_t *item = parse_tree_allocate( pda_run );
+	kid_t *shadow = kid_allocate( prg );
+	shadow->tree = tree;
+	item->shadow = shadow;
+
+	/* Push onto the front of the ignore list. */
+	item->next = pda_run->accum_ignore;
+	pda_run->accum_ignore = item;
+
+	colm_transfer_reverse_code( pda_run, item );
+
+	if ( pda_run->pre_region >= 0 )
+		item->flags |= PF_RIGHT_IGNORE;
+
+	set_region( pda_run, was_empty, pda_run->accum_ignore );
+}
+
+/* Same as ignore_tree, but for an artificial item: the new parse tree item is
+ * flagged PF_ARTIFICIAL and the right-ignore flag is never applied. */
+static void ignore_tree_art( program_t *prg, struct pda_run *pda_run, tree_t *tree )
+{
+	/* Sampled before the push below changes it. */
+	const int was_empty = ( pda_run->accum_ignore == 0 );
+
+	colm_increment_steps( pda_run );
+
+	parse_tree_t *item = parse_tree_allocate( pda_run );
+	item->flags |= PF_ARTIFICIAL;
+
+	kid_t *shadow = kid_allocate( prg );
+	shadow->tree = tree;
+	item->shadow = shadow;
+
+	/* Push onto the front of the ignore list. */
+	item->next = pda_run->accum_ignore;
+	pda_run->accum_ignore = item;
+
+	colm_transfer_reverse_code( pda_run, item );
+
+	set_region( pda_run, was_empty, pda_run->accum_ignore );
+}
+
+/* Build a token tree of language element id carrying tokdata, wrapped in a
+ * new kid. The tree starts with a reference count of one and with a freshly
+ * allocated attribute list as its children. For each capture attribute of
+ * the language element, the text between the corresponding enter and leave
+ * marks of the pda_run is copied into a string and stored in the attribute's
+ * field. Returns the kid. */
+kid_t *make_token_with_data( program_t *prg, struct pda_run *pda_run,
+		struct input_impl *is, int id, head_t *tokdata )
+{
+	struct lang_el_info *lel_info = prg->rtd->lel_info;
+
+	/* Make the token object. */
+	kid_t *attrs = alloc_attrs( prg, lel_info[id].object_length );
+
+	kid_t *input = kid_allocate( prg );
+	tree_t *token = tree_allocate( prg );
+	input->tree = token;
+
+	debug( prg, REALM_PARSE, "made token %p\n", input->tree );
+
+	token->refs = 1;
+	token->id = id;
+	token->tokdata = tokdata;
+
+	/* No children and ignores get added later. */
+	token->child = attrs;
+
+	/* Fill in the captures. The loop does not run when there are none. */
+	int i;
+	for ( i = 0; i < lel_info[id].num_capture_attr; i++ ) {
+		CaptureAttr *ca = &prg->rtd->capture_attr[lel_info[id].capture_attr + i];
+
+		head_t *data = string_alloc_full( prg,
+				colm_cstr_from_alph( pda_run->mark[ca->mark_enter] ),
+				pda_run->mark[ca->mark_leave] -
+						pda_run->mark[ca->mark_enter] );
+
+		tree_t *string = construct_string( prg, data );
+		colm_tree_upref( prg, string );
+		colm_tree_set_field( prg, input->tree, ca->offset, string );
+	}
+
+	return input;
+}
+
+static void report_parse_error( program_t *prg, tree_t **sp, struct pda_run *pda_run )
+{
+ kid_t *kid = pda_run->bt_point;
+ head_t *deepest = 0;
+ while ( kid != 0 ) {
+ head_t *head = kid->tree->tokdata;
+ if ( head != 0 && head->location != 0 ) {
+ if ( deepest == 0 || head->location->byte > deepest->location->byte )
+ deepest = head;
+ }
+ kid = kid->next;
+ }
+
+ head_t *error_head = 0;
+
+ /* If there are no error points on record assume the error occurred at the
+ * beginning of the stream. */
+ if ( deepest == 0 ) {
+ error_head = string_alloc_full( prg, "<input>:1:1: parse error", 32 );
+ error_head->location = location_allocate( prg );
+ error_head->location->line = 1;
+ error_head->location->column = 1;
+ }
+ else {
+ debug( prg, REALM_PARSE, "deepest location byte: %d\n",
+ deepest->location->byte );
+
+ const char *name = deepest->location->name;
+ long line = deepest->location->line;
+ long i, column = deepest->location->column;
+ long byte = deepest->location->byte;
+
+ for ( i = 0; i < deepest->length; i++ ) {
+ if ( deepest->data[i] != '\n' )
+ column += 1;
+ else {
+ line += 1;
+ column = 1;
+ }
+ byte += 1;
+ }
+
+ if ( name == 0 )
+ name = "<input>";
+ char *formatted = malloc( strlen( name ) + 128 );
+ sprintf( formatted, "%s:%ld:%ld: parse error", name, line, column );
+ error_head = string_alloc_full( prg, formatted, strlen(formatted) );
+ free( formatted );
+
+ error_head->location = location_allocate( prg );
+
+ error_head->location->name = deepest->location->name;
+ error_head->location->line = line;
+ error_head->location->column = column;
+ error_head->location->byte = byte;
+ }
+
+ tree_t *tree = construct_string( prg, error_head );
+ colm_tree_downref( prg, sp, pda_run->parse_error_text );
+ pda_run->parse_error_text = tree;
+ colm_tree_upref( prg, pda_run->parse_error_text );
+}
+/* Attach the trailing part of pda_run->accum_ignore to parse_tree as its
+ * right-ignore list. Returns without change when nothing is accumulated,
+ * and does nothing unless the id on top of the stack lies strictly between
+ * 0 and first_non_term_id (presumably: is a terminal -- confirm).
+ * Only the items following the last entry that lacks PF_RIGHT_IGNORE are
+ * taken; that entry and everything before it stay in accum_ignore. */
+static void attach_right_ignore( program_t *prg, tree_t **sp,
+ struct pda_run *pda_run, parse_tree_t *parse_tree )
+{
+ if ( pda_run->accum_ignore == 0 )
+ return;
+
+ if ( pda_run->stack_top->id > 0 &&
+ pda_run->stack_top->id < prg->rtd->first_non_term_id )
+ {
+ /* OK, do it */
+ debug( prg, REALM_PARSE, "attaching right ignore\n" );
+
+ /* The target must not already carry an attached right-ignore list. */
+ assert( ! ( parse_tree->flags & PF_RIGHT_IL_ATTACHED ) );
+
+ parse_tree_t *accum = pda_run->accum_ignore;
+
+ parse_tree_t *stop_at = 0, *use = accum;
+ while ( use != 0 ) {
+ if ( ! (use->flags & PF_RIGHT_IGNORE) )
+ stop_at = use;
+ use = use->next;
+ }
+
+ if ( stop_at != 0 ) {
+ /* Stop at was set. Make it the last item in the ignore list. Take
+ * the rest. */
+ accum = stop_at->next;
+ stop_at->next = 0;
+ }
+ else {
+ /* Stop at was never set. All right ignore. Use it all. */
+ pda_run->accum_ignore = 0;
+ }
+
+ /* The data list needs to be extracted and reversed. The parse tree list
+ * can remain in stack order. */
+ parse_tree_t *child = accum, *last = 0;
+ kid_t *data_child = 0, *data_last = 0;
+
+ while ( child ) {
+ data_child = child->shadow;
+ parse_tree_t *next = child->next;
+
+ /* Reverse the lists. */
+ data_child->next = data_last;
+ child->next = last;
+
+ /* Detach the parse tree from the data tree. */
+ child->shadow = 0;
+
+ /* Keep the last for reversal. */
+ data_last = data_child;
+ last = child;
+
+ child = next;
+ }
+
+ /* Last is now the first. */
+ parse_tree->right_ignore = last;
+
+ if ( data_child != 0 ) { /* non-null only when the loop ran at least once */
+ debug( prg, REALM_PARSE, "attaching ignore right\n" );
+
+ kid_t *ignore_kid = data_last;
+
+ /* Wrap the reversed data list in a new LEL_ID_IGNORE tree; no copy
+ * of the list is made here. */
+ tree_t *right_ignore = 0;
+
+ right_ignore = tree_allocate( prg );
+ right_ignore->id = LEL_ID_IGNORE;
+ right_ignore->child = ignore_kid;
+
+ tree_t *push_to = parse_tree->shadow->tree;
+
+ push_to = push_right_ignore( prg, push_to, right_ignore );
+
+ parse_tree->shadow->tree = push_to; /* store back whatever tree the push returned */
+
+ parse_tree->flags |= PF_RIGHT_IL_ATTACHED;
+ }
+ }
+}
+/* Move the whole of pda_run->accum_ignore onto parse_tree as its
+ * left-ignore list. accum_ignore is always emptied. The parse tree items
+ * and their data kids are reversed into two parallel lists; when at least
+ * one item existed the data list is wrapped in a LEL_ID_IGNORE tree, pushed
+ * with push_left_ignore and PF_LEFT_IL_ATTACHED is set. */
+static void attach_left_ignore( program_t *prg, tree_t **sp,
+ struct pda_run *pda_run, parse_tree_t *parse_tree )
+{
+ /* The target must not already carry an attached left-ignore list. */
+ assert( ! ( parse_tree->flags & PF_LEFT_IL_ATTACHED ) );
+
+ parse_tree_t *accum = pda_run->accum_ignore;
+ pda_run->accum_ignore = 0;
+
+ /* The data list needs to be extracted and reversed. The parse tree list
+ * can remain in stack order. */
+ parse_tree_t *child = accum, *last = 0;
+ kid_t *data_child = 0, *data_last = 0;
+
+ while ( child ) {
+ data_child = child->shadow;
+ parse_tree_t *next = child->next;
+
+ /* Reverse the lists. */
+ data_child->next = data_last;
+ child->next = last;
+
+ /* Detach the parse tree from the data tree. */
+ child->shadow = 0;
+
+ /* Keep the last for reversal. */
+ data_last = data_child;
+ last = child;
+
+ child = next;
+ }
+
+ /* Last is now the first. */
+ parse_tree->left_ignore = last;
+
+ if ( data_child != 0 ) { /* non-null only when the loop ran at least once */
+ debug( prg, REALM_PARSE, "attaching left ignore\n" );
+
+ kid_t *ignore_kid = data_child; /* after the loop this equals data_last, the head of the reversed list */
+
+ /* Make the ignore list for the left-ignore. */
+ tree_t *left_ignore = tree_allocate( prg );
+ left_ignore->id = LEL_ID_IGNORE;
+ left_ignore->child = ignore_kid;
+
+ tree_t *push_to = parse_tree->shadow->tree;
+
+ push_to = push_left_ignore( prg, push_to, left_ignore );
+
+ parse_tree->shadow->tree = push_to; /* store back whatever tree the push returned */
+
+ parse_tree->flags |= PF_LEFT_IL_ATTACHED;
+ }
+}
+
+/* Not currently used. Need to revive this. WARNING: untested changes here.
+ *
+ * Reverse of attach_right_ignore: pops the right-ignore tree from
+ * parse_tree's data tree when PF_RIGHT_IL_ATTACHED is set, then re-pairs
+ * each parse tree in parse_tree->right_ignore with its data kid, reversing
+ * both lists, and installs the result as pda_run->accum_ignore.
+ * NOTE(review): unlike detach_left_ignore, the popped tree is downreffed
+ * only inside the right_ignore != 0 branch -- confirm this is intended. */
+static void detach_right_ignore( program_t *prg, tree_t **sp,
+ struct pda_run *pda_run, parse_tree_t *parse_tree )
+{
+ /* Right ignore are immediately discarded since they are copies of
+ * left-ignores. */
+ tree_t *right_ignore = 0;
+ if ( parse_tree->flags & PF_RIGHT_IL_ATTACHED ) {
+ tree_t *pop_from = parse_tree->shadow->tree;
+
+ pop_from = pop_right_ignore( prg, sp, pop_from, &right_ignore );
+
+ parse_tree->shadow->tree = pop_from;
+
+ parse_tree->flags &= ~PF_RIGHT_IL_ATTACHED;
+ }
+
+ if ( parse_tree->right_ignore != 0 ) {
+ assert( right_ignore != 0 );
+
+ /* Transfer the trees to accumIgnore. */
+ parse_tree_t *ignore = parse_tree->right_ignore;
+ parse_tree->right_ignore = 0;
+
+ kid_t *data_ignore = right_ignore->child;
+ right_ignore->child = 0;
+
+ parse_tree_t *last = 0;
+ kid_t *data_last = 0;
+ while ( ignore != 0 ) {
+ parse_tree_t *next = ignore->next;
+ kid_t *data_next = data_ignore->next; /* assumes both lists have equal length */
+
+ /* Put the data trees underneath the parse trees. */
+ ignore->shadow = data_ignore;
+
+ /* Reverse. */
+ ignore->next = last;
+ data_ignore->next = data_last;
+
+ /* Keep last for reversal. */
+ last = ignore;
+ data_last = data_ignore;
+
+ ignore = next;
+ data_ignore = data_next;
+ }
+
+ pda_run->accum_ignore = last; /* overwrites any list previously held there */
+
+ colm_tree_downref( prg, sp, right_ignore );
+ }
+}
+/* Reverse of attach_left_ignore: pops the left-ignore tree from
+ * parse_tree's data tree when PF_LEFT_IL_ATTACHED is set, re-pairs each
+ * parse tree in parse_tree->left_ignore with its data kid while reversing
+ * both lists, and installs the result as pda_run->accum_ignore. The popped
+ * tree is downreffed unconditionally at the end. */
+static void detach_left_ignore( program_t *prg, tree_t **sp,
+ struct pda_run *pda_run, parse_tree_t *parse_tree )
+{
+ /* Detach left. */
+ tree_t *left_ignore = 0;
+ if ( parse_tree->flags & PF_LEFT_IL_ATTACHED ) {
+ tree_t *pop_from = parse_tree->shadow->tree;
+
+ pop_from = pop_left_ignore( prg, sp, pop_from, &left_ignore );
+
+ parse_tree->shadow->tree = pop_from;
+
+ parse_tree->flags &= ~PF_LEFT_IL_ATTACHED;
+ }
+
+ if ( parse_tree->left_ignore != 0 ) {
+ assert( left_ignore != 0 );
+
+ /* Transfer the trees to accumIgnore. */
+ parse_tree_t *ignore = parse_tree->left_ignore;
+ parse_tree->left_ignore = 0;
+
+ kid_t *data_ignore = left_ignore->child;
+ left_ignore->child = 0;
+
+ parse_tree_t *last = 0;
+ kid_t *data_last = 0;
+ while ( ignore != 0 ) {
+ parse_tree_t *next = ignore->next;
+ kid_t *data_next = data_ignore->next; /* assumes both lists have equal length */
+
+ /* Put the data trees underneath the parse trees. */
+ ignore->shadow = data_ignore;
+
+ /* Reverse. */
+ ignore->next = last;
+ data_ignore->next = data_last;
+
+ /* Keep last for reversal. */
+ last = ignore;
+ data_last = data_ignore;
+
+ ignore = next;
+ data_ignore = data_next;
+ }
+
+ pda_run->accum_ignore = last; /* overwrites any list previously held there */
+ }
+
+ colm_tree_downref( prg, sp, left_ignore ); /* may be 0 here; presumably tolerated by the callee -- confirm */
+}
+
+/* Return 1 when exactly one element sits above the bottom stack entry and
+ * that element's id equals pda_run->stop_target; 0 otherwise. */
+static int is_parser_stop_finished( struct pda_run *pda_run )
+{
+    /* Need a second entry beneath the top... */
+    if ( pda_run->stack_top->next == 0 )
+        return 0;
+
+    /* ...and nothing beneath that one. */
+    if ( pda_run->stack_top->next->next != 0 )
+        return 0;
+
+    return pda_run->stack_top->id == pda_run->stop_target;
+}
+
+static void handle_error( program_t *prg, tree_t **sp, struct pda_run *pda_run )
+{
+ /* Check the result. */
+ if ( pda_run->parse_error ) {
+ /* Error occured in the top-level parser. */
+ report_parse_error( prg, sp, pda_run );
+ }
+ else {
+ if ( is_parser_stop_finished( pda_run ) ) {
+ debug( prg, REALM_PARSE, "stopping the parse\n" );
+ pda_run->stop_parsing = true;
+ }
+ }
+}
+
+/* Pull the matched token (pda_run->tokend characters) out of the input,
+ * keeping both its text and its location.
+ *
+ * The text is copied into the current consume buffer, or into a new one
+ * when none exists or the remaining room is too small. The input is then
+ * consumed and the scanner pointers are reset. Returns a head that points
+ * at the copied text and owns a newly allocated location. */
+static head_t *extract_match( program_t *prg, tree_t **sp,
+        struct pda_run *pda_run, struct input_impl *is )
+{
+    long tok_len = pda_run->tokend;
+
+    //debug( prg, REALM_PARSE, "extracting token of length: %ld\n", length );
+
+    /* Pick a buffer with enough free space, chaining a fresh one in front
+     * of the list when needed. */
+    struct run_buf *buf = pda_run->consume_buf;
+    if ( buf == 0 || tok_len > ( FSM_BUFSIZE - buf->length ) ) {
+        struct run_buf *fresh = new_run_buf( tok_len );
+        fresh->next = pda_run->consume_buf;
+        pda_run->consume_buf = fresh;
+        buf = fresh;
+    }
+
+    alph_t *text = buf->data + buf->length;
+
+    /* Copy first, then consume; the consume call fills in the location. */
+    is->funcs->get_data( prg, is, (alph_t*)text, tok_len );
+    location_t *where = location_allocate( prg );
+    is->funcs->consume_data( prg, is, tok_len, where );
+
+    buf->length += tok_len;
+
+    /* Reset the scanner for the next token. */
+    pda_run->p = pda_run->pe = 0;
+    pda_run->tokpref = 0;
+    pda_run->tokstart = 0;
+
+    head_t *result = colm_string_alloc_pointer( prg, colm_cstr_from_alph( text ), tok_len );
+    result->location = where;
+
+    debug( prg, REALM_PARSE, "location byte: %d\n", result->location->byte );
+
+    return result;
+}
+
+/* Consume the matched token keeping only its location: no text is copied.
+ * Returns a head with a null data pointer, zero length and a newly
+ * allocated location filled in by the consume call. */
+static head_t *extract_no_d( program_t *prg, tree_t **sp,
+        struct pda_run *pda_run, struct input_impl *is )
+{
+    long tok_len = pda_run->tokend;
+
+    /* Consume only; the location is the sole output wanted. */
+    location_t *where = location_allocate( prg );
+    is->funcs->consume_data( prg, is, tok_len, where );
+
+    /* Reset the scanner for the next token. */
+    pda_run->tokstart = 0;
+    pda_run->tokpref = 0;
+    pda_run->p = pda_run->pe = 0;
+
+    head_t *result = colm_string_alloc_pointer( prg, 0, 0 );
+    result->location = where;
+
+    debug( prg, REALM_PARSE, "location byte: %d\n", result->location->byte );
+
+    return result;
+}
+
+/* Pull the matched token's text out of the input without keeping a
+ * location. The text goes into the consume buffer exactly as in
+ * extract_match; the location is written to a stack scratch value and the
+ * returned head has location == 0. */
+static head_t *extract_no_l( program_t *prg, tree_t **sp,
+        struct pda_run *pda_run, struct input_impl *is )
+{
+    long tok_len = pda_run->tokend;
+
+    //debug( prg, REALM_PARSE, "extracting token of length: %ld\n", length );
+
+    /* Pick a buffer with enough free space, chaining a fresh one in front
+     * of the list when needed. */
+    struct run_buf *buf = pda_run->consume_buf;
+    if ( buf == 0 || tok_len > ( FSM_BUFSIZE - buf->length ) ) {
+        struct run_buf *fresh = new_run_buf( tok_len );
+        fresh->next = pda_run->consume_buf;
+        pda_run->consume_buf = fresh;
+        buf = fresh;
+    }
+
+    alph_t *text = buf->data + buf->length;
+
+    is->funcs->get_data( prg, is, text, tok_len );
+
+    /* Using a dummy location: consume_data needs somewhere to write. */
+    location_t scratch;
+    memset( &scratch, 0, sizeof( scratch ) );
+    is->funcs->consume_data( prg, is, tok_len, &scratch );
+
+    buf->length += tok_len;
+
+    /* Reset the scanner for the next token. */
+    pda_run->p = pda_run->pe = 0;
+    pda_run->tokpref = 0;
+    pda_run->tokstart = 0;
+
+    head_t *result = colm_string_alloc_pointer( prg, colm_cstr_from_alph( text ), tok_len );
+
+    /* Don't pass the location. */
+    result->location = 0;
+
+    debug( prg, REALM_PARSE, "location byte: %d\n", scratch.byte );
+
+    return result;
+}
+
+/* Consume the matched token without keeping its text or its location.
+ * Always returns 0. */
+static head_t *consume_match( program_t *prg, tree_t **sp,
+        struct pda_run *pda_run, struct input_impl *is )
+{
+    long tok_len = pda_run->tokend;
+
+    /* consume_data still needs a location to write into; use a scratch. */
+    location_t scratch;
+    memset( &scratch, 0, sizeof(scratch) );
+    is->funcs->consume_data( prg, is, tok_len, &scratch );
+
+    /* Reset the scanner for the next token. */
+    pda_run->tokstart = 0;
+    pda_run->tokpref = 0;
+    pda_run->p = pda_run->pe = 0;
+
+    debug( prg, REALM_PARSE, "location byte: %d\n", scratch.byte );
+
+    return 0;
+}
+
+
+/* Look at the matched token without consuming it from the input.
+ *
+ * Copies pda_run->tokend characters into the free tail of the consume
+ * buffer and returns a head pointing at them, with a new location filled
+ * by transfer_loc. Unlike the extract_* functions, run_buf->length is not
+ * advanced and tokstart is left untouched. */
+static head_t *peek_match( program_t *prg, struct pda_run *pda_run, struct input_impl *is )
+{
+    long length = pda_run->tokend;
+
+    struct run_buf *run_buf = pda_run->consume_buf;
+    if ( run_buf == 0 || length > ( FSM_BUFSIZE - run_buf->length ) ) {
+        /* Size the new buffer for this token, as extract_match and
+         * extract_no_l do. A size of 0 was passed before, so a token
+         * longer than the default capacity had no room requested for it
+         * (presumably new_run_buf grows the buffer for large sizes --
+         * confirm against its definition). */
+        run_buf = new_run_buf( length );
+        run_buf->next = pda_run->consume_buf;
+        pda_run->consume_buf = run_buf;
+    }
+
+    alph_t *dest = run_buf->data + run_buf->length;
+
+    is->funcs->get_data( prg, is, dest, length );
+
+    pda_run->p = pda_run->pe = 0;
+    pda_run->tokpref = 0;
+
+    head_t *head = colm_string_alloc_pointer( prg, colm_cstr_from_alph( dest ), length );
+
+    /* Nothing is consumed, so the location comes from transfer_loc. */
+    head->location = location_allocate( prg );
+    is->funcs->transfer_loc( prg, head->location, is );
+
+    debug( prg, REALM_PARSE, "location byte: %d\n", head->location->byte );
+
+    return head;
+}
+
+
+/* Handle a matched ignore token `id`. When the reducer reports RN_NONE the
+ * input is simply consumed. Otherwise the text is extracted, wrapped in a
+ * tree (refs = 1) and handed to ignore_tree. */
+static void send_ignore( program_t *prg, tree_t **sp,
+        struct pda_run *pda_run, struct input_impl *is, long id )
+{
+    if ( prg->rtd->reducer_need_ign( prg, pda_run ) == RN_NONE ) {
+        consume_match( prg, sp, pda_run, is );
+        return;
+    }
+
+    debug( prg, REALM_PARSE, "ignoring: %s\n", prg->rtd->lel_info[id].name );
+
+    /* Make the ignore string. */
+    head_t *text = extract_match( prg, sp, pda_run, is );
+
+    debug( prg, REALM_PARSE, "ignoring: %.*s\n", text->length, text->data );
+
+    tree_t *ign = tree_allocate( prg );
+    ign->tokdata = text;
+    ign->id = id;
+    ign->refs = 1;
+
+    /* Send it to the pdaRun. */
+    ignore_tree( prg, pda_run, ign );
+}
+
+/* Turn the matched token `id` into parser input.
+ *
+ * The reducer decides how much of the token is kept: nothing, text only,
+ * location only, or both. The token tree is then built, wrapped in a parse
+ * tree and stored in pda_run->parse_input. While the parser state is
+ * non-negative the scanning region is updated too. */
+static void send_token( program_t *prg, tree_t **sp,
+        struct pda_run *pda_run, struct input_impl *is, long id )
+{
+    /* Sampled before anything else touches the ignore list. */
+    int was_empty = ( pda_run->accum_ignore == 0 );
+
+    /* Make the token data. Stays 0 for any value not handled below. */
+    head_t *data = 0;
+    int need = prg->rtd->reducer_need_tok( prg, pda_run, id );
+
+    if ( need == RN_NONE )
+        data = consume_match( prg, sp, pda_run, is );
+    else if ( need == RN_DATA )
+        data = extract_no_l( prg, sp, pda_run, is );
+    else if ( need == RN_LOC )
+        data = extract_no_d( prg, sp, pda_run, is );
+    else if ( need == RN_BOTH )
+        data = extract_match( prg, sp, pda_run, is );
+
+    debug( prg, REALM_PARSE, "token: %s text: %.*s\n",
+            prg->rtd->lel_info[id].name,
+            string_length(data), string_data(data) );
+
+    kid_t *token = make_token_with_data( prg, pda_run, is, id, data );
+
+    colm_increment_steps( pda_run );
+
+    parse_tree_t *pt = parse_tree_allocate( pda_run );
+    pt->id = token->tree->id;
+    pt->shadow = token;
+
+    pda_run->parse_input = pt;
+
+    /* Store any alternate scanning region. */
+    if ( token != 0 && pda_run->pda_cs >= 0 )
+        set_region( pda_run, was_empty, pt );
+}
+
+/* Take a ready-made tree from the input stream and make it the parser
+ * input, wrapped in a parse tree flagged PF_ARTIFICIAL. */
+static void send_tree( program_t *prg, tree_t **sp, struct pda_run *pda_run,
+        struct input_impl *is )
+{
+    kid_t *holder = kid_allocate( prg );
+    holder->tree = is->funcs->consume_tree( prg, is );
+
+    colm_increment_steps( pda_run );
+
+    parse_tree_t *pt = parse_tree_allocate( pda_run );
+    pt->shadow = holder;
+    pt->id = holder->tree->id;
+    pt->flags |= PF_ARTIFICIAL;
+
+    pda_run->parse_input = pt;
+}
+
+/* Take a ready-made tree from the input stream and queue it as an
+ * artificial ignore item. */
+static void send_ignore_tree( program_t *prg, tree_t **sp,
+        struct pda_run *pda_run, struct input_impl *is )
+{
+    tree_t *consumed = is->funcs->consume_tree( prg, is );
+
+    ignore_tree_art( prg, pda_run, consumed );
+}
+
+/* Send a token of element `id` that carries no text, only the current
+ * input location. The token becomes pda_run->parse_input and, while the
+ * parser state is non-negative, the scanning region is updated. */
+static void send_collect_ignore( program_t *prg, tree_t **sp,
+        struct pda_run *pda_run, struct input_impl *is, int id )
+{
+    debug( prg, REALM_PARSE, "token: CI\n" );
+
+    /* Sampled before anything else touches the ignore list. */
+    int was_empty = ( pda_run->accum_ignore == 0 );
+
+    /* Make the token data: a bare head with a location attached. */
+    head_t *data = head_allocate( prg );
+    data->location = location_allocate( prg );
+    is->funcs->transfer_loc( prg, data->location, is );
+
+    debug( prg, REALM_PARSE, "token: %s text: %.*s\n",
+            prg->rtd->lel_info[id].name,
+            string_length(data), string_data(data) );
+
+    kid_t *token = make_token_with_data( prg, pda_run, is, id, data );
+
+    colm_increment_steps( pda_run );
+
+    parse_tree_t *pt = parse_tree_allocate( pda_run );
+    pt->id = token->tree->id;
+    pt->shadow = token;
+
+    pda_run->parse_input = pt;
+
+    /* Store any alternate scanning region. */
+    if ( token != 0 && pda_run->pda_cs >= 0 )
+        set_region( pda_run, was_empty, pt );
+}
+
+/* Read the token_regions table at next_region_ind + offset. An offset of 0
+ * gives the current entry; a non-zero offset looks at a later one. */
+static int get_next_region( struct pda_run *pda_run, int offset )
+{
+    struct pda_tables *tables = pda_run->pda_tables;
+
+    return tables->token_regions[pda_run->next_region_ind+offset];
+}
+
+/* Read the token_pre_regions table at next_region_ind. */
+static int get_next_pre_region( struct pda_run *pda_run )
+{
+    struct pda_tables *tables = pda_run->pda_tables;
+
+    return tables->token_pre_regions[pda_run->next_region_ind];
+}
+
+/* Send the parser's EOF token. The token tree (refs = 1) carries a head
+ * with only the current input location. The scanner region and FSM state
+ * are reloaded from the parser tables before the token is stored in
+ * pda_run->parse_input. */
+static void send_eof( program_t *prg, tree_t **sp, struct pda_run *pda_run,
+        struct input_impl *is )
+{
+    debug( prg, REALM_PARSE, "token: _EOF\n" );
+
+    colm_increment_steps( pda_run );
+
+    /* Location-only token data. */
+    head_t *data = head_allocate( prg );
+    data->location = location_allocate( prg );
+    is->funcs->transfer_loc( prg, data->location, is );
+
+    kid_t *token = kid_allocate( prg );
+    token->tree = tree_allocate( prg );
+
+    tree_t *eof_tree = token->tree;
+    eof_tree->refs = 1;
+    eof_tree->id = prg->rtd->eof_lel_ids[pda_run->parser_id];
+    eof_tree->tokdata = data;
+
+    /* Set the state using the state of the parser. */
+    pda_run->region = get_next_region( pda_run, 0 );
+    pda_run->pre_region = get_next_pre_region( pda_run );
+    pda_run->fsm_cs = pda_run->fsm_tables->entry_by_region[pda_run->region];
+
+    parse_tree_t *pt = parse_tree_allocate( pda_run );
+    pt->id = eof_tree->id;
+    pt->shadow = token;
+
+    pda_run->parse_input = pt;
+}
+
+/* Reset the scanner for a new token and pick the FSM entry state from the
+ * parser tables. With a positive pre-region the scan starts there and the
+ * main region's entry is kept in next_cs; otherwise it starts directly in
+ * the main region. */
+static void new_token( program_t *prg, struct pda_run *pda_run )
+{
+    /* Buffer position and end-of-input tracking. */
+    pda_run->p = pda_run->pe = 0;
+    pda_run->tokpref = 0;
+    pda_run->scan_eof = 0;
+
+    /* Init the scanner vars. */
+    pda_run->matched_token = 0;
+    pda_run->tokend = 0;
+    pda_run->tokstart = 0;
+    pda_run->act = 0;
+
+    /* Set the state using the state of the parser. */
+    pda_run->region = get_next_region( pda_run, 0 );
+    pda_run->pre_region = get_next_pre_region( pda_run );
+
+    if ( pda_run->pre_region <= 0 ) {
+        pda_run->fsm_cs = pda_run->fsm_tables->entry_by_region[pda_run->region];
+    }
+    else {
+        pda_run->fsm_cs = pda_run->fsm_tables->entry_by_region[pda_run->pre_region];
+        pda_run->next_cs = pda_run->fsm_tables->entry_by_region[pda_run->region];
+    }
+
+    /* Clear the mark array. */
+    memset( pda_run->mark, 0, sizeof(pda_run->mark) );
+}
+
+static void push_bt_point( program_t *prg, struct pda_run *pda_run )
+{
+ tree_t *tree = 0;
+ if ( pda_run->accum_ignore != 0 )
+ tree = pda_run->accum_ignore->shadow->tree;
+ else if ( pda_run->token_list != 0 )
+ tree = pda_run->token_list->kid->tree;
+
+ if ( tree != 0 ) {
+ debug( prg, REALM_PARSE, "pushing bt point with location byte %d\n",
+ ( tree != 0 && tree->tokdata != 0 && tree->tokdata->location != 0 ) ?
+ tree->tokdata->location->byte : 0 );
+
+ kid_t *kid = kid_allocate( prg );
+ kid->tree = tree;
+ colm_tree_upref( prg, tree );
+ kid->next = pda_run->bt_point;
+ pda_run->bt_point = kid;
+ }
+}
+
+/* Negative status codes returned by scan_token(). Positive return values
+ * are matched token ids.
+ *   SCAN_UNDO            - pda_run->trigger_undo was set on entry
+ *   SCAN_IGNORE          - the input block was INPUT_IGNORE
+ *   SCAN_TREE            - the input block was INPUT_TREE
+ *   SCAN_TRY_AGAIN_LATER - the input block was INPUT_EOD
+ *   SCAN_ERROR           - FSM error state with no default token, or a
+ *                          partial match at the end of input
+ *   SCAN_LANG_EL         - the input block was INPUT_LANG_EL
+ *   SCAN_EOF             - INPUT_EOF with no token in progress */
+#define SCAN_UNDO -7
+#define SCAN_IGNORE -6
+#define SCAN_TREE -5
+#define SCAN_TRY_AGAIN_LATER -4
+#define SCAN_ERROR -3
+#define SCAN_LANG_EL -2
+#define SCAN_EOF -1
+/* Run the scanner until there is something to report.
+ *
+ * Each iteration fetches the next parse block from the input, sets up
+ * p/pe (or flags scan_eof when a token is already in progress), and runs
+ * the FSM. Returns the matched token id when one is found, the region's
+ * default token when the FSM hits its error state and a default exists,
+ * or one of the negative SCAN_* codes. */
+static long scan_token( program_t *prg, struct pda_run *pda_run, struct input_impl *is )
+{
+ if ( pda_run->trigger_undo )
+ return SCAN_UNDO;
+
+ while ( true ) {
+ alph_t *pd = 0;
+ int len = 0;
+ int tokpref = pda_run->tokpref;
+ int type = is->funcs->get_parse_block( prg, is, &tokpref, &pd, &len );
+
+ switch ( type ) {
+ case INPUT_DATA:
+ pda_run->p = pd;
+ pda_run->pe = pd + len;
+ break;
+
+ case INPUT_EOS: /* never returns early: the FSM always runs */
+ pda_run->p = pda_run->pe = 0;
+ if ( pda_run->tokstart != 0 )
+ pda_run->scan_eof = 1;
+ debug( prg, REALM_SCAN, "EOS *******************\n" );
+ break;
+
+ case INPUT_EOF: /* report EOF only when no token is in progress */
+ pda_run->p = pda_run->pe = 0;
+ if ( pda_run->tokstart != 0 )
+ pda_run->scan_eof = 1;
+ else
+ return SCAN_EOF;
+ break;
+
+ case INPUT_EOD:
+ pda_run->p = pda_run->pe = 0;
+ return SCAN_TRY_AGAIN_LATER;
+
+ case INPUT_LANG_EL: /* p and pe are left as they were in the next three cases */
+ if ( pda_run->tokstart != 0 )
+ pda_run->scan_eof = 1;
+ else
+ return SCAN_LANG_EL;
+ break;
+
+ case INPUT_TREE:
+ if ( pda_run->tokstart != 0 )
+ pda_run->scan_eof = 1;
+ else
+ return SCAN_TREE;
+ break;
+ case INPUT_IGNORE:
+ if ( pda_run->tokstart != 0 )
+ pda_run->scan_eof = 1;
+ else
+ return SCAN_IGNORE;
+ break;
+ }
+
+ prg->rtd->fsm_execute( pda_run, is );
+
+ /* First check if scanning stopped because we have a token. */
+ if ( pda_run->matched_token > 0 ) {
+ /* If the token has a marker indicating the end (due to trailing
+ * context) then adjust data now. */
+ struct lang_el_info *lel_info = prg->rtd->lel_info;
+ if ( lel_info[pda_run->matched_token].mark_id >= 0 )
+ pda_run->p = pda_run->mark[lel_info[pda_run->matched_token].mark_id];
+
+ return pda_run->matched_token;
+ }
+
+ /* Check for error. */
+ if ( pda_run->fsm_cs == pda_run->fsm_tables->error_state ) {
+ /* If a token was started, but not finished (tokstart != 0) then
+ * restore data to the beginning of that token. */
+ if ( pda_run->tokstart != 0 )
+ pda_run->p = pda_run->tokstart;
+
+ /* Check for a default token in the region. If one is there
+ * then send it and continue with the processing loop. */
+ if ( prg->rtd->region_info[pda_run->region].default_token >= 0 ) {
+ pda_run->tokpref = 0;
+ return prg->rtd->region_info[pda_run->region].default_token;
+ }
+
+ return SCAN_ERROR;
+ }
+
+ /* Check for no match on eof (trailing data that partially matches a token). */
+ if ( pda_run->scan_eof )
+ return SCAN_ERROR;
+
+ /* Got here because the state machine didn't match a token or encounter
+ * an error. Must be because we got to the end of the buffer data. */
+ assert( pda_run->p == pda_run->pe );
+ }
+
+ /* Should not be reached. */
+ return SCAN_ERROR;
+}
+
+/* Return the data tree of the parse result, or 0 when there was a parse
+ * error or the chosen stack entry has no shadow. With `stop` set the
+ * result is read from the top of the stack, otherwise from the entry just
+ * beneath it. */
+tree_t *get_parsed_root( struct pda_run *pda_run, int stop )
+{
+    if ( pda_run->parse_error )
+        return 0;
+
+    parse_tree_t *holder = stop ?
+            pda_run->stack_top : pda_run->stack_top->next;
+
+    return holder->shadow != 0 ? holder->shadow->tree : 0;
+}
+/* Free pt and every parse tree reachable from it through next,
+ * left_ignore, child and right_ignore. Each attached shadow has its tree
+ * downreffed and its kid freed. Pending trees are kept on the VM stack
+ * via vm_push_ptree / vm_pop_ptree rather than by recursion (assumes the
+ * vm_* macros operate on the local sp -- TODO confirm). */
+static void clear_parse_tree( program_t *prg, tree_t **sp,
+ struct pda_run *pda_run, parse_tree_t *pt )
+{
+ tree_t **top = vm_ptop(); /* stack position on entry; marks "no work left" */
+
+ if ( pt == 0 )
+ return;
+
+free_tree:
+ if ( pt->next != 0 ) {
+ vm_push_ptree( pt->next );
+ }
+
+ if ( pt->left_ignore != 0 ) {
+ vm_push_ptree( pt->left_ignore );
+ }
+
+ if ( pt->child != 0 ) {
+ vm_push_ptree( pt->child );
+ }
+
+ if ( pt->right_ignore != 0 ) {
+ vm_push_ptree( pt->right_ignore );
+ }
+
+ if ( pt->shadow != 0 ) {
+ colm_tree_downref( prg, sp, pt->shadow->tree );
+ kid_free( prg, pt->shadow );
+ }
+
+ parse_tree_free( pda_run, pt );
+
+ /* More parse trees pending on the stack? Pop the next one and repeat. */
+ if ( sp != top ) {
+ pt = vm_pop_ptree();
+ goto free_tree;
+ }
+}
+/* Release everything a pda_run holds: the FSM run state, the parse stack,
+ * the token and backtrack-point lists, accumulated ignores, pending input,
+ * reverse code and the parse error text. For a reducer run the local
+ * parse tree pool is cleared as well, after warning about lost items. */
+void colm_pda_clear( program_t *prg, tree_t **sp, struct pda_run *pda_run )
+{
+ clear_fsm_run( prg, pda_run );
+
+ /* Remaining stack and parse trees underneath. */
+ clear_parse_tree( prg, sp, pda_run, pda_run->stack_top );
+ pda_run->stack_top = 0;
+
+ /* Free the token list nodes. Only the nodes are freed; no tree is
+ * downreffed in this loop. */
+ ref_t *ref = pda_run->token_list;
+ while ( ref != 0 ) {
+ ref_t *next = ref->next;
+ kid_free( prg, (kid_t*)ref );
+ ref = next;
+ }
+ pda_run->token_list = 0;
+
+ /* Traverse the btPoint list downreffing */
+ kid_t *btp = pda_run->bt_point;
+ while ( btp != 0 ) {
+ kid_t *next = btp->next;
+ colm_tree_downref( prg, sp, btp->tree );
+ kid_free( prg, (kid_t*)btp );
+ btp = next;
+ }
+ pda_run->bt_point = 0;
+
+ /* Clear out any remaining ignores. */
+ clear_parse_tree( prg, sp, pda_run, pda_run->accum_ignore );
+ pda_run->accum_ignore = 0;
+
+ /* Clear the input list (scanned tokens, sent trees). */
+ clear_parse_tree( prg, sp, pda_run, pda_run->parse_input );
+ pda_run->parse_input = 0;
+
+ colm_rcode_downref_all( prg, sp, &pda_run->reverse_code );
+ colm_rt_code_vect_empty( &pda_run->reverse_code );
+ colm_rt_code_vect_empty( &pda_run->rcode_collect );
+
+ colm_tree_downref( prg, sp, pda_run->parse_error_text ); /* NOTE(review): the pointer is not reset to 0 afterwards */
+
+ if ( pda_run->reducer ) {
+ long local_lost = pool_alloc_num_lost( &pda_run->local_pool );
+
+ if ( local_lost )
+ message( "warning: reducer local lost parse trees: %ld\n", local_lost );
+ pool_alloc_clear( &pda_run->local_pool );
+ }
+}
+/* Initialise a pda_run for parser `parser_id` using `tables`.
+ *
+ * The struct is zeroed first, so the explicit zero/false assignments
+ * below restate that state. A reducer run gets its own parse tree pool,
+ * sized for parse_tree_t plus the reducer's commit union; otherwise the
+ * program-wide pool is used. The stack starts with one sentinel entry
+ * (state -1) and the scanner is primed through init_fsm_run and
+ * new_token. */
+void colm_pda_init( program_t *prg, struct pda_run *pda_run, struct pda_tables *tables,
+ int parser_id, long stop_target, int revert_on, struct_t *context, int reducer )
+{
+ memset( pda_run, 0, sizeof(struct pda_run) );
+
+ pda_run->pda_tables = tables;
+ pda_run->parser_id = parser_id;
+ pda_run->stop_target = stop_target;
+ pda_run->revert_on = revert_on;
+ pda_run->target_steps = -1;
+ pda_run->reducer = reducer;
+
+ /* An initial commit shift count of -1 means we won't ever back up to zero
+ * shifts and think parsing cannot continue. */
+ pda_run->shift_count = 0;
+ pda_run->commit_shift_count = -1;
+
+ if ( reducer ) {
+ init_pool_alloc( &pda_run->local_pool, sizeof(parse_tree_t) +
+ prg->rtd->commit_union_sz(reducer) );
+ pda_run->parse_tree_pool = &pda_run->local_pool;
+ }
+ else {
+ pda_run->parse_tree_pool = &prg->parse_tree_pool;
+ }
+
+ debug( prg, REALM_PARSE, "initializing struct pda_run %s\n",
+ prg->rtd->lel_info[prg->rtd->parser_lel_ids[parser_id]].name );
+
+ /* FIXME: need the right one here. */
+ pda_run->pda_cs = prg->rtd->start_states[pda_run->parser_id];
+
+ /* Sentinel data tree for the bottom-of-stack entry; holds one reference. */
+ kid_t *sentinal = kid_allocate( prg );
+ sentinal->tree = tree_allocate( prg );
+ sentinal->tree->refs = 1;
+
+ /* Init the element allocation variables. */
+ pda_run->stack_top = parse_tree_allocate( pda_run );
+ pda_run->stack_top->state = -1;
+ pda_run->stack_top->shadow = sentinal;
+
+ pda_run->num_retry = 0;
+ pda_run->next_region_ind = pda_run->pda_tables->token_region_inds[pda_run->pda_cs];
+ pda_run->stop_parsing = false;
+ pda_run->accum_ignore = 0;
+ pda_run->bt_point = 0;
+ pda_run->check_next = false;
+ pda_run->check_stop = false;
+
+ prg->rtd->init_bindings( pda_run );
+
+ init_rt_code_vect( &pda_run->reverse_code );
+ init_rt_code_vect( &pda_run->rcode_collect );
+
+ pda_run->context = context;
+ pda_run->parse_error = 0;
+ pda_run->parse_input = 0;
+ pda_run->trigger_undo = 0;
+
+ pda_run->token_id = 0;
+
+ pda_run->on_deck = false;
+ pda_run->parsed = 0;
+ pda_run->reject = false;
+
+ pda_run->rc_block_count = 0;
+ pda_run->eof_term_recvd = 0;
+
+ init_fsm_run( prg, pda_run );
+ new_token( prg, pda_run ); /* reads next_region_ind, set above */
+}
+
+/* Compute the state reached from the entry on top of the stack. A
+ * negative stored state yields the parser's start state. Otherwise the
+ * target is found by following the transition on the top element's id out
+ * of its stored state through the keys, offsets, indices and targs
+ * tables. */
+static long stack_top_target( program_t *prg, struct pda_run *pda_run )
+{
+    if ( pda_run->stack_top->state < 0 )
+        return prg->rtd->start_states[pda_run->parser_id];
+
+    struct pda_tables *tables = pda_run->pda_tables;
+
+    /* Distance of the id from the lowest key of the stored state. */
+    unsigned shift = pda_run->stack_top->id -
+            tables->keys[pda_run->stack_top->state<<1];
+    unsigned offset = tables->offsets[pda_run->stack_top->state] + shift;
+    int index = tables->indices[offset];
+
+    return tables->targs[index];
+}
+
+/*
+ * shift: retry goes into lower of shifted node.
+ * reduce: retry goes into upper of reduced node.
+ * shift-reduce: cannot be a retry
+ */
+
+/* Stops on:
+ * PCR_REDUCTION
+ * PCR_REVERSE
+ */
+static long parse_token( program_t *prg, tree_t **sp,
+ struct pda_run *pda_run, struct input_impl *is, long entry )
+{
+ int pos;
+ unsigned int *action;
+ int rhs_len;
+ int owner;
+ int induce_reject;
+ int ind_pos;
+
+ /* COROUTINE */
+ switch ( entry ) {
+ case PCR_START:
+
+ /* The scanner will send a null token if it can't find a token. */
+ if ( pda_run->parse_input == 0 )
+ goto parse_error;
+
+ /* This will cause parseInput to be lost. This
+ * path should be traced. */
+ if ( pda_run->pda_cs < 0 )
+ return PCR_DONE;
+
+ /* Record the state in the parse tree. */
+ pda_run->parse_input->state = pda_run->pda_cs;
+
+again:
+ if ( pda_run->parse_input == 0 )
+ goto _out;
+
+ pda_run->lel = pda_run->parse_input;
+ pda_run->cur_state = pda_run->pda_cs;
+
+ if ( pda_run->lel->id < pda_run->pda_tables->keys[pda_run->cur_state<<1] ||
+ pda_run->lel->id > pda_run->pda_tables->keys[(pda_run->cur_state<<1)+1] )
+ {
+ debug( prg, REALM_PARSE, "parse error, no transition 1\n" );
+ push_bt_point( prg, pda_run );
+ goto parse_error;
+ }
+
+ ind_pos = pda_run->pda_tables->offsets[pda_run->cur_state] +
+ (pda_run->lel->id - pda_run->pda_tables->keys[pda_run->cur_state<<1]);
+
+ owner = pda_run->pda_tables->owners[ind_pos];
+ if ( owner != pda_run->cur_state ) {
+ debug( prg, REALM_PARSE, "parse error, no transition 2\n" );
+ push_bt_point( prg, pda_run );
+ goto parse_error;
+ }
+
+ pos = pda_run->pda_tables->indices[ind_pos];
+ if ( pos < 0 ) {
+ debug( prg, REALM_PARSE, "parse error, no transition 3\n" );
+ push_bt_point( prg, pda_run );
+ goto parse_error;
+ }
+
+ /* Checking complete. */
+
+ induce_reject = false;
+ pda_run->pda_cs = pda_run->pda_tables->targs[pos];
+ action = pda_run->pda_tables->actions + pda_run->pda_tables->act_inds[pos];
+ if ( pda_run->lel->retry_lower )
+ action += pda_run->lel->retry_lower;
+
+ /*
+ * Shift
+ */
+
+ if ( *action & act_sb ) {
+ debug( prg, REALM_PARSE, "shifted: %s\n",
+ prg->rtd->lel_info[pda_run->lel->id].name );
+ /* Consume. */
+ pda_run->parse_input = pda_run->parse_input->next;
+
+ pda_run->lel->state = pda_run->cur_state;
+
+ /* If its a token then attach ignores and record it in the token list
+ * of the next ignore attachment to use. */
+ if ( pda_run->lel->id < prg->rtd->first_non_term_id ) {
+ if ( pda_run->lel->cause_reduce == 0 )
+ attach_right_ignore( prg, sp, pda_run, pda_run->stack_top );
+ }
+
+ pda_run->lel->next = pda_run->stack_top;
+ pda_run->stack_top = pda_run->lel;
+
+ /* If its a token then attach ignores and record it in the token list
+ * of the next ignore attachment to use. */
+ if ( pda_run->lel->id < prg->rtd->first_non_term_id ) {
+ attach_left_ignore( prg, sp, pda_run, pda_run->lel );
+
+ ref_t *ref = (ref_t*)kid_allocate( prg );
+ ref->kid = pda_run->lel->shadow;
+ //colm_tree_upref( prg, pdaRun->tree );
+ ref->next = pda_run->token_list;
+ pda_run->token_list = ref;
+ }
+
+ if ( action[1] == 0 )
+ pda_run->lel->retry_lower = 0;
+ else {
+ debug( prg, REALM_PARSE, "retry: %p\n", pda_run->stack_top );
+ pda_run->lel->retry_lower += 1;
+ assert( pda_run->lel->retry_upper == 0 );
+ /* FIXME: Has the retry already been counted? */
+ pda_run->num_retry += 1;
+ }
+
+ pda_run->shift_count += 1;
+ }
+
+ /*
+ * Commit
+ */
+
+ if ( pda_run->pda_tables->commit_len[pos] != 0 ) {
+ debug( prg, REALM_PARSE, "commit point\n" );
+ pda_run->commit_shift_count = pda_run->shift_count;
+
+ /* Not in a reverting context and the parser result is not used. */
+ if ( pda_run->reducer )
+ commit_reduce( prg, sp, pda_run );
+
+ if ( pda_run->fail_parsing )
+ goto fail;
+
+ }
+
+ /*
+ * Reduce
+ */
+
+ if ( *action & act_rb ) {
+ int r, object_length;
+ parse_tree_t *last, *child;
+ kid_t *attrs;
+ kid_t *data_last, *data_child;
+
+ /* If there was shift don't attach again. */
+ if ( !( *action & act_sb ) && pda_run->lel->id < prg->rtd->first_non_term_id )
+ attach_right_ignore( prg, sp, pda_run, pda_run->stack_top );
+
+ pda_run->reduction = *action >> 2;
+
+ if ( pda_run->parse_input != 0 )
+ pda_run->parse_input->cause_reduce += 1;
+
+ kid_t *value = kid_allocate( prg );
+ value->tree = tree_allocate( prg );
+ value->tree->refs = 1;
+ value->tree->id = prg->rtd->prod_info[pda_run->reduction].lhs_id;
+ value->tree->prod_num = prg->rtd->prod_info[pda_run->reduction].prod_num;
+
+ pda_run->red_lel = parse_tree_allocate( pda_run );
+ pda_run->red_lel->id = prg->rtd->prod_info[pda_run->reduction].lhs_id;
+ pda_run->red_lel->next = 0;
+ pda_run->red_lel->cause_reduce = 0;
+ pda_run->red_lel->retry_lower = 0;
+ pda_run->red_lel->shadow = value;
+
+ /* Transfer. */
+ pda_run->red_lel->retry_upper = pda_run->lel->retry_lower;
+ pda_run->lel->retry_lower = 0;
+
+ /* Allocate the attributes. */
+ object_length = prg->rtd->lel_info[pda_run->red_lel->id].object_length;
+ attrs = alloc_attrs( prg, object_length );
+
+ /* Build the list of children. We will be giving up a reference when we
+ * detach parse tree and data tree, but gaining the reference when we
+ * put the children under the new data tree. No need to alter refcounts
+ * here. */
+ rhs_len = prg->rtd->prod_info[pda_run->reduction].length;
+ child = last = 0;
+ data_child = data_last = 0;
+ for ( r = 0; r < rhs_len; r++ ) {
+
+ /* The child. */
+ child = pda_run->stack_top;
+ data_child = child->shadow;
+
+ /* Pop. */
+ pda_run->stack_top = pda_run->stack_top->next;
+
+ /* Detach the parse tree from the data. */
+ child->shadow = 0;
+
+ /* Reverse list. */
+ child->next = last;
+ data_child->next = data_last;
+
+ /* Track last for reversal. */
+ last = child;
+ data_last = data_child;
+ }
+
+ pda_run->red_lel->child = child;
+ pda_run->red_lel->shadow->tree->child = kid_list_concat( attrs, data_child );
+
+ debug( prg, REALM_PARSE, "reduced: %s rhsLen %d\n",
+ prg->rtd->prod_info[pda_run->reduction].name, rhs_len );
+ if ( action[1] == 0 )
+ pda_run->red_lel->retry_upper = 0;
+ else {
+ pda_run->red_lel->retry_upper += 1;
+ assert( pda_run->lel->retry_lower == 0 );
+ pda_run->num_retry += 1;
+ debug( prg, REALM_PARSE, "retry: %p\n", pda_run->red_lel );
+ }
+
+ /* When the production is of zero length we stay in the same state.
+ * Otherwise we use the state stored in the first child. */
+ pda_run->pda_cs = rhs_len == 0 ? pda_run->cur_state : child->state;
+
+ if ( prg->ctx_dep_parsing && prg->rtd->prod_info[pda_run->reduction].frame_id >= 0 ) {
+ /* Frame info for reduction. */
+ pda_run->fi = &prg->rtd->frame_info[prg->rtd->prod_info[pda_run->reduction].frame_id];
+ pda_run->frame_id = prg->rtd->prod_info[pda_run->reduction].frame_id;
+ pda_run->reject = false;
+ pda_run->parsed = 0;
+ pda_run->code = pda_run->fi->codeWV;
+
+ /* COROUTINE */
+ return PCR_REDUCTION;
+ case PCR_REDUCTION:
+
+ if ( prg->induce_exit )
+ goto fail;
+
+ /* If the lhs was stored and it changed then we need to restore the
+ * original upon backtracking, otherwise downref since we took a
+ * copy above. */
+ if ( pda_run->parsed != 0 ) {
+ if ( pda_run->parsed != pda_run->red_lel->shadow->tree ) {
+ debug( prg, REALM_PARSE, "lhs tree was modified, "
+ "adding a restore instruction\n" );
+//
+// /* Make it into a parse tree. */
+// tree_t *newPt = prepParseTree( prg, sp, pdaRun->redLel->tree );
+// colm_tree_downref( prg, sp, pdaRun->redLel->tree );
+//
+// /* Copy it in. */
+// pdaRun->redLel->tree = newPt;
+// colm_tree_upref( prg, pdaRun->redLel->tree );
+
+ /* Add the restore instruct. */
+ append_code_val( &pda_run->rcode_collect, IN_RESTORE_LHS );
+ append_word( &pda_run->rcode_collect, (word_t)pda_run->parsed );
+ append_code_val( &pda_run->rcode_collect, SIZEOF_CODE + SIZEOF_WORD );
+ }
+ else {
+ /* Not changed. Done with parsed. */
+ colm_tree_downref( prg, sp, pda_run->parsed );
+ }
+ pda_run->parsed = 0;
+ }
+
+ /* Pull out the reverse code, if any. */
+ colm_make_reverse_code( pda_run );
+ colm_transfer_reverse_code( pda_run, pda_run->red_lel );
+
+ /* Perhaps the execution environment is telling us we need to
+ * reject the reduction. */
+ induce_reject = pda_run->reject;
+ }
+
+ /* If the left hand side was replaced then the only parse algorithm
+ * data that is contained in it will be the PF_HAS_RCODE flag. Everything
+ * else will be in the original. This requires that we restore first
+ * when going backwards and when doing a commit. */
+
+ if ( induce_reject ) {
+ debug( prg, REALM_PARSE, "error induced during reduction of %s\n",
+ prg->rtd->lel_info[pda_run->red_lel->id].name );
+ pda_run->red_lel->state = pda_run->cur_state;
+ pda_run->red_lel->next = pda_run->stack_top;
+ pda_run->stack_top = pda_run->red_lel;
+ /* FIXME: What is the right argument here? */
+ push_bt_point( prg, pda_run );
+ goto parse_error;
+ }
+
+ pda_run->red_lel->next = pda_run->parse_input;
+ pda_run->parse_input = pda_run->red_lel;
+ }
+
+ goto again;
+
+parse_error:
+ debug( prg, REALM_PARSE, "hit error, backtracking\n" );
+
+#if 0
+ if ( pda_run->num_retry == 0 ) {
+ debug( prg, REALM_PARSE, "out of retries failing parse\n" );
+ goto fail;
+ }
+#endif
+
+ while ( 1 ) {
+ if ( pda_run->on_deck ) {
+ debug( prg, REALM_BYTECODE, "dropping out for reverse code call\n" );
+
+ pda_run->frame_id = -1;
+ pda_run->code = colm_pop_reverse_code( &pda_run->reverse_code );
+
+ /* COROUTINE */
+ return PCR_REVERSE;
+ case PCR_REVERSE:
+
+ colm_decrement_steps( pda_run );
+ }
+ else if ( pda_run->check_next ) {
+ pda_run->check_next = false;
+
+ if ( pda_run->next > 0 && pda_run->pda_tables->token_regions[pda_run->next] != 0 ) {
+ debug( prg, REALM_PARSE, "found a new region\n" );
+ pda_run->num_retry -= 1;
+ pda_run->pda_cs = stack_top_target( prg, pda_run );
+ pda_run->next_region_ind = pda_run->next;
+ return PCR_DONE;
+ }
+ }
+ else if ( pda_run->check_stop ) {
+ pda_run->check_stop = false;
+
+ if ( pda_run->stop ) {
+ debug( prg, REALM_PARSE, "stopping the backtracking, "
+ "steps is %d\n", pda_run->steps );
+
+ pda_run->pda_cs = stack_top_target( prg, pda_run );
+ goto _out;
+ }
+ }
+ else if ( pda_run->parse_input != 0 ) {
+ /* Either we are dealing with a terminal that was shifted or a
+ * nonterminal that was reduced. */
+ if ( pda_run->parse_input->id < prg->rtd->first_non_term_id ) {
+ /* This is a terminal. */
+ assert( pda_run->parse_input->retry_upper == 0 );
+
+ if ( pda_run->parse_input->retry_lower != 0 ) {
+ debug( prg, REALM_PARSE, "found retry targ: %p\n", pda_run->parse_input );
+
+ pda_run->num_retry -= 1;
+ pda_run->pda_cs = pda_run->parse_input->state;
+ goto again;
+ }
+
+ if ( pda_run->parse_input->cause_reduce != 0 ) {
+ /* The terminal caused a reduce. Unshift the reduced thing
+ * (will unreduce in the next step). */
+ if ( pda_run->shift_count == pda_run->commit_shift_count ) {
+ debug( prg, REALM_PARSE, "backed up to commit point, "
+ "failing parse\n" );
+ goto fail;
+ }
+ pda_run->shift_count -= 1;
+
+ pda_run->undo_lel = pda_run->stack_top;
+
+ /* Check if we've arrived at the stack sentinel. This guard
+ * is here to allow us to initially set numRetry to one to
+ * cause the parser to backup all the way to the beginning
+ * when an error occurs. */
+ if ( pda_run->undo_lel->next == 0 )
+ break;
+
+ /* Either we are dealing with a terminal that was
+ * shifted or a nonterminal that was reduced. */
+ assert( !(pda_run->stack_top->id < prg->rtd->first_non_term_id) );
+
+ debug( prg, REALM_PARSE, "backing up over non-terminal: %s\n",
+ prg->rtd->lel_info[pda_run->stack_top->id].name );
+
+ /* Pop the item from the stack. */
+ pda_run->stack_top = pda_run->stack_top->next;
+
+ /* Queue it as next parseInput item. */
+ pda_run->undo_lel->next = pda_run->parse_input;
+ pda_run->parse_input = pda_run->undo_lel;
+ }
+ else {
+ long region = pda_run->parse_input->retry_region;
+ pda_run->next = region > 0 ? region + 1 : 0;
+ pda_run->check_next = true;
+ pda_run->check_stop = true;
+
+ send_back( prg, sp, pda_run, is, pda_run->parse_input );
+
+ pda_run->parse_input = 0;
+ }
+ }
+ else if ( pda_run->parse_input->flags & PF_HAS_RCODE ) {
+ debug( prg, REALM_PARSE, "tree has rcode, setting on deck\n" );
+ pda_run->on_deck = true;
+ pda_run->parsed = 0;
+
+ /* Only the RCODE flag was in the replaced lhs. All the rest is in
+ * the original. We read it after restoring. */
+
+ pda_run->parse_input->flags &= ~PF_HAS_RCODE;
+ }
+ else {
+ /* Remove it from the input queue. */
+ pda_run->undo_lel = pda_run->parse_input;
+ pda_run->parse_input = pda_run->parse_input->next;
+
+ /* Extract children from the child list. */
+ parse_tree_t *first = pda_run->undo_lel->child;
+ pda_run->undo_lel->child = 0;
+
+ /* This will skip the ignores/attributes, etc. */
+ kid_t *data_first = tree_extract_child( prg, pda_run->undo_lel->shadow->tree );
+
+ /* Walk the child list and push the items onto the parsing
+ * stack one at a time. */
+ while ( first != 0 ) {
+ /* Get the next item ahead of time. */
+ parse_tree_t *next = first->next;
+ kid_t *data_next = data_first->next;
+
+ /* Push onto the stack. */
+ first->next = pda_run->stack_top;
+ pda_run->stack_top = first;
+
+ /* Reattach the data and the parse tree. */
+ first->shadow = data_first;
+
+ first = next;
+ data_first = data_next;
+ }
+
+ /* If there is an parseInput queued, this is one less reduction it has
+ * caused. */
+ if ( pda_run->parse_input != 0 )
+ pda_run->parse_input->cause_reduce -= 1;
+
+ if ( pda_run->undo_lel->retry_upper != 0 ) {
+ /* There is always an parseInput item here because reduce
+ * conflicts only happen on a lookahead character. */
+ assert( pda_run->parse_input != pda_run->undo_lel );
+ assert( pda_run->parse_input != 0 );
+ assert( pda_run->undo_lel->retry_lower == 0 );
+ assert( pda_run->parse_input->retry_upper == 0 );
+
+ /* Transfer the retry from undoLel to parseInput. */
+ pda_run->parse_input->retry_lower = pda_run->undo_lel->retry_upper;
+ pda_run->parse_input->retry_upper = 0;
+ pda_run->parse_input->state = stack_top_target( prg, pda_run );
+ }
+
+ /* Free the reduced item. */
+ colm_tree_downref( prg, sp, pda_run->undo_lel->shadow->tree );
+ kid_free( prg, pda_run->undo_lel->shadow );
+ parse_tree_free( pda_run, pda_run->undo_lel );
+
+ /* If the stacktop had right ignore attached, detach now. */
+ if ( pda_run->stack_top->flags & PF_RIGHT_IL_ATTACHED )
+ detach_right_ignore( prg, sp, pda_run, pda_run->stack_top );
+ }
+ }
+ else if ( pda_run->accum_ignore != 0 ) {
+ debug( prg, REALM_PARSE, "have accumulated ignore to undo\n" );
+
+ /* Send back any accumulated ignore tokens, then trigger error
+ * in the parser. */
+ parse_tree_t *ignore = pda_run->accum_ignore;
+ pda_run->accum_ignore = pda_run->accum_ignore->next;
+ ignore->next = 0;
+
+ long region = ignore->retry_region;
+ pda_run->next = region > 0 ? region + 1 : 0;
+ pda_run->check_next = true;
+ pda_run->check_stop = true;
+
+ send_back_ignore( prg, sp, pda_run, is, ignore );
+
+ colm_tree_downref( prg, sp, ignore->shadow->tree );
+ kid_free( prg, ignore->shadow );
+ parse_tree_free( pda_run, ignore );
+ }
+ else {
+ if ( pda_run->shift_count == pda_run->commit_shift_count ) {
+ debug( prg, REALM_PARSE, "backed up to commit point, failing parse\n" );
+ goto fail;
+ }
+
+ pda_run->shift_count -= 1;
+
+ /* Now it is time to undo something. Pick an element from the top of
+ * the stack. */
+ pda_run->undo_lel = pda_run->stack_top;
+
+ /* Check if we've arrived at the stack sentinel. This guard is
+ * here to allow us to initially set numRetry to one to cause the
+ * parser to backup all the way to the beginning when an error
+ * occurs. */
+ if ( pda_run->undo_lel->next == 0 )
+ break;
+
+ /* Either we are dealing with a terminal that was
+ * shifted or a nonterminal that was reduced. */
+ if ( pda_run->stack_top->id < prg->rtd->first_non_term_id ) {
+ debug( prg, REALM_PARSE, "backing up over effective terminal: %s\n",
+ prg->rtd->lel_info[pda_run->stack_top->id].name );
+
+ /* Pop the item from the stack. */
+ pda_run->stack_top = pda_run->stack_top->next;
+
+ /* Queue it as next parseInput item. */
+ pda_run->undo_lel->next = pda_run->parse_input;
+ pda_run->parse_input = pda_run->undo_lel;
+
+ /* Pop from the token list. */
+ ref_t *ref = pda_run->token_list;
+ pda_run->token_list = ref->next;
+ kid_free( prg, (kid_t*)ref );
+
+ assert( pda_run->accum_ignore == 0 );
+ detach_left_ignore( prg, sp, pda_run, pda_run->parse_input );
+ }
+ else {
+ debug( prg, REALM_PARSE, "backing up over non-terminal: %s\n",
+ prg->rtd->lel_info[pda_run->stack_top->id].name );
+
+ /* Pop the item from the stack. */
+ pda_run->stack_top = pda_run->stack_top->next;
+
+ /* Queue it as next parseInput item. */
+ pda_run->undo_lel->next = pda_run->parse_input;
+ pda_run->parse_input = pda_run->undo_lel;
+ }
+
+ /* Undo attach of right ignore. */
+ if ( pda_run->stack_top->flags & PF_RIGHT_IL_ATTACHED )
+ detach_right_ignore( prg, sp, pda_run, pda_run->stack_top );
+ }
+ }
+
+fail:
+ pda_run->pda_cs = -1;
+ pda_run->parse_error = 1;
+
+ /* FIXME: do we still need to fall through here? A fail is permanent now,
+ * no longer called into again. */
+
+ return PCR_DONE;
+
+_out:
+ pda_run->next_region_ind = pda_run->pda_tables->token_region_inds[pda_run->pda_cs];
+
+ /* COROUTINE */
+ case PCR_DONE:
+ break; }
+
+ return PCR_DONE;
+}
+
+/*
+ * colm_parse_loop
+ *
+ * Scan-and-parse driver written as a switch-based coroutine. Each pass of
+ * the loop takes a token id from scan_token(), turns it into parser input
+ * with one of the send_* helpers, and feeds that to parse_token(). The loop
+ * ends on the first stop condition tested after the skip_send label.
+ *
+ * entry: PCR_START on the first call. When resuming, the code this function
+ * last returned; the switch jumps to the case label of that name, which sits
+ * inside the loop body. C locals are not preserved across a return, so
+ * everything that must survive is kept in pda_run.
+ *
+ * Returns PCR_DONE once the loop has ended, otherwise one of the codes
+ * listed below. pda_run->code is assigned before each of those returns
+ * (inside parse_token for PCR_REDUCTION and PCR_REVERSE).
+ *
+ * Stops on:
+ * PCR_PRE_EOF
+ * PCR_GENERATION
+ * PCR_REDUCTION
+ * PCR_REVERSE
+ */
+
+long colm_parse_loop( program_t *prg, tree_t **sp, struct pda_run *pda_run,
+ struct input_impl *is, long entry )
+{
+ struct lang_el_info *lel_info = prg->rtd->lel_info;
+
+ /* COROUTINE: dispatch on entry; the resume labels are nested below. */
+ switch ( entry ) {
+ case PCR_START:
+
+ pda_run->stop = false;
+
+ while ( true ) {
+ debug( prg, REALM_PARSE, "parse loop start\n" );
+
+ /* Pull the current scanner from the parser. This can change during
+ * parsing due to inputStream pushes, usually for the purpose of includes.
+ * */
+ pda_run->token_id = scan_token( prg, pda_run, is );
+
+ /* Scan error while a pre-region is active: clear pre_region, continue
+ * from next_cs with tokpref reset, and scan again. */
+ if ( pda_run->token_id == SCAN_ERROR ) {
+ if ( pda_run->pre_region >= 0 ) {
+ pda_run->pre_region = -1;
+ pda_run->fsm_cs = pda_run->next_cs;
+ pda_run->tokpref = 0;
+ continue;
+ }
+ }
+
+ /* Scan error in a region with a collect-ignore element (ci_lel_id > 0):
+ * send that element and go straight to parsing. */
+ if ( pda_run->token_id == SCAN_ERROR &&
+ ( prg->rtd->region_info[pda_run->region].ci_lel_id > 0 ) )
+ {
+ debug( prg, REALM_PARSE, "sending a collect ignore\n" );
+ send_collect_ignore( prg, sp, pda_run, is,
+ prg->rtd->region_info[pda_run->region].ci_lel_id );
+ goto yes;
+ }
+
+ if ( pda_run->token_id == SCAN_TRY_AGAIN_LATER ) {
+ debug( prg, REALM_PARSE, "scanner says try again later\n" );
+ break;
+ }
+
+ /* The assignment keeps parse_input cleared when asserts are compiled out. */
+ assert( pda_run->parse_input == 0 );
+ pda_run->parse_input = 0;
+
+ /* Check for EOF. */
+ if ( pda_run->token_id == SCAN_EOF ) {
+ pda_run->eof_term_recvd = true;
+ send_eof( prg, sp, pda_run, is );
+
+ pda_run->frame_id = prg->rtd->region_info[pda_run->region].eof_frame_id;
+
+ if ( prg->ctx_dep_parsing && pda_run->frame_id >= 0 ) {
+ debug( prg, REALM_PARSE, "HAVE PRE_EOF BLOCK\n" );
+
+ pda_run->fi = &prg->rtd->frame_info[pda_run->frame_id];
+ pda_run->code = pda_run->fi->codeWV;
+
+ /* COROUTINE: return with pda_run->code set; resume lands on the label. */
+ return PCR_PRE_EOF;
+ case PCR_PRE_EOF:
+
+ colm_make_reverse_code( pda_run );
+ }
+ }
+ else if ( pda_run->token_id == SCAN_UNDO ) {
+ /* Fall through with parseInput = 0. FIXME: Do we need to send back ignore? */
+ debug( prg, REALM_PARSE, "invoking undo from the scanner\n" );
+ }
+ else if ( pda_run->token_id == SCAN_ERROR ) {
+ /* Scanner error, maybe retry. */
+ if ( pda_run->accum_ignore == 0 && get_next_region( pda_run, 1 ) != 0 ) {
+ debug( prg, REALM_PARSE, "scanner failed, trying next region\n" );
+
+ pda_run->next_region_ind += 1;
+ goto skip_send;
+ }
+ else { // if ( pdaRun->numRetry > 0 ) {
+ debug( prg, REALM_PARSE, "invoking parse error from the scanner\n" );
+
+ /* Fall through to send null (error). */
+ push_bt_point( prg, pda_run );
+ }
+#if 0
+ else {
+ debug( prg, REALM_PARSE, "no alternate scanning regions\n" );
+
+ /* There are no alternative scanning regions to try, nor are
+ * there any alternatives stored in the current parse tree. No
+ * choice but to end the parse. */
+ push_bt_point( prg, pda_run );
+
+ report_parse_error( prg, sp, pda_run );
+ pda_run->parse_error = 1;
+ goto skip_send;
+ }
+#endif
+ }
+ else if ( pda_run->token_id == SCAN_LANG_EL ) {
+ debug( prg, REALM_PARSE, "sending an named lang el\n" );
+
+ /* A named language element (parsing colm program). */
+ prg->rtd->send_named_lang_el( prg, sp, pda_run, is );
+ }
+ else if ( pda_run->token_id == SCAN_TREE ) {
+ debug( prg, REALM_PARSE, "sending a tree\n" );
+
+ /* A tree already built. */
+ send_tree( prg, sp, pda_run, is );
+ }
+ else if ( pda_run->token_id == SCAN_IGNORE ) {
+ debug( prg, REALM_PARSE, "sending an ignore token\n" );
+
+ /* A tree to ignore. */
+ send_ignore_tree( prg, sp, pda_run, is );
+ goto skip_send;
+ }
+ else if ( prg->ctx_dep_parsing && lel_info[pda_run->token_id].frame_id >= 0 ) {
+ /* Has a generation action. */
+ debug( prg, REALM_PARSE, "token gen action: %s\n",
+ prg->rtd->lel_info[pda_run->token_id].name );
+
+ /* Make the token data. */
+ pda_run->tokdata = peek_match( prg, pda_run, is );
+
+ /* Note that we don't update the position now. It is done when the token
+ * data is pulled from the inputStream. */
+
+ pda_run->p = pda_run->pe = 0;
+ pda_run->tokpref = 0;
+ pda_run->scan_eof = 0;
+
+ pda_run->fi = &prg->rtd->frame_info[prg->rtd->lel_info[pda_run->token_id].frame_id];
+ pda_run->frame_id = prg->rtd->lel_info[pda_run->token_id].frame_id;
+ pda_run->code = pda_run->fi->codeWV;
+
+ /* COROUTINE: return with pda_run->code set; resume lands on the label. */
+ return PCR_GENERATION;
+ case PCR_GENERATION:
+
+ colm_make_reverse_code( pda_run );
+
+ /* Finished with the match text. */
+ string_free( prg, pda_run->tokdata );
+
+ goto skip_send;
+ }
+ else if ( lel_info[pda_run->token_id].ignore ) {
+ debug( prg, REALM_PARSE, "sending an ignore token: %s\n",
+ prg->rtd->lel_info[pda_run->token_id].name );
+
+ /* Is an ignore token. */
+ send_ignore( prg, sp, pda_run, is, pda_run->token_id );
+ goto skip_send;
+ }
+ else {
+ debug( prg, REALM_PARSE, "sending a plain old token: %s\n",
+ prg->rtd->lel_info[pda_run->token_id].name );
+
+ /* Is a plain token. */
+ send_token( prg, sp, pda_run, is, pda_run->token_id );
+ }
+yes:
+
+ if ( pda_run->parse_input != 0 )
+ colm_transfer_reverse_code( pda_run, pda_run->parse_input );
+
+ if ( pda_run->parse_input != 0 ) {
+ /* If it's a nonterminal with a termdup then flip the parse tree to
+ * the terminal. */
+ if ( pda_run->parse_input->id >= prg->rtd->first_non_term_id ) {
+ pda_run->parse_input->id =
+ prg->rtd->lel_info[pda_run->parse_input->id].term_dup_id;
+ pda_run->parse_input->flags |= PF_TERM_DUP;
+ }
+ }
+
+ long pcr = parse_token( prg, sp, pda_run, is, PCR_START );
+
+ while ( pcr != PCR_DONE ) {
+
+ /* COROUTINE: pass parse_token's stop code to the caller. On resume,
+ * pcr is reassigned by re-entering parse_token with the same entry. */
+ return pcr;
+ case PCR_REDUCTION:
+ case PCR_REVERSE:
+
+ pcr = parse_token( prg, sp, pda_run, is, entry );
+ }
+
+ assert( pcr == PCR_DONE );
+
+ handle_error( prg, sp, pda_run );
+
+skip_send:
+ new_token( prg, pda_run );
+
+ /* Various stop conditions. This should all be covered by one test
+ * eventually. */
+
+ if ( pda_run->trigger_undo ) {
+ debug( prg, REALM_PARSE, "parsing stopped by triggerUndo\n" );
+ break;
+ }
+
+ if ( pda_run->eof_term_recvd ) {
+ debug( prg, REALM_PARSE, "parsing stopped by EOF\n" );
+ break;
+ }
+
+ if ( pda_run->stop_parsing ) {
+ debug( prg, REALM_PARSE, "scanner has been stopped\n" );
+ break;
+ }
+
+ if ( pda_run->stop ) {
+ debug( prg, REALM_PARSE, "parsing has been stopped by consumedCount\n" );
+ break;
+ }
+
+ if ( prg->induce_exit ) {
+ debug( prg, REALM_PARSE, "parsing has been stopped by a call to exit\n" );
+ break;
+ }
+
+ if ( pda_run->parse_error ) {
+ debug( prg, REALM_PARSE, "parsing stopped by a parse error\n" );
+ break;
+ }
+
+ /* Disregard any alternate parse paths, just go right to failure. */
+ if ( pda_run->fail_parsing ) {
+ debug( prg, REALM_PARSE, "parsing failed by explicit request\n" );
+ break;
+ }
+ }
+
+ /* COROUTINE */
+ case PCR_DONE:
+ break; }
+
+ return PCR_DONE;
+}
+
+
+/*
+ * colm_parse_frag
+ *
+ * Coroutine wrapper around colm_parse_loop() for one input fragment.
+ *
+ * entry: PCR_START to begin, or the code previously returned by this
+ * function to resume. A fresh start is skipped when pda_run->parse_error
+ * is already set; a resume always re-enters the parse loop.
+ *
+ * Returns the parse loop's stop code while it is not PCR_DONE, otherwise
+ * PCR_DONE.
+ */
+long colm_parse_frag( program_t *prg, tree_t **sp,
+		struct pda_run *pda_run, input_t *input, long entry )
+{
+	long status;
+
+	switch ( entry ) {
+	case PCR_START:
+		/* A parser that has already failed is not started again. */
+		if ( pda_run->parse_error )
+			break;
+		/* fall through */
+
+	case PCR_REDUCTION:
+	case PCR_GENERATION:
+	case PCR_PRE_EOF:
+	case PCR_REVERSE:
+		/* Start or resume: either way the loop receives our entry code. */
+		status = colm_parse_loop( prg, sp, pda_run,
+				input_to_impl( input ), entry );
+
+		/* Anything but PCR_DONE is a stop the caller has to service. */
+		if ( status != PCR_DONE )
+			return status;
+		break;
+
+	case PCR_DONE:
+		break;
+	}
+
+	return PCR_DONE;
+}
+
+/*
+ * colm_parse_undo_frag
+ *
+ * Coroutine wrapper that runs the parse loop with trigger_undo set and
+ * target_steps = steps. It does nothing when steps is not below
+ * pda_run->steps.
+ *
+ * entry: PCR_START to begin, or the code previously returned by this
+ * function to resume. The debug line and reset_token() run on every call,
+ * resumes included.
+ *
+ * Returns the parse loop's stop code while it is not PCR_DONE, otherwise
+ * PCR_DONE after restoring trigger_undo, target_steps and num_retry.
+ */
+long colm_parse_undo_frag( program_t *prg, tree_t **sp, struct pda_run *pda_run,
+		input_t *input, long entry, long steps )
+{
+	long status;
+
+	debug( prg, REALM_PARSE,
+			"undo parse frag, target steps: %ld, pdarun steps: %ld\n",
+			steps, pda_run->steps );
+
+	reset_token( pda_run );
+
+	switch ( entry ) {
+	case PCR_START:
+		/* Already at or below the requested step count. */
+		if ( !( steps < pda_run->steps ) )
+			break;
+
+		/* Arm the backwards run; the parse loop will recognise the
+		 * situation. */
+		pda_run->num_retry += 1;
+		pda_run->target_steps = steps;
+		pda_run->trigger_undo = 1;
+		/* fall through */
+
+	case PCR_REDUCTION:
+	case PCR_GENERATION:
+	case PCR_PRE_EOF:
+	case PCR_REVERSE:
+		status = colm_parse_loop( prg, sp, pda_run, input_to_impl(input), entry );
+
+		/* Anything but PCR_DONE is a stop the caller has to service. */
+		if ( status != PCR_DONE )
+			return status;
+
+		/* Loop finished: put the environment back. */
+		pda_run->trigger_undo = 0;
+		pda_run->target_steps = -1;
+		pda_run->num_retry -= 1;
+		break;
+
+	case PCR_DONE:
+		break;
+	}
+
+	return PCR_DONE;
+}
+
+/*
+ * colm_parse_reduce_commit
+ *
+ * Flushes anything not yet committed by calling commit_reduce(), but only
+ * when pda_run->reducer is nonzero.
+ */
+void colm_parse_reduce_commit( program_t *prg, tree_t **sp,
+		struct pda_run *pda_run )
+{
+	/* No reducer configured: nothing to flush. */
+	if ( !pda_run->reducer )
+		return;
+
+	commit_reduce( prg, sp, pda_run );
+}
+
diff --git a/src/pdarun.h b/src/pdarun.h
new file mode 100644
index 00000000..4003b9be
--- /dev/null
+++ b/src/pdarun.h
@@ -0,0 +1,471 @@
+/*
+ * Copyright 2007-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _COLM_PDARUN_H
+#define _COLM_PDARUN_H
+
+#include <colm/input.h>
+#include <colm/defs.h>
+#include <colm/tree.h>
+#include <colm/struct.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct colm_program;
+
+#define MARK_SLOTS 32
+
+/*
+ * Table set for the scanner state machine (pda_run holds a pointer to one).
+ * NOTE(review): this header does not state what each table contains. The
+ * names resemble the arrays Ragel emits for table-driven machines, and the
+ * num_* fields presumably give element counts -- confirm against the code
+ * that generates and reads these tables.
+ */
+struct fsm_tables
+{
+ long *actions;
+ long *key_offsets;
+ char *trans_keys;
+ long *single_lengths;
+ long *range_lengths;
+ long *index_offsets;
+ long *transTargsWI;
+ long *transActionsWI;
+ long *to_state_actions;
+ long *from_state_actions;
+ long *eof_actions;
+ long *eof_targs;
+ long *entry_by_region;
+
+ long num_states;
+ long num_actions;
+ long num_trans_keys;
+ long num_single_lengths;
+ long num_range_lengths;
+ long num_index_offsets;
+ long numTransTargsWI;
+ long numTransActionsWI;
+ long num_regions;
+
+ long start_state;
+ long first_final;
+ long error_state;
+
+ struct GenAction **action_switch;
+ long num_action_switch;
+};
+
+#if SIZEOF_LONG != 4 && SIZEOF_LONG != 8
+ #error "SIZEOF_LONG contained an unexpected value"
+#endif
+
+struct colm_execution;
+
+/*
+ * Vector of code_t. pda_run keeps two of them (rcode_collect and
+ * reverse_code), and the append_* helpers in this header write into one
+ * through colm_rt_code_vect_replace().
+ */
+struct rt_code_vect
+{
+ code_t *data;
+ long tab_len; /* position at which the append_* helpers write */
+ long alloc_len; /* NOTE(review): presumably the allocated capacity -- confirm */
+
+ /* FIXME: leak when freed. */
+};
+
+void list_add_after( list_t *list, list_el_t *prev_el, list_el_t *new_el );
+void list_add_before( list_t *list, list_el_t *next_el, list_el_t *new_el );
+
+void list_prepend( list_t *list, list_el_t *new_el );
+void list_append( list_t *list, list_el_t *new_el );
+
+list_el_t *list_detach( list_t *list, list_el_t *el );
+list_el_t *list_detach_first(list_t *list );
+list_el_t *list_detach_last(list_t *list );
+
+long list_length(list_t *list);
+
+/*
+ * Per-function record.
+ * NOTE(review): nothing in this header shows how the fields are used;
+ * frame_id presumably indexes the frame_info table -- confirm.
+ */
+struct function_info
+{
+ long frame_id;
+ long arg_size;
+ long frame_size;
+};
+
+/*
+ * Program Data.
+ */
+
+/*
+ * Per-pattern/constructor record.
+ * NOTE(review): offset presumably locates the first pat_cons_node of the
+ * pattern -- confirm against the code that reads it.
+ */
+struct pat_cons_info
+{
+ long offset;
+ long num_bindings;
+};
+
+/*
+ * One node of a pattern/constructor.
+ * NOTE(review): next and child are declared long rather than pointers, so
+ * they presumably hold indices into a node table -- confirm.
+ */
+struct pat_cons_node
+{
+ long id;
+ long prod_num;
+ long next;
+ long child;
+ long bind_id;
+ const char *data;
+ long length;
+ long left_ignore;
+ long right_ignore;
+
+ /* Just match nonterminal, don't go inside. */
+ unsigned char stop;
+};
+
+/* FIXME: should have a descriptor for object types to give the length. */
+
+/*
+ * Static description of one language element. The parser reads this table
+ * indexed by a token id or a parse-tree id.
+ */
+struct lang_el_info
+{
+ const char *name; /* printed in the parser's debug messages */
+ const char *xml_tag;
+ unsigned char repeat;
+ unsigned char list;
+ unsigned char literal;
+ unsigned char ignore; /* nonzero: the parse loop sends the token as an ignore */
+
+ /* When >= 0 (and ctx_dep_parsing is on) the token has a generation
+ * action; the value indexes the frame_info table. */
+ long frame_id;
+
+ long object_type_id;
+ long ofi_offset;
+ long object_length; /* passed to alloc_attrs() when this element is reduced */
+
+ /* Id that a nonterminal arriving as parser input is switched to; the
+ * element is then flagged PF_TERM_DUP. */
+ long term_dup_id;
+ long mark_id;
+ long capture_attr;
+ long num_capture_attr;
+};
+
+/*
+ * Static description of one struct type.
+ * NOTE(review): trees/trees_len presumably list the offsets of the tree
+ * fields inside the struct -- confirm against the code that walks them.
+ */
+struct struct_el_info
+{
+ long size;
+ short *trees;
+ long trees_len;
+};
+
+/*
+ * Static description of one production. The parser indexes this table with
+ * pda_run->reduction when it performs a reduce.
+ */
+struct prod_info
+{
+ unsigned long lhs_id; /* id given to the tree and parse tree built by the reduce */
+ short prod_num; /* copied into the reduced tree's prod_num */
+ long length; /* number of elements popped off the parse stack */
+ const char *name; /* printed in the "reduced:" debug message */
+ long frame_id; /* >= 0: index of the reduction action's frame_info */
+ unsigned char lhs_upref;
+ unsigned char *copy;
+ long copy_len;
+};
+
+/* Must match the LocalType enum. */
+#define LI_Tree 1
+#define LI_Iter 2
+#define LI_RevIter 3
+#define LI_UserIter 4
+
+/*
+ * Describes one local of a frame (frame_info.locals points to an array).
+ * NOTE(review): type presumably holds one of the LI_* codes -- confirm.
+ */
+struct local_info
+{
+ char type;
+ short offset;
+};
+
+/*
+ * Describes one executable frame. Before returning a stop code, the parser
+ * points pda_run->fi at one of these and sets pda_run->code from codeWV.
+ * NOTE(review): the WV/WC suffixes are not explained in this header --
+ * confirm what distinguishes the two code blocks.
+ */
+struct frame_info
+{
+ const char *name;
+ code_t *codeWV;
+ long codeLenWV;
+ code_t *codeWC;
+ long codeLenWC;
+ struct local_info *locals;
+ long locals_len;
+ long arg_size;
+ long frame_size;
+ char ret_tree;
+};
+
+/*
+ * Static description of one scanner region, indexed by pda_run->region.
+ */
+struct region_info
+{
+ long default_token;
+ long eof_frame_id; /* >= 0: frame_info index of the region's pre-EOF block */
+ int ci_lel_id; /* > 0: element sent as a collect-ignore on a scan error */
+};
+
+/*
+ * One captured attribute.
+ * NOTE(review): mark_enter/mark_leave presumably index pda_run->mark[],
+ * which has MARK_SLOTS entries -- confirm.
+ */
+typedef struct _CaptureAttr
+{
+ long mark_enter;
+ long mark_leave;
+ long offset;
+} CaptureAttr;
+
+/*
+ * Parser tables, followed by their sizes.
+ * NOTE(review): only token_region_inds and token_regions are documented
+ * from visible use in the parser; the num_* fields presumably give element
+ * counts -- confirm.
+ */
+struct pda_tables
+{
+ /* Parser table data. */
+ int *indices;
+ int *owners;
+ int *keys;
+ unsigned int *offsets;
+ unsigned int *targs;
+ unsigned int *act_inds;
+ unsigned int *actions;
+ int *commit_len;
+ int *token_region_inds; /* indexed by parser state; value goes to next_region_ind */
+ int *token_regions; /* indexed by pda_run->next while backtracking; 0 means no region */
+ int *token_pre_regions;
+
+ int num_indices;
+ int num_keys;
+ int num_states;
+ int num_targs;
+ int num_act_inds;
+ int num_actions;
+ int num_commit_len;
+ int num_region_items;
+ int num_pre_region_items;
+};
+
+/*
+ * One chunk of pool storage. The pool allocator mallocs data with room for
+ * FRESH_BLOCK elements of sizeofT bytes each.
+ */
+struct pool_block
+{
+ void *data;
+ struct pool_block *next; /* NOTE(review): presumably the previously allocated block -- confirm */
+};
+
+/*
+ * Link node, the element type of pool_alloc.pool.
+ * NOTE(review): presumably overlaid on freed elements to chain them into a
+ * free list -- confirm in pool.c.
+ */
+struct pool_item
+{
+ struct pool_item *next;
+};
+
+/*
+ * Fixed-size element allocator state. init_pool_alloc() sets head and pool
+ * to 0 and nextel to FRESH_BLOCK.
+ */
+struct pool_alloc
+{
+ struct pool_block *head;
+ long nextel; /* == FRESH_BLOCK (with pool == 0) makes the allocator malloc a new block */
+ struct pool_item *pool;
+ int sizeofT; /* element size in bytes */
+};
+
+/*
+ * All state for one parser run. The parsing functions are coroutines that
+ * return to their caller and are re-entered later, so whatever must survive
+ * between calls is stored here rather than in C locals.
+ */
+struct pda_run
+{
+ /*
+ * Scanning.
+ */
+ struct fsm_tables *fsm_tables;
+
+ struct run_buf *consume_buf;
+
+ /* pre_region >= 0 is tested, and reset to -1, by the parse loop when the
+ * scanner reports an error. */
+ long region, pre_region;
+ long fsm_cs, next_cs, act;
+ alph_t *start;
+ alph_t *tokstart;
+ long tokend;
+ long tokpref;
+ alph_t *p, *pe;
+ char scan_eof;
+
+ char return_result;
+ char skip_tokpref;
+ char eof_term_recvd; /* set when SCAN_EOF is handled; stops the parse loop */
+
+ alph_t *mark[MARK_SLOTS];
+ long matched_token;
+
+ /*
+ * Parsing
+ */
+ int num_retry;
+ parse_tree_t *stack_top; /* top of the parse stack, linked through ->next */
+ ref_t *token_list;
+ int pda_cs; /* parser state; set to -1 when the parse fails */
+ int next_region_ind;
+
+ struct pda_tables *pda_tables;
+ int parser_id;
+
+ /* Reused. */
+ struct rt_code_vect rcode_collect;
+ struct rt_code_vect reverse_code;
+
+ int stop_parsing;
+ long stop_target;
+
+ parse_tree_t *accum_ignore; /* ignore tokens sent back one at a time when backtracking */
+
+ kid_t *bt_point;
+
+ struct bindings *bindings;
+
+ int revert_on;
+
+ struct colm_struct *context;
+
+ int stop;
+ int parse_error; /* set to 1 on a failed parse; also stops the parse loop */
+
+ long steps;
+ long target_steps;
+
+ /* The shift count simply tracks the number of shifts that have happened.
+ * The commit shift count is the shift count when the last commit occurred.
+ * If we back up to this number of shifts then we decide we cannot proceed.
+ * The commit shift count is initialized to -1. */
+ long shift_count;
+ long commit_shift_count;
+
+ /* Set when backtracking meets an element flagged PF_HAS_RCODE; the next
+ * backtracking step then returns PCR_REVERSE. */
+ int on_deck;
+
+ /*
+ * Data we added when refactoring the parsing engine into a coroutine.
+ */
+
+ parse_tree_t *parse_input; /* head of the parser's input queue; 0 when empty */
+ struct frame_info *fi;
+ int reduction; /* index into prod_info for the reduce in progress */
+ parse_tree_t *red_lel; /* element built for the left-hand side of that reduce */
+ int cur_state;
+ parse_tree_t *lel;
+ int trigger_undo; /* set by colm_parse_undo_frag around its parse-loop calls */
+
+ int token_id; /* last value returned by scan_token() */
+ head_t *tokdata; /* match text held while a token generation action runs */
+ int frame_id;
+ int next;
+ parse_tree_t *undo_lel;
+
+ int check_next;
+ int check_stop;
+
+ /* The lhs is sometimes saved before reduction actions in case it is
+ * replaced and we need to restore it on backtracking */
+ tree_t *parsed;
+
+ int reject; /* read after a reduction action; nonzero rejects the reduction */
+
+ /* Instruction pointer to use when we stop parsing and execute code. */
+ code_t *code;
+
+ int rc_block_count;
+
+ tree_t *parse_error_text;
+
+ /* Zero indicates parsing proper. Nonzero is the reducer id. */
+ int reducer;
+
+ parse_tree_t *last_final;
+
+ struct pool_alloc *parse_tree_pool;
+ struct pool_alloc local_pool;
+
+ /* Disregard any alternate parse paths, just go right to failure. */
+ int fail_parsing;
+};
+
+void colm_pda_init( struct colm_program *prg, struct pda_run *pda_run,
+ struct pda_tables *tables, int parser_id, long stop_target,
+ int revert_on, struct colm_struct *context, int reducer );
+
+void colm_pda_clear( struct colm_program *prg, struct colm_tree **sp,
+ struct pda_run *pda_run );
+
+void colm_rt_code_vect_replace( struct rt_code_vect *vect, long pos,
+ const code_t *val, long len );
+void colm_rt_code_vect_empty( struct rt_code_vect *vect );
+void colm_rt_code_vect_remove( struct rt_code_vect *vect, long pos, long len );
+
+void init_rt_code_vect( struct rt_code_vect *code_vect );
+
+inline static void append_code_val( struct rt_code_vect *vect, const code_t val );
+inline static void append_code_vect( struct rt_code_vect *vect, const code_t *val, long len );
+inline static void append_half( struct rt_code_vect *vect, half_t half );
+inline static void append_word( struct rt_code_vect *vect, word_t word );
+
+/* Write len code_t values from val into vect at its current tab_len. */
+inline static void append_code_vect( struct rt_code_vect *vect, const code_t *val, long len )
+{
+	const long write_pos = vect->tab_len;
+
+	colm_rt_code_vect_replace( vect, write_pos, val, len );
+}
+
+/* Write the single code_t val into vect at its current tab_len. */
+inline static void append_code_val( struct rt_code_vect *vect, const code_t val )
+{
+	/* A one-element append. */
+	append_code_vect( vect, &val, 1 );
+}
+
+/* Append half to vect as two bytes, low byte first. */
+inline static void append_half( struct rt_code_vect *vect, half_t half )
+{
+	int shift;
+
+	/* not optimal. */
+	for ( shift = 0; shift < 16; shift += 8 )
+		append_code_val( vect, (half >> shift) & 0xff );
+}
+
+/*
+ * Append word to vect one byte at a time, low byte first. SIZEOF_LONG bytes
+ * are written; the check earlier in this header limits that to 4 or 8.
+ */
+inline static void append_word( struct rt_code_vect *vect, word_t word )
+{
+	int byte_ix;
+
+	/* not optimal. */
+	for ( byte_ix = 0; byte_ix < SIZEOF_LONG; byte_ix++ )
+		append_code_val( vect, (word >> (8 * byte_ix)) & 0xff );
+}
+
+void colm_increment_steps( struct pda_run *pda_run );
+void colm_decrement_steps( struct pda_run *pda_run );
+
+void colm_clear_stream_impl( struct colm_program *prg, tree_t **sp, struct stream_impl *input_stream );
+
+#define PCR_START 1
+#define PCR_DONE 2
+#define PCR_REDUCTION 3
+#define PCR_GENERATION 4
+#define PCR_PRE_EOF 5
+#define PCR_REVERSE 6
+
+head_t *colm_stream_pull( struct colm_program *prg, struct colm_tree **sp,
+ struct pda_run *pda_run, struct input_impl *is, long length );
+head_t *colm_string_alloc_pointer( struct colm_program *prg, const char *data, long length );
+
+kid_t *make_token_with_data( struct colm_program *prg, struct pda_run *pda_run,
+ struct input_impl *input_stream, int id, head_t *tokdata );
+
+long colm_parse_loop( struct colm_program *prg, tree_t **sp, struct pda_run *pda_run,
+ struct input_impl *input_stream, long entry );
+
+long colm_parse_frag( struct colm_program *prg, tree_t **sp,
+ struct pda_run *pda_run, input_t *input, long entry );
+long colm_parse_finish( struct colm_program *prg, tree_t **sp,
+ struct pda_run *pda_run, stream_t *input, long entry );
+long colm_parse_undo_frag( struct colm_program *prg, tree_t **sp, struct pda_run *pda_run,
+ input_t *input, long entry, long steps );
+
+void commit_clear_kid_list( program_t *prg, tree_t **sp, kid_t *kid );
+void commit_clear_parse_tree( program_t *prg, tree_t **sp,
+ struct pda_run *pda_run, parse_tree_t *pt );
+void commit_reduce( program_t *prg, tree_t **root,
+ struct pda_run *pda_run );
+
+tree_t *get_parsed_root( struct pda_run *pda_run, int stop );
+
+void colm_parse_reduce_commit( program_t *prg, tree_t **sp,
+ struct pda_run *pda_run );
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _COLM_PDARUN_H */
+
diff --git a/src/pool.c b/src/pool.c
new file mode 100644
index 00000000..ffb32636
--- /dev/null
+++ b/src/pool.c
@@ -0,0 +1,248 @@
+/*
+ * Copyright 2010-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <colm/pool.h>
+
+#include <assert.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include <colm/pdarun.h>
+#include <colm/debug.h>
+
+/*
+ * Put a pool allocator into its empty state for items of sizeofT bytes.
+ * No memory is reserved here; nextel is set to FRESH_BLOCK so that the first
+ * allocation sees the (non-existent) current block as full and creates one.
+ */
+void init_pool_alloc( struct pool_alloc *pool_alloc, int sizeofT )
+{
+	pool_alloc->sizeofT = sizeofT;
+
+	/* No blocks, no free list. */
+	pool_alloc->head = 0;
+	pool_alloc->pool = 0;
+
+	/* Forces a block allocation on first use. */
+	pool_alloc->nextel = FRESH_BLOCK;
+}
+
+/*
+ * Hand out one zero-filled item of pool_alloc->sizeofT bytes.
+ *
+ * With POOL_MALLOC defined every item is an individual malloc. Otherwise a
+ * previously freed item is taken from the free list when one is available,
+ * and failing that the next unused slot of the newest block is used, with a
+ * new block of FRESH_BLOCK items being created when the newest one is full.
+ */
+static void *pool_alloc_allocate( struct pool_alloc *pool_alloc )
+{
+	//debug( REALM_POOL, "pool allocation\n" );
+
+#ifdef POOL_MALLOC
+	void *res = malloc( pool_alloc->sizeofT );
+	memset( res, 0, pool_alloc->sizeofT );
+	return res;
+#else
+
+	void *item = 0;
+	if ( pool_alloc->pool != 0 ) {
+		/* Reuse the most recently freed item. */
+		item = pool_alloc->pool;
+		pool_alloc->pool = pool_alloc->pool->next;
+	}
+	else {
+		if ( pool_alloc->nextel == FRESH_BLOCK ) {
+			/* Newest block is exhausted (or there is none yet): push a
+			 * fresh block on the front of the block list. */
+			struct pool_block *fresh = (struct pool_block*)malloc( sizeof(struct pool_block) );
+			fresh->data = malloc( pool_alloc->sizeofT * FRESH_BLOCK );
+			fresh->next = pool_alloc->head;
+			pool_alloc->head = fresh;
+			pool_alloc->nextel = 0;
+		}
+
+		/* Carve the next unused slot out of the newest block. */
+		item = (char*)pool_alloc->head->data + pool_alloc->sizeofT * pool_alloc->nextel;
+		pool_alloc->nextel += 1;
+	}
+
+	memset( item, 0, pool_alloc->sizeofT );
+	return item;
+#endif
+}
+
+/*
+ * Give an item back to the allocator. With POOL_MALLOC the item is released
+ * with free(); otherwise its storage is reinterpreted as a pool_item and
+ * pushed on the front of the free list for later reuse.
+ */
+void pool_alloc_free( struct pool_alloc *pool_alloc, void *el )
+{
+	#if 0
+	/* Some sanity checking. Best not to normally run with this on. */
+	char *p = (char*)el + sizeof(struct pool_item*);
+	char *pe = (char*)el + sizeof(T);
+	for ( ; p < pe; p++ )
+		assert( *p != 0xcc );
+	memset( el, 0xcc, sizeof(T) );
+	#endif
+
+#ifdef POOL_MALLOC
+	free( el );
+#else
+	struct pool_item *released = (struct pool_item*) el;
+
+	/* Link in front of the current free-list head. */
+	released->next = pool_alloc->pool;
+	pool_alloc->pool = released;
+#endif
+}
+
+/*
+ * Release every block owned by the allocator and return it to the same empty
+ * state that init_pool_alloc establishes (sizeofT is kept).
+ *
+ * All items previously handed out become invalid, including those sitting on
+ * the free list, since their storage lives inside the freed blocks.
+ */
+void pool_alloc_clear( struct pool_alloc *pool_alloc )
+{
+	struct pool_block *block = pool_alloc->head;
+	while ( block != 0 ) {
+		/* Grab the link before the block itself is freed. */
+		struct pool_block *next = block->next;
+		free( block->data );
+		free( block );
+		block = next;
+	}
+
+	pool_alloc->head = 0;
+
+	/* Must be FRESH_BLOCK, not 0: with no blocks left, the allocation path
+	 * only creates a new block when nextel == FRESH_BLOCK. Leaving it at 0
+	 * would make the next allocation index into a NULL head block. */
+	pool_alloc->nextel = FRESH_BLOCK;
+	pool_alloc->pool = 0;
+}
+
+/*
+ * Report how many items have been handed out and not yet freed: the number
+ * of slots consumed across all blocks minus the length of the free list.
+ */
+long pool_alloc_num_lost( struct pool_alloc *pool_alloc )
+{
+	long outstanding = 0;
+	struct pool_block *blk;
+	struct pool_item *spare;
+
+	if ( pool_alloc->head != 0 ) {
+		/* The newest block is only used up to nextel; every older block
+		 * was filled completely before a newer one was created. */
+		outstanding = pool_alloc->nextel;
+		for ( blk = pool_alloc->head->next; blk != 0; blk = blk->next )
+			outstanding += FRESH_BLOCK;
+	}
+
+	/* Items on the free list are allocated slots that are not in use. */
+	for ( spare = pool_alloc->pool; spare != 0; spare = spare->next )
+		outstanding -= 1;
+
+	return outstanding;
+}
+
+/*
+ * kid_t
+ */
+
+/* Take a zeroed kid_t from the program's kid pool. */
+kid_t *kid_allocate( program_t *prg )
+{
+	struct pool_alloc *pool = &prg->kid_pool;
+	return (kid_t*) pool_alloc_allocate( pool );
+}
+
+/* Return a kid_t to the program's kid pool. */
+void kid_free( program_t *prg, kid_t *el )
+{
+	struct pool_alloc *pool = &prg->kid_pool;
+	pool_alloc_free( pool, el );
+}
+
+/* Release all storage held by the program's kid pool. */
+void kid_clear( program_t *prg )
+{
+	struct pool_alloc *pool = &prg->kid_pool;
+	pool_alloc_clear( pool );
+}
+
+/* Number of kid_t items allocated from the pool and not yet freed. */
+long kid_num_lost( program_t *prg )
+{
+	struct pool_alloc *pool = &prg->kid_pool;
+	return pool_alloc_num_lost( pool );
+}
+
+/*
+ * tree_t
+ */
+
+/* Take a zeroed tree_t from the program's tree pool. */
+tree_t *tree_allocate( program_t *prg )
+{
+	struct pool_alloc *pool = &prg->tree_pool;
+	return (tree_t*) pool_alloc_allocate( pool );
+}
+
+/* Return a tree_t to the program's tree pool. */
+void tree_free( program_t *prg, tree_t *el )
+{
+	struct pool_alloc *pool = &prg->tree_pool;
+	pool_alloc_free( pool, el );
+}
+
+/* Release all storage held by the program's tree pool. */
+void tree_clear( program_t *prg )
+{
+	struct pool_alloc *pool = &prg->tree_pool;
+	pool_alloc_clear( pool );
+}
+
+/* Number of tree_t items allocated from the pool and not yet freed. */
+long tree_num_lost( program_t *prg )
+{
+	struct pool_alloc *pool = &prg->tree_pool;
+	return pool_alloc_num_lost( pool );
+}
+
+/*
+ * parse_tree_t
+ */
+
+/* Take a zeroed parse_tree_t from the pool referenced by the pda_run. */
+parse_tree_t *parse_tree_allocate( struct pda_run *pda_run )
+{
+	struct pool_alloc *pool = pda_run->parse_tree_pool;
+	return (parse_tree_t*) pool_alloc_allocate( pool );
+}
+
+/* Return a parse_tree_t to the pool referenced by the pda_run. */
+void parse_tree_free( struct pda_run *pda_run, parse_tree_t *el )
+{
+	struct pool_alloc *pool = pda_run->parse_tree_pool;
+	pool_alloc_free( pool, el );
+}
+
+/* Release all storage held by a parse tree pool. Unlike the allocate/free
+ * pair this takes the pool directly rather than a pda_run. */
+void parse_tree_clear( struct pool_alloc *pool_alloc )
+{
+	struct pool_alloc *pool = pool_alloc;
+	pool_alloc_clear( pool );
+}
+
+/* Number of parse_tree_t items allocated from the given pool and not yet
+ * freed. */
+long parse_tree_num_lost( struct pool_alloc *pool_alloc )
+{
+	struct pool_alloc *pool = pool_alloc;
+	return pool_alloc_num_lost( pool );
+}
+
+/*
+ * head_t
+ */
+
+/* Take a zeroed head_t from the program's head pool. */
+head_t *head_allocate( program_t *prg )
+{
+	struct pool_alloc *pool = &prg->head_pool;
+	return (head_t*) pool_alloc_allocate( pool );
+}
+
+/* Return a head_t to the program's head pool. */
+void head_free( program_t *prg, head_t *el )
+{
+	struct pool_alloc *pool = &prg->head_pool;
+	pool_alloc_free( pool, el );
+}
+
+/* Release all storage held by the program's head pool. */
+void head_clear( program_t *prg )
+{
+	struct pool_alloc *pool = &prg->head_pool;
+	pool_alloc_clear( pool );
+}
+
+/* Number of head_t items allocated from the pool and not yet freed. */
+long head_num_lost( program_t *prg )
+{
+	struct pool_alloc *pool = &prg->head_pool;
+	return pool_alloc_num_lost( pool );
+}
+
+/*
+ * location_t
+ */
+
+/* Take a zeroed location_t from the program's location pool. */
+location_t *location_allocate( program_t *prg )
+{
+	struct pool_alloc *pool = &prg->location_pool;
+	return (location_t*) pool_alloc_allocate( pool );
+}
+
+/* Return a location_t to the program's location pool. */
+void location_free( program_t *prg, location_t *el )
+{
+	struct pool_alloc *pool = &prg->location_pool;
+	pool_alloc_free( pool, el );
+}
+
+/* Release all storage held by the program's location pool. */
+void location_clear( program_t *prg )
+{
+	struct pool_alloc *pool = &prg->location_pool;
+	pool_alloc_clear( pool );
+}
+
+/* Number of location_t items allocated from the pool and not yet freed. */
+long location_num_lost( program_t *prg )
+{
+	struct pool_alloc *pool = &prg->location_pool;
+	return pool_alloc_num_lost( pool );
+}
diff --git a/src/pool.h b/src/pool.h
new file mode 100644
index 00000000..5e8f1de0
--- /dev/null
+++ b/src/pool.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright 2010-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _COLM_POOL_H
+#define _COLM_POOL_H
+
+/* Allocation, number of items. */
+#define FRESH_BLOCK 8128
+
+#include <colm/pdarun.h>
+#include <colm/map.h>
+#include <colm/tree.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void init_pool_alloc( struct pool_alloc *pool_alloc, int sizeofT );
+
+kid_t *kid_allocate( program_t *prg );
+void kid_free( program_t *prg, kid_t *el );
+void kid_clear( program_t *prg );
+long kid_num_lost( program_t *prg );
+
+tree_t *tree_allocate( program_t *prg );
+void tree_free( program_t *prg, tree_t *el );
+void tree_clear( program_t *prg );
+long tree_num_lost( program_t *prg );
+
+/* Parse tree allocators go into pda_run structs. */
+parse_tree_t *parse_tree_allocate( struct pda_run *pda_run );
+void parse_tree_free( struct pda_run *pda_run, parse_tree_t *el );
+void parse_tree_clear( struct pool_alloc *pool_alloc );
+long parse_tree_num_lost( struct pool_alloc *pool_alloc );
+
+head_t *head_allocate( program_t *prg );
+void head_free( program_t *prg, head_t *el );
+void head_clear( program_t *prg );
+long head_num_lost( program_t *prg );
+
+location_t *location_allocate( program_t *prg );
+void location_free( program_t *prg, location_t *el );
+void location_clear( program_t *prg );
+long location_num_lost( program_t *prg );
+
+void pool_alloc_clear( struct pool_alloc *pool_alloc );
+long pool_alloc_num_lost( struct pool_alloc *pool_alloc );
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _COLM_POOL_H */
+
diff --git a/src/print.c b/src/print.c
new file mode 100644
index 00000000..363a7eea
--- /dev/null
+++ b/src/print.c
@@ -0,0 +1,775 @@
+/*
+ * Copyright 2007-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <string.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <assert.h>
+
+#include <colm/tree.h>
+#include <colm/pool.h>
+#include <colm/bytecode.h>
+#include <colm/debug.h>
+
+#define BUFFER_INITIAL_SIZE 4096
+
+/*
+ * Write len bytes of data through print_args->out, escaped for XML.
+ *
+ * '<', '>' and '&' become "&lt;", "&gt;" and "&amp;". Printable ASCII
+ * (32..126), tab, newline and carriage return pass through unchanged. Every
+ * other byte is written as a decimal character reference "&#N;" where N is
+ * the byte value in 0..255.
+ */
+static void xml_escape_data( struct colm_print_args *print_args, const char *data, long len )
+{
+	/* Index type matches len so long inputs are not truncated. */
+	long i;
+	for ( i = 0; i < len; i++ ) {
+		if ( data[i] == '<' )
+			print_args->out( print_args, "&lt;", 4 );
+		else if ( data[i] == '>' )
+			print_args->out( print_args, "&gt;", 4 );
+		else if ( data[i] == '&' )
+			print_args->out( print_args, "&amp;", 5 );
+		else if ( (32 <= data[i] && data[i] <= 126) ||
+				data[i] == '\t' || data[i] == '\n' || data[i] == '\r' )
+		{
+			print_args->out( print_args, &data[i], 1 );
+		}
+		else {
+			/* Go through unsigned char first. Where plain char is signed,
+			 * converting a byte >= 0x80 straight to unsigned sign-extends
+			 * it and would print values such as 4294967240 instead of
+			 * 200. */
+			char out[64];
+			snprintf( out, sizeof(out), "&#%u;", (unsigned)(unsigned char)data[i] );
+			print_args->out( print_args, out, strlen(out) );
+		}
+	}
+}
+
+/*
+ * Prepare a string collector: an empty buffer of BUFFER_INITIAL_SIZE bytes
+ * with indentation handling switched off.
+ */
+void init_str_collect( str_collect_t *collect )
+{
+	/* Empty buffer with the default capacity. */
+	collect->length = 0;
+	collect->allocated = BUFFER_INITIAL_SIZE;
+	collect->data = malloc( BUFFER_INITIAL_SIZE );
+
+	/* Not indenting, and not in the whitespace-consuming state. */
+	collect->indent.level = COLM_INDENT_OFF;
+	collect->indent.indent = 0;
+}
+
+/* Free the collector's buffer. The str_collect_t itself is not freed, and its
+ * data pointer is left dangling. */
+void str_collect_destroy( str_collect_t *collect )
+{
+ free( collect->data );
+}
+
+/*
+ * Append len bytes to the collector. When the buffer is too small it is
+ * grown to twice the size required, so repeated appends stay cheap. No
+ * terminating NUL is written.
+ */
+void str_collect_append( str_collect_t *collect, const char *data, long len )
+{
+	long required = collect->length + len;
+
+	if ( collect->allocated < required ) {
+		/* Over-allocate to amortise the cost of future appends. */
+		collect->allocated = required * 2;
+		collect->data = realloc( collect->data, collect->allocated );
+	}
+
+	/* Copy in after the existing contents. */
+	memcpy( collect->data + collect->length, data, len );
+	collect->length = required;
+}
+
+/* Discard the collected contents by resetting the length. The buffer and its
+ * allocated capacity are kept for reuse. */
+void str_collect_clear( str_collect_t *collect )
+{
+ collect->length = 0;
+}
+
+#define INT_SZ 32
+
+/* Write the bytes of a string head straight to the output callback. This
+ * calls print_args->out directly, so it does not go through out_indent. */
+void print_str( struct colm_print_args *print_args, head_t *str )
+{
+ print_args->out( print_args, str->data, str->length );
+}
+
+/* Output callback that appends to a string collector; args->arg is taken to
+ * be the str_collect_t to append to. */
+void append_collect( struct colm_print_args *args, const char *data, int length )
+{
+	str_collect_t *collect = (str_collect_t*) args->arg;
+	str_collect_append( collect, data, length );
+}
+
+/* Output callback that writes to a file; args->arg is taken to be a
+ * stream_impl_data whose file member is the destination. The result of
+ * fwrite is not checked. */
+void append_file( struct colm_print_args *args, const char *data, int length )
+{
+	fwrite( data, 1, length, ((struct stream_impl_data*) args->arg)->file );
+}
+
+/*
+ * Write data through args->out while applying automatic indentation.
+ *
+ * Two states are tracked in args->indent->indent:
+ *  - consume (non-zero): leading spaces and tabs are dropped; when the first
+ *    other byte shows up, args->indent->level tab characters are written and
+ *    the state switches to pass-through.
+ *  - pass-through (zero): with indentation enabled, data is written up to and
+ *    including the next newline and the state switches back to consume;
+ *    otherwise everything that is left is written as-is.
+ */
+static void out_indent( struct colm_print_args *args, const char *data, int length )
+{
+	for ( ;; ) {
+		if ( args->indent->indent ) {
+			int level;
+
+			/* Consume mode: drop leading blanks. */
+			while ( length > 0 && ( *data == ' ' || *data == '\t' ) ) {
+				data += 1;
+				length -= 1;
+			}
+
+			/* Only blanks were supplied; stay in consume mode. */
+			if ( length <= 0 )
+				return;
+
+			/* Real data found: emit the indentation and go handle the
+			 * data in pass-through mode. */
+			for ( level = 0; level < args->indent->level; level++ )
+				args->out( args, "\t", 1 );
+
+			args->indent->indent = 0;
+			continue;
+		}
+
+		const char *nl;
+		if ( args->indent->level != COLM_INDENT_OFF &&
+				(nl = memchr( data, '\n', length )) )
+		{
+			/* Write the line, newline included. */
+			int wl = nl - data + 1;
+			args->out( args, data, wl );
+
+			/* The remainder starts a new line, so its leading blanks are
+			 * to be replaced by generated indentation. */
+			data += wl;
+			length -= wl;
+			args->indent->indent = 1;
+			continue;
+		}
+
+		/* Indentation off, or no newline in what remains. */
+		args->out( args, data, length );
+		return;
+	}
+}
+
+
+/*
+ * Attach an empty ignore wrapper to each side of a tree: a left ignore
+ * flagged AF_SUPPRESS_RIGHT and a right ignore flagged AF_SUPPRESS_LEFT.
+ * Returns the tree produced by the push functions, or 0 when given 0.
+ */
+tree_t *tree_trim( struct colm_program *prg, tree_t **sp, tree_t *tree )
+{
+	tree_t *lead, *trail;
+
+	if ( tree == 0 )
+		return 0;
+
+	debug( prg, REALM_PARSE, "attaching left ignore\n" );
+
+	/* Fresh ignore list for the left side. */
+	lead = tree_allocate( prg );
+	lead->id = LEL_ID_IGNORE;
+	lead->flags |= AF_SUPPRESS_RIGHT;
+	tree = push_left_ignore( prg, tree, lead );
+
+	debug( prg, REALM_PARSE, "attaching ignore right\n" );
+
+	/* Fresh ignore list for the right side. */
+	trail = tree_allocate( prg );
+	trail->id = LEL_ID_IGNORE;
+	trail->flags |= AF_SUPPRESS_LEFT;
+
+	return push_right_ignore( prg, tree, trail );
+}
+
+/* Resume points for the hand-rolled recursion in print_kid. One of these is
+ * pushed on the VM stack before each jump to rec_call and popped again at
+ * skip_null to select the label to continue at. */
+enum ReturnType
+{
+ Done = 1, /* resumes at rec_return_top (top-level kid loop) */
+ CollectIgnoreLeft, /* resumes at rec_return_ign_left */
+ CollectIgnoreRight, /* resumes at rec_return_ign_right */
+ RecIgnoreList, /* resumes at rec_return_il (leading ignore list) */
+ ChildPrint /* resumes at rec_return (child loop) */
+};
+
+/* Classification print_kid assigns to each visited kid. */
+enum VisitType
+{
+ IgnoreWrapper, /* the kid's tree id is LEL_ID_IGNORE */
+ IgnoreData, /* the kid's parent has tree id LEL_ID_IGNORE */
+ Term, /* tree id below prg->rtd->first_non_term_id */
+ NonTerm /* everything else */
+};
+
+#define TF_TERM_SEEN 0x1
+
+/*
+ * Walk a list of kids, and the trees below them, driving the callbacks in
+ * print_args (open_tree, print_term, close_tree).
+ *
+ * The traversal is recursive in nature but is written without C recursion.
+ * A "call" saves the live variables and a ReturnType tag with the vm_push_*
+ * macros and jumps to rec_call; the "return" at skip_null pops the tag and
+ * jumps back to the matching rec_return_* label, where the saved variables
+ * are popped in the reverse order of the pushes. Statement order therefore
+ * matters throughout.
+ *
+ * Ignore trees are not printed when encountered. They are gathered on
+ * leading_ignore and flushed when the next terminal is visited, which is
+ * where the AF_SUPPRESS_LEFT / AF_SUPPRESS_RIGHT flags and the trim option
+ * are applied.
+ *
+ * NOTE(review): sp is not referenced by name in this function; presumably the
+ * vm_push_* / vm_pop_* macros use it implicitly -- confirm in their
+ * definitions.
+ */
+void print_kid( program_t *prg, tree_t **sp, struct colm_print_args *print_args, kid_t *kid )
+{
+ enum ReturnType rt;
+ kid_t *parent = 0;
+ /* Ignore trees seen since the last terminal. New entries are prepended,
+ * so the list is in reverse order of discovery. */
+ kid_t *leading_ignore = 0;
+ enum VisitType visit_type;
+ /* TF_TERM_SEEN is set once the first terminal has been visited. */
+ int flags = 0;
+
+ /* Iterate the kids passed in. We are expecting a next, which will allow us
+ * to print the trailing ignore list. */
+ while ( kid != 0 ) {
+ vm_push_type( enum ReturnType, Done );
+ goto rec_call;
+ rec_return_top:
+ kid = kid->next;
+ }
+
+ return;
+
+rec_call:
+ if ( kid->tree == 0 )
+ goto skip_null;
+
+ /* If not currently skipping ignore data, then print it. Ignore data can
+ * be associated with terminals and nonterminals. */
+ if ( kid->tree->flags & AF_LEFT_IGNORE ) {
+ /* Save the current frame, then visit the left ignore with this kid as
+ * its parent. */
+ vm_push_kid( parent );
+ vm_push_kid( kid );
+ parent = kid;
+ kid = tree_left_ignore_kid( prg, kid->tree );
+ vm_push_type( enum ReturnType, CollectIgnoreLeft );
+ goto rec_call;
+ rec_return_ign_left:
+ kid = vm_pop_kid();
+ parent = vm_pop_kid();
+ }
+
+ /* Classify the node; see enum VisitType. */
+ if ( kid->tree->id == LEL_ID_IGNORE )
+ visit_type = IgnoreWrapper;
+ else if ( parent != 0 && parent->tree->id == LEL_ID_IGNORE )
+ visit_type = IgnoreData;
+ else if ( kid->tree->id < prg->rtd->first_non_term_id )
+ visit_type = Term;
+ else
+ visit_type = NonTerm;
+
+ debug( prg, REALM_PRINT, "visit type: %d\n", visit_type );
+
+ if ( visit_type == IgnoreData ) {
+ /* Defer: remember the tree and move on without printing or descending. */
+ debug( prg, REALM_PRINT, "putting %p on ignore list\n", kid->tree );
+ kid_t *new_ignore = kid_allocate( prg );
+ new_ignore->next = leading_ignore;
+ leading_ignore = new_ignore;
+ leading_ignore->tree = kid->tree;
+ goto skip_node;
+ }
+
+ if ( visit_type == IgnoreWrapper ) {
+ /* The wrapper is recorded as well, so its suppress flags can be seen
+ * when the list is flushed. */
+ kid_t *new_ignore = kid_allocate( prg );
+ new_ignore->next = leading_ignore;
+ leading_ignore = new_ignore;
+ leading_ignore->tree = kid->tree;
+ /* Don't skip. */
+ }
+
+ /* print leading ignore? Triggered by terminals. */
+ if ( visit_type == Term ) {
+ /* Reverse the leading ignore list. */
+ if ( leading_ignore != 0 ) {
+ kid_t *ignore = 0, *last = 0;
+
+ /* Reverse the list and take the opportunity to implement the
+ * suppress left. */
+ while ( true ) {
+ kid_t *next = leading_ignore->next;
+ leading_ignore->next = last;
+
+ if ( leading_ignore->tree->flags & AF_SUPPRESS_LEFT ) {
+ /* We are moving left. Chop off the tail. */
+ debug( prg, REALM_PRINT, "suppressing left\n" );
+ free_kid_list( prg, next );
+ break;
+ }
+
+ if ( next == 0 )
+ break;
+
+ last = leading_ignore;
+ leading_ignore = next;
+ }
+
+ /* Print the leading ignore list. Also implement the suppress right
+ * in the process. */
+ /* Nothing is printed unless comm is set. With trim set as well, the
+ * ignores are printed only after a terminal has already been seen
+ * and when the current terminal has a positive id. */
+ if ( print_args->comm && (!print_args->trim ||
+ (flags & TF_TERM_SEEN && kid->tree->id > 0)) )
+ {
+ ignore = leading_ignore;
+ while ( ignore != 0 ) {
+ if ( ignore->tree->flags & AF_SUPPRESS_RIGHT )
+ break;
+
+ if ( ignore->tree->id != LEL_ID_IGNORE ) {
+ /* Print this ignore tree as a nested traversal with a clean
+ * state (no parent, empty ignore list). */
+ vm_push_type( enum VisitType, visit_type );
+ vm_push_kid( leading_ignore );
+ vm_push_kid( ignore );
+ vm_push_kid( parent );
+ vm_push_kid( kid );
+
+ leading_ignore = 0;
+ kid = ignore;
+ parent = 0;
+
+ debug( prg, REALM_PRINT, "rec call on %p\n", kid->tree );
+ vm_push_type( enum ReturnType, RecIgnoreList );
+ goto rec_call;
+ rec_return_il:
+
+ kid = vm_pop_kid();
+ parent = vm_pop_kid();
+ ignore = vm_pop_kid();
+ leading_ignore = vm_pop_kid();
+ visit_type = vm_pop_type(enum VisitType);
+ }
+
+ ignore = ignore->next;
+ }
+ }
+
+ /* Free the leading ignore list. */
+ free_kid_list( prg, leading_ignore );
+ leading_ignore = 0;
+ }
+ }
+
+ if ( visit_type == Term || visit_type == NonTerm ) {
+ /* Open the tree. */
+ print_args->open_tree( prg, sp, print_args, parent, kid );
+ }
+
+ if ( visit_type == Term )
+ flags |= TF_TERM_SEEN;
+
+ if ( visit_type == Term || visit_type == IgnoreData ) {
+ /* Print contents. */
+ if ( kid->tree->id < prg->rtd->first_non_term_id ) {
+ debug( prg, REALM_PRINT, "printing terminal %p\n", kid->tree );
+ if ( kid->tree->id != 0 )
+ print_args->print_term( prg, sp, print_args, kid );
+ }
+ }
+
+ /* Print children. */
+ /* With attr set the list returned by tree_attr is walked instead of the
+ * one returned by tree_child. */
+ kid_t *child = print_args->attr ?
+ tree_attr( prg, kid->tree ) :
+ tree_child( prg, kid->tree );
+
+ if ( child != 0 ) {
+ vm_push_type( enum VisitType, visit_type );
+ vm_push_kid( parent );
+ vm_push_kid( kid );
+ parent = kid;
+ kid = child;
+ while ( kid != 0 ) {
+ vm_push_type( enum ReturnType, ChildPrint );
+ goto rec_call;
+ rec_return:
+ kid = kid->next;
+ }
+ kid = vm_pop_kid();
+ parent = vm_pop_kid();
+ visit_type = vm_pop_type(enum VisitType);
+ }
+
+ if ( visit_type == Term || visit_type == NonTerm ) {
+ /* close the tree. */
+ print_args->close_tree( prg, sp, print_args, parent, kid );
+ }
+
+skip_node:
+
+ /* If not currently skipping ignore data, then print it. Ignore data can
+ * be associated with terminals and nonterminals. */
+ if ( kid->tree->flags & AF_RIGHT_IGNORE ) {
+ debug( prg, REALM_PRINT, "right ignore\n" );
+ vm_push_kid( parent );
+ vm_push_kid( kid );
+ parent = kid;
+ kid = tree_right_ignore_kid( prg, kid->tree );
+ vm_push_type( enum ReturnType, CollectIgnoreRight );
+ goto rec_call;
+ rec_return_ign_right:
+ kid = vm_pop_kid();
+ parent = vm_pop_kid();
+ }
+
+/* For skipping over content on null. */
+skip_null:
+
+ /* "Return": pop the tag pushed by the caller and jump back to the point
+ * just after its goto rec_call. */
+ rt = vm_pop_type(enum ReturnType);
+ switch ( rt ) {
+ case Done:
+ debug( prg, REALM_PRINT, "return: done\n" );
+ goto rec_return_top;
+ break;
+ case CollectIgnoreLeft:
+ debug( prg, REALM_PRINT, "return: ignore left\n" );
+ goto rec_return_ign_left;
+ case CollectIgnoreRight:
+ debug( prg, REALM_PRINT, "return: ignore right\n" );
+ goto rec_return_ign_right;
+ case RecIgnoreList:
+ debug( prg, REALM_PRINT, "return: ignore list\n" );
+ goto rec_return_il;
+ case ChildPrint:
+ debug( prg, REALM_PRINT, "return: child print\n" );
+ goto rec_return;
+ }
+}
+
+/*
+ * Print one tree using the callbacks in print_args. A null tree is printed
+ * as the text "NIL". Otherwise the tree is followed by a zero-filled sentinel
+ * terminal, which gives print_kid a terminal on which to flush any trailing
+ * ignores.
+ */
+void colm_print_tree_args( program_t *prg, tree_t **sp,
+		struct colm_print_args *print_args, tree_t *tree )
+{
+	tree_t sentinel_tree;
+	kid_t root, sentinel;
+
+	if ( tree == 0 ) {
+		out_indent( print_args, "NIL", 3 );
+		return;
+	}
+
+	/* All-zero tree: id 0, no flags, no children. */
+	memset( &sentinel_tree, 0, sizeof(sentinel_tree) );
+	sentinel.tree = &sentinel_tree;
+	sentinel.next = 0;
+
+	/* Two-element kid list on the stack: the tree, then the sentinel. */
+	root.tree = tree;
+	root.next = &sentinel;
+
+	print_kid( prg, sp, print_args, &root );
+}
+
+/* Open/close callback that does nothing; used by the plain-text printers,
+ * which emit terminal text only. */
+void colm_print_null( program_t *prg, tree_t **sp,
+		struct colm_print_args *args, kid_t *parent, kid_t *kid )
+{
+	/* Intentionally empty. */
+	(void) prg;
+	(void) sp;
+	(void) args;
+	(void) parent;
+	(void) kid;
+}
+
+/*
+ * Terminal callback for plain-text printing.
+ *
+ * Pointers print as "#<hex>", strings print their contents, and any other
+ * terminal prints its non-empty token data. Afterwards, terminals whose
+ * language-element name is "_IN_" or "_EX_" raise or lower the indentation
+ * level; the first "_IN_" seen while indentation is off switches it on.
+ */
+void colm_print_term_tree( program_t *prg, tree_t **sp,
+		struct colm_print_args *args, kid_t *kid )
+{
+	tree_t *tree = kid->tree;
+
+	debug( prg, REALM_PRINT, "printing term %p\n", tree );
+
+	if ( tree->id == LEL_ID_PTR ) {
+		char buf[INT_SZ];
+		sprintf( buf, "%lx", ((pointer_t*)tree)->value );
+
+		out_indent( args, "#<", 2 );
+		out_indent( args, buf, strlen(buf) );
+		out_indent( args, ">", 1 );
+	}
+	else if ( tree->id == LEL_ID_STR ) {
+		print_str( args, ((str_t*)tree)->value );
+	}
+	else if ( tree->tokdata != 0 &&
+			string_length( tree->tokdata ) > 0 )
+	{
+		out_indent( args, string_data( tree->tokdata ),
+				string_length( tree->tokdata ) );
+	}
+
+	/* Indentation control terminals. */
+	const char *el_name = prg->rtd->lel_info[tree->id].name;
+
+	if ( strcmp( el_name, "_IN_" ) == 0 ) {
+		if ( args->indent->level != COLM_INDENT_OFF ) {
+			args->indent->level += 1;
+		}
+		else {
+			/* Turning indentation on: begin at level one, in the
+			 * whitespace-consuming state. */
+			args->indent->level = 1;
+			args->indent->indent = 1;
+		}
+	}
+	else if ( strcmp( el_name, "_EX_" ) == 0 ) {
+		args->indent->level -= 1;
+	}
+}
+
+/* Print a tree as plain text into a string collector. The two flags after
+ * the output argument are passed as true, false. */
+void colm_print_tree_collect( program_t *prg, tree_t **sp,
+		str_collect_t *collect, tree_t *tree, int trim )
+{
+	struct colm_print_args print_args = {
+		collect,
+		true, false, trim,
+		&collect->indent,
+		append_collect,
+		colm_print_null, colm_print_term_tree, colm_print_null
+	};
+
+	colm_print_tree_args( prg, sp, &print_args, tree );
+}
+
+/* Same as colm_print_tree_collect, except that the two flags after the
+ * output argument are both passed as true. */
+void colm_print_tree_collect_a( program_t *prg, tree_t **sp,
+		str_collect_t *collect, tree_t *tree, int trim )
+{
+	struct colm_print_args print_args = {
+		collect,
+		true, true, trim,
+		&collect->indent,
+		append_collect,
+		colm_print_null, colm_print_term_tree, colm_print_null
+	};
+
+	colm_print_tree_args( prg, sp, &print_args, tree );
+}
+
+/* Print a tree as plain text to the file held by a stream implementation,
+ * using that stream's own indentation state. */
+void colm_print_tree_file( program_t *prg, tree_t **sp,
+		struct stream_impl_data *impl, tree_t *tree, int trim )
+{
+	struct colm_print_args print_args = {
+		impl,
+		true, false, trim,
+		&impl->indent,
+		append_file,
+		colm_print_null, colm_print_term_tree, colm_print_null
+	};
+
+	colm_print_tree_args( prg, sp, &print_args, tree );
+}
+
+/*
+ * Open callback for XML printing: writes "<tag>" using the element's xml_tag.
+ * Nothing is written for the id-0 sentinel terminal, nor for a repeat/list
+ * node that merely continues its parent's list.
+ */
+static void xml_open( program_t *prg, tree_t **sp, struct colm_print_args *args,
+		kid_t *parent, kid_t *kid )
+{
+	struct lang_el_info *lel_info;
+	const char *tag;
+
+	/* The terminal that forces trailing ignores out is not real content. */
+	if ( kid->tree->id == 0 )
+		return;
+
+	lel_info = prg->rtd->lel_info;
+
+	/* List flattening: a last child with the same id as its repeat/list
+	 * parent is the continuation of that list, not a nested element. */
+	if ( parent != 0 && kid->next == 0 && parent->tree->id == kid->tree->id ) {
+		if ( lel_info[parent->tree->id].repeat || lel_info[parent->tree->id].list )
+			return;
+	}
+
+	tag = lel_info[kid->tree->id].xml_tag;
+	args->out( args, "<", 1 );
+	args->out( args, tag, strlen( tag ) );
+	args->out( args, ">", 1 );
+}
+
+/*
+ * Terminal callback for XML printing. Pointers are written as a hex number,
+ * strings and ordinary tokens as XML-escaped text. Ignore wrappers, the id-0
+ * sentinel and tokens without data produce no output.
+ */
+static void xml_term( program_t *prg, tree_t **sp,
+		struct colm_print_args *print_args, kid_t *kid )
+{
+	/* Result unused; the call is retained from the original code. */
+	tree_child( prg, kid->tree );
+
+	if ( kid->tree->id == LEL_ID_PTR ) {
+		char ptr[INT_SZ];
+		sprintf( ptr, "%lx", ((pointer_t*)kid->tree)->value );
+		print_args->out( print_args, ptr, strlen(ptr) );
+		return;
+	}
+
+	if ( kid->tree->id == LEL_ID_STR ) {
+		head_t *head = (head_t*) ((str_t*)kid->tree)->value;
+		xml_escape_data( print_args, head->data, head->length );
+		return;
+	}
+
+	/* Ordinary terminal that carries non-empty token data. */
+	if ( 0 < kid->tree->id && kid->tree->id < prg->rtd->first_non_term_id &&
+			kid->tree->id != LEL_ID_IGNORE &&
+			kid->tree->tokdata != 0 &&
+			string_length( kid->tree->tokdata ) > 0 )
+	{
+		xml_escape_data( print_args, string_data( kid->tree->tokdata ),
+				string_length( kid->tree->tokdata ) );
+	}
+}
+
+/*
+ * Close callback for XML printing: writes "</tag>" under exactly the
+ * conditions in which xml_open writes the opening tag.
+ */
+static void xml_close( program_t *prg, tree_t **sp,
+		struct colm_print_args *args, kid_t *parent, kid_t *kid )
+{
+	struct lang_el_info *lel_info;
+	const char *tag;
+
+	/* The terminal that forces trailing ignores out is not real content. */
+	if ( kid->tree->id == 0 )
+		return;
+
+	lel_info = prg->rtd->lel_info;
+
+	/* List flattening: a last child with the same id as its repeat/list
+	 * parent is the continuation of that list, not a nested element. */
+	if ( parent != 0 && kid->next == 0 && parent->tree->id == kid->tree->id ) {
+		if ( lel_info[parent->tree->id].repeat || lel_info[parent->tree->id].list )
+			return;
+	}
+
+	tag = lel_info[kid->tree->id].xml_tag;
+	args->out( args, "</", 2 );
+	args->out( args, tag, strlen( tag ) );
+	args->out( args, ">", 1 );
+}
+
+/* Print a tree as XML to the file held by a stream implementation. The single
+ * comm_attr argument is used for both flags that follow the output argument. */
+void colm_print_xml_stdout( program_t *prg, tree_t **sp,
+		struct stream_impl_data *impl, tree_t *tree,
+		int comm_attr, int trim )
+{
+	struct colm_print_args print_args = {
+		impl,
+		comm_attr, comm_attr, trim,
+		&impl->indent,
+		append_file,
+		xml_open, xml_term, xml_close
+	};
+
+	colm_print_tree_args( prg, sp, &print_args, tree );
+}
+
+/* Open callback for postfix printing. Does nothing: in this format a
+ * nonterminal is written by postfix_close, after its children. */
+static void postfix_open( program_t *prg, tree_t **sp, struct colm_print_args *args,
+ kid_t *parent, kid_t *kid )
+{
+}
+
+/*
+ * Write token text for the postfix format. Bytes 33..126 other than the
+ * backslash are copied through; the backslash and every other byte
+ * (including the space) are written as a backslash followed by two lowercase
+ * hex digits.
+ */
+static void postfix_term_data( struct colm_print_args *args, const char *data, long len )
+{
+	int pos = 0;
+	while ( pos < len ) {
+		const char *cur = &data[pos];
+
+		if ( *cur == '\\' ) {
+			args->out( args, "\\5c", 3 );
+		}
+		else if ( *cur >= 33 && *cur <= 126 ) {
+			args->out( args, cur, 1 );
+		}
+		else {
+			char out[64];
+			sprintf( out, "\\%02x", ((unsigned char)*cur) );
+			args->out( args, out, strlen(out) );
+		}
+
+		pos++;
+	}
+}
+
+/*
+ * Terminal callback for postfix printing. Writes one line per terminal:
+ *
+ *   pointer:  "p"
+ *   string:   "s"
+ *   token:    "t <xml_tag> <id> <line> <column> <byte> <data>"
+ *
+ * Missing location information is written as "0 0 0" and missing or empty
+ * token data as "-". Ignore wrappers and the id-0 sentinel produce nothing.
+ */
+static void postfix_term( program_t *prg, tree_t **sp,
+		struct colm_print_args *args, kid_t *kid )
+{
+	/* Result unused; the call is retained from the original code. */
+	tree_child( prg, kid->tree );
+
+	if ( kid->tree->id == LEL_ID_PTR ) {
+		args->out( args, "p\n", 2 );
+		return;
+	}
+
+	if ( kid->tree->id == LEL_ID_STR ) {
+		args->out( args, "s\n", 2 );
+		return;
+	}
+
+	/* Everything else must be an ordinary terminal. */
+	if ( !( 0 < kid->tree->id && kid->tree->id < prg->rtd->first_non_term_id &&
+			kid->tree->id != LEL_ID_IGNORE ) )
+	{
+		return;
+	}
+
+	char buf[512];
+	struct lang_el_info *lel_info = prg->rtd->lel_info;
+	const char *name = lel_info[kid->tree->id].xml_tag;
+	struct colm_data *tokdata = kid->tree->tokdata;
+
+	/* Record type and element name. */
+	args->out( args, "t ", 2 );
+	args->out( args, name, strlen( name ) );
+
+	/* Element id. */
+	sprintf( buf, " %d", kid->tree->id );
+	args->out( args, buf, strlen( buf ) );
+
+	if ( tokdata == 0 ) {
+		/* No token data at all: zero location and empty text. */
+		args->out( args, " 0 0 0 -", 8 );
+	}
+	else {
+		struct colm_location *loc = tokdata->location;
+
+		/* Location. */
+		if ( loc != 0 ) {
+			sprintf( buf, " %ld %ld %ld ", loc->line, loc->column, loc->byte );
+			args->out( args, buf, strlen( buf ) );
+		}
+		else {
+			args->out( args, " 0 0 0 ", 7 );
+		}
+
+		/* Token text. */
+		if ( string_length( tokdata ) != 0 )
+			postfix_term_data( args, string_data( tokdata ), string_length( tokdata ) );
+		else
+			args->out( args, "-", 1 );
+	}
+
+	args->out( args, "\n", 1 );
+}
+
+/*
+ * Close callback for postfix printing. For a nonterminal it writes the line
+ *
+ *   "r <xml_tag> <id> <prod_num> <number of children>"
+ *
+ * Terminals (which include the id-0 sentinel) produce nothing here.
+ */
+static void postfix_close( program_t *prg, tree_t **sp,
+		struct colm_print_args *args, kid_t *parent, kid_t *kid )
+{
+	/* Skip the terminal that is for forcing trailing ignores out. */
+	if ( kid->tree->id == 0 )
+		return;
+
+	/* Only nonterminals are reported on close. */
+	if ( kid->tree->id < prg->rtd->first_non_term_id )
+		return;
+
+	char buf[512];
+	struct lang_el_info *lel_info = prg->rtd->lel_info;
+	const char *name = lel_info[kid->tree->id].xml_tag;
+	kid_t *child;
+	int children = 0;
+
+	/* Record type and element name. */
+	args->out( args, "r ", 2 );
+	args->out( args, name, strlen( name ) );
+
+	/* Element id. */
+	sprintf( buf, " %d", kid->tree->id );
+	args->out( args, buf, strlen( buf ) );
+
+	/* Production number. */
+	sprintf( buf, " %d", kid->tree->prod_num );
+	args->out( args, buf, strlen( buf ) );
+
+	/* Length of the child list. */
+	for ( child = tree_child( prg, kid->tree ); child != 0; child = child->next )
+		children += 1;
+
+	sprintf( buf, " %d", children );
+	args->out( args, buf, strlen( buf ) );
+	args->out( args, "\n", 1 );
+}
+
+/* Print a tree in postfix form into a string collector. The trim parameter
+ * is accepted but not used: all three flags are passed as false. */
+void colm_postfix_tree_collect( program_t *prg, tree_t **sp,
+		str_collect_t *collect, tree_t *tree, int trim )
+{
+	struct colm_print_args print_args = {
+		collect,
+		false, false, false,
+		&collect->indent,
+		append_collect,
+		postfix_open, postfix_term, postfix_close
+	};
+
+	colm_print_tree_args( prg, sp, &print_args, tree );
+}
+
+#if 0
+void colm_postfix_tree_file( program_t *prg, tree_t **sp, struct stream_impl *impl,
+ tree_t *tree, int trim )
+{
+ struct colm_print_args print_args = {
+ impl, false, false, false, &append_file,
+ &postfix_open, &postfix_term, &postfix_close
+ };
+
+ colm_print_tree_args( prg, sp, &print_args, tree );
+
+ //struct stream_impl *impl = (struct stream_impl*) args->arg;
+ fflush( impl->file );
+}
+#endif
+
+/* Print a tree as XML into a string collector, with the two flags after the
+ * output argument both passed as false. */
+void colm_print_tree_collect_xml( program_t *prg, tree_t **sp,
+		str_collect_t *collect, tree_t *tree, int trim )
+{
+	struct colm_print_args print_args = {
+		collect,
+		false, false, trim,
+		&collect->indent,
+		append_collect,
+		xml_open, xml_term, xml_close
+	};
+
+	colm_print_tree_args( prg, sp, &print_args, tree );
+}
+
+/* Print a tree as XML into a string collector, with the two flags after the
+ * output argument both passed as true. */
+void colm_print_tree_collect_xml_ac( program_t *prg, tree_t **sp,
+		str_collect_t *collect, tree_t *tree, int trim )
+{
+	struct colm_print_args print_args = {
+		collect,
+		true, true, trim,
+		&collect->indent,
+		append_collect,
+		xml_open, xml_term, xml_close
+	};
+
+	colm_print_tree_args( prg, sp, &print_args, tree );
+}
+
diff --git a/src/prog.lm b/src/prog.lm
new file mode 100644
index 00000000..3a11e342
--- /dev/null
+++ b/src/prog.lm
@@ -0,0 +1,88 @@
+include 'colm.lm'
+
+# Values exported by this program: the resulting tree and the error string.
+export ColmTree: start
+export ColmError: str
+
+# Pop an argument, open it as a file for reading and parse it as 'start'.
+A: str = argv->pop()
+F: stream = open( A, 'r' )
+parse P: start [ F ]
+
+# Map from a production list to the identifier generated for it.
+alias prod_map map<prod_list, id>
+alias unique_prod map_el<prod_list, id>
+
+# PM records the sublists already given a name, NextId numbers the generated
+# names, and Modified records whether the current rewrite pass changed
+# anything.
+global PM: prod_map = new prod_map()
+global NextId: int = 1
+global Modified: bool = false
+
+# Build a prod_list from a prod_sublist. When SLA has the form
+# [prod_sublist BAR prod_el_list], the left part is converted recursively and
+# the trailing prod_el_list is appended as a further bracketed alternative.
+# Otherwise the single prod_el_list is wrapped in brackets.
+prod_list cons_prod( SLA: prod_sublist )
+{
+ if match SLA [Left: prod_sublist BAR prod_el_list]
+ return cons prod_list[ cons_prod(Left) ' | [ ' SLA.prod_el_list ' ] ' ]
+ else
+ return cons prod_list[ '[ ' SLA.prod_el_list ' ]' ]
+}
+
+# Replace parenthesised sublists inside one definition with references to
+# named definitions. Each prod_el of the form
+# [opt-name POPEN sublist PCLOSE opt-repeat] is rewritten in place to
+# [opt-name Name opt-repeat], where Name is looked up in PM by the sublist's
+# production list. When the production list has not been seen before, a new
+# name "_sublist_<NextId>" is generated and recorded in PM, and a definition
+# for it is constructed.
+#
+# Returns the newly constructed definition. NewDef is never assigned when no
+# new name had to be created, and the caller tests the result for that case.
+# Sets the global Modified whenever an element was rewritten.
+cfl_def rewrite_cfl_def( CflDef: ref<cfl_def> )
+{
+ NewDef: cfl_def
+ for PE: prod_el in CflDef {
+ if match PE [
+ OptName: opt_prod_el_name POPEN PS: prod_sublist PCLOSE OptRep: opt_repeat]
+ {
+ PL: prod_list = cons_prod(PS)
+
+ # Reuse the name of an identical sublist when there is one.
+ Name: id = PM->find( PL )
+ if ( !Name ) {
+ Name = parse id
+ "_sublist_[sprintf("%d", NextId)]"
+ NextId = NextId + 1
+
+ PM->insert( PL, Name )
+
+ NewDef = cons cfl_def
+ "def [Name] [PL]"
+ }
+
+ # Replace the parenthesised sublist with a reference to the name.
+ PE = cons prod_el
+ [OptName Name OptRep " "]
+
+ Modified = true
+
+ # Currently can return only one item.
+ if ( NewDef )
+ break
+ }
+ }
+ return NewDef
+}
+
+# One rewrite pass over the program. Every root item list that ends in a
+# cfl_def has that definition processed by rewrite_cfl_def; when a new
+# definition comes back it is inserted in front of the one that now refers to
+# it. Modified is reset at the start of the pass and returned at the end.
+#
+# NOTE(review): the function is declared void but returns Modified, and the
+# caller loops on the result -- confirm whether the return type should be bool.
+void rewrite( P: ref<start> )
+{
+ Modified = false
+
+ for RIL: root_item<* in P {
+ require RIL [Head: root_item<* CflDef: cfl_def]
+
+ NewDef: cfl_def
+
+ NewDef = rewrite_cfl_def( CflDef )
+
+ if NewDef {
+ # Place the generated definition ahead of the rewritten one.
+ RIL = cons root_item<* [Head NewDef "\n\n" CflDef]
+ Modified = true
+ }
+ else {
+ # Rebuild the list with the (possibly rewritten) definition.
+ RIL = cons root_item<* [Head CflDef]
+ }
+
+ }
+
+ return Modified
+}
+
+# When the parse produced a tree, repeat the rewrite pass until a pass reports
+# no change.
+if P {
+ while ( rewrite( P ) ) {}
+}
+
+# Publish the results through the exported variables.
+ColmTree = P
+ColmError = error
diff --git a/src/program.c b/src/program.c
new file mode 100644
index 00000000..a9459ccf
--- /dev/null
+++ b/src/program.c
@@ -0,0 +1,333 @@
+/*
+ * Copyright 2006-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <string.h>
+#include <assert.h>
+#include <stdlib.h>
+
+#include <colm/pdarun.h>
+#include <colm/tree.h>
+#include <colm/bytecode.h>
+#include <colm/pool.h>
+#include <colm/debug.h>
+#include <colm/struct.h>
+
+#define VM_STACK_SIZE (8192)
+
+/* Create the program's global object as a new struct of the type identified
+ * by rtd->global_id and store it in prg->global. */
+static void colm_alloc_global( program_t *prg )
+{
+ /* Alloc the global. */
+ prg->global = colm_struct_new( prg, prg->rtd->global_id ) ;
+}
+
+/* Set up the VM stack with a single block of VM_STACK_SIZE slots. This first
+ * block has no successor (next == 0); vm_bs_pop relies on that to recognise
+ * it and never frees it. */
+void vm_init( program_t *prg )
+{
+ struct stack_block *first = malloc( sizeof(struct stack_block) );
+
+ first->next = 0;
+ first->offset = 0;
+ first->len = VM_STACK_SIZE;
+ first->data = malloc( sizeof(tree_t*) * VM_STACK_SIZE );
+
+ prg->stack_block = first;
+
+ /* Publish the limits of the current block. */
+ prg->sb_beg = first->data;
+ prg->sb_end = first->data + first->len;
+
+ /* The root of the stack is the end of the first block: vm_bs_pop moves
+ * the stack pointer upward when values are popped. */
+ prg->stack_root = prg->sb_end;
+}
+
+/* Return the stack root recorded by vm_init (the end of the first stack
+ * block). */
+tree_t **colm_vm_root( program_t *prg )
+{
+ return prg->stack_root;
+}
+
+/* Push a new stack block able to hold at least n entries and make it the
+ * current block.
+ *
+ * sp is the stack pointer inside the current block; its position is saved in
+ * the block's offset so vm_bs_pop can restore it. The reserve block is reused
+ * when it is large enough, otherwise a fresh block of max(n, VM_STACK_SIZE)
+ * entries is allocated.
+ *
+ * Returns the new stack pointer, which is the end of the new block. */
+tree_t **vm_bs_add( program_t *prg, tree_t **sp, int n )
+{
+ /* Close off the current block. */
+ if ( prg->stack_block != 0 ) {
+ prg->stack_block->offset = sp - prg->stack_block->data;
+ /* Account for the entries in use between sp and the block end. */
+ prg->sb_total += prg->stack_block->len - prg->stack_block->offset;
+ }
+
+ if ( prg->reserve != 0 && prg->reserve->len >= n) {
+ /* Reuse the reserve block instead of allocating. */
+ struct stack_block *b = prg->reserve;
+ b->next = prg->stack_block;
+ b->offset = 0;
+
+ prg->stack_block = b;
+ prg->reserve = 0;
+ }
+ else {
+ struct stack_block *b = malloc( sizeof(struct stack_block) );
+ int size = VM_STACK_SIZE;
+ if ( n > size )
+ size = n;
+ b->next = prg->stack_block;
+ b->data = malloc( sizeof(tree_t*) * size );
+ b->len = size;
+ b->offset = 0;
+
+ prg->stack_block = b;
+ }
+
+ /* Publish the limits of the new current block. */
+ prg->sb_beg = prg->stack_block->data;
+ prg->sb_end = prg->stack_block->data + prg->stack_block->len;
+
+ return prg->sb_end;
+}
+
+/* Pop n entries starting at sp, crossing back into earlier stack blocks as
+ * needed.
+ *
+ * Each block that is popped off becomes the new reserve (any previous
+ * reserve is freed). The first block, the one with next == 0, is never
+ * released.
+ *
+ * Returns the stack pointer after the pop. */
+tree_t **vm_bs_pop( program_t *prg, tree_t **sp, int n )
+{
+ while ( 1 ) {
+ tree_t **end = prg->stack_block->data + prg->stack_block->len;
+ int remaining = end - sp;
+
+ /* Don't have to free this block. Remaining values to pop leave us
+ * inside it. */
+ if ( n < remaining ) {
+ sp += n;
+ return sp;
+ }
+
+ if ( prg->stack_block->next == 0 ) {
+ /* Don't delete the sentinel stack block. Returns the end as in the
+ * creation of the first stack block. */
+ return prg->sb_end;
+ }
+
+ /* Clear any previous reserve. We are going to save this block as the
+ * reserve. */
+ if ( prg->reserve != 0 ) {
+ free( prg->reserve->data );
+ free( prg->reserve );
+ }
+
+ /* Pop the stack block. */
+ struct stack_block *b = prg->stack_block;
+ prg->stack_block = prg->stack_block->next;
+ prg->reserve = b;
+
+ /* Setup the bounds. Note that we restore the full block, which is
+ * necessary to honour any CONTIGUOUS statements that counted on it
+ * before a subsequent CONTIGUOUS triggered a new block. */
+ prg->sb_beg = prg->stack_block->data;
+ prg->sb_end = prg->stack_block->data + prg->stack_block->len;
+
+ /* Update the total stack usage. */
+ prg->sb_total -= prg->stack_block->len - prg->stack_block->offset;
+
+ /* Continue popping in the restored block, from the position that was
+ * saved when it was closed off. */
+ n -= remaining;
+ sp = prg->stack_block->data + prg->stack_block->offset;
+ }
+}
+
+/* Free every VM stack block, including the reserve block.
+ *
+ * On return both prg->stack_block and prg->reserve are null, so the function
+ * can be called again safely, and a later vm_init/vm_bs_add cannot pick up a
+ * freed reserve block. */
+void vm_clear( program_t *prg )
+{
+ /* Release the chain of active blocks. */
+ while ( prg->stack_block != 0 ) {
+ struct stack_block *b = prg->stack_block;
+ prg->stack_block = prg->stack_block->next;
+
+ free( b->data );
+ free( b );
+ }
+
+ if ( prg->reserve != 0 ) {
+ free( prg->reserve->data );
+ free( prg->reserve );
+
+ /* Do not leave a dangling pointer to the freed block. */
+ prg->reserve = 0;
+ }
+}
+
+/* Return the value stored in prg->return_val. The reference held by the
+ * program is dropped in colm_delete_program. */
+tree_t *colm_return_val( struct colm_program *prg )
+{
+ return prg->return_val;
+}
+
+/* Store active_realm in the program. NOTE(review): presumably a mask of the
+ * debug realms to enable -- confirm against colm/debug.h. */
+void colm_set_debug( program_t *prg, long active_realm )
+{
+ prg->active_realm = active_realm;
+}
+
+/* Set the program's reduce_clean flag. colm_new_program initialises it
+ * to 1. */
+void colm_set_reduce_clean( struct colm_program *prg, unsigned char reduce_clean )
+{
+ prg->reduce_clean = reduce_clean;
+}
+
+/* Create a program instance for the runtime data in rtd.
+ *
+ * The returned program is zero-initialised, has its object pools, global
+ * object and VM stack set up, and owns an empty, null-terminated list of
+ * stream file names. Release it with colm_delete_program. */
+program_t *colm_new_program( struct colm_sections *rtd )
+{
+ /* These types must fit in the space of a tree_t. */
+ assert( sizeof(str_t) <= sizeof(tree_t) );
+ assert( sizeof(pointer_t) <= sizeof(tree_t) );
+
+ /* Start from an all-zero program. */
+ program_t *prg = calloc( 1, sizeof(program_t) );
+
+ prg->rtd = rtd;
+ prg->ctx_dep_parsing = 1;
+ prg->reduce_clean = 1;
+
+ /* One pool per kind of runtime object. */
+ init_pool_alloc( &prg->kid_pool, sizeof(kid_t) );
+ init_pool_alloc( &prg->tree_pool, sizeof(tree_t) );
+ init_pool_alloc( &prg->parse_tree_pool, sizeof(parse_tree_t) );
+ init_pool_alloc( &prg->head_pool, sizeof(head_t) );
+ init_pool_alloc( &prg->location_pool, sizeof(location_t) );
+
+ prg->true_val = (tree_t*) 1;
+ prg->false_val = (tree_t*) 0;
+
+ /* The global object first, then the VM stack. */
+ colm_alloc_global( prg );
+ vm_init( prg );
+
+ rtd->init_need();
+
+ /* A list holding only the terminating null entry. */
+ prg->stream_fns = malloc( sizeof(char*) * 1 );
+ prg->stream_fns[0] = 0;
+
+ return prg;
+}
+
+/* Execute the program's root code.
+ *
+ * argc/argv are the program arguments. argl is passed through to the program
+ * alongside argv and may be null (colm_run_program passes 0).
+ * NOTE(review): presumably the per-argument lengths -- confirm.
+ *
+ * Nothing is done when the program has no root code. The argument pointers
+ * are only borrowed for the duration of the run; all three are cleared again
+ * before returning so the program does not keep pointers into caller
+ * memory. */
+void colm_run_program2( program_t *prg, int argc, const char **argv, const int *argl )
+{
+ if ( prg->rtd->root_code_len == 0 )
+ return;
+
+ /* Make the arguments available to the program. */
+ prg->argc = argc;
+ prg->argv = argv;
+ prg->argl = argl;
+
+ execution_t execution;
+ memset( &execution, 0, sizeof(execution) );
+ execution.frame_id = prg->rtd->root_frame_id;
+
+ colm_execute( prg, &execution, prg->rtd->root_code );
+
+ /* Clear the args, including the argl array that was set above. */
+ prg->argc = 0;
+ prg->argv = 0;
+ prg->argl = 0;
+}
+
+/* Run the program with the given arguments and no argl array. */
+void colm_run_program( program_t *prg, int argc, const char **argv )
+{
+ colm_run_program2( prg, argc, argv, 0 );
+}
+
+/* Delete every struct on the program's heap list. The successor is read
+ * before each delete because the current element is gone afterwards. */
+static void colm_clear_heap( program_t *prg, tree_t **sp )
+{
+ struct colm_struct *cur, *following;
+
+ for ( cur = prg->heap.head; cur != 0; cur = following ) {
+ following = cur->next;
+ colm_struct_delete( prg, sp, cur );
+ }
+}
+
+/* Return the opaque context pointer stored by colm_set_reduce_ctx. */
+void *colm_get_reduce_ctx( struct colm_program *prg )
+{
+ return prg->red_ctx;
+}
+
+/* Store an opaque context pointer in the program. It is not interpreted
+ * here; colm_get_reduce_ctx returns it unchanged. */
+void colm_set_reduce_ctx( struct colm_program *prg, void *ctx )
+{
+ prg->red_ctx = ctx;
+}
+
+/* Hand the null-terminated list of stream file names over to the caller.
+ * The program's own pointer is cleared, so colm_delete_program will no
+ * longer free the list or its strings; that becomes the caller's job. */
+const char **colm_extract_fns( struct colm_program *prg )
+{
+ const char **fns = prg->stream_fns;
+ prg->stream_fns = 0;
+ return fns;
+}
+
+const char *colm_error( struct colm_program *prg, int *length )
+{
+ const char *rtn = 0;
+ if ( prg->error != 0 ) {
+ rtn = prg->error->tokdata->data;
+ if ( length != 0 )
+ *length = prg->error->tokdata->length;
+ }
+ return rtn;
+}
+
+/* Tear down a program created by colm_new_program and free it.
+ *
+ * Releases the return value, the heap structs and the error value, then the
+ * object pools, the run buffers, the VM stack and (unless it was taken with
+ * colm_extract_fns) the stream file name list.
+ *
+ * Returns the program's exit status, read before anything is freed. */
+int colm_delete_program( program_t *prg )
+{
+ tree_t **sp = prg->stack_root;
+ int exit_status = prg->exit_status;
+
+ /* Drop the references the program itself still holds. */
+ colm_tree_downref( prg, sp, prg->return_val );
+ colm_clear_heap( prg, sp );
+
+ colm_tree_downref( prg, sp, prg->error );
+
+#if DEBUG
+ /* Report pool items that are still outstanding at this point. */
+ long kid_lost = kid_num_lost( prg );
+ long tree_lost = tree_num_lost( prg );
+ long parse_tree_lost = parse_tree_num_lost( &prg->parse_tree_pool );
+ long head_lost = head_num_lost( prg );
+ long location_lost = location_num_lost( prg );
+
+ if ( kid_lost )
+ message( "warning: lost kids: %ld\n", kid_lost );
+
+ if ( tree_lost )
+ message( "warning: lost trees: %ld\n", tree_lost );
+
+ if ( parse_tree_lost )
+ message( "warning: lost parse trees: %ld\n", parse_tree_lost );
+
+ if ( head_lost )
+ message( "warning: lost heads: %ld\n", head_lost );
+
+ if ( location_lost )
+ message( "warning: lost locations: %ld\n", location_lost );
+#endif
+
+ /* Release the pools. */
+ kid_clear( prg );
+ tree_clear( prg );
+ head_clear( prg );
+ parse_tree_clear( &prg->parse_tree_pool );
+ location_clear( prg );
+
+ /* Free the list of allocated run buffers. */
+ struct run_buf *rb = prg->alloc_run_buf;
+ while ( rb != 0 ) {
+ struct run_buf *next = rb->next;
+ free( rb );
+ rb = next;
+ }
+
+ vm_clear( prg );
+
+ /* The name list is null-terminated; free each string, then the list. It
+ * is null here when ownership was taken with colm_extract_fns. */
+ if ( prg->stream_fns ) {
+ char **ptr = (char**)prg->stream_fns;
+ while ( *ptr != 0 ) {
+ free( *ptr );
+ ptr += 1;
+ }
+
+ free( prg->stream_fns );
+ }
+
+ free( prg );
+
+ return exit_status;
+}
diff --git a/src/program.h b/src/program.h
new file mode 100644
index 00000000..8ba716d4
--- /dev/null
+++ b/src/program.h
@@ -0,0 +1,186 @@
+/*
+ * Copyright 2007-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _COLM_PROGRAM_H
+#define _COLM_PROGRAM_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <colm/pdarun.h>
+
+/* One block of the VM stack. Blocks are chained through next; the first
+ * block created has next == 0. */
+struct stack_block
+{
+ /* Storage for len stack slots. */
+ tree_t **data;
+ int len;
+ /* Stack pointer position (relative to data) saved when a newer block was
+ * pushed on top of this one. */
+ int offset;
+ struct stack_block *next;
+};
+
+/* Runtime data describing a program. A pointer to it is passed to
+ * colm_new_program and kept in the program as prg->rtd. Most members are a
+ * table pointer paired with its element count. */
+struct colm_sections
+{
+ struct lang_el_info *lel_info;
+ long num_lang_els;
+
+ struct struct_el_info *sel_info;
+ long num_struct_els;
+
+ struct prod_info *prod_info;
+ long num_prods;
+
+ struct region_info *region_info;
+ long num_regions;
+
+ /* Code, length and frame id of the root (main) code. Nothing is run when
+ * root_code_len is zero. */
+ code_t *root_code;
+ long root_code_len;
+ long root_frame_id;
+
+ struct frame_info *frame_info;
+ long num_frames;
+
+ struct function_info *function_info;
+ long num_functions;
+
+ struct pat_cons_info *pat_repl_info;
+ long num_patterns;
+
+ struct pat_cons_node *pat_repl_nodes;
+ long num_pattern_nodes;
+
+ struct generic_info *generic_info;
+ long num_generics;
+
+ long argv_generic_id;
+ long stds_generic_id;
+
+ const char **litdata;
+ long *litlen;
+ head_t **literals;
+ long num_literals;
+
+ CaptureAttr *capture_attr;
+ long num_captured_attr;
+
+ struct fsm_tables *fsm_tables;
+ struct pda_tables *pda_tables;
+ int *start_states;
+ int *eof_lel_ids;
+ int *parser_lel_ids;
+ long num_parsers;
+
+ long global_size;
+
+ long first_non_term_id;
+ long first_struct_el_id;
+
+ /* Ids of built-in elements. global_id is the struct id used to allocate
+ * the program's global object. */
+ long integer_id;
+ long string_id;
+ long any_id;
+ long eof_id;
+ long no_token_id;
+ long global_id;
+ long argv_el_id;
+ long stds_el_id;
+ long struct_inbuilt_id;
+ long struct_input_id;
+ long struct_stream_id;
+
+ /* Entry points supplied with the runtime data. */
+ void (*fsm_execute)( struct pda_run *pda_run, struct input_impl *input_stream );
+ void (*send_named_lang_el)( struct colm_program *prg, tree_t **tree,
+ struct pda_run *pda_run, struct input_impl *input_stream );
+ void (*init_bindings)( struct pda_run *pda_run );
+ void (*pop_binding)( struct pda_run *pda_run, parse_tree_t *tree );
+
+ tree_t **(*host_call)( program_t *prg, long code, tree_t **sp );
+
+ void (*commit_reduce_forward)( program_t *prg, tree_t **root, struct pda_run *pda_run, parse_tree_t *pt );
+ long (*commit_union_sz)( int reducer );
+ /* Called once by colm_new_program. */
+ void (*init_need)();
+ int (*reducer_need_tok)( program_t *prg, struct pda_run *pda_run, int id );
+ int (*reducer_need_ign)( program_t *prg, struct pda_run *pda_run );
+ void (*read_reduce)( program_t *prg, int reducer, input_t *input );
+};
+
+/* Head and tail of a list of colm_struct objects linked through their next
+ * pointers. */
+struct heap_list
+{
+ struct colm_struct *head;
+ struct colm_struct *tail;
+};
+
+/* State of one program instance. Created zero-filled by colm_new_program
+ * and destroyed by colm_delete_program. */
+struct colm_program
+{
+ /* Set by colm_set_debug. */
+ long active_realm;
+
+ /* Arguments of the current run; only set while colm_run_program2 is
+ * executing. */
+ int argc;
+ const char **argv;
+ const int *argl;
+
+ unsigned char ctx_dep_parsing;
+ unsigned char reduce_clean;
+ /* The runtime data this program was created from. */
+ struct colm_sections *rtd;
+ struct colm_struct *global;
+ int induce_exit;
+ /* Returned by colm_delete_program. */
+ int exit_status;
+
+ struct pool_alloc kid_pool;
+ struct pool_alloc tree_pool;
+ struct pool_alloc parse_tree_pool;
+ struct pool_alloc head_pool;
+ struct pool_alloc location_pool;
+
+ /* Initialised to (tree_t*)1 and (tree_t*)0 respectively. */
+ tree_t *true_val;
+ tree_t *false_val;
+
+ /* Structs deleted by colm_delete_program. */
+ struct heap_list heap;
+
+ stream_t *stdin_val;
+ stream_t *stdout_val;
+ stream_t *stderr_val;
+
+ /* Error value reported through colm_error; null when there is none. */
+ tree_t *error;
+
+ struct run_buf *alloc_run_buf;
+
+ /* Current stack block limits. Changed when crossing block boundaries. */
+ tree_t **sb_beg;
+ tree_t **sb_end;
+ long sb_total;
+ /* Most recently popped block, kept for reuse by vm_bs_add. */
+ struct stack_block *reserve;
+ struct stack_block *stack_block;
+ tree_t **stack_root;
+
+ /* Returned value for main program and any exported functions. */
+ tree_t *return_val;
+
+ /* Opaque pointer managed by colm_set_reduce_ctx / colm_get_reduce_ctx. */
+ void *red_ctx;
+
+ /* This can be extracted for ownership transfer before a program is deleted. */
+ const char **stream_fns;
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _COLM_PROGRAM_H */
+
diff --git a/src/redbuild.cc b/src/redbuild.cc
new file mode 100644
index 00000000..7e0396d7
--- /dev/null
+++ b/src/redbuild.cc
@@ -0,0 +1,562 @@
+/*
+ * Copyright 2006-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "redbuild.h"
+
+#include <assert.h>
+#include <string.h>
+#include <stdbool.h>
+
+#include <iostream>
+
+#include "fsmcodegen.h"
+
+using namespace std;
+
+/* Prepare a builder that reduces the graph fsm for compiler pd. No work is
+ * done here; reduceMachine() performs the reduction.
+ *
+ * Every member is given a defined starting value, listed in declaration
+ * order. The start and error state numbers begin at -1 (not yet known), and
+ * redFsm stays null until reduceMachine() allocates it. */
+RedFsmBuild::RedFsmBuild( Compiler *pd, FsmGraph *fsm )
+:
+ pd(pd),
+ fsm(fsm),
+ nextActionTableId(0),
+ startState(-1),
+ errState(-1),
+ redFsm(0),
+ curAction(0),
+ curActionTable(0),
+ curTrans(0),
+ curState(0),
+ curCondSpace(0),
+ curStateCond(0)
+{
+}
+
+/* Allocate the array of length actions, zero it, and append each element to
+ * redFsm->genActionList. */
+void RedFsmBuild::initActionList( unsigned long length )
+{
+ redFsm->allActions = new GenAction[length];
+ /* NOTE(review): this zeroes the objects after construction; that is only
+ * sound if GenAction is trivially copyable -- confirm against redfsm.h. */
+ memset( redFsm->allActions, 0, sizeof(GenAction) * length );
+ for ( unsigned long a = 0; a < length; a++ )
+ redFsm->genActionList.append( redFsm->allActions+a );
+}
+
+
+/* Number the actions that are referenced and create a reduced action for
+ * each of them, in the order of pd->actionList. */
+void RedFsmBuild::makeActionList()
+{
+ /* Determine which actions to write. */
+ int nextActionId = 0;
+ for ( ActionList::Iter act = pd->actionList; act.lte(); act++ ) {
+ if ( act->numRefs() > 0 || act->numCondRefs > 0 )
+ act->actionId = nextActionId++;
+ }
+
+ initActionList( nextActionId );
+ curAction = 0;
+
+ /* Assumes actions that were not numbered above carry a negative
+ * actionId -- TODO confirm against the Action constructor. */
+ for ( ActionList::Iter act = pd->actionList; act.lte(); act++ ) {
+ if ( act->actionId >= 0 )
+ makeAction( act );
+ }
+}
+
+/* Allocate the array of length reduced action tables. */
+void RedFsmBuild::initActionTableList( unsigned long length )
+{
+ redFsm->allActionTables = new RedAction[length];
+}
+
+/* Allocate length reduced states, append them to redFsm->stateList and turn
+ * the start state, error state and entry point numbers recorded so far into
+ * pointers into the new array. startState must already be set. */
+void RedFsmBuild::initStateList( unsigned long length )
+{
+ redFsm->allStates = new RedState[length];
+ for ( unsigned long s = 0; s < length; s++ )
+ redFsm->stateList.append( redFsm->allStates+s );
+
+ /* We get the start state as an offset, set the pointer now. */
+ assert( startState >= 0 );
+ redFsm->startState = redFsm->allStates + startState;
+ /* The error state is optional; errState stays negative without one. */
+ if ( errState >= 0 )
+ redFsm->errState = redFsm->allStates + errState;
+ for ( EntryIdVect::Iter en = redFsm->entryPointIds; en.lte(); en++ )
+ redFsm->entryPoints.insert( redFsm->allStates + *en );
+
+ /* The nextStateId is no longer used to assign state ids (they come in set
+ * from the frontend now), however generation code still depends on it.
+ * Should eventually remove this variable. */
+ redFsm->nextStateId = redFsm->stateList.length();
+}
+
+/* Record entryState as an entry point and map entryId to it. The state is
+ * stored by number; initStateList converts it to a pointer later. */
+void RedFsmBuild::addEntryPoint( int entryId, unsigned long entryState )
+{
+ redFsm->entryPointIds.append( entryState );
+ redFsm->redEntryMap.insert( entryId, entryState );
+}
+
+/* Append the entry id for a region. regionToEntry is indexed by region id,
+ * so regions must be added in id order starting from zero. */
+void RedFsmBuild::addRegionToEntry( int regionId, int entryId )
+{
+ assert( regionId == redFsm->regionToEntry.length() );
+ redFsm->regionToEntry.append( entryId );
+}
+
+/* Called before the transitions of state snum are added, with the number of
+ * transitions to expect. Currently a no-op; both arguments are unused. */
+void RedFsmBuild::initTransList( int snum, unsigned long length )
+{
+ /* Could preallocate the out range to save time growing it. For now do
+ * nothing. */
+}
+
+/* Append the transition [lowKey, highKey] to the out range of state snum.
+ *
+ * targ is the target state number, or negative for no target; in a complete
+ * machine a missing target becomes the error state. action is the action
+ * table number, or negative for none. tnum is not used.
+ *
+ * When a complete machine is wanted, any gap between the previous range (or
+ * the minimum key) and lowKey is first filled with the error transition.
+ * Nothing is added to the error state itself. */
+void RedFsmBuild::newTrans( int snum, int tnum, Key lowKey,
+ Key highKey, long targ, long action )
+{
+ /* Get the current state and range. */
+ RedState *curState = redFsm->allStates + snum;
+ RedTransList &destRange = curState->outRange;
+
+ if ( curState == redFsm->errState )
+ return;
+
+ /* Make the new transitions. */
+ RedState *targState = targ >= 0 ? (redFsm->allStates + targ) :
+ redFsm->wantComplete ? redFsm->getErrorState() : 0;
+ RedAction *actionTable = action >= 0 ? (redFsm->allActionTables + action) : 0;
+ RedTrans *trans = redFsm->allocateTrans( targState, actionTable );
+ RedTransEl transEl( lowKey, highKey, trans );
+
+ if ( redFsm->wantComplete ) {
+ /* If the machine is to be complete then we need to fill any gaps with
+ * the error transitions. */
+ if ( destRange.length() == 0 ) {
+ /* Range is currently empty. */
+ if ( keyOps->minKey < lowKey ) {
+ /* The first range doesn't start at the low end. */
+ Key fillHighKey = lowKey;
+ fillHighKey.decrement();
+
+ /* Create the filler with the state's error transition. */
+ RedTransEl newTel( keyOps->minKey, fillHighKey, redFsm->getErrorTrans() );
+ destRange.append( newTel );
+ }
+ }
+ else {
+ /* The range list is not empty, get the last range. */
+ RedTransEl *last = &destRange[destRange.length()-1];
+ Key nextKey = last->highKey;
+ nextKey.increment();
+ if ( nextKey < lowKey ) {
+ /* There is a gap to fill. Make the high key. */
+ Key fillHighKey = lowKey;
+ fillHighKey.decrement();
+
+ /* Create the filler with the state's error transition. */
+ RedTransEl newTel( nextKey, fillHighKey, redFsm->getErrorTrans() );
+ destRange.append( newTel );
+ }
+ }
+ }
+
+ /* Filler taken care of. Append the range built above. */
+ destRange.append( transEl );
+}
+
+/* Close the out range of state snum after all transitions were added. For a
+ * complete machine the keys above the last range (or the whole alphabet, if
+ * the state has no ranges) are mapped to the error transition. The error
+ * state itself is left untouched. */
+void RedFsmBuild::finishTransList( int snum )
+{
+ /* Get the current state and range. */
+ RedState *curState = redFsm->allStates + snum;
+ RedTransList &destRange = curState->outRange;
+
+ if ( curState == redFsm->errState )
+ return;
+
+ /* If building a complete machine we may need filler on the end. */
+ if ( redFsm->wantComplete ) {
+ /* Check if there are any ranges already. */
+ if ( destRange.length() == 0 ) {
+ /* Fill with the whole alphabet. */
+ /* Add the range on the lower and upper bound. */
+ RedTransEl newTel( keyOps->minKey, keyOps->maxKey, redFsm->getErrorTrans() );
+ destRange.append( newTel );
+ }
+ else {
+ /* Get the last and check for a gap on the end. */
+ RedTransEl *last = &destRange[destRange.length()-1];
+ if ( last->highKey < keyOps->maxKey ) {
+ /* Make the low key of the filler: one past the last high key. */
+ Key fillLowKey = last->highKey;
+ fillLowKey.increment();
+
+ /* Create the new range with the error trans and append it. */
+ RedTransEl newTel( fillLowKey, keyOps->maxKey, redFsm->getErrorTrans() );
+ destRange.append( newTel );
+ }
+ }
+ }
+}
+
+/* Set the id of the reduced state at index snum. */
+void RedFsmBuild::setId( int snum, int id )
+{
+ RedState *curState = redFsm->allStates + snum;
+ curState->id = id;
+}
+
+/* Give state snum an EOF transition to state eofTarget with action table
+ * actId. Both numbers are used as array indices without a range check, so
+ * they must be valid. */
+void RedFsmBuild::setEofTrans( int snum, int eofTarget, int actId )
+{
+ RedState *curState = redFsm->allStates + snum;
+ RedState *targState = redFsm->allStates + eofTarget;
+ RedAction *eofAct = redFsm->allActionTables + actId;
+ curState->eofTrans = redFsm->allocateTrans( targState, eofAct );
+}
+
+/* Mark the reduced state at index snum as final. */
+void RedFsmBuild::setFinal( int snum )
+{
+ RedState *curState = redFsm->allStates + snum;
+ curState->isFinal = true;
+}
+
+
+/* Attach to-state, from-state and EOF action tables to state snum. Each
+ * argument is an action table number; a negative number leaves the
+ * corresponding pointer unchanged. */
+void RedFsmBuild::setStateActions( int snum, long toStateAction,
+ long fromStateAction, long eofAction )
+{
+ RedState *curState = redFsm->allStates + snum;
+ if ( toStateAction >= 0 )
+ curState->toStateAction = redFsm->allActionTables + toStateAction;
+ if ( fromStateAction >= 0 )
+ curState->fromStateAction = redFsm->allActionTables + fromStateAction;
+ if ( eofAction >= 0 )
+ curState->eofAction = redFsm->allActionTables + eofAction;
+}
+
+/* Intentionally empty: nothing needs to be done to close the machine. */
+void RedFsmBuild::closeMachine()
+{
+}
+
+
+/* Currently a no-op; both arguments are unused. */
+void RedFsmBuild::initStateCondList( int snum, ulong length )
+{
+ /* Could preallocate these, as we could with transitions. */
+}
+
+/* Set the forcedErrorState flag on the reduced machine. makeEntryPoints
+ * calls this when the graph has lmRequiresErrorState set. */
+void RedFsmBuild::setForcedErrorState()
+{
+ redFsm->forcedErrorState = true;
+}
+
+/* Return the larger of keyOps->maxKey and the highest key used by any
+ * state's out range. Must run before transitions are distributed to singles
+ * and defaults (asserted below).
+ *
+ * NOTE(review): the scan starts from keyOps->maxKey, so the result can only
+ * differ from it if a range exceeds the alphabet maximum. If the intent is
+ * "highest key actually used", the starting value should probably be
+ * keyOps->minKey -- confirm before changing. */
+Key RedFsmBuild::findMaxKey()
+{
+ Key maxKey = keyOps->maxKey;
+ for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
+ assert( st->outSingle.length() == 0 );
+ assert( st->defTrans == 0 );
+
+ /* Only the last range of each state is inspected. */
+ long rangeLen = st->outRange.length();
+ if ( rangeLen > 0 ) {
+ Key highKey = st->outRange[rangeLen-1].highKey;
+ if ( highKey > maxKey )
+ maxKey = highKey;
+ }
+ }
+ return maxKey;
+}
+
+
+/* Create one reduced action table for every entry of actionTableMap, in id
+ * order, and insert each into redFsm->actionMap. The ids handed out by
+ * reduceActionTables are consecutive from zero, so they index the temporary
+ * tables array directly. */
+void RedFsmBuild::makeActionTableList()
+{
+ /* Must first order the action tables based on their id. */
+ int numTables = nextActionTableId;
+ RedActionTable **tables = new RedActionTable*[numTables];
+ for ( ActionTableMap::Iter at = actionTableMap; at.lte(); at++ )
+ tables[at->id] = at;
+
+ initActionTableList( numTables );
+ curActionTable = 0;
+
+ for ( int t = 0; t < numTables; t++ ) {
+ long length = tables[t]->key.length();
+
+ /* Collect the action table. */
+ RedAction *redAct = redFsm->allActionTables + curActionTable;
+ redAct->actListId = curActionTable;
+ redAct->key.setAsNew( length );
+
+ /* Copy the actions over as pointers into redFsm->allActions. The
+ * element keys are all set to zero. */
+ int pos = 0;
+ for ( ActionTable::Iter atel = tables[t]->key; atel.lte(); atel++ ) {
+ int actionId = atel->value->actionId;
+ redAct->key[pos].key = 0;
+ redAct->key[pos].value = redFsm->allActions+actionId;
+ pos += 1;
+ }
+
+ /* Insert into the action table map. */
+ redFsm->actionMap.insert( redAct );
+
+ curActionTable += 1;
+
+ }
+
+ delete[] tables;
+}
+
+/* Collect every non-empty action table used by the graph (to-state,
+ * from-state, EOF and transition actions) into actionTableMap. Each table
+ * that is newly inserted gets the next free id from nextActionTableId. */
+void RedFsmBuild::reduceActionTables()
+{
+ /* Reduce the actions tables to a set. */
+ for ( StateList::Iter st = fsm->stateList; st.lte(); st++ ) {
+ RedActionTable *actionTable = 0;
+
+ /* Reduce To State Actions. */
+ if ( st->toStateActionTable.length() > 0 ) {
+ if ( actionTableMap.insert( st->toStateActionTable, &actionTable ) )
+ actionTable->id = nextActionTableId++;
+ }
+
+ /* Reduce From State Actions. */
+ if ( st->fromStateActionTable.length() > 0 ) {
+ if ( actionTableMap.insert( st->fromStateActionTable, &actionTable ) )
+ actionTable->id = nextActionTableId++;
+ }
+
+ /* Reduce EOF actions. */
+ if ( st->eofActionTable.length() > 0 ) {
+ if ( actionTableMap.insert( st->eofActionTable, &actionTable ) )
+ actionTable->id = nextActionTableId++;
+ }
+
+ /* Loop the transitions and reduce their actions. */
+ for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
+ if ( trans->actionTable.length() > 0 ) {
+ if ( actionTableMap.insert( trans->actionTable, &actionTable ) )
+ actionTable->id = nextActionTableId++;
+ }
+ }
+ }
+}
+
+/* Append the range [lowKey, highKey] for trans to outList, unless the
+ * transition has neither a target state nor any actions. */
+void RedFsmBuild::appendTrans( TransListVect &outList, Key lowKey,
+ Key highKey, FsmTrans *trans )
+{
+ if ( trans->toState != 0 || trans->actionTable.length() > 0 )
+ outList.append( TransEl( lowKey, highKey, trans ) );
+}
+
+/* Emit one reduced transition for the current state. The target state and
+ * the action table are translated to their numbers, with -1 standing for
+ * "none", and handed to newTrans together with the key range. */
+void RedFsmBuild::makeTrans( Key lowKey, Key highKey, FsmTrans *trans )
+{
+ /* First reduce the action. */
+ RedActionTable *actionTable = 0;
+ if ( trans->actionTable.length() > 0 )
+ actionTable = actionTableMap.find( trans->actionTable );
+
+ long targ = trans->toState == 0 ? -1 : trans->toState->alg.stateNum;
+ long action = actionTable == 0 ? -1 : actionTable->id;
+
+ newTrans( curState, curTrans++, lowKey, highKey, targ, action );
+}
+
+/* Build the out range of the current state from the transitions of state.
+ * Transitions with neither a target nor actions are dropped by appendTrans;
+ * the survivors are emitted in order and the range is then closed with
+ * finishTransList. */
+void RedFsmBuild::makeTransList( FsmState *state )
+{
+ /* Collect the transitions worth keeping. Iterating an empty out list
+ * simply collects nothing. */
+ TransListVect kept;
+ for ( TransList::Iter tr = state->outList; tr.lte(); tr++ )
+ appendTrans( kept, tr->lowKey, tr->highKey, tr );
+
+ initTransList( curState, kept.length() );
+ curTrans = 0;
+
+ /* Emit them in order, then fill whatever is left of the alphabet. */
+ for ( TransListVect::Iter el = kept; el.lte(); el++ )
+ makeTrans( el->lowKey, el->highKey, el->value );
+
+ finishTransList( curState );
+}
+
+/* Fill in the reduced action at index anum from the given name, source
+ * location and parse-tree action. Note that the stored markId is the
+ * action's markId plus one. */
+void RedFsmBuild::newAction( int anum, char *name, int line, int col, Action *action )
+{
+ GenAction &dest = redFsm->allActions[anum];
+
+ dest.actionId = anum;
+ dest.name = name;
+
+ dest.loc.line = line;
+ dest.loc.col = col;
+
+ dest.inlineList = action->inlineList;
+ dest.objField = action->objField;
+ dest.markType = action->markType;
+ dest.markId = action->markId + 1;
+}
+
+/* Create the next reduced action from action, taking its name (which may be
+ * null) and its source location, and advance curAction. */
+void RedFsmBuild::makeAction( Action *action )
+{
+ /* A null name is passed through unchanged. */
+ char *actionName = action->name;
+
+ newAction( curAction++, actionName,
+ action->loc.line, action->loc.col, action );
+}
+
+/* Write len characters starting at data to out, replacing '<', '>' and '&'
+ * with their XML entities. All other characters are written unchanged. */
+void xmlEscapeHost( std::ostream &out, char *data, int len )
+{
+ for ( char *cur = data, *stop = data + len; cur != stop; ++cur ) {
+ const char ch = *cur;
+ if ( ch == '<' )
+ out << "&lt;";
+ else if ( ch == '>' )
+ out << "&gt;";
+ else if ( ch == '&' )
+ out << "&amp;";
+ else
+ out << ch;
+ }
+}
+
+/* Attach the to-state, from-state and EOF action tables of state to the
+ * current reduced state. Each non-empty table is looked up in
+ * actionTableMap; setStateActions is only called when at least one lookup
+ * succeeds, and it receives -1 for every table that is empty or not
+ * found. */
+void RedFsmBuild::makeStateActions( FsmState *state )
+{
+ long toStateAction = -1;
+ long fromStateAction = -1;
+ long eofAction = -1;
+ bool anyFound = false;
+
+ if ( state->toStateActionTable.length() > 0 ) {
+ RedActionTable *found = actionTableMap.find( state->toStateActionTable );
+ if ( found != 0 ) {
+ toStateAction = found->id;
+ anyFound = true;
+ }
+ }
+
+ if ( state->fromStateActionTable.length() > 0 ) {
+ RedActionTable *found = actionTableMap.find( state->fromStateActionTable );
+ if ( found != 0 ) {
+ fromStateAction = found->id;
+ anyFound = true;
+ }
+ }
+
+ if ( state->eofActionTable.length() > 0 ) {
+ RedActionTable *found = actionTableMap.find( state->eofActionTable );
+ if ( found != 0 ) {
+ eofAction = found->id;
+ anyFound = true;
+ }
+ }
+
+ if ( anyFound ) {
+ setStateActions( curState, toStateAction,
+ fromStateAction, eofAction );
+ }
+}
+
+/* Create the reduced states. For every state of the graph, in list order,
+ * this sets its actions, out transitions, id, final flag and, when the state
+ * has an EOF target, its EOF transition. curState is the index of the state
+ * being built. */
+void RedFsmBuild::makeStateList()
+{
+ /* Write the list of states. */
+ long length = fsm->stateList.length();
+ initStateList( length );
+ curState = 0;
+
+ for ( StateList::Iter st = fsm->stateList; st.lte(); st++ ) {
+ /* Both or neither should be set. */
+ assert( !( (st->eofTarget != 0) xor (st->eofActionTable.length() > 0) ) );
+
+ makeStateActions( st );
+ makeTransList( st );
+
+ setId( curState, st->alg.stateNum );
+ if ( st->isFinState() )
+ setFinal( curState );
+
+ /* If there is an eof target, make an eof transition. */
+ if ( st->eofTarget != 0 ) {
+ /* Find the eof actions. reduceActionTables registers every
+ * non-empty eof action table, so the lookup is expected to succeed;
+ * check it rather than dereference a null result. */
+ RedActionTable *eofActions = 0;
+ eofActions = actionTableMap.find( st->eofActionTable );
+ assert( eofActions != 0 );
+ setEofTrans( curState, st->eofTarget->alg.stateNum, eofActions->id );
+ }
+
+ curState += 1;
+ }
+}
+
+/* Record the entry points of the graph and the entry id of every token
+ * region. Also forwards the graph's lmRequiresErrorState flag to the reduced
+ * machine. */
+void RedFsmBuild::makeEntryPoints()
+{
+ if ( fsm->lmRequiresErrorState )
+ setForcedErrorState();
+
+ for ( EntryMap::Iter en = fsm->entryPoints; en.lte(); en++ ) {
+ /* Get the name instantiation from nameIndex. */
+ FsmState *state = en->value;
+ long entry = state->alg.stateNum;
+ addEntryPoint( en->key, entry );
+ }
+
+ for ( RegionList::Iter reg = pd->regionList; reg.lte(); reg++ ) {
+ assert( reg->impl->regionNameInst != 0 );
+
+ TokenRegion *use = reg;
+
+ /* A region with zeroLel set is entered through its ignoreOnly
+ * region instead. */
+ if ( use->zeroLel != 0 )
+ use = use->ignoreOnly;
+
+ NameInst *regionName = use->impl->regionNameInst;
+ addRegionToEntry( reg->id, regionName->id );
+ }
+}
+
+/* Build the reduced machine from the graph: action tables and actions
+ * first, then the start and error state numbers, then entry points, and
+ * finally the states. The state numbers must be known before makeStateList
+ * runs because initStateList turns them into pointers. */
+void RedFsmBuild::makeMachine()
+{
+ /* Action tables. */
+ reduceActionTables();
+
+ makeActionList();
+ makeActionTableList();
+ makeConditions();
+
+ /* Start state. */
+ startState = fsm->startState->alg.stateNum;
+
+ /* Error state. */
+ if ( fsm->errState != 0 )
+ errState = fsm->errState->alg.stateNum;
+
+ makeEntryPoints();
+ makeStateList();
+}
+
+/* Intentionally empty; called from makeMachine. */
+void RedFsmBuild::makeConditions()
+{
+}
+
+/* Allocate a new RedFsm, build it as a complete machine from the graph, run
+ * the post-processing passes on it and return it. The result is also kept in
+ * the redFsm member; ownership of the allocation is not released here. */
+RedFsm *RedFsmBuild::reduceMachine()
+{
+ redFsm = new RedFsm();
+ redFsm->wantComplete = true;
+
+ /* Open the definition. */
+ makeMachine();
+
+ /* Do this before distributing transitions out to singles and defaults
+ * makes life easier. */
+ redFsm->maxKey = findMaxKey();
+
+ redFsm->assignActionLocs();
+
+ /* Find the first final state (The final state with the lowest id). */
+ redFsm->findFirstFinState();
+
+ /* Choose default transitions and the single transition. */
+ redFsm->chooseDefaultSpan();
+
+ /* Maybe do flat expand, otherwise choose single. */
+ redFsm->chooseSingle();
+
+ /* Set up incoming transitions. */
+ redFsm->setInTrans();
+
+ /* Analyze Machine will find the final action reference counts, among
+ * other things. We will use these in reporting the usage
+ * of fsm directives in action code. */
+ redFsm->analyzeMachine();
+
+ return redFsm;
+}
+
diff --git a/src/redbuild.h b/src/redbuild.h
new file mode 100644
index 00000000..e9ad0465
--- /dev/null
+++ b/src/redbuild.h
@@ -0,0 +1,161 @@
+/*
+ * Copyright 2006-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _COLM_FSMREDUCE_H
+#define _COLM_FSMREDUCE_H
+
+#include <iostream>
+
+#include <avltree.h>
+
+#include "fsmgraph.h"
+#include "compiler.h"
+
+/* Forwards. */
+struct FsmTrans;
+struct FsmGraph;
+struct Compiler;
+struct FsmCodeGen;
+struct RedFsm;
+struct GenCondSpace;
+struct Condition;
+
+/* An action table together with the id assigned to it. Elements live in an
+ * AVL tree keyed on the action table itself (see ActionTableMap). */
+struct RedActionTable
+:
+ public AvlTreeEl<RedActionTable>
+{
+ /* The id starts at zero; the builder assigns the real id on insertion. */
+ RedActionTable( const ActionTable &key )
+ :
+ key(key),
+ id(0)
+ { }
+
+ const ActionTable &getKey()
+ { return key; }
+
+ ActionTable key;
+ int id;
+};
+
+typedef AvlTree<RedActionTable, ActionTable, CmpActionTable> ActionTableMap;
+
+/* Cursor over a linked list of transitions. It caches the key range of the
+ * current transition and the pointer to its successor. */
+struct NextRedTrans
+{
+ Key lowKey, highKey;
+ FsmTrans *trans;
+ FsmTrans *next;
+
+ /* Refresh the cached fields from the current transition. Nothing is
+ * touched when the cursor is at the end (trans == 0). */
+ void load() {
+ if ( trans != 0 ) {
+ next = trans->next;
+ lowKey = trans->lowKey;
+ highKey = trans->highKey;
+ }
+ }
+
+ /* Start at t, which may be null. next is given a defined value up front
+ * because load() leaves it alone for a null transition. */
+ NextRedTrans( FsmTrans *t )
+ :
+ trans(t),
+ next(0)
+ {
+ load();
+ }
+
+ /* Move to the following transition. */
+ void increment() {
+ trans = next;
+ load();
+ }
+};
+
+/* Builds a reduced machine (RedFsm) from an FsmGraph. Construct it with the
+ * compiler and the graph, then call reduceMachine().
+ *
+ * NOTE(review): several private methods declared here (for example
+ * makeStateConditions, initCondSpaceList, newCondSpace, addStateCond,
+ * makeGetKeyExpr, makeInlineList, reduceTrans, makeLmSwitch, makeActionExec)
+ * have no definition in redbuild.cc as far as can be seen from this commit;
+ * they may be leftovers -- confirm before relying on them. */
+class RedFsmBuild
+{
+public:
+ RedFsmBuild( Compiler *pd, FsmGraph *fsm );
+ RedFsm *reduceMachine( );
+
+private:
+ void appendTrans( TransListVect &outList, Key lowKey, Key highKey, FsmTrans *trans );
+ void makeStateActions( FsmState *state );
+ void makeStateList();
+ void makeStateConditions( FsmState *state );
+
+ void initActionList( unsigned long length );
+ void newAction( int anum, char *name, int line, int col, Action *action );
+ void initActionTableList( unsigned long length );
+ void initCondSpaceList( ulong length );
+ void condSpaceItem( int cnum, long condActionId );
+ void newCondSpace( int cnum, int condSpaceId, Key baseKey );
+ void initStateCondList( int snum, ulong length );
+ void addStateCond( int snum, Key lowKey, Key highKey, long condNum );
+ void initStateList( unsigned long length );
+ void addRegionToEntry( int regionId, int entryId );
+ void addEntryPoint( int entryId, unsigned long entryState );
+ void setId( int snum, int id );
+ void initTransList( int snum, unsigned long length );
+ void newTrans( int snum, int tnum, Key lowKey, Key highKey,
+ long targ, long act );
+ void finishTransList( int snum );
+ void setFinal( int snum );
+ void setEofTrans( int snum, int eofTarget, int actId );
+ void setStateActions( int snum, long toStateAction,
+ long fromStateAction, long eofAction );
+ void setForcedErrorState();
+ void closeMachine();
+ Key findMaxKey();
+
+ void makeEntryPoints();
+ void makeGetKeyExpr();
+ void makeAccessExpr();
+ void makeCurStateExpr();
+ void makeConditions();
+ void makeInlineList( InlineList *inlineList, InlineItem *context );
+ void makeActionList();
+ void makeActionTableList();
+ void reduceTrans( FsmTrans *trans );
+ void reduceActionTables();
+ void makeTransList( FsmState *state );
+ void makeTrans( Key lowKey, Key highKey, FsmTrans *defTrans );
+ void makeAction( Action *action );
+ void makeLmSwitch( InlineItem *item );
+ void makeMachine();
+ void makeActionExec( InlineItem *item );
+ void makeActionExecTE( InlineItem *item );
+
+ /* Inputs. */
+ Compiler *pd;
+ FsmGraph *fsm;
+ /* Set of distinct action tables and the next id to hand out. */
+ ActionTableMap actionTableMap;
+ int nextActionTableId;
+
+ /* State numbers of the start and error states; -1 until known. */
+ int startState;
+ int errState;
+
+public:
+ /* The machine being built; allocated by reduceMachine(). */
+ RedFsm *redFsm;
+
+private:
+ /* Running indices used while the machine is built. */
+ int curAction;
+ int curActionTable;
+ int curTrans;
+ int curState;
+ int curCondSpace;
+ int curStateCond;
+};
+
+#endif /* _COLM_FSMREDUCE_H */
+
diff --git a/src/redfsm.cc b/src/redfsm.cc
new file mode 100644
index 00000000..d79a1e32
--- /dev/null
+++ b/src/redfsm.cc
@@ -0,0 +1,1049 @@
+/*
+ * Copyright 2006-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "redfsm.h"
+
+#include <assert.h>
+#include <stdbool.h>
+#include <string.h>
+#include <stdio.h>
+
+#include <sstream>
+#include <iostream>
+
+#include "fsmgraph.h"
+#include "parsetree.h"
+
+using std::ostringstream;
+
+/* Produce a printable label for an action: its name when one is set,
+ * otherwise its location written as "line:col". */
+string nameOrLoc( GenAction *genAction )
+{
+    /* Unnamed actions are labelled by their recorded location. */
+    if ( genAction->name == 0 ) {
+        ostringstream label;
+        label << genAction->loc.line;
+        label << ":";
+        label << genAction->loc.col;
+        return label.str();
+    }
+
+    return string( genAction->name );
+}
+
+/* Build an empty reduced machine. Every flag listed here starts out false
+ * and every other member listed here starts out at zero. */
+RedFsm::RedFsm()
+:
+    /* Options and id counters. */
+    wantComplete(false), forcedErrorState(false),
+    nextActionId(0), nextTransId(0),
+
+    /* Error handling and final state tracking. */
+    errState(0), errTrans(0),
+    firstFinState(0), numFinStates(0),
+
+    /* The all* members start out as zero. */
+    allActions(0), allActionTables(0), allStates(0),
+
+    /* Summary flags, all false until something sets them. */
+    bAnyToStateActions(false), bAnyFromStateActions(false),
+    bAnyRegActions(false), bAnyEofActions(false),
+    bAnyActionGotos(false), bAnyActionCalls(false),
+    bAnyActionRets(false), bAnyRegActionRets(false),
+    bAnyRegActionByValControl(false), bAnyRegNextStmt(false),
+    bAnyRegCurStateRef(false), bAnyRegBreak(false),
+    bAnyLmSwitchError(false), bAnyConditions(false)
+{
+}
+
+/* Report whether the action map holds at least one entry. */
+bool RedFsm::anyActions()
+{
+    const bool haveActions = actionMap.length() > 0;
+    return haveActions;
+}
+
+/* Append the state to the state list, then recursively append every state it
+ * can reach through its single, range and default transitions. A state that
+ * is already on the list is left alone. */
+void RedFsm::depthFirstOrdering( RedState *state )
+{
+    /* Each state is visited once only. */
+    if ( ! state->onStateList ) {
+        /* Going depth first, so the state goes on before its targets. */
+        state->onStateList = true;
+        stateList.append( state );
+
+        /* Transitions may sit in the singles, the ranges or the default at
+         * this point, so all three are followed. Follow the singles. */
+        for ( RedTransList::Iter single = state->outSingle; single.lte(); single++ ) {
+            RedState *targ = single->value->targ;
+            if ( targ != 0 )
+                depthFirstOrdering( targ );
+        }
+
+        /* Follow the ranges. */
+        for ( RedTransList::Iter range = state->outRange; range.lte(); range++ ) {
+            RedState *targ = range->value->targ;
+            if ( targ != 0 )
+                depthFirstOrdering( targ );
+        }
+
+        /* Follow the default transition, when there is one with a target. */
+        RedTrans *defTrans = state->defTrans;
+        if ( defTrans != 0 && defTrans->targ != 0 )
+            depthFirstOrdering( defTrans->targ );
+    }
+}
+
+/* Rebuild the state list in depth-first order, walking from the start state,
+ * then from every entry point, and finally from the error state when an
+ * error state has been forced. */
+void RedFsm::depthFirstOrdering()
+{
+    /* Remember the size so the rebuild can be checked at the end. */
+    int stateListLen = stateList.length();
+
+    /* No state is on the new list yet. */
+    for ( RedStateList::Iter st = stateList; st.lte(); st++ )
+        st->onStateList = false;
+
+    /* Clear out the list; the traversal below puts the states back. */
+    stateList.abandon();
+
+    /* Walk from the start state first, then from the other ways in. */
+    depthFirstOrdering( startState );
+    for ( RedStateSet::Iter en = entryPoints; en.lte(); en++ )
+        depthFirstOrdering( *en );
+    if ( forcedErrorState )
+        depthFirstOrdering( errState );
+
+    /* Every state must have made it back on. */
+    assert( stateList.length() == stateListLen );
+}
+
+/* Number the states 0, 1, 2, ... in the order they appear in the state
+ * list. nextStateId is left at one past the last id handed out. */
+void RedFsm::sequentialStateIds()
+{
+    /* Table based machines depend on the state numbers starting at zero. */
+    nextStateId = 0;
+
+    RedStateList::Iter st = stateList;
+    while ( st.lte() ) {
+        st->id = nextStateId;
+        nextStateId += 1;
+        st++;
+    }
+}
+
+/* Reorder the state list so that final states come after non-final ones.
+ * One forward pass is made over the original contents of the list and each
+ * final state met is moved to the end. */
+void RedFsm::sortStatesByFinal()
+{
+    /* The pass ends at the state that was last when we started. With an
+     * empty list there is nothing to do. */
+    RedState *const stop = stateList.tail;
+    if ( stop == 0 )
+        return;
+
+    RedState *cur = stateList.head;
+    for ( ;; ) {
+        /* Load the successor before the list is changed under us. */
+        RedState *following = cur->next;
+
+        /* Final states get thrown to the end. */
+        if ( cur->isFinal ) {
+            stateList.detach( cur );
+            stateList.append( cur );
+        }
+
+        if ( cur == stop )
+            break;
+        cur = following;
+    }
+}
+
+/* Hand out state ids so that every non-final state gets a lower id than any
+ * final state. The state list itself is not reordered. */
+void RedFsm::sortStateIdsByFinal()
+{
+    /* Table based machines depend on this starting at zero. */
+    nextStateId = 0;
+
+    /* Pass 0 numbers the non-final states, pass 1 numbers the final ones. */
+    for ( int pass = 0; pass < 2; pass++ ) {
+        const bool assignFinals = ( pass == 1 );
+        for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
+            const bool stateIsFinal = st->isFinal;
+            if ( stateIsFinal == assignFinals )
+                st->id = nextStateId++;
+        }
+    }
+}
+
+/* Three-way comparison of states by id, used as the compare policy of the
+ * merge sort in sortByStateId(). */
+struct CmpStateById
+{
+    /* Returns -1, 0 or 1 as st1's id is below, equal to or above st2's. */
+    static int compare( RedState *st1, RedState *st2 )
+    {
+        if ( st1->id == st2->id )
+            return 0;
+        return ( st1->id < st2->id ) ? -1 : 1;
+    }
+};
+
+/* Reorder the state list by ascending state id. The states are copied into a
+ * temporary array of pointers, merge sorted there, and appended back. */
+void RedFsm::sortByStateId()
+{
+    /* Gather the state pointers into an array. */
+    RedState **sorted = new RedState*[stateList.length()];
+    int filled = 0;
+    for ( RedStateList::Iter st = stateList; st.lte(); st++ )
+        sorted[filled++] = st;
+
+    /* Order the array by id. */
+    MergeSort<RedState*, CmpStateById> sorter;
+    sorter.sort( sorted, stateList.length() );
+
+    /* Rebuild the list from the ordered array. */
+    stateList.abandon();
+    int idx = 0;
+    while ( idx < filled ) {
+        stateList.append( sorted[idx] );
+        idx += 1;
+    }
+
+    delete[] sorted;
+}
+
+/* Set firstFinState to the final state with the lowest id. It is left as it
+ * was when the list contains no final state with a lower id than the one
+ * already recorded. */
+void RedFsm::findFirstFinState()
+{
+    for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
+        /* Only final states are candidates. */
+        if ( ! st->isFinal )
+            continue;
+
+        if ( firstFinState == 0 || st->id < firstFinState->id )
+            firstFinState = st;
+    }
+}
+
+/* Give each action table in the action map its location. Locations start at
+ * zero and each table advances the running offset by its length plus one. */
+void RedFsm::assignActionLocs()
+{
+    int offset = 0;
+    GenActionTableMap::Iter act = actionMap;
+    while ( act.lte() ) {
+        /* Record where this table starts, then step past its items and one
+         * extra slot. */
+        act->location = offset;
+        offset += act->key.length() + 1;
+        act++;
+    }
+}
+
+/* Decide whether the range at pos can be extended forward: true when, after
+ * a contiguous run of span-one ranges that could be displaced to the
+ * singles, another range using the same transition is found. */
+bool RedFsm::canExtend( const RedTransList &list, int pos )
+{
+    /* The transition a later range must share for the extension to work. */
+    RedTrans *const wanted = list[pos].value;
+
+    /* Scan the ranges after pos. The one before 'ahead' is always the range
+     * that was examined last. */
+    int ahead = pos + 1;
+    while ( ahead < list.length() ) {
+        /* There must be no gap between this range and the one before it. */
+        Key nextKey = list[ahead].lowKey;
+        nextKey.decrement();
+        if ( list[ahead-1].highKey != nextKey )
+            return false;
+
+        /* Found a range with the same transition: extension is possible. */
+        if ( list[ahead].value == wanted )
+            return true;
+
+        /* A range covering more than one key will not be moved to the
+         * singles, so it blocks the extension. */
+        unsigned long long nextSpan = keyOps->span( list[ahead].lowKey, list[ahead].highKey );
+        if ( nextSpan > 1 )
+            return false;
+
+        ahead += 1;
+    }
+    return false;
+}
+
+/* Rewrite the state's range list: ranges covering a single key go to the
+ * singles list, and a range that canExtend() accepts absorbs the matching
+ * range ahead of it after the ranges in between are moved to the singles. */
+void RedFsm::moveTransToSingle( RedState *state )
+{
+    RedTransList &ranges = state->outRange;
+    RedTransList &singles = state->outSingle;
+
+    int cur = 0;
+    while ( cur < ranges.length() ) {
+        const int ahead = cur + 1;
+
+        if ( canExtend( ranges, cur ) ) {
+            /* Displace the in-between ranges to the singles until the range
+             * sharing our transition is next in line. */
+            while ( ranges[cur].value != ranges[ahead].value ) {
+                singles.append( ranges[ahead] );
+                ranges.remove( ahead );
+            }
+
+            /* Swallow that range. Stay at cur, it may extend again. */
+            ranges[cur].highKey = ranges[ahead].highKey;
+            ranges.remove( ahead );
+            continue;
+        }
+
+        if ( keyOps->span( ranges[cur].lowKey, ranges[cur].highKey ) == 1 ) {
+            /* A one-key range becomes a single. The next range slides into
+             * cur, so do not advance. */
+            singles.append( ranges[cur] );
+            ranges.remove( cur );
+            continue;
+        }
+
+        /* Keeping it in the ranges. */
+        cur += 1;
+    }
+}
+
+/* Run moveTransToSingle() over every state in the state list, pulling the
+ * suitable single-key transitions out of each state's ranges. */
+void RedFsm::chooseSingle()
+{
+    RedStateList::Iter st = stateList;
+    while ( st.lte() ) {
+        moveTransToSingle( st );
+        st++;
+    }
+}
+
+/* Give every state a flat transition table (transList) with one slot per
+ * key between the state's lowKey and highKey. States without ranges get a
+ * null table and zero keys. The condition keys are zeroed for every state. */
+void RedFsm::makeFlat()
+{
+    for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
+        st->condLowKey = 0;
+        st->condHighKey = 0;
+
+        /* Nothing to flatten when the state has no ranges. */
+        if ( st->outRange.length() == 0 ) {
+            st->lowKey = st->highKey = 0;
+            st->transList = 0;
+            continue;
+        }
+
+        /* The table runs from the first range's low key to the last range's
+         * high key. It starts out all null. */
+        st->lowKey = st->outRange[0].lowKey;
+        st->highKey = st->outRange[st->outRange.length()-1].highKey;
+        unsigned long long width = keyOps->span( st->lowKey, st->highKey );
+        st->transList = new RedTrans*[ width ];
+        memset( st->transList, 0, sizeof(RedTrans*)*width );
+
+        /* Point every slot covered by a range at that range's transition. */
+        for ( RedTransList::Iter range = st->outRange; range.lte(); range++ ) {
+            unsigned long long first = keyOps->span( st->lowKey, range->lowKey )-1;
+            unsigned long long count = keyOps->span( range->lowKey, range->highKey );
+            unsigned long long slot = 0;
+            while ( slot < count ) {
+                st->transList[first+slot] = range->value;
+                slot += 1;
+            }
+        }
+
+        /* Slots no range covered take the default transition. */
+        for ( unsigned long long slot = 0; slot < width; slot++ ) {
+            if ( st->transList[slot] != 0 )
+                continue;
+            st->transList[slot] = st->defTrans;
+        }
+    }
+}
+
+
+/* Install defTrans as the state's default transition and drop from the
+ * state's outRange every range that uses it. */
+void RedFsm::moveToDefault( RedTrans *defTrans, RedState *state )
+{
+    /* Collect the ranges that do not take the default. */
+    RedTransList kept;
+    for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) {
+        if ( rtel->value == defTrans )
+            continue;
+        kept.append( *rtel );
+    }
+
+    /* The collected ranges become the state's ranges. */
+    state->outRange.transfer( kept );
+
+    /* Record the default. */
+    state->defTrans = defTrans;
+}
+
+/* Report whether the ranges cover the whole alphabet: the first range must
+ * start no higher than keyOps->minKey, each range must begin right after the
+ * previous one ends, and the last must reach keyOps->maxKey. */
+bool RedFsm::alphabetCovered( RedTransList &outRange )
+{
+    /* No ranges, no coverage. */
+    if ( outRange.length() == 0 )
+        return false;
+
+    /* The first range must start at the lower bound of the alphabet. */
+    if ( keyOps->minKey < outRange[0].lowKey )
+        return false;
+
+    /* Each range must sit directly after the one before it. */
+    for ( int idx = 1; idx < outRange.length(); idx++ ) {
+        Key expected = outRange[idx-1].highKey;
+        expected.increment();
+        if ( expected != outRange[idx].lowKey )
+            return false;
+    }
+
+    /* The last range must extend to the upper bound. */
+    RedTransEl *last = &outRange[outRange.length()-1];
+    return ! ( last->highKey < keyOps->maxKey );
+}
+
+/* Pick, among the transitions used by the state's ranges, the one whose
+ * ranges add up to the largest key span. Returns null when no transition
+ * has a span above zero, which includes the case of no ranges at all. */
+RedTrans *RedFsm::chooseDefaultSpan( RedState *state )
+{
+    /* Gather the transitions used by the ranges into a set. */
+    RedTransPtrSet candidates;
+    for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ )
+        candidates.insert( rtel->value );
+
+    /* One running total per set element, indexed by position in the set. */
+    unsigned long long *total = new unsigned long long[candidates.length()];
+    memset( total, 0, sizeof(unsigned long long) * candidates.length() );
+    for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) {
+        /* Find the slot belonging to this range's transition. */
+        RedTrans **inSet = candidates.find( rtel->value );
+        int slot = inSet - candidates.data;
+        total[slot] += keyOps->span( rtel->lowKey, rtel->highKey );
+    }
+
+    /* Keep the first transition that holds the largest total. */
+    RedTrans *best = 0;
+    unsigned long long bestTotal = 0;
+    for ( RedTransPtrSet::Iter el = candidates; el.lte(); el++ ) {
+        const unsigned long long candidateTotal = total[el.pos()];
+        if ( candidateTotal > bestTotal ) {
+            bestTotal = candidateTotal;
+            best = *el;
+        }
+    }
+
+    delete[] total;
+    return best;
+}
+
+/* For each state whose ranges cover the whole alphabet, choose the default
+ * transition by largest span and move it out of the ranges. */
+void RedFsm::chooseDefaultSpan()
+{
+    for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
+        /* States with gaps in their ranges are skipped. Per the original
+         * note, this avoids transitions in the out range that go to error
+         * and avoids the need for an ERR state. */
+        if ( ! alphabetCovered( st->outRange ) )
+            continue;
+
+        /* Choose by largest span, then take the choice out of the ranges
+         * and store it as the default. */
+        RedTrans *chosen = chooseDefaultSpan( st );
+        moveToDefault( chosen, st );
+    }
+}
+
+/* Look for a range transition that targets the state following this one in
+ * the state list (state->next). Returns the first such transition found in
+ * outRange, or null when there is none. */
+RedTrans *RedFsm::chooseDefaultGoto( RedState *state )
+{
+    /* No transition set is needed here: the ranges are scanned directly.
+     * The unused set the function used to construct has been removed. */
+    for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) {
+        if ( rtel->value->targ == state->next )
+            return rtel->value;
+    }
+    return 0;
+}
+
+/* Choose a default transition for every state: preferably one that goes to
+ * the next state in the list, otherwise the one with the largest span. The
+ * choice (possibly null) is moved out of the ranges into the default. */
+void RedFsm::chooseDefaultGoto()
+{
+    RedStateList::Iter st = stateList;
+    while ( st.lte() ) {
+        /* Try the next-state rule first and fall back on span. */
+        RedTrans *chosen = chooseDefaultGoto( st );
+        if ( chosen == 0 )
+            chosen = chooseDefaultSpan( st );
+
+        /* Take the choice out of the ranges and store it as the default. */
+        moveToDefault( chosen, st );
+        st++;
+    }
+}
+
+/* Pick, among the transitions used by the state's ranges, the one used by
+ * the greatest number of ranges. Returns null when the state has no ranges. */
+RedTrans *RedFsm::chooseDefaultNumRanges( RedState *state )
+{
+    /* Gather the transitions used by the ranges into a set. */
+    RedTransPtrSet candidates;
+    for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ )
+        candidates.insert( rtel->value );
+
+    /* One use count per set element, indexed by position in the set. */
+    int *uses = new int[candidates.length()];
+    memset( uses, 0, sizeof(int) * candidates.length() );
+    for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) {
+        /* Find the slot belonging to this range's transition and bump it. */
+        RedTrans **inSet = candidates.find( rtel->value );
+        uses[inSet - candidates.data] += 1;
+    }
+
+    /* Keep the first transition that holds the highest count. */
+    RedTrans *best = 0;
+    int bestUses = 0;
+    for ( RedTransPtrSet::Iter el = candidates; el.lte(); el++ ) {
+        const int candidateUses = uses[el.pos()];
+        if ( candidateUses > bestUses ) {
+            bestUses = candidateUses;
+            best = *el;
+        }
+    }
+
+    delete[] uses;
+    return best;
+}
+
+/* Choose a default transition for every state by number of ranges using it,
+ * and move the choice (possibly null) out of the ranges into the default. */
+void RedFsm::chooseDefaultNumRanges()
+{
+    RedStateList::Iter st = stateList;
+    while ( st.lte() ) {
+        /* Choose, then take the choice out of the ranges and store it. */
+        RedTrans *chosen = chooseDefaultNumRanges( st );
+        moveToDefault( chosen, st );
+        st++;
+    }
+}
+
+RedTrans *RedFsm::getErrorTrans( )
+{
+ /* If the error trans has not been made aready, make it. */
+ if ( errTrans == 0 ) {
+ /* This insert should always succeed since no transition created by
+ * the user can point to the error state. */
+ errTrans = new RedTrans( getErrorState(), 0, nextTransId++ );
+ RedTrans *inRes = transSet.insert( errTrans );
+ assert( inRes != 0 );
+ }
+ return errTrans;
+}
+
+RedState *RedFsm::getErrorState()
+{
+ /* Something went wrong. An error state is needed but one was not supplied
+ * by the frontend. */
+ assert( errState != 0 );
+ return errState;
+}
+
+
+/* Return the reduced transition for the given target and action. An
+ * existing one from the transition set is reused when found, otherwise a new
+ * one is created with the next transition id and inserted. */
+RedTrans *RedFsm::allocateTrans( RedState *targ, RedAction *action )
+{
+    /* Search the set using a temporary with the same target and action. */
+    RedTrans probe( targ, action, 0 );
+    RedTrans *found = transSet.find( &probe );
+    if ( found != 0 )
+        return found;
+
+    /* Not there yet, make it and remember it. */
+    RedTrans *created = new RedTrans( targ, action, nextTransId++ );
+    transSet.insert( created );
+    return created;
+}
+
+/* Split the states into nparts partitions following the current order of the
+ * state list, and record the partition number on each state. Partition
+ * sizes differ by at most one: the first (length % nparts) partitions get
+ * the extra state. nparts must be positive. */
+void RedFsm::partitionFsm( int nparts )
+{
+    /* The division and modulus below are undefined for zero and
+     * meaningless for a negative count, so fail loudly instead. */
+    assert( nparts > 0 );
+
+    /* At this point the states are ordered by a depth-first traversal. We
+     * will allocate to partitions based on this ordering. */
+    this->nParts = nparts;
+    int partSize = stateList.length() / nparts;
+    int remainder = stateList.length() % nparts;
+
+    /* Size of the partition currently being filled. The leftover states are
+     * handed out one each to the earliest partitions. */
+    int numInPart = partSize;
+    int partition = 0;
+    if ( remainder-- > 0 )
+        numInPart += 1;
+
+    for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
+        st->partition = partition;
+
+        /* Move on to the next partition once this one is full. */
+        numInPart -= 1;
+        if ( numInPart == 0 ) {
+            partition += 1;
+            numInPart = partSize;
+            if ( remainder-- > 0 )
+                numInPart += 1;
+        }
+    }
+}
+
+/* Build, for every state, the array of transitions that target it
+ * (inTrans) and its length (numInTrans). The counting pass adds to whatever
+ * numInTrans already holds. */
+void RedFsm::setInTrans()
+{
+    /* Count the transitions coming into each state. */
+    for ( RedTransSet::Iter trans = transSet; trans.lte(); trans++ ) {
+        RedState *targ = trans->targ;
+        targ->numInTrans += 1;
+    }
+
+    /* Allocate each state's array from its count, then reset the count so it
+     * can serve as the fill position in the next pass. */
+    RedStateList::Iter st = stateList;
+    while ( st.lte() ) {
+        st->inTrans = new RedTrans*[st->numInTrans];
+        st->numInTrans = 0;
+        st++;
+    }
+
+    /* Store each transition in its target's array. */
+    for ( RedTransSet::Iter trans = transSet; trans.lte(); trans++ ) {
+        RedState *targ = trans->targ;
+        targ->inTrans[targ->numInTrans] = trans;
+        targ->numInTrans += 1;
+    }
+}
+
+/* Recompute the max* members from the current states, transition set and
+ * action map. Every maximum is reset first; the ones not computed below are
+ * left at zero. */
+void RedFsm::setValueLimits()
+{
+    /* Maximums gathered from the states. */
+    maxSingleLen = 0;
+    maxRangeLen = 0;
+    maxKeyOffset = 0;
+    maxIndexOffset = 0;
+    maxSpan = 0;
+    maxFlatIndexOffset = 0;
+
+    /* Maximums gathered from the action map. */
+    maxActListId = 0;
+    maxActionLoc = 0;
+    maxActArrItem = 0;
+
+    /* Condition related maximums, only reset here. */
+    maxCondSpan = 0;
+    maxCondOffset = 0;
+    maxCondLen = 0;
+    maxCondSpaceId = 0;
+    maxCondIndexOffset = 0;
+
+    /* Index zero is reserved for no value in both of these, so the maximum
+     * is one more than it would be if counting started at zero. */
+    maxIndex = transSet.length();
+    maxCond = 0;
+
+    /* The last state id assigned is nextStateId - 1. */
+    maxState = nextStateId - 1;
+
+    for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
+        /* Longest singles list and longest ranges list. */
+        if ( maxSingleLen < st->outSingle.length() )
+            maxSingleLen = st->outSingle.length();
+        if ( maxRangeLen < st->outRange.length() )
+            maxRangeLen = st->outRange.length();
+
+        /* Widest key span among the states that have a flat table. */
+        if ( st->transList != 0 ) {
+            unsigned long long span = keyOps->span( st->lowKey, st->highKey );
+            if ( span > maxSpan )
+                maxSpan = span;
+        }
+
+        /* The offsets for the state after the last are not used, so the
+         * last state adds nothing to them. */
+        if ( st.last() )
+            continue;
+
+        maxKeyOffset += st->outSingle.length() + st->outRange.length()*2;
+        maxIndexOffset += st->outSingle.length() + st->outRange.length() + 1;
+
+        if ( st->transList != 0 )
+            maxFlatIndexOffset += keyOps->span( st->lowKey, st->highKey );
+        maxFlatIndexOffset += 1;
+    }
+
+    for ( GenActionTableMap::Iter at = actionMap; at.lte(); at++ ) {
+        /* Largest action list id, plus one. */
+        if ( maxActListId < at->actListId+1 )
+            maxActListId = at->actListId+1;
+
+        /* Largest location in the action array, plus one. */
+        if ( maxActionLoc < at->location+1 )
+            maxActionLoc = at->location+1;
+
+        /* Largest value going into the action array: table lengths and the
+         * action ids of the items. */
+        if ( maxActArrItem < at->key.length() )
+            maxActArrItem = at->key.length();
+        for ( GenActionTable::Iter item = at->key; item.lte(); item++ ) {
+            if ( maxActArrItem < item->value->actionId )
+                maxActArrItem = item->value->actionId;
+        }
+    }
+}
+
+/* Count the references to action tables and to the actions inside them.
+ * Each use by a single, range or default transition adds to numTransRefs,
+ * and each use as a to-state, from-state or EOF action adds to the matching
+ * counter, on both the table and every item in it. */
+void RedFsm::findFinalActionRefs()
+{
+    for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
+        /* Singles first, then ranges. Both are counted the same way. */
+        RedTransList *transLists[2] = { &st->outSingle, &st->outRange };
+        for ( int which = 0; which < 2; which++ ) {
+            for ( RedTransList::Iter rtel = *transLists[which]; rtel.lte(); rtel++ ) {
+                if ( rtel->value->action == 0 )
+                    continue;
+
+                rtel->value->action->numTransRefs += 1;
+                for ( GenActionTable::Iter item = rtel->value->action->key; item.lte(); item++ )
+                    item->value->numTransRefs += 1;
+            }
+        }
+
+        /* The default transition counts as a transition reference too. */
+        if ( st->defTrans != 0 && st->defTrans->action != 0 ) {
+            st->defTrans->action->numTransRefs += 1;
+            for ( GenActionTable::Iter item = st->defTrans->action->key; item.lte(); item++ )
+                item->value->numTransRefs += 1;
+        }
+
+        /* To-state action. */
+        if ( st->toStateAction != 0 ) {
+            st->toStateAction->numToStateRefs += 1;
+            for ( GenActionTable::Iter item = st->toStateAction->key; item.lte(); item++ )
+                item->value->numToStateRefs += 1;
+        }
+
+        /* From-state action. */
+        if ( st->fromStateAction != 0 ) {
+            st->fromStateAction->numFromStateRefs += 1;
+            for ( GenActionTable::Iter item = st->fromStateAction->key; item.lte(); item++ )
+                item->value->numFromStateRefs += 1;
+        }
+
+        /* EOF action. */
+        if ( st->eofAction != 0 ) {
+            st->eofAction->numEofRefs += 1;
+            for ( GenActionTable::Iter item = st->eofAction->key; item.lte(); item++ )
+                item->value->numEofRefs += 1;
+        }
+    }
+}
+
+/* Walk an action's inline list, children included, and set
+ * bAnyLmSwitchError when a referenced action contains an LmSwitch item whose
+ * token region has lmSwitchHandlesError set. */
+void RedFsm::analyzeAction( GenAction *act, InlineList *inlineList )
+{
+    for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
+        /* Only actions referenced from somewhere are of interest. */
+        if ( ( act->numTransRefs > 0 || act->numToStateRefs > 0 ||
+                act->numFromStateRefs > 0 || act->numEofRefs > 0 ) &&
+                item->type == InlineItem::LmSwitch &&
+                item->tokenRegion->lmSwitchHandlesError )
+        {
+            bAnyLmSwitchError = true;
+        }
+
+        /* Descend into nested items. */
+        if ( item->children != 0 )
+            analyzeAction( act, item->children );
+    }
+}
+
+/* Walk an inline list and all nested child lists on behalf of a reduced
+ * action. Nothing is recorded by the walk as it stands. */
+void RedFsm::analyzeActionList( RedAction *redAct, InlineList *inlineList )
+{
+    InlineList::Iter item = *inlineList;
+    while ( item.lte() ) {
+        if ( item->children != 0 )
+            analyzeActionList( redAct, item->children );
+        item++;
+    }
+}
+
+/* Number the referenced actions 0, 1, 2, ... in action list order.
+ * Unreferenced actions are skipped and keep whatever id they had. The
+ * counter is local to this function; the nextActionId member is not
+ * touched. */
+void RedFsm::assignActionIds()
+{
+    int nextId = 0;
+    for ( GenActionList::Iter act = genActionList; act.lte(); act++ ) {
+        /* Only ever interested in referenced actions. */
+        if ( numRefs( act ) <= 0 )
+            continue;
+
+        act->actionId = nextId;
+        nextId += 1;
+    }
+}
+
+/* Collect summary information about the machine: reference counts, the
+ * bAny* flags for the kinds of actions in use, the per-state current-state
+ * reference flag, action ids and finally the value limits. */
+void RedFsm::analyzeMachine()
+{
+    /* The reference counts come first, the checks below read them. */
+    findFinalActionRefs();
+
+    for ( GenActionList::Iter act = genActionList; act.lte(); act++ ) {
+        /* Note which kinds of action reference occur at all. */
+        if ( act->numTransRefs > 0 )
+            bAnyRegActions = true;
+        if ( act->numToStateRefs > 0 )
+            bAnyToStateActions = true;
+        if ( act->numFromStateRefs > 0 )
+            bAnyFromStateActions = true;
+        if ( act->numEofRefs > 0 )
+            bAnyEofActions = true;
+
+        /* Look through the action's inline list for things of interest. */
+        analyzeAction( act, act->inlineList );
+    }
+
+    /* Walk the inline list of every action in every reduced action table. */
+    for ( GenActionTableMap::Iter redAct = actionMap; redAct.lte(); redAct++ ) {
+        GenActionTable::Iter act = redAct->key;
+        while ( act.lte() ) {
+            analyzeActionList( redAct, act->value->inlineList );
+            act++;
+        }
+    }
+
+    /* Flag the states that have an outgoing transition whose action reports
+     * anyCurStateRef(). */
+    for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
+        /* Singles first, then ranges. */
+        RedTransList *transLists[2] = { &st->outSingle, &st->outRange };
+        for ( int which = 0; which < 2; which++ ) {
+            for ( RedTransList::Iter rtel = *transLists[which]; rtel.lte(); rtel++ ) {
+                if ( rtel->value->action == 0 )
+                    continue;
+                if ( rtel->value->action->anyCurStateRef() )
+                    st->bAnyRegCurStateRef = true;
+            }
+        }
+
+        /* And the default transition, when it carries an action. */
+        if ( st->defTrans == 0 || st->defTrans->action == 0 )
+            continue;
+        if ( st->defTrans->action->anyCurStateRef() )
+            st->bAnyRegCurStateRef = true;
+    }
+
+    /* Give ids to the actions that are referenced. */
+    assignActionIds();
+
+    /* Work out the maximums of the various values, used for deciding types. */
+    setValueLimits();
+}
+
+/* Table value for a transition's action: the action's location plus one, or
+ * zero when the transition has no action. */
+int transAction( RedTrans *trans )
+{
+    if ( trans->action == 0 )
+        return 0;
+    return trans->action->location+1;
+}
+
+/* Table value for a state's to-state action: the action's location plus one,
+ * or zero when the state has none. */
+int toStateAction( RedState *state )
+{
+    if ( state->toStateAction == 0 )
+        return 0;
+    return state->toStateAction->location+1;
+}
+
+/* Table value for a state's from-state action: the action's location plus
+ * one, or zero when the state has none. */
+int fromStateAction( RedState *state )
+{
+    if ( state->fromStateAction == 0 )
+        return 0;
+    return state->fromStateAction->location+1;
+}
+
+/* Table value for a state's EOF action: the action's location plus one, or
+ * zero when the state has none. */
+int eofAction( RedState *state )
+{
+    if ( state->eofAction == 0 )
+        return 0;
+    return state->eofAction->location+1;
+}
+
+
+/* Build the table set describing this machine. The state list is sorted by
+ * id first, then the action array, the per-state arrays, the transition
+ * arrays, the start/final/error state values, the action pointer array and
+ * the region entry array are filled in. Everything is allocated with new
+ * and nothing is released here. */
+fsm_tables *RedFsm::makeFsmTables()
+{
+    /* The fsm runtime needs states sorted by id. */
+    sortByStateId();
+
+    fsm_tables *fsmTables = new fsm_tables;
+    fsmTables->num_states = stateList.length();
+
+    /*
+     * actions
+     *
+     * Slot zero holds a zero. After it, each action table contributes its
+     * length followed by the action ids of its items.
+     */
+    fsmTables->num_actions = 1;
+    for ( GenActionTableMap::Iter act = actionMap; act.lte(); act++ )
+        fsmTables->num_actions += 1 + act->key.length();
+
+    fsmTables->actions = new long[fsmTables->num_actions];
+    int actionPos = 0;
+    fsmTables->actions[actionPos] = 0;
+    actionPos += 1;
+    for ( GenActionTableMap::Iter act = actionMap; act.lte(); act++ ) {
+        fsmTables->actions[actionPos] = act->key.length();
+        actionPos += 1;
+        for ( GenActionTable::Iter item = act->key; item.lte(); item++ ) {
+            fsmTables->actions[actionPos] = item->value->actionId;
+            actionPos += 1;
+        }
+    }
+
+    /*
+     * Arrays with one entry per state: keyOffset, singleLengths,
+     * rangeLengths, indexOffsets, toStateActions, fromStateActions,
+     * eofActions and eofTargs. The sizes of the per-transition arrays are
+     * totalled in the same pass.
+     */
+    fsmTables->key_offsets = new long[fsmTables->num_states];
+    fsmTables->single_lengths = new long[fsmTables->num_states];
+    fsmTables->range_lengths = new long[fsmTables->num_states];
+    fsmTables->index_offsets = new long[fsmTables->num_states];
+    fsmTables->to_state_actions = new long[fsmTables->num_states];
+    fsmTables->from_state_actions = new long[fsmTables->num_states];
+    fsmTables->eof_actions = new long[fsmTables->num_states];
+    fsmTables->eof_targs = new long[fsmTables->num_states];
+
+    fsmTables->num_trans_keys = 0;
+    fsmTables->numTransTargsWI = 0;
+    fsmTables->numTransActionsWI = 0;
+
+    int statePos = 0, curKeyOffset = 0, curIndOffset = 0;
+    for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
+        /* Where this state's keys and indices begin. */
+        fsmTables->key_offsets[statePos] = curKeyOffset;
+        fsmTables->index_offsets[statePos] = curIndOffset;
+
+        /* How many singles and ranges it has. */
+        fsmTables->single_lengths[statePos] = st->outSingle.length();
+        fsmTables->range_lengths[statePos] = st->outRange.length();
+
+        /* Its state actions. */
+        fsmTables->to_state_actions[statePos] = toStateAction( st );
+        fsmTables->from_state_actions[statePos] = fromStateAction( st );
+        fsmTables->eof_actions[statePos] = eofAction( st );
+
+        /* Its EOF target, -1 when there is no EOF transition. */
+        int eofTarg = -1;
+        if ( st->eofTrans != 0 )
+            eofTarg = st->eofTrans->targ->id;
+        fsmTables->eof_targs[statePos] = eofTarg;
+
+        /* A single takes one key, a range takes two. */
+        curKeyOffset += st->outSingle.length() + st->outRange.length()*2;
+        fsmTables->num_trans_keys += st->outSingle.length();
+        fsmTables->num_trans_keys += 2 * st->outRange.length();
+
+        /* Each single and range takes one index entry, and so does the
+         * default transition when present. */
+        curIndOffset += st->outSingle.length() + st->outRange.length();
+        fsmTables->numTransTargsWI += st->outSingle.length();
+        fsmTables->numTransTargsWI += st->outRange.length();
+        fsmTables->numTransActionsWI += st->outSingle.length();
+        fsmTables->numTransActionsWI += st->outRange.length();
+        if ( st->defTrans != 0 ) {
+            curIndOffset += 1;
+            fsmTables->numTransTargsWI += 1;
+            fsmTables->numTransActionsWI += 1;
+        }
+
+        statePos += 1;
+    }
+
+    /*
+     * transKeys, transTargsWI and transActionsWI
+     *
+     * For each state the singles come first, then the ranges, then the
+     * default transition (which has no keys).
+     */
+    fsmTables->trans_keys = new char[fsmTables->num_trans_keys];
+    fsmTables->transTargsWI = new long[fsmTables->numTransTargsWI];
+    fsmTables->transActionsWI = new long[fsmTables->numTransActionsWI];
+
+    int keyPos = 0, transPos = 0;
+    for ( RedStateList::Iter st = stateList; st.lte(); st++ ) {
+        for ( RedTransList::Iter stel = st->outSingle; stel.lte(); stel++ ) {
+            fsmTables->trans_keys[keyPos++] = stel->lowKey.getVal();
+
+            fsmTables->transTargsWI[transPos] = stel->value->targ->id;
+            fsmTables->transActionsWI[transPos] = transAction( stel->value );
+            transPos += 1;
+        }
+
+        for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) {
+            fsmTables->trans_keys[keyPos++] = rtel->lowKey.getVal();
+            fsmTables->trans_keys[keyPos++] = rtel->highKey.getVal();
+
+            fsmTables->transTargsWI[transPos] = rtel->value->targ->id;
+            fsmTables->transActionsWI[transPos] = transAction( rtel->value );
+            transPos += 1;
+        }
+
+        if ( st->defTrans != 0 ) {
+            fsmTables->transTargsWI[transPos] = st->defTrans->targ->id;
+            fsmTables->transActionsWI[transPos] = transAction( st->defTrans );
+            transPos += 1;
+        }
+    }
+
+    /* Start state. */
+    fsmTables->start_state = startState->id;
+
+    /* First final state, or nextStateId when there is none. */
+    fsmTables->first_final = ( firstFinState != 0 ) ?
+            firstFinState->id : nextStateId;
+
+    /* The error state, or -1 when there is none. */
+    fsmTables->error_state = ( errState != 0 ) ?
+            errState->id : -1;
+
+    /* The array pointing to actions, in action list order. */
+    fsmTables->num_action_switch = genActionList.length();
+    fsmTables->action_switch = new GenAction*[fsmTables->num_action_switch];
+    int switchPos = 0;
+    for ( GenActionList::Iter act = genActionList; act.lte(); act++ ) {
+        fsmTables->action_switch[switchPos] = act;
+        switchPos += 1;
+    }
+
+    /*
+     * entryByRegion
+     *
+     * Slot zero holds the error state. The regions follow from slot one.
+     */
+    fsmTables->num_regions = regionToEntry.length()+1;
+    fsmTables->entry_by_region = new long[fsmTables->num_regions];
+    fsmTables->entry_by_region[0] = fsmTables->error_state;
+
+    int regionPos = 1;
+    for ( RegionToEntry::Iter en = regionToEntry; en.lte(); en++ ) {
+        /* Find the entry state from the entry id. A region with no entry
+         * in the map gets the error state. */
+        RedEntryMapEl *entryMapEl = redEntryMap.find( *en );
+        fsmTables->entry_by_region[regionPos] = entryMapEl != 0 ? entryMapEl->value
+                : fsmTables->error_state;
+        regionPos += 1;
+    }
+
+    return fsmTables;
+}
+
+
diff --git a/src/redfsm.h b/src/redfsm.h
new file mode 100644
index 00000000..618fbd61
--- /dev/null
+++ b/src/redfsm.h
@@ -0,0 +1,479 @@
+/*
+ * Copyright 2006-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _COLM_REDFSM_H
+#define _COLM_REDFSM_H
+
+#include <assert.h>
+#include <string.h>
+
+#include <string>
+
+#include <avlbasic.h>
+#include <avltree.h>
+#include <avlmap.h>
+#include <bstmap.h>
+#include <vector.h>
+#include <dlist.h>
+#include <bstset.h>
+#include <mergesort.h>
+#include <sbstmap.h>
+#include <sbstset.h>
+#include <sbsttable.h>
+
+#include "keyops.h"
+#include "compare.h"
+#include "global.h"
+#include "pdarun.h"
+
+#define TRANS_ERR_TRANS 0
+#define STATE_ERR_STATE 0
+#define FUNC_NO_FUNC 0
+
+using std::string;
+
+struct RedState;
+struct InlineList;
+struct Compiler;
+struct ObjectField;
+
+/* Element in list of actions. Contains the string for the code to execute. */
+struct GenAction
+{
+ /* Data collected during parse. */
+ InputLoc loc;
+ char *name;
+ InlineList *inlineList;
+ int actionId;
+ MarkType markType;
+ ObjectField *objField;
+ long markId;
+
+ int numTransRefs; /* The four counters from here down are summed by numRefs( GenAction* ). */
+ int numToStateRefs;
+ int numFromStateRefs;
+ int numEofRefs;
+
+ GenAction *prev, *next; /* Presumably the links used by GenActionList (DList<GenAction>) -- confirm. */
+};
+
+typedef DList<GenAction> GenActionList;
+string nameOrLoc( GenAction *genAction );
+
+/* Total number of references to an action in the final machine: the sum of
+ * its transition, to-state, from-state and EOF reference counters. */
+inline int numRefs( GenAction *genAction )
+{
+	int total = genAction->numTransRefs;
+	total = total + genAction->numToStateRefs;
+	total = total + genAction->numFromStateRefs;
+	total = total + genAction->numEofRefs;
+	return total;
+}
+
+
+/* Forwards. */
+struct RedState;
+struct FsmState;
+
+/* Transition GenAction Element. */
+typedef SBstMapEl< int, GenAction* > GenActionTableEl;
+
+/* Transition GenAction Table: an SBstMap keyed by int (the value named
+ * `ordering` in setAction), holding GenAction pointers and compared with
+ * CmpOrd<int>. */
+struct GenActionTable
+ : public SBstMap< int, GenAction*, CmpOrd<int> >
+{
+ void setAction( int ordering, GenAction *action );
+ void setActions( int *orderings, GenAction **actions, int nActs );
+ void setActions( const GenActionTable &other );
+};
+
+/* Three-way comparison of whole action table elements: the key decides
+ * first, the value (a GenAction pointer) breaks ties. */
+struct GenCmpActionTableEl
+{
+	static int compare( const GenActionTableEl &action1,
+			const GenActionTableEl &action2 )
+	{
+		if ( action1.key != action2.key )
+			return action1.key < action2.key ? -1 : 1;
+
+		if ( action1.value != action2.value )
+			return action1.value < action2.value ? -1 : 1;
+
+		return 0;
+	}
+};
+
+/* Compare for GenActionTable. */
+typedef CmpSTable< GenActionTableEl, GenCmpActionTableEl > GenCmpActionTable;
+
+/* Set of states. */
+typedef BstSet<RedState*> RedStateSet;
+typedef BstSet<int> IntSet;
+
+/* Reduced action: an action table (key) stored as an element of the
+ * GenActionTableMap AVL tree, together with the reference counters and
+ * analysis flags recorded for it. */
+struct RedAction
+:
+	public AvlTreeEl<RedAction>
+{
+	/* Every scalar member is given a defined starting value so that nothing
+	 * is read while still indeterminate. */
+	RedAction( )
+	:
+		key(),
+		actListId(0),
+		location(0),
+		eofRefs(0),
+		numTransRefs(0),
+		numToStateRefs(0),
+		numFromStateRefs(0),
+		numEofRefs(0),
+		bAnyNextStmt(false),
+		bAnyCurStateRef(false),
+		bAnyBreakStmt(false)
+	{ }
+
+	/* Key accessor used by the AVL tree. */
+	const GenActionTable &getKey()
+		{ return key; }
+
+	GenActionTable key;
+	int actListId;
+	int location;
+	IntSet *eofRefs;
+
+	/* Number of references in the final machine. Returns the actual count
+	 * (it was previously truncated to bool); tests for non-zero behave as
+	 * before. */
+	int numRefs()
+		{ return numTransRefs + numToStateRefs + numFromStateRefs + numEofRefs; }
+	int numTransRefs;
+	int numToStateRefs;
+	int numFromStateRefs;
+	int numEofRefs;
+
+	bool anyNextStmt() { return bAnyNextStmt; }
+	bool anyCurStateRef() { return bAnyCurStateRef; }
+	bool anyBreakStmt() { return bAnyBreakStmt; }
+
+	bool bAnyNextStmt;
+	bool bAnyCurStateRef;
+	bool bAnyBreakStmt;
+};
+typedef AvlTree<RedAction, GenActionTable, GenCmpActionTable> GenActionTableMap;
+
+/* Reduced transition: a target state paired with a reduced action and
+ * identified by id. Stored as an AVL tree element (see RedTransSet). */
+struct RedTrans
+:
+	public AvlTreeEl<RedTrans>
+{
+	/* partitionBoundary is now initialised as well; it was previously left
+	 * indeterminate until some later pass assigned it. */
+	RedTrans( RedState *targ, RedAction *action, int id )
+		: targ(targ), action(action), id(id),
+		partitionBoundary(false), labelNeeded(true) { }
+
+	RedState *targ;
+	RedAction *action;
+	int id;
+	bool partitionBoundary;
+	bool labelNeeded;
+};
+
+/* Ordering used for the final reduction of transitions. Two transitions are
+ * compared by target pointer first and by the pointer to their shared action
+ * table second; the action tables are assumed to have been reduced already. */
+struct CmpRedTrans
+{
+	static int compare( const RedTrans &t1, const RedTrans &t2 )
+	{
+		if ( t1.targ != t2.targ )
+			return t1.targ < t2.targ ? -1 : 1;
+
+		if ( t1.action != t2.action )
+			return t1.action < t2.action ? -1 : 1;
+
+		return 0;
+	}
+};
+
+typedef AvlBasic<RedTrans, CmpRedTrans> RedTransSet;
+
+/* Element in out range: holds a low key, a high key and the reduced
+ * transition stored for them. Used as the element type of RedTransList. */
+struct RedTransEl
+{
+ /* Constructors. */
+ RedTransEl( Key lowKey, Key highKey, RedTrans *value )
+ : lowKey(lowKey), highKey(highKey), value(value) { }
+
+ Key lowKey, highKey;
+ RedTrans *value;
+};
+
+typedef Vector<RedTransEl> RedTransList;
+typedef Vector<RedState*> RedStateVect;
+
+typedef BstMapEl<RedState*, unsigned long long> RedSpanMapEl;
+typedef BstMap<RedState*, unsigned long long> RedSpanMap;
+
+/* Comparison for sorting span map entries in descending order of span: the
+ * entry with the larger value sorts first. */
+struct CmpRedSpanMapEl
+{
+	static int compare( const RedSpanMapEl &smel1, const RedSpanMapEl &smel2 )
+	{
+		if ( smel1.value == smel2.value )
+			return 0;
+
+		return smel1.value > smel2.value ? -1 : 1;
+	}
+};
+
+/* Sorting state-span map entries by span. */
+typedef MergeSort<RedSpanMapEl, CmpRedSpanMapEl> RedSpanMapSort;
+
+/* Set of entry ids that go into this state. */
+typedef Vector<int> EntryIdVect;
+typedef Vector<char*> EntryNameVect;
+
+/* Maps entry ids (defined by the frontend) to reduced state ids. */
+typedef BstMap<int, int> RedEntryMap;
+typedef BstMapEl<int, int> RedEntryMapEl;
+
+typedef Vector<int> RegionToEntry;
+
+/* Reduced state: the outgoing transitions (single, range, default and flat
+ * forms), the state-level actions, and the bookkeeping flags recorded for
+ * one state of the reduced machine. */
+struct RedState
+{
+	/* Every scalar and pointer member gets a defined starting value. The
+	 * list links and the partition number were previously left
+	 * indeterminate. */
+	RedState()
+	:
+		defTrans(0),
+		transList(0),
+		isFinal(false),
+		labelNeeded(false),
+		outNeeded(false),
+		onStateList(false),
+		toStateAction(0),
+		fromStateAction(0),
+		eofAction(0),
+		eofTrans(0),
+		id(0),
+		prev(0),
+		next(0),
+		bAnyRegCurStateRef(false),
+		partition(0),
+		partitionBoundary(false),
+		inTrans(0),
+		numInTrans(0)
+	{ }
+
+	/* Transitions out. */
+	RedTransList outSingle;
+	RedTransList outRange;
+	RedTrans *defTrans;
+
+	/* For flat conditions. */
+	Key condLowKey, condHighKey;
+
+	/* For flat keys. */
+	Key lowKey, highKey;
+	RedTrans **transList;
+
+	/* The list of states that transitions from this state go to. */
+	RedStateVect targStates;
+
+	bool isFinal;
+	bool labelNeeded;
+	bool outNeeded;
+	bool onStateList;
+	RedAction *toStateAction;
+	RedAction *fromStateAction;
+	RedAction *eofAction;
+	RedTrans *eofTrans;
+	int id;
+
+	/* Pointers for the list of states. */
+	RedState *prev, *next;
+
+	bool anyRegCurStateRef() { return bAnyRegCurStateRef; }
+	bool bAnyRegCurStateRef;
+
+	int partition;
+	bool partitionBoundary;
+
+	RedTrans **inTrans;
+	int numInTrans;
+};
+
+/* List of states. */
+typedef DList<RedState> RedStateList;
+
+/* Set of reduced transitions. Comparison is by pointer. */
+typedef BstSet< RedTrans*, CmpOrd<RedTrans*> > RedTransPtrSet;
+
+/* Next version of the fsm machine. Holds the reduced states, transitions
+ * and action tables, the analysis flags (bAny*) with their accessors, and
+ * the size limits (max*) declared below. makeFsmTables() returns an
+ * fsm_tables structure for it. */
+struct RedFsm
+{
+ RedFsm();
+
+ bool wantComplete;
+ bool forcedErrorState;
+
+ int nextActionId;
+ int nextTransId;
+
+ /* Next State Id doubles as the total number of state ids. */
+ int nextStateId;
+
+ RedTransSet transSet;
+ GenActionTableMap actionMap;
+ RedStateList stateList;
+ RedStateSet entryPoints;
+ RedState *startState;
+ RedState *errState;
+ RedTrans *errTrans;
+ RedTrans *errActionTrans;
+ RedState *firstFinState;
+ int numFinStates;
+ int nParts;
+
+ GenAction *allActions;
+ RedAction *allActionTables;
+ RedState *allStates;
+ GenActionList genActionList;
+ EntryIdVect entryPointIds;
+ RedEntryMap redEntryMap;
+ RegionToEntry regionToEntry;
+
+ bool bAnyToStateActions;
+ bool bAnyFromStateActions;
+ bool bAnyRegActions;
+ bool bAnyEofActions;
+ bool bAnyActionGotos;
+ bool bAnyActionCalls;
+ bool bAnyActionRets;
+ bool bAnyRegActionRets;
+ bool bAnyRegActionByValControl;
+ bool bAnyRegNextStmt;
+ bool bAnyRegCurStateRef;
+ bool bAnyRegBreak;
+ bool bAnyLmSwitchError;
+ bool bAnyConditions;
+
+ int maxState;
+ int maxSingleLen;
+ int maxRangeLen;
+ int maxKeyOffset;
+ int maxIndexOffset;
+ int maxIndex;
+ int maxActListId;
+ int maxActionLoc;
+ int maxActArrItem;
+ unsigned long long maxSpan;
+ unsigned long long maxCondSpan;
+ int maxFlatIndexOffset;
+ Key maxKey;
+ int maxCondOffset;
+ int maxCondLen;
+ int maxCondSpaceId;
+ int maxCondIndexOffset;
+ int maxCond;
+
+ bool anyActions();
+ bool anyToStateActions() { return bAnyToStateActions; }
+ bool anyFromStateActions() { return bAnyFromStateActions; }
+ bool anyRegActions() { return bAnyRegActions; }
+ bool anyEofActions() { return bAnyEofActions; }
+ bool anyActionGotos() { return bAnyActionGotos; }
+ bool anyActionCalls() { return bAnyActionCalls; }
+ bool anyActionRets() { return bAnyActionRets; }
+ bool anyRegActionRets() { return bAnyRegActionRets; }
+ bool anyRegActionByValControl() { return bAnyRegActionByValControl; }
+ bool anyRegNextStmt() { return bAnyRegNextStmt; }
+ bool anyRegCurStateRef() { return bAnyRegCurStateRef; }
+ bool anyRegBreak() { return bAnyRegBreak; }
+ bool anyLmSwitchError() { return bAnyLmSwitchError; }
+ bool anyConditions() { return bAnyConditions; }
+
+ /* Is it possible to extend a range by bumping ranges that span only
+ * one character to the singles array? */
+ bool canExtend( const RedTransList &list, int pos );
+
+ /* Pick single transitions from the ranges. */
+ void moveTransToSingle( RedState *state );
+ void chooseSingle();
+
+ void makeFlat();
+
+ /* Move a selected transition from ranges to default. */
+ void moveToDefault( RedTrans *defTrans, RedState *state );
+
+ /* Pick a default transition by largest span. */
+ RedTrans *chooseDefaultSpan( RedState *state );
+ void chooseDefaultSpan();
+
+ /* Pick a default transition by most number of ranges. */
+ RedTrans *chooseDefaultNumRanges( RedState *state );
+ void chooseDefaultNumRanges();
+
+ /* Pick a default transition tailored towards goto driven machine. */
+ RedTrans *chooseDefaultGoto( RedState *state );
+ void chooseDefaultGoto();
+
+ /* Ordering states by transition connections. */
+ void optimizeStateOrdering( RedState *state );
+ void optimizeStateOrdering();
+
+ /* Ordering states by transition connections. */
+ void depthFirstOrdering( RedState *state );
+ void depthFirstOrdering();
+
+ /* Set state ids. */
+ void sequentialStateIds();
+ void sortStateIdsByFinal();
+
+ /* Arrange states by final id. This is a stable sort. */
+ void sortStatesByFinal();
+
+ /* Sorting states by id. */
+ void sortByStateId();
+
+ /* Locating the first final state. This is the final state with the lowest
+ * id. */
+ void findFirstFinState();
+
+ void assignActionLocs();
+
+ RedTrans *getErrorTrans();
+ RedState *getErrorState();
+
+ /* Is every char in the alphabet covered? */
+ bool alphabetCovered( RedTransList &outRange );
+
+ RedTrans *allocateTrans( RedState *targState, RedAction *actionTable );
+
+ void partitionFsm( int nParts );
+
+ void setInTrans();
+ void setValueLimits();
+ void assignActionIds();
+ void analyzeActionList( RedAction *redAct, InlineList *inlineList );
+ void analyzeAction( GenAction *act, InlineList *inlineList );
+ void findFinalActionRefs();
+ void analyzeMachine();
+
+ fsm_tables *makeFsmTables();
+};
+
+#endif /* _COLM_REDFSM_H */
+
diff --git a/src/reduce.cc b/src/reduce.cc
new file mode 100644
index 00000000..89a95015
--- /dev/null
+++ b/src/reduce.cc
@@ -0,0 +1,954 @@
+/*
+ * Copyright 2015-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <string.h>
+#include <stdbool.h>
+
+#include <iostream>
+
+#include "fsmcodegen.h"
+
+/*
+ * Emit stub versions of the reducer entry points for this object:
+ * _commit_reduce_forward (which only clears the parse tree), _commit_union_sz,
+ * _init_need, _reducer_need_tok, _reducer_need_ign and _read_reduce.
+ */
+void Compiler::writeCommitStub()
+{
+	/* The forward-commit stub. */
+	*outStream <<
+		"void " << objectName << "_commit_reduce_forward( program_t *prg, tree_t **root,\n"
+		" struct pda_run *pda_run, parse_tree_t *pt )\n"
+		"{\n"
+		" commit_clear_parse_tree( prg, root, pda_run, pt->child );\n"
+		"}\n"
+		"\n";
+
+	/* Union size and need-table initialisation. */
+	*outStream <<
+		"long " << objectName << "_commit_union_sz( int reducer ) { return 0; }\n";
+	*outStream <<
+		"void " << objectName << "_init_need() {}\n";
+
+	/* Need queries always ask for both data and location. */
+	*outStream <<
+		"int " << objectName << "_reducer_need_tok( program_t *prg, "
+		"struct pda_run *pda_run, int id ) { return COLM_RN_BOTH; }\n";
+	*outStream <<
+		"int " << objectName << "_reducer_need_ign( program_t *prg, "
+		"struct pda_run *pda_run ) { return COLM_RN_BOTH; }\n"
+		"\n";
+
+	/* The postfix-reader stub. */
+	*outStream <<
+		"void " << objectName << "_read_reduce( program_t *prg, int reducer, input_t *stream ) {}\n"
+		;
+}
+
+/*
+ * Scan the text items of a reduce action and record which parts of the
+ * production they refer to.
+ *
+ *  lhsUsed   set to true when an LhsRef item is present (never cleared here).
+ *  rhsUsed   entry [pos] receives the production element referenced by an
+ *            RhsRef item.
+ *  treeUsed  the same, for TreeRef items.
+ *  locUsed   the same, for RhsLoc items.
+ *
+ * The three vectors are reset with setAsNew() to one slot per element of the
+ * production. A reference is either numbered (i->n > 0, one-based position)
+ * or named (the item text with its first character dropped, looked up as a
+ * field in the production's root scope).
+ *
+ * Numbered references beyond the end of the production are ignored rather
+ * than indexed. Named tree references are recorded in treeUsed, matching the
+ * numbered case and the _tree<pos> name that writeTreeRef() emits for them.
+ */
+void Compiler::findRhsRefs( bool &lhsUsed, Vector<ProdEl*> &rhsUsed, Vector<ProdEl*> &treeUsed,
+		Vector<ProdEl*> &locUsed, Reduction *reduction, Production *production,
+		const ReduceTextItemList &list )
+{
+	ObjectDef *objectDef = production->prodName->objectDef;
+	const long numEls = production->prodElList->length();
+
+	rhsUsed.setAsNew( numEls );
+	treeUsed.setAsNew( numEls );
+	locUsed.setAsNew( numEls );
+
+	for ( ReduceTextItemList::Iter i = list; i.lte(); i++ ) {
+		if ( i->type == ReduceTextItem::LhsRef ) {
+			lhsUsed = true;
+		}
+
+		if ( i->type == ReduceTextItem::RhsRef ||
+				i->type == ReduceTextItem::RhsLoc ||
+				i->type == ReduceTextItem::TreeRef )
+		{
+			/* The vector this kind of reference is recorded in. */
+			Vector<ProdEl*> &used =
+					( i->type == ReduceTextItem::RhsLoc ) ? locUsed :
+					( i->type == ReduceTextItem::TreeRef ) ? treeUsed : rhsUsed;
+
+			if ( i->n > 0 ) {
+				/* Numbered. Skip positions the production does not have;
+				 * walking or indexing past the end is undefined. */
+				if ( i->n <= numEls ) {
+					ProdEl *prodEl = production->prodElList->head;
+					for ( int adv = i->n - 1; adv > 0; adv-- )
+						prodEl = prodEl->next;
+
+					used[i->n-1] = prodEl;
+				}
+			}
+			else {
+				/* Named. */
+				String name( i->txt.data + 1, i->txt.length() - 1 );
+				ObjectField *field = objectDef->rootScope->findField( name );
+				if ( field != 0 ) {
+					for ( Vector<RhsVal>::Iter r = field->rhsVal; r.lte(); r++ ) {
+						/* A field may carry values from several productions;
+						 * only those belonging to this one count. */
+						if ( r->prodEl->production == production )
+							used[r->prodEl->pos] = r->prodEl;
+					}
+				}
+			}
+		}
+	}
+}
+
+/*
+ * Mark, in the reduction's needData and needLoc tables, the language
+ * elements whose token data or location one reduce action refers to.
+ * Data is only marked for terminals; locations are marked for any element.
+ */
+void Compiler::computeNeeded( Reduction *reduction, Production *production,
+		const ReduceTextItemList &list )
+{
+	bool lhsUsed = false;
+	Vector<ProdEl*> rhsUsed;
+	Vector<ProdEl*> treeUsed;
+	Vector<ProdEl*> locUsed;
+
+	findRhsRefs( lhsUsed, rhsUsed, treeUsed, locUsed, reduction, production, list );
+
+	/* rhsUsed and locUsed have the same length, so one index serves both. */
+	const long numEls = rhsUsed.length();
+	for ( long pos = 0; pos < numEls; pos++ ) {
+		ProdEl *dataEl = rhsUsed[pos];
+		if ( dataEl != 0 && dataEl->production == production &&
+				dataEl->langEl->type == LangEl::Term )
+		{
+			reduction->needData[dataEl->langEl->id] = true;
+		}
+
+		ProdEl *locEl = locUsed[pos];
+		if ( locEl == 0 || locEl->production != production )
+			continue;
+
+		reduction->needLoc[locEl->langEl->id] = true;
+	}
+}
+
+/*
+ * Emit the local declarations that the host code of a reduce action relies
+ * on: _lhs, _rhs<N>, _tree<N> and _loc<N>, one for every reference that
+ * findRhsRefs() reports for the action.
+ *
+ *  reduction, production, list   forwarded to findRhsRefs().
+ *  read   selects what the emitted code reads from. When true the
+ *         declarations are written against read_reduce_node (node,
+ *         node->child); when false against the parse tree and kid list
+ *         (lel, kid).
+ *
+ * Two passes are made over the right-hand side. The first declares
+ * references to non-terminals using a parse tree cursor (_pt_cursor). The
+ * second declares token data, tree and location references using a tree
+ * cursor (_tree_cursor). In both passes the emitted cursor is advanced one
+ * step at a time until it reaches the position of the element declared.
+ */
+void Compiler::loadRefs( Reduction *reduction, Production *production,
+		const ReduceTextItemList &list, bool read )
+{
+	bool lhsUsed = false;
+	Vector<ProdEl*> rhsUsed;
+	Vector<ProdEl*> treeUsed;
+	Vector<ProdEl*> locUsed;
+
+	findRhsRefs( lhsUsed, rhsUsed, treeUsed, locUsed, reduction, production, list );
+
+	if ( lhsUsed ) {
+		*outStream << " lel_" << production->prodName->fullName << " *_lhs = ";
+
+		if ( read ) {
+			*outStream <<
+				"&node->u." << production->prodName->fullName << ";\n";
+		}
+		else {
+			*outStream <<
+				"&((commit_reduce_union*)(lel+1))->" << production->prodName->fullName << ";\n";
+		}
+	}
+
+	/*
+	 * In the first pass we load using a parse tree cursor. This is for
+	 * nonterms.
+	 */
+	bool useCursor = false;
+	for ( Vector<ProdEl*>::Iter rhs = rhsUsed; rhs.lte(); rhs++ ) {
+		if ( *rhs != 0 && (*rhs)->production == production &&
+				(*rhs)->langEl->type != LangEl::Term )
+		{
+			useCursor = true;
+			break;
+		}
+	}
+
+	if ( useCursor ) {
+		int cursorPos = 0;
+
+		if ( read ) {
+			*outStream <<
+				" struct read_reduce_node *_pt_cursor = node->child;\n";
+		}
+		else {
+			*outStream <<
+				" struct colm_parse_tree *_pt_cursor = lel->child;\n";
+		}
+
+		/* Only rhsUsed is consulted in this pass. */
+		for ( Vector<ProdEl*>::Iter rhs = rhsUsed; rhs.lte(); rhs++ ) {
+			ProdEl *prodEl = *rhs;
+			if ( prodEl != 0 ) {
+				/* Bring the emitted cursor up to this element. */
+				while ( cursorPos < rhs.pos() ) {
+					*outStream <<
+						" _pt_cursor = _pt_cursor->next;\n";
+					cursorPos += 1;
+				}
+
+				if ( prodEl->production == production ) {
+					if ( prodEl->langEl->type != LangEl::Term ) {
+						*outStream <<
+							"lel_" << prodEl->langEl->fullName << " *" "_rhs" << rhs.pos() << " = ";
+
+						if ( read ) {
+							*outStream << "&_pt_cursor->u." << prodEl->langEl->fullName << ";\n";
+						}
+						else {
+							*outStream << "&((commit_reduce_union*)(_pt_cursor+1))->" << prodEl->langEl->fullName << ";\n";
+						}
+					}
+				}
+
+			}
+		}
+	}
+
+	/* In the second pass we load using a tree cursor. This is for token/tree
+	 * data and locations. */
+
+	useCursor = false;
+	for ( Vector<ProdEl*>::Iter rhs = rhsUsed; rhs.lte(); rhs++ ) {
+		if ( *rhs != 0 && (*rhs)->production == production &&
+				(*rhs)->langEl->type == LangEl::Term )
+		{
+			useCursor = true;
+			break;
+		}
+	}
+	for ( Vector<ProdEl*>::Iter rhs = treeUsed; rhs.lte(); rhs++ ) {
+		if ( *rhs != 0 ) {
+			useCursor = true;
+			break;
+		}
+	}
+	for ( Vector<ProdEl*>::Iter loc = locUsed; loc.lte(); loc++ ) {
+		if ( *loc != 0 ) {
+			useCursor = true;
+			break;
+		}
+	}
+
+	if ( useCursor ) {
+		int cursorPos = 0;
+
+		if ( read ) {
+			*outStream <<
+				" read_reduce_node *_tree_cursor = node->child;\n";
+		}
+		else {
+			*outStream <<
+				" kid_t *_tree_cursor = kid->tree->child;\n";
+		}
+
+		/* Same length, can concurrently walk with one test. */
+		Vector<ProdEl*>::Iter rhs = rhsUsed;
+		Vector<ProdEl*>::Iter tree = treeUsed;
+		Vector<ProdEl*>::Iter loc = locUsed;
+
+		/* All three iterators must move together. The tree iterator was
+		 * previously left at the first element, so tree references at any
+		 * other position were never declared. */
+		for ( ; rhs.lte(); rhs++, tree++, loc++ ) {
+
+			ProdEl *prodEl = *rhs;
+			if ( prodEl != 0 ) {
+				if ( prodEl->production == production ) {
+					if ( prodEl->langEl->type == LangEl::Term ) {
+
+						while ( cursorPos < rhs.pos() ) {
+							*outStream <<
+								" _tree_cursor = _tree_cursor->next;\n";
+							cursorPos += 1;
+						}
+
+						*outStream << " colm_data *_rhs" << rhs.pos() << " = ";
+
+						if ( read ) {
+							*outStream <<
+								"&_tree_cursor->data;\n";
+						}
+						else {
+							*outStream <<
+								"_tree_cursor->tree->tokdata;\n";
+						}
+					}
+				}
+			}
+
+			ProdEl *treeEl = *tree;
+			if ( treeEl != 0 ) {
+				if ( treeEl->production == production ) {
+					while ( cursorPos < rhs.pos() ) {
+						*outStream <<
+							" _tree_cursor = _tree_cursor->next;\n";
+						cursorPos += 1;
+					}
+
+					*outStream << " colm_tree *_tree" << rhs.pos() << " = ";
+					*outStream << "_tree_cursor->tree;\n";
+				}
+			}
+
+			ProdEl *locEl = *loc;
+			if ( locEl != 0 ) {
+				if ( locEl->production == production ) {
+
+					while ( cursorPos < rhs.pos() ) {
+						*outStream <<
+							" _tree_cursor = _tree_cursor->next;\n";
+						cursorPos += 1;
+					}
+
+					*outStream <<
+						" colm_location *_loc" << loc.pos() << " = ";
+
+					if ( read ) {
+						*outStream << "&_tree_cursor->loc;\n";
+					}
+					else {
+						*outStream <<
+							"colm_find_location( prg, _tree_cursor->tree );\n";
+					}
+				}
+			}
+		}
+	}
+}
+
+/*
+ * Write the generated-code name of a right-hand-side reference: _rhs<N>
+ * with a zero-based position. Named references are resolved through the
+ * field's rhsVal entries that belong to this production.
+ */
+void Compiler::writeRhsRef( Production *production, ReduceTextItem *i )
+{
+	/* Numbered reference: the position is given directly (one-based). */
+	if ( i->n > 0 ) {
+		*outStream << "_rhs" << ( i->n - 1 );
+		return;
+	}
+
+	/* Named reference: the name is the item text less its first character. */
+	ObjectDef *objectDef = production->prodName->objectDef;
+	String name( i->txt.data + 1, i->txt.length() - 1 );
+
+	ObjectField *field = objectDef->rootScope->findField( name );
+	if ( field == 0 )
+		return;
+
+	for ( Vector<RhsVal>::Iter val = field->rhsVal; val.lte(); val++ ) {
+		if ( val->prodEl->production != production )
+			continue;
+		*outStream << "_rhs" << val->prodEl->pos;
+	}
+}
+
+/*
+ * Write the generated-code name of a tree reference: _tree<N> with a
+ * zero-based position. Named references are resolved through the field's
+ * rhsVal entries that belong to this production.
+ */
+void Compiler::writeTreeRef( Production *production, ReduceTextItem *i )
+{
+	/* Numbered reference: the position is given directly (one-based). */
+	if ( i->n > 0 ) {
+		*outStream << "_tree" << ( i->n - 1 );
+		return;
+	}
+
+	/* Named reference: the name is the item text less its first character. */
+	ObjectDef *objectDef = production->prodName->objectDef;
+	String name( i->txt.data + 1, i->txt.length() - 1 );
+
+	ObjectField *field = objectDef->rootScope->findField( name );
+	if ( field == 0 )
+		return;
+
+	for ( Vector<RhsVal>::Iter val = field->rhsVal; val.lte(); val++ ) {
+		if ( val->prodEl->production != production )
+			continue;
+		*outStream << "_tree" << val->prodEl->pos;
+	}
+}
+
+/*
+ * Write the generated-code name of a location reference: _loc<N> with a
+ * zero-based position. Named references are resolved through the field's
+ * rhsVal entries that belong to this production.
+ */
+void Compiler::writeRhsLoc( Production *production, ReduceTextItem *i )
+{
+	/* Numbered reference: the position is given directly (one-based). */
+	if ( i->n > 0 ) {
+		*outStream << "_loc" << ( i->n - 1 );
+		return;
+	}
+
+	/* Named reference: the name is the item text less its first character. */
+	ObjectDef *objectDef = production->prodName->objectDef;
+	String name( i->txt.data + 1, i->txt.length() - 1 );
+
+	ObjectField *field = objectDef->rootScope->findField( name );
+	if ( field == 0 )
+		return;
+
+	for ( Vector<RhsVal>::Iter val = field->rhsVal; val.lte(); val++ ) {
+		if ( val->prodEl->production != production )
+			continue;
+		*outStream << "_loc" << val->prodEl->pos;
+	}
+}
+/* Write the generated-code name of the left-hand-side reference. */
+void Compiler::writeLhsRef( Production *production, ReduceTextItem *i )
+{
+ *outStream << "_lhs"; /* Fixed name; neither argument is consulted. */
+}
+
+/*
+ * Write out the host code of a reduce action. Plain text items are copied
+ * through unchanged; each reference item is replaced by the name produced
+ * by the matching write*Ref / writeRhsLoc function.
+ */
+void Compiler::writeHostItemList( Production *production,
+		const ReduceTextItemList &list )
+{
+	for ( ReduceTextItemList::Iter i = list; i.lte(); i++ ) {
+		if ( i->type == ReduceTextItem::Txt )
+			*outStream << i->txt;
+		else if ( i->type == ReduceTextItem::LhsRef )
+			writeLhsRef( production, i );
+		else if ( i->type == ReduceTextItem::RhsRef )
+			writeRhsRef( production, i );
+		else if ( i->type == ReduceTextItem::TreeRef )
+			writeTreeRef( production, i );
+		else if ( i->type == ReduceTextItem::RhsLoc )
+			writeRhsLoc( production, i );
+	}
+}
+
+/* Ordering for reduce actions: by the id of the production's name first,
+ * by the production number second. */
+struct CmpReduceAction
+{
+	static int compare( const ReduceAction *ra1 , const ReduceAction *ra2 )
+	{
+		if ( ra1->production->prodName->id != ra2->production->prodName->id ) {
+			return ra1->production->prodName->id <
+					ra2->production->prodName->id ? -1 : 1;
+		}
+
+		if ( ra1->production->prodNum != ra2->production->prodNum ) {
+			return ra1->production->prodNum <
+					ra2->production->prodNum ? -1 : 1;
+		}
+
+		return 0;
+	}
+};
+
+/*
+ * Allocate the reduction's needData and needLoc tables, one bool per
+ * language element id (nextLelId entries each), with every entry cleared.
+ */
+void Compiler::initReductionNeeds( Reduction *reduction )
+{
+	const size_t tableBytes = sizeof(bool)*nextLelId;
+
+	reduction->needData = new bool[nextLelId];
+	reduction->needLoc = new bool[nextLelId];
+
+	memset( reduction->needData, 0, tableBytes );
+	memset( reduction->needLoc, 0, tableBytes );
+}
+/* Emit the need tables and their accessors into the generated source: the
+ * reduction_info struct, a static ri array with one slot more than the number
+ * of reductions, the <objectName>_init_need() function that fills ri from
+ * each reduction's needData/needLoc, and the <objectName>_reducer_need_tok()
+ * and <objectName>_reducer_need_ign() query functions. */
+void Compiler::writeNeeds()
+{
+
+ *outStream <<
+ "struct reduction_info\n"
+ "{\n"
+ " unsigned char need_data[" << nextLelId << "];\n"
+ " unsigned char need_loc[" << nextLelId << "];\n"
+ "};\n"
+ "\n";
+
+ *outStream <<
+ "static struct reduction_info ri[" << rootNamespace->reductions.length() + 1 << "];\n"
+ "\n";
+
+ *outStream <<
+ "extern \"C\" void " << objectName << "_init_need()\n"
+ "{\n";
+
+ for ( ReductionVect::Iter r = rootNamespace->reductions; r.lte(); r++ ) {
+ Reduction *reduction = *r;
+ *outStream <<
+ " memset( ri[" << reduction->id << "]"
+ ".need_data, 0, sizeof(unsigned char) * " << nextLelId << " );\n"
+ " memset( ri[" << reduction->id << "]"
+ ".need_loc, 0, sizeof(unsigned char) * " << nextLelId << " );\n";
+
+ for ( int i = 0; i < nextLelId; i++ ) { /* One emitted assignment per flagged element id. */
+ if ( reduction->needData[i] ) {
+ *outStream <<
+ " ri[" << reduction->id << "].need_data[" << i << "] = COLM_RN_DATA;\n";
+ }
+
+ if ( reduction->needLoc[i] ) {
+ *outStream <<
+ " ri[" << reduction->id << "].need_loc[" << i << "] = COLM_RN_LOC;\n";
+ }
+ }
+ }
+
+ *outStream <<
+ "}\n";
+
+ *outStream <<
+ "extern \"C\" int " << objectName << "_reducer_need_tok( program_t *prg, "
+ "struct pda_run *pda_run, int id )\n"
+ "{\n"
+ " if ( prg->reduce_clean && pda_run->reducer > 0 ) {\n"
+ /* Note we are forcing the reducer need for data. Enabling requires finding
+ * a solution for backtracking push. */
+ " return COLM_RN_DATA | ri[pda_run->reducer].need_data[id] | \n"
+ " ri[pda_run->reducer].need_loc[id];\n"
+ " }\n"
+ " return COLM_RN_BOTH;\n"
+ "}\n"
+ "\n"
+ "extern \"C\" int " << objectName << "_reducer_need_ign( program_t *prg, struct pda_run *pda_run )\n"
+ "{\n"
+ // Using this requires finding a solution for backtracking push back.
+ //" if ( pda_run->reducer > 0 )\n"
+ //" return COLM_RN_NEITHER;\n"
+ " return COLM_RN_BOTH;\n"
+ "}\n";
+}
+
+/*
+ * Emit the data structures the reducers use: one lel_<name> struct per
+ * reduce non-terminal (its body is the user's host text), the
+ * commit_reduce_union with one member per such struct, the
+ * <objectName>_commit_union_sz() function returning the union's size, and
+ * the read_reduce_node struct.
+ */
+void Compiler::writeReduceStructs()
+{
+	/* The per-nonterminal structs. */
+	for ( ReductionVect::Iter r = rootNamespace->reductions; r.lte(); r++ ) {
+		for ( ReduceNonTermList::Iter rdi = (*r)->reduceNonTerms; rdi.lte(); rdi++ ) {
+			LangEl *langEl = rdi->nonTerm->uniqueType->langEl;
+
+			*outStream <<
+				"struct lel_" << langEl->fullName << "\n"
+				"{\n"
+				"#line " << rdi->loc.line << "\"" << rdi->loc.fileName << "\"\n";
+
+			writeHostItemList( 0, rdi->itemList );
+
+			*outStream <<
+				"};\n";
+		}
+	}
+
+	/* The union over all of them. */
+	*outStream <<
+		"union commit_reduce_union\n"
+		"{\n";
+
+	for ( ReductionVect::Iter r = rootNamespace->reductions; r.lte(); r++ ) {
+		ReduceNonTermList::Iter rdi = (*r)->reduceNonTerms;
+		while ( rdi.lte() ) {
+			LangEl *langEl = rdi->nonTerm->uniqueType->langEl;
+			*outStream <<
+				" lel_" << langEl->fullName << " " << langEl->fullName << ";\n";
+			rdi++;
+		}
+	}
+
+	*outStream <<
+		"};\n"
+		"\n"
+		"extern \"C\" long " << objectName << "_commit_union_sz( int reducer )\n"
+		"{\n"
+		" return sizeof( commit_reduce_union );\n"
+		"}\n";
+
+	/* The node type built when reading a postfix stream. */
+	*outStream <<
+		"struct read_reduce_node\n"
+		"{\n"
+		" std::string name;\n"
+		" int id;\n"
+		" int prod_num;\n"
+		" colm_location loc;\n"
+		" colm_data data;\n"
+		" commit_reduce_union u;\n"
+		" read_reduce_node *next;\n"
+		" read_reduce_node *child;\n"
+		"};\n"
+		"\n";
+}
+
+/* Emit a static unescape() helper into the generated source. The emitted
+ * function rewrites tokdata->data in place: a backslash followed by two
+ * characters is parsed with sscanf "%x" and replaced by a single byte, and
+ * tokdata->length is reduced by two for each such escape.
+ * NOTE(review): the emitted code reads the two characters after a backslash
+ * without checking for the terminating NUL -- confirm that the input always
+ * carries complete escapes. */
+void Compiler::writeUnescape()
+{
+ *outStream <<
+ "static void unescape( colm_data *tokdata )\n"
+ "{\n"
+ " unsigned char *src = (unsigned char*)tokdata->data, *dest = (unsigned char*)tokdata->data;\n"
+ " while ( *src != 0 ) {\n"
+ " if ( *src == '\\\\' ) {\n"
+ " unsigned int i;\n"
+ " char buf[3];\n"
+ "\n"
+ " src += 1;\n"
+ " buf[0] = *src++;\n"
+ " buf[1] = *src++;\n"
+ " buf[2] = 0;\n"
+ "\n"
+ " sscanf( buf, \"%x\", &i );\n"
+ " *dest++ = (unsigned char)i;\n"
+ "\n"
+ " tokdata->length -= 2;\n"
+ " }\n"
+ " else {\n"
+ " *dest++ = *src++;\n"
+ " }\n"
+ " }\n"
+ " *dest = 0;\n"
+ "}\n"
+ "\n";
+}
+
+/*
+ * Emit the two extern "C" dispatch functions of the generated program.
+ * <objectName>_commit_reduce_forward() switches on pda_run->reducer and
+ * forwards to the parser-based reduction with that id;
+ * <objectName>_read_reduce() switches on the reducer argument and forwards
+ * to the postfix-based reduction with that id.
+ */
+void Compiler::writeReduceDispatchers()
+{
+	/* Dispatcher for parser-based reductions. */
+	*outStream <<
+		"\n"
+		"extern \"C\" void " << objectName << "_commit_reduce_forward( program_t *prg, tree_t **root,\n"
+		" struct pda_run *pda_run, parse_tree_t *pt )\n"
+		"{\n"
+		" switch ( pda_run->reducer ) {\n";
+
+	for ( ReductionVect::Iter r = rootNamespace->reductions; r.lte(); r++ ) {
+		if ( !(*r)->parserBased )
+			continue;
+
+		*outStream <<
+			" case " << (*r)->id << ":\n"
+			" ((" << (*r)->name << "*)prg->red_ctx)->commit_reduce_forward( "
+			"prg, root, pda_run, pt );\n"
+			" break;\n";
+	}
+
+	*outStream <<
+		" }\n"
+		"}\n"
+		"\n";
+
+	/* Dispatcher for postfix-based reductions. */
+	*outStream <<
+		"extern \"C\" void " << objectName << "_read_reduce( program_t *prg, int reducer, stream_t *stream )\n"
+		"{\n"
+		" switch ( reducer ) {\n";
+
+	for ( ReductionVect::Iter r = rootNamespace->reductions; r.lte(); r++ ) {
+		if ( !(*r)->postfixBased )
+			continue;
+
+		*outStream <<
+			" case " << (*r)->id << ":\n"
+			" ((" << (*r)->name << "*)prg->red_ctx)->read_reduce_forward( prg, stream->impl->file );\n"
+			" break;\n";
+	}
+
+	*outStream <<
+		" }\n"
+		"}\n"
+		"\n";
+}
+
+/*
+ * For every reduction in the root namespace: allocate and clear its need
+ * tables, then mark what each of its reduce actions requires.
+ */
+void Compiler::computeNeeded()
+{
+	for ( ReductionVect::Iter r = rootNamespace->reductions; r.lte(); r++ ) {
+		initReductionNeeds( *r );
+
+		ReduceActionList::Iter act = (*r)->reduceActions;
+		while ( act.lte() ) {
+			computeNeeded( *r, act->production, act->itemList );
+			act++;
+		}
+	}
+}
+/* Emit <reduction->name>::commit_reduce_forward(). The emitted function
+ * walks the parse tree children-first (goto recurse / resume, saving lel and
+ * kid with vm_push_* / vm_pop_*), and for each node not flagged PF_COMMITTED
+ * switches on kid->tree->id and tests kid->tree->prod_num to run the matching
+ * reduce action. The actions are sorted by production name id and then by
+ * production number, so that all productions sharing a name are emitted
+ * under a single case label. */
+void Compiler::writeParseReduce( Reduction *reduction )
+{
+ *outStream <<
+ "void " << reduction->name << "::commit_reduce_forward( program_t *prg, \n"
+ " tree_t **root, struct pda_run *pda_run, parse_tree_t *pt )\n"
+ "{\n"
+ " tree_t **sp = root;\n"
+ "\n"
+ " parse_tree_t *lel = pt;\n"
+ " kid_t *kid = pt->shadow;\n"
+ "\n"
+ "recurse:\n"
+ "\n"
+ " if ( lel->child != 0 ) {\n"
+ " /* There are children. Must process all children first. */\n"
+ " vm_push_ptree( lel );\n"
+ " vm_push_kid( kid );\n"
+ "\n"
+ " lel = lel->child;\n"
+ " kid = tree_child( prg, kid->tree );\n"
+ " while ( lel != 0 ) {\n"
+ " goto recurse;\n"
+ " resume:\n"
+ " lel = lel->next;\n"
+ " kid = kid->next;\n"
+ " }\n"
+ "\n"
+ " kid = vm_pop_kid();\n"
+ " lel = vm_pop_ptree();\n"
+ " }\n"
+ "\n"
+ " if ( !( lel->flags & PF_COMMITTED ) ) {\n"
+ " /* Now can execute the reduction action. */\n"
+ " {\n";
+
+
+ *outStream <<
+ " { switch ( kid->tree->id ) {\n";
+
+ /* Populate a vector with the reduce actions. */
+ Vector<ReduceAction*> actions;
+ actions.setAsNew( reduction->reduceActions.length() );
+ long pos = 0;
+ for ( ReduceActionList::Iter rdi = reduction->reduceActions; rdi.lte(); rdi++ )
+ actions[pos++] = rdi;
+
+ /* Sort it by lhs id, then prod num. */
+ MergeSort<ReduceAction*, CmpReduceAction> sortActions;
+ sortActions.sort( actions.data, actions.length() );
+
+ ReduceAction *last = 0; /* The action emitted on the previous iteration, 0 on the first. */
+
+ for ( Vector<ReduceAction*>::Iter rdi = actions; rdi.lte(); rdi++ ) {
+ ReduceAction *action = *rdi;
+ int lelId = action->production->prodName->id;
+ int prodNum = action->production->prodNum;
+
+ /* Maybe close off the last prod. */
+ if ( last != 0 &&
+ last->production->prodName != action->production->prodName )
+ {
+ *outStream <<
+ " break;\n"
+ " }\n";
+
+ }
+
+ /* Maybe open a new prod. */
+ if ( last == 0 ||
+ last->production->prodName != action->production->prodName )
+ {
+ *outStream <<
+ " case " << lelId << ": {\n";
+ }
+
+ *outStream <<
+ " if ( kid->tree->prod_num == " << prodNum << " ) {\n";
+
+
+ loadRefs( reduction, action->production, action->itemList, false );
+
+ *outStream <<
+ "#line " << action->loc.line << " \"" << action->loc.fileName << "\"\n";
+
+ writeHostItemList( action->production, action->itemList );
+
+ *outStream <<
+ " }\n";
+
+ last = action;
+ }
+
+ if ( last != 0 ) { /* Close the case left open by the final action. */
+ *outStream <<
+ " break;\n"
+ " }\n";
+ }
+
+ *outStream <<
+ " } }\n"
+ " }\n"
+ " }\n"
+ "\n"
+ " commit_clear_parse_tree( prg, sp, pda_run, lel->child );\n"
+ " if ( prg->reduce_clean ) {\n"
+ " commit_clear_kid_list( prg, sp, kid->tree->child );\n"
+ " kid->tree->child = 0;\n"
+ " kid->tree->flags &= ~( AF_LEFT_IGNORE | AF_RIGHT_IGNORE );\n"
+ " }\n"
+ " lel->child = 0;\n"
+ "\n"
+ " if ( sp != root )\n"
+ " goto resume;\n"
+ " pt->flags |= PF_COMMITTED;\n"
+ "}\n"
+ "\n";
+
+}
+
+/* Emit the parser-driven reduce code for every reduction in the root
+ * namespace that has been marked parserBased. */
+void Compiler::writeParseReduce()
+{
+	ReductionVect::Iter it = rootNamespace->reductions;
+	while ( it.lte() ) {
+		Reduction *candidate = *it;
+		if ( candidate->parserBased )
+			writeParseReduce( candidate );
+		it++;
+	}
+}
+
+/*
+ * Emit the read_reduce_forward() member of the generated reducer class for
+ * one reduction.
+ *
+ * The generated function reads whitespace-separated records from a FILE. A
+ * "t" record pushes a terminal node on a stack. An "r" record pops the given
+ * number of children, pushes a nonterminal node that owns them, and then
+ * dispatches on the node id and production number to the matching reduce
+ * action. After the action runs the children are deleted.
+ *
+ * reduction: supplies the class name and the list of reduce actions.
+ */
+void Compiler::writePostfixReduce( Reduction *reduction )
+{
+	*outStream <<
+		"void " << reduction->name << "::read_reduce_forward( program_t *prg, FILE *file )\n"
+		"{\n"
+		" __gnu_cxx::stdio_filebuf<char> fbuf( file, std::ios::in|std::ios::out|std::ios::app );\n"
+		" std::iostream in( &fbuf );\n"
+		" std::string type, tok, text;\n"
+		" long _id, line, column, byte, prod_num, children;\n"
+		" read_reduce_node sentinal;\n"
+		" sentinal.next = 0;\n"
+		" read_reduce_node *stack = &sentinal, *last = 0;\n"
+		" while ( in >> type ) {\n"
+		" /* read. */\n"
+		" if ( type == \"t\" ) {\n"
+		" in >> tok >> _id >> line >> column >> byte >> text;\n"
+		" read_reduce_node *node = new read_reduce_node;\n"
+		" node->name = tok;\n"
+		" node->id = _id;\n"
+		" node->loc.name = \"<>\";\n"
+		" node->loc.line = line;\n"
+		" node->loc.column = column;\n"
+		" node->loc.byte = byte;\n"
+		" node->data.data = strdup( text.c_str() );\n"
+		" node->data.length = text.size();\n"
+		" unescape( &node->data );\n"
+		"\n"
+		" node->next = stack;\n"
+		" node->child = 0;\n"
+		" stack = node;\n"
+		" }\n"
+		" else if ( type == \"r\" ) {\n"
+		" in >> tok >> _id >> prod_num >> children;\n"
+		" read_reduce_node *node = new read_reduce_node;\n"
+		" memset( &node->loc, 0, sizeof(colm_location) );\n"
+		" memset( &node->data, 0, sizeof(colm_data) );\n"
+		" node->name = tok;\n"
+		" node->id = _id;\n"
+		" node->prod_num = prod_num;\n"
+		" node->child = 0;\n"
+		" while ( children-- > 0 ) {\n"
+		" last = stack;\n"
+		" stack = stack->next;\n"
+		" last->next = node->child;\n"
+		" node->child = last;\n"
+		" }\n"
+		"\n"
+		" node->next = stack;\n"
+		" stack = node;\n"
+		"\n"
+		" { switch ( node->id ) {\n";
+
+	/* Populate a vector with the reduce actions. */
+	Vector<ReduceAction*> actions;
+	actions.setAsNew( reduction->reduceActions.length() );
+	long pos = 0;
+	for ( ReduceActionList::Iter rdi = reduction->reduceActions; rdi.lte(); rdi++ )
+		actions[pos++] = rdi;
+
+	/* Sort it by lhs id, then prod num, so that all actions of one nonterminal
+	 * are adjacent and can share a single case label. */
+	MergeSort<ReduceAction*, CmpReduceAction> sortActions;
+	sortActions.sort( actions.data, actions.length() );
+
+	/* The previously emitted action; used to detect a change of nonterminal. */
+	ReduceAction *last = 0;
+
+	for ( Vector<ReduceAction*>::Iter rdi = actions; rdi.lte(); rdi++ ) {
+		ReduceAction *action = *rdi;
+		int lelId = action->production->prodName->id;
+		int prodNum = action->production->prodNum;
+
+		/* Maybe close off the last prod. */
+		if ( last != 0 &&
+				last->production->prodName != action->production->prodName )
+		{
+			*outStream <<
+				" break;\n"
+				" }\n";
+		}
+
+		/* Maybe open a new prod. */
+		if ( last == 0 ||
+				last->production->prodName != action->production->prodName )
+		{
+			*outStream <<
+				" case " << lelId << ": {\n";
+		}
+
+		*outStream <<
+			" if ( node->prod_num == " << prodNum << " ) {\n";
+
+		loadRefs( reduction, action->production, action->itemList, true );
+
+		/* Keep a space between the line number and the quoted file name,
+		 * matching the directive written by the parser-based generator. */
+		*outStream <<
+			"#line " << action->loc.line << " \"" << action->loc.fileName << "\"\n";
+
+		writeHostItemList( action->production, action->itemList );
+
+		*outStream <<
+			" }\n";
+
+		last = action;
+	}
+
+	/* Close the final case, if any action was written. */
+	if ( last != 0 ) {
+		*outStream <<
+			" break;\n"
+			" }\n";
+	}
+
+	*outStream <<
+		" } }\n"
+		" /* delete the children */\n"
+		" last = node->child;\n"
+		" while ( last != 0 ) {\n"
+		" read_reduce_node *next = last->next;\n"
+		" delete last;\n"
+		" last = next;\n"
+		" }\n"
+		" }\n"
+		" }\n"
+		"}\n"
+		"\n";
+}
+
+/*
+ * Emit the postfix reader for every reduction in the root namespace that is
+ * marked postfixBased. The unescape helper, which the generated readers
+ * call, is written once, ahead of the first postfix reducer.
+ */
+void Compiler::writePostfixReduce()
+{
+	/* Tracks whether the unescape helper has been written yet. */
+	bool unescape = false;
+	for ( ReductionVect::Iter r = rootNamespace->reductions; r.lte(); r++ ) {
+		Reduction *reduction = *r;
+		if ( reduction->postfixBased ) {
+			if ( !unescape ) {
+				writeUnescape();
+				/* Record it. Without this the helper would be written again
+				 * for every postfix reduction. */
+				unescape = true;
+			}
+
+			writePostfixReduce( reduction );
+		}
+	}
+}
+
+/*
+ * Write the generated commit/reduce source: the fixed preamble of includes
+ * followed by the reducer structs, dispatchers, parse-based reducers and the
+ * needs tables.
+ */
+void Compiler::writeCommit()
+{
+	/* Colm runtime headers. */
+	*outStream <<
+		"#include <colm/pdarun.h>\n"
+		"#include <colm/bytecode.h>\n"
+		"#include <colm/defs.h>\n"
+		"#include <colm/input.h>\n"
+		"#include <colm/tree.h>\n"
+		"#include <colm/program.h>\n"
+		"#include <colm/colm.h>\n"
+		"\n";
+
+	/* C library headers. */
+	*outStream <<
+		"#include <stdio.h>\n"
+		"#include <stdlib.h>\n"
+		"#include <string.h>\n"
+		"#include <assert.h>\n"
+		"#include <errno.h>\n"
+		"\n";
+
+	/* C++ headers. <ext/stdio_filebuf.h> is deliberately left out because it
+	 * is not available on MAC OS. */
+	*outStream <<
+		"#include <iostream>\n"
+		"#include <fstream>\n"
+		"\n"
+		"using std::endl;\n"
+		"\n"
+		"#include \"reducer.h\"\n"
+		"\n";
+
+	computeNeeded();
+	writeReduceStructs();
+	writeReduceDispatchers();
+
+	/* Postfix reducers are currently disabled: */
+	//writePostfixReduce();
+
+	writeParseReduce();
+	writeNeeds();
+}
diff --git a/src/resolve.cc b/src/resolve.cc
new file mode 100644
index 00000000..c1f2cb3d
--- /dev/null
+++ b/src/resolve.cc
@@ -0,0 +1,988 @@
+/*
+ * Copyright 2009-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdbool.h>
+#include <iostream>
+#include "compiler.h"
+
+/*
+ * Type Resolve.
+ */
+
+using std::cout;
+using std::cerr;
+using std::endl;
+
+/*
+ * Pick the namespace in which this type reference's name is looked up. When
+ * the qualification is not this-only and a parsed variable or type reference
+ * is present, the namespace of that reference's language element is used.
+ * Otherwise the namespace qualification itself is resolved.
+ */
+Namespace *TypeRef::resolveNspace( Compiler *pd )
+{
+	if ( parsedVarRef != 0 && !nspaceQual->thisOnly() ) {
+		UniqueType *viaVar = parsedVarRef->lookup( pd );
+		return viaVar->langEl->nspace;
+	}
+
+	if ( parsedTypeRef != 0 && !nspaceQual->thisOnly() ) {
+		UniqueType *viaType = parsedTypeRef->resolveType( pd );
+		return viaType->langEl->nspace;
+	}
+
+	/* Look up the qualification and then the name. */
+	return nspaceQual->getQual( pd );
+}
+
+/*
+ * Resolve a named type. Starting at the namespace given by resolveNspace(),
+ * walk outwards through parent namespaces (unless the qualification is
+ * this-only) until the name is found in a type map. Reports an error when no
+ * namespace is available or the name is not found.
+ */
+UniqueType *TypeRef::resolveTypeName( Compiler *pd )
+{
+	nspace = resolveNspace( pd );
+	if ( nspace == 0 )
+		error(loc) << "do not have region for resolving reference" << endp;
+
+	for ( ; nspace != 0; nspace = nspace->parentNamespace ) {
+		/* Look the name up in this namespace's type map. */
+		TypeMapEl *entry = nspace->typeMap.find( typeName );
+
+		if ( entry != 0 ) {
+			if ( entry->type == TypeMapEl::AliasType ) {
+				/* Defer to the type ref we are an alias of. We need to
+				 * guard against loops here. */
+				return entry->typeRef->resolveType( pd );
+			}
+			else if ( entry->type == TypeMapEl::LangElType ) {
+				return pd->findUniqueType( TYPE_TREE, entry->value );
+			}
+			else if ( entry->type == TypeMapEl::StructType ) {
+				return pd->findUniqueType( TYPE_STRUCT, entry->structEl );
+			}
+		}
+
+		/* A this-only qualification forbids searching the parents. */
+		if ( nspaceQual->thisOnly() )
+			break;
+	}
+
+	error(loc) << "unknown type " << typeName << " in typeof expression" << endp;
+	return 0;
+}
+
+/*
+ * Resolve a type given as a literal. The literal text is prepared with
+ * prepareLitString() and then searched for in the literal dictionaries of the
+ * starting namespace and, unless the qualification is this-only, its parents.
+ */
+UniqueType *TypeRef::resolveTypeLiteral( Compiler *pd )
+{
+	/* Look up the qualification and then the name. */
+	nspace = resolveNspace( pd );
+	if ( nspace == 0 )
+		error(loc) << "do not have region for resolving reference" << endp;
+
+	/* Interpret escape sequences and remove quotes. The case-independence
+	 * result is not needed here. */
+	bool ignoredCI;
+	String literalText;
+	prepareLitString( literalText, ignoredCI, pdaLiteral->data,
+			pdaLiteral->loc );
+
+	for ( ; nspace != 0; nspace = nspace->parentNamespace ) {
+		LiteralDictEl *found = nspace->literalDict.find( literalText );
+		if ( found != 0 )
+			return pd->findUniqueType( TYPE_TREE, found->value->tokenDef->tdLangEl );
+
+		if ( nspaceQual->thisOnly() )
+			break;
+	}
+
+	error(loc) << "unknown type " << pdaLiteral->data << " in typeof expression" << endp;
+	return 0;
+}
+
+/*
+ * Find-or-insert in the compiler's unique generic map. On return inMap points
+ * at the map's entry for searchKey. Returns true when the entry had to be
+ * created, false when it already existed.
+ */
+bool TypeRef::uniqueGeneric( UniqueGeneric *&inMap, Compiler *pd,
+		const UniqueGeneric &searchKey )
+{
+	inMap = pd->uniqueGenericMap.find( &searchKey );
+	if ( inMap != 0 )
+		return false;
+
+	inMap = new UniqueGeneric( searchKey );
+	pd->uniqueGenericMap.insert( inMap );
+	return true;
+}
+
+/*
+ * Declare a fresh struct, named list_el_<n>, to act as a list element
+ * wrapping valType. The struct gets a "value" field of type valType and a
+ * generic-element field of the list-pointers pseudo type. The struct is
+ * appended to the root namespace and declared there.
+ *
+ * Returns the StructEl produced by declareStruct().
+ */
+StructEl *TypeRef::declareListEl( Compiler *pd, TypeRef *valType )
+{
+	static long vlistElId = 1;
+
+	/* The counter is a long, so format it with %ld. Passing a long where %d
+	 * expects an int is undefined behaviour (assumes the String constructor
+	 * forwards to a printf-style formatter). */
+	String name( 32, "list_el_%ld", vlistElId++ );
+	ObjectDef *objectDef = ObjectDef::cons( ObjectDef::StructType,
+			name, pd->nextObjectId++ );
+
+	StructDef *structDef = new StructDef( loc, name, objectDef );
+
+	pd->rootNamespace->structDefList.append( structDef );
+
+	/* Value Element. */
+	String id = "value";
+	ObjectField *elValObjField = ObjectField::cons( internal,
+			ObjectField::StructFieldType, valType, id );
+
+	objectDef->rootScope->insertField( elValObjField->name, elValObjField );
+
+	/* Typeref for the struct. Used for pointers. */
+	NamespaceQual *nspaceQual = NamespaceQual::cons( pd->rootNamespace );
+	TypeRef *selfTypeRef = TypeRef::cons( InputLoc(), nspaceQual, name, RepeatNone );
+
+	/* Type ref for the list pointers pseudo type. */
+	TypeRef *elTr = TypeRef::cons( InputLoc(), TypeRef::ListPtrs, 0, selfTypeRef, 0 );
+
+	ObjectField *of = ObjectField::cons( InputLoc(),
+			ObjectField::GenericElementType, elTr, name );
+
+	objectDef->rootScope->insertField( of->name, of );
+
+	return declareStruct( pd, pd->rootNamespace, name, structDef );
+}
+
+/*
+ * Resolve the types referenced by each item of a constructor list. Literal
+ * items resolve their production element, expression items resolve their
+ * expression, and input-text items need nothing.
+ */
+void ConsItemList::resolve( Compiler *pd )
+{
+	for ( ConsItemList::Iter ci = first(); ci.lte(); ci++ ) {
+		if ( ci->type == ConsItem::LiteralType ) {
+			/* Use pdaFactor reference resolving. */
+			pd->resolveProdEl( ci->prodEl );
+		}
+		else if ( ci->type == ConsItem::ExprType ) {
+			ci->expr->resolve( pd );
+		}
+		/* ConsItem::InputText: nothing to resolve. */
+	}
+}
+
+/*
+ * Resolve a list-element type reference. One element struct is declared per
+ * distinct value type, and later references to the same value type reuse it.
+ */
+UniqueType *TypeRef::resolveTypeListEl( Compiler *pd )
+{
+	TypeRef *valueRef = typeRef1;
+	UniqueType *valueUt = valueRef->resolveType( pd );
+
+	UniqueGeneric key( UniqueGeneric::ListEl, valueUt );
+	UniqueGeneric *entry = 0;
+	bool created = uniqueGeneric( entry, pd, key );
+	if ( created )
+		entry->structEl = declareListEl( pd, valueRef );
+
+	return pd->findUniqueType( TYPE_STRUCT, entry->structEl );
+}
+
+/*
+ * Resolve a list type reference. The element type (typeRef1) must be a struct
+ * containing a field of the list-pointers type. The list generic is created
+ * and declared in the root namespace the first time a given element type is
+ * seen. Afterwards the existing generic is reused.
+ */
+UniqueType *TypeRef::resolveTypeList( Compiler *pd )
+{
+	nspace = pd->rootNamespace;
+
+	UniqueType *elemUt = typeRef1->resolveType( pd );
+	if ( elemUt->typeId != TYPE_STRUCT )
+		error( loc ) << "only structs can be list elements" << endp;
+
+	/* Find the list element field inside the struct. */
+	ObjectDef *elemObjDef = elemUt->structEl->structDef->objectDef;
+	UniqueType *listPtrsUt = pd->findUniqueType( TYPE_LIST_PTRS );
+	ObjectField *listEl = elemObjDef->findFieldType( pd, listPtrsUt );
+	if ( !listEl )
+		error( loc ) << "could not find list element in type ref" << endp;
+
+	UniqueGeneric key( UniqueGeneric::List, elemUt );
+	UniqueGeneric *entry = 0;
+	if ( uniqueGeneric( entry, pd, key ) ) {
+		/* First use of this list type: build and declare the generic. */
+		GenericType *created = new GenericType( GEN_LIST,
+				pd->nextGenericId++, typeRef1, 0, typeRef2, listEl );
+
+		nspace->genericList.append( created );
+		created->declare( pd, nspace );
+		entry->generic = created;
+	}
+
+	generic = entry->generic;
+	return pd->findUniqueType( TYPE_GENERIC, entry->generic );
+}
+
+/*
+ * Declare a fresh struct, named map_el_<n>, to act as a map element. The
+ * struct gets a "value" field of type valType and a generic-element field of
+ * the map-pointers type, which carries keyType. The struct is appended to the
+ * root namespace and declared there.
+ *
+ * Returns the StructEl produced by declareStruct().
+ */
+StructEl *TypeRef::declareMapElStruct( Compiler *pd, TypeRef *keyType, TypeRef *valType )
+{
+	static long vlistElId = 1;
+
+	/* The counter is a long, so format it with %ld. Passing a long where %d
+	 * expects an int is undefined behaviour (assumes the String constructor
+	 * forwards to a printf-style formatter). */
+	String name( 32, "map_el_%ld", vlistElId++ );
+	ObjectDef *objectDef = ObjectDef::cons( ObjectDef::StructType,
+			name, pd->nextObjectId++ );
+
+	StructDef *structDef = new StructDef( loc, name, objectDef );
+
+	pd->rootNamespace->structDefList.append( structDef );
+
+	/* Value Element. */
+	String id = "value";
+	ObjectField *elValObjField = ObjectField::cons( internal,
+			ObjectField::StructFieldType, valType, id );
+
+	objectDef->rootScope->insertField( elValObjField->name, elValObjField );
+
+	/* Typeref for the pointers. */
+	NamespaceQual *nspaceQual = NamespaceQual::cons( pd->rootNamespace );
+	TypeRef *selfTypeRef = TypeRef::cons( InputLoc(), nspaceQual, name, RepeatNone );
+
+	TypeRef *elTr = TypeRef::cons( InputLoc(), TypeRef::MapPtrs, 0, selfTypeRef, keyType );
+
+	ObjectField *of = ObjectField::cons( InputLoc(),
+			ObjectField::GenericElementType, elTr, name );
+
+	objectDef->rootScope->insertField( of->name, of );
+
+	StructEl *sel = declareStruct( pd, pd->rootNamespace, name, structDef );
+	return sel;
+}
+
+/*
+ * Resolve a map-element type reference. One element struct is declared per
+ * distinct (key type, value type) pair, and later references reuse it.
+ */
+UniqueType *TypeRef::resolveTypeMapEl( Compiler *pd )
+{
+	TypeRef *keyRef = typeRef1;
+	TypeRef *valueRef = typeRef2;
+
+	UniqueType *keyUt = keyRef->resolveType( pd );
+	UniqueType *valueUt = valueRef->resolveType( pd );
+
+	UniqueGeneric key( UniqueGeneric::MapEl, keyUt, valueUt );
+	UniqueGeneric *entry = 0;
+	bool created = uniqueGeneric( entry, pd, key );
+	if ( created )
+		entry->structEl = declareMapElStruct( pd, keyRef, valueRef );
+
+	return pd->findUniqueType( TYPE_STRUCT, entry->structEl );
+}
+
+
+/*
+ * Resolve a map type reference. typeRef1 is the key type and typeRef2 the
+ * element type, which must be a struct containing a field of the map-pointers
+ * type. The map generic is created and declared in the root namespace the
+ * first time a given (key, element) pair is seen.
+ */
+UniqueType *TypeRef::resolveTypeMap( Compiler *pd )
+{
+	nspace = pd->rootNamespace;
+
+	UniqueType *keyUt = typeRef1->resolveType( pd );
+	UniqueType *elemUt = typeRef2->resolveType( pd );
+	if ( elemUt->typeId != TYPE_STRUCT )
+		error( loc ) << "only structs can be map elements" << endp;
+
+	/* Find the map element field inside the struct. */
+	ObjectDef *elemObjDef = elemUt->structEl->structDef->objectDef;
+	UniqueType *mapPtrsUt = pd->findUniqueType( TYPE_MAP_PTRS );
+	ObjectField *mapEl = elemObjDef->findFieldType( pd, mapPtrsUt );
+	if ( !mapEl )
+		error( loc ) << "could not find map element in type ref" << endp;
+
+	UniqueGeneric key( UniqueGeneric::Map, keyUt, elemUt );
+	UniqueGeneric *entry = 0;
+	if ( uniqueGeneric( entry, pd, key ) ) {
+		/* First use of this map type: build and declare the generic. */
+		GenericType *created = new GenericType( GEN_MAP,
+				pd->nextGenericId++, typeRef2, typeRef1, typeRef3, mapEl );
+
+		nspace->genericList.append( created );
+		created->declare( pd, nspace );
+		entry->generic = created;
+	}
+
+	generic = entry->generic;
+	return pd->findUniqueType( TYPE_GENERIC, entry->generic );
+}
+
+/*
+ * Resolve a parser type reference. A parser generic is created and declared
+ * in the root namespace the first time a given parsed type (typeRef1) is
+ * seen. Later references reuse it.
+ */
+UniqueType *TypeRef::resolveTypeParser( Compiler *pd )
+{
+	nspace = pd->rootNamespace;
+
+	UniqueType *parsedUt = typeRef1->resolveType( pd );
+
+	UniqueGeneric key( UniqueGeneric::Parser, parsedUt );
+	UniqueGeneric *entry = 0;
+	if ( uniqueGeneric( entry, pd, key ) ) {
+		GenericType *created = new GenericType( GEN_PARSER,
+				pd->nextGenericId++, typeRef1, 0, 0, 0 );
+
+		nspace->genericList.append( created );
+		created->declare( pd, nspace );
+		entry->generic = created;
+	}
+
+	generic = entry->generic;
+	return pd->findUniqueType( TYPE_GENERIC, entry->generic );
+}
+
+
+/*
+ * End object based list/map
+ */
+
+/* Resolve a reference type: resolve the referenced type (typeRef1) and return
+ * the TYPE_REF unique type for its language element. */
+UniqueType *TypeRef::resolveTypeRef( Compiler *pd )
+{
+	/* resolveType() stores its result in typeRef1->uniqueType and returns it. */
+	UniqueType *target = typeRef1->resolveType( pd );
+	return pd->findUniqueType( TYPE_REF, target->langEl );
+}
+
+/*
+ * Replace uniqueType with the repeat/list/opt wrapper for the already
+ * resolved tree type. A wrapper language element is made once for each
+ * (repeat kind, language element) pair and remembered in the compiler's
+ * unique repeat map. Callers are expected to invoke this only when repeatType
+ * is not RepeatNone.
+ */
+void TypeRef::resolveRepeat( Compiler *pd )
+{
+	if ( uniqueType->typeId != TYPE_TREE )
+		error(loc) << "cannot repeat non-tree type" << endp;
+
+	UniqueRepeat key( repeatType, uniqueType->langEl );
+	UniqueRepeat *entry = pd->uniqeRepeatMap.find( &key );
+
+	if ( entry == 0 ) {
+		entry = new UniqueRepeat( repeatType, uniqueType->langEl );
+		pd->uniqeRepeatMap.insert( entry );
+
+		/* Create the wrapper element appropriate to the repeat kind and link
+		 * the factor to it. */
+		LangEl *wrapper = 0;
+		switch ( repeatType ) {
+			case RepeatNone:
+				break;
+			case RepeatOpt: {
+				String wrapperName( 128, "_opt_%s", typeName.data );
+				wrapper = pd->makeOptProd( loc, nspace, wrapperName, uniqueType );
+				break;
+			}
+			case RepeatRepeat: {
+				String wrapperName( 128, "_repeat_%s", typeName.data );
+				wrapper = pd->makeRepeatProd( loc, nspace, wrapperName, uniqueType, false );
+				break;
+			}
+			case RepeatLeftRepeat: {
+				String wrapperName( 128, "_lrepeat_%s", typeName.data );
+				wrapper = pd->makeRepeatProd( loc, nspace, wrapperName, uniqueType, true );
+				break;
+			}
+			case RepeatList: {
+				String wrapperName( 128, "_list_%s", typeName.data );
+				wrapper = pd->makeListProd( loc, nspace, wrapperName, uniqueType, false );
+				break;
+			}
+			case RepeatLeftList: {
+				String wrapperName( 128, "_llist_%s", typeName.data );
+				wrapper = pd->makeListProd( loc, nspace, wrapperName, uniqueType, true );
+				break;
+			}
+		}
+
+		entry->declLangEl = wrapper;
+		wrapper->repeatOf = entry->langEl;
+	}
+
+	uniqueType = pd->findUniqueType( TYPE_TREE, entry->declLangEl );
+}
+
+/*
+ * Resolve an iterator type reference. Resolves the search type, looks up the
+ * iterator call, and records the iterator definition and the search type on
+ * this type ref. Reports an error if the call is not an iterator.
+ */
+UniqueType *TypeRef::resolveIterator( Compiler *pd )
+{
+	UniqueType *resolvedSearch = searchTypeRef->resolveType( pd );
+
+	/* Lookup the iterator call. Make sure it is an iterator. */
+	VarRefLookup call = iterCall->langTerm->varRef->lookupIterCall( pd );
+	if ( call.objMethod->iterDef == 0 ) {
+		error(loc) << "attempt to iterate using something "
+				"that is not an iterator" << endp;
+	}
+
+	/* With the lookup done, the type for the object field can be made. */
+	UniqueType *result = pd->findUniqueType( TYPE_ITER, call.objMethod->iterDef );
+
+	iterDef = call.objMethod->iterDef;
+	searchUniqueType = resolvedSearch;
+
+	return result;
+}
+
+
+/*
+ * Resolve this type reference to a UniqueType, caching the result in
+ * uniqueType. The base type is resolved according to the reference kind,
+ * then any repeat modifier is applied on top.
+ */
+UniqueType *TypeRef::resolveType( Compiler *pd )
+{
+	/* Already resolved. */
+	if ( uniqueType != 0 )
+		return uniqueType;
+
+	switch ( type ) {
+		/* Kinds that were given a unique type at construction. */
+		case Unspecified:
+			break;
+
+		/* Fixed pointer pseudo types. */
+		case ListPtrs:
+			uniqueType = pd->findUniqueType( TYPE_LIST_PTRS );
+			break;
+		case MapPtrs:
+			uniqueType = pd->findUniqueType( TYPE_MAP_PTRS );
+			break;
+
+		/* Names and literals. */
+		case Name:
+			uniqueType = resolveTypeName( pd );
+			break;
+		case Literal:
+			uniqueType = resolveTypeLiteral( pd );
+			break;
+
+		/* References and iterators. */
+		case Ref:
+			uniqueType = resolveTypeRef( pd );
+			break;
+		case Iterator:
+			uniqueType = resolveIterator( pd );
+			break;
+
+		/* Generics. */
+		case Parser:
+			uniqueType = resolveTypeParser( pd );
+			break;
+		case List:
+			uniqueType = resolveTypeList( pd );
+			break;
+		case ListEl:
+			uniqueType = resolveTypeListEl( pd );
+			break;
+		case Map:
+			uniqueType = resolveTypeMap( pd );
+			break;
+		case MapEl:
+			uniqueType = resolveTypeMapEl( pd );
+			break;
+	}
+
+	if ( repeatType != RepeatNone )
+		resolveRepeat( pd );
+
+	return uniqueType;
+}
+
+/* Resolve the type of a production element and store the resulting language
+ * element on it. */
+void Compiler::resolveProdEl( ProdEl *prodEl )
+{
+	/* resolveType() stores its result in typeRef->uniqueType and returns it. */
+	UniqueType *resolved = prodEl->typeRef->resolveType( this );
+	prodEl->langEl = resolved->langEl;
+}
+
+/* Resolve the expressions of the field initialization arguments, if any. */
+void LangTerm::resolveFieldArgs( Compiler *pd )
+{
+	if ( fieldInitArgs == 0 )
+		return;
+
+	for ( FieldInitVect::Iter init = *fieldInitArgs; init.lte(); init++ )
+		(*init)->expr->resolve( pd );
+}
+
+/*
+ * Resolve every type reference and nested expression reachable from this
+ * term. What is visited depends on the kind of term.
+ */
+void LangTerm::resolve( Compiler *pd )
+{
+	switch ( type ) {
+		/* Terms with nothing to resolve. */
+		case VarRefType:
+		case NumberType:
+		case StringType:
+		case NilType:
+		case TrueType:
+		case FalseType:
+		case SendTreeType:
+		case EmbedStringType:
+			break;
+
+		/* Terms that carry only a type reference. */
+		case TypeIdType:
+		case SearchType:
+			typeRef->resolveType( pd );
+			break;
+
+		case ConstructType:
+			typeRef->resolveType( pd );
+			resolveFieldArgs( pd );
+
+			/* Types in constructor. */
+			constructor->list->resolve( pd );
+			break;
+
+		case NewType:
+			/* Init args, then the new type. */
+			resolveFieldArgs( pd );
+			typeRef->resolveType( pd );
+			break;
+
+		case CastType:
+			typeRef->resolveType( pd );
+			expr->resolve( pd );
+			break;
+
+		/* Call-like terms: resolve each argument expression. */
+		case MakeTreeType:
+		case MakeTokenType:
+		case MethodCallType:
+			if ( args != 0 ) {
+				for ( CallArgVect::Iter arg = *args; arg.lte(); arg++ )
+					(*arg)->expr->resolve( pd );
+			}
+			break;
+
+		case ProdCompareType:
+			/* If it has a match expression go into that. */
+			if ( expr != 0 )
+				expr->resolve( pd );
+			break;
+
+		case MatchType:
+			for ( PatternItemList::Iter pi = *pattern->list; pi.lte(); pi++ ) {
+				/* Only type-ref items need resolving; input text does not.
+				 * Use pdaFactor reference resolving. */
+				if ( pi->form == PatternItem::TypeRefForm )
+					pd->resolveProdEl( pi->prodEl );
+			}
+			break;
+
+		case ParseType:
+		case ParseTreeType:
+		case ParseStopType:
+			typeRef->resolveType( pd );
+			resolveFieldArgs( pd );
+			parserText->list->resolve( pd );
+			break;
+
+		case SendType:
+//			for ( CallArgVect::Iter pex = exprPtrVect->last(); pex.gtb(); pex-- )
+//				(*pex)->expr->resolve( pd );
+			parserText->list->resolve( pd );
+			break;
+	}
+}
+
+/* Variable references need no work in the resolve pass. */
+void LangVarRef::resolve( Compiler *pd ) const
+{
+	/* Intentionally empty. */
+}
+
+/* Resolve the operands of an expression: both sides of a binary, the right
+ * side of a unary, or the term of a term expression. */
+void LangExpr::resolve( Compiler *pd ) const
+{
+	if ( type == BinaryType ) {
+		left->resolve( pd );
+		right->resolve( pd );
+	}
+	else if ( type == UnaryType ) {
+		right->resolve( pd );
+	}
+	else if ( type == TermType ) {
+		term->resolve( pd );
+	}
+}
+
+/* Resolve whichever of the two forms this iterator call takes: a call term
+ * or a plain expression. */
+void IterCall::resolve( Compiler *pd ) const
+{
+	if ( form == Call )
+		langTerm->resolve( pd );
+	else if ( form == Expr )
+		langExpr->resolve( pd );
+}
+
+/* Resolve a for-iterator statement: the iterator call, the search type, the
+ * iterator object field's type, and then the loop body. */
+void LangStmt::resolveForIter( Compiler *pd ) const
+{
+	iterCall->resolve( pd );
+
+	/* Search type ref, then the iterator type ref. */
+	typeRef->resolveType( pd );
+	objField->typeRef->resolveType( pd );
+
+	/* Body statements. */
+	StmtList::Iter body = *stmtList;
+	while ( body.lte() ) {
+		body->resolve( pd );
+		body++;
+	}
+}
+
+/*
+ * Resolve the expressions, type references and nested statements of a
+ * statement, according to its kind.
+ */
+void LangStmt::resolve( Compiler *pd ) const
+{
+	switch ( type ) {
+		/* Statements consisting of a single expression. */
+		case ExprType:
+		case AssignType:
+		case ReturnType:
+			expr->resolve( pd );
+			break;
+
+		case IfType:
+			/* The test, the true branch, then any else part. */
+			expr->resolve( pd );
+			for ( StmtList::Iter s = *stmtList; s.lte(); s++ )
+				s->resolve( pd );
+			if ( elsePart != 0 )
+				elsePart->resolve( pd );
+			break;
+
+		case WhileType:
+			/* The condition, then the loop body. */
+			expr->resolve( pd );
+			for ( StmtList::Iter s = *stmtList; s.lte(); s++ )
+				s->resolve( pd );
+			break;
+
+		case ElseType:
+			for ( StmtList::Iter s = *stmtList; s.lte(); s++ )
+				s->resolve( pd );
+			break;
+
+		case ForIterType:
+			resolveForIter( pd );
+			break;
+
+		case YieldType:
+			/* Only the yielded variable reference is visited. */
+			varRef->resolve( pd );
+			break;
+
+		/* Nothing to resolve. */
+		case RejectType:
+		case BreakType:
+			break;
+	}
+}
+
+/* Resolve the type of every field in this object that has a type ref. */
+void ObjectDef::resolve( Compiler *pd )
+{
+	for ( FieldList::Iter it = fieldList; it.lte(); it++ ) {
+		TypeRef *fieldType = it->value->typeRef;
+		if ( fieldType == 0 )
+			continue;
+		fieldType->resolveType( pd );
+	}
+}
+
+/* Resolve a code block: its local frame, when present, then each of its
+ * statements in order. */
+void CodeBlock::resolve( Compiler *pd ) const
+{
+	if ( localFrame != 0 )
+		localFrame->resolve( pd );
+
+	StmtList::Iter s = *stmtList;
+	while ( s.lte() ) {
+		s->resolve( pd );
+		s++;
+	}
+}
+
+/* Resolve a function: its return type (if any), each parameter type, and then
+ * its code block. */
+void Compiler::resolveFunction( Function *func )
+{
+	if ( func->typeRef != 0 )
+		func->typeRef->resolveType( this );
+
+	ParameterList::Iter p = *func->paramList;
+	while ( p.lte() ) {
+		p->typeRef->resolveType( this );
+		p++;
+	}
+
+	func->codeBlock->resolve( this );
+}
+
+/* Resolve an in-host function: only its return type (if any) and parameter
+ * types. No code block is visited here. */
+void Compiler::resolveInHost( Function *func )
+{
+	if ( func->typeRef != 0 )
+		func->typeRef->resolveType( this );
+
+	ParameterList::Iter p = *func->paramList;
+	while ( p.lte() ) {
+		p->typeRef->resolveType( this );
+		p++;
+	}
+}
+
+
+/* Resolve the pre-eof code block of a token region. The caller checks that
+ * the block exists. */
+void Compiler::resolvePreEof( TokenRegion *region )
+{
+	region->preEofBlock->resolve( this );
+}
+
+/* Resolve the program's root code block. */
+void Compiler::resolveRootBlock()
+{
+	rootCodeBlock->resolve( this );
+}
+
+/* Resolve the token translation block of a language element. The caller
+ * checks that the block exists. */
+void Compiler::resolveTranslateBlock( LangEl *langEl )
+{
+	langEl->transBlock->resolve( this );
+}
+
+/* Resolve the reduction code block of a production. The caller checks that
+ * the block exists. */
+void Compiler::resolveReductionCode( Production *prod )
+{
+	prod->redBlock->resolve( this );
+}
+
+/*
+ * Resolve types throughout the parsed program. Covers functions, in-host
+ * functions, reduction code, token translation blocks, pre-eof blocks, the
+ * root block and root local frame, and finally the fields of every language
+ * element object, struct and the global object.
+ */
+void Compiler::resolveParseTree()
+{
+	/* Functions, then in-host functions. */
+	for ( FunctionList::Iter fn = functionList; fn.lte(); fn++ )
+		resolveFunction( fn );
+
+	for ( FunctionList::Iter fn = inHostList; fn.lte(); fn++ )
+		resolveInHost( fn );
+
+	/* Reduction code attached to productions. */
+	for ( DefList::Iter def = prodList; def.lte(); def++ ) {
+		if ( def->redBlock != 0 )
+			resolveReductionCode( def );
+	}
+
+	/* Token translation code. */
+	for ( LelList::Iter el = langEls; el.lte(); el++ ) {
+		if ( el->transBlock != 0 )
+			resolveTranslateBlock( el );
+	}
+
+	/* Pre-eof blocks. */
+	for ( RegionList::Iter reg = regionList; reg.lte(); reg++ ) {
+		if ( reg->preEofBlock != 0 )
+			resolvePreEof( reg );
+	}
+
+	/* The init code and the root frame. */
+	resolveRootBlock( );
+	rootLocalFrame->resolve( this );
+
+	/* All user object fields (need consistent size). */
+	for ( LelList::Iter el = langEls; el.lte(); el++ ) {
+		ObjectDef *elObj = el->objectDef;
+		if ( elObj == 0 )
+			continue;
+
+		for ( FieldList::Iter fld = elObj->fieldList; fld.lte(); fld++ )
+			fld->value->typeRef->resolveType( this );
+	}
+
+	/* Fields of every struct. */
+	for ( StructElList::Iter st = structEls; st.lte(); st++ ) {
+		ObjectDef *stObj = st->structDef->objectDef;
+		for ( FieldList::Iter fld = stObj->fieldList; fld.lte(); fld++ )
+			fld->value->typeRef->resolveType( this );
+	}
+
+	/* Fields of the global object. */
+	for ( FieldList::Iter fld = globalObjectDef->fieldList; fld.lte(); fld++ )
+		fld->value->typeRef->resolveType( this );
+}
+
+/* Resolves the elements of every production and, for productions with no
+ * explicit precedence, derives one from the production's elements. */
+void Compiler::resolveProductionEls()
+{
+	/* NOTE: the production list may grow while it is being walked. */
+	for ( DefList::Iter def = prodList; def.lte(); def++ ) {
+		/* Resolve every element first. */
+		for ( ProdElList::Iter el = *def->prodElList; el.lte(); el++ )
+			resolveProdEl( el );
+
+		/* An explicit precedence is left alone. */
+		if ( def->predOf != 0 )
+			continue;
+
+		/* Scan from the last element backwards. The production inherits the
+		 * precedence of the last element that has one. */
+		for ( ProdElList::Iter el = def->prodElList->last(); el.gtb(); el-- ) {
+			if ( el->langEl->predType != PredNone ) {
+				def->predOf = el->langEl;
+				break;
+			}
+		}
+	}
+}
+
+/*
+ * For each nonterminal in the grammar make a terminal language element named
+ * _T_<name>, cross-linked with the nonterminal through termDup. The new
+ * elements are appended to langEls while it is being walked. They are
+ * terminals, so they are skipped when the walk reaches them.
+ */
+void Compiler::makeTerminalWrappers()
+{
+	for ( LelList::Iter el = langEls; el.lte(); el++ ) {
+		if ( el->type != LangEl::NonTerm )
+			continue;
+
+		String dupName( el->name.length() + 5, "_T_%s", el->name.data );
+		LangEl *wrapper = new LangEl( el->nspace, dupName, LangEl::Term );
+
+		/* Give the dup the attributes of the nonterminal. This ensures
+		 * that the attributes are allocated when patterns and
+		 * constructors are parsed. */
+		wrapper->objectDef = el->objectDef;
+
+		langEls.append( wrapper );
+		el->termDup = wrapper;
+		wrapper->termDup = el;
+	}
+}
+
+/*
+ * Make an eof language element, named _eof_<name>, for each language element
+ * that does not have one yet, excluding the eof, error and no-token elements.
+ * This is more than is needed and should be reduced to the ones parsers are
+ * built for, but that is not known yet. Another pass before this one would be
+ * required.
+ */
+void Compiler::makeEofElements()
+{
+	for ( LelList::Iter el = langEls; el.lte(); el++ ) {
+		bool wantsEof =
+				el->eofLel == 0 &&
+				el != eofLangEl &&
+				el != errorLangEl &&
+				el != noTokenLangEl /* &&
+				!( el->tokenInstance == 0 || el->tokenInstance->dupOf == 0 ) */;
+		if ( !wantsEof )
+			continue;
+
+		String eofName( el->name.length() + 5, "_eof_%s", el->name.data );
+		LangEl *made = new LangEl( el->nspace, eofName, LangEl::Term );
+
+		/* Cross-link the pair. The back-link also stops the new element,
+		 * appended to the list being walked, from getting its own eof. */
+		langEls.append( made );
+		el->eofLel = made;
+		made->eofLel = el;
+		made->isEOF = true;
+	}
+}
+
+/* For each precedence declaration, resolve its type and copy the declared
+ * precedence type and value onto the resulting language element. */
+void Compiler::resolvePrecedence()
+{
+	for ( PredDeclList::Iter decl = predDeclList; decl != 0; decl++ ) {
+		decl->typeRef->resolveType( this );
+
+		LangEl *target = decl->typeRef->uniqueType->langEl;
+		target->predType = decl->predType;
+		target->predValue = decl->predValue;
+	}
+}
+
+/* Resolve the nonterminal type references named by every reduction's
+ * nonterminal entries and reduce actions. */
+void Compiler::resolveReductionActions()
+{
+	for ( ReductionVect::Iter red = rootNamespace->reductions; red.lte(); red++ ) {
+		Reduction *reduction = *red;
+
+		for ( ReduceNonTermList::Iter nt = reduction->reduceNonTerms; nt.lte(); nt++ )
+			nt->nonTerm->resolveType( this );
+
+		for ( ReduceActionList::Iter act = reduction->reduceActions; act.lte(); act++ )
+			act->nonTerm->resolveType( this );
+	}
+}
+
+/* Return the first production of langEl whose name equals label, or 0 when
+ * no named production matches. */
+Production *Compiler::findProductionByLabel( LangEl *langEl, String label )
+{
+	for ( LelDefList::Iter def = langEl->defList; def.lte(); def++ ) {
+		/* Unnamed productions cannot match. */
+		if ( def->_name == 0 )
+			continue;
+
+		if ( strcmp( def->_name, label ) == 0 )
+			return def;
+	}
+	return 0;
+}
+
+/* Bind every reduce action to the production it names, looked up by label
+ * within the action's nonterminal. Reports an error if none is found. */
+void Compiler::findReductionActionProds()
+{
+	for ( ReductionVect::Iter red = rootNamespace->reductions; red.lte(); red++ ) {
+		for ( ReduceActionList::Iter act = (*red)->reduceActions; act.lte(); act++ ) {
+			act->nonTerm->resolveType( this );
+			LangEl *owner = act->nonTerm->uniqueType->langEl;
+
+			Production *match = findProductionByLabel( owner, act->prod );
+			if ( match == 0 ) {
+				error(act->loc) << "could not find production \"" <<
+						act->prod << "\"" << endp;
+			}
+
+			act->production = match;
+		}
+	}
+}
+
+/*
+ * For each parser text that reduces, find the named reduction, store its id
+ * on the parser text, and mark the reduction as postfix based (when the
+ * parser text reads) or parser based (otherwise).
+ */
+void Compiler::resolveReducers()
+{
+	for ( ParserTextList::Iter text = parserTextList; text.lte(); text++ ) {
+		if ( !text->reduce )
+			continue;
+
+		Reduction *target = rootNamespace->findReduction( text->reducer );
+		if ( target == 0 ) {
+			error ( text->loc ) << "could not locate reduction \"" <<
+					text->reducer << "\"" << endp;
+		}
+
+		text->reducerId = target->id;
+
+		/* Indicate which type of reducing we need. */
+		if ( text->read )
+			target->postfixBased = true;
+		else
+			target->parserBased = true;
+	}
+}
+
+/*
+ * Run the type resolution pass. Handles precedence declarations, the parse
+ * tree, the argv and stds element structs, reduction actions, production
+ * elements, reduce-action productions and reducers, in that order.
+ */
+void Compiler::resolvePass()
+{
+	resolvePrecedence();
+	resolveParseTree();
+
+	/* Record the element structs of the argv and stds generics. */
+	UniqueType *argvType = argvTypeRef->resolveType( this );
+	argvElSel = argvType->generic->elUt->structEl;
+
+	UniqueType *stdsType = stdsTypeRef->resolveType( this );
+	stdsElSel = stdsType->generic->elUt->structEl;
+
+	resolveReductionActions();
+
+	/* This must be the last step of type resolution. Any type resolve can
+	 * create new language elements with associated productions, which get
+	 * tacked onto the end of the production list. Doing it last means the
+	 * walk processes a list that grows only from its own work. */
+	resolveProductionEls();
+
+	findReductionActionProds();
+	resolveReducers();
+}
diff --git a/src/rtvector.h b/src/rtvector.h
new file mode 100644
index 00000000..e15d3f2a
--- /dev/null
+++ b/src/rtvector.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2002-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _COLM_RTVECTOR_H
+#define _COLM_RTVECTOR_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _COLM_RTVECTOR_H */
+
diff --git a/src/stream.c b/src/stream.c
new file mode 100644
index 00000000..77779aae
--- /dev/null
+++ b/src/stream.c
@@ -0,0 +1,828 @@
+/*
+ * Copyright 2007-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <colm/input.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <unistd.h>
+#include <stdbool.h>
+
+#include <colm/pdarun.h>
+#include <colm/debug.h>
+#include <colm/program.h>
+#include <colm/tree.h>
+#include <colm/bytecode.h>
+#include <colm/pool.h>
+#include <colm/struct.h>
+
+DEF_STREAM_FUNCS( stream_funcs_data, stream_impl_data );
+
+extern struct stream_funcs_data file_funcs;
+extern struct stream_funcs_data accum_funcs;
+
+/* Record the value ll on the stream's stack of line lengths so it can be
+ * handed back by stream_impl_pop_line. The backing array starts with 16
+ * entries and doubles whenever it is full. */
+void stream_impl_push_line( struct stream_impl_data *ss, int ll )
+{
+	if ( ss->line_len == 0 ) {
+		ss->lines_cur = 0;
+		ss->lines_alloc = 16;
+		ss->line_len = malloc( sizeof(int) * ss->lines_alloc );
+	}
+	else if ( ss->lines_cur == ss->lines_alloc ) {
+		int lines_alloc_new = ss->lines_alloc * 2;
+		int *line_len_new = malloc( sizeof(int) * lines_alloc_new );
+		memcpy( line_len_new, ss->line_len, sizeof(int) * ss->lines_alloc );
+
+		/* Release the outgrown array; it was previously leaked on every
+		 * doubling. */
+		free( ss->line_len );
+
+		ss->lines_alloc = lines_alloc_new;
+		ss->line_len = line_len_new;
+	}
+
+	ss->line_len[ ss->lines_cur ] = ll;
+	ss->lines_cur += 1;
+}
+
+/* Pop the most recently pushed line length. Yields zero when the stack is
+ * empty. */
+int stream_impl_pop_line( struct stream_impl_data *ss )
+{
+	if ( ss->lines_cur <= 0 )
+		return 0;
+
+	ss->lines_cur -= 1;
+	return ss->line_len[ss->lines_cur];
+}
+
+/* Debug helper: emit one line per queued run buffer showing its offset, its
+ * length, the number of unconsumed bytes and the unconsumed bytes themselves. */
+static void dump_contents( struct colm_program *prg, struct stream_impl_data *sid )
+{
+	struct run_buf *rb = sid->queue.head;
+	while ( rb != 0 ) {
+		/* The precision for %.*s must be an int, and %d expects an int. Cast
+		 * the whole difference rather than only its first operand so the
+		 * arguments match the format whatever the field types are. */
+		int unread = (int)( rb->length - rb->offset );
+		debug( prg, REALM_INPUT, " %p contents |%d|%d|%d|%.*s|\n", sid,
+				(int)rb->offset, (int)rb->length,
+				unread,
+				unread,
+				rb->data + rb->offset );
+		rb = rb->next;
+	}
+}
+
+/* A location counts as set once its line number is nonzero. */
+static bool loc_set( location_t *loc )
+{
+	if ( loc->line == 0 )
+		return false;
+	return true;
+}
+
+/* Close a stream's FILE unless it is one of the standard streams, identified
+ * either by pointer or by descriptor number 0, 1 or 2. */
+static void close_stream_file( FILE *file )
+{
+	if ( file == stdin || file == stdout || file == stderr )
+		return;
+
+	int fd = fileno( file );
+	if ( fd == 0 || fd == 1 || fd == 2 )
+		return;
+
+	fclose( file );
+}
+
+/* Link run_buf in as the new last element of the stream's buffer queue. */
+static void si_data_push_tail( struct stream_impl_data *ss, struct run_buf *run_buf )
+{
+	run_buf->next = 0;
+
+	if ( ss->queue.head == 0 ) {
+		/* Empty queue: the buffer becomes the head as well. */
+		run_buf->prev = 0;
+		ss->queue.head = run_buf;
+	}
+	else {
+		run_buf->prev = ss->queue.tail;
+		ss->queue.tail->next = run_buf;
+	}
+
+	ss->queue.tail = run_buf;
+}
+
+/* Unlink and return the last buffer of the queue. The queue must not be
+ * empty; the tail is dereferenced without a check. */
+static struct run_buf *si_data_pop_tail( struct stream_impl_data *ss )
+{
+	struct run_buf *popped = ss->queue.tail;
+	struct run_buf *new_tail = popped->prev;
+
+	ss->queue.tail = new_tail;
+	if ( new_tail != 0 )
+		new_tail->next = 0;
+	else
+		ss->queue.head = 0;
+
+	return popped;
+}
+
+
+/* Link run_buf in as the new first element of the stream's buffer queue. */
+static void si_data_push_head( struct stream_impl_data *ss, struct run_buf *run_buf )
+{
+	run_buf->prev = 0;
+
+	if ( ss->queue.head == 0 ) {
+		/* Empty queue: the buffer becomes the tail as well. */
+		run_buf->next = 0;
+		ss->queue.tail = run_buf;
+	}
+	else {
+		run_buf->next = ss->queue.head;
+		ss->queue.head->prev = run_buf;
+	}
+
+	ss->queue.head = run_buf;
+}
+
+/* Unlink and return the first buffer of the queue. The queue must not be
+ * empty; the head is dereferenced without a check. */
+static struct run_buf *si_data_pop_head( struct stream_impl_data *ss )
+{
+	struct run_buf *popped = ss->queue.head;
+	struct run_buf *new_head = popped->next;
+
+	ss->queue.head = new_head;
+	if ( new_head != 0 )
+		new_head->prev = 0;
+	else
+		ss->queue.tail = 0;
+
+	return popped;
+}
+
+
+/* Allocate a zero-filled run buffer. A request larger than FSM_BUFSIZE
+ * enlarges the allocation by the excess; smaller requests get the plain
+ * struct size. */
+struct run_buf *new_run_buf( int sz )
+{
+	int ssz = sizeof(struct run_buf);
+	if ( sz > FSM_BUFSIZE )
+		ssz = sizeof(struct run_buf) + sz - FSM_BUFSIZE;
+
+	struct run_buf *rb = (struct run_buf*) malloc( ssz );
+	memset( rb, 0, ssz );
+	return rb;
+}
+
+/* Advance the stream position over consumed text: each newline pushes the
+ * current column, bumps the line and resets the column to 1; any other
+ * character bumps the column. The byte count grows by length. */
+void update_position_data( struct stream_impl_data *is, const alph_t *data, long length )
+{
+	int pos = 0;
+	while ( pos < length ) {
+		if ( data[pos] != '\n' ) {
+			is->column += 1;
+		}
+		else {
+			stream_impl_push_line( is, is->column );
+			is->line += 1;
+			is->column = 1;
+		}
+		pos++;
+	}
+
+	is->byte += length;
+}
+
+/* Rewind the stream position over text that is being sent back: each newline
+ * drops the line and restores the column from the line-length stack; any
+ * other character drops the column. The byte count shrinks by length. */
+void undo_position_data( struct stream_impl_data *is, const alph_t *data, long length )
+{
+	/* FIXME: this needs to fetch the position information from the parsed
+	 * token and restore based on that.. */
+	int pos = 0;
+	while ( pos < length ) {
+		if ( data[pos] != '\n' ) {
+			is->column -= 1;
+		}
+		else {
+			is->line -= 1;
+			is->column = stream_impl_pop_line( is );
+		}
+		pos++;
+	}
+
+	is->byte -= length;
+}
+
+
+/*
+ * Interface
+ */
+
+/* Copy the stream's current name, line, column and byte into loc. */
+static void data_transfer_loc( struct colm_program *prg, location_t *loc,
+		struct stream_impl_data *ss )
+{
+	loc->byte = ss->byte;
+	loc->column = ss->column;
+	loc->line = ss->line;
+	loc->name = ss->name;
+}
+
+/*
+ * Data inputs: files, strings, etc.
+ */
+
+static int data_get_data( struct colm_program *prg, struct stream_impl_data *ss,
+ alph_t *dest, int length )
+{
+ int copied = 0;
+
+ /* Walk the queued buffers from the head, copying their unconsumed bytes
+ * into dest until length bytes have been gathered or the source runs dry.
+ * Buffer offsets are not advanced here, so nothing is consumed. Returns
+ * the number of bytes copied. */
+ struct run_buf *buf = ss->queue.head;
+ while ( true ) {
+ if ( buf == 0 ) {
+ /* Ran off the end of the in-memory buffers: pull another block
+ * from the underlying source and queue it at the tail. */
+ struct run_buf *run_buf = new_run_buf( 0 );
+ int received = ss->funcs->get_data_source( prg,
+ (struct stream_impl*)ss, run_buf->data, FSM_BUFSIZE );
+ if ( received == 0 ) {
+ free( run_buf ); /* source exhausted, return what we have */
+ break;
+ }
+
+ run_buf->length = received;
+ si_data_push_tail( ss, run_buf );
+
+ buf = run_buf;
+ }
+
+ int avail = buf->length - buf->offset;
+
+ /* Anything available in the current buffer. */
+ if ( avail > 0 ) {
+ /* The source data from the current buffer. */
+ alph_t *src = &buf->data[buf->offset];
+
+ int slen = avail < length ? avail : length;
+ memcpy( dest+copied, src, slen ) ;
+ copied += slen;
+ length -= slen;
+ }
+
+ if ( length == 0 ) {
+ //debug( REALM_INPUT, "exiting get data\n", length );
+ break;
+ }
+
+ buf = buf->next;
+ }
+
+ return copied;
+}
+
+/* When the stream has a positive consumed count, hand that count off to a
+ * fresh stream made by colm_impl_consumed and reset the count to zero.
+ * Returns the new stream, or 0 when nothing was consumed. */
+static struct stream_impl *data_split_consumed( program_t *prg, struct stream_impl_data *sid )
+{
+	if ( sid->consumed <= 0 )
+		return 0;
+
+	debug( prg, REALM_INPUT, "maybe split: consumed is > 0, splitting\n" );
+
+	struct stream_impl *split_off = colm_impl_consumed( "<text3>", sid->consumed );
+	sid->consumed = 0;
+	return split_off;
+}
+
+/* Append length bytes to the end of the stream's buffer queue. A new run
+ * buffer is allocated when the queue is empty or the tail buffer cannot take
+ * the data. Always returns length. */
+int data_append_data( struct colm_program *prg, struct stream_impl_data *sid,
+		const alph_t *data, int length )
+{
+	struct run_buf *dest = sid->queue.tail;
+
+	int need_buf = 1;
+	if ( dest != 0 && !( length > (FSM_BUFSIZE - dest->length) ) )
+		need_buf = 0;
+
+	if ( need_buf ) {
+		debug( prg, REALM_INPUT, "data_append_data: allocating run buf\n" );
+		dest = new_run_buf( length );
+		si_data_push_tail( sid, dest );
+	}
+
+	debug( prg, REALM_INPUT, "data_append_data: appending to "
+			"accum tail, offset: %d, length: %d, dlen: %d\n",
+			dest->offset, dest->length, length );
+
+	/* Write behind whatever the destination buffer already holds. */
+	memcpy( dest->data + dest->length, data, length );
+	dest->length += length;
+
+#ifdef DEBUG
+	dump_contents( prg, sid );
+#endif
+
+	return length;
+}
+
+/* Take back up to length bytes from the end of the stream's buffer queue.
+ * Bytes are trimmed from the tail buffer; a buffer that cannot satisfy the
+ * rest of the request is popped and freed and the walk continues with the
+ * new tail. Returns the number of bytes actually removed. */
+int data_undo_append_data( struct colm_program *prg, struct stream_impl_data *sid, int length )
+{
+	int consumed = 0;
+	int remaining = length;
+
+	/* Trim from the tail until the request is met or the queue is empty. */
+	while ( true ) {
+		struct run_buf *buf = sid->queue.tail;
+
+		if ( buf == 0 )
+			break;
+
+		/* Anything available in the current buffer. */
+		int avail = buf->length - buf->offset;
+		if ( avail > 0 ) {
+			/* Shorten the buffer by as much as it can contribute. */
+			int slen = avail <= remaining ? avail : remaining;
+			consumed += slen;
+			remaining -= slen;
+			buf->length -= slen;
+		}
+
+		if ( remaining == 0 )
+			break;
+
+		struct run_buf *run_buf = si_data_pop_tail( sid );
+		free( run_buf );
+	}
+
+	/* The format has four conversions; the value for "now" was previously
+	 * missing from the argument list. Supply the stream's consumed count,
+	 * as data_consume_data does. */
+	debug( prg, REALM_INPUT, "data_undo_append_data: stream %p "
+			"ask: %d, consumed: %d, now: %d\n", sid, length, consumed, sid->consumed );
+
+#ifdef DEBUG
+	dump_contents( prg, sid );
+#endif
+
+	return consumed;
+
+}
+
+/* Tear down a data stream: close its file (unless it is a standard stream),
+ * destroy its collect buffer, free every queued run buffer, the text data,
+ * the line-length stack and finally the stream struct itself. */
+static void data_destructor( program_t *prg, tree_t **sp, struct stream_impl_data *si )
+{
+	if ( si->file != 0 )
+		close_stream_file( si->file );
+
+	if ( si->collect != 0 ) {
+		str_collect_destroy( si->collect );
+		free( si->collect );
+	}
+
+	struct run_buf *buf = si->queue.head;
+	while ( buf != 0 ) {
+		struct run_buf *next = buf->next;
+		free( buf );
+		buf = next;
+	}
+
+	si->queue.head = 0;
+
+	if ( si->data != 0 )
+		free( (char*)si->data );
+
+	/* The line-length stack is allocated lazily by stream_impl_push_line and
+	 * was never released. free() accepts a null pointer. */
+	free( si->line_len );
+	si->line_len = 0;
+
+	/* FIXME: Need to leak this for now. Until we can return strings to a
+	 * program loader and free them at a later date (after the colm program is
+	 * deleted). */
+	// if ( si->name != 0 )
+	// 	free( si->name );
+
+	free( si );
+}
+
+/* Accessor for the stream's collect buffer; may be null. */
+static str_collect_t *data_get_collect( struct colm_program *prg, struct stream_impl_data *si )
+{
+	str_collect_t *collect = si->collect;
+	return collect;
+}
+
+/* Flush the stream's FILE. Streams without a file are left alone. */
+static void data_flush_stream( struct colm_program *prg, struct stream_impl_data *si )
+{
+	if ( si->file == 0 )
+		return;
+
+	fflush( si->file );
+}
+
+/* Close the stream's FILE through close_stream_file and forget the handle.
+ * Streams without a file are left alone. */
+static void data_close_stream( struct colm_program *prg, struct stream_impl_data *si )
+{
+	if ( si->file == 0 )
+		return;
+
+	close_stream_file( si->file );
+	si->file = 0;
+}
+
+/* Report the stream's auto_trim flag. The option argument is not examined. */
+static int data_get_option( struct colm_program *prg, struct stream_impl_data *si, int option )
+{
+	int current = si->auto_trim;
+	return current;
+}
+
+/* Store value, normalised to 0 or 1, as the stream's auto_trim flag. The
+ * option argument is not examined. */
+static void data_set_option( struct colm_program *prg, struct stream_impl_data *si, int option, int value )
+{
+	if ( value )
+		si->auto_trim = 1;
+	else
+		si->auto_trim = 0;
+}
+
+/* Print a tree to the stream: file-backed streams take precedence, otherwise
+ * the collect buffer is used. A stream with neither prints nothing. */
+static void data_print_tree( struct colm_program *prg, tree_t **sp,
+		struct stream_impl_data *si, tree_t *tree, int trim )
+{
+	if ( si->file != 0 ) {
+		colm_print_tree_file( prg, sp, si, tree, trim );
+		return;
+	}
+
+	if ( si->collect != 0 )
+		colm_print_tree_collect( prg, sp, si->collect, tree, trim );
+}
+
+static int data_get_parse_block( struct colm_program *prg, struct stream_impl_data *ss,
+ int *pskip, alph_t **pdp, int *copied )
+{
+ int ret = 0;
+ *copied = 0;
+
+ /* Find the first block of unconsumed data lying *pskip bytes past the
+ * current position. On success *pdp points at the data inside the run
+ * buffer (no copy is made), *copied holds its length and INPUT_DATA is
+ * returned. INPUT_EOD is returned when the source has nothing more. */
+ struct run_buf *buf = ss->queue.head;
+ while ( true ) {
+ if ( buf == 0 ) {
+ /* Ran off the end of the in-memory buffers: read a fresh block from
+ * the underlying source.
+ * NOTE(review): *pskip is not applied to freshly read data here;
+ * confirm callers never arrive with a nonzero skip remaining. */
+ struct run_buf *run_buf = new_run_buf( 0 );
+ int received = ss->funcs->get_data_source( prg,
+ (struct stream_impl*)ss, run_buf->data, FSM_BUFSIZE );
+ if ( received == 0 ) {
+ free( run_buf );
+ ret = INPUT_EOD;
+ break;
+ }
+
+ run_buf->length = received;
+ si_data_push_tail( ss, run_buf );
+
+ int slen = received;
+ *pdp = run_buf->data;
+ *copied = slen;
+ ret = INPUT_DATA;
+ break;
+ }
+
+ int avail = buf->length - buf->offset;
+
+ /* Anything available in the current buffer. */
+ if ( avail > 0 ) {
+ /* The source data from the current buffer. */
+ alph_t *src = &buf->data[buf->offset];
+
+ /* Need to skip? */
+ if ( *pskip > 0 && *pskip >= avail ) {
+ /* Skipping the whole buffer; move on to the next one. */
+ *pskip -= avail;
+ }
+ else {
+ /* Either skip is zero, or less than avail. Skip goes to zero.
+ * Some data is left over; hand it out in place. */
+ src += *pskip;
+ avail -= *pskip;
+ *pskip = 0;
+
+ int slen = avail;
+ *pdp = src;
+ *copied += slen;
+ ret = INPUT_DATA;
+ break;
+ }
+ }
+
+ buf = buf->next;
+ }
+
+ return ret;
+}
+
+static int data_consume_data( struct colm_program *prg, struct stream_impl_data *sid,
+ int length, location_t *loc )
+{
+ int consumed = 0;
+ int remaining = length;
+
+ /* Consume up to length bytes from the front of the queue, advancing buffer
+ * offsets and the stream position. Buffers that cannot satisfy the rest of
+ * the request are popped and freed. Returns the number of bytes consumed. */
+ while ( true ) {
+ struct run_buf *buf = sid->queue.head;
+
+ if ( buf == 0 )
+ break;
+
+ /* Anything available in the current buffer. */
+ int avail = buf->length - buf->offset;
+ if ( avail > 0 ) {
+
+ /* Capture the position before it is advanced, unless the caller's
+ * location has already been filled in. */
+ if ( !loc_set( loc ) )
+ data_transfer_loc( prg, loc, sid );
+
+ /* Take as much of this buffer as the request still needs. */
+ int slen = avail <= remaining ? avail : remaining;
+ consumed += slen;
+ remaining -= slen;
+ update_position_data( sid, buf->data + buf->offset, slen );
+ buf->offset += slen;
+ sid->consumed += slen;
+ }
+
+ if ( remaining == 0 )
+ break;
+
+ struct run_buf *run_buf = si_data_pop_head( sid );
+ free( run_buf );
+ }
+
+ debug( prg, REALM_INPUT, "data_consume_data: stream %p "
+ "ask: %d, consumed: %d, now: %d\n", sid, length, consumed, sid->consumed );
+
+#ifdef DEBUG
+ dump_contents( prg, sid );
+#endif
+
+ return consumed;
+}
+
+/* Push previously consumed bytes back onto the front of the stream. At most
+ * sid->consumed bytes are restored, taken from the end of data. The bytes are
+ * first written back into the already-consumed space of the head buffer; any
+ * rest goes into a new buffer pushed in front of it. The stream position and
+ * consumed count are rewound to match. Returns the number of bytes restored. */
+static int data_undo_consume_data( struct colm_program *prg, struct stream_impl_data *sid,
+		const alph_t *data, int length )
+{
+	const alph_t *end = data + length;
+	int amount = length;
+	if ( amount > sid->consumed )
+		amount = sid->consumed;
+
+	int remaining = amount;
+	struct run_buf *head = sid->queue.head;
+	if ( head != 0 && head->offset > 0 ) {
+		/* Fill into the offset space. */
+		int fill = remaining > head->offset ? head->offset : remaining;
+		end -= fill;
+		remaining -= fill;
+
+		undo_position_data( sid, end, fill );
+		memcpy( head->data + (head->offset - fill), end, fill );
+
+		head->offset -= fill;
+		sid->consumed -= fill;
+	}
+
+	if ( remaining > 0 ) {
+		end -= remaining;
+
+		/* Size the buffer for what it must hold; a default-sized buffer
+		 * would be overrun when remaining exceeds FSM_BUFSIZE. */
+		struct run_buf *new_buf = new_run_buf( remaining );
+		new_buf->length = remaining;
+		undo_position_data( sid, end, remaining );
+		memcpy( new_buf->data, end, remaining );
+		si_data_push_head( sid, new_buf );
+
+		/* Only the part not already accounted for by the fill above.
+		 * Subtracting the whole amount double-counted the fill. */
+		sid->consumed -= remaining;
+	}
+
+	debug( prg, REALM_INPUT, "data_undo_consume_data: stream %p "
+			"undid consume %d of %d bytes, consumed now %d, \n",
+			sid, amount, length, sid->consumed );
+
+#ifdef DEBUG
+	dump_contents( prg, sid );
+#endif
+
+	return amount;
+}
+
+/*
+ * File Inputs
+ */
+
+/* Data source for file streams: read up to length bytes from the stream's
+ * FILE into dest and return how many were read. */
+static int file_get_data_source( struct colm_program *prg, struct stream_impl_data *si,
+		alph_t *dest, int length )
+{
+	size_t got = fread( dest, 1, length, si->file );
+	return got;
+}
+
+/*
+ * Text inputs
+ */
+
+/* Data source for text streams: copy up to want bytes of the not yet
+ * delivered part of si->data into dest, advance the offset and return the
+ * number of bytes copied. */
+static int accum_get_data_source( struct colm_program *prg, struct stream_impl_data *si,
+		alph_t *dest, int want )
+{
+	long left = si->dlen - si->offset;
+
+	long take = want;
+	if ( left < want )
+		take = left;
+
+	if ( take > 0 )
+		memcpy( dest, si->data + si->offset, take );
+
+	si->offset += take;
+	return take;
+}
+
+/* Read the eof_sent flag of an input sequence. */
+char stream_get_eof_sent( struct colm_program *prg, struct input_impl_seq *si )
+{
+	char sent = si->eof_sent;
+	return sent;
+}
+
+/* Store the eof_sent flag of an input sequence. */
+void stream_set_eof_sent( struct colm_program *prg,
+		struct input_impl_seq *si, char eof_sent )
+{
+	si->eof_sent = eof_sent;
+}
+
+struct stream_funcs_data file_funcs = /* function table for FILE-backed streams */
+{
+ &data_get_parse_block,
+ &data_get_data,
+ &file_get_data_source, /* the only entry that differs from accum_funcs */
+
+ &data_consume_data,
+ &data_undo_consume_data,
+
+ &data_transfer_loc,
+ &data_get_collect,
+ &data_flush_stream,
+ &data_close_stream,
+ &data_print_tree,
+
+ &data_split_consumed,
+ &data_append_data,
+ &data_undo_append_data,
+ &data_destructor,
+
+ &data_get_option,
+ &data_set_option,
+};
+
+struct stream_funcs_data accum_funcs = /* function table for in-memory (text/accum/collect) streams */
+{
+ &data_get_parse_block,
+ &data_get_data,
+ &accum_get_data_source, /* the only entry that differs from file_funcs */
+
+ &data_consume_data,
+ &data_undo_consume_data,
+
+ &data_transfer_loc,
+ &data_get_collect,
+ &data_flush_stream,
+ &data_close_stream,
+ &data_print_tree,
+
+ &data_split_consumed,
+ &data_append_data,
+ &data_undo_append_data,
+ &data_destructor,
+
+ &data_get_option,
+ &data_set_option,
+};
+
+/* Zero a data stream and give it its starting state: type 'D', the supplied
+ * name, position line 1 / column 1 / byte 0, indentation off. */
+static void si_data_init( struct stream_impl_data *is, char *name )
+{
+	memset( is, 0, sizeof(struct stream_impl_data) );
+
+	is->name = name;
+	is->type = 'D';
+
+	/* Starting position. */
+	is->byte = 0;
+	is->line = 1;
+	is->column = 1;
+
+	/* Indentation turned off. */
+	is->indent.indent = 0;
+	is->indent.level = COLM_INDENT_OFF;
+}
+
+/* Create an empty data stream that uses the accum function table. */
+struct stream_impl *colm_impl_new_accum( char *name )
+{
+	struct stream_impl_data *accum;
+
+	accum = (struct stream_impl_data*) malloc( sizeof(struct stream_impl_data) );
+	si_data_init( accum, name );
+	accum->funcs = (struct stream_funcs*)&accum_funcs;
+
+	return (struct stream_impl*)accum;
+}
+
+/* Create a data stream over an already opened FILE, using the file function
+ * table. */
+static struct stream_impl *colm_impl_new_file( char *name, FILE *file )
+{
+	struct stream_impl_data *fs;
+
+	fs = (struct stream_impl_data*) malloc( sizeof(struct stream_impl_data) );
+	si_data_init( fs, name );
+	fs->file = file;
+	fs->funcs = (struct stream_funcs*)&file_funcs;
+
+	return (struct stream_impl*)fs;
+}
+
+/* Create a file stream over a descriptor. Descriptor 0 is opened for
+ * reading, every other descriptor for writing. */
+static struct stream_impl *colm_impl_new_fd( char *name, long fd )
+{
+	struct stream_impl_data *fs;
+	const char *mode = "w";
+
+	if ( fd == 0 )
+		mode = "r";
+
+	fs = (struct stream_impl_data*) malloc( sizeof(struct stream_impl_data) );
+	si_data_init( fs, name );
+	fs->funcs = (struct stream_funcs*)&file_funcs;
+	fs->file = fdopen( fd, mode );
+
+	return (struct stream_impl*)fs;
+}
+
+/* Create a data-less accum stream that stands for len bytes already
+ * consumed: consumed, offset and dlen are all set to len. */
+struct stream_impl *colm_impl_consumed( char *name, int len )
+{
+	struct stream_impl_data *spent;
+
+	spent = (struct stream_impl_data*) malloc( sizeof(struct stream_impl_data) );
+	si_data_init( spent, name );
+	spent->funcs = (struct stream_funcs*)&accum_funcs;
+
+	spent->data = 0;
+	spent->dlen = len;
+	spent->offset = len;
+	spent->consumed = len;
+
+	return (struct stream_impl*)spent;
+}
+
+/* Create an accum stream over a private copy of len bytes of data. When loc
+ * is given, the stream's line, column and byte start from it. */
+struct stream_impl *colm_impl_new_text( char *name, struct colm_location *loc, const alph_t *data, int len )
+{
+	struct stream_impl_data *text;
+
+	text = (struct stream_impl_data*) malloc( sizeof(struct stream_impl_data) );
+	si_data_init( text, name );
+	text->funcs = (struct stream_funcs*)&accum_funcs;
+
+	/* The stream owns its own copy of the text. */
+	alph_t *copy = (alph_t*)malloc( len );
+	memcpy( copy, data, len );
+	text->data = copy;
+	text->dlen = len;
+
+	if ( loc != 0 ) {
+		text->byte = loc->byte;
+		text->column = loc->column;
+		text->line = loc->line;
+	}
+
+	return (struct stream_impl*)text;
+}
+
+/* Create an accum stream that owns a freshly initialised collect buffer. */
+struct stream_impl *colm_impl_new_collect( char *name )
+{
+	struct stream_impl_data *cs;
+
+	cs = (struct stream_impl_data*) malloc( sizeof(struct stream_impl_data) );
+	si_data_init( cs, name );
+	cs->funcs = (struct stream_funcs*)&accum_funcs;
+
+	cs->collect = (struct colm_str_collect*) malloc( sizeof( struct colm_str_collect ) );
+	init_str_collect( cs->collect );
+
+	return (struct stream_impl*)cs;
+}
+
+/* Fetch the implementation object behind a stream struct. */
+struct stream_impl *stream_to_impl( stream_t *ptr )
+{
+	struct stream_impl *impl = ptr->impl;
+	return impl;
+}
+
+/* Build a string tree from a copy of the bytes gathered in the stream's
+ * collect buffer. */
+str_t *collect_string( program_t *prg, stream_t *s )
+{
+	str_collect_t *gathered = s->impl->funcs->get_collect( prg, s->impl );
+	head_t *copy = string_alloc_full( prg, gathered->data, gathered->length );
+	return (str_t*)construct_string( prg, copy );
+}
+
+/* Create a stream struct over a file descriptor. The name is passed through
+ * colm_filename_add before use. */
+stream_t *colm_stream_open_fd( program_t *prg, char *name, long fd )
+{
+	char *added_name = colm_filename_add( prg, name );
+	struct stream_impl *impl = colm_impl_new_fd( added_name, fd );
+
+	struct colm_stream *stream = colm_stream_new_struct( prg );
+	stream->impl = impl;
+	return stream;
+}
+
+/* Open the named file as a stream. The mode must be exactly "r", "w" or "a"
+ * and maps to the binary fopen modes "rb", "wb" and "ab". Any other mode,
+ * including the empty string, is reported through fatal(). Returns the new
+ * stream, or 0 when the file cannot be opened. */
+stream_t *colm_stream_open_file( program_t *prg, tree_t *name, tree_t *mode )
+{
+	head_t *head_name = ((str_t*)name)->value;
+	head_t *head_mode = ((str_t*)mode)->value;
+	stream_t *stream = 0;
+
+	const char *given_mode = string_data(head_mode);
+	long mode_len = string_length(head_mode);
+	const char *fopen_mode = 0;
+
+	/* Compare the single mode character directly. The earlier memcmp used
+	 * the mode's own length, so an empty mode matched "r" and a long mode
+	 * read past the one-character literal. */
+	if ( mode_len == 1 && given_mode[0] == 'r' )
+		fopen_mode = "rb";
+	else if ( mode_len == 1 && given_mode[0] == 'w' )
+		fopen_mode = "wb";
+	else if ( mode_len == 1 && given_mode[0] == 'a' )
+		fopen_mode = "ab";
+	else {
+		/* Strings are not null terminated here, so bound the print. */
+		fatal( "unknown file open mode: %.*s\n", (int)mode_len,
+				given_mode != 0 ? given_mode : "" );
+
+		/* Never call fopen with a null mode, should fatal() return. */
+		return 0;
+	}
+
+	/* Need to make a C-string (null terminated). */
+	char *file_name = malloc(string_length(head_name)+1);
+	memcpy( file_name, string_data(head_name), string_length(head_name) );
+	file_name[string_length(head_name)] = 0;
+
+	FILE *file = fopen( file_name, fopen_mode );
+	if ( file != 0 ) {
+		stream = colm_stream_new_struct( prg );
+		stream->impl = colm_impl_new_file( colm_filename_add( prg, file_name ), file );
+	}
+
+	free( file_name );
+
+	return stream;
+}
+
+
+/* Struct destructor for streams: run the destructor from the stream
+ * implementation's function table. */
+void colm_stream_destroy( program_t *prg, tree_t **sp, struct_t *s )
+{
+	struct stream_impl *impl = ((stream_t*) s)->impl;
+	impl->funcs->destructor( prg, sp, impl );
+}
+
+/* Allocate a zeroed stream struct, add it to the program via colm_struct_add,
+ * and set its struct id and destructor. */
+stream_t *colm_stream_new_struct( program_t *prg )
+{
+	struct colm_stream *stream;
+
+	stream = (struct colm_stream*) malloc( sizeof(struct colm_stream) );
+	memset( stream, 0, sizeof(struct colm_stream) );
+
+	colm_struct_add( prg, (struct colm_struct *)stream );
+
+	stream->id = prg->rtd->struct_stream_id;
+	stream->destructor = &colm_stream_destroy;
+	return stream;
+}
+
+/* Create a stream struct backed by a collect buffer, named "<internal>". */
+stream_t *colm_stream_open_collect( program_t *prg )
+{
+	char *added_name = colm_filename_add( prg, "<internal>" );
+	struct stream_impl *impl = colm_impl_new_collect( added_name );
+
+	struct colm_stream *stream = colm_stream_new_struct( prg );
+	stream->impl = impl;
+	return stream;
+}
+
diff --git a/src/string.c b/src/string.c
new file mode 100644
index 00000000..d1d16aa9
--- /dev/null
+++ b/src/string.c
@@ -0,0 +1,281 @@
+/*
+ * Copyright 2007-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+
+#include <colm/pool.h>
+#include <colm/pdarun.h>
+#include <colm/bytecode.h>
+
+/* New string tree holding a copy of the first len bytes of str. len is not
+ * checked against the string's length. */
+str_t *string_prefix( program_t *prg, str_t *str, long len )
+{
+	const char *start = str->value->data;
+	head_t *copy = string_alloc_full( prg, start, len );
+	return (str_t*)construct_string( prg, copy );
+}
+
+/* New string tree holding a copy of str from byte pos to the end. pos is not
+ * checked against the string's length. */
+str_t *string_suffix( program_t *prg, str_t *str, long pos )
+{
+	head_t *src = str->value;
+	long len = src->length - pos;
+	head_t *copy = string_alloc_full( prg, src->data + pos, len );
+	return (str_t*)construct_string( prg, copy );
+}
+
+/* Wrap a string head in a newly allocated tree node with id LEL_ID_STR. */
+tree_t *construct_string( program_t *prg, head_t *s )
+{
+	str_t *node = (str_t*) tree_allocate( prg );
+
+	node->value = s;
+	node->id = LEL_ID_STR;
+
+	return (tree_t*)node;
+}
+
+
+/*
+ * In this system strings are not null terminated. Often strings come from a
+ * parse, in which case the string is just a pointer into the data stream.
+ * A block in a parsed stream can hold many tokens and there is no room
+ * allocated for nulls.
+ */
+
+/* Duplicate a string head. A head whose data sits directly behind it gets a
+ * full copy of the bytes; any other head gets a new head pointing at the same
+ * data. The location, when present, is duplicated as well. Null in, null out. */
+head_t *string_copy( program_t *prg, head_t *head )
+{
+	if ( head == 0 )
+		return 0;
+
+	head_t *dup;
+	if ( (char*)(head+1) != head->data )
+		dup = colm_string_alloc_pointer( prg, head->data, head->length );
+	else
+		dup = string_alloc_full( prg, head->data, head->length );
+
+	if ( head->location != 0 ) {
+		dup->location = location_allocate( prg );
+		dup->location->name = head->location->name;
+		dup->location->line = head->location->line;
+		dup->location->column = head->location->column;
+		dup->location->byte = head->location->byte;
+	}
+
+	return dup;
+}
+
+/* Release a string head together with its location. A head whose data sits
+ * directly behind it goes back through free(); any other head goes back
+ * through head_free(). Null is ignored. */
+void string_free( program_t *prg, head_t *head )
+{
+	if ( head == 0 )
+		return;
+
+	if ( head->location != 0 )
+		location_free( prg, head->location );
+
+	if ( (char*)(head+1) != head->data ) {
+		/* Just a string head. */
+		head_free( prg, head );
+	}
+	else {
+		/* Full string allocation. */
+		free( head );
+	}
+}
+
+/* Data pointer of a string head, or null for a null head. */
+const char *string_data( head_t *head )
+{
+	return head != 0 ? head->data : 0;
+}
+
+/* Length of a string head, or zero for a null head. */
+long string_length( head_t *head )
+{
+	return head != 0 ? head->length : 0;
+}
+
+/* Cut a string down to newlen bytes. Asserts that the string is not being
+ * made longer. */
+void string_shorten( head_t *head, long newlen )
+{
+	assert( head->length >= newlen );
+	head->length = newlen;
+}
+
+/* Allocate a string head with room for length bytes directly behind it. The
+ * data pointer is aimed at that room; the bytes are left uninitialised. */
+head_t *init_str_space( long length )
+{
+	/* One allocation holds the head followed by the string bytes. */
+	head_t *head = (head_t*) malloc( sizeof(head_t) + length );
+
+	head->location = 0;
+	head->length = length;
+	head->data = (char*)(head+1);
+
+	return head;
+}
+
+/* Create a string that owns a copy of length bytes taken from data. */
+head_t *string_alloc_full( program_t *prg, const char *data, long length )
+{
+	head_t *owned = init_str_space( length );
+
+	/* The bytes live directly behind the head. */
+	memcpy( (owned+1), data, length );
+
+	return owned;
+}
+
+/* Create a string head that points at existing data without copying it. */
+head_t *colm_string_alloc_pointer( program_t *prg, const char *data, long length )
+{
+	/* Only a head is allocated; the bytes stay where they are. */
+	head_t *ref = head_allocate( prg );
+
+	ref->length = length;
+	ref->data = data;
+
+	return ref;
+}
+
+/* Create a new string holding the bytes of s1 followed by the bytes of s2. */
+head_t *concat_str( head_t *s1, head_t *s2 )
+{
+	long first = s1->length;
+	long second = s2->length;
+
+	head_t *joined = init_str_space( first + second );
+	char *out = (char*)(joined+1);
+
+	memcpy( (joined+1), s1->data, first );
+	memcpy( out + first, s2->data, second );
+
+	return joined;
+}
+
+/* Create a new string of the same length as s with every byte passed
+ * through toupper(). */
+head_t *string_to_upper( head_t *s )
+{
+	/* Init space for the data. */
+	long len = s->length;
+	head_t *head = init_str_space( len );
+
+	/* Copy in the data. */
+	const char *src = s->data;
+	char *dst = (char*)(head+1);
+	long i;
+	for ( i = 0; i < len; i++ ) {
+		/* toupper() is only defined for values representable as unsigned
+		 * char (or EOF); a negative plain char must be converted first. */
+		*dst++ = toupper( (unsigned char)*src++ );
+	}
+
+	return head;
+}
+
+/* Create a new string of the same length as s with every byte passed
+ * through tolower(). */
+head_t *string_to_lower( head_t *s )
+{
+	/* Init space for the data. */
+	long len = s->length;
+	head_t *head = init_str_space( len );
+
+	/* Copy in the data. */
+	const char *src = s->data;
+	char *dst = (char*)(head+1);
+	long i;
+	for ( i = 0; i < len; i++ ) {
+		/* tolower() is only defined for values representable as unsigned
+		 * char (or EOF); a negative plain char must be converted first. */
+		*dst++ = tolower( (unsigned char)*src++ );
+	}
+
+	return head;
+}
+
+
+/* Compare two strings, ordering by length first and by memcmp of the bytes
+ * when the lengths are equal. Returns 0 when identical, -1 or a negative
+ * memcmp result when s1 orders first, 1 or a positive memcmp result when s2
+ * orders first. NOTE(review): the result is returned through word_t; confirm
+ * callers interpret it as signed. */
+word_t cmp_string( head_t *s1, head_t *s2 )
+{
+ if ( s1->length < s2->length )
+ return -1;
+ else if ( s1->length > s2->length )
+ return 1;
+ else {
+ char *d1 = (char*)(s1->data);
+ char *d2 = (char*)(s2->data);
+ return memcmp( d1, d2, s1->length );
+ }
+}
+
+/* Convert a string to an integer with atoi(), going through a temporary
+ * null-terminated copy. */
+word_t str_atoi( head_t *str )
+{
+	/* FIXME: need to implement this by hand. There is no null terminator. */
+	long len = str->length;
+	char *terminated = (char*)malloc( len + 1 );
+
+	memcpy( terminated, str->data, len );
+	terminated[len] = 0;
+
+	int value = atoi( terminated );
+	free( terminated );
+	return value;
+}
+
+/* Convert a string of octal digits to an integer with strtol() in base 8,
+ * going through a temporary null-terminated copy. */
+word_t str_atoo( head_t *str )
+{
+	/* FIXME: need to implement this by hand. There is no null terminator. */
+	long len = str->length;
+	char *terminated = (char*)malloc( len + 1 );
+
+	memcpy( terminated, str->data, len );
+	terminated[len] = 0;
+
+	int value = strtol( terminated, 0, 8 );
+	free( terminated );
+	return value;
+}
+
+/* Format an integer as a decimal string (printed with %ld) and return it as
+ * a newly allocated string head. */
+head_t *int_to_str( program_t *prg, word_t i )
+{
+	/* A 64-bit long needs up to 20 characters plus the terminator, which the
+	 * previous 20-byte buffer could not hold. snprintf also bounds the write. */
+	char data[32];
+	snprintf( data, sizeof(data), "%ld", (long)i );
+	return string_alloc_full( prg, data, strlen(data) );
+}
+
+/* Read the first two bytes of the string as an unsigned 16-bit value, high
+ * byte first. The string's length is not checked. */
+word_t str_uord16( head_t *head )
+{
+	uchar *bytes = (uchar*)(head->data);
+	ulong hi = ((ulong)bytes[0]) << 8;
+	ulong lo = (ulong)bytes[1];
+	return lo | hi;
+}
+
+/* Read the first byte of the string as an unsigned value. The string's
+ * length is not checked. */
+word_t str_uord8( head_t *head )
+{
+	uchar *bytes = (uchar*)(head->data);
+	return (ulong)bytes[0];
+}
+
+/* Create a pointer-style string head over entry offset of the program's
+ * literal tables; the literal bytes are not copied. */
+head_t *make_literal( program_t *prg, long offset )
+{
+	const char *lit = prg->rtd->litdata[offset];
+	long lit_len = prg->rtd->litlen[offset];
+	return colm_string_alloc_pointer( prg, lit, lit_len );
+}
+
+/* Format a single integer using the given format string and return the
+ * result as a newly allocated string head. The stored length excludes the
+ * terminator that snprintf writes. A formatting failure yields an empty
+ * string.
+ *
+ * NOTE(review): the format string is used as a printf format as-is. If it
+ * can come from untrusted input it must be validated to contain exactly one
+ * integer conversion; this function does not check. */
+head_t *string_sprintf( program_t *prg, str_t *format, long integer )
+{
+	head_t *format_head = format->value;
+	long format_len = string_length( format_head );
+
+	/* Strings are not null terminated in this system, but snprintf needs a
+	 * terminated format. Work from a terminated copy. */
+	char *fmt = (char*)malloc( format_len + 1 );
+	if ( format_len > 0 )
+		memcpy( fmt, string_data( format_head ), format_len );
+	fmt[format_len] = 0;
+
+	/* First pass measures, second pass writes. */
+	long written = snprintf( 0, 0, fmt, integer );
+	if ( written < 0 )
+		written = 0;
+
+	head_t *head = init_str_space( written+1 );
+	if ( snprintf( (char*)head->data, written+1, fmt, integer ) < 0 )
+		((char*)head->data)[0] = 0;
+	head->length -= 1;
+
+	free( fmt );
+	return head;
+}
diff --git a/src/struct.c b/src/struct.c
new file mode 100644
index 00000000..5ee58ed3
--- /dev/null
+++ b/src/struct.c
@@ -0,0 +1,185 @@
+/*
+ * Copyright 2016-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdbool.h>
+
+#include <colm/program.h>
+#include <colm/struct.h>
+
+#include "internal.h"
+#include "bytecode.h"
+
+/* Fetch the tree stored in field slot pos of the program's global struct. */
+struct colm_tree *colm_get_global( program_t *prg, long pos )
+{
+	tree_t *field = colm_struct_get_field( prg->global, tree_t*, pos );
+	return field;
+}
+
+/* Append item to the tail of the program's doubly linked heap list. */
+void colm_struct_add( program_t *prg, struct colm_struct *item )
+{
+	/* The new item always ends up last. */
+	item->next = 0;
+
+	if ( prg->heap.head == 0 ) {
+		/* Empty list: the item is both ends. */
+		item->prev = 0;
+		prg->heap.head = item;
+		prg->heap.tail = item;
+		return;
+	}
+
+	item->prev = prg->heap.tail;
+	prg->heap.tail->next = item;
+	prg->heap.tail = item;
+}
+
+/*
+ * Allocate a zero-filled struct with room for size pointer-sized fields after
+ * the header and link it into the program's heap list.
+ */
+struct colm_struct *colm_struct_new_size( program_t *prg, int size )
+{
+	size_t memsize = sizeof(struct colm_struct) + ( sizeof(tree_t*) * size );
+
+	/* calloc hands back zeroed storage, same as malloc followed by memset. */
+	struct colm_struct *item = (struct colm_struct*) calloc( 1, memsize );
+
+	colm_struct_add( prg, item );
+	return item;
+}
+
+/*
+ * Allocate a struct of the kind identified by id. The field count comes from
+ * the struct element table, which is indexed by id less the number of
+ * language elements.
+ */
+struct colm_struct *colm_struct_new( program_t *prg, int id )
+{
+	struct struct_el_info *sel = &prg->rtd->sel_info[id - prg->rtd->num_lang_els];
+	struct colm_struct *item = colm_struct_new_size( prg, sel->size );
+	item->id = id;
+	return item;
+}
+
+/* Look up the struct element info for id; the table is offset by the number
+ * of language elements. */
+struct struct_el_info *colm_sel_info( program_t *prg, int id )
+{
+	return prg->rtd->sel_info + ( id - prg->rtd->num_lang_els );
+}
+
+/*
+ * Release a struct. Inbuilt and stream structs run their destructor hook, if
+ * one is set. Every other struct has each of its tree fields downref'd. The
+ * struct's own memory is freed in both cases.
+ */
+void colm_struct_delete( program_t *prg, tree_t **sp, struct colm_struct *el )
+{
+	const bool inbuilt = el->id == prg->rtd->struct_inbuilt_id ||
+			el->id == prg->rtd->struct_stream_id;
+
+	if ( !inbuilt ) {
+		/* User struct: the element info lists which slots hold trees. */
+		struct struct_el_info *sel = colm_sel_info( prg, el->id );
+		int i = 0;
+		while ( i < sel->trees_len ) {
+			tree_t *tree = colm_struct_get_field( el, tree_t*, sel->trees[i] );
+			colm_tree_downref( prg, sp, tree );
+			i += 1;
+		}
+	}
+	else {
+		colm_destructor_t destructor = ((struct colm_inbuilt*)el)->destructor;
+		if ( destructor != 0 )
+			destructor( prg, sp, el );
+	}
+
+	free( el );
+}
+
+/*
+ * Destructor hook for parser structs: clears and frees the PDA run, then
+ * drops the reference held on the parse result.
+ */
+void colm_parser_destroy( program_t *prg, tree_t **sp, struct colm_struct *s )
+{
+	struct colm_parser *parser = (struct colm_parser*) s;
+	struct pda_run *pda_run = parser->pda_run;
+
+	/* The PDA run goes first. */
+	colm_pda_clear( prg, sp, pda_run );
+	free( pda_run );
+
+	/* Then the result tree. */
+	colm_tree_downref( prg, sp, parser->result );
+}
+
+/*
+ * Create a parser struct for the given generic. A PDA run is allocated and
+ * initialised from the program's PDA tables, then wrapped in a zero-filled
+ * parser that is linked into the heap list and given colm_parser_destroy as
+ * its destructor.
+ */
+parser_t *colm_parser_new( program_t *prg, struct generic_info *gi, int stop_id, int reducer )
+{
+	/* Start off the parsing process. */
+	struct pda_run *pda_run = malloc( sizeof(struct pda_run) );
+	colm_pda_init( prg, pda_run, prg->rtd->pda_tables,
+			gi->parser_id, stop_id, 0, 0, reducer );
+
+	/* Zeroed parser shell. */
+	struct colm_parser *parser =
+			(struct colm_parser*) calloc( 1, sizeof(struct colm_parser) );
+
+	parser->id = prg->rtd->struct_inbuilt_id;
+	parser->destructor = &colm_parser_destroy;
+	parser->pda_run = pda_run;
+
+	colm_struct_add( prg, (struct colm_struct*) parser );
+	return parser;
+}
+
+/*
+ * Walk the map's element list and drop the reference held on each key. The
+ * elements themselves are not released here (the mapElFree call that used to
+ * do so is disabled).
+ */
+void colm_map_destroy( program_t *prg, tree_t **sp, struct colm_struct *s )
+{
+	struct colm_map *map = (struct colm_map*) s;
+	map_el_t *el, *next;
+
+	for ( el = map->head; el != 0; el = next ) {
+		/* Grab the successor before touching the element. */
+		next = el->next;
+		colm_tree_downref( prg, sp, el->key );
+	}
+}
+
+/*
+ * Allocate an empty, zero-filled map, link it into the program's heap list
+ * and tag it as an inbuilt struct.
+ *
+ * NOTE(review): no destructor is installed, so colm_struct_delete will not
+ * call colm_map_destroy for maps created here -- confirm this is intended.
+ */
+map_t *colm_map_new( struct colm_program *prg )
+{
+	struct colm_map *map = (struct colm_map*) calloc( 1, sizeof(struct colm_map) );
+
+	map->id = prg->rtd->struct_inbuilt_id;
+	colm_struct_add( prg, (struct colm_struct *)map );
+	return map;
+}
+
+/*
+ * Construct a new map, list or parser according to the type recorded for
+ * generic_id. stop_id is only used for parsers. Returns null if the generic's
+ * type is none of those three.
+ */
+struct_t *colm_construct_generic( program_t *prg, long generic_id, int stop_id )
+{
+	struct generic_info *generic_info = &prg->rtd->generic_info[generic_id];
+
+	switch ( generic_info->type ) {
+		case GEN_MAP: {
+			map_t *map = colm_map_new( prg );
+			map->generic_info = generic_info;
+			return (struct_t*) map;
+		}
+		case GEN_LIST: {
+			list_t *list = colm_list_new( prg );
+			list->generic_info = generic_info;
+			return (struct_t*) list;
+		}
+		case GEN_PARSER: {
+			/* Parsers built this way have no reducer. */
+			parser_t *parser = colm_parser_new( prg, generic_info, stop_id, 0 );
+			parser->input = colm_input_new( prg );
+			return (struct_t*) parser;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Construct a parser for generic_id that uses the given reducer (and a stop
+ * id of zero), with a fresh input attached.
+ */
+struct_t *colm_construct_reducer( program_t *prg, long generic_id, int reducer_id )
+{
+	struct generic_info *generic_info = &prg->rtd->generic_info[generic_id];
+
+	parser_t *parser = colm_parser_new( prg, generic_info, 0, reducer_id );
+	parser->input = colm_input_new( prg );
+
+	return (struct_t*) parser;
+}
diff --git a/src/struct.h b/src/struct.h
new file mode 100644
index 00000000..13f78c40
--- /dev/null
+++ b/src/struct.h
@@ -0,0 +1,180 @@
+/*
+ * Copyright 2016-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _COLM_STRUCT_H
+#define _COLM_STRUCT_H
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/* Destructor hook carried by inbuilt structs. colm_struct_delete calls it,
+ * when non-null, for structs whose id is the inbuilt or stream id, before the
+ * struct's memory is freed. */
+typedef void (*colm_destructor_t)( struct colm_program *prg,
+ tree_t **sp, struct colm_struct *s );
+
+/* Header shared by every struct on the program's heap list: the struct id and
+ * the list links maintained by colm_struct_add. User struct fields follow the
+ * header as an array of pointer-sized slots (see the colm_struct_get_field
+ * family of macros). */
+struct colm_struct
+{
+ short id;
+ struct colm_struct *prev, *next;
+};
+
+/* Must overlay colm_struct. */
+/* Header of inbuilt structs: the common header plus the destructor hook. */
+struct colm_inbuilt
+{
+ short id;
+ struct colm_struct *prev, *next;
+ colm_destructor_t destructor;
+};
+
+/* Must overlay colm_inbuilt. */
+/* Parser object. pda_run is allocated by colm_parser_new and released by
+ * colm_parser_destroy, which also downrefs result. */
+typedef struct colm_parser
+{
+ short id;
+ struct colm_struct *prev, *next;
+ colm_destructor_t destructor;
+
+ struct pda_run *pda_run;
+ struct colm_input *input;
+ tree_t *result;
+} parser_t;
+
+/* Must overlay colm_inbuilt. */
+/* Input object: the inbuilt header plus a pointer to its implementation. */
+typedef struct colm_input
+{
+ short id;
+ struct colm_struct *prev, *next;
+ colm_destructor_t destructor;
+
+ struct input_impl *impl;
+} input_t;
+
+/* Must overlay colm_inbuilt. */
+/* Stream object: the inbuilt header plus a pointer to its implementation. */
+typedef struct colm_stream
+{
+ short id;
+ struct colm_struct *prev, *next;
+ colm_destructor_t destructor;
+
+ struct stream_impl *impl;
+} stream_t;
+
+/* Size of a list element in pointer-sized slots (its two link pointers). */
+#define COLM_LIST_EL_SIZE 2
+/* Links embedded in a struct so that it can be a member of a list. */
+typedef struct colm_list_el
+{
+ struct colm_list_el *list_next;
+ struct colm_list_el *list_prev;
+} list_el_t;
+
+/* Must overlay colm_inbuilt. */
+/* List object: head and tail of the element chain, the element count and the
+ * generic info the list was constructed from. */
+typedef struct colm_list
+{
+ short id;
+ struct colm_struct *prev, *next;
+ colm_destructor_t destructor;
+
+ list_el_t *head, *tail;
+ long list_len;
+ struct generic_info *generic_info;
+} list_t;
+
+/* Node embedded in a struct so that it can be a member of a map. It carries
+ * the key, the tree links with the node height, and the links of the ordered
+ * element list that colm_map_destroy walks. */
+typedef struct colm_map_el
+{
+	tree_t *key;
+
+	struct colm_map_el *left, *right, *parent;
+	long height;
+
+	struct colm_map_el *next, *prev;
+} map_el_t;
+
+/* Size of a map element in pointer-sized slots. The struct tag is spelled
+ * out so that the macro is valid in C as well as in C++. */
+#define COLM_MAP_EL_SIZE ( sizeof(struct colm_map_el) / sizeof(void*) )
+
+/* Map object. The leading fields match colm_inbuilt; colm_map_new tags maps
+ * with the inbuilt id, so colm_struct_delete reads the destructor slot through
+ * that layout. Holds the ends of the element list, the tree root, the element
+ * count and the generic info the map was constructed from. */
+typedef struct colm_map
+{
+ short id;
+ struct colm_struct *prev, *next;
+ colm_destructor_t destructor;
+
+ struct colm_map_el *head, *tail, *root;
+ long tree_size;
+ struct generic_info *generic_info;
+} map_t;
+
+/* Allocation and release of heap structs (defined in struct.c).
+ * colm_struct_new_size takes a field count, colm_struct_new a struct id;
+ * colm_struct_add links an item into the program's heap list and
+ * colm_struct_delete tears one down and frees it. */
+struct colm_struct *colm_struct_new_size( struct colm_program *prg, int size );
+struct colm_struct *colm_struct_new( struct colm_program *prg, int id );
+void colm_struct_add( struct colm_program *prg, struct colm_struct *item );
+void colm_struct_delete( struct colm_program *prg, struct colm_tree **sp,
+ struct colm_struct *el );
+
+/* NOTE(review): no definition of colm_struct_inbuilt is visible alongside the
+ * functions above -- confirm it is still implemented somewhere. */
+struct colm_struct *colm_struct_inbuilt( struct colm_program *prg, int size,
+ colm_destructor_t destructor );
+
+/*
+ * Field access for structs. Fields are pointer-sized slots laid out directly
+ * after the colm_struct header and are addressed by slot index. Macro
+ * arguments and results are fully parenthesised so that they combine safely
+ * with surrounding expressions and casts.
+ */
+
+/* Read slot `field` of obj, cast to `type`. */
+#define colm_struct_get_field( obj, type, field ) \
+	((type)(((void**)(((struct colm_struct*)(obj))+1))[field]))
+
+/* Store val, as a `type`, into slot `field` of obj. */
+#define colm_struct_set_field( obj, type, field, val ) \
+	(((type*)(((struct colm_struct*)(obj))+1))[field] = (val))
+
+/* Address of slot `field` of obj, cast to `type`. */
+#define colm_struct_get_addr( obj, type, field ) \
+	((type)(&(((void **)(((struct colm_struct*)(obj))+1))[field])))
+
+/* From the address of slot `field`, recover the start of the enclosing
+ * struct. The arithmetic is done in bytes on a char pointer rather than
+ * relying on void pointer arithmetic; the result is a void pointer. */
+#define colm_struct_container( el, field ) \
+	((void*)( ((char*)(el)) - ((field) * sizeof(void*)) - sizeof(struct colm_struct) ))
+
+/* Enclosing struct of a list or map element of the given generic. */
+#define colm_generic_el_container( prg, el, genId ) \
+	colm_struct_container( el, (prg)->rtd->generic_info[genId].el_offset )
+
+/* List element embedded in obj for the given generic. */
+#define colm_struct_to_list_el( prg, obj, genId ) \
+	colm_struct_get_addr( obj, list_el_t*, (prg)->rtd->generic_info[genId].el_offset )
+
+/* Map element embedded in obj for the given generic. */
+#define colm_struct_to_map_el( prg, obj, genId ) \
+	colm_struct_get_addr( obj, map_el_t*, (prg)->rtd->generic_info[genId].el_offset )
+
+/* Constructors for the inbuilt parser, input and stream structs. */
+parser_t *colm_parser_new( program_t *prg, struct generic_info *gi, int stop_id, int reducer );
+input_t *colm_input_new( struct colm_program *prg );
+stream_t *colm_stream_new_struct( struct colm_program *prg );
+
+/* List construction and access. */
+list_t *colm_list_new( struct colm_program *prg );
+struct colm_struct *colm_list_get( struct colm_program *prg, list_t *list,
+ word_t gen_id, word_t field );
+struct colm_struct *colm_list_el_get( struct colm_program *prg,
+ list_el_t *list_el, word_t gen_id, word_t field );
+list_el_t *colm_list_detach_head( list_t *list );
+list_el_t *colm_list_detach_tail( list_t *list );
+long colm_list_length( list_t *list );
+
+/* Map construction and access. */
+map_t *colm_map_new( struct colm_program *prg );
+struct colm_struct *colm_map_el_get( struct colm_program *prg,
+ map_el_t *map_el, word_t gen_id, word_t field );
+struct colm_struct *colm_map_get( struct colm_program *prg, map_t *map,
+ word_t gen_id, word_t field );
+
+/* Construct the map, list or parser described by a generic id; the reducer
+ * variant always builds a parser bound to reducer_id. */
+struct colm_struct *colm_construct_generic( struct colm_program *prg, long generic_id, int stop_id );
+struct colm_struct *colm_construct_reducer( struct colm_program *prg, long generic_id, int reducer_id );
+/* Accessors from an input or stream struct to its implementation. */
+struct input_impl *input_to_impl( input_t *ptr );
+struct stream_impl *stream_to_impl( stream_t *ptr );
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif /* _COLM_STRUCT_H */
+
diff --git a/src/synthesis.cc b/src/synthesis.cc
new file mode 100644
index 00000000..17c2440a
--- /dev/null
+++ b/src/synthesis.cc
@@ -0,0 +1,3370 @@
+/*
+ * Copyright 2007-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stdbool.h>
+#include <iostream>
+#include "compiler.h"
+
+using std::cout;
+using std::cerr;
+using std::endl;
+
+/* True when ut is a tree type whose language element is the string element. */
+bool isStr( UniqueType *ut )
+{
+	if ( ut->typeId != TYPE_TREE )
+		return false;
+	if ( ut->langEl == 0 )
+		return false;
+	return ut->langEl->id == LEL_ID_STR;
+}
+
+/* True when ut is a tree type. */
+bool isTree( UniqueType *ut )
+{
+	const bool tree = ( ut->typeId == TYPE_TREE );
+	return tree;
+}
+
+/* Iterator definition for a built-in iterator type: no user function. */
+IterDef::IterDef( Type type )
+	: type(type), func(0)
+{
+}
+
+/* Iterator definition bound to a user iterator function. */
+IterDef::IterDef( Type type, Function *func )
+	: type(type), func(func)
+{
+}
+
+/*
+ * Select the bytecode instructions for one of the built-in iterator types.
+ *
+ * Each case fills in the create, unwind, destroy and advance instructions and
+ * the current-element accessors. Tree-style iterators set useSearchUT; list
+ * and map iterators set useGenericId instead. User iterators are not handled
+ * here (the case asserts); their IN_UITER instructions are supplied by the
+ * constructor that also takes a Function.
+ *
+ * NOTE(review): for the list and map iterators the inSetCurWC and
+ * inRefFromCur assignments are commented out, as is inGetCurWC for all but
+ * MapVal. This constructor does not otherwise initialise those members, so
+ * they look unset for these types -- confirm nothing emits them.
+ */
+IterImpl::IterImpl( Type type ) :
+ type(type),
+ func(0),
+ useFuncId(false),
+ useSearchUT(false),
+ useGenericId(false)
+{
+ switch ( type ) {
+ /* Tree-style iterators share the TRITER accessors and differ mainly in the
+ * advance instruction. */
+ case Tree:
+ inCreateWV = IN_TRITER_FROM_REF;
+ inCreateWC = IN_TRITER_FROM_REF;
+ inUnwind = IN_TRITER_UNWIND;
+ inDestroy = IN_TRITER_DESTROY;
+ inAdvance = IN_TRITER_ADVANCE;
+
+ inGetCurR = IN_TRITER_GET_CUR_R;
+ inGetCurWC = IN_TRITER_GET_CUR_WC;
+ inSetCurWC = IN_TRITER_SET_CUR_WC;
+ inRefFromCur = IN_TRITER_REF_FROM_CUR;
+ useSearchUT = true;
+ break;
+
+ case Child:
+ inCreateWV = IN_TRITER_FROM_REF;
+ inCreateWC = IN_TRITER_FROM_REF;
+ inUnwind = IN_TRITER_UNWIND;
+ inDestroy = IN_TRITER_DESTROY;
+ inAdvance = IN_TRITER_NEXT_CHILD;
+
+ inGetCurR = IN_TRITER_GET_CUR_R;
+ inGetCurWC = IN_TRITER_GET_CUR_WC;
+ inSetCurWC = IN_TRITER_SET_CUR_WC;
+ inRefFromCur = IN_TRITER_REF_FROM_CUR;
+ useSearchUT = true;
+ break;
+
+ /* The reverse child iterator has its own create, unwind, destroy and
+ * advance instructions but reuses the TRITER accessors. */
+ case RevChild:
+ inCreateWV = IN_REV_TRITER_FROM_REF;
+ inCreateWC = IN_REV_TRITER_FROM_REF;
+ inUnwind = IN_REV_TRITER_UNWIND;
+ inDestroy = IN_REV_TRITER_DESTROY;
+ inAdvance = IN_REV_TRITER_PREV_CHILD;
+
+ inGetCurR = IN_TRITER_GET_CUR_R;
+ inGetCurWC = IN_TRITER_GET_CUR_WC;
+ inSetCurWC = IN_TRITER_SET_CUR_WC;
+ inRefFromCur = IN_TRITER_REF_FROM_CUR;
+ useSearchUT = true;
+ break;
+
+ case Repeat:
+ inCreateWV = IN_TRITER_FROM_REF;
+ inCreateWC = IN_TRITER_FROM_REF;
+ inUnwind = IN_TRITER_UNWIND;
+ inDestroy = IN_TRITER_DESTROY;
+ inAdvance = IN_TRITER_NEXT_REPEAT;
+
+ inGetCurR = IN_TRITER_GET_CUR_R;
+ inGetCurWC = IN_TRITER_GET_CUR_WC;
+ inSetCurWC = IN_TRITER_SET_CUR_WC;
+ inRefFromCur = IN_TRITER_REF_FROM_CUR;
+ useSearchUT = true;
+ break;
+
+ case RevRepeat:
+ inCreateWV = IN_TRITER_FROM_REF;
+ inCreateWC = IN_TRITER_FROM_REF;
+ inUnwind = IN_TRITER_UNWIND;
+ inDestroy = IN_TRITER_DESTROY;
+ inAdvance = IN_TRITER_PREV_REPEAT;
+
+ inGetCurR = IN_TRITER_GET_CUR_R;
+ inGetCurWC = IN_TRITER_GET_CUR_WC;
+ inSetCurWC = IN_TRITER_SET_CUR_WC;
+ inRefFromCur = IN_TRITER_REF_FROM_CUR;
+ useSearchUT = true;
+ break;
+
+ /* List and map iterators share the GEN_ITER create, unwind and destroy
+ * instructions. The El variants read the current item with
+ * IN_GEN_ITER_GET_CUR_R, the Val variants with IN_GEN_VITER_GET_CUR_R. */
+ case ListEl:
+ inCreateWV = IN_GEN_ITER_FROM_REF;
+ inCreateWC = IN_GEN_ITER_FROM_REF;
+ inUnwind = IN_GEN_ITER_UNWIND;
+ inDestroy = IN_GEN_ITER_DESTROY;
+ inAdvance = IN_LIST_ITER_ADVANCE;
+
+ inGetCurR = IN_GEN_ITER_GET_CUR_R;
+// inGetCurWC = //IN_LIST_ITER_GET_CUR_WC;
+// inSetCurWC = //IN_HALT;
+// inRefFromCur = //IN_LIST_ITER_REF_FROM_CUR;
+ useGenericId = true;
+ break;
+
+ case ListVal:
+ inCreateWV = IN_GEN_ITER_FROM_REF;
+ inCreateWC = IN_GEN_ITER_FROM_REF;
+ inUnwind = IN_GEN_ITER_UNWIND;
+ inDestroy = IN_GEN_ITER_DESTROY;
+ inAdvance = IN_LIST_ITER_ADVANCE;
+
+ inGetCurR = IN_GEN_VITER_GET_CUR_R;
+// inGetCurWC = //IN_LIST_ITER_GET_CUR_WC;
+// inSetCurWC = //IN_HALT;
+// inRefFromCur = //IN_LIST_ITER_REF_FROM_CUR;
+ useGenericId = true;
+ break;
+
+ case RevListVal:
+ inCreateWV = IN_GEN_ITER_FROM_REF;
+ inCreateWC = IN_GEN_ITER_FROM_REF;
+ inUnwind = IN_GEN_ITER_UNWIND;
+ inDestroy = IN_GEN_ITER_DESTROY;
+ inAdvance = IN_REV_LIST_ITER_ADVANCE;
+
+ inGetCurR = IN_GEN_VITER_GET_CUR_R;
+// inGetCurWC = //IN_LIST_ITER_GET_CUR_WC;
+// inSetCurWC = //IN_HALT;
+// inRefFromCur = //IN_LIST_ITER_REF_FROM_CUR;
+ useGenericId = true;
+ break;
+
+
+ case MapVal:
+ inCreateWV = IN_GEN_ITER_FROM_REF;
+ inCreateWC = IN_GEN_ITER_FROM_REF;
+ inUnwind = IN_GEN_ITER_UNWIND;
+ inDestroy = IN_GEN_ITER_DESTROY;
+ inAdvance = IN_MAP_ITER_ADVANCE;
+
+ inGetCurR = IN_GEN_VITER_GET_CUR_R;
+ /* The write-access getter is mapped to the read instruction here. */
+ inGetCurWC = IN_GEN_VITER_GET_CUR_R; //IN_HALT; //IN_LIST_ITER_GET_CUR_WC;
+// inSetCurWC = IN_HALT;//IN_HALT;
+// inRefFromCur = IN_HALT;//IN_LIST_ITER_REF_FROM_CUR;
+ useGenericId = true;
+ break;
+
+ case MapEl:
+ inCreateWV = IN_GEN_ITER_FROM_REF;
+ inCreateWC = IN_GEN_ITER_FROM_REF;
+ inUnwind = IN_GEN_ITER_UNWIND;
+ inDestroy = IN_GEN_ITER_DESTROY;
+ inAdvance = IN_MAP_ITER_ADVANCE;
+
+ inGetCurR = IN_GEN_ITER_GET_CUR_R;
+// inGetCurWC = //IN_LIST_ITER_GET_CUR_WC;
+// inSetCurWC = //IN_HALT;
+// inRefFromCur = //IN_LIST_ITER_REF_FROM_CUR;
+ useGenericId = true;
+ break;
+
+ /* User iterators must be built with the other constructor. */
+ case User:
+ assert(false);
+ }
+}
+
+/*
+ * Iterator implementation for a user-defined iterator function. Every
+ * instruction slot is bound to the corresponding IN_UITER instruction; the
+ * function id and the search type are in use, the generic id is not.
+ */
+IterImpl::IterImpl( Type type, Function *func ) :
+ type(type),
+ func(func),
+ useFuncId(true),
+ useSearchUT(true),
+ useGenericId(false),
+ inCreateWV(IN_UITER_CREATE_WV),
+ inCreateWC(IN_UITER_CREATE_WC),
+ inUnwind(IN_UITER_UNWIND),
+ inDestroy(IN_UITER_DESTROY),
+ inAdvance(IN_UITER_ADVANCE),
+ inGetCurR(IN_UITER_GET_CUR_R),
+ inGetCurWC(IN_UITER_GET_CUR_WC),
+ inSetCurWC(IN_UITER_SET_CUR_WC),
+ inRefFromCur(IN_UITER_REF_FROM_CUR)
+{}
+
+/* Return the shared IterDef for (type, func), adding it to the set on first use. */
+IterDef *Compiler::findIterDef( IterDef::Type type, Function *func )
+{
+	IterDefSetEl *found = iterDefSet.find( IterDef( type, func ) );
+	if ( found != 0 )
+		return &found->key;
+
+	IterDefSetEl *added = iterDefSet.insert( IterDef( type, func ) );
+	return &added->key;
+}
+
+/* Return the shared IterDef for a built-in iterator type, adding it to the
+ * set on first use. */
+IterDef *Compiler::findIterDef( IterDef::Type type )
+{
+	IterDefSetEl *found = iterDefSet.find( IterDef( type ) );
+	if ( found != 0 )
+		return &found->key;
+
+	IterDefSetEl *added = iterDefSet.insert( IterDef( type ) );
+	return &added->key;
+}
+
+/* Return the canonical UniqueType for a bare type id, creating and
+ * registering it if the map does not hold one yet. */
+UniqueType *Compiler::findUniqueType( enum TYPE typeId )
+{
+	UniqueType searchKey( typeId );
+	UniqueType *existing = uniqeTypeMap.find( &searchKey );
+	if ( existing != 0 )
+		return existing;
+
+	UniqueType *created = new UniqueType( typeId );
+	uniqeTypeMap.insert( created );
+	return created;
+}
+
+/* Return the canonical UniqueType for (typeId, langEl), creating and
+ * registering it if the map does not hold one yet. */
+UniqueType *Compiler::findUniqueType( enum TYPE typeId, LangEl *langEl )
+{
+	UniqueType searchKey( typeId, langEl );
+	UniqueType *existing = uniqeTypeMap.find( &searchKey );
+	if ( existing != 0 )
+		return existing;
+
+	UniqueType *created = new UniqueType( typeId, langEl );
+	uniqeTypeMap.insert( created );
+	return created;
+}
+
+/* Return the canonical UniqueType for (typeId, iterDef), creating and
+ * registering it if the map does not hold one yet. */
+UniqueType *Compiler::findUniqueType( enum TYPE typeId, IterDef *iterDef )
+{
+	UniqueType searchKey( typeId, iterDef );
+	UniqueType *existing = uniqeTypeMap.find( &searchKey );
+	if ( existing != 0 )
+		return existing;
+
+	UniqueType *created = new UniqueType( typeId, iterDef );
+	uniqeTypeMap.insert( created );
+	return created;
+}
+
+/* Return the canonical UniqueType for (typeId, structEl), creating and
+ * registering it if the map does not hold one yet. */
+UniqueType *Compiler::findUniqueType( enum TYPE typeId, StructEl *structEl )
+{
+	UniqueType searchKey( typeId, structEl );
+	UniqueType *existing = uniqeTypeMap.find( &searchKey );
+	if ( existing != 0 )
+		return existing;
+
+	UniqueType *created = new UniqueType( typeId, structEl );
+	uniqeTypeMap.insert( created );
+	return created;
+}
+
+/* Return the canonical UniqueType for (typeId, generic), creating and
+ * registering it if the map does not hold one yet. */
+UniqueType *Compiler::findUniqueType( enum TYPE typeId, GenericType *generic )
+{
+	UniqueType searchKey( typeId, generic );
+	UniqueType *existing = uniqeTypeMap.find( &searchKey );
+	if ( existing != 0 )
+		return existing;
+
+	UniqueType *created = new UniqueType( typeId, generic );
+	uniqeTypeMap.insert( created );
+	return created;
+}
+
+/*
+ * Return the field at the given 0-based position in the field list, or null
+ * when the position is out of range (negative, or at or past the list
+ * length).
+ */
+ObjectField *ObjectDef::findFieldNum( long offset )
+{
+	/* Bounds check, both ends. */
+	if ( offset < 0 || offset >= fieldList.length() )
+		return 0;
+
+	FieldList::Iter field = fieldList;
+	for ( long fn = 0; fn < offset; fn++ )
+		field++;
+
+	return field->value;
+}
+
+/* Return the first field whose resolved type is ut, or null if none matches. */
+ObjectField *ObjectDef::findFieldType( Compiler *pd, UniqueType *ut )
+{
+	FieldList::Iter f = fieldList;
+	while ( f.lte() ) {
+		if ( f->value->typeRef->resolveType( pd ) == ut )
+			return f->value;
+		f++;
+	}
+	return 0;
+}
+
+
+/*
+ * Number of word-sized slots a field of the given type occupies. Iterators
+ * are sized by their runtime iterator struct, references take two words,
+ * list and map pointer blocks use their element sizes, and everything else
+ * takes one word. An iterator type with no case below yields zero.
+ */
+long sizeOfField( UniqueType *fieldUT )
+{
+	switch ( fieldUT->typeId ) {
+		case TYPE_ITER:
+			/* Select on the iterator type. */
+			switch ( fieldUT->iterDef->type ) {
+				case IterDef::Tree:
+				case IterDef::Child:
+				case IterDef::Repeat:
+				case IterDef::RevRepeat:
+					return sizeof(tree_iter_t) / sizeof(word_t);
+
+				case IterDef::RevChild:
+					return sizeof(rev_tree_iter_t) / sizeof(word_t);
+
+				case IterDef::MapEl:
+				case IterDef::ListEl:
+				case IterDef::RevListVal:
+					return sizeof(generic_iter_t) / sizeof(word_t);
+
+				case IterDef::User:
+					/* User iterators are just a pointer to the user_iter_t
+					 * struct. The struct needs to go right beneath the call
+					 * to the user iterator so it can be found by a yield. It
+					 * is therefore allocated on the stack right before the
+					 * call. */
+					return 1;
+			}
+			return 0;
+
+		case TYPE_REF:
+			return 2;
+
+		case TYPE_LIST_PTRS:
+			return COLM_LIST_EL_SIZE;
+
+		case TYPE_MAP_PTRS:
+			return COLM_MAP_EL_SIZE;
+
+		default:
+			break;
+	}
+
+	/* Generics and all remaining types occupy a single word. */
+	return 1;
+}
+
+/* Record that the field has been referenced by the program. */
+void ObjectDef::referenceField( Compiler *pd, ObjectField *field )
+{
+	field->beenReferenced = true;
+}
+
+/*
+ * Emit the bytecode that loads field el of inObject and return the type of
+ * the loaded value.
+ *
+ * The instruction is chosen from the field's own instruction set according to
+ * whether the field type is a value type (elUT->val()), whether the load is
+ * for writing, and whether reverting applies (pd->revertOn && revert).
+ * Iterator fields use the instructions of their IterImpl instead. After the
+ * instruction come, as applicable: the generic id, the field offset or the
+ * rhs-get array, and for constant-valued fields the const id plus a literal
+ * string index. For iterators the returned type is the iterator's search
+ * type rather than the iterator type itself.
+ */
+UniqueType *LangVarRef::loadField( Compiler *pd, CodeVect &code,
+ ObjectDef *inObject, ObjectField *el, bool forWriting, bool revert ) const
+{
+ /* Ensure that the field is referenced. */
+ inObject->referenceField( pd, el );
+
+ UniqueType *elUT = el->typeRef->uniqueType;
+
+ if ( elUT->val() ) {
+ if ( forWriting ) {
+ /* The instruction, depends on whether or not we are reverting. */
+ if ( pd->revertOn && revert )
+ code.append( el->inGetValWV );
+ else
+ code.append( el->inGetValWC );
+ }
+ else {
+ /* Loading a value for reading. */
+ code.append( el->inGetValR );
+ }
+ }
+ else {
+ /* Not a value type: pick between the write and read forms of the load. */
+ if ( forWriting ) {
+ /* The instruction, depends on whether or not we are reverting. */
+ if ( elUT->typeId == TYPE_ITER )
+ code.append( el->iterImpl->inGetCurWC );
+ else if ( pd->revertOn && revert )
+ code.append( el->inGetWV );
+ else
+ code.append( el->inGetWC );
+ }
+ else {
+ /* Loading something for reading */
+ if ( elUT->typeId == TYPE_ITER )
+ code.append( el->iterImpl->inGetCurR );
+ else
+ code.append( el->inGetR );
+ }
+ }
+
+ if ( el->useGenericId )
+ code.appendHalf( el->generic->id );
+
+ if ( el->useOffset() ) {
+ /* Gets of locals and fields require offsets. Fake vars like token
+ * data and lhs don't require it. */
+ code.appendHalf( el->offset );
+ }
+ else if ( el->isRhsGet() ) {
+ /* Need to place the array computing the val. */
+ code.append( el->rhsVal.length() );
+ for ( Vector<RhsVal>::Iter rg = el->rhsVal; rg.lte(); rg++ ) {
+ code.append( rg->prodEl->production->prodNum );
+ code.append( rg->prodEl->pos );
+ }
+ }
+
+ if ( el->isConstVal ) {
+ code.appendHalf( el->constValId );
+
+ if ( el->constValId == CONST_ARG ) {
+ /* Make sure we have this string. */
+ /* A newly inserted string is given the next index (length - 1);
+ * either way mapEl is dereferenced below, so insert is relied on
+ * to set it to the element for this string. */
+ StringMapEl *mapEl = 0;
+ if ( pd->literalStrings.insert( el->constValArg, &mapEl ) )
+ mapEl->value = pd->literalStrings.length()-1;
+
+ code.appendWord( mapEl->value );
+ }
+ }
+
+ /* If we are dealing with an iterator then dereference it. */
+ if ( elUT->typeId == TYPE_ITER )
+ elUT = el->typeRef->searchUniqueType;
+
+ return elUT;
+}
+
+/* The qualification must start at a local frame. There cannot be any pointer. */
+/*
+ * Emit one reference-producing instruction per item of the qualification and
+ * return the number of items processed. The first item is taken from an
+ * iterator's current element, an existing reference or a local; each later
+ * item derives its reference from the previous one. Every item must use the
+ * dot form (asserted).
+ *
+ * NOTE(review): the field is dereferenced right after the lookup error is
+ * reported, so this presumably relies on the error stream terminating at
+ * endp -- confirm.
+ */
+long LangVarRef::loadQualificationRefs( Compiler *pd, CodeVect &code,
+ NameScope *rootScope ) const
+{
+ long count = 0;
+
+ /* Start the search from the root object. */
+ NameScope *searchScope = rootScope;
+
+ for ( QualItemVect::Iter qi = *qual; qi.lte(); qi++ ) {
+ /* Lookup the field in the current qualification. */
+ ObjectField *el = searchScope->findField( qi->data );
+ if ( el == 0 )
+ error(qi->loc) << "cannot resolve qualification " << qi->data << endp;
+
+ if ( qi.pos() > 0 ) {
+ /* Past the first item: build on the reference made by the previous one. */
+ if ( el->isRhsGet() ) {
+ code.append( IN_RHS_REF_FROM_QUAL_REF );
+ code.appendHalf( 0 );
+
+ code.append( el->rhsVal.length() );
+ for ( Vector<RhsVal>::Iter rg = el->rhsVal; rg.lte(); rg++ ) {
+ code.append( rg->prodEl->production->prodNum );
+ code.append( rg->prodEl->pos );
+ }
+ }
+ else {
+ code.append( IN_REF_FROM_QUAL_REF );
+ code.appendHalf( 0 );
+ code.appendHalf( el->offset );
+ }
+ }
+ else if ( el->iterImpl != 0 ) {
+ /* First item is an iterator: reference its current element. */
+ code.append( el->iterImpl->inRefFromCur );
+ code.appendHalf( el->offset );
+ }
+ else if ( el->typeRef->type == TypeRef::Ref ) {
+ /* First item is already a reference. */
+ code.append( IN_REF_FROM_REF );
+ code.appendHalf( el->offset );
+ }
+ else {
+ /* First item is a plain local. */
+ code.append( IN_REF_FROM_LOCAL );
+ code.appendHalf( el->offset );
+ }
+
+ /* Continue the search in the object of this item's type; iterators
+ * are looked through to their search type. */
+ UniqueType *elUT = el->typeRef->uniqueType;
+ if ( elUT->typeId == TYPE_ITER )
+ elUT = el->typeRef->searchUniqueType;
+
+ assert( qi->form == QualItem::Dot );
+
+ ObjectDef *searchObjDef = elUT->objectDef();
+ searchScope = searchObjDef->rootScope;
+
+ count += 1;
+ }
+ return count;
+}
+
+/*
+ * Emit the loads for every item of the qualification, starting the field
+ * search at rootScope and moving into the object of each item's type.
+ *
+ * lastPtrInQual is the position of the last pointer in the qualification, or
+ * negative if there is none. Items at or before it are loaded read-only;
+ * items after it are loaded with reverting forced on. Writing to a field that
+ * has an active reference is reported as an error, as is using dot on a
+ * pointer or arrow on a non-pointer.
+ */
+void LangVarRef::loadQualification( Compiler *pd, CodeVect &code,
+ NameScope *rootScope, int lastPtrInQual, bool forWriting, bool revert ) const
+{
+ /* Start the search from the root object. */
+ NameScope *searchScope = rootScope;
+
+ for ( QualItemVect::Iter qi = *qual; qi.lte(); qi++ ) {
+ /* Lookup the field in the current qualification. */
+ ObjectField *el = searchScope->findField( qi->data );
+ if ( el == 0 )
+ error(qi->loc) << "cannot resolve qualification " << qi->data << endp;
+
+ if ( forWriting && el->refActive )
+ error(qi->loc) << "reference active, cannot write to object" << endp;
+
+ bool lfForWriting = forWriting;
+ bool lfRevert = revert;
+
+ /* If there is a pointer in the qualification, we need to compute
+ * forWriting and revert. */
+ if ( lastPtrInQual >= 0 ) {
+ if ( qi.pos() <= lastPtrInQual ) {
+ /* If we are before or at the pointer we are strictly read
+ * only, regardless of the origin. */
+ lfForWriting = false;
+ lfRevert = false;
+ }
+ else {
+ /* If we are past the pointer then we are always reverting
+ * because the object is global. Forwriting is as passed in.
+ * */
+ lfRevert = true;
+ }
+ }
+
+ UniqueType *qualUT = loadField( pd, code, searchScope->owningObj,
+ el, lfForWriting, lfRevert );
+
+ if ( qi->form == QualItem::Dot ) {
+ /* Dot cannot be applied to a pointer. An iterator is fine
+ * (presumably the access goes through the iterator, not its
+ * current element). */
+ if ( qualUT->ptr() )
+ error(loc) << "dot cannot be used to access a pointer" << endp;
+ }
+ else if ( qi->form == QualItem::Arrow ) {
+ if ( qualUT->ptr() ) {
+ /* This deref instruction exists to capture the pointer for
+ * reverse execution purposes. */
+ if ( pd->revertOn && qi.pos() == lastPtrInQual && forWriting ) {
+ /* This is like a global load. */
+ code.append( IN_PTR_ACCESS_WV );
+ }
+ }
+ else {
+ error(loc) << "arrow operator cannot be used to "
+ "access this type" << endp;
+ }
+ }
+
+ /* The next item is looked up in the object of this item's type. */
+ ObjectDef *searchObjDef = qualUT->objectDef();
+ searchScope = searchObjDef->rootScope;
+ }
+}
+
+/*
+ * Load the context (struct) object and then the qualification, searching
+ * from the root scope of the struct's object definition.
+ */
+void LangVarRef::loadContextObj( Compiler *pd, CodeVect &code,
+		int lastPtrInQual, bool forWriting ) const
+{
+	/* The write forms are used only when we are writing and no pointer was
+	 * found in the qualification. Otherwise we are either reading or loading
+	 * a pointer that will be dereferenced. */
+	const bool writeDirect = forWriting && lastPtrInQual < 0;
+
+	if ( !writeDirect )
+		code.append( IN_LOAD_CONTEXT_R );
+	else if ( pd->revertOn )
+		code.append( IN_LOAD_CONTEXT_WV );
+	else
+		code.append( IN_LOAD_CONTEXT_WC );
+
+	/* The search starts in the struct's own object. */
+	ObjectDef *rootObj = structDef->objectDef;
+	loadQualification( pd, code, rootObj->rootScope, lastPtrInQual, forWriting, true );
+}
+
+/*
+ * Load the global object and then the qualification. The search starts in
+ * the root scope of this reference's namespace when it has one, otherwise in
+ * the root scope of the root namespace. The instruction selection itself is
+ * shared with loadScopedObj.
+ */
+void LangVarRef::loadGlobalObj( Compiler *pd, CodeVect &code,
+		int lastPtrInQual, bool forWriting ) const
+{
+	NameScope *startScope;
+	if ( nspace != 0 )
+		startScope = nspace->rootScope;
+	else
+		startScope = pd->rootNamespace->rootScope;
+
+	loadScopedObj( pd, code, startScope, lastPtrInQual, forWriting );
+}
+
+/*
+ * Load the global object and then the qualification, starting the field
+ * search in the scope supplied by the caller.
+ */
+void LangVarRef::loadScopedObj( Compiler *pd, CodeVect &code,
+		NameScope *scope, int lastPtrInQual, bool forWriting ) const
+{
+	/* The write forms are used only when we are writing and no pointer was
+	 * found in the qualification. Otherwise we are either reading or loading
+	 * a pointer that will be dereferenced. */
+	const bool writeDirect = forWriting && lastPtrInQual < 0;
+
+	if ( !writeDirect )
+		code.append( IN_LOAD_GLOBAL_R );
+	else if ( pd->revertOn )
+		code.append( IN_LOAD_GLOBAL_WV );
+	else
+		code.append( IN_LOAD_GLOBAL_WC );
+
+	loadQualification( pd, code, scope, lastPtrInQual, forWriting, true );
+}
+
+/*
+ * Load the qualification of an inbuilt object, searching from this
+ * reference's scope. Reverting follows the compiler-wide revertOn setting.
+ */
+void LangVarRef::loadInbuiltObject( Compiler *pd, CodeVect &code,
+		int lastPtrInQual, bool forWriting ) const
+{
+	const bool revert = pd->revertOn;
+	loadQualification( pd, code, scope, lastPtrInQual, forWriting, revert );
+}
+
+/*
+ * Load the qualification of a local, searching from this reference's scope.
+ * Locals are never loaded with reverting.
+ */
+void LangVarRef::loadLocalObj( Compiler *pd, CodeVect &code,
+		int lastPtrInQual, bool forWriting ) const
+{
+	const bool revert = false;
+	loadQualification( pd, code, scope, lastPtrInQual, forWriting, revert );
+}
+
+/*
+ * Emit the code that loads the object this reference is qualified through,
+ * dispatching on the kind of reference: namespace-qualified, inbuilt object,
+ * local, production reference, struct (context) reference, and finally
+ * global.
+ */
+void LangVarRef::loadObj( Compiler *pd, CodeVect &code,
+ int lastPtrInQual, bool forWriting ) const
+{
+ if ( nspaceQual != 0 && nspaceQual->qualNames.length() > 0 ) {
+ /* Only the first name of the namespace qualification is looked up.
+ * NOTE(review): the result of findNamespace is used without a null
+ * check -- confirm an unknown namespace cannot reach this point. */
+ Namespace *nspace = pd->rootNamespace->findNamespace( nspaceQual->qualNames[0] );
+ loadScopedObj( pd, code, nspace->rootScope, lastPtrInQual, forWriting );
+ }
+ else if ( isInbuiltObject() )
+ loadInbuiltObject( pd, code, lastPtrInQual, forWriting );
+ else if ( isLocalRef() )
+ loadLocalObj( pd, code, lastPtrInQual, forWriting );
+ else if ( isProdRef( pd ) ) {
+ /* Production reference: retry on a copy whose qualification starts
+ * with the case clause variable.
+ * NOTE(review): the copy is never deleted, and qual is used as a
+ * pointer, so if the copy shares it the prepend also alters this
+ * reference's qualification -- confirm both are intended. */
+ LangVarRef *dup = new LangVarRef( *this );
+ dup->qual->prepend( QualItem( QualItem::Dot, InputLoc(), scope->caseClauseVarRef->name ) );
+ dup->loadObj( pd, code, lastPtrInQual, forWriting );
+ }
+ else if ( isStructRef() )
+ loadContextObj( pd, code, lastPtrInQual, forWriting );
+ else
+ loadGlobalObj( pd, code, lastPtrInQual, forWriting );
+}
+
+
+/*
+ * Decide whether a value of type srcUT may be assigned to a destination of
+ * type destUT. destSearchUT is the destination's search type and is only
+ * consulted when the destination is an iterator. No code is emitted.
+ */
+bool castAssignment( Compiler *pd, CodeVect &code, UniqueType *destUT,
+		UniqueType *destSearchUT, UniqueType *srcUT )
+{
+	/* Identical types always match. */
+	if ( destUT == srcUT )
+		return true;
+
+	switch ( destUT->typeId ) {
+		case TYPE_TREE:
+			/* Tree from tree: allowed when either side is "any". */
+			if ( srcUT->typeId == TYPE_TREE ) {
+				return destUT->langEl == pd->anyLangEl ||
+						srcUT->langEl == pd->anyLangEl;
+			}
+
+			/* Setting a tree from a reference. */
+			if ( srcUT->typeId == TYPE_REF )
+				return destUT->langEl == srcUT->langEl;
+
+			/* Assigning nil to a tree. */
+			return srcUT->typeId == TYPE_NIL;
+
+		case TYPE_REF:
+			/* Setting a reference from a tree. */
+			return srcUT->typeId == TYPE_TREE &&
+					destUT->langEl == srcUT->langEl;
+
+		case TYPE_ITER:
+			/* Setting an iterator from a tree. */
+			return srcUT->typeId == TYPE_TREE &&
+					destSearchUT->langEl == srcUT->langEl;
+
+		case TYPE_STRUCT:
+		case TYPE_GENERIC:
+			/* Structs and generics accept nil. */
+			return srcUT->typeId == TYPE_NIL;
+
+		default:
+			break;
+	}
+
+	return false;
+}
+
+/*
+ * Emit the instruction that sets the current element of an iterator field,
+ * followed by the field's offset.
+ */
+void LangVarRef::setFieldIter( Compiler *pd, CodeVect &code,
+		ObjectDef *inObject, ObjectField *el, UniqueType *objUT,
+		UniqueType *exprType, bool revert ) const
+{
+	IterImpl *impl = el->iterImpl;
+	code.append( impl->inSetCurWC );
+	code.appendHalf( el->offset );
+}
+
+/*
+ * Emit the instruction that sets field el of inObject, plus the operand(s)
+ * that locate the field. The "Val" opcodes are used when exprUT->val() is
+ * true. The WV opcodes are used only when reverting is on in the compiler and
+ * requested by the caller; otherwise the WC opcodes are used.
+ */
+void LangVarRef::setField( Compiler *pd, CodeVect &code,
+		ObjectDef *inObject, ObjectField *el,
+		UniqueType *exprUT, bool revert ) const
+{
+	/* Record that the field is referenced in its containing object. */
+	inObject->referenceField( pd, el );
+
+	/* Pick the setter opcode. */
+	if ( exprUT->val() )
+		code.append( ( pd->revertOn && revert ) ? el->inSetValWV : el->inSetValWC );
+	else
+		code.append( ( pd->revertOn && revert ) ? el->inSetWV : el->inSetWC );
+
+	/* Fields located by offset take the offset as a half-word operand. */
+	if ( el->useOffset() ) {
+		code.appendHalf( el->offset );
+		return;
+	}
+
+	/* Nothing more to write unless this is an RHS field. */
+	if ( !el->isRhsGet() )
+		return;
+
+	/* RHS fields take a count followed by (production number, position)
+	 * pairs. */
+	code.append( el->rhsVal.length() );
+	for ( Vector<RhsVal>::Iter rv = el->rhsVal; rv.lte(); rv++ ) {
+		code.append( rv->prodEl->production->prodNum );
+		code.append( rv->prodEl->pos );
+	}
+}
+
+
+/* Emit code that loads the variable this reference names and return the type
+ * reported by loadField. */
+UniqueType *LangVarRef::evaluate( Compiler *pd, CodeVect &code, bool forWriting ) const
+{
+	/* Resolve the reference to its containing object and field. */
+	VarRefLookup found = lookupField( pd );
+
+	/* Load the containing object, if any, then the field from it. */
+	loadObj( pd, code, found.lastPtrInQual, forWriting );
+	return loadField( pd, code, found.inObject,
+			found.objField, forWriting, false );
+}
+
+/* Report whether a reference may be taken to the looked-up variable: either
+ * it lives in a frame object, or it is reached through a local with no
+ * pointer in the qualification and is not itself of pointer type. */
+bool LangVarRef::canTakeRef( Compiler *pd, VarRefLookup &lookup ) const
+{
+	/* Anything in a frame object qualifies outright. */
+	if ( lookup.inObject->type == ObjectDef::FrameType )
+		return true;
+
+	/* Otherwise it must be an attribute accessed via a local. */
+	return isLocalRef() &&
+			lookup.lastPtrInQual < 0 && ! lookup.uniqueType->ptr();
+}
+
+void LangVarRef::verifyRefPossible( Compiler *pd, VarRefLookup &lookup ) const
+{
+ bool canTake = canTakeRef( pd, lookup );
+
+ if ( !canTake ) {
+ error(loc) << "can only take references of locals or "
+ "attributes accessed via a local" << endp;
+ }
+
+ if ( lookup.objField->refActive )
+ error(loc) << "reference currently active, cannot take another" << endp;
+}
+
+/* An expression can be referenced only when it is a bare variable-reference
+ * term and that variable itself permits a reference. */
+bool LangExpr::canTakeRef( Compiler *pd ) const
+{
+	/* Anything other than a plain variable reference is out. */
+	if ( type != LangExpr::TermType || term->type != LangTerm::VarRefType )
+		return false;
+
+	VarRefLookup found = term->varRef->lookupField( pd );
+	if ( term->varRef->canTakeRef( pd, found ) )
+		return true;
+
+	return false;
+}
+
+
+/* First step of passing this variable by reference: check that a reference
+ * is allowed, then load the references for the qualification. Returns the
+ * field being referenced. */
+ObjectField *LangVarRef::preEvaluateRef( Compiler *pd, CodeVect &code ) const
+{
+	VarRefLookup found = lookupField( pd );
+	verifyRefPossible( pd, found );
+
+	loadQualificationRefs( pd, code, scope );
+	return found.objField;
+}
+
+/* Emit the instruction that produces a reference to the field this variable
+ * names and return that field. pushCount is written as an operand of the
+ * qualified-reference instructions only. */
+ObjectField *LangVarRef::evaluateRef( Compiler *pd, CodeVect &code, long pushCount ) const
+{
+	VarRefLookup lookup = lookupField( pd );
+	verifyRefPossible( pd, lookup );
+
+	ObjectField *field = lookup.objField;
+
+	/* Ensure that the field is referenced. */
+	lookup.inObject->referenceField( pd, field );
+
+	const bool qualified = qual->length() > 0;
+
+	/* An unqualified reference marks the field itself as having an active
+	 * reference; verifyRefPossible rejects a second one while this is set. */
+	if ( !qualified )
+		field->refActive = true;
+
+	/* Taking a reference means we must assume writing, so the tree is
+	 * treated as dirty. */
+	field->dirtyTree = true;
+
+	if ( qualified && field->isRhsGet() ) {
+		/* Qualified RHS field: push count, then the RHS locator array. */
+		code.append( IN_RHS_REF_FROM_QUAL_REF );
+		code.appendHalf( pushCount );
+
+		code.append( field->rhsVal.length() );
+		for ( Vector<RhsVal>::Iter rv = field->rhsVal; rv.lte(); rv++ ) {
+			code.append( rv->prodEl->production->prodNum );
+			code.append( rv->prodEl->pos );
+		}
+	}
+	else if ( qualified ) {
+		/* Qualified ordinary field: push count, then offset. */
+		code.append( IN_REF_FROM_QUAL_REF );
+		code.appendHalf( pushCount );
+		code.appendHalf( field->offset );
+	}
+	else if ( field->iterImpl != 0 ) {
+		/* Iterator variable: use the iterator's own opcode. */
+		code.append( field->iterImpl->inRefFromCur );
+		code.appendHalf( field->offset );
+	}
+	else if ( field->typeRef->type == TypeRef::Ref ) {
+		/* The variable is declared as a reference type. */
+		code.append( IN_REF_FROM_REF );
+		code.appendHalf( field->offset );
+	}
+	else {
+		/* Plain local. */
+		code.append( IN_REF_FROM_LOCAL );
+		code.appendHalf( field->offset );
+	}
+
+	return field;
+}
+
+/* Choose the iterator implementation for a triter call. With exactly one
+ * argument whose type is a list or map generic, pick the element or value
+ * flavour according to whether searchUT is the generic's element type. In
+ * every other case the tree iterator is used. The result is newly allocated. */
+IterImpl *LangVarRef::chooseTriterCall( Compiler *pd,
+		UniqueType *searchUT, CallArgVect *args )
+{
+	IterImpl *chosen = 0;
+
+	if ( args->length() == 1 ) {
+		/* Evaluate the sole argument into a scratch buffer, just for its
+		 * type. */
+		CodeVect scratch;
+		CallArgVect::Iter arg = *args;
+		UniqueType *argUT = (*arg)->expr->evaluate( pd, scratch );
+
+		if ( argUT->typeId == TYPE_GENERIC && argUT->generic->typeId == GEN_LIST ) {
+			chosen = searchUT == argUT->generic->elUt ?
+					new IterImpl( IterImpl::ListEl ) :
+					new IterImpl( IterImpl::ListVal );
+		}
+
+		if ( argUT->typeId == TYPE_GENERIC && argUT->generic->typeId == GEN_MAP ) {
+			chosen = searchUT == argUT->generic->elUt ?
+					new IterImpl( IterImpl::MapEl ) :
+					new IterImpl( IterImpl::MapVal );
+		}
+	}
+
+	/* Fall back to the tree iterator. */
+	if ( chosen == 0 )
+		chosen = new IterImpl( IterImpl::Tree );
+
+	return chosen;
+}
+
+/*
+ * Emit code for the arguments of a call to the method in lookup.
+ *
+ * The number of supplied arguments is checked against the method's parameter
+ * count. Each argument is then handled according to its parameter type:
+ * reference parameters are passed as a reference to the variable when one can
+ * be taken, or as a reference to an evaluated temporary otherwise; all other
+ * parameters are evaluated and type-checked with castAssignment.
+ *
+ * Returns a new[]-allocated array with one slot per argument. A slot holds the
+ * field whose reference was taken for that argument, or null. The member
+ * argSize is set to the number of temporaries created for reference
+ * parameters.
+ */
+ObjectField **LangVarRef::evaluateArgs( Compiler *pd, CodeVect &code,
+ VarRefLookup &lookup, CallArgVect *args )
+{
+ /* Parameter list is given only for user defined methods. Otherwise it
+ * will be null. */
+ ParameterList *paramList = lookup.objMethod->paramList;
+
+ /* Match the number of arguments. */
+ int numArgs = args != 0 ? args->length() : 0;
+ if ( numArgs != lookup.objMethod->numParams )
+ error(loc) << "wrong number of arguments" << endp;
+
+ /* This is for storing the object fields used by references. */
+ ObjectField **paramRefs = new ObjectField*[numArgs];
+ memset( paramRefs, 0, sizeof(ObjectField*) * numArgs );
+
+ /* Done now if there are no args. */
+ if ( args == 0 )
+ return paramRefs;
+
+ /* We use this only if there is a parameter list. */
+ ParameterList::Iter p;
+ /* size: running count used to record per-argument offsets.
+ * tempPops: temporaries created for reference parameters.
+ * pos: operand written with IN_STASH_ARG; advances by 2 for a reference
+ * argument and by 1 for any other. */
+ long size = 0;
+ long tempPops = 0;
+ long pos = 0;
+
+ /* Start the parameter iterator only when a parameter list exists. */
+ paramList != 0 && ( p = *paramList );
+ for ( CallArgVect::Iter pe = *args; pe.lte(); pe++ ) {
+ /* Get the expression and the UT for the arg. */
+ LangExpr *expression = (*pe)->expr;
+ UniqueType *paramUT = lookup.objMethod->paramUTs[pe.pos()];
+
+ if ( paramUT->typeId == TYPE_REF ) {
+ if ( expression->canTakeRef( pd ) ) {
+ /* Push object loads for reference parameters. */
+ LangVarRef *varRef = expression->term->varRef;
+ ObjectField *refOf = varRef->preEvaluateRef( pd, code );
+ paramRefs[pe.pos()] = refOf;
+
+ size += varRef->qual->length() * 2;
+ (*pe)->offQualRef = size;
+ /**/
+
+ /* Emit the reference itself, with a push count of zero. */
+ refOf = varRef->evaluateRef( pd, code, 0 ); //(size - (*pe)->offQualRef) );
+ paramRefs[pe.pos()] = refOf;
+
+ //size += 2;
+ }
+ else {
+ /* First pass we need to allocate and evaluate temporaries. */
+ UniqueType *exprUT = expression->evaluate( pd, code );
+
+ (*pe)->exprUT = exprUT;
+
+ size += 1;
+ (*pe)->offTmp = size;
+ tempPops += 1;
+ /**/
+ /* Reference the temporary that was just evaluated. */
+ code.append( IN_REF_FROM_BACK );
+ code.appendHalf( 0 ); //size - (*pe)->offTmp );
+
+ //size += 2;
+ }
+
+ /* NOTE(review): this stash is emitted for any function, whereas the
+ * non-reference branch below also requires !inHost -- confirm the
+ * difference is intended. */
+ if ( lookup.objMethod->func ) {
+ code.append( IN_STASH_ARG );
+ code.appendHalf( pos );
+ code.appendHalf( 2 );
+ }
+
+ pos += 2;
+ }
+ else {
+ UniqueType *exprUT = expression->evaluate( pd, code );
+ // pd->unwindCode.remove( 0, 1 );
+
+ /* No search type is available here, so null is passed for it. */
+ if ( !castAssignment( pd, code, paramUT, 0, exprUT ) )
+ error(loc) << "arg " << pe.pos()+1 << " is of the wrong type" << endp;
+
+ size += 1;
+
+ if ( lookup.objMethod->func && !lookup.objMethod->func->inHost ) {
+ code.append( IN_STASH_ARG );
+ code.appendHalf( pos );
+ code.appendHalf( 1 );
+ }
+
+ pos += 1;
+ }
+
+ /* Advance the parameter list iterator if we have it. */
+ paramList != 0 && p.increment();
+ }
+
+ /* Record how many temporaries were created for reference parameters. */
+ argSize = tempPops;
+
+ return paramRefs;
+}
+
+/* Clear the refActive mark on every field recorded in paramRefs. One slot
+ * per method parameter is examined; null slots are skipped. */
+void LangVarRef::resetActiveRefs( Compiler *pd, VarRefLookup &lookup,
+		ObjectField **paramRefs ) const
+{
+	for ( long i = 0; i < lookup.objMethod->numParams; i++ ) {
+		ObjectField *referenced = paramRefs[i];
+		if ( referenced == 0 )
+			continue;
+
+		referenced->refActive = false;
+	}
+}
+
+/* True when the looked-up method is the parser finish method. */
+bool LangVarRef::isFinishCall( VarRefLookup &lookup ) const
+{
+	ObjectMethod *method = lookup.objMethod;
+	return method->type == ObjectMethod::ParseFinish;
+}
+
+/*
+ * Emit the call instruction for the method in lookup, after the arguments
+ * have been emitted.
+ *
+ * The call object is loaded first when the method asks for it. A parser
+ * finish call is expanded inline into send-EOF, parse-fragment and
+ * get-parser-member instructions. Any other method emits its WV opcode when
+ * reverting is enabled and required, or its WC opcode otherwise, optionally
+ * preceded by IN_FN. The function id, generic id and unwind code follow when
+ * applicable.
+ */
+void LangVarRef::callOperation( Compiler *pd, CodeVect &code, VarRefLookup &lookup ) const
+{
+ /* This is for writing if it is a non-const builtin. */
+ bool forWriting = lookup.objMethod->func == 0 &&
+ !lookup.objMethod->isConst;
+
+ if ( lookup.objMethod->useCallObj ) {
+ /* Load the object, if any. */
+ loadObj( pd, code, lookup.lastPtrInQual, forWriting );
+ }
+
+ /* Check if we need to revert the function. If it operates on a reference
+ * or if it is not local then we need to revert it. */
+ bool revert = lookup.lastPtrInQual >= 0 || !isLocalRef() || isInbuiltObject();
+ /* Set when the emitted opcode is followed by an unwind-code operand. */
+ bool unwind = false;
+
+ if ( isFinishCall( lookup ) ) {
+ code.append( IN_SEND_EOF_W );
+
+ LangTerm::parseFrag( pd, code, 0 );
+
+ code.append( IN_GET_PARSER_MEM_R );
+ code.appendHalf( 0 );
+ }
+ else {
+ if ( pd->revertOn && revert ) {
+ /* NOTE(review): the second test reads opcodeWC, not opcodeWV, in this
+ * revert branch -- confirm that is intended. */
+ if ( lookup.objMethod->opcodeWV == IN_CALL_WV ||
+ lookup.objMethod->opcodeWC == FN_EXIT )
+ unwind = true;
+
+ if ( lookup.objMethod->useFnInstr )
+ code.append( IN_FN );
+ code.append( lookup.objMethod->opcodeWV );
+ }
+ else {
+ if ( lookup.objMethod->opcodeWC == IN_CALL_WC ||
+ lookup.objMethod->opcodeWC == FN_EXIT )
+ unwind = true;
+
+ if ( lookup.objMethod->useFnInstr )
+ code.append( IN_FN );
+ code.append( lookup.objMethod->opcodeWC );
+ }
+ }
+
+ /* Optional operands identifying the function and the generic. */
+ if ( lookup.objMethod->useFuncId )
+ code.appendHalf( lookup.objMethod->funcId );
+
+ if ( lookup.objMethod->useGenericId )
+ code.appendHalf( lookup.objMethod->generic->id );
+
+ if ( unwind ) {
+ /* Write the unwind code length followed by the code itself and a
+ * terminating IN_DONE, or just a zero length when there is none. */
+ if ( pd->unwindCode.length() == 0 )
+ code.appendHalf( 0 );
+ else {
+ code.appendHalf( pd->unwindCode.length() + 1 );
+ code.append( pd->unwindCode );
+ code.append( IN_DONE );
+ }
+ }
+}
+
+/*
+ * Emit the pops that balance what evaluateArgs pushed for reference
+ * parameters. The qualification words of every argument passed as a true
+ * reference are removed with one IN_POP_N_WORDS. When temps is set, one
+ * IN_POP_TREE is also emitted for each reference parameter that was given a
+ * temporary. Does nothing when there is no argument list.
+ */
+void LangVarRef::popRefQuals( Compiler *pd, CodeVect &code,
+		VarRefLookup &lookup, CallArgVect *args, bool temps ) const
+{
+	if ( args == 0 )
+		return;
+
+	/* Total the qualification words pushed for referenced variables. */
+	long qualWords = 0;
+	for ( CallArgVect::Iter arg = *args; arg.lte(); arg++ ) {
+		LangExpr *expression = (*arg)->expr;
+		UniqueType *paramUT = lookup.objMethod->paramUTs[arg.pos()];
+
+		if ( paramUT->typeId != TYPE_REF )
+			continue;
+
+		if ( expression->canTakeRef( pd ) )
+			qualWords += expression->term->varRef->qual->length() * 2;
+	}
+
+	if ( qualWords > 0 ) {
+		code.append( IN_POP_N_WORDS );
+		code.appendHalf( (short)qualWords );
+	}
+
+	if ( !temps )
+		return;
+
+	/* Pop the temporaries that stood in for non-referenceable arguments. */
+	for ( CallArgVect::Iter arg = *args; arg.lte(); arg++ ) {
+		LangExpr *expression = (*arg)->expr;
+		UniqueType *paramUT = lookup.objMethod->paramUTs[arg.pos()];
+
+		if ( paramUT->typeId == TYPE_REF && ! expression->canTakeRef( pd ) )
+			code.append( IN_POP_TREE );
+	}
+}
+
+
+/*
+ * Emit a complete method call: argument-space preparation, the arguments,
+ * the call instruction, and the cleanup that follows. The prepare, clear and
+ * load-return-value instructions are emitted only for functions that are not
+ * marked inHost. Returns the type found by the method lookup.
+ */
+UniqueType *LangVarRef::evaluateCall( Compiler *pd, CodeVect &code, CallArgVect *args )
+{
+	/* Resolve the method being called. */
+	VarRefLookup lookup = lookupMethod( pd );
+	Function *func = lookup.objMethod->func;
+
+	const bool prepArgs = func != 0 && !func->inHost;
+
+	/* Reserve the contiguous argument space. The size operand is written as
+	 * zero here and patched once the call has been emitted. */
+	int sizeOperandLoc = 0;
+	if ( prepArgs ) {
+		code.append( IN_PREP_ARGS );
+		sizeOperandLoc = code.length();
+		code.appendHalf( 0 );
+	}
+
+	/* Arguments, then the call, then the pops for reference arguments. */
+	ObjectField **paramRefs = evaluateArgs( pd, code, lookup, args );
+	callOperation( pd, code, lookup );
+	popRefQuals( pd, code, lookup, args, true );
+
+	/* The references taken for this call are no longer active. */
+	resetActiveRefs( pd, lookup, paramRefs );
+	delete[] paramRefs;
+
+	if ( prepArgs ) {
+		code.append( IN_CLEAR_ARGS );
+		code.appendHalf( func->paramListSize );
+		code.setHalf( sizeOperandLoc, func->paramListSize );
+
+		code.append( IN_LOAD_RETVAL );
+	}
+
+	/* Return the type to the expression. */
+	return lookup.uniqueType;
+}
+
+/* Emit a pattern match against the tree or reference named by varRef, then
+ * the stores for each variable the pattern binds. A tree type is always
+ * returned: a reference subject yields the tree type of the same element. */
+UniqueType *LangTerm::evaluateMatch( Compiler *pd, CodeVect &code ) const
+{
+	/* Hand out bind ids to the pattern items that bind a variable. */
+	for ( PatternItemList::Iter pi = *pattern->list; pi.lte(); pi++ ) {
+		if ( pi->varRef != 0 )
+			pi->bindId = pattern->nextBindId++;
+	}
+
+	UniqueType *subjectUT = varRef->evaluate( pd, code );
+	if ( subjectUT->typeId != TYPE_TREE && subjectUT->typeId != TYPE_REF )
+		error(varRef->loc) << "expected match against a tree/ref type" << endp;
+
+	/* The pattern parser needs the language element being matched. */
+	pattern->langEl = subjectUT->langEl;
+
+	code.append( IN_MATCH );
+	code.appendHalf( pattern->patRepId );
+
+	/* Store the bindings, walking the pattern items last to first. */
+	for ( PatternItemList::Iter pi = pattern->list->last(); pi.gtb(); pi-- ) {
+		if ( pi->varRef == 0 )
+			continue;
+
+		/* The bound value is a tree of the item's language element. */
+		UniqueType *boundUT = pd->findUniqueType( TYPE_TREE, pi->prodEl->langEl );
+
+		/* Locate the variable being assigned to and set it. */
+		VarRefLookup target = pi->varRef->lookupField( pd );
+		pi->varRef->loadObj( pd, code, target.lastPtrInQual, false );
+		pi->varRef->setField( pd, code, target.inObject,
+				target.objField, boundUT, false );
+	}
+
+	/* Matching turns refs into trees. */
+	if ( subjectUT->typeId == TYPE_REF )
+		return pd->findUniqueType( TYPE_TREE, subjectUT->langEl );
+
+	return subjectUT;
+}
+
+/*
+ * Emit a test of whether the tree named by varRef was produced by the
+ * production labelled this->prod. Without a trailing expression the result
+ * type is int. With one, the expression is evaluated only when the test
+ * succeeds and the type of the tested variable is returned.
+ */
+UniqueType *LangTerm::evaluateProdCompare( Compiler *pd, CodeVect &code ) const
+{
+	UniqueType *ut = varRef->evaluate( pd, code );
+	if ( ut->typeId != TYPE_TREE && ut->typeId != TYPE_REF )
+		error(varRef->loc) << "expected match against a tree/ref type" << endp;
+
+	code.append( IN_PROD_NUM );
+
+	/* Resolve the label to a production of the variable's element. */
+	Production *labelled = pd->findProductionByLabel( ut->langEl, this->prod );
+	if ( labelled == 0 ) {
+		error( this->loc) << "could not find "
+				"production label: " << this->prod << endp;
+	}
+
+	/* Compare the tree's production number against the labelled one. */
+	unsigned int wanted = labelled->prodNum;
+	code.append( IN_LOAD_INT );
+	code.appendWord( wanted );
+	code.append( IN_TST_EQL_VAL );
+
+	if ( expr == 0 )
+		return pd->uniqueTypeInt;
+
+	code.append( IN_DUP_VAL );
+
+	/* Conditional jump over the expression; the distance is patched in once
+	 * the expression has been emitted. */
+	long jumpAt = code.length();
+	code.append( IN_JMP_FALSE_VAL );
+	code.appendHalf( 0 );
+
+	code.append( IN_POP_VAL );
+	expr->evaluate( pd, code );
+
+	/* Distance is measured from the end of the three-byte jump. */
+	long distance = code.length() - jumpAt - 3;
+	code.setHalf( jumpAt+1, distance );
+
+	return ut;
+}
+
+/* When this term has a capture variable, duplicate the value on top of the
+ * stack and store the copy into that variable. valUt is the type handed to
+ * setField. Does nothing when there is no capture variable. */
+void LangTerm::evaluateCapture( Compiler *pd, CodeVect &code, UniqueType *valUt ) const
+{
+	if ( varRef == 0 )
+		return;
+
+	/* Find the variable being assigned to. */
+	VarRefLookup target = varRef->lookupField( pd );
+
+	/* Keep a copy on the stack; the dup opcode depends on the variable's
+	 * type. */
+	if ( target.uniqueType->tree() )
+		code.append( IN_DUP_TREE );
+	else
+		code.append( IN_DUP_VAL );
+
+	varRef->loadObj( pd, code, target.lastPtrInQual, false );
+	varRef->setField( pd, code, target.inObject, target.objField, valUt, false );
+}
+
+/*
+ * Emit construction of a new struct or generic of the type in typeRef,
+ * optionally initialise a parser's context or input from the field init
+ * arguments, and capture the result. Returns the constructed type.
+ */
+UniqueType *LangTerm::evaluateNew( Compiler *pd, CodeVect &code ) const
+{
+	/* The type being constructed. */
+	UniqueType *newUT = typeRef->uniqueType;
+
+	if ( newUT->typeId != TYPE_STRUCT && newUT->typeId != TYPE_GENERIC )
+		error(loc) << "can only new a struct or generic" << endp;
+
+	/* A parser whose element has a context takes exactly one init arg. */
+	bool context = false;
+	if ( newUT->typeId == TYPE_GENERIC &&
+			newUT->generic->typeId == GEN_PARSER &&
+			newUT->generic->elUt->langEl->contextIn != 0 )
+	{
+		if ( fieldInitArgs == 0 || fieldInitArgs->length() != 1 )
+			error(loc) << "parse command requires just context " << endp;
+		context = true;
+	}
+
+	if ( newUT->typeId == TYPE_GENERIC ) {
+		code.append( IN_CONS_GENERIC );
+		code.appendHalf( newUT->generic->id );
+		code.appendHalf( 0 ); // stopId
+	}
+	else if ( newUT->typeId == TYPE_STRUCT && newUT->structEl == pd->streamSel ) {
+		/* The stream struct has its own constructor opcode. */
+		code.append( IN_NEW_STREAM );
+	}
+	else {
+		code.append( IN_NEW_STRUCT );
+		code.appendHalf( newUT->structEl->id );
+	}
+
+	/* Load the context or input into the parser. */
+	if ( context ) {
+		for ( int arg = 0; arg < fieldInitArgs->length(); arg++ ) {
+			/* Evaluate what we are initialising with. */
+			UniqueType *argUT = fieldInitArgs->data[arg]->expr->evaluate( pd, code );
+
+			if ( argUT == pd->uniqueTypeInput )
+				code.append( IN_SET_PARSER_INPUT );
+			else if ( argUT->typeId == TYPE_STRUCT )
+				code.append( IN_SET_PARSER_CONTEXT );
+			else
+				error(loc) << "cannot initialize parser with this type, context or input only" << endp;
+		}
+	}
+
+	evaluateCapture( pd, code, newUT );
+
+	return newUT;
+}
+
+/* Emit the expression followed by a tree cast to the element of the type in
+ * typeRef. The type of the expression itself is not examined. */
+UniqueType *LangTerm::evaluateCast( Compiler *pd, CodeVect &code ) const
+{
+	UniqueType *castUT = typeRef->uniqueType;
+
+	expr->evaluate( pd, code );
+
+	code.append( IN_TREE_CAST );
+	code.appendHalf( castUT->langEl->id );
+
+	return castUT;
+}
+
+/*
+ * Emit the field stores for the field initialisation arguments of a
+ * constructed tree of type replUT. The initialiser values are expected to
+ * have been evaluated already, in forward order, so they are consumed here in
+ * reverse: the last one evaluated is on top of the stack.
+ */
+void LangTerm::assignFieldArgs( Compiler *pd, CodeVect &code, UniqueType *replUT ) const
+{
+	if ( fieldInitArgs == 0 || fieldInitArgs->length() <= 0 )
+		return;
+
+	ObjectDef *objDef = replUT->objectDef();
+
+	/* Walk the initialisers last to first. */
+	for ( FieldInitVect::Iter init = fieldInitArgs->last(); init.gtb(); init-- ) {
+		FieldInit *fieldInit = *init;
+
+		/* Initialisers are positional: the n-th goes to field number n. */
+		ObjectField *field = objDef->findFieldNum( init.pos() );
+		if ( field == 0 ) {
+			error(fieldInit->loc) << "failed to find init pos " <<
+					init.pos() << " in object" << endp;
+		}
+
+		/* The initialiser's type must be assignable to the field's type. */
+		UniqueType *fieldUT = field->typeRef->uniqueType;
+		if ( !castAssignment( pd, code, fieldUT, 0, fieldInit->exprUT ) )
+			error(fieldInit->loc) << "type mismatch in initialization" << endp;
+
+		/* This set instruction must leave the object on top of the stack. */
+		code.append( IN_SET_FIELD_TREE_LEAVE_WC );
+		code.appendHalf( field->offset );
+	}
+}
+
+/*
+ * Emit a tree construction: the field initialiser expressions, the embedded
+ * expressions of the constructor, the construct instruction, the field
+ * stores, and finally the capture. Returns the constructed tree type.
+ */
+UniqueType *LangTerm::evaluateConstruct( Compiler *pd, CodeVect &code ) const
+{
+	/* Evaluate the initialisers in order, remembering each one's type for
+	 * the check in assignFieldArgs. */
+	if ( fieldInitArgs != 0 && fieldInitArgs->length() > 0 ) {
+		for ( FieldInitVect::Iter init = *fieldInitArgs; init.lte(); init++ ) {
+			FieldInit *fieldInit = *init;
+			fieldInit->exprUT = fieldInit->expr->evaluate( pd, code );
+		}
+	}
+
+	/* Hand out bind ids to the constructor items that carry an expression. */
+	for ( ConsItemList::Iter ci = *constructor->list; ci.lte(); ci++ ) {
+		if ( ci->expr != 0 )
+			ci->bindId = constructor->nextBindId++;
+	}
+
+	/* Evaluate the embedded expressions, last item first. */
+	for ( ConsItemList::Iter ci = constructor->list->last(); ci.gtb(); ci-- ) {
+		if ( ci->type != ConsItem::ExprType )
+			continue;
+
+		UniqueType *ut = ci->expr->evaluate( pd, code );
+
+		if ( ut->typeId != TYPE_TREE ) {
+			error(constructor->loc) << "variables used in "
+					"replacements must be trees" << endp;
+		}
+
+		/* Trimming is applied on request, but never to strings. */
+		if ( !isStr( ut ) && ci->trim == ConsItem::TrimYes )
+			code.append( IN_TREE_TRIM );
+
+		ci->langEl = ut->langEl;
+	}
+
+	/* Build the tree from the information stored in the compiled code. */
+	code.append( IN_CONSTRUCT );
+	code.appendHalf( constructor->patRepId );
+
+	/* Record the target type in the constructor so that replacement parsing
+	 * has a target. */
+	UniqueType *replUT = typeRef->uniqueType;
+	if ( replUT->typeId != TYPE_TREE )
+		error(loc) << "don't know how to construct this type" << endp;
+
+	constructor->langEl = replUT->langEl;
+
+	assignFieldArgs( pd, code, replUT );
+	evaluateCapture( pd, code, replUT );
+
+	return replUT;
+}
+
+/* Emit the parse-fragment instruction. Neither pd nor stopId is consulted. */
+void LangTerm::parseFrag( Compiler *pd, CodeVect &code, int stopId )
+{
+	/* A single opcode with no operands. */
+	code.append( IN_PARSE_FRAG_W );
+}
+
+/*
+ * Emit a read-reduce: evaluate the expression items of the parser text in
+ * order, then emit IN_READ_REDUCE with the parser generic's id and the
+ * reducer id. Literal and input-text items emit nothing. Returns the parser's
+ * element type.
+ */
+UniqueType *LangTerm::evaluateReadReduce( Compiler *pd, CodeVect &code ) const
+{
+	UniqueType *parserUT = typeRef->uniqueType;
+	UniqueType *targetUT = parserUT->generic->elUt;
+
+	/* Should be one arg and it should be a stream. */
+
+	/* Only expression items produce code. */
+	for ( ConsItemList::Iter ci = *parserText->list; ci.lte(); ci++ ) {
+		if ( ci->type == ConsItem::ExprType )
+			ci->expr->evaluate( pd, code );
+	}
+
+	code.append( IN_READ_REDUCE );
+	code.appendHalf( parserUT->generic->id );
+	code.appendHalf( parserText->reducerId );
+
+	return targetUT;
+}
+
+/*
+ * Emit a parse statement: construct the parser, optionally initialise its
+ * context/input, send each item of the parser text followed by a parse
+ * fragment, finish the parse, save the parser's error and replace the parser
+ * on the stack with the parsed tree.
+ *
+ * tree: send items with IN_SEND_TREE_W instead of IN_SEND_TEXT_W.
+ * stop: mark the target element as a parse-stop type, pass its id as the
+ *       stop id, and do not send EOF at the end.
+ *
+ * When the parser text is both reduce and read, the work is delegated to
+ * evaluateReadReduce. Returns the parser's element type.
+ */
+UniqueType *LangTerm::evaluateParse( Compiler *pd, CodeVect &code,
+		bool tree, bool stop ) const
+{
+	if ( parserText->reduce && parserText->read ) {
+		return evaluateReadReduce( pd, code );
+	}
+
+	UniqueType *parserUT = typeRef->uniqueType;
+	UniqueType *targetUT = parserUT->generic->elUt;
+
+	/* If this is a parse stop then we need to verify that the type is
+	 * compatible with parse stop. */
+	if ( stop )
+		targetUT->langEl->parseStop = true;
+	int stopId = stop ? targetUT->langEl->id : 0;
+
+	/* Field init args, when given, carry the context and/or the input. */
+	bool context = false;
+	if ( fieldInitArgs != 0 ) {
+		if ( fieldInitArgs->length() != 1 && fieldInitArgs->length() != 2 )
+			error(loc) << "parse command requires just context and input" << endp;
+		context = true;
+	}
+
+	/* Evaluate variable references. */
+	for ( ConsItemList::Iter item = consItemList->last(); item.gtb(); item-- ) {
+		if ( item->type == ConsItem::ExprType ) {
+			UniqueType *ut = item->expr->evaluate( pd, code );
+
+			/* Report with this term's location, like the other diagnostics
+			 * in this function. */
+			if ( ut->typeId != TYPE_TREE )
+				error(loc) << "variables used in replacements must be trees" << endp;
+
+			if ( item->trim == ConsItem::TrimYes )
+				code.append( IN_TREE_TRIM );
+
+			item->langEl = ut->langEl;
+		}
+	}
+
+	/* Construct the parser. */
+
+	if ( parserText->reduce ) {
+		code.append( IN_CONS_REDUCER );
+		code.appendHalf( parserUT->generic->id );
+		code.appendHalf( parserText->reducerId );
+	}
+	else {
+		code.append( IN_CONS_GENERIC );
+		code.appendHalf( parserUT->generic->id );
+		code.appendHalf( stopId );
+	}
+
+	/*
+	 * First load the context into the parser.
+	 */
+	if ( context ) {
+		for ( int i = 0; i < fieldInitArgs->length(); i++ ) {
+			/* Eval what we are initializing with. */
+			UniqueType *argUT = fieldInitArgs->data[i]->expr->evaluate( pd, code );
+
+			if ( argUT == pd->uniqueTypeInput ) {
+				code.append( IN_SET_PARSER_INPUT );
+			}
+			else if ( argUT->typeId == TYPE_STRUCT && targetUT->langEl->contextIn != 0 ) {
+				code.append( IN_SET_PARSER_CONTEXT );
+			}
+			else {
+				error(loc) << "cannot initialize parser with this type, context or input only" << endp;
+			}
+		}
+	}
+
+	/*****************************/
+
+	if ( parserText->list->length() == 0 ) {
+		code.append( IN_SEND_NOTHING );
+
+		/* Parse instruction, dependent on whether or not we are producing
+		 * revert or commit code. */
+		parseFrag( pd, code, stopId );
+	}
+	else {
+		for ( ConsItemList::Iter item = *parserText->list; item.lte(); item++ ) {
+			bool isStream = false;
+			uchar trim = TRIM_DEFAULT;
+
+			switch ( item->type ) {
+			case ConsItem::LiteralType: {
+				String result;
+				bool unusedCI;
+				prepareLitString( result, unusedCI,
+						item->prodEl->typeRef->pdaLiteral->data,
+						item->prodEl->typeRef->pdaLiteral->loc );
+
+				/* Make sure we have this string. */
+				StringMapEl *mapEl = 0;
+				if ( pd->literalStrings.insert( result, &mapEl ) )
+					mapEl->value = pd->literalStrings.length()-1;
+
+				code.append( IN_LOAD_STR );
+				code.appendWord( mapEl->value );
+				break;
+			}
+			case ConsItem::InputText: {
+				/* Make sure we have this string. */
+				StringMapEl *mapEl = 0;
+				if ( pd->literalStrings.insert( item->data, &mapEl ) )
+					mapEl->value = pd->literalStrings.length()-1;
+
+				code.append( IN_LOAD_STR );
+				code.appendWord( mapEl->value );
+				break;
+			}
+			case ConsItem::ExprType: {
+				UniqueType *ut = item->expr->evaluate( pd, code );
+
+				if ( ut->typeId == TYPE_VOID ) {
+					/* Clear it away if return type is void. The continue
+					 * moves on to the next item without sending anything. */
+					code.append( IN_POP_VAL );
+					continue;
+				}
+
+				if ( ut->typeId == TYPE_INT || ut->typeId == TYPE_BOOL )
+					code.append( IN_INT_TO_STR );
+
+				if ( ut == pd->uniqueTypeStream )
+					isStream = true;
+
+				if ( item->trim == ConsItem::TrimYes )
+					trim = TRIM_YES;
+				else if ( item->trim == ConsItem::TrimNo )
+					trim = TRIM_NO;
+
+				break;
+			}}
+
+			/* Send what was just loaded, in the form that suits it. */
+			if ( isStream )
+				code.append( IN_SEND_STREAM_W );
+			else if ( tree ) {
+				code.append( IN_SEND_TREE_W );
+				code.append( trim );
+			}
+			else {
+				code.append( IN_SEND_TEXT_W );
+				code.append( trim );
+			}
+
+			/* Parse instruction, dependent on whether or not we are producing
+			 * revert or commit code. */
+			parseFrag( pd, code, stopId );
+		}
+	}
+
+	/*
+	 * Finish operation
+	 */
+
+	if ( !stop ) {
+		code.append( IN_SEND_EOF_W );
+		parseFrag( pd, code, stopId );
+	}
+
+	if ( parserText->reduce ) {
+		code.append( IN_REDUCE_COMMIT );
+	}
+
+	/* Pull out the error and save it off. */
+	code.append( IN_DUP_VAL );
+	code.append( IN_GET_PARSER_MEM_R );
+	code.appendHalf( 1 );
+	code.append( IN_SET_ERROR );
+
+	/* Replace the parser with the parsed tree. */
+	code.append( IN_GET_PARSER_MEM_R );
+	code.appendHalf( 0 );
+
+	/* Capture to the local var. */
+	evaluateCapture( pd, code, targetUT );
+
+	return targetUT;
+}
+
+/*
+ * Emit a send to a stream, or to a list of streams. The target variable is
+ * evaluated first; for a list of streams a member of the list is then
+ * fetched with IN_GET_VLIST_MEM_R. Each item of the parser text is loaded or
+ * evaluated and followed by IN_PRINT_TREE with the item's trim setting.
+ */
+void LangTerm::evaluateSendStream( Compiler *pd, CodeVect &code ) const
+{
+ UniqueType *varUt = varRef->evaluate( pd, code );
+
+ if ( varUt->listOf( pd->uniqueTypeStream ) ) {
+ code.append( IN_GET_VLIST_MEM_R );
+ code.appendHalf( varUt->generic->id );
+ code.appendHalf( 0 );
+ }
+
+ /* Load and print each item of the send text, in order. */
+ for ( ConsItemList::Iter item = *parserText->list; item.lte(); item++ ) {
+ uchar trim = TRIM_DEFAULT;
+
+ switch ( item->type ) {
+ case ConsItem::LiteralType: {
+ String result;
+ bool unusedCI;
+ prepareLitString( result, unusedCI,
+ item->prodEl->typeRef->pdaLiteral->data,
+ item->prodEl->typeRef->pdaLiteral->loc );
+
+ /* Make sure we have this string. */
+ StringMapEl *mapEl = 0;
+ if ( pd->literalStrings.insert( result, &mapEl ) )
+ mapEl->value = pd->literalStrings.length()-1;
+
+ code.append( IN_LOAD_STR );
+ code.appendWord( mapEl->value );
+ break;
+ }
+ case ConsItem::InputText: {
+ /* Make sure we have this string. */
+ StringMapEl *mapEl = 0;
+ if ( pd->literalStrings.insert( item->data, &mapEl ) )
+ mapEl->value = pd->literalStrings.length()-1;
+
+ code.append( IN_LOAD_STR );
+ code.appendWord( mapEl->value );
+ break;
+ }
+ case ConsItem::ExprType: {
+ UniqueType *ut = item->expr->evaluate( pd, code );
+ if ( ut->typeId == TYPE_VOID ) {
+ /* Clear it away if return type is void. The continue applies to
+ * the enclosing loop, so nothing is printed for this item. */
+ code.append( IN_POP_VAL );
+ continue;
+ }
+
+ if ( ut->typeId == TYPE_INT || ut->typeId == TYPE_BOOL )
+ code.append( IN_INT_TO_STR );
+
+ if ( item->trim == ConsItem::TrimYes )
+ trim = TRIM_YES;
+ else if ( item->trim == ConsItem::TrimNo )
+ trim = TRIM_NO;
+
+ break;
+ }}
+
+ /* Print the item; the trim setting is the instruction's operand. */
+ code.append( IN_PRINT_TREE );
+ code.append( trim );
+ }
+}
+
+/*
+ * Emit a send to a parser. The target variable is evaluated first. Each item
+ * of the parser text is loaded or evaluated, sent, and followed by a parse
+ * fragment; an empty text sends IN_SEND_NOTHING instead. When the eof member
+ * is set, an EOF send and a final parse fragment are appended.
+ *
+ * strings: when true non-stream items are sent with IN_SEND_TEXT_W, when
+ * false with IN_SEND_TREE_W. Stream-typed expressions always use
+ * IN_SEND_STREAM_W.
+ */
+void LangTerm::evaluateSendParser( Compiler *pd, CodeVect &code, bool strings ) const
+{
+ UniqueType *varUt = varRef->evaluate( pd, code );
+
+ /* A parser needs nothing further; for a list of streams a member of the
+ * list is fetched. */
+ if ( varUt->parser() ) {
+ }
+ else if ( varUt->listOf( pd->uniqueTypeStream ) ) {
+ code.append( IN_GET_VLIST_MEM_R );
+ code.appendHalf( varUt->generic->id );
+ code.appendHalf( 0 );
+ }
+
+ if ( parserText->list->length() == 0 ) {
+ code.append( IN_SEND_NOTHING );
+
+ /* Parse instruction, dependent on whether or not we are producing
+ * revert or commit code. */
+ parseFrag( pd, code, 0 );
+ }
+ else {
+
+ /* Load, send and parse each item of the send text, in order. */
+ for ( ConsItemList::Iter item = *parserText->list; item.lte(); item++ ) {
+ bool isStream = false;
+ uchar trim = TRIM_DEFAULT;
+
+ switch ( item->type ) {
+ case ConsItem::LiteralType: {
+ String result;
+ bool unusedCI;
+ prepareLitString( result, unusedCI,
+ item->prodEl->typeRef->pdaLiteral->data,
+ item->prodEl->typeRef->pdaLiteral->loc );
+
+ /* Make sure we have this string. */
+ StringMapEl *mapEl = 0;
+ if ( pd->literalStrings.insert( result, &mapEl ) )
+ mapEl->value = pd->literalStrings.length()-1;
+
+ code.append( IN_LOAD_STR );
+ code.appendWord( mapEl->value );
+ break;
+ }
+ case ConsItem::InputText: {
+ /* Make sure we have this string. */
+ StringMapEl *mapEl = 0;
+ if ( pd->literalStrings.insert( item->data, &mapEl ) )
+ mapEl->value = pd->literalStrings.length()-1;
+
+ code.append( IN_LOAD_STR );
+ code.appendWord( mapEl->value );
+ break;
+ }
+ /* This case declares a local without its own braces, so it has to stay
+ * the last case of the switch. */
+ case ConsItem::ExprType:
+ UniqueType *ut = item->expr->evaluate( pd, code );
+ if ( ut->typeId == TYPE_VOID ) {
+ /* Clear it away if return type is void. The continue applies to
+ * the enclosing loop, so nothing is sent for this item. */
+ code.append( IN_POP_VAL );
+ continue;
+ }
+
+ if ( ut == pd->uniqueTypeStream )
+ isStream = true;
+
+ if ( item->trim == ConsItem::TrimYes )
+ trim = TRIM_YES;
+ else if ( item->trim == ConsItem::TrimNo )
+ trim = TRIM_NO;
+
+ if ( ut->typeId == TYPE_INT || ut->typeId == TYPE_BOOL )
+ code.append( IN_INT_TO_STR );
+
+ break;
+ }
+
+ /* Send what was just loaded, in the form that suits it. */
+ if ( isStream )
+ code.append( IN_SEND_STREAM_W );
+ else if ( !strings ) {
+ code.append( IN_SEND_TREE_W );
+ code.append( trim );
+ }
+ else {
+ code.append( IN_SEND_TEXT_W );
+ code.append( trim );
+ }
+
+ parseFrag( pd, code, 0 );
+ }
+ }
+
+ /* Finish the input when this send carries an EOF. */
+ if ( eof ) {
+ code.append( IN_SEND_EOF_W );
+ parseFrag( pd, code, 0 );
+ }
+}
+
+/*
+ * Emit a send statement, dispatching on the type of the target variable:
+ * streams and lists of streams go through evaluateSendStream, parsers through
+ * evaluateSendParser in text mode. Any other target is an error. Returns the
+ * type of the target variable.
+ */
+UniqueType *LangTerm::evaluateSend( Compiler *pd, CodeVect &code ) const
+{
+	UniqueType *varUt = varRef->lookup( pd );
+
+	if ( varUt == pd->uniqueTypeStream )
+		evaluateSendStream( pd, code );
+	else if ( varUt->listOf( pd->uniqueTypeStream ) )
+		evaluateSendStream( pd, code );
+	else if ( varUt->parser() )
+		evaluateSendParser( pd, code, true );
+	else {
+		/* Terminate with endp, as every other diagnostic in this file does,
+		 * rather than with a plain newline. */
+		error(loc) << "can only send to parsers and streams" << endp;
+	}
+
+	return varUt;
+}
+
+
+/*
+ * Emit a send_tree statement. Only parsers are valid targets; they are sent
+ * to through evaluateSendParser in tree mode. Returns the type of the target
+ * variable.
+ */
+UniqueType *LangTerm::evaluateSendTree( Compiler *pd, CodeVect &code ) const
+{
+	UniqueType *varUt = varRef->lookup( pd );
+
+	if ( varUt->parser() )
+		evaluateSendParser( pd, code, false );
+	else {
+		/* Terminate with endp, as every other diagnostic in this file does,
+		 * rather than with a plain newline. */
+		error(loc) << "can only send_tree to parsers" << endp;
+	}
+
+	return varUt;
+}
+
+/*
+ * Emit a string built from the items of consItemList. Each item is loaded as
+ * a string (ints and bools are converted, as are trees other than str and
+ * stream), and the pieces are then joined with IN_CONCAT_STR. An empty list
+ * yields the empty string. Returns the string type.
+ */
+UniqueType *LangTerm::evaluateEmbedString( Compiler *pd, CodeVect &code ) const
+{
+ /* Load each item of the embedded string, in order. */
+ for ( ConsItemList::Iter item = *consItemList; item.lte(); item++ ) {
+ switch ( item->type ) {
+ case ConsItem::LiteralType: {
+ String result;
+ bool unusedCI;
+ prepareLitString( result, unusedCI,
+ item->prodEl->typeRef->pdaLiteral->data,
+ item->prodEl->typeRef->pdaLiteral->loc );
+
+ /* Make sure we have this string. */
+ StringMapEl *mapEl = 0;
+ if ( pd->literalStrings.insert( result, &mapEl ) )
+ mapEl->value = pd->literalStrings.length()-1;
+
+ code.append( IN_LOAD_STR );
+ code.appendWord( mapEl->value );
+ break;
+ }
+ case ConsItem::InputText: {
+ /* Make sure we have this string. */
+ StringMapEl *mapEl = 0;
+ if ( pd->literalStrings.insert( item->data, &mapEl ) )
+ mapEl->value = pd->literalStrings.length()-1;
+
+ code.append( IN_LOAD_STR );
+ code.appendWord( mapEl->value );
+ break;
+ }
+ case ConsItem::ExprType: {
+ UniqueType *ut = item->expr->evaluate( pd, code );
+
+ if ( ut->typeId == TYPE_INT || ut->typeId == TYPE_BOOL )
+ code.append( IN_INT_TO_STR );
+
+ if ( ut->typeId == TYPE_TREE &&
+ ut->langEl != pd->strLangEl && ut != pd->uniqueTypeStream )
+ {
+ /* Convert it to a string. */
+ code.append( IN_TREE_TO_STR );
+ }
+ break;
+ }}
+ }
+
+ /* If there was nothing loaded, load the empty string. We must produce
+ * something. */
+ if ( consItemList->length() == 0 ) {
+ String result = "";
+
+ /* Make sure we have this string. */
+ StringMapEl *mapEl = 0;
+ if ( pd->literalStrings.insert( result, &mapEl ) )
+ mapEl->value = pd->literalStrings.length()-1;
+
+ code.append( IN_LOAD_STR );
+ code.appendWord( mapEl->value );
+ }
+
+ /* Joining n pieces takes n-1 concatenations. */
+ long items = consItemList->length();
+ for ( long i = 0; i < items-1; i++ )
+ code.append( IN_CONCAT_STR );
+
+ return pd->uniqueTypeStr;
+}
+
+/* Emit a search, within the tree or reference named by varRef, for the tree
+ * type given by typeRef. Returns the type searched for. */
+UniqueType *LangTerm::evaluateSearch( Compiler *pd, CodeVect &code ) const
+{
+	/* The thing being looked for must be a tree type. */
+	UniqueType *wantedUT = typeRef->uniqueType;
+	if ( wantedUT->typeId != TYPE_TREE ) {
+		error(loc) << "can only search for tree types" << endp;
+	}
+
+	/* Load the subject of the search. */
+	UniqueType *subjectUT = varRef->evaluate( pd, code );
+	if ( subjectUT->typeId != TYPE_TREE && subjectUT->typeId != TYPE_REF ) {
+		error(loc) << "search can be applied only to tree/ref types" << endp;
+	}
+
+	/* The search instruction takes the wanted element's id as a word. */
+	code.append( IN_TREE_SEARCH );
+	code.appendWord( wantedUT->langEl->id );
+
+	return wantedUT;
+}
+
+/*
+ * Generate code for a term and return the unique type of the term.
+ *
+ * Simple terms (nil, true/false, numbers, string literals, typeid) are
+ * emitted directly here. Every other term type is delegated to the matching
+ * evaluate* method (or to the variable reference for plain references and
+ * method calls).
+ *
+ * pd:   compiler state; supplies the literal string table and unique types.
+ * code: the instruction vector that is appended to.
+ *
+ * Returns 0 when the term type matches none of the cases below.
+ */
+UniqueType *LangTerm::evaluate( Compiler *pd, CodeVect &code ) const
+{
+    UniqueType *retUt = 0;
+    switch ( type ) {
+        case VarRefType:
+            retUt = varRef->evaluate( pd, code );
+            break;
+        case MethodCallType:
+            retUt = varRef->evaluateCall( pd, code, args );
+            break;
+        case NilType:
+            code.append( IN_LOAD_NIL );
+            retUt = pd->uniqueTypeNil;
+            break;
+        case TrueType:
+            code.append( IN_LOAD_TRUE );
+            retUt = pd->uniqueTypeBool;
+            break;
+        case FalseType:
+            code.append( IN_LOAD_FALSE );
+            retUt = pd->uniqueTypeBool;
+            break;
+        case MakeTokenType:
+            retUt = evaluateMakeToken( pd, code );
+            break;
+        case MakeTreeType:
+            retUt = evaluateMakeTree( pd, code );
+            break;
+        case NumberType: {
+            /* The literal text is converted with atoi and loaded as a word. */
+            unsigned int n = atoi( data );
+            code.append( IN_LOAD_INT );
+            code.appendWord( n );
+            retUt = pd->uniqueTypeInt;
+            break;
+        }
+        case StringType: {
+            String interp;
+            bool unused;
+            prepareLitString( interp, unused, data, InputLoc() );
+
+            /* Make sure we have this string. A newly inserted string takes
+             * the next index in the literal string table. */
+            StringMapEl *mapEl = 0;
+            if ( pd->literalStrings.insert( interp, &mapEl ) )
+                mapEl->value = pd->literalStrings.length()-1;
+
+            code.append( IN_LOAD_STR );
+            code.appendWord( mapEl->value );
+            retUt = pd->uniqueTypeStr;
+            break;
+        }
+        case MatchType:
+            retUt = evaluateMatch( pd, code );
+            break;
+        case ProdCompareType:
+            retUt = evaluateProdCompare( pd, code );
+            break;
+        case ParseType:
+            retUt = evaluateParse( pd, code, false, false );
+            break;
+        case ParseTreeType:
+            retUt = evaluateParse( pd, code, true, false );
+            break;
+        case ParseStopType:
+            retUt = evaluateParse( pd, code, false, true );
+            break;
+        case ConstructType:
+            retUt = evaluateConstruct( pd, code );
+            break;
+        case SendType:
+            retUt = evaluateSend( pd, code );
+            break;
+        case SendTreeType:
+            retUt = evaluateSendTree( pd, code );
+            break;
+        case NewType:
+            retUt = evaluateNew( pd, code );
+            break;
+        case TypeIdType: {
+            /* The typeid of a tree type is its language element id, loaded
+             * as an int. Report the error at this term's location, the same
+             * way the other LangTerm evaluators do. */
+            UniqueType *ut = typeRef->uniqueType;
+            if ( ut->typeId != TYPE_TREE )
+                error(loc) << "typeid can only be applied to tree types" << endp;
+
+            code.append( IN_LOAD_INT );
+            code.appendWord( ut->langEl->id );
+            retUt = pd->uniqueTypeInt;
+            break;
+        }
+        case SearchType:
+            retUt = evaluateSearch( pd, code );
+            break;
+        case EmbedStringType:
+            retUt = evaluateEmbedString( pd, code );
+            break;
+        case CastType:
+            retUt = evaluateCast( pd, code );
+            break;
+    }
+
+    return retUt;
+}
+
+/*
+ * Generate code for an expression and return the unique type of its result.
+ *
+ * Binary expressions evaluate the left operand, then the right operand, then
+ * append the operator instruction. Arithmetic requires two ints (or two
+ * strings for '+'). Comparisons require both sides to have the same unique
+ * type and pick the value or tree form of the test based on that type.
+ * Logical and/or evaluate the right operand only conditionally by emitting a
+ * forward jump that is patched once the length of the right-hand code is
+ * known. Unary expressions evaluate the single (right) operand. Term
+ * expressions defer to the term.
+ *
+ * pd:   compiler state; supplies the unique types compared against.
+ * code: the instruction vector that is appended to.
+ *
+ * Returns 0 only on paths that follow a reported error or an unrecognised
+ * expression type.
+ */
+UniqueType *LangExpr::evaluate( Compiler *pd, CodeVect &code ) const
+{
+    switch ( type ) {
+        case BinaryType: {
+            switch ( op ) {
+                case '+': {
+                    UniqueType *lt = left->evaluate( pd, code );
+                    UniqueType *rt = right->evaluate( pd, code );
+
+                    if ( lt == pd->uniqueTypeInt && rt == pd->uniqueTypeInt ) {
+                        code.append( IN_ADD_INT );
+                        return pd->uniqueTypeInt;
+                    }
+
+                    /* '+' on two strings is concatenation. */
+                    if ( lt == pd->uniqueTypeStr && rt == pd->uniqueTypeStr ) {
+                        code.append( IN_CONCAT_STR );
+                        return pd->uniqueTypeStr;
+                    }
+
+                    error(loc) << "do not have an addition operator for these types" << endp;
+                    break;
+                }
+                case '-': {
+                    UniqueType *lt = left->evaluate( pd, code );
+                    UniqueType *rt = right->evaluate( pd, code );
+
+                    if ( lt == pd->uniqueTypeInt && rt == pd->uniqueTypeInt ) {
+                        code.append( IN_SUB_INT );
+                        return pd->uniqueTypeInt;
+                    }
+
+                    error(loc) << "do not have a subtraction operator for these types" << endp;
+                    break;
+                }
+                case '*': {
+                    UniqueType *lt = left->evaluate( pd, code );
+                    UniqueType *rt = right->evaluate( pd, code );
+
+                    if ( lt == pd->uniqueTypeInt && rt == pd->uniqueTypeInt ) {
+                        code.append( IN_MULT_INT );
+                        return pd->uniqueTypeInt;
+                    }
+
+                    error(loc) << "do not have a multiplication "
+                            "operator for these types" << endp;
+                    break;
+                }
+                case '/': {
+                    UniqueType *lt = left->evaluate( pd, code );
+                    UniqueType *rt = right->evaluate( pd, code );
+
+                    if ( lt == pd->uniqueTypeInt && rt == pd->uniqueTypeInt ) {
+                        code.append( IN_DIV_INT );
+                        return pd->uniqueTypeInt;
+                    }
+
+                    /* Adjacent literals are concatenated, so the separating
+                     * space must be inside one of them. */
+                    error(loc) << "do not have a division "
+                            "operator for these types" << endp;
+                    break;
+                }
+                case OP_DoubleEql: {
+                    UniqueType *lt = left->evaluate( pd, code );
+                    UniqueType *rt = right->evaluate( pd, code );
+
+                    if ( lt != rt )
+                        error(loc) << "comparison of different types" << endp;
+
+                    if ( lt->val() )
+                        code.append( IN_TST_EQL_VAL );
+                    else
+                        code.append( IN_TST_EQL_TREE );
+                    return pd->uniqueTypeBool;
+                }
+                case OP_NotEql: {
+                    UniqueType *lt = left->evaluate( pd, code );
+                    UniqueType *rt = right->evaluate( pd, code );
+
+                    if ( lt != rt )
+                        error(loc) << "comparison of different types" << endp;
+
+                    if ( lt->val() )
+                        code.append( IN_TST_NOT_EQL_VAL );
+                    else
+                        code.append( IN_TST_NOT_EQL_TREE );
+
+                    return pd->uniqueTypeBool;
+                }
+                case '<': {
+                    UniqueType *lt = left->evaluate( pd, code );
+                    UniqueType *rt = right->evaluate( pd, code );
+
+                    if ( lt != rt )
+                        error(loc) << "comparison of different types" << endp;
+
+                    if ( lt->val() )
+                        code.append( IN_TST_LESS_VAL );
+                    else
+                        code.append( IN_TST_LESS_TREE );
+                    return pd->uniqueTypeBool;
+                }
+                case '>': {
+                    UniqueType *lt = left->evaluate( pd, code );
+                    UniqueType *rt = right->evaluate( pd, code );
+
+                    if ( lt != rt )
+                        error(loc) << "comparison of different types" << endp;
+
+                    if ( lt->val() )
+                        code.append( IN_TST_GRTR_VAL );
+                    else
+                        code.append( IN_TST_GRTR_TREE );
+
+                    return pd->uniqueTypeBool;
+                }
+                case OP_LessEql: {
+                    UniqueType *lt = left->evaluate( pd, code );
+                    UniqueType *rt = right->evaluate( pd, code );
+
+                    if ( lt != rt )
+                        error(loc) << "comparison of different types" << endp;
+
+                    if ( lt->val() )
+                        code.append( IN_TST_LESS_EQL_VAL );
+                    else
+                        code.append( IN_TST_LESS_EQL_TREE );
+
+                    return pd->uniqueTypeBool;
+                }
+                case OP_GrtrEql: {
+                    UniqueType *lt = left->evaluate( pd, code );
+                    UniqueType *rt = right->evaluate( pd, code );
+
+                    if ( lt != rt )
+                        error(loc) << "comparison of different types" << endp;
+
+                    if ( lt->val() )
+                        code.append( IN_TST_GRTR_EQL_VAL );
+                    else
+                        code.append( IN_TST_GRTR_EQL_TREE );
+
+                    return pd->uniqueTypeBool;
+                }
+                case OP_LogicalAnd: {
+                    /* Evaluate the left and duplicate it. A non-value left
+                     * operand is first reduced with a non-zero test. */
+                    UniqueType *lut = left->evaluate( pd, code );
+                    if ( !lut->val() )
+                        code.append( IN_TST_NZ_TREE );
+                    code.append( IN_DUP_VAL );
+
+                    /* Jump over the right if false, leaving the original left
+                     * result on the top of the stack. We don't know the
+                     * distance yet so record the position of the jump. */
+                    long jump = code.length();
+                    code.append( IN_JMP_FALSE_VAL );
+                    code.appendHalf( 0 );
+
+                    /* Evaluate the right, add the test. */
+                    UniqueType *rut = right->evaluate( pd, code );
+                    if ( !rut->val() )
+                        code.append( IN_TST_NZ_TREE );
+
+                    code.append( IN_TST_LOGICAL_AND );
+
+                    /* Set the distance of the jump. The operand lives at
+                     * jump+1; the 3 assumes the opcode plus its half operand
+                     * occupy three code units, so the distance is measured
+                     * from the end of the jump instruction. */
+                    long distance = code.length() - jump - 3;
+                    code.setHalf( jump+1, distance );
+
+                    /* NOTE(review): reported as int rather than bool, unlike
+                     * the comparison operators above -- confirm intended. */
+                    return pd->uniqueTypeInt;
+                }
+                case OP_LogicalOr: {
+                    /* Evaluate the left and duplicate it. A non-value left
+                     * operand is first reduced with a non-zero test. */
+                    UniqueType *lut = left->evaluate( pd, code );
+                    if ( !lut->val() )
+                        code.append( IN_TST_NZ_TREE );
+                    code.append( IN_DUP_VAL );
+
+                    /* Jump over the right if true, leaving the original left
+                     * result on the top of the stack. We don't know the
+                     * distance yet so record the position of the jump. */
+                    long jump = code.length();
+                    code.append( IN_JMP_TRUE_VAL );
+                    code.appendHalf( 0 );
+
+                    /* Evaluate the right, add the test. */
+                    UniqueType *rut = right->evaluate( pd, code );
+                    if ( !rut->val() )
+                        code.append( IN_TST_NZ_TREE );
+
+                    code.append( IN_TST_LOGICAL_OR );
+
+                    /* Set the distance of the jump (see the logical-and case
+                     * for how the distance is measured). */
+                    long distance = code.length() - jump - 3;
+                    code.setHalf( jump+1, distance );
+
+                    /* NOTE(review): reported as int rather than bool, unlike
+                     * the comparison operators above -- confirm intended. */
+                    return pd->uniqueTypeInt;
+                }
+            }
+
+            /* Reached only after an arithmetic type error was reported or
+             * when the operator matched none of the cases. */
+            assert(false);
+            return 0;
+        }
+        case UnaryType: {
+            switch ( op ) {
+                case '!': {
+                    /* Evaluate the operand and negate it, using the value or
+                     * tree form of the instruction as appropriate. */
+                    UniqueType *ut = right->evaluate( pd, code );
+                    if ( ut->val() )
+                        code.append( IN_NOT_VAL );
+                    else
+                        code.append( IN_NOT_TREE );
+                    return pd->uniqueTypeBool;
+                }
+                case '$': {
+                    UniqueType *ut = right->evaluate( pd, code );
+
+                    if ( ut->typeId == TYPE_INT || ut->typeId == TYPE_BOOL )
+                        code.append( IN_INT_TO_STR );
+
+                    /* NOTE(review): the trim conversion is appended even after
+                     * an int/bool conversion, whereas '%' below uses an
+                     * else -- confirm intended. */
+                    code.append( IN_TREE_TO_STR_TRIM );
+                    return pd->uniqueTypeStr;
+                }
+                case 'S': {
+                    UniqueType *ut = right->evaluate( pd, code );
+
+                    if ( ut->typeId == TYPE_INT || ut->typeId == TYPE_BOOL )
+                        code.append( IN_INT_TO_STR );
+
+                    code.append( IN_TREE_TO_STR_TRIM_A );
+                    return pd->uniqueTypeStr;
+                }
+                case '%': {
+                    UniqueType *ut = right->evaluate( pd, code );
+                    if ( ut->typeId == TYPE_INT || ut->typeId == TYPE_BOOL )
+                        code.append( IN_INT_TO_STR );
+                    else
+                        code.append( IN_TREE_TO_STR );
+                    return pd->uniqueTypeStr;
+                }
+                case '^': {
+                    /* Trim the operand; the type is unchanged. */
+                    UniqueType *rt = right->evaluate( pd, code );
+                    code.append( IN_TREE_TRIM );
+                    return rt;
+                }
+                case '@': {
+                    /* No instruction is emitted for '@'; the operand's code
+                     * and type pass through unchanged. */
+                    UniqueType *rt = right->evaluate( pd, code );
+                    return rt;
+                }
+                default:
+                    assert(false);
+            }
+            return 0;
+        }
+        case TermType: {
+            return term->evaluate( pd, code );
+        }
+    }
+    return 0;
+}
+
+/*
+ * Generate the code that stores an already-evaluated expression, of type
+ * exprUT, into the variable this reference names. An error is reported if the
+ * target has an active reference, if it or part of its qualification is
+ * const, or if the expression type cannot be cast to the target type.
+ */
+void LangVarRef::assignValue( Compiler *pd, CodeVect &code,
+        UniqueType *exprUT ) const
+{
+    /* Resolve the left hand side of the assignment. */
+    VarRefLookup lookup = lookupField( pd );
+
+    if ( lookup.objField->refActive )
+        error(loc) << "reference active, cannot write to object" << endp;
+
+    if ( lookup.firstConstPart >= 0 ) {
+        error(loc) << "left hand side qualification \"" <<
+            qual->data[lookup.firstConstPart].data << "\" is const" << endp;
+    }
+
+    if ( lookup.objField->isConst )
+        error(loc) << "field \"" << name << "\" is const" << endp;
+
+    /* A write always marks the field's tree as dirty. */
+    lookup.objField->dirtyTree = true;
+
+    /* Type check against the field's type, emitting a cast if one is needed. */
+    UniqueType *objUT = lookup.objField->typeRef->uniqueType;
+    assert( lookup.uniqueType == lookup.objField->typeRef->uniqueType );
+    const bool compatible =
+            castAssignment( pd, code, objUT, lookup.iterSearchUT, exprUT );
+    if ( !compatible )
+        error(loc) << "type mismatch in assignment" << endp;
+
+    /* The assignment is revertible when it goes through a pointer in the
+     * qualification or when the target is not a local reference. */
+    bool needsRevert = lookup.lastPtrInQual >= 0 || !isLocalRef();
+
+    /* Load the object, then emit the store. Iterators have their own store. */
+    loadObj( pd, code, lookup.lastPtrInQual, true );
+
+    if ( lookup.uniqueType->typeId != TYPE_ITER ) {
+        setField( pd, code, lookup.inObject, lookup.objField, exprUT, needsRevert );
+    }
+    else {
+        setFieldIter( pd, code, lookup.inObject, lookup.objField,
+                lookup.uniqueType, exprUT, false );
+    }
+}
+
+/*
+ * Generate code for a make_token term. Every argument is evaluated in order;
+ * the first must be an int and the second a string. The make-token
+ * instruction is followed by the argument count. The result type is 'any'.
+ */
+UniqueType *LangTerm::evaluateMakeToken( Compiler *pd, CodeVect &code ) const
+{
+    /* A missing argument list counts as zero arguments. */
+    int numArgs = 0;
+    if ( args != 0 )
+        numArgs = args->length();
+
+    if ( numArgs < 2 )
+        error(loc) << "need at least two arguments" << endp;
+
+    for ( CallArgVect::Iter arg = *args; arg.lte(); arg++ ) {
+        /* Emit the argument, then check the fixed leading positions. */
+        UniqueType *argUT = (*arg)->expr->evaluate( pd, code );
+
+        switch ( arg.pos() ) {
+            case 0:
+                if ( argUT != pd->uniqueTypeInt )
+                    error(loc) << "first arg, id, must be an int" << endp;
+                break;
+            case 1:
+                if ( argUT != pd->uniqueTypeStr )
+                    error(loc) << "second arg, length, must be a string" << endp;
+                break;
+        }
+    }
+
+    /* All arguments are in place; emit the instruction and its arg count. */
+    code.append( IN_MAKE_TOKEN );
+    code.append( args->length() );
+
+    return pd->uniqueTypeAny;
+}
+
+/*
+ * Generate code for a make_tree term. Every argument is evaluated in order;
+ * the first must be an int. The make-tree instruction is followed by the
+ * argument count. The result type is 'any'.
+ */
+UniqueType *LangTerm::evaluateMakeTree( Compiler *pd, CodeVect &code ) const
+{
+    /* A missing argument list counts as zero arguments. */
+    int numArgs = 0;
+    if ( args != 0 )
+        numArgs = args->length();
+
+    if ( numArgs < 1 )
+        error(loc) << "need at least one argument" << endp;
+
+    for ( CallArgVect::Iter arg = *args; arg.lte(); arg++ ) {
+        /* Emit the argument; only the first one has a fixed type. */
+        UniqueType *argUT = (*arg)->expr->evaluate( pd, code );
+
+        const bool isFirst = arg.pos() == 0;
+        if ( isFirst && argUT != pd->uniqueTypeInt )
+            error(loc) << "first arg, nonterm id, must be an int" << endp;
+    }
+
+    /* All arguments are in place; emit the instruction and its arg count. */
+    code.append( IN_MAKE_TREE );
+    code.append( args->length() );
+
+    return pd->uniqueTypeAny;
+}
+
+/*
+ * Emit the loop part of a for-iter statement: advance the iterator, test the
+ * result, run the statement list, jump back to the advance, and finally
+ * destroy the iterator. The instructions come from objField->iterImpl and all
+ * take objField->offset as their operand.
+ *
+ * The iterUT parameter is not used in this body.
+ */
+void LangStmt::compileForIterBody( Compiler *pd,
+ CodeVect &code, UniqueType *iterUT ) const
+{
+ /* Remember the top of the loop. */
+ long top = code.length();
+
+ /* Advance */
+ code.append( objField->iterImpl->inAdvance );
+ code.appendHalf( objField->offset );
+
+ /* Test: jump past the while block if false. Note that we don't have the
+ * distance yet. */
+ long jumpFalse = code.length();
+ code.append( IN_JMP_FALSE_VAL );
+ code.appendHalf( 0 );
+
+ /*
+ * Set up the loop cleanup code.
+ */
+
+ /* Add the cleanup for the current loop. It is pushed on the front of the
+ * unwind code so that a return inside the body (which appends the unwind
+ * code) unwinds this iterator. The length beforehand is recorded so the
+ * same amount can be removed again below. */
+ int lcLen = pd->unwindCode.length();
+ pd->unwindCode.insertHalf( 0, objField->offset );
+ pd->unwindCode.insert( 0, objField->iterImpl->inUnwind );
+
+ /* Compile the contents. */
+ for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ )
+ stmt->compile( pd, code );
+
+ /* Drop exactly what was pushed on the front of the unwind code above. */
+ pd->unwindCode.remove( 0, pd->unwindCode.length() - lcLen );
+
+ /* Jump back to the top to retest. The +3 presumably accounts for the jump
+ * instruction itself (opcode plus half operand). */
+ long retestDist = code.length() - top + 3;
+ code.append( IN_JMP );
+ code.appendHalf( -retestDist );
+
+ /* Set the jump false distance. */
+ long falseDist = code.length() - jumpFalse - 3;
+ code.setHalf( jumpFalse+1, falseDist );
+
+ /* Compute the jump distance for the break jumps. Every recorded break is
+ * pointed at the current end of code.
+ * NOTE(review): all entries in pd->breakJumps are patched here, so a break
+ * recorded by an enclosing loop before this loop was compiled would appear
+ * to be retargeted to this loop's exit -- confirm. */
+ for ( LongVect::Iter brk = pd->breakJumps; brk.lte(); brk++ ) {
+ long distance = code.length() - *brk - 3;
+ code.setHalf( *brk+1, distance );
+ }
+ pd->breakJumps.empty();
+
+ /* Destroy the iterator. */
+ code.append( objField->iterImpl->inDestroy );
+ code.appendHalf( objField->offset );
+
+ /* Clean up any prepush args. */
+}
+
+/*
+ * Compile a for-iter statement. Looks up the iterator call, picks an iterator
+ * implementation based on the iterator definition's type, emits the argument
+ * evaluation and the iterator create instruction with its operands, compiles
+ * the loop via compileForIterBody, and then emits the post-loop cleanup.
+ */
+void LangStmt::compileForIter( Compiler *pd, CodeVect &code ) const
+{
+ /* The type we are searching for. */
+ UniqueType *searchUT = typeRef->uniqueType;
+
+ /* Lookup the iterator call. Make sure it is an iterator. */
+ VarRefLookup lookup = iterCall->langTerm->varRef->lookupIterCall( pd );
+ if ( lookup.objMethod->iterDef == 0 ) {
+ error(loc) << "attempt to iterate using something "
+ "that is not an iterator" << endp;
+ }
+
+ /* Prepare the contiguous call args space. The operand is written as zero
+ * here and filled in at the end of this function. asLoc is assigned and
+ * read only when func is non-null. */
+ Function *func = lookup.objMethod->func;
+ int asLoc;
+ if ( func != 0 ) {
+ code.append( IN_PREP_ARGS );
+ asLoc = code.length();
+ code.appendHalf( 0 );
+ }
+
+ /*
+ * Create the iterator from the local var.
+ */
+
+ UniqueType *iterUT = objField->typeRef->uniqueType;
+ IterImpl *iterImpl = 0;
+
+ /* Choose the implementation. Tree iterators are chosen by the variable
+ * reference; user iterators carry the iterator's function. iterImpl stays
+ * null if the type matches none of the cases. */
+ switch ( iterUT->iterDef->type ) {
+ case IterDef::Tree:
+ iterImpl = iterCall->langTerm->varRef->chooseTriterCall( pd,
+ searchUT, iterCall->langTerm->args );
+ break;
+ case IterDef::Child:
+ iterImpl = new IterImpl( IterImpl::Child );
+ break;
+ case IterDef::RevChild:
+ iterImpl = new IterImpl( IterImpl::RevChild );
+ break;
+ case IterDef::Repeat:
+ iterImpl = new IterImpl( IterImpl::Repeat );
+ break;
+ case IterDef::RevRepeat:
+ iterImpl = new IterImpl( IterImpl::RevRepeat );
+ break;
+ case IterDef::User:
+ iterImpl = new IterImpl( IterImpl::User, iterUT->iterDef->func );
+ break;
+ case IterDef::ListEl:
+ iterImpl = new IterImpl( IterImpl::ListEl );
+ break;
+ case IterDef::RevListVal:
+ iterImpl = new IterImpl( IterImpl::RevListVal );
+ break;
+ case IterDef::MapEl:
+ iterImpl = new IterImpl( IterImpl::MapEl );
+ break;
+ }
+
+ /* Stored on the field; compileForIterBody reads it from there. */
+ objField->iterImpl = iterImpl;
+
+ /* Evaluate and push the arguments. */
+ ObjectField **paramRefs = iterCall->langTerm->varRef->evaluateArgs(
+ pd, code, lookup, iterCall->langTerm->args );
+
+ /* The create instruction differs depending on whether reverting is on. */
+ if ( pd->revertOn )
+ code.append( iterImpl->inCreateWV );
+ else
+ code.append( iterImpl->inCreateWC );
+
+ code.appendHalf( objField->offset );
+
+ /* Arg size (or func id for user iters). */
+ if ( lookup.objMethod->func != 0 )
+ code.appendHalf( lookup.objMethod->func->funcId );
+ else
+ code.appendHalf( iterCall->langTerm->varRef->argSize );
+
+ /* Search type. */
+ if ( iterImpl->useSearchUT )
+ code.appendHalf( searchUT->langEl->id );
+
+ /* Generic id. The first argument is evaluated into a scratch code vector
+ * that is discarded; only the resulting type is used. */
+ if ( iterImpl->useGenericId ) {
+ CodeVect unused;
+ UniqueType *ut =
+ iterCall->langTerm->args->data[0]->expr->evaluate( pd, unused );
+
+ code.appendHalf( ut->generic->id );
+ }
+
+ compileForIterBody( pd, code, iterUT );
+
+ iterCall->langTerm->varRef->popRefQuals( pd, code, lookup,
+ iterCall->langTerm->args, false );
+
+ iterCall->langTerm->varRef->resetActiveRefs( pd, lookup, paramRefs );
+ delete[] paramRefs;
+
+ /* Clear the args and fill in the operand of the IN_PREP_ARGS emitted at
+ * the top with the parameter list size. */
+ if ( func != 0 ) {
+ code.append( IN_CLEAR_ARGS );
+ code.appendHalf( func->paramListSize );
+ code.setHalf( asLoc, func->paramListSize );
+ }
+}
+
+/*
+ * Compile a while statement: emit the test, a conditional jump over the body,
+ * the body, and a jump back to the test. Forward jump distances are patched
+ * once the code they skip has been emitted.
+ */
+void LangStmt::compileWhile( Compiler *pd, CodeVect &code ) const
+{
+ /* Generate code for the while test. Remember the top. */
+ long top = code.length();
+ UniqueType *eut = expr->evaluate( pd, code );
+
+ /* Jump past the while block if false. Note that we don't have the
+ * distance yet. The tree or value form of the jump is chosen from the
+ * type of the test expression. */
+ long jumpFalse = code.length();
+ half_t jinstr = eut->tree() ? IN_JMP_FALSE_TREE : IN_JMP_FALSE_VAL;
+ code.append( jinstr );
+ code.appendHalf( 0 );
+
+ /* Compute the while block. */
+ for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ )
+ stmt->compile( pd, code );
+
+ /* Jump back to the top to retest. The +3 presumably accounts for the jump
+ * instruction itself (opcode plus half operand). */
+ long retestDist = code.length() - top + 3;
+ code.append( IN_JMP );
+ code.appendHalf( -retestDist );
+
+ /* Set the jump false distance. */
+ long falseDist = code.length() - jumpFalse - 3;
+ code.setHalf( jumpFalse+1, falseDist );
+
+ /* Compute the jump distance for the break jumps. Every recorded break is
+ * pointed at the current end of code.
+ * NOTE(review): all entries in pd->breakJumps are patched here, so a break
+ * recorded by an enclosing loop before this loop was compiled would appear
+ * to be retargeted to this loop's exit -- confirm. */
+ for ( LongVect::Iter brk = pd->breakJumps; brk.lte(); brk++ ) {
+ long distance = code.length() - *brk - 3;
+ code.setHalf( *brk+1, distance );
+ }
+ pd->breakJumps.empty();
+}
+
+/*
+ * Compile one statement, appending its instructions to code.
+ *
+ * For the duration of the statement a small block (load the literal string
+ * "unwind code\n", then pop it) is pushed on the front of pd->unwindCode; the
+ * same number of entries is removed again before returning.
+ */
+void LangStmt::compile( Compiler *pd, CodeVect &code ) const
+{
+ CodeVect block;
+
+ /* Make sure we have the string. A newly inserted string takes the next
+ * index in the literal string table. */
+ StringMapEl *mapEl = 0;
+ if ( pd->literalStrings.insert( "unwind code\n", &mapEl ) )
+ mapEl->value = pd->literalStrings.length()-1;
+
+ block.append( IN_LOAD_STR );
+ block.appendWord( mapEl->value );
+
+ block.append( IN_POP_TREE );
+
+ pd->unwindCode.insert( 0, block );
+
+ switch ( type ) {
+ case ExprType: {
+ /* Evaluate the expression, then pop it immediately. */
+ UniqueType *exprUt = expr->evaluate( pd, code );
+ if ( exprUt->tree() )
+ code.append( IN_POP_TREE );
+ else
+ code.append( IN_POP_VAL );
+
+ // pd->unwindCode.remove( 0, 1 );
+ break;
+ }
+ case IfType: {
+ long jumpFalse = 0, jumpPastElse = 0, distance = 0;
+
+ /* Evaluate the test. */
+ UniqueType *eut = expr->evaluate( pd, code );
+
+ /* Jump past the if block if false. We don't know the distance
+ * yet so store the location of the jump. */
+ jumpFalse = code.length();
+ half_t jinstr = eut->tree() ? IN_JMP_FALSE_TREE : IN_JMP_FALSE_VAL;
+
+ code.append( jinstr );
+ code.appendHalf( 0 );
+
+ /* Compile the if true branch. */
+ for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ )
+ stmt->compile( pd, code );
+
+ if ( elsePart != 0 ) {
+ /* Jump past the else code for the if true branch. */
+ jumpPastElse = code.length();
+ code.append( IN_JMP );
+ code.appendHalf( 0 );
+ }
+
+ /* Set the distance for the jump false case. When there is an else
+ * part this lands after the jump emitted just above, at the start of
+ * the else code. */
+ distance = code.length() - jumpFalse - 3;
+ code.setHalf( jumpFalse+1, distance );
+
+ if ( elsePart != 0 ) {
+ /* Compile the else branch. */
+ elsePart->compile( pd, code );
+
+ /* Set the distance for jump over the else part. */
+ distance = code.length() - jumpPastElse - 3;
+ code.setHalf( jumpPastElse+1, distance );
+ }
+
+ break;
+ }
+ case ElseType: {
+ /* Compile the else branch. */
+ for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ )
+ stmt->compile( pd, code );
+ break;
+ }
+ case RejectType: {
+ code.append( IN_REJECT );
+ break;
+ }
+ case WhileType: {
+ compileWhile( pd, code );
+ break;
+ }
+ case AssignType: {
+ /* Evaluate the expression. */
+ UniqueType *exprUT = expr->evaluate( pd, code );
+
+ /* Do the assignment. */
+ varRef->assignValue( pd, code, exprUT );
+ break;
+ }
+ case ForIterType: {
+ compileForIter( pd, code );
+ break;
+ }
+ case ReturnType: {
+ /* Evaluate the expression. */
+ UniqueType *exprUT = expr->evaluate( pd, code );
+
+ if ( pd->curFunction == 0 ) {
+ /* In the main function */
+ pd->mainReturnUT = exprUT;
+ }
+ else {
+ /* Inside a function: unless the function's result type is void, the
+ * returned value must be castable to it. */
+ UniqueType *resUT = pd->curFunction->typeRef->uniqueType;
+ if ( resUT != pd->uniqueTypeVoid &&
+ !castAssignment( pd, code, resUT, 0, exprUT ) )
+ error(loc) << "return value wrong type" << endp;
+ }
+
+ code.append( IN_SAVE_RET );
+
+ /* The loop cleanup code. */
+ if ( pd->unwindCode.length() > 0 )
+ code.append( pd->unwindCode );
+
+ /* Jump to the return label. The distance will be filled in
+ * later. */
+ pd->returnJumps.append( code.length() );
+ code.append( IN_JMP );
+ code.appendHalf( 0 );
+ break;
+ }
+ case BreakType: {
+ /* Record the position of the jump; the enclosing loop compile fills in
+ * the distance when it walks pd->breakJumps. */
+ pd->breakJumps.append( code.length() );
+ code.append( IN_JMP );
+ code.appendHalf( 0 );
+ break;
+ }
+ case YieldType: {
+ /* Take a reference and yield it. Immediately reset the reference. */
+ varRef->preEvaluateRef( pd, code );
+ ObjectField *objField = varRef->evaluateRef( pd, code, 0 );
+ code.append( IN_YIELD );
+
+ /* Pop two words per qualification, when there are any. */
+ if ( varRef->qual->length() > 0 ) {
+ code.append( IN_POP_N_WORDS );
+ code.appendHalf( (short)(varRef->qual->length()*2) );
+ }
+
+ objField->refActive = false;
+ break;
+ }
+ }
+
+ /* Take the per-statement block back off the front of the unwind code. */
+ pd->unwindCode.remove( 0, block.length() );
+}
+
+/* Compile every statement of the block, in order, into code. */
+void CodeBlock::compile( Compiler *pd, CodeVect &code ) const
+{
+    StmtList::Iter cur = *stmtList;
+    while ( cur.lte() ) {
+        cur->compile( pd, code );
+        cur++;
+    }
+}
+
+/*
+ * Build the locals descriptor of a code block from the fields of its local
+ * frame. Only fields that use an offset are considered. A tree-typed field
+ * that is not the lhs element and is referenced (or is a parameter) yields a
+ * tree entry; an iterator-typed field yields an entry whose kind depends on
+ * the iterator definition.
+ */
+void Compiler::findLocals( ObjectDef *localFrame, CodeBlock *block )
+{
+    Locals &locals = block->locals;
+
+    for ( FieldList::Iter ol = localFrame->fieldList; ol.lte(); ol++ ) {
+        ObjectField *field = ol->value;
+
+        /* Nothing to record for fields without an offset. */
+        if ( !field->useOffset() )
+            continue;
+
+        UniqueType *fieldUT = field->typeRef->uniqueType;
+
+        /* FIXME: This test needs to be improved. Match_text was getting
+         * through before useOffset was tested. What will? */
+        if ( !field->isLhsEl() &&
+                ( field->beenReferenced || field->isParam() ) &&
+                fieldUT->tree() )
+        {
+            int depth = field->scope->depth();
+            locals.append( LocalLoc( LT_Tree, depth, field->offset ) );
+        }
+
+        if ( fieldUT->typeId == TYPE_ITER ) {
+            int depth = field->scope->depth();
+
+            /* Stays LT_Tree if the iterator type matches no case. */
+            LocalType kind = LT_Tree;
+            switch ( fieldUT->iterDef->type ) {
+                case IterDef::RevChild:
+                    kind = LT_RevIter;
+                    break;
+                case IterDef::User:
+                    kind = LT_UserIter;
+                    break;
+
+                case IterDef::Tree:
+                case IterDef::Child:
+                case IterDef::Repeat:
+                case IterDef::RevRepeat:
+                /* The original marks the map/list kinds below with "?". */
+                case IterDef::MapEl:
+                case IterDef::ListEl:
+                case IterDef::RevListVal:
+                    kind = LT_Iter;
+                    break;
+            }
+
+            locals.append( LocalLoc( kind, depth, (int)field->offset ) );
+        }
+    }
+}
+
+/*
+ * Insert, at insertPos, the instruction that initialises the "lhs" field of a
+ * production's reduction block, if that field has been referenced. insertPos
+ * is advanced past whatever was inserted.
+ */
+void Compiler::addProdLHSLoad( Production *prod, CodeVect &code, long &insertPos )
+{
+    ObjectField *lhs = prod->redBlock->localFrame->rootScope->findField("lhs");
+    assert( lhs != 0 );
+
+    /* Build the (possibly empty) load sequence separately, then splice it in. */
+    CodeVect lhsInit;
+    if ( lhs->beenReferenced ) {
+        lhsInit.append( IN_INIT_LHS_EL );
+        lhsInit.appendHalf( lhs->offset );
+    }
+
+    code.insert( insertPos, lhsInit );
+    insertPos += lhsInit.length();
+}
+
+/*
+ * Append the instruction that stores the "lhs" field of a production's
+ * reduction block, if that field has been referenced. The insertPos
+ * parameter is not read or modified here; the store goes at the end of code.
+ */
+void Compiler::addPushBackLHS( Production *prod, CodeVect &code, long &insertPos )
+{
+    /* If the lhs tree is dirty then we will need to save off the old lhs
+     * before it gets modified. We want to avoid this for attribute
+     * modifications. The computation of dirtyTree should deal with this for
+     * us. */
+    CodeBlock *redBlock = prod->redBlock;
+    ObjectField *lhs = redBlock->localFrame->rootScope->findField("lhs");
+    assert( lhs != 0 );
+
+    /* Nothing to store if the reduction never touched the lhs. */
+    if ( !lhs->beenReferenced )
+        return;
+
+    code.append( IN_STORE_LHS_EL );
+    code.appendHalf( lhs->offset );
+}
+
+/*
+ * Insert, at insertPos, one init instruction for every referenced right hand
+ * side element of the production. Each instruction carries the element's
+ * position in the production and the offset of its field. insertPos is
+ * advanced past whatever was inserted.
+ */
+void Compiler::addProdRHSLoads( Production *prod, CodeVect &code, long &insertPos )
+{
+    CodeVect rhsInit;
+    long position = 0;
+
+    ProdElList::Iter el = *prod->prodElList;
+    while ( el.lte() ) {
+        /* Only reference-type elements whose field was used need a load. */
+        const bool needed = el->type == ProdEl::ReferenceType &&
+                el->rhsElField->beenReferenced;
+        if ( needed ) {
+            rhsInit.append ( IN_INIT_RHS_EL );
+            rhsInit.appendHalf( position );
+            rhsInit.appendHalf( el->rhsElField->offset );
+        }
+
+        el++;
+        position++;
+    }
+
+    /* Splice in the loads and move the insert position past them. */
+    code.insert( insertPos, rhsInit );
+    insertPos += rhsInit.length();
+}
+
+
+
+/*
+ * Fill prod->copy with (capture field offset, element position) pairs, one
+ * pair for every production element that has a capture field.
+ */
+void Compiler::makeProdCopies( Production *prod )
+{
+    int position = 0;
+    ProdElList::Iter el = *prod->prodElList;
+    while ( el.lte() ) {
+        if ( el->captureField != 0 ) {
+            prod->copy.append( el->captureField->offset );
+            prod->copy.append( position );
+        }
+
+        el++;
+        position++;
+    }
+}
+
+/*
+ * Compile the reduction block of a production into its revertible (WV) code.
+ * The block body is compiled first; the right hand side and lhs loads are
+ * then inserted at the position recorded before the body was compiled.
+ */
+void Compiler::compileReductionCode( Production *prod )
+{
+    CodeBlock *redBlock = prod->redBlock;
+
+    /* Establish the compilation context and give the block a frame id. */
+    compileContext = CompileReduction;
+    revertOn = true;
+    redBlock->frameId = nextFrameId++;
+
+    CodeVect &code = redBlock->codeWV;
+
+    /* Loads are spliced in here after the body is known. */
+    long loadPos = code.length();
+
+    /* Compile the reduce block. */
+    redBlock->compile( this, code );
+
+    /* Only now is it known which right hand side values and whether the lhs
+     * were referenced. */
+    addProdRHSLoads( prod, code, loadPos );
+    addProdLHSLoad( prod, code, loadPos );
+    addPushBackLHS( prod, code, loadPos );
+
+    code.append( IN_PCR_RET );
+
+    /* Now that compilation is done variables are referenced. Make the local
+     * trees descriptor. */
+    findLocals( redBlock->localFrame, redBlock );
+}
+
+/*
+ * Compile the translation block of a language element into its revertible
+ * (WV) code. If the token definition has captures, an init-captures
+ * instruction with the capture count is emitted ahead of the block body.
+ */
+void Compiler::compileTranslateBlock( LangEl *langEl )
+{
+    CodeBlock *transBlock = langEl->transBlock;
+
+    /* Establish the compilation context and give the block a frame id. */
+    compileContext = CompileTranslation;
+    revertOn = true;
+    transBlock->frameId = nextFrameId++;
+
+    CodeVect &code = transBlock->codeWV;
+
+    const bool hasCaptures = langEl->tokenDef->reCaptureVect.length() > 0;
+    if ( hasCaptures ) {
+        code.append( IN_INIT_CAPTURES );
+        code.append( langEl->tokenDef->reCaptureVect.length() );
+    }
+
+    /* Compile the block body, then terminate it. */
+    transBlock->compile( this, code );
+    code.append( IN_PCR_RET );
+
+    /* Now that compilation is done variables are referenced. Make the local
+     * trees descriptor. */
+    findLocals( transBlock->localFrame, transBlock );
+}
+
+/*
+ * Compile the pre-eof block of a token region into its revertible (WV) code.
+ * The input and this fields are added to the block's local frame before the
+ * body is compiled.
+ */
+void Compiler::compilePreEof( TokenRegion *region )
+{
+    CodeBlock *eofBlock = region->preEofBlock;
+
+    /* Establish the compilation context and give the block a frame id. */
+    compileContext = CompileTranslation;
+    revertOn = true;
+    eofBlock->frameId = nextFrameId++;
+
+    ObjectDef *frame = eofBlock->localFrame;
+    addInput( frame );
+    addThis( frame );
+
+    CodeVect &code = eofBlock->codeWV;
+
+    /* Compile the block body, then terminate it. */
+    eofBlock->compile( this, code );
+    code.append( IN_PCR_RET );
+
+    /* Now that compilation is done variables are referenced. Make the local
+     * trees descriptor. */
+    findLocals( eofBlock->localFrame, eofBlock );
+}
+
+/* Mark the global arg0 field as referenced and return its offset. */
+int Compiler::arg0Offset()
+{
+    /* Reference first; the offset is read afterwards. */
+    globalObjectDef->referenceField( this, arg0 );
+
+    const int offset = arg0->offset;
+    return offset;
+}
+
+/* Mark the global argv field as referenced and return its offset. */
+int Compiler::argvOffset()
+{
+    /* Reference first; the offset is read afterwards. */
+    globalObjectDef->referenceField( this, argv );
+
+    const int offset = argv->offset;
+    return offset;
+}
+
+/* Mark the global stds field as referenced and return its offset. */
+int Compiler::stdsOffset()
+{
+    /* Reference first; the offset is read afterwards. */
+    globalObjectDef->referenceField( this, stds );
+
+    const int offset = stds->offset;
+    return offset;
+}
+
+/*
+ * Compile the root code block into its non-revertible (WC) code. The body is
+ * preceded by the arg0, argv and stds setup functions, each given the offset
+ * of the matching global field, and followed by the stop function.
+ */
+void Compiler::compileRootBlock( )
+{
+    CodeBlock *root = rootCodeBlock;
+
+    /* The root block never needs to be reverted. No locals are needed for
+     * it, but an empty local frame is still required for the compile. */
+    compileContext = CompileRoot;
+    revertOn = false;
+
+    /* The block needs a frame id. */
+    root->frameId = nextFrameId++;
+
+    /* Not reverted, so the WC code vector is the one that is filled. */
+    CodeVect &code = root->codeWC;
+
+    /* Prologue: load arg0. */
+    code.append( IN_FN );
+    code.append( FN_LOAD_ARG0 );
+    code.appendHalf( arg0Offset() );
+
+    /* Prologue: load argv. */
+    code.append( IN_FN );
+    code.append( FN_LOAD_ARGV );
+    code.appendHalf( argvOffset() );
+
+    /* Prologue: init stds. */
+    code.append( IN_FN );
+    code.append( FN_INIT_STDS );
+    code.appendHalf( stdsOffset() );
+
+    root->compile( this, code );
+
+    /* Epilogue: stop. */
+    code.append( IN_FN );
+    code.append( FN_STOP );
+
+    /* Make the local trees descriptor. */
+    findLocals( rootLocalFrame, root );
+}
+
+/*
+ * Select the get/set instructions for this field according to its type.
+ * Instruction members that a case does not mention are left untouched.
+ */
+void ObjectField::initField()
+{
+    switch ( type ) {
+        /* Fields that live in the local frame. All three value getters use
+         * the read form. */
+        case UserLocalType:
+        case LhsElType:
+        case ParamValType:
+        case RedRhsType:
+            inGetR = IN_GET_LOCAL_R;
+            inGetWC = IN_GET_LOCAL_WC;
+            inSetWC = IN_SET_LOCAL_WC;
+            inGetValR = inGetValWC = inGetValWV = IN_GET_LOCAL_VAL_R;
+            inSetValWC = IN_SET_LOCAL_VAL_WC;
+            break;
+
+        case ParamRefType:
+            inGetR = IN_GET_LOCAL_REF_R;
+            inGetWC = IN_GET_LOCAL_REF_WC;
+            inSetWC = IN_SET_LOCAL_REF_WC;
+            break;
+
+        /* The WC/WV value getters and the WV value setter are not assigned
+         * for user fields. */
+        case UserFieldType:
+            inGetR = IN_GET_FIELD_TREE_R;
+            inGetWC = IN_GET_FIELD_TREE_WC;
+            inGetWV = IN_GET_FIELD_TREE_WV;
+            inSetWC = IN_SET_FIELD_TREE_WC;
+            inSetWV = IN_SET_FIELD_TREE_WV;
+
+            inGetValR = IN_GET_FIELD_VAL_R;
+            inSetValWC = IN_SET_FIELD_VAL_WC;
+            break;
+
+        /* Struct-resident fields. All three value getters use the read
+         * form. */
+        case GenericElementType:
+        case GenericDependentType:
+        case StructFieldType:
+            inGetR = IN_GET_STRUCT_R;
+            inGetWC = IN_GET_STRUCT_WC;
+            inGetWV = IN_GET_STRUCT_WV;
+            inSetWC = IN_SET_STRUCT_WC;
+            inSetWV = IN_SET_STRUCT_WV;
+            inGetValR = inGetValWC = inGetValWV = IN_GET_STRUCT_VAL_R;
+            inSetValWC = IN_SET_STRUCT_VAL_WC;
+            inSetValWV = IN_SET_STRUCT_VAL_WV;
+            break;
+
+        case RhsNameType:
+            inGetR = IN_GET_RHS_VAL_R;
+            inGetWC = IN_GET_RHS_VAL_WC;
+            inGetWV = IN_GET_RHS_VAL_WV;
+            /* NOTE(review): the WV setter is given the WC instruction, while
+             * the getters use distinct WC/WV forms -- confirm intended. */
+            inSetWC = inSetWV = IN_SET_RHS_VAL_WC;
+            break;
+
+        /* Inbuilts have instructions initialized outside the cons, at place
+         * of call. */
+        case InbuiltFieldType:
+        case InbuiltObjectType:
+        case InbuiltOffType:
+            break;
+
+        /* Out of date impl. */
+        case LexSubstrType:
+            break;
+    }
+}
+
+/*
+ * Assign an offset to a field of this object definition and advance the
+ * running offset by the size of the field's type. Local frame fields get
+ * negative offsets; generic element, user and struct fields get increasing
+ * non-negative offsets. All remaining field types are left alone.
+ */
+void ObjectDef::placeField( Compiler *pd, ObjectField *field )
+{
+    UniqueType *fieldUT = field->typeRef->uniqueType;
+
+    switch ( field->type ) {
+        case ObjectField::LhsElType:
+        case ObjectField::UserLocalType:
+        case ObjectField::RedRhsType:
+            /* Local frame fields. Move the running offset first since this
+             * is a negative off from the end. */
+            nextOffset += sizeOfField( fieldUT );
+            field->offset = -nextOffset;
+            break;
+
+        case ObjectField::GenericElementType:
+        case ObjectField::UserFieldType:
+        case ObjectField::StructFieldType: {
+            /* Object fields. Record the position, then move the running
+             * offset. */
+            field->offset = nextOffset;
+            nextOffset += sizeOfField( fieldUT );
+
+            /* A generic element holding map pointers shares its offset with
+             * its map key field, when it has one. */
+            const bool mapPtrsElement =
+                    field->type == ObjectField::GenericElementType &&
+                    fieldUT->typeId == TYPE_MAP_PTRS;
+            if ( mapPtrsElement && field->mapKeyField != 0 )
+                field->mapKeyField->offset = field->offset;
+            break;
+        }
+
+        /* There is an object field that a generic dependent type depends on.
+         * When it is placed, this one will be placed as well. Nothing to do
+         * now. The other types listed here are likewise not placed. */
+        case ObjectField::GenericDependentType:
+        case ObjectField::InbuiltFieldType:
+        case ObjectField::InbuiltOffType:
+        case ObjectField::InbuiltObjectType:
+        case ObjectField::RhsNameType:
+        case ObjectField::LexSubstrType:
+        case ObjectField::ParamValType:
+        case ObjectField::ParamRefType:
+            break;
+    }
+}
+
+/*
+ * Place the fields of every language element that has an object definition
+ * (user object fields need a consistent size).
+ */
+void Compiler::placeAllLanguageObjects()
+{
+    for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
+        ObjectDef *def = lel->objectDef;
+        if ( def == 0 )
+            continue;
+
+        /* Place each field of the object in list order. */
+        FieldList::Iter fld = def->fieldList;
+        while ( fld.lte() ) {
+            def->placeField( this, fld->value );
+            fld++;
+        }
+    }
+}
+
+/* Place the fields of the object definition of every struct element. */
+void Compiler::placeAllStructObjects()
+{
+    for ( StructElList::Iter sel = structEls; sel.lte(); sel++ ) {
+        ObjectDef *def = sel->structDef->objectDef;
+
+        /* Place each field of the struct in list order. */
+        FieldList::Iter fld = def->fieldList;
+        while ( fld.lte() ) {
+            def->placeField( this, fld->value );
+            fld++;
+        }
+    }
+}
+
+/* Place every field found in the field list of the given local frame. */
+void Compiler::placeFrameFields( ObjectDef *localFrame )
+{
+	FieldList::Iter fld = localFrame->fieldList;
+	while ( fld.lte() ) {
+		localFrame->placeField( this, fld->value );
+		fld++;
+	}
+}
+
+/* Place the local-frame fields of every piece of compiled code: user
+ * functions, in-host functions, reduction blocks, token translation blocks,
+ * pre-eof blocks and finally the root frame. */
+void Compiler::placeAllFrameObjects()
+{
+ /* Functions. */
+ for ( FunctionList::Iter f = functionList; f.lte(); f++ )
+ placeFrameFields( f->localFrame );
+
+ for ( FunctionList::Iter f = inHostList; f.lte(); f++ )
+ placeFrameFields( f->localFrame );
+
+ /* Reduction code. */
+ for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) {
+ if ( prod->redBlock != 0 )
+ placeFrameFields( prod->redBlock->localFrame );
+ }
+
+ /* Token translation code. */
+ for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
+ if ( lel->transBlock != 0 ) {
+ ObjectDef *localFrame = lel->transBlock->localFrame;
+ if ( lel->tokenDef->reCaptureVect.length() > 0 ) {
+ /* Place the first reCaptureVect.length() fields of the frame.
+ * NOTE(review): f is advanced without an lte() check, so this
+ * assumes the frame holds at least that many fields, and the
+ * same fields are handed to placeField() again by the
+ * placeFrameFields() call below -- confirm both against
+ * placeField() and the code that builds the frame. */
+ FieldList::Iter f = localFrame->fieldList;
+ for ( int i = 0; i < lel->tokenDef->reCaptureVect.length(); i++, f++ )
+ localFrame->placeField( this, f->value );
+ }
+
+ placeFrameFields( localFrame );
+ }
+ }
+
+ /* Preeof blocks. */
+ for ( RegionList::Iter r = regionList; r.lte(); r++ ) {
+ if ( r->preEofBlock != 0 )
+ placeFrameFields( r->preEofBlock->localFrame );
+ }
+
+ /* Root code. */
+ placeFrameFields( rootLocalFrame );
+}
+
+/* Compute the parameter layout of a user function or user iterator.
+ *
+ * func       - the function whose parameters are placed.
+ * isUserIter - selects the call-overhead constant added to every parameter
+ *              offset: IFR_AA for user iterators, FR_AA otherwise.
+ *
+ * Stores each parameter's offset, the summed size of all parameters
+ * (func->paramListSize), the array of parameter types (func->paramUTs), and
+ * the return type and a separate copy of the parameter types on
+ * func->objMethod. */
+void Compiler::placeUserFunction( Function *func, bool isUserIter )
+{
+	const long numParams = func->paramList->length();
+
+	/* How much space do we need to make for call overhead. This does not
+	 * vary per parameter, so it is computed once. */
+	const long frameAfterArgs = isUserIter ? IFR_AA : FR_AA;
+
+	/* Record the type of each parameter and the total size of the list. */
+	long paramPos = 0, paramListSize = 0;
+	UniqueType **paramUTs = new UniqueType*[numParams];
+	for ( ParameterList::Iter param = *func->paramList; param.lte(); param++, paramPos++ ) {
+		paramUTs[paramPos] = param->typeRef->uniqueType;
+		paramListSize += sizeOfField( paramUTs[paramPos] );
+	}
+
+	/* Param offset is relative to one past the last item in the array of
+	 * words containing the args. */
+	long paramOffset = 0;
+	paramPos = 0;
+	for ( ParameterList::Iter param = *func->paramList; param.lte(); param++, paramPos++ ) {
+		param->offset = frameAfterArgs + paramOffset;
+		paramOffset += sizeOfField( paramUTs[paramPos] );
+	}
+
+	func->paramListSize = paramListSize;
+	func->paramUTs = paramUTs;
+
+	/* A function without a declared return type returns int. */
+	UniqueType *returnUT = func->typeRef != 0 ?
+			func->typeRef->uniqueType : uniqueTypeInt;
+	func->objMethod->returnUT = returnUT;
+
+	/* The object method gets its own copy of the parameter type array, so it
+	 * is assigned only once, here. */
+	func->objMethod->paramUTs = new UniqueType*[numParams];
+	memcpy( func->objMethod->paramUTs, paramUTs,
+			sizeof(UniqueType*) * numParams );
+}
+
+/* Lay out the parameters of every user function and every in-host function.
+ * In-host functions are always placed as non-iterators. */
+void Compiler::placeAllFunctions()
+{
+	FunctionList::Iter userFunc = functionList;
+	while ( userFunc.lte() ) {
+		placeUserFunction( userFunc, userFunc->isUserIter );
+		userFunc++;
+	}
+
+	FunctionList::Iter hostFunc = inHostList;
+	while ( hostFunc.lte() ) {
+		placeUserFunction( hostFunc, false );
+		hostFunc++;
+	}
+}
+
+
+/* Emit the body of a user iterator into the given code vector, followed by a
+ * trailing nil yield. */
+void Compiler::compileUserIter( Function *func, CodeVect &code )
+{
+	/* Compile the iterator's code block. */
+	func->codeBlock->compile( this, code );
+
+	/* Always yield a nil at the end. This causes iteration to stop. */
+	code.append( IN_LOAD_NIL );
+	code.append( IN_YIELD );
+}
+
+/* Compile a user iterator twice, once with revert on (into codeWV) and once
+ * with revert off (into codeWC), then compute the locals of its frame. */
+void Compiler::compileUserIter( Function *func )
+{
+	CodeBlock *body = func->codeBlock;
+
+	/* Establish the compilation context and give the block a frame id. */
+	compileContext = CompileFunction;
+	curFunction = func;
+	body->frameId = nextFrameId++;
+
+	/* Revert version first. */
+	revertOn = true;
+	compileUserIter( func, body->codeWV );
+
+	/* Then the commit version. */
+	revertOn = false;
+	compileUserIter( func, body->codeWC );
+
+	/* Variables are only known to be referenced once compilation has run, so
+	 * the local trees descriptor is made afterwards. */
+	findLocals( body->localFrame, body );
+
+	/* FIXME: Need to deal with the freeing of local trees. */
+}
+
+/* Emit the bytecode of a function body into `code`. Called for each type of
+ * function compile: revert and commit. */
+void Compiler::compileFunction( Function *func, CodeVect &code )
+{
+	CodeBlock *body = func->codeBlock;
+
+	body->compile( this, code );
+
+	/* When the statement list is empty or does not finish with a return
+	 * statement, push a nil return value. */
+	bool endsWithReturn = body->stmtList->length() != 0 &&
+			body->stmtList->tail->type == LangStmt::ReturnType;
+	if ( !endsWithReturn ) {
+		code.append( IN_LOAD_NIL );
+		code.append( IN_SAVE_RET );
+	}
+
+	/* Fill in the distance of every recorded return jump, measured from the
+	 * jump to the current end of the code. */
+	LongVect::Iter jump = returnJumps;
+	while ( jump.lte() ) {
+		long distance = code.length() - *jump - 3;
+		code.setHalf( *jump+1, distance );
+		jump++;
+	}
+
+	/* The recorded jumps have been consumed. */
+	returnJumps.empty();
+
+	/* Return cleans up the stack (including the args) and leaves the return
+	 * value on the top. */
+	code.append( IN_RET );
+}
+
+/* Compile a function twice, once with revert on (into codeWV) and once with
+ * revert off (into codeWC), then compute the locals of its frame. */
+void Compiler::compileFunction( Function *func )
+{
+	CodeBlock *body = func->codeBlock;
+
+	/* Compilation context for this function. */
+	compileContext = CompileFunction;
+	curFunction = func;
+
+	/* Each compiled block receives the next free frame id. */
+	body->frameId = nextFrameId++;
+
+	/* Revert version. */
+	revertOn = true;
+	compileFunction( func, body->codeWV );
+
+	/* Commit version. */
+	revertOn = false;
+	compileFunction( func, body->codeWC );
+
+	/* Variables are only known to be referenced once compilation has run, so
+	 * the local trees descriptor is made afterwards. */
+	findLocals( body->localFrame, body );
+}
+
+/* Detach from replList every constructor whose `parse` flag is not set. */
+void Compiler::removeNonUnparsableRepls()
+{
+	ConsList::Iter repl = replList;
+	while ( repl.lte() ) {
+		/* Step the iterator before possibly detaching the element it was
+		 * referring to. */
+		Constructor *cons = repl++;
+		if ( cons->parse )
+			continue;
+
+		replList.detach( cons );
+	}
+}
+
+/* Drive bytecode generation: functions and user iterators, reduction code,
+ * token translation blocks, pre-eof blocks, then the root block. Finishes by
+ * pruning the constructor list. */
+void Compiler::compileByteCode()
+{
+	/* Functions and user iterators. */
+	for ( FunctionList::Iter func = functionList; func.lte(); func++ ) {
+		if ( !func->isUserIter )
+			compileFunction( func );
+		else
+			compileUserIter( func );
+	}
+
+	/* Reduction code. Production copies are made for every production,
+	 * whether or not it has a reduction block. */
+	for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) {
+		makeProdCopies( prod );
+		if ( prod->redBlock == 0 )
+			continue;
+		compileReductionCode( prod );
+	}
+
+	/* Token translation code. */
+	for ( LelList::Iter el = langEls; el.lte(); el++ ) {
+		if ( el->transBlock == 0 )
+			continue;
+		compileTranslateBlock( el );
+	}
+
+	/* Preeof blocks. */
+	for ( RegionList::Iter region = regionList; region.lte(); region++ ) {
+		if ( region->preEofBlock == 0 )
+			continue;
+		compilePreEof( region );
+	}
+
+	/* The init (root) code comes last. */
+	compileRootBlock( );
+	removeNonUnparsableRepls();
+}
diff --git a/src/tree.c b/src/tree.c
new file mode 100644
index 00000000..e05681b9
--- /dev/null
+++ b/src/tree.c
@@ -0,0 +1,1655 @@
+/*
+ * Copyright 2007-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <string.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <assert.h>
+
+#include <colm/tree.h>
+#include <colm/pool.h>
+#include <colm/bytecode.h>
+#include <colm/debug.h>
+
+/* Allocate `length` kids chained through their next pointers and return the
+ * head of the chain. Returns 0 when length is not positive. */
+kid_t *alloc_attrs( program_t *prg, long length )
+{
+	kid_t *head = 0;
+	long remaining = length;
+
+	while ( remaining-- > 0 ) {
+		/* Each new kid is put in front of the chain built so far. */
+		kid_t *kid = kid_allocate( prg );
+		kid->next = head;
+		head = kid;
+	}
+
+	return head;
+}
+
+/* Release every kid in the chain starting at attrs. The trees the kids point
+ * to are not touched. */
+void free_attrs( program_t *prg, kid_t *attrs )
+{
+	kid_t *after;
+	for ( ; attrs != 0; attrs = after ) {
+		/* Read the link before the kid is handed back. */
+		after = attrs->next;
+		kid_free( prg, attrs );
+	}
+}
+
+/* Release every kid in the list starting at kid. The trees the kids point to
+ * are not touched. */
+void free_kid_list( program_t *prg, kid_t *kid )
+{
+	kid_t *cur = kid;
+	while ( cur != 0 ) {
+		/* Read the link before the kid is handed back. */
+		kid_t *after = cur->next;
+		kid_free( prg, cur );
+		cur = after;
+	}
+}
+
+/* Store val in the attribute slot at index pos of tree. No reference counts
+ * are changed. */
+static void colm_tree_set_attr( tree_t *tree, long pos, tree_t *val )
+{
+	kid_t *slot = tree->child;
+
+	/* Step past one kid for each ignore flag that is set. */
+	if ( tree->flags & AF_LEFT_IGNORE )
+		slot = slot->next;
+	if ( tree->flags & AF_RIGHT_IGNORE )
+		slot = slot->next;
+
+	/* Walk forward to the requested attribute. */
+	while ( pos-- > 0 )
+		slot = slot->next;
+
+	slot->tree = val;
+}
+
+/* Return the tree stored in the attribute slot at index pos of tree. */
+tree_t *colm_get_attr( tree_t *tree, long pos )
+{
+	kid_t *slot = tree->child;
+
+	/* Step past one kid for each ignore flag that is set. */
+	if ( tree->flags & AF_LEFT_IGNORE )
+		slot = slot->next;
+	if ( tree->flags & AF_RIGHT_IGNORE )
+		slot = slot->next;
+
+	/* Walk forward to the requested attribute. */
+	while ( pos-- > 0 )
+		slot = slot->next;
+
+	return slot->tree;
+}
+
+
+/* Return the tree held by the second kid that follows the ignore kids of
+ * tree. */
+tree_t *colm_get_repeat_next( tree_t *tree )
+{
+	kid_t *first = tree->child;
+	int skip = 0;
+
+	/* One kid to skip per ignore flag set. */
+	if ( tree->flags & AF_LEFT_IGNORE )
+		skip += 1;
+	if ( tree->flags & AF_RIGHT_IGNORE )
+		skip += 1;
+
+	while ( skip-- > 0 )
+		first = first->next;
+
+	return first->next->tree;
+}
+
+/* Return the tree held by the first kid that follows the ignore kids of
+ * tree. */
+tree_t *colm_get_repeat_val( tree_t *tree )
+{
+	kid_t *first = tree->child;
+	int skip = 0;
+
+	/* One kid to skip per ignore flag set. */
+	if ( tree->flags & AF_LEFT_IGNORE )
+		skip += 1;
+	if ( tree->flags & AF_RIGHT_IGNORE )
+		skip += 1;
+
+	while ( skip-- > 0 )
+		first = first->next;
+
+	return first->tree;
+}
+
+/* Return the tree held by the first kid that follows the ignore kids of
+ * tree. For a left repeat this position is the "next" element. */
+tree_t *colm_get_left_repeat_next( tree_t *tree )
+{
+	kid_t *first = tree->child;
+	int skip = 0;
+
+	/* One kid to skip per ignore flag set. */
+	if ( tree->flags & AF_LEFT_IGNORE )
+		skip += 1;
+	if ( tree->flags & AF_RIGHT_IGNORE )
+		skip += 1;
+
+	while ( skip-- > 0 )
+		first = first->next;
+
+	return first->tree;
+}
+
+/* Return the tree held by the second kid that follows the ignore kids of
+ * tree. For a left repeat this position is the "value" element. */
+tree_t *colm_get_left_repeat_val( tree_t *tree )
+{
+	kid_t *first = tree->child;
+	int skip = 0;
+
+	/* One kid to skip per ignore flag set. */
+	if ( tree->flags & AF_LEFT_IGNORE )
+		skip += 1;
+	if ( tree->flags & AF_RIGHT_IGNORE )
+		skip += 1;
+
+	while ( skip-- > 0 )
+		first = first->next;
+
+	return first->next->tree;
+}
+
+/* Return non-zero when tree has no kids left after its ignore kids. */
+int colm_repeat_end( tree_t *tree )
+{
+	kid_t *first = tree->child;
+	int skip = 0;
+
+	/* One kid to skip per ignore flag set. */
+	if ( tree->flags & AF_LEFT_IGNORE )
+		skip += 1;
+	if ( tree->flags & AF_RIGHT_IGNORE )
+		skip += 1;
+
+	while ( skip-- > 0 )
+		first = first->next;
+
+	return first == 0;
+}
+
+/* Return non-zero when exactly one kid follows the ignore kids of tree, that
+ * is, when the first such kid has no successor. */
+int colm_list_last( tree_t *tree )
+{
+	kid_t *first = tree->child;
+	int skip = 0;
+
+	/* One kid to skip per ignore flag set. */
+	if ( tree->flags & AF_LEFT_IGNORE )
+		skip += 1;
+	if ( tree->flags & AF_RIGHT_IGNORE )
+		skip += 1;
+
+	while ( skip-- > 0 )
+		first = first->next;
+
+	return first->next == 0;
+}
+
+/* Return the kid (not the tree) of the attribute slot at index pos of tree. */
+kid_t *get_attr_kid( tree_t *tree, long pos )
+{
+	kid_t *slot = tree->child;
+
+	/* Step past one kid for each ignore flag that is set. */
+	if ( tree->flags & AF_LEFT_IGNORE )
+		slot = slot->next;
+	if ( tree->flags & AF_RIGHT_IGNORE )
+		slot = slot->next;
+
+	/* Walk forward to the requested attribute. */
+	while ( pos-- > 0 )
+		slot = slot->next;
+
+	return slot;
+}
+
+/* Append list2 to the end of list1 and return the head of the result. When
+ * either list is empty the other one is returned unchanged. */
+kid_t *kid_list_concat( kid_t *list1, kid_t *list2 )
+{
+	if ( list1 == 0 )
+		return list2;
+	if ( list2 == 0 )
+		return list1;
+
+	/* Find the last kid of list1 and hook list2 onto it. */
+	kid_t *tail;
+	for ( tail = list1; tail->next != 0; tail = tail->next )
+		;
+	tail->next = list2;
+
+	return list1;
+}
+
+/* Allocate a tree, set it up as a pointer (id LEL_ID_PTR) carrying value,
+ * and return it as a tree_t. */
+tree_t *colm_construct_pointer( program_t *prg, value_t value )
+{
+	tree_t *tree = tree_allocate( prg );
+	pointer_t *ptr = (pointer_t*) tree;
+
+	ptr->id = LEL_ID_PTR;
+	ptr->value = value;
+
+	return (tree_t*)ptr;
+}
+
+/* Read the value out of a tree that is treated as a pointer_t. */
+value_t colm_get_pointer_val( tree_t *ptr )
+{
+	pointer_t *pointer = (pointer_t*)ptr;
+	return pointer->value;
+}
+
+
+/* Allocate a tree with the given id and token data, zero references, and an
+ * attribute list sized by the language element's object_length. */
+tree_t *colm_construct_term( program_t *prg, word_t id, head_t *tokdata )
+{
+	struct lang_el_info *lel_info = prg->rtd->lel_info;
+	tree_t *term = tree_allocate( prg );
+
+	term->id = id;
+	term->refs = 0;
+	term->tokdata = tokdata;
+
+	/* One attribute kid per object field of this element. */
+	int num_attrs = lel_info[term->id].object_length;
+	term->child = alloc_attrs( prg, num_attrs );
+
+	return term;
+}
+
+
+kid_t *construct_kid( program_t *prg, tree_t **bindings, kid_t *prev, long pat );
+
+/* Build a kid list from the chain of pattern nodes starting at ignore_ind
+ * and linked through nodes[].next. Each kid holds a new tree with one
+ * reference, the node's id and a string made from the node's data. Returns 0
+ * when ignore_ind is negative. */
+static kid_t *construct_ignore_list( program_t *prg, long ignore_ind )
+{
+	struct pat_cons_node *nodes = prg->rtd->pat_repl_nodes;
+
+	kid_t *head = 0;
+	/* Points at the link where the next kid is to be stored. */
+	kid_t **link = &head;
+
+	for ( ; ignore_ind >= 0; ignore_ind = nodes[ignore_ind].next ) {
+		struct pat_cons_node *node = &nodes[ignore_ind];
+
+		head_t *text = colm_string_alloc_pointer( prg, node->data,
+				node->length );
+
+		tree_t *ign = tree_allocate( prg );
+		ign->refs = 1;
+		ign->id = node->id;
+		ign->tokdata = text;
+
+		kid_t *kid = kid_allocate( prg );
+		kid->tree = ign;
+		kid->next = 0;
+
+		*link = kid;
+		link = &kid->next;
+	}
+
+	return head;
+}
+
+/* Build the ignore list named by the left_ignore index of pattern node pat. */
+static kid_t *construct_left_ignore_list( program_t *prg, long pat )
+{
+	long start = prg->rtd->pat_repl_nodes[pat].left_ignore;
+	return construct_ignore_list( prg, start );
+}
+
+/* Build the ignore list named by the right_ignore index of pattern node pat. */
+static kid_t *construct_right_ignore_list( program_t *prg, long pat )
+{
+	long start = prg->rtd->pat_repl_nodes[pat].right_ignore;
+	return construct_ignore_list( prg, start );
+}
+
+/* Attach ignore_list as the left ignore of tree: a new kid holding it (with
+ * an added reference) becomes the head of the child list and AF_LEFT_IGNORE
+ * is set. The tree must not already have a left ignore. */
+static void ins_left_ignore( program_t *prg, tree_t *tree, tree_t *ignore_list )
+{
+	assert( ! (tree->flags & AF_LEFT_IGNORE) );
+
+	/* New kid that holds a reference to the ignore list. */
+	kid_t *holder = kid_allocate( prg );
+	holder->tree = ignore_list;
+	colm_tree_upref( prg, ignore_list );
+
+	/* The left ignore is always the very first kid. */
+	holder->next = tree->child;
+	tree->child = holder;
+
+	tree->flags |= AF_LEFT_IGNORE;
+}
+
+/* Attach ignore_list as the right ignore of tree: a new kid holding it (with
+ * an added reference) is linked in after the left ignore kid if there is one,
+ * otherwise at the head, and AF_RIGHT_IGNORE is set. The tree must not
+ * already have a right ignore. */
+static void ins_right_ignore( program_t *prg, tree_t *tree, tree_t *ignore_list )
+{
+	assert( ! (tree->flags & AF_RIGHT_IGNORE) );
+
+	/* New kid that holds a reference to the ignore list. */
+	kid_t *holder = kid_allocate( prg );
+	holder->tree = ignore_list;
+	colm_tree_upref( prg, ignore_list );
+
+	/* The link to splice into: behind the left ignore when present, else
+	 * the head of the child list. */
+	kid_t **link = ( tree->flags & AF_LEFT_IGNORE ) ?
+			&tree->child->next : &tree->child;
+	holder->next = *link;
+	*link = holder;
+
+	tree->flags |= AF_RIGHT_IGNORE;
+}
+
+/* Give push_to the right ignore list right_ignore and return the (possibly
+ * copied) tree. An existing right ignore is kept by making it the left
+ * ignore of the new list. */
+tree_t *push_right_ignore( program_t *prg, tree_t *push_to, tree_t *right_ignore )
+{
+	/* The tree is about to be altered, so split it first. */
+	push_to = split_tree( prg, push_to );
+
+	if ( ! (push_to->flags & AF_RIGHT_IGNORE) ) {
+		/* Nothing there yet, attach the list directly. */
+		ins_right_ignore( prg, push_to, right_ignore );
+		return push_to;
+	}
+
+	/* Merge: the existing right ignore becomes the left ignore of the new
+	 * list. */
+	kid_t *existing = tree_right_ignore_kid( prg, push_to );
+	ins_left_ignore( prg, right_ignore, existing->tree );
+
+	/* Swap the new list into the kid. Decrementing refs directly is safe
+	 * because ins_left_ignore just added a reference to the old tree. */
+	existing->tree->refs -= 1;
+	existing->tree = right_ignore;
+	colm_tree_upref( prg, right_ignore );
+
+	return push_to;
+}
+
+/* Give push_to the left ignore list left_ignore and return the (possibly
+ * copied) tree. An existing left ignore is kept by making it the right
+ * ignore of the new list. */
+tree_t *push_left_ignore( program_t *prg, tree_t *push_to, tree_t *left_ignore )
+{
+	/* The tree is about to be altered, so split it first. */
+	push_to = split_tree( prg, push_to );
+
+	if ( ! (push_to->flags & AF_LEFT_IGNORE) ) {
+		/* Nothing there yet, attach the list directly. */
+		ins_left_ignore( prg, push_to, left_ignore );
+		return push_to;
+	}
+
+	/* Merge: the existing left ignore becomes the right ignore of the new
+	 * list. */
+	kid_t *existing = tree_left_ignore_kid( prg, push_to );
+	ins_right_ignore( prg, left_ignore, existing->tree );
+
+	/* Swap the new list into the kid. Decrementing refs directly is safe
+	 * because ins_right_ignore just added a reference to the old tree. */
+	existing->tree->refs -= 1;
+	existing->tree = left_ignore;
+	colm_tree_upref( prg, left_ignore );
+
+	return push_to;
+}
+
+/* Detach the left ignore of tree: drop the reference held by the head kid,
+ * free that kid, and clear AF_LEFT_IGNORE. The flag must be set on entry. */
+static void rem_left_ignore( program_t *prg, tree_t **sp, tree_t *tree )
+{
+	assert( tree->flags & AF_LEFT_IGNORE );
+
+	kid_t *holder = tree->child;
+	kid_t *rest = holder->next;
+
+	colm_tree_downref( prg, sp, holder->tree );
+	kid_free( prg, holder );
+	tree->child = rest;
+
+	tree->flags &= ~AF_LEFT_IGNORE;
+}
+
+/* Detach the right ignore of tree: drop the reference held by its kid, free
+ * that kid, and clear AF_RIGHT_IGNORE. The kid is the second one when a left
+ * ignore is present, otherwise the head. The flag must be set on entry. */
+static void rem_right_ignore( program_t *prg, tree_t **sp, tree_t *tree )
+{
+	assert( tree->flags & AF_RIGHT_IGNORE );
+
+	/* The link that currently leads to the right ignore kid. */
+	kid_t **link = ( tree->flags & AF_LEFT_IGNORE ) ?
+			&tree->child->next : &tree->child;
+
+	kid_t *holder = *link;
+	kid_t *rest = holder->next;
+
+	colm_tree_downref( prg, sp, holder->tree );
+	kid_free( prg, holder );
+	*link = rest;
+
+	tree->flags &= ~AF_RIGHT_IGNORE;
+}
+
+/* Undo a push_right_ignore() on pop_from.
+ *
+ * prg          - program state used for allocation and reference counting.
+ * sp           - VM stack pointer, passed through to the downref calls.
+ * pop_from     - tree to detach from; it is split first, so the returned tree
+ *                may be a copy.
+ * right_ignore - out: the ignore list taken off, with a reference added.
+ *
+ * If the current right ignore carries a left ignore, that left ignore is
+ * treated as the original right ignore and is put back in place. Otherwise
+ * the right ignore is removed altogether. Returns the (possibly copied)
+ * pop_from. */
+tree_t *pop_right_ignore( program_t *prg, tree_t **sp, tree_t *pop_from, tree_t **right_ignore )
+{
+	/* Modifying the tree we are detaching from. */
+	pop_from = split_tree( prg, pop_from );
+
+	kid_t *ri_kid = tree_right_ignore_kid( prg, pop_from );
+
+	/* If the right ignore has a left ignore, then that was the original
+	 * right ignore. */
+	kid_t *li = tree_left_ignore_kid( prg, ri_kid->tree );
+	if ( li != 0 ) {
+		/* rem_left_ignore() frees the kid li points to, so take the tree
+		 * out of it now rather than reading it from freed memory later. */
+		tree_t *orig_ignore = li->tree;
+
+		/* Hold a reference across the removal so the original survives. */
+		colm_tree_upref( prg, orig_ignore );
+		rem_left_ignore( prg, sp, ri_kid->tree );
+
+		*right_ignore = ri_kid->tree;
+		colm_tree_upref( prg, *right_ignore );
+		ri_kid->tree = orig_ignore;
+	}
+	else {
+		*right_ignore = ri_kid->tree;
+		colm_tree_upref( prg, *right_ignore );
+		rem_right_ignore( prg, sp, pop_from );
+	}
+
+	return pop_from;
+}
+
+/* Undo a push_left_ignore() on pop_from.
+ *
+ * prg         - program state used for allocation and reference counting.
+ * sp          - VM stack pointer, passed through to the downref calls.
+ * pop_from    - tree to detach from; it is split first, so the returned tree
+ *               may be a copy.
+ * left_ignore - out: the ignore list taken off, with a reference added.
+ *
+ * If the current left ignore carries a right ignore, that right ignore is
+ * treated as the original left ignore and is put back in place. Otherwise
+ * the left ignore is removed altogether. Returns the (possibly copied)
+ * pop_from. */
+tree_t *pop_left_ignore( program_t *prg, tree_t **sp, tree_t *pop_from, tree_t **left_ignore )
+{
+	/* Modifying, make the write safe. */
+	pop_from = split_tree( prg, pop_from );
+
+	kid_t *li_kid = tree_left_ignore_kid( prg, pop_from );
+
+	/* If the left ignore has a right ignore, then that was the original
+	 * left ignore. */
+	kid_t *ri = tree_right_ignore_kid( prg, li_kid->tree );
+	if ( ri != 0 ) {
+		/* rem_right_ignore() frees the kid ri points to, so take the tree
+		 * out of it now rather than reading it from freed memory later. */
+		tree_t *orig_ignore = ri->tree;
+
+		/* Hold a reference across the removal so the original survives. */
+		colm_tree_upref( prg, orig_ignore );
+		rem_right_ignore( prg, sp, li_kid->tree );
+
+		*left_ignore = li_kid->tree;
+		colm_tree_upref( prg, *left_ignore );
+		li_kid->tree = orig_ignore;
+	}
+	else {
+		*left_ignore = li_kid->tree;
+		colm_tree_upref( prg, *left_ignore );
+		rem_left_ignore( prg, sp, pop_from );
+	}
+
+	return pop_from;
+}
+
+/* Allocate a tree for language element lang_el_id with one reference, no
+ * token data, production number 0 and an attribute list sized by the
+ * element's object_length. The kid and bindings arguments are not used. */
+tree_t *colm_construct_object( program_t *prg, kid_t *kid, tree_t **bindings, long lang_el_id )
+{
+	struct lang_el_info *lel_info = prg->rtd->lel_info;
+
+	tree_t *obj = tree_allocate( prg );
+	obj->id = lang_el_id;
+	obj->refs = 1;
+	obj->tokdata = 0;
+	obj->prod_num = 0;
+
+	/* An object has attributes only; there are no grammar children to
+	 * append behind them. */
+	int num_attrs = lel_info[obj->id].object_length;
+	kid_t *attrs = alloc_attrs( prg, num_attrs );
+	obj->child = kid_list_concat( attrs, 0 );
+
+	return obj;
+}
+
+/* Build the tree described by pattern node pat and return it with a
+ * reference already counted for the caller (this saves having to downref
+ * the bindings to hand back a zero-ref tree).
+ *
+ * A node with a positive bind_id yields the bound tree from bindings, with
+ * the node's ignore lists pushed onto it. Any other node yields a freshly
+ * allocated tree with attributes, children built by construct_kid(), ignore
+ * lists, and capture attributes. The kid argument is not used here. */
+tree_t *colm_construct_tree( program_t *prg, kid_t *kid, tree_t **bindings, long pat )
+{
+ struct pat_cons_node *nodes = prg->rtd->pat_repl_nodes;
+ struct lang_el_info *lel_info = prg->rtd->lel_info;
+ tree_t *tree = 0;
+
+ if ( nodes[pat].bind_id > 0 ) {
+ /* All bindings have been uprefed. */
+ tree = bindings[nodes[pat].bind_id];
+
+ /* Note: the kid_t *ignore declared inside each block below shadows
+ * this long of the same name. */
+ long ignore = nodes[pat].left_ignore;
+ tree_t *left_ignore = 0;
+ if ( ignore >= 0 ) {
+ kid_t *ignore = construct_left_ignore_list( prg, pat );
+
+ /* refs is not set here; push_left_ignore() adds the reference. */
+ left_ignore = tree_allocate( prg );
+ left_ignore->id = LEL_ID_IGNORE;
+ left_ignore->child = ignore;
+
+ /* May return a copy, since push_left_ignore() splits the tree. */
+ tree = push_left_ignore( prg, tree, left_ignore );
+ }
+
+ ignore = nodes[pat].right_ignore;
+ tree_t *right_ignore = 0;
+ if ( ignore >= 0 ) {
+ kid_t *ignore = construct_right_ignore_list( prg, pat );
+
+ right_ignore = tree_allocate( prg );
+ right_ignore->id = LEL_ID_IGNORE;
+ right_ignore->child = ignore;
+
+ tree = push_right_ignore( prg, tree, right_ignore );
+ }
+ }
+ else {
+ tree = tree_allocate( prg );
+ tree->id = nodes[pat].id;
+ tree->refs = 1;
+ /* A zero-length node carries no token data. */
+ tree->tokdata = nodes[pat].length == 0 ? 0 :
+ colm_string_alloc_pointer( prg,
+ nodes[pat].data, nodes[pat].length );
+ tree->prod_num = nodes[pat].prod_num;
+
+ int object_length = lel_info[tree->id].object_length;
+
+ /* Attribute kids come first, followed by the constructed children. */
+ kid_t *attrs = alloc_attrs( prg, object_length );
+ kid_t *child = construct_kid( prg, bindings,
+ 0, nodes[pat].child );
+
+ tree->child = kid_list_concat( attrs, child );
+
+ /* Right first, then left. Each is put at the head of the child list,
+ * so the left ignore ends up in front of the right ignore. */
+ kid_t *ignore = construct_right_ignore_list( prg, pat );
+ if ( ignore != 0 ) {
+ tree_t *ignore_list = tree_allocate( prg );
+ ignore_list->id = LEL_ID_IGNORE;
+ ignore_list->refs = 1;
+ ignore_list->child = ignore;
+
+ kid_t *ignore_head = kid_allocate( prg );
+ ignore_head->tree = ignore_list;
+ ignore_head->next = tree->child;
+ tree->child = ignore_head;
+
+ tree->flags |= AF_RIGHT_IGNORE;
+ }
+
+ ignore = construct_left_ignore_list( prg, pat );
+ if ( ignore != 0 ) {
+ tree_t *ignore_list = tree_allocate( prg );
+ ignore_list->id = LEL_ID_IGNORE;
+ ignore_list->refs = 1;
+ ignore_list->child = ignore;
+
+ kid_t *ignore_head = kid_allocate( prg );
+ ignore_head->tree = ignore_list;
+ ignore_head->next = tree->child;
+ tree->child = ignore_head;
+
+ tree->flags |= AF_LEFT_IGNORE;
+ }
+
+ /* Capture attributes are read from the pattern nodes that directly
+ * follow pat (pat+1, pat+2, ...) and stored at the offsets given by
+ * the element's capture_attr entries. */
+ int i;
+ for ( i = 0; i < lel_info[tree->id].num_capture_attr; i++ ) {
+ long ci = pat+1+i;
+ CaptureAttr *ca = prg->rtd->capture_attr + lel_info[tree->id].capture_attr + i;
+ tree_t *attr = tree_allocate( prg );
+ attr->id = nodes[ci].id;
+ attr->refs = 1;
+ attr->tokdata = nodes[ci].length == 0 ? 0 :
+ colm_string_alloc_pointer( prg,
+ nodes[ci].data, nodes[ci].length );
+
+ colm_tree_set_attr( tree, ca->offset, attr );
+ }
+ }
+
+ return tree;
+}
+
+/* Build the list of kids for the chain of sibling pattern nodes that starts
+ * at pat and is linked through nodes[].next, ending at -1. Each kid holds
+ * the tree constructed for its node. Returns 0 when pat is -1. The prev
+ * argument is not used. */
+kid_t *construct_kid( program_t *prg, tree_t **bindings, kid_t *prev, long pat )
+{
+	struct pat_cons_node *nodes = prg->rtd->pat_repl_nodes;
+
+	kid_t *head = 0;
+	/* Points at the link where the next kid is to be stored. */
+	kid_t **link = &head;
+
+	/* Walk the siblings in a loop rather than recursing once per sibling. */
+	for ( ; pat != -1; pat = nodes[pat].next ) {
+		kid_t *kid = kid_allocate( prg );
+		kid->tree = colm_construct_tree( prg, kid, bindings, pat );
+		kid->next = 0;
+
+		*link = kid;
+		link = &kid->next;
+	}
+
+	return head;
+}
+
+/* Build a token tree from an argument vector: args[0] is the element id,
+ * args[1] a string holding the token text, and args[2..nargs-1] the values of
+ * the leading attributes. The result has one reference. Tokens of an ignore
+ * element get no attribute list. */
+tree_t *colm_construct_token( program_t *prg, tree_t **args, long nargs )
+{
+	long id = (long)(value_t)args[0];
+	str_t *text = (str_t*)args[1];
+
+	head_t *tokdata = string_copy( prg, text->value );
+	struct lang_el_info *lel_info = prg->rtd->lel_info;
+
+	if ( lel_info[id].ignore ) {
+		tree_t *ign = tree_allocate( prg );
+		ign->refs = 1;
+		ign->id = id;
+		ign->tokdata = tokdata;
+		return ign;
+	}
+
+	/* There cannot be more attribute values than attribute slots. */
+	long object_length = lel_info[id].object_length;
+	assert( nargs-2 <= object_length );
+
+	kid_t *attrs = alloc_attrs( prg, object_length );
+
+	tree_t *tok = tree_allocate( prg );
+	tok->id = id;
+	tok->refs = 1;
+	tok->tokdata = tokdata;
+	tok->child = attrs;
+
+	/* Store each supplied value and count the new reference to it. */
+	long slot;
+	for ( slot = 0; slot < nargs - 2; slot++ ) {
+		colm_tree_set_attr( tok, slot, args[slot + 2] );
+		colm_tree_upref( prg, colm_get_attr( tok, slot ) );
+	}
+
+	return tok;
+}
+
+/* Make a copy of tree whose id is lang_el_id. The token data is copied, the
+ * ignore kids and the grammar children are shared with the source (their
+ * reference counts are incremented), and the attribute slots are replaced by
+ * empty slots sized for the target element. The refs field of the new tree
+ * is not set here. */
+tree_t *cast_tree( program_t *prg, int lang_el_id, tree_t *tree )
+{
+ struct lang_el_info *lel_info = prg->rtd->lel_info;
+
+ /* The result is always a newly allocated tree. */
+ tree_t *new_tree = tree_allocate( prg );
+
+ new_tree->id = lang_el_id;
+ new_tree->tokdata = string_copy( prg, tree->tokdata );
+
+ /* Invalidate the production number. */
+ new_tree->prod_num = -1;
+
+ /* Copy the child list. Start with ignores, then the list. */
+ kid_t *child = tree->child, *last = 0;
+
+ /* Flags we are interested in. */
+ new_tree->flags |= tree->flags & ( AF_LEFT_IGNORE | AF_RIGHT_IGNORE );
+
+ /* Number of ignore kids at the head of the source's child list. */
+ int ignores = 0;
+ if ( tree->flags & AF_LEFT_IGNORE )
+ ignores += 1;
+ if ( tree->flags & AF_RIGHT_IGNORE )
+ ignores += 1;
+
+ /* Ignores. These are shared with the source, not copied. */
+ while ( ignores-- > 0 ) {
+ kid_t *new_kid = kid_allocate( prg );
+
+ new_kid->tree = child->tree;
+ new_kid->next = 0;
+ new_kid->tree->refs += 1;
+
+ /* Store the first child. */
+ if ( last == 0 )
+ new_tree->child = new_kid;
+ else
+ last->next = new_kid;
+
+ child = child->next;
+ last = new_kid;
+ }
+
+ /* Skip over the source's attributes. */
+ int object_length = lel_info[tree->id].object_length;
+ while ( object_length-- > 0 )
+ child = child->next;
+
+ /* Allocate the target type's kids. */
+ object_length = lel_info[lang_el_id].object_length;
+ while ( object_length-- > 0 ) {
+ kid_t *new_kid = kid_allocate( prg );
+
+ new_kid->tree = 0;
+ new_kid->next = 0;
+
+ /* Store the first child. */
+ if ( last == 0 )
+ new_tree->child = new_kid;
+ else
+ last->next = new_kid;
+
+ last = new_kid;
+ }
+
+ /* Copy the source's children. Unlike copy_real_tree(), child->tree is
+ * dereferenced without a null check here. */
+ while ( child != 0 ) {
+ kid_t *new_kid = kid_allocate( prg );
+
+ new_kid->tree = child->tree;
+ new_kid->next = 0;
+ new_kid->tree->refs += 1;
+
+ /* Store the first child. */
+ if ( last == 0 )
+ new_tree->child = new_kid;
+ else
+ last->next = new_kid;
+
+ child = child->next;
+ last = new_kid;
+ }
+
+ return new_tree;
+}
+
+/* Build a tree from an argument vector: args[0] is the element id and
+ * args[1..nargs-1] become the children, in order, behind a new attribute
+ * list. Each child gains a reference; the result has one reference. */
+tree_t *make_tree( program_t *prg, tree_t **args, long nargs )
+{
+	long id = (long)(value_t)args[0];
+	struct lang_el_info *lel_info = prg->rtd->lel_info;
+
+	tree_t *made = tree_allocate( prg );
+	made->id = id;
+	made->refs = 1;
+
+	long object_length = lel_info[id].object_length;
+	kid_t *attrs = alloc_attrs( prg, object_length );
+
+	kid_t *children = 0;
+	/* Points at the link where the next child kid is to be stored. */
+	kid_t **link = &children;
+
+	long arg;
+	for ( arg = 1; arg < nargs; arg++ ) {
+		kid_t *kid = kid_allocate( prg );
+		kid->tree = args[arg];
+		colm_tree_upref( prg, kid->tree );
+
+		*link = kid;
+		link = &kid->next;
+	}
+
+	made->child = kid_list_concat( attrs, children );
+
+	return made;
+}
+
+/* Return non-zero when tree counts as false: it is either null or the
+ * program's false value. */
+int test_false( program_t *prg, tree_t *tree )
+{
+	if ( tree == 0 )
+		return 1;
+	return tree == prg->false_val;
+}
+
+/* Copy an ignore list that hangs off a header kid. The header's tree field
+ * is treated as the first kid of the list (note the casts), and each copied
+ * kid shares its tree with the source, incrementing its reference count.
+ * Returns a new header kid.
+ *
+ * NOTE(review): when the source list is empty, new_header->tree is never
+ * assigned here, and the next field of the last copied kid is never set;
+ * both rely on whatever kid_allocate() returns -- confirm. */
+kid_t *copy_ignore_list( program_t *prg, kid_t *ignore_header )
+{
+ kid_t *new_header = kid_allocate( prg );
+ kid_t *last = 0, *ic = (kid_t*)ignore_header->tree;
+ while ( ic != 0 ) {
+ kid_t *new_ic = kid_allocate( prg );
+
+ new_ic->tree = ic->tree;
+ new_ic->tree->refs += 1;
+
+ /* List pointers. */
+ if ( last == 0 )
+ new_header->tree = (tree_t*)new_ic;
+ else
+ last->next = new_ic;
+
+ ic = ic->next;
+ last = new_ic;
+ }
+ return new_header;
+}
+
+/* Make a new kid list with one kid per kid of kid_list, in the same order.
+ * The trees are shared with the source and each gains a reference. Returns
+ * 0 for an empty list. */
+kid_t *copy_kid_list( program_t *prg, kid_t *kid_list )
+{
+	kid_t *copy = 0;
+	/* Points at the link where the next copied kid is to be stored. */
+	kid_t **link = &copy;
+
+	kid_t *src;
+	for ( src = kid_list; src != 0; src = src->next ) {
+		kid_t *dup = kid_allocate( prg );
+
+		dup->tree = src->tree;
+		colm_tree_upref( prg, dup->tree );
+
+		*link = dup;
+		link = &dup->next;
+	}
+
+	return copy;
+}
+
+/* Make a shallow copy of tree: id, production number and a copy of the token
+ * data, plus a new kid for every kid of the source. The kids' trees are
+ * shared with the source and gain a reference. The refs field of the new
+ * tree is not set here.
+ *
+ * If old_next_down is one of the source's kids, *new_next_down is set to the
+ * kid that corresponds to it in the copy. */
+tree_t *copy_real_tree( program_t *prg, tree_t *tree, kid_t *old_next_down, kid_t **new_next_down )
+{
+ /* Need to keep a lookout for next down. If
+ * copying it, return the copy. */
+ tree_t *new_tree = tree_allocate( prg );
+
+ new_tree->id = tree->id;
+ new_tree->tokdata = string_copy( prg, tree->tokdata );
+ new_tree->prod_num = tree->prod_num;
+
+ /* Copy the child list. Start with ignores, then the list. */
+ kid_t *child = tree->child, *last = 0;
+
+ /* Left ignores. Only the flag is carried over here; child is not advanced,
+ * so the ignore kid itself is copied by the general loop below. */
+ if ( tree->flags & AF_LEFT_IGNORE ) {
+ new_tree->flags |= AF_LEFT_IGNORE;
+ }
+
+ /* Right ignores. Same as above: flag only, the kid is copied below. */
+ if ( tree->flags & AF_RIGHT_IGNORE ) {
+ new_tree->flags |= AF_RIGHT_IGNORE;
+ }
+
+ /* Ignores, attributes and children. */
+ while ( child != 0 ) {
+ kid_t *new_kid = kid_allocate( prg );
+
+ /* Watch out for next down. */
+ if ( child == old_next_down )
+ *new_next_down = new_kid;
+
+ new_kid->tree = child->tree;
+ new_kid->next = 0;
+
+ /* May be an attribute, which can be empty. */
+ if ( new_kid->tree != 0 )
+ new_kid->tree->refs += 1;
+
+ /* Store the first child. */
+ if ( last == 0 )
+ new_tree->child = new_kid;
+ else
+ last->next = new_kid;
+
+ child = child->next;
+ last = new_kid;
+ }
+
+ return new_tree;
+}
+
+
+/* Copy a tree that is neither a pointer nor a string, using
+ * copy_real_tree(). The copy is asserted to have zero references. */
+tree_t *colm_copy_tree( program_t *prg, tree_t *tree, kid_t *old_next_down, kid_t **new_next_down )
+{
+	assert( tree->id != LEL_ID_PTR && tree->id != LEL_ID_STR );
+
+	tree_t *copy = copy_real_tree( prg, tree, old_next_down, new_next_down );
+
+	assert( copy->refs == 0 );
+
+	return copy;
+}
+
+/* Make tree safe to modify. A tree with more than one reference is copied;
+ * the copy gets one reference and the original loses one. A tree with
+ * exactly one reference is returned as is, and a null tree stays null. */
+tree_t *split_tree( program_t *prg, tree_t *tree )
+{
+	if ( tree == 0 )
+		return tree;
+
+	assert( tree->refs >= 1 );
+
+	if ( tree->refs > 1 ) {
+		kid_t *old_next_down = 0, *new_next_down = 0;
+		tree_t *copy = colm_copy_tree( prg, tree, old_next_down, &new_next_down );
+		colm_tree_upref( prg, copy );
+
+		/* The original cannot reach zero here because it had more than one
+		 * reference, so there is nothing to free. */
+		tree->refs -= 1;
+
+		tree = copy;
+	}
+
+	assert( tree->refs == 1 );
+	return tree;
+}
+
+/* Free tree and, transitively, every tree below it whose reference count
+ * drops to zero as a result.
+ *
+ * We can't make recursive calls here since the tree we are freeing may be
+ * very large. Need the VM stack: the trees held by the kids are pushed, and
+ * popped again in the loop at the bottom, which jumps back to free_tree for
+ * each one that has reached zero references.
+ *
+ * NOTE(review): vm_ptop/vm_push_tree/vm_pop_tree are defined elsewhere and
+ * presumably operate on the local sp -- confirm. */
+void tree_free_rec( program_t *prg, tree_t **sp, tree_t *tree )
+{
+ /* Stack position on entry; used below to tell when everything pushed by
+ * this call has been popped again. */
+ tree_t **top = vm_ptop();
+
+free_tree:
+ switch ( tree->id ) {
+ case LEL_ID_PTR:
+ tree_free( prg, tree );
+ break;
+ case LEL_ID_STR: {
+ str_t *str = (str_t*) tree;
+ string_free( prg, str->value );
+ tree_free( prg, tree );
+ break;
+ }
+ default: {
+ /* Token data is freed for every tree except ignore lists. */
+ if ( tree->id != LEL_ID_IGNORE )
+ string_free( prg, tree->tokdata );
+
+ /* Attributes and grammar-based children. The kids are freed now, the
+ * trees they held are pushed for the loop below. */
+ kid_t *child = tree->child;
+ while ( child != 0 ) {
+ kid_t *next = child->next;
+ vm_push_tree( child->tree );
+ kid_free( prg, child );
+ child = next;
+ }
+
+ tree_free( prg, tree );
+ break;
+ }}
+
+ /* Any trees to downref? */
+ while ( sp != top ) {
+ tree = vm_pop_tree();
+ if ( tree != 0 ) {
+ assert( tree->refs > 0 );
+ tree->refs -= 1;
+ if ( tree->refs == 0 )
+ goto free_tree;
+ }
+ }
+}
+
+/* Add a reference to tree. A null tree is ignored. The tree's id is asserted
+ * to lie below the first struct element id. */
+void colm_tree_upref( program_t *prg, tree_t *tree )
+{
+	if ( tree == 0 )
+		return;
+
+	assert( tree->id < prg->rtd->first_struct_el_id );
+	tree->refs += 1;
+}
+
+/* Drop a reference to tree and free it with tree_free_rec() when none are
+ * left. A null tree is ignored. The tree's id is asserted to lie below the
+ * first struct element id. */
+void colm_tree_downref( program_t *prg, tree_t **sp, tree_t *tree )
+{
+	if ( tree == 0 )
+		return;
+
+	assert( tree->id < prg->rtd->first_struct_el_id );
+	assert( tree->refs > 0 );
+
+	tree->refs -= 1;
+	if ( tree->refs != 0 )
+		return;
+
+	tree_free_rec( prg, sp, tree );
+}
+
+/* Free tree and, transitively, every tree below it whose reference count
+ * drops to zero as a result. The steps are the same as in tree_free_rec().
+ *
+ * We can't make recursive calls here since the tree we are freeing may be
+ * very large. Need the VM stack: the trees held by the kids are pushed, and
+ * popped again in the loop at the bottom, which jumps back to free_tree for
+ * each one that has reached zero references.
+ *
+ * NOTE(review): vm_ptop/vm_push_tree/vm_pop_tree are defined elsewhere and
+ * presumably operate on the local sp -- confirm. */
+void object_free_rec( program_t *prg, tree_t **sp, tree_t *tree )
+{
+ /* Stack position on entry; used below to tell when everything pushed by
+ * this call has been popped again. */
+ tree_t **top = vm_ptop();
+
+free_tree:
+
+ switch ( tree->id ) {
+ case LEL_ID_STR: {
+ str_t *str = (str_t*) tree;
+ string_free( prg, str->value );
+ tree_free( prg, tree );
+ break;
+ }
+ case LEL_ID_PTR: {
+ tree_free( prg, tree );
+ break;
+ }
+ default: {
+ /* Token data is freed for every tree except ignore lists. */
+ if ( tree->id != LEL_ID_IGNORE )
+ string_free( prg, tree->tokdata );
+
+ /* Attributes and grammar-based children. The kids are freed now, the
+ * trees they held are pushed for the loop below. */
+ kid_t *child = tree->child;
+ while ( child != 0 ) {
+ kid_t *next = child->next;
+ vm_push_tree( child->tree );
+ kid_free( prg, child );
+ child = next;
+ }
+
+ tree_free( prg, tree );
+ break;
+ }}
+
+ /* Any trees to downref? */
+ while ( sp != top ) {
+ tree = vm_pop_tree();
+ if ( tree != 0 ) {
+ assert( tree->refs > 0 );
+ tree->refs -= 1;
+ if ( tree->refs == 0 )
+ goto free_tree;
+ }
+ }
+}
+
+/* Drop a reference to tree and free it with object_free_rec() when none are
+ * left. A null tree is ignored. */
+void object_downref( program_t *prg, tree_t **sp, tree_t *tree )
+{
+	if ( tree == 0 )
+		return;
+
+	assert( tree->refs > 0 );
+
+	tree->refs -= 1;
+	if ( tree->refs != 0 )
+		return;
+
+	object_free_rec( prg, sp, tree );
+}
+
+/* Return the first grammar child of tree: the kid that follows the ignore
+ * kids and the element's object_length attribute kids. */
+kid_t *tree_child( program_t *prg, const tree_t *tree )
+{
+	struct lang_el_info *lel_info = prg->rtd->lel_info;
+	kid_t *cur = tree->child;
+
+	/* Step past one kid for each ignore flag that is set. */
+	if ( tree->flags & AF_LEFT_IGNORE )
+		cur = cur->next;
+	if ( tree->flags & AF_RIGHT_IGNORE )
+		cur = cur->next;
+
+	/* Step past the attributes. */
+	long attrs_left = lel_info[tree->id].object_length;
+	while ( attrs_left-- > 0 )
+		cur = cur->next;
+
+	return cur;
+}
+
+/* Detach at the first real child of a tree.
+ *
+ * The child list is cut just before the first grammar child, that is, after
+ * the ignore kids and the element's object_length attribute kids. The tree
+ * keeps its ignore and attribute kids; the detached remainder of the list is
+ * returned (0 if there are no grammar children). */
+kid_t *tree_extract_child( program_t *prg, tree_t *tree )
+{
+	struct lang_el_info *lel_info = prg->rtd->lel_info;
+	kid_t *kid = tree->child, *last = 0;
+
+	/* Step past the ignore kids, remembering the last kid that stays with
+	 * the tree. Without this a tree that has ignores but no attributes
+	 * would have its whole child list cleared while the ignore flags remain
+	 * set. */
+	if ( tree->flags & AF_LEFT_IGNORE ) {
+		last = kid;
+		kid = kid->next;
+	}
+	if ( tree->flags & AF_RIGHT_IGNORE ) {
+		last = kid;
+		kid = kid->next;
+	}
+
+	/* Skip over attributes. */
+	long a, object_length = lel_info[tree->id].object_length;
+	for ( a = 0; a < object_length; a++ ) {
+		last = kid;
+		kid = kid->next;
+	}
+
+	/* Cut the list after the last kid the tree keeps, if there is one. */
+	if ( last == 0 )
+		tree->child = 0;
+	else
+		last->next = 0;
+
+	return kid;
+}
+
+
+/* Return the first kid of tree that follows its ignore kids, which is where
+ * the attribute slots begin. */
+kid_t *tree_attr( program_t *prg, const tree_t *tree )
+{
+	kid_t *cur = tree->child;
+	int skip = 0;
+
+	/* One kid to skip per ignore flag set. */
+	if ( tree->flags & AF_LEFT_IGNORE )
+		skip += 1;
+	if ( tree->flags & AF_RIGHT_IGNORE )
+		skip += 1;
+
+	while ( skip-- > 0 )
+		cur = cur->next;
+
+	return cur;
+}
+
+/* Return the left ignore list of tree, or 0 if it has none. The left ignore
+ * is held by the first kid. */
+tree_t *tree_left_ignore( program_t *prg, tree_t *tree )
+{
+	if ( ! (tree->flags & AF_LEFT_IGNORE) )
+		return 0;
+	return tree->child->tree;
+}
+
+/* Return the right-ignore tree of a tree, or null when AF_RIGHT_IGNORE is
+ * not set. It is held by the second kid when a left ignore is also present,
+ * by the first kid otherwise. */
+tree_t *tree_right_ignore( program_t *prg, tree_t *tree )
+{
+	kid_t *slot;
+
+	if ( ( tree->flags & AF_RIGHT_IGNORE ) == 0 )
+		return 0;
+
+	slot = tree->child;
+	if ( tree->flags & AF_LEFT_IGNORE )
+		slot = slot->next;
+
+	return slot->tree;
+}
+
+/* Return the kid holding the left ignore (the first kid), or null when
+ * AF_LEFT_IGNORE is not set. */
+kid_t *tree_left_ignore_kid( program_t *prg, tree_t *tree )
+{
+	return ( tree->flags & AF_LEFT_IGNORE ) ? tree->child : 0;
+}
+
+/* Return the kid holding the right ignore, or null when AF_RIGHT_IGNORE is
+ * not set. It is the second kid when a left ignore is also present, the
+ * first kid otherwise. */
+kid_t *tree_right_ignore_kid( program_t *prg, tree_t *tree )
+{
+	kid_t *slot;
+
+	if ( ( tree->flags & AF_RIGHT_IGNORE ) == 0 )
+		return 0;
+
+	slot = tree->child;
+	if ( tree->flags & AF_LEFT_IGNORE )
+		slot = slot->next;
+
+	return slot;
+}
+
+/* Replace the tree held by the kid a reference points at. The tree held
+ * before is downref'd first; no upref is performed on v here. */
+void ref_set_value( program_t *prg, tree_t **sp, ref_t *ref, tree_t *v )
+{
+	tree_t *previous = ref->kid->tree;
+
+	colm_tree_downref( prg, sp, previous );
+	ref->kid->tree = v;
+}
+
+/* Return the tree at a zero-based position among the grammar children of
+ * lhs. The position is not range-checked. */
+tree_t *get_rhs_el( program_t *prg, tree_t *lhs, long position )
+{
+	kid_t *el = tree_child( prg, lhs );
+
+	for ( ; position > 0; position-- )
+		el = el->next;
+
+	return el->tree;
+}
+
+/* Store value in the kid at a zero-based position among the grammar
+ * children of lhs. Reference counts are not adjusted here and the position
+ * is not range-checked. */
+void set_rhs_el( program_t *prg, tree_t *lhs, long position, tree_t *value )
+{
+	kid_t *el = tree_child( prg, lhs );
+
+	for ( ; position > 0; position-- )
+		el = el->next;
+
+	el->tree = value;
+}
+
+
+/* Return the kid at a zero-based position among the grammar children of
+ * lhs. The position is not range-checked. */
+kid_t *get_rhs_el_kid( program_t *prg, tree_t *lhs, long position )
+{
+	kid_t *el = tree_child( prg, lhs );
+
+	for ( ; position > 0; position-- )
+		el = el->next;
+
+	return el;
+}
+
+/* Return the parse-tree node at a zero-based position in the child list of
+ * lhs. The position is not range-checked. */
+parse_tree_t *get_rhs_parse_tree( program_t *prg, parse_tree_t *lhs, long position )
+{
+	parse_tree_t *el = lhs->child;
+
+	for ( ; position > 0; position-- )
+		el = el->next;
+
+	return el;
+}
+
+/* Look up a right-hand-side element using a table of (production number,
+ * child position) pairs. a[0] gives the number of pairs, which follow
+ * immediately. The first pair whose production number equals
+ * tree->prod_num selects the child to return; null if no pair matches. */
+tree_t *colm_get_rhs_val( program_t *prg, tree_t *tree, int *a )
+{
+	const int *pair = a + 1;
+	int left;
+
+	for ( left = a[0]; left > 0; left--, pair += 2 ) {
+		if ( tree->prod_num == pair[0] )
+			return get_rhs_el( prg, tree, pair[1] );
+	}
+
+	return 0;
+}
+
+/* Store value as attribute number `field` of tree. Asserts that the tree
+ * has exactly one reference and that a non-null value has at least one. */
+void colm_tree_set_field( program_t *prg, tree_t *tree, long field, tree_t *value )
+{
+	assert( tree->refs == 1 );
+	assert( value == 0 || value->refs >= 1 );
+
+	colm_tree_set_attr( tree, field, value );
+}
+
+/* Return attribute number `field` of tree. Forwards to colm_get_attr(). */
+tree_t *colm_tree_get_field( tree_t *tree, word_t field )
+{
+ return colm_get_attr( tree, field );
+}
+
+/* Return the kid for attribute number `field` of tree. Forwards to
+ * get_attr_kid(). */
+kid_t *get_field_kid( tree_t *tree, word_t field )
+{
+ return get_attr_kid( tree, field );
+}
+
+/* Pass attribute number `field` of tree through split_tree(), store the
+ * result back into the attribute and return it.
+ * NOTE(review): split_tree() presumably yields a tree that is safe to
+ * modify when the current value is shared -- confirm against its
+ * definition. */
+tree_t *get_field_split( program_t *prg, tree_t *tree, word_t field )
+{
+	tree_t *unshared = split_tree( prg, colm_get_attr( tree, field ) );
+
+	colm_tree_set_attr( tree, field, unshared );
+	return unshared;
+}
+
+/* Match pattern node `pat` against `kid`, recording bound trees in
+ * `bindings`. Returns non-zero on a match and false otherwise. A pattern
+ * index of -1 together with a null kid counts as a match.
+ *
+ * The traversal order (node, then children, then next sibling) must stay
+ * the same as the order in which the bindId assignments are done. A
+ * binding is written as soon as its node matches, even if a later child or
+ * sibling fails to match. */
+int match_pattern( tree_t **bindings, program_t *prg, long pat, kid_t *kid, int check_next )
+{
+	struct pat_cons_node *nodes = prg->rtd->pat_repl_nodes;
+	struct pat_cons_node *node;
+
+	/* Running out of pattern and of kids together is a match; running out
+	 * of only one of them is not. */
+	if ( pat == -1 || kid == 0 )
+		return ( pat == -1 && kid == 0 ) ? 1 : false;
+
+	node = &nodes[pat];
+	if ( node->id != kid->tree->id )
+		return false;
+
+	/* A pattern node that carries data must equal the token text: same
+	 * length and, when non-empty, the same bytes. */
+	if ( node->data != 0 ) {
+		if ( node->length != string_length( kid->tree->tokdata ) )
+			return false;
+
+		if ( node->length > 0 && memcmp( node->data,
+				string_data( kid->tree->tokdata ), node->length ) != 0 )
+			return false;
+	}
+
+	if ( node->bind_id > 0 )
+		bindings[node->bind_id] = kid->tree;
+
+	/* Unless this node is marked as a stopping point, descend into the
+	 * children. */
+	if ( !node->stop && !match_pattern( bindings, prg,
+			node->child, tree_child( prg, kid->tree ), true ) )
+		return false;
+
+	/* Continue along the sibling list when asked to. */
+	if ( check_next && !match_pattern( bindings, prg,
+			node->next, kid->next, true ) )
+		return false;
+
+	return true;
+}
+
+
+/* Three-way comparison of two trees. Returns 0 when they compare equal, a
+ * negative value when tree1 orders first and a positive value when tree2
+ * orders first.
+ *
+ * Ordering rules, applied in sequence: a null tree orders before a non-null
+ * one; then by id; then by payload (pointer value for LEL_ID_PTR, string
+ * value for LEL_ID_STR, token data otherwise, with missing token data
+ * ordering first); finally by the grammar children, pairwise, with the
+ * shorter child list ordering first. */
+long colm_cmp_tree( program_t *prg, const tree_t *tree1, const tree_t *tree2 )
+{
+	long order;
+	kid_t *left, *right;
+
+	if ( tree1 == 0 )
+		return tree2 == 0 ? 0 : -1;
+	if ( tree2 == 0 )
+		return 1;
+
+	if ( tree1->id != tree2->id )
+		return tree1->id < tree2->id ? -1 : 1;
+
+	if ( tree1->id == LEL_ID_PTR ) {
+		if ( ((pointer_t*)tree1)->value < ((pointer_t*)tree2)->value )
+			return -1;
+		if ( ((pointer_t*)tree1)->value > ((pointer_t*)tree2)->value )
+			return 1;
+	}
+	else if ( tree1->id == LEL_ID_STR ) {
+		order = cmp_string( ((str_t*)tree1)->value, ((str_t*)tree2)->value );
+		if ( order != 0 )
+			return order;
+	}
+	else if ( tree1->tokdata == 0 || tree2->tokdata == 0 ) {
+		/* At least one side has no token data; they differ only if
+		 * exactly one is missing. */
+		if ( tree1->tokdata != tree2->tokdata )
+			return tree1->tokdata == 0 ? -1 : 1;
+	}
+	else {
+		order = cmp_string( tree1->tokdata, tree2->tokdata );
+		if ( order != 0 )
+			return order;
+	}
+
+	/* Payloads are equal, compare the children pairwise. */
+	left = tree_child( prg, tree1 );
+	right = tree_child( prg, tree2 );
+	for ( ; left != 0 && right != 0; left = left->next, right = right->next ) {
+		order = colm_cmp_tree( prg, left->tree, right->tree );
+		if ( order != 0 )
+			return order;
+	}
+
+	if ( left == 0 && right == 0 )
+		return 0;
+	return left == 0 ? -1 : 1;
+}
+
+
+/* Walk a reference chain and replace every shared tree on it (refs > 1)
+ * with a copy, so that the kids on the chain end up pointing at trees that
+ * are not shared.
+ *
+ * The chain is first reversed in place by flipping the next pointers, then
+ * walked from the far end back towards from_ref. All next pointers of the
+ * chain are left null when the function returns. psp is not used by the
+ * code in this function. */
+void split_ref( program_t *prg, tree_t ***psp, ref_t *from_ref )
+{
+ /* Go up the chain of kids, turning the pointers down. */
+ ref_t *last = 0, *ref = from_ref, *next = 0;
+ while ( ref->next != 0 ) {
+ next = ref->next;
+ ref->next = last;
+ last = ref;
+ ref = next;
+ }
+ ref->next = last;
+
+ /* Now traverse the list, which goes down. */
+ while ( ref != 0 ) {
+ if ( ref->kid->tree->refs > 1 ) {
+ /* Find the first ref further down that uses a different kid. */
+ ref_t *next_down = ref->next;
+ while ( next_down != 0 && next_down->kid == ref->kid )
+ next_down = next_down->next;
+
+ kid_t *old_next_kid_down = next_down != 0 ? next_down->kid : 0;
+ kid_t *new_next_kid_down = 0;
+
+ /* Copy the shared tree. The copy routine is given the kid used by the
+ * next ref down and reports a replacement for it through
+ * new_next_kid_down. */
+ tree_t *new_tree = colm_copy_tree( prg, ref->kid->tree,
+ old_next_kid_down, &new_next_kid_down );
+ colm_tree_upref( prg, new_tree );
+
+ /* Downref the original. Don't need to consider freeing because
+ * refs were > 1. */
+ ref->kid->tree->refs -= 1;
+
+ /* Point the kid of every ref up to next_down at the copy, clearing
+ * the chain links on the way. */
+ while ( ref != 0 && ref != next_down ) {
+ next = ref->next;
+ ref->next = 0;
+
+ ref->kid->tree = new_tree;
+ ref = next;
+ }
+
+ /* Correct kid pointers down from ref. */
+ while ( next_down != 0 && next_down->kid == old_next_kid_down ) {
+ next_down->kid = new_next_kid_down;
+ next_down = next_down->next;
+ }
+ }
+ else {
+ /* Reset the list as we go down. */
+ next = ref->next;
+ ref->next = 0;
+ ref = next;
+ }
+ }
+}
+
+/* Stub: the assignments to the list head (field 0) and tail (field 1) are
+ * commented out, so this only validates its arguments and always returns
+ * null. Asserts that a non-null value has at least one reference and that
+ * field is 0 or 1. */
+tree_t *set_list_mem( list_t *list, half_t field, tree_t *value )
+{
+ if ( value != 0 )
+ assert( value->refs >= 1 );
+
+ tree_t *existing = 0;
+ switch ( field ) {
+ case 0:
+// existing = list->head->value;
+// list->head->value = value;
+ break;
+ case 1:
+// existing = list->tail->value;
+// list->tail->value = value;
+ break;
+ default:
+ assert( false );
+ break;
+ }
+ return existing;
+}
+
+/* Detach the element stored under key from the map. Returns a pair whose
+ * key member is the key of the detached element and whose val member is
+ * always null; both members are null when the key is not found. The
+ * detached element is not freed here. */
+struct tree_pair map_remove( program_t *prg, map_t *map, tree_t *key )
+{
+	struct tree_pair removed = { 0, 0 };
+	map_el_t *el = map_impl_find( prg, map, key );
+
+	if ( el == 0 )
+		return removed;
+
+	map_detach( prg, map, el );
+	removed.key = el->key;
+
+	return removed;
+}
+
+/* Compiled out with #if 0. Inside it the accesses to the stored tree are
+ * themselves commented out, so `stored` would always be null. */
+#if 0
+tree_t *map_unstore( program_t *prg, map_t *map, tree_t *key, tree_t *existing )
+{
+ tree_t *stored = 0;
+ if ( existing == 0 ) {
+ map_el_t *map_el = map_detach_by_key( prg, map, key );
+ // stored = mapEl->tree;
+ map_el_free( prg, map_el );
+ }
+ else {
+ map_el_t *map_el = map_impl_find( prg, map, key );
+ // stored = mapEl->tree;
+ //mapEl->tree = existing;
+ }
+ return stored;
+}
+#endif
+
+/* Stub: the lookup is commented out; always returns null. */
+tree_t *map_find( program_t *prg, map_t *map, tree_t *key )
+{
+// map_el_t *mapEl = mapImplFind( prg, map, key );
+// return mapEl == 0 ? 0 : mapEl->tree;
+ return 0;
+}
+
+/* Return the tree_size member of the map. */
+long map_length( map_t *map )
+{
+ return map->tree_size;
+}
+
+/* Stub: the whole body is commented out, so calling this has no effect. */
+void list_push_tail( program_t *prg, list_t *list, tree_t *val )
+{
+// if ( val != 0 )
+// assert( val->refs >= 1 );
+// list_el_t *listEl = colm_list_el_new( prg );
+// listEl->value = val;
+// listAppend( list, listEl );
+}
+
+/* Stub: the whole body is commented out, so calling this has no effect. */
+void list_push_head( program_t *prg, list_t *list, tree_t *val )
+{
+// if ( val != 0 )
+// assert( val->refs >= 1 );
+// list_el_t *listEl = listElAllocate( prg );
+// listEl->value = val;
+// listPrepend( list, listEl );
+}
+
+/* Stub: the removal is commented out; the list is left untouched and null
+ * is always returned. */
+tree_t *list_remove_end( program_t *prg, list_t *list )
+{
+// tree_t *tree = list->tail->value;
+// listElFree( prg, listDetachLast( list ) );
+// return tree;
+ return 0;
+}
+
+/* Stub: the removal is commented out; the list is left untouched and null
+ * is always returned. */
+tree_t *list_remove_head( program_t *prg, list_t *list )
+{
+// tree_t *tree = list->head;
+// listDetachFirst( list );
+// return tree;
+ return 0;
+}
+
+/* Read a member of a parser object by field number.
+ *   0: the parsed root, obtained from get_parsed_root(); its second
+ *      argument is true when the pda_run has a positive stop_target.
+ *   1: the parse_error_text of the pda_run.
+ * Any other field number trips an assertion and yields null. */
+tree_t *get_parser_mem( parser_t *parser, word_t field )
+{
+	tree_t *result = 0;
+
+	if ( field == 0 ) {
+		result = get_parsed_root( parser->pda_run,
+				parser->pda_run->stop_target > 0 );
+	}
+	else if ( field == 1 ) {
+		struct pda_run *run = parser->pda_run;
+		result = run->parse_error_text;
+	}
+	else {
+		assert( false );
+	}
+
+	return result;
+}
+
+/* Stub: the split of the list head (field 0) and tail (field 1) values is
+ * commented out, so this always returns null. Asserts that field is 0 or
+ * 1. */
+tree_t *get_list_mem_split( program_t *prg, list_t *list, word_t field )
+{
+ tree_t *sv = 0;
+ switch ( field ) {
+ case 0:
+// sv = splitTree( prg, list->head->value );
+// list->head->value = sv;
+ break;
+ case 1:
+// sv = splitTree( prg, list->tail->value );
+// list->tail->value = sv;
+ break;
+ default:
+ assert( false );
+ break;
+ }
+ return sv;
+}
+
+
+/* Compiled out with #if 0. Would report whether map_insert_key() returned
+ * an element; storing the element value is commented out. */
+#if 0
+int map_insert( program_t *prg, map_t *map, tree_t *key, tree_t *element )
+{
+ map_el_t *map_el = map_insert_key( prg, map, key, 0 );
+
+ if ( map_el != 0 ) {
+ //mapEl->tree = element;
+ return true;
+ }
+
+ return false;
+}
+#endif
+
+/* Compiled out with #if 0. Would re-insert a key and assert that the
+ * insertion produced an element; storing the value is commented out. */
+#if 0
+void map_unremove( program_t *prg, map_t *map, tree_t *key, tree_t *element )
+{
+ map_el_t *map_el = map_insert_key( prg, map, key, 0 );
+ assert( map_el != 0 );
+ //mapEl->tree = element;
+}
+#endif
+
+/* Compiled out with #if 0. Would detach the element stored under key and
+ * free it; returning the stored value is commented out, so null would be
+ * returned. */
+#if 0
+tree_t *map_uninsert( program_t *prg, map_t *map, tree_t *key )
+{
+ map_el_t *el = map_detach_by_key( prg, map, key );
+// tree_t *val = el->tree;
+ map_el_free( prg, el );
+// return val;
+ return 0;
+}
+#endif
+
+/* Compiled out with #if 0. Would call map_insert_key(); the code that
+ * stores the element or overwrites an existing value is commented out, so
+ * old_tree would always be null. */
+#if 0
+tree_t *map_store( program_t *prg, map_t *map, tree_t *key, tree_t *element )
+{
+ tree_t *old_tree = 0;
+ map_el_t *el_in_tree = 0;
+ map_el_t *map_el = map_insert_key( prg, map, key, &el_in_tree );
+
+// if ( mapEl != 0 )
+// mapEl->tree = element;
+// else {
+// /* Element with key exists. Overwriting the value. */
+// oldTree = elInTree->tree;
+// elInTree->tree = element;
+// }
+
+ return old_tree;
+}
+#endif
+
+/* Depth-first search of a kid list for the first tree with the given id.
+ * Each kid is tested itself, then its grammar children are searched, then
+ * the search moves on to the next sibling. Returns null when nothing
+ * matches. kid must not be null. */
+static tree_t *tree_search_kid( program_t *prg, kid_t *kid, long id )
+{
+	do {
+		kid_t *child;
+
+		/* This node the one? */
+		if ( kid->tree->id == id )
+			return kid->tree;
+
+		/* Descend before moving sideways. */
+		child = tree_child( prg, kid->tree );
+		if ( child != 0 ) {
+			tree_t *found = tree_search_kid( prg, child, id );
+			if ( found != 0 )
+				return found;
+		}
+
+		kid = kid->next;
+	} while ( kid != 0 );
+
+	return 0;
+}
+
+/* Find the first tree with the given id, testing the tree itself first and
+ * then searching its grammar children depth-first. Returns null when
+ * nothing matches. */
+tree_t *tree_search( program_t *prg, tree_t *tree, long id )
+{
+	kid_t *child;
+
+	if ( tree->id == id )
+		return tree;
+
+	child = tree_child( prg, tree );
+	return child != 0 ? tree_search_kid( prg, child, id ) : 0;
+}
+
+/* Depth-first search of a kid list for the first tree whose token data
+ * carries a location. Each kid is tested itself, then its grammar children
+ * are searched, then the search moves on to the next sibling. Returns null
+ * when no location is found. kid must not be null. */
+static location_t *loc_search_kid( program_t *prg, kid_t *kid )
+{
+	do {
+		kid_t *child;
+
+		/* This node the one? */
+		if ( kid->tree->tokdata != 0 && kid->tree->tokdata->location != 0 )
+			return kid->tree->tokdata->location;
+
+		/* Descend before moving sideways. */
+		child = tree_child( prg, kid->tree );
+		if ( child != 0 ) {
+			location_t *found = loc_search_kid( prg, child );
+			if ( found != 0 )
+				return found;
+		}
+
+		kid = kid->next;
+	} while ( kid != 0 );
+
+	return 0;
+}
+
+/* Find the first location in a tree: the tree's own token data is checked
+ * first, then its grammar children are searched depth-first. Returns null
+ * when no location is found. */
+static location_t *loc_search( program_t *prg, tree_t *tree )
+{
+	kid_t *child;
+
+	if ( tree->tokdata != 0 && tree->tokdata->location != 0 )
+		return tree->tokdata->location;
+
+	child = tree_child( prg, tree );
+	return child != 0 ? loc_search_kid( prg, child ) : 0;
+}
+
+/* Externally visible wrapper around the file-local loc_search(): returns
+ * the first location found in the tree, or null if there is none. */
+struct colm_location *colm_find_location( program_t *prg, tree_t *tree )
+{
+ return loc_search( prg, tree );
+}
+
+/* Print a tree into a temporary collect buffer and return the text as a
+ * newly allocated string head. A non-zero `attrs` selects
+ * colm_print_tree_collect_a() instead of colm_print_tree_collect(); `trim`
+ * is passed through to the printer. */
+head_t *tree_to_str( program_t *prg, tree_t **sp, tree_t *tree, int trim, int attrs )
+{
+	str_collect_t buf;
+	head_t *text;
+
+	init_str_collect( &buf );
+
+	if ( !attrs )
+		colm_print_tree_collect( prg, sp, &buf, tree, trim );
+	else
+		colm_print_tree_collect_a( prg, sp, &buf, tree, trim );
+
+	/* Build the result string before the collect buffer is destroyed. */
+	text = string_alloc_full( prg, buf.data, buf.length );
+	str_collect_destroy( &buf );
+
+	return text;
+}
+
diff --git a/src/tree.h b/src/tree.h
new file mode 100644
index 00000000..97833c6f
--- /dev/null
+++ b/src/tree.h
@@ -0,0 +1,401 @@
+/*
+ * Copyright 2010-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _COLM_TREE_H
+#define _COLM_TREE_H
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#include <colm/colm.h>
+#include <colm/type.h>
+#include <colm/input.h>
+#include <colm/internal.h>
+#include <colm/defs.h>
+
+/* NOTE(review): not used in the visible part of this header; presumably a
+ * sentinel meaning that indentation is disabled -- confirm at the use
+ * sites. */
+#define COLM_INDENT_OFF -1
+
+/* code_t: an unsigned byte. NOTE(review): presumably one unit of bytecode,
+ * see the `code_t *resume` member of the user iterator -- confirm. */
+typedef unsigned char code_t;
+/* word_t: an unsigned integer type with the same size as a pointer, chosen
+ * with the SIZEOF_* configuration macros. Compilation stops with an error
+ * if neither unsigned long nor unsigned long long is pointer-sized. */
+#if SIZEOF_UNSIGNED_LONG == SIZEOF_VOID_P
+ typedef unsigned long word_t;
+#elif SIZEOF_UNSIGNED_LONG_LONG == SIZEOF_VOID_P
+ typedef unsigned long long word_t;
+#else
+ #error "The type word_t was not declared"
+#endif
+/* half_t: declared as unsigned long regardless of the pointer size. */
+typedef unsigned long half_t;
+
+struct bindings;
+struct function_info;
+
+typedef struct colm_tree tree_t;
+#include <colm/struct.h>
+
+/* A source position attached to token data (see the location member of
+ * head_t). NOTE(review): name is presumably the input/file name and byte
+ * the byte offset; whether line and column count from 0 or 1 is not
+ * visible here -- confirm. */
+typedef struct colm_location
+{
+ const char *name;
+ long line;
+ long column;
+ long byte;
+} location_t;
+
+/* Header located just before string data. */
+typedef struct colm_data
+{
+ /* Start of the text and its length. */
+ const char *data;
+ long length;
+ /* Position information for the text; may be null (callers test it
+ * before use). */
+ struct colm_location *location;
+} head_t;
+
+/* Kid: used to implement a list of child trees. Kids are never shared. The
+ * trees they point to may be shared. This struct is also used on the stack by
+ * pushing two words and taking a pointer. We use it to take references to
+ * trees. Do not modify this struct. */
+typedef struct colm_kid
+{
+ /* The tree held in this list position. */
+ struct colm_tree *tree;
+ /* The following kid in the list, null at the end. */
+ struct colm_kid *next;
+} kid_t;
+
+/* Reference chains. Allocated on the stack. The chain goes up the list of kids
+ * to the root of the reference and tells us which trees we need to split so
+ * they are not shared before we can modify a node in a tree. Do not change
+ * this struct. */
+typedef struct colm_ref
+{
+ /* The kid this link of the chain refers to. */
+ struct colm_kid *kid;
+ /* The next link up the chain, null at the root end. */
+ struct colm_ref *next;
+} ref_t;
+
+/* A key tree together with a value tree, returned by value. */
+struct tree_pair
+{
+ tree_t *key;
+ tree_t *val;
+};
+
+/* A node of the parser's own tree. Nodes are linked through child (first
+ * child) and next (following sibling). */
+typedef struct colm_parse_tree
+{
+ short id;
+ unsigned short flags;
+
+ struct colm_parse_tree *child;
+ struct colm_parse_tree *next;
+ struct colm_parse_tree *left_ignore;
+ struct colm_parse_tree *right_ignore;
+ /* NOTE(review): presumably the kid holding the tree_t that corresponds
+ * to this parse-tree node -- confirm against the parser code. */
+ kid_t *shadow;
+
+ /* Parsing algorithm. */
+ long state;
+ short cause_reduce;
+
+ /* Retry vars. Might be able to unify lower and upper. */
+ long retry_region;
+ char retry_lower;
+ char retry_upper;
+} parse_tree_t;
+
+/* A tree node that carries a colm_value_t payload. Tree pointers are cast
+ * to this type, so the leading members must keep the layout of tree_t. */
+typedef struct colm_pointer
+{
+ /* Must overlay tree_t. */
+ short id;
+ unsigned short flags;
+ long refs;
+ kid_t *child;
+
+ /* The payload. */
+ colm_value_t value;
+} pointer_t;
+
+/* A tree node that carries string data. Tree pointers are cast to this
+ * type, so the leading members must keep the layout of tree_t. */
+typedef struct colm_str
+{
+ /* Must overlay tree_t. */
+ short id;
+ unsigned short flags;
+ long refs;
+ kid_t *child;
+
+ /* The string payload. */
+ head_t *value;
+} str_t;
+
+/*
+ * Maps
+ */
+/* Describes one generic type. key_type and value_type take values of enum
+ * TYPE. NOTE(review): despite the "Maps" heading this is presumably shared
+ * by lists and parsers too (see parser_id and the generic_id member of the
+ * generic iterator); the *_offset members are presumably offsets into the
+ * element struct identified by el_struct_id -- confirm. */
+struct generic_info
+{
+ long type;
+
+ long el_struct_id;
+ long el_offset;
+
+ enum TYPE key_type;
+ long key_offset;
+
+ enum TYPE value_type;
+ long value_offset;
+
+ long parser_id;
+};
+
+/* Tag stored in the first member (`type`) of every iterator struct.
+ * Values start at 1, so IT_RevTree is 2 and IT_User is 3. */
+enum IterType
+{
+ IT_Tree = 1,
+ IT_RevTree,
+ IT_User
+};
+
+/* State of a tree iterator. The reverse tree iterator struct repeats these
+ * members in the same order, so keep the two in step.
+ * NOTE(review): root_ref presumably refers to the tree being iterated and
+ * ref to the current item; the *_size members presumably count stack
+ * words -- confirm against the iterator implementation. */
+typedef struct colm_tree_iter
+{
+ enum IterType type;
+ ref_t root_ref;
+ ref_t ref;
+ long search_id;
+ tree_t **stack_root;
+ long arg_size;
+ long yield_size;
+ long root_size;
+} tree_iter_t;
+
+/* State of an iterator over a generic container. Only the first three
+ * members (type, root_ref, ref) line up with the tree iterator struct;
+ * there is no search_id and generic_id comes last.
+ * NOTE(review): generic_id presumably selects a generic_info entry --
+ * confirm. */
+typedef struct colm_generic_iter
+{
+ enum IterType type;
+ ref_t root_ref;
+ ref_t ref;
+ tree_t **stack_root;
+ long arg_size;
+ long yield_size;
+ long root_size;
+ long generic_id;
+} generic_iter_t;
+
+/* This must overlay tree iter because some of the same bytecodes are used. */
+typedef struct colm_rev_tree_iter
+{
+ /* Same members, in the same order, as the tree iterator struct. */
+ enum IterType type;
+ ref_t root_ref;
+ ref_t ref;
+ long search_id;
+ tree_t **stack_root;
+ long arg_size;
+ long yield_size;
+ long root_size;
+
+ /* For detecting a split at the leaf. */
+ kid_t *kid_at_yield;
+ long children;
+} rev_tree_iter_t;
+
+/* State of a user-defined iterator. Unlike the tree iterators it has no
+ * root_ref, so after `type` its layout differs from theirs.
+ * NOTE(review): resume is presumably the bytecode address at which the
+ * iterator function continues and frame its stack frame -- confirm. */
+typedef struct colm_user_iter
+{
+ enum IterType type;
+ /* The current item. */
+ ref_t ref;
+ tree_t **stack_root;
+ long arg_size;
+ long yield_size;
+ long root_size;
+
+ code_t *resume;
+ tree_t **frame;
+ long search_id;
+} user_iter_t;
+
+void colm_tree_upref_( tree_t *tree );
+void colm_tree_upref( struct colm_program *prg, tree_t *tree );
+void colm_tree_downref( struct colm_program *prg, tree_t **sp, tree_t *tree );
+long colm_cmp_tree( struct colm_program *prg, const tree_t *tree1, const tree_t *tree2 );
+
+tree_t *push_right_ignore( struct colm_program *prg, tree_t *push_to, tree_t *right_ignore );
+tree_t *push_left_ignore( struct colm_program *prg, tree_t *push_to, tree_t *left_ignore );
+tree_t *pop_right_ignore( struct colm_program *prg, tree_t **sp,
+ tree_t *pop_from, tree_t **right_ignore );
+tree_t *pop_left_ignore( struct colm_program *prg, tree_t **sp,
+ tree_t *pop_from, tree_t **left_ignore );
+tree_t *tree_left_ignore( struct colm_program *prg, tree_t *tree );
+tree_t *tree_right_ignore( struct colm_program *prg, tree_t *tree );
+kid_t *tree_left_ignore_kid( struct colm_program *prg, tree_t *tree );
+kid_t *tree_right_ignore_kid( struct colm_program *prg, tree_t *tree );
+kid_t *tree_child( struct colm_program *prg, const tree_t *tree );
+kid_t *tree_attr( struct colm_program *prg, const tree_t *tree );
+kid_t *kid_list_concat( kid_t *list1, kid_t *list2 );
+kid_t *tree_extract_child( struct colm_program *prg, tree_t *tree );
+kid_t *reverse_kid_list( kid_t *kid );
+
+tree_t *colm_construct_pointer( struct colm_program *prg, colm_value_t value );
+tree_t *colm_construct_term( struct colm_program *prg, word_t id, head_t *tokdata );
+tree_t *colm_construct_tree( struct colm_program *prg, kid_t *kid,
+ tree_t **bindings, long pat );
+tree_t *colm_construct_object( struct colm_program *prg, kid_t *kid,
+ tree_t **bindings, long lang_el_id );
+tree_t *colm_construct_token( struct colm_program *prg, tree_t **args, long nargs );
+
+int test_false( struct colm_program *prg, tree_t *tree );
+tree_t *make_tree( struct colm_program *prg, tree_t **args, long nargs );
+stream_t *open_file( struct colm_program *prg, tree_t *name, tree_t *mode );
+stream_t *colm_stream_open_file( struct colm_program *prg, tree_t *name, tree_t *mode );
+stream_t *colm_stream_open_fd( struct colm_program *prg, char *name, long fd );
+kid_t *copy_ignore_list( struct colm_program *prg, kid_t *ignore_header );
+kid_t *copy_kid_list( struct colm_program *prg, kid_t *kid_list );
+void colm_stream_free( struct colm_program *prg, stream_t *s );
+tree_t *colm_copy_tree( struct colm_program *prg, tree_t *tree,
+ kid_t *old_next_down, kid_t **new_next_down );
+
+colm_value_t colm_get_pointer_val( tree_t *pointer );
+tree_t *colm_tree_get_field( tree_t *tree, word_t field );
+tree_t *get_field_split( struct colm_program *prg, tree_t *tree, word_t field );
+tree_t *get_rhs_el( struct colm_program *prg, tree_t *lhs, long position );
+void set_rhs_el( program_t *prg, tree_t *lhs, long position, tree_t *value );
+kid_t *get_rhs_el_kid( struct colm_program *prg, tree_t *lhs, long position );
+parse_tree_t *get_rhs_parse_tree( struct colm_program *prg,
+ parse_tree_t *lhs, long position );
+void colm_tree_set_field( struct colm_program *prg, tree_t *tree, long field, tree_t *value );
+
+void set_triter_cur( struct colm_program *prg, tree_iter_t *iter, tree_t *tree );
+void set_uiter_cur( struct colm_program *prg, user_iter_t *uiter, tree_t *tree );
+void ref_set_value( struct colm_program *prg, tree_t **sp, ref_t *ref, tree_t *v );
+tree_t *tree_search( struct colm_program *prg, tree_t *tree, long id );
+
+int match_pattern( tree_t **bindings, struct colm_program *prg,
+ long pat, kid_t *kid, int check_next );
+tree_t *tree_iter_deref_cur( tree_iter_t *iter );
+
+/* For making references of attributes. */
+kid_t *get_field_kid( tree_t *tree, word_t field );
+
+tree_t *copy_real_tree( struct colm_program *prg, tree_t *tree,
+ kid_t *old_next_down, kid_t **new_next_down );
+void split_iter_cur( struct colm_program *prg, tree_t ***psp, tree_iter_t *iter );
+tree_t *set_list_mem( list_t *list, half_t field, tree_t *value );
+
+void list_push_tail( struct colm_program *prg, list_t *list, tree_t *val );
+void list_push_head( struct colm_program *prg, list_t *list, tree_t *val );
+tree_t *list_remove_end( struct colm_program *prg, list_t *list );
+tree_t *list_remove_head( struct colm_program *prg, list_t *list );
+tree_t *get_list_mem_split( struct colm_program *prg, list_t *list, word_t field );
+tree_t *get_parser_mem( parser_t *parser, word_t field );
+
+tree_t *tree_iter_advance( struct colm_program *prg, tree_t ***psp, tree_iter_t *iter );
+tree_t *tree_iter_next_child( struct colm_program *prg, tree_t ***psp, tree_iter_t *iter );
+tree_t *tree_rev_iter_prev_child( struct colm_program *prg, tree_t ***psp, rev_tree_iter_t *iter );
+tree_t *tree_iter_next_repeat( struct colm_program *prg, tree_t ***psp, tree_iter_t *iter );
+tree_t *tree_iter_prev_repeat( struct colm_program *prg, tree_t ***psp, tree_iter_t *iter );
+
+/* An automatically grown buffer for collecting tokens. Always reuses space;
+ * never down resizes. */
+typedef struct colm_str_collect
+{
+ /* The collected text and the number of bytes of it in use. */
+ char *data;
+ /* NOTE(review): presumably the capacity of data in bytes -- confirm. */
+ int allocated;
+ int length;
+ struct indent_impl indent;
+} str_collect_t;
+
+void init_str_collect( str_collect_t *collect );
+void str_collect_destroy( str_collect_t *collect );
+void str_collect_append( str_collect_t *collect, const char *data, long len );
+void str_collect_clear( str_collect_t *collect );
+tree_t *tree_trim( struct colm_program *prg, tree_t **sp, tree_t *tree );
+
+void colm_print_tree_collect( struct colm_program *prg, tree_t **sp,
+ str_collect_t *collect, tree_t *tree, int trim );
+
+void colm_print_tree_collect_a( struct colm_program *prg, tree_t **sp,
+ str_collect_t *collect, tree_t *tree, int trim );
+
+void colm_print_tree_file( struct colm_program *prg, tree_t **sp,
+ struct stream_impl_data *impl, tree_t *tree, int trim );
+void colm_print_xml_stdout( struct colm_program *prg, tree_t **sp,
+ struct stream_impl_data *impl, tree_t *tree, int comm_attr, int trim );
+
+void colm_postfix_tree_collect( struct colm_program *prg, tree_t **sp,
+ str_collect_t *collect, tree_t *tree, int trim );
+void colm_postfix_tree_file( struct colm_program *prg, tree_t **sp,
+ struct stream_impl *impl, tree_t *tree, int trim );
+
+/*
+ * Iterators.
+ */
+
+user_iter_t *colm_uiter_create( struct colm_program *prg, tree_t ***psp,
+ struct function_info *fi, long search_id );
+void uiter_init( struct colm_program *prg, tree_t **sp, user_iter_t *uiter,
+ struct function_info *fi, int revert_on );
+
+void colm_init_tree_iter( tree_iter_t *tree_iter, tree_t **stack_root,
+ long arg_size, long root_size, const ref_t *root_ref, int search_id );
+void colm_init_rev_tree_iter( rev_tree_iter_t *rev_triter, tree_t **stack_root,
+ long arg_size, long root_size, const ref_t *root_ref, int search_id, int children );
+void colm_init_user_iter( user_iter_t *user_iter, tree_t **stack_root, long root_size,
+ long arg_size, long search_id );
+
+void colm_tree_iter_destroy( struct colm_program *prg,
+ tree_t ***psp, tree_iter_t *iter );
+
+void colm_rev_tree_iter_destroy( struct colm_program *prg,
+ tree_t ***psp, rev_tree_iter_t *iter );
+
+void colm_uiter_destroy( struct colm_program *prg, tree_t ***psp, user_iter_t *uiter );
+void colm_uiter_unwind( struct colm_program *prg, tree_t ***psp, user_iter_t *uiter );
+
+tree_t *cast_tree( struct colm_program *prg, int lang_el_id, tree_t *tree );
+
+void colm_init_list_iter( generic_iter_t *list_iter, tree_t **stack_root,
+ long arg_size, long root_size, const ref_t *root_ref, int generic_id );
+void colm_list_iter_destroy( struct colm_program *prg,
+ tree_t ***psp, generic_iter_t *iter );
+
+tree_t *colm_list_iter_advance( struct colm_program *prg,
+ tree_t ***psp, generic_iter_t *iter );
+tree_t *colm_rev_list_iter_advance( struct colm_program *prg,
+ tree_t ***psp, generic_iter_t *iter );
+
+tree_t *colm_list_iter_deref_cur( struct colm_program *prg, generic_iter_t *iter );
+void colm_list_append( struct colm_list *list, struct colm_list_el *new_el );
+void colm_list_prepend( struct colm_list *list, struct colm_list_el *new_el );
+
+void colm_vlist_append( struct colm_program *prg, list_t *list, value_t value );
+void colm_vlist_prepend( struct colm_program *prg, list_t *list, value_t value );
+value_t colm_vlist_detach_head( struct colm_program *prg, list_t *list );
+value_t colm_vlist_detach_tail( struct colm_program *prg, list_t *list );
+
+value_t colm_viter_deref_cur( struct colm_program *prg, generic_iter_t *iter );
+
+str_t *string_prefix( program_t *prg, str_t *str, long len );
+str_t *string_suffix( program_t *prg, str_t *str, long pos );
+head_t *string_alloc_full( struct colm_program *prg, const char *data, long length );
+tree_t *construct_string( struct colm_program *prg, head_t *s );
+
+void free_kid_list( program_t *prg, kid_t *kid );
+
+void colm_print_tree_collect_xml( program_t *prg, tree_t **sp,
+ str_collect_t *collect, tree_t *tree, int trim );
+
+void colm_print_tree_collect_xml_ac( program_t *prg, tree_t **sp,
+ str_collect_t *collect, tree_t *tree, int trim );
+
+head_t *tree_to_str( program_t *prg, tree_t **sp, tree_t *tree, int trim, int attrs );
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif /* _COLM_TREE_H */
+
diff --git a/src/type.h b/src/type.h
new file mode 100644
index 00000000..dca8f2ad
--- /dev/null
+++ b/src/type.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright 2007-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _COLM_TYPE_H
+#define _COLM_TYPE_H
+
+/* Type tags with explicitly assigned values 0x00 through 0x0b. Used, among
+ * other places, for the key_type and value_type members of struct
+ * generic_info. Because the values are spelled out, do not renumber
+ * existing entries when adding new ones. */
+enum TYPE
+{
+ TYPE_NOTYPE = 0x00,
+ TYPE_NIL = 0x01,
+ TYPE_TREE = 0x02,
+ TYPE_REF = 0x03,
+ TYPE_ITER = 0x04,
+ TYPE_STRUCT = 0x05,
+ TYPE_GENERIC = 0x06,
+ TYPE_INT = 0x07,
+ TYPE_BOOL = 0x08,
+ TYPE_LIST_PTRS = 0x09,
+ TYPE_MAP_PTRS = 0x0a,
+ TYPE_VOID = 0x0b
+};
+
+#endif /* _COLM_TYPE_H */
+
diff --git a/src/version.h.cmake.in b/src/version.h.cmake.in
new file mode 100644
index 00000000..0b45a8f0
--- /dev/null
+++ b/src/version.h.cmake.in
@@ -0,0 +1,9 @@
+/* version.h Generated from version.h.cmake.in by cmake */
+
+#ifndef _COLM_VERSION_H
+#define _COLM_VERSION_H
+
+/* Each #cmakedefine line below is rewritten by CMake's configure_file():
+ * into a #define with the substituted value when the CMake variable of the
+ * same name is set to a non-false value, and into a commented-out #undef
+ * otherwise. */
+#cmakedefine COLM_VERSION "@COLM_VERSION@"
+#cmakedefine COLM_PUBDATE "@COLM_PUBDATE@"
+
+#endif /* _COLM_VERSION_H */