diff options
author | Adrian Thurston <thurston@colm.net> | 2020-03-14 15:29:52 +0200 |
---|---|---|
committer | Adrian Thurston <thurston@colm.net> | 2020-03-14 15:29:52 +0200 |
commit | f653735830d537715f2885bd832cf04851d35401 (patch) | |
tree | 95e6551e39407543366d4f49aedf7b78c6e8bbe1 /src | |
parent | bcc54d5df10cf425e7134b06f70d7ffe1abee4e4 (diff) | |
download | colm-f653735830d537715f2885bd832cf04851d35401.tar.gz |
moved source files into commit repository
Diffstat (limited to 'src')
-rw-r--r-- | src/.gitignore | 37 | ||||
-rw-r--r-- | src/CMakeLists.txt | 188 | ||||
-rw-r--r-- | src/ChangeLog | 92 | ||||
-rw-r--r-- | src/Makefile.am | 207 | ||||
-rw-r--r-- | src/buffer.h | 57 | ||||
-rw-r--r-- | src/bytecode.c | 5025 | ||||
-rw-r--r-- | src/bytecode.h | 678 | ||||
-rw-r--r-- | src/closure.cc | 458 | ||||
-rw-r--r-- | src/codegen.cc | 62 | ||||
-rw-r--r-- | src/codevect.c | 183 | ||||
-rw-r--r-- | src/colm-config.cmake.in | 3 | ||||
-rw-r--r-- | src/colm-wrap.sh | 79 | ||||
-rw-r--r-- | src/colm.h | 159 | ||||
-rw-r--r-- | src/colm.lm | 910 | ||||
-rw-r--r-- | src/colmex.h | 109 | ||||
-rw-r--r-- | src/commit.c | 111 | ||||
-rw-r--r-- | src/compiler.cc | 1263 | ||||
-rw-r--r-- | src/compiler.h | 1158 | ||||
-rw-r--r-- | src/config.h.cmake.in | 1 | ||||
-rw-r--r-- | src/consinit.cc | 922 | ||||
-rw-r--r-- | src/consinit.h | 113 | ||||
-rw-r--r-- | src/cstring.h | 862 | ||||
-rw-r--r-- | src/ctinput.cc | 570 | ||||
-rw-r--r-- | src/debug.c | 82 | ||||
-rw-r--r-- | src/debug.h | 65 | ||||
-rw-r--r-- | src/declare.cc | 1623 | ||||
-rw-r--r-- | src/defs.h.cmake.in | 11 | ||||
-rw-r--r-- | src/defs.h.in | 40 | ||||
-rw-r--r-- | src/dotgen.cc | 117 | ||||
-rw-r--r-- | src/dotgen.h | 52 | ||||
-rw-r--r-- | src/exports.cc | 260 | ||||
-rw-r--r-- | src/fsmap.cc | 806 | ||||
-rw-r--r-- | src/fsmattach.cc | 427 | ||||
-rw-r--r-- | src/fsmbase.cc | 603 | ||||
-rw-r--r-- | src/fsmcodegen.cc | 918 | ||||
-rw-r--r-- | src/fsmcodegen.h | 211 | ||||
-rw-r--r-- | src/fsmexec.cc | 220 | ||||
-rw-r--r-- | src/fsmgraph.cc | 981 | ||||
-rw-r--r-- | src/fsmgraph.h | 1321 | ||||
-rw-r--r-- | src/fsmmin.cc | 737 | ||||
-rw-r--r-- | src/fsmstate.cc | 441 | ||||
-rw-r--r-- | src/global.h | 110 | ||||
-rw-r--r-- | src/input.c | 759 | ||||
-rw-r--r-- | src/input.h | 232 | ||||
-rw-r--r-- | src/internal.h | 33 | ||||
-rw-r--r-- | src/iter.c | 648 | ||||
-rw-r--r-- | src/keyops.h | 196 | ||||
-rw-r--r-- | src/list.c | 255 | ||||
-rw-r--r-- | src/lmparse.kh | 86 | ||||
-rw-r--r-- | src/lmparse.kl | 2139 | ||||
-rw-r--r-- | src/lmscan.h | 104 | ||||
-rw-r--r-- | src/lmscan.rl | 637 | ||||
-rw-r--r-- | src/loadboot2.cc | 3 | ||||
-rw-r--r-- | src/loadcolm.cc | 2 | ||||
-rw-r--r-- | src/loadfinal.cc | 2978 | ||||
-rw-r--r-- | src/loadfinal.h | 31 | ||||
-rw-r--r-- | src/loadinit.cc | 416 | ||||
-rw-r--r-- | src/loadinit.h | 77 | ||||
-rw-r--r-- | src/lookup.cc | 323 | ||||
-rw-r--r-- | src/main.cc | 836 | ||||
-rw-r--r-- | src/map.c | 876 | ||||
-rw-r--r-- | src/map.cc | 27 | ||||
-rw-r--r-- | src/map.h | 86 | ||||
-rw-r--r-- | src/parser.cc | 1128 | ||||
-rw-r--r-- | src/parser.h | 197 | ||||
-rw-r--r-- | src/parsetree.cc | 1495 | ||||
-rw-r--r-- | src/parsetree.h | 3607 | ||||
-rw-r--r-- | src/pcheck.cc | 156 | ||||
-rw-r--r-- | src/pcheck.h | 50 | ||||
-rw-r--r-- | src/pdabuild.cc | 2205 | ||||
-rw-r--r-- | src/pdacodegen.cc | 698 | ||||
-rw-r--r-- | src/pdacodegen.h | 107 | ||||
-rw-r--r-- | src/pdagraph.cc | 533 | ||||
-rw-r--r-- | src/pdagraph.h | 517 | ||||
-rw-r--r-- | src/pdarun.c | 2265 | ||||
-rw-r--r-- | src/pdarun.h | 471 | ||||
-rw-r--r-- | src/pool.c | 248 | ||||
-rw-r--r-- | src/pool.h | 73 | ||||
-rw-r--r-- | src/print.c | 775 | ||||
-rw-r--r-- | src/prog.lm | 88 | ||||
-rw-r--r-- | src/program.c | 333 | ||||
-rw-r--r-- | src/program.h | 186 | ||||
-rw-r--r-- | src/redbuild.cc | 562 | ||||
-rw-r--r-- | src/redbuild.h | 161 | ||||
-rw-r--r-- | src/redfsm.cc | 1049 | ||||
-rw-r--r-- | src/redfsm.h | 479 | ||||
-rw-r--r-- | src/reduce.cc | 954 | ||||
-rw-r--r-- | src/resolve.cc | 988 | ||||
-rw-r--r-- | src/rtvector.h | 35 | ||||
-rw-r--r-- | src/stream.c | 828 | ||||
-rw-r--r-- | src/string.c | 281 | ||||
-rw-r--r-- | src/struct.c | 185 | ||||
-rw-r--r-- | src/struct.h | 180 | ||||
-rw-r--r-- | src/synthesis.cc | 3370 | ||||
-rw-r--r-- | src/tree.c | 1655 | ||||
-rw-r--r-- | src/tree.h | 401 | ||||
-rw-r--r-- | src/type.h | 43 | ||||
-rw-r--r-- | src/version.h.cmake.in | 9 |
98 files changed, 59322 insertions, 6 deletions
diff --git a/src/.gitignore b/src/.gitignore index 8b728643..082d9db4 100644 --- a/src/.gitignore +++ b/src/.gitignore @@ -1,11 +1,40 @@ +/*.o +/*.lo +/Makefile.in +/Makefile +/.*.d +/colm +/defs.h /config.h -/config.h.in -/config.h.in~ +/version.h +/tags +/.deps +/libcolm.a +/libcolm.la +/.libs /stamp-h1 /stamp-h2 -/Makefile +/bootstrap[012] +/gen +/tar +/include + +/libprog.a /CMakeFiles /cmake_install.cmake -/install_manifest.txt +/*.exe + +# Common testing files. +/tmp.lm +/tmp.c +/tmp +/input[0-9] +/input +/output +/log + +/colm-wrap + +/config.h.in diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 7afa276d..3d855523 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1 +1,187 @@ -configure_file(config.h.cmake.in config.h @ONLY) +# Package name +set(_PACKAGE_NAME colm) + +# Read project configuration from ../configure.ac file +file(STRINGS ../configure.ac _PROJECT_CONFIGS + REGEX "(COLM_VERSION=)|(COLM_PUBDATE=)") +foreach(_PROJECT_CONFIG ${_PROJECT_CONFIGS}) + if(_PROJECT_CONFIG MATCHES "COLM_VERSION=\"([^\"]+)") + string(STRIP ${CMAKE_MATCH_1} COLM_VERSION) + endif() + if(_PROJECT_CONFIG MATCHES "COLM_PUBDATE=\"([^\"]+)") + string(STRIP ${CMAKE_MATCH_1} COLM_PUBDATE) + endif() +endforeach() + +# Generate headers +configure_file(version.h.cmake.in version.h @ONLY) +configure_file(defs.h.cmake.in defs.h @ONLY) +configure_file(colm-config.cmake.in +"${PROJECT_BINARY_DIR}/${_PACKAGE_NAME}-config.cmake" @ONLY) + +# Runtime headers +set(RUNTIME_HDR + bytecode.h debug.h pool.h input.h + pdarun.h map.h type.h tree.h struct.h program.h colm.h internal.h) + +foreach(_hdr defs.h) + list(APPEND RUNTIME_HDR "${CMAKE_CURRENT_BINARY_DIR}/${_hdr}") +endforeach() + +# Other CMake modules +include(GNUInstallDirs) + +# libcolm + +add_library(libcolm + map.c pdarun.c list.c input.c stream.c debug.c + codevect.c pool.c string.c tree.c iter.c + bytecode.c program.c struct.c commit.c + print.c) + +target_include_directories(libcolm + PUBLIC + 
$<BUILD_INTERFACE:${CMAKE_CURRENT_LIST_DIR}/..> + $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}> + $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}/..> + $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}/../src> + $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>) + +set_target_properties(libcolm PROPERTIES + OUTPUT_NAME colm) + +# libprog + +add_library(libprog + buffer.h bytecode.h colm.h debug.h dotgen.h fsmcodegen.h fsmgraph.h + input.h keyops.h map.h compiler.h + parsetree.h pcheck.h pdacodegen.h pdagraph.h pdarun.h pool.h redbuild.h + redfsm.h rtvector.h tree.h global.h colm.h parser.h cstring.h + internal.h + resolve.cc lookup.cc synthesis.cc parsetree.cc + fsmstate.cc fsmbase.cc fsmattach.cc fsmmin.cc + fsmgraph.cc pdagraph.cc pdabuild.cc pdacodegen.cc fsmcodegen.cc + redfsm.cc fsmexec.cc redbuild.cc closure.cc fsmap.cc + dotgen.cc pcheck.cc ctinput.cc declare.cc codegen.cc + exports.cc compiler.cc parser.cc reduce.cc) + +target_include_directories(libprog + PUBLIC + $<BUILD_INTERFACE:${CMAKE_CURRENT_LIST_DIR}> + $<BUILD_INTERFACE:${CMAKE_CURRENT_LIST_DIR}/..> + $<BUILD_INTERFACE:${CMAKE_CURRENT_LIST_DIR}/../aapl> + $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}> + $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}/..>) + +set_target_properties(libprog PROPERTIES + OUTPUT_NAME prog) + +# bootstrap0 + +add_executable(bootstrap0 + consinit.cc consinit.h main.cc) + +target_link_libraries(bootstrap0 libprog libcolm) + +set_property(TARGET bootstrap0 APPEND PROPERTY + COMPILE_DEFINITIONS CONS_INIT ${common_COMPILE_DEFINITIONS}) + +set_property(TARGET bootstrap0 APPEND PROPERTY + COMPILE_FLAGS -fpermissive) + +# bootstrap1 + +make_directory("${CMAKE_CURRENT_BINARY_DIR}/gen") + +add_custom_command(OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/gen/parse1.c" + "${CMAKE_CURRENT_BINARY_DIR}/gen/if1.h" + "${CMAKE_CURRENT_BINARY_DIR}/gen/if1.cc" + COMMAND bootstrap0 + ARGS -c -o parse1.c -e if1.h -x if1.cc + WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/gen") + +add_executable(bootstrap1 + 
loadinit.h loadinit.cc main.cc + "${CMAKE_CURRENT_BINARY_DIR}/gen/parse1.c" + "${CMAKE_CURRENT_BINARY_DIR}/gen/if1.cc") + +target_link_libraries(bootstrap1 libprog libcolm) + +set_property(TARGET bootstrap1 APPEND PROPERTY + COMPILE_DEFINITIONS LOAD_INIT ${common_COMPILE_DEFINITIONS}) + +# bootstrap2 + +add_custom_command(OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/gen/parse2.c" + "${CMAKE_CURRENT_BINARY_DIR}/gen/if2.h" + "${CMAKE_CURRENT_BINARY_DIR}/gen/if2.cc" + COMMAND bootstrap1 + ARGS -c -o parse2.c -e if2.h -x if2.cc "${CMAKE_CURRENT_LIST_DIR}/colm.lm" + WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/gen") + +add_executable(bootstrap2 + main.cc loadboot2.cc loadfinal.h version.h + "${CMAKE_CURRENT_BINARY_DIR}/gen/parse2.c" + "${CMAKE_CURRENT_BINARY_DIR}/gen/if2.cc") + +target_link_libraries(bootstrap2 libprog libcolm) + +set_property(TARGET bootstrap2 APPEND PROPERTY + COMPILE_DEFINITIONS LOAD_COLM ${common_COMPILE_DEFINITIONS}) + +# colm + +add_custom_command(OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/gen/parse3.c" + "${CMAKE_CURRENT_BINARY_DIR}/gen/if3.h" + "${CMAKE_CURRENT_BINARY_DIR}/gen/if3.cc" + COMMAND bootstrap2 + ARGS -I "${CMAKE_CURRENT_LIST_DIR}" -c -o parse3.c -e if3.h -x if3.cc "${CMAKE_CURRENT_LIST_DIR}/prog.lm" + WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/gen") + +add_executable(colm + main.cc loadcolm.cc loadfinal.h version.h + "${CMAKE_CURRENT_BINARY_DIR}/gen/parse3.c" + "${CMAKE_CURRENT_BINARY_DIR}/gen/if3.cc") + +if(BUILD_STANDALONE) + if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + target_link_libraries(colm -static) + else() + message(FATAL_ERROR "Unsupported toolset for standalone build.") + endif() +endif() + +target_link_libraries(colm libprog libcolm) + +set_property(TARGET colm APPEND PROPERTY + COMPILE_DEFINITIONS LOAD_COLM ${common_COMPILE_DEFINITIONS}) + +if(${PROJECT_NAME}_MAKE_INSTALL) + if(NOT DEFINED CMAKE_INSTALL_CMAKEDIR) + set(CMAKE_INSTALL_CMAKEDIR + "${CMAKE_INSTALL_LIBDIR}/cmake/${_PACKAGE_NAME}" + CACHE STRING "CMake packages") + 
endif() + install(FILES ${RUNTIME_HDR} + DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/colm") + install(TARGETS libcolm colm + EXPORT ${_PACKAGE_NAME}-targets + RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" + LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" + ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}") + install(EXPORT ${_PACKAGE_NAME}-targets + NAMESPACE ${_PACKAGE_NAME}:: + DESTINATION "${CMAKE_INSTALL_CMAKEDIR}") + export(EXPORT ${_PACKAGE_NAME}-targets + NAMESPACE ${_PACKAGE_NAME}:: + FILE "${PROJECT_BINARY_DIR}/${_PACKAGE_NAME}-targets.cmake") + include(CMakePackageConfigHelpers) + write_basic_package_version_file( + "${PROJECT_BINARY_DIR}/${_PACKAGE_NAME}-config-version.cmake" + VERSION ${COLM_VERSION} + COMPATIBILITY AnyNewerVersion) + install(FILES + "${PROJECT_BINARY_DIR}/${_PACKAGE_NAME}-config.cmake" + "${PROJECT_BINARY_DIR}/${_PACKAGE_NAME}-config-version.cmake" + DESTINATION "${CMAKE_INSTALL_CMAKEDIR}") +endif() diff --git a/src/ChangeLog b/src/ChangeLog new file mode 100644 index 00000000..1479d2d5 --- /dev/null +++ b/src/ChangeLog @@ -0,0 +1,92 @@ +colm 0.12.0 - Feb 1, 2014 +------------------------- + * allow matching against reference types + * refcount fixes for setting references + * can use 'in' expr to search ref types + * can take refs of temps in expresssions + * error handling improvements + * fixed the colm input open check + * added a default capture name to all production definition rhs elements, + defaults to the type name + * allow var ref as the target of a send construct + * added the cast operator + * return nil from open if file open fails + * improvements to locals downrefs (large func offset overflow and iters) + * fixed the typeref for stdout and stderr, now working + * evaluate print arguments left to right + * improved separation of declare, resolve/lookup, and compile passes + * lookup of types in cons/pats uses the cons type or pattern tree namespace + * added the void type, useful for calling func in send expression + * can 
re-enter namespaces that already exist + * don't search parent scopes when qualifications are present + * added a new syntax for literal tokens; a single backtick starts the + literal, ends at whitespace or ] in second or greater position + * removed comma as separator in literal statement + * single-quoted strings are now treated like double-quoted strings; they + may appear in expressions and are concatenated with the other string forms + * new syntax for collect-ignore tokens "token <id> -" + * added the system function, which calls C's system + * fixes for contiguous stack regions that fix some segfaults + * don't use stack top offsets in print instructions + * allow make_tree anywhere + * don't use sp offset for make_tree, removes need for contiguous + * eliminated stack offset from make-token instruction + * print function names in call op debug stmts + +colm 0.11 - May 26, 2013 +------------------------ + * Require <> around ref and ptr type declarations (eg: ptr<type_ref>) + * Added production labels, which follow a production with the form + :Label. Labels are exposed in the C++ interface using an enum. They + are prefixed with _ to avoid conflict with member access functions. + * It's now possible to access Tree::tokdata from the C++ interface. + * The print, stream print, stream push, and stream append operations all + no longer trim trees by default. This makes these functions + consistent with constructor, which refrains from trimming to avoid + extra work. A construct with a tree trim is accomplished with an + additional operator. The above print and stream operations now have + the same usage patterns. + * Fixes to prodNum preservation through tree copy, and to the parser + +colm 0.10 - Apr 18, 2013 +------------------------ + * Implemented the colm parser in colm. Bootstrapping with a parse tree + construction in C++. Using that generated parser to parse the colm + grammar, from which the primary parser is produced. 
+ * Improved error reporting. + * Added some tracking of progress through a stream for better + backtracking over includes that are pushed onto a stream. + * Updated the vim syntax, no longer highlighting parser, list, etc + (types). + * Parse expressions are now returning trees again. Returning the parser + results in semantics inconsistent with many other areas of the + program. + * Can now put '.' or eos on the end of a send expression to terminate + the parse + * Removed the original send syntax (<<). + * Lowercased and otherwise improved the C interface. + * The default binary name no longer has .bin suffix. + * Fixed -o option, now using -c to mean compile on (library). The + export filename options are now -e and -x. + +colm 0.9 - Feb 19, 2013 +----------------------- + * The parse loop now scans data that is owned by the input stream. It + is copied into a contiguous block in the scanner when the token is + consumed. + * The syntax of lexical regions was altered to omit the name. The + curlies were replaced with lex ... end syntax. + * The syntax of namespaces was altered. Curlies were replaced with + namespace <ID> ... end. + +colm 0.8 - Dec 29, 2012 +----------------------- + * The parse statement now includes a call to the finish operation. It + returns a value of type parser<Type>. The result tree and any error + message can be retrieved from this object using 'tree' and 'error' + members. + * Dropped curly brackets and the name from the syntax of lexical region + definition. Using "lex ... end" + * Dropped curly brackets from the syntax of namespaces and context blocks. + Using "namespace N ... end N" and "context N ... end N" + * Now have a growable stack instead of a large, fixed, pre-allocated stack. 
diff --git a/src/Makefile.am b/src/Makefile.am new file mode 100644 index 00000000..5a53f040 --- /dev/null +++ b/src/Makefile.am @@ -0,0 +1,207 @@ +# +# Copyright 2007-2018 Adrian Thurston <thurston@colm.net> +# + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +AM_CPPFLAGS = -I$(top_srcdir)/aapl + +AUTOMAKE_OPTIONS = subdir-objects + +bin_PROGRAMS = colm +bin_SCRIPTS = colm-wrap + +RUNTIME_SRC = \ + map.c pdarun.c list.c input.c stream.c debug.c \ + codevect.c pool.c string.c tree.c iter.c \ + bytecode.c program.c struct.c commit.c \ + print.c + +RUNTIME_HDR = \ + config.h bytecode.h defs.h debug.h pool.h input.h \ + pdarun.h map.h type.h tree.h struct.h program.h colm.h \ + internal.h colmex.h + +lib_LTLIBRARIES = libcolm.la +noinst_LIBRARIES = libprog.a + +libcolm_la_SOURCES = $(RUNTIME_SRC) +libcolm_la_LDFLAGS = -release ${COLM_VERSION} -no-undefined + +if LINKER_NO_UNDEFINED +libcolm_la_LDFLAGS += -Wl,--no-undefined +endif + +common_CFLAGS = \ + -Wall \ + -DPREFIX='"$(prefix)"' + +libprog_a_SOURCES = \ + buffer.h bytecode.h colm.h debug.h dotgen.h fsmcodegen.h fsmgraph.h \ + input.h keyops.h map.h compiler.h \ + parsetree.h pcheck.h pdacodegen.h pdagraph.h pdarun.h pool.h redbuild.h \ + redfsm.h rtvector.h tree.h version.h global.h colm.h parser.h cstring.h \ + internal.h \ + \ + resolve.cc lookup.cc synthesis.cc parsetree.cc \ + fsmstate.cc fsmbase.cc fsmattach.cc fsmmin.cc \ + fsmgraph.cc pdagraph.cc pdabuild.cc pdacodegen.cc fsmcodegen.cc \ + redfsm.cc fsmexec.cc redbuild.cc closure.cc fsmap.cc \ + dotgen.cc pcheck.cc ctinput.cc declare.cc codegen.cc \ + exports.cc compiler.cc parser.cc reduce.cc + +libprog_a_CXXFLAGS = $(common_CFLAGS) + +colmincdir = $(includedir)/colm + +colminc_HEADERS = $(RUNTIME_HDR) + +BUILT_SOURCES = version.h include/colm + +if EXTERNAL_COLM + +# +# Generate the parser using a single run with an external colm program. +# +BUILD_PARSE_3_WITH = $(EXTERNAL_COLM)/bin/colm$(EXEEXT) +AM_CPPFLAGS += $(EXTERNAL_INC) +AM_LDFLAGS = $(EXTERNAL_LIBS) + +else + +noinst_PROGRAMS = bootstrap0 bootstrap1 bootstrap2 + +BUILD_PARSE_3_WITH = $(builddir)/bootstrap2$(EXEEXT) +AM_CPPFLAGS += -Iinclude +AM_LDFLAGS = -L. 
+ +# +# bootstrap0: The input program for bootstrap0 is construced using internal +# data structure constructors. It produces a program that can parse a grammar +# using limited features. No code is supported. +# +bootstrap0_CXXFLAGS = $(common_CFLAGS) -DCONS_INIT +bootstrap0_SOURCES = consinit.cc consinit.h main.cc version.h +bootstrap0_LDADD = libprog.a libcolm.la + +# +# bootstrap1: The input program is specified using a stripped down colm syntax. +# It produces a program that can parse most colm syntax, with the exception of +# the colm syntax/semantics that is implemented in colm itself. +# + +gen/bootstrap1.pack: colm-wrap bootstrap0$(EXEEXT) + mkdir -p gen + $(builddir)/colm-wrap -w bootstrap0 -o $@ \ + -c -p gen/parse1.c -e gen/if1.h -x gen/if1.cc no-input + +gen/parse1.c: gen/bootstrap1.pack + $(builddir)/colm-wrap -o $@ $< + +gen/if1.h: gen/bootstrap1.pack + $(builddir)/colm-wrap -o $@ $< + +gen/if1.cc: gen/bootstrap1.pack + $(builddir)/colm-wrap -o $@ $< + +# Listing if1.h in BUILT_SOURCES isn't sufficient because it depends on the +# building of bootstrap0. Automake wants to put all built sources into a list +# of files built before ANYTHING else (which includes bootstrap0). Not sure if +# it is rejected by automake or make. But in any case, it doesn't work. Fixed +# with the following additional dependency. +BUILT_SOURCES += gen/parse1.c gen/if1.h gen/if1.cc +loadinit.cc: gen/if1.h + +bootstrap1_CXXFLAGS = $(common_CFLAGS) -DLOAD_INIT +bootstrap1_CFLAGS = $(common_CFLAGS) +bootstrap1_SOURCES = loadinit.h loadinit.cc main.cc version.h +nodist_bootstrap1_SOURCES = gen/if1.h gen/if1.cc gen/parse1.c +bootstrap1_LDADD = libprog.a libcolm.la + +# +# bootstrap2: The input program is specified using the colm grammar used in +# bootstrap1, plus some rewrite rules that implement the final parts of +# syntax/semantics. It produces a program that can parse full colm programs, +# and thus generates the sources used in the colm binary. 
+# + +gen/bootstrap2.pack: colm-wrap bootstrap1$(EXEEXT) colm.lm + mkdir -p gen + $(builddir)/colm-wrap -w bootstrap1 -o $@ \ + -c -p gen/parse2.c -e gen/if2.h -x gen/if2.cc colm.lm + +gen/parse2.c: gen/bootstrap2.pack + $(builddir)/colm-wrap -o $@ $< + +gen/if2.h: gen/bootstrap2.pack + $(builddir)/colm-wrap -o $@ $< + +gen/if2.cc: gen/bootstrap2.pack + $(builddir)/colm-wrap -o $@ $< + +BUILT_SOURCES += gen/parse2.c gen/if2.h gen/if2.cc +loadboot2.cc: gen/if2.h + +bootstrap2_CXXFLAGS = $(common_CFLAGS) -DLOAD_COLM +bootstrap2_CFLAGS = $(common_CFLAGS) +bootstrap2_SOURCES = main.cc loadboot2.cc loadfinal.h version.h +nodist_bootstrap2_SOURCES = gen/if2.h gen/if2.cc gen/parse2.c +bootstrap2_LDADD = libprog.a libcolm.la + +endif + +gen/bootstrap3.pack: colm-wrap $(BUILD_PARSE_3_WITH) prog.lm colm.lm + mkdir -p gen + $(builddir)/colm-wrap -w $(BUILD_PARSE_3_WITH) -o $@ \ + -c -p gen/parse3.c -e gen/if3.h -x gen/if3.cc prog.lm + +gen/parse3.c: gen/bootstrap3.pack + $(builddir)/colm-wrap -o $@ $< + +gen/if3.h: gen/bootstrap3.pack + $(builddir)/colm-wrap -o $@ $< + +gen/if3.cc: gen/bootstrap3.pack + $(builddir)/colm-wrap -o $@ $< + +BUILT_SOURCES += gen/parse3.c gen/if3.h gen/if3.cc +loadcolm.cc: gen/if3.h + +colm_CXXFLAGS = $(common_CFLAGS) -DLOAD_COLM +colm_CFLAGS = $(common_CFLAGS) +colm_SOURCES = main.cc loadcolm.cc loadfinal.h version.h +nodist_colm_SOURCES = gen/if3.h gen/if3.cc gen/parse3.c +colm_LDADD = libprog.a -lcolm + +include/colm: + mkdir -p include + ln -s .. 
include/colm + +version.h: Makefile + echo '#define COLM_VERSION "$(COLM_VERSION)"' > version.h + echo '#define COLM_PUBDATE "$(COLM_PUBDATE)"' >> version.h + +config.h: ../src/config.h + cp ../src/config.h ./ + +CLEANFILES = $(BUILT_SOURCES) gen/bootstrap1.pack gen/bootstrap2.pack gen/bootstrap3.pack +EXTRA_DIST = prog.lm colm.lm loadfinal.cc colm-wrap.sh + +colm-wrap: colm-wrap.sh + @$(top_srcdir)/sedsubst $< $@ -w,+x $(SED_SUBST) diff --git a/src/buffer.h b/src/buffer.h new file mode 100644 index 00000000..58db85de --- /dev/null +++ b/src/buffer.h @@ -0,0 +1,57 @@ +/* + * Copyright 2003-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _COLM_BUFFER_H +#define _COLM_BUFFER_H + +#define BUFFER_INITIAL_SIZE 4096 + +/* An automatically grown buffer for collecting tokens. Always reuses space; + * never down resizes. 
*/ +struct Buffer +{ + Buffer() + { + data = (char*) malloc( BUFFER_INITIAL_SIZE ); + allocated = BUFFER_INITIAL_SIZE; + length = 0; + } + ~Buffer() { free(data); } + + void append( char p ) + { + if ( length == allocated ) { + allocated *= 2; + data = (char*) realloc( data, allocated ); + } + data[length++] = p; + } + + void clear() { length = 0; } + + char *data; + int allocated; + int length; +}; + +#endif /* _COLM_BUFFER_H */ + diff --git a/src/bytecode.c b/src/bytecode.c new file mode 100644 index 00000000..39aee070 --- /dev/null +++ b/src/bytecode.c @@ -0,0 +1,5025 @@ +/* + * Copyright 2006-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <colm/bytecode.h> + +#include <sys/types.h> +#if defined(HAVE_SYS_WAIT_H) +#include <sys/wait.h> +#endif +#include <assert.h> +#include <string.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <signal.h> + +#include <colm/pool.h> +#include <colm/debug.h> +#include <colm/colm.h> + +#define TRUE_VAL 1 +#define FALSE_VAL 0 + +#if SIZEOF_LONG != 4 && SIZEOF_LONG != 8 + #error "SIZEOF_LONG contained an unexpected value" +#endif + +#define read_byte( i ) do { \ + i = ((uchar) *instr++); \ +} while(0) + +#define read_half( i ) do { \ + i = ((word_t) *instr++); \ + i |= ((word_t) *instr++) << 8; \ +} while(0) + +/* There are better ways. */ +#if SIZEOF_LONG == 4 + + #define read_type( type, i ) do { \ + word_t _w; \ + _w = ((word_t) *instr++); \ + _w |= ((word_t) *instr++) << 8; \ + _w |= ((word_t) *instr++) << 16; \ + _w |= ((word_t) *instr++) << 24; \ + i = (type) _w; \ + } while(0) + + #define read_type_p( Type, i, p ) do { \ + i = ((Type) p[0]); \ + i |= ((Type) p[1]) << 8; \ + i |= ((Type) p[2]) << 16; \ + i |= ((Type) p[3]) << 24; \ + } while(0) + + #define consume_word() instr += 4 + +#else + + #define read_type( type, i ) do { \ + word_t _w; \ + _w = ((word_t) *instr++); \ + _w |= ((word_t) *instr++) << 8; \ + _w |= ((word_t) *instr++) << 16; \ + _w |= ((word_t) *instr++) << 24; \ + _w |= ((word_t) *instr++) << 32; \ + _w |= ((word_t) *instr++) << 40; \ + _w |= ((word_t) *instr++) << 48; \ + _w |= ((word_t) *instr++) << 56; \ + i = (type) _w; \ + } while(0) + + #define read_type_p( type, i, p ) do { \ + i = ((type) p[0]); \ + i |= ((type) p[1]) << 8; \ + i |= ((type) p[2]) << 16; \ + i |= ((type) p[3]) << 24; \ + i |= ((type) p[4]) << 32; \ + i |= ((type) p[5]) << 40; \ + i |= ((type) p[6]) << 48; \ + i |= ((type) p[7]) << 56; \ + } while(0) + + #define consume_word() instr += 8 +#endif + +#define read_tree( i ) read_type( tree_t*, i ) +#define read_parser( i ) read_type( parser_t*, i ) +#define read_word( i ) read_type( 
word_t, i ) +#define read_stream( i ) read_type( stream_t*, i ) +#define read_input( i ) read_type( input_t*, i ) + +#define read_word_p( i, p ) read_type_p( word_t, i, p ) + +#define consume_byte() instr += 1 +#define consume_half() instr += 2 + +static void rcode_downref( program_t *prg, tree_t **sp, code_t *instr ); + +static void make_stdin( program_t *prg ) +{ + if ( prg->stdin_val == 0 ) + prg->stdin_val = colm_stream_open_fd( prg, "<stdin>", 0 ); +} + +static void make_stdout( program_t *prg ) +{ + if ( prg->stdout_val == 0 ) + prg->stdout_val = colm_stream_open_fd( prg, "<stdout>", 1 ); +} + +static void make_stderr( program_t *prg ) +{ + if ( prg->stderr_val == 0 ) + prg->stderr_val = colm_stream_open_fd( prg, "<stderr>", 2 ); +} + +static void flush_streams( program_t *prg ) +{ + if ( prg->stdout_val != 0 ) { + struct stream_impl *si = prg->stdout_val->impl; + si->funcs->flush_stream( prg, si ); + } + + if ( prg->stderr_val != 0 ) { + struct stream_impl *si = prg->stderr_val->impl; + si->funcs->flush_stream( prg, si ); + } +} + +void colm_parser_set_context( program_t *prg, tree_t **sp, parser_t *parser, struct_t *val ) +{ + parser->pda_run->context = val; +} + +static head_t *tree_to_str_xml( program_t *prg, tree_t **sp, tree_t *tree, int trim, int attrs ) +{ + /* Collect the tree data. */ + str_collect_t collect; + init_str_collect( &collect ); + + colm_print_tree_collect_xml( prg, sp, &collect, tree, trim ); + + /* Set up the input stream. */ + head_t *ret = string_alloc_full( prg, collect.data, collect.length ); + + str_collect_destroy( &collect ); + + return ret; +} + +static head_t *tree_to_str_xml_ac( program_t *prg, tree_t **sp, tree_t *tree, int trim, int attrs ) +{ + /* Collect the tree data. */ + str_collect_t collect; + init_str_collect( &collect ); + + colm_print_tree_collect_xml_ac( prg, sp, &collect, tree, trim ); + + /* Set up the input stream. 
*/ + head_t *ret = string_alloc_full( prg, collect.data, collect.length ); + + str_collect_destroy( &collect ); + + return ret; +} + +static head_t *tree_to_str_postfix( program_t *prg, tree_t **sp, tree_t *tree, int trim, int attrs ) +{ + /* Collect the tree data. */ + str_collect_t collect; + init_str_collect( &collect ); + + colm_postfix_tree_collect( prg, sp, &collect, tree, trim ); + + /* Set up the input stream. */ + head_t *ret = string_alloc_full( prg, collect.data, collect.length ); + + str_collect_destroy( &collect ); + + return ret; +} + +static void input_push_text( struct colm_program *prg, struct input_impl *is, + struct colm_location *loc, const char *data, long length ) +{ + is->funcs->prepend_data( prg, is, loc, colm_alph_from_cstr( data ), length ); +} + +static void colm_stream_push_tree( struct colm_program *prg, struct input_impl *is, + tree_t *tree, int ignore ) +{ + is->funcs->prepend_tree( prg, is, tree, ignore ); +} + +static void colm_stream_push_stream( struct colm_program *prg, struct input_impl *is, stream_t *stream ) +{ + is->funcs->prepend_stream( prg, is, stream ); +} + +static void colm_undo_stream_push( program_t *prg, tree_t **sp, struct input_impl *is, long length ) +{ + if ( length < 0 ) { + /* tree_t *tree = */ is->funcs->undo_prepend_tree( prg, is ); + // colm_tree_downref( prg, sp, tree ); + } + else { + is->funcs->undo_prepend_data( prg, is, length ); + } +} + + +static word_t stream_append_text( program_t *prg, tree_t **sp, input_t *dest, tree_t *input, int trim ) +{ + long length = 0; + struct input_impl *impl = input_to_impl( dest ); + + if ( input->id == LEL_ID_PTR ) { + assert(false); + } + else { + /* Collect the tree data. */ + str_collect_t collect; + init_str_collect( &collect ); + colm_print_tree_collect( prg, sp, &collect, input, trim ); + + /* Load it into the input. 
*/ + impl->funcs->append_data( prg, impl, colm_alph_from_cstr( collect.data ), collect.length ); + length = collect.length; + str_collect_destroy( &collect ); + } + + return length; +} + +static word_t stream_append_tree( program_t *prg, tree_t **sp, input_t *dest, tree_t *to_append ) +{ + long length = 0; + struct input_impl *impl = input_to_impl( dest ); + + if ( to_append->id == LEL_ID_PTR ) { + assert(false); + } + else if ( to_append->id == LEL_ID_STR ) { + /* Collect the tree data. */ + str_collect_t collect; + init_str_collect( &collect ); + colm_print_tree_collect( prg, sp, &collect, to_append, false ); + + /* Load it into the to_append. */ + impl->funcs->append_data( prg, impl, colm_alph_from_cstr( collect.data ), collect.length ); + length = collect.length; + str_collect_destroy( &collect ); + } + else { + colm_tree_upref( prg, to_append ); + impl->funcs->append_tree( prg, impl, to_append ); + } + + return length; +} + +static word_t stream_append_stream( program_t *prg, tree_t **sp, input_t *dest, stream_t *stream ) +{ + long length = 0; + + struct input_impl *impl = input_to_impl( dest ); + impl->funcs->append_stream( prg, impl, stream ); + + return length; +} + +static void stream_undo_append( program_t *prg, tree_t **sp, + struct input_impl *is, tree_t *input, long length ) +{ + if ( input->id == LEL_ID_PTR ) + assert(false); + else if ( input->id == LEL_ID_STR ) + is->funcs->undo_append_data( prg, is, length ); + else { + is->funcs->undo_append_data( prg, is, length ); + } +} + +static void stream_undo_append_stream( program_t *prg, tree_t **sp, struct input_impl *is, + tree_t *input, long length ) +{ + is->funcs->undo_append_stream( prg, is ); +} + +static tree_t *stream_pull_bc( program_t *prg, tree_t **sp, struct pda_run *pda_run, + input_t *input, tree_t *length ) +{ + long len = ((long)length); + struct input_impl *impl = input_to_impl( input ); + head_t *tokdata = colm_stream_pull( prg, sp, pda_run, impl, len ); + return construct_string( prg, 
tokdata ); +} + + +static void undo_stream_pull( struct colm_program *prg, struct input_impl *is, + const char *data, long length ) +{ + //debug( REALM_PARSE, "undoing stream pull\n" ); + is->funcs->undo_consume_data( prg, is, colm_alph_from_cstr( data ), length ); +} + +static void undo_pull( program_t *prg, input_t *input, tree_t *str ) +{ + struct input_impl *impl = input_to_impl( input ); + const char *data = string_data( ( (str_t*)str )->value ); + long length = string_length( ( (str_t*)str )->value ); + undo_stream_pull( prg, impl, data, length ); +} + +static long input_push( program_t *prg, tree_t **sp, struct input_impl *in, tree_t *tree, int ignore ) +{ + long length = -1; + if ( tree->id == LEL_ID_PTR ) { + assert(false); + } + else if ( tree->id == LEL_ID_STR ) { + /* This should become a compile error. If it's text, it's up to the + * scanner to decide. Want to force it then send a token. */ + assert( !ignore ); + + /* Collect the tree data. */ + str_collect_t collect; + init_str_collect( &collect ); + colm_print_tree_collect( prg, sp, &collect, tree, false ); + + input_push_text( prg, in, tree->tokdata->location, collect.data, collect.length ); + length = collect.length; + str_collect_destroy( &collect ); + } + else { + colm_tree_upref( prg, tree ); + colm_stream_push_tree( prg, in, tree, ignore ); + } + + return length; +} + +static long input_push_stream( program_t *prg, tree_t **sp, + struct input_impl *in, stream_t *stream ) +{ + colm_stream_push_stream( prg, in, stream ); + return -1; +} + +static void set_local( execution_t *exec, long field, tree_t *tree ) +{ + if ( tree != 0 ) + assert( tree->refs >= 1 ); + vm_set_local( exec, field, tree ); +} + +static tree_t *get_local_split( program_t *prg, execution_t *exec, long field ) +{ + tree_t *val = vm_get_local( exec, field ); + tree_t *split = split_tree( prg, val ); + vm_set_local( exec, field, split ); + return split; +} + +static void downref_local_trees( program_t *prg, tree_t **sp, + 
execution_t *exec, struct local_info *locals, long locals_len ) +{ + long i; + for ( i = locals_len-1; i >= 0; i-- ) { + if ( locals[i].type == LI_Tree ) { + debug( prg, REALM_BYTECODE, "local tree downref: %ld\n", + (long)locals[i].offset ); + + tree_t *tree = (tree_t*) vm_get_local( exec, (long)locals[i].offset ); + colm_tree_downref( prg, sp, tree ); + } + } +} + +static void downref_locals( program_t *prg, tree_t ***psp, + execution_t *exec, struct local_info *locals, long locals_len ) +{ + long i; + for ( i = locals_len-1; i >= 0; i-- ) { + switch ( locals[i].type ) { + case LI_Tree: { + debug( prg, REALM_BYTECODE, "local tree downref: %ld\n", + (long)locals[i].offset ); + tree_t *tree = (tree_t*) vm_get_local( exec, (long)locals[i].offset ); + colm_tree_downref( prg, *psp, tree ); + break; + } + case LI_Iter: { + debug( prg, REALM_BYTECODE, "local iter downref: %ld\n", + (long)locals[i].offset ); + tree_iter_t *iter = (tree_iter_t*) vm_get_plocal( exec, (long)locals[i].offset ); + colm_tree_iter_destroy( prg, psp, iter ); + break; + } + case LI_RevIter: { + debug( prg, REALM_BYTECODE, "local rev iter downref: %ld\n", + (long)locals[i].offset ); + rev_tree_iter_t *riter = (rev_tree_iter_t*) vm_get_plocal( exec, + (long)locals[i].offset ); + colm_rev_tree_iter_destroy( prg, psp, riter ); + break; + } + case LI_UserIter: { + debug( prg, REALM_BYTECODE, "local user iter downref: %ld\n", + (long)locals[i].offset ); + user_iter_t *uiter = (user_iter_t*) vm_get_local( exec, locals[i].offset ); + colm_uiter_unwind( prg, psp, uiter ); + break; + } + } + } +} + + +static tree_t *construct_arg0( program_t *prg, int argc, const char **argv, const int *argl ) +{ + tree_t *arg0 = 0; + if ( argc > 0 ) { + const char *argv0 = argv[0]; + size_t len = argl != 0 ? 
argl[0] : strlen( argv[0] ); + head_t *head = colm_string_alloc_pointer( prg, argv0, len ); + arg0 = construct_string( prg, head ); + colm_tree_upref( prg, arg0 ); + } + return arg0; +} + +static list_t *construct_argv( program_t *prg, int argc, const char **argv, const int *argl ) +{ + list_t *list = (list_t*)colm_construct_generic( prg, prg->rtd->argv_generic_id, 0 ); + int i; + for ( i = 1; i < argc; i++ ) { + size_t len = argl != 0 ? argl[i] : strlen(argv[i]); + const char *argv_i = argv[i]; + head_t *head = colm_string_alloc_pointer( prg, argv_i, len ); + tree_t *arg = construct_string( prg, head ); + colm_tree_upref( prg, arg ); + + struct_t *strct = colm_struct_new_size( prg, 16 ); + strct->id = prg->rtd->argv_el_id; + colm_struct_set_field( strct, tree_t*, 0, arg ); + list_el_t *list_el = colm_struct_get_addr( strct, list_el_t*, 1 ); + colm_list_append( list, list_el ); + } + + return list; +} + + +static list_t *construct_stds( program_t *prg ) +{ + make_stdout( prg ); + + list_t *list = (list_t*)colm_construct_generic( prg, prg->rtd->stds_generic_id, 0 ); + + struct_t *strct = colm_struct_new_size( prg, 16 ); + strct->id = prg->rtd->stds_el_id; + colm_struct_set_field( strct, stream_t*, 0, prg->stdout_val ); + list_el_t *list_el = colm_struct_get_addr( strct, list_el_t*, 1 ); + colm_list_append( list, list_el ); + + return list; +} + +/* + * Execution environment + */ + +void colm_rcode_downref_all( program_t *prg, tree_t **sp, struct rt_code_vect *rev ) +{ + while ( rev->tab_len > 0 ) { + /* Read the length */ + code_t *prcode = rev->data + rev->tab_len - SIZEOF_WORD; + word_t len; + read_word_p( len, prcode ); + + /* Find the start of block. */ + long start = rev->tab_len - len - SIZEOF_WORD; + prcode = rev->data + start; + + /* Execute it. */ + rcode_downref( prg, sp, prcode ); + + /* Backup over it. 
*/ + rev->tab_len -= len + SIZEOF_WORD; + } +} + +static code_t *pcr_call( program_t *prg, execution_t *exec, tree_t ***psp, code_t *instr, parser_t *parser ) +{ + tree_t **sp = *psp; + + int frame_size = 0; + if ( parser->pda_run->frame_id >= 0 ) { + struct frame_info *fi = &prg->rtd->frame_info[parser->pda_run->frame_id]; + frame_size = fi->frame_size; + } + + vm_contiguous( 8 + frame_size ); + + vm_push_type( tree_t**, exec->frame_ptr ); + vm_push_type( tree_t**, exec->iframe_ptr ); + vm_push_type( long, exec->frame_id ); + vm_push_type( word_t, exec->steps ); + vm_push_type( word_t, exec->pcr ); + vm_push_parser( exec->parser ); + vm_push_type( word_t, exec->WV ); + + /* Return back to this instruction. We are alternating between + * parsing and calling instructions. */ + code_t *return_to = instr - SIZEOF_CODE; + vm_push_type( code_t*, return_to ); + + exec->frame_ptr = 0; + exec->iframe_ptr = 0; + exec->frame_id = 0; + exec->steps = 0; + exec->parser = parser; + + instr = parser->pda_run->code; + exec->WV = 1; + + exec->frame_id = parser->pda_run->frame_id; + + if ( parser->pda_run->frame_id >= 0 ) { + struct frame_info *fi = &prg->rtd->frame_info[parser->pda_run->frame_id]; + + exec->frame_ptr = vm_ptop(); + vm_pushn( fi->frame_size ); + memset( vm_ptop(), 0, sizeof(word_t) * fi->frame_size ); + } + + *psp = sp; + return instr; +} + +void colm_execute( program_t *prg, execution_t *exec, code_t *code ) +{ + tree_t **sp = prg->stack_root; + + struct frame_info *fi = &prg->rtd->frame_info[prg->rtd->root_frame_id]; + + /* Set up the stack as if we have + * called. We allow a return value. */ + + long stretch = FR_AA + fi->frame_size; + vm_contiguous( stretch ); + + vm_push_tree( 0 ); + vm_push_tree( 0 ); + vm_push_tree( 0 ); + vm_push_tree( 0 ); + vm_push_tree( 0 ); + + exec->frame_ptr = vm_ptop(); + vm_pushn( fi->frame_size ); + memset( vm_ptop(), 0, sizeof(word_t) * fi->frame_size ); + + /* Execution loop. 
*/ + sp = colm_execute_code( prg, exec, sp, code ); + + downref_locals( prg, &sp, exec, fi->locals, fi->locals_len ); + vm_popn( fi->frame_size ); + + vm_pop_ignore(); + vm_pop_ignore(); + colm_tree_downref( prg, sp, prg->return_val ); + prg->return_val = vm_pop_tree(); + vm_pop_ignore(); + + prg->stack_root = sp; +} + +tree_t *colm_run_func( struct colm_program *prg, int frame_id, + const char **params, int param_count ) +{ + /* Make the arguments available to the program. */ + prg->argc = 0; + prg->argv = 0; + prg->argl = 0; + + execution_t execution; + memset( &execution, 0, sizeof(execution) ); + + tree_t **sp = prg->stack_root; + + struct frame_info *fi = &prg->rtd->frame_info[frame_id]; + code_t *code = fi->codeWC; + + vm_pushn( param_count ); + execution.call_args = vm_ptop(); + memset( vm_ptop(), 0, sizeof(word_t) * param_count ); + + int p; + for ( p = 0; p < param_count; p++ ) { + if ( params[p] == 0 ) { + ((value_t*)execution.call_args)[p] = 0; + } + else { + const char *param_p = params[p]; + size_t param_len = strlen(params[p]); + head_t *head = colm_string_alloc_pointer( prg, param_p, param_len ); + tree_t *tree = construct_string( prg, head ); + colm_tree_upref( prg, tree ); + ((tree_t**)execution.call_args)[p] = tree; + } + } + + long stretch = FR_AA + fi->frame_size; + vm_contiguous( stretch ); + + /* Set up the stack as if we have called. We allow a return value. */ + vm_push_tree( (tree_t*)execution.call_args ); + vm_push_tree( 0 ); + vm_push_tree( 0 ); + vm_push_tree( 0 ); + vm_push_tree( 0 ); + + execution.frame_id = frame_id; + + execution.frame_ptr = vm_ptop(); + vm_pushn( fi->frame_size ); + memset( vm_ptop(), 0, sizeof(word_t) * fi->frame_size ); + + /* Execution loop. 
*/ + sp = colm_execute_code( prg, &execution, sp, code ); + + colm_tree_downref( prg, sp, prg->return_val ); + prg->return_val = execution.ret_val; + + vm_popn( param_count ); + + assert( sp == prg->stack_root ); + + return prg->return_val; +}; + +int colm_make_reverse_code( struct pda_run *pda_run ) +{ + struct rt_code_vect *reverse_code = &pda_run->reverse_code; + struct rt_code_vect *rcode_collect = &pda_run->rcode_collect; + + /* Do we need to revert the left hand side? */ + + /* Check if there was anything generated. */ + if ( rcode_collect->tab_len == 0 ) + return false; + + if ( pda_run->rc_block_count == 0 ) { + /* One reverse code run for the DECK terminator. */ + append_code_val( reverse_code, IN_PCR_END_DECK ); + append_code_val( reverse_code, IN_PCR_RET ); + append_word( reverse_code, 2 ); + pda_run->rc_block_count += 1; + colm_increment_steps( pda_run ); + } + + long start_length = reverse_code->tab_len; + + /* Go backwards, group by group, through the reverse code. Push each group + * to the global reverse code stack. */ + code_t *p = rcode_collect->data + rcode_collect->tab_len; + while ( p != rcode_collect->data ) { + p--; + long len = *p; + p = p - len; + append_code_vect( reverse_code, p, len ); + } + + /* Stop, then place a total length in the global stack. */ + append_code_val( reverse_code, IN_PCR_RET ); + long length = reverse_code->tab_len - start_length; + append_word( reverse_code, length ); + + /* Clear the revere code buffer. 
*/ + rcode_collect->tab_len = 0; + + pda_run->rc_block_count += 1; + colm_increment_steps( pda_run ); + + return true; +} + +void colm_transfer_reverse_code( struct pda_run *pda_run, parse_tree_t *parse_tree ) +{ + if ( pda_run->rc_block_count > 0 ) { + //debug( REALM_PARSE, "attaching reverse code to token\n" ); + parse_tree->flags |= PF_HAS_RCODE; + pda_run->rc_block_count = 0; + } +} + +static void rcode_unit_term( execution_t *exec ) +{ + append_code_val( &exec->parser->pda_run->rcode_collect, exec->rcode_unit_len ); + exec->rcode_unit_len = 0; +} + +static void rcode_unit_start( execution_t *exec ) +{ + exec->rcode_unit_len = 0; +} + +static void rcode_code( execution_t *exec, const code_t code ) +{ + append_code_val( &exec->parser->pda_run->rcode_collect, code ); + exec->rcode_unit_len += SIZEOF_CODE; +} + +static void rcode_half( execution_t *exec, const half_t half ) +{ + append_half( &exec->parser->pda_run->rcode_collect, half ); + exec->rcode_unit_len += SIZEOF_HALF; +} + +static void rcode_word( execution_t *exec, const word_t word ) +{ + append_word( &exec->parser->pda_run->rcode_collect, word ); + exec->rcode_unit_len += SIZEOF_WORD; +} + +code_t *colm_pop_reverse_code( struct rt_code_vect *all_rev ) +{ + /* Read the length */ + code_t *prcode = all_rev->data + all_rev->tab_len - SIZEOF_WORD; + word_t len; + read_word_p( len, prcode ); + + /* Find the start of block. */ + long start = all_rev->tab_len - len - SIZEOF_WORD; + prcode = all_rev->data + start; + + /* Backup over it. */ + all_rev->tab_len -= len + SIZEOF_WORD; + return prcode; +} + +tree_t **colm_execute_code( program_t *prg, execution_t *exec, tree_t **sp, code_t *instr ) +{ + /* When we exit we are going to verify that we did not eat up any stack + * space. 
*/ + tree_t **root = sp; + code_t c; + +again: + c = *instr++; + //debug( REALM_BYTECODE, "--in 0x%x\n", c ); + + switch ( c ) { + case IN_RESTORE_LHS: { + tree_t *restore; + read_tree( restore ); + + debug( prg, REALM_BYTECODE, "IN_RESTORE_LHS\n" ); + colm_tree_downref( prg, sp, exec->parser->pda_run->parse_input->shadow->tree ); + exec->parser->pda_run->parse_input->shadow->tree = restore; + break; + } + case IN_LOAD_NIL: { + debug( prg, REALM_BYTECODE, "IN_LOAD_NIL\n" ); + vm_push_tree( 0 ); + break; + } + case IN_LOAD_TREE: { + tree_t *tree; + read_tree( tree ); + vm_push_tree( tree ); + debug( prg, REALM_BYTECODE, "IN_LOAD_TREE %p id: %d refs: %d\n", + tree, tree->id, tree->refs ); + break; + } + case IN_LOAD_WORD: { + debug( prg, REALM_BYTECODE, "IN_LOAD_WORD\n" ); + word_t w; + read_word( w ); + vm_push_type( word_t, w ); + break; + } + case IN_LOAD_TRUE: { + debug( prg, REALM_BYTECODE, "IN_LOAD_TRUE\n" ); + //colm_tree_upref( prg, prg->trueVal ); + vm_push_tree( prg->true_val ); + break; + } + case IN_LOAD_FALSE: { + debug( prg, REALM_BYTECODE, "IN_LOAD_FALSE\n" ); + //colm_tree_upref( prg, prg->falseVal ); + vm_push_tree( prg->false_val ); + break; + } + case IN_LOAD_INT: { + word_t i; + read_word( i ); + + debug( prg, REALM_BYTECODE, "IN_LOAD_INT %d\n", i ); + + value_t value = i; + vm_push_value( value ); + break; + } + case IN_LOAD_STR: { + word_t offset; + read_word( offset ); + + debug( prg, REALM_BYTECODE, "IN_LOAD_STR %d\n", offset ); + + head_t *lit = make_literal( prg, offset ); + tree_t *tree = construct_string( prg, lit ); + colm_tree_upref( prg, tree ); + vm_push_tree( tree ); + break; + } + case IN_READ_REDUCE: { + half_t generic_id; + half_t reducer_id; + read_half( generic_id ); + read_half( reducer_id ); + + input_t *input = vm_pop_input(); + + debug( prg, REALM_BYTECODE, "IN_READ_REDUCE %hd %hd\n", generic_id, reducer_id ); + + prg->rtd->read_reduce( prg, reducer_id, input ); + + vm_push_tree( 0 ); + + break; + } + + /* + * LOAD_GLOBAL + 
*/ + case IN_LOAD_GLOBAL_R: { + debug( prg, REALM_BYTECODE, "IN_LOAD_GLOBAL_R\n" ); + + vm_push_struct( prg->global ); + break; + } + case IN_LOAD_GLOBAL_WV: { + debug( prg, REALM_BYTECODE, "IN_LOAD_GLOBAL_WV\n" ); + + assert( exec->WV ); + + vm_push_struct( prg->global ); + + /* Set up the reverse instruction. */ + rcode_unit_start( exec ); + rcode_code( exec, IN_LOAD_GLOBAL_BKT ); + break; + } + case IN_LOAD_GLOBAL_WC: { + debug( prg, REALM_BYTECODE, "IN_LOAD_GLOBAL_WC\n" ); + + assert( !exec->WV ); + + /* This is identical to the _R version, but using it for writing + * would be confusing. */ + vm_push_struct( prg->global ); + break; + } + case IN_LOAD_GLOBAL_BKT: { + debug( prg, REALM_BYTECODE, "IN_LOAD_GLOBAL_BKT\n" ); + + vm_push_struct( prg->global ); + break; + } + + case IN_LOAD_INPUT_R: { + debug( prg, REALM_BYTECODE, "IN_LOAD_INPUT_R\n" ); + + assert( exec->parser != 0 ); + vm_push_input( exec->parser->input ); + break; + } + case IN_LOAD_INPUT_WV: { + debug( prg, REALM_BYTECODE, "IN_LOAD_INPUT_WV\n" ); + + assert( exec->WV ); + + assert( exec->parser != 0 ); + vm_push_input( exec->parser->input ); + + /* Set up the reverse instruction. 
*/ + rcode_unit_start( exec ); + rcode_code( exec, IN_LOAD_INPUT_BKT ); + rcode_word( exec, (word_t)exec->parser->input ); + break; + } + case IN_LOAD_INPUT_WC: { + debug( prg, REALM_BYTECODE, "IN_LOAD_INPUT_WC\n" ); + + assert( !exec->WV ); + + assert( exec->parser != 0 ); + vm_push_input( exec->parser->input ); + break; + } + case IN_LOAD_INPUT_BKT: { + tree_t *accum_stream; + read_tree( accum_stream ); + + debug( prg, REALM_BYTECODE, "IN_LOAD_INPUT_BKT\n" ); + + colm_tree_upref( prg, accum_stream ); + vm_push_tree( accum_stream ); + break; + } + + case IN_LOAD_CONTEXT_R: { + debug( prg, REALM_BYTECODE, "IN_LOAD_CONTEXT_R\n" ); + + vm_push_type( struct_t*, exec->parser->pda_run->context ); + break; + } + case IN_LOAD_CONTEXT_WV: { + debug( prg, REALM_BYTECODE, "IN_LOAD_CONTEXT_WV\n" ); + + assert( exec->WV ); + + vm_push_type( struct_t *, exec->parser->pda_run->context ); + + /* Set up the reverse instruction. */ + rcode_unit_start( exec ); + rcode_code( exec, IN_LOAD_CONTEXT_BKT ); + break; + } + case IN_LOAD_CONTEXT_WC: { + debug( prg, REALM_BYTECODE, "IN_LOAD_CONTEXT_WC\n" ); + + assert( !exec->WV ); + + /* This is identical to the _R version, but using it for writing + * would be confusing. 
*/ + vm_push_type( struct_t *, exec->parser->pda_run->context ); + break; + } + case IN_LOAD_CONTEXT_BKT: { + debug( prg, REALM_BYTECODE, "IN_LOAD_CONTEXT_BKT\n" ); + + vm_push_type( struct_t *, exec->parser->pda_run->context ); + break; + } + + case IN_SET_PARSER_CONTEXT: { + debug( prg, REALM_BYTECODE, "IN_SET_PARSER_CONTEXT\n" ); + + struct_t *strct = vm_pop_struct(); + parser_t *parser = vm_pop_parser(); + + colm_parser_set_context( prg, sp, parser, strct ); + + vm_push_parser( parser ); + break; + } + + case IN_SET_PARSER_INPUT: { + debug( prg, REALM_BYTECODE, "IN_SET_PARSER_INPUT\n" ); + + input_t *to_replace_with = vm_pop_input(); + parser_t *parser = vm_pop_parser(); + + parser->input = to_replace_with; + + vm_push_parser( parser ); + + break; + } + + case IN_INIT_CAPTURES: { + consume_byte(); + + debug( prg, REALM_BYTECODE, "IN_INIT_CAPTURES\n" ); + + /* If there are captures (this is a translate block) then copy them into + * the local frame now. */ + struct lang_el_info *lel_info = prg->rtd->lel_info; + struct pda_run *pda_run = exec->parser->pda_run; + alph_t **mark = pda_run->mark; + + int i, num_capture_attr = lel_info[pda_run->token_id].num_capture_attr; + for ( i = 0; i < num_capture_attr; i++ ) { + struct lang_el_info *lei = &lel_info[exec->parser->pda_run->token_id]; + CaptureAttr *ca = &prg->rtd->capture_attr[lei->capture_attr + i]; + head_t *data = string_alloc_full( prg, + colm_cstr_from_alph( mark[ca->mark_enter] ), + mark[ca->mark_leave] - mark[ca->mark_enter] ); + tree_t *string = construct_string( prg, data ); + colm_tree_upref( prg, string ); + set_local( exec, -1 - i, string ); + } + break; + } + case IN_INIT_RHS_EL: { + half_t position; + short field; + read_half( position ); + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_INIT_RHS_EL %hd\n", field ); + + tree_t *val = get_rhs_el( prg, exec->parser->pda_run->red_lel->shadow->tree, position ); + colm_tree_upref( prg, val ); + vm_set_local(exec, field, val); + break; + } + + case 
IN_INIT_LHS_EL: { + short field; + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_INIT_LHS_EL %hd\n", field ); + + /* We transfer it to to the local field. Possibly take a copy. */ + tree_t *val = exec->parser->pda_run->red_lel->shadow->tree; + + /* Save it. */ + colm_tree_upref( prg, val ); + exec->parser->pda_run->parsed = val; + + exec->parser->pda_run->red_lel->shadow->tree = 0; + vm_set_local(exec, field, val); + break; + } + case IN_STORE_LHS_EL: { + short field; + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_STORE_LHS_EL %hd\n", field ); + + tree_t *val = vm_get_local(exec, field); + vm_set_local(exec, field, 0); + exec->parser->pda_run->red_lel->shadow->tree = val; + break; + } + case IN_UITER_ADVANCE: { + short field; + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_UITER_ADVANCE\n" ); + + /* Get the iterator. */ + user_iter_t *uiter = (user_iter_t*) vm_get_local(exec, field); + + long yield_size = vm_ssize() - uiter->root_size; + assert( uiter->yield_size == yield_size ); + + /* Fix the return instruction pointer. 
*/ + uiter->stack_root[-IFR_AA + IFR_RIN] = (SW)instr; + + instr = uiter->resume; + exec->frame_ptr = uiter->frame; + exec->iframe_ptr = &uiter->stack_root[-IFR_AA]; + break; + } + case IN_UITER_GET_CUR_R: { + short field; + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_UITER_GET_CUR_R\n" ); + + user_iter_t *uiter = (user_iter_t*) vm_get_local(exec, field); + tree_t *val = uiter->ref.kid->tree; + colm_tree_upref( prg, val ); + vm_push_tree( val ); + break; + } + case IN_UITER_GET_CUR_WC: { + short field; + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_UITER_GET_CUR_WC\n" ); + + user_iter_t *uiter = (user_iter_t*) vm_get_local(exec, field); + split_ref( prg, &sp, &uiter->ref ); + tree_t *split = uiter->ref.kid->tree; + colm_tree_upref( prg, split ); + vm_push_tree( split ); + break; + } + case IN_UITER_SET_CUR_WC: { + short field; + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_UITER_SET_CUR_WC\n" ); + + tree_t *t = vm_pop_tree(); + user_iter_t *uiter = (user_iter_t*) vm_get_local(exec, field); + split_ref( prg, &sp, &uiter->ref ); + tree_t *old = uiter->ref.kid->tree; + set_uiter_cur( prg, uiter, t ); + colm_tree_downref( prg, sp, old ); + break; + } + case IN_GET_LOCAL_R: { + short field; + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_GET_LOCAL_R %hd\n", field ); + + tree_t *val = vm_get_local(exec, field); + colm_tree_upref( prg, val ); + vm_push_tree( val ); + break; + } + case IN_GET_LOCAL_WC: { + short field; + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_GET_LOCAL_WC %hd\n", field ); + + tree_t *split = get_local_split( prg, exec, field ); + colm_tree_upref( prg, split ); + vm_push_tree( split ); + break; + } + case IN_SET_LOCAL_WC: { + short field; + read_half( field ); + debug( prg, REALM_BYTECODE, "IN_SET_LOCAL_WC %hd\n", field ); + + tree_t *val = vm_pop_tree(); + colm_tree_downref( prg, sp, vm_get_local(exec, field) ); + set_local( exec, field, val ); + break; + } + case IN_GET_LOCAL_VAL_R: { + short 
field; + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_GET_LOCAL_VAL_R %hd\n", field ); + + tree_t *val = vm_get_local(exec, field); + vm_push_tree( val ); + break; + } + case IN_SET_LOCAL_VAL_WC: { + short field; + read_half( field ); + debug( prg, REALM_BYTECODE, "IN_SET_LOCAL_VAL_WC %hd\n", field ); + + tree_t *val = vm_pop_tree(); + vm_set_local(exec, field, val); + break; + } + case IN_SAVE_RET: { + debug( prg, REALM_BYTECODE, "IN_SAVE_RET\n" ); + + value_t val = vm_pop_value(); + vm_set_local(exec, FR_RV, (tree_t*)val); + break; + } + case IN_GET_LOCAL_REF_R: { + short field; + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_GET_LOCAL_REF_R\n" ); + + ref_t *ref = (ref_t*) vm_get_plocal(exec, field); + tree_t *val = ref->kid->tree; + colm_tree_upref( prg, val ); + vm_push_tree( val ); + break; + } + case IN_GET_LOCAL_REF_WC: { + short field; + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_GET_LOCAL_REF_WC\n" ); + + ref_t *ref = (ref_t*) vm_get_plocal(exec, field); + split_ref( prg, &sp, ref ); + tree_t *val = ref->kid->tree; + colm_tree_upref( prg, val ); + vm_push_tree( val ); + break; + } + case IN_SET_LOCAL_REF_WC: { + short field; + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_SET_LOCAL_REF_WC\n" ); + + tree_t *val = vm_pop_tree(); + ref_t *ref = (ref_t*) vm_get_plocal(exec, field); + split_ref( prg, &sp, ref ); + ref_set_value( prg, sp, ref, val ); + break; + } + case IN_GET_FIELD_TREE_R: { + short field; + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_GET_FIELD_TREE_R %d\n", field ); + + tree_t *obj = vm_pop_tree(); + colm_tree_downref( prg, sp, obj ); + + tree_t *val = colm_tree_get_field( obj, field ); + colm_tree_upref( prg, val ); + vm_push_tree( val ); + break; + } + case IN_GET_FIELD_TREE_WC: { + short field; + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_GET_FIELD_TREE_WC %d\n", field ); + + tree_t *obj = vm_pop_tree(); + colm_tree_downref( prg, sp, obj ); + + tree_t *split = 
get_field_split( prg, obj, field ); + colm_tree_upref( prg, split ); + vm_push_tree( split ); + break; + } + case IN_GET_FIELD_TREE_WV: { + short field; + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_GET_FIELD_TREE_WV\n" ); + + tree_t *obj = vm_pop_tree(); + colm_tree_downref( prg, sp, obj ); + + tree_t *split = get_field_split( prg, obj, field ); + colm_tree_upref( prg, split ); + vm_push_tree( split ); + + /* Set up the reverse instruction. */ + rcode_code( exec, IN_GET_FIELD_TREE_BKT ); + rcode_half( exec, field ); + break; + } + case IN_GET_FIELD_TREE_BKT: { + short field; + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_GET_FIELD_TREE_BKT\n" ); + + tree_t *obj = vm_pop_tree(); + colm_tree_downref( prg, sp, obj ); + + tree_t *split = get_field_split( prg, obj, field ); + colm_tree_upref( prg, split ); + vm_push_tree( split ); + break; + } + case IN_SET_FIELD_TREE_WC: { + short field; + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_SET_FIELD_TREE_WC %d\n", field ); + + tree_t *obj = vm_pop_tree(); + tree_t *val = vm_pop_tree(); + colm_tree_downref( prg, sp, obj ); + + /* Downref the old value. */ + tree_t *prev = colm_tree_get_field( obj, field ); + colm_tree_downref( prg, sp, prev ); + + colm_tree_set_field( prg, obj, field, val ); + break; + } + case IN_SET_FIELD_TREE_WV: { + short field; + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_SET_FIELD_TREE_WV %d\n", field ); + + tree_t *obj = vm_pop_tree(); + tree_t *val = vm_pop_tree(); + colm_tree_downref( prg, sp, obj ); + + /* Save the old value, then set the field. */ + tree_t *prev = colm_tree_get_field( obj, field ); + colm_tree_set_field( prg, obj, field, val ); + + /* Set up the reverse instruction. 
*/ + rcode_code( exec, IN_SET_FIELD_TREE_BKT ); + rcode_half( exec, field ); + rcode_word( exec, (word_t)prev ); + rcode_unit_term( exec ); + break; + } + case IN_SET_FIELD_TREE_BKT: { + short field; + tree_t *val; + read_half( field ); + read_tree( val ); + + debug( prg, REALM_BYTECODE, "IN_SET_FIELD_TREE_BKT\n" ); + + tree_t *obj = vm_pop_tree(); + colm_tree_downref( prg, sp, obj ); + + /* Downref the old value. */ + tree_t *prev = colm_tree_get_field( obj, field ); + colm_tree_downref( prg, sp, prev ); + + colm_tree_set_field( prg, obj, field, val ); + break; + } + case IN_SET_FIELD_TREE_LEAVE_WC: { + short field; + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_SET_FIELD_TREE_LEAVE_WC\n" ); + + /* Note that we don't downref the object here because we are + * leaving it on the stack. */ + tree_t *obj = vm_pop_tree(); + tree_t *val = vm_pop_tree(); + + /* Downref the old value. */ + tree_t *prev = colm_tree_get_field( obj, field ); + colm_tree_downref( prg, sp, prev ); + + /* Set the field. */ + colm_tree_set_field( prg, obj, field, val ); + + /* Leave the object on the top of the stack. */ + vm_push_tree( obj ); + break; + } + case IN_GET_FIELD_VAL_R: { + short field; + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_GET_FIELD_VAL_R %d\n", field ); + + tree_t *obj = vm_pop_tree(); + colm_tree_downref( prg, sp, obj ); + + tree_t *pointer = colm_tree_get_field( obj, field ); + value_t value = 0; + if ( pointer != 0 ) + value = colm_get_pointer_val( pointer ); + vm_push_value( value ); + break; + } + case IN_SET_FIELD_VAL_WC: { + short field; + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_SET_FIELD_VAL_WC %d\n", field ); + + tree_t *obj = vm_pop_tree(); + value_t value = vm_pop_value(); + colm_tree_downref( prg, sp, obj ); + + /* Downref the old value. */ + tree_t *prev = colm_tree_get_field( obj, field ); + colm_tree_downref( prg, sp, prev ); + + /* Make it into a pointer. 
*/ + tree_t *pointer = colm_construct_pointer( prg, value ); + colm_tree_upref( prg, pointer ); + + colm_tree_set_field( prg, obj, field, pointer ); + break; + } + case IN_NEW_STRUCT: { + short id; + read_half( id ); + + debug( prg, REALM_BYTECODE, "IN_NEW_STRUCT %hd\n", id ); + struct_t *item = colm_struct_new( prg, id ); + vm_push_struct( item ); + break; + } + case IN_NEW_STREAM: { + debug( prg, REALM_BYTECODE, "IN_NEW_STREAM\n" ); + stream_t *item = colm_stream_open_collect( prg ); + vm_push_stream( item ); + break; + } + case IN_GET_COLLECT_STRING: { + debug( prg, REALM_BYTECODE, "IN_GET_COLLECT_STRING\n" ); + stream_t *stream = vm_pop_stream(); + str_t *str = collect_string( prg, stream ); + colm_tree_upref( prg, (tree_t*)str ); + vm_push_string( str ); + break; + } + case IN_GET_STRUCT_R: { + short field; + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_GET_STRUCT_R %d\n", field ); + + tree_t *obj = vm_pop_tree(); + tree_t *val = colm_struct_get_field( obj, tree_t*, field ); + colm_tree_upref( prg, val ); + vm_push_tree( val ); + break; + } + case IN_GET_STRUCT_WC: { + short field; + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_GET_STRUCT_WC %d\n", field ); + + tree_t *obj = vm_pop_tree(); + tree_t *val = colm_struct_get_field( obj, tree_t*, field ); + colm_tree_upref( prg, val ); + vm_push_tree( val ); + + break; + } + case IN_GET_STRUCT_WV: { + short field; + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_GET_STRUCT_WV\n" ); + + tree_t *obj = vm_pop_tree(); + tree_t *val = colm_struct_get_field( obj, tree_t*, field ); + colm_tree_upref( prg, val ); + vm_push_tree( val ); + + /* Set up the reverse instruction. 
*/ + rcode_code( exec, IN_GET_STRUCT_BKT ); + rcode_half( exec, field ); + break; + } + case IN_GET_STRUCT_BKT: { + short field; + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_GET_STRUCT_BKT\n" ); + + tree_t *obj = vm_pop_tree(); + colm_tree_downref( prg, sp, obj ); + + tree_t *split = get_field_split( prg, obj, field ); + colm_tree_upref( prg, split ); + vm_push_tree( split ); + break; + } + case IN_SET_STRUCT_WC: { + short field; + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_SET_STRUCT_WC %d\n", field ); + + tree_t *obj = vm_pop_tree(); + tree_t *val = vm_pop_tree(); + + /* Downref the old value. */ + tree_t *prev = colm_struct_get_field( obj, tree_t*, field ); + colm_tree_downref( prg, sp, prev ); + colm_struct_set_field( obj, tree_t*, field, val ); + break; + } + case IN_SET_STRUCT_WV: { + short field; + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_SET_STRUCT_WV %d\n", field ); + + struct_t *obj = vm_pop_struct(); + tree_t *val = vm_pop_tree(); + + /* Save the old value, then set the field. */ + tree_t *prev = colm_struct_get_field( obj, tree_t*, field ); + colm_struct_set_field( obj, tree_t*, field, val ); + + /* Set up the reverse instruction. */ + rcode_code( exec, IN_SET_STRUCT_BKT ); + rcode_half( exec, field ); + rcode_word( exec, (word_t)prev ); + rcode_unit_term( exec ); + break; + } + case IN_SET_STRUCT_BKT: { + short field; + tree_t *val; + read_half( field ); + read_tree( val ); + + debug( prg, REALM_BYTECODE, "IN_SET_STRUCT_BKT\n" ); + + tree_t *obj = vm_pop_tree(); + + /* Downref the old value. 
*/ + tree_t *prev = colm_struct_get_field( obj, tree_t*, field ); + colm_tree_downref( prg, sp, prev ); + + colm_struct_set_field( obj, tree_t*, field, val ); + break; + } + case IN_GET_STRUCT_VAL_R: { + short field; + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_GET_STRUCT_VAL_R %d\n", field ); + + tree_t *obj = vm_pop_tree(); + tree_t *val = colm_struct_get_field( obj, tree_t*, field ); + vm_push_tree( val ); + break; + } + case IN_SET_STRUCT_VAL_WC: { + short field; + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_SET_STRUCT_VAL_WC %d\n", field ); + + struct_t *strct = vm_pop_struct(); + tree_t *val = vm_pop_tree(); + + colm_struct_set_field( strct, tree_t*, field, val ); + break; + } + case IN_SET_STRUCT_VAL_WV: { + short field; + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_SET_STRUCT_VAL_WV %d\n", field ); + + struct_t *strct = vm_pop_struct(); + tree_t *val = vm_pop_tree(); + + tree_t *prev = colm_struct_get_field( strct, tree_t*, field ); + colm_struct_set_field( strct, tree_t*, field, val ); + + rcode_code( exec, IN_SET_STRUCT_VAL_BKT ); + rcode_half( exec, field ); + rcode_word( exec, (word_t)prev ); + rcode_unit_term( exec ); + break; + } + case IN_SET_STRUCT_VAL_BKT: { + short field; + tree_t *val; + read_half( field ); + read_tree( val ); + + debug( prg, REALM_BYTECODE, "IN_SET_STRUCT_VAL_BKT\n" ); + + tree_t *obj = vm_pop_tree(); + + colm_struct_set_field( obj, tree_t*, field, val ); + break; + } + case IN_GET_RHS_VAL_R: { + debug( prg, REALM_BYTECODE, "IN_GET_RHS_VAL_R\n" ); + int i, done = 0; + uchar len; + + tree_t *obj = vm_pop_tree(), *val = 0; + colm_tree_downref( prg, sp, obj ); + + read_byte( len ); + for ( i = 0; i < len; i++ ) { + uchar prod_num, child_num; + read_byte( prod_num ); + read_byte( child_num ); + if ( !done && obj->prod_num == prod_num ) { + val = get_rhs_el( prg, obj, child_num ); + done = 1; + } + } + + colm_tree_upref( prg, val ); + vm_push_tree( val ); + break; + } + case IN_GET_RHS_VAL_WC: + 
fatal( "UNIMPLEMENTED INSRUCTION: IN_GET_RHS_VAL_WC\n" ); + break; + case IN_GET_RHS_VAL_WV: + fatal( "UNIMPLEMENTED INSRUCTION: IN_GET_RHS_VAL_WV\n" ); + break; + case IN_GET_RHS_VAL_BKT: + fatal( "UNIMPLEMENTED INSRUCTION: IN_GET_RHS_VAL_BKT\n" ); + break; + + case IN_SET_RHS_VAL_WC: + debug( prg, REALM_BYTECODE, "IN_SET_RHS_VAL_WC\n" ); + int i, done = 0; + uchar len; + + tree_t *obj = vm_pop_tree(); + tree_t *val = vm_pop_tree(); + colm_tree_downref( prg, sp, obj ); + + read_byte( len ); + for ( i = 0; i < len; i++ ) { + uchar prod_num, child_num; + read_byte( prod_num ); + read_byte( child_num ); + if ( !done && obj->prod_num == prod_num ) { + tree_t *prev = get_rhs_el( prg, obj, child_num ); + colm_tree_downref( prg, sp, prev ); + set_rhs_el( prg, obj, child_num, val ); + done = 1; + } + } + + //colm_tree_upref( prg, val ); + //vm_push_tree( val ); + break; + case IN_SET_RHS_VAL_WV: + fatal( "UNIMPLEMENTED INSRUCTION: IN_SET_RHS_VAL_WV\n" ); + break; + case IN_SET_RHS_VAL_BKT: + fatal( "UNIMPLEMENTED INSRUCTION: IN_SET_RHS_VAL_BKT\n" ); + break; + case IN_POP_TREE: { + debug( prg, REALM_BYTECODE, "IN_POP_TREE\n" ); + + tree_t *val = vm_pop_tree(); + colm_tree_downref( prg, sp, val ); + break; + } + case IN_POP_VAL: { + debug( prg, REALM_BYTECODE, "IN_POP_VAL\n" ); + + vm_pop_tree(); + break; + } + case IN_POP_N_WORDS: { + short n; + read_half( n ); + + debug( prg, REALM_BYTECODE, "IN_POP_N_WORDS %hd\n", n ); + + vm_popn( n ); + break; + } + case IN_INT_TO_STR: { + debug( prg, REALM_BYTECODE, "IN_INT_TO_STR\n" ); + + value_t i = vm_pop_value(); + head_t *res = int_to_str( prg, (long)i ); + tree_t *str = construct_string( prg, res ); + colm_tree_upref( prg, str ); + vm_push_tree( str ); + break; + } + case IN_TREE_TO_STR_XML: { + debug( prg, REALM_BYTECODE, "IN_TREE_TO_STR_XML_AC\n" ); + + tree_t *tree = vm_pop_tree(); + head_t *res = tree_to_str_xml( prg, sp, tree, false, false ); + tree_t *str = construct_string( prg, res ); + colm_tree_upref( prg, str ); + 
vm_push_tree( str ); + colm_tree_downref( prg, sp, tree ); + break; + } + case IN_TREE_TO_STR_XML_AC: { + debug( prg, REALM_BYTECODE, "IN_TREE_TO_STR_XML_AC\n" ); + + tree_t *tree = vm_pop_tree(); + head_t *res = tree_to_str_xml_ac( prg, sp, tree, false, false ); + tree_t *str = construct_string( prg, res ); + colm_tree_upref( prg, str ); + vm_push_tree( str ); + colm_tree_downref( prg, sp, tree ); + break; + } + case IN_TREE_TO_STR_POSTFIX: { + debug( prg, REALM_BYTECODE, "IN_TREE_TO_STR_XML_AC\n" ); + + tree_t *tree = vm_pop_tree(); + head_t *res = tree_to_str_postfix( prg, sp, tree, false, false ); + tree_t *str = construct_string( prg, res ); + colm_tree_upref( prg, str ); + vm_push_tree( str ); + colm_tree_downref( prg, sp, tree ); + break; + } + case IN_TREE_TO_STR: { + debug( prg, REALM_BYTECODE, "IN_TREE_TO_STR\n" ); + + tree_t *tree = vm_pop_tree(); + head_t *res = tree_to_str( prg, sp, tree, false, false ); + tree_t *str = construct_string( prg, res ); + colm_tree_upref( prg, str ); + vm_push_tree( str ); + colm_tree_downref( prg, sp, tree ); + break; + } + case IN_TREE_TO_STR_TRIM: { + debug( prg, REALM_BYTECODE, "IN_TREE_TO_STR_TRIM\n" ); + + tree_t *tree = vm_pop_tree(); + head_t *res = tree_to_str( prg, sp, tree, true, false ); + tree_t *str = construct_string( prg, res ); + colm_tree_upref( prg, str ); + vm_push_tree( str ); + colm_tree_downref( prg, sp, tree ); + break; + } + case IN_TREE_TO_STR_TRIM_A: { + debug( prg, REALM_BYTECODE, "IN_TREE_TO_STR_TRIM_A\n" ); + + tree_t *tree = vm_pop_tree(); + head_t *res = tree_to_str( prg, sp, tree, true, true ); + tree_t *str = construct_string( prg, res ); + colm_tree_upref( prg, str ); + vm_push_tree( str ); + colm_tree_downref( prg, sp, tree ); + break; + } + case IN_TREE_TRIM: { + debug( prg, REALM_BYTECODE, "IN_TREE_TRIM\n" ); + + tree_t *tree = vm_pop_tree(); + tree_t *trimmed = tree_trim( prg, sp, tree ); + vm_push_tree( trimmed ); + break; + } + case IN_CONCAT_STR: { + debug( prg, REALM_BYTECODE, 
"IN_CONCAT_STR\n" ); + + str_t *s2 = vm_pop_string(); + str_t *s1 = vm_pop_string(); + head_t *res = concat_str( s1->value, s2->value ); + tree_t *str = construct_string( prg, res ); + colm_tree_upref( prg, str ); + colm_tree_downref( prg, sp, (tree_t*)s1 ); + colm_tree_downref( prg, sp, (tree_t*)s2 ); + vm_push_tree( str ); + break; + } + + case IN_STR_LENGTH: { + debug( prg, REALM_BYTECODE, "IN_STR_LENGTH\n" ); + + str_t *str = vm_pop_string(); + long len = string_length( str->value ); + value_t res = len; + vm_push_value( res ); + colm_tree_downref( prg, sp, (tree_t*)str ); + break; + } + case IN_JMP_FALSE_TREE: { + short dist; + read_half( dist ); + + debug( prg, REALM_BYTECODE, "IN_JMP_FALSE_TREE %d\n", dist ); + + tree_t *tree = vm_pop_tree(); + if ( test_false( prg, tree ) ) + instr += dist; + colm_tree_downref( prg, sp, tree ); + break; + } + case IN_JMP_TRUE_TREE: { + short dist; + read_half( dist ); + + debug( prg, REALM_BYTECODE, "IN_JMP_TRUE_TREE %d\n", dist ); + + tree_t *tree = vm_pop_tree(); + if ( !test_false( prg, tree ) ) + instr += dist; + colm_tree_downref( prg, sp, tree ); + break; + } + case IN_JMP_FALSE_VAL: { + short dist; + read_half( dist ); + + debug( prg, REALM_BYTECODE, "IN_JMP_FALSE_VAL %d\n", dist ); + + tree_t *tree = vm_pop_tree(); + if ( tree == 0 ) + instr += dist; + break; + } + case IN_JMP_TRUE_VAL: { + short dist; + read_half( dist ); + + debug( prg, REALM_BYTECODE, "IN_JMP_TRUE_VAL %d\n", dist ); + + tree_t *tree = vm_pop_tree(); + if ( tree != 0 ) + instr += dist; + break; + } + case IN_JMP: { + short dist; + read_half( dist ); + + debug( prg, REALM_BYTECODE, "IN_JMP\n" ); + + instr += dist; + break; + } + case IN_REJECT: { + debug( prg, REALM_BYTECODE, "IN_REJECT\n" ); + exec->parser->pda_run->reject = true; + break; + } + + /* + * Binary comparison operators. 
+ */ + case IN_TST_EQL_TREE: { + debug( prg, REALM_BYTECODE, "IN_TST_EQL_TREE\n" ); + + tree_t *o2 = vm_pop_tree(); + tree_t *o1 = vm_pop_tree(); + long r = colm_cmp_tree( prg, o1, o2 ); + value_t val = r == 0 ? TRUE_VAL : FALSE_VAL; + vm_push_value( val ); + colm_tree_downref( prg, sp, o1 ); + colm_tree_downref( prg, sp, o2 ); + break; + } + case IN_TST_EQL_VAL: { + debug( prg, REALM_BYTECODE, "IN_TST_EQL_VAL\n" ); + + value_t o2 = vm_pop_value(); + value_t o1 = vm_pop_value(); + value_t val = o1 == o2 ? TRUE_VAL : FALSE_VAL; + vm_push_value( val ); + break; + } + case IN_TST_NOT_EQL_TREE: { + debug( prg, REALM_BYTECODE, "IN_TST_NOT_EQL_TREE\n" ); + + tree_t *o2 = vm_pop_tree(); + tree_t *o1 = vm_pop_tree(); + long r = colm_cmp_tree( prg, o1, o2 ); + value_t val = r != 0 ? TRUE_VAL : FALSE_VAL; + vm_push_value( val ); + colm_tree_downref( prg, sp, o1 ); + colm_tree_downref( prg, sp, o2 ); + break; + } + case IN_TST_NOT_EQL_VAL: { + debug( prg, REALM_BYTECODE, "IN_TST_NOT_EQL_VAL\n" ); + + value_t o2 = vm_pop_value(); + value_t o1 = vm_pop_value(); + value_t val = o1 != o2 ? TRUE_VAL : FALSE_VAL; + vm_push_value( val ); + break; + } + case IN_TST_LESS_VAL: { + debug( prg, REALM_BYTECODE, "IN_TST_LESS_VAL\n" ); + + value_t o2 = vm_pop_value(); + value_t o1 = vm_pop_value(); + value_t res = (long)o1 < (long)o2 ? TRUE_VAL : FALSE_VAL; + vm_push_value( res ); + break; + } + case IN_TST_LESS_TREE: { + debug( prg, REALM_BYTECODE, "IN_TST_LESS_TREE\n" ); + + tree_t *o2 = vm_pop_tree(); + tree_t *o1 = vm_pop_tree(); + long r = colm_cmp_tree( prg, o1, o2 ); + value_t val = r < 0 ? TRUE_VAL : FALSE_VAL; + vm_push_value( val ); + colm_tree_downref( prg, sp, o1 ); + colm_tree_downref( prg, sp, o2 ); + break; + } + case IN_TST_LESS_EQL_VAL: { + debug( prg, REALM_BYTECODE, "IN_TST_LESS_EQL_VAL\n" ); + + value_t o2 = vm_pop_value(); + value_t o1 = vm_pop_value(); + value_t val = (long)o1 <= (long)o2 ? 
TRUE_VAL : FALSE_VAL; + vm_push_value( val ); + break; + } + case IN_TST_LESS_EQL_TREE: { + debug( prg, REALM_BYTECODE, "IN_TST_LESS_EQL_TREE\n" ); + + tree_t *o2 = vm_pop_tree(); + tree_t *o1 = vm_pop_tree(); + long r = colm_cmp_tree( prg, o1, o2 ); + value_t val = r <= 0 ? TRUE_VAL : FALSE_VAL; + vm_push_value( val ); + colm_tree_downref( prg, sp, o1 ); + colm_tree_downref( prg, sp, o2 ); + break; + } + case IN_TST_GRTR_VAL: { + debug( prg, REALM_BYTECODE, "IN_TST_GRTR_VAL\n" ); + + value_t o2 = vm_pop_value(); + value_t o1 = vm_pop_value(); + value_t val = (long)o1 > (long)o2 ? TRUE_VAL : FALSE_VAL; + vm_push_value( val ); + break; + } + case IN_TST_GRTR_TREE: { + debug( prg, REALM_BYTECODE, "IN_TST_GRTR_TREE\n" ); + + tree_t *o2 = vm_pop_tree(); + tree_t *o1 = vm_pop_tree(); + long r = colm_cmp_tree( prg, o1, o2 ); + value_t val = r > 0 ? TRUE_VAL : FALSE_VAL; + vm_push_value( val ); + colm_tree_downref( prg, sp, o1 ); + colm_tree_downref( prg, sp, o2 ); + break; + } + case IN_TST_GRTR_EQL_VAL: { + debug( prg, REALM_BYTECODE, "IN_TST_GRTR_EQL_VAL\n" ); + + value_t o2 = vm_pop_value(); + value_t o1 = vm_pop_value(); + + value_t val = (long)o1 >= (long)o2 ? TRUE_VAL : FALSE_VAL; + vm_push_value( val ); + break; + } + case IN_TST_GRTR_EQL_TREE: { + debug( prg, REALM_BYTECODE, "IN_TST_GRTR_EQL_TREE\n" ); + + tree_t *o2 = vm_pop_tree(); + tree_t *o1 = vm_pop_tree(); + long r = colm_cmp_tree( prg, o1, o2 ); + value_t val = r >= 0 ? TRUE_VAL : FALSE_VAL; + vm_push_value( val ); + colm_tree_downref( prg, sp, o1 ); + colm_tree_downref( prg, sp, o2 ); + break; + } + case IN_TST_LOGICAL_AND: { + debug( prg, REALM_BYTECODE, "IN_TST_LOGICAL_AND\n" ); + + value_t o2 = vm_pop_value(); + value_t o1 = vm_pop_value(); + value_t val = o1 && o2 ? TRUE_VAL : FALSE_VAL; + vm_push_value( val ); + break; + } + case IN_TST_LOGICAL_OR: { + debug( prg, REALM_BYTECODE, "IN_TST_LOGICAL_OR\n" ); + + value_t o2 = vm_pop_value(); + value_t o1 = vm_pop_value(); + value_t val = o1 || o2 ? 
TRUE_VAL : FALSE_VAL; + vm_push_value( val ); + break; + } + + case IN_TST_NZ_TREE: { + debug( prg, REALM_BYTECODE, "IN_TST_NZ_TREE\n" ); + + tree_t *tree = vm_pop_tree(); + long r = !test_false( prg, tree ); + colm_tree_downref( prg, sp, tree ); + vm_push_value( r ); + break; + } + + case IN_NOT_VAL: { + debug( prg, REALM_BYTECODE, "IN_NOT_VAL\n" ); + + value_t o1 = vm_pop_value(); + value_t val = o1 == 0 ? TRUE_VAL : FALSE_VAL; + vm_push_value( val ); + break; + } + + case IN_NOT_TREE: { + debug( prg, REALM_BYTECODE, "IN_NOT_TREE\n" ); + + tree_t *tree = vm_pop_tree(); + long r = test_false( prg, tree ); + value_t val = r ? TRUE_VAL : FALSE_VAL; + vm_push_value( val ); + colm_tree_downref( prg, sp, tree ); + break; + } + + case IN_ADD_INT: { + debug( prg, REALM_BYTECODE, "IN_ADD_INT\n" ); + + value_t o2 = vm_pop_value(); + value_t o1 = vm_pop_value(); + long r = (long)o1 + (long)o2; + value_t val = r; + vm_push_value( val ); + break; + } + case IN_MULT_INT: { + debug( prg, REALM_BYTECODE, "IN_MULT_INT\n" ); + + value_t o2 = vm_pop_value(); + value_t o1 = vm_pop_value(); + long r = (long)o1 * (long)o2; + value_t val = r; + vm_push_value( val ); + break; + } + case IN_DIV_INT: { + debug( prg, REALM_BYTECODE, "IN_DIV_INT\n" ); + + value_t o2 = vm_pop_value(); + value_t o1 = vm_pop_value(); + long r = (long)o1 / (long)o2; + value_t val = r; + vm_push_value( val ); + break; + } + case IN_SUB_INT: { + debug( prg, REALM_BYTECODE, "IN_SUB_INT\n" ); + + value_t o2 = vm_pop_value(); + value_t o1 = vm_pop_value(); + long r = (long)o1 - (long)o2; + value_t val = r; + vm_push_value( val ); + break; + } + case IN_DUP_VAL: { + debug( prg, REALM_BYTECODE, "IN_DUP_VAL\n" ); + + word_t val = (word_t)vm_top(); + vm_push_type( word_t, val ); + break; + } + case IN_DUP_TREE: { + debug( prg, REALM_BYTECODE, "IN_DUP_TREE\n" ); + + tree_t *val = vm_top(); + colm_tree_upref( prg, val ); + vm_push_tree( val ); + break; + } + case IN_TRITER_FROM_REF: { + short field; + half_t arg_size; + 
half_t search_type_id; + read_half( field ); + read_half( arg_size ); + read_half( search_type_id ); + + debug( prg, REALM_BYTECODE, "IN_TRITER_FROM_REF " + "%hd %hd %hd\n", field, arg_size, search_type_id ); + + ref_t root_ref; + root_ref.kid = vm_pop_kid(); + root_ref.next = vm_pop_ref(); + void *mem = vm_get_plocal(exec, field); + + tree_t **stack_root = vm_ptop(); + long root_size = vm_ssize(); + + colm_init_tree_iter( (tree_iter_t*)mem, stack_root, + arg_size, root_size, &root_ref, search_type_id ); + break; + } + case IN_TRITER_UNWIND: + case IN_TRITER_DESTROY: { + short field; + read_half( field ); + + tree_iter_t *iter = (tree_iter_t*) vm_get_plocal(exec, field); + debug( prg, REALM_BYTECODE, "IN_TRITER_DESTROY %hd %d\n", + field, iter->yield_size ); + colm_tree_iter_destroy( prg, &sp, iter ); + break; + } + case IN_REV_TRITER_FROM_REF: { + short field; + half_t arg_size; + half_t search_type_id; + read_half( field ); + read_half( arg_size ); + read_half( search_type_id ); + + debug( prg, REALM_BYTECODE, "IN_REV_TRITER_FROM_REF " + "%hd %hd %hd\n", field, arg_size, search_type_id ); + + ref_t root_ref; + root_ref.kid = vm_pop_kid(); + root_ref.next = vm_pop_ref(); + + tree_t **stack_root = vm_ptop(); + long root_size = vm_ssize(); + + int children = 0; + kid_t *kid = tree_child( prg, root_ref.kid->tree ); + while ( kid != 0 ) { + vm_push_kid( kid ); + kid = kid->next; + children++; + } + + void *mem = vm_get_plocal(exec, field); + colm_init_rev_tree_iter( (rev_tree_iter_t*)mem, stack_root, + arg_size, root_size, &root_ref, search_type_id, children ); + break; + } + case IN_REV_TRITER_UNWIND: + case IN_REV_TRITER_DESTROY: { + short field; + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_REV_TRITER_DESTROY\n" ); + + rev_tree_iter_t *iter = (rev_tree_iter_t*) vm_get_plocal(exec, field); + colm_rev_tree_iter_destroy( prg, &sp, iter ); + break; + } + case IN_TREE_SEARCH: { + word_t id; + read_word( id ); + + debug( prg, REALM_BYTECODE, 
"IN_TREE_SEARCH\n" ); + + tree_t *tree = vm_pop_tree(); + tree_t *res = tree_search( prg, tree, id ); + colm_tree_upref( prg, res ); + vm_push_tree( res ); + colm_tree_downref( prg, sp, tree ); + break; + } + case IN_TRITER_ADVANCE: { + short field; + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_TRITER_ADVANCE\n" ); + + tree_iter_t *iter = (tree_iter_t*) vm_get_plocal(exec, field); + tree_t *res = tree_iter_advance( prg, &sp, iter ); + //colm_tree_upref( prg, res ); + vm_push_tree( res ); + break; + } + case IN_TRITER_NEXT_CHILD: { + short field; + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_TRITER_NEXT_CHILD\n" ); + + tree_iter_t *iter = (tree_iter_t*) vm_get_plocal(exec, field); + tree_t *res = tree_iter_next_child( prg, &sp, iter ); + //colm_tree_upref( prg, res ); + vm_push_tree( res ); + break; + } + case IN_REV_TRITER_PREV_CHILD: { + short field; + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_REV_TRITER_PREV_CHILD\n" ); + + rev_tree_iter_t *iter = (rev_tree_iter_t*) vm_get_plocal(exec, field); + tree_t *res = tree_rev_iter_prev_child( prg, &sp, iter ); + //colm_tree_upref( prg, res ); + vm_push_tree( res ); + break; + } + case IN_TRITER_NEXT_REPEAT: { + short field; + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_TRITER_NEXT_REPEAT\n" ); + + tree_iter_t *iter = (tree_iter_t*) vm_get_plocal(exec, field); + tree_t *res = tree_iter_next_repeat( prg, &sp, iter ); + //colm_tree_upref( prg, res ); + vm_push_tree( res ); + break; + } + case IN_TRITER_PREV_REPEAT: { + short field; + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_TRITER_PREV_REPEAT\n" ); + + tree_iter_t *iter = (tree_iter_t*) vm_get_plocal(exec, field); + tree_t *res = tree_iter_prev_repeat( prg, &sp, iter ); + //colm_tree_upref( prg, res ); + vm_push_tree( res ); + break; + } + case IN_TRITER_GET_CUR_R: { + short field; + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_TRITER_GET_CUR_R\n" ); + + tree_iter_t *iter = (tree_iter_t*) 
vm_get_plocal(exec, field); + tree_t *tree = tree_iter_deref_cur( iter ); + colm_tree_upref( prg, tree ); + vm_push_tree( tree ); + break; + } + case IN_TRITER_GET_CUR_WC: { + short field; + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_TRITER_GET_CUR_WC\n" ); + + tree_iter_t *iter = (tree_iter_t*) vm_get_plocal(exec, field); + split_iter_cur( prg, &sp, iter ); + tree_t *tree = tree_iter_deref_cur( iter ); + colm_tree_upref( prg, tree ); + vm_push_tree( tree ); + break; + } + case IN_TRITER_SET_CUR_WC: { + short field; + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_TRITER_SET_CUR_WC\n" ); + + tree_t *tree = vm_pop_tree(); + tree_iter_t *iter = (tree_iter_t*) vm_get_plocal(exec, field); + split_iter_cur( prg, &sp, iter ); + tree_t *old = tree_iter_deref_cur( iter ); + set_triter_cur( prg, iter, tree ); + colm_tree_downref( prg, sp, old ); + break; + } + case IN_GEN_ITER_FROM_REF: { + short field; + half_t arg_size; + half_t generic_id; + read_half( field ); + read_half( arg_size ); + read_half( generic_id ); + + debug( prg, REALM_BYTECODE, "IN_GEN_ITER_FROM_REF " + "%hd %hd %hd\n", field, arg_size, generic_id ); + + ref_t root_ref; + root_ref.kid = vm_pop_kid(); + root_ref.next = vm_pop_ref(); + void *mem = vm_get_plocal(exec, field); + + tree_t **stack_root = vm_ptop(); + long root_size = vm_ssize(); + + colm_init_list_iter( (generic_iter_t*)mem, stack_root, arg_size, + root_size, &root_ref, generic_id ); + break; + } + case IN_GEN_ITER_UNWIND: + case IN_GEN_ITER_DESTROY: { + short field; + read_half( field ); + + generic_iter_t *iter = (generic_iter_t*) vm_get_plocal(exec, field); + + debug( prg, REALM_BYTECODE, "IN_LIST_ITER_DESTROY %d\n", iter->yield_size ); + + colm_list_iter_destroy( prg, &sp, iter ); + break; + } + case IN_LIST_ITER_ADVANCE: { + short field; + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_LIST_ITER_ADVANCE\n" ); + + generic_iter_t *iter = (generic_iter_t*) vm_get_plocal(exec, field); + tree_t *res = 
colm_list_iter_advance( prg, &sp, iter ); + //colm_tree_upref( prg, res ); + vm_push_tree( res ); + break; + } + case IN_REV_LIST_ITER_ADVANCE: { + short field; + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_REV_LIST_ITER_ADVANCE\n" ); + + generic_iter_t *iter = (generic_iter_t*) vm_get_plocal(exec, field); + tree_t *res = colm_rev_list_iter_advance( prg, &sp, iter ); + //colm_tree_upref( prg, res ); + vm_push_tree( res ); + break; + } + case IN_MAP_ITER_ADVANCE: { + short field; + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_MAP_ITER_ADVANCE\n" ); + + generic_iter_t *iter = (generic_iter_t*) vm_get_plocal(exec, field); + tree_t *res = colm_map_iter_advance( prg, &sp, iter ); + //colm_tree_upref( prg, res ); + vm_push_tree( res ); + break; + } + case IN_GEN_ITER_GET_CUR_R: { + short field; + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_GEN_ITER_GET_CUR_R\n" ); + + generic_iter_t *iter = (generic_iter_t*) vm_get_plocal(exec, field); + tree_t *tree = colm_list_iter_deref_cur( prg, iter ); + //colm_tree_upref( prg, tree ); + vm_push_tree( tree ); + break; + } + case IN_GEN_VITER_GET_CUR_R: { + short field; + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_GEN_VITER_GET_CUR_R\n" ); + + generic_iter_t *iter = (generic_iter_t*) vm_get_plocal(exec, field); + value_t value = colm_viter_deref_cur( prg, iter ); + vm_push_value( value ); + break; + } + case IN_MATCH: { + half_t pattern_id; + read_half( pattern_id ); + + debug( prg, REALM_BYTECODE, "IN_MATCH\n" ); + + tree_t *tree = vm_pop_tree(); + + /* Run the match, push the result. */ + int root_node = prg->rtd->pat_repl_info[pattern_id].offset; + + /* Bindings are indexed starting at 1. Zero bindId to represent no + * binding. We make a space for it here rather than do math at + * access them. 
*/ + long num_bindings = prg->rtd->pat_repl_info[pattern_id].num_bindings; + tree_t *bindings[1+num_bindings]; + memset( bindings, 0, sizeof(tree_t*)*(1+num_bindings) ); + + kid_t kid; + kid.tree = tree; + kid.next = 0; + int matched = match_pattern( bindings, prg, root_node, &kid, false ); + + if ( !matched ) + memset( bindings, 0, sizeof(tree_t*)*(1+num_bindings) ); + else { + int b; + for ( b = 1; b <= num_bindings; b++ ) + assert( bindings[b] != 0 ); + } + + tree_t *result = matched ? tree : 0; + colm_tree_upref( prg, result ); + vm_push_tree( result ? tree : 0 ); + int b; + for ( b = 1; b <= num_bindings; b++ ) { + colm_tree_upref( prg, bindings[b] ); + vm_push_tree( bindings[b] ); + } + + colm_tree_downref( prg, sp, tree ); + break; + } + + case IN_PROD_NUM: { + debug( prg, REALM_BYTECODE, "IN_PROD_NUM\n" ); + + tree_t *tree = vm_pop_tree(); + colm_tree_downref( prg, sp, tree ); + + value_t v = tree->prod_num; + vm_push_value( v ); + break; + } + + case IN_PRINT_TREE: { + uchar trim; + read_byte( trim ); + + debug( prg, REALM_BYTECODE, "IN_PRINT_TREE %d\n", (int)trim ); + + tree_t *to_send = vm_pop_tree(); + stream_t *stream = vm_pop_stream(); + + struct stream_impl *si = stream_to_impl( stream ); + + int auto_trim; + if ( trim == TRIM_YES ) + auto_trim = true; + else if ( trim == TRIM_NO ) + auto_trim = false; + else + auto_trim = si->funcs->get_option( prg, si, 0 ); + + si->funcs->print_tree( prg, sp, si, to_send, auto_trim ); + vm_push_stream( stream ); + colm_tree_downref( prg, sp, to_send ); + break; + } + + case IN_SEND_TEXT_W: { + uchar trim; + read_byte( trim ); + + debug( prg, REALM_BYTECODE, "IN_SEND_TEXT_W %d\n", (int)trim ); + + tree_t *to_send = vm_pop_tree(); + parser_t *parser = vm_pop_parser(); + + struct input_impl *si = input_to_impl( parser->input ); + + int auto_trim; + if ( trim == TRIM_YES ) + auto_trim = true; + else if ( trim == TRIM_NO ) + auto_trim = false; + else + auto_trim = si->funcs->get_option( prg, si, 0 ); + + word_t len = 
stream_append_text( prg, sp, parser->input, to_send, auto_trim ); + + vm_push_parser( parser ); + + if ( !exec->WV ) + colm_tree_downref( prg, sp, to_send ); + else { + rcode_unit_start( exec ); + rcode_code( exec, IN_SEND_TEXT_BKT ); + rcode_word( exec, (word_t) parser ); + rcode_word( exec, (word_t) to_send ); + rcode_word( exec, (word_t) len ); + rcode_unit_term( exec ); + } + + exec->steps = parser->pda_run->steps; + exec->pcr = PCR_START; + break; + } + + case IN_SEND_TEXT_BKT: { + parser_t *parser; + tree_t *sent; + word_t len; + read_parser( parser ); + read_tree( sent ); + read_word( len ); + + debug( prg, REALM_BYTECODE, "IN_SEND_TEXT_BKT\n" ); + + struct input_impl *si = input_to_impl( parser->input ); + stream_undo_append( prg, sp, si, sent, len ); + + colm_tree_downref( prg, sp, sent ); + break; + } + + case IN_SEND_TREE_W: { + uchar trim; + read_byte( trim ); + + debug( prg, REALM_BYTECODE, "IN_SEND_TREE_W %d\n", (int)trim ); + + tree_t *to_send = vm_pop_tree(); + parser_t *parser = vm_pop_parser(); + + struct input_impl *si = input_to_impl( parser->input ); + + int auto_trim; + if ( trim == TRIM_YES ) + auto_trim = true; + else if ( trim == TRIM_NO ) + auto_trim = false; + else + auto_trim = si->funcs->get_option( prg, si, 0 ); + + if ( auto_trim ) + to_send = tree_trim( prg, sp, to_send ); + + word_t len = stream_append_tree( prg, sp, parser->input, to_send ); + + vm_push_parser( parser ); + + if ( !exec->WV ) + colm_tree_downref( prg, sp, to_send ); + else { + rcode_unit_start( exec ); + rcode_code( exec, IN_SEND_TREE_BKT ); + rcode_word( exec, (word_t) parser ); + rcode_word( exec, (word_t) to_send ); + rcode_word( exec, (word_t) len ); + rcode_unit_term( exec ); + } + + exec->steps = parser->pda_run->steps; + exec->pcr = PCR_START; + break; + } + + case IN_SEND_TREE_BKT: { + parser_t *parser; + tree_t *sent; + word_t len; + read_parser( parser ); + read_tree( sent ); + read_word( len ); + + debug( prg, REALM_BYTECODE, "IN_SEND_TREE_BKT\n" ); + + 
struct input_impl *si = input_to_impl( parser->input ); + stream_undo_append( prg, sp, si, sent, len ); + + colm_tree_downref( prg, sp, sent ); + break; + } + + case IN_SEND_NOTHING: { + parser_t *parser = vm_pop_parser(); + vm_push_parser( parser ); + exec->steps = parser->pda_run->steps; + exec->pcr = PCR_START; + break; + } + case IN_SEND_STREAM_W: { + debug( prg, REALM_BYTECODE, "IN_SEND_STREAM_W\n" ); + + stream_t *to_send = vm_pop_stream(); + parser_t *parser = vm_pop_parser(); + + word_t len = stream_append_stream( prg, sp, parser->input, to_send ); + + vm_push_parser( parser ); + + if ( exec->WV ) { + rcode_unit_start( exec ); + rcode_code( exec, IN_SEND_STREAM_BKT ); + rcode_word( exec, (word_t) parser ); + rcode_word( exec, (word_t) to_send ); + rcode_word( exec, (word_t) len ); + rcode_unit_term( exec ); + } + + exec->steps = parser->pda_run->steps; + exec->pcr = PCR_START; + + break; + } + + case IN_SEND_STREAM_BKT: { + parser_t *parser; + tree_t *sent; + word_t len; + read_parser( parser ); + read_tree( sent ); + read_word( len ); + + debug( prg, REALM_BYTECODE, "IN_SEND_STREAM_BKT\n" ); + + struct input_impl *si = input_to_impl( parser->input ); + stream_undo_append_stream( prg, sp, si, sent, len ); + break; + } + + case IN_SEND_EOF_W: { + struct input_impl *si; + + debug( prg, REALM_BYTECODE, "IN_SEND_EOF_W\n" ); + parser_t *parser = vm_pop_parser(); + vm_push_parser( parser ); + + si = input_to_impl( parser->input ); + si->funcs->set_eof_mark( prg, si, true ); + + if ( exec->WV ) { + rcode_unit_start( exec ); + rcode_code( exec, IN_SEND_EOF_BKT ); + rcode_word( exec, (word_t) parser ); + rcode_unit_term( exec ); + } + + exec->steps = parser->pda_run->steps; + exec->pcr = PCR_START; + break; + } + + case IN_SEND_EOF_BKT: { + parser_t *parser; + read_parser( parser ); + + debug( prg, REALM_BYTECODE, "IN_SEND_EOF_BKT\n" ); + + struct input_impl *si = input_to_impl( parser->input ); + si->funcs->set_eof_mark( prg, si, false ); + break; + } + + case 
IN_INPUT_CLOSE_WC: { + debug( prg, REALM_BYTECODE, "IN_INPUT_CLOSE_WC\n" ); + + stream_t *stream = vm_pop_stream(); + struct stream_impl *si = stream->impl; + + si->funcs->close_stream( prg, si ); + + vm_push_stream( stream ); + break; + } + case IN_INPUT_AUTO_TRIM_WC: { + debug( prg, REALM_BYTECODE, "IN_INPUT_AUTO_TRIM_WC\n" ); + + stream_t *stream = vm_pop_stream(); + value_t auto_trim = vm_pop_value(); + struct stream_impl *si = stream->impl; + + si->funcs->set_option( prg, si, 0, (long) auto_trim ); + + vm_push_stream( stream ); + break; + } + case IN_IINPUT_AUTO_TRIM_WC: { + debug( prg, REALM_BYTECODE, "IN_INPUT_AUTO_TRIM_WC\n" ); + + input_t *input = vm_pop_input(); + value_t auto_trim = vm_pop_value(); + struct input_impl *ii = input->impl; + + ii->funcs->set_option( prg, ii, 0, (long) auto_trim ); + + vm_push_input( input ); + break; + } + + case IN_SET_ERROR: { + debug( prg, REALM_BYTECODE, "IN_SET_ERROR\n" ); + + tree_t *error = vm_pop_tree(); + colm_tree_downref( prg, sp, prg->error ); + prg->error = error; + break; + } + + case IN_GET_ERROR: { + debug( prg, REALM_BYTECODE, "IN_GET_ERROR\n" ); + + vm_pop_tree(); + colm_tree_upref( prg, prg->error ); + vm_push_tree( prg->error ); + break; + } + + /* stream: + * Push value and stash current on IN_PCR_CALL. The instructions + * exectued by a call need access to the stream the parser was called + * with. We need to preserver the stream for the caller, so we push + * first set it to the current stream. + * pcr: + * Need to preserve the pcr value between pda run invocations. Push + * current pcr value and start fresh with a new value on PCR_CALL. + * steps: + * Init from the PDA run when we start to parse. Need to preserve the + * starting steps value from the start of parsing to the moment we + * write the backtrack instruction. Start fresh with a private value + * on a PCR_CALL by pushing and initializing. 
*/ + + case IN_PARSE_INIT_BKT: { + debug( prg, REALM_BYTECODE, "IN_PARSE_INIT_BKT\n" ); + + parser_t *parser; + word_t steps; + + read_parser( parser ); + read_word( steps ); + + vm_push_parser( parser ); + + exec->steps = steps; + exec->pcr = PCR_START; + break; + } + + case IN_LOAD_RETVAL: { + debug( prg, REALM_BYTECODE, "IN_LOAD_RETVAL\n" ); + vm_push_tree( exec->ret_val ); + break; + } + + case IN_PCR_RET: { + debug( prg, REALM_BYTECODE, "IN_PCR_RET\n" ); + + if ( exec->frame_id >= 0 ) { + struct frame_info *fi = &prg->rtd->frame_info[exec->frame_id]; + downref_local_trees( prg, sp, exec, fi->locals, fi->locals_len ); + debug( prg, REALM_BYTECODE, "RET: %d\n", fi->frame_size ); + + vm_popn( fi->frame_size ); + } + + instr = vm_pop_type(code_t*); + + exec->WV = vm_pop_type(word_t); + exec->parser = vm_pop_parser(); + exec->pcr = vm_pop_type(word_t); + exec->steps = vm_pop_type(word_t); + exec->frame_id = vm_pop_type(long); + exec->iframe_ptr = vm_pop_type(tree_t**); + exec->frame_ptr = vm_pop_type(tree_t**); + + assert( instr != 0 ); + break; + } + + case IN_PCR_END_DECK: { + debug( prg, REALM_BYTECODE, "IN_PCR_END_DECK\n" ); + exec->parser->pda_run->on_deck = false; + break; + } + + case IN_PARSE_FRAG_W: { + parser_t *parser = vm_pop_parser(); + vm_push_parser( parser ); + + debug( prg, REALM_BYTECODE, "IN_PARSE_FRAG_W\n" ); + + exec->pcr = colm_parse_frag( prg, sp, parser->pda_run, + parser->input, exec->pcr ); + + /* If done, jump to the terminating instruction, otherwise fall + * through to call some code, then jump back here. 
*/ + if ( exec->pcr != PCR_DONE ) + instr = pcr_call( prg, exec, &sp, instr, parser ); + else { + if ( exec->WV ) { + rcode_unit_start( exec ); + + rcode_code( exec, IN_PARSE_INIT_BKT ); + rcode_word( exec, (word_t)parser ); + rcode_word( exec, (word_t)exec->steps ); + rcode_code( exec, IN_PARSE_FRAG_BKT ); + rcode_unit_term( exec ); + } + + if ( prg->induce_exit ) + goto out; + } + break; + } + + case IN_PARSE_FRAG_BKT: { + parser_t *parser = vm_pop_parser(); + vm_push_parser( parser ); + + debug( prg, REALM_BYTECODE, "IN_PARSE_FRAG_BKT\n" ); + + exec->pcr = colm_parse_undo_frag( prg, sp, parser->pda_run, + parser->input, exec->pcr, exec->steps ); + + if ( exec->pcr != PCR_DONE ) + instr = pcr_call( prg, exec, &sp, instr, parser ); + else { + vm_pop_parser(); + } + break; + } + + case IN_REDUCE_COMMIT: { + parser_t *parser = vm_pop_parser(); + vm_push_parser( parser ); + + debug( prg, REALM_BYTECODE, "IN_REDUCE_COMMIT\n" ); + + colm_parse_reduce_commit( prg, sp, parser->pda_run ); + break; + } + + + case IN_INPUT_PULL_WV: { + debug( prg, REALM_BYTECODE, "IN_INPUT_PULL_WV\n" ); + + input_t *input = vm_pop_input(); + tree_t *len = vm_pop_tree(); + tree_t *string = stream_pull_bc( prg, sp, 0, input, len ); + colm_tree_upref( prg, string ); + vm_push_tree( string ); + + /* Single unit. 
*/ + colm_tree_upref( prg, string ); + rcode_code( exec, IN_INPUT_PULL_BKT ); + rcode_word( exec, (word_t) string ); + rcode_unit_term( exec ); + + //colm_tree_downref( prg, sp, len ); + break; + } + + case IN_INPUT_PULL_WC: { + debug( prg, REALM_BYTECODE, "IN_INPUT_PULL_WC\n" ); + + input_t *input = vm_pop_input(); + tree_t *len = vm_pop_tree(); + tree_t *string = stream_pull_bc( prg, sp, 0, input, len ); + colm_tree_upref( prg, string ); + vm_push_tree( string ); + + //colm_tree_downref( prg, sp, len ); + break; + } + case IN_INPUT_PULL_BKT: { + tree_t *string; + read_tree( string ); + + input_t *input = vm_pop_input(); + + debug( prg, REALM_BYTECODE, "IN_INPUT_PULL_BKT\n" ); + + undo_pull( prg, input, string ); + colm_tree_downref( prg, sp, string ); + break; + } + case IN_INPUT_PUSH_WV: { + debug( prg, REALM_BYTECODE, "IN_INPUT_PUSH_WV\n" ); + + input_t *input = vm_pop_input(); + tree_t *tree = vm_pop_tree(); + long len = input_push( prg, sp, input_to_impl( input ), tree, false ); + vm_push_tree( 0 ); + + /* Single unit. */ + rcode_code( exec, IN_INPUT_PUSH_BKT ); + rcode_word( exec, len ); + rcode_unit_term( exec ); + + colm_tree_downref( prg, sp, tree ); + break; + } + case IN_INPUT_PUSH_IGNORE_WV: { + debug( prg, REALM_BYTECODE, "IN_INPUT_PUSH_IGNORE_WV\n" ); + + input_t *input = vm_pop_input(); + tree_t *tree = vm_pop_tree(); + long len = input_push( prg, sp, input_to_impl( input ), tree, true ); + vm_push_tree( 0 ); + + /* Single unit. 
*/ + rcode_code( exec, IN_INPUT_PUSH_BKT ); + rcode_word( exec, len ); + rcode_unit_term( exec ); + + colm_tree_downref( prg, sp, tree ); + break; + } + case IN_INPUT_PUSH_BKT: { + word_t len; + read_word( len ); + + debug( prg, REALM_BYTECODE, "IN_INPUT_PUSH_BKT %d\n", len ); + + input_t *input = vm_pop_input(); + colm_undo_stream_push( prg, sp, input_to_impl( input ), len ); + break; + } + case IN_INPUT_PUSH_STREAM_WV: { + debug( prg, REALM_BYTECODE, "IN_INPUT_PUSH_STREAM_WV\n" ); + + input_t *input = vm_pop_input(); + stream_t *to_push = vm_pop_stream(); + long len = input_push_stream( prg, sp, input_to_impl( input ), to_push ); + vm_push_tree( 0 ); + + /* Single unit. */ + rcode_code( exec, IN_INPUT_PUSH_BKT ); + rcode_word( exec, len ); + rcode_unit_term( exec ); + break; + } + case IN_INPUT_PUSH_STREAM_BKT: { + word_t len; + read_word( len ); + + debug( prg, REALM_BYTECODE, "IN_INPUT_PUSH_STREAM_BKT %d\n", len ); + + input_t *input = vm_pop_input(); + colm_undo_stream_push( prg, sp, input_to_impl( input ), len ); + break; + } + case IN_CONS_GENERIC: { + half_t generic_id; + half_t stop_id; + read_half( generic_id ); + read_half( stop_id ); + + debug( prg, REALM_BYTECODE, "IN_CONS_GENERIC %hd %hd\n", generic_id, stop_id ); + + struct_t *gen = colm_construct_generic( prg, generic_id, stop_id ); + vm_push_struct( gen ); + break; + } + case IN_CONS_REDUCER: { + half_t generic_id; + half_t reducer_id; + read_half( generic_id ); + read_half( reducer_id ); + + debug( prg, REALM_BYTECODE, "IN_CONS_REDUCER %hd\n", generic_id ); + + struct_t *gen = colm_construct_reducer( prg, generic_id, reducer_id ); + vm_push_struct( gen ); + break; + } + case IN_CONS_OBJECT: { + half_t lang_el_id; + read_half( lang_el_id ); + + debug( prg, REALM_BYTECODE, "IN_CONS_OBJECT %hd\n", lang_el_id ); + + tree_t *repl_tree = colm_construct_object( prg, 0, 0, lang_el_id ); + vm_push_tree( repl_tree ); + break; + } + case IN_CONSTRUCT: { + half_t pattern_id; + read_half( pattern_id ); + + 
debug( prg, REALM_BYTECODE, "IN_CONSTRUCT\n" ); + + //struct lang_el_info *lelInfo = prg->rtd->lelInfo; + //struct pat_cons_node *nodes = prg->rtd->patReplNodes; + int root_node = prg->rtd->pat_repl_info[pattern_id].offset; + + /* Note that bindIds are indexed at one. Add one spot for them. */ + int num_bindings = prg->rtd->pat_repl_info[pattern_id].num_bindings; + tree_t *bindings[1+num_bindings]; + + int b; + for ( b = 1; b <= num_bindings; b++ ) { + bindings[b] = vm_pop_tree(); + assert( bindings[b] != 0 ); + } + + tree_t *repl_tree = colm_construct_tree( prg, 0, bindings, root_node ); + + vm_push_tree( repl_tree ); + break; + } + case IN_CONSTRUCT_TERM: { + half_t token_id; + read_half( token_id ); + + debug( prg, REALM_BYTECODE, "IN_CONSTRUCT_TERM\n" ); + + /* Pop the string we are constructing the token from. */ + str_t *str = vm_pop_string(); + tree_t *res = colm_construct_term( prg, token_id, str->value ); + colm_tree_upref( prg, res ); + vm_push_tree( res ); + break; + } + case IN_MAKE_TOKEN: { + uchar nargs; + int i; + read_byte( nargs ); + + debug( prg, REALM_BYTECODE, "IN_MAKE_TOKEN\n" ); + + tree_t *arg[nargs]; + for ( i = nargs-1; i >= 0; i-- ) + arg[i] = vm_pop_tree(); + + tree_t *result = colm_construct_token( prg, arg, nargs ); + for ( i = 1; i < nargs; i++ ) + colm_tree_downref( prg, sp, arg[i] ); + vm_push_tree( result ); + break; + } + case IN_MAKE_TREE: { + uchar nargs; + int i; + read_byte( nargs ); + + debug( prg, REALM_BYTECODE, "IN_MAKE_TREE\n" ); + + tree_t *arg[nargs]; + for ( i = nargs-1; i >= 0; i-- ) + arg[i] = vm_pop_tree(); + + tree_t *result = make_tree( prg, arg, nargs ); + for ( i = 1; i < nargs; i++ ) + colm_tree_downref( prg, sp, arg[i] ); + + vm_push_tree( result ); + break; + } + case IN_TREE_CAST: { + half_t lang_el_id; + read_half( lang_el_id ); + + debug( prg, REALM_BYTECODE, "IN_TREE_CAST %hd\n", lang_el_id ); + + tree_t *tree = vm_pop_tree(); + tree_t *res = cast_tree( prg, lang_el_id, tree ); + colm_tree_upref( prg, res 
); + colm_tree_downref( prg, sp, tree ); + vm_push_tree( res ); + break; + } + case IN_PTR_ACCESS_WV: { + debug( prg, REALM_BYTECODE, "IN_PTR_ACCESS_WV\n" ); + + struct_t *ptr = vm_pop_struct(); + vm_push_struct( ptr ); + + /* This is an initial global load. Need to reverse execute it. */ + rcode_unit_start( exec ); + rcode_code( exec, IN_PTR_ACCESS_BKT ); + rcode_word( exec, (word_t) ptr ); + break; + } + case IN_PTR_ACCESS_BKT: { + word_t p; + read_word( p ); + + debug( prg, REALM_BYTECODE, "IN_PTR_ACCESS_BKT\n" ); + + struct_t *ptr = (struct_t*)p; + vm_push_type( struct_t *, ptr ); + break; + } + case IN_REF_FROM_LOCAL: { + short int field; + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_REF_FROM_LOCAL %hd\n", field ); + + /* First push the null next pointer, then the kid pointer. */ + kid_t *kid = (kid_t*)vm_get_plocal(exec, field); + vm_contiguous( 2 ); + vm_push_ref( 0 ); + vm_push_kid( kid ); + break; + } + case IN_REF_FROM_REF: { + short int field; + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_REF_FROM_REF %hd\n", field ); + + ref_t *ref = (ref_t*)vm_get_plocal(exec, field); + vm_contiguous( 2 ); + vm_push_ref( ref ); + vm_push_kid( ref->kid ); + break; + } + case IN_REF_FROM_QUAL_REF: { + short int back; + short int field; + read_half( back ); + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_REF_FROM_QUAL_REF\n" ); + + ref_t *ref = (ref_t*)(sp + back); + + tree_t *obj = ref->kid->tree; + kid_t *attr_kid = get_field_kid( obj, field ); + + vm_contiguous( 2 ); + vm_push_ref( ref ); + vm_push_kid( attr_kid ); + break; + } + case IN_RHS_REF_FROM_QUAL_REF: { + short int back; + int i, done = 0; + uchar len; + + read_half( back ); + + debug( prg, REALM_BYTECODE, "IN_RHS_REF_FROM_QUAL_REF\n" ); + + ref_t *ref = (ref_t*)(sp + back); + + tree_t *obj = ref->kid->tree; + kid_t *attr_kid = 0; + + read_byte( len ); + for ( i = 0; i < len; i++ ) { + uchar prod_num, child_num; + read_byte( prod_num ); + read_byte( child_num ); + if ( !done 
&& obj->prod_num == prod_num ) { + attr_kid = get_rhs_el_kid( prg, obj, child_num ); + done = 1; + } + } + + vm_contiguous( 2 ); + vm_push_ref( ref ); + vm_push_kid( attr_kid ); + break; + } + case IN_REF_FROM_BACK: { + short int back; + read_half( back ); + + debug( prg, REALM_BYTECODE, "IN_REF_FROM_BACK %hd\n", back ); + + kid_t *ptr = (kid_t*)(sp + back); + + vm_contiguous( 2 ); + vm_push_ref( 0 ); + vm_push_kid( ptr ); + break; + } + case IN_TRITER_REF_FROM_CUR: { + short int field; + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_TRITER_REF_FROM_CUR\n" ); + + /* Push the next pointer first, then the kid. */ + tree_iter_t *iter = (tree_iter_t*) vm_get_plocal(exec, field); + ref_t *ref = &iter->ref; + vm_contiguous( 2 ); + vm_push_ref( ref ); + vm_push_kid( iter->ref.kid ); + break; + } + case IN_UITER_REF_FROM_CUR: { + short int field; + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_UITER_REF_FROM_CUR\n" ); + + /* Push the next pointer first, then the kid. */ + user_iter_t *uiter = (user_iter_t*) vm_get_local(exec, field); + vm_contiguous( 2 ); + vm_push_ref( uiter->ref.next ); + vm_push_kid( uiter->ref.kid ); + break; + } + case IN_GET_TOKEN_DATA_R: { + debug( prg, REALM_BYTECODE, "IN_GET_TOKEN_DATA_R\n" ); + + tree_t *tree = vm_pop_tree(); + head_t *data = string_copy( prg, tree->tokdata ); + tree_t *str = construct_string( prg, data ); + colm_tree_upref( prg, str ); + vm_push_tree( str ); + colm_tree_downref( prg, sp, tree ); + break; + } + case IN_SET_TOKEN_DATA_WC: { + debug( prg, REALM_BYTECODE, "IN_SET_TOKEN_DATA_WC\n" ); + + tree_t *tree = vm_pop_tree(); + tree_t *val = vm_pop_tree(); + head_t *head = string_copy( prg, ((str_t*)val)->value ); + string_free( prg, tree->tokdata ); + tree->tokdata = head; + + colm_tree_downref( prg, sp, tree ); + colm_tree_downref( prg, sp, val ); + break; + } + case IN_SET_TOKEN_DATA_WV: { + debug( prg, REALM_BYTECODE, "IN_SET_TOKEN_DATA_WV\n" ); + + tree_t *tree = vm_pop_tree(); + tree_t *val = 
vm_pop_tree(); + + head_t *oldval = tree->tokdata; + head_t *head = string_copy( prg, ((str_t*)val)->value ); + tree->tokdata = head; + + /* Set up reverse code. Needs no args. */ + rcode_code( exec, IN_SET_TOKEN_DATA_BKT ); + rcode_word( exec, (word_t)oldval ); + rcode_unit_term( exec ); + + colm_tree_downref( prg, sp, tree ); + colm_tree_downref( prg, sp, val ); + break; + } + case IN_SET_TOKEN_DATA_BKT: { + debug( prg, REALM_BYTECODE, "IN_SET_TOKEN_DATA_BKT \n" ); + + word_t oldval; + read_word( oldval ); + + tree_t *tree = vm_pop_tree(); + head_t *head = (head_t*)oldval; + string_free( prg, tree->tokdata ); + tree->tokdata = head; + colm_tree_downref( prg, sp, tree ); + break; + } + case IN_GET_TOKEN_FILE_R: { + debug( prg, REALM_BYTECODE, "IN_GET_TOKEN_FILE_R\n" ); + tree_t *tree = vm_pop_tree(); + tree_t *str = 0; + if ( tree->tokdata->location ) { + const char *fn = tree->tokdata->location->name; + size_t fnlen = strlen( fn ); + head_t *data = string_alloc_full( prg, fn, fnlen ); + str = construct_string( prg, data ); + colm_tree_upref( prg, str ); + } + vm_push_tree( str ); + colm_tree_downref( prg, sp, tree ); + break; + } + case IN_GET_TOKEN_LINE_R: { + debug( prg, REALM_BYTECODE, "IN_GET_TOKEN_LINE_R\n" ); + + tree_t *tree = vm_pop_tree(); + value_t integer = 0; + if ( tree->tokdata->location ) + integer = tree->tokdata->location->line; + vm_push_value( integer ); + colm_tree_downref( prg, sp, tree ); + break; + } + case IN_GET_TOKEN_COL_R: { + debug( prg, REALM_BYTECODE, "IN_GET_TOKEN_COL_R\n" ); + + tree_t *tree = vm_pop_tree(); + value_t integer = 0; + if ( tree->tokdata->location ) + integer = tree->tokdata->location->column; + vm_push_value( integer ); + colm_tree_downref( prg, sp, tree ); + break; + } + case IN_GET_TOKEN_POS_R: { + debug( prg, REALM_BYTECODE, "IN_GET_TOKEN_POS_R\n" ); + + tree_t *tree = vm_pop_tree(); + value_t integer = 0; + if ( tree->tokdata->location ) + integer = tree->tokdata->location->byte; + vm_push_value( integer ); + 
colm_tree_downref( prg, sp, tree ); + break; + } + case IN_GET_MATCH_LENGTH_R: { + debug( prg, REALM_BYTECODE, "IN_GET_MATCH_LENGTH_R\n" ); + + value_t integer = string_length(exec->parser->pda_run->tokdata); + vm_push_value( integer ); + break; + } + case IN_GET_MATCH_TEXT_R: { + debug( prg, REALM_BYTECODE, "IN_GET_MATCH_TEXT_R\n" ); + + head_t *s = string_copy( prg, exec->parser->pda_run->tokdata ); + tree_t *tree = construct_string( prg, s ); + colm_tree_upref( prg, tree ); + vm_push_tree( tree ); + break; + } + case IN_LIST_LENGTH: { + debug( prg, REALM_BYTECODE, "IN_LIST_LENGTH\n" ); + + list_t *list = vm_pop_list(); + long len = colm_list_length( list ); + value_t res = len; + vm_push_value( res ); + break; + } + case IN_GET_LIST_EL_MEM_R: { + short gen_id, field; + read_half( gen_id ); + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_GET_LIST_EL_MEM_R\n" ); + + struct_t *s = vm_pop_struct(); + + list_el_t *list_el = colm_struct_to_list_el( prg, s, gen_id ); + struct_t *val = colm_list_el_get( prg, list_el, gen_id, field ); + vm_push_struct( val ); + break; + } + case IN_GET_LIST_MEM_R: { + short gen_id, field; + read_half( gen_id ); + read_half( field ); + + debug( prg, REALM_BYTECODE, + "IN_GET_LIST_MEM_R %hd %hd\n", gen_id, field ); + + list_t *list = vm_pop_list(); + struct_t *val = colm_list_get( prg, list, gen_id, field ); + vm_push_struct( val ); + break; + } + case IN_GET_LIST_MEM_WC: { + short field; + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_GET_LIST_MEM_WC\n" ); + + tree_t *obj = vm_pop_tree(); + colm_tree_downref( prg, sp, obj ); + + tree_t *val = get_list_mem_split( prg, (list_t*)obj, field ); + colm_tree_upref( prg, val ); + vm_push_tree( val ); + break; + } + case IN_GET_LIST_MEM_WV: { + short field; + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_GET_LIST_MEM_WV\n" ); + + tree_t *obj = vm_pop_tree(); + colm_tree_downref( prg, sp, obj ); + + tree_t *val = get_list_mem_split( prg, (list_t*)obj, field ); + 
colm_tree_upref( prg, val ); + vm_push_tree( val ); + + /* Set up the reverse instruction. */ + rcode_code( exec, IN_GET_LIST_MEM_BKT ); + rcode_half( exec, field ); + break; + } + case IN_GET_LIST_MEM_BKT: { + short field; + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_GET_LIST_MEM_BKT\n" ); + + tree_t *obj = vm_pop_tree(); + colm_tree_downref( prg, sp, obj ); + + tree_t *res = get_list_mem_split( prg, (list_t*)obj, field ); + colm_tree_upref( prg, res ); + vm_push_tree( res ); + break; + } + case IN_GET_VLIST_MEM_R: { + short gen_id, field; + read_half( gen_id ); + read_half( field ); + + debug( prg, REALM_BYTECODE, + "IN_GET_VLIST_MEM_R %hd %hd\n", gen_id, field ); + + list_t *list = vm_pop_list(); + struct_t *el = colm_list_get( prg, list, gen_id, field ); + + value_t val = colm_struct_get_field( el, value_t, 0 ); + vm_push_value( val ); + break; + } + case IN_GET_VLIST_MEM_WC: { + short field; + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_GET_VLIST_MEM_WC\n" ); + + tree_t *obj = vm_pop_tree(); + colm_tree_downref( prg, sp, obj ); + + tree_t *val = get_list_mem_split( prg, (list_t*)obj, field ); + colm_tree_upref( prg, val ); + vm_push_tree( val ); + break; + } + case IN_GET_VLIST_MEM_WV: { + short field; + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_GET_VLIST_MEM_WV\n" ); + + tree_t *obj = vm_pop_tree(); + colm_tree_downref( prg, sp, obj ); + + tree_t *val = get_list_mem_split( prg, (list_t*)obj, field ); + colm_tree_upref( prg, val ); + vm_push_tree( val ); + + /* Set up the reverse instruction. 
*/ + rcode_code( exec, IN_GET_LIST_MEM_BKT ); + rcode_half( exec, field ); + break; + } + case IN_GET_VLIST_MEM_BKT: { + short field; + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_GET_VLIST_MEM_BKT\n" ); + + tree_t *obj = vm_pop_tree(); + colm_tree_downref( prg, sp, obj ); + + tree_t *res = get_list_mem_split( prg, (list_t*)obj, field ); + colm_tree_upref( prg, res ); + vm_push_tree( res ); + break; + } + case IN_GET_PARSER_STREAM: { + debug( prg, REALM_BYTECODE, "IN_GET_PARSER_STREAM\n" ); + parser_t *parser = vm_pop_parser(); + vm_push_input( parser->input ); + break; + } + case IN_GET_PARSER_MEM_R: { + short field; + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_GET_PARSER_MEM_R %hd\n", field ); + + parser_t *parser = vm_pop_parser(); + + tree_t *val = get_parser_mem( parser, field ); + + colm_tree_upref( prg, val ); + vm_push_tree( val ); + break; + } + + case IN_GET_MAP_EL_MEM_R: { + short gen_id, field; + read_half( gen_id ); + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_GET_MAP_EL_MEM_R\n" ); + + struct_t *strct = vm_pop_struct(); + + map_el_t *map_el = colm_struct_to_map_el( prg, strct, gen_id ); + struct_t *val = colm_map_el_get( prg, map_el, gen_id, field ); + vm_push_struct( val ); + break; + } + case IN_MAP_LENGTH: { + debug( prg, REALM_BYTECODE, "IN_MAP_LENGTH\n" ); + + tree_t *obj = vm_pop_tree(); + long len = map_length( (map_t*)obj ); + value_t res = len; + vm_push_value( res ); + break; + } + case IN_GET_MAP_MEM_R: { + short gen_id, field; + read_half( gen_id ); + read_half( field ); + + debug( prg, REALM_BYTECODE, + "IN_GET_MAP_MEM_R %hd %hd\n", gen_id, field ); + + map_t *map = vm_pop_map(); + struct_t *val = colm_map_get( prg, map, gen_id, field ); + vm_push_struct( val ); + break; + } + case IN_GET_MAP_MEM_WC: { + short field; + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_GET_MAP_MEM_WC\n" ); + + tree_t *obj = vm_pop_tree(); + colm_tree_downref( prg, sp, obj ); + + tree_t *val = get_list_mem_split( 
prg, (list_t*)obj, field ); + colm_tree_upref( prg, val ); + vm_push_tree( val ); + break; + } + case IN_GET_MAP_MEM_WV: { + short field; + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_GET_MAP_MEM_WV\n" ); + + tree_t *obj = vm_pop_tree(); + colm_tree_downref( prg, sp, obj ); + + tree_t *val = get_list_mem_split( prg, (list_t*)obj, field ); + colm_tree_upref( prg, val ); + vm_push_tree( val ); + + /* Set up the reverse instruction. */ + rcode_code( exec, IN_GET_MAP_MEM_BKT ); + rcode_half( exec, field ); + break; + } + case IN_GET_MAP_MEM_BKT: { + short field; + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_GET_MAP_MEM_BKT\n" ); + + tree_t *obj = vm_pop_tree(); + colm_tree_downref( prg, sp, obj ); + + tree_t *res = get_list_mem_split( prg, (list_t*)obj, field ); + colm_tree_upref( prg, res ); + vm_push_tree( res ); + break; + } + + case IN_STASH_ARG: { + half_t pos; + half_t size; + read_half( pos ); + read_half( size ); + + debug( prg, REALM_BYTECODE, "IN_STASH_ARG %hd %hd\n", pos, size ); + + while ( size > 0 ) { + value_t v = vm_pop_value(); + ((value_t*)exec->call_args)[pos] = v; + size -= 1; + pos += 1; + } + + break; + } + + case IN_PREP_ARGS: { + half_t size; + read_half( size ); + + debug( prg, REALM_BYTECODE, "IN_PREP_ARGS %hd\n", size ); + + vm_push_type( tree_t**, exec->call_args ); + vm_pushn( size ); + exec->call_args = vm_ptop(); + memset( vm_ptop(), 0, sizeof(word_t) * size ); + break; + } + + case IN_CLEAR_ARGS: { + half_t size; + read_half( size ); + + debug( prg, REALM_BYTECODE, "IN_CLEAR_ARGS %hd\n", size ); + + vm_popn( size ); + exec->call_args = vm_pop_type( tree_t** ); + break; + } + + case IN_HOST: { + half_t func_id; + read_half( func_id ); + + debug( prg, REALM_BYTECODE, "IN_HOST %hd\n", func_id ); + + sp = prg->rtd->host_call( prg, func_id, sp ); + break; + } + case IN_CALL_WV: { + half_t func_id; + read_half( func_id ); + + struct function_info *fi = &prg->rtd->function_info[func_id]; + struct frame_info *fr = 
&prg->rtd->frame_info[fi->frame_id]; + + debug( prg, REALM_BYTECODE, "IN_CALL_WV %s\n", fr->name ); + + vm_contiguous( FR_AA + fi->frame_size ); + + vm_push_type( tree_t**, exec->call_args ); + vm_push_value( 0 ); /* Return value. */ + vm_push_type( code_t*, instr ); + vm_push_type( tree_t**, exec->frame_ptr ); + vm_push_type( long, exec->frame_id ); + + instr = fr->codeWV; + exec->frame_id = fi->frame_id; + + exec->frame_ptr = vm_ptop(); + vm_pushn( fr->frame_size ); + memset( vm_ptop(), 0, sizeof(word_t) * fr->frame_size ); + break; + } + case IN_CALL_WC: { + half_t func_id; + read_half( func_id ); + + struct function_info *fi = &prg->rtd->function_info[func_id]; + struct frame_info *fr = &prg->rtd->frame_info[fi->frame_id]; + + debug( prg, REALM_BYTECODE, "IN_CALL_WC %s %d\n", fr->name, fr->frame_size ); + + vm_contiguous( FR_AA + fi->frame_size ); + + vm_push_type( tree_t**, exec->call_args ); + vm_push_value( 0 ); /* Return value. */ + vm_push_type( code_t*, instr ); + vm_push_type( tree_t**, exec->frame_ptr ); + vm_push_type( long, exec->frame_id ); + + instr = fr->codeWC; + exec->frame_id = fi->frame_id; + + exec->frame_ptr = vm_ptop(); + vm_pushn( fr->frame_size ); + memset( vm_ptop(), 0, sizeof(word_t) * fr->frame_size ); + break; + } + case IN_YIELD: { + debug( prg, REALM_BYTECODE, "IN_YIELD\n" ); + + kid_t *kid = vm_pop_kid(); + ref_t *next = vm_pop_ref(); + user_iter_t *uiter = (user_iter_t*) vm_plocal_iframe( IFR_AA ); + + if ( kid == 0 || kid->tree == 0 || + kid->tree->id == uiter->search_id || + uiter->search_id == prg->rtd->any_id ) + { + /* Store the yeilded value. */ + uiter->ref.kid = kid; + uiter->ref.next = next; + uiter->yield_size = vm_ssize() - uiter->root_size; + uiter->resume = instr; + uiter->frame = exec->frame_ptr; + + /* Restore the instruction and frame pointer. 
*/ + instr = (code_t*) vm_local_iframe(IFR_RIN); + exec->frame_ptr = (tree_t**) vm_local_iframe(IFR_RFR); + exec->iframe_ptr = (tree_t**) vm_local_iframe(IFR_RIF); + + /* Return the yield result on the top of the stack. */ + tree_t *result = uiter->ref.kid != 0 ? prg->true_val : prg->false_val; + //colm_tree_upref( prg, result ); + vm_push_tree( result ); + } + break; + } + case IN_UITER_CREATE_WV: { + short field; + half_t func_id, search_id; + read_half( field ); + read_half( func_id ); + read_half( search_id ); + + debug( prg, REALM_BYTECODE, "IN_UITER_CREATE_WV\n" ); + + struct function_info *fi = prg->rtd->function_info + func_id; + + vm_contiguous( (sizeof(user_iter_t) / sizeof(word_t)) + FR_AA + fi->frame_size ); + + user_iter_t *uiter = colm_uiter_create( prg, &sp, fi, search_id ); + vm_set_local(exec, field, (SW) uiter); + + /* This is a setup similar to as a call, only the frame structure + * is slightly different for user iterators. We aren't going to do + * the call. We don't need to set up the return ip because the + * uiter advance will set it. The frame we need to do because it + * is set once for the lifetime of the iterator. */ + vm_push_type( tree_t**, exec->call_args ); + vm_push_value( 0 ); + + vm_push_type( code_t*, 0 ); /* Return instruction pointer, */ + vm_push_type( tree_t**, exec->iframe_ptr ); /* Return iframe. */ + vm_push_type( tree_t**, exec->frame_ptr ); /* Return frame. 
*/ + + uiter->frame = vm_ptop(); + vm_pushn( fi->frame_size ); + memset( vm_ptop(), 0, sizeof(word_t) * fi->frame_size ); + + uiter_init( prg, sp, uiter, fi, true ); + break; + } + case IN_UITER_CREATE_WC: { + short field; + half_t func_id, search_id; + read_half( field ); + read_half( func_id ); + read_half( search_id ); + + debug( prg, REALM_BYTECODE, "IN_UITER_CREATE_WC\n" ); + + struct function_info *fi = prg->rtd->function_info + func_id; + + vm_contiguous( (sizeof(user_iter_t) / sizeof(word_t)) + FR_AA + fi->frame_size ); + + user_iter_t *uiter = colm_uiter_create( prg, &sp, fi, search_id ); + vm_set_local(exec, field, (SW) uiter); + + /* This is a setup similar to as a call, only the frame structure + * is slightly different for user iterators. We aren't going to do + * the call. We don't need to set up the return ip because the + * uiter advance will set it. The frame we need to do because it + * is set once for the lifetime of the iterator. */ + vm_push_type( tree_t**, exec->call_args ); + vm_push_value( 0 ); + + vm_push_type( code_t*, 0 ); /* Return instruction pointer, */ + vm_push_type( tree_t**, exec->iframe_ptr ); /* Return iframe. */ + vm_push_type( tree_t**, exec->frame_ptr ); /* Return frame. 
*/ + + uiter->frame = vm_ptop(); + vm_pushn( fi->frame_size ); + memset( vm_ptop(), 0, sizeof(word_t) * fi->frame_size ); + + uiter_init( prg, sp, uiter, fi, false ); + break; + } + case IN_UITER_DESTROY: { + short field; + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_UITER_DESTROY %hd\n", field ); + + user_iter_t *uiter = (user_iter_t*) vm_get_local(exec, field); + colm_uiter_destroy( prg, &sp, uiter ); + break; + } + + case IN_UITER_UNWIND: { + short field; + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_UITER_UNWIND %hd\n", field ); + + user_iter_t *uiter = (user_iter_t*) vm_get_local(exec, field); + colm_uiter_unwind( prg, &sp, uiter ); + break; + } + + case IN_RET: { + struct frame_info *fi = &prg->rtd->frame_info[exec->frame_id]; + downref_local_trees( prg, sp, exec, fi->locals, fi->locals_len ); + vm_popn( fi->frame_size ); + + exec->frame_id = vm_pop_type(long); + exec->frame_ptr = vm_pop_type(tree_t**); + instr = vm_pop_type(code_t*); + exec->ret_val = vm_pop_tree(); + vm_pop_value(); + //vm_popn( fi->argSize ); + + fi = &prg->rtd->frame_info[exec->frame_id]; + debug( prg, REALM_BYTECODE, "IN_RET %s\n", fi->name ); + + /* This if for direct calls of functions. */ + if ( instr == 0 ){ + //assert( sp == root ); + return sp; + } + + /* Might be some unwind code. 
*/ + { + short unwind_len; + read_half( unwind_len ); + if ( unwind_len > 0 ) { + instr += unwind_len; + debug( prg, REALM_BYTECODE, + "skipping unwind code length: %hd\n", unwind_len ); + } + } + + break; + } + case IN_TO_UPPER: { + debug( prg, REALM_BYTECODE, "IN_TO_UPPER\n" ); + + tree_t *in = vm_pop_tree(); + head_t *head = string_to_upper( in->tokdata ); + tree_t *upper = construct_string( prg, head ); + colm_tree_upref( prg, upper ); + vm_push_tree( upper ); + colm_tree_downref( prg, sp, in ); + break; + } + case IN_TO_LOWER: { + debug( prg, REALM_BYTECODE, "IN_TO_LOWER\n" ); + + tree_t *in = vm_pop_tree(); + head_t *head = string_to_lower( in->tokdata ); + tree_t *lower = construct_string( prg, head ); + colm_tree_upref( prg, lower ); + vm_push_tree( lower ); + colm_tree_downref( prg, sp, in ); + break; + } + case IN_OPEN_FILE: { + debug( prg, REALM_BYTECODE, "IN_OPEN_FILE\n" ); + + tree_t *mode = vm_pop_tree(); + tree_t *name = vm_pop_tree(); + stream_t *res = colm_stream_open_file( prg, name, mode ); + vm_push_stream( res ); + colm_tree_downref( prg, sp, name ); + colm_tree_downref( prg, sp, mode ); + break; + } + case IN_GET_CONST: { + short constValId; + read_half( constValId ); + + switch ( constValId ) { + case CONST_STDIN: { + debug( prg, REALM_BYTECODE, "CONST_STDIN\n" ); + + /* Pop the root object. */ + vm_pop_tree(); + + make_stdin( prg ); + + vm_push_stream( prg->stdin_val ); + break; + } + case CONST_STDOUT: { + debug( prg, REALM_BYTECODE, "CONST_STDOUT\n" ); + + /* Pop the root object. */ + vm_pop_tree(); + make_stdout( prg ); + + vm_push_stream( prg->stdout_val ); + break; + } + case CONST_STDERR: { + debug( prg, REALM_BYTECODE, "CONST_STDERR\n" ); + + /* Pop the root object. */ + vm_pop_tree(); + + make_stderr( prg ); + + vm_push_stream( prg->stderr_val ); + break; + } + case CONST_ARG: { + word_t offset; + read_word( offset ); + + debug( prg, REALM_BYTECODE, "CONST_ARG %d\n", offset ); + + /* Pop the root object. 
*/ + vm_pop_tree(); + + head_t *lit = make_literal( prg, offset ); + tree_t *tree = construct_string( prg, lit ); + colm_tree_upref( prg, tree ); + vm_push_tree( tree ); + break; + } + } + break; + } + case IN_SYSTEM: { + debug( prg, REALM_BYTECODE, "IN_SYSTEM\n" ); + + vm_pop_tree(); + str_t *cmd = vm_pop_string(); + + char *cmd0 = malloc( cmd->value->length + 1 ); + memcpy( cmd0, cmd->value->data, cmd->value->length ); + cmd0[cmd->value->length] = 0; + + int res = system( cmd0 ); + + free( cmd0 ); + +#if defined(HAVE_SYS_WAIT_H) + if ( WIFSIGNALED( res ) ) + raise( WTERMSIG( res ) ); + res = WEXITSTATUS( res ); +#else + // WARNING: Check result +#endif + + colm_tree_downref( prg, sp, (tree_t*)cmd ); + + value_t val = res; + vm_push_value( val ); + break; + } + + case IN_DONE: + return sp; + + case IN_FN: { + c = *instr++; + switch ( c ) { + case FN_STR_ATOI: { + debug( prg, REALM_BYTECODE, "FN_STR_ATOI\n" ); + + str_t *str = vm_pop_string(); + word_t res = str_atoi( str->value ); + value_t integer = res; + vm_push_value( integer ); + colm_tree_downref( prg, sp, (tree_t*)str ); + break; + } + case FN_STR_ATOO: { + debug( prg, REALM_BYTECODE, "FN_STR_ATOO\n" ); + + str_t *str = vm_pop_string(); + word_t res = str_atoo( str->value ); + value_t integer = res; + vm_push_value( integer ); + colm_tree_downref( prg, sp, (tree_t*)str ); + break; + } + case FN_STR_UORD8: { + debug( prg, REALM_BYTECODE, "FN_STR_UORD8\n" ); + + str_t *str = vm_pop_string(); + word_t res = str_uord8( str->value ); + value_t integer = res; + vm_push_value( integer ); + colm_tree_downref( prg, sp, (tree_t*)str ); + break; + } + case FN_STR_UORD16: { + debug( prg, REALM_BYTECODE, "FN_STR_UORD16\n" ); + + str_t *str = vm_pop_string(); + word_t res = str_uord16( str->value ); + value_t integer = res; + vm_push_value( integer ); + colm_tree_downref( prg, sp, (tree_t*)str ); + break; + } + case FN_STR_PREFIX: { + debug( prg, REALM_BYTECODE, "FN_STR_PREFIX\n" ); + + str_t *str = vm_pop_string(); + 
value_t len = vm_pop_value(); + + str_t *res = string_prefix( prg, str, (long) len ); + colm_tree_upref( prg, (tree_t*) res ); + vm_push_string( res ); + colm_tree_downref( prg, sp, (tree_t*)str ); + break; + } + case FN_STR_SUFFIX: { + debug( prg, REALM_BYTECODE, "FN_STR_SUFFIX\n" ); + + str_t *str = vm_pop_string(); + value_t pos = vm_pop_value(); + + str_t *res = string_suffix( prg, str, (long) pos ); + colm_tree_upref( prg, (tree_t*) res ); + vm_push_string( res ); + colm_tree_downref( prg, sp, (tree_t*)str ); + break; + } + case FN_PREFIX: { + debug( prg, REALM_BYTECODE, "FN_PREFIX\n" ); + + value_t len = vm_pop_value(); + str_t *str = vm_pop_string(); + + str_t *res = string_prefix( prg, str, (long) len ); + colm_tree_upref( prg, (tree_t*) res ); + vm_push_string( res ); + colm_tree_downref( prg, sp, (tree_t*)str ); + break; + } + case FN_SUFFIX: { + debug( prg, REALM_BYTECODE, "FN_SUFFIX\n" ); + + value_t pos = vm_pop_value(); + str_t *str = vm_pop_string(); + + str_t *res = string_suffix( prg, str, (long) pos ); + colm_tree_upref( prg, (tree_t*) res ); + vm_push_string( res ); + colm_tree_downref( prg, sp, (tree_t*)str ); + break; + } + case FN_SPRINTF: { + debug( prg, REALM_BYTECODE, "FN_SPRINTF\n" ); + + vm_pop_tree(); + value_t integer = vm_pop_value(); + str_t *format = vm_pop_string(); + head_t *res = string_sprintf( prg, format, (long)integer ); + str_t *str = (str_t*)construct_string( prg, res ); + colm_tree_upref( prg, (tree_t*)str ); + vm_push_string( str ); + colm_tree_downref( prg, sp, (tree_t*)format ); + break; + } + case FN_LOAD_ARG0: { + half_t field; + read_half( field ); + debug( prg, REALM_BYTECODE, "FN_LOAD_ARG0 %lu\n", field ); + + /* tree_t comes back upreffed. 
*/ + tree_t *tree = construct_arg0( prg, prg->argc, prg->argv, prg->argl ); + tree_t *prev = colm_struct_get_field( prg->global, tree_t*, field ); + colm_tree_downref( prg, sp, prev ); + colm_struct_set_field( prg->global, tree_t*, field, tree ); + break; + } + case FN_LOAD_ARGV: { + half_t field; + read_half( field ); + debug( prg, REALM_BYTECODE, "FN_LOAD_ARGV %lu\n", field ); + + list_t *list = construct_argv( prg, prg->argc, prg->argv, prg->argl ); + colm_struct_set_field( prg->global, list_t*, field, list ); + break; + } + case FN_INIT_STDS: { + half_t field; + read_half( field ); + debug( prg, REALM_BYTECODE, "FN_INIT_STDS %lu\n", field ); + + list_t *list = construct_stds( prg ); + colm_struct_set_field( prg->global, list_t*, field, list ); + break; + } + case FN_STOP: { + debug( prg, REALM_BYTECODE, "FN_STOP\n" ); + + flush_streams( prg ); + goto out; + } + + case FN_LIST_PUSH_HEAD_WC: { + short gen_id; + read_half( gen_id ); + + debug( prg, REALM_BYTECODE, "FN_LIST_PUSH_HEAD_WC\n" ); + + list_t *list = vm_pop_list(); + struct_t *s = vm_pop_struct(); + + list_el_t *list_el = colm_struct_to_list_el( prg, s, gen_id ); + colm_list_prepend( list, list_el ); + + //colm_tree_upref( prg, prg->trueVal ); + vm_push_tree( prg->true_val ); + break; + } + case FN_LIST_PUSH_HEAD_WV: { + short gen_id; + read_half( gen_id ); + + debug( prg, REALM_BYTECODE, "FN_LIST_PUSH_HEAD_WV\n" ); + + list_t *list = vm_pop_list(); + struct_t *s = vm_pop_struct(); + + list_el_t *list_el = colm_struct_to_list_el( prg, s, gen_id ); + colm_list_prepend( list, list_el ); + + //colm_tree_upref( prg, prg->trueVal ); + vm_push_tree( prg->true_val ); + + /* Set up reverse code. Needs no args. 
*/ + rcode_code( exec, IN_FN ); + rcode_code( exec, FN_LIST_PUSH_HEAD_BKT ); + rcode_unit_term( exec ); + break; + } + case FN_LIST_PUSH_HEAD_BKT: { + debug( prg, REALM_BYTECODE, "FN_LIST_PUSH_HEAD_BKT\n" ); + + list_t *list = vm_pop_list(); + colm_list_detach_head( list ); + break; + } + case FN_LIST_PUSH_TAIL_WC: { + short gen_id; + read_half( gen_id ); + + debug( prg, REALM_BYTECODE, "FN_LIST_PUSH_TAIL_WC\n" ); + + list_t *list = vm_pop_list(); + struct_t *s = vm_pop_struct(); + + list_el_t *list_el = colm_struct_to_list_el( prg, s, gen_id ); + colm_list_append( list, list_el ); + + //colm_tree_upref( prg, prg->trueVal ); + vm_push_tree( prg->true_val ); + break; + } + case FN_LIST_PUSH_TAIL_WV: { + short gen_id; + read_half( gen_id ); + + debug( prg, REALM_BYTECODE, "FN_LIST_PUSH_TAIL_WV\n" ); + + list_t *list = vm_pop_list(); + struct_t *s = vm_pop_struct(); + + list_el_t *list_el = colm_struct_to_list_el( prg, s, gen_id ); + colm_list_append( list, list_el ); + + //colm_tree_upref( prg, prg->trueVal ); + vm_push_tree( prg->true_val ); + + /* Set up reverse code. Needs no args. 
*/ + rcode_code( exec, IN_FN ); + rcode_code( exec, FN_LIST_PUSH_TAIL_BKT ); + rcode_unit_term( exec ); + break; + } + case FN_LIST_PUSH_TAIL_BKT: { + debug( prg, REALM_BYTECODE, "FN_LIST_PUSH_TAIL_BKT\n" ); + + list_t *list = vm_pop_list(); + colm_list_detach_tail( list ); + break; + } + case FN_LIST_POP_TAIL_WC: { + short gen_id; + read_half( gen_id ); + + debug( prg, REALM_BYTECODE, "FN_LIST_POP_TAIL_WC\n" ); + + list_t *list = vm_pop_list(); + + list_el_t *tail = list->tail; + colm_list_detach_tail( list ); + struct_t *s = colm_generic_el_container( prg, tail, gen_id ); + + vm_push_struct( s ); + break; + } + case FN_LIST_POP_TAIL_WV: { + short gen_id; + read_half( gen_id ); + + debug( prg, REALM_BYTECODE, "FN_LIST_POP_TAIL_WV\n" ); + + list_t *list = vm_pop_list(); + + list_el_t *tail = list->tail; + colm_list_detach_tail( list ); + struct_t *s = colm_generic_el_container( prg, tail, gen_id ); + + vm_push_struct( s ); + + /* Set up reverse. */ + rcode_code( exec, IN_FN ); + rcode_code( exec, FN_LIST_POP_TAIL_BKT ); + rcode_half( exec, gen_id ); + rcode_word( exec, (word_t)s ); + rcode_unit_term( exec ); + break; + } + case FN_LIST_POP_TAIL_BKT: { + short gen_id; + tree_t *val; + read_half( gen_id ); + read_tree( val ); + + debug( prg, REALM_BYTECODE, "FN_LIST_POP_TAIL_BKT\n" ); + + list_t *list = vm_pop_list(); + struct_t *s = (struct_t*) val; + + list_el_t *list_el = colm_struct_to_list_el( prg, s, gen_id ); + + colm_list_append( list, list_el ); + break; + } + case FN_LIST_POP_HEAD_WC: { + short gen_id; + read_half( gen_id ); + + debug( prg, REALM_BYTECODE, "FN_LIST_POP_HEAD_WC\n" ); + + list_t *list = vm_pop_list(); + + list_el_t *head = list->head; + colm_list_detach_head( list ); + struct_t *s = colm_generic_el_container( prg, head, gen_id ); + + vm_push_struct( s ); + break; + } + case FN_LIST_POP_HEAD_WV: { + short gen_id; + read_half( gen_id ); + + debug( prg, REALM_BYTECODE, "FN_LIST_POP_HEAD_WV\n" ); + + list_t *list = vm_pop_list(); + + list_el_t 
*head = list->head; + colm_list_detach_head( list ); + struct_t *s = colm_generic_el_container( prg, head, gen_id ); + + vm_push_struct( s ); + + /* Set up reverse. The result comes off the list downrefed. + * Need it up referenced for the reverse code too. */ + rcode_code( exec, IN_FN ); + rcode_code( exec, FN_LIST_POP_HEAD_BKT ); + rcode_half( exec, gen_id ); + rcode_word( exec, (word_t)s ); + rcode_unit_term( exec ); + break; + } + case FN_LIST_POP_HEAD_BKT: { + short gen_id; + tree_t *val; + read_half( gen_id ); + read_tree( val ); + + debug( prg, REALM_BYTECODE, "FN_LIST_POP_HEAD_BKT\n" ); + + list_t *list = vm_pop_list(); + struct_t *s = (struct_t*) val; + + list_el_t *list_el = colm_struct_to_list_el( prg, s, gen_id ); + + colm_list_prepend( list, list_el ); + break; + } + case FN_MAP_FIND: { + short gen_id; + read_half( gen_id ); + + debug( prg, REALM_BYTECODE, "FN_MAP_FIND %hd\n", gen_id ); + + map_t *map = vm_pop_map(); + tree_t *key = vm_pop_tree(); + + map_el_t *map_el = colm_map_find( prg, map, key ); + + struct colm_struct *strct = map_el != 0 ? 
+ colm_generic_el_container( prg, map_el, gen_id ) : 0; + + vm_push_struct( strct ); + + if ( map->generic_info->key_type == TYPE_TREE ) + colm_tree_downref( prg, sp, key ); + break; + } + case FN_MAP_INSERT_WC: { + short gen_id; + read_half( gen_id ); + + debug( prg, REALM_BYTECODE, "FN_MAP_INSERT_WC %hd\n", gen_id ); + + map_t *map = vm_pop_map(); + struct_t *s = vm_pop_struct(); + + map_el_t *map_el = colm_struct_to_map_el( prg, s, gen_id ); + + colm_map_insert( prg, map, map_el ); + + vm_push_tree( prg->true_val ); + break; + } + case FN_MAP_INSERT_WV: { + short gen_id; + read_half( gen_id ); + + debug( prg, REALM_BYTECODE, "FN_MAP_INSERT_WV %hd\n", gen_id ); + + map_t *map = vm_pop_map(); + struct_t *s = vm_pop_struct(); + + map_el_t *map_el = colm_struct_to_map_el( prg, s, gen_id ); + + map_el_t *inserted = colm_map_insert( prg, map, map_el ); + + //colm_tree_upref( prg, prg->trueVal ); + vm_push_tree( prg->true_val ); + + rcode_code( exec, IN_FN ); + rcode_code( exec, FN_MAP_INSERT_BKT ); + rcode_half( exec, gen_id ); + rcode_code( exec, inserted != 0 ? 
1 : 0 ); + rcode_word( exec, (word_t)map_el ); + rcode_unit_term( exec ); + break; + } + + case FN_MAP_INSERT_BKT: { + short gen_id; + uchar inserted; + word_t wmap_el; + + read_half( gen_id ); + read_byte( inserted ); + read_word( wmap_el ); + + map_el_t *map_el = (map_el_t*)wmap_el; + + debug( prg, REALM_BYTECODE, "FN_MAP_INSERT_BKT %d\n", + (int)inserted ); + + map_t *map = vm_pop_map(); + + if ( inserted ) + colm_map_detach( prg, map, map_el ); + break; + } + case FN_MAP_DETACH_WC: { + short gen_id; + read_half( gen_id ); + + debug( prg, REALM_BYTECODE, "FN_MAP_DETACH_WC %hd\n", gen_id ); + + map_t *map = vm_pop_map(); + struct_t *s = vm_pop_struct(); + + map_el_t *map_el = colm_struct_to_map_el( prg, s, gen_id ); + + colm_map_detach( prg, map, map_el ); + + //colm_tree_upref( prg, prg->trueVal ); + vm_push_tree( prg->true_val ); + break; + } + case FN_MAP_DETACH_WV: { + debug( prg, REALM_BYTECODE, "FN_MAP_DETACH_WV\n" ); + + tree_t *obj = vm_pop_tree(); + tree_t *key = vm_pop_tree(); + struct tree_pair pair = map_remove( prg, (map_t*)obj, key ); + + colm_tree_upref( prg, pair.val ); + vm_push_tree( pair.val ); + + /* Reverse instruction. */ + rcode_code( exec, IN_FN ); + rcode_code( exec, FN_MAP_DETACH_BKT ); + rcode_word( exec, (word_t)pair.key ); + rcode_word( exec, (word_t)pair.val ); + rcode_unit_term( exec ); + + colm_tree_downref( prg, sp, obj ); + colm_tree_downref( prg, sp, key ); + break; + } + case FN_MAP_DETACH_BKT: { + tree_t *key, *val; + read_tree( key ); + read_tree( val ); + + debug( prg, REALM_BYTECODE, "FN_MAP_DETACH_BKT\n" ); + + /* Either both or neither. 
*/ + assert( ( key == 0 ) ^ ( val != 0 ) ); + + tree_t *obj = vm_pop_tree(); + #if 0 + if ( key != 0 ) + map_unremove( prg, (map_t*)obj, key, val ); + #endif + + colm_tree_downref( prg, sp, obj ); + break; + } + case FN_VMAP_INSERT_WC: { + short gen_id; + read_half( gen_id ); + + debug( prg, REALM_BYTECODE, "FN_VMAP_INSERT_WC %hd\n", gen_id ); + + map_t *map = vm_pop_map(); + struct_t *value = vm_pop_struct(); + struct_t *key = vm_pop_struct(); + + colm_vmap_insert( prg, map, key, value ); + + //colm_tree_upref( prg, prg->trueVal ); + vm_push_tree( prg->true_val ); + break; + } + case FN_VMAP_INSERT_WV: { + short gen_id; + read_half( gen_id ); + + debug( prg, REALM_BYTECODE, "FN_VMAP_INSERT_WV %hd\n", gen_id ); + + map_t *map = vm_pop_map(); + struct_t *value = vm_pop_struct(); + struct_t *key = vm_pop_struct(); + + map_el_t *inserted = colm_vmap_insert( prg, map, key, value ); + + //colm_tree_upref( prg, prg->trueVal ); + vm_push_tree( prg->true_val ); + + rcode_code( exec, IN_FN ); + rcode_code( exec, FN_VMAP_INSERT_BKT ); + rcode_half( exec, gen_id ); + rcode_code( exec, inserted != 0 ? 
1 : 0 ); + rcode_word( exec, (word_t)inserted ); + rcode_unit_term( exec ); + break; + } + case FN_VMAP_INSERT_BKT: { + short gen_id; + uchar inserted; + word_t wmap_el; + + read_half( gen_id ); + read_byte( inserted ); + read_word( wmap_el ); + + map_el_t *map_el = (map_el_t*)wmap_el; + + debug( prg, REALM_BYTECODE, "FN_VMAP_INSERT_BKT %d\n", + (int)inserted ); + + map_t *map = vm_pop_map(); + + if ( inserted ) + colm_map_detach( prg, map, map_el ); + break; + } + case FN_VMAP_REMOVE_WC: { + short gen_id; + read_half( gen_id ); + + debug( prg, REALM_BYTECODE, "FN_VMAP_REMOVE_WC %hd\n", gen_id ); + + map_t *map = vm_pop_map(); + tree_t *key = vm_pop_tree(); + + colm_vmap_remove( prg, map, key ); + + //colm_tree_upref( prg, prg->trueVal ); + vm_push_tree( prg->true_val ); + break; + } + case FN_VMAP_FIND: { + short gen_id; + read_half( gen_id ); + + debug( prg, REALM_BYTECODE, "FN_VMAP_FIND %hd\n", gen_id ); + + map_t *map = vm_pop_map(); + tree_t *key = vm_pop_tree(); + + tree_t *result = colm_vmap_find( prg, map, key ); + + vm_push_tree( result ); + + if ( map->generic_info->key_type == TYPE_TREE ) + colm_tree_downref( prg, sp, key ); + break; + } + case FN_VLIST_PUSH_TAIL_WC: { + short gen_id; + read_half( gen_id ); + + debug( prg, REALM_BYTECODE, "FN_VLIST_PUSH_TAIL_WC %hd\n", gen_id ); + + list_t *list = vm_pop_list(); + value_t value = vm_pop_value(); + + colm_vlist_append( prg, list, value ); + + vm_push_tree( prg->true_val ); + break; + } + case FN_VLIST_PUSH_TAIL_WV: { + short gen_id; + read_half( gen_id ); + + debug( prg, REALM_BYTECODE, "FN_VLIST_PUSH_TAIL_WV %hd\n", gen_id ); + + list_t *list = vm_pop_list(); + value_t value = vm_pop_value(); + + colm_vlist_append( prg, list, value ); + + vm_push_tree( prg->true_val ); + + /* Set up reverse code. Needs no args. 
*/ + rcode_code( exec, IN_FN ); + rcode_code( exec, FN_VLIST_PUSH_TAIL_BKT ); + rcode_unit_term( exec ); + break; + } + case FN_VLIST_PUSH_TAIL_BKT: { + debug( prg, REALM_BYTECODE, "FN_VLIST_PUSH_TAIL_BKT\n" ); + + list_t *list = vm_pop_list(); + colm_list_detach_tail( list ); + break; + } + case FN_VLIST_PUSH_HEAD_WC: { + short gen_id; + read_half( gen_id ); + + debug( prg, REALM_BYTECODE, "FN_VLIST_PUSH_HEAD_WC %hd\n", gen_id ); + + list_t *list = vm_pop_list(); + value_t value = vm_pop_value(); + + colm_vlist_prepend( prg, list, value ); + + vm_push_tree( prg->true_val ); + break; + } + case FN_VLIST_PUSH_HEAD_WV: { + short gen_id; + read_half( gen_id ); + + debug( prg, REALM_BYTECODE, "FN_VLIST_PUSH_HEAD_WV %hd\n", gen_id ); + + list_t *list = vm_pop_list(); + value_t value = vm_pop_value(); + + colm_vlist_prepend( prg, list, value ); + + vm_push_tree( prg->true_val ); + + /* Set up reverse code. Needs no args. */ + rcode_code( exec, IN_FN ); + rcode_code( exec, FN_VLIST_PUSH_HEAD_BKT ); + rcode_unit_term( exec ); + break; + } + case FN_VLIST_PUSH_HEAD_BKT: { + debug( prg, REALM_BYTECODE, "FN_VLIST_PUSH_HEAD_BKT\n" ); + + list_t *list = vm_pop_list(); + colm_list_detach_head( list ); + break; + } + case FN_VLIST_POP_HEAD_WC: { + short gen_id; + read_half( gen_id ); + + debug( prg, REALM_BYTECODE, "FN_VLIST_POP_HEAD_WC %hd\n", gen_id ); + + list_t *list = vm_pop_list(); + + value_t result = colm_vlist_detach_head( prg, list ); + vm_push_value( result ); + break; + } + case FN_VLIST_POP_HEAD_WV: { + short gen_id; + read_half( gen_id ); + + debug( prg, REALM_BYTECODE, "FN_VLIST_POP_HEAD_WV %hd\n", gen_id ); + + list_t *list = vm_pop_list(); + + value_t result = colm_vlist_detach_head( prg, list ); + vm_push_value( result ); + + /* Set up reverse. 
*/ + rcode_code( exec, IN_FN ); + rcode_code( exec, FN_VLIST_POP_HEAD_BKT ); + rcode_half( exec, gen_id ); + rcode_word( exec, (word_t)result ); + rcode_unit_term( exec ); + break; + } + case FN_VLIST_POP_HEAD_BKT: { + short gen_id; + tree_t *val; + read_half( gen_id ); + read_tree( val ); + + debug( prg, REALM_BYTECODE, "FN_VLIST_POP_HEAD_BKT\n" ); + + list_t *list = vm_pop_list(); + + colm_vlist_prepend( prg, list, (value_t)val ); + break; + } + case FN_VLIST_POP_TAIL_WC: { + short gen_id; + read_half( gen_id ); + + debug( prg, REALM_BYTECODE, "FN_VLIST_POP_TAIL_WC %hd\n", gen_id ); + + list_t *list = vm_pop_list(); + + value_t result = colm_vlist_detach_tail( prg, list ); + vm_push_value( result ); + break; + } + case FN_VLIST_POP_TAIL_WV: { + short gen_id; + read_half( gen_id ); + + debug( prg, REALM_BYTECODE, "FN_VLIST_POP_TAIL_WV %hd\n", gen_id ); + + list_t *list = vm_pop_list(); + + value_t result = colm_vlist_detach_tail( prg, list ); + vm_push_value( result ); + + /* Set up reverse. */ + rcode_code( exec, IN_FN ); + rcode_code( exec, FN_VLIST_POP_TAIL_BKT ); + rcode_half( exec, gen_id ); + rcode_word( exec, (word_t)result ); + rcode_unit_term( exec ); + break; + } + case FN_VLIST_POP_TAIL_BKT: { + short gen_id; + tree_t *val; + read_half( gen_id ); + read_tree( val ); + + debug( prg, REALM_BYTECODE, "FN_VLIST_POP_TAIL_BKT\n" ); + + list_t *list = vm_pop_list(); + + colm_vlist_append( prg, list, (value_t)val ); + break; + } + + case FN_EXIT_HARD: { + debug( prg, REALM_BYTECODE, "FN_EXIT\n" ); + + vm_pop_tree(); + prg->exit_status = vm_pop_type(long); + prg->induce_exit = 1; + exit( prg->exit_status ); + } + case FN_EXIT: { + /* The unwind code follows the exit call (exception, see + * synthesis). 
*/ + short unwind_len; + read_half( unwind_len ); + + debug( prg, REALM_BYTECODE, "FN_EXIT, unwind len: %hd\n", unwind_len ); + + vm_pop_tree(); + prg->exit_status = vm_pop_type(long); + prg->induce_exit = 1; + + while ( true ) { + /* We stop on the root, leaving the psuedo-call setup on the + * stack. Note we exclude the local data. */ + if ( exec->frame_id == prg->rtd->root_frame_id ) + break; + + struct frame_info *fi = &prg->rtd->frame_info[exec->frame_id]; + + debug( prg, REALM_BYTECODE, "FN_EXIT, popping frame %s, " + "unwind-len %hd, arg-size %ld\n", + ( fi->name != 0 ? fi->name : "<no-name>" ), + unwind_len, fi->arg_size ); + + if ( unwind_len > 0 ) + sp = colm_execute_code( prg, exec, sp, instr ); + + downref_locals( prg, &sp, exec, fi->locals, fi->locals_len ); + vm_popn( fi->frame_size ); + + /* Call layout. */ + exec->frame_id = vm_pop_type(long); + exec->frame_ptr = vm_pop_type(tree_t**); + instr = vm_pop_type(code_t*); + + tree_t *ret_val = vm_pop_tree(); + vm_pop_value(); + + /* The IN_PREP_ARGS stack data. */ + vm_popn( fi->arg_size ); + vm_pop_value(); + + if ( fi->ret_tree ) { + /* Problem here. */ + colm_tree_downref( prg, sp, ret_val ); + } + + read_half( unwind_len ); + } + + goto out; + } + default: { + fatal( "UNKNOWN FUNCTION: 0x%02x -- something is wrong\n", c ); + break; + }} + break; + } + + /* Halt is a default instruction given by the compiler when it is + * asked to generate and instruction it doesn't have. It is deliberate + * and can represent "not implemented" or "compiler error" because a + * variable holding instructions was not properly initialize. */ + case IN_HALT: { + fatal( "IN_HALT -- compiler did something wrong\n" ); + exit(1); + break; + } + default: { + fatal( "UNKNOWN INSTRUCTION: 0x%02x -- something is wrong\n", *(instr-1) ); + assert(false); + break; + } + } + goto again; + +out: + if ( ! prg->induce_exit ) + assert( sp == root ); + return sp; +} + +/* + * Deleteing rcode required downreffing any trees held by it. 
+ */ +static void rcode_downref( program_t *prg, tree_t **sp, code_t *instr ) +{ +again: + switch ( *instr++ ) { + case IN_PARSE_INIT_BKT: { + debug( prg, REALM_BYTECODE, "IN_PARSE_INIT_BKT\n" ); + + consume_word(); //( parser ); + consume_word(); //( steps ); + + break; + } + case IN_SEND_EOF_BKT: { + debug( prg, REALM_BYTECODE, "IN_SEND_EOF_BKT\n" ); + consume_word(); //( parser ); + break; + } + + case IN_LOAD_TREE: { + tree_t *w; + read_tree( w ); + debug( prg, REALM_BYTECODE, "IN_LOAD_TREE %p\n", w ); + colm_tree_downref( prg, sp, w ); + break; + } + case IN_LOAD_WORD: { + consume_word(); + debug( prg, REALM_BYTECODE, "IN_LOAD_WORD\n" ); + break; + } + case IN_RESTORE_LHS: { + tree_t *restore; + read_tree( restore ); + debug( prg, REALM_BYTECODE, "IN_RESTORE_LHS\n" ); + colm_tree_downref( prg, sp, restore ); + break; + } + + case IN_PARSE_FRAG_BKT: { + debug( prg, REALM_BYTECODE, "IN_PARSE_FRAG_BKT\n" ); + break; + } + case IN_PCR_RET: { + debug( prg, REALM_BYTECODE, "IN_PCR_RET\n" ); + return; + } + case IN_PCR_END_DECK: { + debug( prg, REALM_BYTECODE, "IN_PCR_END_DECK\n" ); + return; + } + case IN_SEND_TEXT_BKT: { + tree_t *input; + + consume_word(); //( parser ); + read_tree( input ); + consume_word(); //( len ); + + debug( prg, REALM_BYTECODE, "IN_SEND_TEXT_BKT\n" ); + + colm_tree_downref( prg, sp, input ); + break; + } + case IN_SEND_TREE_BKT: { + tree_t *input; + + consume_word(); //( parser ); + read_tree( input ); + consume_word(); //( len ); + + debug( prg, REALM_BYTECODE, "IN_SEND_TREE_BKT\n" ); + + colm_tree_downref( prg, sp, input ); + break; + } + case IN_SEND_STREAM_BKT: { + consume_word(); //( sptr ); + consume_word(); //( input ); + consume_word(); //( len ); + + debug( prg, REALM_BYTECODE, "IN_SEND_STREAM_BKT\n" ); + break; + } + + case IN_INPUT_PULL_BKT: { + tree_t *string; + read_tree( string ); + + debug( prg, REALM_BYTECODE, "IN_INPUT_PULL_BKT\n" ); + + colm_tree_downref( prg, sp, string ); + break; + } + case IN_INPUT_PUSH_BKT: { + 
consume_word(); //( len ); + + debug( prg, REALM_BYTECODE, "IN_INPUT_PUSH_BKT\n" ); + break; + } + case IN_LOAD_GLOBAL_BKT: { + debug( prg, REALM_BYTECODE, "IN_LOAD_GLOBAL_BKT\n" ); + break; + } + case IN_LOAD_CONTEXT_BKT: { + debug( prg, REALM_BYTECODE, "IN_LOAD_CONTEXT_BKT\n" ); + break; + } + case IN_LOAD_INPUT_BKT: { + consume_word(); //( input ); + debug( prg, REALM_BYTECODE, "IN_LOAD_INPUT_BKT\n" ); + break; + } + case IN_GET_FIELD_TREE_BKT: { + short field; + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_GET_FIELD_TREE_BKT %hd\n", field ); + break; + } + case IN_SET_FIELD_TREE_BKT: { + short field; + tree_t *val; + read_half( field ); + read_tree( val ); + + debug( prg, REALM_BYTECODE, "IN_SET_FIELD_TREE_BKT %hd\n", field ); + + colm_tree_downref( prg, sp, val ); + break; + } + case IN_SET_STRUCT_BKT: { + short field; + tree_t *val; + read_half( field ); + read_tree( val ); + + debug( prg, REALM_BYTECODE, "IN_SET_STRUCT_BKT %hd\n", field ); + + colm_tree_downref( prg, sp, val ); + break; + } + case IN_SET_STRUCT_VAL_BKT: { + consume_half(); //( field ); + consume_word(); //( val ); + + debug( prg, REALM_BYTECODE, "IN_SET_STRUCT_VAL_BKT\n" ); + break; + } + case IN_PTR_ACCESS_BKT: { + consume_word(); //( ptr ); + + debug( prg, REALM_BYTECODE, "IN_PTR_ACCESS_BKT\n" ); + break; + } + case IN_SET_TOKEN_DATA_BKT: { + word_t oldval; + read_word( oldval ); + + debug( prg, REALM_BYTECODE, "IN_SET_TOKEN_DATA_BKT\n" ); + + head_t *head = (head_t*)oldval; + string_free( prg, head ); + break; + } + case IN_GET_LIST_MEM_BKT: { + short field; + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_GET_LIST_MEM_BKT %hd\n", field ); + break; + } + case IN_GET_MAP_MEM_BKT: { + short field; + read_half( field ); + + debug( prg, REALM_BYTECODE, "IN_GET_MAP_MEM_BKT %hd\n", field ); + break; + } + case IN_FN: { + switch ( *instr++ ) { + case FN_LIST_PUSH_HEAD_BKT: { + debug( prg, REALM_BYTECODE, "FN_LIST_PUSH_HEAD_BKT\n" ); + break; + } + case FN_LIST_POP_HEAD_BKT: { 
+ consume_half(); //( genId ); + consume_word(); //( val ); + + debug( prg, REALM_BYTECODE, "FN_LIST_POP_HEAD_BKT\n" ); + + break; + } + case FN_LIST_PUSH_TAIL_BKT: { + debug( prg, REALM_BYTECODE, "FN_LIST_PUSH_TAIL_BKT\n" ); + break; + } + case FN_LIST_POP_TAIL_BKT: { + consume_half(); //( genId ); + consume_word(); //( val ); + + debug( prg, REALM_BYTECODE, "FN_LIST_POP_TAIL_BKT\n" ); + + break; + } + case FN_MAP_INSERT_BKT: { + #ifdef DEBUG + uchar inserted; + consume_half(); //( genId ); + read_byte( inserted ); + consume_word(); //( wmapEl ); + #else + consume_half(); //( genId ); + consume_byte(); // inserted + consume_word(); //( wmapEl ); + #endif + + debug( prg, REALM_BYTECODE, "FN_MAP_INSERT_BKT %d\n", + (int)inserted ); + break; + } + case FN_VMAP_INSERT_BKT: { + short gen_id; + + #ifdef DEBUG + uchar inserted; + read_half( gen_id ); + read_byte( inserted ); + consume_word(); //read_word( wmap_el ); + #else + read_half( gen_id ); + consume_byte(); + consume_word(); //read_word( wmap_el ); + #endif + + //map_el_t *map_el = (map_el_t*)wmap_el; + + debug( prg, REALM_BYTECODE, "FN_VMAP_INSERT_BKT %d\n", + (int)inserted ); + + break; + } + case FN_MAP_DETACH_BKT: { + tree_t *key, *val; + read_tree( key ); + read_tree( val ); + + debug( prg, REALM_BYTECODE, "FN_MAP_DETACH_BKT\n" ); + + colm_tree_downref( prg, sp, key ); + colm_tree_downref( prg, sp, val ); + break; + } + + case FN_VLIST_PUSH_TAIL_BKT: { + break; + } + + case FN_VLIST_PUSH_HEAD_BKT: { + break; + } + + case FN_VLIST_POP_HEAD_BKT: { + short gen_id; + //word_t result; + read_half( gen_id ); + consume_word(); //read_word( result ); + break; + } + + case FN_VLIST_POP_TAIL_BKT: { + short gen_id; + //word_t result; + read_half( gen_id ); + consume_word(); //read_word( result ); + break; + } + + default: { + fatal( "UNKNOWN FUNCTION 0x%02x: -- reverse code downref\n", *(instr-1)); + assert(false); + }} + break; + } + default: { + fatal( "UNKNOWN INSTRUCTION 0x%02x: -- reverse code downref\n", 
*(instr-1)); + assert(false); + break; + } + } + goto again; +} + + diff --git a/src/bytecode.h b/src/bytecode.h new file mode 100644 index 00000000..02cd78f4 --- /dev/null +++ b/src/bytecode.h @@ -0,0 +1,678 @@ +/* + * Copyright 2007-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef _COLM_BYTECODE_H +#define _COLM_BYTECODE_H + +#include <colm/pdarun.h> +#include <colm/type.h> +#include <colm/tree.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#if SIZEOF_LONG != 4 && SIZEOF_LONG != 8 + #error "SIZEOF_LONG contained an unexpected value" +#endif + +typedef unsigned long ulong; +typedef unsigned char uchar; + +#define IN_NONE 0x00 +#define IN_LOAD_INT 0x01 +#define IN_LOAD_STR 0x02 +#define IN_LOAD_NIL 0x03 +#define IN_LOAD_TRUE 0x04 +#define IN_LOAD_FALSE 0x05 +#define IN_LOAD_TREE 0x06 +#define IN_LOAD_WORD 0x07 + +#define IN_ADD_INT 0x08 +#define IN_SUB_INT 0x09 +#define IN_MULT_INT 0x0a +#define IN_DIV_INT 0x0b + +#define IN_TST_EQL_VAL 0x59 +#define IN_TST_EQL_TREE 0x0c +#define IN_TST_NOT_EQL_TREE 0x0d +#define IN_TST_NOT_EQL_VAL 0x5f +#define IN_TST_LESS_VAL 0x0e +#define IN_TST_LESS_TREE 0xbd +#define IN_TST_GRTR_VAL 0x0f +#define IN_TST_GRTR_TREE 0xbf +#define IN_TST_LESS_EQL_VAL 0x10 +#define IN_TST_LESS_EQL_TREE 0xc0 +#define IN_TST_GRTR_EQL_VAL 0x11 +#define IN_TST_GRTR_EQL_TREE 0xcd +#define IN_TST_LOGICAL_AND 0x12 +#define IN_TST_LOGICAL_OR 0x13 + +#define IN_TST_NZ_TREE 0xd1 + +#define IN_LOAD_RETVAL 0xd4 + +#define IN_STASH_ARG 0x20 +#define IN_PREP_ARGS 0xe8 +#define IN_CLEAR_ARGS 0xe9 + +#define IN_GEN_ITER_FROM_REF 0xd3 +#define IN_GEN_ITER_DESTROY 0xd5 +#define IN_GEN_ITER_UNWIND 0x74 +#define IN_GEN_ITER_GET_CUR_R 0xdf +#define IN_GEN_VITER_GET_CUR_R 0xe7 +#define IN_LIST_ITER_ADVANCE 0xde +#define IN_REV_LIST_ITER_ADVANCE 0x77 +#define IN_MAP_ITER_ADVANCE 0xe6 + +#define IN_NOT_VAL 0x14 +#define IN_NOT_TREE 0xd2 + +#define IN_JMP 0x15 +#define IN_JMP_FALSE_TREE 0x16 +#define IN_JMP_TRUE_TREE 0x17 +#define IN_JMP_FALSE_VAL 0xb8 +#define IN_JMP_TRUE_VAL 0xed + +#define IN_STR_LENGTH 0x19 +#define IN_CONCAT_STR 0x1a +#define IN_TREE_TRIM 0x1b + +#define IN_POP_TREE 0x1d +#define IN_POP_N_WORDS 0x1e +#define IN_POP_VAL 0xbe +#define IN_DUP_VAL 0x1f +#define IN_DUP_TREE 0xf2 + +#define IN_REJECT 0x21 +#define 
IN_MATCH 0x22 +#define IN_PROD_NUM 0x6a +#define IN_CONSTRUCT 0x23 +#define IN_CONS_OBJECT 0xf0 +#define IN_CONS_GENERIC 0xf1 +#define IN_TREE_CAST 0xe4 + +#define IN_GET_LOCAL_R 0x25 +#define IN_GET_LOCAL_WC 0x26 +#define IN_SET_LOCAL_WC 0x27 + +#define IN_GET_LOCAL_REF_R 0x28 +#define IN_GET_LOCAL_REF_WC 0x29 +#define IN_SET_LOCAL_REF_WC 0x2a + +#define IN_SAVE_RET 0x2b + +#define IN_GET_FIELD_TREE_R 0x2c +#define IN_GET_FIELD_TREE_WC 0x2d +#define IN_GET_FIELD_TREE_WV 0x2e +#define IN_GET_FIELD_TREE_BKT 0x2f + +#define IN_SET_FIELD_TREE_WV 0x30 +#define IN_SET_FIELD_TREE_WC 0x31 +#define IN_SET_FIELD_TREE_BKT 0x32 +#define IN_SET_FIELD_TREE_LEAVE_WC 0x33 + +#define IN_GET_FIELD_VAL_R 0x5e +#define IN_SET_FIELD_VAL_WC 0x60 + +#define IN_GET_MATCH_LENGTH_R 0x34 +#define IN_GET_MATCH_TEXT_R 0x35 + +#define IN_GET_TOKEN_DATA_R 0x36 +#define IN_SET_TOKEN_DATA_WC 0x37 +#define IN_SET_TOKEN_DATA_WV 0x38 +#define IN_SET_TOKEN_DATA_BKT 0x39 + +#define IN_GET_TOKEN_FILE_R 0x80 +#define IN_GET_TOKEN_LINE_R 0x3b +#define IN_GET_TOKEN_POS_R 0x3a +#define IN_GET_TOKEN_COL_R 0x81 + +#define IN_INIT_RHS_EL 0x3c +#define IN_INIT_LHS_EL 0x3d +#define IN_INIT_CAPTURES 0x3e +#define IN_STORE_LHS_EL 0x3f +#define IN_RESTORE_LHS 0x40 + +#define IN_TRITER_FROM_REF 0x41 +#define IN_TRITER_ADVANCE 0x42 +#define IN_TRITER_NEXT_CHILD 0x43 +#define IN_TRITER_GET_CUR_R 0x44 +#define IN_TRITER_GET_CUR_WC 0x45 +#define IN_TRITER_SET_CUR_WC 0x46 +#define IN_TRITER_UNWIND 0x73 +#define IN_TRITER_DESTROY 0x47 +#define IN_TRITER_NEXT_REPEAT 0x48 +#define IN_TRITER_PREV_REPEAT 0x49 + +#define IN_REV_TRITER_FROM_REF 0x4a +#define IN_REV_TRITER_DESTROY 0x4b +#define IN_REV_TRITER_UNWIND 0x75 +#define IN_REV_TRITER_PREV_CHILD 0x4c + +#define IN_UITER_DESTROY 0x4d +#define IN_UITER_UNWIND 0x71 +#define IN_UITER_CREATE_WV 0x4e +#define IN_UITER_CREATE_WC 0x4f +#define IN_UITER_ADVANCE 0x50 +#define IN_UITER_GET_CUR_R 0x51 +#define IN_UITER_GET_CUR_WC 0x52 +#define IN_UITER_SET_CUR_WC 0x53 + +#define 
IN_TREE_SEARCH 0x54 + +#define IN_LOAD_GLOBAL_R 0x55 +#define IN_LOAD_GLOBAL_WV 0x56 +#define IN_LOAD_GLOBAL_WC 0x57 +#define IN_LOAD_GLOBAL_BKT 0x58 + +#define IN_PTR_ACCESS_WV 0x5a +#define IN_PTR_ACCESS_BKT 0x61 + +#define IN_REF_FROM_LOCAL 0x62 +#define IN_REF_FROM_REF 0x63 +#define IN_REF_FROM_QUAL_REF 0x64 +#define IN_RHS_REF_FROM_QUAL_REF 0xee +#define IN_REF_FROM_BACK 0xe3 +#define IN_TRITER_REF_FROM_CUR 0x65 +#define IN_UITER_REF_FROM_CUR 0x66 + +#define IN_GET_MAP_EL_MEM_R 0x6c + +#define IN_MAP_LENGTH 0x67 + +#define IN_LIST_LENGTH 0x72 + +#define IN_GET_LIST_MEM_R 0x79 +#define IN_GET_LIST_MEM_WC 0x7a +#define IN_GET_LIST_MEM_WV 0x7b +#define IN_GET_LIST_MEM_BKT 0x7c + +#define IN_GET_VLIST_MEM_R 0xeb +#define IN_GET_VLIST_MEM_WC 0xec +#define IN_GET_VLIST_MEM_WV 0x70 +#define IN_GET_VLIST_MEM_BKT 0x5c + +#define IN_CONS_REDUCER 0x76 +#define IN_READ_REDUCE 0x69 + +#define IN_DONE 0x78 + +#define IN_GET_LIST_EL_MEM_R 0xf5 + +#define IN_GET_MAP_MEM_R 0x6d +#define IN_GET_MAP_MEM_WV 0x7d +#define IN_GET_MAP_MEM_WC 0x7e +#define IN_GET_MAP_MEM_BKT 0x7f + +#define IN_TREE_TO_STR_XML 0x6e +#define IN_TREE_TO_STR_XML_AC 0x6f +#define IN_TREE_TO_STR_POSTFIX 0xb6 + +#define IN_HOST 0xea + +#define IN_CALL_WC 0x8c +#define IN_CALL_WV 0x8d +#define IN_RET 0x8e +#define IN_YIELD 0x8f +#define IN_HALT 0x8b + +#define IN_INT_TO_STR 0x97 +#define IN_TREE_TO_STR 0x98 +#define IN_TREE_TO_STR_TRIM 0x99 +#define IN_TREE_TO_STR_TRIM_A 0x18 + +#define IN_CREATE_TOKEN 0x9a +#define IN_MAKE_TOKEN 0x9b +#define IN_MAKE_TREE 0x9c +#define IN_CONSTRUCT_TERM 0x9d + +#define IN_INPUT_PULL_WV 0x9e +#define IN_INPUT_PULL_WC 0xe1 +#define IN_INPUT_PULL_BKT 0x9f + +#define IN_INPUT_CLOSE_WC 0xef +#define IN_INPUT_AUTO_TRIM_WC 0x82 +#define IN_IINPUT_AUTO_TRIM_WC 0x83 + +#define IN_PARSE_FRAG_W 0xa2 +#define IN_PARSE_INIT_BKT 0xa1 +#define IN_PARSE_FRAG_BKT 0xa6 + +#define IN_SEND_NOTHING 0xa0 +#define IN_SEND_TEXT_W 0x89 +#define IN_SEND_TEXT_BKT 0x8a + +#define IN_PRINT_TREE 0xa3 + 
+#define IN_SEND_TREE_W 0xa9 +#define IN_SEND_TREE_BKT 0xaa + +#define IN_REPLACE_STREAM 0x88 + +#define IN_SEND_STREAM_W 0x90 +#define IN_SEND_STREAM_BKT 0x1c + +#define IN_SEND_EOF_W 0x87 +#define IN_SEND_EOF_BKT 0xa4 + +#define IN_REDUCE_COMMIT 0xa5 + +#define IN_PCR_RET 0xb2 +#define IN_PCR_END_DECK 0xb3 + +#define IN_OPEN_FILE 0xb4 + +#define IN_GET_CONST 0xb5 + +#define IN_TO_UPPER 0xb9 +#define IN_TO_LOWER 0xba + +#define IN_LOAD_INPUT_R 0xc1 +#define IN_LOAD_INPUT_WV 0xc2 +#define IN_LOAD_INPUT_WC 0xc3 +#define IN_LOAD_INPUT_BKT 0xc4 + +#define IN_INPUT_PUSH_WV 0xc5 +#define IN_INPUT_PUSH_BKT 0xc6 +#define IN_INPUT_PUSH_IGNORE_WV 0xc7 + +#define IN_INPUT_PUSH_STREAM_WV 0xf3 +#define IN_INPUT_PUSH_STREAM_BKT 0xf4 + +#define IN_LOAD_CONTEXT_R 0xc8 +#define IN_LOAD_CONTEXT_WV 0xc9 +#define IN_LOAD_CONTEXT_WC 0xca +#define IN_LOAD_CONTEXT_BKT 0xcb + +#define IN_SET_PARSER_CONTEXT 0xd0 +#define IN_SET_PARSER_INPUT 0x96 + +#define IN_GET_RHS_VAL_R 0xd7 +#define IN_GET_RHS_VAL_WC 0xd8 +#define IN_GET_RHS_VAL_WV 0xd9 +#define IN_GET_RHS_VAL_BKT 0xda +#define IN_SET_RHS_VAL_WC 0xdb +#define IN_SET_RHS_VAL_WV 0xdc +#define IN_SET_RHS_VAL_BKT 0xdd + +#define IN_GET_PARSER_MEM_R 0x5b + +#define IN_GET_STREAM_MEM_R 0xb7 + +#define IN_GET_PARSER_STREAM 0x6b + +#define IN_GET_ERROR 0xcc +#define IN_SET_ERROR 0xe2 + +#define IN_SYSTEM 0xe5 + +#define IN_GET_STRUCT_R 0xf7 +#define IN_GET_STRUCT_WC 0xf8 +#define IN_GET_STRUCT_WV 0xf9 +#define IN_GET_STRUCT_BKT 0xfa +#define IN_SET_STRUCT_WC 0xfb +#define IN_SET_STRUCT_WV 0xfc +#define IN_SET_STRUCT_BKT 0xfd +#define IN_GET_STRUCT_VAL_R 0x93 +#define IN_SET_STRUCT_VAL_WV 0x94 +#define IN_SET_STRUCT_VAL_WC 0x95 +#define IN_SET_STRUCT_VAL_BKT 0x5d +#define IN_NEW_STRUCT 0xfe + +#define IN_GET_LOCAL_VAL_R 0x91 +#define IN_SET_LOCAL_VAL_WC 0x92 + +#define IN_NEW_STREAM 0x24 +#define IN_GET_COLLECT_STRING 0x68 + +/* + * Const things to get. 
+ */ +#define CONST_STDIN 0x10 +#define CONST_STDOUT 0x11 +#define CONST_STDERR 0x12 +#define CONST_ARG 0x13 + + + +/* + * IN_FN instructions. + */ + +#define IN_FN 0xff +#define FN_NONE 0x00 +#define FN_STOP 0x0a + +#define FN_STR_ATOI 0x1d +#define FN_STR_ATOO 0x38 +#define FN_STR_UORD8 0x01 +#define FN_STR_SORD8 0x02 +#define FN_STR_UORD16 0x03 +#define FN_STR_SORD16 0x04 +#define FN_STR_UORD32 0x05 +#define FN_STR_SORD32 0x06 +#define FN_STR_PREFIX 0x36 +#define FN_STR_SUFFIX 0x37 +#define FN_SPRINTF 0xd6 +#define FN_LOAD_ARGV 0x07 +#define FN_LOAD_ARG0 0x08 +#define FN_INIT_STDS 0x3e + + +#define FN_LIST_PUSH_TAIL_WV 0x11 +#define FN_LIST_PUSH_TAIL_WC 0x12 +#define FN_LIST_PUSH_TAIL_BKT 0x13 +#define FN_LIST_POP_TAIL_WV 0x14 +#define FN_LIST_POP_TAIL_WC 0x15 +#define FN_LIST_POP_TAIL_BKT 0x16 +#define FN_LIST_PUSH_HEAD_WV 0x17 +#define FN_LIST_PUSH_HEAD_WC 0x18 +#define FN_LIST_PUSH_HEAD_BKT 0x19 +#define FN_LIST_POP_HEAD_WV 0x1a +#define FN_LIST_POP_HEAD_WC 0x1b +#define FN_LIST_POP_HEAD_BKT 0x1c + +#define FN_MAP_FIND 0x24 +#define FN_MAP_INSERT_WV 0x1e +#define FN_MAP_INSERT_WC 0x1f +#define FN_MAP_INSERT_BKT 0x20 +#define FN_MAP_DETACH_WV 0x21 +#define FN_MAP_DETACH_WC 0x22 +#define FN_MAP_DETACH_BKT 0x23 + +#define FN_VMAP_FIND 0x29 +#define FN_VMAP_INSERT_WC 0x25 +#define FN_VMAP_INSERT_WV 0x26 +#define FN_VMAP_INSERT_BKT 0x3d +#define FN_VMAP_REMOVE_WC 0x27 +#define FN_VMAP_REMOVE_WV 0x28 + +#define FN_VLIST_PUSH_TAIL_WV 0x2a +#define FN_VLIST_PUSH_TAIL_WC 0x2b +#define FN_VLIST_PUSH_TAIL_BKT 0x2c +#define FN_VLIST_POP_TAIL_WV 0x2d +#define FN_VLIST_POP_TAIL_WC 0x2e +#define FN_VLIST_POP_TAIL_BKT 0x2f +#define FN_VLIST_PUSH_HEAD_WV 0x30 +#define FN_VLIST_PUSH_HEAD_WC 0x31 +#define FN_VLIST_PUSH_HEAD_BKT 0x32 +#define FN_VLIST_POP_HEAD_WV 0x33 +#define FN_VLIST_POP_HEAD_WC 0x34 +#define FN_VLIST_POP_HEAD_BKT 0x35 +#define FN_EXIT 0x39 +#define FN_EXIT_HARD 0x3a +#define FN_PREFIX 0x3b +#define FN_SUFFIX 0x3c + +#define TRIM_DEFAULT 0x01 +#define TRIM_YES 
0x02 +#define TRIM_NO 0x03 + +/* Types of Generics. */ +enum GEN { + GEN_PARSER = 0x14, + GEN_LIST = 0x15, + GEN_MAP = 0x16 +}; + +/* Known language element ids. */ +enum LEL_ID { + LEL_ID_PTR = 1, + LEL_ID_STR = 2, + LEL_ID_IGNORE = 3 +}; + +/* + * Flags + */ + +/* A tree that has been generated by a termDup. */ +#define PF_TERM_DUP 0x0001 + +/* Has been processed by the commit function. All children have also been + * processed. */ +#define PF_COMMITTED 0x0002 + +/* Created by a token generation action, not made from the input. */ +#define PF_ARTIFICIAL 0x0004 + +/* Named node from a pattern or constructor. */ +#define PF_NAMED 0x0008 + +/* There is reverse code associated with this tree node. */ +#define PF_HAS_RCODE 0x0010 + +#define PF_RIGHT_IGNORE 0x0020 + +#define PF_LEFT_IL_ATTACHED 0x0400 +#define PF_RIGHT_IL_ATTACHED 0x0800 + +#define AF_LEFT_IGNORE 0x0100 +#define AF_RIGHT_IGNORE 0x0200 + +#define AF_SUPPRESS_LEFT 0x4000 +#define AF_SUPPRESS_RIGHT 0x8000 + +/* + * Call stack. + */ + +/* Number of spots in the frame, after the args. */ +#define FR_AA 5 + +/* Positions relative to the frame pointer. */ +#define FR_CA 4 /* call args */ +#define FR_RV 3 /* return value */ +#define FR_RI 2 /* return instruction */ +#define FR_RFP 1 /* return frame pointer */ +#define FR_RFD 0 /* return frame id. */ + +/* + * Calling Convention: + * a1 + * a2 + * a3 + * ... + * return value FR_RV + * return instr FR_RI + * return frame ptr FR_RFP + * return frame id FR_RFD + */ + +/* + * User iterator call stack. + * Adds an iframe pointer, removes the return value. + */ + +/* Number of spots in the frame, after the args. */ +#define IFR_AA 5 + +/* Positions relative to the frame pointer. */ +#define IFR_RIN 2 /* return instruction */ +#define IFR_RIF 1 /* return iframe pointer */ +#define IFR_RFR 0 /* return frame pointer */ + +#define vm_push_type(type, i) \ + ( ( sp == prg->sb_beg ? 
(sp = vm_bs_add(prg, sp, 1)) : 0 ), (*((type*)(--sp)) = (i)) ) + +#define vm_pushn(n) \ + ( ( (sp-(n)) < prg->sb_beg ? (sp = vm_bs_add(prg, sp, n)) : 0 ), (sp -= (n)) ) + +#define vm_pop_type(type) \ + ({ SW r = *sp; (sp+1) >= prg->sb_end ? (sp = vm_bs_pop(prg, sp, 1)) : (sp += 1); (type)r; }) + +#define vm_push_tree(i) vm_push_type(tree_t*, i) +#define vm_push_input(i) vm_push_type(input_t*, i) +#define vm_push_stream(i) vm_push_type(stream_t*, i) +#define vm_push_struct(i) vm_push_type(struct_t*, i) +#define vm_push_parser(i) vm_push_type(parser_t*, i) +#define vm_push_value(i) vm_push_type(value_t, i) +#define vm_push_string(i) vm_push_type(str_t*, i) +#define vm_push_kid(i) vm_push_type(kid_t*, i) +#define vm_push_ref(i) vm_push_type(ref_t*, i) +#define vm_push_string(i) vm_push_type(str_t*, i) +#define vm_push_ptree(i) vm_push_type(parse_tree_t*, i) + +#define vm_pop_tree() vm_pop_type(tree_t*) +#define vm_pop_input() vm_pop_type(input_t*) +#define vm_pop_stream() vm_pop_type(stream_t*) +#define vm_pop_struct() vm_pop_type(struct_t*) +#define vm_pop_parser() vm_pop_type(parser_t*) +#define vm_pop_list() vm_pop_type(list_t*) +#define vm_pop_map() vm_pop_type(map_t*) +#define vm_pop_value() vm_pop_type(value_t) +#define vm_pop_string() vm_pop_type(str_t*) +#define vm_pop_kid() vm_pop_type(kid_t*) +#define vm_pop_ref() vm_pop_type(ref_t*) +#define vm_pop_ptree() vm_pop_type(parse_tree_t*) + +#define vm_pop_ignore() \ + ({ (sp+1) >= prg->sb_end ? (sp = vm_bs_pop(prg, sp, 1)) : (sp += 1); }) + +#define vm_popn(n) \ + ({ (sp+(n)) >= prg->sb_end ? (sp = vm_bs_pop(prg, sp, n)) : (sp += (n)); }) + +#define vm_contiguous(n) \ + ( ( (sp-(n)) < prg->sb_beg ? 
(sp = vm_bs_add(prg, sp, n)) : 0 ) ) + +#define vm_top() (*sp) +#define vm_ptop() (sp) + +#define vm_ssize() ( prg->sb_total + (prg->sb_end - sp) ) + +#define vm_local_iframe(o) (exec->iframe_ptr[o]) +#define vm_plocal_iframe(o) (&exec->iframe_ptr[o]) + +void vm_init( struct colm_program * ); +tree_t** vm_bs_add( struct colm_program *, tree_t **, int ); +tree_t** vm_bs_pop( struct colm_program *, tree_t **, int ); +void vm_clear( struct colm_program * ); + +typedef tree_t *SW; +typedef tree_t **StackPtr; + +/* Can't use sizeof() because we have used types that are bigger than the + * serial representation. */ +#define SIZEOF_CODE 1 +#define SIZEOF_HALF 2 +#define SIZEOF_WORD sizeof(word_t) + +typedef struct colm_execution +{ + tree_t **frame_ptr; + tree_t **iframe_ptr; + long frame_id; + tree_t **call_args; + + long rcode_unit_len; + + parser_t *parser; + long steps; + long pcr; + tree_t *ret_val; + char WV; +} execution_t; + +struct colm_execution; + +static inline tree_t **vm_get_plocal( struct colm_execution *exec, int o ) +{ + if ( o >= FR_AA ) { + tree_t **call_args = (tree_t**)exec->frame_ptr[FR_CA]; + return &call_args[o - FR_AA]; + } + else { + return &exec->frame_ptr[o]; + } +} + +static inline tree_t *vm_get_local( struct colm_execution *exec, int o ) +{ + if ( o >= FR_AA ) { + tree_t **call_args = (tree_t**)exec->frame_ptr[FR_CA]; + return call_args[o - FR_AA]; + } + else { + return exec->frame_ptr[o]; + } +} + +static inline void vm_set_local( struct colm_execution *exec, int o, tree_t* v ) +{ + if ( o >= FR_AA ) { + tree_t **call_args = (tree_t**)exec->frame_ptr[FR_CA]; + call_args[o - FR_AA] = v; + } + else { + exec->frame_ptr[o] = v; + } +} + + +long string_length( head_t *str ); +const char *string_data( head_t *str ); +head_t *init_str_space( long length ); +head_t *string_copy( struct colm_program *prg, head_t *head ); +void string_free( struct colm_program *prg, head_t *head ); +void string_shorten( head_t *tokdata, long newlen ); +head_t 
*concat_str( head_t *s1, head_t *s2 ); +word_t str_atoi( head_t *str ); +word_t str_atoo( head_t *str ); +word_t str_uord16( head_t *head ); +word_t str_uord8( head_t *head ); +word_t cmp_string( head_t *s1, head_t *s2 ); +head_t *string_to_upper( head_t *s ); +head_t *string_to_lower( head_t *s ); +head_t *string_sprintf( program_t *prg, str_t *format, long integer ); + +head_t *make_literal( struct colm_program *prg, long litoffset ); +head_t *int_to_str( struct colm_program *prg, word_t i ); + +void colm_execute( struct colm_program *prg, execution_t *exec, code_t *code ); +void reduction_execution( execution_t *exec, tree_t **sp ); +void generation_execution( execution_t *exec, tree_t **sp ); +void reverse_execution( execution_t *exec, tree_t **sp, struct rt_code_vect *all_rev ); + +kid_t *alloc_attrs( struct colm_program *prg, long length ); +void free_attrs( struct colm_program *prg, kid_t *attrs ); +kid_t *get_attr_kid( tree_t *tree, long pos ); + +tree_t *split_tree( struct colm_program *prg, tree_t *t ); + +void colm_rcode_downref_all( struct colm_program *prg, tree_t **sp, struct rt_code_vect *cv ); +int colm_make_reverse_code( struct pda_run *pda_run ); +void colm_transfer_reverse_code( struct pda_run *pda_run, parse_tree_t *tree ); + +void split_ref( struct colm_program *prg, tree_t ***sp, ref_t *from_ref ); + +void alloc_global( struct colm_program *prg ); +tree_t **colm_execute_code( struct colm_program *prg, + execution_t *exec, tree_t **sp, code_t *instr ); +code_t *colm_pop_reverse_code( struct rt_code_vect *all_rev ); + +#ifdef __cplusplus +} +#endif + +#endif /* _COLM_BYTECODE_H */ + diff --git a/src/closure.cc b/src/closure.cc new file mode 100644 index 00000000..066bf12b --- /dev/null +++ b/src/closure.cc @@ -0,0 +1,458 @@ +/* + * Copyright 2005-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to 
+ * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <assert.h> +#include <stdbool.h> + +#include <iostream> + +#include "compiler.h" + +using std::endl; +using std::cerr; + +void Compiler::lr0BringInItem( PdaGraph *pdaGraph, PdaState *dest, PdaState *prodState, + PdaTrans *expandFrom, Production *prod ) +{ + /* We use dot sets for finding unique states. In the future, should merge + * dots sets with the stateSet pointer (only need one of these). */ + assert( dest != prodState ); + dest->dotSet.insert( prodState->dotSet ); + + /* Get the epsilons, context, out priorities. */ + dest->pendingCommits.insert( prodState->pendingCommits ); + //if ( prodState->pendingCommits.length() > 0 ) + // cerr << "THERE ARE PENDING COMMITS DRAWN IN" << endl; + + if ( prodState->transMap.length() > 0 ) { + assert( prodState->transMap.length() == 1 ); + PdaTrans *srcTrans = prodState->transMap[0].value; + + /* Look for the source in the destination. */ + TransMapEl *destTel = dest->transMap.find( srcTrans->lowKey ); + if ( destTel == 0 ) { + /* Make a new state and transition to it. 
*/ + PdaState *newState = pdaGraph->addState(); + PdaTrans *newTrans = new PdaTrans(); + + /* Attach the new transition to the new state. */ + newTrans->lowKey = srcTrans->lowKey; + pdaGraph->attachTrans( dest, newState, newTrans ); + pdaGraph->addInTrans( newTrans, srcTrans ); + + /* The transitions we make during lr0 closure are all shifts. */ + assert( newTrans->isShift ); + assert( srcTrans->isShift ); + + /* The new state must have its state set setup. */ + newState->stateSet = new PdaStateSet; + newState->stateSet->insert( srcTrans->toState ); + + /* Insert the transition into the map. Be sure to set destTel, it + * is needed below. */ + dest->transMap.insert( srcTrans->lowKey, newTrans, &destTel ); + + /* If the item is a non-term, queue it for closure. */ + LangEl *langEl = langElIndex[srcTrans->lowKey]; + if ( langEl != 0 && langEl->type == LangEl::NonTerm ) { + pdaGraph->transClosureQueue.append( newTrans ); + //cerr << "put to trans closure queue" << endl; + } + } + else { + //cerr << "merging transitions" << endl; + destTel->value->toState->stateSet->insert( srcTrans->toState ); + pdaGraph->addInTrans( destTel->value, srcTrans ); + } + + /* If this is an expansion then we may need to bring in commits. */ + if ( expandFrom != 0 && expandFrom->commits.length() > 0 ) { + //cerr << "SETTING COMMIT ON CLOSURE ROUND" << endl; + destTel->value->commits.insert( expandFrom->commits ); + + expandFrom->commits.empty(); + } + } + else { + /* ProdState does not have any transitions out. It is at the end of a + * production. */ + if ( expandFrom != 0 && expandFrom->commits.length() > 0 ) { + //cerr << "SETTING COMMIT IN PENDING LOOKAHEAD" << endl; + for ( LongSet::Iter len = expandFrom->commits; len.lte(); len++ ) + dest->pendingCommits.insert( ProdIdPair( prod->prodId, *len ) ); + + expandFrom->commits.empty(); + } + } +} + +void Compiler::lr0InvokeClosure( PdaGraph *pdaGraph, PdaState *state ) +{ + /* State should not already be closed. 
*/ + assert( !state->inClosedMap ); + + /* This is used each time we invoke closure, it must be cleared. */ + pdaGraph->transClosureQueue.abandon(); + + /* Drag in the core items. */ + for ( PdaStateSet::Iter ssi = *state->stateSet; ssi.lte(); ssi++ ) + lr0BringInItem( pdaGraph, state, *ssi, 0, 0 ); + + /* Now bring in the derived items. */ + while ( pdaGraph->transClosureQueue.length() > 0 ) { + PdaTrans *toClose = pdaGraph->transClosureQueue.detachFirst(); + //cerr << "have a transition to derive" << endl; + + /* Get the langEl. */ + LangEl *langEl = langElIndex[toClose->lowKey]; + + /* Make graphs for all of the productions that the non + * terminal goes to that are not already in the state's dotSet. */ + for ( LelDefList::Iter prod = langEl->defList; prod.lte(); prod++ ) { + /* Bring in the start state of the production. */ + lr0BringInItem( pdaGraph, state, prod->fsm->startState, toClose, prod ); + } + } + + /* Try and insert into the closed dict. */ + DotSetMapEl *lastFound; + if ( pdaGraph->closedMap.insert( state, &lastFound ) ) { + /* Insertion into closed dict succeeded. There is no state with the + * same dot set. The state is now closed. It is guaranteed a spot in + * the closed dict and it will never go away (states never deleted + * during closure). */ + pdaGraph->stateClosedList.append( state ); + state->inClosedMap = true; + + /* Add all of the states in the out transitions to the closure queue. + * This will give us a depth first search of the graph. */ + for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { + /* Get the state the transEl goes to. */ + PdaState *targ = trans->value->toState; + + /* If the state on this tranisition has not already been slated + * for closure, then add it to the queue. */ + if ( !targ->onClosureQueue && !targ->inClosedMap ) { + pdaGraph->stateClosureQueue.append( targ ); + targ->onClosureQueue = true; + } + } + } + else { + /* Insertion into closed dict failed. 
There is an existing state + * with the same dot set. Get the existing state. */ + pdaGraph->inTransMove( lastFound, state ); + for ( TransMap::Iter tel = state->transMap; tel.lte(); tel++ ) { + pdaGraph->stateList.detach( tel->value->toState ); + delete tel->value->toState; + delete tel->value; + } + pdaGraph->stateList.detach( state ); + delete state; + } +} + +/* Invoke cloure on the graph. We use a queue here to achieve a breadth + * first search of the tree we build. Note, there are back edges in this + * tree. They are the edges made when upon closure, a dot set exists + * already. */ +void Compiler::lr0CloseAllStates( PdaGraph *pdaGraph ) +{ + /* While there are items on the closure queue. */ + while ( pdaGraph->stateClosureQueue.length() > 0 ) { + /* Pop the first item off. */ + PdaState *state = pdaGraph->stateClosureQueue.detachFirst(); + state->onClosureQueue = false; + + /* Invoke closure upon the state. */ + lr0InvokeClosure( pdaGraph, state ); + } +} + +void Compiler::transferCommits( PdaGraph *pdaGraph, PdaTrans *trans, + PdaState *state, long prodId ) +{ + ProdIdPairSet &pendingCommits = state->pendingCommits; + for ( ProdIdPairSet::Iter pi = pendingCommits; pi.lte(); pi++ ) { + if ( pi->onReduce == prodId ) + trans->commits.insert( pi->length ); + } +} + +void Compiler::lalr1AddFollow2( PdaGraph *pdaGraph, PdaTrans *trans, FollowToAdd &followKeys ) +{ + for ( ExpandToSet::Iter ets = trans->expandTo; ets.lte(); ets++ ) { + int prodId = ets->prodId; + PdaState *expandTo = ets->state; + + for ( FollowToAdd::Iter fkey = followKeys; fkey.lte(); fkey++ ) { + TransMapEl *transEl = expandTo->transMap.find( fkey->key ); + + if ( transEl != 0 ) { + /* Set up the follow transition. */ + PdaTrans *destTrans = transEl->value; + + transferCommits( pdaGraph, destTrans, expandTo, prodId ); + + pdaGraph->addInReduction( destTrans, prodId, fkey->value ); + } + else { + /* Set up the follow transition. 
*/ + PdaTrans *followTrans = new PdaTrans; + followTrans->lowKey = fkey->key; + followTrans->isShift = false; + followTrans->reductions.insert( prodId, fkey->value ); + + transferCommits( pdaGraph, followTrans, expandTo, prodId ); + + pdaGraph->attachTrans( expandTo, actionDestState, followTrans ); + expandTo->transMap.insert( followTrans->lowKey, followTrans ); + pdaGraph->transClosureQueue.append( followTrans ); + } + } + } +} + +long PdaTrans::maxPrior() +{ + long prior = LONG_MIN; + if ( isShift && shiftPrior > prior ) + prior = shiftPrior; + for ( ReductionMap::Iter red = reductions; red.lte(); red++ ) { + if ( red->value > prior ) + prior = red->value; + } + return prior; +} + +void Compiler::lalr1AddFollow1( PdaGraph *pdaGraph, PdaState *state ) +{ + /* Finding non-terminals into the state. */ + for ( PdaTransInList::Iter in = state->inRange; in.lte(); in++ ) { + long key = in->lowKey; + LangEl *langEl = langElIndex[key]; + if ( langEl != 0 && langEl->type == LangEl::NonTerm ) { + /* Finding the following transitions. */ + FollowToAdd followKeys; + for ( TransMap::Iter fout = state->transMap; fout.lte(); fout++ ) { + int fkey = fout->key; + LangEl *flel = langElIndex[fkey]; + if ( flel == 0 || flel->type == LangEl::Term ) { + long prior = fout->value->maxPrior(); + followKeys.insert( fkey, prior ); + } + } + + if ( followKeys.length() > 0 ) + lalr1AddFollow2( pdaGraph, in, followKeys ); + } + } +} + +void Compiler::lalr1AddFollow2( PdaGraph *pdaGraph, PdaTrans *trans, + long followKey, long prior ) +{ + for ( ExpandToSet::Iter ets = trans->expandTo; ets.lte(); ets++ ) { + int prodId = ets->prodId; + PdaState *expandTo = ets->state; + + TransMapEl *transEl = expandTo->transMap.find( followKey ); + if ( transEl != 0 ) { + /* Add in the reductions, or in the shift. 
*/ + PdaTrans *destTrans = transEl->value; + + transferCommits( pdaGraph, destTrans, expandTo, prodId ); + + pdaGraph->addInReduction( destTrans, prodId, prior ); + } + else { + /* Set up the follow transition. */ + PdaTrans *followTrans = new PdaTrans; + followTrans->lowKey = followKey; + followTrans->isShift = false; + followTrans->reductions.insert( prodId, prior ); + + transferCommits( pdaGraph, followTrans, expandTo, prodId ); + + pdaGraph->attachTrans( expandTo, actionDestState, followTrans ); + expandTo->transMap.insert( followTrans->lowKey, followTrans ); + pdaGraph->transClosureQueue.append( followTrans ); + } + } +} + +void Compiler::lalr1AddFollow1( PdaGraph *pdaGraph, PdaTrans *trans ) +{ + PdaState *state = trans->fromState; + int fkey = trans->lowKey; + LangEl *flel = langElIndex[fkey]; + if ( flel == 0 || flel->type == LangEl::Term ) { + /* Finding non-terminals into the state. */ + for ( PdaTransInList::Iter in = state->inRange; in.lte(); in++ ) { + long key = in->lowKey; + LangEl *langEl = langElIndex[key]; + if ( langEl != 0 && langEl->type == LangEl::NonTerm ) { + //cerr << "FOLLOW PRIOR TRANSFER 2: " << prior << endl; + long prior = trans->maxPrior(); + lalr1AddFollow2( pdaGraph, in, fkey, prior ); + } + } + } +} + +/* Add follow sets to an LR(0) graph to make it LALR(1). */ +void Compiler::lalr1AddFollowSets( PdaGraph *pdaGraph, LangElSet &parserEls ) +{ + /* Make the state that all reduction actions go to. Since a reduction pops + * states of the stack and sets the new target state, this state is + * actually never reached. Just here to link the trans to. */ + actionDestState = pdaGraph->addState(); + pdaGraph->setFinState( actionDestState ); + + for ( LangElSet::Iter pe = parserEls; pe.lte(); pe++ ) { + /* Get the entry into the graph and traverse over start. */ + PdaState *overStart = pdaGraph->followFsm( (*pe)->startState, (*pe)->rootDef->fsm ); + + /* Add _eof after the initial _start. 
*/ + PdaTrans *eofTrans = pdaGraph->insertNewTrans( overStart, actionDestState, + (*pe)->eofLel->id, (*pe)->eofLel->id ); + eofTrans->isShift = true; + } + + /* This was used during lr0 table construction. */ + pdaGraph->transClosureQueue.abandon(); + + /* Need to pass over every state initially. */ + for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) + lalr1AddFollow1( pdaGraph, state ); + + /* While the closure queue has items, pop them off and add follow + * characters. */ + while ( pdaGraph->transClosureQueue.length() > 0 ) { + /* Pop the first item off and add Follow for it . */ + PdaTrans *trans = pdaGraph->transClosureQueue.detachFirst(); + lalr1AddFollow1( pdaGraph, trans ); + } +} + +void Compiler::linkExpansions( PdaGraph *pdaGraph ) +{ + pdaGraph->setStateNumbers(); + for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { + /* Find transitions out on non terminals. */ + for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { + long key = trans->key; + LangEl *langEl = langElIndex[key]; + if ( langEl != 0 && langEl->type == LangEl::NonTerm ) { + /* For each production that the non terminal expand to ... */ + for ( LelDefList::Iter prod = langEl->defList; prod.lte(); prod++ ) { + /* Follow the production and add to the trans's expand to set. */ + PdaState *followRes = pdaGraph->followFsm( state, prod->fsm ); + + //LangEl *lel = langElIndex[key]; + //cerr << state->stateNum << ", "; + //if ( lel != 0 ) + // cerr << lel->data; + //else + // cerr << (char)key; + //cerr << " -> " << (*fto)->stateNum << " on " << + // prod->data << " (fss = " << fin.pos() << ")" << endl; + trans->value->expandTo.insert( ExpandToEl( followRes, prod->prodId ) ); + } + } + } + } +} + +/* Add terminal versions of all nonterminal transitions. 
*/ +void Compiler::addDupTerms( PdaGraph *pdaGraph ) +{ + for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { + PdaTransList newTranitions; + for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { + LangEl *lel = langElIndex[trans->value->lowKey]; + if ( lel->type == LangEl::NonTerm ) { + PdaTrans *dupTrans = new PdaTrans; + dupTrans->lowKey = lel->termDup->id; + dupTrans->isShift = true; + + /* Save the target state in to state. In the next loop when we + * attach the transition we must clear this because the + * attaching code requires the transition to be unattached. */ + dupTrans->toState = trans->value->toState; + newTranitions.append( dupTrans ); + + /* Commit code used? */ + //transferCommits( pdaGraph, followTrans, expandTo, prodId ); + } + } + + for ( PdaTrans *dup = newTranitions.head; dup != 0; ) { + PdaTrans *next = dup->next; + PdaState *toState = dup->toState; + dup->toState = 0; + pdaGraph->attachTrans( state, toState, dup ); + state->transMap.insert( dup->lowKey, dup ); + dup = next; + } + } +} + +/* Generate a LALR(1) graph. */ +void Compiler::lalr1GenerateParser( PdaGraph *pdaGraph, LangElSet &parserEls ) +{ + /* Make the intial graph. */ + pdaGraph->langElIndex = langElIndex; + + for ( Vector<LangEl*>::Iter r = parserEls; r.lte(); r++ ) { + /* Create the entry point. */ + PdaState *rs = pdaGraph->addState(); + pdaGraph->entryStateSet.insert( rs ); + + /* State set of just one state. */ + rs->stateSet = new PdaStateSet; + rs->stateSet->insert( (*r)->rootDef->fsm->startState ); + + /* Queue the start state for closure. */ + rs->onClosureQueue = true; + pdaGraph->stateClosureQueue.append( rs ); + + (*r)->startState = rs; + } + + /* Run the lr0 closure. */ + lr0CloseAllStates( pdaGraph ); + + /* Add terminal versions of all nonterminal transitions. */ + addDupTerms( pdaGraph ); + + /* Link production expansions to the place they expand to. 
*/ + linkExpansions( pdaGraph ); + + /* Walk the graph adding follow sets to the LR(0) graph. */ + lalr1AddFollowSets( pdaGraph, parserEls ); + +// /* Set the commit on the final eof shift. */ +// PdaTrans *overStart = pdaGraph->startState->findTrans( rootEl->id ); +// PdaTrans *eofTrans = overStart->toState->findTrans( eofLangEl->id ); +// eofTrans->afterShiftCommits.insert( 2 ); +} diff --git a/src/codegen.cc b/src/codegen.cc new file mode 100644 index 00000000..6ff9983f --- /dev/null +++ b/src/codegen.cc @@ -0,0 +1,62 @@ +/* + * Copyright 2006-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <iostream> +#include "fsmcodegen.h" + +void FsmCodeGen::writeIncludes() +{ + out << + "#include <stdio.h>\n" + "#include <stdlib.h>\n" + "#include <string.h>\n" + "#include <assert.h>\n" + "\n" + "#include <colm/pdarun.h>\n" + "#include <colm/debug.h>\n" + "#include <colm/bytecode.h>\n" + "#include <colm/defs.h>\n" + "#include <colm/input.h>\n" + "#include <colm/tree.h>\n" + "#include <colm/program.h>\n" + "#include <colm/colm.h>\n" + "\n"; +} + +void FsmCodeGen::writeMain( long activeRealm ) +{ + out << + "int main( int argc, const char **argv )\n" + "{\n" + " struct colm_program *prg;\n" + " int exit_status;\n" + "\n" + " prg = colm_new_program( &" << objectName << " );\n" + " colm_set_debug( prg, " << activeRealm << " );\n" + " colm_run_program( prg, argc, argv );\n" + " exit_status = colm_delete_program( prg );\n" + " return exit_status;\n" + "}\n" + "\n"; + + out.flush(); +} diff --git a/src/codevect.c b/src/codevect.c new file mode 100644 index 00000000..50b86336 --- /dev/null +++ b/src/codevect.c @@ -0,0 +1,183 @@ +/* + * Copyright 2010-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <string.h> +#include <stdlib.h> + +#include <colm/rtvector.h> +#include <colm/pdarun.h> + +void init_rt_code_vect( struct rt_code_vect *vect ) +{ + vect->data = 0; + vect->tab_len = 0; + vect->alloc_len = 0; +} + +static long new_size_up( long existing, long needed ) +{ + return needed > existing ? (needed<<1) : existing; +} + +static long new_size_down( long existing, long needed ) +{ + return needed < (existing>>2) ? (needed<<1) : existing; +} + +/* Up resize the data for len elements using Resize::upResize to tell us the + * new tabLen. Reads and writes allocLen. Does not read or write tabLen. */ +static void up_resize( struct rt_code_vect *vect, long len ) +{ + /* Ask the resizer what the new tabLen will be. */ + long new_len = new_size_up(vect->alloc_len, len); + + /* Did the data grow? */ + if ( new_len > vect->alloc_len ) { + vect->alloc_len = new_len; + if ( vect->data != 0 ) { + /* Table exists already, resize it up. */ + vect->data = (code_t*) realloc( vect->data, sizeof(code_t) * new_len ); + //if ( vect->data == 0 ) + // throw std::bad_alloc(); + } + else { + /* Create the data. */ + vect->data = (code_t*) malloc( sizeof(code_t) * new_len ); + //if ( vect->data == 0 ) + // throw std::bad_alloc(); + } + } +} + +/* Down resize the data for len elements using Resize::downResize to determine + * the new tabLen. Reads and writes allocLen. Does not read or write tabLen. */ +static void down_resize( struct rt_code_vect *vect, long len) +{ + /* Ask the resizer what the new tabLen will be. */ + long new_len = new_size_down( vect->alloc_len, len ); + + /* Did the data shrink? 
*/ + if ( new_len < vect->alloc_len ) { + vect->alloc_len = new_len; + if ( new_len == 0 ) { + /* Simply free the data. */ + free( vect->data ); + vect->data = 0; + } + else { + /* Not shrinking to size zero, realloc it to the smaller size. */ + vect->data = (code_t*) realloc( vect->data, sizeof(code_t) * new_len ); + //if ( vect->data == 0 ) + // throw std::bad_alloc(); + } + } +} + + +void colm_rt_code_vect_empty( struct rt_code_vect *vect ) +{ + if ( vect->data != 0 ) { + /* Free the data space. */ + free( vect->data ); + vect->data = 0; + vect->tab_len = vect->alloc_len = 0; + } +} + +void colm_rt_code_vect_replace( struct rt_code_vect *vect, long pos, + const code_t *val, long len ) +{ + long end_pos, i; + //code_t *item; + + /* If we are given a negative position to replace at then + * treat it as a position relative to the length. */ + if ( pos < 0 ) + pos = vect->tab_len + pos; + + /* The end is the one past the last item that we want + * to write to. */ + end_pos = pos + len; + + /* Make sure we have enough space. */ + if ( end_pos > vect->tab_len ) { + up_resize( vect, end_pos ); + + /* Delete any objects we need to delete. */ + //item = vect->data + pos; + //for ( i = pos; i < vect->tabLen; i++, item++ ) + // item->~code_t(); + + /* We are extending the vector, set the new data length. */ + vect->tab_len = end_pos; + } + else { + /* Delete any objects we need to delete. */ + //item = vect->data + pos; + //for ( i = pos; i < endPos; i++, item++ ) + // item->~code_t(); + } + + /* Copy data in using copy constructor. */ + code_t *dst = vect->data + pos; + const code_t *src = val; + for ( i = 0; i < len; i++, dst++, src++ ) + *dst = *src; +} + +void colm_rt_code_vect_remove( struct rt_code_vect *vect, long pos, long len ) +{ + long new_len, len_to_slide_over, end_pos; + code_t *dst;//, *item; + + /* If we are given a negative position to remove at then + * treat it as a position relative to the length. 
*/ + if ( pos < 0 ) + pos = vect->tab_len + pos; + + /* The first position after the last item deleted. */ + end_pos = pos + len; + + /* The new data length. */ + new_len = vect->tab_len - len; + + /* The place in the data we are deleting at. */ + dst = vect->data + pos; + + /* Call Destructors. */ + //item = dst; + //for ( long i = 0; i < len; i += 1, item += 1 ) + // item->~code_t(); + + /* Shift data over if necessary. */ + len_to_slide_over = vect->tab_len - end_pos; + if ( len > 0 && len_to_slide_over > 0 ) + memmove(dst, dst + len, sizeof(code_t)*len_to_slide_over); + + /* Shrink the data if necessary. */ + down_resize( vect, new_len ); + + /* Set the new data length. */ + vect->tab_len = new_len; +} + + diff --git a/src/colm-config.cmake.in b/src/colm-config.cmake.in new file mode 100644 index 00000000..7e3b8f27 --- /dev/null +++ b/src/colm-config.cmake.in @@ -0,0 +1,3 @@ +# @_PACKAGE_NAME@-config.cmake Generated from colm-config.cmake.in by cmake + +include("${CMAKE_CURRENT_LIST_DIR}/@_PACKAGE_NAME@-targets.cmake") diff --git a/src/colm-wrap.sh b/src/colm-wrap.sh new file mode 100644 index 00000000..8140afca --- /dev/null +++ b/src/colm-wrap.sh @@ -0,0 +1,79 @@ +#!/bin/bash +# + +# This wrapper around the colm program (and bootstrap programs) allows us to +# limit ourselves to one output file per makefile rule. It packs up multiple +# colm output files into one pack file, which is used as an intermediate file. +# We can extract the individual files from the pack one at a time, in separate +# rules. +# +# Ultimately this functionality should be rolled into the colm program itself. +# Until that is complete, this wrapper exists. +# + +unset CMD +unset ARGS +unset OUTPUT +unset PACKS + +while getopts "w:p:o:e:x:RcD:I:L:vdlirS:M:vHh?-:sVa:m:b:E:" opt; do + + # For the colm wrapper case. + case "$opt" in + w) + # Which command to wrap. + CMD=$OPTARG + ;; + o) + # Pack file name. For wrapping. 
#!/bin/bash
#

# This wrapper around the colm program (and bootstrap programs) allows us to
# limit ourselves to one output file per makefile rule. It packs up multiple
# colm output files into one pack file, which is used as an intermediate file.
# We can extract the individual files from the pack one at a time, in separate
# rules.
#
# Ultimately this functionality should be rolled into the colm program itself.
# Until that is complete, this wrapper exists.
#
# Two modes, selected by the input file name:
#   INPUT ends in .pack  -> extract "$OUTPUT.pack" from the pack, rename it
#                           to $OUTPUT.
#   otherwise            -> run the wrapped command on INPUT with every
#                           -p/-e/-x/-m output redirected to "<name>.pack",
#                           then tar those files into $OUTPUT.
#

unset CMD
unset ARGS
unset OUTPUT
unset PACKS

while getopts "w:p:o:e:x:RcD:I:L:vdlirS:M:vHh?-:sVa:m:b:E:" opt; do

	# For the colm wrapper case.
	case "$opt" in
		w)
			# Which command to wrap.
			CMD=$OPTARG
		;;
		o)
			# Pack file name. For wrapping.
			OUTPUT=$OPTARG
		;;
		[pexm])
			# Output-file options: redirect to a .pack member and remember it.
			ARGS="$ARGS -$opt $OPTARG.pack"
			PACKS="$PACKS $OPTARG.pack"
		;;
		[DILSMambE-]) ARGS="$ARGS -$opt $OPTARG" ;;
		[RcvdlirvHhsV]) ARGS="$ARGS -$opt" ;;
		*)
			# getopts reports an unknown option as '?'.
			exit 1
		;;
	esac
done

# Shift over the args.
shift $((OPTIND - 1))

INPUT="$1"
if [ -z "$INPUT" ]; then
	echo "colm-wrap: no input file given" >&2
	exit 1
fi

if [ -z "$OUTPUT" ]; then
	echo "colm-wrap: no output file given" >&2
	exit 1
fi

if [ "${INPUT%.pack}" != "$INPUT" ]; then
	# Extraction mode. Report failure of either step, not just of the mv.
	tar -xmf "$INPUT" "$OUTPUT.pack" && mv "$OUTPUT.pack" "$OUTPUT"
	EXIT_STATUS=$?
else
	# Default command to colm.
	CMD=${CMD:-colm}

	# Prefer a binary sitting next to this script when invoked via a path.
	SELF_DIR=$(dirname "$0")
	if [ "$(basename "$0")" != "$0" ] && [ -x "$SELF_DIR/$CMD" ]; then
		COLM="$SELF_DIR/$CMD"
	else
		COLM=@prefix@/bin/$CMD
	fi

	# ARGS and PACKS are space-separated lists and are split on purpose.
	"$COLM" $ARGS "$INPUT"
	EXIT_STATUS=$?
	if [ "$EXIT_STATUS" = 0 ]; then
		tar -cf "$OUTPUT" $PACKS
		TAR_STATUS=$?
		# Only report a pack failure when there was something to pack; with
		# no pack members the tar result stays ignored, as before.
		if [ -n "$PACKS" ]; then
			EXIT_STATUS=$TAR_STATUS
		fi
	fi
	rm -f $PACKS
fi

exit "$EXIT_STATUS"
/*
 * Copyright 2006-2018 Adrian Thurston <thurston@colm.net>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#ifndef _COLM_COLM_H
#define _COLM_COLM_H

#ifdef __cplusplus
extern "C" {
#endif

/* Types that are only handled through pointers in this header. */
struct colm_data;
struct colm_tree;
struct colm_kid;
struct colm_program;
struct colm_sections;
struct colm_location;

struct indent_impl
{
	/* Indentation. */
	int level;
	int indent;
};

extern struct colm_sections colm_object;

typedef unsigned long colm_value_t;
typedef unsigned char colm_alph_t;

struct colm_tree
{
	/* First four will be overlaid in other structures. */
	short id;
	unsigned short flags;
	long refs;
	struct colm_kid *child;

	struct colm_data *tokdata;

	/* FIXME: this needs to go somewhere else. Will do for now. */
	unsigned short prod_num;
};

/* Argument bundle for tree printing: user pointer, flags and callbacks. */
struct colm_print_args
{
	void *arg;
	int comm;
	int attr;
	int trim;
	struct indent_impl *indent;

	void (*out)( struct colm_print_args *args, const char *data, int length );
	void (*open_tree)( struct colm_program *prg, struct colm_tree **sp,
			struct colm_print_args *args, struct colm_kid *parent, struct colm_kid *kid );
	void (*print_term)( struct colm_program *prg, struct colm_tree **sp,
			struct colm_print_args *args, struct colm_kid *kid );
	void (*close_tree)( struct colm_program *prg, struct colm_tree **sp,
			struct colm_print_args *args, struct colm_kid *parent, struct colm_kid *kid );
};

/* Callbacks with the open_tree/close_tree and print_term signatures. */
void colm_print_null( struct colm_program *prg, struct colm_tree **sp,
		struct colm_print_args *args, struct colm_kid *parent, struct colm_kid *kid );
void colm_print_term_tree( struct colm_program *prg, struct colm_tree **sp,
		struct colm_print_args *print_args, struct colm_kid *kid );

struct colm_tree **colm_vm_root( struct colm_program *prg );
struct colm_tree *colm_return_val( struct colm_program *prg );
void colm_print_tree_args( struct colm_program *prg, struct colm_tree **sp,
		struct colm_print_args *print_args, struct colm_tree *tree );

int colm_repeat_end( struct colm_tree *tree );
int colm_list_last( struct colm_tree *tree );

struct colm_tree *colm_get_rhs_val( struct colm_program *prg, struct colm_tree *tree, int *a );
struct colm_tree *colm_get_attr( struct colm_tree *tree, long pos );
struct colm_tree *colm_get_global( struct colm_program *prg, long pos );
struct colm_tree *colm_get_repeat_next( struct colm_tree *tree );
struct colm_tree *colm_get_repeat_val( struct colm_tree *tree );
struct colm_tree *colm_get_left_repeat_next( struct colm_tree *tree );
struct colm_tree *colm_get_left_repeat_val( struct colm_tree *tree );
struct colm_location *colm_find_location( struct colm_program *prg, struct colm_tree *tree );

/* Pointer reinterpretation between C strings and colm_alph_t data. The
 * pointer value is returned unchanged; nothing is copied. */
static inline const colm_alph_t *colm_alph_from_cstr( const char *cstr ) { return (const colm_alph_t*)cstr; }
static inline const char *colm_cstr_from_alph( const colm_alph_t *alph ) { return (const char*)alph; }

/* Debug realms. To turn on, pass to colm_set_debug before invocation. */
#define COLM_DBG_BYTECODE    0x00000001
#define COLM_DBG_PARSE       0x00000002
#define COLM_DBG_MATCH       0x00000004
#define COLM_DBG_COMPILE     0x00000008
#define COLM_DBG_POOL        0x00000010
#define COLM_DBG_PRINT       0x00000020
#define COLM_DBG_INPUT       0x00000040
#define COLM_DBG_SCAN        0x00000080

#define COLM_RN_NEITHER 0x00
#define COLM_RN_DATA    0x01
#define COLM_RN_LOC     0x02
#define COLM_RN_BOTH    0x03

/*
 * Primary Interface.
 */

/* Allocate a program. Takes program static data as arg. Normally this is
 * &colm_object. */
struct colm_program *colm_new_program( struct colm_sections *rtd );

/* Enable debug realms for a program. */
void colm_set_debug( struct colm_program *prg, long active_realm );

/* Run a top-level colm program. */
void colm_run_program( struct colm_program *prg, int argc, const char **argv );

/* Run a top-level colm program, with argument lengths (allows binary data). */
void colm_run_program2( struct colm_program *prg, int argc, const char **argv, const int *argl );

/* Run a single exported colm function. */
struct colm_tree *colm_run_func( struct colm_program *prg, int frame_id,
		const char **params, int param_count );

/* Delete a colm program. Clears all memory. */
int colm_delete_program( struct colm_program *prg );

/* Get and set the pointer to the reduce struct used. */
void *colm_get_reduce_ctx( struct colm_program *prg );
void colm_set_reduce_ctx( struct colm_program *prg, void *ctx );
void colm_set_reduce_clean( struct colm_program *prg, unsigned char reduce_clean );

const char *colm_error( struct colm_program *prg, int *length );

const char **colm_extract_fns( struct colm_program *prg );

#ifdef __cplusplus
}
#endif

#endif /* _COLM_COLM_H */
+lex + token DEF / 'def' / + token LEX / 'lex' / + token END / 'end' / + token TOKEN / 'token' / + token RL / 'rl' / + token IGNORE / 'ignore' / + token PRINT / 'print' / + token PRINTS / 'prints' / + token PARSE / 'parse' / + token REDUCE / 'reduce' / + token READ_REDUCE /'read_reduce'/ + token PARSE_TREE / 'parse_tree' / + token PARSE_STOP / 'parse_stop' / + token CONS / 'construct' | 'cons' / + token MATCH / 'match' / + token REQUIRE / 'require' / + token SEND / 'send' / + token SEND_TREE / 'send_tree' / + token NAMESPACE / 'namespace' / + token REDUCTION / 'reduction' / + token FOR / 'for' / + token IF / 'if' / + token YIELD / 'yield' / + token WHILE / 'while' / + token ELSIF / 'elsif' / + token ELSE / 'else' / + token IN / 'in' / + token PARSER / 'parser' | 'accum' / + token LIST / 'list' / + token LIST_EL / 'list_el' / + token MAP / 'map' / + token MAP_EL / 'map_el' / + token PTR / 'ptr' / + token ITER / 'iter' / + token REF / 'ref' / + token EXPORT / 'export' / + token RETURN / 'return' / + token BREAK / 'break' / + token REJECT / 'reject' / + token REDUCEFIRST / 'reducefirst' / + token ALIAS / 'alias' / + token COMMIT / 'commit' / + token NEW / 'new' / + token PREEOF / 'preeof' / + token GLOBAL / 'global' / + token EOS / 'eos' / + token CAST / 'cast' / + token SWITCH / 'switch' / + token CASE / 'case' / + token DEFAULT / 'default' / + token INT / 'int' / + token BOOL / 'bool' / + token VOID / 'void' / + + token MAKE_TOKEN / 'make_token' / + token MAKE_TREE / 'make_tree' / + + token TYPEID / 'typeid' / + + token LITERAL / 'literal' / + token CONTEXT / 'context' / + token STRUCT / 'struct' / + token NI /'ni'/ + + token NIL / 'nil' / + token TRUE / 'true' / + token FALSE / 'false' / + + token LEFT /'left'/ + token RIGHT /'right'/ + token NONASSOC /'nonassoc'/ + + token INCLUDE /'include'/ + + token id / + ( 'a' .. 'z' | 'A' .. 'Z' | '_' ) . + ( 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' | '_' )* + / + + token number + / ( '0' .. 
'9' ) + / + + token backtick_lit / + '`' . + ^( ' ' | '\n' | '\t' ) . + ^( ' ' | '\n' | '\t' | '`' | ']' )* . + ( '`' | '' ) + / + + token DQ / '\"' / - ni + token SQ / '\'' / - ni + token TILDE / '~' / - ni + + token SQOPEN /'['/ + token SQCLOSE /']'/ + token BAR /'|'/ + token FSLASH /'/'/ + token COLON /':'/ + token DOUBLE_COLON /'::'/ + token DOT /'.'/ + token ARROW /'->'/ + token POPEN /'('/ + token PCLOSE /')'/ + token COPEN /'{'/ + token CCLOSE /'}'/ + token STAR /'*'/ + token QUESTION /'?'/ + token EQUALS /'='/ + token EQEQ /'=='/ + token NEQ /'!='/ + token COMMA /','/ + token LT /'<'/ + token GT /'>'/ + token LTEQ /'<='/ + token GTEQ /'>='/ + token BANG /'!'/ + token DOLLAR /'$'/ + token CARET /'^'/ + token AT /'@'/ + token PERCENT /'%'/ + token PLUS /'+'/ + token MINUS /'-'/ + token AMPAMP /'&&'/ + token BARBAR /'||'/ + + ignore / ( '\n' | '\t' | ' ' )+ / + ignore / '#' . ( ^'\n' )* . '\n' / +end + +lex + token LIT_DQ / '\"' / + token LIT_DQ_NL / '\n' / + token LIT_SQOPEN / '[' / + token LIT_SQCLOSE / ']' / + + token lit_dq_data + / ( ^( '\n' | '\"' | '[' | ']' | '\\' ) | '\\' . any )+ / +end + +lex + token CONS_SQ / '\'' / + token CONS_SQ_NL / '\n' / + + token sq_cons_data + / ( ^( '\n' | '\'' | '\\' ) | '\\' . any )+ / +end + +lex + token TILDE_NL / '\n' / + token tilde_data + / ( ^'\n' )+ / +end + +lex + token lex_id / + ( 'a' .. 'z' | 'A' .. 'Z' | '_' ) . + ( 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' | '_' )* + / + + token lex_uint + / ( '0' .. '9' )+ / + + token lex_hex + / '0x' . ( '0' .. '9' | 'a' .. 'f' | 'A' .. 'F' )+ / + + token lex_lit / + '\'' . ( ^( '\'' | '\\' ) | '\\' . any )* . ( '\'' | '\'i' ) | + '\"' . ( ^( '\"' | '\\' ) | '\\' . any )* . 
( '\"' | '\"i' ) + / + + token LEX_DOT /'.'/ + token LEX_BAR /'|'/ + token LEX_AMP /'&'/ + token LEX_DASH /'-'/ + token LEX_POPEN /'('/ + token LEX_PCLOSE /')'/ + token LEX_STAR /'*'/ + token LEX_STARSTAR /'**'/ + token LEX_QUESTION /'?'/ + token LEX_PLUS /'+'/ + token LEX_CARET /'^'/ + token LEX_DOTDOT /'..'/ + token LEX_SQOPEN_POS /'['/ - ni + token LEX_SQOPEN_NEG /'[^'/ - ni + token LEX_FSLASH /'/'/ + + token LEX_DASHDASH /'--'/ + token LEX_COLON_GT /':>'/ + token LEX_COLON_GTGT /':>>'/ + token LEX_LT_COLON /'<:'/ + + ignore / + ( '\n' | '\t' | ' ' ) . + ( '\n' | '\t' | ' ' )* + / + + ignore / '#' . ( ^'\n' )* . '\n' / +end + +lex + token RE_DASH / '-' / + token RE_CHAR / ^( '\\' | '-' | ']' ) | '\\' . any / + token RE_SQCLOSE / ']' / +end + +def start + [RootItemList: root_item<*] + +def root_item + [rl_def] :Rl commit +| [literal_def] :Literal commit +| [token_def] :Token commit +| [ic_def] :IgnoreCollector commit +| [ignore_def] :Ignore commit +| [cfl_def] :Cfl commit +| [region_def] :Region commit +| [struct_def] :Struct commit +| [namespace_def] :Namespace commit +| [function_def] :Function commit +| [in_host_def] :InHost commit +| [iter_def] :Iter commit +| [statement] :Statement commit +| [global_def] :Global commit +| [export_def] :Export commit +| [pre_eof_def] :PreEof commit +| [precedence_def] :Precedence commit +| [alias_def] :Alias commit +| [_include] :Include commit +| [reduction_def] :Reduction commit + +def _include + [INCLUDE SQ SqConsDataList: sq_cons_data<* sq_lit_term] + +def precedence_def + [pred_type pred_token_list] + +def pred_type + [LEFT] :Left +| [RIGHT] :Right +| [NONASSOC] :NonAssoc + +def pred_token_list + [pred_token_list COMMA pred_token] :List +| [pred_token] :Base + +def pred_token + [region_qual id] :Id +| [region_qual backtick_lit] :Lit + +def pre_eof_def + [PREEOF COPEN lang_stmt_list CCLOSE] + +def alias_def + [ALIAS id type_ref] + +def struct_item + [struct_var_def] :StructVar commit +| [literal_def] :Literal commit +| 
[rl_def] :Rl commit +| [token_def] :Token commit +| [ic_def] :IgnoreCollector commit +| [ignore_def] :Ignore commit +| [cfl_def] :Cfl commit +| [region_def] :Region commit +| [struct_def] :Struct commit +| [function_def] :Function commit +| [in_host_def] :InHost commit +| [iter_def] :Iter commit +| [export_def] :Export commit +| [pre_eof_def] :PreEof commit +| [precedence_def] :Precedence commit +| [alias_def] :Alias commit + +def export_def + [EXPORT var_def opt_def_init] + +def global_def + [GLOBAL var_def opt_def_init] + +def iter_def + [ITER id POPEN ParamVarDefList: param_var_def_list PCLOSE + COPEN lang_stmt_list CCLOSE] + +def reference_type_ref + [REF LT type_ref GT] + +def param_var_def_seq + [param_var_def COMMA param_var_def_seq] +| [param_var_def] + +def param_var_def_list + [param_var_def_seq] +| [] + +def param_var_def + [id COLON type_ref] :Type +| [id COLON reference_type_ref] :Ref + +def opt_export + [EXPORT] :Export +| [] + +def function_def + [opt_export type_ref id + POPEN ParamVarDefList: param_var_def_list PCLOSE + COPEN lang_stmt_list CCLOSE] + +def in_host_def + [opt_export type_ref id + POPEN ParamVarDefList: param_var_def_list PCLOSE + EQUALS HostFunc: id] + +def struct_var_def + [var_def] + +def struct_key + [STRUCT] | [CONTEXT] + +def struct_def + [struct_key id ItemList: struct_item<* END] + +def literal_keyword + [LITERAL] +| [TOKEN] + +def literal_def + [literal_keyword literal_list] + +def literal_list + [literal_list literal_item] :Item +| [literal_item] :Base + +def literal_item + [no_ignore_left backtick_lit no_ignore_right] + +def no_ignore_left + [NI MINUS] :Ni +| [] + +def no_ignore_right + [MINUS NI] :Ni +| [] + +def reduction_def + [REDUCTION id ItemList: reduction_item<* END] + +lex + token RED_OPEN / '{' / + token RED_CLOSE / '}' / + + token red_id / + ( 'a' .. 'z' | 'A' .. 'Z' | '_' ) . + ( 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' | '_' )* + / + + token red_comment / + '//' . ( ^'\n' )* . '\n' | + '/*' . 
any* :> '*/' + / + + token red_ws / + ( '\n' | '\t' | ' ' )+ + / + + token red_lit / + '\'' . ( ^( '\'' | '\\' ) | '\\' . any )* . ( '\'' | '\'i' ) | + '\"' . ( ^( '\"' | '\\' ) | '\\' . any )* . ( '\"' | '\"i' ) + / + + token RED_LHS / '$' . '$' / + token RED_RHS_REF / '$' . red_id / + token RED_RHS_LOC / '@' . red_id / + token RED_TREE_REF / '$*' . red_id / + + token RED_RHS_NREF / '$' . ('1' .. '9') . ('0' .. '9')* / + token RED_RHS_NLOC / '@' . ('1' .. '9') . ('0' .. '9')* / + token RED_TREE_NREF / '$*' . ('1' .. '9') . ('0' .. '9')* / + + token red_any / any / +end + +def red_nonterm + [type_ref RED_OPEN HostItems: host_item<* RED_CLOSE] + +def red_action + [type_ref COLON id RED_OPEN HostItems: host_item<* RED_CLOSE] + +def host_item + [red_id] +| [red_lit] +| [red_comment] +| [red_ws] +| [red_any] +| [RED_LHS] +| [RED_RHS_REF] +| [RED_TREE_REF] +| [RED_RHS_LOC] +| [RED_RHS_NREF] +| [RED_TREE_NREF] +| [RED_RHS_NLOC] +| [RED_OPEN HostItems: host_item<* RED_CLOSE] + +def reduction_item + [red_nonterm] :NonTerm commit +| [red_action] :Action commit + +def namespace_def + [NAMESPACE id ItemList: namespace_item<* END] + +def namespace_item + [rl_def] :Rl commit +| [literal_def] :Literal commit +| [token_def] :Token commit +| [ic_def] :IgnoreCollector commit +| [ignore_def] :Ignore commit +| [cfl_def] :Cfl commit +| [region_def] :Region commit +| [struct_def] :Struct commit +| [namespace_def] :Namespace commit +| [function_def] :Function commit +| [in_host_def] :InHost commit +| [iter_def] :Iter commit +| [pre_eof_def] :PreEof commit +| [precedence_def] :Precedence commit +| [alias_def] :Alias commit +| [_include] :Include commit +| [global_def] :Global commit + +def obj_var_list + [] + +def opt_reduce_first + [REDUCEFIRST] +| [] + +def cfl_def + [DEF id + VarDefList: var_def<* + opt_reduce_first + prod_list] + +def region_def + [LEX RootItemList: root_item<* END] + +def rl_def + [RL id LEX_FSLASH lex_expr LEX_FSLASH] + +def opt_lex_expr + [lex_expr] +| [] + +def 
token_def + [TOKEN id VarDefList: var_def<* + no_ignore_left + LEX_FSLASH opt_lex_expr LEX_FSLASH + no_ignore_right + opt_translate] + +def ic_def + [TOKEN id MINUS] + +def opt_translate + [COPEN lang_stmt_list CCLOSE] :Translate +| [] + +def opt_id + [id] :Id +| [] + +def ignore_def + [IGNORE opt_id LEX_FSLASH opt_lex_expr LEX_FSLASH] + +def prod_sublist + [prod_sublist BAR prod_el_list] +| [prod_el_list] + +def prod_el + [opt_prod_el_name region_qual id opt_repeat] :Id +| [opt_prod_el_name region_qual backtick_lit opt_repeat] :Lit +| [opt_prod_el_name POPEN prod_sublist PCLOSE opt_repeat ] :SubList + +def opt_prod_el_name + [id COLON] :Name +| [] + +def prod_el_list + [prod_el_list prod_el] :List +| [] + +def opt_commit + [COMMIT] :Commit +| [] + +def opt_prod_name + [COLON id] :Name +| [] + +def prod + [SQOPEN prod_el_list SQCLOSE + opt_prod_name + opt_commit + opt_reduce] + +def opt_reduce + [COPEN lang_stmt_list CCLOSE] :Reduce +| [] + +def prod_list + [prod_list BAR prod] :List +| [prod] :Base + +def case_clause + [CASE pattern block_or_single] :Pattern commit +| [CASE id block_or_single] :Id commit +| [CASE id pattern block_or_single] :IdPat commit + +def default_clause + [DEFAULT block_or_single] commit + +def case_clause_list + [case_clause case_clause_list] :Recursive +| [case_clause] :BaseCase +| [default_clause] :BaseDefault + +# Note a commit on switch doesn't work because the default clause in +# case_clause follow sets cause a premature commit. We could use a proper list +# for case clauses, followed by an optional default, but just move the commits +# to the clauses, which is a better commit strategy anyways. Gives more +# regular commits.
+ +def statement + [print_stmt] :Print commit +| [var_def opt_def_init] :VarDef commit +| [FOR id COLON type_ref IN iter_call block_or_single] :For commit +| [IF code_expr block_or_single elsif_list] :If commit +| [SWITCH var_ref case_clause_list] :SwitchUnder +| [SWITCH var_ref COPEN case_clause_list CCLOSE] :SwitchBlock +| [WHILE code_expr block_or_single] :While commit +| [var_ref EQUALS code_expr] :LhsVarRef commit +| [YIELD var_ref] :Yield commit +| [RETURN code_expr] :Return commit +| [BREAK] :Break commit +| [REJECT] :Reject commit +| [var_ref POPEN call_arg_list PCLOSE] :Call +| [stmt_or_factor] :StmtOrFactor +| [accumulate opt_eos] :BareSend + +def elsif_list + [elsif_clause elsif_list] :Clause +| [optional_else] :OptElse + +def elsif_clause + [ELSIF code_expr block_or_single] + +def optional_else + [ELSE block_or_single] :Else +| [] + +def call_arg_seq + [code_expr COMMA call_arg_seq] +| [code_expr] + +def call_arg_list + [call_arg_seq] +| [] + +def iter_call + [E1 var_ref POPEN call_arg_list PCLOSE] :Call +| [E2 id] :Id +| [E3 code_expr] :Expr + +def block_or_single + [COPEN lang_stmt_list CCLOSE] :Block +| [statement] :Single + +def require_pattern + [REQUIRE var_ref pattern] + +def opt_require_stmt + [require_pattern lang_stmt_list] :Require +| [] :Base + +def lang_stmt_list + [StmtList: statement<* opt_require_stmt] + +def opt_def_init + [EQUALS code_expr] :Init +| [] :Base + +def var_def + [id COLON type_ref] + +def print_stmt + [PRINT POPEN call_arg_list PCLOSE] :Tree +| [PRINTS POPEN var_ref COMMA call_arg_list PCLOSE] :PrintStream +| [PRINT accumulate] :Accum + +def expr_stmt + [code_expr] + +def code_expr + [code_expr AMPAMP code_relational] :AmpAmp +| [code_expr BARBAR code_relational] :BarBar +| [code_relational] :Base + +def code_relational + [code_relational EQEQ code_additive] :EqEq +| [code_relational NEQ code_additive] :Neq +| [code_relational LT code_additive] :Lt +| [code_relational GT code_additive] :Gt +| [code_relational LTEQ 
code_additive] :LtEq +| [code_relational GTEQ code_additive] :GtEq +| [code_additive] :Base + +def code_additive + [code_additive PLUS code_multiplicitive] :Plus +| [code_additive MINUS code_multiplicitive] :Minus +| [code_multiplicitive] :Base + +def code_multiplicitive + [code_multiplicitive STAR code_unary] :Star +| [code_multiplicitive FSLASH code_unary] :Fslash +| [code_unary] :Base + +def code_unary + [BANG code_factor] :Bang +| [DOLLAR code_factor] :Dollar +| [DOLLAR DOLLAR code_factor] :DollarDollar +| [CARET code_factor] :Caret +| [AT code_factor] :At +| [PERCENT code_factor] :Percent +| [code_factor] :Base + +def opt_eos + [DOT] :Dot +| [EOS] :Eos +| [] + +def code_factor + [number] :Number +| [var_ref POPEN call_arg_list PCLOSE] :Call +| [var_ref] :VarRef +| [NIL] :Nil +| [TRUE] :True +| [FALSE] :False +| [POPEN code_expr PCLOSE] :Paren +| [string] :String +| [type_ref IN var_ref] :In +| [TYPEID LT type_ref GT] :TypeId +| [CAST LT type_ref GT code_factor] :Cast +| [stmt_or_factor] :StmtOrFactor + +def type_ref + [region_qual id opt_repeat] :Id +| [INT] :Int +| [BOOL] :Bool +| [VOID] :Void +| [PARSER LT type_ref GT] :Parser +| [LIST LT type_ref GT] :List +| [MAP LT KeyType: type_ref COMMA ValType: type_ref GT] :Map +| [LIST_EL LT type_ref GT] :ListEl +| [MAP_EL LT KeyType: type_ref COMMA ValType: type_ref GT] :MapEl + +def region_qual + [region_qual id DOUBLE_COLON] :Qual +| [] :Base + +def opt_repeat + [STAR] :Star +| [PLUS] :Plus +| [QUESTION] :Question +| [LT STAR] :LeftStar +| [LT PLUS] :LeftPlus +| [] + +def opt_capture + [id COLON] :Id +| [] + +def opt_field_init + [POPEN FieldInitList: field_init<* PCLOSE] :Init +| [] :Base + +def field_init + [code_expr] + +def stmt_or_factor + [PARSE opt_capture type_ref opt_field_init accumulate] :Parse +| [PARSE_TREE opt_capture type_ref opt_field_init accumulate] :ParseTree +| [PARSE_STOP opt_capture type_ref opt_field_init accumulate] :ParseStop +| [REDUCE id type_ref opt_field_init accumulate] :Reduce +| 
[READ_REDUCE id type_ref opt_field_init accumulate] :ReadReduce +| [SEND var_ref accumulate opt_eos] :Send +| [SEND_TREE var_ref accumulate opt_eos] :SendTree +| [MAKE_TREE POPEN call_arg_list PCLOSE] :MakeTree +| [MAKE_TOKEN POPEN call_arg_list PCLOSE] :MakeToken +| [CONS opt_capture type_ref opt_field_init constructor] :Cons +| [MATCH var_ref pattern] :Match +| [NEW opt_capture type_ref POPEN FieldInitList: field_init<* PCLOSE] :New + +# +# Pattern +# + +def opt_label + [id COLON] :Id +| [] + +def dq_lit_term + [LIT_DQ] | [LIT_DQ_NL] + +def sq_lit_term + [CONS_SQ] | [CONS_SQ_NL] + +def opt_tilde_data + [tilde_data] +| [] + +def pattern_el_lel + [region_qual id opt_repeat] :Id +| [region_qual backtick_lit opt_repeat] :Lit + +def pattern_el + [opt_label pattern_el_lel] :PatternEl +| [DQ LitpatElList: litpat_el<* dq_lit_term] :Dq +| [SQ SqConsDataList: sq_cons_data<* sq_lit_term] :Sq +| [TILDE opt_tilde_data TILDE_NL] :Tilde + +def litpat_el + [lit_dq_data] :ConsData +| [LIT_SQOPEN PatternElList: pattern_el<* LIT_SQCLOSE] :SubList + +def pattern_top_el + [DQ LitpatElList: litpat_el<* dq_lit_term] :Dq +| [SQ SqConsDataList: sq_cons_data<* sq_lit_term] :Sq +| [TILDE opt_tilde_data TILDE_NL] :Tilde + +def pattern_list + [pattern_top_el pattern_list] :List +| [pattern_top_el] :Base + +def pattern + [pattern_list] :TopList +| [SQOPEN PatternElList: pattern_el<* SQCLOSE] :SubList + +# +# Constructor List +# + +def E1 [] +def E2 [] +def E3 [] +def E4 [] + +def cons_el + [E1 region_qual backtick_lit] :Lit +| [E1 DQ LitConsElList: lit_cons_el<* dq_lit_term] :Dq +| [E1 SQ SqConsDataList: sq_cons_data<* sq_lit_term] :Sq +| [E1 TILDE opt_tilde_data TILDE_NL] :Tilde +| [E2 code_expr] :CodeExpr + +def lit_cons_el + [lit_dq_data] :ConsData +| [LIT_SQOPEN ConsElList: cons_el<* LIT_SQCLOSE] :SubList + +def cons_top_el + [DQ LitConsElList: lit_cons_el<* dq_lit_term] :Dq +| [SQ SqConsDataList: sq_cons_data<* sq_lit_term] :Sq +| [TILDE opt_tilde_data TILDE_NL] :Tilde + +def cons_list + 
[cons_top_el cons_list] :List +| [cons_top_el] :Base + +def constructor + [cons_list] :TopList +| [SQOPEN ConsElList: cons_el<* SQCLOSE] :SubList + +# +# Accumulate +# + +def accum_el + [E1 DQ LitAccumElList: lit_accum_el<* dq_lit_term] :Dq +| [E1 SQ SqConsDataList: sq_cons_data<* sq_lit_term] :Sq +| [E1 TILDE opt_tilde_data TILDE_NL] :Tilde +| [E2 code_expr] :CodeExpr + +def lit_accum_el + [lit_dq_data] :ConsData +| [LIT_SQOPEN AccumElList: accum_el<* LIT_SQCLOSE] :SubList + +def accum_top_el + [DQ LitAccumElList: lit_accum_el<* dq_lit_term] :Dq +| [SQ SqConsDataList: sq_cons_data<* sq_lit_term] :Sq +| [TILDE opt_tilde_data TILDE_NL] :Tilde +| [SQOPEN AccumElList: accum_el<* SQCLOSE] :SubList + +def accum_list + [accum_top_el accum_list] :List +| [accum_top_el] :Base + +def accumulate + [accum_list] + +# +# String List +# + +def string_el + [E1 DQ LitStringElList: lit_string_el<* dq_lit_term] :Dq +| [E1 SQ SqConsDataList: sq_cons_data<* sq_lit_term] :Sq +| [E1 TILDE opt_tilde_data TILDE_NL] :Tilde +| [E2 code_expr] :CodeExpr + +def lit_string_el + [lit_dq_data] :ConsData +| [LIT_SQOPEN StringElList: string_el<* LIT_SQCLOSE] :SubList + +def string_top_el + [DQ LitStringElList: lit_string_el<* dq_lit_term] :Dq +| [SQ SqConsDataList: sq_cons_data<* sq_lit_term] :Sq +| [TILDE opt_tilde_data TILDE_NL] :Tilde + +def string_list + [string_top_el string_list] :List +| [string_top_el] :Base + +def string + [string_list] :TopList +| [SQOPEN StringElList: string_el<* SQCLOSE] :SubList + +# +# Variable References +# + +def var_ref + [region_qual qual id] + +def qual + [qual id DOT] :Dot +| [qual id ARROW] :Arrow +| [] :Base + +# +# Lexical analysis. 
+# + +def lex_expr + [lex_expr LEX_BAR lex_term] :Bar +| [lex_expr LEX_AMP lex_term] :Amp +| [lex_expr LEX_DASH lex_term] :Dash +| [lex_expr LEX_DASHDASH lex_term] :DashDash +| [lex_term] :Base + +def opt_lex_dot + [LEX_DOT] +| [] + +def lex_term + [lex_term opt_lex_dot lex_factor_rep] :Dot +| [lex_term LEX_COLON_GT lex_factor_rep] :ColonGt +| [lex_term LEX_COLON_GTGT lex_factor_rep] :ColonGtGt +| [lex_term LEX_LT_COLON lex_factor_rep] :LtColon +| [lex_factor_rep] :Base + +def lex_factor_rep + [lex_factor_rep LEX_STAR] :Star +| [lex_factor_rep LEX_STARSTAR] :StarStar +| [lex_factor_rep LEX_PLUS] :Plus +| [lex_factor_rep LEX_QUESTION] :Question +| [lex_factor_rep COPEN lex_uint CCLOSE ] :Exact +| [lex_factor_rep COPEN COMMA lex_uint CCLOSE ] :Max +| [lex_factor_rep COPEN lex_uint COMMA CCLOSE ] :Min +| [lex_factor_rep COPEN Low: lex_uint COMMA High: lex_uint CCLOSE ] :Range +| [lex_factor_neg] :Base + +def lex_factor_neg + [LEX_CARET lex_factor_neg] :Caret +| [lex_factor] :Base + +def lex_range_lit + [lex_lit] :Lit +| [lex_num] :Number + +def lex_num + [lex_uint] +| [lex_hex] + +#| [LEX_DASH num] + +def lex_factor + [lex_lit] :Literal +| [lex_id] :Id +| [lex_uint] :Number +| [lex_hex] :Hex +| [Low: lex_range_lit LEX_DOTDOT High: lex_range_lit] :Range +| [LEX_SQOPEN_POS reg_or_data RE_SQCLOSE] :PosOrBlock +| [LEX_SQOPEN_NEG reg_or_data RE_SQCLOSE] :NegOrBlock +| [LEX_POPEN lex_expr LEX_PCLOSE] :Paren + +def reg_or_data + [reg_or_data reg_or_char] :Data +| [] :Base + +def reg_or_char + [RE_CHAR] :Char +| [Low: RE_CHAR RE_DASH High: RE_CHAR] :Range diff --git a/src/colmex.h b/src/colmex.h new file mode 100644 index 00000000..2abc7b2f --- /dev/null +++ b/src/colmex.h @@ -0,0 +1,109 @@ +#ifndef _COLMEX_H +#define _COLMEX_H + +#include <colm/colm.h> +#include <colm/tree.h> +#include <colm/colmex.h> +#include <string.h> +#include <string> +#include <vector> + +inline void appendString( colm_print_args *args, const char *data, int length ) +{ + std::string *str = 
(std::string*)args->arg; + *str += std::string( data, length ); +} + +inline std::string printTreeStr( colm_program *prg, colm_tree *tree, bool trim ) +{ + std::string str; + struct indent_impl indent = { -1, 0 }; + colm_print_args printArgs = { &str, 1, 0, trim, &indent, &appendString, + &colm_print_null, &colm_print_term_tree, &colm_print_null }; + colm_print_tree_args( prg, colm_vm_root(prg), &printArgs, tree ); + return str; +} + +struct ExportTree +{ + ExportTree( colm_program *prg, colm_tree *tree ) + : __prg(prg), __tree(tree) {} + + std::string text() { return printTreeStr( __prg, __tree, true ); } + colm_location *loc() { return colm_find_location( __prg, __tree ); } + std::string text_notrim() { return printTreeStr( __prg, __tree, false ); } + std::string text_ws() { return printTreeStr( __prg, __tree, false ); } + colm_data *data() { return __tree->tokdata; } + operator colm_tree *() { return __tree; } + + colm_program *__prg; + colm_tree *__tree; +}; + +/* Non-recursive tree iterator. Runs an in-order traversal and when it finds a + * search target it yields it and then resumes searching the next child. It + * does not go into what it finds. This iterator can be used to search lists, + * regardless if they are left-recursive or right-recursive. */ +template <class SearchType> struct RepeatIter +{ + RepeatIter( const ExportTree &root ) + : + prg(root.__prg), + search_id(SearchType::ID) + { + /* We use one code path for the first call to forward and all + * subsequent calls. To achieve this we create a sentinal in front of + * root called first and point cur to it. On the first forward() call + * it will be as if we just visited the sentinal. + * + * Note that we are also creating a kid for root, rather than + * jump into root's child list so we can entertain the + * possiblity that root is exactly the thing we want to visit. 
+ */ + + memset( &first, 0, sizeof(first) ); + memset( &kid, 0, sizeof(kid) ); + + first.next = &kid; + kid.tree = root.__tree; + cur = &first; + next(); + } + + colm_program *prg; + colm_kid first, kid, *cur; + int search_id; + std::vector<colm_kid*> stack; + + void next() + { + goto return_to; + recurse: + + if ( cur->tree->id == search_id ) + return; + else { + stack.push_back( cur ); + cur = tree_child( prg, cur->tree ); + while ( cur != 0 ) { + goto recurse; + return_to: cur = cur->next; + } + if ( stack.size() == 0 ) { + cur = 0; + return; + } + cur = stack.back(); + stack.pop_back(); + goto return_to; + } + } + + bool end() + { return cur == 0; } + + SearchType value() + { return SearchType( prg, cur->tree ); } +}; + +#endif diff --git a/src/commit.c b/src/commit.c new file mode 100644 index 00000000..28da8c1d --- /dev/null +++ b/src/commit.c @@ -0,0 +1,111 @@ +/* + * Copyright 2015-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <errno.h> +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <assert.h> + +#include "config.h" +#include "debug.h" +#include "pdarun.h" +#include "bytecode.h" +#include "tree.h" +#include "pool.h" +#include "internal.h" + +void commit_clear_kid_list( program_t *prg, tree_t **sp, kid_t *kid ) +{ + kid_t *next; + while ( kid ) { + colm_tree_downref( prg, sp, kid->tree ); + next = kid->next; + kid_free( prg, kid ); + kid = next; + } +} + +void commit_clear_parse_tree( program_t *prg, tree_t **sp, + struct pda_run *pda_run, parse_tree_t *pt ) +{ + tree_t **top = vm_ptop(); + + if ( pt == 0 ) + return; + +free_tree: + if ( pt->next != 0 ) { + vm_push_ptree( pt->next ); + } + + if ( pt->left_ignore != 0 ) { + vm_push_ptree( pt->left_ignore ); + } + + if ( pt->child != 0 ) { + vm_push_ptree( pt->child ); + } + + if ( pt->right_ignore != 0 ) { + vm_push_ptree( pt->right_ignore ); + } + + /* Only the root level of the stack has tree + * shadows and we are below that. */ + assert( pt->shadow == 0 ); + parse_tree_free( pda_run, pt ); + + /* Any trees to downref? */ + if ( sp != top ) { + pt = vm_pop_ptree(); + goto free_tree; + } +} + +static int been_committed( parse_tree_t *parse_tree ) +{ + return parse_tree->flags & PF_COMMITTED; +} + +void commit_reduce( program_t *prg, tree_t **root, struct pda_run *pda_run ) +{ + tree_t **sp = root; + parse_tree_t *pt = pda_run->stack_top; + + /* The top level of the stack is linked right to left. This is the + * traversal order we need for committing. 
*/ + while ( pt != 0 && !been_committed( pt ) ) { + vm_push_ptree( pt ); + pt = pt->next; + } + + while ( sp != root ) { + pt = vm_pop_ptree(); + + prg->rtd->commit_reduce_forward( prg, sp, pda_run, pt ); + pt->child = 0; + + pt->flags |= PF_COMMITTED; + pt = pt->next; + } +} diff --git a/src/compiler.cc b/src/compiler.cc new file mode 100644 index 00000000..72f87dac --- /dev/null +++ b/src/compiler.cc @@ -0,0 +1,1263 @@ +/* + * Copyright 2006-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "compiler.h" + +#include <errno.h> +#include <stdlib.h> +#include <string.h> +#include <stdbool.h> +#include <unistd.h> +#include <assert.h> +#include <iostream> + +#include "redbuild.h" +#include "pdacodegen.h" +#include "fsmcodegen.h" +#include "colm.h" + +using std::ostringstream; +using std::cout; +using std::cerr; +using std::endl; + +char machineMain[] = "main"; +exit_object endp; +void operator<<( ostream &out, exit_object & ) +{ + out << endl; + exit(1); +} + +/* Perform minimization after an operation according + * to the command line args. */ +void afterOpMinimize( FsmGraph *fsm, bool lastInSeq ) +{ + /* Switch on the prefered minimization algorithm. */ + if ( lastInSeq ) { + /* First clean up the graph. FsmGraph operations may leave these + * lying around. There should be no dead end states. The subtract + * intersection operators are the only places where they may be + * created and those operators clean them up. */ + fsm->removeUnreachableStates(); + fsm->minimizePartition2(); + } +} + +/* Count the transitions in the fsm by walking the state list. */ +int countTransitions( FsmGraph *fsm ) +{ + int numTrans = 0; + FsmState *state = fsm->stateList.head; + while ( state != 0 ) { + numTrans += state->outList.length(); + state = state->next; + } + return numTrans; +} + +Key makeFsmKeyHex( char *str, const InputLoc &loc, Compiler *pd ) +{ + /* Reset errno so we can check for overflow or underflow. In the event of + * an error, sets the return val to the upper or lower bound being tested + * against. 
*/ + errno = 0; + unsigned int size = keyOps->alphType->size; + bool unusedBits = size < sizeof(unsigned long); + + unsigned long ul = strtoul( str, 0, 16 ); + + + if ( errno == ERANGE || (unusedBits && ul >> (size * 8)) ) { + error(loc) << "literal " << str << " overflows the alphabet type" << endl; + ul = 1 << (size * 8); + } + + if ( keyOps->alphType->isSigned && unusedBits && ul >> (size * 8 - 1) ) + ul |= (ULONG_MAX >> (size*8 ) ) << (size*8); + + return Key( (long)ul ); +} + +Key makeFsmKeyDec( char *str, const InputLoc &loc, Compiler *pd ) +{ + /* Convert the number to a decimal. First reset errno so we can check + * for overflow or underflow. */ + errno = 0; + long long minVal = keyOps->alphType->minVal; + long long maxVal = keyOps->alphType->maxVal; + + long long ll = strtoll( str, 0, 10 ); + + /* Check for underflow. */ + if ( (errno == ERANGE && ll < 0) || ll < minVal) { + error(loc) << "literal " << str << " underflows the alphabet type" << endl; + ll = minVal; + } + /* Check for overflow. */ + else if ( (errno == ERANGE && ll > 0) || ll > maxVal ) { + error(loc) << "literal " << str << " overflows the alphabet type" << endl; + ll = maxVal; + } + + return Key( (long)ll ); +} + +/* Make an fsm key in int format (what the fsm graph uses) from an alphabet + * number returned by the parser. Validates that the number doesn't overflow + * the alphabet type. */ +Key makeFsmKeyNum( char *str, const InputLoc &loc, Compiler *pd ) +{ + /* Switch on hex/decimal format. */ + if ( str[0] == '0' && str[1] == 'x' ) + return makeFsmKeyHex( str, loc, pd ); + else + return makeFsmKeyDec( str, loc, pd ); +} + +/* Make an fsm int format (what the fsm graph uses) from a single character. + * Performs proper conversion depending on signed/unsigned property of the + * alphabet. */ +Key makeFsmKeyChar( char c, Compiler *pd ) +{ + /* Copy from a char type. 
*/ + return Key( c ); +} + +/* Make an fsm key array in int format (what the fsm graph uses) from a string + * of characters. Performs proper conversion depending on signed/unsigned + * property of the alphabet. */ +void makeFsmKeyArray( Key *result, char *data, int len, Compiler *pd ) +{ + /* Copy from a char star type. */ + char *src = data; + for ( int i = 0; i < len; i++ ) + result[i] = Key(src[i]); +} + +/* Like makeFsmKeyArray except the result has only unique keys. They ordering + * will be changed. */ +void makeFsmUniqueKeyArray( KeySet &result, char *data, int len, + bool caseInsensitive, Compiler *pd ) +{ + /* Copy from a char star type. */ + char *src = data; + for ( int si = 0; si < len; si++ ) { + Key key( src[si] ); + result.insert( key ); + if ( caseInsensitive ) { + if ( key.isLower() ) + result.insert( key.toUpper() ); + else if ( key.isUpper() ) + result.insert( key.toLower() ); + } + } +} + +FsmGraph *dotFsm( Compiler *pd ) +{ + FsmGraph *retFsm = new FsmGraph(); + retFsm->rangeFsm( keyOps->minKey, keyOps->maxKey ); + return retFsm; +} + +FsmGraph *dotStarFsm( Compiler *pd ) +{ + FsmGraph *retFsm = new FsmGraph(); + retFsm->rangeStarFsm( keyOps->minKey, keyOps->maxKey ); + return retFsm; +} + +/* Make a builtin type. Depends on the signed nature of the alphabet type. */ +FsmGraph *makeBuiltin( BuiltinMachine builtin, Compiler *pd ) +{ + /* FsmGraph created to return. */ + FsmGraph *retFsm = 0; + + switch ( builtin ) { + case BT_Any: { + /* All characters. */ + retFsm = dotFsm( pd ); + break; + } + case BT_Ascii: { + /* Ascii characters 0 to 127. */ + retFsm = new FsmGraph(); + retFsm->rangeFsm( 0, 127 ); + break; + } + case BT_Extend: { + /* Ascii extended characters. This is the full byte range. Dependent + * on signed, vs no signed. If the alphabet is one byte then just use + * dot fsm. */ + retFsm = new FsmGraph(); + retFsm->rangeFsm( -128, 127 ); + break; + } + case BT_Alpha: { + /* Alpha [A-Za-z]. 
*/ + FsmGraph *upper = new FsmGraph(), *lower = new FsmGraph(); + upper->rangeFsm( 'A', 'Z' ); + lower->rangeFsm( 'a', 'z' ); + upper->unionOp( lower ); + upper->minimizePartition2(); + retFsm = upper; + break; + } + case BT_Digit: { + /* Digits [0-9]. */ + retFsm = new FsmGraph(); + retFsm->rangeFsm( '0', '9' ); + break; + } + case BT_Alnum: { + /* Alpha numerics [0-9A-Za-z]. */ + FsmGraph *digit = new FsmGraph(), *lower = new FsmGraph(); + FsmGraph *upper = new FsmGraph(); + digit->rangeFsm( '0', '9' ); + upper->rangeFsm( 'A', 'Z' ); + lower->rangeFsm( 'a', 'z' ); + digit->unionOp( upper ); + digit->unionOp( lower ); + digit->minimizePartition2(); + retFsm = digit; + break; + } + case BT_Lower: { + /* Lower case characters. */ + retFsm = new FsmGraph(); + retFsm->rangeFsm( 'a', 'z' ); + break; + } + case BT_Upper: { + /* Upper case characters. */ + retFsm = new FsmGraph(); + retFsm->rangeFsm( 'A', 'Z' ); + break; + } + case BT_Cntrl: { + /* Control characters. */ + FsmGraph *cntrl = new FsmGraph(); + FsmGraph *highChar = new FsmGraph(); + cntrl->rangeFsm( 0, 31 ); + highChar->concatFsm( 127 ); + cntrl->unionOp( highChar ); + cntrl->minimizePartition2(); + retFsm = cntrl; + break; + } + case BT_Graph: { + /* Graphical ascii characters [!-~]. */ + retFsm = new FsmGraph(); + retFsm->rangeFsm( '!', '~' ); + break; + } + case BT_Print: { + /* Printable characters. Same as graph except includes space. */ + retFsm = new FsmGraph(); + retFsm->rangeFsm( ' ', '~' ); + break; + } + case BT_Punct: { + /* Punctuation. 
*/ + FsmGraph *range1 = new FsmGraph(); + FsmGraph *range2 = new FsmGraph(); + FsmGraph *range3 = new FsmGraph(); + FsmGraph *range4 = new FsmGraph(); + range1->rangeFsm( '!', '/' ); + range2->rangeFsm( ':', '@' ); + range3->rangeFsm( '[', '`' ); + range4->rangeFsm( '{', '~' ); + range1->unionOp( range2 ); + range1->unionOp( range3 ); + range1->unionOp( range4 ); + range1->minimizePartition2(); + retFsm = range1; + break; + } + case BT_Space: { + /* Whitespace: [\t\v\f\n\r ]. */ + FsmGraph *cntrl = new FsmGraph(); + FsmGraph *space = new FsmGraph(); + cntrl->rangeFsm( '\t', '\r' ); + space->concatFsm( ' ' ); + cntrl->unionOp( space ); + cntrl->minimizePartition2(); + retFsm = cntrl; + break; + } + case BT_Xdigit: { + /* Hex digits [0-9A-Fa-f]. */ + FsmGraph *digit = new FsmGraph(); + FsmGraph *upper = new FsmGraph(); + FsmGraph *lower = new FsmGraph(); + digit->rangeFsm( '0', '9' ); + upper->rangeFsm( 'A', 'F' ); + lower->rangeFsm( 'a', 'f' ); + digit->unionOp( upper ); + digit->unionOp( lower ); + digit->minimizePartition2(); + retFsm = digit; + break; + } + case BT_Lambda: { + retFsm = new FsmGraph(); + retFsm->lambdaFsm(); + break; + } + case BT_Empty: { + retFsm = new FsmGraph(); + retFsm->emptyFsm(); + break; + }} + + return retFsm; +} + +/* + * Compiler + */ + +/* Initialize the structure that will collect info during the parse of a + * machine. 
*/ +Compiler::Compiler( ) +: + nextPriorKey(0), + nextNameId(0), + alphTypeSet(false), + getKeyExpr(0), + accessExpr(0), + curStateExpr(0), + lowerNum(0), + upperNum(0), + errorCount(0), + curActionOrd(0), + curPriorOrd(0), + nextEpsilonResolvedLink(0), + nextTokenId(1), + rootCodeBlock(0), + mainReturnUT(0), + //access(0), + //tokenStruct(0), + + ptrLangEl(0), + strLangEl(0), + anyLangEl(0), + rootLangEl(0), + noTokenLangEl(0), + eofLangEl(0), + errorLangEl(0), + ignoreLangEl(0), + + firstNonTermId(0), + prodIdIndex(0), + + global(0), + globalSel(0), + globalObjectDef(0), + arg0(0), + argv(0), + + stream(0), + inputSel(0), + streamSel(0), + + uniqueTypeNil(0), + uniqueTypePtr(0), + uniqueTypeBool(0), + uniqueTypeInt(0), + uniqueTypeStr(0), + uniqueTypeIgnore(0), + uniqueTypeAny(0), + uniqueTypeInput(0), + uniqueTypeStream(0), + nextPatConsId(0), + nextGenericId(1), + nextFuncId(0), + nextHostId(0), + nextObjectId(1), /* 0 is reserved for no object. */ + nextFrameId(0), + nextParserId(0), + revertOn(true), + predValue(0), + nextMatchEndNum(0), + argvTypeRef(0), + inContiguous(false), + contiguousOffset(0), + contiguousStretch(0) +{ +} + +/* Clean up the data collected during a parse. */ +Compiler::~Compiler() +{ + /* Delete all the nodes in the action list. Will cause all the + * string data that represents the actions to be deallocated. */ + actionList.empty(); + + for ( CharVectVect::Iter fns = streamFileNames; fns.lte(); fns++ ) { + const char **ptr = *fns; + while ( *ptr != 0 ) { + ::free( (void*)*ptr ); + ptr += 1; + } + free( (void*) *fns ); + } +} + +ostream &operator<<( ostream &out, const Token &token ) +{ + out << token.data; + return out; +} + +/* Write out a name reference. 
*/ +ostream &operator<<( ostream &out, const NameRef &nameRef ) +{ + int pos = 0; + if ( nameRef[pos] == 0 ) { + out << "::"; + pos += 1; + } + out << nameRef[pos++]; + for ( ; pos < nameRef.length(); pos++ ) + out << "::" << nameRef[pos]; + return out; +} + +NameInst **Compiler::makeNameIndex() +{ + /* The number of nodes in the tree can now be given by nextNameId. Put a + * null pointer on the end of the list to terminate it. */ + NameInst **nameIndex = new NameInst*[nextNameId+1]; + memset( nameIndex, 0, sizeof(NameInst*)*(nextNameId+1) ); + + for ( NameInstList::Iter ni = nameInstList; ni.lte(); ni++ ) + nameIndex[ni->id] = ni; + + return nameIndex; +} + +void Compiler::createBuiltin( const char *name, BuiltinMachine builtin ) +{ + LexExpression *expression = LexExpression::cons( builtin ); + LexJoin *join = LexJoin::cons( expression ); + LexDefinition *varDef = new LexDefinition( name, join ); + GraphDictEl *graphDictEl = new GraphDictEl( name, varDef ); + rootNamespace->rlMap.insert( graphDictEl ); +} + +/* Initialize the graph dict with builtin types. */ +void Compiler::initGraphDict( ) +{ + createBuiltin( "any", BT_Any ); + createBuiltin( "ascii", BT_Ascii ); + createBuiltin( "extend", BT_Extend ); + createBuiltin( "alpha", BT_Alpha ); + createBuiltin( "digit", BT_Digit ); + createBuiltin( "alnum", BT_Alnum ); + createBuiltin( "lower", BT_Lower ); + createBuiltin( "upper", BT_Upper ); + createBuiltin( "cntrl", BT_Cntrl ); + createBuiltin( "graph", BT_Graph ); + createBuiltin( "print", BT_Print ); + createBuiltin( "punct", BT_Punct ); + createBuiltin( "space", BT_Space ); + createBuiltin( "xdigit", BT_Xdigit ); + createBuiltin( "null", BT_Lambda ); + createBuiltin( "zlen", BT_Lambda ); + createBuiltin( "empty", BT_Empty ); +} + +/* Initialize the key operators object that will be referenced by all fsms + * created. */ +void Compiler::initKeyOps( ) +{ + /* Signedness and bounds. */ + const HostType *alphType = alphTypeSet ? 
userAlphType : + &hostLang->hostTypes[hostLang->defaultHostType]; + thisKeyOps.setAlphType( alphType ); + + if ( lowerNum != 0 ) { + /* If ranges are given then interpret the alphabet type. */ + thisKeyOps.minKey = makeFsmKeyNum( lowerNum, rangeLowLoc, this ); + thisKeyOps.maxKey = makeFsmKeyNum( upperNum, rangeHighLoc, this ); + } +} + +/* Remove duplicates of unique actions from an action table. */ +void Compiler::removeDups( ActionTable &table ) +{ + /* Scan through the table looking for unique actions to + * remove duplicates of. */ + for ( int i = 0; i < table.length(); i++ ) { + /* Remove any duplicates ahead of i. */ + for ( int r = i+1; r < table.length(); ) { + if ( table[r].value == table[i].value ) + table.vremove(r); + else + r += 1; + } + } +} + +/* Remove duplicates from action lists. This operates only on transition and + * eof action lists and so should be called once all actions have been + * transfered to their final resting place. */ +void Compiler::removeActionDups( FsmGraph *graph ) +{ + /* Loop all states. */ + for ( StateList::Iter state = graph->stateList; state.lte(); state++ ) { + /* Loop all transitions. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) + removeDups( trans->actionTable ); + removeDups( state->toStateActionTable ); + removeDups( state->fromStateActionTable ); + removeDups( state->eofActionTable ); + } +} + +Action *Compiler::newAction( const String &name, InlineList *inlineList ) +{ + InputLoc loc; + loc.line = 1; + loc.col = 1; + loc.fileName = 0; + + Action *action = Action::cons( loc, name, inlineList ); + actionList.append( action ); + return action; +} + +void Compiler::initLongestMatchData() +{ + if ( regionSetList.length() > 0 ) { + /* The initActId action gives act a default value. 
*/ + InlineList *il4 = InlineList::cons(); + il4->append( InlineItem::cons( InputLoc(), InlineItem::LmInitAct ) ); + initActId = newAction( "initact", il4 ); + initActId->isLmAction = true; + + /* The setTokStart action sets tokstart. */ + InlineList *il5 = InlineList::cons(); + il5->append( InlineItem::cons( InputLoc(), InlineItem::LmSetTokStart ) ); + setTokStart = newAction( "tokstart", il5 ); + setTokStart->isLmAction = true; + + /* The setTokEnd action sets tokend. */ + InlineList *il3 = InlineList::cons(); + il3->append( InlineItem::cons( InputLoc(), InlineItem::LmSetTokEnd ) ); + setTokEnd = newAction( "tokend", il3 ); + setTokEnd->isLmAction = true; + + /* The action will also need an ordering: ahead of all user action + * embeddings. */ + initActIdOrd = curActionOrd++; + setTokStartOrd = curActionOrd++; + setTokEndOrd = curActionOrd++; + } +} + +void Compiler::finishGraphBuild( FsmGraph *graph ) +{ + /* Resolve any labels that point to multiple states. Any labels that are + * still around are referenced only by gotos and calls and they need to be + * made into deterministic entry points. */ + graph->deterministicEntry(); + + /* + * All state construction is now complete. + */ + + /* Transfer global error actions. */ + for ( StateList::Iter state = graph->stateList; state.lte(); state++ ) + graph->transferErrorActions( state, 0 ); + + removeActionDups( graph ); + + /* Remove unreachable states. There should be no dead end states. The + * subtract and intersection operators are the only places where they may + * be created and those operators clean them up. */ + graph->removeUnreachableStates(); + + /* No more fsm operations are to be done. Action ordering numbers are + * no longer of use and will just hinder minimization. Clear them. */ + graph->nullActionKeys(); + + /* Transition priorities are no longer of use. We can clear them + * because they will just hinder minimization as well. Clear them. 
*/ + graph->clearAllPriorities(); + + /* Minimize here even if we minimized at every op. Now that function + * keys have been cleared we may get a more minimal fsm. */ + graph->minimizePartition2(); + graph->compressTransitions(); +} + +/* Build the name tree and supporting data structures. */ +NameInst *Compiler::makeNameTree() +{ + /* Create the root name. */ + nextNameId = 1; + + /* First make the name tree. */ + for ( RegionImplList::Iter rel = regionImplList; rel.lte(); rel++ ) { + /* Recurse on the instance. */ + rel->makeNameTree( rel->loc, this ); + } + + return 0; +} + +FsmGraph *Compiler::makeAllRegions() +{ + /* Build the name tree and supporting data structures. */ + makeNameTree(); + NameInst **nameIndex = makeNameIndex(); + + int numGraphs = 0; + FsmGraph **graphs = new FsmGraph*[regionImplList.length()]; + + /* Make all the instantiations, we know that main exists in this list. */ + for ( RegionImplList::Iter rel = regionImplList; rel.lte(); rel++ ) { + /* Build the graph from a walk of the parse tree. */ + FsmGraph *newGraph = rel->walk( this ); + + /* Wrap up the construction. */ + finishGraphBuild( newGraph ); + + /* Save off the new graph. */ + graphs[numGraphs++] = newGraph; + } + + /* NOTE: If putting in minimization here we need to include eofTarget + * into the minimization algorithm. It is currently set by the longest + * match operator and not considered anywhere else. */ + + FsmGraph *all; + if ( numGraphs == 0 ) { + all = new FsmGraph; + all->lambdaFsm(); + } + else { + /* Add all the other graphs into the first. */ + all = graphs[0]; + all->globOp( graphs+1, numGraphs-1 ); + delete[] graphs; + } + + /* Go through all the token regions and check for lmRequiresErrorState. 
*/ + for ( RegionImplList::Iter reg = regionImplList; reg.lte(); reg++ ) { + if ( reg->lmSwitchHandlesError ) + all->lmRequiresErrorState = true; + } + + all->nameIndex = nameIndex; + + return all; +} + +void Compiler::analyzeAction( Action *action, InlineList *inlineList ) +{ + /* FIXME: Actions used as conditions should be very constrained. */ + for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) { + //if ( item->type == InlineItem::Call || item->type == InlineItem::CallExpr ) + // action->anyCall = true; + + /* Need to recurse into longest match items. */ + if ( item->type == InlineItem::LmSwitch ) { + RegionImpl *lm = item->tokenRegion; + for ( TokenInstanceListReg::Iter lmi = lm->tokenInstanceList; lmi.lte(); lmi++ ) { + if ( lmi->action != 0 ) + analyzeAction( action, lmi->action->inlineList ); + } + } + + if ( item->type == InlineItem::LmOnLast || + item->type == InlineItem::LmOnNext || + item->type == InlineItem::LmOnLagBehind ) + { + TokenInstance *lmi = item->longestMatchPart; + if ( lmi->action != 0 ) + analyzeAction( action, lmi->action->inlineList ); + } + + if ( item->children != 0 ) + analyzeAction( action, item->children ); + } +} + +void Compiler::analyzeGraph( FsmGraph *graph ) +{ + for ( ActionList::Iter act = actionList; act.lte(); act++ ) + analyzeAction( act, act->inlineList ); + + for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) { + /* The transition list. 
*/ + for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) { + for ( ActionTable::Iter at = trans->actionTable; at.lte(); at++ ) + at->value->numTransRefs += 1; + } + + for ( ActionTable::Iter at = st->toStateActionTable; at.lte(); at++ ) + at->value->numToStateRefs += 1; + + for ( ActionTable::Iter at = st->fromStateActionTable; at.lte(); at++ ) + at->value->numFromStateRefs += 1; + + for ( ActionTable::Iter at = st->eofActionTable; at.lte(); at++ ) + at->value->numEofRefs += 1; + } +} + +FsmGraph *Compiler::makeScanner() +{ + /* Make the graph, do minimization. */ + FsmGraph *fsmGraph = makeAllRegions(); + + /* If any errors have occured in the input file then don't write anything. */ + if ( gblErrorCount > 0 ) + return 0; + + analyzeGraph( fsmGraph ); + + /* Decide if an error state is necessary. + * 1. There is an error transition + * 2. There is a gap in the transitions + * 3. The longest match operator requires it. */ + if ( fsmGraph->lmRequiresErrorState || fsmGraph->hasErrorTrans() ) + fsmGraph->errState = fsmGraph->addState(); + + /* State numbers need to be assigned such that all final states have a + * larger state id number than all non-final states. This enables the + * first_final mechanism to function correctly. We also want states to be + * ordered in a predictable fashion. So we first apply a depth-first + * search, then do a stable sort by final state status, then assign + * numbers. */ + + fsmGraph->depthFirstOrdering(); + fsmGraph->sortStatesByFinal(); + fsmGraph->setStateNumbers( 0 ); + + return fsmGraph; +} + +LangEl *Compiler::makeRepeatProd( const InputLoc &loc, Namespace *nspace, + const String &repeatName, UniqueType *ut, bool left ) +{ + LangEl *prodName = addLangEl( this, nspace, repeatName, LangEl::NonTerm ); + prodName->isRepeat = true; + prodName->leftRecursive = left; + + ProdElList *prodElList1 = new ProdElList; + + /* Build the first production of the repeat. 
*/ + TypeRef *typeRef1 = TypeRef::cons( loc, ut ); + ProdEl *factor1 = new ProdEl( ProdEl::ReferenceType, + InputLoc(), 0, false, typeRef1, 0 ); + + UniqueType *prodNameUT = findUniqueType( TYPE_TREE, prodName ); + TypeRef *typeRef2 = TypeRef::cons( loc, prodNameUT ); + ProdEl *factor2 = new ProdEl( ProdEl::ReferenceType, + InputLoc(), 0, false, typeRef2, 0 ); + + if ( left ) { + prodElList1->append( factor2 ); + prodElList1->append( factor1 ); + } + else { + prodElList1->append( factor1 ); + prodElList1->append( factor2 ); + } + + Production *newDef1 = Production::cons( InputLoc(), + prodName, prodElList1, String(), false, 0, + prodList.length(), prodName->defList.length() ); + + prodName->defList.append( newDef1 ); + prodList.append( newDef1 ); + + /* Build the second production of the repeat. */ + ProdElList *prodElList2 = new ProdElList; + + Production *newDef2 = Production::cons( InputLoc(), + prodName, prodElList2, String(), false, 0, + prodList.length(), prodName->defList.length() ); + + prodName->defList.append( newDef2 ); + prodList.append( newDef2 ); + + return prodName; +} + +LangEl *Compiler::makeListProd( const InputLoc &loc, Namespace *nspace, + const String &listName, UniqueType *ut, bool left ) +{ + LangEl *prodName = addLangEl( this, nspace, listName, LangEl::NonTerm ); + prodName->isList = true; + prodName->leftRecursive = left; + + /* Build the first production of the list. 
*/ + TypeRef *typeRef1 = TypeRef::cons( loc, ut ); + ProdEl *factor1 = new ProdEl( ProdEl::ReferenceType, InputLoc(), 0, false, typeRef1, 0 ); + + UniqueType *prodNameUT = findUniqueType( TYPE_TREE, prodName ); + TypeRef *typeRef2 = TypeRef::cons( loc, prodNameUT ); + ProdEl *factor2 = new ProdEl( ProdEl::ReferenceType, loc, 0, false, typeRef2, 0 ); + + ProdElList *prodElList1 = new ProdElList; + if ( left ) { + prodElList1->append( factor2 ); + prodElList1->append( factor1 ); + } + else { + prodElList1->append( factor1 ); + prodElList1->append( factor2 ); + } + + Production *newDef1 = Production::cons( loc, + prodName, prodElList1, String(), false, 0, + prodList.length(), prodName->defList.length() ); + + prodName->defList.append( newDef1 ); + prodList.append( newDef1 ); + + /* Build the second production of the list. */ + TypeRef *typeRef3 = TypeRef::cons( loc, ut ); + ProdEl *factor3 = new ProdEl( ProdEl::ReferenceType, loc, 0, false, typeRef3, 0 ); + + ProdElList *prodElList2 = new ProdElList; + prodElList2->append( factor3 ); + + Production *newDef2 = Production::cons( loc, + prodName, prodElList2, String(), false, 0, + prodList.length(), prodName->defList.length() ); + + prodName->defList.append( newDef2 ); + prodList.append( newDef2 ); + + return prodName; +} + +LangEl *Compiler::makeOptProd( const InputLoc &loc, Namespace *nspace, + const String &optName, UniqueType *ut ) +{ + LangEl *prodName = addLangEl( this, nspace, optName, LangEl::NonTerm ); + prodName->isOpt = true; + + ProdElList *prodElList1 = new ProdElList; + + /* Build the first production of the repeat. 
*/ + TypeRef *typeRef1 = TypeRef::cons( loc, ut ); + ProdEl *factor1 = new ProdEl( ProdEl::ReferenceType, loc, 0, false, typeRef1, 0 ); + prodElList1->append( factor1 ); + + Production *newDef1 = Production::cons( loc, + prodName, prodElList1, String(), false, 0, + prodList.length(), prodName->defList.length() ); + + prodName->defList.append( newDef1 ); + prodList.append( newDef1 ); + + /* Build the second production of the repeat. */ + ProdElList *prodElList2 = new ProdElList; + + Production *newDef2 = Production::cons( loc, + prodName, prodElList2, String(), false, 0, + prodList.length(), prodName->defList.length() ); + + prodName->defList.append( newDef2 ); + prodList.append( newDef2 ); + + return prodName; +} + +Namespace *Namespace::findNamespace( const String &name ) +{ + for ( NamespaceVect::Iter c = childNamespaces; c.lte(); c++ ) { + if ( strcmp( name, (*c)->name ) == 0 ) + return *c; + } + return 0; +} + +Reduction *Namespace::findReduction( const String &name ) +{ + for ( ReductionVect::Iter r = reductions; r.lte(); r++ ) { + if ( strcmp( name, (*r)->name ) == 0 ) + return *r; + } + return 0; +} + +/* Search from a previously resolved qualification. (name 1+ in a qual list). */ +Namespace *NamespaceQual::searchFrom( Namespace *from, StringVect::Iter &qualPart ) +{ + /* While there are still parts in the qualification. */ + while ( qualPart.lte() ) { + Namespace *child = from->findNamespace( *qualPart ); + if ( child == 0 ) + return 0; + + from = child; + qualPart.increment(); + } + + return from; +} + +Namespace *NamespaceQual::getQual( Compiler *pd ) +{ + /* Do the search only once. */ + if ( cachedNspaceQual != 0 ) + return cachedNspaceQual; + + if ( qualNames.length() == 0 ) { + /* No qualification, use the region the qualification was + * declared in. */ + cachedNspaceQual = declInNspace; + } + else if ( strcmp( qualNames[0], "root" ) == 0 ) { + /* First item is "root." Start the downward search from there. 
*/ + StringVect::Iter qualPart = qualNames; + qualPart.increment(); + cachedNspaceQual = searchFrom( pd->rootNamespace, qualPart ); + return cachedNspaceQual; + } + else { + /* Have a qualification. Move upwards through the declared + * regions looking for the first part. */ + StringVect::Iter qualPart = qualNames; + Namespace *parentNamespace = declInNspace; + while ( parentNamespace != 0 ) { + /* Search for the first part underneath the current parent. */ + Namespace *child = parentNamespace->findNamespace( *qualPart ); + + if ( child != 0 ) { + /* Found the first part. Start going below the result. */ + qualPart.increment(); + cachedNspaceQual = searchFrom( child, qualPart ); + return cachedNspaceQual; + } + + /* Not found, move up to the parent. */ + parentNamespace = parentNamespace->parentNamespace; + } + + /* Failed to find the place to start from. */ + cachedNspaceQual = 0; + } + + return cachedNspaceQual; +} + +void Compiler::initEmptyScanner( RegionSet *regionSet, TokenRegion *reg ) +{ + if ( reg != 0 && reg->impl->tokenInstanceList.length() == 0 ) { + reg->impl->wasEmpty = true; + + static int def = 1; + String name( 64, "__%p_DEF_PAT_%d", reg, def++ ); + + LexJoin *join = LexJoin::cons( LexExpression::cons( BT_Any ) ); + + TokenDef *tokenDef = TokenDef::cons( name, String(), false, false, + join, 0, internal, nextTokenId++, rootNamespace, + regionSet, 0, 0 ); + + TokenInstance *tokenInstance = TokenInstance::cons( tokenDef, + join, internal, nextTokenId++, + rootNamespace, reg ); + + reg->impl->tokenInstanceList.append( tokenInstance ); + + /* These do not go in the namespace so so they cannot get declared + * in the declare pass. 
*/ + LangEl *lel = addLangEl( this, rootNamespace, name, LangEl::Term ); + + tokenInstance->tokenDef->tdLangEl = lel; + lel->tokenDef = tokenDef; + } +} + +void Compiler::initEmptyScanners() +{ + for ( RegionSetList::Iter regionSet = regionSetList; regionSet.lte(); regionSet++ ) { + initEmptyScanner( regionSet, regionSet->tokenIgnore ); + initEmptyScanner( regionSet, regionSet->tokenOnly ); + initEmptyScanner( regionSet, regionSet->ignoreOnly ); + initEmptyScanner( regionSet, regionSet->collectIgnore ); + } +} + +pda_run *Compiler::parsePattern( program_t *prg, tree_t **sp, const InputLoc &loc, + int parserId, struct input_impl *sourceStream ) +{ + struct pda_run *pdaRun = new pda_run; + colm_pda_init( prg, pdaRun, pdaTables, parserId, 0, false, 0, false ); + + long pcr = colm_parse_loop( prg, sp, pdaRun, sourceStream, PCR_START ); + assert( pcr == PCR_DONE ); + if ( pdaRun->parse_error ) { + cerr << ( loc.fileName != 0 ? loc.fileName : "<input>" ) << + ":" << loc.line << ":" << loc.col; + + if ( pdaRun->parse_error_text != 0 ) { + colm_data *tokdata = pdaRun->parse_error_text->tokdata; + cerr << ": relative error: "; + cerr.write( (const char*)tokdata->data, tokdata->length ); + } + else { + cerr << ": parse error"; + } + + cerr << endl; + gblErrorCount += 1; + } + + return pdaRun; +} + +void Compiler::parsePatterns() +{ + program_t *prg = colm_new_program( runtimeData ); + + colm_set_debug( prg, gblActiveRealm ); + + /* Turn off context-dependent parsing. 
*/ + prg->ctx_dep_parsing = 0; + + tree_t **sp = prg->stack_root; + + for ( ConsList::Iter cons = replList; cons.lte(); cons++ ) { + if ( cons->langEl != 0 ) { + struct input_impl *in = colm_impl_new_cons( strdup("<internal>"), cons ); + cons->pdaRun = parsePattern( prg, sp, cons->loc, cons->langEl->parserId, in ); + } + } + + for ( PatList::Iter pat = patternList; pat.lte(); pat++ ) { + struct input_impl *in = colm_impl_new_pat( strdup("<internal>"), pat ); + pat->pdaRun = parsePattern( prg, sp, pat->loc, pat->langEl->parserId, in ); + } + + /* Bail on above errors. */ + if ( gblErrorCount > 0 ) + exit(1); + + fillInPatterns( prg ); +} + +void Compiler::collectParserEls( BstSet<LangEl*> &parserEls ) +{ + for ( PatList::Iter pat = patternList; pat.lte(); pat++ ) { + /* We assume the reduction action compilation phase was run before + * pattern parsing and it decorated the pattern with the target type. */ + assert( pat->langEl != 0 ); + if ( pat->langEl->type != LangEl::NonTerm ) + error(pat->loc) << "pattern type is not a non-terminal" << endp; + + if ( pat->langEl->parserId < 0 ) { + /* Make a parser for the language element. */ + parserEls.insert( pat->langEl ); + pat->langEl->parserId = nextParserId++; + } + } + + for ( ConsList::Iter repl = replList; repl.lte(); repl++ ) { + /* We need the the language element from the compilation process. */ + assert( repl->langEl != 0 ); + + if ( repl->langEl->parserId < 0 ) { + /* Make a parser for the language element. */ + parserEls.insert( repl->langEl ); + repl->langEl->parserId = nextParserId++; + } + } + + /* Make parsers that we need. 
*/ + for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { + if ( lel->parserId >= 0 ) + parserEls.insert( lel ); + } +} + +void Compiler::writeHostCall() +{ + /* + * Host Call + */ + for ( FunctionList::Iter hc = inHostList; hc.lte(); hc++ ) { + *outStream << + "value_t " << hc->hostCall << "( program_t *prg, tree_t **sp"; + for ( ParameterList::Iter p = *hc->paramList; p.lte(); p++ ) { + *outStream << + ", value_t"; + } + *outStream << " );\n"; + } + + *outStream << + "tree_t **" << objectName << "_host_call( program_t *prg, long code, tree_t **sp )\n" + "{\n" + " value_t rtn = 0;\n" + " switch ( code ) {\n"; + + for ( FunctionList::Iter hc = inHostList; hc.lte(); hc++ ) { + *outStream << + " case " << hc->funcId << ": {\n"; + + int pos = hc->paramList->length() - 1; + for ( ParameterList::Iter p = *hc->paramList; p.lte(); p++, pos-- ) { + *outStream << + " value_t p" << pos << " = vm_pop_value();\n"; + } + + *outStream << + " rtn = " << hc->hostCall << "( prg, sp"; + + pos = 0; + for ( ParameterList::Iter p = *hc->paramList; p.lte(); p++, pos++ ) { + *outStream << + ", p" << pos; + } + *outStream << " );\n" + " break;\n" + " }\n"; + } + + *outStream << + " }\n" + " vm_push_value( rtn );\n" + " return sp;\n" + "}\n"; + +} + +void Compiler::generateOutput( long activeRealm, bool includeCommit ) +{ + FsmCodeGen *fsmGen = new FsmCodeGen( *outStream, redFsm, fsmTables ); + + PdaCodeGen *pdaGen = new PdaCodeGen( *outStream ); + + fsmGen->writeIncludes(); + pdaGen->defineRuntime(); + fsmGen->writeCode(); + + /* Make parsers that we need. */ + pdaGen->writeParserData( 0, pdaTables ); + + /* Write the runtime data. */ + pdaGen->writeRuntimeData( runtimeData, pdaTables ); + + writeHostCall(); + + if ( includeCommit ) + writeCommitStub(); + + if ( !gblLibrary ) + fsmGen->writeMain( activeRealm ); + + outStream->flush(); +} + + +void Compiler::prepGrammar() +{ + /* This will create language elements. 
*/
+	wrapNonTerminals();
+
+	makeLangElIds();
+	makeStructElIds();
+	makeLangElNames();
+	makeDefinitionNames();
+	noUndefindLangEls();
+
+	/* Put the language elements in an index by language element id. The
+	 * array has nextLelId+1 slots and is zero-filled before it is populated. */
+	langElIndex = new LangEl*[nextLelId+1];
+	memset( langElIndex, 0, sizeof(LangEl*)*(nextLelId+1) );
+	for ( LelList::Iter lel = langEls; lel.lte(); lel++ )
+		langElIndex[lel->id] = lel;
+
+	makeProdFsms();
+
+	/* Allocate the Runtime data now. Every PdaTable that we make
+	 * will reference it, but it will be filled in after all the tables are
+	 * built. */
+	runtimeData = new colm_sections;
+}
+/* Top-level compile driver. Runs the passes below in the order written:
+ * declare and resolve types, build the scanner graph, prepare the grammar,
+ * place objects and functions, compile bytecode, reduce the scanner, build
+ * the parsers and scanner tables, fill in the runtime data, and finally
+ * parse constructors and patterns. */
+void Compiler::compile()
+{
+	beginProcessing();
+	initKeyOps();
+
+	/* Declare types. */
+	declarePass();
+
+	/* Resolve type references. */
+	resolvePass();
+
+	makeTerminalWrappers();
+	makeEofElements();
+
+	/*
+	 * Parsers
+	 */
+
+	/* Init the longest match data */
+	initLongestMatchData();
+	FsmGraph *fsmGraph = makeScanner();
+
+	prepGrammar();
+
+	placeAllLanguageObjects();
+	placeAllStructObjects();
+	placeAllFrameObjects();
+	placeAllFunctions();
+
+	/* Compile bytecode. */
+	compileByteCode();
+
+	/* Make the reduced scanner. */
+	RedFsmBuild reduce( this, fsmGraph );
+	redFsm = reduce.reduceMachine();
+
+	BstSet<LangEl*> parserEls;
+	collectParserEls( parserEls );
+
+	makeParser( parserEls );
+
+	/* Make the scanner tables. */
+	fsmTables = redFsm->makeFsmTables();
+
+	/* Now that all parsers are built, make the global runtimeData. */
+	makeRuntimeData();
+
+	/*
+	 * All compilation is now complete.
+	 */
+
+	/* Parse constructors and patterns. 
*/ + parsePatterns(); +} + diff --git a/src/compiler.h b/src/compiler.h new file mode 100644 index 00000000..67d5b40e --- /dev/null +++ b/src/compiler.h @@ -0,0 +1,1158 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+
+#ifndef _COLM_PARSEDATA_H
+#define _COLM_PARSEDATA_H
+
+#include <limits.h>
+
+#include <iostream>
+
+#include <avlmap.h>
+#include <avlset.h>
+#include <bstmap.h>
+#include <vector.h>
+#include <bstset.h>
+#include <dlist.h>
+#include <dlistmel.h>
+#include <fsmgraph.h>
+#include <compare.h>
+
+#include "global.h"
+#include "keyops.h"
+#include "parsetree.h"
+#include "cstring.h"
+#include "pdagraph.h"
+#include "pdarun.h"
+#include "bytecode.h"
+#include "program.h"
+#include "internal.h"
+
+using std::ostream;
+
+struct exit_object { };
+extern exit_object endp;
+void operator<<( std::ostream &out, exit_object & );
+extern const char *objectName;
+extern bool hostAdapters;
+
+/* Forwards. */
+struct RedFsm;
+struct LangEl;
+struct Compiler;
+struct PdaCodeGen;
+struct FsmCodeGen;
+/* Action kind tags. makeReduceCode() below ORs REDUCE_CODE or
+ * SHIFT_REDUCE_CODE into the low bits of an action code. */
+#define SHIFT_CODE 0x1
+#define REDUCE_CODE 0x2
+#define SHIFT_REDUCE_CODE 0x3
+
+typedef Vector<const char**> CharVectVect;
+
+/* One entry on the stack of files currently being included. The stack is
+ * used to detect a recursive include structure. */
+struct IncludeStackItem
+{
+	IncludeStackItem( const char *fileName )
+		: fileName(fileName) {}
+
+	const char *fileName;
+};
+
+typedef Vector<IncludeStackItem> IncludeStack;
+typedef Vector<const char *> ArgsVector;
+/* A name/value pair; collected in the global defineArgs vector below. */
+struct DefineArg
+{
+	DefineArg( String name, String value )
+		: name(name), value(value) {}
+
+	String name;
+	String value;
+};
+
+typedef Vector<DefineArg> DefineVector;
+
+extern DefineVector defineArgs;
+extern ArgsVector includePaths;
+/* Build the action code for a reduction: the kind tag (SHIFT_REDUCE_CODE
+ * when isShiftReduce is set, REDUCE_CODE otherwise) occupies the low two
+ * bits and the reduction value is stored shifted left by two. */
+inline long makeReduceCode( long reduction, bool isShiftReduce )
+{
+	return ( isShiftReduce ? SHIFT_REDUCE_CODE : REDUCE_CODE ) |
+		( reduction << 2 );
+}
+
+struct ProdEl;
+struct ProdElList;
+struct PdaLiteral;
+struct Production;
+
+/* A pointer to this is in struct pda_run, but it's specification is not known by the
+ * runtime code. 
The runtime functions that access it are defined in
+ * ctinput.cpp and stubbed in fsmcodegen.cpp */
+struct bindings
+	: public Vector<parse_tree_t*>
+{};
+/* Two separate prev/next link pairs. Production inherits from both, so it
+ * carries one pair per list type. */
+struct DefListEl { Production *prev, *next; };
+struct LelDefListEl { Production *prev, *next; };
+typedef Vector< LangEl* > LangElVect;
+typedef Vector< ProdEl* > FactorVect;
+
+typedef AvlMap<String, long, ColmCmpStr> StringMap;
+typedef AvlMapEl<String, long> StringMapEl;
+
+enum PredType {
+	PredLeft,
+	PredRight,
+	PredNonassoc,
+	PredNone
+};
+/* One precedence declaration; elements of PredDeclList.
+ * NOTE(review): the constructor sets typeRef and predValue only. predType,
+ * prev and next are not initialized here -- presumably assigned by the
+ * caller or the list; confirm. */
+struct PredDecl
+{
+	PredDecl( TypeRef *typeRef, long predValue )
+		: typeRef(typeRef), predValue(predValue)
+	{}
+
+	TypeRef *typeRef;
+	PredType predType;
+	long predValue;
+
+	PredDecl *prev, *next;
+};
+
+typedef DList<PredDecl> PredDeclList;
+
+/* A production. It derives from both DefListEl and LelDefListEl and so
+ * carries two independent prev/next pairs. */
+struct Production
+:
+	public DefListEl, public LelDefListEl
+{
+	Production()
+	:
+		prodName(0), prodElList(0), prodCommit(false), redBlock(0),
+		prodId(0), prodNum(0), fsm(0), fsmLength(0), uniqueEmptyLeader(0),
+		isLeftRec(false), localFrame(0), lhsField(0), predOf(0)
+	{}
+	/* Allocate a Production and assign the given fields. Every other
+	 * member keeps the value set by the default constructor. */
+	static Production* cons( const InputLoc &loc, LangEl *prodName, ProdElList *prodElList,
+			String name, bool prodCommit, CodeBlock *redBlock, int prodId, int prodNum )
+	{
+		Production *p = new Production;
+		p->loc = loc;
+		p->prodName = prodName;
+		p->_name = name;
+		p->prodElList = prodElList;
+		p->prodCommit = prodCommit;
+		p->redBlock = redBlock;
+		p->prodId = prodId;
+		p->prodNum = prodNum;
+		return p;
+	}
+
+	InputLoc loc;
+	LangEl *prodName;
+	ProdElList *prodElList;
+	String _name;
+	bool prodCommit;
+
+	CodeBlock *redBlock;
+
+	int prodId;
+	int prodNum;
+
+	PdaGraph *fsm;
+	int fsmLength;
+	String data;
+	LongSet reducesTo;
+
+	LangEl *uniqueEmptyLeader;
+
+	ProdIdSet nonTermFirstSet;
+	AlphSet firstSet;
+	bool isLeftRec;
+
+	ObjectDef *localFrame;
+	ObjectField *lhsField;
+
+	LangEl *predOf;
+
+	UnsignedCharVect copy;
+};
+/* Three-way comparison of productions by prodId. */
+struct CmpDefById
+{
+	static int compare( Production *d1, Production *d2 )
+	{
+		
if ( d1->prodId < d2->prodId )
+			return -1;
+		else if ( d1->prodId > d2->prodId )
+			return 1;
+		else
+			return 0;
+	}
+};
+
+
+/* Map dotItems to productions. */
+typedef BstMap< int, Production*, CmpOrd<int> > DotItemIndex;
+typedef BstMapEl< int, Production*> DotItemIndexEl;
+/* A list of productions linked through their DefListEl members. */
+struct DefList
+:
+	public DListMel<Production, DefListEl>
+{};
+
+/* A list of productions linked through their LelDefListEl members. This is
+ * the type of LangEl::defList; each non-terminal can have many productions. */
+struct LelDefList
+:
+	public DListMel<Production, LelDefListEl>
+{};
+
+/* A set of machines made during a closure round. */
+typedef Vector< PdaGraph* > Machines;
+
+/* List of language elements. */
+typedef DList<LangEl> LelList;
+
+typedef Vector< TokenInstance* > TokenInstanceVect;
+
+struct UniqueType;
+
+typedef Vector<LangEl*> LangElVect;
+typedef BstSet<LangEl*> LangElSet;
+
+/* A language element class. Can be a nonTerm or a term. */
+struct LangEl : public DListEl<LangEl>
+{
+	enum Type { Unknown, Term, NonTerm };
+
+	LangEl( Namespace *nspace, const String &name, Type type );
+	~LangEl();
+
+	/* The namespace the language element was defined in. */
+	Namespace *nspace;
+
+	String name;
+	String lit;
+
+	String fullName;
+	String fullLit;
+
+	/* For referencing the type. */
+	String refName;
+
+	/* For declaring things inside the type. */
+	String declName;
+
+	String xmlTag;
+
+	Type type;
+	long id;
+	String displayString;
+	long numAppearances;
+	bool commit;
+	bool isIgnore;
+	bool reduceFirst;
+	bool isLiteral;
+	bool isRepeat;
+	bool isList;
+	bool isOpt;
+	bool parseStop;
+	bool isEOF;
+
+	/* For a list or a repeat. Defaults to right recursive. */
+	bool leftRecursive;
+
+	LangEl *repeatOf;
+
+	/* Productions from the language element if it is a non-terminal. 
*/
+	LelDefList defList;
+
+	TokenDef *tokenDef;
+	Production *rootDef;
+	LangEl *termDup;
+	LangEl *eofLel;
+
+	PdaGraph *pdaGraph;
+	struct pda_tables *pdaTables;
+
+	PdaState *startState;
+
+	CodeBlock *transBlock;
+
+	ObjectDef *objectDef;
+
+	long thisSize;
+	long ofiOffset;
+	/* Compiler::collectParserEls() treats a negative value as "no parser
+	 * assigned yet". */
+	long parserId;
+
+	PredType predType;
+	long predValue;
+
+	StructDef *contextDef;
+	StructDef *contextIn;
+	bool noPreIgnore;
+	bool noPostIgnore;
+	bool isZero;
+	RegionSet *regionSet;
+};
+/* One element of a production (stored in ProdElList). */
+struct ProdEl
+{
+	/* Language elements a factor node can be. */
+	enum Type {
+		LiteralType,
+		ReferenceType
+	};
+
+	/* Construct with a reference to a var def.
+	 * NOTE(review): loc is accepted but not stored, and pos, prev and next
+	 * are not initialized by this constructor. */
+	ProdEl( Type type, const InputLoc &loc, ObjectField *captureField,
+			bool commit, TypeRef *typeRef, int priorVal )
+	:
+		type(type),
+		production(0),
+		captureField(captureField),
+		rhsElField(0),
+		commit(commit),
+		typeRef(typeRef),
+		langEl(0),
+		priorVal(priorVal)
+	{}
+	/* Construct a ReferenceType element with no capture field, no commit
+	 * and priority 0. As above, loc is not stored and pos, prev and next
+	 * are not initialized. */
+	ProdEl( const InputLoc &loc, TypeRef *typeRef )
+	:
+		type(ReferenceType),
+		production(0),
+		captureField(0),
+		rhsElField(0),
+		commit(false),
+		typeRef(typeRef),
+		langEl(0),
+		priorVal(0)
+	{}
+
+	Type type;
+	Production *production;
+	int pos;
+	ObjectField *captureField;
+	ObjectField *rhsElField;
+	bool commit;
+	TypeRef *typeRef;
+	LangEl *langEl;
+	int priorVal;
+
+	ProdEl *prev, *next;
+};
+
+struct ProdElList : public DList<ProdEl>
+{
+	PdaGraph *walk( Compiler *pd, Production *prod );
+};
+
+/* This should be renamed. It is a literal string in a type reference. */
+struct PdaLiteral
+{
+	PdaLiteral( const InputLoc &loc, const String &data )
+		: loc(loc), data(data), value(0) { }
+
+	InputLoc loc;
+	String data;
+	long value;
+};
+
+/* Nodes in the tree that use this action. */
+typedef Vector<NameInst*> ActionRefs;
+
+/* Element in list of actions. Contains the string for the code to exectute. 
*/
+struct Action
+:
+	public DListEl<Action>,
+	public AvlTreeEl<Action>
+{
+public:
+	/* Allocate a named action over inlineList. The mark fields are set to
+	 * "none" (MarkNone, markId -1), actionId to -1, and all reference
+	 * counts and flags to zero/false. */
+	static Action *cons( const InputLoc &loc, const String &name, InlineList *inlineList )
+	{
+		Action *a = new Action;
+		a->loc = (loc);
+		a->name = (name);
+		a->markType = (MarkNone);
+		a->objField = (0);
+		a->markId = (-1);
+		a->inlineList = (inlineList);
+		a->actionId = (-1);
+		a->numTransRefs = (0);
+		a->numToStateRefs = (0);
+		a->numFromStateRefs = (0);
+		a->numEofRefs = (0);
+		a->numCondRefs = (0);
+		a->anyCall = (false);
+		a->isLmAction = (false);
+		return a;
+	}
+	/* Allocate a mark action: named "mark", carrying the given mark type
+	 * and id, with an InlineList obtained from InlineList::cons(). loc is
+	 * not assigned here. */
+	static Action *cons( MarkType markType, long markId )
+	{
+		Action *a = new Action;
+		a->name = ("mark");
+		a->markType = (markType);
+		a->objField = (0);
+		a->markId = (markId);
+		a->inlineList = (InlineList::cons());
+		a->actionId = (-1);
+		a->numTransRefs = (0);
+		a->numToStateRefs = (0);
+		a->numFromStateRefs = (0);
+		a->numEofRefs = (0);
+		a->numCondRefs = (0);
+		a->anyCall = (false);
+		a->isLmAction = (false);
+		return a;
+	}
+
+	/* Key for action dictionary. */
+	const String &getKey() const { return name; }
+
+	/* Data collected during parse. */
+	InputLoc loc;
+	String name;
+
+	MarkType markType;
+	ObjectField *objField;
+	long markId;
+
+	InlineList *inlineList;
+	int actionId;
+	/* Write the action's name to out, or "line:col" of loc when the name
+	 * compares equal to 0. */
+	void actionName( ostream &out )
+	{
+		if ( name != 0 )
+			out << name;
+		else
+			out << loc.line << ":" << loc.col;
+	}
+
+	/* Places in the input text that reference the action. */
+	ActionRefs actionRefs;
+
+	/* Number of references in the final machine.
+	 * NOTE(review): the sum of the four counters is returned as bool, so
+	 * callers only learn whether it is non-zero, not the count. numCondRefs
+	 * is not part of the sum. Confirm both are intended. */
+	bool numRefs()
+		{ return numTransRefs + numToStateRefs + numFromStateRefs + numEofRefs; }
+	int numTransRefs;
+	int numToStateRefs;
+	int numFromStateRefs;
+	int numEofRefs;
+	int numCondRefs;
+	bool anyCall;
+
+	bool isLmAction;
+};
+
+/* A list of actions. 
*/
+typedef DList<Action> ActionList;
+
+struct VarDef;
+struct LexJoin;
+struct LexTerm;
+struct FactorAug;
+struct FactorLabel;
+struct FactorRep;
+struct FactorNeg;
+struct Factor;
+struct Literal;
+struct Range;
+struct RegExpr;
+struct ReItem;
+struct ReOrBlock;
+struct ReOrItem;
+struct TokenRegion;
+
+/* Containers of instantiated names (NameInst). */
+typedef BstMapEl<String, NameInst*> NameMapEl;
+typedef BstMap<String, NameInst*, ColmCmpStr> NameMap;
+typedef Vector<NameInst*> NameVect;
+typedef BstSet<NameInst*> NameSet;
+
+/* Node in the tree of instantiated names. The constructor sets id only;
+ * prev and next are left uninitialized. */
+struct NameInst
+{
+	NameInst( int id )
+		: id(id) {}
+
+	int id;
+
+	/* Pointers for the name search queue. */
+	NameInst *prev, *next;
+};
+
+typedef DList<NameInst> NameInstList;
+
+/* Stack frame used in walking the name tree. */
+struct NameFrame
+{
+	NameInst *prevNameInst;
+	int prevNameChild;
+	NameInst *prevLocalScope;
+};
+
+/* Class to collect information about the machine during the
+ * parse of input. */
+struct Compiler
+{
+	/* Create a new parse data object. This is done at the beginning of every
+	 * fsm specification. */
+	Compiler();
+	~Compiler();
+
+	/*
+	 * Setting up the graph dict.
+	 */
+
+	void compileLiteralTokens();
+	void initEmptyScanners();
+	void initEmptyScanner( RegionSet *regionSet, TokenRegion *reg );
+	void initUniqueTypes();
+
+	/* Initialize a graph dict with the basic fsms. */
+	void initGraphDict();
+	void createBuiltin( const char *name, BuiltinMachine builtin );
+
+	/* Make a name id in the current name instantiation scope if it is not
+	 * already there. */
+	NameInst *makeJoinNameTree( LexJoin *join );
+	NameInst *makeNameTree();
+	NameInst **makeNameIndex();
+
+	void printNameTree( NameInst *rootName );
+	void printNameIndex( NameInst **nameIndex );
+
+	/* Resove name references in action code and epsilon transitions. 
*/
+	NameSet resolvePart( NameInst *refFrom, const char *data, bool recLabelsOnly );
+	void resolveFrom( NameSet &result, NameInst *refFrom,
+			const NameRef &nameRef, int namePos );
+
+	/* Set the alphabet type. Returns false if the types are not valid. */
+	bool setAlphType( char *s1, char *s2 );
+	bool setAlphType( char *s1 );
+
+	/* Unique actions. */
+	void removeDups( ActionTable &actionTable );
+	void removeActionDups( FsmGraph *graph );
+
+	/* Dumping the name instantiation tree. */
+	void printNameInst( NameInst *nameInst, int level );
+
+	/* Make the graph from a graph dict node. Does minimization. */
+	void finishGraphBuild( FsmGraph *graph );
+	FsmGraph *makeAllRegions();
+	FsmGraph *makeScanner();
+
+	void analyzeAction( Action *action, InlineList *inlineList );
+	void analyzeGraph( FsmGraph *graph );
+	void resolvePrecedence( PdaGraph *pdaGraph );
+	LangEl *predOf( PdaTrans *trans, long action );
+	bool precedenceSwap( long action1, long action2, LangEl *l1, LangEl *l2 );
+	bool precedenceRemoveBoth( LangEl *l1, LangEl *l2 );
+	/* Called from compile() after prepGrammar(), in the order language
+	 * objects, struct objects, frame objects, functions. */
+	void placeFrameFields( ObjectDef *localFrame );
+	void placeUserFunction( Function *func, bool isUserIter );
+	void placeAllStructObjects();
+	void placeAllLanguageObjects();
+	void placeAllFrameObjects();
+	void placeAllFunctions();
+
+	void initKeyOps();
+
+	/*
+	 * Data collected during the parse.
+	 */
+
+	/* List of actions. Will be pasted into a switch statement. */
+	ActionList actionList;
+
+	/* The id of the next priority name and label. */
+	int nextPriorKey, nextNameId;
+
+	/* Alphabet type. */
+	const HostType *userAlphType;
+	bool alphTypeSet;
+
+	/* Element type and get key expression. */
+	InlineList *getKeyExpr;
+	InlineList *accessExpr;
+	InlineList *curStateExpr;
+
+	/* The alphabet range. */
+	char *lowerNum, *upperNum;
+	Key lowKey, highKey;
+	InputLoc rangeLowLoc, rangeHighLoc;
+
+	/* Number of errors encountered parsing the fsm spec. */
+	int errorCount;
+
+	/* Counting the action and priority ordering. 
*/
+	int curActionOrd;
+	int curPriorOrd;
+
+	/* Root of the name tree. */
+	NameInst *curNameInst;
+	int curNameChild;
+	NameInstList nameInstList;
+
+	/* The place where resolved epsilon transitions go. These cannot go into
+	 * the parse tree because a single epsilon op can resolve more than once
+	 * to different nameInsts if the machine it's in is used more than once. */
+	NameVect epsilonResolvedLinks;
+	int nextEpsilonResolvedLink;
+
+	/* Root of the name tree used for doing local name searches. */
+	NameInst *localNameScope;
+
+	void setLmInRetLoc( InlineList *inlineList );
+	void initLongestMatchData();
+
+	/* Counter for assigning ids to longest match items. */
+	int nextTokenId;
+
+	RegionImplList regionImplList;
+	RegionList regionList;
+	RegionSetList regionSetList;
+
+	NamespaceList namespaceList;
+
+	Action *newAction( const String &name, InlineList *inlineList );
+	/* Three actions, each paired with an ordering value. */
+	Action *setTokStart;
+	int setTokStartOrd;
+
+	Action *initActId;
+	int initActIdOrd;
+
+	Action *setTokEnd;
+	int setTokEndOrd;
+
+	CodeBlock *rootCodeBlock;
+	/* Point the global keyOps at this compiler's own KeyOps instance. This
+	 * is the first call made by compile(). */
+	void beginProcessing()
+	{
+		::keyOps = &thisKeyOps;
+	}
+
+	KeyOps thisKeyOps;
+
+	UniqueType *mainReturnUT;
+
+	CharVectVect streamFileNames;
+
+	/* CONTEXT FREE */
+	ProdElList *makeProdElList( LangEl *langEl );
+	void wrapNonTerminals();
+	void makeDefinitionNames();
+	void noUndefindLangEls();
+	void declareBaseLangEls();
+	void makeLangElIds();
+	void makeStructElIds();
+	void makeLangElNames();
+	void makeTerminalWrappers();
+	void makeEofElements();
+	void makeIgnoreCollectors();
+	void resolvePrecedence();
+	void resolveReductionActions();
+	void findReductionActionProds();
+	void resolveReducers();
+
+	Production *findProductionByLabel( LangEl *langEl, String label );
+	/* The first two passes run by compile(), in this order. */
+	void declarePass();
+	void resolvePass();
+
+	/* Parser generation. 
*/
+	void advanceReductions( PdaGraph *pdaGraph );
+	void sortActions( PdaGraph *pdaGraph );
+	void addDupTerms( PdaGraph *pdaGraph );
+	void linkExpansions( PdaGraph *pdaGraph );
+	void lalr1FollowEpsilonOp( PdaGraph *pdaGraph );
+
+	void transferCommits( PdaGraph *pdaGraph, PdaTrans *trans, PdaState *state, long prodId );
+	/* lalr1AddFollow1 and lalr1AddFollow2 are each declared twice. This
+	 * pair takes a FollowToAdd set and a state respectively. */
+	void lalr1AddFollow2( PdaGraph *pdaGraph, PdaTrans *trans, FollowToAdd &followKeys );
+	void lalr1AddFollow1( PdaGraph *pdaGraph, PdaState *state );
+	/* This pair takes a single follow key with a priority, and a
+	 * transition, respectively. */
+	void lalr1AddFollow2( PdaGraph *pdaGraph, PdaTrans *trans, long followKey, long prior );
+	void lalr1AddFollow1( PdaGraph *pdaGraph, PdaTrans *trans );
+
+	void lalr1AddFollowSets( PdaGraph *pdaGraph, LangElSet &parserEls );
+
+	void lr0BringInItem( PdaGraph *pdaGraph, PdaState *dest, PdaState *prodState,
+			PdaTrans *expandFrom, Production *prod );
+	void lr0InvokeClosure( PdaGraph *pdaGraph, PdaState *state );
+	void lr0CloseAllStates( PdaGraph *pdaGraph );
+
+	void lalr1GenerateParser( PdaGraph *pdaGraph, LangElSet &parserEls );
+
+	void reduceActions( PdaGraph *pdaGraph );
+
+	bool makeNonTermFirstSetProd( Production *prod, PdaState *state );
+	void makeNonTermFirstSets();
+
+	bool makeFirstSetProd( Production *prod, PdaState *state );
+	void makeFirstSets();
+	/* Several of the functions below take an in/out reference parameter:
+	 * currLen, time, or result. */
+	int findIndexOff( struct pda_tables *pdaTables, PdaGraph *pdaGraph,
+			PdaState *state, int &currLen );
+	void trySetTime( PdaTrans *trans, long code, long &time );
+	void addRegion( PdaState *tabState, PdaTrans *pdaTrans, long pdaKey,
+			bool noPreIgnore, bool noPostIgnore );
+	PdaState *followProd( PdaState *tabState, PdaState *prodState );
+	void findFollow( AlphSet &result, PdaState *overTab,
+			PdaState *overSrc, Production *parentDef );
+	void pdaActionOrder( PdaGraph *pdaGraph, LangElSet &parserEls );
+	void pdaOrderFollow( LangEl *rootEl, PdaState *tabState,
+			PdaTrans *tabTrans, PdaTrans *srcTrans,
+			Production *parentDef, Production *definition, long &time );
+	void pdaOrderProd( LangEl *rootEl, PdaState *tabState,
+			
PdaState *srcState, Production *parentDef, long &time );
+	void analyzeMachine( PdaGraph *pdaGraph, LangElSet &parserEls );
+
+	void makeProdFsms();
+	void insertUniqueEmptyProductions();
+	void printNonTermFirstSets();
+	void printFirstSets();
+	/* makeRepeatProd and makeListProd take a "left" flag; makeOptProd does
+	 * not. */
+	LangEl *makeRepeatProd( const InputLoc &loc, Namespace *nspace,
+			const String &repeatName, UniqueType *ut, bool left );
+	LangEl *makeListProd( const InputLoc &loc, Namespace *nspace,
+			const String &listName, UniqueType *ut, bool left );
+	LangEl *makeOptProd( const InputLoc &loc, Namespace *nspace,
+			const String &optName, UniqueType *ut );
+	void resolveProdEl( ProdEl *prodEl );
+	void resolveProductionEls();
+
+	void addMatchText( ObjectDef *frame, LangEl *lel );
+	void addMatchLength( ObjectDef *frame, LangEl *lel );
+	void addInput( ObjectDef *frame );
+	void addThis( ObjectDef *frame );
+	void addTransTokVar( ObjectDef *frame, LangEl *lel );
+	void addProdRHSVars( ObjectDef *localFrame, ProdElList *prodElList );
+	void addProdRedObjectVar( ObjectDef *localFrame, LangEl *langEl );
+	void addProdObjects();
+	/* Each of these takes the code vector and an insert position by
+	 * reference. */
+	void addProdRHSLoads( Production *prod, CodeVect &code, long &insertPos );
+	void addProdLHSLoad( Production *prod, CodeVect &code, long &insertPos );
+	void addPushBackLHS( Production *prod, CodeVect &code, long &insertPos );
+	/* parsePattern() runs a single parse and returns its pda_run;
+	 * parsePatterns() calls it for the constructors and patterns. */
+	void prepGrammar();
+	struct pda_run *parsePattern( program_t *prg, tree_t **sp, const InputLoc &loc,
+			int parserId, struct input_impl *sourceStream );
+	void parsePatterns();
+	/* LangElSet is a typedef of BstSet<LangEl*>, so the next three take
+	 * the same set type. */
+	void collectParserEls( LangElSet &parserEls );
+	void makeParser( LangElSet &parserEls );
+	PdaGraph *makePdaGraph( BstSet<LangEl*> &parserEls );
+	struct pda_tables *makePdaTables( PdaGraph *pdaGraph );
+
+	void fillInPatterns( program_t *prg );
+	void makeRuntimeData();
+
+	/* Generate and write out the fsm. 
*/
+	void generateGraphviz();
+
+	void verifyParseStopGrammar( LangEl *langEl, PdaGraph *pdaGraph );
+	void computeAdvanceReductions( LangEl *langEl, PdaGraph *pdaGraph );
+	/* Initializers for generic types. The per-field helpers take the
+	 * field name and an offset. */
+	void initListElField( GenericType *gen, const char *name, int offset );
+	void initListFieldEl( GenericType *gen, const char *name, int offset );
+	void initListFieldVal( GenericType *gen, const char *name, int offset );
+
+	void initListFields( GenericType *gen );
+	void initListFunctions( GenericType *gen );
+
+	void initMapElKey( GenericType *gen, const char *name, int offset );
+	void initMapElField( GenericType *gen, const char *name, int offset );
+	void initMapField( GenericType *gen, const char *name, int offset );
+
+	void initMapFields( GenericType *gen );
+	void initMapFunctions( GenericType *gen );
+
+	void initVectorFunctions( GenericType *gen );
+	void initParserField( GenericType *gen, const char *name,
+			int offset, TypeRef *typeRef );
+	void initParserFunctions( GenericType *gen );
+	void initParserFields( GenericType *gen );
+
+	void addStdin();
+	void addStdout();
+	void addStderr();
+	void addArgv();
+	void addStds();
+	void addError();
+	void addDefineArgs();
+	int argvOffset();
+	int arg0Offset();
+	int stdsOffset();
+	void makeDefaultIterators();
+	void addLengthField( ObjectDef *objDef, code_t getLength );
+	ObjectDef *findObject( const String &name );
+	void resolveListElementOf( ObjectDef *container, ObjectDef *obj, ElementOf *elof );
+	void resolveMapElementOf( ObjectDef *container, ObjectDef *obj, ElementOf *elof );
+	void resolveElementOf( ObjectDef *obj );
+	void makeFuncVisible( Function *func, bool isUserIter );
+	void makeInHostVisible( Function *func );
+	/* declare* members, one per construct: function, reduction code,
+	 * translate block, pre-eof block, root block. */
+	void declareFunction( Function *func );
+	void declareReductionCode( Production *prod );
+	void declareTranslateBlock( LangEl *langEl );
+	void declarePreEof( TokenRegion *region );
+	void declareRootBlock();
+	void declareByteCode();
+	/* resolve* members for the same constructs. */
+	void resolveFunction( Function *func );
+	void resolveInHost( 
Function *func );
+	void resolvePreEof( TokenRegion *region );
+	void resolveRootBlock();
+	void resolveTranslateBlock( LangEl *langEl );
+	void resolveReductionCode( Production *prod );
+	void resolveParseTree();
+	/* compileFunction and compileUserIter each have two overloads: one
+	 * taking an explicit CodeVect and one without. */
+	void compileFunction( Function *func, CodeVect &code );
+	void compileFunction( Function *func );
+	void compileUserIter( Function *func, CodeVect &code );
+	void compileUserIter( Function *func );
+	void compilePreEof( TokenRegion *region );
+	void compileRootBlock();
+	void compileTranslateBlock( LangEl *langEl );
+	void findLocals( ObjectDef *localFrame, CodeBlock *block );
+	void makeProdCopies( Production *prod );
+	void compileReductionCode( Production *prod );
+	void removeNonUnparsableRepls();
+	void compileByteCode();
+	/* compile() runs the compilation passes; generateOutput() writes the
+	 * generated code to the output stream. */
+	void resolveUses();
+	void generateOutput( long activeRealm, bool includeCommit );
+	void compile();
+
+	void openNameSpace( ostream &out, Namespace *nspace );
+	void closeNameSpace( ostream &out, Namespace *nspace );
+	void refNameSpace( LangEl *lel, Namespace *nspace );
+	void generateExports();
+	void generateExportsImpl();
+
+	struct local_info *makeLocalInfo( Locals &locals );
+	short *makeTrees( ObjectDef *objectDef, int &numTrees );
+
+	/*
+	 * Graphviz Generation
+	 */
+	void writeTransList( PdaState *state );
+	void writeDotFile( PdaGraph *graph );
+	void writeDotFile( );
+
+
+	/*
+	 * Data collected during the parse.
+	 */
+
+	LelList langEls;
+	StructElList structEls;
+	DefList prodList;
+
+	/* Dumping. */
+	DotItemIndex dotItemIndex;
+
+	PredDeclList predDeclList;
+
+	/* The name of the file the fsm is from, and the spec name. */
+	// EXISTS IN RL: char *fileName;
+	String parserName;
+	// EXISTS IN RL: InputLoc sectionLoc;
+
+	/* How to access the instance data. */
+	String access;
+
+	/* The name of the token structure. 
*/
+	String tokenStruct;
+
+	GenericType *anyList;
+	GenericType *anyMap;
+	GenericType *anyVector;
+
+	LangEl *ptrLangEl;
+	LangEl *strLangEl;
+	LangEl *anyLangEl;
+	LangEl *rootLangEl;
+	LangEl *noTokenLangEl;
+	LangEl *eofLangEl;
+	LangEl *errorLangEl;
+	LangEl *ignoreLangEl;
+
+	Namespace *rootNamespace;
+
+	int nextLelId;
+	int firstNonTermId;
+	int firstStructElId;
+	int structInbuiltId;
+	int structInputId;
+	int structStreamId;
+	/* langElIndex is allocated by prepGrammar() with nextLelId+1 slots and
+	 * indexed by language element id. */
+	LangEl **langElIndex;
+	PdaState *actionDestState;
+	DefSetSet prodSetSet;
+
+	Production **prodIdIndex;
+	AlphSet literalSet;
+	/* patternList and replList are the lists walked by parsePatterns() and
+	 * collectParserEls(). */
+	PatList patternList;
+	ConsList replList;
+	ParserTextList parserTextList;
+
+	StructDef *global;
+	StructEl *globalSel;
+	ObjectDef *globalObjectDef;
+	ObjectField *arg0;
+	ObjectField *argv;
+	ObjectField *stds;
+	StructDef *argvEl;
+	StructEl *argvElSel;
+	StructEl *stdsElSel;
+
+	StructDef *input;
+	StructDef *stream;
+	StructEl *inputSel;
+	StructEl *streamSel;
+
+	VectorTypeIdMap vectorTypeIdMap;
+	/* findUniqueType overloads: by type id alone, or by type id plus a
+	 * language element, iterator definition, struct element or generic. */
+	UniqueType *findUniqueType( enum TYPE typeId );
+	UniqueType *findUniqueType( enum TYPE typeId, LangEl *langEl );
+	UniqueType *findUniqueType( enum TYPE typeId, IterDef *iterDef );
+	UniqueType *findUniqueType( enum TYPE typeId, StructEl *structEl );
+	UniqueType *findUniqueType( enum TYPE typeId, GenericType *generic );
+	/* findUniqueGeneric overloads: with a key and a value type, or with a
+	 * value type only. */
+	UniqueGeneric *findUniqueGeneric( UniqueGeneric::Type type,
+			UniqueType *utKey, UniqueType *utValue );
+	UniqueGeneric *findUniqueGeneric( UniqueGeneric::Type type,
+			UniqueType *utValue );
+
+	UniqueType *uniqueTypeNil;
+	UniqueType *uniqueTypeVoid;
+	UniqueType *uniqueTypePtr;
+	UniqueType *uniqueTypeBool;
+	UniqueType *uniqueTypeInt;
+	UniqueType *uniqueTypeStr;
+	UniqueType *uniqueTypeIgnore;
+	UniqueType *uniqueTypeAny;
+
+	UniqueType *uniqueTypeInput;
+	UniqueType *uniqueTypeStream;
+	/* NOTE: the first two identifiers below are spelled "uniqe" (sic). */
+	UniqueTypeMap uniqeTypeMap;
+	UniqueRepeatMap uniqeRepeatMap;
+	UniqueGenericMap uniqueGenericMap;
+
+	void declareGlobalFields();
+	void declareStrFields();
+
+	void declareInputField( 
ObjectDef *objDef, code_t getLength );
+	void declareInputFields();
+
+	void declareStreamField( ObjectDef *objDef, code_t getLength );
+	void declareStreamFields();
+
+	void declareIntFields();
+	void declareTokenFields();
+
+	ObjectDef *intObj;
+	ObjectDef *strObj;
+	ObjectDef *inputObj;
+	ObjectDef *streamObj;
+	/* fsmTables is assigned in compile() from redFsm->makeFsmTables();
+	 * runtimeData is allocated in prepGrammar(). */
+	struct fsm_tables *fsmTables;
+	struct colm_sections *runtimeData;
+
+	int nextPatConsId;
+	int nextGenericId;
+
+	FunctionList functionList;
+	FunctionList inHostList;
+	int nextFuncId;
+	int nextHostId;
+	/* The values held by compileContext below. */
+	enum CompileContext {
+		CompileTranslation,
+		CompileReduction,
+		CompileFunction,
+		CompileRoot
+	};
+
+	CompileContext compileContext;
+	LongVect returnJumps;
+	LongVect breakJumps;
+	Function *curFunction;
+
+	/* For stack unwinding. Used at exits, returns, iterator destroy, etc. */
+	CodeVect unwindCode;
+
+	ObjectField *makeDataEl();
+	ObjectField *makeFileEl();
+	ObjectField *makeLineEl();
+	ObjectField *makePosEl();
+
+	IterDef *findIterDef( IterDef::Type type, GenericType *generic );
+	IterDef *findIterDef( IterDef::Type type, Function *func );
+	IterDef *findIterDef( IterDef::Type type );
+	IterDefSet iterDefSet;
+
+	enum GeneratesType { GenToken, GenIgnore, GenCfl };
+
+	int nextObjectId;
+	GeneratesType generatesType;
+	bool generatesIgnore;
+
+	StringMap literalStrings;
+	/* nextParserId is the counter collectParserEls() draws parser ids
+	 * from. */
+	long nextFrameId;
+	long nextParserId;
+
+	ObjectDef *rootLocalFrame;
+
+	bool revertOn;
+
+	RedFsm *redFsm;
+
+	PdaGraph *pdaGraph;
+	struct pda_tables *pdaTables;
+
+	long predValue;
+	long nextMatchEndNum;
+
+	TypeRef *argvTypeRef;
+	TypeRef *stdsTypeRef;
+
+	bool inContiguous;
+	int contiguousOffset;
+	int contiguousStretch;
+
+	void declareReVars();
+
+	void initReductionNeeds( Reduction *reduction );
+	/* findRhsRefs reports its results through the reference parameters
+	 * lhsUsed, rhsUsed, treeUsed and locUsed. */
+	void findRhsRefs( bool &lhsUsed, Vector<ProdEl*> &rhsUsed, Vector<ProdEl*> &treeUsed,
+			Vector<ProdEl*> &locUsed, Reduction *reduction, Production *production,
+			const ReduceTextItemList &list );
+
+	void computeNeeded( Reduction 
*reduction, Production *production,
+			const ReduceTextItemList &list );
+	void computeNeeded();
+
+	void loadRefs( Reduction *reduction, Production *production,
+			const ReduceTextItemList &list, bool read );
+	/* writePostfixReduce and writeParseReduce each have a per-reduction
+	 * overload and a no-argument overload. */
+	void writePostfixReduce( Reduction *reduction );
+	void writeParseReduce( Reduction *reduction );
+
+	void writeParseReduce();
+	void writePostfixReduce();
+	/* writeHostCall() is called from generateOutput(). */
+	void writeHostCall();
+	void writeNeeds();
+	void writeCommit();
+	void writeReduceStructs();
+	void writeReduceDispatchers();
+	void writeUnescape();
+
+	void writeLhsRef( Production *production, ReduceTextItem *i );
+	void writeRhsRef( Production *production, ReduceTextItem *i );
+	void writeTreeRef( Production *production, ReduceTextItem *i );
+	void writeRhsLoc( Production *production, ReduceTextItem *i );
+	void writeHostItemList( Production *production, const ReduceTextItemList &list );
+	void writeCommitStub();
+};
+/* Free functions. Most of them take the Compiler explicitly as pd. */
+void afterOpMinimize( FsmGraph *fsm, bool lastInSeq = true );
+Key makeFsmKeyHex( char *str, const InputLoc &loc, Compiler *pd );
+Key makeFsmKeyDec( char *str, const InputLoc &loc, Compiler *pd );
+Key makeFsmKeyNum( char *str, const InputLoc &loc, Compiler *pd );
+Key makeFsmKeyChar( char c, Compiler *pd );
+void makeFsmKeyArray( Key *result, char *data, int len, Compiler *pd );
+void makeFsmUniqueKeyArray( KeySet &result, char *data, int len,
+		bool caseInsensitive, Compiler *pd );
+FsmGraph *makeBuiltin( BuiltinMachine builtin, Compiler *pd );
+FsmGraph *dotFsm( Compiler *pd );
+FsmGraph *dotStarFsm( Compiler *pd );
+
+void errorStateLabels( const NameSet &locations );
+
+struct ColmParser;
+
+typedef AvlMap<String, ColmParser *, ColmCmpStr> ParserDict;
+typedef AvlMapEl<String, ColmParser *> ParserDictEl;
+/* declareLangEl and addLangEl share one signature: compiler, namespace,
+ * name and element type. */
+LangEl *declareLangEl( Compiler *pd, Namespace *nspace,
+		const String &data, LangEl::Type type );
+LangEl *addLangEl( Compiler *pd, Namespace *nspace,
+		const String &data, LangEl::Type type );
+
+StructEl *declareStruct( Compiler *pd, Namespace *nspace,
+		const String 
&data, StructDef *context ); + +void declareTypeAlias( Compiler *pd, Namespace *nspace, + const String &data, TypeRef *typeRef ); + +LangEl *findType( Compiler *pd, Namespace *nspace, const String &data ); + +ObjectMethod *initFunction( UniqueType *retType, ObjectDef *obj, + ObjectMethod::Type type, const String &name, int methIdWV, int methIdWC, + bool isConst, bool useFnInstr = false, GenericType *useGeneric = 0 ); + +ObjectMethod *initFunction( UniqueType *retType, ObjectDef *obj, + ObjectMethod::Type type, const String &name, int methIdWV, int methIdWC, + UniqueType *arg1, bool isConst, bool useFnInstr = false, + GenericType *useGeneric = 0 ); + +ObjectMethod *initFunction( UniqueType *retType, ObjectDef *obj, + ObjectMethod::Type type, const String &name, int methIdWV, int methIdWC, + UniqueType *arg1, UniqueType *arg2, bool isConst, + bool useFnInstr = false, GenericType *useGeneric = 0 ); + +ObjectMethod *initFunction( UniqueType *retType, Namespace *nspace, ObjectDef *obj, + ObjectMethod::Type type, const String &name, int methIdWV, int methIdWC, + bool isConst, bool useFnInstr = false, GenericType *useGeneric = 0 ); + +ObjectMethod *initFunction( UniqueType *retType, Namespace *nspace, ObjectDef *obj, + ObjectMethod::Type type, const String &name, int methIdWV, int methIdWC, + UniqueType *arg1, bool isConst, bool useFnInstr = false, + GenericType *useGeneric = 0 ); + +ObjectMethod *initFunction( UniqueType *retType, Namespace *nspace, ObjectDef *obj, + ObjectMethod::Type type, const String &name, int methIdWV, int methIdWC, + UniqueType *arg1, UniqueType *arg2, bool isConst, + bool useFnInstr = false, GenericType *useGeneric = 0 ); + +extern "C" struct input_impl *colm_impl_new_pat( char *name, struct Pattern *pattern ); +extern "C" struct input_impl *colm_impl_new_cons( char *name, struct Constructor *constructor ); + +#endif /* _COLM_PARSEDATA_H */ + diff --git a/src/config.h.cmake.in b/src/config.h.cmake.in index 12117f4d..455cffe7 100644 --- 
a/src/config.h.cmake.in +++ b/src/config.h.cmake.in @@ -9,7 +9,6 @@ #cmakedefine HAVE_SYS_WAIT_H 1 #cmakedefine HAVE_UNISTD_H 1 -#cmakedefine SIZEOF_INT @SIZEOF_INT@ #cmakedefine SIZEOF_LONG @SIZEOF_LONG@ #endif /* _COLM_CONFIG_H */ diff --git a/src/consinit.cc b/src/consinit.cc new file mode 100644 index 00000000..4f59b07c --- /dev/null +++ b/src/consinit.cc @@ -0,0 +1,922 @@ +/* + * Copyright 2006-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "consinit.h" + +#include <iostream> + +using std::cout; +using std::cerr; +using std::endl; + +LexTerm *rangeTerm( const char *low, const char *high ) +{ + Literal *lowLit = Literal::cons( internal, String( low ), Literal::LitString ); + Literal *highLit = Literal::cons( internal, String( high ), Literal::LitString ); + Range *range = Range::cons( lowLit, highLit ); + LexFactor *factor = LexFactor::cons( range ); + LexFactorNeg *factorNeg = LexFactorNeg::cons( factor ); + LexFactorRep *factorRep = LexFactorRep::cons( factorNeg ); + LexFactorAug *factorAug = LexFactorAug::cons( factorRep ); + LexTerm *term = LexTerm::cons( factorAug ); + return term; +} + +LexFactorNeg *litFactorNeg( const char *str ) +{ + Literal *lit = Literal::cons( internal, String( str ), Literal::LitString ); + LexFactor *factor = LexFactor::cons( lit ); + LexFactorNeg *factorNeg = LexFactorNeg::cons( factor ); + return factorNeg; +} + +LexFactorAug *litFactorAug( const char *str ) +{ + Literal *lit = Literal::cons( internal, String( str ), Literal::LitString ); + LexFactor *factor = LexFactor::cons( lit ); + LexFactorNeg *factorNeg = LexFactorNeg::cons( factor ); + LexFactorRep *factorRep = LexFactorRep::cons( factorNeg ); + LexFactorAug *factorAug = LexFactorAug::cons( factorRep ); + return factorAug; +} + +LexTerm *litTerm( const char *str ) +{ + Literal *lit = Literal::cons( internal, String( str ), Literal::LitString ); + LexFactor *factor = LexFactor::cons( lit ); + LexFactorNeg *factorNeg = LexFactorNeg::cons( factor ); + LexFactorRep *factorRep = LexFactorRep::cons( factorNeg ); + LexFactorAug *factorAug = LexFactorAug::cons( factorRep ); + LexTerm *term = LexTerm::cons( factorAug ); + return term; +} + +LexExpression *litExpr( const char *str ) +{ + LexTerm *term = litTerm( str ); + LexExpression *expr = LexExpression::cons( term ); + return expr; +} + +LexExpression *orExpr( LexTerm *term1, LexTerm *term2 ) +{ + LexExpression *expr1 = LexExpression::cons( term1 ); + 
return LexExpression::cons( expr1, term2, LexExpression::OrType ); +} + +/* Left-associated union of three terms: ((term1 | term2) | term3). */ +LexExpression *orExpr( LexTerm *term1, LexTerm *term2, LexTerm *term3 ) +{ + LexExpression *expr1 = LexExpression::cons( term1 ); + LexExpression *expr2 = LexExpression::cons( expr1, term2, LexExpression::OrType ); + LexExpression *expr3 = LexExpression::cons( expr2, term3, LexExpression::OrType ); + return expr3; +} + +/* Left-associated union of four terms. */ +LexExpression *orExpr( LexTerm *term1, LexTerm *term2, LexTerm *term3, LexTerm *term4 ) +{ + LexExpression *expr1 = LexExpression::cons( term1 ); + LexExpression *expr2 = LexExpression::cons( expr1, term2, LexExpression::OrType ); + LexExpression *expr3 = LexExpression::cons( expr2, term3, LexExpression::OrType ); + LexExpression *expr4 = LexExpression::cons( expr3, term4, LexExpression::OrType ); + return expr4; +} + +/* Left-associated union of six terms. Every argument is chained into the + * result; callers such as wsIgnore() rely on the fifth and sixth alternatives + * ('\r' and '\f' there) being part of the returned expression. */ +LexExpression *orExpr( LexTerm *term1, LexTerm *term2, LexTerm *term3, + LexTerm *term4, LexTerm *term5, LexTerm *term6 ) +{ + LexExpression *expr1 = LexExpression::cons( term1 ); + LexExpression *expr2 = LexExpression::cons( expr1, term2, LexExpression::OrType ); + LexExpression *expr3 = LexExpression::cons( expr2, term3, LexExpression::OrType ); + LexExpression *expr4 = LexExpression::cons( expr3, term4, LexExpression::OrType ); + LexExpression *expr5 = LexExpression::cons( expr4, term5, LexExpression::OrType ); + LexExpression *expr6 = LexExpression::cons( expr5, term6, LexExpression::OrType ); + return expr6; +} + +/* Wrap expr in a join/factor/neg/rep chain and apply a StarType repetition + * to it, returning the result as a LexFactorAug. */ +LexFactorAug *starFactorAug( LexExpression *expr ) +{ + LexJoin *join = LexJoin::cons( expr ); + LexFactor *factor = LexFactor::cons( join ); + LexFactorNeg *factorNeg = LexFactorNeg::cons( factor ); + LexFactorRep *factorRep = LexFactorRep::cons( factorNeg ); + LexFactorRep *staredRep = LexFactorRep::cons( internal, + factorRep, 0, 0, LexFactorRep::StarType ); + LexFactorAug *factorAug = LexFactorAug::cons( staredRep ); + return factorAug; +} + +/* Convenience overload: lift term to an expression, then star it. */ +LexFactorAug *starFactorAug( LexTerm *term ) +{ + LexExpression *expr = LexExpression::cons( term ); + return starFactorAug( expr ); +} + +/* Convenience overload: lift factorAug to a term, then star it. */ +LexFactorAug *starFactorAug( LexFactorAug *factorAug ) +{ + LexTerm *term = LexTerm::cons( factorAug ); + return starFactorAug( term
); +} + +LexFactorAug *plusFactorAug( LexExpression *expr ) +{ + LexJoin *join = LexJoin::cons( expr ); + LexFactor *factor = LexFactor::cons( join ); + LexFactorNeg *factorNeg = LexFactorNeg::cons( factor ); + LexFactorRep *factorRep = LexFactorRep::cons( factorNeg ); + LexFactorRep *staredRep = LexFactorRep::cons( internal, factorRep, 0, 0, LexFactorRep::PlusType ); + LexFactorAug *factorAug = LexFactorAug::cons( staredRep ); + return factorAug; +} + +LexTerm *concatTerm( LexFactorAug *fa1, LexFactorAug *fa2 ) +{ + LexTerm *term1 = LexTerm::cons( fa1 ); + LexTerm *term2 = LexTerm::cons( term1, fa2, LexTerm::ConcatType ); + return term2; +} + +LexTerm *concatTerm( LexFactorAug *fa1, LexFactorAug *fa2, LexFactorAug *fa3 ) +{ + LexTerm *term1 = LexTerm::cons( fa1 ); + LexTerm *term2 = LexTerm::cons( term1, fa2, LexTerm::ConcatType ); + LexTerm *term3 = LexTerm::cons( term2, fa3, LexTerm::ConcatType ); + return term3; +} + +LexFactorAug *parensFactorAug( LexExpression *expr ) +{ + LexJoin *join = LexJoin::cons( expr ); + LexFactor *factor = LexFactor::cons( join ); + LexFactorNeg *factorNeg = LexFactorNeg::cons( factor ); + LexFactorRep *factorRep = LexFactorRep::cons( factorNeg ); + LexFactorAug *factorAug = LexFactorAug::cons( factorRep ); + return factorAug; +} + +LexFactorNeg *parensFactorNeg( LexExpression *expr ) +{ + LexJoin *join = LexJoin::cons( expr ); + LexFactor *factor = LexFactor::cons( join ); + LexFactorNeg *factorNeg = LexFactorNeg::cons( factor ); + return factorNeg; +} + +LexFactorAug *parensFactorAug( LexTerm *term ) +{ + LexExpression *expr = LexExpression::cons( term ); + LexJoin *join = LexJoin::cons( expr ); + LexFactor *factor = LexFactor::cons( join ); + LexFactorNeg *factorNeg = LexFactorNeg::cons( factor ); + LexFactorRep *factorRep = LexFactorRep::cons( factorNeg ); + LexFactorAug *factorAug = LexFactorAug::cons( factorRep ); + return factorAug; +} + +LexFactorAug *charNegFactorAug( LexExpression *expr ) +{ + LexFactorNeg *factorNeg = 
parensFactorNeg( expr ); + LexFactorNeg *charNeg = LexFactorNeg::cons( factorNeg, LexFactorNeg::CharNegateType ); + LexFactorRep *factorRep = LexFactorRep::cons( charNeg ); + LexFactorAug *factorAug = LexFactorAug::cons( factorRep ); + return factorAug; +} + +LexTerm *charNegTerm( LexExpression *expr ) +{ + LexFactorAug *factorAug = charNegFactorAug( expr ); + LexTerm *term = LexTerm::cons( factorAug ); + return term; +} + +LexTerm *parensTerm( LexExpression *expr ) +{ + LexFactorAug *factorAug = parensFactorAug( expr ); + return LexTerm::cons( factorAug ); +} + +void ConsInit::wsIgnore() +{ + ObjectDef *objectDef = ObjectDef::cons( ObjectDef::UserType, String(), pd->nextObjectId++ ); + + LexTerm *r1 = litTerm( "' '" ); + LexTerm *r2 = litTerm( "'\t'" ); + LexTerm *r3 = litTerm( "'\v'" ); + LexTerm *r4 = litTerm( "'\n'" ); + LexTerm *r5 = litTerm( "'\r'" ); + LexTerm *r6 = litTerm( "'\f'" ); + + LexExpression *whitespace = orExpr( r1, r2, r3, r4, r5, r6 ); + LexFactorAug *whitespaceRep = plusFactorAug( whitespace ); + + LexTerm *term = LexTerm::cons( whitespaceRep ); + LexExpression *expr = LexExpression::cons( term ); + LexJoin *join = LexJoin::cons( expr ); + + defineToken( internal, String(), join, objectDef, 0, true, false, false ); +} + +void ConsInit::commentIgnore() +{ + ObjectDef *objectDef = ObjectDef::cons( ObjectDef::UserType, String(), pd->nextObjectId++ ); + + LexFactorAug *pound = litFactorAug( "'#'" ); + LexExpression *newline = litExpr( "'\\n'" ); + + LexFactorAug *commChars = charNegFactorAug( newline ); + LexFactorAug *restOfLine = starFactorAug( commChars ); + + LexFactorAug *termNewline = litFactorAug( "'\\n'" ); + + LexTerm *concat = concatTerm( pound, restOfLine, termNewline ); + LexExpression *expr = LexExpression::cons( concat ); + + LexJoin *join = LexJoin::cons( expr ); + + defineToken( internal, String(), join, objectDef, 0, true, false, false ); +} + +void ConsInit::idToken() +{ + String hello( "id" ); + + ObjectDef *objectDef = 
ObjectDef::cons( ObjectDef::UserType, hello, pd->nextObjectId++ ); + + LexTerm *r1 = rangeTerm( "'a'", "'z'" ); + LexTerm *r2 = rangeTerm( "'A'", "'Z'" ); + LexTerm *r3 = litTerm( "'_'" ); + LexFactorAug *first = parensFactorAug( orExpr( r1, r2, r3 ) ); + + LexTerm *r4 = rangeTerm( "'a'", "'z'" ); + LexTerm *r5 = rangeTerm( "'A'", "'Z'" ); + LexTerm *r6 = litTerm( "'_'" ); + LexTerm *r7 = rangeTerm( "'0'", "'9'" ); + LexExpression *second = orExpr( r4, r5, r6, r7 ); + LexFactorAug *secondStar = starFactorAug( second ); + + LexTerm *concat = concatTerm( first, secondStar ); + + LexExpression *expr = LexExpression::cons( concat ); + LexJoin *join = LexJoin::cons( expr ); + + defineToken( internal, hello, join, objectDef, 0, false, false, false ); +} + +void ConsInit::literalToken() +{ + String hello( "literal" ); + + ObjectDef *objectDef = ObjectDef::cons( ObjectDef::UserType, hello, pd->nextObjectId++ ); + + LexFactorAug *r1 = litFactorAug( "'\\''" ); + + /* [^'\\] */ + LexExpression *singleQuoteBackSlash = orExpr( + litTerm( "'\\''" ), + litTerm( "'\\\\'" ) ); + + LexTerm *freeChars = charNegTerm( singleQuoteBackSlash ); + + /* '\\' any */ + LexFactorAug *backSlash = litFactorAug( "'\\\\'" ); + LexExpression *any = LexExpression::cons( BT_Any ); + LexTerm *escape = concatTerm( backSlash, parensFactorAug( any ) ); + + /* Union and repeat. 
*/ + LexExpression *charOrEscape = orExpr( freeChars, escape ); + LexFactorAug *r2 = starFactorAug( charOrEscape ); + /* NOTE(review): r1 spells the opening quote as "'\\''" while r3 below uses "'\''" (no backslash inside the literal text); presumably both match a single quote -- confirm. */ + LexFactorAug *r3 = litFactorAug( "'\''" ); + + LexTerm *concat = concatTerm( r1, r2, r3 ); + LexExpression *expr = LexExpression::cons( concat ); + LexJoin *join = LexJoin::cons( expr ); + + defineToken( internal, hello, join, objectDef, 0, false, false, false ); +} +/* Builds a join from the single literal lit and passes it to defineToken() under the given name. */ +void ConsInit::keyword( const String &name, const String &lit ) +{ + ObjectDef *objectDef = ObjectDef::cons( ObjectDef::UserType, name, pd->nextObjectId++ ); + LexTerm *term = litTerm( lit ); + LexExpression *expr = LexExpression::cons( term ); + LexJoin *join = LexJoin::cons( expr ); + defineToken( internal, name, join, objectDef, 0, false, false, false ); +} +/* Unnamed form: forwards kw to literalDef(). */ +void ConsInit::keyword( const String &kw ) +{ + literalDef( internal, kw, false, false ); +} +/* Production element referring to name in the current namespace: no capture field, RepeatNone. */ +ProdEl *ConsInit::prodRefName( const String &name ) +{ + ProdEl *prodEl = prodElName( internal, name, + NamespaceQual::cons( curNspace() ), 0, + RepeatNone, false ); + return prodEl; +} +/* Like the one-argument overload, but attaches an RhsNameType ObjectField called capture. */ +ProdEl *ConsInit::prodRefName( const String &capture, const String &name ) +{ + ObjectField *captureField = ObjectField::cons( internal, + ObjectField::RhsNameType, 0, capture ); + ProdEl *prodEl = prodElName( internal, name, + NamespaceQual::cons( curNspace() ), captureField, + RepeatNone, false ); + return prodEl; +} +/* Same as prodRefName( name ) except the element is built with RepeatLeftRepeat. */ +ProdEl *ConsInit::prodRefNameLeftRepeat( const String &name ) +{ + ProdEl *prodEl = prodElName( internal, name, + NamespaceQual::cons( curNspace() ), 0, + RepeatLeftRepeat, false ); + return prodEl; +} +/* Same as prodRefName( capture, name ) except the element is built with RepeatLeftRepeat. */ +ProdEl *ConsInit::prodRefNameLeftRepeat( const String &capture, const String &name ) +{ + ObjectField *captureField = ObjectField::cons( internal, + ObjectField::RhsNameType, 0, capture ); + ProdEl *prodEl = prodElName( internal, name, + NamespaceQual::cons( curNspace() ), captureField, + RepeatLeftRepeat, false ); + return prodEl; +} +/* Production element for the literal lit, built with prodElLiteral(). */ +ProdEl *ConsInit::prodRefLit( const String &lit ) +{ + ProdEl *prodEl = prodElLiteral(
internal, lit, + NamespaceQual::cons( curNspace() ), 0, + RepeatNone, false ); + return prodEl; +} + +Production *ConsInit::production() +{ + ProdElList *prodElList = new ProdElList; + return BaseParser::production( internal, prodElList, String(), false, 0, 0 ); +} + +Production *ConsInit::production( ProdEl *prodEl1 ) +{ + ProdElList *prodElList = new ProdElList; + appendProdEl( prodElList, prodEl1 ); + return BaseParser::production( internal, prodElList, String(), false, 0, 0 ); +} + +Production *ConsInit::production( ProdEl *prodEl1, ProdEl *prodEl2 ) +{ + ProdElList *prodElList = new ProdElList; + appendProdEl( prodElList, prodEl1 ); + appendProdEl( prodElList, prodEl2 ); + return BaseParser::production( internal, prodElList, String(), false, 0, 0 ); +} + +Production *ConsInit::production( ProdEl *prodEl1, ProdEl *prodEl2, + ProdEl *prodEl3 ) +{ + ProdElList *prodElList = new ProdElList; + appendProdEl( prodElList, prodEl1 ); + appendProdEl( prodElList, prodEl2 ); + appendProdEl( prodElList, prodEl3 ); + return BaseParser::production( internal, prodElList, String(), false, 0, 0 ); +} + +Production *ConsInit::production( ProdEl *prodEl1, ProdEl *prodEl2, + ProdEl *prodEl3, ProdEl *prodEl4 ) +{ + ProdElList *prodElList = new ProdElList; + appendProdEl( prodElList, prodEl1 ); + appendProdEl( prodElList, prodEl2 ); + appendProdEl( prodElList, prodEl3 ); + appendProdEl( prodElList, prodEl4 ); + return BaseParser::production( internal, prodElList, String(), false, 0, 0 ); +} + +Production *ConsInit::production( ProdEl *prodEl1, ProdEl *prodEl2, + ProdEl *prodEl3, ProdEl *prodEl4, ProdEl *prodEl5 ) +{ + ProdElList *prodElList = new ProdElList; + appendProdEl( prodElList, prodEl1 ); + appendProdEl( prodElList, prodEl2 ); + appendProdEl( prodElList, prodEl3 ); + appendProdEl( prodElList, prodEl4 ); + appendProdEl( prodElList, prodEl5 ); + return BaseParser::production( internal, prodElList, String(), false, 0, 0 ); +} + +Production *ConsInit::production( ProdEl 
*prodEl1, ProdEl *prodEl2, + ProdEl *prodEl3, ProdEl *prodEl4, ProdEl *prodEl5, + ProdEl *prodEl6, ProdEl *prodEl7 ) +{ + ProdElList *prodElList = new ProdElList; + appendProdEl( prodElList, prodEl1 ); + appendProdEl( prodElList, prodEl2 ); + appendProdEl( prodElList, prodEl3 ); + appendProdEl( prodElList, prodEl4 ); + appendProdEl( prodElList, prodEl5 ); + appendProdEl( prodElList, prodEl6 ); + appendProdEl( prodElList, prodEl7 ); + return BaseParser::production( internal, prodElList, String(), false, 0, 0 ); +} + +void ConsInit::definition( const String &name, Production *prod1, Production *prod2, + Production *prod3, Production *prod4 ) +{ + LelDefList *defList = new LelDefList; + prodAppend( defList, prod1 ); + prodAppend( defList, prod2 ); + prodAppend( defList, prod3 ); + prodAppend( defList, prod4 ); + + NtDef *ntDef = NtDef::cons( name, curNspace(), curStruct(), false ); + ObjectDef *objectDef = ObjectDef::cons( ObjectDef::UserType, + name, pd->nextObjectId++ ); + cflDef( ntDef, objectDef, defList ); +} + +void ConsInit::definition( const String &name, Production *prod1, + Production *prod2, Production *prod3 ) +{ + LelDefList *defList = new LelDefList; + prodAppend( defList, prod1 ); + prodAppend( defList, prod2 ); + prodAppend( defList, prod3 ); + + NtDef *ntDef = NtDef::cons( name, curNspace(), curStruct(), false ); + ObjectDef *objectDef = ObjectDef::cons( ObjectDef::UserType, + name, pd->nextObjectId++ ); + cflDef( ntDef, objectDef, defList ); +} + +void ConsInit::definition( const String &name, Production *prod1, Production *prod2 ) +{ + LelDefList *defList = new LelDefList; + prodAppend( defList, prod1 ); + prodAppend( defList, prod2 ); + + NtDef *ntDef = NtDef::cons( name, curNspace(), curStruct(), false ); + ObjectDef *objectDef = ObjectDef::cons( ObjectDef::UserType, + name, pd->nextObjectId++ ); + cflDef( ntDef, objectDef, defList ); +} + +void ConsInit::definition( const String &name, Production *prod ) +{ + LelDefList *defList = new LelDefList; 
+ prodAppend( defList, prod ); + + NtDef *ntDef = NtDef::cons( name, curNspace(), curStruct(), false ); + ObjectDef *objectDef = ObjectDef::cons( ObjectDef::UserType, + name, pd->nextObjectId++ ); + cflDef( ntDef, objectDef, defList ); +} + +void ConsInit::lexFactor() +{ + ProdEl *prodEl1 = prodRefName( "Literal", "literal" ); + Production *prod1 = production( prodEl1 ); + + ProdEl *prodEl8 = prodRefName( "Id", "id" ); + Production *prod4 = production( prodEl8 ); + + ProdEl *prodEl2 = prodRefLit( "'('" ); + ProdEl *prodEl3 = prodRefName( "Expr", "lex_expr" ); + ProdEl *prodEl4 = prodRefLit( "')'" ); + Production *prod2 = production( prodEl2, prodEl3, prodEl4 ); + + ProdEl *prodEl5 = prodRefName( "Low", "literal" ); + ProdEl *prodEl6 = prodRefLit( "'..'" ); + ProdEl *prodEl7 = prodRefName( "High", "literal" ); + Production *prod3 = production( prodEl5, prodEl6, prodEl7 ); + + definition( "lex_factor", prod1, prod2, prod3, prod4 ); +} + +void ConsInit::lexFactorNeg() +{ + ProdEl *prodEl1 = prodRefLit( "'^'" ); + ProdEl *prodEl2 = prodRefName( "FactorNeg", "lex_factor_neg" ); + Production *prod1 = production( prodEl1, prodEl2 ); + + ProdEl *prodEl3 = prodRefName( "Factor", "lex_factor" ); + Production *prod2 = production( prodEl3 ); + + definition( "lex_factor_neg", prod1, prod2 ); +} + +void ConsInit::lexFactorRep() +{ + ProdEl *prodEl1 = prodRefName( "FactorRep", "lex_factor_rep" ); + ProdEl *prodEl2 = prodRefName( "Star", "STAR" ); + Production *prod1 = production( prodEl1, prodEl2 ); + + ProdEl *prodEl3 = prodRefName( "FactorRep", "lex_factor_rep" ); + ProdEl *prodEl4 = prodRefName( "Plus", "PLUS" ); + Production *prod2 = production( prodEl3, prodEl4 ); + + ProdEl *prodEl5 = prodRefName( "FactorNeg", "lex_factor_neg" ); + Production *prod3 = production( prodEl5 ); + + definition( "lex_factor_rep", prod1, prod2, prod3 ); +} + +void ConsInit::lexTerm() +{ + ProdEl *prodEl1 = prodRefName( "Term", "lex_term" ); + ProdEl *prodEl2 = prodRefName( "Dot", "DOT" ); + ProdEl 
*prodEl3 = prodRefName( "FactorRep", "lex_factor_rep" ); + Production *prod1 = production( prodEl1, prodEl2, prodEl3 ); + + ProdEl *prodEl4 = prodRefName( "Term", "lex_term" ); + ProdEl *prodEl5 = prodRefName( "ColonLt", "COLON_LT" ); + ProdEl *prodEl6 = prodRefName( "FactorRep", "lex_factor_rep" ); + Production *prod2 = production( prodEl4, prodEl5, prodEl6 ); + + ProdEl *prodEl7 = prodRefName( "FactorRep", "lex_factor_rep" ); + Production *prod3 = production( prodEl7 ); + + definition( "lex_term", prod1, prod2, prod3 ); +} + +void ConsInit::lexExpr() +{ + ProdEl *prodEl1 = prodRefName( "Expr", "lex_expr" ); + ProdEl *prodEl2 = prodRefLit( "'|'" ); + ProdEl *prodEl3 = prodRefName( "Term", "lex_term" ); + Production *prod1 = production( prodEl1, prodEl2, prodEl3 ); + + ProdEl *prodEl4 = prodRefName( "Term", "lex_term" ); + Production *prod2 = production( prodEl4 ); + + definition( "lex_expr", prod1, prod2 ); +} + +void ConsInit::token() +{ + ProdEl *prodEl1 = prodRefLit( "'token'" ); + ProdEl *prodEl2 = prodRefName( "Id", "id" ); + ProdEl *prodEl3 = prodRefName( "LeftNi", "opt_ni" ); + ProdEl *prodEl4 = prodRefLit( "'/'" ); + ProdEl *prodEl5 = prodRefName( "Expr", "lex_expr" ); + ProdEl *prodEl6 = prodRefLit( "'/'" ); + ProdEl *prodEl7 = prodRefName( "RightNi", "opt_ni" ); + Production *prod1 = production( prodEl1, prodEl2, prodEl3, + prodEl4, prodEl5, prodEl6, prodEl7 ); + definition( "token_def", prod1 ); +} + +void ConsInit::ignore() +{ + ProdEl *prodEl1 = prodRefLit( "'ignore'" ); + ProdEl *prodEl2 = prodRefLit( "'/'" ); + ProdEl *prodEl3 = prodRefName( "Expr", "lex_expr" ); + ProdEl *prodEl4 = prodRefLit( "'/'" ); + Production *prod1 = production( prodEl1, prodEl2, prodEl3, prodEl4 ); + definition( "ignore_def", prod1 ); +} + +void ConsInit::tokenList() +{ + ProdEl *prodEl1 = prodRefName( "TokenList", "token_list" ); + ProdEl *prodEl2 = prodRefName( "TokenDef", "token_def" ); + Production *prod1 = production( prodEl1, prodEl2 ); + + ProdEl *prodEl3 = 
prodRefName( "TokenList", "token_list" ); + ProdEl *prodEl4 = prodRefName( "IgnoreDef", "ignore_def" ); + Production *prod2 = production( prodEl3, prodEl4 ); + + Production *prod3 = production(); + + definition( "token_list", prod1, prod2, prod3 ); +} + +Production *ConsInit::prodLex() +{ + ProdEl *prodEl1 = prodRefLit( "'lex'" ); + ProdEl *prodEl2 = prodRefName( "TokenList", "token_list" ); + ProdEl *prodEl3 = prodRefLit( "'end'" ); + + return production( prodEl1, prodEl2, prodEl3 ); +} + +void ConsInit::optProdElName() +{ + ProdEl *prodEl1 = prodRefName( "Name", "id" ); + ProdEl *prodEl2 = prodRefLit( "':'" ); + Production *prod1 = production( prodEl1, prodEl2 ); + + Production *prod2 = production(); + + definition( "opt_prod_el_name", prod1, prod2 ); +} + +void ConsInit::optNi() +{ + ProdEl *prodEl1 = prodRefLit( "'-'" ); + ProdEl *prodEl2 = prodRefName( "Ni", "NI" ); + Production *prod1 = production( prodEl1, prodEl2 ); + + Production *prod2 = production(); + + definition( "opt_ni", prod1, prod2 ); +} + +void ConsInit::optRepeat() +{ + ProdEl *prodEl1 = prodRefName( "Star", "STAR" ); + Production *prod1 = production( prodEl1 ); + + ProdEl *prodEl2 = prodRefName( "LeftStar", "LEFT_STAR" ); + Production *prod2 = production( prodEl2 ); + + Production *prod3 = production(); + + definition( "opt_prod_repeat", prod1, prod2, prod3 ); +} + +void ConsInit::prodEl() +{ + ProdEl *prodEl1 = prodRefName( "OptName", "opt_prod_el_name" ); + ProdEl *prodEl2 = prodRefName( "Id", "id" ); + ProdEl *prodEl3 = prodRefName( "OptRepeat", "opt_prod_repeat" ); + Production *prod1 = production( prodEl1, prodEl2, prodEl3 ); + + definition( "prod_el", prod1 ); +} + +void ConsInit::prodElList() +{ + ProdEl *prodEl1 = prodRefName( "ProdElList", "prod_el_list" ); + ProdEl *prodEl2 = prodRefName( "ProdEl", "prod_el" ); + Production *prod1 = production( prodEl1, prodEl2 ); + + Production *prod2 = production(); + + definition( "prod_el_list", prod1, prod2 ); +} + +void ConsInit::optCommit() +{ 
+ ProdEl *prodEl1 = prodRefName( "Commit", "COMMIT" ); + Production *prod1 = production( prodEl1 ); + + Production *prod2 = production(); + + definition( "opt_commit", prod1, prod2 ); +} +/* opt_prod_name: ':' id (captured as Name), or empty. */ +void ConsInit::optProdName() +{ + ProdEl *prodEl1 = prodRefLit( "':'" ); + ProdEl *prodEl2 = prodRefName( "Name", "id" ); + Production *prod1 = production( prodEl1, prodEl2 ); + + Production *prod2 = production(); + + definition( "opt_prod_name", prod1, prod2 ); +} +/* prod_var_def: id ':' id, with the two ids captured as Name and Type. */ +void ConsInit::prodVarDef() +{ + ProdEl *prodEl1 = prodRefName( "Name", "id" ); + ProdEl *prodEl2 = prodRefLit( "':'" ); + ProdEl *prodEl3 = prodRefName( "Type", "id" ); + Production *prod1 = production( prodEl1, prodEl2, prodEl3 ); + + definition( "prod_var_def", prod1 ); +} + +/* The prod var list is provided in a basic form, just "id: id". We won't make + * use of it in bootstrap0 or bootstrap1; it is ignored in the loader for + * bootstrap1. We want to use it in bootstrap2 during the rewrite stage. */ +void ConsInit::prodVarList() +{ + ProdEl *prodEl1 = prodRefName( "VarDefList", "prod_var_list" ); + ProdEl *prodEl2 = prodRefName( "VarDef", "prod_var_def" ); + Production *prod1 = production( prodEl1, prodEl2 ); + + Production *prod2 = production(); + + definition( "prod_var_list", prod1, prod2 ); +} +/* prod: '[' prod_el_list ']' opt_prod_name opt_commit. */ +void ConsInit::prod() +{ + ProdEl *prodEl1 = prodRefLit( "'['" ); + ProdEl *prodEl2 = prodRefName( "ProdElList", "prod_el_list" ); + ProdEl *prodEl3 = prodRefLit( "']'" ); + ProdEl *prodEl4 = prodRefName( "OptName", "opt_prod_name" ); + ProdEl *prodEl5 = prodRefName( "OptCommit", "opt_commit" ); + Production *prod1 = production( prodEl1, prodEl2, prodEl3, prodEl4, prodEl5 ); + + definition( "prod", prod1 ); +} +/* prod_list: prod_list '|' prod, or a single prod. */ +void ConsInit::prodList() +{ + ProdEl *prodEl1 = prodRefName( "ProdList", "prod_list" ); + ProdEl *prodEl2 = prodRefLit( "'|'" ); + ProdEl *prodEl3 = prodRefName( "Prod", "prod" ); + Production *prod1 = production( prodEl1, prodEl2, prodEl3 ); + + ProdEl *prodEl4 = prodRefName( "Prod", "prod" ); +
Production *prod2 = production( prodEl4 ); + + definition( "prod_list", prod1, prod2 ); +} +/* Production for: 'def' id prod_var_list prod_list. Returned to the caller rather than defined here. */ +Production *ConsInit::prodProd() +{ + ProdEl *prodEl1 = prodRefLit( "'def'" ); + ProdEl *prodEl2 = prodRefName( "DefId", "id" ); + ProdEl *prodEl3 = prodRefName( "ProdVarList", "prod_var_list" ); + ProdEl *prodEl4 = prodRefName( "ProdList", "prod_list" ); + + return production( prodEl1, prodEl2, prodEl3, prodEl4 ); +} +/* item: either the prodLex() production or the prodProd() production. */ +void ConsInit::item() +{ + Production *prod1 = prodLex(); + Production *prod2 = prodProd(); + definition( "item", prod1, prod2 ); +} +/* start: a left-repeat reference to item, captured as ItemList. */ +void ConsInit::startProd() +{ + ProdEl *prodEl1 = prodRefNameLeftRepeat( "ItemList", "item" ); + Production *prod1 = production( prodEl1 ); + + definition( "start", prod1 ); +} +/* Appends to stmtList: a local A assigned from argv->pop(), then a parse of "start" over open( A, "r" ) captured to P. */ +void ConsInit::parseInput( StmtList *stmtList ) +{ + /* Pop argv; this yields the file name. */ + CallArgVect *popArgs = new CallArgVect; + QualItemVect *popQual = new QualItemVect; + popQual->append( QualItem( QualItem::Arrow, internal, String( "argv" ) ) ); + + LangVarRef *popRef = LangVarRef::cons( internal, + curNspace(), 0, curLocalFrame()->rootScope, + NamespaceQual::cons( curNspace() ), popQual, String("pop") ); + LangExpr *pop = LangExpr::cons( LangTerm::cons( InputLoc(), popRef, popArgs ) ); + + TypeRef *typeRef = TypeRef::cons( internal, pd->uniqueTypeStr ); + ObjectField *objField = ObjectField::cons( internal, + ObjectField::UserLocalType, typeRef, "A" ); + + LangStmt *stmt = varDef( objField, pop, LangStmt::AssignType ); + stmtList->append( stmt ); + + /* Construct a literal string 'r', for second arg to open.
*/ + ConsItem *modeConsItem = ConsItem::cons( internal, + ConsItem::InputText, String("r") ); + ConsItemList *modeCons = new ConsItemList; + modeCons->append( modeConsItem ); + LangExpr *modeExpr = LangExpr::cons( LangTerm::cons( internal, modeCons ) ); + + /* Reference the local A. The qualifier vector is left empty, so no member of A is selected. */ + QualItemVect *qual = new QualItemVect; + LangVarRef *varRef = LangVarRef::cons( internal, curNspace(), 0, + curLocalFrame()->rootScope, NamespaceQual::cons( curNspace() ), + qual, String("A") ); + LangExpr *Avalue = LangExpr::cons( LangTerm::cons( internal, + LangTerm::VarRefType, varRef ) ); + + /* Call open with A and the mode string. NOTE(review): the second argument to LangVarRef::cons is 0 here, whereas popRef and varRef pass curNspace() -- confirm this is intended. */ + QualItemVect *openQual = new QualItemVect; + LangVarRef *openRef = LangVarRef::cons( internal, + 0, 0, curLocalFrame()->rootScope, + NamespaceQual::cons( curNspace() ), openQual, String("open") ); + CallArgVect *openArgs = new CallArgVect; + openArgs->append( new CallArg(Avalue) ); + openArgs->append( new CallArg(modeExpr) ); + LangExpr *open = LangExpr::cons( LangTerm::cons( InputLoc(), openRef, openArgs ) ); + + /* Construct a list containing the open stream. */ + ConsItem *consItem = ConsItem::cons( internal, ConsItem::ExprType, open, ConsItem::TrimDefault ); + ConsItemList *list = ConsItemList::cons( consItem ); + + /* Will capture the parser to "P". objField is reused; this field is created with a null type ref. */ + objField = ObjectField::cons( internal, + ObjectField::UserLocalType, 0, String("P") ); + + /* Parse the "start" def. typeRef is reused for the type named "start" in the current namespace. */ + NamespaceQual *nspaceQual = NamespaceQual::cons( curNspace() ); + typeRef = TypeRef::cons( internal, nspaceQual, + String("start"), RepeatNone ); + + /* Parse the above list.
*/ + LangExpr *parseExpr = parseCmd( internal, false, false, objField, + typeRef, 0, list, true, false, false, "" ); + LangStmt *parseStmt = LangStmt::cons( internal, LangStmt::ExprType, parseExpr ); + stmtList->append( parseStmt ); +} + +void ConsInit::exportTree( StmtList *stmtList ) +{ + /* reference P */ + QualItemVect *qual = new QualItemVect; + LangVarRef *varRef = LangVarRef::cons( internal, curNspace(), 0, + curLocalFrame()->rootScope, NamespaceQual::cons( curNspace() ), qual, String("P") ); + LangExpr *expr = LangExpr::cons( LangTerm::cons( internal, + LangTerm::VarRefType, varRef ) ); + + /* Assign P to ColmTree */ + NamespaceQual *nspaceQual = NamespaceQual::cons( curNspace() ); + TypeRef *typeRef = TypeRef::cons( internal, nspaceQual, String("start"), RepeatNone ); + ObjectField *program = ObjectField::cons( internal, + ObjectField::StructFieldType, typeRef, String("ColmTree") ); + LangStmt *programExport = exportStmt( program, LangStmt::AssignType, expr ); + stmtList->append( programExport ); +} + +void ConsInit::go( long activeRealm ) +{ + ConsInit::init(); + + StmtList *stmtList = new StmtList; + + /* The token region */ + pushRegionSet( internal ); + + wsIgnore(); + commentIgnore(); + + keyword( "'def'" ); + keyword( "'lex'" ); + keyword( "'end'" ); + keyword( "'token'" ); + keyword( "'ignore'" ); + keyword( "NI", "'ni'" ); + keyword( "COMMIT", "'commit'" ); + + idToken(); + literalToken(); + + keyword( "STAR", "'*'"); + keyword( "PLUS", "'+'"); + keyword( "LEFT_STAR", "'<*'"); + keyword( "'['" ); + keyword( "']'" ); + keyword( "'|'" ); + keyword( "'/'" ); + keyword( "':'" ); + keyword( "DOT", "'.'" ); + keyword( "COLON_LT", "':>'" ); + keyword( "'('" ); + keyword( "')'" ); + keyword( "'..'" ); + keyword( "'^'" ); + keyword( "'-'" ); + + popRegionSet(); + + lexFactor(); + lexFactorNeg(); + lexFactorRep(); + lexTerm(); + lexExpr(); + + optNi(); + optRepeat(); + optProdElName(); + prodEl(); + prodElList(); + optCommit(); + optProdName(); + 
prodVarDef(); + prodVarList(); + prod(); + prodList(); + ignore(); + token(); + tokenList(); + item(); + startProd(); + + parseInput( stmtList ); + exportTree( stmtList ); + + pd->rootCodeBlock = CodeBlock::cons( stmtList, 0 ); +} diff --git a/src/consinit.h b/src/consinit.h new file mode 100644 index 00000000..614f19d4 --- /dev/null +++ b/src/consinit.h @@ -0,0 +1,113 @@ +/* + * Copyright 2013-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <iostream> + +#include <avltree.h> + +#include "compiler.h" +#include "parser.h" + +#ifndef _COLM_CONSINIT_H +#define _COLM_CONSINIT_H + +struct ConsInit +: + public BaseParser +{ + ConsInit( Compiler *pd ) + : + BaseParser(pd) + {} + + ProdEl *prodRefName( const String &name ); + ProdEl *prodRefName( const String &capture, const String &name ); + ProdEl *prodRefNameLeftRepeat( const String &name ); + ProdEl *prodRefNameLeftRepeat( const String &capture, const String &name ); + ProdEl *prodRefLit( const String &lit ); + + Production *production(); + Production *production( ProdEl *prodEl1 ); + Production *production( ProdEl *prodEl1, ProdEl *prodEl2 ); + Production *production( ProdEl *prodEl1, ProdEl *prodEl2, + ProdEl *prodEl3 ); + Production *production( ProdEl *prodEl1, ProdEl *prodEl2, + ProdEl *prodEl3, ProdEl *prodEl4 ); + Production *production( ProdEl *prodEl1, ProdEl *prodEl2, + ProdEl *prodEl3, ProdEl *prodEl4, ProdEl *prodEl5 ); + Production *production( ProdEl *prodEl1, ProdEl *prodEl2, + ProdEl *prodEl3, ProdEl *prodEl4, ProdEl *prodEl5, + ProdEl *prodEl6, ProdEl *prodEl7 ); + + void definition( const String &name, Production *prod ); + void definition( const String &name, Production *prod1, Production *prod2 ); + void definition( const String &name, Production *prod1, Production *prod2, Production *prod3 ); + void definition( const String &name, Production *prod1, Production *prod2, + Production *prod3, Production *prod4 ); + + void keyword( const String &name, const String &lit ); + void keyword( const String &kw ); + + void printParseTree( StmtList *stmtList ); + void printParseTree(); + + void literalToken(); + void commentIgnore(); + void wsIgnore(); + void idToken(); + + void token(); + void ignore(); + void tokenList(); + + void lexFactor(); + void lexFactorNeg(); + void lexFactorRep(); + void lexExpr(); + void lexTerm(); + + Production *prodProd(); + Production *prodLex(); + + void optNi(); + void optRepeat(); + void 
optProdElName(); + void prodEl(); + void prodElList(); + void varDefList(); + void item(); + void prodVarDef(); + void prodVarList(); + void prodList(); + void optProdName(); + void prod(); + void startProd(); + void optCommit(); + + void parseInput( StmtList *stmtList ); + void exportTree( StmtList *stmtList ); + + virtual void go( long activeRealm ); +}; + +#endif /* _COLM_CONSINIT_H */ + diff --git a/src/cstring.h b/src/cstring.h new file mode 100644 index 00000000..3c285153 --- /dev/null +++ b/src/cstring.h @@ -0,0 +1,862 @@ +/* + * Copyright 2002-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef _AAPL_ASTRING_H +#define _AAPL_ASTRING_H + +#include <stdlib.h> +#include <stdarg.h> +#include <stdio.h> +#include <string.h> +#include <assert.h> + +#include <new> +#include <iostream> + +#include "tree.h" + +struct colm_data; + +#ifdef AAPL_NAMESPACE +namespace Aapl { +#endif + +#ifdef AAPL_DOCUMENTATION + +/** + * \defgroup astring String + * \brief Implicitly shared copy-on-write string. + * + * @{ + */ + +/** + * \class String + * \brief Implicitly shared copy-on-write string. + */ + +/*@}*/ + +class String +{ +public: + /** + * \brief Create a null string. Data points to NULL. + */ + String(); + + /** + * \brief Construct a string from a c-style string. + * + * A new buffer is allocated for the c string. Initially, this string will + * be the only String class referencing the data. + */ + String( const char *s ); + + /** + * \brief Construct a string from a c-style string of specific length. + * + * A new buffer is allocated for the c string. Initially, this string will + * be the only String class referencing the data. + */ + String( const char *s, long len ); + + /** + * \brief Construct a string from another String. + * + * A refernce to the buffer allocated for s is taken. A new buffer is + * not allocated. + */ + String( const String &s ); + + /** + * \brief Construct a string using snprintf. + * + * Requires a maximum length for the resulting string. If the formatting + * (not including trailing null) requires more space than maxLen, the + * result will be truncated to maxLen long. Only the length actually + * written will be used by the new string. This string will be the only + * String class referencing the data. + */ + String( long maxLen, const char *format, ... ) + + /** + * \brief Clean up the string. + * + * If the string is not null, the referenced data is detached. If no other + * string refernces the detached data, it is deleted. + */ + ~String(); + + /** + * \brief Set the string from a c-style string. 
+ * + * If this string is not null, the current buffer is dereferenced and + * possibly deleted. A new buffer is allocated (or possibly the old buffer + * reused) for the string. Initially, this string will be the only String + * class referencing the data. + * + * If s is null, then this string becomes a null ptr. + * + * \returns A reference to this. + */ + String &operator=( const char *s ); + + /** + * \brief Set the string from a c-style of specific length. + * + * If this string is not null, the current buffer is dereferenced and + * possibly deleted. A new buffer is allocated (or possibly the old buffer + * reused) for the string. Initially, this string will be the only String + * class referencing the data. + * + * If s is null, then this string becomes a null ptr. + * + * \returns A reference to this. + */ + void setAs( const char *s, long len ); + + /** + * \brief Set the string from a single char. + * + * The current buffer is dereferenced and possibly deleted. A new buffer + * is allocated (or possibly the old buffer reused) for the string. + * Initially, this string will be the only String class referencing the + * data. + * + * If s is null, then this string becomes a null ptr. + * + * \returns A reference to this. + */ + String &operator=( const char c ); + + + /** + * \brief Set the string from another String. + * + * If this string is not null, the current buffer is dereferenced and + * possibly deleted. A reference to the buffer allocated for s is taken. + * A new buffer is not allocated. + * + * If s is null, then this string becomes a null ptr. + * + * \returns a reference to this. + */ + String &operator=( const String &s ); + + /** + * \brief Append a c string to the end of this string. + * + * If this string shares its allocation with another, a copy is first + * taken. The buffer for this string is grown and s is appended to the + * end. + * + * If s is null nothing happens. + * + * \returns a reference to this. 
+ */ + String &operator+=( const char *s ); + + /** + * \brief Append a c string of specific length to the end of this string. + * + * If this string shares its allocation with another, a copy is first + * taken. The buffer for this string is grown and s is appended to the + * end. + * + * If s is null nothing happens. + * + * \returns a reference to this. + */ + void append( const char *s, long len ); + + /** + * \brief Append a single char to the end of this string. + * + * If this string shares its allocation with another, a copy is first + * taken. The buffer for this string is grown and s is appended to the + * end. + * + * \returns a reference to this. + */ + String &operator+=( const char c ); + + /** + * \brief Append a String to the end of this string. + * + * If this string shares its allocation with another, a copy is first + * taken. The buffer for this string is grown and the data of s is + * appeneded to the end. + * + * If s is null nothing happens. + * + * returns a reference to this. + */ + String &operator+=( const String &s ); + + /** + * \brief Cast to a char star. + * + * \returns the string data. A null string returns 0. + */ + operator char*() const; + + /** + * \brief Get a pointer to the data. + * + * \returns the string Data + */ + char *get() const; + + /** + * \brief Get the length of the string + * + * If the string is null, then undefined behaviour results. + * + * \returns the length of the string. + */ + long length() const; + + /** + * \brief Pointer to the data. + * + * Publically accessible pointer to the data. Immediately in front of the + * string data block is the string header which stores the refcount and + * length. Consequently, care should be taken if modifying this pointer. + */ + char *data; +}; + +/** + * \relates String + * \brief Concatenate a c-style string and a String. + * + * \returns The concatenation of the two strings in a String. 
+ */ +String operator+( const String &s1, const char *s2 ); + +/** + * \relates String + * \brief Concatenate a String and a c-style string. + * + * \returns The concatenation of the two strings in a String. + */ +String operator+( const char *s1, const String &s2 ); + +/** + * \relates String + * \brief Concatenate two String classes. + * + * \returns The concatenation of the two strings in a String. + */ +String operator+( const String &s1, const String &s2 ); + +#endif + +template<class T> class StrTmpl +{ +public: + class Fresh {}; + + /* Header located just before string data. Keeps the length and a refcount on + * the data. */ + struct Head + { + long refCount; + long length; + }; + + /** + * \brief Create a null string. + */ + StrTmpl() : data(0) { } + + /* Clean up the string. */ + ~StrTmpl(); + + /* Construct a string from a c-style string. */ + StrTmpl( const char *s ); + + /* Construct a string from a c-style string of specific len. */ + StrTmpl( const char *s, long len ); + + /* Allocate len spaces. */ + StrTmpl( const Fresh &, long len ); + + /* Construct a string from another StrTmpl. */ + StrTmpl( const StrTmpl &s ); + + /* Construct a string from with, sprintf. */ + StrTmpl( long lenGuess, const char *format, ... ); + + /* Construct a string from with, sprintf. */ + StrTmpl( const colm_data *cd ); + + /* Set the string from a c-style string. */ + StrTmpl &operator=( const char *s ); + + /* Set the string from a c-style string of specific len. */ + void setAs( const char *s, long len ); + + /* Allocate len spaces. */ + void setAs( const Fresh &, long len ); + + void chop( long len ); + + /* Construct a string from with, sprintf. */ + void setAs( long lenGuess, const char *format, ... ); + + /* Set the string from a single char. */ + StrTmpl &operator=( const char c ); + + /* Set the string from another StrTmpl. */ + StrTmpl &operator=( const StrTmpl &s ); + + /* Append a c string to the end of this string. 
*/ + StrTmpl &operator+=( const char *s ); + + /* Append a c string to the end of this string of specifi len. */ + void append( const char *s, long len ); + + /* Append a single char to the end of this string. */ + StrTmpl &operator+=( const char c ); + + /* Append an StrTmpl to the end of this string. */ + StrTmpl &operator+=( const StrTmpl &s ); + + /* Cast to a char star. */ + operator char*() const { return data; } + + /* Get a pointer to the data. */ + char *get() const { return data; } + + /* Return the length of the string. Must check for null data pointer. */ + long length() const { return data ? (((Head*)data)-1)->length : 0; } + + /** + * \brief Pointer to the data. + */ + char *data; + +protected: + /* Make space for a string of length len to be appended. */ + char *appendSpace( long len ); + void initSpace( long length ); + void setSpace( long length ); + + template <class FT> friend StrTmpl<FT> operator+( + const StrTmpl<FT> &s1, const char *s2 ); + template <class FT> friend StrTmpl<FT> operator+( + const char *s1, const StrTmpl<FT> &s2 ); + template <class FT> friend StrTmpl<FT> operator+( + const StrTmpl<FT> &s1, const StrTmpl<FT> &s2 ); + +private: + /* A dummy struct solely to make a constructor that will never be + * ambiguous with the public constructors. */ + struct DisAmbig { }; + StrTmpl( char *data, const DisAmbig & ) : data(data) { } +}; + +/* Free all mem used by the string. */ +template<class T> StrTmpl<T>::~StrTmpl() +{ + if ( data != 0 ) { + /* If we are the only ones referencing the string, then delete it. */ + Head *head = ((Head*) data) - 1; + head->refCount -= 1; + if ( head->refCount == 0 ) + free( head ); + } +} + +/* Create from a c-style string. */ +template<class T> StrTmpl<T>::StrTmpl( const char *s ) +{ + if ( s == 0 ) + data = 0; + else { + /* Find the length and allocate the space for the shared string. */ + long length = strlen( s ); + + /* Init space for the data. */ + initSpace( length ); + + /* Copy in the data. 
*/ + memcpy( data, s, length+1 ); + } +} + +/* Create from a c-style string. */ +template<class T> StrTmpl<T>::StrTmpl( const char *s, long length ) +{ + if ( s == 0 ) + data = 0; + else { + /* Init space for the data. */ + initSpace( length ); + + /* Copy in the data. */ + memcpy( data, s, length ); + data[length] = 0; + } +} + +/* Create from a c-style string. */ +template<class T> StrTmpl<T>::StrTmpl( const Fresh &, long length ) +{ + /* Init space for the data. */ + initSpace( length ); + data[length] = 0; +} + +/* Create from another string class. */ +template<class T> StrTmpl<T>::StrTmpl( const StrTmpl &s ) +{ + if ( s.data == 0 ) + data = 0; + else { + /* Take a reference to the string. */ + Head *strHead = ((Head*)s.data) - 1; + strHead->refCount += 1; + data = (char*) (strHead+1); + } +} + +/* Construct a string from with, sprintf. */ +template<class T> StrTmpl<T>::StrTmpl( long lenGuess, const char *format, ... ) +{ + /* Set the string for len. */ + initSpace( lenGuess ); + + va_list args; + + va_start( args, format ); + long written = vsnprintf( data, lenGuess+1, format, args ); + va_end( args ); + + if ( written > lenGuess ) { + setSpace( written ); + va_start( args, format ); + written = vsnprintf( data, written+1, format, args ); + va_end( args ); + } + chop( written ); + + va_end( args ); +} + +/* Create from another string class. */ +template<class T> StrTmpl<T>::StrTmpl( const colm_data *cd ) +{ + if ( cd->data == 0 ) + data = 0; + else { + /* Init space for the data. */ + initSpace( cd->length ); + + /* Copy in the data. */ + memcpy( data, cd->data, cd->length ); + data[cd->length] = 0; + } +} + + + +/* Construct a string from with, sprintf. */ +template<class T> void StrTmpl<T>::setAs( long lenGuess, const char *format, ... ) +{ + /* Set the string for len. */ + setSpace( lenGuess ); + + va_list args; + + /* Write to the temporary buffer. 
*/ + va_start( args, format ); + + long written = vsnprintf( data, lenGuess+1, format, args ); + if ( written > lenGuess ) { + setSpace( written ); + written = vsnprintf( data, written+1, format, args ); + } + chop( written ); + + va_end( args ); +} + +template<class T> void StrTmpl<T>::initSpace( long length ) +{ + /* Find the length and allocate the space for the shared string. */ + Head *head = (Head*) malloc( sizeof(Head) + length+1 ); + if ( head == 0 ) + throw std::bad_alloc(); + + /* Init the header. */ + head->refCount = 1; + head->length = length; + + /* Save the pointer to the data. */ + data = (char*) (head+1); +} + + +/* Set this string to be the c string exactly. The old string is discarded. + * Returns a reference to this. */ +template<class T> StrTmpl<T> &StrTmpl<T>::operator=( const char *s ) +{ + if ( s == 0 ) { + /* Just free the data, we are being set to null. */ + if ( data != 0 ) { + Head *head = ((Head*)data) - 1; + head->refCount -= 1; + if ( head->refCount == 0 ) + free(head); + data = 0; + } + } + else { + /* Find the length of the string we are setting. */ + long length = strlen( s ); + + /* Set the string for len. */ + setSpace( length ); + + /* Copy in the data. */ + memcpy( data, s, length+1 ); + } + return *this; +} + +/* Set this string to be the c string exactly. The old string is discarded. + * Returns a reference to this. */ +template<class T> void StrTmpl<T>::setAs( const char *s, long length ) +{ + if ( s == 0 ) { + /* Just free the data, we are being set to null. */ + if ( data != 0 ) { + Head *head = ((Head*)data) - 1; + head->refCount -= 1; + if ( head->refCount == 0 ) + free(head); + data = 0; + } + } + else { + /* Set the string for len. */ + setSpace( length ); + + /* Copy in the data. 
*/ + memcpy( data, s, length ); + data[length] = 0; + } +} + +template<class T> void StrTmpl<T>::chop( long length ) +{ + Head *head = ((Head*)data) - 1; + assert( head->refCount == 1 ); + assert( length <= head->length ); + head->length = length; + data[length] = 0; +} + +/* Set this string to be the c string exactly. The old string is discarded. + * Returns a reference to this. */ +template<class T> void StrTmpl<T>::setAs( const Fresh &, long length ) +{ + setSpace( length ); + data[length] = 0; +} + +/* Set this string to be the single char exactly. The old string is discarded. + * Returns a reference to this. */ +template<class T> StrTmpl<T> &StrTmpl<T>::operator=( const char c ) +{ + /* Set to length 1. */ + setSpace( 1 ); + + /* Copy in the data. */ + data[0] = c; + data[1] = 0; + + /* Return ourselves. */ + return *this; +} + +/* Set this string to be the StrTmpl s exactly. The old string is + * discarded. */ +template<class T> StrTmpl<T> &StrTmpl<T>::operator=( const StrTmpl &s ) +{ + /* Detach from the existing string. */ + if ( data != 0 ) { + Head *head = ((Head*)data) - 1; + head->refCount -= 1; + if ( head->refCount == 0 ) + free( head ); + } + + if ( s.data != 0 ) { + /* Take a reference to the string. */ + Head *strHead = ((Head*)s.data) - 1; + strHead->refCount += 1; + data = (char*)(strHead+1); + } + else { + /* Setting from a null string, just null our pointer. */ + data = 0; + } + return *this; +} + +/* Prepare the string to be set to something else of the given length. */ +template<class T> void StrTmpl<T>::setSpace( long length ) +{ + /* Detach from the existing string. */ + Head *head = ((Head*)data) - 1; + if ( data != 0 && --head->refCount == 0 ) { + /* Resuse the space. */ + head = (Head*) realloc( head, sizeof(Head) + length+1 ); + } + else { + /* Need to make new space, there is no usable old space. */ + head = (Head*) malloc( sizeof(Head) + length+1 ); + } + if ( head == 0 ) + throw std::bad_alloc(); + + /* Init the header. 
*/ + head->refCount = 1; + head->length = length; + + /* Copy in the data and save the pointer to it. */ + data = (char*) (head+1); +} + + +/* Append a c-style string to the end of this string. Returns a reference to + * this */ +template<class T> StrTmpl<T> &StrTmpl<T>::operator+=( const char *s ) +{ + /* Find the length of the string appended. */ + if ( s != 0 ) { + /* Get the string length and make space on the end. */ + long addedLen = strlen( s ); + char *dest = appendSpace( addedLen ); + + /* Copy the data in. Plus one for the null. */ + memcpy( dest, s, addedLen+1 ); + } + return *this; +} + +/* Append a c-style string of specific length to the end of this string. + * Returns a reference to this */ +template<class T> void StrTmpl<T>::append( const char *s, long length ) +{ + /* Find the length of the string appended. */ + if ( s != 0 ) { + /* Make space on the end. */ + char *dest = appendSpace( length ); + + /* Copy the data in. Plus one for the null. */ + memcpy( dest, s, length ); + dest[length] = 0; + } +} + +/* Append a single char to the end of this string. Returns a reference to + * this */ +template<class T> StrTmpl<T> &StrTmpl<T>::operator+=( const char c ) +{ + /* Grow on the end. */ + char *dst = appendSpace( 1 ); + + /* Append a single charachter. */ + dst[0] = c; + dst[1] = 0; + return *this; +} + + +/* Append an StrTmpl string to the end of this string. Returns a reference + * to this */ +template<class T> StrTmpl<T> &StrTmpl<T>::operator+=( const StrTmpl &s ) +{ + /* Find the length of the string appended. */ + if ( s.data != 0 ) { + /* Find the length to append. */ + long addedLen = (((Head*)s.data) - 1)->length; + + /* Make space on the end to put the string. */ + char *dest = appendSpace( addedLen ); + + /* Append the data, add one for the null. */ + memcpy( dest, s.data, addedLen+1 ); + } + return *this; +} + +/* Make space for a string of length len to be appended. 
*/ +template<class T> char *StrTmpl<T>::appendSpace( long len ) +{ + /* Find the length of this and the string appended. */ + Head *head = (((Head*)data) - 1); + long thisLen = head->length; + + if ( head->refCount == 1 ) { + /* No other string is using the space, grow this space. */ + head = (Head*) realloc( head, + sizeof(Head) + thisLen + len + 1 ); + if ( head == 0 ) + throw std::bad_alloc(); + data = (char*) (head+1); + + /* Adjust the length. */ + head->length += len; + } + else { + /* Another string is using this space, make new space. */ + head->refCount -= 1; + Head *newHead = (Head*) malloc( + sizeof(Head) + thisLen + len + 1 ); + if ( newHead == 0 ) + throw std::bad_alloc(); + data = (char*) (newHead+1); + + /* Set the new header and data from this. */ + newHead->refCount = 1; + newHead->length = thisLen + len; + memcpy( data, head+1, thisLen ); + } + + /* Return writing position. */ + return data + thisLen; +} + +/* Concatenate a String and a c-style string. */ +template<class T> StrTmpl<T> operator+( const StrTmpl<T> &s1, const char *s2 ) +{ + /* Find s2 length and alloc the space for the result. */ + long str1Len = (((typename StrTmpl<T>::Head*)(s1.data)) - 1)->length; + long str2Len = strlen( s2 ); + + typename StrTmpl<T>::Head *head = (typename StrTmpl<T>::Head*) + malloc( sizeof(typename StrTmpl<T>::Head) + str1Len + str2Len + 1 ); + if ( head == 0 ) + throw std::bad_alloc(); + + /* Set up the header. */ + head->refCount = 1; + head->length = str1Len + str2Len; + + /* Save the pointer to data and copy the data in. */ + char *data = (char*) (head+1); + memcpy( data, s1.data, str1Len ); + memcpy( data + str1Len, s2, str2Len + 1 ); + return StrTmpl<T>( data, typename StrTmpl<T>::DisAmbig() ); +} + +/* Concatenate a c-style string and a String. */ +template<class T> StrTmpl<T> operator+( const char *s1, const StrTmpl<T> &s2 ) +{ + /* Find s2 length and alloc the space for the result. 
*/ + long str1Len = strlen( s1 ); + long str2Len = (((typename StrTmpl<T>::Head*)(s2.data)) - 1)->length; + + typename StrTmpl<T>::Head *head = (typename StrTmpl<T>::Head*) + malloc( sizeof(typename StrTmpl<T>::Head) + str1Len + str2Len + 1 ); + if ( head == 0 ) + throw std::bad_alloc(); + + /* Set up the header. */ + head->refCount = 1; + head->length = str1Len + str2Len; + + /* Save the pointer to data and copy the data in. */ + char *data = (char*) (head+1); + memcpy( data, s1, str1Len ); + memcpy( data + str1Len, s2.data, str2Len + 1 ); + return StrTmpl<T>( data, typename StrTmpl<T>::DisAmbig() ); +} + +/* Add two StrTmpl strings. */ +template<class T> StrTmpl<T> operator+( const StrTmpl<T> &s1, const StrTmpl<T> &s2 ) +{ + /* Find s2 length and alloc the space for the result. */ + long str1Len = (((typename StrTmpl<T>::Head*)(s1.data)) - 1)->length; + long str2Len = (((typename StrTmpl<T>::Head*)(s2.data)) - 1)->length; + typename StrTmpl<T>::Head *head = (typename StrTmpl<T>::Head*) + malloc( sizeof(typename StrTmpl<T>::Head) + str1Len + str2Len + 1 ); + if ( head == 0 ) + throw std::bad_alloc(); + + /* Set up the header. */ + head->refCount = 1; + head->length = str1Len + str2Len; + + /* Save the pointer to data and copy the data in. */ + char *data = (char*) (head+1); + memcpy( data, s1.data, str1Len ); + memcpy( data + str1Len, s2.data, str2Len + 1 ); + return StrTmpl<T>( data, typename StrTmpl<T>::DisAmbig() ); +} + +/* Operator used in case the compiler does not support the conversion. */ +template <class T> inline std::ostream &operator<<( std::ostream &o, const StrTmpl<T> &s ) +{ + return o.write( s.data, s.length() ); +} + +typedef StrTmpl<char> String; + +/** + * \brief Compare two null terminated character sequences. + * + * This comparision class is a wrapper for strcmp. + */ +template<class T> struct CmpStrTmpl +{ + /** + * \brief Compare two null terminated string types. 
+ */ + static inline long compare( const char *k1, const char *k2 ) + { return strcmp(k1, k2); } + + static int compare( const StrTmpl<T> &s1, const StrTmpl<T> &s2 ) + { + if ( s1.length() < s2.length() ) + return -1; + else if ( s1.length() > s2.length() ) + return 1; + else + return memcmp( s1.data, s2.data, s1.length() ); + } +}; + +typedef CmpStrTmpl<char> ColmCmpStr; + +#ifdef AAPL_NAMESPACE +} +#endif + +#endif /* _AAPL_ASTRING_H */ + diff --git a/src/ctinput.cc b/src/ctinput.cc new file mode 100644 index 00000000..f8267487 --- /dev/null +++ b/src/ctinput.cc @@ -0,0 +1,570 @@ +/* + * Copyright 2007-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <assert.h> + +#include <iostream> + +#include "compiler.h" +#include "pool.h" +//#include "debug.h" + +using std::cerr; +using std::endl; + +DEF_INPUT_FUNCS( input_funcs_ct, input_impl_ct ); + +extern input_funcs_ct pat_funcs; +extern input_funcs_ct repl_funcs; + +struct input_impl_ct +{ + struct input_funcs *funcs; + + char *name; + long line; + long column; + long byte; + + struct Pattern *pattern; + struct PatternItem *pat_item; + struct Constructor *constructor; + struct ConsItem *cons_item; + + char eof_mark; + char eof_sent; + + int offset; +}; + +void ct_destructor( program_t *prg, tree_t **sp, struct input_impl_ct *ss ) +{ +} + +char ct_get_eof_sent( struct colm_program *prg, struct input_impl_ct *si ) +{ + return si->eof_sent; +} + +void ct_set_eof_sent( struct colm_program *prg, struct input_impl_ct *si, char eof_sent ) +{ + si->eof_sent = eof_sent; +} + +/* + * Pattern + */ + +struct input_impl *colm_impl_new_pat( char *name, Pattern *pattern ) +{ + struct input_impl_ct *ss = (struct input_impl_ct*)malloc(sizeof(struct input_impl_ct)); + memset( ss, 0, sizeof(struct input_impl_ct) ); + ss->pattern = pattern; + ss->pat_item = pattern->list->head; + ss->funcs = (struct input_funcs*)&pat_funcs; + return (struct input_impl*) ss; +} + +int pat_get_parse_block( struct colm_program *prg, struct input_impl_ct *ss, int *pskip, + alph_t **pdp, int *copied ) +{ + *copied = 0; + + PatternItem *buf = ss->pat_item; + int offset = ss->offset; + + while ( true ) { + if ( buf == 0 ) + return INPUT_EOF; + + if ( buf->form == PatternItem::TypeRefForm ) + return INPUT_LANG_EL; + + assert ( buf->form == PatternItem::InputTextForm ); + int avail = buf->data.length() - offset; + + if ( avail > 0 ) { + /* The source data from the current buffer. */ + alph_t *src = (alph_t*)&buf->data[offset]; + int slen = avail; + + /* Need to skip? 
*/ + if ( *pskip > 0 && slen <= *pskip ) { + /* Skipping the the whole source. */ + *pskip -= slen; + } + else { + /* Either skip is zero, or less than slen. Skip goes to zero. + * Some data left over, copy it. */ + src += *pskip; + slen -= *pskip; + *pskip = 0; + + *pdp = src; + *copied += slen; + break; + } + } + + buf = buf->next; + offset = 0; + } + + return INPUT_DATA; +} + + +int pat_get_data( struct colm_program *prg, struct input_impl_ct *ss, alph_t *dest, int length ) +{ + int copied = 0; + + PatternItem *buf = ss->pat_item; + int offset = ss->offset; + + while ( true ) { + if ( buf == 0 ) + break; + + if ( buf->form == PatternItem::TypeRefForm ) + break; + + assert ( buf->form == PatternItem::InputTextForm ); + int avail = buf->data.length() - offset; + + if ( avail > 0 ) { + /* The source data from the current buffer. */ + char *src = &buf->data[offset]; + int slen = avail <= length ? avail : length; + + memcpy( dest+copied, src, slen ) ; + copied += slen; + length -= slen; + } + + if ( length == 0 ) + break; + + buf = buf->next; + offset = 0; + } + + return copied; +} + +void pat_backup( struct input_impl_ct *ss ) +{ + if ( ss->pat_item == 0 ) + ss->pat_item = ss->pattern->list->tail; + else + ss->pat_item = ss->pat_item->prev; +} + +int pat_consume_data( struct colm_program *prg, struct input_impl_ct *ss, int length, location_t *loc ) +{ + //debug( REALM_INPUT, "consuming %ld bytes\n", length ); + + int consumed = 0; + + while ( true ) { + if ( ss->pat_item == 0 ) + break; + + int avail = ss->pat_item->data.length() - ss->offset; + + if ( length >= avail ) { + /* Read up to the end of the data. Advance the + * pattern item. 
*/ + ss->pat_item = ss->pat_item->next; + ss->offset = 0; + + length -= avail; + consumed += avail; + + if ( length == 0 ) + break; + } + else { + ss->offset += length; + consumed += length; + break; + } + } + + return consumed; +} + +int pat_undo_consume_data( struct colm_program *prg, struct input_impl_ct *ss, const alph_t *data, int length ) +{ + int origLen = length; + while ( true ) { + int avail = ss->offset; + + /* Okay to go up to the front of the buffer. */ + if ( length > avail ) { + ss->pat_item = ss->pat_item == 0 ? + ss->pattern->list->tail : + ss->pat_item->prev; + ss->offset = ss->pat_item->data.length(); + length -= avail; + } + else { + ss->offset -= length; + break; + } + } + + return origLen; +} + +LangEl *pat_consume_lang_el( struct colm_program *prg, struct input_impl_ct *ss, long *bindId, + alph_t **data, long *length ) +{ + LangEl *klangEl = ss->pat_item->prodEl->langEl; + *bindId = ss->pat_item->bindId; + *data = 0; + *length = 0; + + ss->pat_item = ss->pat_item->next; + ss->offset = 0; + return klangEl; +} + +void pat_undo_consume_lang_el( struct colm_program *prg, struct input_impl_ct *ss ) +{ + pat_backup( ss ); + ss->offset = ss->pat_item->data.length(); +} + +void ct_set_eof_mark( struct colm_program *prg, struct input_impl_ct *si, char eof_mark ) +{ + si->eof_mark = eof_mark; +} + +void ct_transfer_loc_seq( struct colm_program *prg, location_t *loc, struct input_impl_ct *ss ) +{ + loc->name = ss->name; + loc->line = ss->line; + loc->column = ss->column; + loc->byte = ss->byte; +} + +input_funcs_ct pat_funcs = +{ + &pat_get_parse_block, + &pat_get_data, + + &pat_consume_data, + &pat_undo_consume_data, + + 0, /* consume_tree */ + 0, /* undo_consume_tree */ + + &pat_consume_lang_el, + &pat_undo_consume_lang_el, + + 0, 0, 0, 0, 0, 0, /* prepend funcs. 
*/ + 0, 0, 0, 0, 0, 0, /* append funcs */ + + &ct_set_eof_mark, + + &ct_transfer_loc_seq, + &ct_destructor, + + 0, 0 +}; + + +/* + * Replacements + */ + +struct input_impl *colm_impl_new_cons( char *name, Constructor *constructor ) +{ + struct input_impl_ct *ss = (struct input_impl_ct*)malloc(sizeof(struct input_impl_ct)); + memset( ss, 0, sizeof(struct input_impl_ct) ); + ss->constructor = constructor; + ss->cons_item = constructor->list->head; + ss->funcs = (struct input_funcs*)&repl_funcs; + return (struct input_impl*)ss; +} + +LangEl *repl_consume_lang_el( struct colm_program *prg, struct input_impl_ct *ss, + long *bindId, alph_t **data, long *length ) +{ + LangEl *klangEl = ss->cons_item->type == ConsItem::ExprType ? + ss->cons_item->langEl : ss->cons_item->prodEl->langEl; + *bindId = ss->cons_item->bindId; + + *data = 0; + *length = 0; + + if ( ss->cons_item->type == ConsItem::LiteralType ) { + if ( ss->cons_item->prodEl->typeRef->pdaLiteral != 0 ) { + bool unusedCI; + prepareLitString( ss->cons_item->data, unusedCI, + ss->cons_item->prodEl->typeRef->pdaLiteral->data, + ss->cons_item->prodEl->typeRef->pdaLiteral->loc ); + + *data = (alph_t*)ss->cons_item->data.data; + *length = ss->cons_item->data.length(); + } + } + + ss->cons_item = ss->cons_item->next; + ss->offset = 0; + return klangEl; +} + +int repl_get_parse_block( struct colm_program *prg, struct input_impl_ct *ss, + int *pskip, alph_t **pdp, int *copied ) +{ + *copied = 0; + + ConsItem *buf = ss->cons_item; + int offset = ss->offset; + + while ( true ) { + if ( buf == 0 ) + return INPUT_EOF; + + if ( buf->type == ConsItem::ExprType || buf->type == ConsItem::LiteralType ) + return INPUT_LANG_EL; + + assert ( buf->type == ConsItem::InputText ); + int avail = buf->data.length() - offset; + + if ( avail > 0 ) { + /* The source data from the current buffer. */ + alph_t *src = (alph_t*)&buf->data[offset]; + int slen = avail; + + /* Need to skip? 
*/ + if ( *pskip > 0 && slen <= *pskip ) { + /* Skipping the the whole source. */ + *pskip -= slen; + } + else { + /* Either skip is zero, or less than slen. Skip goes to zero. + * Some data left over, copy it. */ + src += *pskip; + slen -= *pskip; + *pskip = 0; + + *pdp = src; + *copied += slen; + break; + } + } + + buf = buf->next; + offset = 0; + } + + return INPUT_DATA; +} + +int repl_get_data( struct colm_program *prg, struct input_impl_ct *ss, alph_t *dest, int length ) +{ + int copied = 0; + + ConsItem *buf = ss->cons_item; + int offset = ss->offset; + + while ( true ) { + if ( buf == 0 ) + break; + + if ( buf->type == ConsItem::ExprType || buf->type == ConsItem::LiteralType ) + break; + + assert ( buf->type == ConsItem::InputText ); + int avail = buf->data.length() - offset; + + if ( avail > 0 ) { + /* The source data from the current buffer. */ + char *src = &buf->data[offset]; + int slen = avail <= length ? avail : length; + + memcpy( dest+copied, src, slen ) ; + copied += slen; + length -= slen; + } + + if ( length == 0 ) + break; + + buf = buf->next; + offset = 0; + } + + return copied; +} + +void repl_backup( struct input_impl_ct *ss ) +{ + if ( ss->cons_item == 0 ) + ss->cons_item = ss->constructor->list->tail; + else + ss->cons_item = ss->cons_item->prev; +} + +void repl_undo_consume_lang_el( struct colm_program *prg, struct input_impl_ct *ss ) +{ + repl_backup( ss ); + ss->offset = ss->cons_item->data.length(); +} + + +int repl_consume_data( struct colm_program *prg, struct input_impl_ct *ss, int length, location_t *loc ) +{ + int consumed = 0; + + while ( true ) { + if ( ss->cons_item == 0 ) + break; + + int avail = ss->cons_item->data.length() - ss->offset; + + if ( length >= avail ) { + /* Read up to the end of the data. Advance the + * pattern item. 
*/ + ss->cons_item = ss->cons_item->next; + ss->offset = 0; + + length -= avail; + consumed += avail; + + if ( length == 0 ) + break; + } + else { + ss->offset += length; + consumed += length; + break; + } + } + + return consumed; +} + +int repl_undo_consume_data( struct colm_program *prg, struct input_impl_ct *ss, const alph_t *data, int length ) +{ + int origLen = length; + while ( true ) { + int avail = ss->offset; + + /* Okay to go up to the front of the buffer. */ + if ( length > avail ) { + ss->cons_item = ss->cons_item == 0 ? + ss->constructor->list->tail : + ss->cons_item->prev; + ss->offset = ss->cons_item->data.length(); + length -= avail; + } + else { + ss->offset -= length; + break; + } + } + + return origLen; +} + +input_funcs_ct repl_funcs = +{ + &repl_get_parse_block, + &repl_get_data, + + &repl_consume_data, + &repl_undo_consume_data, + + 0, /* consume_tree */ + 0, /* undo_consume_tree. */ + + &repl_consume_lang_el, + &repl_undo_consume_lang_el, + + 0, 0, 0, 0, 0, 0, /* prepend. */ + 0, 0, 0, 0, 0, 0, /* append. */ + + &ct_set_eof_mark, + + &ct_transfer_loc_seq, + &ct_destructor, + + 0, 0 +}; + +void pushBinding( pda_run *pdaRun, parse_tree_t *parseTree ) +{ + /* If the item is bound then store it in the bindings array. */ + pdaRun->bindings->push( parseTree ); +} + +extern "C" void internalSendNamedLangEl( program_t *prg, tree_t **sp, + struct pda_run *pdaRun, struct input_impl *is ) +{ + /* All three set by consumeLangEl. */ + long bindId; + alph_t *data; + long length; + + LangEl *klangEl = is->funcs->consume_lang_el( prg, is, &bindId, &data, &length ); + + //cerr << "named langEl: " << prg->rtd->lelInfo[klangEl->id].name << endl; + + /* Copy the token data. 
*/ + head_t *tokdata = 0; + if ( data != 0 ) + tokdata = string_alloc_full( prg, colm_cstr_from_alph( data ), length ); + + kid_t *input = make_token_with_data( prg, pdaRun, is, klangEl->id, tokdata ); + + colm_increment_steps( pdaRun ); + + parse_tree_t *parseTree = parse_tree_allocate( pdaRun ); + parseTree->id = input->tree->id; + parseTree->flags |= PF_NAMED; + parseTree->shadow = input; + + if ( bindId > 0 ) + pushBinding( pdaRun, parseTree ); + + pdaRun->parse_input = parseTree; +} + +extern "C" void internalInitBindings( pda_run *pdaRun ) +{ + /* Bindings are indexed at 1. Need a no-binding. */ + pdaRun->bindings = new bindings; + pdaRun->bindings->push(0); +} + +extern "C" void internalPopBinding( pda_run *pdaRun, parse_tree_t *parseTree ) +{ + parse_tree_t *lastBound = pdaRun->bindings->top(); + if ( lastBound == parseTree ) + pdaRun->bindings->pop(); +} diff --git a/src/debug.c b/src/debug.c new file mode 100644 index 00000000..1cfd24d7 --- /dev/null +++ b/src/debug.c @@ -0,0 +1,82 @@ +/* + * Copyright 2010-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <colm/debug.h> + +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> + +#include <colm/program.h> + +const char *const colm_realm_names[REALMS] = + // @NOTE: keep this in sync with 'main.cc': 'processArgs()' '-D' option + { + "BYTECODE", + "PARSE", + "MATCH", + "COMPILE", + "POOL", + "PRINT", + "INPUT", + "SCAN", + }; + +int _debug( struct colm_program *prg, long realm, const char *fmt, ... ) +{ + int result = 0; + if ( prg->active_realm & realm ) { + /* Compute the index by shifting. */ + int ind = 0; + while ( (realm & 0x1) != 0x1 ) { + realm >>= 1; + ind += 1; + } + + fprintf( stderr, "%s: ", colm_realm_names[ind] ); + va_list args; + va_start( args, fmt ); + result = vfprintf( stderr, fmt, args ); + va_end( args ); + } + + return result; +} + +void fatal( const char *fmt, ... ) +{ + va_list args; + fprintf( stderr, "fatal: " ); + va_start( args, fmt ); + vfprintf( stderr, fmt, args ); + va_end( args ); + exit(1); +} + +void message( const char *fmt, ... 
) +{ + va_list args; + fprintf( stderr, "message: " ); + va_start( args, fmt ); + vfprintf( stderr, fmt, args ); + va_end( args ); +} diff --git a/src/debug.h b/src/debug.h new file mode 100644 index 00000000..1870f553 --- /dev/null +++ b/src/debug.h @@ -0,0 +1,65 @@ +/* + * Copyright 2010-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _COLM_DEBUG_H +#define _COLM_DEBUG_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include "colm.h" +#include "config.h" + +void fatal( const char *fmt, ... ); + +#ifdef DEBUG +#define debug( prg, realm, ... ) _debug( prg, realm, __VA_ARGS__ ) +#define check_realm( realm ) _check_realm( realm ) +#else +#define debug( prg, realm, ... ) +#define check_realm( realm ) +#endif + +int _debug( struct colm_program *prg, long realm, const char *fmt, ... ); + +void message( const char *fmt, ... 
); + +#define REALM_BYTECODE COLM_DBG_BYTECODE +#define REALM_PARSE COLM_DBG_PARSE +#define REALM_MATCH COLM_DBG_MATCH +#define REALM_COMPILE COLM_DBG_COMPILE +#define REALM_POOL COLM_DBG_POOL +#define REALM_PRINT COLM_DBG_PRINT +#define REALM_INPUT COLM_DBG_INPUT +#define REALM_SCAN COLM_DBG_SCAN + +#define REALMS 32 + +extern const char *const colm_realm_names[REALMS]; + +#ifdef __cplusplus +} +#endif + +#endif /* _COLM_DEBUG_H */ + diff --git a/src/declare.cc b/src/declare.cc new file mode 100644 index 00000000..b96092e2 --- /dev/null +++ b/src/declare.cc @@ -0,0 +1,1623 @@ +/* + * Copyright 2012-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <stdbool.h> +#include <assert.h> + +#include <iostream> + +#include "compiler.h" + +void Compiler::initUniqueTypes( ) +{ + uniqueTypeNil = new UniqueType( TYPE_NIL ); + uniqueTypeVoid = new UniqueType( TYPE_VOID ); + uniqueTypePtr = new UniqueType( TYPE_TREE, ptrLangEl ); + uniqueTypeBool = new UniqueType( TYPE_BOOL ); + uniqueTypeInt = new UniqueType( TYPE_INT ); + uniqueTypeStr = new UniqueType( TYPE_TREE, strLangEl ); + uniqueTypeIgnore = new UniqueType( TYPE_TREE, ignoreLangEl ); + uniqueTypeAny = new UniqueType( TYPE_TREE, anyLangEl ); + + uniqueTypeInput = new UniqueType( TYPE_STRUCT, inputSel ); + uniqueTypeStream = new UniqueType( TYPE_STRUCT, streamSel ); + + uniqeTypeMap.insert( uniqueTypeNil ); + uniqeTypeMap.insert( uniqueTypeVoid ); + uniqeTypeMap.insert( uniqueTypePtr ); + uniqeTypeMap.insert( uniqueTypeBool ); + uniqeTypeMap.insert( uniqueTypeInt ); + uniqeTypeMap.insert( uniqueTypeStr ); + uniqeTypeMap.insert( uniqueTypeIgnore ); + uniqeTypeMap.insert( uniqueTypeAny ); + + uniqeTypeMap.insert( uniqueTypeInput ); + uniqeTypeMap.insert( uniqueTypeStream ); +} + +ObjectMethod *initFunction( UniqueType *retType, Namespace *nspace, ObjectDef *obj, + ObjectMethod::Type type, const String &name, int methIdWV, int methIdWC, + int nargs, UniqueType **args, bool isConst, bool useFnInstr, + GenericType *useGeneric ) +{ + ObjectMethod *objMethod = new ObjectMethod( retType, name, + methIdWV, methIdWC, nargs, args, 0, isConst ); + objMethod->type = type; + objMethod->useFnInstr = useFnInstr; + + if ( nspace != 0 ) + nspace->rootScope->methodMap.insert( name, objMethod ); + else + obj->rootScope->methodMap.insert( name, objMethod ); + + if ( useGeneric ) { + objMethod->useGenericId = true; + objMethod->generic = useGeneric; + } + + return objMethod; +} + +ObjectMethod *initFunction( UniqueType *retType, ObjectDef *obj, + ObjectMethod::Type type, const String &name, int methIdWV, int methIdWC, bool isConst, + bool useFnInstr, GenericType 
*useGeneric ) +{ + return initFunction( retType, 0, obj, type, name, methIdWV, methIdWC, + 0, 0, isConst, useFnInstr, useGeneric ); +} + +ObjectMethod *initFunction( UniqueType *retType, ObjectDef *obj, + ObjectMethod::Type type, const String &name, int methIdWV, int methIdWC, UniqueType *arg1, + bool isConst, bool useFnInstr, GenericType *useGeneric ) +{ + UniqueType *args[] = { arg1 }; + return initFunction( retType, 0, obj, type, name, methIdWV, methIdWC, + 1, args, isConst, useFnInstr, useGeneric ); +} + +ObjectMethod *initFunction( UniqueType *retType, ObjectDef *obj, + ObjectMethod::Type type, const String &name, int methIdWV, int methIdWC, + UniqueType *arg1, UniqueType *arg2, + bool isConst, bool useFnInstr, GenericType *useGeneric ) +{ + UniqueType *args[] = { arg1, arg2 }; + return initFunction( retType, 0, obj, type, name, methIdWV, methIdWC, + 2, args, isConst, useFnInstr, useGeneric ); +} + +/* + * With namespace supplied. Global functions. + */ + +ObjectMethod *initFunction( UniqueType *retType, Namespace *nspace, ObjectDef *obj, + ObjectMethod::Type type, const String &name, int methIdWV, int methIdWC, bool isConst, + bool useFnInstr, GenericType *useGeneric ) +{ + return initFunction( retType, nspace, obj, type, name, methIdWV, methIdWC, + 0, 0, isConst, useFnInstr, useGeneric ); +} + +ObjectMethod *initFunction( UniqueType *retType, Namespace *nspace, ObjectDef *obj, + ObjectMethod::Type type, const String &name, int methIdWV, int methIdWC, UniqueType *arg1, + bool isConst, bool useFnInstr, GenericType *useGeneric ) +{ + UniqueType *args[] = { arg1 }; + return initFunction( retType, nspace, obj, type, name, methIdWV, methIdWC, + 1, args, isConst, useFnInstr, useGeneric ); +} + +ObjectMethod *initFunction( UniqueType *retType, Namespace *nspace, ObjectDef *obj, + ObjectMethod::Type type, const String &name, int methIdWV, int methIdWC, + UniqueType *arg1, UniqueType *arg2, + bool isConst, bool useFnInstr, GenericType *useGeneric ) +{ + UniqueType 
*args[] = { arg1, arg2 }; + return initFunction( retType, nspace, obj, type, name, methIdWV, methIdWC, + 2, args, isConst, useFnInstr, useGeneric ); +} + +ObjectField *NameScope::checkRedecl( const String &name ) +{ + return owningObj->checkRedecl( this, name ); +} + +void NameScope::insertField( const String &name, ObjectField *value ) +{ + return owningObj->insertField( this, name, value ); +} + +ObjectField *ObjectDef::checkRedecl( NameScope *inScope, const String &name ) +{ + FieldMapEl *objDefMapEl = inScope->fieldMap.find( name ); + if ( objDefMapEl != 0 ) + return objDefMapEl->value; + return 0; +} + +void ObjectDef::insertField( NameScope *inScope, const String &name, ObjectField *value ) +{ + inScope->fieldMap.insert( name, value ); + fieldList.append( value ); + value->scope = inScope; +} + +NameScope *ObjectDef::pushScope( NameScope *curScope ) +{ + NameScope *newScope = new NameScope; + + newScope->owningObj = this; + newScope->parentScope = curScope; + curScope->children.append( newScope ); + + return newScope; +} + +void LexJoin::varDecl( Compiler *pd, TokenDef *tokenDef ) +{ + expr->varDecl( pd, tokenDef ); +} + +void LexExpression::varDecl( Compiler *pd, TokenDef *tokenDef ) +{ + switch ( type ) { + case OrType: case IntersectType: case SubtractType: + case StrongSubtractType: + expression->varDecl( pd, tokenDef ); + term->varDecl( pd, tokenDef ); + break; + case TermType: + term->varDecl( pd, tokenDef ); + break; + case BuiltinType: + break; + } +} + +void LexTerm::varDecl( Compiler *pd, TokenDef *tokenDef ) +{ + switch ( type ) { + case ConcatType: + case RightStartType: + case RightFinishType: + case LeftType: + term->varDecl( pd, tokenDef ); + factorAug->varDecl( pd, tokenDef ); + break; + case FactorAugType: + factorAug->varDecl( pd, tokenDef ); + break; + } +} + +void LexFactorAug::varDecl( Compiler *pd, TokenDef *tokenDef ) +{ + for ( ReCaptureVect::Iter re = reCaptureVect; re.lte(); re++ ) { + if ( 
tokenDef->objectDef->rootScope->checkRedecl( re->objField->name ) != 0 ) { + error(re->objField->loc) << "label name \"" << + re->objField->name << "\" already in use" << endp; + } + + /* Insert it into the map. */ + tokenDef->objectDef->rootScope->insertField( re->objField->name, re->objField ); + + /* Store it in the TokenDef. */ + tokenDef->reCaptureVect.append( *re ); + } +} + +void Compiler::declareReVars() +{ + for ( NamespaceList::Iter n = namespaceList; n.lte(); n++ ) { + for ( TokenDefListNs::Iter tok = n->tokenDefList; tok.lte(); tok++ ) { + if ( tok->join != 0 ) + tok->join->varDecl( this, tok ); + } + } + + /* FIXME: declare RE captures in token generation actions. */ +#if 0 + /* Add captures to the local frame. We Depend on these becoming the + * first local variables so we can compute their location. */ + + /* Make local variables corresponding to the local capture vector. */ + for ( ReCaptureVect::Iter c = reCaptureVect; c.lte(); c++ ) + { + ObjectField *objField = ObjectField::cons( c->objField->loc, + c->objField->typeRef, c->objField->name ); + + /* Insert it into the field map. */ + pd->curLocalFrame->insertField( objField->name, objField ); + } +#endif +} + +LangEl *declareLangEl( Compiler *pd, Namespace *nspace, + const String &data, LangEl::Type type ) +{ + /* If the id is already in the dict, it will be placed in last found. If + * it is not there then it will be inserted and last found will be set to it. */ + TypeMapEl *inDict = nspace->typeMap.find( data ); + if ( inDict != 0 ) + error() << "language element '" << data << "' already defined as something else" << endp; + + /* Language element not there. Make the new lang el and insert.. 
*/ + LangEl *langEl = new LangEl( nspace, data, type ); + TypeMapEl *typeMapEl = new TypeMapEl( TypeMapEl::LangElType, data, langEl ); + nspace->typeMap.insert( typeMapEl ); + pd->langEls.append( langEl ); + + return langEl; +} + +StructEl *declareStruct( Compiler *pd, Namespace *inNspace, + const String &data, StructDef *structDef ) +{ + if ( inNspace != 0 ) { + TypeMapEl *inDict = inNspace->typeMap.find( data ); + if ( inDict != 0 ) + error() << "struct '" << data << "' already defined as something else" << endp; + } + + StructEl *structEl = new StructEl( data, structDef ); + pd->structEls.append( structEl ); + structDef->structEl = structEl; + + if ( inNspace ) { + TypeMapEl *typeMapEl = new TypeMapEl( TypeMapEl::StructType, data, structEl ); + inNspace->typeMap.insert( typeMapEl ); + } + + return structEl; +} + +/* Does not map the new language element. */ +LangEl *addLangEl( Compiler *pd, Namespace *inNspace, + const String &data, LangEl::Type type ) +{ + LangEl *langEl = new LangEl( inNspace, data, type ); + pd->langEls.append( langEl ); + return langEl; +} + +void declareTypeAlias( Compiler *pd, Namespace *nspace, + const String &data, TypeRef *typeRef ) +{ + /* If the id is already in the dict, it will be placed in last found. If + * it is not there then it will be inserted and last found will be set to it. */ + TypeMapEl *inDict = nspace->typeMap.find( data ); + if ( inDict != 0 ) + error() << "alias '" << data << "' already defined as something else" << endp; + + /* Language element not there. Make the new lang el and insert. */ + TypeMapEl *typeMapEl = new TypeMapEl( TypeMapEl::AliasType, data, typeRef ); + nspace->typeMap.insert( typeMapEl ); +} + +LangEl *findType( Compiler *pd, Namespace *nspace, const String &data ) +{ + /* If the id is already in the dict, it will be placed in last found. If + * it is not there then it will be inserted and last found will be set to it. 
*/ + TypeMapEl *inDict = nspace->typeMap.find( data ); + + if ( inDict == 0 ) + error() << "'" << data << "' not declared as anything" << endp; + + return inDict->value; +} + + +void Compiler::declareBaseLangEls() +{ + /* Order here is important because we make assumptions about the inbuilt + * language elements in the runtime. Note tokens are have identifiers set + * in an initial pass. */ + + /* Make a "_notoken" language element. This element is used when a + * generation action fails to generate anything, but there is reverse code + * that needs to be associated with a language element. This allows us to + * always associate reverse code with the first language element produced + * after a generation action. */ + noTokenLangEl = declareLangEl( this, rootNamespace, "_notoken", LangEl::Term ); + noTokenLangEl->isIgnore = true; + + ptrLangEl = declareLangEl( this, rootNamespace, "ptr", LangEl::Term ); + strLangEl = declareLangEl( this, rootNamespace, "str", LangEl::Term ); + ignoreLangEl = declareLangEl( this, rootNamespace, "il", LangEl::Term ); + + /* Make the EOF language element. */ + eofLangEl = 0; + + /* Make the "any" language element */ + anyLangEl = declareLangEl( this, rootNamespace, "any", LangEl::NonTerm ); +} + + +void Compiler::addProdRedObjectVar( ObjectDef *localFrame, LangEl *nonTerm ) +{ + UniqueType *prodNameUT = findUniqueType( TYPE_TREE, nonTerm ); + TypeRef *typeRef = TypeRef::cons( internal, prodNameUT ); + ObjectField *el = ObjectField::cons( internal, + ObjectField::LhsElType, typeRef, "lhs" ); + + localFrame->rootScope->insertField( el->name, el ); +} + +void Compiler::addProdRHSVars( ObjectDef *localFrame, ProdElList *prodElList ) +{ + long position = 1; + for ( ProdElList::Iter rhsEl = *prodElList; rhsEl.lte(); rhsEl++, position++ ) { + if ( rhsEl->type == ProdEl::ReferenceType ) { + /* Use an offset of zero. For frame objects we compute the offset on + * demand. 
*/ + String name( 8, "r%d", position ); + ObjectField *el = ObjectField::cons( InputLoc(), + ObjectField::RedRhsType, rhsEl->typeRef, name ); + rhsEl->rhsElField = el; + + /* Right hand side elements are constant. */ + el->isConst = true; + localFrame->rootScope->insertField( el->name, el ); + } + } +} + +void GenericType::declare( Compiler *pd, Namespace *nspace ) +{ + elUt = elTr->resolveType( pd ); + + if ( typeId == GEN_MAP ) + keyUt = keyTr->resolveType( pd ); + + if ( typeId == GEN_MAP || typeId == GEN_LIST ) + valueUt = valueTr->resolveType( pd ); + + objDef = ObjectDef::cons( ObjectDef::BuiltinType, + "generic", pd->nextObjectId++ ); + + switch ( typeId ) { + case GEN_MAP: + pd->initMapFunctions( this ); + pd->initMapFields( this ); + break; + case GEN_LIST: + pd->initListFunctions( this ); + pd->initListFields( this ); + break; + case GEN_PARSER: + elUt->langEl->parserId = pd->nextParserId++; + pd->initParserFunctions( this ); + pd->initParserFields( this ); + break; + } +} + +void Namespace::declare( Compiler *pd ) +{ + for ( GenericList::Iter g = genericList; g.lte(); g++ ) + g->declare( pd, this ); + + for ( TokenDefListNs::Iter tokenDef = tokenDefList; tokenDef.lte(); tokenDef++ ) { + if ( tokenDef->isLiteral ) { + if ( tokenDef->isZero ) { + assert( tokenDef->regionSet->collectIgnore->zeroLel != 0 ); + tokenDef->tdLangEl = tokenDef->regionSet->collectIgnore->zeroLel; + } + else { + /* Original. Create a token for the literal. 
*/ + LangEl *litEl = declareLangEl( pd, this, tokenDef->name, LangEl::Term ); + + litEl->lit = tokenDef->literal; + litEl->isLiteral = true; + litEl->tokenDef = tokenDef; + litEl->objectDef = tokenDef->objectDef; + + tokenDef->tdLangEl = litEl; + + if ( tokenDef->noPreIgnore ) + litEl->noPreIgnore = true; + if ( tokenDef->noPostIgnore ) + litEl->noPostIgnore = true; + } + } + } + + for ( StructDefList::Iter s = structDefList; s.lte(); s++ ) + declareStruct( pd, this, s->name, s ); + + for ( TokenDefListNs::Iter tokenDef = tokenDefList; tokenDef.lte(); tokenDef++ ) { + /* Literals already taken care of. */ + if ( ! tokenDef->isLiteral ) { + /* Create the token. */ + LangEl *tokEl = declareLangEl( pd, this, tokenDef->name, LangEl::Term ); + tokEl->isIgnore = tokenDef->isIgnore; + tokEl->transBlock = tokenDef->codeBlock; + tokEl->objectDef = tokenDef->objectDef; + tokEl->contextIn = tokenDef->contextIn; + tokEl->tokenDef = tokenDef; + + if ( tokenDef->noPreIgnore ) + tokEl->noPreIgnore = true; + if ( tokenDef->noPostIgnore ) + tokEl->noPostIgnore = true; + + tokenDef->tdLangEl = tokEl; + + if ( tokenDef->isZero ) { + /* Setting zero lel to newly created tokEl. */ + tokenDef->regionSet->collectIgnore->zeroLel = tokEl; + tokEl->isZero = true; + } + } + } + + for ( NtDefList::Iter n = ntDefList; n.lte(); n++ ) { + /* Get the language element. */ + LangEl *langEl = declareLangEl( pd, this, n->name, LangEl::NonTerm ); + //$$->langEl = langEl; + + /* Get the language element. */ + langEl->objectDef = n->objectDef; + langEl->reduceFirst = n->reduceFirst; + langEl->contextIn = n->contextIn; + langEl->defList.transfer( *n->defList ); + + for ( LelDefList::Iter d = langEl->defList; d.lte(); d++ ) { + d->prodName = langEl; + + if ( d->redBlock != 0 ) { + pd->addProdRedObjectVar( d->redBlock->localFrame, langEl ); + pd->addProdRHSVars( d->redBlock->localFrame, d->prodElList ); + } + + /* References to the reduce item. 
*/ + } + } + + for ( TypeAliasList::Iter ta = typeAliasList; ta.lte(); ta++ ) + declareTypeAlias( pd, this, ta->name, ta->typeRef ); + + /* Go into child aliases. */ + for ( NamespaceVect::Iter c = childNamespaces; c.lte(); c++ ) + (*c)->declare( pd ); +} + +void Compiler::makeIgnoreCollectors() +{ + for ( RegionSetList::Iter regionSet = regionSetList; regionSet.lte(); regionSet++ ) { + if ( regionSet->collectIgnore->zeroLel == 0 ) { + String name( 128, "_ign_%p", regionSet->tokenIgnore ); + LangEl *zeroLel = new LangEl( rootNamespace, name, LangEl::Term ); + langEls.append( zeroLel ); + zeroLel->isZero = true; + zeroLel->regionSet = regionSet; + + regionSet->collectIgnore->zeroLel = zeroLel; + } + } +} + +void LangStmt::chooseDefaultIter( Compiler *pd, IterCall *iterCall ) const +{ + /* This is two-part, It gets rewritten before evaluation in synthesis. */ + + /* The iterator name. */ + LangVarRef *callVarRef = LangVarRef::cons( loc, 0, context, scope, "triter" ); + + /* The parameters. 
*/ + CallArgVect *callExprVect = new CallArgVect; + callExprVect->append( new CallArg( iterCall->langExpr ) ); + iterCall->langTerm = LangTerm::cons( InputLoc(), callVarRef, callExprVect ); + iterCall->langExpr = 0; + iterCall->form = IterCall::Call; + iterCall->wasExpr = true; +} + +void LangStmt::declareForIter( Compiler *pd ) const +{ + if ( iterCall->form != IterCall::Call ) + chooseDefaultIter( pd, iterCall ); + + objField->typeRef = TypeRef::cons( loc, typeRef, iterCall ); +} + +void LangStmt::declare( Compiler *pd ) const +{ + switch ( type ) { + case ExprType: + break; + case IfType: + for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ ) + stmt->declare( pd ); + + if ( elsePart != 0 ) + elsePart->declare( pd ); + break; + + case ElseType: + for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ ) + stmt->declare( pd ); + break; + case RejectType: + break; + case WhileType: + for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ ) + stmt->declare( pd ); + break; + case AssignType: + break; + case ForIterType: + declareForIter( pd ); + + for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ ) + stmt->declare( pd ); + break; + case ReturnType: + break; + case BreakType: + break; + case YieldType: + break; + } +} + +void CodeBlock::declare( Compiler *pd ) const +{ + for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ ) + stmt->declare( pd ); +} + +void Compiler::declareFunction( Function *func ) +{ + CodeBlock *block = func->codeBlock; + block->declare( this ); +} + +void Compiler::declareReductionCode( Production *prod ) +{ + CodeBlock *block = prod->redBlock; + block->declare( this ); +} + +void Compiler::declareTranslateBlock( LangEl *langEl ) +{ + CodeBlock *block = langEl->transBlock; + + /* References to the reduce item. 
*/ + addMatchLength( block->localFrame, langEl ); + addMatchText( block->localFrame, langEl ); + addInput( block->localFrame ); + addThis( block->localFrame ); + + block->declare( this ); +} + +void Compiler::declarePreEof( TokenRegion *region ) +{ + CodeBlock *block = region->preEofBlock; + + addInput( block->localFrame ); + addThis( block->localFrame ); + + block->declare( this ); +} + +void Compiler::declareRootBlock() +{ + CodeBlock *block = rootCodeBlock; + block->declare( this ); +} + +void Compiler::declareByteCode() +{ + for ( FunctionList::Iter f = functionList; f.lte(); f++ ) + declareFunction( f ); + + for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) { + if ( prod->redBlock != 0 ) + declareReductionCode( prod ); + } + + for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { + if ( lel->transBlock != 0 ) + declareTranslateBlock( lel ); + } + + for ( RegionList::Iter r = regionList; r.lte(); r++ ) { + if ( r->preEofBlock != 0 ) + declarePreEof( r ); + } + + declareRootBlock( ); +} + +void Compiler::makeDefaultIterators() +{ + /* Tree iterator. */ + { + UniqueType *anyRefUT = findUniqueType( TYPE_REF, anyLangEl ); + ObjectMethod *objMethod = initFunction( uniqueTypeAny, rootNamespace, globalObjectDef, + ObjectMethod::Call, "triter", IN_HALT, IN_HALT, anyRefUT, true ); + + IterDef *triter = findIterDef( IterDef::Tree ); + objMethod->iterDef = triter; + } + + /* Child iterator. */ + { + UniqueType *anyRefUT = findUniqueType( TYPE_REF, anyLangEl ); + ObjectMethod *objMethod = initFunction( uniqueTypeAny, rootNamespace, globalObjectDef, + ObjectMethod::Call, "child", IN_HALT, IN_HALT, anyRefUT, true ); + + IterDef *triter = findIterDef( IterDef::Child ); + objMethod->iterDef = triter; + } + + /* Reverse iterator. 
*/ + { + UniqueType *anyRefUT = findUniqueType( TYPE_REF, anyLangEl ); + ObjectMethod *objMethod = initFunction( uniqueTypeAny, rootNamespace, globalObjectDef, + ObjectMethod::Call, "rev_child", IN_HALT, IN_HALT, anyRefUT, true ); + + IterDef *triter = findIterDef( IterDef::RevChild ); + objMethod->iterDef = triter; + } + + /* Repeat iterator. */ + { + UniqueType *anyRefUT = findUniqueType( TYPE_REF, anyLangEl ); + ObjectMethod *objMethod = initFunction( uniqueTypeAny, rootNamespace, globalObjectDef, + ObjectMethod::Call, "repeat", IN_HALT, IN_HALT, anyRefUT, true ); + + IterDef *triter = findIterDef( IterDef::Repeat ); + objMethod->iterDef = triter; + } + + /* Reverse repeat iterator. */ + { + UniqueType *anyRefUT = findUniqueType( TYPE_REF, anyLangEl ); + ObjectMethod *objMethod = initFunction( uniqueTypeAny, rootNamespace, globalObjectDef, + ObjectMethod::Call, "rev_repeat", IN_HALT, IN_HALT, anyRefUT, true ); + + IterDef *triter = findIterDef( IterDef::RevRepeat ); + objMethod->iterDef = triter; + } + + /* List iterator. */ + { + UniqueType *anyRefUT = findUniqueType( TYPE_REF, anyLangEl ); + ObjectMethod *objMethod = initFunction( uniqueTypeAny, rootNamespace, globalObjectDef, + ObjectMethod::Call, "list_iter", IN_HALT, IN_HALT, anyRefUT, true ); + + IterDef *triter = findIterDef( IterDef::ListEl ); + objMethod->iterDef = triter; + } + + /* Reverse Value List iterator. */ + { + UniqueType *anyRefUT = findUniqueType( TYPE_REF, anyLangEl ); + ObjectMethod *objMethod = initFunction( uniqueTypeAny, rootNamespace, globalObjectDef, + ObjectMethod::Call, "rev_list_iter", IN_HALT, IN_HALT, anyRefUT, true ); + + IterDef *triter = findIterDef( IterDef::RevListVal ); + objMethod->iterDef = triter; + } + + /* Map iterator. 
*/ + { + UniqueType *anyRefUT = findUniqueType( TYPE_REF, anyLangEl ); + ObjectMethod *objMethod = initFunction( uniqueTypeAny, rootNamespace, globalObjectDef, + ObjectMethod::Call, "map_iter", IN_HALT, IN_HALT, anyRefUT, true ); + + IterDef *triter = findIterDef( IterDef::MapEl ); + objMethod->iterDef = triter; + } +} + +void Compiler::addMatchLength( ObjectDef *frame, LangEl *lel ) +{ + /* Make the type ref. */ + TypeRef *typeRef = TypeRef::cons( internal, uniqueTypeInt ); + + /* Create the field and insert it into the map. */ + ObjectField *el = ObjectField::cons( InputLoc(), + ObjectField::InbuiltFieldType, typeRef, "match_length" ); + el->isConst = true; + el->inGetR = IN_GET_MATCH_LENGTH_R; + el->inGetValR = IN_GET_MATCH_LENGTH_R; + frame->rootScope->insertField( el->name, el ); +} + +void Compiler::addMatchText( ObjectDef *frame, LangEl *lel ) +{ + /* Make the type ref. */ + TypeRef *typeRef = TypeRef::cons( internal, uniqueTypeStr ); + + /* Create the field and insert it into the map. */ + ObjectField *el = ObjectField::cons( internal, + ObjectField::InbuiltFieldType, typeRef, "match_text" ); + el->isConst = true; + el->inGetR = IN_GET_MATCH_TEXT_R; + el->inGetValR = IN_GET_MATCH_TEXT_R; + frame->rootScope->insertField( el->name, el ); +} + +void Compiler::addInput( ObjectDef *frame ) +{ + /* Make the type ref. */ + TypeRef *typeRef = TypeRef::cons( internal, uniqueTypeInput ); + + /* Create the field and insert it into the map. */ + ObjectField *el = ObjectField::cons( internal, + ObjectField::InbuiltObjectType, typeRef, "input" ); + el->inGetR = IN_LOAD_INPUT_R; + el->inGetWV = IN_LOAD_INPUT_WV; + el->inGetWC = IN_LOAD_INPUT_WC; + el->inGetValR = IN_LOAD_INPUT_R; + el->inGetValWC = IN_LOAD_INPUT_WC; + el->inGetValWV = IN_LOAD_INPUT_WV; + frame->rootScope->insertField( el->name, el ); +} + +void Compiler::addThis( ObjectDef *frame ) +{ + /* Make the type ref. 
*/ + TypeRef *typeRef = TypeRef::cons( internal, uniqueTypeStream ); + + /* Create the field and insert it into the map. */ + ObjectField *el = ObjectField::cons( internal, + ObjectField::InbuiltObjectType, typeRef, "this" ); + el->inGetR = IN_LOAD_CONTEXT_R; + el->inGetWV = IN_LOAD_CONTEXT_WV; + el->inGetWC = IN_LOAD_CONTEXT_WC; + el->inGetValR = IN_LOAD_CONTEXT_R; + el->inGetValWC = IN_LOAD_CONTEXT_WC; + el->inGetValWV = IN_LOAD_CONTEXT_WV; + frame->rootScope->insertField( el->name, el ); +} + +void Compiler::declareIntFields( ) +{ + intObj = ObjectDef::cons( ObjectDef::BuiltinType, "int", nextObjectId++ ); +// intLangEl->objectDef = intObj; + + initFunction( uniqueTypeStr, intObj, ObjectMethod::Call, "to_string", IN_INT_TO_STR, IN_INT_TO_STR, true ); +} + +void Compiler::declareStrFields( ) +{ + strObj = ObjectDef::cons( ObjectDef::BuiltinType, "str", nextObjectId++ ); + strLangEl->objectDef = strObj; + + initFunction( uniqueTypeInt, strObj, ObjectMethod::Call, "atoi", + FN_STR_ATOI, FN_STR_ATOI, true, true ); + + initFunction( uniqueTypeInt, strObj, ObjectMethod::Call, "atoo", + FN_STR_ATOO, FN_STR_ATOO, true, true ); + + initFunction( uniqueTypeInt, strObj, ObjectMethod::Call, "uord8", + FN_STR_UORD8, FN_STR_UORD8, true, true ); + + initFunction( uniqueTypeInt, strObj, ObjectMethod::Call, "sord8", + FN_STR_SORD8, FN_STR_SORD8, true, true ); + + initFunction( uniqueTypeInt, strObj, ObjectMethod::Call, "uord16", + FN_STR_UORD16, FN_STR_UORD16, true, true ); + + initFunction( uniqueTypeInt, strObj, ObjectMethod::Call, "sord16", + FN_STR_SORD16, FN_STR_SORD16, true, true ); + + initFunction( uniqueTypeInt, strObj, ObjectMethod::Call, "uord32", + FN_STR_UORD32, FN_STR_UORD32, true, true ); + + initFunction( uniqueTypeInt, strObj, ObjectMethod::Call, "sord32", + FN_STR_SORD32, FN_STR_SORD32, true, true ); + + initFunction( uniqueTypeStr, strObj, ObjectMethod::Call, "prefix", + FN_STR_PREFIX, FN_STR_PREFIX, uniqueTypeInt, true, true ); + + initFunction( 
uniqueTypeStr, strObj, ObjectMethod::Call, "suffix", + FN_STR_SUFFIX, FN_STR_SUFFIX, uniqueTypeInt, true, true ); + + initFunction( uniqueTypeStr, rootNamespace, globalObjectDef, + ObjectMethod::Call, "sprintf", FN_SPRINTF, FN_SPRINTF, + uniqueTypeStr, uniqueTypeInt, true, true ); + + addLengthField( strObj, IN_STR_LENGTH ); +} + +void Compiler::declareInputField( ObjectDef *objDef, code_t getLength ) +{ + /* Create the "tree" field. */ + TypeRef *typeRef = TypeRef::cons( internal, uniqueTypeStr ); + ObjectField *el = ObjectField::cons( internal, + ObjectField::InbuiltFieldType, typeRef, "tree" ); + el->isConst = true; + el->inGetR = IN_GET_COLLECT_STRING; + el->inGetValR = IN_GET_COLLECT_STRING; + + objDef->rootScope->insertField( el->name, el ); +} + +void Compiler::declareStreamField( ObjectDef *objDef, code_t getLength ) +{ + /* Create the "tree" field. */ + TypeRef *typeRef = TypeRef::cons( internal, uniqueTypeStr ); + ObjectField *el = ObjectField::cons( internal, + ObjectField::InbuiltFieldType, typeRef, "tree" ); + el->isConst = true; + el->inGetR = IN_GET_COLLECT_STRING; + el->inGetValR = IN_GET_COLLECT_STRING; + + objDef->rootScope->insertField( el->name, el ); +} + +void Compiler::declareInputFields( ) +{ + inputObj = inputSel->structDef->objectDef; + + initFunction( uniqueTypeStr, inputObj, ObjectMethod::Call, "pull", + IN_INPUT_PULL_WV, IN_INPUT_PULL_WC, uniqueTypeInt, false ); + + initFunction( uniqueTypeStr, inputObj, ObjectMethod::Call, "push", + IN_INPUT_PUSH_WV, IN_INPUT_PUSH_WV, uniqueTypeAny, false ); + + initFunction( uniqueTypeStr, inputObj, ObjectMethod::Call, "push_ignore", + IN_INPUT_PUSH_IGNORE_WV, IN_INPUT_PUSH_IGNORE_WV, uniqueTypeAny, false ); + + initFunction( uniqueTypeStr, inputObj, ObjectMethod::Call, "push_stream", + IN_INPUT_PUSH_STREAM_WV, IN_INPUT_PUSH_STREAM_WV, uniqueTypeStream, false ); + + initFunction( uniqueTypeVoid, inputObj, ObjectMethod::Call, "close", + IN_INPUT_CLOSE_WC, IN_INPUT_CLOSE_WC, false ); + + 
initFunction( uniqueTypeVoid, inputObj, ObjectMethod::Call, "auto_trim", + IN_IINPUT_AUTO_TRIM_WC, IN_IINPUT_AUTO_TRIM_WC, uniqueTypeBool, false ); + + declareInputField( inputObj, 0 ); +} + +void Compiler::declareStreamFields( ) +{ + streamObj = streamSel->structDef->objectDef; + + initFunction( uniqueTypeStr, streamObj, ObjectMethod::Call, "pull", + IN_INPUT_PULL_WV, IN_INPUT_PULL_WC, uniqueTypeInt, false ); + + initFunction( uniqueTypeStr, streamObj, ObjectMethod::Call, "push", + IN_INPUT_PUSH_WV, IN_INPUT_PUSH_WV, uniqueTypeAny, false ); + + initFunction( uniqueTypeStr, streamObj, ObjectMethod::Call, "push_ignore", + IN_INPUT_PUSH_IGNORE_WV, IN_INPUT_PUSH_IGNORE_WV, uniqueTypeAny, false ); + + initFunction( uniqueTypeStr, streamObj, ObjectMethod::Call, "push_stream", + IN_INPUT_PUSH_STREAM_WV, IN_INPUT_PUSH_STREAM_WV, uniqueTypeStream, false ); + + initFunction( uniqueTypeVoid, streamObj, ObjectMethod::Call, "close", + IN_INPUT_CLOSE_WC, IN_INPUT_CLOSE_WC, false ); + + initFunction( uniqueTypeVoid, streamObj, ObjectMethod::Call, "auto_trim", + IN_INPUT_AUTO_TRIM_WC, IN_INPUT_AUTO_TRIM_WC, uniqueTypeBool, false ); + + declareStreamField( streamObj, 0 ); +} + +ObjectField *Compiler::makeDataEl() +{ + /* Create the "data" field. */ + TypeRef *typeRef = TypeRef::cons( internal, uniqueTypeStr ); + ObjectField *el = ObjectField::cons( internal, + ObjectField::InbuiltFieldType, typeRef, "data" ); + + el->inGetR = IN_GET_TOKEN_DATA_R; + el->inSetWC = IN_SET_TOKEN_DATA_WC; + el->inSetWV = IN_SET_TOKEN_DATA_WV; + return el; +} + +ObjectField *Compiler::makeFileEl() +{ + /* Create the "file" field. */ + TypeRef *typeRef = TypeRef::cons( internal, uniqueTypeStr ); + ObjectField *el = ObjectField::cons( internal, + ObjectField::InbuiltFieldType, typeRef, "file" ); + + el->isConst = true; + el->inGetR = IN_GET_TOKEN_FILE_R; + el->inGetValR = IN_GET_TOKEN_FILE_R; + return el; +} + +ObjectField *Compiler::makeLineEl() +{ + /* Create the "line" field. 
*/ + TypeRef *typeRef = TypeRef::cons( internal, uniqueTypeInt ); + ObjectField *el = ObjectField::cons( internal, + ObjectField::InbuiltFieldType, typeRef, "line" ); + + el->isConst = true; + el->inGetR = IN_GET_TOKEN_LINE_R; + el->inGetValR = IN_GET_TOKEN_LINE_R; + return el; +} + +ObjectField *Compiler::makeColEl() +{ + /* Create the "col" field. */ + TypeRef *typeRef = TypeRef::cons( internal, uniqueTypeInt ); + ObjectField *el = ObjectField::cons( internal, + ObjectField::InbuiltFieldType, typeRef, "col" ); + + el->isConst = true; + el->inGetR = IN_GET_TOKEN_COL_R; + el->inGetValR = IN_GET_TOKEN_COL_R; + return el; +} + +ObjectField *Compiler::makePosEl() +{ + /* Create the "pos" field. */ + TypeRef *typeRef = TypeRef::cons( internal, uniqueTypeInt ); + ObjectField *el = ObjectField::cons( internal, + ObjectField::InbuiltFieldType, typeRef, "pos" ); + + el->isConst = true; + el->inGetR = IN_GET_TOKEN_POS_R; + el->inGetValR = IN_GET_TOKEN_POS_R; + return el; +} + +/* Add a constant length field to the object. + * Opcode supplied by the caller. */ +void Compiler::addLengthField( ObjectDef *objDef, code_t getLength ) +{ + /* Create the "length" field. */ + TypeRef *typeRef = TypeRef::cons( internal, uniqueTypeInt ); + ObjectField *el = ObjectField::cons( internal, + ObjectField::InbuiltFieldType, typeRef, "length" ); + el->isConst = true; + el->inGetR = getLength; + el->inGetValR = getLength; + + objDef->rootScope->insertField( el->name, el ); +} + +void Compiler::declareTokenFields( ) +{ + /* Give all user terminals the token object type. */ + for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { + if ( lel->type == LangEl::Term ) { + if ( lel->objectDef != 0 ) { + /* Create the "data" field. */ + ObjectField *dataEl = makeDataEl(); + lel->objectDef->rootScope->insertField( dataEl->name, dataEl ); + + /* Create the "file" field. 
*/ + ObjectField *fileEl = makeFileEl(); + lel->objectDef->rootScope->insertField( fileEl->name, fileEl ); + + /* Create the "line" field. */ + ObjectField *lineEl = makeLineEl(); + lel->objectDef->rootScope->insertField( lineEl->name, lineEl ); + + /* Create the "col" field. */ + ObjectField *colEl = makeColEl(); + lel->objectDef->rootScope->insertField( colEl->name, colEl ); + + /* Create the "pos" field. */ + ObjectField *posEl = makePosEl(); + lel->objectDef->rootScope->insertField( posEl->name, posEl ); + } + } + } +} + +void Compiler::declareGlobalFields() +{ + ObjectMethod *method; + + method = initFunction( uniqueTypeStream, rootNamespace, globalObjectDef, ObjectMethod::Call, "open", + IN_OPEN_FILE, IN_OPEN_FILE, uniqueTypeStr, uniqueTypeStr, true ); + method->useCallObj = false; + + method = initFunction( uniqueTypeStr, rootNamespace, globalObjectDef, ObjectMethod::Call, "tolower", + IN_TO_LOWER, IN_TO_LOWER, uniqueTypeStr, true ); + method->useCallObj = false; + + method = initFunction( uniqueTypeStr, rootNamespace, globalObjectDef, ObjectMethod::Call, "toupper", + IN_TO_UPPER, IN_TO_UPPER, uniqueTypeStr, true ); + method->useCallObj = false; + + method = initFunction( uniqueTypeInt, rootNamespace, globalObjectDef, ObjectMethod::Call, "atoi", + FN_STR_ATOI, FN_STR_ATOI, uniqueTypeStr, true, true ); + method->useCallObj = false; + + method = initFunction( uniqueTypeInt, rootNamespace, globalObjectDef, ObjectMethod::Call, "atoo", + FN_STR_ATOO, FN_STR_ATOO, uniqueTypeStr, true, true ); + method->useCallObj = false; + + method = initFunction( uniqueTypeStr, rootNamespace, globalObjectDef, ObjectMethod::Call, "prefix", + FN_PREFIX, FN_PREFIX, uniqueTypeStr, uniqueTypeInt, true, true ); + method->useCallObj = false; + + method = initFunction( uniqueTypeStr, rootNamespace, globalObjectDef, ObjectMethod::Call, "suffix", + FN_SUFFIX, FN_SUFFIX, uniqueTypeStr, uniqueTypeInt, true, true ); + method->useCallObj = false; + + + method = initFunction( uniqueTypeInt, 
rootNamespace, globalObjectDef, ObjectMethod::Call, "uord8", + FN_STR_UORD8, FN_STR_UORD8, uniqueTypeStr, true, true ); + method->useCallObj = false; + + method = initFunction( uniqueTypeInt, rootNamespace, globalObjectDef, ObjectMethod::Call, "sord8", + FN_STR_SORD8, FN_STR_SORD8, uniqueTypeStr, true, true ); + method->useCallObj = false; + + method = initFunction( uniqueTypeInt, rootNamespace, globalObjectDef, ObjectMethod::Call, "uord16", + FN_STR_UORD16, FN_STR_UORD16, uniqueTypeStr, true, true ); + method->useCallObj = false; + + method = initFunction( uniqueTypeInt, rootNamespace, globalObjectDef, ObjectMethod::Call, "sord16", + FN_STR_SORD16, FN_STR_SORD16, uniqueTypeStr, true, true ); + method->useCallObj = false; + + method = initFunction( uniqueTypeInt, rootNamespace, globalObjectDef, ObjectMethod::Call, "uord32", + FN_STR_UORD32, FN_STR_UORD32, uniqueTypeStr, true, true ); + method->useCallObj = false; + + method = initFunction( uniqueTypeInt, rootNamespace, globalObjectDef, ObjectMethod::Call, "sord32", + FN_STR_SORD32, FN_STR_SORD32, uniqueTypeStr, true, true ); + method->useCallObj = false; + + method = initFunction( uniqueTypeInt, rootNamespace, globalObjectDef, ObjectMethod::Call, "exit", + FN_EXIT, FN_EXIT, uniqueTypeInt, true, true ); + + method = initFunction( uniqueTypeInt, rootNamespace, globalObjectDef, ObjectMethod::Call, "exit_hard", + FN_EXIT_HARD, FN_EXIT_HARD, uniqueTypeInt, true, true ); + + method = initFunction( uniqueTypeInt, rootNamespace, globalObjectDef, ObjectMethod::Call, "system", + IN_SYSTEM, IN_SYSTEM, uniqueTypeStr, true ); + + method = initFunction( uniqueTypeStr, rootNamespace, globalObjectDef, ObjectMethod::Call, "xml", + IN_TREE_TO_STR_XML, IN_TREE_TO_STR_XML, uniqueTypeAny, true ); + method->useCallObj = false; + + method = initFunction( uniqueTypeStr, rootNamespace, globalObjectDef, ObjectMethod::Call, "xmlac", + IN_TREE_TO_STR_XML_AC, IN_TREE_TO_STR_XML_AC, uniqueTypeAny, true ); + method->useCallObj = false; + + 
method = initFunction( uniqueTypeStr, rootNamespace, globalObjectDef, ObjectMethod::Call, "postfix", + IN_TREE_TO_STR_POSTFIX, IN_TREE_TO_STR_POSTFIX, uniqueTypeAny, true ); + method->useCallObj = false; + + addStdin(); + addStdout(); + addStderr(); + addStds(); + addArgv(); + addError(); + addDefineArgs(); +} + +void Compiler::addStdin() +{ + /* Make the type ref. */ + TypeRef *typeRef = TypeRef::cons( internal, uniqueTypeStream ); + + /* Create the field and insert it into the map. */ + ObjectField *el = ObjectField::cons( internal, + ObjectField::InbuiltFieldType, typeRef, "stdin" ); + + el->isConst = true; + + el->inGetR = IN_GET_CONST; + el->inGetWC = IN_GET_CONST; + el->inGetWV = IN_GET_CONST; + el->inGetValR = IN_GET_CONST; + el->inGetValWC = IN_GET_CONST; + el->inGetValWV = IN_GET_CONST; + + el->isConstVal = true; + el->constValId = CONST_STDIN; + + rootNamespace->rootScope->insertField( el->name, el ); +} + +void Compiler::addStdout() +{ + /* Make the type ref. */ + TypeRef *typeRef = TypeRef::cons( internal, uniqueTypeStream ); + + /* Create the field and insert it into the map. */ + ObjectField *el = ObjectField::cons( internal, + ObjectField::InbuiltFieldType, typeRef, "stdout" ); + el->isConst = true; + + el->inGetR = IN_GET_CONST; + el->inGetWC = IN_GET_CONST; + el->inGetWV = IN_GET_CONST; + el->inGetValR = IN_GET_CONST; + el->inGetValWC = IN_GET_CONST; + el->inGetValWV = IN_GET_CONST; + + el->isConstVal = true; + el->constValId = CONST_STDOUT; + + rootNamespace->rootScope->insertField( el->name, el ); +} + +void Compiler::addStderr() +{ + /* Make the type ref. */ + TypeRef *typeRef = TypeRef::cons( internal, uniqueTypeStream ); + + /* Create the field and insert it into the map. 
*/ + ObjectField *el = ObjectField::cons( internal, + ObjectField::InbuiltFieldType, typeRef, "stderr" ); + el->isConst = true; + + el->inGetR = IN_GET_CONST; + el->inGetWC = IN_GET_CONST; + el->inGetWV = IN_GET_CONST; + el->inGetValR = IN_GET_CONST; + el->inGetValWC = IN_GET_CONST; + el->inGetValWV = IN_GET_CONST; + + el->isConstVal = true; + el->constValId = CONST_STDERR; + + rootNamespace->rootScope->insertField( el->name, el ); +} + +void Compiler::addArgv() +{ + /* Create the field and insert it into the map. */ + ObjectField *el = ObjectField::cons( internal, + ObjectField::StructFieldType, argvTypeRef, "argv" ); + el->isConst = true; + rootNamespace->rootScope->insertField( el->name, el ); + argv = el; + + TypeRef *typeRef = TypeRef::cons( internal, uniqueTypeStr ); + + el = ObjectField::cons( internal, + ObjectField::StructFieldType, typeRef, "arg0" ); + el->isConst = true; + rootNamespace->rootScope->insertField( el->name, el ); + arg0 = el; +} + +void Compiler::addStds() +{ + ObjectField *el = ObjectField::cons( internal, + ObjectField::StructFieldType, stdsTypeRef, "stds" ); + rootNamespace->rootScope->insertField( el->name, el ); + stds = el; +} + +void Compiler::addError() +{ + /* Make the type ref. */ + TypeRef *typeRef = TypeRef::cons( internal, uniqueTypeStr ); + + /* Create the field and insert it into the map. */ + ObjectField *el = ObjectField::cons( internal, + ObjectField::InbuiltFieldType, typeRef, "error" ); + el->isConst = true; + el->inGetR = IN_GET_ERROR; + el->inGetWC = IN_GET_ERROR; + el->inGetWV = IN_GET_ERROR; + rootNamespace->rootScope->insertField( el->name, el ); +} + +void Compiler::addDefineArgs() +{ + for ( DefineVector::Iter d = defineArgs; d.lte(); d++ ) { + TypeRef *typeRef = TypeRef::cons( internal, uniqueTypeStr ); + + /* Create the field and insert it into the map. 
*/ + ObjectField *el = ObjectField::cons( internal, + ObjectField::InbuiltFieldType, typeRef, d->name ); + + el->isConst = true; + + el->inGetR = IN_GET_CONST; + el->inGetWC = IN_GET_CONST; + el->inGetWV = IN_GET_CONST; + el->inGetValR = IN_GET_CONST; + el->inGetValWC = IN_GET_CONST; + el->inGetValWV = IN_GET_CONST; + + el->isConstVal = true; + el->constValId = CONST_ARG; + el->constValArg = d->value; + + rootNamespace->rootScope->insertField( el->name, el ); + } +} + +void Compiler::initMapFunctions( GenericType *gen ) +{ + /* Value functions. */ + initFunction( gen->valueUt, gen->objDef, ObjectMethod::Call, "find", + FN_VMAP_FIND, FN_VMAP_FIND, gen->keyUt, true, true, gen ); + + initFunction( uniqueTypeInt, gen->objDef, ObjectMethod::Call, "insert", + FN_VMAP_INSERT_WV, FN_VMAP_INSERT_WC, gen->keyUt, gen->valueUt, + false, true, gen ); + + initFunction( gen->elUt, gen->objDef, ObjectMethod::Call, "remove", + FN_VMAP_REMOVE_WV, FN_VMAP_REMOVE_WC, gen->keyUt, false, true, gen ); + + /* + * Element Functions + */ + initFunction( gen->elUt, gen->objDef, ObjectMethod::Call, "find_el", + FN_MAP_FIND, FN_MAP_FIND, gen->keyUt, true, true, gen ); + + initFunction( uniqueTypeInt, gen->objDef, ObjectMethod::Call, "insert_el", + FN_MAP_INSERT_WV, FN_MAP_INSERT_WC, gen->elUt, false, true, gen ); + + initFunction( gen->elUt, gen->objDef, ObjectMethod::Call, "detach_el", + FN_MAP_DETACH_WV, FN_MAP_DETACH_WC, gen->elUt, false, true, gen ); +} + +void Compiler::initMapField( GenericType *gen, const char *name, int offset ) +{ + /* Make the type ref and create the field. 
*/ + ObjectField *el = ObjectField::cons( internal, + ObjectField::InbuiltOffType, gen->elTr, name ); + + el->inGetR = IN_GET_MAP_MEM_R; + el->inGetWC = IN_GET_MAP_MEM_WC; + el->inGetWV = IN_GET_MAP_MEM_WV; +// el->inSetWC = IN_SET_MAP_MEM_WC; +// el->inSetWV = IN_SET_MAP_MEM_WV; + + el->inGetValR = IN_GET_MAP_MEM_R; + el->inGetValWC = IN_GET_MAP_MEM_WC; + el->inGetValWV = IN_GET_MAP_MEM_WV; + + gen->objDef->rootScope->insertField( el->name, el ); + + el->useGenericId = true; + el->generic = gen; + + /* Zero for head, One for tail. */ + el->offset = offset; +} + +void Compiler::initMapFields( GenericType *gen ) +{ + addLengthField( gen->objDef, IN_MAP_LENGTH ); + + initMapField( gen, "head_el", 0 ); + initMapField( gen, "tail_el", 1 ); + + initMapElKey( gen, "key", 0 ); + + initMapElField( gen, "prev", 0 ); + initMapElField( gen, "next", 1 ); +} + +void Compiler::initMapElKey( GenericType *gen, const char *name, int offset ) +{ + /* Make the type ref and create the field. */ + ObjectField *el = ObjectField::cons( internal, + ObjectField::GenericDependentType, gen->keyTr, name ); + + gen->el->mapKeyField = el; + + /* Offset will be computed when the offset of the owning map element field + * is computed. */ + + gen->elUt->structEl->structDef->objectDef->rootScope->insertField( el->name, el ); +} + +void Compiler::initMapElField( GenericType *gen, const char *name, int offset ) +{ + /* Make the type ref and create the field. */ + ObjectField *el = ObjectField::cons( internal, + ObjectField::InbuiltOffType, gen->elTr, name ); + + el->inGetR = IN_GET_MAP_EL_MEM_R; + el->inGetValR = IN_GET_MAP_EL_MEM_R; +// el->inGetWC = IN_GET_LIST2EL_MEM_WC; +// el->inGetWV = IN_GET_LIST2EL_MEM_WV; +// el->inSetWC = IN_SET_LIST2EL_MEM_WC; +// el->inSetWV = IN_SET_LIST2EL_MEM_WV; + + el->useGenericId = true; + el->generic = gen; + + /* Zero for head, One for tail. 
*/ + el->offset = offset; + + gen->elUt->structEl->structDef->objectDef->rootScope->insertField( el->name, el ); +} + +void Compiler::initListFunctions( GenericType *gen ) +{ + initFunction( uniqueTypeInt, gen->objDef, ObjectMethod::Call, "push_head", + FN_VLIST_PUSH_HEAD_WV, FN_VLIST_PUSH_HEAD_WC, gen->valueUt, false, true, gen ); + + initFunction( uniqueTypeInt, gen->objDef, ObjectMethod::Call, "push_tail", + FN_VLIST_PUSH_TAIL_WV, FN_VLIST_PUSH_TAIL_WC, gen->valueUt, false, true, gen ); + + initFunction( uniqueTypeInt, gen->objDef, ObjectMethod::Call, "push", + FN_VLIST_PUSH_HEAD_WV, FN_VLIST_PUSH_HEAD_WC, gen->valueUt, false, true, gen ); + + initFunction( gen->valueUt, gen->objDef, ObjectMethod::Call, "pop_head", + FN_VLIST_POP_HEAD_WV, FN_VLIST_POP_HEAD_WC, false, true, gen ); + + initFunction( gen->valueUt, gen->objDef, ObjectMethod::Call, "pop_tail", + FN_VLIST_POP_TAIL_WV, FN_VLIST_POP_TAIL_WC, false, true, gen ); + + initFunction( gen->valueUt, gen->objDef, ObjectMethod::Call, "pop", + FN_VLIST_POP_HEAD_WV, FN_VLIST_POP_HEAD_WC, false, true, gen ); + + initFunction( uniqueTypeInt, gen->objDef, ObjectMethod::Call, "push_head_el", + FN_LIST_PUSH_HEAD_WV, FN_LIST_PUSH_HEAD_WC, gen->elUt, false, true, gen ); + + initFunction( uniqueTypeInt, gen->objDef, ObjectMethod::Call, "push_tail_el", + FN_LIST_PUSH_TAIL_WV, FN_LIST_PUSH_TAIL_WC, gen->elUt, false, true, gen ); + + initFunction( uniqueTypeInt, gen->objDef, ObjectMethod::Call, "push_el", + FN_LIST_PUSH_HEAD_WV, FN_LIST_PUSH_HEAD_WC, gen->elUt, false, true, gen ); + + initFunction( gen->elUt, gen->objDef, ObjectMethod::Call, "pop_head_el", + FN_LIST_POP_HEAD_WV, FN_LIST_POP_HEAD_WC, false, true, gen ); + + initFunction( gen->elUt, gen->objDef, ObjectMethod::Call, "pop_tail_el", + FN_LIST_POP_TAIL_WV, FN_LIST_POP_TAIL_WC, false, true, gen ); + + initFunction( gen->elUt, gen->objDef, ObjectMethod::Call, "pop_el", + FN_LIST_POP_HEAD_WV, FN_LIST_POP_HEAD_WC, false, true, gen ); +} + +void 
Compiler::initListElField( GenericType *gen, const char *name, int offset ) +{ + /* Make the type ref and create the field. */ + ObjectField *el = ObjectField::cons( internal, + ObjectField::InbuiltOffType, gen->elTr, name ); + + el->inGetR = IN_GET_LIST_EL_MEM_R; + el->inGetValR = IN_GET_LIST_EL_MEM_R; +// el->inGetWC = IN_GET_LIST2EL_MEM_WC; +// el->inGetWV = IN_GET_LIST2EL_MEM_WV; +// el->inSetWC = IN_SET_LIST2EL_MEM_WC; +// el->inSetWV = IN_SET_LIST2EL_MEM_WV; + + el->useGenericId = true; + el->generic = gen; + + /* Zero for head, One for tail. */ + el->offset = offset; + + gen->elUt->structEl->structDef->objectDef->rootScope->insertField( el->name, el ); +} + +void Compiler::initListFieldEl( GenericType *gen, const char *name, int offset ) +{ + /* Make the type ref and create the field. */ + ObjectField *el = ObjectField::cons( internal, + ObjectField::InbuiltOffType, gen->elTr, name ); + + el->inGetR = IN_GET_LIST_MEM_R; + el->inGetWC = IN_GET_LIST_MEM_WC; + el->inGetWV = IN_GET_LIST_MEM_WV; +// el->inSetWC = IN_SET_LIST_MEM_WC; +// el->inSetWV = IN_SET_LIST_MEM_WV; + + el->inGetValR = IN_GET_LIST_MEM_R; + el->inGetValWC = IN_GET_LIST_MEM_WC; + el->inGetValWV = IN_GET_LIST_MEM_WV; + + gen->objDef->rootScope->insertField( el->name, el ); + + el->useGenericId = true; + el->generic = gen; + + /* Zero for head, One for tail. */ + el->offset = offset; +} + +void Compiler::initListFieldVal( GenericType *gen, const char *name, int offset ) +{ + /* Make the type ref and create the field. 
*/ + ObjectField *el = ObjectField::cons( internal, + ObjectField::InbuiltOffType, gen->valueTr, name ); + + el->inGetR = IN_GET_VLIST_MEM_R; + el->inGetWC = IN_GET_VLIST_MEM_WC; + el->inGetWV = IN_GET_VLIST_MEM_WV; +// el->inSetWC = IN_SET_VLIST_MEM_WC; +// el->inSetWV = IN_SET_VLIST_MEM_WV; + + el->inGetValR = IN_GET_VLIST_MEM_R; + el->inGetValWC = IN_GET_VLIST_MEM_WC; + el->inGetValWV = IN_GET_VLIST_MEM_WV; + + gen->objDef->rootScope->insertField( el->name, el ); + + el->useGenericId = true; + el->generic = gen; + + /* Zero for head, One for tail. */ + el->offset = offset; +} + +void Compiler::initListFields( GenericType *gen ) +{ + /* The value fields. */ + initListFieldVal( gen, "head", 0 ); + initListFieldVal( gen, "tail", 1 ); + initListFieldVal( gen, "top", 0 ); + + /* The element fields. */ + initListFieldEl( gen, "head_el", 0 ); + initListFieldEl( gen, "tail_el", 1 ); + initListFieldEl( gen, "top_el", 0 ); + + addLengthField( gen->objDef, IN_LIST_LENGTH ); + + /* The fields of the list element. */ + initListElField( gen, "prev", 0 ); + initListElField( gen, "next", 1 ); +} + +void Compiler::initParserFunctions( GenericType *gen ) +{ + initFunction( gen->elUt, gen->objDef, ObjectMethod::ParseFinish, "finish", + IN_PARSE_FRAG_W, IN_PARSE_FRAG_W, true ); + + initFunction( gen->elUt, gen->objDef, ObjectMethod::ParseFinish, "eof", + IN_PARSE_FRAG_W, IN_PARSE_FRAG_W, true ); + + initFunction( uniqueTypeInput, gen->objDef, ObjectMethod::Call, "gets", + IN_GET_PARSER_STREAM, IN_GET_PARSER_STREAM, true ); +} + +void Compiler::initParserField( GenericType *gen, const char *name, + int offset, TypeRef *typeRef ) +{ + /* Make the type ref and create the field. 
*/ + ObjectField *el = ObjectField::cons( internal, + ObjectField::InbuiltOffType, typeRef, name ); + + el->inGetR = IN_GET_PARSER_MEM_R; + // el->inGetWC = IN_GET_PARSER_MEM_WC; + // el->inGetWV = IN_GET_PARSER_MEM_WV; + // el->inSetWC = IN_SET_PARSER_MEM_WC; + // el->inSetWV = IN_SET_PARSER_MEM_WV; + + gen->objDef->rootScope->insertField( el->name, el ); + + /* Zero for head, One for tail. */ + el->offset = offset; +} + +void Compiler::initParserFields( GenericType *gen ) +{ + TypeRef *typeRef; + + typeRef = TypeRef::cons( internal, gen->elUt ); + initParserField( gen, "tree", 0, typeRef ); + + typeRef = TypeRef::cons( internal, uniqueTypeStr ); + initParserField( gen, "error", 1, typeRef ); +} + +void Compiler::makeFuncVisible( Function *func, bool isUserIter ) +{ + func->localFrame = func->codeBlock->localFrame; + + /* Set up the parameters. */ + for ( ParameterList::Iter param = *func->paramList; param.lte(); param++ ) { + if ( func->localFrame->rootScope->findField( param->name ) != 0 ) + error(param->loc) << "parameter " << param->name << " redeclared" << endp; + + func->localFrame->rootScope->insertField( param->name, param ); + } + + /* Insert the function into the global function map. */ + ObjectMethod *objMethod = new ObjectMethod( func->typeRef, func->name, + IN_CALL_WV, IN_CALL_WC, + func->paramList->length(), 0, func->paramList, false ); + objMethod->funcId = func->funcId; + objMethod->useFuncId = true; + objMethod->useCallObj = false; + objMethod->func = func; + + if ( isUserIter ) { + IterDef *uiter = findIterDef( IterDef::User, func ); + objMethod->iterDef = uiter; + } + + NameScope *scope = func->nspace->rootScope; // : globalObjectDef->rootScope; + + if ( !scope->methodMap.insert( func->name, objMethod ) ) + error(func->typeRef->loc) << "function " << func->name << " redeclared" << endp; + + func->objMethod = objMethod; +} + +void Compiler::makeInHostVisible( Function *func ) +{ + /* Set up the parameters. 
*/ + for ( ParameterList::Iter param = *func->paramList; param.lte(); param++ ) { + if ( func->localFrame->rootScope->findField( param->name ) != 0 ) + error(param->loc) << "parameter " << param->name << " redeclared" << endp; + + func->localFrame->rootScope->insertField( param->name, param ); + } + + /* Insert the function into the global function map. */ + ObjectMethod *objMethod = new ObjectMethod( func->typeRef, func->name, + IN_HOST, IN_HOST, + func->paramList->length(), 0, func->paramList, false ); + objMethod->funcId = func->funcId; + objMethod->useFuncId = true; + objMethod->useCallObj = false; + objMethod->func = func; + + NameScope *scope = func->nspace->rootScope; + + if ( !scope->methodMap.insert( func->name, objMethod ) ) { + error(func->typeRef->loc) << "in-host function " << func->name << + " redeclared" << endp; + } + + func->objMethod = objMethod; +} + +/* + * Type Declaration Root. + */ +void Compiler::declarePass() +{ + declareReVars(); + + makeDefaultIterators(); + + for ( FunctionList::Iter f = functionList; f.lte(); f++ ) + makeFuncVisible( f, f->isUserIter ); + + for ( FunctionList::Iter f = inHostList; f.lte(); f++ ) + makeInHostVisible( f ); + + rootNamespace->declare( this ); + + /* Will fill in zero lels that were not declared. */ + makeIgnoreCollectors(); + + declareByteCode(); + + declareIntFields(); + declareStrFields(); + declareInputFields(); + declareStreamFields(); + declareTokenFields(); + declareGlobalFields(); + + /* Fill any empty scanners with a default token. 
*/ + initEmptyScanners(); +} diff --git a/src/defs.h.cmake.in b/src/defs.h.cmake.in new file mode 100644 index 00000000..c4cf8844 --- /dev/null +++ b/src/defs.h.cmake.in @@ -0,0 +1,11 @@ +/* defs.h Generated from defs.h.cmake.in by cmake */ + +#ifndef _COLM_DEFS_H +#define _COLM_DEFS_H + +#cmakedefine SIZEOF_LONG @SIZEOF_LONG@ +#cmakedefine SIZEOF_UNSIGNED_LONG @SIZEOF_UNSIGNED_LONG@ +#cmakedefine SIZEOF_UNSIGNED_LONG_LONG @SIZEOF_UNSIGNED_LONG_LONG@ +#cmakedefine SIZEOF_VOID_P @SIZEOF_VOID_P@ + +#endif /* _COLM_DEFS_H */ diff --git a/src/defs.h.in b/src/defs.h.in new file mode 100644 index 00000000..a91e2ff2 --- /dev/null +++ b/src/defs.h.in @@ -0,0 +1,40 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _COLM_DEFS_H +#define _COLM_DEFS_H + +/* Configuration */ + +/* The size of `long', as computed by sizeof. 
*/ +#undef SIZEOF_LONG + +/* The size of `unsigned long', as computed by sizeof. */ +#undef SIZEOF_UNSIGNED_LONG + +/* The size of `unsigned long long', as computed by sizeof. */ +#undef SIZEOF_UNSIGNED_LONG_LONG + +/* The size of `void *', as computed by sizeof. */ +#undef SIZEOF_VOID_P + +#endif /* _COLM_DEFS_H */ diff --git a/src/dotgen.cc b/src/dotgen.cc new file mode 100644 index 00000000..42f54159 --- /dev/null +++ b/src/dotgen.cc @@ -0,0 +1,117 @@ +/* + * Copyright 2006-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <stdbool.h> + +#include <iostream> + +#include "compiler.h" + +using namespace std; + +/* Write one dot edge to outStream for each transition in the transMap of + * state. The edge label is the name of the language element found in + * langElIndex for the transition key, or the key printed as a char when there + * is none, followed by the list of actions on the transition, if any. */ +void Compiler::writeTransList( PdaState *state ) +{ + ostream &out = *outStream; + for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { + /* Write out the from and to states. 
*/ + out << "\t" << state->stateNum << " -> " << trans->value->toState->stateNum; + + /* Begin the label. */ + out << " [ label = \""; + long key = trans->key; + LangEl *lel = langElIndex[key]; + if ( lel != 0 ) + out << lel->name; + else + out << (char)key; + + if ( trans->value->actions.length() > 0 ) { + out << " / "; + for ( ActDataList::Iter act = trans->value->actions; act.lte(); act++ ) { + /* The low two bits of the action select how it is printed. For + * kinds 2 and 3 the remaining bits are used as an index into + * prodIdIndex. NOTE(review): S, R and SR presumably stand for + * shift, reduce and shift-reduce -- confirm. Kind 0 prints nothing. */ + switch ( *act & 0x3 ) { + case 1: + out << "S(" << trans->value->actOrds[act.pos()] << ")"; + break; + case 2: { + out << "R(" << prodIdIndex[(*act >> 2)]->data << + ", " << trans->value->actOrds[act.pos()] << ")"; + break; + } + case 3: { + out << "SR(" << prodIdIndex[(*act >> 2)]->data << + ", " << trans->value->actOrds[act.pos()] << ")"; + break; + }} + if ( ! act.last() ) + out << ", "; + } + } + + out << "\" ];\n"; + } +} + +/* Write graph to outStream as a Graphviz digraph named after parserName: one + * point-shaped ENTRY node per entry state, one circle node per state, the + * transitions of every state, and an edge from each ENTRY node to its state. */ +void Compiler::writeDotFile( PdaGraph *graph ) +{ + ostream &out = *outStream; + out << + "digraph " << parserName << " {\n" + " rankdir=LR;\n" + " ranksep=\"0\"\n" + " nodesep=\"0.25\"\n" + "\n"; + + /* Define the pseudo states. Transitions will be done after the states + * have been defined as either final or not final. */ + out << + " node [ shape = point ];\n"; + + for ( int i = 0; i < graph->entryStateSet.length(); i++ ) + out << "\tENTRY" << i << " [ label = \"\" ];\n"; + + out << + "\n" + " node [ shape = circle, fixedsize = true, height = 0.6 ];\n"; + + /* Walk the states, declaring a node for each. */ + for ( PdaStateList::Iter st = graph->stateList; st.lte(); st++ ) + out << " " << st->stateNum << " [ label = \"" << st->stateNum << "\" ];\n"; + + out << "\n"; + + /* Walk the states again, writing out the transitions of each. */ + for ( PdaStateList::Iter st = graph->stateList; st.lte(); st++ ) + writeTransList( st ); + + /* Start state and other entry points. 
*/ + for ( PdaStateSet::Iter st = graph->entryStateSet; st.lte(); st++ ) + out << "\tENTRY" << st.pos() << " -> " << (*st)->stateNum << " [ label = \"\" ];\n"; + + out << + "}\n"; +} + +/* Write the dot file for the pdaGraph member. */ +void Compiler::writeDotFile() +{ + writeDotFile( pdaGraph ); +} + diff --git a/src/dotgen.h b/src/dotgen.h new file mode 100644 index 00000000..8e8f694b --- /dev/null +++ b/src/dotgen.h @@ -0,0 +1,52 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _COLM_GVDOTGEN_H +#define _COLM_GVDOTGEN_H + +#include <iostream> + +/* The class below is compiled out by the preprocessor. */ +#if 0 + +class GraphvizDotGen : public CodeGenData +{ +public: + GraphvizDotGen( ostream &out ) : CodeGenData(out) { } + + /* Print an fsm to out stream. */ + void writeTransList( RedState *state ); + void writeDotFile( ); + + virtual void finishRagelDef(); + +private: + /* Writing labels and actions. 
*/ + std::ostream &ONCHAR( Key lowKey, Key highKey ); + std::ostream &TRANS_ACTION( RedState *fromState, RedTrans *trans ); + std::ostream &ACTION( RedAction *action ); + std::ostream &KEY( Key key ); +}; + +#endif /* 0 */ + +#endif /* _COLM_GVDOTGEN_H */ + diff --git a/src/exports.cc b/src/exports.cc new file mode 100644 index 00000000..988499db --- /dev/null +++ b/src/exports.cc @@ -0,0 +1,260 @@ +/* + * Copyright 2006-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <stdbool.h> + +#include <iostream> + +#include "fsmcodegen.h" + +using std::ostream; +using std::ostringstream; +using std::string; +using std::cerr; +using std::endl; + +/* Write the "namespace X { " openers for nspace and all of its ancestors, + * outermost first. Nothing is written for the root namespace. */ +void Compiler::openNameSpace( ostream &out, Namespace *nspace ) +{ + if ( nspace == rootNamespace ) + return; + + openNameSpace( out, nspace->parentNamespace ); + out << "namespace " << nspace->name << " { "; +} + +/* Write one " }" for nspace and for each of its ancestors, so that everything + * opened by openNameSpace for the same namespace is closed again. Nothing is + * written for the root namespace. */ +void Compiler::closeNameSpace( ostream &out, Namespace *nspace ) +{ + if ( nspace == rootNamespace ) + return; + + /* Recurse with closeNameSpace. Recursing with openNameSpace here would + * re-open the parent namespaces instead of closing them. */ + closeNameSpace( out, nspace->parentNamespace ); + out << " }"; +} + +/* Write the exports header to outStream: a forward declaration and a struct + * derived from ExportTree for every language element that is not EOF, then + * prototypes for the exported globals and the exported functions. */ +void Compiler::generateExports() +{ + ostream &out = *outStream; + + out << + "#ifndef _EXPORTS_H\n" + "#define _EXPORTS_H\n" + "\n" + "#include <colm/colm.h>\n" + "#include <colm/tree.h>\n" + "#include <colm/colmex.h>\n" + "#include <string>\n" + "\n"; + + /* Declare. */ + for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { + if ( lel->isEOF ) + continue; + + openNameSpace( out, lel->nspace ); + out << "struct " << lel->fullName << ";"; + closeNameSpace( out, lel->nspace ); + out << "\n"; + } + + /* Class definitions. 
*/ + for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { + if ( lel->isEOF ) + continue; + + openNameSpace( out, lel->nspace ); + out << "struct " << lel->fullName << "\n"; + out << " : public ExportTree\n"; + out << "{\n"; + out << " static const int ID = " << lel->id << ";\n"; + + /* The language element returned by main also gets a constructor that + * wraps the program's return value. */ + if ( mainReturnUT != 0 && mainReturnUT->langEl == lel ) { + out << " " << lel->fullName << + "( colm_program *prg ) : ExportTree( prg, returnVal(prg) ) {\n"; + out << " }\n"; + } + + out << " " << lel->fullName << + "( colm_program *prg, colm_tree *tree ) : ExportTree( prg, tree ) {\n"; + + out << "}\n"; + + /* Accessor declarations for the tree-typed fields. */ + if ( lel->objectDef != 0 ) { + FieldList &fieldList = lel->objectDef->fieldList; + for ( FieldList::Iter ofi = fieldList; ofi.lte(); ofi++ ) { + ObjectField *field = ofi->value; + if ( ( field->useOffset() && field->typeRef != 0 ) || field->isRhsGet() ) { + UniqueType *ut = field->typeRef->resolveType( this ); + + if ( ut != 0 && ut->typeId == TYPE_TREE ) + out << " " << ut->langEl->refName << " " << field->name << "();\n"; + } + } + } + + /* The prod_name enum is only written when at least one production of + * the language element has a name. */ + bool prodNames = false; + for ( LelDefList::Iter prod = lel->defList; prod.lte(); prod++ ) { + if ( prod->_name.length() > 0 ) + prodNames = true; + } + + if ( prodNames ) { + out << " enum prod_name {\n"; + for ( LelDefList::Iter prod = lel->defList; prod.lte(); prod++ ) { + if ( prod->_name.length() > 0 ) + out << "\t\t" << prod->_name << " = " << prod->prodNum << ",\n"; + } + out << " };\n"; + out << " enum prod_name prodName() " << + "{ return (enum prod_name)__tree->prod_num; }\n"; + } + + out << "};"; + closeNameSpace( out, lel->nspace ); + out << "\n"; + } + + /* Prototypes for the exported, tree-typed globals. */ + for ( FieldList::Iter of = globalObjectDef->fieldList; of.lte(); of++ ) { + ObjectField *field = of->value; + if ( field->isExport ) { + UniqueType *ut = field->typeRef->resolveType(this); + if ( ut != 0 && ut->typeId == TYPE_TREE ) { + out << ut->langEl->refName << " " << field->name << "( colm_program *prg );\n"; + } + } + } + + out << "\n"; + + /* Prototypes for the exported functions. Every parameter is passed as a + * const char*. */ + for ( FunctionList::Iter func = 
functionList; func.lte(); func++ ) { + if ( func->exprt ) { + char *refName = func->typeRef->uniqueType->langEl->refName; + int paramCount = func->paramList->length(); + out << + refName << " " << func->name << "( colm_program *prg"; + + for ( int p = 0; p < paramCount; p++ ) + out << ", const char *p" << p; + + out << " );\n"; + } + } + + out << "#endif\n"; +} + +/* Write the implementation matching the header produced by generateExports to + * outStream: an include of the exports header when exportHeaderFn is set, the + * field accessors of every language element, the exported global getters and + * the exported function wrappers. */ +void Compiler::generateExportsImpl() +{ + ostream &out = *outStream; + + /* Only touch the header file name once it is known to be set, and keep the + * working copy in a string so that nothing is leaked. */ + if ( exportHeaderFn != 0 ) { + string headerFn( exportHeaderFn ); + + /* Drop a ".pack" suffix, but only when the first ".pack" in the name + * is also the end of the name. */ + const string packSuffix( ".pack" ); + string::size_type pos = headerFn.find( packSuffix ); + if ( pos != string::npos && pos + packSuffix.length() == headerFn.length() ) + headerFn.erase( pos ); + + out << "#include \"" << headerFn.c_str() << "\"\n"; + } + + out << "#include <colm/tree.h>\n"; + out << "#include <string.h>\n"; + + /* Function implementations. */ + for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { + if ( lel->objectDef != 0 ) { + FieldList &fieldList = lel->objectDef->fieldList; + for ( FieldList::Iter ofi = fieldList; ofi.lte(); ofi++ ) { + ObjectField *field = ofi->value; + /* Fields stored at an offset are read with colm_get_attr. */ + if ( field->useOffset() && field->typeRef != 0 ) { + UniqueType *ut = field->typeRef->resolveType( this ); + + if ( ut != 0 && ut->typeId == TYPE_TREE ) { + out << ut->langEl->refName << " " << lel->declName << + "::" << field->name << "() { return " << + ut->langEl->refName << "( __prg, colm_get_attr( __tree, " << + field->offset << ") ); }\n"; + } + } + + /* Right-hand-side getters are read with colm_get_rhs_val, which is + * given an array holding the number of entries followed by a + * ( production number, position ) pair for each entry. */ + if ( field->isRhsGet() ) { + UniqueType *ut = field->typeRef->resolveType( this ); + + if ( ut != 0 && ut->typeId == TYPE_TREE ) { + out << ut->langEl->refName << " " << lel->declName << + "::" << field->name << "() { static int a[] = {"; + + /* Need to place the array computing the val. 
*/ + out << field->rhsVal.length(); + for ( Vector<RhsVal>::Iter rg = field->rhsVal; rg.lte(); rg++ ) { + out << ", " << rg->prodEl->production->prodNum; + out << ", " << rg->prodEl->pos; + } + + out << "}; return " << ut->langEl->refName << + "( __prg, colm_get_rhs_val( __prg, __tree, a ) ); }\n"; + } + } + } + } + } + + out << "\n"; + + /* Getters for the exported, tree-typed globals. */ + for ( FieldList::Iter of = globalObjectDef->fieldList; of.lte(); of++ ) { + ObjectField *field = of->value; + if ( field->isExport ) { + UniqueType *ut = field->typeRef->resolveType(this); + if ( ut != 0 && ut->typeId == TYPE_TREE ) { + out << + ut->langEl->refName << " " << field->name << "( colm_program *prg )\n" + "{ return " << ut->langEl->refName << "( prg, colm_get_global( prg, " << + field->offset << ") ); }\n"; + } + } + } + + out << "\n"; + + /* Wrappers for the exported functions. Each one packs its parameters into + * an array and hands it to colm_run_func along with the function id. */ + for ( FunctionList::Iter func = functionList; func.lte(); func++ ) { + if ( func->exprt ) { + char *refName = func->typeRef->uniqueType->langEl->refName; + int paramCount = func->paramList->length(); + out << + refName << " " << func->name << "( colm_program *prg"; + + for ( int p = 0; p < paramCount; p++ ) + out << ", const char *p" << p; + + out << " )\n" + "{\n" + " int funcId = " << func->funcId << ";\n" + " const char *params[" << paramCount << "];\n"; + + for ( int p = 0; p < paramCount; p++ ) + out << " params[" << p << "] = p" << p << ";\n"; + + out << + " return " << refName << + "( prg, colm_run_func( prg, funcId, params, " << paramCount << " ));\n" + "}\n"; + } + } +} diff --git a/src/fsmap.cc b/src/fsmap.cc new file mode 100644 index 00000000..3e1ae913 --- /dev/null +++ b/src/fsmap.cc @@ -0,0 +1,806 @@ +/* + * Copyright 2006-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, 
sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <iostream> + +#include <assert.h> + +#include "fsmgraph.h" + +using std::cerr; +using std::endl; + +KeyOps *keyOps = 0; + +/* Insert an action into an action table. */ +void ActionTable::setAction( int ordering, Action *action ) +{ + /* Multi-insert in case specific instances of an action appear in a + * transition more than once. */ + insertMulti( ordering, action ); +} + +/* Set all the actions from another action table in this table. */ +void ActionTable::setActions( const ActionTable &other ) +{ + for ( ActionTable::Iter action = other; action.lte(); action++ ) + insertMulti( action->key, action->value ); +} + +/* Insert nActs actions given as two parallel arrays: actions[a] is inserted + * with the ordering orderings[a]. */ +void ActionTable::setActions( int *orderings, Action **actions, int nActs ) +{ + for ( int a = 0; a < nActs; a++ ) + insertMulti( orderings[a], actions[a] ); +} + +/* Return true if action is the value of any element of this table. The + * ordering of the element is not considered. */ +bool ActionTable::hasAction( Action *action ) +{ + for ( int a = 0; a < length(); a++ ) { + if ( data[a].value == action ) + return true; + } + return false; +} + +/* Insert an action into a longest match action table. */ +void LmActionTable::setAction( int ordering, TokenInstance *action ) +{ + /* Multi-insert in case specific instances of an action appear in a + * transition more than once. 
*/ + insertMulti( ordering, action ); +} + +/* Set all the actions from another longest match action table in this table. */ +void LmActionTable::setActions( const LmActionTable &other ) +{ + for ( LmActionTable::Iter action = other; action.lte(); action++ ) + insertMulti( action->key, action->value ); +} + +/* Insert an error action along with its ordering and transfer point. */ +void ErrActionTable::setAction( int ordering, Action *action, int transferPoint ) +{ + insertMulti( ErrActionTableEl( action, ordering, transferPoint ) ); +} + +/* Set all the error actions from another error action table in this table. */ +void ErrActionTable::setActions( const ErrActionTable &other ) +{ + for ( ErrActionTable::Iter act = other; act.lte(); act++ ) + insertMulti( ErrActionTableEl( act->action, act->ordering, act->transferPoint ) ); +} + +/* Insert a priority into this priority table. Looks out for priorities on + * duplicate keys. */ +void PriorTable::setPrior( int ordering, PriorDesc *desc ) +{ + /* When the insert fails, lastHit is set to the element already in the + * table that blocked it. */ + PriorEl *lastHit = 0; + PriorEl *insed = insert( PriorEl(ordering, desc), &lastHit ); + if ( insed == 0 ) { + /* This already has a priority on the same key as desc. Overwrite the + * priority if the ordering is larger (later in time). */ + if ( ordering >= lastHit->ordering ) + *lastHit = PriorEl( ordering, desc ); + } +} + +/* Set all the priorities from a priorTable in this table. */ +void PriorTable::setPriors( const PriorTable &other ) +{ + /* Loop src priorities once to overwrite duplicates. */ + PriorTable::Iter priorIt = other; + for ( ; priorIt.lte(); priorIt++ ) + setPrior( priorIt->ordering, priorIt->desc ); +} + +/* Set the priority of starting transitions. Isolates the start state so it has + * no other entry points, then sets the priorities of all the transitions out + * of the start state. If the start state is final, then the outPrior of the + * start state is also set. The idea is that a machine that accepts the null + * string can still specify the starting trans prior for when it accepts the + * null word. 
*/ +void FsmGraph::startFsmPrior( int ordering, PriorDesc *prior ) +{ + /* Make sure the start state has no other entry points. */ + isolateStartState(); + + /* Walk all transitions out of the start state. Transitions without a + * target state are left alone. NOTE(review): the outPriorTable of the + * start state is not touched in this function, whether or not the start + * state is final. */ + for ( TransList::Iter trans = startState->outList; trans.lte(); trans++ ) { + if ( trans->toState != 0 ) + trans->priorTable.setPrior( ordering, prior ); + } +} + +/* Set the priority of all transitions in a graph. Walks the out lists of all + * states. Transitions without a target state are left alone. */ +void FsmGraph::allTransPrior( int ordering, PriorDesc *prior ) +{ + /* Walk the list of all states. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + /* Walk the out list of the state. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + if ( trans->toState != 0 ) + trans->priorTable.setPrior( ordering, prior ); + } + } +} + +/* Set the priority of all transitions that go into a final state. Note that if + * any entry states are final, we will not be setting the priority of any + * transitions that may go into those states in the future. The graph does not + * support pending in transitions in the same way pending out transitions are + * supported. */ +void FsmGraph::finishFsmPrior( int ordering, PriorDesc *prior ) +{ + /* Walk all final states. */ + for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) { + /* Walk all in transitions of the final state. */ + for ( TransInList::Iter trans = (*state)->inList; trans.lte(); trans++ ) + trans->priorTable.setPrior( ordering, prior ); + } +} + +/* Set the priority of any future out transitions that may be made going out of + * this state machine. */ +void FsmGraph::leaveFsmPrior( int ordering, PriorDesc *prior ) +{ + /* Set priority in all final states. */ + for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) + (*state)->outPriorTable.setPrior( ordering, prior ); +} + + +/* Set actions to execute on starting transitions. 
Isolates the start state + * so it has no other entry points, then adds to the transition functions + * of all the transitions out of the start state. If the start state is final, + * then the func is also added to the start state's out func list. The idea is + * that a machine that accepts the null string can execute a start func when it + * matches the null word, which can only be done when leaving the start/final + * state. */ +void FsmGraph::startFsmAction( int ordering, Action *action ) +{ + /* Make sure the start state has no other entry points. */ + isolateStartState(); + + /* Walk the start state's transitions, setting functions. Transitions + * without a target state are left alone. NOTE(review): the outActionTable + * of the start state is not touched in this function, whether or not the + * start state is final. */ + for ( TransList::Iter trans = startState->outList; trans.lte(); trans++ ) { + if ( trans->toState != 0 ) + trans->actionTable.setAction( ordering, action ); + } +} + +/* Set functions to execute on all transitions. Walks the out lists of all + * states. Transitions without a target state are left alone. */ +void FsmGraph::allTransAction( int ordering, Action *action ) +{ + /* Walk all states. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + /* Walk the out list of the state. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + if ( trans->toState != 0 ) + trans->actionTable.setAction( ordering, action ); + } + } +} + +/* Specify functions to execute upon entering final states. If the start state + * is final we can't really specify a function to execute upon entering that + * final state the first time. So function really means whenever entering a + * final state from within the same fsm. */ +void FsmGraph::finishFsmAction( int ordering, Action *action ) +{ + /* Walk all final states. */ + for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) { + /* Walk the final state's in list. */ + for ( TransInList::Iter trans = (*state)->inList; trans.lte(); trans++ ) + trans->actionTable.setAction( ordering, action ); + } +} + +/* Add functions to any future out transitions that may be made going out of + * this state machine. 
*/ +void FsmGraph::leaveFsmAction( int ordering, Action *action ) +{ + /* Insert the action in the outActionTable of all final states. */ + for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) + (*state)->outActionTable.setAction( ordering, action ); +} + +/* Add functions to the longest match action table for constructing scanners. */ +void FsmGraph::longMatchAction( int ordering, TokenInstance *lmPart ) +{ + /* Walk all final states. */ + for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) { + /* Walk the final state's in list. */ + for ( TransInList::Iter trans = (*state)->inList; trans.lte(); trans++ ) + trans->lmActionTable.setAction( ordering, lmPart ); + } +} + +void FsmGraph::fillGaps( FsmState *state ) +{ + if ( state->outList.length() == 0 ) { + /* Add the range on the lower and upper bound. */ + attachNewTrans( state, 0, keyOps->minKey, keyOps->maxKey ); + } + else { + TransList srcList; + srcList.transfer( state->outList ); + + /* Check for a gap at the beginning. */ + TransList::Iter trans = srcList, next; + if ( keyOps->minKey < trans->lowKey ) { + /* Make the high key and append. */ + Key highKey = trans->lowKey; + highKey.decrement(); + + attachNewTrans( state, 0, keyOps->minKey, highKey ); + } + + /* Write the transition. */ + next = trans.next(); + state->outList.append( trans ); + + /* Keep the last high end. */ + Key lastHigh = trans->highKey; + + /* Loop each source range. */ + for ( trans = next; trans.lte(); trans = next ) { + /* Make the next key following the last range. */ + Key nextKey = lastHigh; + nextKey.increment(); + + /* Check for a gap from last up to here. */ + if ( nextKey < trans->lowKey ) { + /* Make the high end of the range that fills the gap. */ + Key highKey = trans->lowKey; + highKey.decrement(); + + attachNewTrans( state, 0, nextKey, highKey ); + } + + /* Reduce the transition. If it reduced to anything then add it. 
*/ + next = trans.next(); + state->outList.append( trans ); + + /* Keep the last high end. */ + lastHigh = trans->highKey; + } + + /* Now check for a gap on the end to fill. */ + if ( lastHigh < keyOps->maxKey ) { + /* Get a copy of the default. */ + lastHigh.increment(); + + attachNewTrans( state, 0, lastHigh, keyOps->maxKey ); + } + } +} + +void FsmGraph::setErrorAction( FsmState *state, int ordering, Action *action ) +{ + /* Fill any gaps in the out list with an error transition. */ + fillGaps( state ); + + /* Set error transitions in the transitions that go to error. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + if ( trans->toState == 0 ) + trans->actionTable.setAction( ordering, action ); + } +} + +void FsmGraph::setErrorActions( FsmState *state, const ActionTable &other ) +{ + /* Fill any gaps in the out list with an error transition. */ + fillGaps( state ); + + /* Set error transitions in the transitions that go to error. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + if ( trans->toState == 0 ) + trans->actionTable.setActions( other ); + } +} + + +/* Give a target state for error transitions. */ +void FsmGraph::setErrorTarget( FsmState *state, FsmState *target, int *orderings, + Action **actions, int nActs ) +{ + /* Fill any gaps in the out list with an error transition. */ + fillGaps( state ); + + /* Set error target in the transitions that go to error. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + if ( trans->toState == 0 ) { + /* The trans goes to error, redirect it. 
*/ + redirectErrorTrans( trans->fromState, target, trans ); + trans->actionTable.setActions( orderings, actions, nActs ); + } + } +} + +void FsmGraph::transferErrorActions( FsmState *state, int transferPoint ) +{ + for ( int i = 0; i < state->errActionTable.length(); ) { + ErrActionTableEl *act = state->errActionTable.data + i; + if ( act->transferPoint == transferPoint ) { + /* Transfer the error action and remove it. */ + setErrorAction( state, act->ordering, act->action ); + state->errActionTable.vremove( i ); + } + else { + /* Not transfering and deleting, skip over the item. */ + i += 1; + } + } +} + +/* Set error actions in the start state. */ +void FsmGraph::startErrorAction( int ordering, Action *action, int transferPoint ) +{ + /* Make sure the start state has no other entry points. */ + isolateStartState(); + + /* Add the actions. */ + startState->errActionTable.setAction( ordering, action, transferPoint ); +} + +/* Set error actions in all states where there is a transition out. */ +void FsmGraph::allErrorAction( int ordering, Action *action, int transferPoint ) +{ + /* Insert actions in the error action table of all states. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) + state->errActionTable.setAction( ordering, action, transferPoint ); +} + +/* Set error actions in final states. */ +void FsmGraph::finalErrorAction( int ordering, Action *action, int transferPoint ) +{ + /* Add the action to the error table of final states. 
*/ + for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) + (*state)->errActionTable.setAction( ordering, action, transferPoint ); +} + +void FsmGraph::notStartErrorAction( int ordering, Action *action, int transferPoint ) +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( state != startState ) + state->errActionTable.setAction( ordering, action, transferPoint ); + } +} + +void FsmGraph::notFinalErrorAction( int ordering, Action *action, int transferPoint ) +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( ! state->isFinState() ) + state->errActionTable.setAction( ordering, action, transferPoint ); + } +} + +/* Set error actions in the states that have transitions into a final state. */ +void FsmGraph::middleErrorAction( int ordering, Action *action, int transferPoint ) +{ + /* Isolate the start state in case it is reachable from in inside the + * machine, in which case we don't want it set. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( state != startState && ! state->isFinState() ) + state->errActionTable.setAction( ordering, action, transferPoint ); + } +} + +/* Set EOF actions in the start state. */ +void FsmGraph::startEOFAction( int ordering, Action *action ) +{ + /* Make sure the start state has no other entry points. */ + isolateStartState(); + + /* Add the actions. */ + startState->eofActionTable.setAction( ordering, action ); +} + +/* Set EOF actions in all states where there is a transition out. */ +void FsmGraph::allEOFAction( int ordering, Action *action ) +{ + /* Insert actions in the EOF action table of all states. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) + state->eofActionTable.setAction( ordering, action ); +} + +/* Set EOF actions in final states. */ +void FsmGraph::finalEOFAction( int ordering, Action *action ) +{ + /* Add the action to the error table of final states. 
*/ + for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) + (*state)->eofActionTable.setAction( ordering, action ); +} + +void FsmGraph::notStartEOFAction( int ordering, Action *action ) +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( state != startState ) + state->eofActionTable.setAction( ordering, action ); + } +} + +void FsmGraph::notFinalEOFAction( int ordering, Action *action ) +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( ! state->isFinState() ) + state->eofActionTable.setAction( ordering, action ); + } +} + +/* Set EOF actions in the states that have transitions into a final state. */ +void FsmGraph::middleEOFAction( int ordering, Action *action ) +{ + /* Set the actions in all states that are not the start state and not final. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( state != startState && ! state->isFinState() ) + state->eofActionTable.setAction( ordering, action ); + } +} + +/* + * Set To State Actions. + */ + +/* Set to state actions in the start state. */ +void FsmGraph::startToStateAction( int ordering, Action *action ) +{ + /* Make sure the start state has no other entry points. */ + isolateStartState(); + startState->toStateActionTable.setAction( ordering, action ); +} + +/* Set to state actions in all states. */ +void FsmGraph::allToStateAction( int ordering, Action *action ) +{ + /* Insert the action on all states. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) + state->toStateActionTable.setAction( ordering, action ); +} + +/* Set to state actions in final states. */ +void FsmGraph::finalToStateAction( int ordering, Action *action ) +{ + /* Add the action to the error table of final states. 
*/ + for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) + (*state)->toStateActionTable.setAction( ordering, action ); +} + +void FsmGraph::notStartToStateAction( int ordering, Action *action ) +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( state != startState ) + state->toStateActionTable.setAction( ordering, action ); + } +} + +void FsmGraph::notFinalToStateAction( int ordering, Action *action ) +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( ! state->isFinState() ) + state->toStateActionTable.setAction( ordering, action ); + } +} + +/* Set to state actions in states that are not final and not the start state. */ +void FsmGraph::middleToStateAction( int ordering, Action *action ) +{ + /* Set the action in all states that are not the start state and not final. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( state != startState && ! state->isFinState() ) + state->toStateActionTable.setAction( ordering, action ); + } +} + +/* + * Set From State Actions. + */ + +void FsmGraph::startFromStateAction( int ordering, Action *action ) +{ + /* Make sure the start state has no other entry points. */ + isolateStartState(); + startState->fromStateActionTable.setAction( ordering, action ); +} + +void FsmGraph::allFromStateAction( int ordering, Action *action ) +{ + /* Insert the action on all states. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) + state->fromStateActionTable.setAction( ordering, action ); +} + +void FsmGraph::finalFromStateAction( int ordering, Action *action ) +{ + /* Add the action to the error table of final states. 
*/ + for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) + (*state)->fromStateActionTable.setAction( ordering, action ); +} + +void FsmGraph::notStartFromStateAction( int ordering, Action *action ) +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( state != startState ) + state->fromStateActionTable.setAction( ordering, action ); + } +} + +void FsmGraph::notFinalFromStateAction( int ordering, Action *action ) +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( ! state->isFinState() ) + state->fromStateActionTable.setAction( ordering, action ); + } +} + +void FsmGraph::middleFromStateAction( int ordering, Action *action ) +{ + /* Set the action in all states that are not the start state and not final. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( state != startState && ! state->isFinState() ) + state->fromStateActionTable.setAction( ordering, action ); + } +} + +/* Shift the function ordering of the start transitions to start + * at fromOrder and increase in units of 1. Useful before staring. + * Returns the maximum number of order numbers used. */ +int FsmGraph::shiftStartActionOrder( int fromOrder ) +{ + int maxUsed = 0; + + /* Walk the start state's transitions, shifting function ordering. */ + for ( TransList::Iter trans = startState->outList; trans.lte(); trans++ ) { + /* Walk the function data for the transition and set the keys to + * increasing values starting at fromOrder. */ + int curFromOrder = fromOrder; + ActionTable::Iter action = trans->actionTable; + for ( ; action.lte(); action++ ) + action->key = curFromOrder++; + + /* Keep track of the max number of orders used. */ + if ( curFromOrder - fromOrder > maxUsed ) + maxUsed = curFromOrder - fromOrder; + } + + return maxUsed; +} + +/* Remove all priorities. */ +void FsmGraph::clearAllPriorities() +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + /* Clear out priority data. 
*/ + state->outPriorTable.empty(); + + /* Clear the priority table of each out transition. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) + trans->priorTable.empty(); + } +} + +/* Zeros out the function ordering keys. This may be called before minimization + * when it is known that no more fsm operations are going to be done. This + * will achieve greater reduction as states will not be separated on the basis + * of function ordering. Covers the action and longest match action tables of + * every transition and the to state, from state, out, error and eof action + * tables of every state. */ +void FsmGraph::nullActionKeys( ) +{ + /* For each state... */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + /* Walk the transitions for the state. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + /* Walk the action table for the transition. */ + for ( ActionTable::Iter action = trans->actionTable; + action.lte(); action++ ) + action->key = 0; + + /* Walk the longest match action table for the transition. */ + for ( LmActionTable::Iter action = trans->lmActionTable; + action.lte(); action++ ) + action->key = 0; + } + + /* Null the action keys of the to state action table. */ + for ( ActionTable::Iter action = state->toStateActionTable; + action.lte(); action++ ) + action->key = 0; + + /* Null the action keys of the from state action table. */ + for ( ActionTable::Iter action = state->fromStateActionTable; + action.lte(); action++ ) + action->key = 0; + + /* Null the action keys of the out action table. */ + for ( ActionTable::Iter action = state->outActionTable; + action.lte(); action++ ) + action->key = 0; + + /* Null the error action table. Its elements carry the order in a field + * named ordering rather than key. */ + for ( ErrActionTable::Iter action = state->errActionTable; + action.lte(); action++ ) + action->ordering = 0; + + /* Null the action keys eof action table. 
*/ + for ( ActionTable::Iter action = state->eofActionTable; + action.lte(); action++ ) + action->key = 0; + } +} + +/* Walk the list of states and verify that non final states do not have out + * data, that the SB_BOTH state bits are cleared, and that there are no states + * with zero foreign in transitions. All checks are asserts, so nothing is + * verified when asserts are compiled out. */ +void FsmGraph::verifyStates() +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + /* Non final states should not have leaving data. */ + if ( ! (state->stateBits & SB_ISFINAL) ) { + assert( state->outActionTable.length() == 0 ); + assert( state->outCondSet.length() == 0 ); + assert( state->outPriorTable.length() == 0 ); + } + + /* Data used in algorithms should be cleared and every state on the + * main list must have at least one foreign in transition. */ + assert( (state->stateBits & SB_BOTH) == 0 ); + assert( state->foreignInTrans > 0 ); + } +} + +/* Compare two priority tables. The tables are scanned together, advancing + * whichever side has the smaller key, so only keys present in both tables are + * compared. Returns -1 if, on the first shared key with differing priorities, + * the first table has the lower priority, 1 if it has the higher priority, + * and 0 if no shared key has differing priorities (which includes the case + * where either table is empty). */ +int FsmGraph::comparePrior( const PriorTable &priorTable1, const PriorTable &priorTable2 ) +{ + /* Looking for differing priorities on same keys. Need to concurrently + * scan the priority lists. */ + PriorTable::Iter pd1 = priorTable1; + PriorTable::Iter pd2 = priorTable2; + while ( pd1.lte() && pd2.lte() ) { + /* Check keys. */ + if ( pd1->desc->key < pd2->desc->key ) + pd1.increment(); + else if ( pd1->desc->key > pd2->desc->key ) + pd2.increment(); + /* Keys are the same, check priorities. */ + else if ( pd1->desc->priority < pd2->desc->priority ) + return -1; + else if ( pd1->desc->priority > pd2->desc->priority ) + return 1; + else { + /* Keys and priorities are equal, advance both. */ + pd1.increment(); + pd2.increment(); + } + } + + /* No differing priorities on the same key. */ + return 0; +} + +/* Compares two transitions according to priority and functions. Pointers + * should not be null. Does not consider to state or from state. 
Compare two + * transitions according to the data contained in the transitions. Data means + * any properties added to user transitions that may differentiate them. Since + * the base transition has no data, the default is to return equal. */ +int FsmGraph::compareTransData( FsmTrans *trans1, FsmTrans *trans2 ) +{ + /* Compare the priority tables first. Any difference decides the result. */ + int cmpRes = CmpPriorTable::compare( trans1->priorTable, + trans2->priorTable ); + if ( cmpRes != 0 ) + return cmpRes; + + /* Compare longest match action tables. */ + cmpRes = CmpLmActionTable::compare(trans1->lmActionTable, + trans2->lmActionTable); + if ( cmpRes != 0 ) + return cmpRes; + + /* Priorities and longest match actions are equal, so the action tables + * give the final answer. */ + return CmpActionTable::compare(trans1->actionTable, + trans2->actionTable); +} + +/* Callback invoked when another trans (or possibly this) is added into this + * transition during the merging process. Draw in any properties of srcTrans + * into this transition. AddInTrans is called when a new transition is made + * that will be a duplicate of another transition or a combination of several + * other transitions. AddInTrans will be called for each transition that the + * new transition is to represent. */ +void FsmGraph::addInTrans( FsmTrans *destTrans, FsmTrans *srcTrans ) +{ + /* Protect against adding in from ourselves. */ + if ( srcTrans == destTrans ) { + /* Adding in ourselves: pass temporary copies of the source tables to + * setActions rather than the tables themselves. + * The priorities are not copied in as that would have no effect. */ + destTrans->lmActionTable.setActions( LmActionTable(srcTrans->lmActionTable) ); + destTrans->actionTable.setActions( ActionTable(srcTrans->actionTable) ); + } + else { + /* Not a copy of ourself, get the functions and priorities. */ + destTrans->lmActionTable.setActions( srcTrans->lmActionTable ); + destTrans->actionTable.setActions( srcTrans->actionTable ); + destTrans->priorTable.setPriors( srcTrans->priorTable ); + } +} + +/* Compare the properties of states that are embedded by users. 
Compares out + * priorities, out transitions, to, from, out, error and eof action tables. */ +int FsmGraph::compareStateData( const FsmState *state1, const FsmState *state2 ) +{ + /* Compare the out priority table. The first comparison below that is + * non-zero is returned as the result. */ + int cmpRes = CmpPriorTable:: + compare( state1->outPriorTable, state2->outPriorTable ); + if ( cmpRes != 0 ) + return cmpRes; + + /* Test to state action tables. */ + cmpRes = CmpActionTable::compare( state1->toStateActionTable, + state2->toStateActionTable ); + if ( cmpRes != 0 ) + return cmpRes; + + /* Test from state action tables. */ + cmpRes = CmpActionTable::compare( state1->fromStateActionTable, + state2->fromStateActionTable ); + if ( cmpRes != 0 ) + return cmpRes; + + /* Test out action tables. */ + cmpRes = CmpActionTable::compare( state1->outActionTable, + state2->outActionTable ); + if ( cmpRes != 0 ) + return cmpRes; + + /* Test out condition sets. */ + cmpRes = CmpActionSet::compare( state1->outCondSet, + state2->outCondSet ); + if ( cmpRes != 0 ) + return cmpRes; + + /* Test error action tables. */ + cmpRes = CmpErrActionTable::compare( state1->errActionTable, + state2->errActionTable ); + if ( cmpRes != 0 ) + return cmpRes; + + /* Everything else is equal, the eof action tables decide. */ + return CmpActionTable::compare( state1->eofActionTable, + state2->eofActionTable ); +} + +/* Invoked when a state loses its final state status and the leaving + * transition embedding data should be deleted. Empties the out action table, + * the out condition set and the out priority table. */ +void FsmGraph::clearOutData( FsmState *state ) +{ + /* Kill the out actions and priorities. 
*/ + state->outActionTable.empty(); + state->outCondSet.empty(); + state->outPriorTable.empty(); +} +/* Returns true if the state holds any leaving data, that is a non-empty out + * action table, out condition set or out priority table. */ +bool FsmGraph::hasOutData( FsmState *state ) +{ + return ( state->outActionTable.length() > 0 || + state->outCondSet.length() > 0 || + state->outPriorTable.length() > 0 ); +} diff --git a/src/fsmattach.cc b/src/fsmattach.cc new file mode 100644 index 00000000..bc8571b2 --- /dev/null +++ b/src/fsmattach.cc @@ -0,0 +1,427 @@ +/* + * Copyright 2006-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <assert.h> + +#include <iostream> + +#include "fsmgraph.h" + +using namespace std; + +/* Insert a transition at the front of an inlist. The head must be supplied. + * When from and to are different states the foreign in transition count of + * to is incremented and, under misfit accounting, a state whose count was + * zero is moved from the misfit list to the main state list. */ +void FsmGraph::attachToInList( FsmState *from, FsmState *to, + FsmTrans *&head, FsmTrans *trans ) +{ + trans->ilnext = head; + trans->ilprev = 0; + + /* If in trans list is not empty, set the head->prev to trans. 
*/ + if ( head != 0 ) + head->ilprev = trans; + + /* Now insert ourselves at the front of the list. */ + head = trans; + + /* A transition arriving from another state is a foreign in transition + * of to, so count it. Self loops are not counted. */ + if ( from != to ) { + if ( misfitAccounting ) { + /* If the number of foreign in transitions is about to go up to 1 then + * move it from the misfit list to the main list. */ + if ( to->foreignInTrans == 0 ) + stateList.append( misfitList.detach( to ) ); + } + + to->foreignInTrans += 1; + } +}; + +/* Detach a transition from an inlist. The head of the inlist must be supplied. + * When from and to differ the foreign in transition count of to is + * decremented and, under misfit accounting, a state whose count reaches zero + * is moved from the main state list to the misfit list. */ +void FsmGraph::detachFromInList( FsmState *from, FsmState *to, + FsmTrans *&head, FsmTrans *trans ) +{ + /* Unlink trans from the doubly linked in list, updating head if trans + * was the first element. */ + if ( trans->ilprev == 0 ) + head = trans->ilnext; + else + trans->ilprev->ilnext = trans->ilnext; + + if ( trans->ilnext != 0 ) + trans->ilnext->ilprev = trans->ilprev; + + /* Keep track of foreign transitions for from and to. */ + if ( from != to ) { + to->foreignInTrans -= 1; + + if ( misfitAccounting ) { + /* If the number of foreign in transitions goes down to 0 then move it + * from the main list to the misfit list. */ + if ( to->foreignInTrans == 0 ) + misfitList.append( stateList.detach( to ) ); + } + } +} + +/* Attach states on the default transition, range list or on out/in list key. + * First makes a new transition covering lowKey to highKey, appends it to the + * out list of from and, when to is not null, links it into the in list of to. + * Returns the new transition. NOTE(review): an earlier version of this comment + * promised an assertion failure when a transition already exists; no such + * assert is present in this function. */ +FsmTrans *FsmGraph::attachNewTrans( FsmState *from, FsmState *to, Key lowKey, Key highKey ) +{ + /* Make the new transition. */ + FsmTrans *retVal = new FsmTrans(); + + /* The transition is now attached. Remember the parties involved. */ + retVal->fromState = from; + retVal->toState = to; + + /* Make the entry in the out list for the transitions. */ + from->outList.append( retVal ); + + /* Set the keys of the new trans. */ + retVal->lowKey = lowKey; + retVal->highKey = highKey; + + /* Attach using inList as the head pointer. 
*/ + if ( to != 0 ) + attachToInList( from, to, to->inList.head, retVal ); + + return retVal; +} + +/* Attach for range lists or for the default transition. This attach should + * be used when a transition already is allocated and must be attached to a + * target state. The transition must currently be detached (fromState and + * toState both null); this is asserted. Does not handle adding the transition + * into the out list. */ +void FsmGraph::attachTrans( FsmState *from, FsmState *to, FsmTrans *trans ) +{ + assert( trans->fromState == 0 && trans->toState == 0 ); + trans->fromState = from; + trans->toState = to; + + if ( to != 0 ) { + /* Attach using the inList pointer as the head pointer. */ + attachToInList( from, to, to->inList.head, trans ); + } +} + +/* Redirect a transition away from error and towards some state. This is just + * like attachTrans except it requires fromState to be set and does not touch + * it. The transition must have a null toState; both conditions are asserted. */ +void FsmGraph::redirectErrorTrans( FsmState *from, FsmState *to, FsmTrans *trans ) +{ + assert( trans->fromState != 0 && trans->toState == 0 ); + trans->toState = to; + + if ( to != 0 ) { + /* Attach using the inList pointer as the head pointer. */ + attachToInList( from, to, to->inList.head, trans ); + } +} + +/* Detach for out/in lists or for default transition. Asserts that trans runs + * from state from to state to, clears both state pointers and, when to is not + * null, unlinks trans from the in list of to. The out list of from is not + * touched. */ +void FsmGraph::detachTrans( FsmState *from, FsmState *to, FsmTrans *trans ) +{ + assert( trans->fromState == from && trans->toState == to ); + trans->fromState = 0; + trans->toState = 0; + + if ( to != 0 ) { + /* Detach using the inList head of to as the list head. */ + detachFromInList( from, to, to->inList.head, trans ); + } +} + + +/* Detach a state from the graph. Detaches and deletes transitions in and out + * of the state. Empties inList and outList. Removes the entry points into the + * state and removes the state from the final state set. A detached state + * becomes useless and should be deleted. */ +void FsmGraph::detachState( FsmState *state ) +{ + /* Detach and delete the in transitions, one at a time from the head of + * inList. */ + while ( state->inList.head != 0 ) { + /* Get pointers to the trans and the state. 
*/ + FsmTrans *trans = state->inList.head; + FsmState *fromState = trans->fromState; + + /* Detach the transition from its source state and from this state. */ + detachTrans( fromState, state, trans ); + + /* Ok to delete the transition once it is off the source out list. */ + fromState->outList.detach( trans ); + delete trans; + } + + /* Remove every entry point that leads into this state. */ + while ( state->entryIds.length() > 0 ) + unsetEntry( state->entryIds[0], state ); + + /* Detach and delete the out transitions. The next position is fetched + * before the delete so the freed transition is never advanced from. */ + for ( TransList::Iter trans = state->outList; trans.lte(); ) { + TransList::Iter next = trans.next(); + detachTrans( state, trans->toState, trans ); + delete trans; + trans = next; + } + + /* The out transitions were deleted above, so the list is abandoned + * (presumably reset without touching the elements; confirm against the + * list implementation). */ + state->outList.abandon(); + + /* Take the state out of the final state set. The SB_ISFINAL bit itself + * is left as it was. */ + if ( state->stateBits & SB_ISFINAL ) + finStateSet.remove( state ); +} + + +/* Duplicate a transition. Makes a new transition that is attached to the same + * dest as srcTrans. The new transition has functions and priority taken from + * srcTrans. Used for merging a transition in to a free spot. The trans can + * just be dropped in. It does not conflict with an existing trans and need + * not be crossed. Returns the new transition. The new transition is not + * appended to any out list and its keys are not set here. */ +FsmTrans *FsmGraph::dupTrans( FsmState *from, FsmTrans *srcTrans ) +{ + /* Make a new transition. */ + FsmTrans *newTrans = new FsmTrans(); + + /* We can attach the transition, one does not exist. */ + attachTrans( from, srcTrans->toState, newTrans ); + + /* Call the user callback to add in the original source transition. */ + addInTrans( newTrans, srcTrans ); + + return newTrans; +} + +/* In crossing, src trans and dest trans both go to existing states. Make one + * state from the sets of states that src and dest trans go to. Returns + * destTrans, which is redirected to the combined state when the targets + * differ. */ +FsmTrans *FsmGraph::fsmAttachStates( MergeData &md, FsmState *from, + FsmTrans *destTrans, FsmTrans *srcTrans ) +{ + /* The priorities are equal. We must merge the transitions. 
Does the + * existing trans go to the state we are to attach to? ie, are we to + * simply double up the transition? */ + FsmState *toState = srcTrans->toState; + FsmState *existingState = destTrans->toState; + + if ( existingState == toState ) { + /* The transition is a double up to the same state. Add the src + * trans into dest. We don't need to merge in the from out trans + * data, that was done already. */ + addInTrans( destTrans, srcTrans ); + } + else { + /* The trans is not a double up. Dest trans cannot be the same as src + * trans. Set up the state set. */ + StateSet stateSet; + + /* We go to all the states the existing trans goes to, plus... */ + if ( existingState->stateDictEl == 0 ) + stateSet.insert( existingState ); + else + stateSet.insert( existingState->stateDictEl->stateSet ); + + /* ... all the states that we have been told to go to. */ + if ( toState->stateDictEl == 0 ) + stateSet.insert( toState ); + else + stateSet.insert( toState->stateDictEl->stateSet ); + + /* Look for the state. If it is not there already, make it. */ + StateDictEl *lastFound; + if ( md.stateDict.insert( stateSet, &lastFound ) ) { + /* Make a new state representing the combination of states in + * stateSet. It gets added to the fill list. This means that we + * need to fill in its transitions sometime in the future. We + * don't do that now (ie, do not recurse). */ + FsmState *combinState = addState(); + + /* Link up the dict element and the state. */ + lastFound->targState = combinState; + combinState->stateDictEl = lastFound; + + /* Add to the fill list. */ + md.fillListAppend( combinState ); + } + + /* Get the combined state, whether it was just inserted or was already + * in the dictionary. */ + FsmState *targ = lastFound->targState; + + /* Detach dest trans from its existing target state. */ + detachTrans( from, existingState, destTrans ); + + /* Re-attach to the new target. */ + attachTrans( from, targ, destTrans ); + + /* Add in src trans to the existing transition that we redirected to + * the new state. 
We don't need to merge in the from out trans data, + * that was done already. */ + addInTrans( destTrans, srcTrans ); + } + + return destTrans; +} + +/* Two transitions are to be crossed, handle the possibility of either going + * to the error state. */ +FsmTrans *FsmGraph::mergeTrans( MergeData &md, FsmState *from, + FsmTrans *destTrans, FsmTrans *srcTrans ) +{ + FsmTrans *retTrans = 0; + if ( destTrans->toState == 0 && srcTrans->toState == 0 ) { + /* Error added into error. */ + addInTrans( destTrans, srcTrans ); + retTrans = destTrans; + } + else if ( destTrans->toState == 0 && srcTrans->toState != 0 ) { + /* Non error added into error we need to detach and reattach, */ + detachTrans( from, destTrans->toState, destTrans ); + attachTrans( from, srcTrans->toState, destTrans ); + addInTrans( destTrans, srcTrans ); + retTrans = destTrans; + } + else if ( srcTrans->toState == 0 ) { + /* Dest goes somewhere but src doesn't, just add it it in. */ + addInTrans( destTrans, srcTrans ); + retTrans = destTrans; + } + else { + /* Both go somewhere, run the actual cross. */ + retTrans = fsmAttachStates( md, from, destTrans, srcTrans ); + } + + return retTrans; +} + +/* Find the trans with the higher priority. If src is lower priority than dest then + * src is ignored. If src is higher priority than dest, then src overwrites dest. If + * the priorities are equal, then they are merged. Returns the transition that + * results: dest itself, or a new duplicate of src when src overwrites dest. */ +FsmTrans *FsmGraph::crossTransitions( MergeData &md, FsmState *from, + FsmTrans *destTrans, FsmTrans *srcTrans ) +{ + FsmTrans *retTrans; + + /* Compare the priority of the dest and src transitions. The result is + * negative when dest has the lower priority. */ + int compareRes = comparePrior( destTrans->priorTable, srcTrans->priorTable ); + if ( compareRes < 0 ) { + /* Src trans has a higher priority than dest, src overwrites dest. + * Detach dest and return a copy of src. 
*/ + detachTrans( from, destTrans->toState, destTrans ); + retTrans = dupTrans( from, srcTrans ); + } + else if ( compareRes > 0 ) { + /* The dest trans has a higher priority, use dest. */ + retTrans = destTrans; + } + else { + /* Src trans and dest trans have the same priority, they must be merged. */ + retTrans = mergeTrans( md, from, destTrans, srcTrans ); + } + + /* Return the transition that resulted from the cross. */ + return retTrans; +} + +/* Copy the transitions in srcList to the outlist of dest. The srcList should + * not be the outList of dest, otherwise you would be copying the contents of + * srcList into itself as it's iterated: bad news. A fresh out list is built + * from the ranges the pair iterator reports and then replaces the out list + * of dest. */ +void FsmGraph::outTransCopy( MergeData &md, FsmState *dest, FsmTrans *srcList ) +{ + /* The new out list for dest, built up as the pair iterator advances. */ + TransList destList; + + /* Set up an iterator to stop at breaks. */ + PairIter<FsmTrans> outPair( dest->outList.head, srcList ); + for ( ; !outPair.end(); outPair++ ) { + switch ( outPair.userState ) { + case RangeInS1: { + /* The pair iter is the authority on the keys. It may have needed + * to break the dest range. */ + FsmTrans *destTrans = outPair.s1Tel.trans; + destTrans->lowKey = outPair.s1Tel.lowKey; + destTrans->highKey = outPair.s1Tel.highKey; + destList.append( destTrans ); + break; + } + case RangeInS2: { + /* The range is only in src, duplicate the src transition onto + * dest. */ + FsmTrans *newTrans = dupTrans( dest, outPair.s2Tel.trans ); + + /* Set up the transition's keys and append to the dest list. */ + newTrans->lowKey = outPair.s2Tel.lowKey; + newTrans->highKey = outPair.s2Tel.highKey; + destList.append( newTrans ); + break; + } + case RangeOverlap: { + /* Exact overlap, cross them. */ + FsmTrans *newTrans = crossTransitions( md, dest, + outPair.s1Tel.trans, outPair.s2Tel.trans ); + + /* Set up the transition's keys and append to the dest list. 
*/ + newTrans->lowKey = outPair.s1Tel.lowKey; + newTrans->highKey = outPair.s1Tel.highKey; + destList.append( newTrans ); + break; + } + case BreakS1: { + /* Since we are always writing to the dest trans, the dest needs + * to be copied when it is broken. The copy goes into the first + * half of the break to "break it off". */ + outPair.s1Tel.trans = dupTrans( dest, outPair.s1Tel.trans ); + break; + } + case BreakS2: + break; + } + } + + /* Abandon the old outList and transfer destList into it. */ + dest->outList.transfer( destList ); +} + + +/* Move all the transitions that go into src so that they go into dest. */ +void FsmGraph::inTransMove( FsmState *dest, FsmState *src ) +{ + /* Do not try to move in trans to and from the same state. */ + assert( dest != src ); + + /* If src is the start state, dest becomes the start state. */ + if ( src == startState ) { + unsetStartState(); + setStartState( dest ); + } + + /* For each entry point into, create an entry point into dest, when the + * state is detached, the entry points to src will be removed. */ + for ( EntryIdSet::Iter enId = src->entryIds; enId.lte(); enId++ ) + changeEntry( *enId, dest, src ); + + /* Move the transitions in inList. */ + while ( src->inList.head != 0 ) { + /* Get trans and from state. */ + FsmTrans *trans = src->inList.head; + FsmState *fromState = trans->fromState; + + /* Detach from src, reattach to dest. 
*/ + detachTrans( fromState, src, trans ); + attachTrans( fromState, dest, trans ); + } +} diff --git a/src/fsmbase.cc b/src/fsmbase.cc new file mode 100644 index 00000000..52698a1a --- /dev/null +++ b/src/fsmbase.cc @@ -0,0 +1,603 @@ +/* + * Copyright 2006-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <assert.h> + +#include "fsmgraph.h" + +/* Simple singly linked list append routine for the fill list. The new state + * goes to the end of the list. */ +void MergeData::fillListAppend( FsmState *state ) +{ + state->alg.next = 0; + + if ( stfillHead == 0 ) { + /* List is empty, state becomes head and tail. */ + stfillHead = state; + stfillTail = state; + } + else { + /* List is not empty, state goes after last element. */ + stfillTail->alg.next = state; + stfillTail = state; + } +} + +/* Graph constructor. */ +FsmGraph::FsmGraph() +: + /* No start state. 
*/ + startState(0), + errState(0), + + /* Misfit accounting is a switch, turned on only at specific times. It + * controls what happens when states have no way in from the outside + * world. */ + misfitAccounting(false), + + lmRequiresErrorState(false) +{ +} + +/* Copy all graph data including transitions. States are duplicated one for one + * and each original state has its alg.stateMap set to its duplicate, so the + * states of the source graph are written to even though the graph is passed + * as const. */ +FsmGraph::FsmGraph( const FsmGraph &graph ) +: + /* Lists start empty. Will be filled by copy. */ + stateList(), + misfitList(), + + /* Copy in the entry points, + * pointers will be resolved later. */ + entryPoints(graph.entryPoints), + startState(graph.startState), + errState(0), + + /* Will be filled by copy. */ + finStateSet(), + + /* Misfit accounting is only on during merging. */ + misfitAccounting(false), + + lmRequiresErrorState(graph.lmRequiresErrorState) +{ + /* Create the states and record their map in the original state. */ + StateList::Iter origState = graph.stateList; + for ( ; origState.lte(); origState++ ) { + /* Make the new state. */ + FsmState *newState = new FsmState( *origState ); + + /* Add the state to the list. */ + stateList.append( newState ); + + /* Record the duplicate in the stateMap of the original state. */ + origState->alg.stateMap = newState; + } + + /* Dereference all the state maps. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + /* The transition still points to the original target in the src + * machine. The duplicate of that target is in its stateMap. */ + FsmState *toState = trans->toState != 0 ? trans->toState->alg.stateMap : 0; + + /* Attach the transition to the duplicate. toState is cleared + * first because attachTrans asserts that it is null. */ + trans->toState = 0; + attachTrans( state, toState, trans ); + } + } + + /* Fix the state pointers in the entry points array. */ + EntryMapEl *eel = entryPoints.data; + for ( int e = 0; e < entryPoints.length(); e++, eel++ ) { + /* Get the duplicate of the state. */ + eel->value = eel->value->alg.stateMap; + + /* Foreign in transitions must be built up when duping machines so + * increment it here. 
*/ + eel->value->foreignInTrans += 1; + } + + /* Fix the start state pointer and the new start state's count of in + * transitions. NOTE(review): startState is dereferenced unconditionally, + * so this assumes the source graph has a start state. */ + startState = startState->alg.stateMap; + startState->foreignInTrans += 1; + + /* Build the final state set. */ + StateSet::Iter st = graph.finStateSet; + for ( ; st.lte(); st++ ) + finStateSet.insert((*st)->alg.stateMap); +} + +/* Deletes all transition data then deletes each state. */ +FsmGraph::~FsmGraph() +{ + /* Delete all the transitions. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + /* Empty the out list of the state, deleting its transitions. */ + state->outList.empty(); + } + + /* Delete all the states. */ + stateList.empty(); +} + +/* Set a state final. The SB_ISFINAL bit is set in the state bits and the + * state is added to the finStateSet. Has no effect on a state that is + * already final. */ +void FsmGraph::setFinState( FsmState *state ) +{ + /* Is it already a final state? Then there is nothing to do. */ + if ( state->stateBits & SB_ISFINAL ) + return; + + state->stateBits |= SB_ISFINAL; + finStateSet.insert( state ); +} + +/* Set a state non-final. The state has its SB_ISFINAL bit cleared, its out + * data cleared, and it is removed from the final state set. Has no effect on + * a state that is not final. */ +void FsmGraph::unsetFinState( FsmState *state ) +{ + /* Is it already a non-final state? */ + if ( ! (state->stateBits & SB_ISFINAL) ) + return; + + /* When a state loses its final state status it must relinquish all the + * properties that are allowed only for final states. */ + clearOutData( state ); + + state->stateBits &= ~ SB_ISFINAL; + finStateSet.remove( state ); +} + +/* Set a state as the start state. There must be no start state set when this + * is called. Being the start state counts as one foreign in transition. */ +void FsmGraph::setStartState( FsmState *state ) +{ + /* Should change from unset to set. */ + assert( startState == 0 ); + startState = state; + + if ( misfitAccounting ) { + /* If the number of foreign in transitions is about to go up to 1 then + * take it off the misfit list and put it on the head list. 
*/ + if ( state->foreignInTrans == 0 ) + stateList.append( misfitList.detach( state ) ); + } + + /* Up the foreign in transitions to the state. */ + state->foreignInTrans += 1; +} +/* Unset the start state. A start state must be set when this is called. The + * foreign in transition that start state status contributed is given back. */ +void FsmGraph::unsetStartState() +{ + /* Should change from set to unset. */ + assert( startState != 0 ); + + /* Decrement the start state's count of foreign in transitions. */ + startState->foreignInTrans -= 1; + + if ( misfitAccounting ) { + /* If the number of foreign in transitions just went down to 0 then take + * it off the main list and put it on the misfit list. */ + if ( startState->foreignInTrans == 0 ) + misfitList.append( stateList.detach( startState ) ); + } + + startState = 0; +} + +/* Associate an id with a state. Makes the state a named entry point. Has no + * effect if the entry point is already mapped to the state. Each new entry + * point counts as one foreign in transition of the state. */ +void FsmGraph::setEntry( int id, FsmState *state ) +{ + /* Insert the id into the state. If the state is already labelled with id, + * nothing to do. */ + if ( state->entryIds.insert( id ) ) { + /* Record the entry in the entry point map. Multiple states may be + * recorded under the same id. */ + entryPoints.insertMulti( id, state ); + + if ( misfitAccounting ) { + /* If the number of foreign in transitions is about to go up to 1 then + * take it off the misfit list and put it on the head list. */ + if ( state->foreignInTrans == 0 ) + stateList.append( misfitList.detach( state ) ); + } + + /* Up the foreign in transitions to the state. */ + state->foreignInTrans += 1; + } +} + +/* Remove the association of an id with a state. The state loses its entry + * point status. Assumes that the id is indeed mapped to state. */ +void FsmGraph::unsetEntry( int id, FsmState *state ) +{ + /* Find the records for id, then scan forward for the one that targets + * state. The scan has no bounds check, hence the precondition that id + * is mapped to state. */ + EntryMapEl *enLow = 0, *enHigh = 0; + entryPoints.findMulti( id, enLow, enHigh ); + while ( enLow->value != state ) + enLow += 1; + + /* Remove the record from the map. */ + entryPoints.remove( enLow ); + + /* Remove the state's sense of the link. 
*/ + state->entryIds.remove( id ); + state->foreignInTrans -= 1; + if ( misfitAccounting ) { + /* If the number of foreign in transitions just went down to 0 then take + * it off the main list and put it on the misfit list. */ + if ( state->foreignInTrans == 0 ) + misfitList.append( stateList.detach( state ) ); + } +} + +/* Remove all association of an id with states. Assumes that the id is indeed + * mapped to a state. */ +void FsmGraph::unsetEntry( int id ) +{ + /* Find the range of records for id. NOTE(review): if findMulti leaves + * both pointers null on a miss, the loop below dereferences null, so + * callers must honor the precondition. */ + EntryMapEl *enLow = 0, *enHigh = 0; + entryPoints.findMulti( id, enLow, enHigh ); + for ( EntryMapEl *mel = enLow; mel <= enHigh; mel++ ) { + /* Remove the state's sense of the link. */ + mel->value->entryIds.remove( id ); + mel->value->foreignInTrans -= 1; + if ( misfitAccounting ) { + /* If the number of foreign in transitions just went down to 0 + * then take it off the main list and put it on the misfit list. */ + if ( mel->value->foreignInTrans == 0 ) + misfitList.append( stateList.detach( mel->value ) ); + } + } + + /* Remove the records from the entry points map. */ + entryPoints.removeMulti( enLow, enHigh ); +} + +/* Retarget the entry point id from state from to state to. The map record is + * rewritten in place, the id is removed from the entry id set of from and + * inserted into that of to, and the foreign in transition counts of both + * states are adjusted. Assumes that id is mapped to from. */ +void FsmGraph::changeEntry( int id, FsmState *to, FsmState *from ) +{ + /* Find the entry in the entry map. The scan has no bounds check. */ + EntryMapEl *enLow = 0, *enHigh = 0; + entryPoints.findMulti( id, enLow, enHigh ); + while ( enLow->value != from ) + enLow += 1; + + /* Change it to the new target. */ + enLow->value = to; + + /* Remove the link as seen from the from state. */ + from->entryIds.remove( id ); + from->foreignInTrans -= 1; + if ( misfitAccounting ) { + /* If the number of foreign in transitions just went down to 0 then take + * it off the main list and put it on the misfit list. */ + if ( from->foreignInTrans == 0 ) + misfitList.append( stateList.detach( from ) ); + } + + /* Add to's sense of the link. 
*/ + if ( to->entryIds.insert( id ) != 0 ) { + if ( misfitAccounting ) { + /* If the number of foreign in transitions is about to go up to 1 then + * take it off the misfit list and put it on the head list. */ + if ( to->foreignInTrans == 0 ) + stateList.append( misfitList.detach( to ) ); + } + + /* Up the foreign in transitions to the state. */ + to->foreignInTrans += 1; + } +} + + +/* Clear all entry points from a machine. Every state loses all of its entry + * ids and the foreign in transitions they contributed, then the entry point + * map is emptied. */ +void FsmGraph::unsetAllEntryPoints() +{ + for ( EntryMap::Iter en = entryPoints; en.lte(); en++ ) { + /* Kill all the state's entry points at once. A state that is reached + * again through a later record has an empty id set by then and is + * skipped. */ + if ( en->value->entryIds.length() > 0 ) { + en->value->foreignInTrans -= en->value->entryIds.length(); + + if ( misfitAccounting ) { + /* If the number of foreign in transitions just went down to 0 + * then take it off the main list and put it on the misfit + * list. */ + if ( en->value->foreignInTrans == 0 ) + misfitList.append( stateList.detach( en->value ) ); + } + + /* Clear the set of ids out all at once. */ + en->value->entryIds.empty(); + } + } + + /* Now clear out the entry map all at once. */ + entryPoints.empty(); +} + +/* Append id to the epsilon transition list of every final state. */ +void FsmGraph::epsilonTrans( int id ) +{ + for ( StateSet::Iter fs = finStateSet; fs.lte(); fs++ ) + (*fs)->epsilonTrans.append( id ); +} + +/* Mark all states reachable from state. Traverses transitions forward. Used + * for removing states that have no path into them. Marking sets the + * SB_ISMARKED bit; states that already carry it are not visited again. */ +void FsmGraph::markReachableFromHere( FsmState *state ) +{ + /* Base case: the state is already marked, nothing to do. */ + if ( state->stateBits & SB_ISMARKED ) + return; + + /* Set this state as processed. We are going to visit all states that this + * state has a transition to. */ + state->stateBits |= SB_ISMARKED; + + /* Recurse on all out transitions. 
*/ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + if ( trans->toState != 0 ) + markReachableFromHere( trans->toState ); + } +} + +/* Mark states reachable from state by traversing transitions forward, but do + * not enter final states: a final target state is neither marked nor + * traversed by this walk. The state passed in is always marked. */ +void FsmGraph::markReachableFromHereStopFinal( FsmState *state ) +{ + /* Base case: return; */ + if ( state->stateBits & SB_ISMARKED ) + return; + + /* Set this state as processed. We are going to visit all states that this + * state has a transition to. */ + state->stateBits |= SB_ISMARKED; + + /* Recurse on all out transitions. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + FsmState *toState = trans->toState; + if ( toState != 0 && !toState->isFinState() ) + markReachableFromHereStopFinal( toState ); + } +} + +/* Mark all states reachable from state. Traverse transitions backwards. Used + * for removing dead end paths in graphs. */ +void FsmGraph::markReachableFromHereReverse( FsmState *state ) +{ + /* Base case: return; */ + if ( state->stateBits & SB_ISMARKED ) + return; + + /* Set this state as processed. We are going to visit all states with + * transitions into this state. */ + state->stateBits |= SB_ISMARKED; + + /* Recurse on all in transitions. */ + for ( TransInList::Iter trans = state->inList; trans.lte(); trans++ ) + markReachableFromHereReverse( trans->fromState ); +} + +/* Determine if there are any entry points into a start state other than the + * start state. Setting starting transitions requires that the start state be + * isolated. In most cases a start state will already be isolated. */ +bool FsmGraph::isStartStateIsolated() +{ + /* If there are any in transitions then the state is not isolated. */ + if ( startState->inList.head != 0 ) + return false; + + /* If there are any entry points then the state is not isolated. */ + if ( startState->entryIds.length() > 0 ) + return false; + + return true; +} + +/* Bring in other's entry points. Assumes other's states are going to be + * copied into this machine. 
*/ +void FsmGraph::copyInEntryPoints( FsmGraph *other ) +{ + /* Use insert multi because names are not unique. */ + for ( EntryMap::Iter en = other->entryPoints; en.lte(); en++ ) + entryPoints.insertMulti( en->key, en->value ); +} + + +void FsmGraph::unsetAllFinStates() +{ + for ( StateSet::Iter st = finStateSet; st.lte(); st++ ) + (*st)->stateBits &= ~ SB_ISFINAL; + finStateSet.empty(); +} + +void FsmGraph::setFinBits( int finStateBits ) +{ + for ( int s = 0; s < finStateSet.length(); s++ ) + finStateSet.data[s]->stateBits |= finStateBits; +} + + +/* Tests the integrity of the transition lists and the fromStates. */ +void FsmGraph::verifyIntegrity() +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + /* Walk the out transitions and assert fromState is correct. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) + assert( trans->fromState == state ); + + /* Walk the inlist and assert toState is correct. */ + for ( TransInList::Iter trans = state->inList; trans.lte(); trans++ ) + assert( trans->toState == state ); + } +} + +void FsmGraph::verifyReachability() +{ + /* Mark all the states that can be reached + * through the set of entry points. */ + markReachableFromHere( startState ); + for ( EntryMap::Iter en = entryPoints; en.lte(); en++ ) + markReachableFromHere( en->value ); + + /* Check that everything got marked. */ + for ( StateList::Iter st = stateList; st.lte(); st++ ) { + /* Assert it got marked and then clear the mark. */ + assert( st->stateBits & SB_ISMARKED ); + st->stateBits &= ~ SB_ISMARKED; + } +} + +void FsmGraph::verifyNoDeadEndStates() +{ + /* Mark all states that have paths to the final states. */ + for ( StateSet::Iter pst = finStateSet; pst.lte(); pst++ ) + markReachableFromHereReverse( *pst ); + + /* Start state gets honorary marking. Must be done AFTER recursive call. */ + startState->stateBits |= SB_ISMARKED; + + /* Make sure everything got marked. 
*/ + for ( StateList::Iter st = stateList; st.lte(); st++ ) { + /* Assert the state got marked and unmark it. */ + assert( st->stateBits & SB_ISMARKED ); + st->stateBits &= ~ SB_ISMARKED; + } +} + +void FsmGraph::depthFirstOrdering( FsmState *state ) +{ + /* Nothing to do if the state is already on the list. */ + if ( state->stateBits & SB_ONLIST ) + return; + + /* Doing depth first, put state on the list. */ + state->stateBits |= SB_ONLIST; + stateList.append( state ); + + /* Recurse on everything ranges. */ + for ( TransList::Iter tel = state->outList; tel.lte(); tel++ ) { + if ( tel->toState != 0 ) + depthFirstOrdering( tel->toState ); + } +} + +/* Ordering states by transition connections. */ +void FsmGraph::depthFirstOrdering() +{ + /* Init on state list flags. */ + for ( StateList::Iter st = stateList; st.lte(); st++ ) + st->stateBits &= ~SB_ONLIST; + + /* Clear out the state list, we will rebuild it. */ + int stateListLen = stateList.length(); + stateList.abandon(); + + /* Add back to the state list from the start state and all other entry + * points. */ + if ( errState != 0 ) + depthFirstOrdering( errState ); + depthFirstOrdering( startState ); + for ( EntryMap::Iter en = entryPoints; en.lte(); en++ ) + depthFirstOrdering( en->value ); + + /* Make sure we put everything back on. */ + assert( stateListLen == stateList.length() ); +} + +/* Stable sort the states by final state status. */ +void FsmGraph::sortStatesByFinal() +{ + /* Move forward through the list and throw final states onto the end. */ + FsmState *state = 0; + FsmState *next = stateList.head; + FsmState *last = stateList.tail; + while ( state != last ) { + /* Move forward and load up the next. */ + state = next; + next = state->next; + + /* Throw to the end? 
*/ + if ( state->isFinState() ) { + stateList.detach( state ); + stateList.append( state ); + } + } +} + +void FsmGraph::setStateNumbers( int base ) +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) + state->alg.stateNum = base++; +} + + +bool FsmGraph::checkErrTrans( FsmState *state, FsmTrans *trans ) +{ + /* Might go directly to error state. */ + if ( trans->toState == 0 ) + return true; + + if ( trans->prev == 0 ) { + /* If this is the first transition. */ + if ( keyOps->minKey < trans->lowKey ) + return true; + } + else { + /* Not the first transition. Compare against the prev. */ + FsmTrans *prev = trans->prev; + Key nextKey = prev->highKey; + nextKey.increment(); + if ( nextKey < trans->lowKey ) + return true; + } + return false; +} + +bool FsmGraph::checkErrTransFinish( FsmState *state ) +{ + /* Check if there are any ranges already. */ + if ( state->outList.length() == 0 ) + return true; + else { + /* Get the last and check for a gap on the end. */ + FsmTrans *last = state->outList.tail; + if ( last->highKey < keyOps->maxKey ) + return true; + } + return 0; +} + +bool FsmGraph::hasErrorTrans() +{ + bool result; + for ( StateList::Iter st = stateList; st.lte(); st++ ) { + for ( TransList::Iter tr = st->outList; tr.lte(); tr++ ) { + result = checkErrTrans( st, tr ); + if ( result ) + return true; + } + result = checkErrTransFinish( st ); + if ( result ) + return true; + } + return false; +} diff --git a/src/fsmcodegen.cc b/src/fsmcodegen.cc new file mode 100644 index 00000000..5d63c079 --- /dev/null +++ b/src/fsmcodegen.cc @@ -0,0 +1,918 @@ +/* + * Copyright 2006-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the 
Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <assert.h> +#include <string.h> +#include <stdbool.h> + +#include <sstream> +#include <iostream> + +#include "fsmcodegen.h" + +using std::ostream; +using std::ostringstream; +using std::string; +using std::cerr; +using std::endl; + +/* Init code gen with in parameters. */ +FsmCodeGen::FsmCodeGen( ostream &out, + RedFsm *redFsm, fsm_tables *fsmTables ) +: + out(out), + redFsm(redFsm), + fsmTables(fsmTables), + codeGenErrCount(0), + dataPrefix(true), + writeFirstFinal(true), + writeErr(true), + skipTokprefLabelNeeded(false) +{ +} + +/* Write out the fsm name. */ +string FsmCodeGen::FSM_NAME() +{ + return "parser"; +} + +/* Emit the offset of the start state as a decimal integer. */ +string FsmCodeGen::START_STATE_ID() +{ + ostringstream ret; + ret << redFsm->startState->id; + return ret.str(); +}; + +/* Write out the array of actions. */ +std::ostream &FsmCodeGen::ACTIONS_ARRAY() +{ + out << "\t0, "; + int totalActions = 1; + for ( GenActionTableMap::Iter act = redFsm->actionMap; act.lte(); act++ ) { + /* Write out the length, which will never be the last character. 
*/ + out << act->key.length() << ", "; + /* Put in a line break every 8 */ + if ( totalActions++ % 8 == 7 ) + out << "\n\t"; + + for ( GenActionTable::Iter item = act->key; item.lte(); item++ ) { + out << item->value->actionId; + if ( ! (act.last() && item.last()) ) + out << ", "; + + /* Put in a line break every 8 */ + if ( totalActions++ % 8 == 7 ) + out << "\n\t"; + } + } + out << "\n"; + return out; +} + + +string FsmCodeGen::CS() +{ + ostringstream ret; + /* Expression for retrieving the key, use simple dereference. */ + ret << ACCESS() << "fsm_cs"; + return ret.str(); +} + +string FsmCodeGen::GET_WIDE_KEY() +{ + return GET_KEY(); +} + +string FsmCodeGen::GET_WIDE_KEY( RedState *state ) +{ + return GET_KEY(); +} + +string FsmCodeGen::GET_KEY() +{ + ostringstream ret; + /* Expression for retrieving the key, use simple dereference. */ + ret << "(*" << P() << ")"; + return ret.str(); +} + +/* Write out level number of tabs. Makes the nested binary search nice + * looking. */ +string FsmCodeGen::TABS( int level ) +{ + string result; + while ( level-- > 0 ) + result += "\t"; + return result; +} + +/* Write out a key from the fsm code gen. Depends on wether or not the key is + * signed. */ +string FsmCodeGen::KEY( Key key ) +{ + ostringstream ret; + ret << key.getVal(); + return ret.str(); +} + +void FsmCodeGen::SET_ACT( ostream &ret, InlineItem *item ) +{ + ret << ACT() << " = " << item->longestMatchPart->longestMatchId << ";"; +} + +void FsmCodeGen::SET_TOKEND( ostream &ret, InlineItem *item ) +{ + /* The tokend action sets tokend. */ + ret << "{ " << TOKEND() << " = " << TOKPREF() << " + ( " << P() << " - " << BLOCK_START() << " ) + 1; }"; +} + +void FsmCodeGen::SET_TOKEND_0( ostream &ret, InlineItem *item ) +{ + /* The tokend action sets tokend. 
*/ + ret << "{ " << TOKEND() << " = " << TOKPREF() << " + ( " << P() << " - " << BLOCK_START() << " ); }"; +} + +void FsmCodeGen::INIT_TOKSTART( ostream &ret, InlineItem *item ) +{ + ret << TOKSTART() << " = 0;"; +} + +void FsmCodeGen::INIT_ACT( ostream &ret, InlineItem *item ) +{ + ret << ACT() << " = 0;"; +} + +void FsmCodeGen::SET_TOKSTART( ostream &ret, InlineItem *item ) +{ + ret << TOKSTART() << " = " << P() << ";"; +} + +void FsmCodeGen::EMIT_TOKEN( ostream &ret, LangEl *token ) +{ + ret << " " << MATCHED_TOKEN() << " = " << token->id << ";\n"; +} + +void FsmCodeGen::LM_SWITCH( ostream &ret, InlineItem *item, + int targState, int inFinish ) +{ + ret << + " switch( " << ACT() << " ) {\n"; + + /* If the switch handles error then we also forced the error state. It + * will exist. */ + if ( item->tokenRegion->lmSwitchHandlesError ) { + ret << " case 0: " //<< P() << " = " << TOKSTART() << ";" << + "goto st" << redFsm->errState->id << ";\n"; + } + + for ( TokenInstanceListReg::Iter lmi = item->tokenRegion->tokenInstanceList; lmi.lte(); lmi++ ) { + if ( lmi->inLmSelect ) { + assert( lmi->tokenDef->tdLangEl != 0 ); + ret << " case " << lmi->longestMatchId << ":\n"; + EMIT_TOKEN( ret, lmi->tokenDef->tdLangEl ); + ret << " break;\n"; + } + } + + ret << + " }\n" + "\t" + " goto skip_tokpref;\n"; + + skipTokprefLabelNeeded = true; +} + +void FsmCodeGen::LM_ON_LAST( ostream &ret, InlineItem *item ) +{ + assert( item->longestMatchPart->tokenDef->tdLangEl != 0 ); + + ret << " " << P() << " += 1;\n"; + SET_TOKEND_0( ret, 0 ); + EMIT_TOKEN( ret, item->longestMatchPart->tokenDef->tdLangEl ); + ret << " goto out;\n"; +} + +void FsmCodeGen::LM_ON_NEXT( ostream &ret, InlineItem *item ) +{ + assert( item->longestMatchPart->tokenDef->tdLangEl != 0 ); + + SET_TOKEND_0( ret, 0 ); + EMIT_TOKEN( ret, item->longestMatchPart->tokenDef->tdLangEl ); + ret << " goto out;\n"; +} + +void FsmCodeGen::LM_ON_LAG_BEHIND( ostream &ret, InlineItem *item ) +{ + assert( 
item->longestMatchPart->tokenDef->tdLangEl != 0 ); + + EMIT_TOKEN( ret, item->longestMatchPart->tokenDef->tdLangEl ); + ret << " goto skip_tokpref;\n"; + + skipTokprefLabelNeeded = true; +} + + +/* Write out an inline tree structure. Walks the list and possibly calls out + * to virtual functions than handle language specific items in the tree. */ +void FsmCodeGen::INLINE_LIST( ostream &ret, InlineList *inlineList, + int targState, bool inFinish ) +{ + for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) { + switch ( item->type ) { + case InlineItem::Text: + assert( false ); + break; + case InlineItem::LmSetActId: + SET_ACT( ret, item ); + break; + case InlineItem::LmSetTokEnd: + SET_TOKEND( ret, item ); + break; + case InlineItem::LmInitTokStart: + assert( false ); + break; + case InlineItem::LmInitAct: + INIT_ACT( ret, item ); + break; + case InlineItem::LmSetTokStart: + SET_TOKSTART( ret, item ); + break; + case InlineItem::LmSwitch: + LM_SWITCH( ret, item, targState, inFinish ); + break; + case InlineItem::LmOnLast: + LM_ON_LAST( ret, item ); + break; + case InlineItem::LmOnNext: + LM_ON_NEXT( ret, item ); + break; + case InlineItem::LmOnLagBehind: + LM_ON_LAG_BEHIND( ret, item ); + break; + } + } +} + +/* Write out paths in line directives. Escapes any special characters. */ +string FsmCodeGen::LDIR_PATH( char *path ) +{ + ostringstream ret; + for ( char *pc = path; *pc != 0; pc++ ) { + if ( *pc == '\\' ) + ret << "\\\\"; + else + ret << *pc; + } + return ret.str(); +} + +void FsmCodeGen::ACTION( ostream &ret, GenAction *action, int targState, bool inFinish ) +{ + /* Write the block and close it off. 
*/ + ret << "\t{"; + INLINE_LIST( ret, action->inlineList, targState, inFinish ); + + if ( action->markId > 0 ) + ret << "mark[" << action->markId-1 << "] = " << P() << ";\n"; + + ret << "}\n"; + +} + +void FsmCodeGen::CONDITION( ostream &ret, GenAction *condition ) +{ + ret << "\n"; + INLINE_LIST( ret, condition->inlineList, 0, false ); +} + +string FsmCodeGen::ERROR_STATE() +{ + ostringstream ret; + if ( redFsm->errState != 0 ) + ret << redFsm->errState->id; + else + ret << "-1"; + return ret.str(); +} + +string FsmCodeGen::FIRST_FINAL_STATE() +{ + ostringstream ret; + if ( redFsm->firstFinState != 0 ) + ret << redFsm->firstFinState->id; + else + ret << redFsm->nextStateId; + return ret.str(); +} + +string FsmCodeGen::DATA_PREFIX() +{ + if ( dataPrefix ) + return FSM_NAME() + "_"; + return ""; +} + +/* Emit the alphabet data type. */ +string FsmCodeGen::ALPH_TYPE() +{ + string ret = keyOps->alphType->data1; + if ( keyOps->alphType->data2 != 0 ) { + ret += " "; + ret += + keyOps->alphType->data2; + } + return ret; +} + +/* Emit the alphabet data type. */ +string FsmCodeGen::WIDE_ALPH_TYPE() +{ + string ret; + ret = ALPH_TYPE(); + return ret; +} + + +string FsmCodeGen::PTR_CONST() +{ + return "const "; +} + +std::ostream &FsmCodeGen::OPEN_ARRAY( string type, string name ) +{ + out << "static const " << type << " " << name << "[] = {\n"; + return out; +} + +std::ostream &FsmCodeGen::CLOSE_ARRAY() +{ + return out << "};\n"; +} + +std::ostream &FsmCodeGen::STATIC_VAR( string type, string name ) +{ + out << "static const " << type << " " << name; + return out; +} + +string FsmCodeGen::UINT( ) +{ + return "unsigned int"; +} + +string FsmCodeGen::ARR_OFF( string ptr, string offset ) +{ + return ptr + " + " + offset; +} + +string FsmCodeGen::CAST( string type ) +{ + return "(" + type + ")"; +} + +std::ostream &FsmCodeGen::TO_STATE_ACTION_SWITCH() +{ + /* Walk the list of functions, printing the cases. 
*/ + for ( GenActionList::Iter act = redFsm->genActionList; act.lte(); act++ ) { + /* Write out referenced actions. */ + if ( act->numToStateRefs > 0 ) { + /* Write the case label, the action and the case break. */ + out << "\tcase " << act->actionId << ":\n"; + ACTION( out, act, 0, false ); + out << "\tbreak;\n"; + } + } + + return out; +} + +std::ostream &FsmCodeGen::FROM_STATE_ACTION_SWITCH() +{ + /* Walk the list of functions, printing the cases. */ + for ( GenActionList::Iter act = redFsm->genActionList; act.lte(); act++ ) { + /* Write out referenced actions. */ + if ( act->numFromStateRefs > 0 ) { + /* Write the case label, the action and the case break. */ + out << "\tcase " << act->actionId << ":\n"; + ACTION( out, act, 0, false ); + out << "\tbreak;\n"; + } + } + + return out; +} + +std::ostream &FsmCodeGen::ACTION_SWITCH() +{ + /* Walk the list of functions, printing the cases. */ + for ( GenActionList::Iter act = redFsm->genActionList; act.lte(); act++ ) { + /* Write out referenced actions. */ + if ( act->numTransRefs > 0 ) { + /* Write the case label, the action and the case break. */ + out << "\tcase " << act->actionId << ":\n"; + ACTION( out, act, 0, false ); + out << "\tbreak;\n"; + } + } + + return out; +} + +void FsmCodeGen::emitSingleSwitch( RedState *state ) +{ + /* Load up the singles. */ + int numSingles = state->outSingle.length(); + RedTransEl *data = state->outSingle.data; + + if ( numSingles == 1 ) { + /* If there is a single single key then write it out as an if. */ + out << "\tif ( " << GET_WIDE_KEY(state) << " == " << + KEY(data[0].lowKey) << " )\n\t\t"; + + /* Virtual function for writing the target of the transition. */ + TRANS_GOTO(data[0].value, 0) << "\n"; + } + else if ( numSingles > 1 ) { + /* Write out single keys in a switch if there is more than one. */ + out << "\tswitch( " << GET_WIDE_KEY(state) << " ) {\n"; + + /* Write out the single indices. 
*/ + for ( int j = 0; j < numSingles; j++ ) { + out << "\t\tcase " << KEY(data[j].lowKey) << ": "; + TRANS_GOTO(data[j].value, 0) << "\n"; + } + + /* Close off the transition switch. */ + out << "\t}\n"; + } +} + +void FsmCodeGen::emitRangeBSearch( RedState *state, int level, int low, int high ) +{ + /* Get the mid position, staying on the lower end of the range. */ + int mid = (low + high) >> 1; + RedTransEl *data = state->outRange.data; + + /* Determine if we need to look higher or lower. */ + bool anyLower = mid > low; + bool anyHigher = mid < high; + + /* Determine if the keys at mid are the limits of the alphabet. */ + bool limitLow = data[mid].lowKey == keyOps->minKey; + bool limitHigh = data[mid].highKey == keyOps->maxKey; + + if ( anyLower && anyHigher ) { + /* Can go lower and higher than mid. */ + out << TABS(level) << "if ( " << GET_WIDE_KEY(state) << " < " << + KEY(data[mid].lowKey) << " ) {\n"; + emitRangeBSearch( state, level+1, low, mid-1 ); + out << TABS(level) << "} else if ( " << GET_WIDE_KEY(state) << " > " << + KEY(data[mid].highKey) << " ) {\n"; + emitRangeBSearch( state, level+1, mid+1, high ); + out << TABS(level) << "} else\n"; + TRANS_GOTO(data[mid].value, level+1) << "\n"; + } + else if ( anyLower && !anyHigher ) { + /* Can go lower than mid but not higher. */ + out << TABS(level) << "if ( " << GET_WIDE_KEY(state) << " < " << + KEY(data[mid].lowKey) << " ) {\n"; + emitRangeBSearch( state, level+1, low, mid-1 ); + + /* if the higher is the highest in the alphabet then there is no + * sense testing it. */ + if ( limitHigh ) { + out << TABS(level) << "} else\n"; + TRANS_GOTO(data[mid].value, level+1) << "\n"; + } + else { + out << TABS(level) << "} else if ( " << GET_WIDE_KEY(state) << " <= " << + KEY(data[mid].highKey) << " )\n"; + TRANS_GOTO(data[mid].value, level+1) << "\n"; + } + } + else if ( !anyLower && anyHigher ) { + /* Can go higher than mid but not lower. 
*/ + out << TABS(level) << "if ( " << GET_WIDE_KEY(state) << " > " << + KEY(data[mid].highKey) << " ) {\n"; + emitRangeBSearch( state, level+1, mid+1, high ); + + /* If the lower end is the lowest in the alphabet then there is no + * sense testing it. */ + if ( limitLow ) { + out << TABS(level) << "} else\n"; + TRANS_GOTO(data[mid].value, level+1) << "\n"; + } + else { + out << TABS(level) << "} else if ( " << GET_WIDE_KEY(state) << " >= " << + KEY(data[mid].lowKey) << " )\n"; + TRANS_GOTO(data[mid].value, level+1) << "\n"; + } + } + else { + /* Cannot go higher or lower than mid. It's mid or bust. What + * tests to do depends on limits of alphabet. */ + if ( !limitLow && !limitHigh ) { + out << TABS(level) << "if ( " << KEY(data[mid].lowKey) << " <= " << + GET_WIDE_KEY(state) << " && " << GET_WIDE_KEY(state) << " <= " << + KEY(data[mid].highKey) << " )\n"; + TRANS_GOTO(data[mid].value, level+1) << "\n"; + } + else if ( limitLow && !limitHigh ) { + out << TABS(level) << "if ( " << GET_WIDE_KEY(state) << " <= " << + KEY(data[mid].highKey) << " )\n"; + TRANS_GOTO(data[mid].value, level+1) << "\n"; + } + else if ( !limitLow && limitHigh ) { + out << TABS(level) << "if ( " << KEY(data[mid].lowKey) << " <= " << + GET_WIDE_KEY(state) << " )\n"; + TRANS_GOTO(data[mid].value, level+1) << "\n"; + } + else { + /* Both high and low are at the limit. No tests to do. */ + TRANS_GOTO(data[mid].value, level+1) << "\n"; + } + } +} + +std::ostream &FsmCodeGen::STATE_GOTOS() +{ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st == redFsm->errState ) + STATE_GOTO_ERROR(); + else { + /* Writing code above state gotos. */ + GOTO_HEADER( st ); + + /* Try singles. */ + if ( st->outSingle.length() > 0 ) + emitSingleSwitch( st ); + + /* Default case is to binary search for the ranges, if that fails then */ + if ( st->outRange.length() > 0 ) + emitRangeBSearch( st, 1, 0, st->outRange.length() - 1 ); + + /* Write the default transition. 
*/ + TRANS_GOTO( st->defTrans, 1 ) << "\n"; + } + } + return out; +} + +/* Value written to the to-state actions table for a state: the location of + * the state's to-state action plus one, or 0 when it has none. */ +unsigned int FsmCodeGen::TO_STATE_ACTION( RedState *state ) +{ + int act = 0; + if ( state->toStateAction != 0 ) + act = state->toStateAction->location+1; + return act; +} + +/* Value written to the from-state actions table for a state: the location of + * the state's from-state action plus one, or 0 when it has none. */ +unsigned int FsmCodeGen::FROM_STATE_ACTION( RedState *state ) +{ + int act = 0; + if ( state->fromStateAction != 0 ) + act = state->fromStateAction->location+1; + return act; +} + +/* Write the to-state action values as a comma separated list, IALL values + * per line, one value for every state id below nextStateId. Returns out. */ +std::ostream &FsmCodeGen::TO_STATE_ACTIONS() +{ + /* The table is indexed by state id and one value is written for every id + * below nextStateId, so size it by nextStateId and not by the length of + * the state list. Ids with no state in the list keep the zero written by + * memset. */ + int numStates = redFsm->nextStateId; + unsigned int *vals = new unsigned int[numStates]; + memset( vals, 0, sizeof(unsigned int)*numStates ); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Every id must fall inside the table. */ + assert( st->id >= 0 && st->id < numStates ); + vals[st->id] = TO_STATE_ACTION(st); + } + + out << "\t"; + for ( int st = 0; st < numStates; st++ ) { + /* Write the to-state action value for this state id. */ + out << vals[st]; + if ( st < numStates-1 ) { + out << ", "; + if ( (st+1) % IALL == 0 ) + out << "\n\t"; + } + } + out << "\n"; + delete[] vals; + return out; +} + +/* Write the from-state action values as a comma separated list, IALL values + * per line, one value for every state id below nextStateId. Returns out. */ +std::ostream &FsmCodeGen::FROM_STATE_ACTIONS() +{ + /* The table is indexed by state id and one value is written for every id + * below nextStateId, so size it by nextStateId and not by the length of + * the state list. Ids with no state in the list keep the zero written by + * memset. */ + int numStates = redFsm->nextStateId; + unsigned int *vals = new unsigned int[numStates]; + memset( vals, 0, sizeof(unsigned int)*numStates ); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Every id must fall inside the table. */ + assert( st->id >= 0 && st->id < numStates ); + vals[st->id] = FROM_STATE_ACTION(st); + } + + out << "\t"; + for ( int st = 0; st < numStates; st++ ) { + /* Write the from-state action value for this state id. */ + out << vals[st]; + if ( st < numStates-1 ) { + out << ", "; + if ( (st+1) % IALL == 0 ) + out << "\n\t"; + } + } + out << "\n"; + delete[] vals; + return out; +} + +bool FsmCodeGen::IN_TRANS_ACTIONS( RedState *state ) +{ + /* Emit any transitions that have actions and that go to this state. 
*/ + for ( int it = 0; it < state->numInTrans; it++ ) { + RedTrans *trans = state->inTrans[it]; + if ( trans->action != 0 && trans->labelNeeded ) { + /* Write the label for the transition so it can be jumped to. */ + out << "tr" << trans->id << ":\n"; + + /* If the action contains a next, then we must preload the current + * state since the action may or may not set it. */ + if ( trans->action->anyNextStmt() ) + out << " " << CS() << " = " << trans->targ->id << ";\n"; + + /* Write each action in the list. */ + for ( GenActionTable::Iter item = trans->action->key; item.lte(); item++ ) + ACTION( out, item->value, trans->targ->id, false ); + + out << "\tgoto st" << trans->targ->id << ";\n"; + } + } + + return 0; +} + +/* Called from FsmCodeGen::STATE_GOTOS just before writing the gotos for each + * state. */ +void FsmCodeGen::GOTO_HEADER( RedState *state ) +{ + IN_TRANS_ACTIONS( state ); + + if ( state->labelNeeded ) + out << "st" << state->id << ":\n"; + + if ( state->toStateAction != 0 ) { + /* Remember that we wrote an action. Write every action in the list. */ + for ( GenActionTable::Iter item = state->toStateAction->key; item.lte(); item++ ) + ACTION( out, item->value, state->id, false ); + } + + /* Give the state a switch case. */ + out << "case " << state->id << ":\n"; + + /* Advance and test buffer pos. */ + out << + " if ( ++" << P() << " == " << PE() << " )\n" + " goto out" << state->id << ";\n"; + + if ( state->fromStateAction != 0 ) { + /* Remember that we wrote an action. Write every action in the list. */ + for ( GenActionTable::Iter item = state->fromStateAction->key; item.lte(); item++ ) + ACTION( out, item->value, state->id, false ); + } + + /* Record the prev state if necessary. */ + if ( state->anyRegCurStateRef() ) + out << " _ps = " << state->id << ";\n"; +} + +void FsmCodeGen::STATE_GOTO_ERROR() +{ + /* In the error state we need to emit some stuff that usually goes into + * the header. 
*/ + RedState *state = redFsm->errState; + IN_TRANS_ACTIONS( state ); + + if ( state->labelNeeded ) + out << "st" << state->id << ":\n"; + + /* We do not need a case label here because the the error state is checked + * at the head of the loop. */ + + /* Break out here. */ + out << " goto out" << state->id << ";\n"; +} + + +/* Emit the goto to take for a given transition. */ +std::ostream &FsmCodeGen::TRANS_GOTO( RedTrans *trans, int level ) +{ + if ( trans->action != 0 ) { + /* Go to the transition which will go to the state. */ + out << TABS(level) << "goto tr" << trans->id << ";"; + } + else { + /* Go directly to the target state. */ + out << TABS(level) << "goto st" << trans->targ->id << ";"; + } + return out; +} + +std::ostream &FsmCodeGen::EXIT_STATES() +{ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + out << " case " << st->id << ": out" << st->id << ": "; + if ( st->eofTrans != 0 ) { + out << "if ( " << DATA_EOF() << " ) {"; + TRANS_GOTO( st->eofTrans, 0 ); + out << "\n"; + out << "}"; + } + + /* Exit. */ + out << CS() << " = " << st->id << "; goto out; \n"; + } + return out; +} + +/* Set up labelNeeded flag for each state. */ +void FsmCodeGen::setLabelsNeeded() +{ + /* Do not use all labels by default, init all labelNeeded vars to false. */ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) + st->labelNeeded = false; + + if ( redFsm->errState != 0 && redFsm->anyLmSwitchError() ) + redFsm->errState->labelNeeded = true; + + /* Walk all transitions and set only those that have targs. */ + for ( RedTransSet::Iter trans = redFsm->transSet; trans.lte(); trans++ ) { + /* If there is no action with a next statement, then the label will be + * needed. 
*/ + if ( trans->action == 0 || !trans->action->anyNextStmt() ) + trans->targ->labelNeeded = true; + } + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) + st->outNeeded = st->labelNeeded; +} + +void FsmCodeGen::writeData() +{ + out << "#define " << START() << " " << START_STATE_ID() << "\n"; + out << "#define " << FIRST_FINAL() << " " << FIRST_FINAL_STATE() << "\n"; + out << "#define " << ERROR() << " " << ERROR_STATE() << "\n"; + out << "#define false 0\n"; + out << "#define true 1\n"; + out << "\n"; + + out << "static long " << ENTRY_BY_REGION() << "[] = {\n\t"; + for ( int i = 0; i < fsmTables->num_regions; i++ ) { + out << fsmTables->entry_by_region[i]; + + if ( i < fsmTables->num_regions-1 ) { + out << ", "; + if ( (i+1) % 8 == 0 ) + out << "\n\t"; + } + } + out << "\n};\n\n"; + + out << + "static struct fsm_tables fsmTables_start =\n" + "{\n" + " 0, " /* actions */ + " 0, " /* keyOffsets */ + " 0, " /* transKeys */ + " 0, " /* singleLengths */ + " 0, " /* rangeLengths */ + " 0, " /* indexOffsets */ + " 0, " /* transTargsWI */ + " 0, " /* transActionsWI */ + " 0, " /* toStateActions */ + " 0, " /* fromStateActions */ + " 0, " /* eofActions */ + " 0,\n" /* eofTargs */ + " " << ENTRY_BY_REGION() << ",\n" + + "\n" + " 0, " /* numStates */ + " 0, " /* numActions */ + " 0, " /* numTransKeys */ + " 0, " /* numSingleLengths */ + " 0, " /* numRangeLengths */ + " 0, " /* numIndexOffsets */ + " 0, " /* numTransTargsWI */ + " 0,\n" /* numTransActionsWI */ + " " << redFsm->regionToEntry.length() << ",\n" + "\n" + " " << START() << ",\n" + " " << FIRST_FINAL() << ",\n" + " " << ERROR() << ",\n" + "\n" + " 0,\n" /* actionSwitch */ + " 0\n" /* numActionSwitch */ + "};\n" + "\n"; +} + +void FsmCodeGen::writeInit() +{ + out << + " " << CS() << " = " << START() << ";\n"; + + /* If there are any calls, then the stack top needs initialization. 
*/ + if ( redFsm->anyActionCalls() || redFsm->anyActionRets() ) + out << "\t" << TOP() << " = 0;\n"; + + out << + " " << TOKSTART() << " = 0;\n" + " " << TOKEND() << " = 0;\n" + " " << ACT() << " = 0;\n"; + + out << "\n"; +} + +void FsmCodeGen::writeExec() +{ + setLabelsNeeded(); + + out << + "static void fsm_execute( struct pda_run *pdaRun, struct input_impl *inputStream )\n" + "{\n" + " " << BLOCK_START() << " = pdaRun->p;\n" + "/*_resume:*/\n"; + + if ( redFsm->errState != 0 ) { + out << + " if ( " << CS() << " == " << redFsm->errState->id << " )\n" + " goto out;\n"; + } + + out << + " if ( " << P() << " == " << PE() << " )\n" + " goto out_switch;\n" + " --" << P() << ";\n" + "\n" + " switch ( " << CS() << " )\n {\n"; + STATE_GOTOS() << + " }\n"; + + out << + "out_switch:\n" + " switch ( " << CS() << " )\n {\n"; + EXIT_STATES() << + " }\n"; + + out << + "out:\n" + " if ( " << P() << " != 0 )\n" + " " << TOKPREF() << " += " << P() << " - " << BLOCK_START() << ";\n"; + + if ( skipTokprefLabelNeeded ) { + out << + "skip_tokpref:\n" + " {}\n"; + } + + out << + "}\n" + "\n"; +} + +void FsmCodeGen::writeCode() +{ + redFsm->depthFirstOrdering(); + + writeData(); + writeExec(); + + /* Referenced in the runtime lib, but used only in the compiler. Probably + * should use the preprocessor to make these go away. 
*/ + out << + "static void sendNamedLangEl( struct colm_program *prg, tree_t **tree,\n" + " struct pda_run *pda_run, struct input_impl *input ) { }\n" + "static void initBindings( struct pda_run *pdaRun ) {}\n" + "static void popBinding( struct pda_run *pdaRun, parse_tree_t *tree ) {}\n" + "\n" + "\n"; +} + + diff --git a/src/fsmcodegen.h b/src/fsmcodegen.h new file mode 100644 index 00000000..c8f66c9c --- /dev/null +++ b/src/fsmcodegen.h @@ -0,0 +1,211 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _COLM_FSMCODEGEN_H +#define _COLM_FSMCODEGEN_H + +#include <stdio.h> + +#include <string> +#include <iostream> + +#include "keyops.h" +#include "compiler.h" +#include "redfsm.h" + +using std::string; +using std::ostream; + +/* Integer array line length. */ +#define IALL 8 + +/* Forwards. 
*/ +struct RedFsm; +struct RedState; +struct GenAction; +struct NameInst; +struct RedAction; +struct LongestMatch; +struct TokenInstance; +struct InlineList; +struct InlineItem; +struct NameInst; +struct FsmCodeGen; + +typedef unsigned long ulong; +typedef unsigned char uchar; + + +/* + * The interface to the parser + */ + +std::ostream *openOutput( char *inputFile ); +/* Format i in decimal and return it as a string. */ +inline string itoa( int i ) +{ + char buf[16]; + sprintf( buf, "%i", i ); + return buf; +} + +/* + * class FsmCodeGen + */ +struct FsmCodeGen +{ +public: + FsmCodeGen( ostream &out, RedFsm *redFsm, fsm_tables *fsmTables ); + +protected: + + string FSM_NAME(); + string START_STATE_ID(); + ostream &ACTIONS_ARRAY(); + string GET_WIDE_KEY(); + string GET_WIDE_KEY( RedState *state ); + string TABS( int level ); + string KEY( Key key ); + string LDIR_PATH( char *path ); + void ACTION( ostream &ret, GenAction *action, int targState, bool inFinish ); + void CONDITION( ostream &ret, GenAction *condition ); + string ALPH_TYPE(); + string WIDE_ALPH_TYPE(); + string ARRAY_TYPE( unsigned long maxVal ); + + string ARR_OFF( string ptr, string offset ); + string CAST( string type ); + string UINT(); + string GET_KEY(); + /* The inline accessors below return source text for the generated code: ACCESS() is the "pdaRun->" prefix and, for example, P() yields "pdaRun->p". */ + string ACCESS() { return "pdaRun->"; } + + string P() { return ACCESS() + "p"; } + string PE() { return ACCESS() + "pe"; } + string DATA_EOF() { return ACCESS() + "scan_eof"; } + + string CS(); + string TOP() { return ACCESS() + "top"; } + string TOKSTART() { return ACCESS() + "tokstart"; } + string TOKEND() { return ACCESS() + "tokend"; } + string BLOCK_START() { return ACCESS() + "start"; } + string TOKPREF() { return ACCESS() + "tokpref"; } + string ACT() { return ACCESS() + "act"; } + string MATCHED_TOKEN() { return ACCESS() + "matched_token"; } + + string DATA_PREFIX(); + + string START() { return DATA_PREFIX() + "start"; } + string ERROR() { return DATA_PREFIX() + "error"; } + string FIRST_FINAL() { return DATA_PREFIX() + "first_final"; } + + string ENTRY_BY_REGION() { return DATA_PREFIX() 
+ "entry_by_region"; } + + + void INLINE_LIST( ostream &ret, InlineList *inlineList, + int targState, bool inFinish ); + void EXEC_TOKEND( ostream &ret, InlineItem *item, int targState, int inFinish ); + void EXECTE( ostream &ret, InlineItem *item, int targState, int inFinish ); + void LM_SWITCH( ostream &ret, InlineItem *item, int targState, int inFinish ); + void SET_ACT( ostream &ret, InlineItem *item ); + void INIT_TOKSTART( ostream &ret, InlineItem *item ); + void INIT_ACT( ostream &ret, InlineItem *item ); + void SET_TOKSTART( ostream &ret, InlineItem *item ); + void SET_TOKEND( ostream &ret, InlineItem *item ); + void SET_TOKEND_0( ostream &ret, InlineItem *item ); + void GET_TOKEND( ostream &ret, InlineItem *item ); + void SUB_ACTION( ostream &ret, InlineItem *item, int targState, bool inFinish ); + void LM_ON_LAST( ostream &ret, InlineItem *item ); + void LM_ON_NEXT( ostream &ret, InlineItem *item ); + void LM_ON_LAG_BEHIND( ostream &ret, InlineItem *item ); + void EXEC_TOKEND( ostream &ret ); + void EMIT_TOKEN( ostream &ret, LangEl *token ); + + string ERROR_STATE(); + string FIRST_FINAL_STATE(); + + string PTR_CONST(); + ostream &OPEN_ARRAY( string type, string name ); + ostream &CLOSE_ARRAY(); + ostream &STATIC_VAR( string type, string name ); + + string CTRL_FLOW(); + + unsigned int arrayTypeSize( unsigned long maxVal ); + +public: + ostream &out; + RedFsm *redFsm; + fsm_tables *fsmTables; + int codeGenErrCount; + + /* Write options. 
*/ + bool dataPrefix; + bool writeFirstFinal; + bool writeErr; + bool skipTokprefLabelNeeded; + + std::ostream &TO_STATE_ACTION_SWITCH(); + std::ostream &FROM_STATE_ACTION_SWITCH(); + std::ostream &ACTION_SWITCH(); + std::ostream &STATE_GOTOS(); + std::ostream &TRANSITIONS(); + std::ostream &EXEC_FUNCS(); + + unsigned int TO_STATE_ACTION( RedState *state ); + unsigned int FROM_STATE_ACTION( RedState *state ); + + std::ostream &TO_STATE_ACTIONS(); + std::ostream &FROM_STATE_ACTIONS(); + + void emitCondBSearch( RedState *state, int level, int low, int high ); + void STATE_CONDS( RedState *state, bool genDefault ); + + void emitSingleSwitch( RedState *state ); + void emitRangeBSearch( RedState *state, int level, int low, int high ); + + std::ostream &EXIT_STATES(); + std::ostream &TRANS_GOTO( RedTrans *trans, int level ); + std::ostream &FINISH_CASES(); + + void writeIncludes(); + void writeData(); + void writeInit(); + void writeExec(); + void writeCode(); + void writeMain( long activeRealm ); + +protected: + bool useAgainLabel(); + + /* Called from GotoCodeGen::STATE_GOTOS just before writing the gotos for + * each state. */ + bool IN_TRANS_ACTIONS( RedState *state ); + void GOTO_HEADER( RedState *state ); + void STATE_GOTO_ERROR(); + + /* Set up labelNeeded flag for each state. 
*/ + void setLabelsNeeded(); +}; + +#endif /* _COLM_FSMCODEGEN_H */ + diff --git a/src/fsmexec.cc b/src/fsmexec.cc new file mode 100644 index 00000000..8aa4a072 --- /dev/null +++ b/src/fsmexec.cc @@ -0,0 +1,220 @@ +/* + * Copyright 2007-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <stdbool.h> + +#include <assert.h> + +#include "redfsm.h" +#include "compiler.h" +/* Interpret the inline items of one action directly against pdaRun. Text and LmInitTokStart items are not expected here and assert. After the items have run, an action whose markType is MarkMark stores the current p in pdaRun->mark[markId-1]. */ +void execAction( struct pda_run *pdaRun, GenAction *genAction ) +{ + for ( InlineList::Iter item = *genAction->inlineList; item.lte(); item++ ) { + switch ( item->type ) { + case InlineItem::Text: + assert(false); + break; + case InlineItem::LmSetActId: + pdaRun->act = item->longestMatchPart->longestMatchId; + break; + case InlineItem::LmSetTokEnd: + pdaRun->tokend = pdaRun->tokpref + ( pdaRun->p - pdaRun->start ) + 1; + break; + case InlineItem::LmInitTokStart: + assert(false); + break; + case InlineItem::LmInitAct: + pdaRun->act = 0; + break; + case InlineItem::LmSetTokStart: + pdaRun->tokstart = pdaRun->p; + break; + case InlineItem::LmSwitch: + /* If the switch handles error then we also forced the error state. It + * will exist. */ + if ( item->tokenRegion->lmSwitchHandlesError && pdaRun->act == 0 ) { + pdaRun->fsm_cs = pdaRun->fsm_tables->error_state; + } + else { + for ( TokenInstanceListReg::Iter lmi = item->tokenRegion->tokenInstanceList; + lmi.lte(); lmi++ ) + { + if ( lmi->inLmSelect && pdaRun->act == lmi->longestMatchId ) + pdaRun->matched_token = lmi->tokenDef->tdLangEl->id; + } + } + pdaRun->return_result = true; + pdaRun->skip_tokpref = true; + break; + case InlineItem::LmOnLast: + pdaRun->p += 1; + pdaRun->tokend = pdaRun->tokpref + ( pdaRun->p - pdaRun->start ); + pdaRun->matched_token = item->longestMatchPart->tokenDef->tdLangEl->id; + pdaRun->return_result = true; + break; + case InlineItem::LmOnNext: + pdaRun->tokend = pdaRun->tokpref + ( pdaRun->p - pdaRun->start ); + pdaRun->matched_token = item->longestMatchPart->tokenDef->tdLangEl->id; + pdaRun->return_result = true; + break; + case InlineItem::LmOnLagBehind: + pdaRun->matched_token = item->longestMatchPart->tokenDef->tdLangEl->id; + pdaRun->return_result = true; + pdaRun->skip_tokpref = true; + break; + } + } + + if ( genAction->markType == MarkMark ) + pdaRun->mark[genAction->markId-1] = 
pdaRun->p; +} +/* Table-driven scanner: runs the machine described by pdaRun->fsm_tables over the input from pdaRun->p up to pdaRun->pe, executing from-state, transition and to-state actions through execAction(). */ +extern "C" void internalFsmExecute( struct pda_run *pdaRun, struct input_impl *inputStream ) +{ + int _klen; + unsigned int _trans; + const long *_acts; + unsigned int _nacts; + const char *_keys; + + pdaRun->start = pdaRun->p; + + /* Init the token match to nothing (the sentinel). */ + pdaRun->matched_token = 0; + +/*_resume:*/ + if ( pdaRun->fsm_cs == pdaRun->fsm_tables->error_state ) + goto out; + + if ( pdaRun->p == pdaRun->pe ) + goto out; + +_loop_head: + _acts = pdaRun->fsm_tables->actions + pdaRun->fsm_tables->from_state_actions[pdaRun->fsm_cs]; + _nacts = (unsigned int) *_acts++; + while ( _nacts-- > 0 ) + execAction( pdaRun, pdaRun->fsm_tables->action_switch[*_acts++] ); + + _keys = pdaRun->fsm_tables->trans_keys + pdaRun->fsm_tables->key_offsets[pdaRun->fsm_cs]; + _trans = pdaRun->fsm_tables->index_offsets[pdaRun->fsm_cs]; + /* Binary search the single keys of the current state. */ + _klen = pdaRun->fsm_tables->single_lengths[pdaRun->fsm_cs]; + if ( _klen > 0 ) { + const char *_lower = _keys; + const char *_mid; + const char *_upper = _keys + _klen - 1; + while (1) { + if ( _upper < _lower ) + break; + + _mid = _lower + ((_upper-_lower) >> 1); + if ( (*pdaRun->p) < *_mid ) + _upper = _mid - 1; + else if ( (*pdaRun->p) > *_mid ) + _lower = _mid + 1; + else { + _trans += (_mid - _keys); + goto _match; + } + } + _keys += _klen; + _trans += _klen; + } + /* Binary search the key ranges of the current state; each range is a low/high pair of keys. */ + _klen = pdaRun->fsm_tables->range_lengths[pdaRun->fsm_cs]; + if ( _klen > 0 ) { + const char *_lower = _keys; + const char *_mid; + const char *_upper = _keys + (_klen<<1) - 2; + while (1) { + if ( _upper < _lower ) + break; + + _mid = _lower + (((_upper-_lower) >> 1) & ~1); + if ( (*pdaRun->p) < _mid[0] ) + _upper = _mid - 2; + else if ( (*pdaRun->p) > _mid[1] ) + _lower = _mid + 2; + else { + _trans += ((_mid - _keys)>>1); + goto _match; + } + } + _trans += _klen; + } + +_match: + pdaRun->fsm_cs = pdaRun->fsm_tables->transTargsWI[_trans]; + + if ( pdaRun->fsm_tables->transActionsWI[_trans] == 0 ) + goto _again; + + pdaRun->return_result = false; 
+ pdaRun->skip_tokpref = false; + _acts = pdaRun->fsm_tables->actions + pdaRun->fsm_tables->transActionsWI[_trans]; + _nacts = (unsigned int) *_acts++; + while ( _nacts-- > 0 ) + execAction( pdaRun, pdaRun->fsm_tables->action_switch[*_acts++] ); + if ( pdaRun->return_result ) { + if ( pdaRun->skip_tokpref ) + goto skip_tokpref; + goto final; + } + +_again: + _acts = pdaRun->fsm_tables->actions + pdaRun->fsm_tables->to_state_actions[pdaRun->fsm_cs]; + _nacts = (unsigned int) *_acts++; + while ( _nacts-- > 0 ) + execAction( pdaRun, pdaRun->fsm_tables->action_switch[*_acts++] ); + + if ( pdaRun->fsm_cs == pdaRun->fsm_tables->error_state ) + goto out; + + if ( ++pdaRun->p != pdaRun->pe ) + goto _loop_head; +out: /* The EOF actions below run only when scan_eof is set. */ + if ( pdaRun->scan_eof ) { + pdaRun->return_result = false; + pdaRun->skip_tokpref = false; + _acts = pdaRun->fsm_tables->actions + pdaRun->fsm_tables->eof_actions[pdaRun->fsm_cs]; + _nacts = (unsigned int) *_acts++; + + if ( pdaRun->fsm_tables->eof_targs[pdaRun->fsm_cs] >= 0 ) + pdaRun->fsm_cs = pdaRun->fsm_tables->eof_targs[pdaRun->fsm_cs]; + + while ( _nacts-- > 0 ) + execAction( pdaRun, pdaRun->fsm_tables->action_switch[*_acts++] ); + if ( pdaRun->return_result ) { + if ( pdaRun->skip_tokpref ) + goto skip_tokpref; + goto final; + } + } + +final: + /* Add what this call consumed to tokpref; jumps to skip_tokpref bypass this. */ + if ( pdaRun->p != 0 ) + pdaRun->tokpref += pdaRun->p - pdaRun->start; +skip_tokpref: + {} +} diff --git a/src/fsmgraph.cc b/src/fsmgraph.cc new file mode 100644 index 00000000..8cbfe29c --- /dev/null +++ b/src/fsmgraph.cc @@ -0,0 +1,981 @@ +/* + * Copyright 2006-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished 
to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "fsmgraph.h" + +#include <assert.h> + +#include <iostream> + +using std::cerr; +using std::endl; + +/* Make a new state and return it. The new state will be put on the graph's + * list of states, or on the misfit list while misfit accounting is on. */ +FsmState *FsmGraph::addState() +{ + /* Make the new state to return. */ + FsmState *state = new FsmState(); + + if ( misfitAccounting ) { + /* Create the new state on the misfit list. All states are created + * with no foreign in transitions. */ + misfitList.append( state ); + } + else { + /* Create the new state. */ + stateList.append( state ); + } + + return state; +} + +/* Construct an FSM that is the concatenation of an array of characters. A new + * machine will be made that has len+1 states with one transition between each + * state for each integer in str. IsSigned determines if the integers are to + * be considered as signed or unsigned ints. */ +void FsmGraph::concatFsm( Key *str, int len ) +{ + /* Make the first state and set it as the start state. */ + FsmState *last = addState(); + setStartState( last ); + + /* Attach subsequent states. */ + for ( int i = 0; i < len; i++ ) { + FsmState *newState = addState(); + attachNewTrans( last, newState, str[i], str[i] ); + last = newState; + } + + /* Make the last state the final state. 
*/ + setFinState( last ); +} + +/* Case insensitive version of concatFsm. */ +void FsmGraph::concatFsmCI( Key *str, int len ) +{ + /* Make the first state and set it as the start state. */ + FsmState *last = addState(); + setStartState( last ); + + /* Attach subsequent states. */ + for ( int i = 0; i < len; i++ ) { + FsmState *newState = addState(); + + KeySet keySet; + if ( str[i].isLower() ) + keySet.insert( str[i].toUpper() ); + if ( str[i].isUpper() ) + keySet.insert( str[i].toLower() ); + keySet.insert( str[i] ); + /* The i declared below shadows the outer loop index and only indexes keySet. */ + for ( int i = 0; i < keySet.length(); i++ ) + attachNewTrans( last, newState, keySet[i], keySet[i] ); + + last = newState; + } + + /* Make the last state the final state. */ + setFinState( last ); +} + +/* Construct a machine that matches one character. A new machine will be made + * that has two states with a single transition between the states. IsSigned + * determines if the integers are to be considered as signed or unsigned ints. */ +void FsmGraph::concatFsm( Key chr ) +{ + /* Two states first start, second final. */ + setStartState( addState() ); + + FsmState *end = addState(); + setFinState( end ); + + /* Attach on the character. */ + attachNewTrans( startState, end, chr, chr ); +} + +/* Construct a machine that matches any character in set. A new machine will + * be made that has two states and len transitions between them. The set + * should be ordered correctly according to KeyOps and should not contain + * any duplicates (the asserts below check for strictly increasing keys). */ +void FsmGraph::orFsm( Key *set, int len ) +{ + /* Two states first start, second final. */ + setStartState( addState() ); + + FsmState *end = addState(); + setFinState( end ); + + for ( int i = 1; i < len; i++ ) + assert( set[i-1] < set[i] ); + + /* Attach on all the integers in the given string of ints. */ + for ( int i = 0; i < len; i++ ) + attachNewTrans( startState, end, set[i], set[i] ); +} + +/* Construct a machine that matches a range of characters. 
A new machine will + * be made with two states and a range transition between them. The range will + * match any characters from low to high inclusive. Low should be less than or + * equal to high otherwise undefined behaviour results. IsSigned determines + * if the integers are to be considered as signed or unsigned ints. */ +void FsmGraph::rangeFsm( Key low, Key high ) +{ + /* Two states first start, second final. */ + setStartState( addState() ); + + FsmState *end = addState(); + setFinState( end ); + + /* Attach using the range of characters. */ + attachNewTrans( startState, end, low, high ); +} + +/* Construct a machine that matches a repeated range of characters. */ +void FsmGraph::rangeStarFsm( Key low, Key high) +{ + /* One state which is final and is the start state. */ + setStartState( addState() ); + setFinState( startState ); + + /* Attach start to start using range of characters. */ + attachNewTrans( startState, startState, low, high ); +} + +/* Construct a machine that matches the empty string. A new machine will be + * made with only one state. The new state will be both a start and final + * state. IsSigned determines if the machine has a signed or unsigned + * alphabet. Fsm operations must be done on machines with the same alphabet + * signedness. */ +void FsmGraph::lambdaFsm( ) +{ + /* Give it one state with no transitions making it + * the start state and final state. */ + setStartState( addState() ); + setFinState( startState ); +} + +/* Construct a machine that matches nothing at all. A new machine will be + * made with only one state. It will not be final. */ +void FsmGraph::emptyFsm( ) +{ + /* Give it one state with no transitions making it + * the start state only; it is not set final. */ + setStartState( addState() ); +} +/* Copy srcState's out action table and out priority table onto every transition of destState that has a target state. */ +void FsmGraph::transferOutData( FsmState *destState, FsmState *srcState ) +{ + for ( TransList::Iter trans = destState->outList; trans.lte(); trans++ ) { + if ( trans->toState != 0 ) { + /* Get the actions data from the outActionTable. 
*/ + trans->actionTable.setActions( srcState->outActionTable ); + + /* Get the priorities from the outPriorTable. */ + trans->priorTable.setPriors( srcState->outPriorTable ); + } + } +} + +/* Kleene star operator. Makes this machine the kleene star of itself. Any + * transitions made going out of the machine and back into itself will be + * notified that they are leaving transitions by having the leavingFromState + * callback invoked. */ +void FsmGraph::starOp( ) +{ + /* For the merging process. */ + MergeData md; + + /* Turn on misfit accounting to possibly catch the old start state. */ + setMisfitAccounting( true ); + + /* Create the new start state. It will be set final after the merging + * of the final states with the start state is complete. */ + FsmState *prevStartState = startState; + unsetStartState(); + setStartState( addState() ); + + /* Merge the new start state with the old one to isolate it. */ + mergeStates( md, startState, prevStartState ); + + /* Merge the start state into all final states. Except the start state on + * the first pass. If the start state is set final we will be doubling up + * its transitions, which will get transferred to any final states that + * follow it in the final state set. This will be determined by the order + * of items in the final state set. To prevent this we just merge with the + * start on a second pass. */ + for ( StateSet::Iter st = finStateSet; st.lte(); st++ ) { + if ( *st != startState ) + mergeStatesLeaving( md, *st, startState ); + } + + /* Now it is safe to merge the start state with itself (provided it + * is set final). */ + if ( startState->isFinState() ) + mergeStatesLeaving( md, startState, startState ); + + /* Now ensure the new start state is a final state. */ + setFinState( startState ); + + /* Fill in any states that were newed up as combinations of others. */ + fillInStates( md ); + + /* Remove the misfits and turn off misfit accounting. 
*/ + removeMisfits(); + setMisfitAccounting( false ); +} +/* Make this machine match itself repeated exactly times times, by concatenating times-1 copies of the original onto it. */ +void FsmGraph::repeatOp( int times ) +{ + /* Must be 1 and up. 0 produces null machine and requires deleting this. */ + assert( times > 0 ); + + /* A repeat of one does absolutely nothing. */ + if ( times == 1 ) + return; + + /* Make a machine to make copies from. */ + FsmGraph *copyFrom = new FsmGraph( *this ); + + /* Concatenate duplicates onto the end up until before the last. */ + for ( int i = 1; i < times-1; i++ ) { + FsmGraph *dup = new FsmGraph( *copyFrom ); + doConcat( dup, 0, false ); + } + + /* Now use the copyFrom on the end. */ + doConcat( copyFrom, 0, false ); +} +/* Like repeatOp, but the start state is made final and every concatenation is done in doConcat's optional mode. */ +void FsmGraph::optionalRepeatOp( int times ) +{ + /* Must be 1 and up. 0 produces null machine and requires deleting this. */ + assert( times > 0 ); + + /* A repeat of one optional merely allows zero string. */ + if ( times == 1 ) { + setFinState( startState ); + return; + } + + /* Make a machine to make copies from. */ + FsmGraph *copyFrom = new FsmGraph( *this ); + + /* The state set used in the from end of the concatenation. Starts with + * the initial final state set, then after each concatenation, gets set to + * the final states that come from the duplicate. */ + StateSet lastFinSet( finStateSet ); + + /* Set the start state final to allow zero copies. */ + setFinState( startState ); + + /* Concatenate duplicates onto the end up until before the last. */ + for ( int i = 1; i < times-1; i++ ) { + /* Make a duplicate for concating and set the fin bits to graph 2 so we + * can pick out it's final states after the optional style concat. 
*/ + FsmGraph *dup = new FsmGraph( *copyFrom ); + dup->setFinBits( SB_GRAPH2 ); + doConcat( dup, &lastFinSet, true ); + + /* Clear the last final state set and make the new one by taking only + * the final states that come from graph 2. */ + lastFinSet.empty(); + for ( int i = 0; i < finStateSet.length(); i++ ) { + /* If the state came from graph 2, add it to the last set and clear + * the bits. */ + FsmState *fs = finStateSet[i]; + if ( fs->stateBits & SB_GRAPH2 ) { + lastFinSet.insert( fs ); + fs->stateBits &= ~SB_GRAPH2; + } + } + } + + /* Now use the copyFrom on the end, no bits set, no bits to clear. */ + doConcat( copyFrom, &lastFinSet, true ); +} + + +/* Fsm concatenation worker. Supports treating the concatenation as optional, + * which essentially leaves the final states of machine one as final. A null + * fromStates means "use a copy of our own final state set". */ +void FsmGraph::doConcat( FsmGraph *other, StateSet *fromStates, bool optional ) +{ + /* For the merging process. (startStateSet is declared but not used in + * this function.) */ + StateSet finStateSetCopy, startStateSet; + MergeData md; + + /* Turn on misfit accounting for both graphs. */ + setMisfitAccounting( true ); + other->setMisfitAccounting( true ); + + /* Get the other's start state. */ + FsmState *otherStartState = other->startState; + + /* Unset other's start state before bringing in the entry points. */ + other->unsetStartState(); + + /* Bring in the rest of other's entry points. */ + copyInEntryPoints( other ); + other->entryPoints.empty(); + + /* Bring in other's states into our state lists. */ + stateList.append( other->stateList ); + misfitList.append( other->misfitList ); + + /* If from states is not set, then get a copy of our final state set before + * we clobber it and use it instead. */ + if ( fromStates == 0 ) { + finStateSetCopy = finStateSet; + fromStates = &finStateSetCopy; + } + + /* Unset all of our final states and get the final states from other. 
*/ + if ( !optional ) + unsetAllFinStates(); + finStateSet.insert( other->finStateSet ); + + /* Since other's lists are empty, we can delete the fsm without + * affecting any states. NOTE(review): unlike doOr, other->finStateSet is + * not emptied before the delete - confirm the destructor leaves the + * states alone. */ + delete other; + + /* Merge our former final states with the start state of other. */ + for ( int i = 0; i < fromStates->length(); i++ ) { + FsmState *state = fromStates->data[i]; + + /* Merge the former final state with other's start state. */ + mergeStatesLeaving( md, state, otherStartState ); + + /* If the former final state was not reset final then we must clear + * the state's out trans data. If it got reset final then it gets to + * keep its out trans data. This must be done before fillInStates gets + * called to prevent the data from being sourced. */ + if ( ! state->isFinState() ) + clearOutData( state ); + } + + /* Fill in any new states made from merging. */ + fillInStates( md ); + + /* Remove the misfits and turn off misfit accounting. */ + removeMisfits(); + setMisfitAccounting( false ); +} + +/* Concatenates other to the end of this machine. Other is deleted. Any + * transitions made leaving this machine and entering into other are notified + * that they are leaving transitions by having the leavingFromState callback + * invoked. */ +void FsmGraph::concatOp( FsmGraph *other ) +{ + /* Hand off to the concatenation worker: non-optional, starting from our + * own final states. */ + doConcat( other, 0, false ); +} + +/* Worker for the union-style operators: brings other's states into this machine and merges both start states into a newly created start state. Other is deleted. */ +void FsmGraph::doOr( FsmGraph *other ) +{ + /* For the merging process. */ + MergeData md; + + /* Build a state set consisting of both start states */ + StateSet startStateSet; + startStateSet.insert( startState ); + startStateSet.insert( other->startState ); + + /* Both of the original start states lose their start state status. */ + unsetStartState(); + other->unsetStartState(); + + /* Bring in the rest of other's entry points. */ + copyInEntryPoints( other ); + other->entryPoints.empty(); + + /* Merge the lists. This will move all the states from other + * into this. 
No states will be deleted. */ + stateList.append( other->stateList ); + misfitList.append( other->misfitList ); + + /* Move the final set data from other into this. */ + finStateSet.insert(other->finStateSet); + other->finStateSet.empty(); + + /* Since other's list is empty, we can delete the fsm without + * affecting any states. */ + delete other; + + /* Create a new start state. */ + setStartState( addState() ); + + /* Merge the start states. */ + mergeStates( md, startState, startStateSet.data, startStateSet.length() ); + + /* Fill in any new states made from merging. */ + fillInStates( md ); +} + +/* Unions other with this machine. Other is deleted. */ +void FsmGraph::unionOp( FsmGraph *other ) +{ + /* Turn on misfit accounting for both graphs. */ + setMisfitAccounting( true ); + other->setMisfitAccounting( true ); + + /* Call worker routine. */ + doOr( other ); + + /* Remove the misfits and turn off misfit accounting. */ + removeMisfits(); + setMisfitAccounting( false ); +} + +/* Intersects other with this machine. Other is deleted. The final states of + * the two machines are tagged SB_GRAPH1 and SB_GRAPH2 before the union; + * presumably unsetIncompleteFinals() then keeps only finals carrying both + * tags - verify against its definition. */ +void FsmGraph::intersectOp( FsmGraph *other ) +{ + /* Turn on misfit accounting for both graphs. */ + setMisfitAccounting( true ); + other->setMisfitAccounting( true ); + + /* Set the fin bits on this and other to want each other. */ + setFinBits( SB_GRAPH1 ); + other->setFinBits( SB_GRAPH2 ); + + /* Call worker Or routine. */ + doOr( other ); + + /* Unset any final states that are no longer to + * be final due to final bits. */ + unsetIncompleteFinals(); + + /* Remove the misfits and turn off misfit accounting. */ + removeMisfits(); + setMisfitAccounting( false ); + + /* Remove states that have no path to a final state. */ + removeDeadEndStates(); +} + +/* Set subtracts other machine from this machine. Other is deleted. */ +void FsmGraph::subtractOp( FsmGraph *other ) +{ + /* Turn on misfit accounting for both graphs. 
*/ + setMisfitAccounting( true ); + other->setMisfitAccounting( true ); + + /* Set the fin bits of other to be killers. */ + other->setFinBits( SB_GRAPH1 ); + + /* Call worker Or routine. */ + doOr( other ); + + /* Unset any final states that are no longer to + * be final due to final bits. */ + unsetKilledFinals(); + + /* Remove the misfits and turn off misfit accounting. */ + removeMisfits(); + setMisfitAccounting( false ); + + /* Remove states that have no path to a final state. */ + removeDeadEndStates(); +} +/* Return true if state is the target of some entry in eptVect. A null eptVect contains nothing. */ +bool FsmGraph::inEptVect( EptVect *eptVect, FsmState *state ) +{ + if ( eptVect != 0 ) { + /* Vect is there, walk it looking for state. */ + for ( int i = 0; i < eptVect->length(); i++ ) { + if ( eptVect->data[i].targ == state ) + return true; + } + } + return false; +} + +/* Fill epsilon vectors in a root state from a given starting point. Employs + * a depth first search through the graph of epsilon transitions. */ +void FsmGraph::epsilonFillEptVectFrom( FsmState *root, FsmState *from, bool parentLeaving ) +{ + /* Walk the epsilon transitions out of the state. */ + for ( EpsilonTrans::Iter ep = from->epsilonTrans; ep.lte(); ep++ ) { + /* Find the entry point, if it does not resolve, ignore it. */ + EntryMapEl *enLow, *enHigh; + if ( entryPoints.findMulti( *ep, enLow, enHigh ) ) { + /* Loop the targets. */ + for ( EntryMapEl *en = enLow; en <= enHigh; en++ ) { + /* Do not add the root or states already in eptVect. + * NOTE(review): the test below compares targ with from, + * not with root - confirm which is intended. */ + FsmState *targ = en->value; + if ( targ != from && !inEptVect(root->eptVect, targ) ) { + /* Maybe need to create the eptVect. */ + if ( root->eptVect == 0 ) + root->eptVect = new EptVect(); + + /* If moving to a different graph or if any parent is + * leaving then we are leaving. */ + bool leaving = parentLeaving || + root->owningGraph != targ->owningGraph; + + /* All ok, add the target epsilon and recurse. 
*/ + root->eptVect->append( EptVectEl(targ, leaving) ); + epsilonFillEptVectFrom( root, targ, leaving ); + } + } + } + } +} +/* For every eptVect target that has an eptVect of its own, create (once) a shadow state merged from the target and point the eptVect entry at the shadow instead. */ +void FsmGraph::shadowReadWriteStates( MergeData &md ) +{ + /* Init isolatedShadow algorithm data. */ + for ( StateList::Iter st = stateList; st.lte(); st++ ) + st->isolatedShadow = 0; + + /* Any states that may be both read from and written to must + * be shadowed. */ + for ( StateList::Iter st = stateList; st.lte(); st++ ) { + /* Find such states by looping through the eptVect lists, which give us + * the states that will be read from. May cause us to visit the states + * that we are interested in more than once. */ + if ( st->eptVect != 0 ) { + /* For all states that will be read from. */ + for ( EptVect::Iter ept = *st->eptVect; ept.lte(); ept++ ) { + /* Check for read and write to the same state. */ + FsmState *targ = ept->targ; + if ( targ->eptVect != 0 ) { + /* State is to be written to, if the shadow is not already + * there, create it. */ + if ( targ->isolatedShadow == 0 ) { + FsmState *shadow = addState(); + mergeStates( md, shadow, targ ); + targ->isolatedShadow = shadow; + } + + /* Write shadow into the state vector so that it is the + * state that the epsilon transition will read from. */ + ept->targ = targ->isolatedShadow; + } + } + } + } +} +/* Build the eptVect of every state, shadow the states that are both read and written, then merge each eptVect target into its state and discard the eptVect and the epsilon transitions. */ +void FsmGraph::resolveEpsilonTrans( MergeData &md ) +{ + /* Walk the state list and invoke recursive worker on each state. */ + for ( StateList::Iter st = stateList; st.lte(); st++ ) + epsilonFillEptVectFrom( st, st, false ); + + /* Prevent reading from and writing to the same state. */ + shadowReadWriteStates( md ); + + /* For all states that have epsilon transitions out, draw the transitions, + * clear the epsilon transitions. */ + for ( StateList::Iter st = stateList; st.lte(); st++ ) { + /* If there is a state vector, then create the pre-merge state. */ + if ( st->eptVect != 0 ) { + /* Merge all the epsilon targets into the state. 
*/ + for ( EptVect::Iter ept = *st->eptVect; ept.lte(); ept++ ) { + if ( ept->leaving ) + mergeStatesLeaving( md, st, ept->targ ); + else + mergeStates( md, st, ept->targ ); + } + + /* Clean up the target list. */ + delete st->eptVect; + st->eptVect = 0; + } + + /* Clear the epsilon transitions vector. */ + st->epsilonTrans.empty(); + } +} +/* Resolve the epsilon transitions of this machine in place. Every state is put in owning graph 0 before the merges are performed. */ +void FsmGraph::epsilonOp() +{ + /* For merging process. */ + MergeData md; + + setMisfitAccounting( true ); + + for ( StateList::Iter st = stateList; st.lte(); st++ ) + st->owningGraph = 0; + + /* Perform merges. */ + resolveEpsilonTrans( md ); + + /* Epsilons can cause merges which leave behind unreachable states. */ + fillInStates( md ); + + /* Remove the misfits and turn off misfit accounting. */ + removeMisfits(); + setMisfitAccounting( false ); +} + +/* Make a new machine by joining together a bunch of machines without making + * any transitions between them. A negative finalId results in there being no + * final id. */ +void FsmGraph::joinOp( int startId, int finalId, FsmGraph **others, int numOthers ) +{ + /* For the merging process. */ + MergeData md; + + /* Set the owning machines. Start at one. Zero is reserved for the start + * and final states. */ + for ( StateList::Iter st = stateList; st.lte(); st++ ) + st->owningGraph = 1; + for ( int m = 0; m < numOthers; m++ ) { + for ( StateList::Iter st = others[m]->stateList; st.lte(); st++ ) + st->owningGraph = 2+m; + } + + /* All machines lose start state status. */ + unsetStartState(); + for ( int m = 0; m < numOthers; m++ ) + others[m]->unsetStartState(); + + /* Bring the other machines into this. */ + for ( int m = 0; m < numOthers; m++ ) { + /* Bring in the rest of other's entry points. */ + copyInEntryPoints( others[m] ); + others[m]->entryPoints.empty(); + + /* Merge the lists. This will move all the states from other into + * this. No states will be deleted. 
*/ + stateList.append( others[m]->stateList ); + assert( others[m]->misfitList.length() == 0 ); + + /* Move the final set data from other into this. */ + finStateSet.insert( others[m]->finStateSet ); + others[m]->finStateSet.empty(); + + /* Since other's list is empty, we can delete the fsm without + * affecting any states. */ + delete others[m]; + } + + /* Look up the start entry point. */ + EntryMapEl *enLow = 0, *enHigh = 0; + bool findRes = entryPoints.findMulti( startId, enLow, enHigh ); + if ( ! findRes ) { + /* No start state. Set a default one and proceed with the join. Note + * that the result of the join will be a very uninteresting machine. */ + setStartState( addState() ); + } + else { + /* There is at least one start state, create a state that will become + * the new start state. */ + FsmState *newStart = addState(); + setStartState( newStart ); + + /* The start state is in an owning machine class all it's own. */ + newStart->owningGraph = 0; + + /* Create the set of states to merge from. */ + StateSet stateSet; + for ( EntryMapEl *en = enLow; en <= enHigh; en++ ) + stateSet.insert( en->value ); + + /* Merge in the set of start states into the new start state. */ + mergeStates( md, newStart, stateSet.data, stateSet.length() ); + } + + /* Take a copy of the final state set, before unsetting them all. This + * will allow us to call clearOutData on the states that don't get + * final state status back back. */ + StateSet finStateSetCopy = finStateSet; + + /* Now all final states are unset. */ + unsetAllFinStates(); + + if ( finalId >= 0 ) { + /* Create the implicit final state. */ + FsmState *finState = addState(); + setFinState( finState ); + + /* Assign an entry into the final state on the final state entry id. Note + * that there may already be an entry on this id. That's ok. Also set the + * final state owning machine id. It's in a class all it's own. 
*/ + setEntry( finalId, finState ); + finState->owningGraph = 0; + } + + /* Hand over to workers for resolving epsilon trans. This will merge states + * with the targets of their epsilon transitions. */ + resolveEpsilonTrans( md ); + + /* Invoke the relinquish final callback on any states that did not get + * final state status back. */ + for ( StateSet::Iter st = finStateSetCopy; st.lte(); st++ ) { + if ( !((*st)->stateBits & SB_ISFINAL) ) + clearOutData( *st ); + } + + /* Fill in any new states made from merging. */ + fillInStates( md ); + + /* Joining can be messy. Instead of having misfit accounting on (which is + * tricky here) do a full cleaning. */ + removeUnreachableStates(); +} + +void FsmGraph::globOp( FsmGraph **others, int numOthers ) +{ + /* All other machines loose start states status. */ + for ( int m = 0; m < numOthers; m++ ) + others[m]->unsetStartState(); + + /* Bring the other machines into this. */ + for ( int m = 0; m < numOthers; m++ ) { + /* Bring in the rest of other's entry points. */ + copyInEntryPoints( others[m] ); + others[m]->entryPoints.empty(); + + /* Merge the lists. This will move all the states from other into + * this. No states will be deleted. */ + stateList.append( others[m]->stateList ); + assert( others[m]->misfitList.length() == 0 ); + + /* Move the final set data from other into this. */ + finStateSet.insert( others[m]->finStateSet ); + others[m]->finStateSet.empty(); + + /* Since other's list is empty, we can delete the fsm without + * affecting any states. */ + delete others[m]; + } +} + +void FsmGraph::deterministicEntry() +{ + /* For the merging process. */ + MergeData md; + + /* States may loose their entry points, turn on misfit accounting. */ + setMisfitAccounting( true ); + + /* Get a copy of the entry map then clear all the entry points. As we + * iterate the old entry map finding duplicates we will add the entry + * points for the new states that we create. 
*/ + EntryMap prevEntry = entryPoints; + unsetAllEntryPoints(); + + for ( int enId = 0; enId < prevEntry.length(); ) { + /* Count the number of states on this entry key. */ + int highId = enId; + while ( highId < prevEntry.length() && prevEntry[enId].key == prevEntry[highId].key ) + highId += 1; + + int numIds = highId - enId; + if ( numIds == 1 ) { + /* Only a single entry point, just set the entry. */ + setEntry( prevEntry[enId].key, prevEntry[enId].value ); + } + else { + /* Multiple entry points, need to create a new state and merge in + * all the targets of entry points. */ + FsmState *newEntry = addState(); + for ( int en = enId; en < highId; en++ ) + mergeStates( md, newEntry, prevEntry[en].value ); + + /* Add the new state as the single entry point. */ + setEntry( prevEntry[enId].key, newEntry ); + } + + enId += numIds; + } + + /* The old start state may be unreachable. Remove the misfits and turn off + * misfit accounting. */ + removeMisfits(); + setMisfitAccounting( false ); +} + +/* Unset any final states that are no longer to be final due to final bits. */ +void FsmGraph::unsetKilledFinals() +{ + /* Duplicate the final state set before we begin modifying it. */ + StateSet fin( finStateSet ); + + for ( int s = 0; s < fin.length(); s++ ) { + /* Check for killing bit. */ + FsmState *state = fin.data[s]; + if ( state->stateBits & SB_GRAPH1 ) { + /* One final state is a killer, set to non-final. */ + unsetFinState( state ); + } + + /* Clear all killing bits. Non final states should never have had those + * state bits set in the first place. */ + state->stateBits &= ~SB_GRAPH1; + } +} + +/* Unset any final states that are no longer to be final due to final bits. */ +void FsmGraph::unsetIncompleteFinals() +{ + /* Duplicate the final state set before we begin modifying it. */ + StateSet fin( finStateSet ); + + for ( int s = 0; s < fin.length(); s++ ) { + /* Check for one set but not the other. 
*/ + FsmState *state = fin.data[s]; + if ( state->stateBits & SB_BOTH && + (state->stateBits & SB_BOTH) != SB_BOTH ) + { + /* One state wants the other but it is not there. */ + unsetFinState( state ); + } + + /* Clear wanting bits. Non final states should never have had those + * state bits set in the first place. */ + state->stateBits &= ~SB_BOTH; + } +} + +/* Ensure that the start state is free of entry points (aside from the fact + * that it is the start state). If the start state has entry points then Make a + * new start state by merging with the old one. Useful before modifying start + * transitions. If the existing start state has any entry points other than the + * start state entry then modifying its transitions changes more than the start + * transitions. So isolate the start state by separating it out such that it + * only has start stateness as it's entry point. */ +void FsmGraph::isolateStartState( ) +{ + /* For the merging process. */ + MergeData md; + + /* Bail out if the start state is already isolated. */ + if ( isStartStateIsolated() ) + return; + + /* Turn on misfit accounting to possibly catch the old start state. */ + setMisfitAccounting( true ); + + /* This will be the new start state. The existing start + * state is merged with it. */ + FsmState *prevStartState = startState; + unsetStartState(); + setStartState( addState() ); + + /* Merge the new start state with the old one to isolate it. */ + mergeStates( md, startState, prevStartState ); + + /* Stfil and stateDict will be empty because the merging of the old start + * state into the new one will not have any conflicting transitions. */ + assert( md.stateDict.treeSize == 0 ); + assert( md.stfillHead == 0 ); + + /* The old start state may be unreachable. Remove the misfits and turn off + * misfit accounting. */ + removeMisfits(); + setMisfitAccounting( false ); +} + +/* A state merge which represents the drawing in of leaving transitions. 
If + * there is any out data then we duplicate the souce state, transfer the out + * data, then merge in the state. The new state will be reaped because it will + * not be given any in transitions. */ +void FsmGraph::mergeStatesLeaving( MergeData &md, FsmState *destState, FsmState *srcState ) +{ + if ( !hasOutData( destState ) ) + mergeStates( md, destState, srcState ); + else { + FsmState *ssMutable = addState(); + mergeStates( md, ssMutable, srcState ); + transferOutData( ssMutable, destState ); + mergeStates( md, destState, ssMutable ); + } +} + +/* Merge each of the numSrc states in srcStates into destState, in array order. */ void FsmGraph::mergeStates( MergeData &md, FsmState *destState, + FsmState **srcStates, int numSrc ) +{ + for ( int s = 0; s < numSrc; s++ ) + mergeStates( md, destState, srcStates[s] ); +} + +/* Merge srcState into destState: copy the out transitions, the state bits and final status, then the epsilon transitions, out priorities and action tables. srcState may be the same state as destState. */ void FsmGraph::mergeStates( MergeData &md, FsmState *destState, FsmState *srcState ) +{ + outTransCopy( md, destState, srcState->outList.head ); + + /* Get its bits and final state status. */ + destState->stateBits |= ( srcState->stateBits & ~SB_ISFINAL ); + if ( srcState->isFinState() ) + setFinState( destState ); + + /* Draw in any properties of srcState into destState. */ + if ( srcState == destState ) { + /* Duplicate the list to protect against write to source. The + * priority sets are not copied in because that would have no + * effect. */ + destState->epsilonTrans.append( EpsilonTrans( srcState->epsilonTrans ) ); + + /* Get all actions, duplicating to protect against write to source. 
*/ + destState->toStateActionTable.setActions( + ActionTable( srcState->toStateActionTable ) ); + destState->fromStateActionTable.setActions( + ActionTable( srcState->fromStateActionTable ) ); + destState->outActionTable.setActions( ActionTable( srcState->outActionTable ) ); + destState->outCondSet.insert( ActionSet( srcState->outCondSet ) ); + destState->errActionTable.setActions( ErrActionTable( srcState->errActionTable ) ); + destState->eofActionTable.setActions( ActionTable( srcState->eofActionTable ) ); + } + else { + /* Get the epsilons, out priorities. */ + destState->epsilonTrans.append( srcState->epsilonTrans ); + destState->outPriorTable.setPriors( srcState->outPriorTable ); + + /* Get all actions. */ + destState->toStateActionTable.setActions( srcState->toStateActionTable ); + destState->fromStateActionTable.setActions( srcState->fromStateActionTable ); + destState->outActionTable.setActions( srcState->outActionTable ); + destState->outCondSet.insert( srcState->outCondSet ); + destState->errActionTable.setActions( srcState->errActionTable ); + destState->eofActionTable.setActions( srcState->eofActionTable ); + } +} + +/* Merge the recorded state set into every state on md's fill list, then delete the state dict element of each state on that list. */ void FsmGraph::fillInStates( MergeData &md ) +{ + /* Merge any states that are awaiting merging. This will likely cause + * other states to be added to the stfill list. */ + FsmState *state = md.stfillHead; + while ( state != 0 ) { + StateSet *stateSet = &state->stateDictEl->stateSet; + mergeStates( md, state, stateSet->data, stateSet->length() ); + state = state->alg.next; + } + + /* Delete the state sets of all states that are on the fill list. */ + state = md.stfillHead; + while ( state != 0 ) { + /* Delete and reset the state set. */ + delete state->stateDictEl; + state->stateDictEl = 0; + + /* Next state in the stfill list. */ + state = state->alg.next; + } + + /* StateDict will still have its ptrs/size set but all of its elements + * will be deleted so we don't need to clean it up. 
*/ +} diff --git a/src/fsmgraph.h b/src/fsmgraph.h new file mode 100644 index 00000000..5b357499 --- /dev/null +++ b/src/fsmgraph.h @@ -0,0 +1,1321 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _COLM_FSMGRAPH_H +#define _COLM_FSMGRAPH_H + +#include <assert.h> + +#include <avltree.h> +#include <avlmap.h> +#include <avlset.h> +#include <bstmap.h> +#include <vector.h> +#include <sbstmap.h> +#include <sbstset.h> +#include <sbsttable.h> +#include <bstset.h> +#include <compare.h> +#include <dlist.h> + +#include "keyops.h" + +/* Flags that control merging. 
*/ +#define SB_GRAPH1 0x01 +#define SB_GRAPH2 0x02 +#define SB_BOTH 0x03 +#define SB_ISFINAL 0x04 +#define SB_ISMARKED 0x08 +#define SB_ONLIST 0x10 + +struct FsmTrans; +struct FsmState; +struct FsmGraph; +struct Action; +struct TokenInstance; +struct NameInst; + +/* State list element for unambiguous access to list element. */ +struct FsmListEl +{ + FsmState *prev, *next; +}; + +/* This is the marked index for a state pair. Used in minimization. It keeps + * track of whether or not the state pair is marked. */ +struct MarkIndex +{ + MarkIndex(int states); + ~MarkIndex(); + + void markPair(int state1, int state2); + bool isPairMarked(int state1, int state2); + +private: + int numStates; + bool *array; +}; + +extern KeyOps *keyOps; + +/* Transition Action Element. Maps an int ordering to an Action pointer. */ +typedef SBstMapEl< int, Action* > ActionTableEl; + +/* Transition Action Table. */ +struct ActionTable + : public SBstMap< int, Action*, CmpOrd<int> > +{ + void setAction( int ordering, Action *action ); + void setActions( int *orderings, Action **actions, int nActs ); + void setActions( const ActionTable &other ); + + bool hasAction( Action *action ); +}; + +typedef SBstSet< Action*, CmpOrd<Action*> > ActionSet; +typedef CmpSTable< Action*, CmpOrd<Action*> > CmpActionSet; + +/* Lm Transition Action Element. Maps an int ordering to a TokenInstance + * pointer. */ +typedef SBstMapEl< int, TokenInstance* > LmActionTableEl; + +/* Lm Transition Action Table. */ +struct LmActionTable + : public SBstMap< int, TokenInstance*, CmpOrd<int> > +{ + void setAction( int ordering, TokenInstance *action ); + void setActions( const LmActionTable &other ); +}; + +/* Compare of a whole action table element (key & value). 
*/ +struct CmpActionTableEl +{ + static int compare( const ActionTableEl &action1, + const ActionTableEl &action2 ) + { + if ( action1.key < action2.key ) + return -1; + else if ( action1.key > action2.key ) + return 1; + else if ( action1.value < action2.value ) + return -1; + else if ( action1.value > action2.value ) + return 1; + return 0; + } +}; + +/* Compare for ActionTable. */ +typedef CmpSTable< ActionTableEl, CmpActionTableEl > CmpActionTable; + +/* Compare of a whole lm action table element (key & value). The key is + * compared first, then the value pointer. */ +struct CmpLmActionTableEl +{ + static int compare( const LmActionTableEl &lmAction1, + const LmActionTableEl &lmAction2 ) + { + if ( lmAction1.key < lmAction2.key ) + return -1; + else if ( lmAction1.key > lmAction2.key ) + return 1; + else if ( lmAction1.value < lmAction2.value ) + return -1; + else if ( lmAction1.value > lmAction2.value ) + return 1; + return 0; + } +}; + +/* Compare for LmActionTable. */ +typedef CmpSTable< LmActionTableEl, CmpLmActionTableEl > CmpLmActionTable; + +/* Action table element for error action tables. Adds the encoding of transfer + * point. */ +struct ErrActionTableEl +{ + ErrActionTableEl( Action *action, int ordering, int transferPoint ) + : ordering(ordering), action(action), transferPoint(transferPoint) { } + + /* Ordering and id of the action embedding. */ + int ordering; + Action *action; + + /* Id of the point of transfer from the error action table to transitions + * and eofActionTable. */ + int transferPoint; + + /* The table key is the ordering. */ + int getKey() const { return ordering; } +}; + +/* Table of error action elements keyed by their int ordering. */ struct ErrActionTable + : public SBstTable< ErrActionTableEl, int, CmpOrd<int> > +{ + void setAction( int ordering, Action *action, int transferPoint ); + void setActions( const ErrActionTable &other ); +}; + +/* Compare of an error action table element (key & value). 
*/ +struct CmpErrActionTableEl +{ + static int compare( const ErrActionTableEl &action1, + const ErrActionTableEl &action2 ) + { + if ( action1.ordering < action2.ordering ) + return -1; + else if ( action1.ordering > action2.ordering ) + return 1; + else if ( action1.action < action2.action ) + return -1; + else if ( action1.action > action2.action ) + return 1; + else if ( action1.transferPoint < action2.transferPoint ) + return -1; + else if ( action1.transferPoint > action2.transferPoint ) + return 1; + return 0; + } +}; + +/* Compare for ErrActionTable. */ +typedef CmpSTable< ErrActionTableEl, CmpErrActionTableEl > CmpErrActionTable; + + +/* Describe a priority, shared among PriorEls. + * Has a key and a priority value. */ +struct PriorDesc +{ + int key; + int priority; +}; + +/* Element in the arrays of priorities for transitions and arrays. Ordering is + * unique among instantiations of machines, desc is shared. */ +struct PriorEl +{ + PriorEl( int ordering, PriorDesc *desc ) + : ordering(ordering), desc(desc) { } + + int ordering; + PriorDesc *desc; +}; + +/* Compare priority elements, which are ordered by the priority descriptor + * key. Dereferences desc, so both elements must have a descriptor. */ +struct PriorElCmp +{ + static inline int compare( const PriorEl &pel1, const PriorEl &pel2 ) + { + if ( pel1.desc->key < pel2.desc->key ) + return -1; + else if ( pel1.desc->key > pel2.desc->key ) + return 1; + else + return 0; + } +}; + + +/* Priority Table. A set of PriorEls ordered by priority descriptor key. */ +struct PriorTable + : public SBstSet< PriorEl, PriorElCmp > +{ + void setPrior( int ordering, PriorDesc *desc ); + void setPriors( const PriorTable &other ); +}; + +/* Compare of prior table elements for distinguising state data. 
*/ +struct CmpPriorEl +{ + static inline int compare( const PriorEl &pel1, const PriorEl &pel2 ) + { + if ( pel1.desc < pel2.desc ) + return -1; + else if ( pel1.desc > pel2.desc ) + return 1; + else if ( pel1.ordering < pel2.ordering ) + return -1; + else if ( pel1.ordering > pel2.ordering ) + return 1; + return 0; + } +}; + +/* Compare of PriorTable distinguishing state data. Using a compare of the + * pointers is a little more strict than it needs be. It requires that + * priority tables have the exact same set of priority assignment operators + * (from the input lang) to be considered equal. + * + * Really only key-value pairs need be tested and ordering be merged. However + * this would require that in the fusing of states, priority descriptors be + * chosen for the new fused state based on priority. Since the out transition + * lists and ranges aren't necessarily going to line up, this is more work for + * little gain. Final compression resets all priorities first, so this would + * only be useful for compression at every operator, which is only an + * undocumented test feature. + */ +typedef CmpSTable<PriorEl, CmpPriorEl> CmpPriorTable; + +/* Plain action list that imposes no ordering. */ +typedef Vector<int> TransFuncList; + +/* Comparison for TransFuncList. */ +typedef CmpTable< int, CmpOrd<int> > TransFuncListCompare; + +/* Transition class that implements actions and priorities. */ +struct FsmTrans +{ + /* Neither constructor initializes the out-list or in-list pointers. */ FsmTrans() : fromState(0), toState(0) {} + /* Copies the key range, action table and priority table. The copy is attached to no state and both lmActionTables must be empty. */ FsmTrans( const FsmTrans &other ) : + lowKey(other.lowKey), + highKey(other.highKey), + fromState(0), toState(0), + actionTable(other.actionTable), + priorTable(other.priorTable) + { + assert( lmActionTable.length() == 0 && other.lmActionTable.length() == 0 ); + } + + Key lowKey, highKey; + FsmState *fromState; + FsmState *toState; + + /* Pointers for outlist. */ + FsmTrans *prev, *next; + + /* Pointers for in-list. */ + FsmTrans *ilprev, *ilnext; + + /* The function table and priority for the transition. 
*/ + ActionTable actionTable; + PriorTable priorTable; + + LmActionTable lmActionTable; +}; + +/* In transition list. Like DList except only has a head pointer, which is all + * that is required. Insertion and deletion is handled by the graph. This + * class provides the iterator of a single list. */ +struct TransInList +{ + TransInList() : head(0) { } + + FsmTrans *head; + + struct Iter + { + /* Default construct. */ + Iter() : ptr(0) { } + + /* Construct, assign from a list. */ + Iter( const TransInList &il ) : ptr(il.head) { } + Iter &operator=( const TransInList &dl ) { ptr = dl.head; return *this; } + + /* Not yet at the end (lte), at the end (end). */ + bool lte() const { return ptr != 0; } + bool end() const { return ptr == 0; } + + /* At the first, last element. */ + bool first() const { return ptr && ptr->ilprev == 0; } + bool last() const { return ptr && ptr->ilnext == 0; } + + /* Cast, dereference, arrow ops. */ + operator FsmTrans*() const { return ptr; } + FsmTrans &operator *() const { return *ptr; } + FsmTrans *operator->() const { return ptr; } + + /* Increment, decrement. These follow the in-list pointers and do not + * check for a null ptr. */ + inline void operator++(int) { ptr = ptr->ilnext; } + inline void operator--(int) { ptr = ptr->ilprev; } + + /* The iterator is simply a pointer. */ + FsmTrans *ptr; + }; +}; + +typedef DList<FsmTrans> TransList; + +/* Set of states, list of states. */ +typedef BstSet<FsmState*> StateSet; +typedef DList<FsmState> StateList; + +/* An element in a state dict. The key is the state set. The constructor does + * not initialize targState. */ +struct StateDictEl +: + public AvlTreeEl<StateDictEl> +{ + StateDictEl(const StateSet &stateSet) + : stateSet(stateSet) { } + + const StateSet &getKey() { return stateSet; } + StateSet stateSet; + FsmState *targState; +}; + +/* Dictionary mapping a set of states to a target state. */ +typedef AvlTree< StateDictEl, StateSet, CmpTable<FsmState*> > StateDict; + +/* Data needed for a merge operation. 
*/ +struct MergeData +{ + MergeData() + : stfillHead(0), stfillTail(0) { } + + StateDict stateDict; + + /* Head and tail of the list of states awaiting fill-in. FsmGraph::fillInStates walks it from stfillHead through FsmState::alg.next. */ FsmState *stfillHead; + FsmState *stfillTail; + + void fillListAppend( FsmState *state ); +}; + +/* A key range paired with a transition pointer. Only the three-argument constructor sets value. */ struct TransEl +{ + /* Constructors. */ + TransEl() { } + TransEl( Key lowKey, Key highKey ) + : lowKey(lowKey), highKey(highKey) { } + TransEl( Key lowKey, Key highKey, FsmTrans *value ) + : lowKey(lowKey), highKey(highKey), value(value) { } + + Key lowKey, highKey; + FsmTrans *value; +}; + +/* Three-way compare of two keys using the Key less-than and greater-than operators. */ struct CmpKey +{ + static int compare( const Key key1, const Key key2 ) + { + if ( key1 < key2 ) + return -1; + else if ( key1 > key2 ) + return 1; + else + return 0; + } +}; + +/* Vector based set of key items. */ +typedef BstSet<Key, CmpKey> KeySet; + +/* A list of states with an active flag, linked into a list of partitions. FsmState::alg.partition points at one of these during partition minimization. */ struct MinPartition +{ + MinPartition() : active(false) { } + + StateList list; + bool active; + + MinPartition *prev, *next; +}; + +/* Epsilon transitions stored in a state. Entries are the name ids of the + * targets. */ +typedef Vector<int> EpsilonTrans; + +/* List of states that are to be drawn into this. The leaving flag selects + * mergeStatesLeaving over mergeStates when the target is drawn in. */ +struct EptVectEl +{ + EptVectEl( FsmState *targ, bool leaving ) + : targ(targ), leaving(leaving) { } + + FsmState *targ; + bool leaving; +}; +typedef Vector<EptVectEl> EptVect; + +/* Set of entry ids that go into this state. */ +typedef BstSet<int> EntryIdSet; + +/* Set of longest match items that may be active in a given state. */ +typedef BstSet<TokenInstance*> LmItemSet; + +/* Conditions. 
*/ +typedef BstSet< Action*, CmpOrd<Action*> > CondSet; +typedef CmpTable< Action*, CmpOrd<Action*> > CmpCondSet; + +/* A condition set stored as an element of a CondSpaceMap, keyed by the set itself. The constructor leaves baseKey and condSpaceId unset. */ struct CondSpace + : public AvlTreeEl<CondSpace> +{ + CondSpace( const CondSet &condSet ) + : condSet(condSet) {} + + const CondSet &getKey() { return condSet; } + + CondSet condSet; + Key baseKey; + long condSpaceId; +}; + +typedef Vector<CondSpace*> CondSpaceVect; + +typedef AvlTree<CondSpace, CondSet, CmpCondSet> CondSpaceMap; + +/* A key range with a condition space pointer, linked into a StateCondList. The constructor leaves condSpace unset. */ struct StateCond +{ + StateCond( Key lowKey, Key highKey ) : + lowKey(lowKey), highKey(highKey) {} + + Key lowKey; + Key highKey; + CondSpace *condSpace; + + StateCond *prev, *next; +}; + +typedef DList<StateCond> StateCondList; +typedef Vector<long> LongVect; + +/* State class that implements actions and priorities. */ +struct FsmState +{ + FsmState(); + FsmState(const FsmState &other); + ~FsmState(); + + /* Is the state final? True when the SB_ISFINAL bit is set in stateBits. */ + bool isFinState() { return stateBits & SB_ISFINAL; } + + /* Out transition list. */ + TransList outList; + + /* In transition list. */ + TransInList inList; + + /* Entry points into the state. */ + EntryIdSet entryIds; + + /* Epsilon transitions. */ + EpsilonTrans epsilonTrans; + + /* Condition info. */ + StateCondList stateCondList; + + /* Number of in transitions from states other than ourselves. */ + int foreignInTrans; + + /* Temporary data for various algorithms. The members share storage, so + * only one of them is meaningful at a time. */ + union { + /* When duplicating the fsm we need to map each + * state to the new state representing it. */ + FsmState *stateMap; + + /* When minimizing machines by partitioning, this maps to the group + * the state is in. */ + MinPartition *partition; + + /* When merging states (state machine operations) this next pointer is + * used for the list of states that need to be filled in. */ + FsmState *next; + + /* Identification for printing and stable minimization. */ + int stateNum; + + } alg; + + /* Data used in epsilon operation, maybe fit into alg? 
*/ + FsmState *isolatedShadow; + int owningGraph; + + /* A pointer to a dict element that contains the set of states this state + * represents. This cannot go into alg, because alg.next is used during + * the merging process. */ + StateDictEl *stateDictEl; + + /* When drawing epsilon transitions, holds the list of states to merge + * with. */ + EptVect *eptVect; + + /* Bits controlling the behaviour of the state during collapsing to dfa. */ + int stateBits; + + /* State list elements. */ + FsmState *next, *prev; + + /* + * Priority and Action data. + */ + + /* Out priorities transferred to out transitions. */ + PriorTable outPriorTable; + + /* The following two action tables are distinguished by the fact that + * toState actions are executed immediately after transition actions of + * incoming transitions and the current character will be the same as the + * one available then. The fromState actions are executed immediately + * before the transition actions of outgoing transitions and the current + * character is same as the one available then. */ + + /* Actions to execute upon entering into a state. */ + ActionTable toStateActionTable; + + /* Actions to execute when going from the state to the transition. */ + ActionTable fromStateActionTable; + + /* Actions to add to any future transitions that leave via this state. */ + ActionTable outActionTable; + + /* Conditions to add to any future transitions that leave via this state. */ + ActionSet outCondSet; + + /* Error action tables. */ + ErrActionTable errActionTable; + + /* Actions to execute on eof. */ + ActionTable eofActionTable; + + /* Set of longest match items that may be active in this state. 
*/ + LmItemSet lmItemSet; + + FsmState *eofTarget; +}; + +/* Cursor over a linked list of transitions. load() caches the key range of the current item and its next pointer, or clears next when trans is null. */ template <class ListItem> struct NextTrans +{ + Key lowKey, highKey; + ListItem *trans; + ListItem *next; + + void load() { + if ( trans == 0 ) + next = 0; + else { + next = trans->next; + lowKey = trans->lowKey; + highKey = trans->highKey; + } + } + + void set( ListItem *t ) { + trans = t; + load(); + } + + void increment() { + trans = next; + load(); + } +}; + + +/* Encodes the different states that are meaningful to the user of the + * iterator. */ +enum PairIterUserState +{ + RangeInS1, RangeInS2, + RangeOverlap, + BreakS1, BreakS2 +}; + +/* Iterator that walks two transition lists together, stopping at each range that is in only one list, at each overlapping range and at each point where a range has to be broken. NOTE(review): presumably both lists are sorted by key with non-overlapping ranges - confirm against callers. */ template <class ListItem1, class ListItem2 = ListItem1> struct PairIter +{ + /* Encodes the different states that an fsm iterator can be in. */ + enum IterState { + Begin, + ConsumeS1Range, ConsumeS2Range, + OnlyInS1Range, OnlyInS2Range, + S1SticksOut, S1SticksOutBreak, + S2SticksOut, S2SticksOutBreak, + S1DragsBehind, S1DragsBehindBreak, + S2DragsBehind, S2DragsBehindBreak, + ExactOverlap, End + }; + + PairIter( ListItem1 *list1, ListItem2 *list2 ); + + /* Query iterator. */ + bool lte() { return itState != End; } + bool end() { return itState == End; } + void operator++(int) { findNext(); } + void operator++() { findNext(); } + + /* Iterator state. */ + ListItem1 *list1; + ListItem2 *list2; + IterState itState; + PairIterUserState userState; + + NextTrans<ListItem1> s1Tel; + NextTrans<ListItem2> s2Tel; + Key bottomLow, bottomHigh; + ListItem1 *bottomTrans1; + ListItem2 *bottomTrans2; + +private: + void findNext(); +}; + +/* Init the iterator by advancing to the first item. */ +template <class ListItem1, class ListItem2> PairIter<ListItem1, ListItem2>::PairIter( + ListItem1 *list1, ListItem2 *list2 ) +: + list1(list1), + list2(list2), + itState(Begin) +{ + findNext(); +} + +/* Return and re-entry for the co-routine iterators. This should ALWAYS be + * used inside of a block. 
*/ +#define CO_RETURN(label) \ + itState = label; \ + return; \ + entry##label: {} + +/* Return and re-entry for the co-routine iterators. This should ALWAYS be + * used inside of a block. Unlike CO_RETURN, this variant also stores uState + * in userState before returning. */ +#define CO_RETURN2(label, uState) \ + itState = label; \ + userState = uState; \ + return; \ + entry##label: {} + +/* Advance to the next transition. When this returns, trans points to the next + * transition, unless there are no more, in which case end() returns true. */ +template <class ListItem1, class ListItem2> void PairIter<ListItem1, ListItem2>::findNext() +{ + /* Jump into the iterator routine based on the iterator state. Apart from + * entryBegin, each target label is declared by the CO_RETURN/CO_RETURN2 + * that sets the matching itState, so a call resumes just after the + * previous return. */ + switch ( itState ) { + case Begin: goto entryBegin; + case ConsumeS1Range: goto entryConsumeS1Range; + case ConsumeS2Range: goto entryConsumeS2Range; + case OnlyInS1Range: goto entryOnlyInS1Range; + case OnlyInS2Range: goto entryOnlyInS2Range; + case S1SticksOut: goto entryS1SticksOut; + case S1SticksOutBreak: goto entryS1SticksOutBreak; + case S2SticksOut: goto entryS2SticksOut; + case S2SticksOutBreak: goto entryS2SticksOutBreak; + case S1DragsBehind: goto entryS1DragsBehind; + case S1DragsBehindBreak: goto entryS1DragsBehindBreak; + case S2DragsBehind: goto entryS2DragsBehind; + case S2DragsBehindBreak: goto entryS2DragsBehindBreak; + case ExactOverlap: goto entryExactOverlap; + case End: goto entryEnd; + } + +entryBegin: + /* Set up the next structs at the head of the transition lists. */ + s1Tel.set( list1 ); + s2Tel.set( list2 ); + + /* Concurrently scan both out ranges. */ + while ( true ) { + if ( s1Tel.trans == 0 ) { + /* We are at the end of state1's ranges. Process the rest of + * state2's ranges. */ + while ( s2Tel.trans != 0 ) { + /* Range is only in s2. */ + CO_RETURN2( ConsumeS2Range, RangeInS2 ); + s2Tel.increment(); + } + break; + } + else if ( s2Tel.trans == 0 ) { + /* We are at the end of state2's ranges. Process the rest of + * state1's ranges. */ + while ( s1Tel.trans != 0 ) { + /* Range is only in s1. 
*/ + CO_RETURN2( ConsumeS1Range, RangeInS1 ); + s1Tel.increment(); + } + break; + } + /* Both state1's and state2's transition elements are good. + * The signature of no overlap is one range's high key being below the + * other range's low key. */ + else if ( s1Tel.highKey < s2Tel.lowKey ) { + /* A range exists in state1 that does not overlap with state2. */ + CO_RETURN2( OnlyInS1Range, RangeInS1 ); + s1Tel.increment(); + } + else if ( s2Tel.highKey < s1Tel.lowKey ) { + /* A range exists in state2 that does not overlap with state1. */ + CO_RETURN2( OnlyInS2Range, RangeInS2 ); + s2Tel.increment(); + } + /* There is overlap, must mix the ranges in some way. */ + else if ( s1Tel.lowKey < s2Tel.lowKey ) { + /* Range from state1 sticks out front. Must break it into + * non-overlapping and overlapping segments. The bottom* variables hold + * the overlapping remainder while the front part is reported. */ + bottomLow = s2Tel.lowKey; + bottomHigh = s1Tel.highKey; + s1Tel.highKey = s2Tel.lowKey; + s1Tel.highKey.decrement(); + bottomTrans1 = s1Tel.trans; + + /* Notify the caller that we are breaking s1. This gives them a + * chance to duplicate s1Tel[0,1].value. */ + CO_RETURN2( S1SticksOutBreak, BreakS1 ); + + /* Broken off range is only in s1. */ + CO_RETURN2( S1SticksOut, RangeInS1 ); + + /* Advance over the part sticking out front. */ + s1Tel.lowKey = bottomLow; + s1Tel.highKey = bottomHigh; + s1Tel.trans = bottomTrans1; + } + else if ( s2Tel.lowKey < s1Tel.lowKey ) { + /* Range from state2 sticks out front. Must break it into + * non-overlapping and overlapping segments. */ + bottomLow = s1Tel.lowKey; + bottomHigh = s2Tel.highKey; + s2Tel.highKey = s1Tel.lowKey; + s2Tel.highKey.decrement(); + bottomTrans2 = s2Tel.trans; + + /* Notify the caller that we are breaking s2. This gives them a + * chance to duplicate s2Tel[0,1].value. */ + CO_RETURN2( S2SticksOutBreak, BreakS2 ); + + /* Broken off range is only in s2. */ + CO_RETURN2( S2SticksOut, RangeInS2 ); + + /* Advance over the part sticking out front. 
*/ + s2Tel.lowKey = bottomLow; + s2Tel.highKey = bottomHigh; + s2Tel.trans = bottomTrans2; + } + /* Low ends are even. Are the high ends even? */ + else if ( s1Tel.highKey < s2Tel.highKey ) { + /* Range from state2 goes longer than the range from state1. We + * must break the range from state2 into an evenly overlapping + * segment. */ + bottomLow = s1Tel.highKey; + bottomLow.increment(); + bottomHigh = s2Tel.highKey; + s2Tel.highKey = s1Tel.highKey; + bottomTrans2 = s2Tel.trans; + + /* Notify the caller that we are breaking s2. This gives them a + * chance to duplicate s2Tel[0,1].value. */ + CO_RETURN2( S2DragsBehindBreak, BreakS2 ); + + /* Breaking s2 produces exact overlap. */ + CO_RETURN2( S2DragsBehind, RangeOverlap ); + + /* Advance over the front we just broke off of range 2. */ + s2Tel.lowKey = bottomLow; + s2Tel.highKey = bottomHigh; + s2Tel.trans = bottomTrans2; + + /* Advance over the entire s1Tel. We have consumed it. */ + s1Tel.increment(); + } + else if ( s2Tel.highKey < s1Tel.highKey ) { + /* Range from state1 goes longer than the range from state2. We + * must break the range from state1 into an evenly overlapping + * segment. */ + bottomLow = s2Tel.highKey; + bottomLow.increment(); + bottomHigh = s1Tel.highKey; + s1Tel.highKey = s2Tel.highKey; + bottomTrans1 = s1Tel.trans; + + /* Notify the caller that we are breaking s1. This gives them a + * chance to duplicate s1Tel[0,1].value. */ + CO_RETURN2( S1DragsBehindBreak, BreakS1 ); + + /* Breaking s1 produces exact overlap. */ + CO_RETURN2( S1DragsBehind, RangeOverlap ); + + /* Advance over the front we just broke off of range 1. */ + s1Tel.lowKey = bottomLow; + s1Tel.highKey = bottomHigh; + s1Tel.trans = bottomTrans1; + + /* Advance over the entire s2Tel. We have consumed it. */ + s2Tel.increment(); + } + else { + /* There is an exact overlap. */ + CO_RETURN2( ExactOverlap, RangeOverlap ); + + s1Tel.increment(); + s2Tel.increment(); + } + } + + /* Done, go into end state. 
*/ + CO_RETURN( End ); +} + + +/* Compare lists of epsilon transitions. Entries are name ids of targets. */ +typedef CmpTable< int, CmpOrd<int> > CmpEpsilonTrans; + +/* Compare class for the approximate minimization (the sort/compare used by + * FsmGraph::minimizeRound). */ +class ApproxCompare +{ +public: + ApproxCompare() { } + int compare( const FsmState *pState1, const FsmState *pState2 ); +}; + +/* Compare class for the initial partitioning of a partition minimization. */ +class InitPartitionCompare +{ +public: + InitPartitionCompare() { } + int compare( const FsmState *pState1, const FsmState *pState2 ); +}; + +/* Compare class for the regular partitioning of a partition minimization. */ +class PartitionCompare +{ +public: + PartitionCompare() { } + int compare( const FsmState *pState1, const FsmState *pState2 ); +}; + +/* Compare class for a minimization that marks pairs. Provides the shouldMark + * routine. */ +class MarkCompare +{ +public: + MarkCompare() { } + bool shouldMark( MarkIndex &markIndex, const FsmState *pState1, + const FsmState *pState2 ); +}; + +/* List of partitions. */ +typedef DList< MinPartition > PartitionList; + +/* List of transitions out of a state. */ +typedef Vector<TransEl> TransListVect; + +/* Entry point map used for keeping track of entry points in a machine. */ +typedef BstSet< int > EntryIdSet; +typedef BstMapEl< int, FsmState* > EntryMapEl; +typedef BstMap< int, FsmState* > EntryMap; +typedef Vector<EntryMapEl> EntryMapBase; + +/* Graph class that implements actions and priorities. */ +struct FsmGraph +{ + /* Constructors/Destructors. */ + FsmGraph( ); + FsmGraph( const FsmGraph &graph ); + ~FsmGraph(); + + /* The list of states. */ + StateList stateList; + StateList misfitList; + + /* The map of entry points. */ + EntryMap entryPoints; + + /* The start state. */ + FsmState *startState; + + /* Error state, possibly created only when the final machine has been + * created and the XML machine is about to be written. No transitions + * point to this state. 
*/ + FsmState *errState; + + /* The set of final states. */ + StateSet finStateSet; + + /* Misfit accounting: whether misfits are put on a separate list. */ + bool misfitAccounting; + + bool lmRequiresErrorState; + NameInst **nameIndex; + + /* + * Transition actions and priorities. + */ + + /* Set priorities on transitions. */ + void startFsmPrior( int ordering, PriorDesc *prior ); + void allTransPrior( int ordering, PriorDesc *prior ); + void finishFsmPrior( int ordering, PriorDesc *prior ); + void leaveFsmPrior( int ordering, PriorDesc *prior ); + + /* Action setting support. */ + void transferErrorActions( FsmState *state, int transferPoint ); + void setErrorAction( FsmState *state, int ordering, Action *action ); + void setErrorActions( FsmState *state, const ActionTable &other ); + + /* Fill all spaces in a transition list with an error transition. */ + void fillGaps( FsmState *state ); + + /* Similar to setErrorAction, but instead gives a state to go to on error. */ + void setErrorTarget( FsmState *state, FsmState *target, int *orderings, + Action **actions, int nActs ); + + /* Set actions to execute. */ + void startFsmAction( int ordering, Action *action ); + void allTransAction( int ordering, Action *action ); + void finishFsmAction( int ordering, Action *action ); + void leaveFsmAction( int ordering, Action *action ); + void longMatchAction( int ordering, TokenInstance *lmPart ); + + /* Set error actions to execute. */ + void startErrorAction( int ordering, Action *action, int transferPoint ); + void allErrorAction( int ordering, Action *action, int transferPoint ); + void finalErrorAction( int ordering, Action *action, int transferPoint ); + void notStartErrorAction( int ordering, Action *action, int transferPoint ); + void notFinalErrorAction( int ordering, Action *action, int transferPoint ); + void middleErrorAction( int ordering, Action *action, int transferPoint ); + + /* Set EOF actions. 
*/ + void startEOFAction( int ordering, Action *action ); + void allEOFAction( int ordering, Action *action ); + void finalEOFAction( int ordering, Action *action ); + void notStartEOFAction( int ordering, Action *action ); + void notFinalEOFAction( int ordering, Action *action ); + void middleEOFAction( int ordering, Action *action ); + + /* Set To State actions. */ + void startToStateAction( int ordering, Action *action ); + void allToStateAction( int ordering, Action *action ); + void finalToStateAction( int ordering, Action *action ); + void notStartToStateAction( int ordering, Action *action ); + void notFinalToStateAction( int ordering, Action *action ); + void middleToStateAction( int ordering, Action *action ); + + /* Set From State actions. */ + void startFromStateAction( int ordering, Action *action ); + void allFromStateAction( int ordering, Action *action ); + void finalFromStateAction( int ordering, Action *action ); + void notStartFromStateAction( int ordering, Action *action ); + void notFinalFromStateAction( int ordering, Action *action ); + void middleFromStateAction( int ordering, Action *action ); + + /* Shift the action ordering of the start transitions to start at + * fromOrder and increase in units of 1. Useful before kleene star + * operation. */ + int shiftStartActionOrder( int fromOrder ); + + /* Clear all priorities from the fsm so they won't affect minimization + * of the final fsm. */ + void clearAllPriorities(); + + /* Zero out all the function keys. */ + void nullActionKeys(); + + /* Walk the list of states and verify state properties. */ + void verifyStates(); + + /* Turn misfit accounting on or off (stores val in misfitAccounting). */ + void setMisfitAccounting( bool val ) + { misfitAccounting = val; } + + /* Set and unset a state as final. */ + void setFinState( FsmState *state ); + void unsetFinState( FsmState *state ); + /* Set and unset the start state. */ + void setStartState( FsmState *state ); + void unsetStartState( ); + + /* Set and unset a state as an entry point. 
*/ + void setEntry( int id, FsmState *state ); + void changeEntry( int id, FsmState *to, FsmState *from ); + void unsetEntry( int id, FsmState *state ); + void unsetEntry( int id ); + void unsetAllEntryPoints(); + + /* Epsilon transitions. */ + void epsilonTrans( int id ); + void shadowReadWriteStates( MergeData &md ); + + /* + * Basic attaching and detaching. + */ + + /* Common to attaching/detaching list and default. */ + void attachToInList( FsmState *from, FsmState *to, FsmTrans *&head, FsmTrans *trans ); + void detachFromInList( FsmState *from, FsmState *to, FsmTrans *&head, FsmTrans *trans ); + + /* Attach with a new transition. */ + FsmTrans *attachNewTrans( FsmState *from, FsmState *to, + Key onChar1, Key onChar2 ); + + /* Attach with an existing transition that is already in an out list. */ + void attachTrans( FsmState *from, FsmState *to, FsmTrans *trans ); + + /* Redirect a transition away from error and towards some state. */ + void redirectErrorTrans( FsmState *from, FsmState *to, FsmTrans *trans ); + + /* Detach a transition from a target state. */ + void detachTrans( FsmState *from, FsmState *to, FsmTrans *trans ); + + /* Detach a state from the graph. */ + void detachState( FsmState *state ); + + /* + * NFA to DFA conversion routines. + */ + + /* Duplicate a transition that will drop in to a free spot. */ + FsmTrans *dupTrans( FsmState *from, FsmTrans *srcTrans ); + + /* In crossing, two transitions both go to real states. */ + FsmTrans *fsmAttachStates( MergeData &md, FsmState *from, + FsmTrans *destTrans, FsmTrans *srcTrans ); + + /* Two transitions are to be crossed, handle the possibility of either + * going to the error state. */ + FsmTrans *mergeTrans( MergeData &md, FsmState *from, + FsmTrans *destTrans, FsmTrans *srcTrans ); + + /* Compare to determine the relative priorities of two priority tables. 
*/ + int comparePrior( const PriorTable &priorTable1, const PriorTable &priorTable2 ); + + /* Cross a src transition with one that is already occupying a spot. */ + FsmTrans *crossTransitions( MergeData &md, FsmState *from, + FsmTrans *destTrans, FsmTrans *srcTrans ); + + void outTransCopy( MergeData &md, FsmState *dest, FsmTrans *srcList ); + void mergeStateConds( FsmState *destState, FsmState *srcState ); + + /* Merge a set of states (or a single srcState) into destState. */ + void mergeStates( MergeData &md, FsmState *destState, + FsmState **srcStates, int numSrc ); + void mergeStatesLeaving( MergeData &md, FsmState *destState, FsmState *srcState ); + void mergeStates( MergeData &md, FsmState *destState, FsmState *srcState ); + + /* Make all states that are combinations of other states and that + * have not yet had their out transitions filled in. This will + * empty out stateDict and stFil. */ + void fillInStates( MergeData &md ); + + /* + * Transition Comparison. + */ + + /* Compare transition data. Either of the pointers may be null. */ + static inline int compareDataPtr( FsmTrans *trans1, FsmTrans *trans2 ); + + /* Compare target state and transition data. Either pointer may be null. */ + static inline int compareFullPtr( FsmTrans *trans1, FsmTrans *trans2 ); + + /* Compare target partitions. Either pointer may be null. */ + static inline int comparePartPtr( FsmTrans *trans1, FsmTrans *trans2 ); + + /* Check marked status of target states. Either pointer may be null. */ + static inline bool shouldMarkPtr( MarkIndex &markIndex, + FsmTrans *trans1, FsmTrans *trans2 ); + + /* + * Callbacks. + */ + + /* Compare priority and function table of transitions. */ + static int compareTransData( FsmTrans *trans1, FsmTrans *trans2 ); + + /* Add the properties of srcTrans into destTrans. */ + void addInTrans( FsmTrans *destTrans, FsmTrans *srcTrans ); + + /* Compare states on data stored in the states. 
*/ + static int compareStateData( const FsmState *state1, const FsmState *state2 ); + + /* Out transition data. */ + void clearOutData( FsmState *state ); + bool hasOutData( FsmState *state ); + void transferOutData( FsmState *destState, FsmState *srcState ); + + /* + * Allocation. + */ + + /* Allocate a new state and add it to the graph. */ + FsmState *addState(); + + /* + * Building basic machines. + */ + + void concatFsm( Key c ); + void concatFsm( Key *str, int len ); + void concatFsmCI( Key *str, int len ); + void orFsm( Key *set, int len ); + void rangeFsm( Key low, Key high ); + void rangeStarFsm( Key low, Key high ); + void emptyFsm( ); + void lambdaFsm( ); + + /* + * Fsm operators. + */ + + void starOp( ); + void repeatOp( int times ); + void optionalRepeatOp( int times ); + void concatOp( FsmGraph *other ); + void unionOp( FsmGraph *other ); + void intersectOp( FsmGraph *other ); + void subtractOp( FsmGraph *other ); + void epsilonOp(); + void joinOp( int startId, int finalId, FsmGraph **others, int numOthers ); + void globOp( FsmGraph **others, int numOthers ); + void deterministicEntry(); + + /* + * Operator workers. + */ + + /* Determine if there are any entry points into a start state other than + * the start state. */ + bool isStartStateIsolated(); + + /* Make a new start state that has no entry points. Will not change the + * identity of the fsm. */ + void isolateStartState(); + + /* Workers for resolving epsilon transitions. */ + bool inEptVect( EptVect *eptVect, FsmState *targ ); + void epsilonFillEptVectFrom( FsmState *root, FsmState *from, bool parentLeaving ); + void resolveEpsilonTrans( MergeData &md ); + + /* Workers for concatenation and union. */ + void doConcat( FsmGraph *other, StateSet *fromStates, bool optional ); + void doOr( FsmGraph *other ); + + /* + * Final states. + */ + + /* Unset any final states that are no longer to be final + * due to final bits. 
*/ + void unsetIncompleteFinals(); + void unsetKilledFinals(); + + /* Bring in other's entry points. Assumes other's states are going to be + * copied into this machine. */ + void copyInEntryPoints( FsmGraph *other ); + + /* Ordering states. */ + void depthFirstOrdering( FsmState *state ); + void depthFirstOrdering(); + void sortStatesByFinal(); + + /* Set sequential state numbers. The original note says "starting at 0"; + * presumably numbering starts at base -- TODO confirm. */ + void setStateNumbers( int base ); + + /* Unset all final states. */ + void unsetAllFinStates(); + + /* Set the bits of final states and clear the bits of non final states. */ + void setFinBits( int finStateBits ); + + /* + * Self-consistency checks. + */ + + /* Run a sanity check on the machine. */ + void verifyIntegrity(); + + /* Verify that there are no unreachable states, or dead end states. */ + void verifyReachability(); + void verifyNoDeadEndStates(); + + /* + * Path pruning + */ + + /* Mark all states that have a path to state (the graph is walked in + * reverse; removeDeadEndStates calls this on every final state). */ + void markReachableFromHereReverse( FsmState *state ); + + /* Mark all states reachable from state. */ + void markReachableFromHere( FsmState *state ); + void markReachableFromHereStopFinal( FsmState *state ); + + /* Removes states that do not lead to a final state. */ + void removeDeadEndStates(); + + /* Removes states that cannot be reached by any path in the fsm and are + * thus wasted silicon. */ + void removeUnreachableStates(); + + /* Remove error actions from states on which the error transition will + * never be taken. */ + bool outListCovers( FsmState *state ); + bool anyErrorRange( FsmState *state ); + + /* Remove states that are on the misfit list. */ + void removeMisfits(); + + /* + * FSM Minimization + */ + + /* Minimization by partitioning. */ + void minimizePartition1(); + void minimizePartition2(); + + /* Minimize the final state Machine. The result is the minimal fsm. Slow + * but stable, correct minimization. Uses n^2 space (lookout) and average + * n^2 time. 
Worst case n^3 time, but that is a very rare case. */ + void minimizeStable(); + + /* Minimize the final state machine. Does not find the minimal fsm, but a + * pretty good approximation. Does not use any extra space. Average n^2 + * time. Worst case n^3 time, but that is a very rare case. */ + void minimizeApproximate(); + + /* This is the worker for the minimize approximate solution. It merges + * states that have identical out transitions. Returns true if any states + * were merged. */ + bool minimizeRound( ); + + /* Given an initial partitioning of states, split partitions that have out trans + * to differing partitions. Returns the resulting number of partitions. */ + int partitionRound( FsmState **statePtrs, MinPartition *parts, int numParts ); + + /* Split partitions that have a transition to a previously split partition, until + * there are no more partitions to split. Returns the resulting number of + * partitions. */ + int splitCandidates( FsmState **statePtrs, MinPartition *parts, int numParts ); + + /* Fuse together states in the same partition. */ + void fusePartitions( MinPartition *parts, int numParts ); + + /* Mark pairs where out final stateness differs, out trans data differs, + * trans pairs go to a marked pair or trans data differs. Should get + * a lot of pairs. */ + void initialMarkRound( MarkIndex &markIndex ); + + /* One marking round on all state pairs. Considers if trans pairs go + * to a marked state only. Returns whether or not a pair was marked. */ + bool markRound( MarkIndex &markIndex ); + + /* Move the in transitions of src into dest. */ + void inTransMove(FsmState *dest, FsmState *src); + + /* Make state src and dest the same state. */ + void fuseEquivStates(FsmState *dest, FsmState *src); + + /* Find any states that didn't get marked by the marking algorithm and + * merge them into the primary states of their equivalence class. */ + void fuseUnmarkedPairs( MarkIndex &markIndex ); + + /* Merge neighboring transitions that go to the same state and have the same + * transition data. 
*/ + void compressTransitions(); + + /* Returns true if there is a transition (either explicit or by a gap) to + * the error state. */ + bool checkErrTrans( FsmState *state, FsmTrans *trans ); + bool checkErrTransFinish( FsmState *state ); + bool hasErrorTrans(); +}; + + +#endif /* _COLM_FSMGRAPH_H */ + diff --git a/src/fsmmin.cc b/src/fsmmin.cc new file mode 100644 index 00000000..f47500bd --- /dev/null +++ b/src/fsmmin.cc @@ -0,0 +1,737 @@ +/* + * Copyright 2006-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <stdbool.h> +#include <assert.h> + +#include <mergesort.h> + +#include "fsmgraph.h" + /* One splitting pass over the partitions in parts[0..numParts). Returns the partition count after the pass. */ +int FsmGraph::partitionRound( FsmState **statePtrs, MinPartition *parts, int numParts ) +{ + /* Need a mergesort object and a single partition compare. */ + MergeSort<FsmState*, PartitionCompare> mergeSort; + PartitionCompare partCompare; + + /* For each partition. 
*/ + for ( int p = 0; p < numParts; p++ ) { + /* Fill the pointer array with the states in the partition. */ + StateList::Iter state = parts[p].list; + for ( int s = 0; state.lte(); state++, s++ ) + statePtrs[s] = state; + + /* Sort the states using the partitioning compare. */ + int numStates = parts[p].list.length(); + mergeSort.sort( statePtrs, numStates ); + + /* Assign the states into partitions based on the results of the sort. + * The first run of equal states stays in partition p. */ + int destPart = p, firstNewPart = numParts; + for ( int s = 1; s < numStates; s++ ) { + /* If this state differs from the last then move to the next partition. */ + if ( partCompare.compare( statePtrs[s-1], statePtrs[s] ) < 0 ) { + /* The new partition is the next available spot. Growing numParts + * here also means the outer loop reaches the new partition + * later in this same pass. */ + destPart = numParts; + numParts += 1; + } + + /* If the state is not staying in the first partition, then + * transfer it to its destination partition. */ + if ( destPart != p ) { + FsmState *state = parts[p].list.detach( statePtrs[s] ); + parts[destPart].list.append( state ); + } + } + + /* Fix the partition pointer for all the states that got moved to a new + * partition. This must be done after the states are transferred so the + * result of the sort is not altered. */ + for ( int newPart = firstNewPart; newPart < numParts; newPart++ ) { + StateList::Iter state = parts[newPart].list; + for ( ; state.lte(); state++ ) + state->alg.partition = &parts[newPart]; + } + } + + return numParts; +} + +/** + * \brief Minimize by partitioning version 1. + * + * Repeatedly tries to split partitions until all partitions are unsplittable. + * Produces the most minimal FSM possible. + */ +void FsmGraph::minimizePartition1() +{ + /* Need one mergesort object and partition compares. */ + MergeSort<FsmState*, InitPartitionCompare> mergeSort; + InitPartitionCompare initPartCompare; + + /* Nothing to do if there are no states. */ + if ( stateList.length() == 0 ) + return; + + /* + * First thing is to partition the states by final state status and + * transition functions. 
This gives us an initial partitioning to work + * with. + */ + + /* Make an array of pointers to states. */ + int numStates = stateList.length(); + FsmState** statePtrs = new FsmState*[numStates]; + + /* Fill up an array of pointers to the states for easy sorting. */ + StateList::Iter state = stateList; + for ( int s = 0; state.lte(); state++, s++ ) + statePtrs[s] = state; + + /* Sort the states using the array of states. */ + mergeSort.sort( statePtrs, numStates ); + + /* An array of lists of states is used to partition the states. It has + * one slot per state, the most partitions there can ever be. */ + MinPartition *parts = new MinPartition[numStates]; + + /* Assign the states into partitions. */ + int destPart = 0; + for ( int s = 0; s < numStates; s++ ) { + /* If this state differs from the last then move to the next partition. */ + if ( s > 0 && initPartCompare.compare( statePtrs[s-1], statePtrs[s] ) < 0 ) { + /* Move to the next partition. */ + destPart += 1; + } + + /* Put the state into its partition. */ + statePtrs[s]->alg.partition = &parts[destPart]; + parts[destPart].list.append( statePtrs[s] ); + } + + /* We just moved all the states from the main list into partitions without + * taking them off the main list. So clean up the main list now. */ + stateList.abandon(); + + /* Split partitions. */ + int numParts = destPart + 1; + while ( true ) { + /* Test all partitions for splitting. */ + int newNum = partitionRound( statePtrs, parts, numParts ); + + /* When no partitions can be split, stop. */ + if ( newNum == numParts ) + break; + + numParts = newNum; + } + + /* Fuse states in the same partition. The states will end up back on the + * main list. */ + fusePartitions( parts, numParts ); + + /* Cleanup. */ + delete[] statePtrs; + delete[] parts; +} + +/* Split partitions that need splitting, decide which partitions might need + * to be split as a result, continue until there are no more that might need + * to be split. 
*/ +int FsmGraph::splitCandidates( FsmState **statePtrs, MinPartition *parts, int numParts ) +{ + /* Need a mergesort and a partition compare. */ + MergeSort<FsmState*, PartitionCompare> mergeSort; + PartitionCompare partCompare; + + /* The lists of unsplittable (partList) and splittable partitions. + * Only partitions in the splittable list are checked for needing splitting. */ + PartitionList partList, splittable; + + /* Initially, all partitions are born from a split (the initial + * partitioning) and can cause other partitions to be split. So any + * partition with a state with a transition out to another partition is a + * candidate for splitting. This will make every partition except possibly + * partitions of final states split candidates. */ + for ( int p = 0; p < numParts; p++ ) { + /* Assume not active. */ + parts[p].active = false; + + /* Look for a trans out of any state in the partition. */ + for ( StateList::Iter state = parts[p].list; state.lte(); state++ ) { + /* If there is at least one transition out to another state then + * the partition becomes splittable. */ + if ( state->outList.length() > 0 ) { + parts[p].active = true; + break; + } + } + + /* If it was found active then it goes on the splittable list. */ + if ( parts[p].active ) + splittable.append( &parts[p] ); + else + partList.append( &parts[p] ); + } + + /* While there are partitions that are splittable, pull one off and try + * to split it. If it splits, determine which partitions may now be split + * as a result of the newly split partition. */ + while ( splittable.length() > 0 ) { + MinPartition *partition = splittable.detachFirst(); + + /* Fill the pointer array with the states in the partition. */ + StateList::Iter state = partition->list; + for ( int s = 0; state.lte(); state++, s++ ) + statePtrs[s] = state; + + /* Sort the states using the partitioning compare. 
*/ + int numStates = partition->list.length(); + mergeSort.sort( statePtrs, numStates ); + + /* Assign the states into partitions based on the results of the sort. */ + MinPartition *destPart = partition; + int firstNewPart = numParts; + for ( int s = 1; s < numStates; s++ ) { + /* If this state differs from the last then move to the next partition. */ + if ( partCompare.compare( statePtrs[s-1], statePtrs[s] ) < 0 ) { + /* The new partition is the next available spot. */ + destPart = &parts[numParts]; + numParts += 1; + } + + /* If the state is not staying in the first partition, then + * transfer it to its destination partition. */ + if ( destPart != partition ) { + FsmState *state = partition->list.detach( statePtrs[s] ); + destPart->list.append( state ); + } + } + + /* Fix the partition pointer for all the states that got moved to a new + * partition. This must be done after the states are transferred so the + * result of the sort is not altered. */ + int newPart; + for ( newPart = firstNewPart; newPart < numParts; newPart++ ) { + StateList::Iter state = parts[newPart].list; + for ( ; state.lte(); state++ ) + state->alg.partition = &parts[newPart]; + } + + /* Put the partition we just split and any new partitions that came out + * of the split onto the inactive list. */ + partition->active = false; + partList.append( partition ); + for ( newPart = firstNewPart; newPart < numParts; newPart++ ) { + parts[newPart].active = false; + partList.append( &parts[newPart] ); + } + /* destPart only moves when a new partition is created, so this means nothing was split off. */ + if ( destPart == partition ) + continue; + + /* Now determine which partitions are splittable as a result of + * splitting partition by walking the in lists of the states in + * partitions that got split. Partition is the faked first item in the + * loop. */ + MinPartition *causalPart = partition; + newPart = firstNewPart - 1; + while ( newPart < numParts ) { + /* Loop all states in the causal partition. 
*/ + StateList::Iter state = causalPart->list; + for ( ; state.lte(); state++ ) { + /* Walk all transitions into the state and put the partition + * that the from state is in onto the splittable list. */ + for ( TransInList::Iter trans = state->inList; trans.lte(); trans++ ) { + MinPartition *fromPart = trans->fromState->alg.partition; + if ( ! fromPart->active ) { + fromPart->active = true; + partList.detach( fromPart ); + splittable.append( fromPart ); + } + } + } + /* On the last pass this forms &parts[numParts], which the loop test then rejects before it is used. */ + newPart += 1; + causalPart = &parts[newPart]; + } + } + return numParts; +} + + +/** + * \brief Minimize by partitioning version 2 (best alg). + * + * Repeatedly tries to split partitions that may be splittable until there are no + * more partitions that might possibly need splitting. Runs faster than + * version 1. Produces the most minimal fsm possible. + */ +void FsmGraph::minimizePartition2() +{ + /* Need a mergesort and an initial partition compare. */ + MergeSort<FsmState*, InitPartitionCompare> mergeSort; + InitPartitionCompare initPartCompare; + + /* Nothing to do if there are no states. */ + if ( stateList.length() == 0 ) + return; + + /* + * First thing is to partition the states by final state status and + * transition functions. This gives us an initial partitioning to work + * with. + */ + + /* Make an array of pointers to states. */ + int numStates = stateList.length(); + FsmState** statePtrs = new FsmState*[numStates]; + + /* Fill up an array of pointers to the states for easy sorting. */ + StateList::Iter state = stateList; + for ( int s = 0; state.lte(); state++, s++ ) + statePtrs[s] = state; + + /* Sort the states using the array of states. */ + mergeSort.sort( statePtrs, numStates ); + + /* An array of lists of states is used to partition the states. */ + MinPartition *parts = new MinPartition[numStates]; + + /* Assign the states into partitions. */ + int destPart = 0; + for ( int s = 0; s < numStates; s++ ) { + /* If this state differs from the last then move to the next partition. 
*/ + if ( s > 0 && initPartCompare.compare( statePtrs[s-1], statePtrs[s] ) < 0 ) { + /* Move to the next partition. */ + destPart += 1; + } + + /* Put the state into its partition. */ + statePtrs[s]->alg.partition = &parts[destPart]; + parts[destPart].list.append( statePtrs[s] ); + } + + /* We just moved all the states from the main list into partitions without + * taking them off the main list. So clean up the main list now. */ + stateList.abandon(); + + /* Split partitions. */ + int numParts = splitCandidates( statePtrs, parts, destPart+1 ); + + /* Fuse states in the same partition. The states will end up back on the + * main list. */ + fusePartitions( parts, numParts ); + + /* Cleanup. */ + delete[] statePtrs; + delete[] parts; +} + /* Seed markIndex: mark every pair of states that InitPartitionCompare tells apart, keyed by each state's alg.stateNum. */ +void FsmGraph::initialMarkRound( MarkIndex &markIndex ) +{ + /* P and q for walking pairs. */ + FsmState *p = stateList.head, *q; + + /* Need an initial partition compare. */ + InitPartitionCompare initPartCompare; + + /* Walk all unordered pairs of (p, q) where p != q. + * The second depth of the walk stops before reaching p. This + * gives us all unordered pairs of states (p, q) where p != q. */ + while ( p != 0 ) { + q = stateList.head; + while ( q != p ) { + /* If the states differ on final state status, out transitions or + * any transition data then they should be separated on the initial + * round. */ + if ( initPartCompare.compare( p, q ) != 0 ) + markIndex.markPair( p->alg.stateNum, q->alg.stateNum ); + + q = q->next; + } + p = p->next; + } +} + /* One marking pass over all unmarked pairs. Returns true if any pair was newly marked. */ +bool FsmGraph::markRound( MarkIndex &markIndex ) +{ + /* P and q for walking pairs. Take note if any pair gets marked. */ + FsmState *p = stateList.head, *q; + bool pairWasMarked = false; + + /* Need a mark comparison. */ + MarkCompare markCompare; + + /* Walk all unordered pairs of (p, q) where p != q. + * The second depth of the walk stops before reaching p. This + * gives us all unordered pairs of states (p, q) where p != q. 
*/ + while ( p != 0 ) { + q = stateList.head; + while ( q != p ) { + /* Should we mark the pair? */ + if ( !markIndex.isPairMarked( p->alg.stateNum, q->alg.stateNum ) ) { + if ( markCompare.shouldMark( markIndex, p, q ) ) { + markIndex.markPair( p->alg.stateNum, q->alg.stateNum ); + pairWasMarked = true; + } + } + q = q->next; + } + p = p->next; + } + + return pairWasMarked; +} + + +/** + * \brief Minimize by pair marking. + * + * Decides if each pair of states is distinct or not. Uses O(n^2) memory and + * should only be used on small graphs. Produces the most minimal FSM + * possible. + */ +void FsmGraph::minimizeStable() +{ + /* Set the state numbers. */ + setStateNumbers( 0 ); + + /* This keeps track of which pairs have been marked. */ + MarkIndex markIndex( stateList.length() ); + + /* Mark pairs where final stateness, out trans, or trans data differ. */ + initialMarkRound( markIndex ); + + /* While the last round of marking succeeded in marking a state + * continue to do another round. markRound returns bool, so keep the + * flag a bool as well. */ + bool modified = markRound( markIndex ); + while (modified) + modified = markRound( markIndex ); + + /* Merge pairs that are unmarked. */ + fuseUnmarkedPairs( markIndex ); +} + +/* One round of approximate minimization: sort the states with ApproxCompare + * and fuse every state that compares equal to the state kept before it. + * Returns true if at least one pair of states was fused. */ +bool FsmGraph::minimizeRound() +{ + /* Nothing to do if there are no states. */ + if ( stateList.length() == 0 ) + return false; + + /* Need a mergesort on approx compare and an approx compare. */ + MergeSort<FsmState*, ApproxCompare> mergeSort; + ApproxCompare approxCompare; + + /* Snapshot the state count before any fusing. The walk below covers the + * statePtrs array, whose size is fixed here. fuseEquivStates is expected + * to take the fused state off stateList (TODO confirm), so re-reading + * stateList.length() as the loop bound would shrink it mid-walk and skip + * the trailing entries until a later round. */ + const int numStates = stateList.length(); + + /* Fill up an array of pointers to the states. */ + FsmState **statePtrs = new FsmState*[numStates]; + StateList::Iter state = stateList; + for ( int s = 0; state.lte(); state++, s++ ) + statePtrs[s] = state; + + bool modified = false; + + /* Sort the list. */ + mergeSort.sort( statePtrs, numStates ); + + /* Walk the list looking for duplicates next to each other, + * merge in any duplicates. 
*/ + FsmState **pLast = statePtrs; + FsmState **pState = statePtrs + 1; + for ( int i = 1; i < numStates; i++, pState++ ) { + if ( approxCompare.compare( *pLast, *pState ) == 0 ) { + /* Last and pState are the same, so fuse together. Move forward + * with pState but not with pLast. If any more are identical, they + * must be fused into pLast as well. */ + fuseEquivStates( *pLast, *pState ); + modified = true; + } + else { + /* Last and this are different, do not set to merge them. Move + * pLast to the current (it may be way behind from merging many + * states) and pState forward one to consider the next pair. */ + pLast = pState; + } + } + delete[] statePtrs; + return modified; +} + +/** + * \brief Minimize by an approximation. + * + * Repeatedly tries to find states with transitions out to the same set of + * states on the same set of keys until no more identical states can be found. + * Does not produce the most minimal FSM possible. + */ +void FsmGraph::minimizeApproximate() +{ + /* While the last minimization round succeeded in compacting states, + * continue to try to compact states. */ + while ( true ) { + bool modified = minimizeRound(); + if ( ! modified ) + break; + } +} + + +/* Remove states that have no path to them from the start state. Recursively + * traverses the graph marking states that have paths into them. Then removes + * all states that did not get marked. */ +void FsmGraph::removeUnreachableStates() +{ + /* Misfit accounting should be off and there should be no states on the + * misfit list. */ + assert( !misfitAccounting && misfitList.length() == 0 ); + + /* Mark all the states that can be reached + * through the existing set of entry points. */ + markReachableFromHere( startState ); + for ( EntryMap::Iter en = entryPoints; en.lte(); en++ ) + markReachableFromHere( en->value ); + + /* Delete all states that are not marked + * and unmark the ones that are marked. 
*/ + FsmState *state = stateList.head; + while ( state ) { + FsmState *next = state->next; + + if ( state->stateBits & SB_ISMARKED ) + state->stateBits &= ~ SB_ISMARKED; + else { + detachState( state ); + stateList.detach( state ); + delete state; + } + + state = next; + } +} + +bool FsmGraph::outListCovers( FsmState *state ) +{ + /* Must be at least one range to cover. */ + if ( state->outList.length() == 0 ) + return false; + + /* The first must start at the lower bound. */ + TransList::Iter trans = state->outList.first(); + if ( keyOps->minKey < trans->lowKey ) + return false; + + /* Loop starts at second el. */ + trans.increment(); + + /* Loop checks lower against prev upper. */ + for ( ; trans.lte(); trans++ ) { + /* Lower end of the trans must be one greater than the + * previous' high end. */ + Key lowKey = trans->lowKey; + lowKey.decrement(); + if ( trans->prev->highKey < lowKey ) + return false; + } + + /* Require that the last range extends to the upper bound. */ + trans = state->outList.last(); + if ( trans->highKey < keyOps->maxKey ) + return false; + + return true; +} + +/* Remove states that that do not lead to a final states. Works recursivly traversing + * the graph in reverse (starting from all final states) and marking seen states. Then + * removes states that did not get marked. */ +void FsmGraph::removeDeadEndStates() +{ + /* Misfit accounting should be off and there should be no states on the + * misfit list. */ + assert( !misfitAccounting && misfitList.length() == 0 ); + + /* Mark all states that have paths to the final states. */ + FsmState **st = finStateSet.data; + int nst = finStateSet.length(); + for ( int i = 0; i < nst; i++, st++ ) + markReachableFromHereReverse( *st ); + + /* Start state gets honorary marking. If the machine accepts nothing we + * still want the start state to hang around. 
This must be done after the + * recursive call on all the final states so that it does not cause the + * start state in transitions to be skipped when the start state is + * visited by the traversal. */ + startState->stateBits |= SB_ISMARKED; + + /* Delete all states that are not marked + * and unmark the ones that are marked. */ + FsmState *state = stateList.head; + while ( state != 0 ) { + FsmState *next = state->next; + + if ( state->stateBits & SB_ISMARKED ) + state->stateBits &= ~ SB_ISMARKED; + else { + detachState( state ); + stateList.detach( state ); + delete state; + } + + state = next; + } +} + +/* Remove states on the misfit list. To work properly misfit accounting should + * be on when this is called. The detaching of a state will likely cause + * another misfit to be collected and it can then be removed. */ +void FsmGraph::removeMisfits() +{ + while ( misfitList.length() > 0 ) { + /* Get the first state. */ + FsmState *state = misfitList.head; + + /* Detach and delete. */ + detachState( state ); + + /* The state was previously on the misfit list and detaching can only + * remove in transitions so the state must still be on the misfit + * list. */ + misfitList.detach( state ); + delete state; + } +} + +/* Fuse src into dest because they have been deemed equivalent states. + * Involves moving transitions into src to go into dest and invoking + * callbacks. Src is deleted detached from the graph and deleted. */ +void FsmGraph::fuseEquivStates( FsmState *dest, FsmState *src ) +{ + /* This would get ugly. */ + assert( dest != src ); + + /* Cur is a duplicate. We can merge it with trail. */ + inTransMove( dest, src ); + + detachState( src ); + stateList.detach( src ); + delete src; +} + +void FsmGraph::fuseUnmarkedPairs( MarkIndex &markIndex ) +{ + FsmState *p = stateList.head, *nextP, *q; + + /* Definition: The primary state of an equivalence class is the first state + * encounterd that belongs to the equivalence class. 
All equivalence + * classes have primary state including equivalence classes with one state + * in it. */ + + /* For each unmarked pair merge p into q and delete p. q is always the + * primary state of it's equivalence class. We wouldn't have landed on it + * here if it were not, because it would have been deleted. + * + * Proof that q is the primaray state of it's equivalence class: Assume q + * is not the primary state of it's equivalence class, then it would be + * merged into some state that came before it and thus p would be + * equivalent to that state. But q is the first state that p is equivalent + * to so we have a contradiction. */ + + /* Walk all unordered pairs of (p, q) where p != q. + * The second depth of the walk stops before reaching p. This + * gives us all unordered pairs of states (p, q) where p != q. */ + while ( p != 0 ) { + nextP = p->next; + + q = stateList.head; + while ( q != p ) { + /* If one of p or q is a final state then mark. */ + if ( ! markIndex.isPairMarked( p->alg.stateNum, q->alg.stateNum ) ) { + fuseEquivStates( q, p ); + break; + } + q = q->next; + } + p = nextP; + } +} + +void FsmGraph::fusePartitions( MinPartition *parts, int numParts ) +{ + /* For each partition, fuse state 2, 3, ... into state 1. */ + for ( int p = 0; p < numParts; p++ ) { + /* Assume that there will always be at least one state. */ + FsmState *first = parts[p].list.head, *toFuse = first->next; + + /* Put the first state back onto the main state list. Don't bother + * removing it from the partition list first. */ + stateList.append( first ); + + /* Fuse the rest of the state into the first. */ + while ( toFuse != 0 ) { + /* Save the next. We will trash it before it is needed. */ + FsmState *next = toFuse->next; + + /* Put the state to be fused in to the first back onto the main + * list before it is fuse. the graph. The state needs to be on + * the main list for the detach from the graph to work. 
Don't + * bother removing the state from the partition list first. We + * need not maintain it. */ + stateList.append( toFuse ); + + /* Now fuse to the first. */ + fuseEquivStates( first, toFuse ); + + /* Go to the next that we saved before trashing the next pointer. */ + toFuse = next; + } + + /* We transfered the states from the partition list into the main list without + * removing the states from the partition list first. Clean it up. */ + parts[p].list.abandon(); + } +} + + +/* Merge neighboring transitions go to the same state and have the same + * transitions data. */ +void FsmGraph::compressTransitions() +{ + for ( StateList::Iter st = stateList; st.lte(); st++ ) { + if ( st->outList.length() > 1 ) { + for ( TransList::Iter trans = st->outList, next = trans.next(); next.lte(); ) { + Key nextLow = next->lowKey; + nextLow.decrement(); + if ( trans->highKey == nextLow && trans->toState == next->toState && + CmpActionTable::compare( trans->actionTable, next->actionTable ) == 0 ) + { + trans->highKey = next->highKey; + st->outList.detach( next ); + detachTrans( next->fromState, next->toState, next ); + delete next; + next = trans.next(); + } + else { + trans.increment(); + next.increment(); + } + } + } + } +} diff --git a/src/fsmstate.cc b/src/fsmstate.cc new file mode 100644 index 00000000..b3d1c313 --- /dev/null +++ b/src/fsmstate.cc @@ -0,0 +1,441 @@ +/* + * Copyright 2006-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * 
copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <string.h> +#include <assert.h> +#include <stdbool.h> + +#include <iostream> + +#include "fsmgraph.h" + +using namespace std; + +/* Construct a mark index for a specified number of states. Must new up + * an array that is states^2 in size. */ +MarkIndex::MarkIndex( int states ) : numStates(states) +{ + /* Total pairs is states^2. Actually only use half of these, but we allocate + * them all to make indexing into the array easier. */ + int total = states * states; + + /* New up chars so that individual DListEl constructors are + * not called. Zero out the mem manually. */ + array = new bool[total]; + memset( array, 0, sizeof(bool) * total ); +} + +/* Free the array used to store state pairs. */ +MarkIndex::~MarkIndex() +{ + delete[] array; +} + +/* Mark a pair of states. States are specified by their number. The + * marked states are moved from the unmarked list to the marked list. */ +void MarkIndex::markPair(int state1, int state2) +{ + int pos = ( state1 >= state2 ) ? + ( state1 * numStates ) + state2 : + ( state2 * numStates ) + state1; + + array[pos] = true; +} + +/* Returns true if the pair of states are marked. Returns false otherwise. + * Ordering of states given does not matter. */ +bool MarkIndex::isPairMarked(int state1, int state2) +{ + int pos = ( state1 >= state2 ) ? + ( state1 * numStates ) + state2 : + ( state2 * numStates ) + state1; + + return array[pos]; +} + +/* Create a new fsm state. 
State has not out transitions or in transitions, not + * out out transition data and not number. */ +FsmState::FsmState() +: + /* No out or in transitions. */ + outList(), + inList(), + + /* No entry points, or epsilon trans. */ + entryIds(), + epsilonTrans(), + + /* No transitions in from other states. */ + foreignInTrans(0), + + /* Only used during merging. Normally null. */ + stateDictEl(0), + eptVect(0), + + /* No state identification bits. */ + stateBits(0), + + /* No Priority data. */ + outPriorTable(), + + /* No Action data. */ + toStateActionTable(), + fromStateActionTable(), + outActionTable(), + outCondSet(), + errActionTable(), + eofActionTable(), + + eofTarget(0) +{ +} + +/* Copy everything except actual the transitions. That is left up to the + * FsmGraph copy constructor. */ +FsmState::FsmState(const FsmState &other) +: + /* All lists are cleared. They will be filled in when the + * individual transitions are duplicated and attached. */ + outList(), + inList(), + + /* Duplicate the entry id set and epsilon transitions. These + * are sets of integers and as such need no fixing. */ + entryIds(other.entryIds), + epsilonTrans(other.epsilonTrans), + + /* No transitions in from other states. */ + foreignInTrans(0), + + /* This is only used during merging. Normally null. */ + stateDictEl(0), + eptVect(0), + + /* Fsm state data. */ + stateBits(other.stateBits), + + /* Copy in priority data. */ + outPriorTable(other.outPriorTable), + + /* Copy in action data. */ + toStateActionTable(other.toStateActionTable), + fromStateActionTable(other.fromStateActionTable), + outActionTable(other.outActionTable), + outCondSet(other.outCondSet), + errActionTable(other.errActionTable), + eofActionTable(other.eofActionTable), + + eofTarget(0) +{ + /* Duplicate all the transitions. */ + for ( TransList::Iter trans = other.outList; trans.lte(); trans++ ) { + /* Dupicate and store the orginal target in the transition. 
This will + * be corrected once all the states have been created. */ + FsmTrans *newTrans = new FsmTrans(*trans); + newTrans->toState = trans->toState; + outList.append( newTrans ); + } +} + +/* If there is a state dict element, then delete it. Everything else is left + * up to the FsmGraph destructor. */ +FsmState::~FsmState() +{ + if ( stateDictEl != 0 ) + delete stateDictEl; +} + +/* Compare two states using pointers to the states. With the approximate + * compare the idea is that if the compare finds them the same, they can + * immediately be merged. */ +int ApproxCompare::compare( const FsmState *state1 , const FsmState *state2 ) +{ + int compareRes; + + /* Test final state status. */ + if ( (state1->stateBits & SB_ISFINAL) && !(state2->stateBits & SB_ISFINAL) ) + return -1; + else if ( !(state1->stateBits & SB_ISFINAL) && (state2->stateBits & SB_ISFINAL) ) + return 1; + + /* Test epsilon transition sets. */ + compareRes = CmpEpsilonTrans::compare( state1->epsilonTrans, + state2->epsilonTrans ); + if ( compareRes != 0 ) + return compareRes; + + /* Compare the out transitions. */ + compareRes = FsmGraph::compareStateData( state1, state2 ); + if ( compareRes != 0 ) + return compareRes; + + /* Use a pair iterator to get the transition pairs. */ + PairIter<FsmTrans> outPair( state1->outList.head, state2->outList.head ); + for ( ; !outPair.end(); outPair++ ) { + switch ( outPair.userState ) { + + case RangeInS1: + compareRes = FsmGraph::compareFullPtr( outPair.s1Tel.trans, 0 ); + if ( compareRes != 0 ) + return compareRes; + break; + + case RangeInS2: + compareRes = FsmGraph::compareFullPtr( 0, outPair.s2Tel.trans ); + if ( compareRes != 0 ) + return compareRes; + break; + + case RangeOverlap: + compareRes = FsmGraph::compareFullPtr( + outPair.s1Tel.trans, outPair.s2Tel.trans ); + if ( compareRes != 0 ) + return compareRes; + break; + + case BreakS1: + case BreakS2: + break; + } + } + + /* Got through the entire state comparison, deem them equal. 
*/ + return 0; +} + +/* Compare class for the sort that does the intial partition of compaction. */ +int InitPartitionCompare::compare( const FsmState *state1 , const FsmState *state2 ) +{ + int compareRes; + + /* Test final state status. */ + if ( (state1->stateBits & SB_ISFINAL) && !(state2->stateBits & SB_ISFINAL) ) + return -1; + else if ( !(state1->stateBits & SB_ISFINAL) && (state2->stateBits & SB_ISFINAL) ) + return 1; + + /* Test epsilon transition sets. */ + compareRes = CmpEpsilonTrans::compare( state1->epsilonTrans, + state2->epsilonTrans ); + if ( compareRes != 0 ) + return compareRes; + + /* Compare the out transitions. */ + compareRes = FsmGraph::compareStateData( state1, state2 ); + if ( compareRes != 0 ) + return compareRes; + + /* Use a pair iterator to test the transition pairs. */ + PairIter<FsmTrans> outPair( state1->outList.head, state2->outList.head ); + for ( ; !outPair.end(); outPair++ ) { + switch ( outPair.userState ) { + + case RangeInS1: + compareRes = FsmGraph::compareDataPtr( outPair.s1Tel.trans, 0 ); + if ( compareRes != 0 ) + return compareRes; + break; + + case RangeInS2: + compareRes = FsmGraph::compareDataPtr( 0, outPair.s2Tel.trans ); + if ( compareRes != 0 ) + return compareRes; + break; + + case RangeOverlap: + compareRes = FsmGraph::compareDataPtr( + outPair.s1Tel.trans, outPair.s2Tel.trans ); + if ( compareRes != 0 ) + return compareRes; + break; + + case BreakS1: + case BreakS2: + break; + } + } + + return 0; +} + +/* Compare class for the sort that does the partitioning. */ +int PartitionCompare::compare( const FsmState *state1, const FsmState *state2 ) +{ + int compareRes; + + /* Use a pair iterator to get the transition pairs. 
*/ + PairIter<FsmTrans> outPair( state1->outList.head, state2->outList.head ); + for ( ; !outPair.end(); outPair++ ) { + switch ( outPair.userState ) { + + case RangeInS1: + compareRes = FsmGraph::comparePartPtr( outPair.s1Tel.trans, 0 ); + if ( compareRes != 0 ) + return compareRes; + break; + + case RangeInS2: + compareRes = FsmGraph::comparePartPtr( 0, outPair.s2Tel.trans ); + if ( compareRes != 0 ) + return compareRes; + break; + + case RangeOverlap: + compareRes = FsmGraph::comparePartPtr( + outPair.s1Tel.trans, outPair.s2Tel.trans ); + if ( compareRes != 0 ) + return compareRes; + break; + + case BreakS1: + case BreakS2: + break; + } + } + + return 0; +} + +/* Compare class for the sort that does the partitioning. */ +bool MarkCompare::shouldMark( MarkIndex &markIndex, const FsmState *state1, + const FsmState *state2 ) +{ + /* Use a pair iterator to get the transition pairs. */ + PairIter<FsmTrans> outPair( state1->outList.head, state2->outList.head ); + for ( ; !outPair.end(); outPair++ ) { + switch ( outPair.userState ) { + + case RangeInS1: + if ( FsmGraph::shouldMarkPtr( markIndex, outPair.s1Tel.trans, 0 ) ) + return true; + break; + + case RangeInS2: + if ( FsmGraph::shouldMarkPtr( markIndex, 0, outPair.s2Tel.trans ) ) + return true; + break; + + case RangeOverlap: + if ( FsmGraph::shouldMarkPtr( markIndex, + outPair.s1Tel.trans, outPair.s2Tel.trans ) ) + return true; + break; + + case BreakS1: + case BreakS2: + break; + } + } + + return false; +} + +/* + * Transition Comparison. + */ + +/* Compare target partitions. Either pointer may be null. */ +int FsmGraph::comparePartPtr( FsmTrans *trans1, FsmTrans *trans2 ) +{ + if ( trans1 != 0 ) { + /* If trans1 is set then so should trans2. The initial partitioning + * guarantees this for us. */ + if ( trans1->toState == 0 && trans2->toState != 0 ) + return -1; + else if ( trans1->toState != 0 && trans2->toState == 0 ) + return 1; + else if ( trans1->toState != 0 ) { + /* Both of targets are set. 
*/ + return CmpOrd< MinPartition* >::compare( + trans1->toState->alg.partition, trans2->toState->alg.partition ); + } + } + return 0; +} + + +/* Compares two transition pointers according to priority and functions. + * Either pointer may be null. Does not consider to state or from state. */ +int FsmGraph::compareDataPtr( FsmTrans *trans1, FsmTrans *trans2 ) +{ + if ( trans1 == 0 && trans2 != 0 ) + return -1; + else if ( trans1 != 0 && trans2 == 0 ) + return 1; + else if ( trans1 != 0 ) { + /* Both of the transition pointers are set. */ + int compareRes = compareTransData( trans1, trans2 ); + if ( compareRes != 0 ) + return compareRes; + } + return 0; +} + +/* Compares two transitions according to target state, priority and functions. + * Does not consider from state. Either of the pointers may be null. */ +int FsmGraph::compareFullPtr( FsmTrans *trans1, FsmTrans *trans2 ) +{ + if ( (trans1 != 0) ^ (trans2 != 0) ) { + /* Exactly one of the transitions is set. */ + if ( trans1 != 0 ) + return -1; + else + return 1; + } + else if ( trans1 != 0 ) { + /* Both of the transition pointers are set. Test target state, + * priority and funcs. */ + if ( trans1->toState < trans2->toState ) + return -1; + else if ( trans1->toState > trans2->toState ) + return 1; + else if ( trans1->toState != 0 ) { + /* Test transition data. */ + int compareRes = compareTransData( trans1, trans2 ); + if ( compareRes != 0 ) + return compareRes; + } + } + return 0; +} + + +bool FsmGraph::shouldMarkPtr( MarkIndex &markIndex, FsmTrans *trans1, + FsmTrans *trans2 ) +{ + if ( (trans1 != 0) ^ (trans2 != 0) ) { + /* Exactly one of the transitions is set. The initial mark round + * should rule out this case. */ + assert( false ); + } + else if ( trans1 != 0 ) { + /* Both of the transitions are set. If the target pair is marked, then + * the pair we are considering gets marked. 
*/ + return markIndex.isPairMarked( trans1->toState->alg.stateNum, + trans2->toState->alg.stateNum ); + } + + /* Neither of the transitiosn are set. */ + return false; +} + + diff --git a/src/global.h b/src/global.h new file mode 100644 index 00000000..58b98077 --- /dev/null +++ b/src/global.h @@ -0,0 +1,110 @@ +/* + * Copyright 2006-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _COLM_GLOBAL_H +#define _COLM_GLOBAL_H + +#include <stdio.h> + +#include <iostream> +#include <fstream> +#include <fstream> +#include <string> + +#include <avltree.h> + +#include "defs.h" +#include "keyops.h" + +#define PROGNAME "colm" + +/* IO filenames and stream. */ +extern bool genGraphviz; +extern int gblErrorCount; + +std::ostream &error(); + +/* IO filenames and stream. 
*/ +extern std::ostream *outStream; +extern bool generateGraphviz; +extern bool branchPointInfo; +extern bool verbose, logging; +extern bool addUniqueEmptyProductions; + +extern int gblErrorCount; +extern char startDefName[]; + +/* Error reporting. */ +std::ostream &error(); +std::ostream &error( int first_line, int first_column ); +std::ostream &warning( ); +std::ostream &warning( int first_line, int first_column ); + +extern std::ostream *outStream; +extern bool printStatistics; + +extern int gblErrorCount; +extern bool gblLibrary; +extern long gblActiveRealm; +extern char machineMain[]; +extern const char *exportHeaderFn; +extern bool rangeCrossesZero; + +struct colm_location; + +/* Location in an input file. */ +struct InputLoc +{ + InputLoc( colm_location *pcloc ); + + InputLoc() : fileName(0), line(-1), col(-1) {} + + InputLoc( const InputLoc &loc ) + { + fileName = loc.fileName; + line = loc.line; + col = loc.col; + } + + const char *fileName; + int line; + int col; +}; + +extern InputLoc internal; + +/* Error reporting. 
*/ +std::ostream &error(); +std::ostream &error( const InputLoc &loc ); +std::ostream &warning( const InputLoc &loc ); + +void scan( char *fileName, std::istream &input, std::ostream &output ); +void terminateAllParsers( ); +void checkMachines( ); + +void xmlEscapeHost( std::ostream &out, char *data, int len ); +void openOutput(); +void escapeLiteralString( std::ostream &out, const char *data ); +bool readCheck( const char *fn ); + +#endif /* _COLM_GLOBAL_H */ + diff --git a/src/input.c b/src/input.c new file mode 100644 index 00000000..043791f2 --- /dev/null +++ b/src/input.c @@ -0,0 +1,759 @@ +/* + * Copyright 2007-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <colm/input.h> + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> +#include <unistd.h> +#include <stdbool.h> + +#include <colm/pdarun.h> +#include <colm/debug.h> +#include <colm/program.h> +#include <colm/tree.h> +#include <colm/bytecode.h> +#include <colm/pool.h> +#include <colm/struct.h> + +DEF_INPUT_FUNCS( input_funcs_seq, input_impl_seq ); +extern struct input_funcs_seq input_funcs; + +static bool is_tree( struct seq_buf *b ) +{ + return b->type == SB_TOKEN || b->type == SB_IGNORE; +} + +static bool is_stream( struct seq_buf *b ) +{ + return b->type == SB_SOURCE || b->type == SB_ACCUM; +} + +char *colm_filename_add( program_t *prg, const char *fn ) +{ + /* Search for it. */ + const char **ptr = prg->stream_fns; + while ( *ptr != 0 ) { + if ( strcmp( *ptr, fn ) == 0 ) + return (char*)*ptr; + ptr += 1; + } + + /* Not present, find. */ + int items = ptr - prg->stream_fns; + + prg->stream_fns = realloc( prg->stream_fns, sizeof(char*) * ( items + 2 ) ); + prg->stream_fns[items] = strdup( fn ); + prg->stream_fns[items+1] = 0; + + return (char*)prg->stream_fns[items]; +} + +static struct seq_buf *new_seq_buf() +{ + struct seq_buf *rb = (struct seq_buf*) malloc( sizeof(struct seq_buf) ); + memset( rb, 0, sizeof(struct seq_buf) ); + return rb; +} + +static void input_transfer_loc( struct colm_program *prg, location_t *loc, + struct input_impl_seq *ss ) +{ +} + +static bool call_destructor( struct seq_buf *buf ) +{ + return is_stream( buf ) && buf->own_si; +} + +static void colm_input_destroy( program_t *prg, tree_t **sp, struct_t *s ) +{ + input_t *input = (input_t*) s; + struct input_impl *si = input->impl; + si->funcs->destructor( prg, sp, si ); +} + +static void input_stream_stash_head( struct colm_program *prg, + struct input_impl_seq *si, struct seq_buf *seq_buf ) +{ + debug( prg, REALM_INPUT, "stash_head: stream %p buf %p\n", si, seq_buf ); + seq_buf->next = si->stash; + si->stash = seq_buf; +} + +static struct 
seq_buf *input_stream_pop_stash( struct colm_program *prg, struct input_impl_seq *si ) +{ + struct seq_buf *seq_buf = si->stash; + si->stash = si->stash->next; + + debug( prg, REALM_INPUT, "pop_stash: stream %p buf %p\n", si, seq_buf ); + + return seq_buf; +} + +static void maybe_split( struct colm_program *prg, struct input_impl_seq *iis ) +{ + struct seq_buf *head = iis->queue.head; + if ( head != 0 && is_stream( head ) ) { + /* Maybe the stream will split itself off. */ + struct stream_impl *split_off = head->si->funcs->split_consumed( prg, head->si ); + + if ( split_off != 0 ) { + debug( prg, REALM_INPUT, "maybe split: consumed is > 0, splitting\n" ); + + struct seq_buf *new_buf = new_seq_buf(); + new_buf->type = SB_ACCUM; + new_buf->si = split_off; + new_buf->own_si = 1; + + input_stream_stash_head( prg, iis, new_buf ); + } + } +} + + +/* + * StreamImpl struct, this wraps the list of input streams. + */ + +void init_input_impl_seq( struct input_impl_seq *is, char *name ) +{ + memset( is, 0, sizeof(struct input_impl_seq) ); + + is->type = 'S'; + //is->name = name; + //is->line = 1; + //is->column = 1; + //is->byte = 0; +} + +static struct seq_buf *input_stream_seq_pop_head( struct input_impl_seq *is ) +{ + struct seq_buf *ret = is->queue.head; + is->queue.head = is->queue.head->next; + if ( is->queue.head == 0 ) + is->queue.tail = 0; + else + is->queue.head->prev = 0; + return ret; +} + +static void input_stream_seq_append( struct input_impl_seq *is, struct seq_buf *seq_buf ) +{ + if ( is->queue.head == 0 ) { + seq_buf->prev = seq_buf->next = 0; + is->queue.head = is->queue.tail = seq_buf; + } + else { + is->queue.tail->next = seq_buf; + seq_buf->prev = is->queue.tail; + seq_buf->next = 0; + is->queue.tail = seq_buf; + } +} + +static struct seq_buf *input_stream_seq_pop_tail( struct input_impl_seq *is ) +{ + struct seq_buf *ret = is->queue.tail; + is->queue.tail = is->queue.tail->prev; + if ( is->queue.tail == 0 ) + is->queue.head = 0; + else + 
is->queue.tail->next = 0; + return ret; +} + +static void input_stream_seq_prepend( struct input_impl_seq *is, struct seq_buf *seq_buf ) +{ + if ( is->queue.head == 0 ) { + seq_buf->prev = seq_buf->next = 0; + is->queue.head = is->queue.tail = seq_buf; + } + else { + is->queue.head->prev = seq_buf; + seq_buf->prev = 0; + seq_buf->next = is->queue.head; + is->queue.head = seq_buf; + } +} + +void input_set_eof_mark( struct colm_program *prg, struct input_impl_seq *si, char eof_mark ) +{ + si->eof_mark = eof_mark; +} + +static void input_destructor( program_t *prg, tree_t **sp, struct input_impl_seq *si ) +{ + struct seq_buf *buf = si->queue.head; + while ( buf != 0 ) { + if ( is_tree( buf ) ) + colm_tree_downref( prg, sp, buf->tree ); + + if ( call_destructor( buf ) ) + buf->si->funcs->destructor( prg, sp, buf->si ); + + struct seq_buf *next = buf->next; + free( buf ); + buf = next; + } + + buf = si->stash; + while ( buf != 0 ) { + struct seq_buf *next = buf->next; + if ( call_destructor( buf ) ) + buf->si->funcs->destructor( prg, sp, buf->si ); + + free( buf ); + buf = next; + } + + si->queue.head = 0; + + /* FIXME: Need to leak this for now. Until we can return strings to a + * program loader and free them at a later date (after the colm program is + * deleted). */ + // if ( stream->impl->name != 0 ) + // free( stream->impl->name ); + + free( si ); +} + +static int input_get_option( struct colm_program *prg, struct input_impl_seq *ii, + int option ) +{ + return ii->auto_trim; +} + +static void input_set_option( struct colm_program *prg, struct input_impl_seq *ii, + int option, int value ) +{ + ii->auto_trim = value ? 1 : 0; +} + + +static int input_get_parse_block( struct colm_program *prg, struct input_impl_seq *is, + int *pskip, alph_t **pdp, int *copied ) +{ + int ret = 0; + *copied = 0; + + /* Move over skip bytes. */ + struct seq_buf *buf = is->queue.head; + while ( true ) { + if ( buf == 0 ) { + /* Got through the in-mem buffers without copying anything. 
*/ + ret = is->eof_mark ? INPUT_EOF : INPUT_EOD; + break; + } + + if ( is_stream( buf ) ) { + struct stream_impl *si = buf->si; + int type = si->funcs->get_parse_block( prg, si, pskip, pdp, copied ); + + if ( type == INPUT_EOD || type == INPUT_EOF ) { + buf = buf->next; + continue; + } + + ret = type; + break; + } + + if ( buf->type == SB_TOKEN ) { + ret = INPUT_TREE; + break; + } + + if ( buf->type == SB_IGNORE ) { + ret = INPUT_IGNORE; + break; + } + + buf = buf->next; + } + +#if DEBUG + switch ( ret ) { + case INPUT_DATA: + if ( *pdp != 0 ) { + debug( prg, REALM_INPUT, "get parse block: DATA: %d %.*s\n", + *copied, (int)(*copied), *pdp ); + } + else { + debug( prg, REALM_INPUT, "get parse block: DATA: %d\n", *copied ); + } + break; + case INPUT_EOD: + debug( prg, REALM_INPUT, "get parse block: EOD\n" ); + break; + case INPUT_EOF: + debug( prg, REALM_INPUT, "get parse block: EOF\n" ); + break; + case INPUT_TREE: + debug( prg, REALM_INPUT, "get parse block: TREE\n" ); + break; + case INPUT_IGNORE: + debug( prg, REALM_INPUT, "get parse block: IGNORE\n" ); + break; + case INPUT_LANG_EL: + debug( prg, REALM_INPUT, "get parse block: LANG_EL\n" ); + break; + } +#endif + + return ret; +} + +static int input_get_data( struct colm_program *prg, struct input_impl_seq *is, + alph_t *dest, int length ) +{ + int copied = 0; + + /* Move over skip bytes. */ + struct seq_buf *buf = is->queue.head; + while ( true ) { + if ( buf == 0 ) { + /* Got through the in-mem buffers without copying anything. 
*/ + break; + } + + if ( is_stream( buf ) ) { + struct stream_impl *si = buf->si; + int glen = si->funcs->get_data( prg, si, dest+copied, length ); + + if ( glen == 0 ) { + //debug( REALM_INPUT, "skipping over input\n" ); + buf = buf->next; + continue; + } + + copied += glen; + length -= glen; + } + else if ( buf->type == SB_TOKEN ) + break; + else if ( buf->type == SB_IGNORE ) + break; + + if ( length == 0 ) { + //debug( REALM_INPUT, "exiting get data\n", length ); + break; + } + + buf = buf->next; + } + + return copied; +} + +/* + * Consume + */ + +static int input_consume_data( struct colm_program *prg, struct input_impl_seq *si, + int length, location_t *loc ) +{ + debug( prg, REALM_INPUT, "input_consume_data: stream %p consuming %d bytes\n", si, length ); + + int consumed = 0; + + /* Move over skip bytes. */ + while ( true ) { + struct seq_buf *buf = si->queue.head; + + if ( buf == 0 ) + break; + + if ( is_stream( buf ) ) { + struct stream_impl *sub = buf->si; + int slen = sub->funcs->consume_data( prg, sub, length, loc ); + //debug( REALM_INPUT, " got %d bytes from source\n", slen ); + + consumed += slen; + length -= slen; + } + else if ( buf->type == SB_TOKEN ) + break; + else if ( buf->type == SB_IGNORE ) + break; + else { + assert(false); + } + + if ( length == 0 ) { + //debug( REALM_INPUT, "exiting consume\n", length ); + break; + } + + struct seq_buf *seq_buf = input_stream_seq_pop_head( si ); + input_stream_stash_head( prg, si, seq_buf ); + } + + return consumed; +} + +static int input_undo_consume_data( struct colm_program *prg, struct input_impl_seq *si, + const alph_t *data, int length ) +{ + /* When we push back data we need to move backwards through the block of + * text. 
The source stream type will */ + debug( prg, REALM_INPUT, "input_undo_consume_data: stream %p undoing consume of %d bytes\n", si, length ); + + assert( length > 0 ); + long tot = length; + int offset = 0; + int remaining = length; + + while ( true ) { + if ( is_stream( si->queue.head ) ) { + struct stream_impl *sub = si->queue.head->si; + int pushed_back = sub->funcs->undo_consume_data( prg, sub, data, remaining ); + remaining -= pushed_back; + offset += pushed_back; + + if ( remaining == 0 ) + break; + } + + struct seq_buf *b = input_stream_pop_stash( prg, si ); + input_stream_seq_prepend( si, b ); + } + + return tot; +} + +static tree_t *input_consume_tree( struct colm_program *prg, struct input_impl_seq *si ) +{ + debug( prg, REALM_INPUT, "input_consume_tree: stream %p\n", si ); + + while ( si->queue.head != 0 && is_stream( si->queue.head ) ) + { + debug( prg, REALM_INPUT, " stream %p consume: clearing source type\n", si ); + struct seq_buf *seq_buf = input_stream_seq_pop_head( si ); + input_stream_stash_head( prg, si, seq_buf ); + } + + assert( si->queue.head != 0 && ( si->queue.head->type == SB_TOKEN || + si->queue.head->type == SB_IGNORE ) ); + + { + struct seq_buf *seq_buf = input_stream_seq_pop_head( si ); + input_stream_stash_head( prg, si, seq_buf ); + tree_t *tree = seq_buf->tree; + debug( prg, REALM_INPUT, " stream %p consume: tree: %p\n", si, tree ); + return tree; + } + + return 0; +} + + +static void input_undo_consume_tree( struct colm_program *prg, struct input_impl_seq *si, + tree_t *tree, int ignore ) +{ + debug( prg, REALM_INPUT, "input_undo_consume_tree: stream %p undo " + "consume tree %p\n", si, tree ); + + while ( true ) { + debug( prg, REALM_INPUT, " stream %p consume: clearing source type\n", si ); + + struct seq_buf *b = input_stream_pop_stash( prg, si ); + input_stream_seq_prepend( si, b ); + + if ( is_tree( b ) ) { + assert( b->tree->id == tree->id ); + break; + } + } +} + +/* + * Prepend + */ +static void input_prepend_data( struct 
colm_program *prg, struct input_impl_seq *si, + struct colm_location *loc, const alph_t *data, long length ) +{ + debug( prg, REALM_INPUT, "input_prepend_data: stream %p prepend data length %d\n", si, length ); + + maybe_split( prg, si ); + + char *name = loc != 0 ? (char*)loc->name : "<text1>"; + struct stream_impl *sub_si = colm_impl_new_text( name, loc, data, length ); + + struct seq_buf *new_buf = new_seq_buf(); + new_buf->type = SB_ACCUM; + new_buf->si = sub_si; + new_buf->own_si = 1; + + input_stream_seq_prepend( si, new_buf ); +} + +static int input_undo_prepend_data( struct colm_program *prg, struct input_impl_seq *si, int length ) +{ + debug( prg, REALM_INPUT, "input_undo_prepend_data: stream %p undo " + "append data length %d\n", si, length ); + + struct seq_buf *seq_buf = input_stream_seq_pop_head( si ); + free( seq_buf ); + + return 0; +} + +static void input_prepend_tree( struct colm_program *prg, struct input_impl_seq *si, + tree_t *tree, int ignore ) +{ + debug( prg, REALM_INPUT, "input_prepend_tree: stream %p prepend tree %p\n", si, tree ); + + maybe_split( prg, si ); + + /* Create a new buffer for the data. This is the easy implementation. + * Something better is needed here. It puts a max on the amount of + * data that can be pushed back to the inputStream. */ + struct seq_buf *new_buf = new_seq_buf(); + new_buf->type = ignore ? 
SB_IGNORE : SB_TOKEN; + new_buf->tree = tree; + input_stream_seq_prepend( si, new_buf ); +} + +static tree_t *input_undo_prepend_tree( struct colm_program *prg, struct input_impl_seq *si ) +{ + debug( prg, REALM_INPUT, "input_undo_prepend_tree: stream %p undo prepend tree\n", si ); + + assert( si->queue.head != 0 && ( si->queue.head->type == SB_TOKEN || + si->queue.head->type == SB_IGNORE ) ); + + struct seq_buf *seq_buf = input_stream_seq_pop_head( si ); + + tree_t *tree = seq_buf->tree; + free(seq_buf); + + debug( prg, REALM_INPUT, " stream %p tree %p\n", si, tree ); + + return tree; +} + + +static void input_prepend_stream( struct colm_program *prg, struct input_impl_seq *si, + struct colm_stream *stream ) +{ + maybe_split( prg, si ); + + /* Create a new buffer for the data. This is the easy implementation. + * Something better is needed here. It puts a max on the amount of + * data that can be pushed back to the inputStream. */ + struct seq_buf *new_buf = new_seq_buf(); + new_buf->type = SB_SOURCE; + new_buf->si = stream_to_impl( stream ); + input_stream_seq_prepend( si, new_buf ); + + assert( ((struct stream_impl_data*)new_buf->si)->type == 'D' ); +} + +static tree_t *input_undo_prepend_stream( struct colm_program *prg, struct input_impl_seq *is ) +{ + struct seq_buf *seq_buf = input_stream_seq_pop_head( is ); + free( seq_buf ); + return 0; +} + +static void input_append_data( struct colm_program *prg, struct input_impl_seq *si, + const alph_t *data, long length ) +{ + debug( prg, REALM_INPUT, "input_append_data: stream %p append data length %d\n", si, length ); + + if ( si->queue.tail == 0 || si->queue.tail->type != SB_ACCUM ) { + debug( prg, REALM_INPUT, "input_append_data: creating accum\n" ); + + struct stream_impl *sub_si = colm_impl_new_accum( "<text2>" ); + + struct seq_buf *new_buf = new_seq_buf(); + new_buf->type = SB_ACCUM; + new_buf->si = sub_si; + new_buf->own_si = 1; + + input_stream_seq_append( si, new_buf ); + } + + 
si->queue.tail->si->funcs->append_data( prg, si->queue.tail->si, data, length ); +} + +static tree_t *input_undo_append_data( struct colm_program *prg, struct input_impl_seq *si, int length ) +{ + debug( prg, REALM_INPUT, "input_undo_append_data: stream %p undo append data length %d\n", si, length ); + + while ( true ) { + struct seq_buf *buf = si->queue.tail; + + if ( buf == 0 ) + break; + + if ( is_stream( buf ) ) { + struct stream_impl *sub = buf->si; + int slen = sub->funcs->undo_append_data( prg, sub, length ); + //debug( REALM_INPUT, " got %d bytes from source\n", slen ); + //consumed += slen; + length -= slen; + } + else if ( buf->type == SB_TOKEN ) + break; + else if ( buf->type == SB_IGNORE ) + break; + else { + assert(false); + } + + if ( length == 0 ) { + //debug( REALM_INPUT, "exiting consume\n", length ); + break; + } + + struct seq_buf *seq_buf = input_stream_seq_pop_tail( si ); + free( seq_buf ); + } + return 0; +} + +static void input_append_tree( struct colm_program *prg, struct input_impl_seq *si, tree_t *tree ) +{ + debug( prg, REALM_INPUT, "input_append_tree: stream %p append tree %p\n", si, tree ); + + struct seq_buf *ad = new_seq_buf(); + + input_stream_seq_append( si, ad ); + + ad->type = SB_TOKEN; + ad->tree = tree; +} + +static tree_t *input_undo_append_tree( struct colm_program *prg, struct input_impl_seq *si ) +{ + debug( prg, REALM_INPUT, "input_undo_append_tree: stream %p undo append tree\n", si ); + + struct seq_buf *seq_buf = input_stream_seq_pop_tail( si ); + tree_t *tree = seq_buf->tree; + free( seq_buf ); + return tree; +} + +static void input_append_stream( struct colm_program *prg, struct input_impl_seq *si, + struct colm_stream *stream ) +{ + debug( prg, REALM_INPUT, "input_append_stream: stream %p append stream %p\n", si, stream ); + + struct seq_buf *ad = new_seq_buf(); + + input_stream_seq_append( si, ad ); + + ad->type = SB_SOURCE; + ad->si = stream_to_impl( stream ); + + assert( ((struct stream_impl_data*)ad->si)->type == 
'D' ); +} + +static tree_t *input_undo_append_stream( struct colm_program *prg, struct input_impl_seq *si ) +{ + debug( prg, REALM_INPUT, "input_undo_append_stream: stream %p undo append stream\n", si ); + + struct seq_buf *seq_buf = input_stream_seq_pop_tail( si ); + free( seq_buf ); + return 0; +} + +struct input_funcs_seq input_funcs = +{ + &input_get_parse_block, + &input_get_data, + + /* Consume. */ + &input_consume_data, + &input_undo_consume_data, + + &input_consume_tree, + &input_undo_consume_tree, + + 0, /* consume_lang_el */ + 0, /* undo_consume_lang_el */ + + /* Prepend */ + &input_prepend_data, + &input_undo_prepend_data, + + &input_prepend_tree, + &input_undo_prepend_tree, + + &input_prepend_stream, + &input_undo_prepend_stream, + + /* Append */ + &input_append_data, + &input_undo_append_data, + + &input_append_tree, + &input_undo_append_tree, + + &input_append_stream, + &input_undo_append_stream, + + /* EOF */ + &input_set_eof_mark, + + &input_transfer_loc, + &input_destructor, + + /* Trimming */ + &input_get_option, + &input_set_option, +}; + +struct input_impl *colm_impl_new_generic( char *name ) +{ + struct input_impl_seq *ss = (struct input_impl_seq*)malloc(sizeof(struct input_impl_seq)); + init_input_impl_seq( ss, name ); + ss->funcs = (struct input_funcs*)&input_funcs; + return (struct input_impl*)ss; +} + +input_t *colm_input_new_struct( program_t *prg ) +{ + size_t memsize = sizeof(struct colm_input); + struct colm_input *input = (struct colm_input*) malloc( memsize ); + memset( input, 0, memsize ); + colm_struct_add( prg, (struct colm_struct *)input ); + input->id = prg->rtd->struct_input_id; + input->destructor = &colm_input_destroy; + return input; +} + +input_t *colm_input_new( program_t *prg ) +{ + struct input_impl *impl = colm_impl_new_generic( colm_filename_add( prg, "<internal>" ) ); + struct colm_input *input = colm_input_new_struct( prg ); + input->impl = impl; + return input; +} + +struct input_impl *input_to_impl( input_t *ptr ) 
+{ + return ptr->impl; +} diff --git a/src/input.h b/src/input.h new file mode 100644 index 00000000..8cb20088 --- /dev/null +++ b/src/input.h @@ -0,0 +1,232 @@ +/* + * Copyright 2007-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _COLM_INPUT_H +#define _COLM_INPUT_H + +#include <stdio.h> +#include "colm.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define FSM_BUFSIZE 8192 +//#define FSM_BUFSIZE 8 + +#define INPUT_DATA 1 +/* This is for data sources to return, not for the wrapper. 
*/ +#define INPUT_EOD 2 +#define INPUT_EOF 3 +#define INPUT_EOS 4 +#define INPUT_LANG_EL 5 +#define INPUT_TREE 6 +#define INPUT_IGNORE 7 + +struct LangEl; +struct colm_tree; +struct colm_stream; +struct colm_location; +struct colm_program; +struct colm_struct; +struct colm_str; +struct colm_stream; + +struct input_impl; +struct stream_impl; + +typedef colm_alph_t alph_t; + +#define DEF_INPUT_FUNCS( input_funcs, _input_impl ) \ +struct input_funcs \ +{ \ + int (*get_parse_block)( struct colm_program *prg, struct _input_impl *si, int *pskip, alph_t **pdp, int *copied ); \ + int (*get_data)( struct colm_program *prg, struct _input_impl *si, alph_t *dest, int length ); \ + int (*consume_data)( struct colm_program *prg, struct _input_impl *si, int length, struct colm_location *loc ); \ + int (*undo_consume_data)( struct colm_program *prg, struct _input_impl *si, const alph_t *data, int length ); \ + struct colm_tree *(*consume_tree)( struct colm_program *prg, struct _input_impl *si ); \ + void (*undo_consume_tree)( struct colm_program *prg, struct _input_impl *si, struct colm_tree *tree, int ignore ); \ + struct LangEl *(*consume_lang_el)( struct colm_program *prg, struct _input_impl *si, long *bind_id, alph_t **data, long *length ); \ + void (*undo_consume_lang_el)( struct colm_program *prg, struct _input_impl *si ); \ + void (*prepend_data)( struct colm_program *prg, struct _input_impl *si, struct colm_location *loc, const alph_t *data, long len ); \ + int (*undo_prepend_data)( struct colm_program *prg, struct _input_impl *si, int length ); \ + void (*prepend_tree)( struct colm_program *prg, struct _input_impl *si, struct colm_tree *tree, int ignore ); \ + struct colm_tree *(*undo_prepend_tree)( struct colm_program *prg, struct _input_impl *si ); \ + void (*prepend_stream)( struct colm_program *prg, struct _input_impl *si, struct colm_stream *stream ); \ + struct colm_tree *(*undo_prepend_stream)( struct colm_program *prg, struct _input_impl *si ); \ + void 
(*append_data)( struct colm_program *prg, struct _input_impl *si, const alph_t *data, long length ); \ + struct colm_tree *(*undo_append_data)( struct colm_program *prg, struct _input_impl *si, int length ); \ + void (*append_tree)( struct colm_program *prg, struct _input_impl *si, struct colm_tree *tree ); \ + struct colm_tree *(*undo_append_tree)( struct colm_program *prg, struct _input_impl *si ); \ + void (*append_stream)( struct colm_program *prg, struct _input_impl *si, struct colm_stream *stream ); \ + struct colm_tree *(*undo_append_stream)( struct colm_program *prg, struct _input_impl *si ); \ + void (*set_eof_mark)( struct colm_program *prg, struct _input_impl *si, char eof_mark ); \ + void (*transfer_loc)( struct colm_program *prg, struct colm_location *loc, struct _input_impl *si ); \ + void (*destructor)( struct colm_program *prg, struct colm_tree **sp, struct _input_impl *si ); \ + int (*get_option)( struct colm_program *prg, struct _input_impl *si, int option ); \ + void (*set_option)( struct colm_program *prg, struct _input_impl *si, int option, int value ); \ +} + +#define DEF_STREAM_FUNCS( stream_funcs, _stream_impl ) \ +struct stream_funcs \ +{ \ + int (*get_parse_block)( struct colm_program *prg, struct _stream_impl *si, int *pskip, alph_t **pdp, int *copied ); \ + int (*get_data)( struct colm_program *prg, struct _stream_impl *si, alph_t *dest, int length ); \ + int (*get_data_source)( struct colm_program *prg, struct _stream_impl *si, alph_t *dest, int length ); \ + int (*consume_data)( struct colm_program *prg, struct _stream_impl *si, int length, struct colm_location *loc ); \ + int (*undo_consume_data)( struct colm_program *prg, struct _stream_impl *si, const alph_t *data, int length ); \ + void (*transfer_loc)( struct colm_program *prg, struct colm_location *loc, struct _stream_impl *si ); \ + struct colm_str_collect *(*get_collect)( struct colm_program *prg, struct _stream_impl *si ); \ + void (*flush_stream)( struct colm_program *prg, 
struct _stream_impl *si ); \ + void (*close_stream)( struct colm_program *prg, struct _stream_impl *si ); \ + void (*print_tree)( struct colm_program *prg, struct colm_tree **sp, \ + struct _stream_impl *impl, struct colm_tree *tree, int trim ); \ + struct stream_impl *(*split_consumed)( struct colm_program *prg, struct _stream_impl *si ); \ + int (*append_data)( struct colm_program *prg, struct _stream_impl *si, const alph_t *data, int len ); \ + int (*undo_append_data)( struct colm_program *prg, struct _stream_impl *si, int length ); \ + void (*destructor)( struct colm_program *prg, struct colm_tree **sp, struct _stream_impl *si ); \ + int (*get_option)( struct colm_program *prg, struct _stream_impl *si, int option ); \ + void (*set_option)( struct colm_program *prg, struct _stream_impl *si, int option, int value ); \ +} + +DEF_INPUT_FUNCS( input_funcs, input_impl ); +DEF_STREAM_FUNCS( stream_funcs, stream_impl ); + +/* List of source streams. Enables streams to be pushed/popped. */ +struct input_impl +{ + struct input_funcs *funcs; +}; + +/* List of source streams. Enables streams to be pushed/popped. */ +struct stream_impl +{ + struct stream_funcs *funcs; +}; + +enum seq_buf_type { + SB_TOKEN = 1, + SB_IGNORE, + SB_SOURCE, + SB_ACCUM +}; + +struct seq_buf +{ + enum seq_buf_type type; + char own_si; + struct colm_tree *tree; + struct stream_impl *si; + struct seq_buf *next, *prev; +}; + +/* List of source streams. Enables streams to be pushed/popped. */ +struct input_impl_seq +{ + struct input_funcs *funcs; + char type; + + char eof_mark; + char eof_sent; + + struct { + struct seq_buf *head; + struct seq_buf *tail; + } queue; + + struct seq_buf *stash; + + int consumed; + int auto_trim; +}; + +struct run_buf +{ + long length; + long offset; + struct run_buf *next, *prev; + + /* Must be at the end. We will grow this struct to add data if the input + * demands it. 
*/ + alph_t data[FSM_BUFSIZE]; +}; + +struct run_buf *new_run_buf( int sz ); + +struct stream_impl_data +{ + struct stream_funcs *funcs; + char type; + + struct { + struct run_buf *head; + struct run_buf *tail; + } queue; + + const alph_t *data; + long dlen; + int offset; + + long line; + long column; + long byte; + + char *name; + FILE *file; + + struct colm_str_collect *collect; + + int consumed; + + struct indent_impl indent; + + int *line_len; + int lines_alloc; + int lines_cur; + + int auto_trim; +}; + +void stream_impl_push_line( struct stream_impl_data *ss, int ll ); +int stream_impl_pop_line( struct stream_impl_data *ss ); + +struct input_impl *colm_impl_new_generic( char *name ); + +void update_position( struct stream_impl *input_stream, const char *data, long length ); +void undo_position( struct stream_impl *input_stream, const char *data, long length ); + +struct stream_impl *colm_stream_impl( struct colm_struct *s ); + +struct colm_str *collect_string( struct colm_program *prg, struct colm_stream *s ); +struct colm_stream *colm_stream_open_collect( struct colm_program *prg ); + +char *colm_filename_add( struct colm_program *prg, const char *fn ); +struct stream_impl *colm_impl_new_accum( char *name ); +struct stream_impl *colm_impl_consumed( char *name, int len ); +struct stream_impl *colm_impl_new_text( char *name, struct colm_location *loc, const alph_t *data, int len ); + +#ifdef __cplusplus +} +#endif + +#endif /* _COLM_INPUT_H */ + diff --git a/src/internal.h b/src/internal.h new file mode 100644 index 00000000..e6e1fa7e --- /dev/null +++ b/src/internal.h @@ -0,0 +1,33 @@ +/* + * Copyright 2007-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, 
and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _COLM_INTERNAL_H +#define _COLM_INTERNAL_H + +#include "colm.h" + +typedef struct colm_struct struct_t; +typedef struct colm_program program_t; +typedef unsigned long value_t; + +#endif /* _COLM_INTERNAL_H */ + diff --git a/src/iter.c b/src/iter.c new file mode 100644 index 00000000..66974f4a --- /dev/null +++ b/src/iter.c @@ -0,0 +1,648 @@ +/* + * Copyright 2007-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <assert.h> +#include <stdbool.h> + +#include <colm/tree.h> +#include <colm/bytecode.h> +#include <colm/program.h> + +#include "internal.h" + +void colm_init_list_iter( generic_iter_t *list_iter, tree_t **stack_root, + long arg_size, long root_size, const ref_t *root_ref, int generic_id ) +{ + list_iter->type = IT_Tree; + list_iter->root_ref = *root_ref; + list_iter->stack_root = stack_root; + list_iter->yield_size = 0; + list_iter->root_size = root_size; + list_iter->ref.kid = 0; + list_iter->ref.next = 0; + list_iter->arg_size = arg_size; + list_iter->generic_id = generic_id; +} + +void colm_list_iter_destroy( program_t *prg, tree_t ***psp, generic_iter_t *iter ) +{ + if ( (int)iter->type != 0 ) { + int i; + tree_t **sp = *psp; + long cur_stack_size = vm_ssize() - iter->root_size; + assert( iter->yield_size == cur_stack_size ); + vm_popn( iter->yield_size ); + for ( i = 0; i < iter->arg_size; i++ ) { + //colm_tree_downref( prg, sp, vm_pop_tree() ); + vm_pop_value(); + } + iter->type = 0; + *psp = sp; + } +} + +tree_t *colm_list_iter_advance( program_t *prg, tree_t ***psp, generic_iter_t *iter ) +{ + tree_t **sp = *psp; + assert( iter->yield_size == (vm_ssize() - iter->root_size) ); + + if ( iter->ref.kid == 0 ) { + /* kid_t is zero, start from the root. 
*/ + list_t *list = *((list_t**)iter->root_ref.kid); + iter->ref.kid = (kid_t*)list->head; + iter->ref.next = 0; + + //= iter->rootRef; + //iter + //iterFind( prg, psp, iter, true ); + } + else { + /* Have a previous item, continue searching from there. */ + //iterFind( prg, psp, iter, false ); + + list_el_t *list_el = (list_el_t*)iter->ref.kid; + list_el = list_el->list_next; + iter->ref.kid = (kid_t*)list_el; + iter->ref.next = 0; + } + + sp = *psp; + iter->yield_size = vm_ssize() - iter->root_size; + + return (iter->ref.kid ? prg->true_val : prg->false_val ); +} + +tree_t *colm_rev_list_iter_advance( program_t *prg, tree_t ***psp, generic_iter_t *iter ) +{ + tree_t **sp = *psp; + assert( iter->yield_size == (vm_ssize() - iter->root_size) ); + + if ( iter->ref.kid == 0 ) { + /* kid_t is zero, start from the root. */ + list_t *list = *((list_t**)iter->root_ref.kid); + iter->ref.kid = (kid_t*)list->tail; + iter->ref.next = 0; + + //= iter->rootRef; + //iter + //iterFind( prg, psp, iter, true ); + } + else { + /* Have a previous item, continue searching from there. */ + //iterFind( prg, psp, iter, false ); + + list_el_t *list_el = (list_el_t*)iter->ref.kid; + list_el = list_el->list_prev; + iter->ref.kid = (kid_t*)list_el; + iter->ref.next = 0; + } + + sp = *psp; + iter->yield_size = vm_ssize() - iter->root_size; + + return (iter->ref.kid ? prg->true_val : prg->false_val ); +} + +tree_t *colm_map_iter_advance( program_t *prg, tree_t ***psp, generic_iter_t *iter ) +{ + tree_t **sp = *psp; + assert( iter->yield_size == (vm_ssize() - iter->root_size) ); + + if ( iter->ref.kid == 0 ) { + /* kid_t is zero, start from the root. */ + map_t *map = *((map_t**)iter->root_ref.kid); + iter->ref.kid = (kid_t*)map->head; + iter->ref.next = 0; + + //= iter->rootRef; + //iter + //iterFind( prg, psp, iter, true ); + } + else { + /* Have a previous item, continue searching from there. 
*/ + //iterFind( prg, psp, iter, false ); + + map_el_t *map_el = (map_el_t*)iter->ref.kid; + map_el = map_el->next; + iter->ref.kid = (kid_t*)map_el; + iter->ref.next = 0; + } + + sp = *psp; + iter->yield_size = vm_ssize() - iter->root_size; + + return (iter->ref.kid ? prg->true_val : prg->false_val ); +} + +tree_t *colm_list_iter_deref_cur( program_t *prg, generic_iter_t *iter ) +{ + struct generic_info *gi = &prg->rtd->generic_info[iter->generic_id]; + list_el_t *el = (list_el_t*)iter->ref.kid; + struct colm_struct *s = el != 0 ? + colm_struct_container( el, gi->el_offset ) : 0; + return (tree_t*)s; +} + +value_t colm_viter_deref_cur( program_t *prg, generic_iter_t *iter ) +{ + struct generic_info *gi = &prg->rtd->generic_info[iter->generic_id]; + list_el_t *el = (list_el_t*)iter->ref.kid; + struct colm_struct *s = el != 0 ? + colm_struct_container( el, gi->el_offset ) : 0; + + value_t value = colm_struct_get_field( s, value_t, 0 ); + if ( gi->value_type == TYPE_TREE ) + colm_tree_upref( prg, (tree_t*)value ); + + return value; +} + +void colm_init_tree_iter( tree_iter_t *tree_iter, tree_t **stack_root, + long arg_size, long root_size, + const ref_t *root_ref, int search_id ) +{ + tree_iter->type = IT_Tree; + tree_iter->root_ref = *root_ref; + tree_iter->search_id = search_id; + tree_iter->stack_root = stack_root; + tree_iter->yield_size = 0; + tree_iter->root_size = root_size; + tree_iter->ref.kid = 0; + tree_iter->ref.next = 0; + tree_iter->arg_size = arg_size; +} + +void colm_init_rev_tree_iter( rev_tree_iter_t *rev_triter, tree_t **stack_root, + long arg_size, long root_size, + const ref_t *root_ref, int search_id, int children ) +{ + rev_triter->type = IT_RevTree; + rev_triter->root_ref = *root_ref; + rev_triter->search_id = search_id; + rev_triter->stack_root = stack_root; + rev_triter->yield_size = children; + rev_triter->root_size = root_size; + rev_triter->kid_at_yield = 0; + rev_triter->children = children; + rev_triter->ref.kid = 0; + 
rev_triter->ref.next = 0; + rev_triter->arg_size = arg_size; +} + +void init_user_iter( user_iter_t *user_iter, tree_t **stack_root, long root_size, + long arg_size, long search_id ) +{ + user_iter->type = IT_User; + user_iter->stack_root = stack_root; + user_iter->arg_size = arg_size; + user_iter->yield_size = 0; + user_iter->root_size = root_size; + user_iter->resume = 0; + user_iter->frame = 0; + user_iter->search_id = search_id; + + user_iter->ref.kid = 0; + user_iter->ref.next = 0; +} + + +user_iter_t *colm_uiter_create( program_t *prg, tree_t ***psp, struct function_info *fi, long search_id ) +{ + tree_t **sp = *psp; + + vm_pushn( sizeof(user_iter_t) / sizeof(word_t) ); + void *mem = vm_ptop(); + user_iter_t *uiter = mem; + + tree_t **stack_root = vm_ptop(); + long root_size = vm_ssize(); + + init_user_iter( uiter, stack_root, root_size, fi->arg_size, search_id ); + + *psp = sp; + return uiter; +} + +void uiter_init( program_t *prg, tree_t **sp, user_iter_t *uiter, + struct function_info *fi, int revert_on ) +{ + /* Set up the first yeild so when we resume it starts at the beginning. 
*/ + uiter->ref.kid = 0; + uiter->yield_size = vm_ssize() - uiter->root_size; + // uiter->frame = &uiter->stackRoot[-IFR_AA]; + + if ( revert_on ) + uiter->resume = prg->rtd->frame_info[fi->frame_id].codeWV; + else + uiter->resume = prg->rtd->frame_info[fi->frame_id].codeWC; +} + + +void colm_tree_iter_destroy( program_t *prg, tree_t ***psp, tree_iter_t *iter ) +{ + if ( (int)iter->type != 0 ) { + int i; + tree_t **sp = *psp; + long cur_stack_size = vm_ssize() - iter->root_size; + assert( iter->yield_size == cur_stack_size ); + vm_popn( iter->yield_size ); + for ( i = 0; i < iter->arg_size; i++ ) + colm_tree_downref( prg, sp, vm_pop_tree() ); + iter->type = 0; + *psp = sp; + } +} + +void colm_rev_tree_iter_destroy( struct colm_program *prg, tree_t ***psp, rev_tree_iter_t *riter ) +{ + if ( (int)riter->type != 0 ) { + int i; + tree_t **sp = *psp; + long cur_stack_size = vm_ssize() - riter->root_size; + assert( riter->yield_size == cur_stack_size ); + vm_popn( riter->yield_size ); + for ( i = 0; i < riter->arg_size; i++ ) + colm_tree_downref( prg, sp, vm_pop_tree() ); + riter->type = 0; + *psp = sp; + } +} + +void colm_uiter_destroy( program_t *prg, tree_t ***psp, user_iter_t *uiter ) +{ + if ( uiter != 0 && (int)uiter->type != 0 ) { + tree_t **sp = *psp; + + /* We should always be coming from a yield. The current stack size will be + * nonzero and the stack size in the iterator will be correct. */ + long cur_stack_size = vm_ssize() - uiter->root_size; + assert( uiter->yield_size == cur_stack_size ); + + vm_popn( uiter->yield_size ); + vm_popn( sizeof(user_iter_t) / sizeof(word_t) ); + + uiter->type = 0; + + *psp = sp; + } +} + +void colm_uiter_unwind( program_t *prg, tree_t ***psp, user_iter_t *uiter ) +{ + if ( uiter != 0 && (int)uiter->type != 0 ) { + tree_t **sp = *psp; + + /* We should always be coming from a yield. The current stack size will be + * nonzero and the stack size in the iterator will be correct. 
*/ + long cur_stack_size = vm_ssize() - uiter->root_size; + assert( uiter->yield_size == cur_stack_size ); + + long arg_size = uiter->arg_size; + + vm_popn( uiter->yield_size ); + vm_popn( sizeof(user_iter_t) / sizeof(word_t) ); + + /* The IN_PREP_ARGS stack data. */ + vm_popn( arg_size ); + vm_pop_value(); + + uiter->type = 0; + + *psp = sp; + } +} + +tree_t *tree_iter_deref_cur( tree_iter_t *iter ) +{ + return iter->ref.kid == 0 ? 0 : iter->ref.kid->tree; +} + +void set_triter_cur( program_t *prg, tree_iter_t *iter, tree_t *tree ) +{ + iter->ref.kid->tree = tree; +} + +void set_uiter_cur( program_t *prg, user_iter_t *uiter, tree_t *tree ) +{ + uiter->ref.kid->tree = tree; +} + +void split_iter_cur( program_t *prg, tree_t ***psp, tree_iter_t *iter ) +{ + if ( iter->ref.kid == 0 ) + return; + + split_ref( prg, psp, &iter->ref ); +} + +void iter_find( program_t *prg, tree_t ***psp, tree_iter_t *iter, int try_first ) +{ + int any_tree = iter->search_id == prg->rtd->any_id; + tree_t **top = iter->stack_root; + kid_t *child; + tree_t **sp = *psp; + +rec_call: + if ( try_first && ( iter->ref.kid->tree->id == iter->search_id || any_tree ) ) { + *psp = sp; + return; + } + else { + child = tree_child( prg, iter->ref.kid->tree ); + if ( child != 0 ) { + vm_contiguous( 2 ); + vm_push_ref( iter->ref.next ); + vm_push_kid( iter->ref.kid ); + iter->ref.kid = child; + iter->ref.next = (ref_t*)vm_ptop(); + while ( iter->ref.kid != 0 ) { + try_first = true; + goto rec_call; + rec_return: + iter->ref.kid = iter->ref.kid->next; + } + iter->ref.kid = vm_pop_kid(); + iter->ref.next = vm_pop_ref(); + } + } + + if ( top != vm_ptop() ) + goto rec_return; + + iter->ref.kid = 0; + *psp = sp; +} + +tree_t *tree_iter_advance( program_t *prg, tree_t ***psp, tree_iter_t *iter ) +{ + tree_t **sp = *psp; + assert( iter->yield_size == (vm_ssize() - iter->root_size) ); + + if ( iter->ref.kid == 0 ) { + /* kid_t is zero, start from the root. 
*/ + iter->ref = iter->root_ref; + iter_find( prg, psp, iter, true ); + } + else { + /* Have a previous item, continue searching from there. */ + iter_find( prg, psp, iter, false ); + } + + sp = *psp; + iter->yield_size = vm_ssize() - iter->root_size; + + return (iter->ref.kid ? prg->true_val : prg->false_val ); +} + +tree_t *tree_iter_next_child( program_t *prg, tree_t ***psp, tree_iter_t *iter ) +{ + tree_t **sp = *psp; + assert( iter->yield_size == (vm_ssize() - iter->root_size) ); + kid_t *kid = 0; + + if ( iter->ref.kid == 0 ) { + /* kid_t is zero, start from the first child. */ + kid_t *child = tree_child( prg, iter->root_ref.kid->tree ); + + if ( child == 0 ) + iter->ref.next = 0; + else { + /* Make a reference to the root. */ + vm_contiguous( 2 ); + vm_push_ref( iter->root_ref.next ); + vm_push_kid( iter->root_ref.kid ); + iter->ref.next = (ref_t*)vm_ptop(); + + kid = child; + } + } + else { + /* Start at next. */ + kid = iter->ref.kid->next; + } + + if ( iter->search_id != prg->rtd->any_id ) { + /* Have a previous item, go to the next sibling. */ + while ( kid != 0 && kid->tree->id != iter->search_id ) + kid = kid->next; + } + + iter->ref.kid = kid; + iter->yield_size = vm_ssize() - iter->root_size; + *psp = sp; + return ( iter->ref.kid ? prg->true_val : prg->false_val ); +} + +tree_t *tree_rev_iter_prev_child( program_t *prg, tree_t ***psp, rev_tree_iter_t *iter ) +{ + tree_t **sp = *psp; + assert( iter->yield_size == ( vm_ssize() - iter->root_size ) ); + + if ( iter->kid_at_yield != iter->ref.kid ) { + /* Need to reload the kids. */ + vm_popn( iter->children ); + + int c; + kid_t *kid = tree_child( prg, iter->root_ref.kid->tree ); + for ( c = 0; c < iter->children; c++ ) { + vm_push_kid( kid ); + kid = kid->next; + } + } + + if ( iter->ref.kid != 0 ) { + vm_pop_ignore(); + iter->children -= 1; + } + + if ( iter->search_id != prg->rtd->any_id ) { + /* Have a previous item, go to the next sibling. 
*/ + while ( iter->children > 0 && ((kid_t*)(vm_top()))->tree->id != iter->search_id ) { + iter->children -= 1; + vm_pop_ignore(); + } + } + + if ( iter->children == 0 ) { + iter->ref.next = 0; + iter->ref.kid = 0; + } + else { + iter->ref.next = &iter->root_ref; + iter->ref.kid = (kid_t*)vm_top(); + } + + /* We will use this to detect a split above the iterated tree. */ + iter->kid_at_yield = iter->ref.kid; + + iter->yield_size = vm_ssize() - iter->root_size; + + *psp = sp; + + return (iter->ref.kid ? prg->true_val : prg->false_val ); +} + +/* + * Move iter->ref to the next node whose id equals iter->search_id, or to the + * next node of any kind when search_id is prg->rtd->any_id. When try_first is + * set the current node is tested before anything else. + * + * Recursion is emulated with labels: going down pushes the current ref.next + * and ref.kid on the VM stack, and rec_return resumes the sibling loop of the + * level that was saved. When nothing is left iter->ref.kid is set to 0. The + * local stack pointer is written back through psp on both exits. + */ +void iter_find_repeat( program_t *prg, tree_t ***psp, tree_iter_t *iter, int try_first ) +{ + tree_t **sp = *psp; + int any_tree = iter->search_id == prg->rtd->any_id; + tree_t **top = iter->stack_root; + kid_t *child; + +rec_call: + if ( try_first && ( iter->ref.kid->tree->id == iter->search_id || any_tree ) ) { + *psp = sp; + return; + } + else { + /* The repeat iterator is just like the normal top-down-left-right, + * except it only goes into the children of a node if the node is the + * root of the iteration, or if it does not have any neighbours to the + * right. */ + if ( top == vm_ptop() || iter->ref.kid->next == 0 ) { + child = tree_child( prg, iter->ref.kid->tree ); + if ( child != 0 ) { + vm_contiguous( 2 ); + vm_push_ref( iter->ref.next ); + vm_push_kid( iter->ref.kid ); + iter->ref.kid = child; + iter->ref.next = (ref_t*)vm_ptop(); + while ( iter->ref.kid != 0 ) { + try_first = true; + goto rec_call; + rec_return: + iter->ref.kid = iter->ref.kid->next; + } + iter->ref.kid = vm_pop_kid(); + iter->ref.next = vm_pop_ref(); + } + } + } + + /* Still inside a saved level: resume its sibling loop. */ + if ( top != vm_ptop() ) + goto rec_return; + + iter->ref.kid = 0; + *psp = sp; +} + +tree_t *tree_iter_next_repeat( program_t *prg, tree_t ***psp, tree_iter_t *iter ) +{ + tree_t **sp = *psp; + assert( iter->yield_size == ( vm_ssize() - iter->root_size ) ); + + if ( iter->ref.kid == 0 ) { + /* kid_t is zero, start from the root. 
*/ + iter->ref = iter->root_ref; + iter_find_repeat( prg, psp, iter, true ); + } + else { + /* Have a previous item, continue searching from there. */ + iter_find_repeat( prg, psp, iter, false ); + } + + sp = *psp; + iter->yield_size = vm_ssize() - iter->root_size; + + return (iter->ref.kid ? prg->true_val : prg->false_val ); +} + +/* + * Position iter->ref on the next match of the reverse repeat iteration. A + * node matches when its id equals iter->search_id, or always when search_id + * is prg->rtd->any_id. + * + * With try_first set, the walk first goes down from the current node: it + * moves right along siblings and, at the root or at a last sibling, pushes + * ref.next and ref.kid and enters the children, until it reaches a node + * without children. That node is the first one tested. Without try_first the + * walk continues from the current node using the entries saved on the VM + * stack. iter->ref.kid is set to 0 once the stack is back at + * iter->stack_root. The local stack pointer is written back through psp on + * every exit. + */ +void iter_find_rev_repeat( program_t *prg, tree_t ***psp, tree_iter_t *iter, int try_first ) +{ + tree_t **sp = *psp; + int any_tree = iter->search_id == prg->rtd->any_id; + tree_t **top = iter->stack_root; + kid_t *child; + + if ( try_first ) { + while ( true ) { + if ( top == vm_ptop() || iter->ref.kid->next == 0 ) { + child = tree_child( prg, iter->ref.kid->tree ); + + if ( child == 0 ) + break; + vm_contiguous( 2 ); + vm_push_ref( iter->ref.next ); + vm_push_kid( iter->ref.kid ); + iter->ref.kid = child; + iter->ref.next = (ref_t*)vm_ptop(); + } + else { + /* Not the top and there is a next, go over to it. */ + iter->ref.kid = iter->ref.kid->next; + } + } + + goto first; + } + + while ( true ) { + if ( top == vm_ptop() ) { + /* Back at the root of the iteration. Publish the stack pointer + * here too, the caller re-reads *psp to compute yield_size. */ + iter->ref.kid = 0; + *psp = sp; + return; + } + + if ( iter->ref.kid->next == 0 ) { + /* Go up one and then down. Remember we can't use iter->ref.next + * because the chain may have been split, setting it null (to + * prevent repeated walks up). */ + ref_t *ref = (ref_t*)vm_ptop(); + iter->ref.kid = tree_child( prg, ref->kid->tree ); + } + else { + iter->ref.kid = vm_pop_kid(); + iter->ref.next = vm_pop_ref(); + } +first: + if ( iter->ref.kid->tree->id == iter->search_id || any_tree ) { + *psp = sp; + return; + } + } + *psp = sp; + return; +} + + +tree_t *tree_iter_prev_repeat( program_t *prg, tree_t ***psp, tree_iter_t *iter ) +{ + tree_t **sp = *psp; + assert( iter->yield_size == (vm_ssize() - iter->root_size) ); + + if ( iter->ref.kid == 0 ) { + /* kid_t is zero, start from the root. 
*/ + iter->ref = iter->root_ref; + iter_find_rev_repeat( prg, psp, iter, true ); + } + else { + /* Have a previous item, continue searching from there. */ + iter_find_rev_repeat( prg, psp, iter, false ); + } + + sp = *psp; + iter->yield_size = vm_ssize() - iter->root_size; + + return (iter->ref.kid ? prg->true_val : prg->false_val ); +} + + + diff --git a/src/keyops.h b/src/keyops.h new file mode 100644 index 00000000..ed58db8d --- /dev/null +++ b/src/keyops.h @@ -0,0 +1,196 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + + +#ifndef _COLM_KEYOPS_H +#define _COLM_KEYOPS_H + +#include <fstream> +#include <climits> + +enum MarkType +{ + MarkNone = 0, + MarkMark +}; + +typedef unsigned long long Size; + +/* This key struct does not implement unsigned. 
*/ +struct Key +{ +private: + long key; + +public: + friend inline Key operator+(const Key key1, const Key key2); + friend inline Key operator-(const Key key1, const Key key2); + + friend inline bool operator<( const Key key1, const Key key2 ); + friend inline bool operator<=( const Key key1, const Key key2 ); + friend inline bool operator>( const Key key1, const Key key2 ); + friend inline bool operator>=( const Key key1, const Key key2 ); + friend inline bool operator==( const Key key1, const Key key2 ); + friend inline bool operator!=( const Key key1, const Key key2 ); + + friend struct KeyOps; + + Key( ) {} + Key( const Key &key ) : key(key.key) {} + Key( long key ) : key(key) {} + + long getVal() const { return key; }; + + long long getLongLong() const; + + bool isUpper() const { return ( 'A' <= key && key <= 'Z' ); } + bool isLower() const { return ( 'a' <= key && key <= 'z' ); } + bool isPrintable() const { return ( 32 <= key && key < 127 ); } + + Key toUpper() const + { return Key( 'A' + ( key - 'a' ) ); } + Key toLower() const + { return Key( 'a' + ( key - 'A' ) ); } + + void operator+=( const Key other ) + { key += other.key; } + + void operator-=( const Key other ) + { key -= other.key; } + + void operator|=( const Key other ) + { key |= other.key; } + + /* Decrement. Needed only for ranges. */ + inline void decrement(); + inline void increment(); +}; + +struct HostType +{ + const char *data1; + const char *data2; + bool isSigned; + long long minVal; + long long maxVal; + unsigned int size; +}; + +struct HostLang +{ + HostType *hostTypes; + int numHostTypes; + int defaultHostType; +}; + +extern HostLang *hostLang; +extern HostLang hostLangC; + +/* An abstraction of the key operators that manages key operations such as + * comparison and increment according the signedness of the key. */ +struct KeyOps +{ + /* Default to signed alphabet. 
*/ + KeyOps() : alphType(0) {} + + Key minKey, maxKey; + const HostType *alphType; + + void setAlphType( const HostType *alphType ) + { + this->alphType = alphType; + minKey = (long) alphType->minVal; + maxKey = (long) alphType->maxVal; + } + + /* Compute the distance between two keys. */ + Size span( Key key1, Key key2 ) + { + return (unsigned long long)( (long long)key2.key - (long long)key1.key + 1) ; + } + + Size alphSize() + { return span( minKey, maxKey ); } +}; + +inline bool operator<( const Key key1, const Key key2 ) +{ + return key1.key < key2.key; +} + +inline bool operator<=( const Key key1, const Key key2 ) +{ + return key1.key <= key2.key; +} + +inline bool operator>( const Key key1, const Key key2 ) +{ + return key1.key > key2.key; +} + +inline bool operator>=( const Key key1, const Key key2 ) +{ + return key1.key >= key2.key; +} + +inline bool operator==( const Key key1, const Key key2 ) +{ + return key1.key == key2.key; +} + +inline bool operator!=( const Key key1, const Key key2 ) +{ + return key1.key != key2.key; +} + +/* Decrement. Needed only for ranges. */ +inline void Key::decrement() +{ + key = key - 1; +} + +/* Increment. Needed only for ranges. 
*/ +inline void Key::increment() +{ + key = key + 1; +} + +inline long long Key::getLongLong() const +{ + return (long long) key; +} + +inline Key operator+(const Key key1, const Key key2) +{ + return Key( key1.key + key2.key ); +} + +inline Key operator-(const Key key1, const Key key2) +{ + return Key( key1.key - key2.key ); +} + +const char *findFileExtension( const char *stemFile ); +char *fileNameFromStem( const char *stemFile, const char *suffix ); + +#endif /* _COLM_KEYOPS_H */ + diff --git a/src/list.c b/src/list.c new file mode 100644 index 00000000..2003674a --- /dev/null +++ b/src/list.c @@ -0,0 +1,255 @@ +/* + * Copyright 2007-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <string.h> +#include <stdlib.h> +#include <assert.h> + +#include <colm/pdarun.h> +#include <colm/program.h> +#include <colm/struct.h> +#include <colm/bytecode.h> + +static void colm_list_add_after( list_t *list, list_el_t *prev_el, list_el_t *new_el ); +static void colm_list_add_before( list_t *list, list_el_t *next_el, list_el_t *new_el); +list_el_t *colm_list_detach( list_t *list, list_el_t *el ); + +void colm_list_prepend( list_t *list, list_el_t *new_el ) +{ + colm_list_add_before( list, list->head, new_el ); +} + +void colm_list_append( list_t *list, list_el_t *new_el ) +{ + colm_list_add_after( list, list->tail, new_el ); +} + +list_el_t *colm_list_detach_head( list_t *list ) +{ + return colm_list_detach( list, list->head ); +} + +list_el_t *colm_list_detach_tail( list_t *list ) +{ + return colm_list_detach( list, list->tail ); +} + +long colm_list_length( list_t *list ) +{ + return list->list_len; +} + +void colm_vlist_append( struct colm_program *prg, list_t *list, value_t value ) +{ + struct colm_struct *s = colm_struct_new( prg, list->generic_info->el_struct_id ); + + colm_struct_set_field( s, value_t, 0, value ); + + list_el_t *list_el = colm_struct_get_addr( s, list_el_t*, list->generic_info->el_offset ); + + colm_list_append( list, list_el ); +} + +void colm_vlist_prepend( struct colm_program *prg, list_t *list, value_t value ) +{ + struct colm_struct *s = colm_struct_new( prg, list->generic_info->el_struct_id ); + + colm_struct_set_field( s, value_t, 0, value ); + + list_el_t *list_el = colm_struct_get_addr( s, list_el_t*, list->generic_info->el_offset ); + + colm_list_prepend( list, list_el ); +} + +/* + * Detach the tail element of a value list and return the value stored in + * field 0 of its containing struct. When the list's value type is TYPE_TREE + * the returned tree is upref'd. An empty list yields 0 and is left untouched. + */ +value_t colm_vlist_detach_tail( struct colm_program *prg, list_t *list ) +{ + list_el_t *list_el = list->tail; + + /* Nothing to detach, do not hand a null element to the helpers below. */ + if ( list_el == 0 ) + return 0; + + colm_list_detach( list, list_el ); + + struct colm_struct *s = colm_generic_el_container( prg, list_el, + (list->generic_info - prg->rtd->generic_info) ); + + value_t val = colm_struct_get_field( s, value_t, 0 ); + + if ( 
list->generic_info->value_type == TYPE_TREE ) + colm_tree_upref( prg, (tree_t*)val ); + + return val; +} + +/* + * Detach the head element of a value list and return the value stored in + * field 0 of its containing struct. When the list's value type is TYPE_TREE + * the returned tree is upref'd. An empty list yields 0 and is left untouched. + */ +value_t colm_vlist_detach_head( struct colm_program *prg, list_t *list ) +{ + list_el_t *list_el = list->head; + + /* Nothing to detach, do not hand a null element to the helpers below. */ + if ( list_el == 0 ) + return 0; + + colm_list_detach( list, list_el ); + + struct colm_struct *s = colm_generic_el_container( prg, list_el, + (list->generic_info - prg->rtd->generic_info) ); + + value_t val = colm_struct_get_field( s, value_t, 0 ); + + if ( list->generic_info->value_type == TYPE_TREE ) + colm_tree_upref( prg, (tree_t*) val ); + + return val; +} + + +/* Link new_el into list directly after prev_el. A null prev_el inserts at + * the head. */ +static void colm_list_add_after( list_t *list, list_el_t *prev_el, list_el_t *new_el ) +{ + /* Set the previous pointer of new_el to prev_el. We do + * this regardless of the state of the list. */ + new_el->list_prev = prev_el; + + /* Set forward pointers. */ + if (prev_el == 0) { + /* There was no prev_el, we are inserting at the head. */ + new_el->list_next = list->head; + list->head = new_el; + } + else { + /* There was a prev_el, we can access previous next. */ + new_el->list_next = prev_el->list_next; + prev_el->list_next = new_el; + } + + /* Set reverse pointers. */ + if (new_el->list_next == 0) { + /* There is no next element. Set the tail pointer. */ + list->tail = new_el; + } + else { + /* There is a next element. Set its prev pointer. */ + new_el->list_next->list_prev = new_el; + } + + /* Update list length. */ + list->list_len++; +} + +/* Link new_el into list directly before next_el. A null next_el inserts at + * the tail. */ +static void colm_list_add_before( list_t *list, list_el_t *next_el, list_el_t *new_el) +{ + /* Set the next pointer of the new element to next_el. We do + * this regardless of the state of the list. */ + new_el->list_next = next_el; + + /* Set reverse pointers. */ + if (next_el == 0) { + /* There is no next element. We are inserting at the tail. */ + new_el->list_prev = list->tail; + list->tail = new_el; + } + else { + /* There is a next element and we can access next's previous. 
*/ + new_el->list_prev = next_el->list_prev; + next_el->list_prev = new_el; + } + + /* Set forward pointers. */ + if (new_el->list_prev == 0) { + /* There is no previous element. Set the head pointer. */ + list->head = new_el; + } + else { + /* There is a previous element, set its next pointer to new_el. */ + new_el->list_prev->list_next = new_el; + } + + list->list_len++; +} + +/* + * Unlink el from list and return it. A null el, which is what the head and + * tail wrappers pass for an empty list, is returned as 0 and the list is + * left untouched. + */ +list_el_t *colm_list_detach( list_t *list, list_el_t *el ) +{ + if ( el == 0 ) + return 0; + + /* Set forward pointers to skip over el. */ + if (el->list_prev == 0) + list->head = el->list_next; + else + el->list_prev->list_next = el->list_next; + + /* Set reverse pointers to skip over el. */ + if (el->list_next == 0) + list->tail = el->list_prev; + else + el->list_next->list_prev = el->list_prev; + + /* Update List length and return element we detached. */ + list->list_len--; + return el; +} + +/* Destructor installed on every list by colm_list_new. Does nothing. */ +void colm_list_destroy( struct colm_program *prg, tree_t **sp, struct colm_struct *s ) +{ +} + +/* + * Allocate a zeroed list, register it with the program through + * colm_struct_add and set its id and destructor. Returns 0 when the + * allocation fails. + */ +list_t *colm_list_new( struct colm_program *prg ) +{ + /* calloc returns zeroed storage, so no separate memset is needed. */ + struct colm_list *list = (struct colm_list*) calloc( 1, sizeof(struct colm_list) ); + if ( list == 0 ) + return 0; + + colm_struct_add( prg, (struct colm_struct *)list ); + list->id = prg->rtd->struct_inbuilt_id; + list->destructor = &colm_list_destroy; + return list; +} + +struct colm_struct *colm_list_get( struct colm_program *prg, + list_t *list, word_t gen_id, word_t field ) +{ + struct generic_info *gi = &prg->rtd->generic_info[gen_id]; + list_el_t *result = 0; + switch ( field ) { + case 0: + result = list->head; + break; + case 1: + result = list->tail; + break; + default: + assert( 0 ); + break; + } + + struct colm_struct *s = result != 0 ? 
+ colm_struct_container( result, gi->el_offset ) : 0; + return s; +} + +struct colm_struct *colm_list_el_get( struct colm_program *prg, + list_el_t *list_el, word_t gen_id, word_t field ) +{ + struct generic_info *gi = &prg->rtd->generic_info[gen_id]; + list_el_t *result = 0; + switch ( field ) { + case 0: + result = list_el->list_prev; + break; + case 1: + result = list_el->list_next; + break; + default: + assert( 0 ); + break; + } + + struct colm_struct *s = result != 0 ? + colm_struct_container( result, gi->el_offset ) : 0; + return s; +} diff --git a/src/lmparse.kh b/src/lmparse.kh new file mode 100644 index 00000000..13977a9e --- /dev/null +++ b/src/lmparse.kh @@ -0,0 +1,86 @@ +/* + * Copyright 2001-2007, 2013 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef LMPARSE_H +#define LMPARSE_H + +#include <iostream> +#include "avltree.h" +#include "parsedata.h" +#include "parser.h" + +struct ColmParser +: + public BaseParser +{ + ColmParser( Compiler *pd ) + : BaseParser( pd ) + {} + + %%{ + parser ColmParser; + + # Use a class for tokens. + token uses class Token; + + # Atoms. + token TK_Word, TK_Literal, TK_SingleLit, TK_DoubleLit, TK_Number, TK_UInt, + TK_Hex, KW_Nil, KW_True, KW_False; + + # General tokens. + token TK_DotDot, TK_ColonGt, TK_ColonGtGt, TK_LtColon, + TK_DoubleArrow, TK_StarStar, TK_NameSep, TK_DashDash, TK_DoubleEql, + TK_NotEql, TK_DoubleColon, TK_LessEql, TK_GrtrEql, TK_RightArrow, + TK_LitPat, TK_AmpAmp, TK_BarBar, TK_SqOpen, TK_SqOpenNeg, TK_SqClose, + TK_Dash, TK_ReChar, TK_LtLt; + + # Defining things. + token KW_Rl, KW_Def, KW_Lex, KW_Context, KW_Ignore, KW_Token, KW_Commit, KW_Namespace, KW_End, + KW_Literal, KW_ReduceFirst, KW_Map, KW_List, KW_Vector, KW_Parser, KW_Global, KW_Export, + KW_Iter, KW_Reject, KW_Ptr, KW_Ref, KW_Deref; + + # Language. + token KW_If, KW_While, KW_Else, KW_Elsif, KW_For, KW_Return, KW_Yield, KW_In, + KW_Break, KW_PrintXMLAC, KW_PrintXML, KW_Print, KW_PrintStream, KW_Require; + + # Patterns. + token KW_Match, KW_Construct, KW_Parse, KW_ParseStop, KW_New, KW_MakeToken, + KW_MakeTree, KW_TypeId, KW_Alias, KW_Send, KW_Ni; + + token KW_Include, KW_Preeof; + + token KW_Left, KW_Right, KW_Nonassoc, KW_Prec; + + }%% + + %% write instance_data; + + /* Report an error encountered by the parser. 
*/ + ostream &parse_error( int tokId, Token &token ); + void init(); + int parseLangEl( int type, const Token *token ); + int token( InputLoc &loc, int tokId, char *tokstart, int toklen ); +}; + +%% write token_defs; + +#endif diff --git a/src/lmparse.kl b/src/lmparse.kl new file mode 100644 index 00000000..b64bd344 --- /dev/null +++ b/src/lmparse.kl @@ -0,0 +1,2139 @@ +/* + * Copyright 2006-2012 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <iostream> +#include <errno.h> + +#include "config.h" +#include "lmparse.h" +#include "global.h" +#include "input.h" + +using std::cout; +using std::cerr; +using std::endl; + +%%{ + +parser ColmParser; + +include "lmparse.kh"; + +start: root_item_list + final { + pd->rootCodeBlock = CodeBlock::cons( $1->stmtList, 0 ); + }; + +nonterm root_item_list uses lang_stmt_list; + +root_item_list: root_item_list root_item + final { + $$->stmtList = appendStatement( $1->stmtList, $2->stmt ); + }; + +root_item_list: + final { + $$->stmtList = new StmtList; + }; + +nonterm root_item uses statement; + +root_item: literal_def commit final { $$->stmt = 0; }; +root_item: rl_def commit final { $$->stmt = 0; }; +root_item: token_def commit final { $$->stmt = 0; }; +root_item: cfl_def commit final { $$->stmt = 0; }; +root_item: region_def commit final { $$->stmt = 0; }; +root_item: context_def commit final { $$->stmt = 0; }; +root_item: namespace_def commit final { $$->stmt = 0; }; +root_item: function_def commit final { $$->stmt = 0; }; +root_item: iter_def commit final { $$->stmt = 0; }; +root_item: global_def commit final { $$->stmt = $1->stmt; }; +root_item: export_def commit final { $$->stmt = 0; }; +root_item: statement commit final { $$->stmt = $1->stmt; }; +root_item: pre_eof commit final { $$->stmt = 0; }; +root_item: precedence commit final { $$->stmt = 0; }; +root_item: typedef commit final { $$->stmt = 0; }; + +nonterm block_open +{ + ObjectDef *localFrame; +}; + +block_open: '{' + final { + $$->localFrame = blockOpen(); + }; + +block_close: '}' + final { + blockClose(); + }; + + +iter_def: + KW_Iter TK_Word '(' opt_param_list ')' block_open lang_stmt_list block_close + final { + iterDef( $7->stmtList, $6->localFrame, $4->paramList, $2->data ); + }; + +function_def: + type_ref TK_Word '(' opt_param_list ')' block_open lang_stmt_list block_close + final { + functionDef( $7->stmtList, $6->localFrame, $4->paramList, $1->typeRef, $2->data ); + }; + +nonterm 
opt_param_list uses param_list; + +opt_param_list: param_list + final { + $$->paramList = $1->paramList; + }; + +opt_param_list: + final { + $$->paramList = new ParameterList; + }; + +nonterm param_list +{ + ParameterList *paramList; +}; + +param_list: param_list param_var_def + final { + $$->paramList = appendParam( $1->paramList, $2->objField ); + }; + +param_list: param_var_def + final { + $$->paramList = appendParam( new ParameterList, $1->objField ); + }; + +nonterm param_var_def uses var_def; + +param_var_def: TK_Word ':' type_ref + final { + $$->objField = addParam( $1->loc, $3->typeRef, $1->data ); + }; +param_var_def: TK_Word ':' reference_type_ref + final { + $$->objField = addParam( $1->loc, $3->typeRef, $1->data ); + }; + +nonterm reference_type_ref uses type_ref; + +reference_type_ref: KW_Ref type_ref + final { + $$->typeRef = TypeRef::cons( $1->loc, TypeRef::Ref, $2->typeRef ); + }; + +nonterm global_def uses statement; + +global_def: KW_Global var_def opt_def_init + final { + $$->stmt = globalDef( $2->objField, $3->expr, $3->assignType ); + }; + +nonterm export_def uses statement; + +export_def: KW_Export var_def opt_def_init + final { + $$->stmt = exportStmt( $2->objField, $3->assignType, $3->expr ); + }; + +precedence: + pred_type pred_token_list + final { + precedenceStmt( $1->predType, $2->predDeclList ); + }; + +nonterm pred_type +{ + PredType predType; +}; + +pred_type: KW_Left final { $$->predType = PredLeft; }; +pred_type: KW_Right final { $$->predType = PredRight; }; +pred_type: KW_Nonassoc final { $$->predType = PredNonassoc; }; + +nonterm pred_token_list +{ + PredDeclList *predDeclList; +}; + +pred_token_list: + pred_token_list ',' pred_token + final { + $$->predDeclList = $1->predDeclList; + $$->predDeclList->append( $3->predDecl ); + }; + +pred_token_list: + pred_token + final { + $$->predDeclList = new PredDeclList; + $$->predDeclList->append( $1->predDecl ); + }; + +nonterm pred_token +{ + PredDecl *predDecl; +}; + +pred_token: + 
region_qual TK_Word + final { + $$->predDecl = predTokenName( $2->loc, $1->nspaceQual, $2->data ); + }; + +pred_token: + region_qual TK_Literal + final { + $$->predDecl = predTokenLit( $2->loc, $2->data, $1->nspaceQual ); + }; + +typedef: + KW_Alias TK_Word type_ref + final { + alias( $1->loc, $2->data, $3->typeRef ); + }; + +cfl_def: + cfl_def_head obj_var_list opt_reduce_first cfl_prod_list + final { + $2->objectDef->name = $1->name; + NtDef *ntDef = NtDef::cons( $1->name, namespaceStack.top(), + contextStack.top(), $3->reduceFirst ); + + cflDef( ntDef, $2->objectDef, $4->defList ); + }; + +nonterm class cfl_def_head +{ + String name; +}; + +cfl_def_head: KW_Def TK_Word + final { + $$->name = $2->data; + }; + +nonterm cfl_prod_list +{ + LelDefList *defList; +}; + +cfl_prod_list: cfl_prod_list '|' define_prod + final { + $$->defList = prodAppend( $1->defList, $3->definition ); + }; +cfl_prod_list: define_prod + final { + $$->defList = prodAppend( new LelDefList, $1->definition ); + }; + +nonterm opt_reduce_first +{ + bool reduceFirst; +}; + +opt_reduce_first: + KW_ReduceFirst + final { + $$->reduceFirst = true; + }; +opt_reduce_first: + final { + $$->reduceFirst = false; + }; + +nonterm opt_prec +{ + LangEl *predOf; +}; + +opt_prec: + final { + $$->predOf = 0; + }; + +opt_prec: + KW_Prec pred_token + final { + //$$->predOf = $2->factor->langEl; + assert(false); + }; + +nonterm define_prod +{ + Production *definition; +}; + +define_prod: '[' prod_el_list ']' opt_commit opt_reduce_code opt_prec + final { + $$->definition = production( $1->loc, $2->list, $4->commit, + $5->codeBlock, $6->predOf ); + }; + +nonterm obj_var_list +{ + ObjectDef *objectDef; +}; + +obj_var_list: obj_var_list var_def + final { + objVarDef( $1->objectDef, $2->objField ); + $$->objectDef = $1->objectDef; + }; + +obj_var_list: + final { + $$->objectDef = ObjectDef::cons( ObjectDef::UserType, + String(), pd->nextObjectId++ ); + }; + + +nonterm type_ref +{ + TypeRef *typeRef; +}; + +type_ref: 
basic_type_ref + final { + $$->typeRef = $1->typeRef; + }; + +type_ref: KW_Map '<' type_ref type_ref '>' + final { + $$->typeRef = TypeRef::cons( $1->loc, TypeRef::Map, + 0, $3->typeRef, $4->typeRef ); + }; + +type_ref: KW_List '<' type_ref '>' + final { + $$->typeRef = TypeRef::cons( $1->loc, TypeRef::List, + 0, $3->typeRef, 0 ); + }; +type_ref: KW_Vector '<' type_ref '>' + final { + $$->typeRef = TypeRef::cons( $1->loc, TypeRef::Vector, + 0, $3->typeRef, 0 ); + }; +type_ref: KW_Parser '<' type_ref '>' + final { + $$->typeRef = TypeRef::cons( $1->loc, TypeRef::Parser, + 0, $3->typeRef, 0 ); + }; + +nonterm basic_type_ref uses type_ref; + +basic_type_ref: region_qual TK_Word opt_repeat + final { + $$->typeRef = TypeRef::cons( $2->loc, $1->nspaceQual, $2->data, $3->repeatType ); + }; + +basic_type_ref: KW_Ptr region_qual TK_Word opt_repeat + final { + TypeRef *inner = TypeRef::cons( $1->loc, $2->nspaceQual, $3->data, $4->repeatType ); + $$->typeRef = TypeRef::cons( $1->loc, TypeRef::Ptr, inner ); + }; + + +nonterm var_def +{ + InputLoc loc; + ObjectField *objField; +}; + +var_def: TK_Word ':' type_ref + final { + /* Return an object field object. The user of this nonterminal must + * load it into the approrpriate map and do error checking. */ + $$->objField = ObjectField::cons( $1->loc, $3->typeRef, $1->data ); + }; + +region_def: + region_head root_item_list KW_End + final { + popRegionSet(); + }; + +region_head: + KW_Lex + final { + pushRegionSet( $1->loc ); + }; + +namespace_def: + namespace_head root_item_list KW_End + final { + namespaceStack.pop(); + }; + + +namespace_head: + KW_Namespace TK_Word + final { + /* Make the new namespace. 
*/ + createNamespace( $1->loc, $2->data ); + }; + +context_var_def: + var_def + final { + contextVarDef( $1->loc, $1->objField ); + }; + + +context_item: context_var_def commit; +context_item: literal_def commit; +context_item: rl_def commit; +context_item: token_def commit; +context_item: cfl_def commit; +context_item: region_def commit; +context_item: context_def commit; +context_item: function_def commit; +context_item: iter_def commit; +context_item: export_def commit; +context_item: pre_eof commit; +context_item: precedence commit; + +context_item_list: + context_item_list context_item; +context_item_list: + ; + +context_def: + context_head context_item_list KW_End + final { + contextStack.pop(); + namespaceStack.pop(); + }; + +context_head: + KW_Context TK_Word + final { + contextHead( $1->loc, $2->data ); + }; + +# +# Pattern +# + +nonterm pattern +{ + PatternItemList *list; + InputLoc loc; +}; + +pattern: + pattern_list + final { + $$->list = $1->list; + }; + +nonterm pattern_list uses pattern; + +pattern_list: pattern_list pattern_top_el + final { + $$->list = patListConcat( $1->list, $2->list ); + }; +pattern_list: pattern_top_el + final { + $$->list = $1->list; + }; + +nonterm pattern_top_el uses pattern; + +pattern_top_el: '"' litpat_el_list '"' + final { + $$->list = $2->list; + }; +pattern_top_el: '[' pattern_el_list ']' + final { + $$->list = $2->list; + }; + +nonterm litpat_el_list uses pattern; + +litpat_el_list: litpat_el_list litpat_el + final { + $$->list = patListConcat( $1->list, $2->list ); + }; +litpat_el_list: + final { + $$->list = new PatternItemList; + }; + +nonterm litpat_el uses pattern; + +litpat_el: TK_LitPat + final { + PatternItem *patternItem = PatternItem::cons( $1->loc, $1->data, + PatternItem::InputText ); + $$->list = PatternItemList::cons( patternItem ); + }; + +litpat_el: '[' pattern_el_list ']' + final { + $$->list = $2->list; + }; + +nonterm pattern_el_list uses pattern; + +pattern_el_list: + pattern_el_list pattern_el + 
final { + $$->list = patListConcat( $1->list, $2->list ); + }; +pattern_el_list: + final { + $$->list = new PatternItemList; + }; + +nonterm pattern_el uses pattern; + +pattern_el: + opt_label pattern_el_type_or_lit + final { + $$->list = patternEl( $1->varRef, $2->list ); + }; + +nonterm pattern_el uses pattern; + +pattern_el: '"' litpat_el_list '"' + final { + $$->list = $2->list; + }; +pattern_el: '?' TK_Word + final { + /* FIXME: Implement */ + assert(false); + }; + +nonterm pattern_el_type_or_lit uses pattern; + +pattern_el_type_or_lit: + region_qual TK_Word opt_repeat + final { + $$->list = patternElNamed( $2->loc, $1->nspaceQual, $2->data, $3->repeatType ); + }; + +pattern_el_type_or_lit: + region_qual TK_Literal opt_repeat + final { + $$->list = patternElType( $2->loc, $1->nspaceQual, $2->data, $3->repeatType ); + }; + +nonterm opt_label +{ + /* Variable reference. */ + LangVarRef *varRef; +}; + +opt_label: TK_Word ':' + final { + $$->varRef = LangVarRef::cons( $1->loc, $1->data ); + }; +opt_label: + final { + $$->varRef = 0; + }; + +# +# Constructor List (constructor) +# + +nonterm constructor +{ + ConsItemList *list; +}; + +constructor: cons_list + final { + $$->list = $1->list; + }; + +nonterm cons_list uses constructor; + +cons_list: cons_top_el cons_list + final { + $$->list = consListConcat( $1->list, $2->list ); + }; +cons_list: cons_top_el + final { + $$->list = $1->list; + }; + +nonterm cons_top_el uses constructor; + +cons_top_el: '"' lit_cons_el_list '"' + final { + $$->list = $2->list; + }; +cons_top_el: '[' cons_el_list ']' + final { + $$->list = $2->list; + }; + +nonterm lit_cons_el_list uses constructor; + +lit_cons_el_list: lit_cons_el_list lit_cons_el + final { + $$->list = consListConcat( $1->list, $2->list ); + }; +lit_cons_el_list: + final { + $$->list = new ConsItemList; + }; + +nonterm lit_cons_el uses constructor; + +lit_cons_el: TK_LitPat + final { + ConsItem *consItem = ConsItem::cons( $1->loc, ConsItem::InputText, $1->data ); + 
$$->list = ConsItemList::cons( consItem ); + }; + +lit_cons_el: '[' cons_el_list ']' + final { + $$->list = $2->list; + }; + +nonterm cons_el_list uses constructor; + +cons_el_list: cons_el_list cons_el + final { + $$->list = consListConcat( $1->list, $2->list ); + }; +cons_el_list: + final { + $$->list = new ConsItemList; + }; + +nonterm cons_el uses constructor; + +cons_el: region_qual TK_Literal + final { + $$->list = consElLiteral( $2->loc, $2->data, $1->nspaceQual ); + }; + +cons_el: '"' lit_cons_el_list '"' + final { + $$->list = $2->list; + }; + +cons_el: code_expr + final { + ConsItem *consItem = ConsItem::cons( $1->expr->loc, ConsItem::ExprType, $1->expr ); + $$->list = ConsItemList::cons( consItem ); + }; + +# +# Accumulate List +# + +nonterm accumulate +{ + ConsItemList *list; +}; + +accumulate: + accum_list + final { + $$->list = $1->list; + }; + +nonterm accum_list uses accumulate; + +accum_list: accum_top_el accum_list + final { + $$->list = consListConcat( $1->list, $2->list ); + }; + +accum_list: accum_top_el + final { + $$->list = $1->list; + }; + +nonterm accum_top_el uses accumulate; + +accum_top_el: '"' lit_accum_el_list '"' + final { + $$->list = $2->list; + }; + +accum_top_el: '[' accum_el_list ']' + final { + $$->list = $2->list; + }; + +nonterm lit_accum_el_list uses accumulate; + +lit_accum_el_list: + lit_accum_el_list lit_accum_el + final { + $$->list = consListConcat( $1->list, $2->list ); + }; + +lit_accum_el_list: + final { + $$->list = new ConsItemList; + }; + +nonterm lit_accum_el uses accumulate; + +lit_accum_el: TK_LitPat + final { + ConsItem *consItem = ConsItem::cons( $1->loc, ConsItem::InputText, $1->data ); + $$->list = ConsItemList::cons( consItem ); + }; + +lit_accum_el: '[' accum_el_list ']' + final { + $$->list = $2->list; + }; + +nonterm accum_el_list uses accumulate; + +accum_el_list: accum_el_list accum_el + final { + $$->list = consListConcat( $1->list, $2->list ); + }; + +accum_el_list: + final { + $$->list = new 
ConsItemList; + }; + +nonterm accum_el uses accumulate; + +accum_el: code_expr + final { + ConsItem *consItem = ConsItem::cons( $1->expr->loc, ConsItem::ExprType, $1->expr ); + $$->list = ConsItemList::cons( consItem ); + }; + +accum_el: '"' lit_accum_el_list '"' + final { + $$->list = $2->list; + }; + + +# +# String List +# + +nonterm string +{ + ConsItemList *list; +}; + +string: string_list + final { + $$->list = $1->list; + }; + +nonterm string_list uses string; + +string_list: string_top_el string_list + final { + $$->list = consListConcat( $1->list, $2->list ); + }; +string_list: string_top_el + final { + $$->list = $1->list; + }; + +nonterm string_top_el uses string; + +string_top_el: '"' lit_string_el_list '"' + final { + $$->list = $2->list; + }; +string_top_el: '[' string_el_list ']' + final { + $$->list = $2->list; + }; + +nonterm lit_string_el_list uses string; + +lit_string_el_list: lit_string_el_list lit_string_el + final { + $$->list = consListConcat( $1->list, $2->list ); + }; +lit_string_el_list: + final { + $$->list = new ConsItemList; + }; + +nonterm lit_string_el uses string; + +lit_string_el: TK_LitPat + final { + ConsItem *consItem = ConsItem::cons( $1->loc, ConsItem::InputText, $1->data ); + $$->list = ConsItemList::cons( consItem ); + }; + +lit_string_el: '[' string_el_list ']' + final { + $$->list = $2->list; + }; + +nonterm string_el_list uses string; + +string_el_list: string_el_list string_el + final { + $$->list = consListConcat( $1->list, $2->list ); + }; +string_el_list: + final { + $$->list = new ConsItemList; + }; + +nonterm string_el uses string; + +string_el: code_expr + final { + ConsItem *consItem = ConsItem::cons( $1->expr->loc, ConsItem::ExprType, $1->expr ); + $$->list = ConsItemList::cons( consItem ); + }; + +string_el: '"' lit_string_el_list '"' + final { + $$->list = $2->list; + }; + +# +# Production Lists. 
+# + +nonterm prod_el_list +{ + ProdElList *list; +}; + +prod_el_list: + prod_el_list prod_el + final { + $$->list = appendProdEl( $1->list, $2->prodEl ); + }; + +prod_el_list: + final { + $$->list = new ProdElList; + }; + +nonterm opt_no_ignore { bool noIgnore; }; + +opt_no_ignore: KW_Ni final { $$->noIgnore = true; }; +opt_no_ignore: final { $$->noIgnore = false; }; + +nonterm prod_el +{ + ProdEl *prodEl; +}; + +prod_el: + opt_capture opt_commit region_qual TK_Word opt_repeat + final { + $$->prodEl = prodElName( $4->loc, $4->data, $3->nspaceQual, + $1->objField, $5->repeatType, $2->commit ); + }; + +prod_el: + opt_capture opt_commit region_qual TK_Literal opt_repeat + final { + $$->prodEl = prodElLiteral( $4->loc, $4->data, $3->nspaceQual, + $1->objField, $5->repeatType, $2->commit ); + }; + +nonterm opt_repeat +{ + bool opt; + bool repeat; + RepeatType repeatType; +}; + +opt_repeat: '*' final { $$->opt = false; $$->repeat = true; $$->repeatType = RepeatRepeat; }; +opt_repeat: '+' final { $$->opt = false; $$->repeat = false; $$->repeatType = RepeatList; }; +opt_repeat: '?' 
final { $$->opt = true; $$->repeat = false; $$->repeatType = RepeatOpt; }; +opt_repeat: final { $$->opt = false; $$->repeat = false; $$->repeatType = RepeatNone; }; + +nonterm region_qual +{ + NamespaceQual *nspaceQual; +}; + +region_qual: region_qual TK_Word TK_DoubleColon + final { + $$->nspaceQual = $1->nspaceQual; + $$->nspaceQual->qualNames.append( $2->data ); + }; + +region_qual: + final { + $$->nspaceQual = NamespaceQual::cons( namespaceStack.top() ); + }; + +literal_def: KW_Literal literal_list; + +literal_list: literal_list ',' literal_item; +literal_list: literal_item; + +literal_item: opt_no_ignore TK_Literal opt_no_ignore + final { + if ( strcmp( $2->data, "''" ) == 0 ) + zeroDef( $2->loc, $2->data, $1->noIgnore, $3->noIgnore ); + else + literalDef( $2->loc, $2->data, $1->noIgnore, $3->noIgnore ); + }; + + +# These two productions are responsible for setting and unsetting the Regular +# language scanning context. +enter_rl: + try { + enterRl = true; + } + undo { + enterRl = false; + }; +leave_rl: + try { + enterRl = false; + } + undo { + enterRl = true; + }; + +token_def: + token_or_ignore token_def_name obj_var_list + enter_rl opt_no_ignore '/' opt_lex_join leave_rl '/' opt_no_ignore + opt_translate + final { + $3->objectDef->name = $2->name; + defineToken( $1->loc, $2->name, $7->join, $3->objectDef, + $11->transBlock, $1->ignore, $5->noIgnore, $10->noIgnore ); + }; + +nonterm token_or_ignore +{ + InputLoc loc; + bool ignore; +}; + +token_or_ignore: KW_Token + final { $$->loc = $1->loc; $$->ignore = false; }; + +token_or_ignore: KW_Ignore + final { $$->loc = $1->loc; $$->ignore = true; }; + +nonterm class token_def_name +{ + String name; +}; + +token_def_name: + opt_name + final { + $$->name = $1->name; + }; + +nonterm class opt_name +{ + String name; +}; + +opt_name: TK_Word final { $$->name = $1->data; }; +opt_name: ; + +nonterm opt_translate +{ + CodeBlock *transBlock; +}; + +opt_translate: + block_open lang_stmt_list block_close + final { + 
$$->transBlock = CodeBlock::cons( $2->stmtList, $1->localFrame ); + $$->transBlock->context = contextStack.top(); + }; + +opt_translate: + final { + $$->transBlock = 0; + }; + +pre_eof: + KW_Preeof block_open lang_stmt_list block_close + final { + preEof( $1->loc, $3->stmtList, $2->localFrame ); + }; + +rl_def: + KW_Rl machine_name enter_rl '/' lex_join leave_rl '/' + final { + /* Generic creation of machine for instantiation and assignment. */ + addRegularDef( $2->loc, namespaceStack.top(), $2->data, $5->join ); + }; + +type class token_data +{ + InputLoc loc; + String data; +}; + +nonterm machine_name uses token_data; + +machine_name: + TK_Word + final { + $$->loc = $1->loc; + $$->data = $1->data; + }; + +# +# Reduce statements +# + +nonterm opt_reduce_code +{ + CodeBlock *codeBlock; +}; + +opt_reduce_code: + final { $$->codeBlock = 0; }; + +opt_reduce_code: + start_reduce lang_stmt_list block_close + final { + $$->codeBlock = CodeBlock::cons( $2->stmtList, $1->localFrame ); + $$->codeBlock->context = contextStack.top(); + }; + +nonterm start_reduce uses block_open; + +start_reduce: + block_open + final { + $$->localFrame = $1->localFrame; + }; + +nonterm lang_stmt_list +{ + StmtList *stmtList; +}; + +lang_stmt_list: rec_stmt_list opt_require_stmt + final { + $$->stmtList = $1->stmtList; + if ( $2->stmt != 0 ) + $$->stmtList->append( $2->stmt ); + }; + +nonterm rec_stmt_list uses lang_stmt_list; + +rec_stmt_list: rec_stmt_list statement + final { + $$->stmtList = $1->stmtList; + + /* Maybe a statement was generated. 
*/ + if ( $2->stmt != 0 ) + $$->stmtList->append( $2->stmt ); + }; + +rec_stmt_list: + final { + $$->stmtList = new StmtList; + }; + +nonterm opt_def_init +{ + LangExpr *expr; + LangStmt::Type assignType; +}; + +opt_def_init: '=' code_expr + final { + $$->expr = $2->expr; + $$->assignType = LangStmt::AssignType; + }; +opt_def_init: + final { + /* No initializer was given. assignType is still assigned because the + * var_def statement rule passes it by value to varDef() along with + * expr, and it must not be read while unset. */ + $$->expr = 0; + $$->assignType = LangStmt::AssignType; + }; + +scope_push: + final { + pd->curLocalFrame->pushScope(); + }; + +scope_pop: + final { + pd->curLocalFrame->popScope(); + }; + +nonterm statement +{ + LangStmt *stmt; +}; +nonterm for_scope uses statement; + +statement: var_def opt_def_init + final { + $$->stmt = varDef( $1->objField, $2->expr, $2->assignType ); + }; +statement: var_ref '=' code_expr + final { + $$->stmt = LangStmt::cons( $2->loc, LangStmt::AssignType, $1->varRef, $3->expr ); + }; +statement: KW_Print '(' code_expr_list ')' + final { + $$->stmt = LangStmt::cons( $1->loc, LangStmt::PrintType, $3->exprVect ); + }; +statement: KW_PrintXMLAC '(' code_expr_list ')' + final { + $$->stmt = LangStmt::cons( $1->loc, LangStmt::PrintXMLACType, $3->exprVect ); + }; +statement: KW_PrintXML '(' code_expr_list ')' + final { + $$->stmt = LangStmt::cons( $1->loc, LangStmt::PrintXMLType, $3->exprVect ); + }; +statement: KW_PrintStream '(' code_expr_list ')' + final { + $$->stmt = LangStmt::cons( $1->loc, LangStmt::PrintStreamType, $3->exprVect ); + }; +statement: code_expr + final { + $$->stmt = LangStmt::cons( InputLoc(), LangStmt::ExprType, $1->expr ); + }; +statement: if_stmt + final { + $$->stmt = $1->stmt; + }; +statement: KW_Reject + final { + $$->stmt = LangStmt::cons( $1->loc, LangStmt::RejectType ); + }; +statement: KW_While scope_push code_expr block_or_single scope_pop + final { + $$->stmt = LangStmt::cons( LangStmt::WhileType, $3->expr, $4->stmtList ); + }; + +for_scope: TK_Word ':' type_ref KW_In iter_call block_or_single + final { + $$->stmt = forScope( $1->loc, $1->data, $3->typeRef, $5->langTerm, $6->stmtList ); + }; + +statement: KW_For 
scope_push for_scope scope_pop + final { + $$->stmt = $3->stmt; + }; + +statement: KW_Return code_expr + final { + $$->stmt = LangStmt::cons( $1->loc, LangStmt::ReturnType, $2->expr ); + }; +statement: KW_Break + final { + $$->stmt = LangStmt::cons( LangStmt::BreakType ); + }; +statement: KW_Yield var_ref + final { + $$->stmt = LangStmt::cons( LangStmt::YieldType, $2->varRef ); + }; + +nonterm opt_require_stmt uses statement; + +opt_require_stmt: + scope_push require_pattern lang_stmt_list scope_pop + final { + $$->stmt = LangStmt::cons( LangStmt::IfType, $2->expr, $3->stmtList, 0 ); + }; +opt_require_stmt: + final { + $$->stmt = 0; + }; + +nonterm require_pattern uses code_expr; + +require_pattern: + KW_Require var_ref pattern + final { + $$->expr = require( $1->loc, $2->varRef, $3->list ); + }; + +nonterm block_or_single uses lang_stmt_list; + +block_or_single: '{' lang_stmt_list '}' + final { + $$->stmtList = $2->stmtList; + }; +block_or_single: statement + final { + $$->stmtList = new StmtList; + + /* A statement does not always generate a LangStmt (rec_stmt_list + * guards against a null stmt for the same reason). Only link a real + * statement into the single-element list. */ + if ( $1->stmt != 0 ) + $$->stmtList->append( $1->stmt ); + }; + +nonterm iter_call +{ + LangTerm *langTerm; +}; + +iter_call: var_ref '(' opt_code_expr_list ')' + final { + $$->langTerm = LangTerm::cons( InputLoc(), $1->varRef, $3->exprVect ); + }; +iter_call: TK_Word + final { + $$->langTerm = LangTerm::cons( InputLoc(), LangTerm::VarRefType, + LangVarRef::cons( $1->loc, $1->data ) ); + }; + +# +# If Statements +# + +nonterm if_stmt uses statement; + +if_stmt: KW_If scope_push code_expr block_or_single scope_pop elsif_list + final { + $$->stmt = LangStmt::cons( LangStmt::IfType, $3->expr, $4->stmtList, $6->stmt ); + }; + +nonterm elsif_list +{ + LangStmt *stmt; +}; + +elsif_list: + elsif_clause elsif_list + final { + /* Put any of the following elseif part, an else, or null into the elsePart. 
*/ + $$->stmt = $1->stmt; + $$->stmt->elsePart = $2->stmt; + }; +elsif_list: + optional_else + final { + $$->stmt = $1->stmt; + }; + +nonterm elsif_clause +{ + LangStmt *stmt; +}; + +elsif_clause: + KW_Elsif scope_push code_expr block_or_single scope_pop + final { + $$->stmt = LangStmt::cons( LangStmt::IfType, $3->expr, $4->stmtList, 0 ); + }; + +nonterm optional_else +{ + LangStmt *stmt; +}; + +optional_else: + KW_Else scope_push block_or_single scope_pop + final { + $$->stmt = LangStmt::cons( LangStmt::ElseType, $3->stmtList ); + }; + +optional_else: + final { + $$->stmt = 0; + }; + +# +# Code LexExpression Lists. +# +nonterm code_expr_list +{ + ExprVect *exprVect; +}; + +code_expr_list: + code_expr_list code_expr + final { + $$->exprVect = $1->exprVect; + $$->exprVect->append( $2->expr ); + }; +code_expr_list: + code_expr + final { + $$->exprVect = new ExprVect; + $$->exprVect->append( $1->expr ); + }; + +nonterm opt_code_expr_list uses code_expr_list; + +opt_code_expr_list: + code_expr_list + final { + $$->exprVect = $1->exprVect; + }; + +opt_code_expr_list: + final { + $$->exprVect = 0; + }; + +# +# Type list +# + +nonterm type_list +{ + TypeRefVect *typeRefVect; +}; + +type_list: type_list ',' type_ref + final { + $$->typeRefVect = $1->typeRefVect; + $$->typeRefVect->append( $3->typeRef ); + }; +type_list: type_ref + final { + $$->typeRefVect = new TypeRefVect; + $$->typeRefVect->append( $1->typeRef ); + }; + +nonterm opt_type_list uses type_list; + +opt_type_list: type_list + final { + $$->typeRefVect = $1->typeRefVect; + }; + +opt_type_list: + final { + $$->typeRefVect = 0; + }; + + +# +# Variable reference +# + +nonterm var_ref +{ + LangVarRef *varRef; +}; + +var_ref: qual TK_Word + final { + $$->varRef = LangVarRef::cons( $2->loc, $1->qual, $2->data ); + }; + +nonterm qual +{ + QualItemVect *qual; +}; + +qual: qual TK_Word '.' 
+ final { + $$->qual = $1->qual; + $$->qual->append( QualItem( $2->loc, $2->data, QualItem::Dot ) ); + }; +qual: qual TK_Word TK_RightArrow + final { + $$->qual = $1->qual; + $$->qual->append( QualItem( $2->loc, $2->data, QualItem::Arrow ) ); + }; +qual: + final { + $$->qual = new QualItemVect; + }; + +# +# Code expression +# + +nonterm code_expr +{ + LangExpr *expr; +}; + +code_expr: code_expr TK_AmpAmp code_relational + final { + $$->expr = LangExpr::cons( $2->loc, $1->expr, OP_LogicalAnd, $3->expr ); + }; + +code_expr: code_expr TK_BarBar code_relational + final { + $$->expr = LangExpr::cons( $2->loc, $1->expr, OP_LogicalOr, $3->expr ); + }; + +code_expr: code_relational + final { + $$->expr = $1->expr; + }; + +nonterm code_relational uses code_expr; + +code_relational: code_relational TK_DoubleEql code_additive + final { + $$->expr = LangExpr::cons( $2->loc, $1->expr, OP_DoubleEql, $3->expr ); + }; + +code_relational: code_relational TK_NotEql code_additive + final { + $$->expr = LangExpr::cons( $2->loc, $1->expr, OP_NotEql, $3->expr ); + }; + +code_relational: code_relational '<' code_additive + final { + $$->expr = LangExpr::cons( $2->loc, $1->expr, '<', $3->expr ); + }; + +code_relational: code_relational '>' code_additive + final { + $$->expr = LangExpr::cons( $2->loc, $1->expr, '>', $3->expr ); + }; + +code_relational: code_relational TK_LessEql code_additive + final { + $$->expr = LangExpr::cons( $2->loc, $1->expr, OP_LessEql, $3->expr ); + }; + +code_relational: code_relational TK_GrtrEql code_additive + final { + $$->expr = LangExpr::cons( $2->loc, $1->expr, OP_GrtrEql, $3->expr ); + }; + + +code_relational: code_additive + final { + $$->expr = $1->expr; + }; + +nonterm code_additive uses code_expr; + +code_additive: code_additive '+' code_multiplicitive + final { + $$->expr = LangExpr::cons( $2->loc, $1->expr, '+', $3->expr ); + }; + +code_additive: code_additive '-' code_multiplicitive + final { + $$->expr = LangExpr::cons( $2->loc, $1->expr, '-', 
$3->expr ); + }; + +code_additive: code_multiplicitive + final { + $$->expr = $1->expr; + }; + +nonterm code_multiplicitive uses code_expr; + +code_multiplicitive: code_multiplicitive '*' code_unary + final { + $$->expr = LangExpr::cons( $2->loc, $1->expr, '*', $3->expr ); + }; + +code_multiplicitive: code_multiplicitive '/' code_unary + final { + $$->expr = LangExpr::cons( $2->loc, $1->expr, '/', $3->expr ); + }; + +code_multiplicitive: code_unary + final { + $$->expr = $1->expr; + }; + +nonterm code_unary uses code_expr; +code_unary: '!' code_factor + final { + $$->expr = LangExpr::cons( $1->loc, '!', $2->expr ); + }; +code_unary: '$' code_factor + final { + $$->expr = LangExpr::cons( $1->loc, '$', $2->expr ); + }; +code_unary: '^' code_factor + final { + $$->expr = LangExpr::cons( $1->loc, '^', $2->expr ); + }; +code_unary: '%' code_factor + final { + $$->expr = LangExpr::cons( $1->loc, '%', $2->expr ); + }; +code_unary: code_factor + final { + $$->expr = $1->expr; + }; + +nonterm opt_capture uses var_def; + +opt_capture: TK_Word ':' + final { + $$->objField = ObjectField::cons( $1->loc, 0, $1->data ); + }; +opt_capture: + final { + $$->objField = 0; + }; + +nonterm parse_cmd +{ + bool stop; + InputLoc loc; +}; + +parse_cmd: + KW_Parse + final { + $$->stop = false; + $$->loc = $1->loc; + }; + +parse_cmd: + KW_ParseStop + final { + $$->stop = true; + $$->loc = $1->loc; + }; + +nonterm code_factor uses code_expr; + +code_factor: TK_Number + final { + $$->expr = LangExpr::cons( LangTerm::cons( InputLoc(), LangTerm::NumberType, $1->data ) ); + }; +code_factor: TK_Literal + final { + $$->expr = LangExpr::cons( LangTerm::cons( InputLoc(), LangTerm::StringType, $1->data ) ); + }; +code_factor: var_ref '(' opt_code_expr_list ')' + final { + $$->expr = LangExpr::cons( LangTerm::cons( InputLoc(), $1->varRef, $3->exprVect ) ); + }; +code_factor: var_ref + final { + $$->expr = LangExpr::cons( LangTerm::cons( InputLoc(), LangTerm::VarRefType, $1->varRef ) ); + }; 
+code_factor: KW_Match var_ref pattern + final { + $$->expr = match( $1->loc, $2->varRef, $3->list ); + }; +code_factor: KW_New code_factor + final { + $$->expr = LangExpr::cons( LangTerm::cons( InputLoc(), LangTerm::NewType, $2->expr ) ); + }; +code_factor: + KW_Construct opt_capture type_ref opt_field_init constructor + final { + $$->expr = construct( $1->loc, $2->objField, $5->list, + $3->typeRef, $4->fieldInitVect ); + }; + +code_factor: + parse_cmd opt_capture type_ref opt_field_init accumulate + final { + $$->expr = parseCmd( $1->loc, $1->stop, $2->objField, + $3->typeRef, $4->fieldInitVect, $5->list ); + }; +code_factor: + var_ref TK_LtLt accumulate + final { + $$->expr = send( $2->loc, $1->varRef, $3->list ); + }; +code_factor: + KW_Send var_ref accumulate + final { + $$->expr = send( $1->loc, $2->varRef, $3->list ); + }; +code_factor: KW_TypeId '<' type_ref '>' + final { + $$->expr = LangExpr::cons( LangTerm::cons( $1->loc, + LangTerm::TypeIdType, $3->typeRef ) ); + }; +code_factor: type_ref KW_In var_ref + final { + $$->expr = LangExpr::cons( LangTerm::cons( $2->loc, + LangTerm::SearchType, $1->typeRef, $3->varRef ) ); + }; +code_factor: KW_Nil + final { + $$->expr = LangExpr::cons( LangTerm::cons( $1->loc, + LangTerm::NilType ) ); + }; +code_factor: KW_True + final { + $$->expr = LangExpr::cons( LangTerm::cons( $1->loc, + LangTerm::TrueType ) ); + }; +code_factor: KW_False + final { + $$->expr = LangExpr::cons( LangTerm::cons( $1->loc, + LangTerm::FalseType ) ); + }; +code_factor: '(' code_expr ')' + final { + $$->expr = $2->expr; + }; +code_factor: KW_MakeTree '(' opt_code_expr_list ')' + final { + $$->expr = LangExpr::cons( LangTerm::cons( $1->loc, + LangTerm::MakeTreeType, $3->exprVect ) ); + }; +code_factor: KW_MakeToken '(' opt_code_expr_list ')' + final { + $$->expr = LangExpr::cons( LangTerm::cons( $1->loc, + LangTerm::MakeTokenType, $3->exprVect ) ); + }; +code_factor: KW_Deref code_expr + final { + $$->expr = LangExpr::cons( $1->loc, OP_Deref, 
$2->expr ); + }; +code_factor: string + final { + $$->expr = LangExpr::cons( LangTerm::cons( InputLoc(), $1->list ) ); + }; + +nonterm opt_field_init uses field_init_list; + +opt_field_init: '(' opt_field_init_list ')' + final { + $$->fieldInitVect = $2->fieldInitVect; + }; +opt_field_init: + final { + $$->fieldInitVect = 0; + }; + +nonterm opt_field_init_list uses field_init_list; + +opt_field_init_list: field_init_list + final { + $$->fieldInitVect = $1->fieldInitVect; + }; +opt_field_init_list: + final { + $$->fieldInitVect = 0; + }; + +nonterm field_init_list +{ + FieldInitVect *fieldInitVect; +}; + +field_init_list: field_init_list field_init + final { + $$->fieldInitVect = $1->fieldInitVect; + $$->fieldInitVect->append( $2->fieldInit ); + }; +field_init_list: field_init + final { + $$->fieldInitVect = new FieldInitVect; + $$->fieldInitVect->append( $1->fieldInit ); + }; + +nonterm field_init +{ + FieldInit *fieldInit; +}; + +field_init: code_expr + final { + $$->fieldInit = FieldInit::cons( InputLoc(), "_name", $1->expr ); + }; + +# +# Regular Expressions +# + +nonterm opt_lex_join +{ + LexJoin *join; +}; + +opt_lex_join: + lex_join opt_context + final { + $$->join = lexOptJoin( $1->join, $2->context ); + }; + +opt_lex_join: + final { + $$->join = 0; + }; + +nonterm lex_join +{ + LexJoin *join; +}; + +lex_join: + lex_expr + final { + $$->join = LexJoin::cons( $1->expression ); + }; + +nonterm opt_context +{ + LexJoin *context; +}; + +opt_context: + '@' lex_join + final + { + $$->context = $2->join; + }; + +opt_context: + final { + $$->context = 0; + }; + +nonterm lex_expr +{ + LexExpression *expression; +}; + +lex_expr: + lex_expr '|' lex_term_short + final { + $$->expression = LexExpression::cons( $1->expression, + $3->term, LexExpression::OrType ); + }; +lex_expr: + lex_expr '&' lex_term_short + final { + $$->expression = LexExpression::cons( $1->expression, + $3->term, LexExpression::IntersectType ); + }; +# This priority specification overrides the 
innermost parsing strategy which +# results ordered choice interpretation of the grammar. +lex_expr: + lex_expr '-' lex_term_short + final { + $$->expression = LexExpression::cons( $1->expression, + $3->term, LexExpression::SubtractType ); + }; +lex_expr: + lex_expr TK_DashDash lex_term_short + final { + $$->expression = LexExpression::cons( $1->expression, + $3->term, LexExpression::StrongSubtractType ); + }; +lex_expr: + lex_term_short + final { + $$->expression = LexExpression::cons( $1->term ); + }; + +nonterm lex_term_short +{ + LexTerm *term; +}; + +shortest lex_term_short; + +lex_term_short: lex_term + final { $$->term = $1->term; }; + +nonterm lex_term +{ + LexTerm *term; +}; + +lex_term: + lex_term lex_factor_label + final { + $$->term = LexTerm::cons( $1->term, $2->factorAug ); + }; +lex_term: + lex_term '.' lex_factor_label + final { + $$->term = LexTerm::cons( $1->term, $3->factorAug ); + }; +lex_term: + lex_term TK_ColonGt lex_factor_label + final { + $$->term = LexTerm::cons( $1->term, $3->factorAug, LexTerm::RightStartType ); + }; +lex_term: + lex_term TK_ColonGtGt lex_factor_label + final { + $$->term = LexTerm::cons( $1->term, $3->factorAug, LexTerm::RightFinishType ); + }; +lex_term: + lex_term TK_LtColon lex_factor_label + final { + $$->term = LexTerm::cons( $1->term, + $3->factorAug, LexTerm::LeftType ); + }; +lex_term: + lex_factor_label + final { + $$->term = LexTerm::cons( $1->factorAug ); + }; + +nonterm lex_factor_label +{ + LexFactorAug *factorAug; +}; + +lex_factor_label: + factor_ep + final { + $$->factorAug = $1->factorAug; + }; + +lex_factor_label: + TK_Word ':' lex_factor_label + final { + $$->factorAug = lexFactorLabel( $1->loc, $1->data, $3->factorAug ); + }; + +nonterm factor_ep +{ + LexFactorAug *factorAug; +}; + +factor_ep: + factor_aug + final { + $$->factorAug = $1->factorAug; + }; + +nonterm factor_aug +{ + LexFactorAug *factorAug; +}; + +factor_aug: + lex_factor_rep + final { + $$->factorAug = LexFactorAug::cons( 
$1->factorRep ); + }; + + +# The fourth level of precedence. These are the trailing unary operators that +# allow for repetition. + +nonterm lex_factor_rep +{ + LexFactorRep *factorRep; +}; + +lex_factor_rep: + lex_factor_rep '*' + final { + $$->factorRep = LexFactorRep::cons( $2->loc, $1->factorRep, + 0, 0, LexFactorRep::StarType ); + }; +lex_factor_rep: + lex_factor_rep TK_StarStar + final { + $$->factorRep = LexFactorRep::cons( $2->loc, $1->factorRep, + 0, 0, LexFactorRep::StarStarType ); + }; +lex_factor_rep: + lex_factor_rep '?' + final { + $$->factorRep = LexFactorRep::cons( $2->loc, $1->factorRep, + 0, 0, LexFactorRep::OptionalType ); + }; +lex_factor_rep: + lex_factor_rep '+' + final { + $$->factorRep = LexFactorRep::cons( $2->loc, $1->factorRep, + 0, 0, LexFactorRep::PlusType ); + }; +lex_factor_rep: + lex_factor_rep '{' lex_factor_rep_num '}' + final { + $$->factorRep = LexFactorRep::cons( $2->loc, $1->factorRep, + $3->rep, 0, LexFactorRep::ExactType ); + }; +lex_factor_rep: + lex_factor_rep '{' ',' lex_factor_rep_num '}' + final { + $$->factorRep = LexFactorRep::cons( $2->loc, $1->factorRep, + 0, $4->rep, LexFactorRep::MaxType ); + }; +lex_factor_rep: + lex_factor_rep '{' lex_factor_rep_num ',' '}' + final { + $$->factorRep = LexFactorRep::cons( $2->loc, $1->factorRep, + $3->rep, 0, LexFactorRep::MinType ); + }; +lex_factor_rep: + lex_factor_rep '{' lex_factor_rep_num ',' lex_factor_rep_num '}' + final { + $$->factorRep = LexFactorRep::cons( $2->loc, $1->factorRep, + $3->rep, $5->rep, LexFactorRep::RangeType ); + }; +lex_factor_rep: + lex_factor_neg + final { + $$->factorRep = LexFactorRep::cons( + $1->factorNeg->loc, $1->factorNeg ); + }; + +nonterm lex_factor_rep_num +{ + int rep; +}; + +lex_factor_rep_num: + TK_UInt + final { + $$->rep = lexFactorRepNum( $1->loc, $1->data ); + }; + + +# +# The fifth level up in precedence. Negation. +# + +nonterm lex_factor_neg +{ + LexFactorNeg *factorNeg; +}; + +lex_factor_neg: + '!' 
lex_factor_neg + final { + $$->factorNeg = LexFactorNeg::cons( $1->loc, + $2->factorNeg, LexFactorNeg::NegateType ); + }; +lex_factor_neg: + '^' lex_factor_neg + final { + $$->factorNeg = LexFactorNeg::cons( $1->loc, + $2->factorNeg, LexFactorNeg::CharNegateType ); + }; +lex_factor_neg: + lex_rl_factor + final { + $$->factorNeg = LexFactorNeg::cons( $1->factor->loc, $1->factor ); + }; + +nonterm lex_rl_factor +{ + LexFactor *factor; +}; + +lex_rl_factor: + TK_Literal + final { + /* Create a new factor node going to a concat literal. */ + $$->factor = LexFactor::cons( Literal::cons( $1->loc, + $1->data, Literal::LitString ) ); + }; +lex_rl_factor: + lex_alphabet_num + final { + /* Create a new factor node going to a literal number. */ + $$->factor = LexFactor::cons( Literal::cons( $1->loc, + $1->data, Literal::Number ) ); + }; +lex_rl_factor: + TK_Word + final { + $$->factor = lexRlFactorName( $1->data, $1->loc ); + }; +lex_rl_factor: + TK_SqOpen lex_regular_expr_or_data TK_SqClose + final { + /* Create a new factor node going to an OR expression. */ + $$->factor = LexFactor::cons( ReItem::cons( $1->loc, $2->reOrBlock, ReItem::OrBlock ) ); + }; +lex_rl_factor: + TK_SqOpenNeg lex_regular_expr_or_data TK_SqClose + final { + /* Create a new factor node going to a negated OR expression. */ + $$->factor = LexFactor::cons( ReItem::cons( $1->loc, $2->reOrBlock, ReItem::NegOrBlock ) ); + }; +lex_rl_factor: + lex_range_lit TK_DotDot lex_range_lit + final { + /* Create a new factor node going to a range. */ + $$->factor = LexFactor::cons( Range::cons( $1->literal, $3->literal ) ); + }; +lex_rl_factor: + '(' lex_join ')' + final { + /* Create a new factor going to a parenthesized join. */ + $$->factor = LexFactor::cons( $2->join ); + }; + +nonterm lex_range_lit +{ + Literal *literal; +}; + +# Literals which can be the end points of ranges. +lex_range_lit: + TK_Literal + final { + /* Range literals must have only one char. We restrict this in the parse tree. 
*/ + $$->literal = Literal::cons( $1->loc, $1->data, Literal::LitString ); + }; +lex_range_lit: + lex_alphabet_num + final { + /* Create a new literal number. */ + $$->literal = Literal::cons( $1->loc, $1->data, Literal::Number ); + }; + +nonterm lex_alphabet_num uses token_data; + +# Any form of a number that can be used as a basic machine. */ +lex_alphabet_num: + TK_UInt + final { + $$->loc = $1->loc; + $$->data = $1->data; + }; +lex_alphabet_num: + '-' TK_UInt + final { + /* Negative number: rebuild the text as the minus sign that was + * matched followed by the digits, so the sign is not lost. */ + $$->loc = $1->loc; + $$->data = '-'; + $$->data += $2->data; + }; +lex_alphabet_num: + TK_Hex + final { + $$->loc = $1->loc; + $$->data = $1->data; + }; + +# +# Regular Expressions. +# + + +# The data inside of a [] expression in a regular expression. Accepts any +# number of characters or ranges. */ +nonterm lex_regular_expr_or_data +{ + ReOrBlock *reOrBlock; +}; + +lex_regular_expr_or_data: + lex_regular_expr_or_data lex_regular_expr_or_char + final { + $$->reOrBlock = lexRegularExprData( $1->reOrBlock, $2->reOrItem ); + }; +lex_regular_expr_or_data: + final { + $$->reOrBlock = ReOrBlock::cons(); + }; + +# A single character inside of an or expression. Can either be a character or a +# set of characters. +nonterm lex_regular_expr_or_char +{ + ReOrItem *reOrItem; +}; + +lex_regular_expr_or_char: + TK_ReChar + final { + $$->reOrItem = ReOrItem::cons( $1->loc, $1->data ); + }; + +lex_regular_expr_or_char: + TK_ReChar TK_Dash TK_ReChar + final { + $$->reOrItem = ReOrItem::cons( $2->loc, $1->data[0], $3->data[0] ); + }; + +nonterm opt_commit +{ + bool commit; +}; + +opt_commit: + final { + $$->commit = false; + }; + +opt_commit: + KW_Commit + final { + $$->commit = true; + }; + +# +# Grammar Finished +# + + write types; + write data; +}%% + +void ColmParser::init() +{ + BaseParser::init(); + %% write init; +} + +int ColmParser::parseLangEl( int type, const Token *token ) +{ + %% write exec; + return errCount == 0 ? 
0 : -1; +} + +/* Feed one scanned token to the generated parser. The token text (when toklen > 0) and the location are copied into a Token; if parseLangEl() reports failure a parse error is printed and the process exits with status 1. Returns the parseLangEl() result. */ int ColmParser::token( InputLoc &loc, int tokId, char *tokstart, int toklen ) +{ + Token token; + + if ( toklen > 0 ) + token.data.setAs( tokstart, toklen ); + + token.loc = loc; + int res = parseLangEl( tokId, &token ); + if ( res < 0 ) { + parse_error(tokId, token) << "parse error" << endl; + exit(1); + } + return res; +} + +/* Bump gblErrorCount and print the "file:line:col: at token ..." prefix to cerr. Names of ids below 128 are printed quoted (presumably these are single-character tokens; confirm). Returns cerr so the caller can append the message text. */ ostream &ColmParser::parse_error( int tokId, Token &token ) +{ + /* Maintain the error count. */ + gblErrorCount += 1; + + cerr << token.loc.fileName << ":" << token.loc.line << ":" << token.loc.col << ": "; + cerr << "at token "; + if ( tokId < 128 ) + cerr << "\"" << ColmParser_lelNames[tokId] << "\""; + else + cerr << ColmParser_lelNames[tokId]; + if ( token.data != 0 ) + cerr << " with data \"" << token.data << "\""; + cerr << ": "; + + return cerr; +} + diff --git a/src/lmscan.h b/src/lmscan.h new file mode 100644 index 00000000..ff3de0ad --- /dev/null +++ b/src/lmscan.h @@ -0,0 +1,104 @@ +/* + * Copyright 2007-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _RLSCAN_H +#define _RLSCAN_H + +#include <iostream> +#include <fstream> +#include <string.h> + +#include "global.h" +#include "lmparse.h" +#include "compiler.h" +#include "avltree.h" +#include "vector.h" +#include "buffer.h" + +using std::ifstream; +using std::istream; +using std::ostream; +using std::cout; +using std::cerr; +using std::endl; + +extern char *Parser_lelNames[]; + + +struct ColmScanner +{ + ColmScanner( const char *fileName, istream &input, + ColmParser *parser, int includeDepth ) + : + fileName(fileName), input(input), + includeDepth(includeDepth), + line(1), column(1), lastnl(0), + parser(parser), + parserExistsError(false), + whitespaceOn(true) + { + } + + ifstream *tryOpenInclude( char **pathChecks, long &found ); + char **makeIncludePathChecks( const char *thisFileName, const char *fileName ); + bool recursiveInclude( const char *inclFileName ); + + void sectionParseInit(); + void token( int type, char *start, char *end ); + void token( int type, char c ); + void token( int type ); + void updateCol(); + void endSection(); + void scan(); + void eof(); + ostream &scan_error(); + + const char *fileName; + istream &input; + int includeDepth; + + int cs; + int line; + char *word, *lit; + int word_len, lit_len; + InputLoc sectionLoc; + char *ts, *te; + int column; + char *lastnl; + + /* Set by machine statements, these persist from section to section + * allowing for unnamed sections. */ + ColmParser *parser; + IncludeStack includeStack; + + /* This is set if ragel has already emitted an error stating that + * no section name has been seen and thus no parser exists. */ + bool parserExistsError; + + /* This is for inline code. By default it is on. 
It goes off for + * statements and values in inline blocks which are parsed. */ + bool whitespaceOn; + + Buffer litBuf; +}; + +#endif /* _RLSCAN_H */ diff --git a/src/lmscan.rl b/src/lmscan.rl new file mode 100644 index 00000000..231e2689 --- /dev/null +++ b/src/lmscan.rl @@ -0,0 +1,637 @@ +/* + * Copyright 2006-2012 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <iostream> +#include <fstream> +#include <string.h> + +#include "global.h" +#include "lmscan.h" +#include "lmparse.h" +#include "parsedata.h" +#include "avltree.h" +#include "vector.h" + +//#define PRINT_TOKENS + +using std::ifstream; +using std::istream; +using std::ostream; +using std::cout; +using std::cerr; +using std::endl; + +%%{ + machine section_parse; + alphtype int; + write data; +}%% + +void ColmScanner::sectionParseInit() +{ + %% write init; +} + +ostream &ColmScanner::scan_error() +{ + /* Maintain the error count. 
*/ + gblErrorCount += 1; + cerr << fileName << ":" << line << ":" << column << ": "; + return cerr; +} + +/* Return true when inclFileName compares equal (strcmp) to the fileName of any entry on includeStack. */ bool ColmScanner::recursiveInclude( const char *inclFileName ) +{ + for ( IncludeStack::Iter si = includeStack; si.lte(); si++ ) { + if ( strcmp( si->fileName, inclFileName ) == 0 ) + return true; + } + return false; +} + +/* Advance column by the distance from the last newline seen (lastnl), or from the token start (ts) when there is none, up to the token end (te), then clear lastnl. */ void ColmScanner::updateCol() +{ + char *from = lastnl; + if ( from == 0 ) + from = ts; + //cerr << "adding " << te - from << " to column" << endl; + column += te - from; + lastnl = 0; +} + +/* Send a token whose text is the single character c. */ void ColmScanner::token( int type, char c ) +{ + token( type, &c, &c + 1 ); +} + +/* Send a token that carries no text. */ void ColmScanner::token( int type ) +{ + token( type, 0, 0 ); +} + +/* True when path starts with '/'. */ bool isAbsolutePath( const char *path ) +{ + return path[0] == '/'; +} + +/* Try to open each path of the null-terminated pathChecks array in order. On success return the heap-allocated open stream and set found to the index of the path that opened; otherwise set found to -1 and return 0. */ ifstream *ColmScanner::tryOpenInclude( char **pathChecks, long &found ) +{ + char **check = pathChecks; + ifstream *inFile = new ifstream; + + while ( *check != 0 ) { + inFile->open( *check ); + if ( inFile->is_open() ) { + found = check - pathChecks; + return inFile; + } + check += 1; + } + + found = -1; + delete inFile; + return 0; +} + +/* Build the null-terminated list of candidate paths for an include: the name itself when it is absolute, otherwise the name relative to the directory of thisFileName followed by the name under each entry of includePaths. */ char **ColmScanner::makeIncludePathChecks( const char *thisFileName, const char *fileName ) +{ + char **checks = 0; + long nextCheck = 0; + char *data = strdup(fileName); + long length = strlen(fileName); + + /* Absolute path? */ + if ( isAbsolutePath( data ) ) { + checks = new char*[2]; + checks[nextCheck++] = data; + } + else { + /* Search from the location of the current file. */ + checks = new char *[2 + includePaths.length()]; + const char *lastSlash = strrchr( thisFileName, '/' ); + if ( lastSlash == 0 ) + checks[nextCheck++] = data; + else { + long givenPathLen = (lastSlash - thisFileName) + 1; + long checklen = givenPathLen + length; + char *check = new char[checklen+1]; + memcpy( check, thisFileName, givenPathLen ); + memcpy( check+givenPathLen, data, length ); + check[checklen] = 0; + checks[nextCheck++] = check; + } + + /* Search from the include paths given on the command line. 
*/ + for ( ArgsVector::Iter incp = includePaths; incp.lte(); incp++ ) { + long pathLen = strlen( *incp ); + long checkLen = pathLen + 1 + length; + char *check = new char[checkLen+1]; + memcpy( check, *incp, pathLen ); + check[pathLen] = '/'; + memcpy( check+pathLen+1, data, length ); + check[checkLen] = 0; + checks[nextCheck++] = check; + } + } + + checks[nextCheck] = 0; + return checks; +} + + +%%{ + machine section_parse; + import "lmparse.h"; + + action clear_words { word = lit = 0; word_len = lit_len = 0; } + action store_lit { lit = tokdata; lit_len = toklen; } + + action mach_err { scan_error() << "bad machine statement" << endl; } + action incl_err { scan_error() << "bad include statement" << endl; } + action write_err { scan_error() << "bad write statement" << endl; } + + action handle_include + { + String src( lit, lit_len ); + String fileName; + bool unused; + + /* Need a location. */ + InputLoc here; + here.fileName = fileName; + here.line = line; + here.col = column; + + prepareLitString( fileName, unused, src, here ); + char **checks = makeIncludePathChecks( this->fileName, fileName ); + + /* Open the input file for reading. */ + long found = 0; + ifstream *inFile = tryOpenInclude( checks, found ); + if ( inFile == 0 ) { + scan_error() << "include: could not open " << + fileName << " for reading" << endl; + } + else { + /* Only proceed with the include if it was found. */ + if ( recursiveInclude( checks[found] ) ) + scan_error() << "include: this is a recursive include operation" << endl; + + /* Check for a recursive include structure. Add the current file/section + * name then check if what we are including is already in the stack. 
*/ + includeStack.append( IncludeStackItem( checks[found] ) ); + + ColmScanner *scanner = new ColmScanner( fileName, *inFile, parser, includeDepth+1 ); + scanner->scan(); + delete inFile; + + /* Remove the last element (len-1) */ + includeStack.remove( -1 ); + + delete scanner; + } + } + + include_target = + TK_Literal >clear_words @store_lit; + + include_stmt = + ( KW_Include include_target ) @handle_include + <>err incl_err <>eof incl_err; + + action handle_token + { +// cout << Parser_lelNames[type] << " "; +// if ( start != 0 ) { +// cout.write( start, end-start ); +// } +// cout << endl; + + InputLoc loc; + + #ifdef PRINT_TOKENS + cerr << "scanner:" << line << ":" << column << + ": sending token to the parser " << Parser_lelNames[*p]; + cerr << " " << toklen; + if ( tokdata != 0 ) + cerr << " " << tokdata; + cerr << endl; + #endif + + loc.fileName = fileName; + loc.line = line; + loc.col = column; + + if ( tokdata != 0 && tokdata[toklen-1] == '\n' ) + loc.line -= 1; + + parser->token( loc, type, tokdata, toklen ); + } + + # Catch everything else. + everything_else = ^( KW_Include ) @handle_token; + + main := ( + include_stmt | + everything_else + )*; +}%% + +/* Copy the token text [start, end) into a NUL-terminated buffer, run the section_parse machine over the single token type, then update the column. NOTE(review): tokdata is allocated with new[] and is not released in this function; confirm who owns it. */ void ColmScanner::token( int type, char *start, char *end ) +{ + char *tokdata = 0; + int toklen = 0; + int *p = &type; + int *pe = &type + 1; + int *eof = 0; + + if ( start != 0 ) { + toklen = end-start; + tokdata = new char[toklen+1]; + memcpy( tokdata, start, toklen ); + tokdata[toklen] = 0; + } + + %%{ + machine section_parse; + write exec; + }%% + + updateCol(); +} + +void ColmScanner::endSection( ) +{ + /* Execute the eof actions for the section parser. */ + /* Probably use: token( -1 ); */ +} + +%%{ + machine lmscan; + + # This is sent by the driver code. + EOF = 0; + + action inc_nl { + lastnl = p; + column = 0; + line++; + } + NL = '\n' @inc_nl; + + # Identifiers, numbers, comments, and other common things. 
+ ident = ( alpha | '_' ) ( alpha |digit |'_' )*; + number = digit+; + hex_number = '0x' [0-9a-fA-F]+; + + # These literal forms are common to C-like host code and ragel. + s_literal = "'" ([^'\\] | NL | '\\' (any | NL))* "'"; + d_literal = '"' ([^"\\] | NL | '\\' (any | NL))* '"'; + + whitespace = [ \t] | NL; + pound_comment = '#' [^\n]* NL; + + or_literal := |* + # Escape sequences in OR expressions. + '\\0' => { token( TK_ReChar, '\0' ); }; + '\\a' => { token( TK_ReChar, '\a' ); }; + '\\b' => { token( TK_ReChar, '\b' ); }; + '\\t' => { token( TK_ReChar, '\t' ); }; + '\\n' => { token( TK_ReChar, '\n' ); }; + '\\v' => { token( TK_ReChar, '\v' ); }; + '\\f' => { token( TK_ReChar, '\f' ); }; + '\\r' => { token( TK_ReChar, '\r' ); }; + '\\\n' => { updateCol(); }; + '\\' any => { token( TK_ReChar, ts+1, te ); }; + + # Range dash in an OR expression. + '-' => { token( TK_Dash, 0, 0 ); }; + + # Terminate an OR expression. + ']' => { token( TK_SqClose ); fret; }; + + EOF => { + scan_error() << "unterminated OR literal" << endl; + }; + + # Characters in an OR expression. + [^\]] => { token( TK_ReChar, ts, te ); }; + + *|; + + regular_type := |* + # Identifiers. + ident => { token( TK_Word, ts, te ); } ; + + # Numbers + number => { token( TK_UInt, ts, te ); }; + hex_number => { token( TK_Hex, ts, te ); }; + + # Literals, with optionals. + ( s_literal | d_literal ) [i]? + => { token( TK_Literal, ts, te ); }; + + '[' => { token( TK_SqOpen ); fcall or_literal; }; + '[^' => { token( TK_SqOpenNeg ); fcall or_literal; }; + + '/' => { token( '/'); fret; }; + + # Ignore. + pound_comment => { updateCol(); }; + + '..' => { token( TK_DotDot ); }; + '**' => { token( TK_StarStar ); }; + '--' => { token( TK_DashDash ); }; + + ':>' => { token( TK_ColonGt ); }; + ':>>' => { token( TK_ColonGtGt ); }; + '<:' => { token( TK_LtColon ); }; + + # Whitespace other than newline. + [ \t\r]+ => { updateCol(); }; + + # If we are in a single line machine then newline may end the spec. 
+ NL => { updateCol(); }; + + # Consume eof. + EOF; + + any => { token( *ts ); } ; + *|; + + literal_pattern := |* + '\\' '0' { litBuf.append( '\0' ); }; + '\\' 'a' { litBuf.append( '\a' ); }; + '\\' 'b' { litBuf.append( '\b' ); }; + '\\' 't' { litBuf.append( '\t' ); }; + '\\' 'n' { litBuf.append( '\n' ); }; + '\\' 'v' { litBuf.append( '\v' ); }; + '\\' 'f' { litBuf.append( '\f' ); }; + '\\' 'r' { litBuf.append( '\r' ); }; + + '\\' any { + litBuf.append( ts[1] ); + }; + '"' => { + if ( litBuf.length > 0 ) { + token( TK_LitPat, litBuf.data, litBuf.data+litBuf.length ); + litBuf.clear(); + } + token( '"' ); + fret; + }; + NL => { + litBuf.append( '\n' ); + token( TK_LitPat, litBuf.data, litBuf.data+litBuf.length ); + litBuf.clear(); + token( '"' ); + fret; + }; + '[' => { + if ( litBuf.length > 0 ) { + token( TK_LitPat, litBuf.data, litBuf.data+litBuf.length ); + litBuf.clear(); + } + token( '[' ); + fcall main; + }; + any => { + litBuf.append( *ts ); + }; + *|; + + # Parser definitions. + main := |* + 'lex' => { token( KW_Lex ); }; + 'commit' => { token( KW_Commit ); }; + 'token' => { token( KW_Token ); }; + 'literal' => { token( KW_Literal ); }; + 'rl' => { token( KW_Rl ); }; + 'def' => { token( KW_Def ); }; + 'ignore' => { token( KW_Ignore ); }; + 'construct' => { token( KW_Construct ); }; + 'cons' => { token( KW_Construct ); }; + 'new' => { token( KW_New ); }; + 'if' => { token( KW_If ); }; + 'reject' => { token( KW_Reject ); }; + 'while' => { token( KW_While ); }; + 'else' => { token( KW_Else ); }; + 'elsif' => { token( KW_Elsif ); }; + 'match' => { token( KW_Match ); }; + 'for' => { token( KW_For ); }; + 'iter' => { token( KW_Iter ); }; + 'prints' => { token( KW_PrintStream ); }; + 'print' => { token( KW_Print ); }; + 'print_xml_ac' => { token( KW_PrintXMLAC ); }; + 'print_xml' => { token( KW_PrintXML ); }; + 'namespace' => { token( KW_Namespace ); }; + 'lex' => { token( KW_Lex ); }; + 'end' => { token( KW_End ); }; + 'map' => { token( KW_Map ); }; + 'list' => 
{ token( KW_List ); }; + 'vector' => { token( KW_Vector ); }; + 'accum' => { token( KW_Parser ); }; + 'parser' => { token( KW_Parser ); }; + 'return' => { token( KW_Return ); }; + 'break' => { token( KW_Break ); }; + 'yield' => { token( KW_Yield ); }; + 'typeid' => { token( KW_TypeId ); }; + 'make_token' => { token( KW_MakeToken ); }; + 'make_tree' => { token( KW_MakeTree ); }; + 'reducefirst' => { token( KW_ReduceFirst ); }; + 'for' => { token( KW_For ); }; + 'in' => { token( KW_In ); }; + 'nil' => { token( KW_Nil ); }; + 'true' => { token( KW_True ); }; + 'false' => { token( KW_False ); }; + 'parse' => { token( KW_Parse ); }; + 'parse_stop' => { token( KW_ParseStop ); }; + 'global' => { token( KW_Global ); }; + 'export' => { token( KW_Export ); }; + 'ptr' => { token( KW_Ptr ); }; + 'ref' => { token( KW_Ref ); }; + 'deref' => { token( KW_Deref ); }; + 'require' => { token( KW_Require ); }; + 'preeof' => { token( KW_Preeof ); }; + 'left' => { token( KW_Left ); }; + 'right' => { token( KW_Right ); }; + 'nonassoc' => { token( KW_Nonassoc ); }; + 'prec' => { token( KW_Prec ); }; + 'include' => { token( KW_Include ); }; + 'context' => { token( KW_Context ); }; + 'alias' => { token( KW_Alias ); }; + 'send' => { token( KW_Send ); }; + 'ni' => { token( KW_Ni ); }; + + # Identifiers. + ident => { token( TK_Word, ts, te ); } ; + + number => { token( TK_Number, ts, te ); }; + + '/' => { + token( '/' ); + if ( parser->enterRl ) + fcall regular_type; + }; + + "~" [^\n]* NL => { + token( '"' ); + token( TK_LitPat, ts+1, te ); + token( '"' ); + }; + + "'" ([^'\\\n] | '\\' (any | NL))* ( "'" | NL ) => { + token( TK_Literal, ts, te ); + }; + + '"' => { + token( '"' ); + litBuf.clear(); + fcall literal_pattern; + }; + '[' => { + token( '[' ); + fcall main; + }; + + ']' => { + token( ']' ); + if ( top > 0 ) + fret; + }; + + # Ignore. 
+ pound_comment => { updateCol(); }; + + '=>' => { token( TK_DoubleArrow ); }; + '==' => { token( TK_DoubleEql ); }; + '!=' => { token( TK_NotEql ); }; + '::' => { token( TK_DoubleColon ); }; + '<=' => { token( TK_LessEql ); }; + '>=' => { token( TK_GrtrEql ); }; + '->' => { token( TK_RightArrow ); }; + '&&' => { token( TK_AmpAmp ); }; + '||' => { token( TK_BarBar ); }; + '<<' => { token( TK_LtLt ); }; + + ( '+' | '-' | '*' | '/' | '(' | ')' | '@' | '$' | '^' ) => { token( *ts ); }; + + + # Whitespace other than newline. + [ \t\r]+ => { updateCol(); }; + NL => { updateCol(); }; + + # Consume eof. + EOF; + + any => { token( *ts ); } ; + *|; +}%% + +%% write data; + +void ColmScanner::scan() +{ + int bufsize = 8; + char *buf = new char[bufsize]; + const char last_char = 0; + int cs, act, have = 0; + int top, stack[32]; + bool execute = true; + + sectionParseInit(); + %% write init; + + while ( execute ) { + char *p = buf + have; + int space = bufsize - have; + + if ( space == 0 ) { + /* We filled up the buffer trying to scan a token. Grow it. */ + bufsize = bufsize * 2; + char *newbuf = new char[bufsize]; + + /* Recompute p and space. */ + p = newbuf + have; + space = bufsize - have; + + /* Patch up pointers possibly in use. */ + if ( ts != 0 ) + ts = newbuf + ( ts - buf ); + te = newbuf + ( te - buf ); + + /* Copy the new buffer in. */ + memcpy( newbuf, buf, have ); + delete[] buf; + buf = newbuf; + } + + input.read( p, space ); + int len = input.gcount(); + + /* If we see eof then append the EOF char. */ + if ( len == 0 ) { + p[0] = last_char, len = 1; + execute = false; + } + + char *pe = p + len; + char *eof = 0; + %% write exec; + + /* Check if we failed. */ + if ( cs == lmscan_error ) { + /* Machine failed before finding a token. I'm not yet sure if this + * is reachable. */ + scan_error() << "colm scanner error (metalanguage)" << endl; + exit(1); + } + + /* Decide if we need to preserve anything. */ + char *preserve = ts; + + /* Now set up the prefix. 
*/ + if ( preserve == 0 ) + have = 0; + else { + /* There is data that needs to be shifted over. */ + have = pe - preserve; + memmove( buf, preserve, have ); + unsigned int shiftback = preserve - buf; + if ( ts != 0 ) + ts -= shiftback; + te -= shiftback; + + preserve = buf; + } + } + delete[] buf; +} + +void ColmScanner::eof() +{ + InputLoc loc; + loc.fileName = "<EOF>"; + loc.line = line; + loc.col = 1; + parser->token( loc, ColmParser_tk_eof, 0, 0 ); +} diff --git a/src/loadboot2.cc b/src/loadboot2.cc new file mode 100644 index 00000000..b2553739 --- /dev/null +++ b/src/loadboot2.cc @@ -0,0 +1,3 @@ +#include "gen/if2.h" +#include "loadfinal.cc" + diff --git a/src/loadcolm.cc b/src/loadcolm.cc new file mode 100644 index 00000000..f16779be --- /dev/null +++ b/src/loadcolm.cc @@ -0,0 +1,2 @@ +#include "gen/if3.h" +#include "loadfinal.cc" diff --git a/src/loadfinal.cc b/src/loadfinal.cc new file mode 100644 index 00000000..0fddd360 --- /dev/null +++ b/src/loadfinal.cc @@ -0,0 +1,2978 @@ +/* + * Copyright 2006-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* This file is not compiled directly, but rather included in sources. It is + * used for both bootstrap2 and colm, but these programs each use separate + * gen/if[23].h headers, so this file is included by distinct source files that + * each bring in the appropriate dependency, then include this file. */ + +#include <stdbool.h> +#include <string.h> +#include <iostream> + +#include "loadfinal.h" + +extern colm_sections colm_object; + +InputLoc::InputLoc( colm_location *pcloc ) +{ + if ( pcloc != 0 ) { + fileName = pcloc->name; + line = pcloc->line; + col = pcloc->column; + } + else { + fileName = 0; + line = -1; + col = -1; + } +} + +String unescape( const String &s ) +{ + String out( String::Fresh(), s.length() ); + char *d = out.data; + + for ( int i = 0; i < s.length(); ) { + if ( s[i] == '\\' ) { + switch ( s[i+1] ) { + case '0': *d++ = '\0'; break; + case 'a': *d++ = '\a'; break; + case 'b': *d++ = '\b'; break; + case 't': *d++ = '\t'; break; + case 'n': *d++ = '\n'; break; + case 'v': *d++ = '\v'; break; + case 'f': *d++ = '\f'; break; + case 'r': *d++ = '\r'; break; + default: *d++ = s[i+1]; break; + } + i += 2; + } + else { + *d++ = s[i]; + i += 1; + } + } + out.chop( d - out.data ); + return out; +} + +struct LoadColm +: + public BaseParser +{ + LoadColm( Compiler *pd, const char *inputFileName ) + : + BaseParser( pd ), + inputFileName( inputFileName ) + {} + + const char *inputFileName; + + struct Alignment + { + Alignment() + : + firstLine(0), + lastLine(0), + firstColumn(0) + {} + + int firstLine; + int lastLine; + int firstColumn; + + void check( const char *type, colm_location *loc ) + { + if ( firstLine == 0 ) { + firstLine = lastLine = loc->line; + firstColumn = 
loc->column; + } + else { + /* Checking if we are outdented. Indents and are ok. So is + * outdenting back to the first. */ + if ( loc->column < firstColumn ) { + warning( loc ) << type << " literal outdented beyond first at " << + firstLine << ":" << firstColumn << + ", possible unintended concatenation" << std::endl; + } + + lastLine = loc->line; + } + } + }; + + + + Literal *walkLexRangeLit( lex_range_lit lexRangeLit ) + { + Literal *literal = 0; + switch ( lexRangeLit.prodName() ) { + case lex_range_lit::Lit: { + String lit = lexRangeLit.lex_lit().data(); + literal = Literal::cons( lexRangeLit.lex_lit().loc(), lit, Literal::LitString ); + break; + } + case lex_range_lit::Number: { + String num = lexRangeLit.lex_num().text().c_str(); + literal = Literal::cons( lexRangeLit.lex_num().loc(), num, Literal::Number ); + break; + }} + return literal; + } + + LexFactor *walkLexFactor( lex_factor lexFactor ) + { + LexFactor *factor = 0; + switch ( lexFactor.prodName() ) { + case lex_factor::Literal: { + String litString = lexFactor.lex_lit().data(); + Literal *literal = Literal::cons( lexFactor.lex_lit().loc(), + litString, Literal::LitString ); + factor = LexFactor::cons( literal ); + break; + } + case lex_factor::Id: { + String id = lexFactor.lex_id().data(); + factor = lexRlFactorName( id, lexFactor.lex_id().loc() ); + break; + } + case lex_factor::Range: { + Literal *low = walkLexRangeLit( lexFactor.Low() ); + Literal *high = walkLexRangeLit( lexFactor.High() ); + + Range *range = Range::cons( low, high ); + factor = LexFactor::cons( range ); + break; + } + case lex_factor::PosOrBlock: { + ReOrBlock *block = walkRegOrData( lexFactor.reg_or_data() ); + factor = LexFactor::cons( ReItem::cons( block, ReItem::OrBlock ) ); + break; + } + case lex_factor::NegOrBlock: { + ReOrBlock *block = walkRegOrData( lexFactor.reg_or_data() ); + factor = LexFactor::cons( ReItem::cons( block, ReItem::NegOrBlock ) ); + break; + } + case lex_factor::Number: { + String number = 
lexFactor.lex_uint().text().c_str(); + factor = LexFactor::cons( Literal::cons( lexFactor.lex_uint().loc(), + number, Literal::Number ) ); + break; + } + case lex_factor::Hex: { + String number = lexFactor.lex_hex().text().c_str(); + factor = LexFactor::cons( Literal::cons( lexFactor.lex_hex().loc(), + number, Literal::Number ) ); + break; + } + case lex_factor::Paren: { + lex_expr LexExpr = lexFactor.lex_expr(); + LexExpression *expr = walkLexExpr( LexExpr ); + LexJoin *join = LexJoin::cons( expr ); + factor = LexFactor::cons( join ); + break; + }} + return factor; + } + + LexFactorAug *walkLexFactorAug( lex_factor_rep LexFactorRepTree ) + { + LexFactorRep *factorRep = walkLexFactorRep( LexFactorRepTree ); + return LexFactorAug::cons( factorRep ); + } + + LangExpr *walkCodeExpr( code_expr codeExpr, bool used = true ) + { + LangExpr *expr = 0; + + switch ( codeExpr.prodName() ) { + case code_expr::AmpAmp: { + LangExpr *relational = walkCodeRelational( codeExpr.code_relational() ); + LangExpr *left = walkCodeExpr( codeExpr._code_expr() ); + + InputLoc loc = codeExpr.AMPAMP().loc(); + expr = LangExpr::cons( loc, left, OP_LogicalAnd, relational ); + break; + } + case code_expr::BarBar: { + LangExpr *relational = walkCodeRelational( codeExpr.code_relational() ); + LangExpr *left = walkCodeExpr( codeExpr._code_expr() ); + + InputLoc loc = codeExpr.BARBAR().loc(); + expr = LangExpr::cons( loc, left, OP_LogicalOr, relational ); + break; + } + case code_expr::Base: { + LangExpr *relational = walkCodeRelational( codeExpr.code_relational(), used ); + expr = relational; + break; + }} + return expr; + } + + LangStmt *walkStatement( statement Statement ) + { + LangStmt *stmt = 0; + switch ( Statement.prodName() ) { + case statement::Print: { + print_stmt printStmt = Statement.print_stmt(); + stmt = walkPrintStmt( printStmt ); + break; + } + case statement::VarDef: { + ObjectField *objField = walkVarDef( Statement.var_def(), + ObjectField::UserLocalType ); + LangExpr *expr = 
walkOptDefInit( Statement.opt_def_init() ); + stmt = varDef( objField, expr, LangStmt::AssignType ); + break; + } + case statement::For: { + pushScope(); + + String forDecl = Statement.id().text().c_str(); + TypeRef *typeRef = walkTypeRef( Statement.type_ref() ); + StmtList *stmtList = walkBlockOrSingle( Statement.block_or_single() ); + + IterCall *iterCall = walkIterCall( Statement.iter_call() ); + + stmt = forScope( Statement.id().loc(), forDecl, + curScope(), typeRef, iterCall, stmtList ); + + popScope(); + break; + } + case statement::If: { + pushScope(); + + LangExpr *expr = walkCodeExpr( Statement.code_expr() ); + StmtList *stmtList = walkBlockOrSingle( Statement.block_or_single() ); + + popScope(); + + LangStmt *elsifList = walkElsifList( Statement.elsif_list() ); + stmt = LangStmt::cons( LangStmt::IfType, expr, stmtList, elsifList ); + break; + } + case statement::SwitchUnder: + case statement::SwitchBlock: { + pushScope(); + stmt = walkCaseClauseList( Statement.case_clause_list(), Statement.var_ref() ); + popScope(); + break; + } + case statement::While: { + pushScope(); + LangExpr *expr = walkCodeExpr( Statement.code_expr() ); + StmtList *stmtList = walkBlockOrSingle( Statement.block_or_single() ); + stmt = LangStmt::cons( LangStmt::WhileType, expr, stmtList ); + popScope(); + break; + } + case statement::LhsVarRef: { + LangVarRef *varRef = walkVarRef( Statement.var_ref() ); + LangExpr *expr = walkCodeExpr( Statement.code_expr() ); + stmt = LangStmt::cons( varRef->loc, LangStmt::AssignType, varRef, expr ); + break; + } + case statement::Yield: { + LangVarRef *varRef = walkVarRef( Statement.var_ref() ); + stmt = LangStmt::cons( LangStmt::YieldType, varRef ); + break; + } + case statement::Return: { + LangExpr *expr = walkCodeExpr( Statement.code_expr() ); + stmt = LangStmt::cons( Statement.loc(), LangStmt::ReturnType, expr ); + break; + } + case statement::Break: { + stmt = LangStmt::cons( LangStmt::BreakType ); + break; + } + case statement::Reject: { + 
stmt = LangStmt::cons( Statement.REJECT().loc(), LangStmt::RejectType ); + break; + } + case statement::Call: { + LangVarRef *langVarRef = walkVarRef( Statement.var_ref() ); + CallArgVect *exprVect = walkCallArgList( Statement.call_arg_list() ); + LangTerm *term = LangTerm::cons( langVarRef->loc, langVarRef, exprVect ); + LangExpr *expr = LangExpr::cons( term ); + stmt = LangStmt::cons( expr->loc, LangStmt::ExprType, expr ); + break; + } + case statement::StmtOrFactor: { + LangExpr *expr = walkStmtOrFactor( Statement.stmt_or_factor() ); + stmt = LangStmt::cons( expr->loc, LangStmt::ExprType, expr ); + break; + } + case statement::BareSend: { + NamespaceQual *nspaceQual = NamespaceQual::cons( curNspace() ); + QualItemVect *qualItemVect = new QualItemVect; + + LangVarRef *varRef = LangVarRef::cons( InputLoc(), + curNspace(), curStruct(), curScope(), nspaceQual, + qualItemVect, String("_") ); + + ConsItemList *list = walkAccumulate( Statement.accumulate() ); + bool eof = walkOptEos( Statement.opt_eos() ); + LangExpr *expr = send( InputLoc(), varRef, list, eof ); + stmt = LangStmt::cons( expr->loc, LangStmt::ExprType, expr ); + break; + } + } + return stmt; + } + + StmtList *walkLangStmtList( lang_stmt_list langStmtList ) + { + StmtList *retList = new StmtList; + + /* Walk the list of statements. 
*/ + RepeatIter<statement> ri( langStmtList.StmtList() ); + + while ( !ri.end() ) { + statement Statement = ri.value(); + LangStmt *stmt = walkStatement( Statement ); + if ( stmt != 0 ) + retList->append( stmt ); + ri.next(); + } + + require_pattern require = langStmtList.opt_require_stmt().require_pattern(); + if ( require != 0 ) { + pushScope(); + + LangVarRef *varRef = walkVarRef( require.var_ref() ); + PatternItemList *list = walkPattern( require.pattern(), varRef ); + LangExpr *expr = match( require.REQUIRE().loc(), varRef, list ); + + StmtList *reqList = walkLangStmtList( langStmtList.opt_require_stmt().lang_stmt_list() ); + + LangStmt *stmt = LangStmt::cons( LangStmt::IfType, expr, reqList, 0 ); + + popScope(); + + retList->append( stmt ); + } + + return retList; + } + + void walkTokenDef( token_def TokenDef ) + { + String name = TokenDef.id().data(); + + bool niLeft = walkNoIgnoreLeft( TokenDef.no_ignore_left() ); + bool niRight = walkNoIgnoreRight( TokenDef.no_ignore_right() ); + + ObjectDef *objectDef = walkVarDefList( TokenDef.VarDefList() ); + objectDef->name = name; + + LexJoin *join = 0; + if ( TokenDef.opt_lex_expr().lex_expr() != 0 ) { + LexExpression *expr = walkLexExpr( TokenDef.opt_lex_expr().lex_expr() ); + join = LexJoin::cons( expr ); + } + + CodeBlock *translate = walkOptTranslate( TokenDef.opt_translate() ); + + defineToken( TokenDef.id().loc(), name, join, objectDef, + translate, false, niLeft, niRight ); + } + + void walkIgnoreCollector( ic_def IgnoreCollector ) + { + String id = IgnoreCollector.id().data(); + zeroDef( IgnoreCollector.id().loc(), id ); + } + + String walkOptId( opt_id optId ) + { + String name; + if ( optId.prodName() == opt_id::Id ) + name = optId.id().data(); + return name; + } + + ObjectDef *walkVarDefList( _lrepeat_var_def varDefList ) + { + ObjectDef *objectDef = ObjectDef::cons( ObjectDef::UserType, + String(), pd->nextObjectId++ ); + + RepeatIter<var_def> varDefIter( varDefList ); + + while ( !varDefIter.end() ) { + 
ObjectField *varDef = walkVarDef( varDefIter.value(), + ObjectField::UserFieldType ); + objVarDef( objectDef, varDef ); + varDefIter.next(); + } + + return objectDef; + } + /* Walk a pre_eof_def: open a block, walk its statement list, pass the list and the local frame to preEof(), then close the block. */ + void walkPreEof( pre_eof_def PreEofDef ) + { + ObjectDef *localFrame = blockOpen(); + StmtList *stmtList = walkLangStmtList( PreEofDef.lang_stmt_list() ); + preEof( PreEofDef.PREEOF().loc(), stmtList, localFrame ); + blockClose(); + } + /* Walk an ignore_def: build a UserType ObjectDef named by the optional id, wrap the optional lex expression in a LexJoin, and call defineToken() with 0 for the translate block, true for the following flag and false for both no-ignore flags. NOTE(review): the true flag is presumably "ignore" -- confirm against defineToken(). */ + void walkIgnoreDef( ignore_def IgnoreDef ) + { + String name = walkOptId( IgnoreDef.opt_id() ); + ObjectDef *objectDef = ObjectDef::cons( ObjectDef::UserType, + name, pd->nextObjectId++ ); + + LexJoin *join = 0; + if ( IgnoreDef.opt_lex_expr().lex_expr() != 0 ) { + LexExpression *expr = walkLexExpr( IgnoreDef.opt_lex_expr().lex_expr() ); + join = LexJoin::cons( expr ); + } + + defineToken( IgnoreDef.IGNORE().loc(), name, join, objectDef, + 0, true, false, false ); + } + /* Build the LangExpr for a multiplicative expression. Star and Fslash recurse for the left operand and combine it with the walked unary operand using '*' or '/'. Base returns the walked unary expression; this is the only case that forwards `used`. */ + LangExpr *walkCodeMultiplicitive( code_multiplicitive mult, bool used = true ) + { + LangExpr *expr = 0; + switch ( mult.prodName() ) { + case code_multiplicitive::Star: { + LangExpr *right = walkCodeUnary( mult.code_unary() ); + LangExpr *left = walkCodeMultiplicitive( mult._code_multiplicitive() ); + expr = LangExpr::cons( mult.STAR().loc(), left, '*', right ); + break; + } + case code_multiplicitive::Fslash: { + LangExpr *right = walkCodeUnary( mult.code_unary() ); + LangExpr *left = walkCodeMultiplicitive( mult._code_multiplicitive() ); + expr = LangExpr::cons( mult.FSLASH().loc(), left, '/', right ); + break; + } + case code_multiplicitive::Base: { + LangExpr *right = walkCodeUnary( mult.code_unary(), used ); + expr = right; + break; + }} + return expr; + } + /* Walk a pattern element that is either an id or a backtick literal. The region qualifier and repeat type are resolved first; Id goes to patternElNamed(), Lit goes to patternElType(). The result stays 0 for any other production. */ + PatternItemList *walkPatternElTypeOrLit( pattern_el_lel typeOrLit, + LangVarRef *patternVarRef ) + { + NamespaceQual *nspaceQual = walkRegionQual( typeOrLit.region_qual() ); + RepeatType repeatType = walkOptRepeat( typeOrLit.opt_repeat() ); + + PatternItemList *list = 0; + switch ( typeOrLit.prodName() ) { + case
pattern_el_lel::Id: { + String id = typeOrLit.id().data(); + list = patternElNamed( typeOrLit.id().loc(), patternVarRef, + nspaceQual, id, repeatType ); + break; + } + case pattern_el_lel::Lit: { + String lit = typeOrLit.backtick_lit().data(); + list = patternElType( typeOrLit.backtick_lit().loc(), patternVarRef, + nspaceQual, lit, repeatType ); + break; + }} + + return list; + } + /* Return a LangVarRef for the label id, constructed with the current namespace, struct and scope, or 0 when optLabel is not the Id production. */ + LangVarRef *walkOptLabel( opt_label optLabel ) + { + LangVarRef *varRef = 0; + if ( optLabel.prodName() == opt_label::Id ) { + String id = optLabel.id().data(); + varRef = LangVarRef::cons( optLabel.id().loc(), + curNspace(), curStruct(), curScope(), id ); + } + return varRef; + } + /* Dispatch on the pattern_el production. Dq walks the double-quoted element list, Sq walks the single-quoted data list, Tilde yields one InputTextForm item holding the tilde text with a newline appended, and PatternEl combines the type-or-literal list with its optional label through consPatternEl(). */ + PatternItemList *walkPatternEl( pattern_el patternEl, LangVarRef *patternVarRef ) + { + PatternItemList *list = 0; + switch ( patternEl.prodName() ) { + case pattern_el::Dq: { + list = walkLitpatElList( patternEl.LitpatElList(), + patternEl.dq_lit_term().LIT_DQ_NL(), patternVarRef ); + break; + } + case pattern_el::Sq: { + list = walkPatSqConsDataList( patternEl.SqConsDataList(), + patternEl.sq_lit_term().CONS_SQ_NL() ); + break; + } + case pattern_el::Tilde: { + String patternData = patternEl.opt_tilde_data().text().c_str(); + patternData += '\n'; + PatternItem *patternItem = PatternItem::cons( PatternItem::InputTextForm, + patternEl.opt_tilde_data().loc(), patternData ); + list = PatternItemList::cons( patternItem ); + break; + } + case pattern_el::PatternEl: { + PatternItemList *typeOrLitList = walkPatternElTypeOrLit( + patternEl.pattern_el_lel(), patternVarRef ); + LangVarRef *varRef = walkOptLabel( patternEl.opt_label() ); + list = consPatternEl( varRef, typeOrLitList ); + break; + }} + return list; + } + /* Walk one element of a double-quoted pattern literal: ConsData becomes a single InputTextForm item built from the unescaped text, SubList walks the nested pattern element list. */ + PatternItemList *walkLitpatEl( litpat_el litpatEl, LangVarRef *patternVarRef ) + { + PatternItemList *list = 0; + switch ( litpatEl.prodName() ) { + case litpat_el::ConsData: { + String consData = unescape( litpatEl.lit_dq_data().text().c_str() ); + PatternItem *patternItem = PatternItem::cons(
PatternItem::InputTextForm, + litpatEl.lit_dq_data().loc(), consData ); + list = PatternItemList::cons( patternItem ); + break; + } + case litpat_el::SubList: { + list = walkPatternElList( litpatEl.PatternElList(), patternVarRef ); + break; + }} + return list; + } + /* Build a PatternItemList from single-quoted data: one InputTextForm item per sq_cons_data (text unescaped), followed by one item for the unescaped Nl data when Nl is non-zero. */ + PatternItemList *walkPatSqConsDataList( _lrepeat_sq_cons_data sqConsDataList, CONS_SQ_NL Nl ) + { + PatternItemList *list = new PatternItemList; + + RepeatIter<sq_cons_data> sqConsDataIter( sqConsDataList ); + + while ( !sqConsDataIter.end() ) { + String consData = unescape( sqConsDataIter.value().text().c_str() ); + PatternItem *patternItem = PatternItem::cons( PatternItem::InputTextForm, + sqConsDataIter.value().loc(), consData ); + PatternItemList *tail = PatternItemList::cons( patternItem ); + list = patListConcat( list, tail ); + + sqConsDataIter.next(); + } + + if ( Nl != 0 ) { + String nl = unescape( Nl.data() ); + PatternItem *patternItem = PatternItem::cons( PatternItem::InputTextForm, + Nl.loc(), nl ); + PatternItemList *tail = PatternItemList::cons( patternItem ); + list = patListConcat( list, tail ); + } + + return list; + } + /* Constructor-side counterpart of walkPatSqConsDataList: the same traversal over the single-quoted data and optional Nl, producing ConsItem::InputText items joined with consListConcat(). */ + ConsItemList *walkConsSqConsDataList( _lrepeat_sq_cons_data sqConsDataList, CONS_SQ_NL Nl ) + { + ConsItemList *list = new ConsItemList; + + RepeatIter<sq_cons_data> sqConsDataIter( sqConsDataList ); + + while ( !sqConsDataIter.end() ) { + String consData = unescape( sqConsDataIter.value().text().c_str() ); + ConsItem *consItem = ConsItem::cons( + sqConsDataIter.value().loc(), ConsItem::InputText, consData ); + ConsItemList *tail = ConsItemList::cons( consItem ); + list = consListConcat( list, tail ); + + sqConsDataIter.next(); + } + + if ( Nl != 0 ) { + String nl = unescape( Nl.data() ); + ConsItem *consItem = ConsItem::cons( + Nl.loc(), ConsItem::InputText, nl ); + ConsItemList *tail = ConsItemList::cons( consItem ); + list = consListConcat( list, tail ); + } + + return list; + } + /* Concatenate the lists produced by walkLitpatEl() for each element of a double-quoted pattern literal, then append an InputTextForm item for the unescaped Nl data when Nl is non-zero. */ + PatternItemList *walkLitpatElList( _lrepeat_litpat_el litpatElList, LIT_DQ_NL
Nl, + LangVarRef *patternVarRef ) + { + PatternItemList *list = new PatternItemList; + + RepeatIter<litpat_el> litpatElIter( litpatElList ); + + while ( !litpatElIter.end() ) { + PatternItemList *tail = walkLitpatEl( litpatElIter.value(), patternVarRef ); + list = patListConcat( list, tail ); + litpatElIter.next(); + } + + if ( Nl != 0 ) { + String nl = unescape( Nl.data() ); + PatternItem *patternItem = PatternItem::cons( PatternItem::InputTextForm, + Nl.loc(), nl ); + PatternItemList *tail = PatternItemList::cons( patternItem ); + list = patListConcat( list, tail ); + } + + return list; + } + /* Concatenate the lists produced by walkPatternEl() for each element of the repeat, starting from a new empty PatternItemList. */ + PatternItemList *walkPatternElList( _lrepeat_pattern_el patternElList, + LangVarRef *patternVarRef ) + { + PatternItemList *list = new PatternItemList; + + RepeatIter<pattern_el> patternElIter( patternElList ); + + while ( !patternElIter.end() ) { + PatternItemList *tail = walkPatternEl( patternElIter.value(), patternVarRef ); + list = patListConcat( list, tail ); + patternElIter.next(); + } + return list; + } + /* Walk a top-level pattern element. Dq, Sq and Tilde are handled the same way as the matching cases in walkPatternEl(); for any other production the result stays 0. */ + PatternItemList *walkPattternTopEl( pattern_top_el patternTopEl, + LangVarRef *patternVarRef ) + { + PatternItemList *list = 0; + switch ( patternTopEl.prodName() ) { + case pattern_top_el::Dq: { + list = walkLitpatElList( patternTopEl.LitpatElList(), + patternTopEl.dq_lit_term().LIT_DQ_NL(), patternVarRef ); + break; + } + case pattern_top_el::Sq: { + list = walkPatSqConsDataList( patternTopEl.SqConsDataList(), + patternTopEl.sq_lit_term().CONS_SQ_NL() ); + break; + } + case pattern_top_el::Tilde: { + String patternData = patternTopEl.opt_tilde_data().text().c_str(); + patternData += '\n'; + PatternItem *patternItem = PatternItem::cons( PatternItem::InputTextForm, + patternTopEl.opt_tilde_data().loc(), patternData ); + list = PatternItemList::cons( patternItem ); + break; + }} + return list; + } + /* Walk every top-level element of a pattern_list: run alignment.check( "pattern", ... ) on the element's location, walk it with walkPattternTopEl() and concatenate the results. */ + PatternItemList *walkPatternList( pattern_list patternList, LangVarRef *patternVarRef ) + { + Alignment alignment; + PatternItemList *list = new PatternItemList; +
RepeatIter<pattern_top_el> patternTopElIter ( patternList ); + while ( !patternTopElIter.end() ) { + pattern_top_el topEl = patternTopElIter.value(); + alignment.check( "pattern", topEl.loc() ); + + PatternItemList *tail = walkPattternTopEl( topEl, patternVarRef ); + list = patListConcat( list, tail ); + patternTopElIter.next(); + } + return list; + } + /* Entry point for patterns: the TopList production is walked with walkPatternList(), every other production with walkPatternElList(). */ + PatternItemList *walkPattern( pattern Pattern, LangVarRef *patternVarRef ) + { + if ( Pattern.prodName() == pattern::TopList ) + return walkPatternList( Pattern.pattern_list(), patternVarRef ); + else + return walkPatternElList( Pattern.PatternElList(), patternVarRef ); + } + /* Return the walked initializer expression for the Init production, or 0 otherwise. */ + LangExpr *walkOptDefInit( opt_def_init optDefInit ) + { + LangExpr *expr = 0; + if ( optDefInit.prodName() == opt_def_init::Init ) + expr = walkCodeExpr( optDefInit.code_expr() ); + return expr; + } + /* Walk the var_def as a StructFieldType field plus the optional initializer and build the statement with exportStmt( field, AssignType, expr ). */ + LangStmt *walkExportDef( export_def exportDef ) + { + ObjectField *objField = walkVarDef( exportDef.var_def(), + ObjectField::StructFieldType ); + LangExpr *expr = walkOptDefInit( exportDef.opt_def_init() ); + + return exportStmt( objField, LangStmt::AssignType, expr ); + } + /* Walk the var_def as a StructFieldType field plus the optional initializer and build the statement with globalDef( field, expr, AssignType ). */ + LangStmt *walkGlobalDef( global_def GlobalDef ) + { + ObjectField *objField = walkVarDef( GlobalDef.var_def(), + ObjectField::StructFieldType ); + LangExpr *expr = walkOptDefInit( GlobalDef.opt_def_init() ); + + return globalDef( objField, expr, LangStmt::AssignType ); + } + /* Pass the alias id, its location and the walked type reference to alias(). */ + void walkAliasDef( alias_def aliasDef ) + { + String id = aliasDef.id().data(); + TypeRef *typeRef = walkTypeRef( aliasDef.type_ref() ); + alias( aliasDef.id().loc(), id, typeRef ); + } + /* For the Translate production: open a block, walk the statement list and wrap it together with the local frame in a CodeBlock whose context is curStruct(), then close the block. Returns 0 for any other production. */ + CodeBlock *walkOptTranslate( opt_translate optTranslate ) + { + CodeBlock *block = 0; + if ( optTranslate.prodName() == opt_translate::Translate ) { + ObjectDef *localFrame = blockOpen(); + StmtList *stmtList = walkLangStmtList( optTranslate.lang_stmt_list() ); + block = CodeBlock::cons( stmtList, localFrame ); + block->context = curStruct(); + blockClose(); + } + return block; + } + /* Build a PredDecl for one precedence token: Id goes to predTokenName(), Lit goes to predTokenLit(); both receive the walked region qualifier. */ + PredDecl
*walkPredToken( pred_token predToken ) + { + NamespaceQual *nspaceQual = walkRegionQual( predToken.region_qual() ); + PredDecl *predDecl = 0; + switch ( predToken.prodName() ) { + case pred_token::Id: { + String id = predToken.id().data(); + predDecl = predTokenName( predToken.id().loc(), nspaceQual, id ); + break; + } + case pred_token::Lit: { + String lit = predToken.backtick_lit().data(); + predDecl = predTokenLit( predToken.backtick_lit().loc(), lit, nspaceQual ); + break; + }} + return predDecl; + } + /* Collect the PredDecls of a left-recursive pred_token_list: the Base production allocates the PredDeclList and appends its token, each List level recurses first and then appends its own token. */ + PredDeclList *walkPredTokenList( pred_token_list predTokenList ) + { + PredDeclList *list = 0; + switch ( predTokenList.prodName() ) { + case pred_token_list::List: { + list = walkPredTokenList( predTokenList._pred_token_list() ); + PredDecl *predDecl = walkPredToken( predTokenList.pred_token() ); + list->append( predDecl ); + break; + } + case pred_token_list::Base: { + PredDecl *predDecl = walkPredToken( predTokenList.pred_token() ); + list = new PredDeclList; + list->append( predDecl ); + break; + }} + return list; + } + /* Map the pred_type production to PredLeft, PredRight or PredNonassoc; the value stays PredLeft when no case matches. */ + PredType walkPredType( pred_type predType ) + { + PredType pt = PredLeft; + switch ( predType.prodName() ) { + case pred_type::Left: + pt = PredLeft; + break; + case pred_type::Right: + pt = PredRight; + break; + case pred_type::NonAssoc: + pt = PredNonassoc; + break; + } + + return pt; + } + /* Walk the precedence type and the token list and pass both to precedenceStmt(). */ + void walkPrecedenceDef( precedence_def precedenceDef ) + { + PredType predType = walkPredType( precedenceDef.pred_type() ); + PredDeclList *predDeclList = walkPredTokenList( + precedenceDef.pred_token_list() ); + precedenceStmt( predType, predDeclList ); + } + /* Load an included file. The single-quoted data pieces are concatenated and unescaped to form the file name; if readCheck() fails on it, each entry of includePaths is tried as a directory prefix. A colm_program is then run with the file name as its argument, and the resulting root item list is walked and returned. Returns 0 on a parse error. NOTE(review): the parse-error return happens before colm_delete_program() is reached -- confirm whether that is intended. */ + StmtList *walkInclude( _include Include ) + { + String lit = ""; + _lrepeat_sq_cons_data sqConsDataList = Include.SqConsDataList(); + + RepeatIter<sq_cons_data> sqConsDataIter( sqConsDataList ); + + while ( !sqConsDataIter.end() ) { + colm_data *data = sqConsDataIter.value().data(); + lit.append( data->data, data->length ); + sqConsDataIter.next(); + } + + String file = unescape( lit ); + +
/* Check if we can open the input file for reading. */ + if ( ! readCheck( file.data ) ) { + + bool found = false; + for ( ArgsVector::Iter av = includePaths; av.lte(); av++ ) { + String path = String( *av ) + "/" + file; + if ( readCheck( path.data ) ) { + found = true; + file = path; + break; + } + } + + if ( !found ) + error() << "could not open " << file.data << " for reading" << endp; + } + + const char *argv[3]; + argv[0] = "load-include"; + argv[1] = file.data; + argv[2] = 0; + + colm_program *program = colm_new_program( &colm_object ); + colm_run_program( program, 2, argv ); + + /* Extract the parse tree. */ + start Start = ColmTree( program ); + str Error = ColmError( program ); + + if ( Start == 0 ) { + gblErrorCount += 1; + InputLoc loc = Error.loc(); + error(loc) << file.data << ": parse error: " << Error.text() << std::endl; + return 0; + } + + StmtList *stmtList = walkRootItemList( Start.RootItemList() ); + pd->streamFileNames.append( colm_extract_fns( program ) ); + colm_delete_program( program ); + return stmtList; + } + + /* Build a NamespaceQual from a left-recursive region_qual: the Base production starts from curNspace(), and each Qual level recurses first and then appends its id to qualNames. */ + NamespaceQual *walkRegionQual( region_qual regionQual ) + { + NamespaceQual *qual = 0; + switch ( regionQual.prodName() ) { + case region_qual::Qual: { + qual = walkRegionQual( regionQual._region_qual() ); + qual->qualNames.append( String( regionQual.id().data() ) ); + break; + } + case region_qual::Base: { + qual = NamespaceQual::cons( curNspace() ); + break; + }} + return qual; + } + /* Map the opt_repeat production to a RepeatType: Star, Plus, Question, LeftStar and LeftPlus each select their own value; the result stays RepeatNone when no case matches. */ + RepeatType walkOptRepeat( opt_repeat OptRepeat ) + { + RepeatType repeatType = RepeatNone; + switch ( OptRepeat.prodName() ) { + case opt_repeat::Star: + repeatType = RepeatRepeat; + break; + case opt_repeat::Plus: + repeatType = RepeatList; + break; + case opt_repeat::Question: + repeatType = RepeatOpt; + break; + case opt_repeat::LeftStar: + repeatType = RepeatLeftRepeat; + break; + case opt_repeat::LeftPlus: + repeatType = RepeatLeftList; + break; + } + return repeatType; + } + /* Build a List TypeRef: the nested type is walked as the value type and also wrapped in a ListEl TypeRef that is passed as the element type. */ + TypeRef *walkValueList( type_ref typeRef ) + { + TypeRef
*valType = walkTypeRef( typeRef._type_ref() ); + TypeRef *elType = TypeRef::cons( typeRef.loc(), TypeRef::ListEl, valType ); + return TypeRef::cons( typeRef.loc(), TypeRef::List, 0, elType, valType ); + } + /* Build a ListEl TypeRef over the walked nested type. */ + TypeRef *walkListEl( type_ref typeRef ) + { + TypeRef *valType = walkTypeRef( typeRef._type_ref() ); + return TypeRef::cons( typeRef.loc(), TypeRef::ListEl, valType ); + } + /* Build a Map TypeRef from the walked key and value types, with a MapEl TypeRef (built from the same key and value types) passed as the element type. */ + TypeRef *walkValueMap( type_ref typeRef ) + { + TypeRef *keyType = walkTypeRef( typeRef.KeyType() ); + TypeRef *valType = walkTypeRef( typeRef.ValType() ); + TypeRef *elType = TypeRef::cons( typeRef.loc(), + TypeRef::MapEl, 0, keyType, valType ); + + return TypeRef::cons( typeRef.loc(), TypeRef::Map, 0, + keyType, elType, valType ); + } + /* Build a MapEl TypeRef from the walked key and value types. */ + TypeRef *walkMapEl( type_ref typeRef ) + { + TypeRef *keyType = walkTypeRef( typeRef.KeyType() ); + TypeRef *valType = walkTypeRef( typeRef.ValType() ); + + return TypeRef::cons( typeRef.loc(), TypeRef::MapEl, 0, keyType, valType ); + } + /* Dispatch on the type_ref production and construct the matching TypeRef: Id builds a named reference with region qualifier and repeat type; Int, Bool and Void use the unique types stored on pd; Parser wraps the walked nested type; List, Map, ListEl and MapEl delegate to the helpers of the same name. The result stays 0 when no case matches. */ + TypeRef *walkTypeRef( type_ref typeRef ) + { + TypeRef *tr = 0; + switch ( typeRef.prodName() ) { + case type_ref::Id: { + NamespaceQual *nspaceQual = walkRegionQual( typeRef.region_qual() ); + String id = typeRef.id().data(); + RepeatType repeatType = walkOptRepeat( typeRef.opt_repeat() ); + tr = TypeRef::cons( typeRef.id().loc(), nspaceQual, id, repeatType ); + break; + } + case type_ref::Int: { + tr = TypeRef::cons( internal, pd->uniqueTypeInt ); + break; + } + case type_ref::Bool: { + tr = TypeRef::cons( internal, pd->uniqueTypeBool ); + break; + } + case type_ref::Void: { + tr = TypeRef::cons( internal, pd->uniqueTypeVoid ); + break; + } + case type_ref::Parser: { + TypeRef *type = walkTypeRef( typeRef._type_ref() ); + tr = TypeRef::cons( typeRef.loc(), TypeRef::Parser, 0, type, 0 ); + break; + } + case type_ref::List: { + tr = walkValueList( typeRef ); + break; + } + case type_ref::Map: { + tr = walkValueMap( typeRef ); + break; + } + case type_ref::ListEl: { + tr = walkListEl( typeRef ); + break; + } + case
type_ref::MapEl: { + tr = walkMapEl( typeRef ); + break; + }} + return tr; + } + /* Return a StmtList for a block_or_single: Single creates a new list holding the one walked statement, Block returns the walked statement list. */ + StmtList *walkBlockOrSingle( block_or_single blockOrSingle ) + { + StmtList *stmtList = 0; + switch ( blockOrSingle.prodName() ) { + case block_or_single::Single: { + stmtList = new StmtList; + LangStmt *stmt = walkStatement( blockOrSingle.statement() ); + stmtList->append( stmt ); + break; + } + case block_or_single::Block: { + stmtList = walkLangStmtList( blockOrSingle.lang_stmt_list() ); + break; + }} + + return stmtList; + } + /* Append one production element to `list`. The capture field uses the explicit element name when one is given. For an unnamed Id element the name defaults to the type name, prefixed by the repeat kind (_repeat_, _lrepeat_, _list_, _llist_, _opt_) or, with no repeat, by "_" when it equals defName. Unnamed Lit elements get no capture field. A SubList element is reported as an error. */ + void walkProdEl( const String &defName, ProdElList *list, prod_el El ) + { + ObjectField *captureField = 0; + if ( El.opt_prod_el_name().prodName() == opt_prod_el_name::Name ) { + String fieldName = El.opt_prod_el_name().id().data(); + captureField = ObjectField::cons( El.opt_prod_el_name().id().loc(), + ObjectField::RhsNameType, 0, fieldName ); + } + else { + /* default the prod name. */ + if ( El.prodName() == prod_el::Id ) { + String fieldName = El.id().data(); + opt_repeat::prod_name orpn = El.opt_repeat().prodName(); + if ( orpn == opt_repeat::Star ) + fieldName = "_repeat_" + fieldName; + else if ( orpn == opt_repeat::LeftStar ) + fieldName = "_lrepeat_" + fieldName; + else if ( orpn == opt_repeat::Plus ) + fieldName = "_list_" + fieldName; + else if ( orpn == opt_repeat::LeftPlus ) + fieldName = "_llist_" + fieldName; + else if ( orpn == opt_repeat::Question ) + fieldName = "_opt_" + fieldName; + else if ( strcmp( fieldName, defName ) == 0 ) + fieldName = "_" + fieldName; + captureField = ObjectField::cons( El.id().loc(), + ObjectField::RhsNameType, 0, fieldName ); + } + } + + RepeatType repeatType = walkOptRepeat( El.opt_repeat() ); + switch ( El.prodName() ) { + case prod_el::Id: { + NamespaceQual *nspaceQual = walkRegionQual( El.region_qual() ); + + String typeName = El.id().data(); + ProdEl *prodEl = prodElName( El.id().loc(), typeName, + nspaceQual, captureField, repeatType, false ); + appendProdEl( list, prodEl ); + break; + } + case
prod_el::Lit: { + NamespaceQual *nspaceQual = walkRegionQual( El.region_qual() ); + + String lit = El.backtick_lit().data(); + ProdEl *prodEl = prodElLiteral( El.backtick_lit().loc(), lit, + nspaceQual, captureField, repeatType, false ); + appendProdEl( list, prodEl ); + break; + } + case prod_el::SubList: { + error( El.POPEN().loc() ) << "production sublist is implemented as a " + "colm transformation, it is not accepted at this stage" << endp; + }} + } + /* Walk a left-recursive prod_el_list: for the List production, recurse into the nested list first and then walk this level's element. Any other production does nothing. */ + void walkProdElList( const String &defName, ProdElList *list, prod_el_list ProdElList ) + { + if ( ProdElList.prodName() == prod_el_list::List ) { + prod_el_list RightProdElList = ProdElList._prod_el_list(); + walkProdElList( defName, list, RightProdElList ); + walkProdEl( defName, list, ProdElList.prod_el() ); + } + } + /* For the Reduce production: open a block, walk the statement list and wrap it together with the local frame in a CodeBlock whose context is curStruct(), then close the block. Returns 0 for any other production. */ + CodeBlock *walkOptReduce( opt_reduce OptReduce ) + { + CodeBlock *block = 0; + if ( OptReduce.prodName() == opt_reduce::Reduce ) { + ObjectDef *localFrame = blockOpen(); + StmtList *stmtList = walkLangStmtList( OptReduce.lang_stmt_list() ); + + block = CodeBlock::cons( stmtList, localFrame ); + block->context = curStruct(); + + blockClose(); + } + return block; + } + /* Build one Production: walk the element list into a new ProdElList, read the optional production name, the optional reduce block and the commit flag, construct the Production with BaseParser::production() and append it to lelDefList via prodAppend(). */ + void walkProdudction( const String &defName, LelDefList *lelDefList, prod Prod ) + { + ProdElList *list = new ProdElList; + + walkProdElList( defName, list, Prod.prod_el_list() ); + + String name; + if ( Prod.opt_prod_name().prodName() == opt_prod_name::Name ) + name = Prod.opt_prod_name().id().data(); + + CodeBlock *codeBlock = walkOptReduce( Prod.opt_reduce() ); + bool commit = Prod.opt_commit().prodName() == opt_commit::Commit; + + Production *prod = BaseParser::production( Prod.SQOPEN().loc(), + list, name, commit, codeBlock, 0 ); + prodAppend( lelDefList, prod ); + } + /* Walk a left-recursive prod_list: recurse into the nested list for the List production, then walk this level's production. */ + void walkProdList( const String &name, LelDefList *lelDefList, prod_list ProdList ) + { + if ( ProdList.prodName() == prod_list::List ) + walkProdList( name, lelDefList, ProdList._prod_list() ); + + walkProdudction( name, lelDefList, ProdList.prod() ); + }
+ /* Build a ReOrItem for one entry of an or-expression: Char uses the unescaped character data, Range uses the first character of the unescaped low and high bounds. */ + ReOrItem *walkRegOrChar( reg_or_char regOrChar ) + { + ReOrItem *orItem = 0; + switch ( regOrChar.prodName() ) { + case reg_or_char::Char: { + String c = unescape( regOrChar.RE_CHAR().data() ); + orItem = ReOrItem::cons( regOrChar.RE_CHAR().loc(), c ); + break; + } + case reg_or_char::Range: { + String low = unescape( regOrChar.Low().data() ); + String high = unescape( regOrChar.High().data() ); + orItem = ReOrItem::cons( regOrChar.Low().loc(), low[0], high[0] ); + break; + }} + return orItem; + } + /* Build a ReOrBlock from a left-recursive reg_or_data: Base creates an empty block, Data combines the recursively built block with this level's item through lexRegularExprData(). */ + ReOrBlock *walkRegOrData( reg_or_data regOrData ) + { + ReOrBlock *block = 0; + switch ( regOrData.prodName() ) { + case reg_or_data::Data: { + ReOrBlock *left = walkRegOrData( regOrData._reg_or_data() ); + ReOrItem *right = walkRegOrChar( regOrData.reg_or_char() ); + block = lexRegularExprData( left, right ); + break; + } + case reg_or_data::Base: { + block = ReOrBlock::cons(); + break; + }} + return block; + } + /* Build a LexFactorNeg: Caret wraps the recursively walked negation in a CharNegateType node, Base wraps the walked LexFactor. */ + LexFactorNeg *walkLexFactorNeg( lex_factor_neg lexFactorNeg ) + { + LexFactorNeg *factorNeg = 0; + switch ( lexFactorNeg.prodName() ) { + case lex_factor_neg::Caret: { + LexFactorNeg *recNeg = walkLexFactorNeg( lexFactorNeg._lex_factor_neg() ); + factorNeg = LexFactorNeg::cons( recNeg, LexFactorNeg::CharNegateType ); + break; + } + case lex_factor_neg::Base: { + LexFactor *factor = walkLexFactor( lexFactorNeg.lex_factor() ); + factorNeg = LexFactorNeg::cons( factor ); + break; + }} + return factorNeg; + } + /* Build a LexFactorRep. For every production other than Base the nested repetition is walked first and then wrapped with the repetition type (star, star-star, plus, optional, exact, max, min or range); counts are converted with atoi(). Base wraps the walked LexFactorNeg. NOTE(review): atoi() reads data()->data as a C string -- confirm the buffer is terminated or followed by a non-digit. */ + LexFactorRep *walkLexFactorRep( lex_factor_rep lexFactorRep ) + { + LexFactorRep *factorRep = 0; + LexFactorRep *recRep = 0; + lex_factor_rep::prod_name pn = lexFactorRep.prodName(); + + if ( pn != lex_factor_rep::Base ) + recRep = walkLexFactorRep( lexFactorRep._lex_factor_rep() ); + + switch ( pn ) { + case lex_factor_rep::Star: { + factorRep = LexFactorRep::cons( lexFactorRep.LEX_STAR().loc(), + recRep, 0, 0, LexFactorRep::StarType ); + break; + } + case lex_factor_rep::StarStar: { + factorRep = LexFactorRep::cons(
lexFactorRep.LEX_STARSTAR().loc(), + recRep, 0, 0, LexFactorRep::StarStarType ); + break; + } + case lex_factor_rep::Plus: { + factorRep = LexFactorRep::cons( lexFactorRep.LEX_PLUS().loc(), + recRep, 0, 0, LexFactorRep::PlusType ); + break; + } + case lex_factor_rep::Question: { + factorRep = LexFactorRep::cons( lexFactorRep.LEX_QUESTION().loc(), + recRep, 0, 0, LexFactorRep::OptionalType ); + break; + } + case lex_factor_rep::Exact: { + int low = atoi( lexFactorRep.lex_uint().data()->data ); + factorRep = LexFactorRep::cons( lexFactorRep.lex_uint().loc(), + recRep, low, 0, LexFactorRep::ExactType ); + break; + } + case lex_factor_rep::Max: { + int high = atoi( lexFactorRep.lex_uint().data()->data ); + factorRep = LexFactorRep::cons( lexFactorRep.lex_uint().loc(), + recRep, 0, high, LexFactorRep::MaxType ); + break; + } + case lex_factor_rep::Min: { + int low = atoi( lexFactorRep.lex_uint().data()->data ); + factorRep = LexFactorRep::cons( lexFactorRep.lex_uint().loc(), + recRep, low, 0, LexFactorRep::MinType ); + break; + } + case lex_factor_rep::Range: { + int low = atoi( lexFactorRep.Low().data()->data ); + int high = atoi( lexFactorRep.High().data()->data ); + factorRep = LexFactorRep::cons( lexFactorRep.Low().loc(), + recRep, low, high, LexFactorRep::RangeType ); + break; + } + case lex_factor_rep::Base: { + LexFactorNeg *factorNeg = walkLexFactorNeg( lexFactorRep.lex_factor_neg() ); + factorRep = LexFactorRep::cons( factorNeg ); + }} + + return factorRep; + } + /* Build a LexTerm from a left-recursive lex_term. For non-Base productions the left term is walked first; the factor is always walked with walkLexFactorAug(). Dot, ColonGt, ColonGtGt and LtColon combine left term and factor with their own LexTerm type; the default case builds a term from the factor alone. */ + LexTerm *walkLexTerm( lex_term lexTerm ) + { + LexTerm *term = 0; + lex_term::prod_name pn = lexTerm.prodName(); + + LexTerm *leftTerm = 0; + if ( pn != lex_term::Base ) + leftTerm = walkLexTerm( lexTerm._lex_term() ); + + LexFactorAug *factorAug = walkLexFactorAug( lexTerm.lex_factor_rep() ); + + switch ( pn ) { + case lex_term::Dot: + term = LexTerm::cons( leftTerm, factorAug, LexTerm::ConcatType ); + break; + case lex_term::ColonGt: + term = LexTerm::cons( leftTerm, factorAug,
LexTerm::RightStartType ); + break; + case lex_term::ColonGtGt: + term = LexTerm::cons( leftTerm, factorAug, LexTerm::RightFinishType ); + break; + case lex_term::LtColon: + term = LexTerm::cons( leftTerm, factorAug, LexTerm::LeftType ); + break; + default: + term = LexTerm::cons( factorAug ); + break; + } + + return term; + } + /* Build a LexExpression from a left-recursive lex_expr. For non-Base productions the left expression is walked first; the term is always walked. Bar, Amp, Dash and DashDash combine left expression and term with OrType, IntersectType, SubtractType and StrongSubtractType; Base builds an expression from the term alone. */ + LexExpression *walkLexExpr( lex_expr lexExpr ) + { + LexExpression *expr = 0; + lex_expr::prod_name pn = lexExpr.prodName(); + + LexExpression *leftExpr = 0; + if ( pn != lex_expr::Base ) + leftExpr = walkLexExpr( lexExpr._lex_expr() ); + + LexTerm *term = walkLexTerm( lexExpr.lex_term() ); + + switch ( pn ) { + case lex_expr::Bar: + expr = LexExpression::cons( leftExpr, term, LexExpression::OrType ); + break; + case lex_expr::Amp: + expr = LexExpression::cons( leftExpr, term, LexExpression::IntersectType ); + break; + case lex_expr::Dash: + expr = LexExpression::cons( leftExpr, term, LexExpression::SubtractType ); + break; + case lex_expr::DashDash: + expr = LexExpression::cons( leftExpr, term, LexExpression::StrongSubtractType ); + break; + case lex_expr::Base: + expr = LexExpression::cons( term ); + } + return expr; + } + + /* Walk the lex expression of an rl_def, wrap it in a LexJoin and pass it with the id and curNspace() to addRegularDef(). */ + void walkRlDef( rl_def rlDef ) + { + String id = rlDef.id().data(); + + lex_expr LexExpr = rlDef.lex_expr(); + LexExpression *expr = walkLexExpr( LexExpr ); + LexJoin *join = LexJoin::cons( expr ); + + addRegularDef( rlDef.id().loc(), curNspace(), id, join ); + } + /* Push a region set at the region's location, walk the region's root items, then pop the region set. The list returned by walkRootItemList() is not used here. */ + void walkLexRegion( region_def regionDef ) + { + pushRegionSet( regionDef.loc() ); + walkRootItemList( regionDef.RootItemList() ); + popRegionSet(); + } + /* Walk a cfl_def: the var-def list becomes the ObjectDef (named after the definition), the productions are collected into a new LelDefList, and the reduce-first flag is read; an NtDef is then constructed and everything is passed to BaseParser::cflDef(). */ + void walkCflDef( cfl_def cflDef ) + { + String name = cflDef.id().data(); + ObjectDef *objectDef = walkVarDefList( cflDef.VarDefList() ); + objectDef->name = name; + + LelDefList *defList = new LelDefList; + walkProdList( name, defList, cflDef.prod_list() ); + + bool reduceFirst = cflDef.opt_reduce_first().REDUCEFIRST() != 0; + + NtDef *ntDef = NtDef::cons( name, curNspace(), + curStruct(),
reduceFirst ); + + BaseParser::cflDef( ntDef, objectDef, defList ); + } + /* Collect one CallArg per code_expr into a new CallArgVect, following _call_arg_seq() until it compares equal to 0. */ + CallArgVect *walkCallArgSeq( call_arg_seq callArgSeq ) + { + CallArgVect *callArgVect = new CallArgVect; + while ( callArgSeq != 0 ) { + code_expr codeExpr = callArgSeq.code_expr(); + LangExpr *expr = walkCodeExpr( codeExpr ); + callArgVect->append( new CallArg(expr) ); + callArgSeq = callArgSeq._call_arg_seq(); + } + return callArgVect; + } + /* Walk the call_arg_seq of a call_arg_list and return the resulting CallArgVect. */ + CallArgVect *walkCallArgList( call_arg_list callArgList ) + { + CallArgVect *callArgVect = walkCallArgSeq( callArgList.call_arg_seq() ); + return callArgVect; + } + /* If expr is a unary '^' or '@' expression, set trim to TrimYes or TrimNo respectively and return the operand (expr->right); otherwise return expr and leave trim untouched. */ + LangExpr *liftTrim( LangExpr *expr, ConsItem::Trim &trim ) + { + if ( expr->type == LangExpr::UnaryType ) { + if ( expr->op == '^' ) { + trim = ConsItem::TrimYes; + expr = expr->right; + } + else if ( expr->op == '@' ) { + trim = ConsItem::TrimNo; + expr = expr->right; + } + } + return expr; + } + /* Same traversal as walkCallArgSeq(), but each argument becomes an ExprType ConsItem with TrimDefault, appended to a new ConsItemList. */ + ConsItemList *walkCallArgSeqAccum( call_arg_seq callArgSeq ) + { + ConsItemList *consItemList = new ConsItemList; + while ( callArgSeq != 0 ) { + code_expr codeExpr = callArgSeq.code_expr(); + +// LangExpr *expr = walkCodeExpr( codeExpr ); +// callArgVect->append( new CallArg(expr) ); + + ConsItem::Trim trim = ConsItem::TrimDefault; + LangExpr *consExpr = walkCodeExpr( codeExpr ); + + ConsItem *consItem = ConsItem::cons( consExpr->loc, + ConsItem::ExprType, consExpr, trim ); + consItemList->append( consItem ); + + callArgSeq = callArgSeq._call_arg_seq(); + } + return consItemList; + } + /* Walk the call_arg_seq of a call_arg_list with walkCallArgSeqAccum(). */ + ConsItemList *walkCallArgListAccum( call_arg_list callArgList ) + { + return walkCallArgSeqAccum( callArgList.call_arg_seq() ); + } + /* Build the statement for a print. Accum and Tree send to a variable reference named "stdout" (Accum walks an accumulate, Tree walks the call argument list); PrintStream sends to the walked var_ref. Every case passes eof = false to send() and wraps the result in an ExprType statement. */ + LangStmt *walkPrintStmt( print_stmt &printStmt ) + { + LangStmt *stmt = 0; + switch ( printStmt.prodName() ) { + case print_stmt::Accum: { + InputLoc loc = printStmt.PRINT().loc(); + + NamespaceQual *nspaceQual = NamespaceQual::cons( curNspace() ); + QualItemVect *qualItemVect = new QualItemVect; + LangVarRef *varRef = LangVarRef::cons( loc, curNspace(), curStruct(), +
curScope(), nspaceQual, qualItemVect, String("stdout") ); + + ConsItemList *list = walkAccumulate( printStmt.accumulate() ); + + bool eof = false; //walkOptEos( StmtOrFactor.opt_eos() ); + LangExpr *expr = send( loc, varRef, list, eof ); + stmt = LangStmt::cons( loc, LangStmt::ExprType, expr ); + break; + } + case print_stmt::Tree: { + InputLoc loc = printStmt.PRINT().loc(); + + NamespaceQual *nspaceQual = NamespaceQual::cons( curNspace() ); + QualItemVect *qualItemVect = new QualItemVect; + LangVarRef *varRef = LangVarRef::cons( loc, curNspace(), curStruct(), + curScope(), nspaceQual, qualItemVect, String("stdout") ); + + ConsItemList *list = walkCallArgListAccum( printStmt.call_arg_list() ); + + bool eof = false; //walkOptEos( StmtOrFactor.opt_eos() ); + LangExpr *expr = send( loc, varRef, list, eof ); + stmt = LangStmt::cons( loc, LangStmt::ExprType, expr ); + break; + } + case print_stmt::PrintStream: { + LangVarRef *varRef = walkVarRef( printStmt.var_ref() ); + + ConsItemList *list = walkCallArgListAccum( printStmt.call_arg_list() ); + + InputLoc loc = printStmt.PRINTS().loc(); + + bool eof = false; //walkOptEos( StmtOrFactor.opt_eos() ); + LangExpr *expr = send( loc, varRef, list, eof ); + stmt = LangStmt::cons( loc, LangStmt::ExprType, expr ); + break; + }} + return stmt; + } + /* Build the QualItemVect for a left-recursive qual: Base allocates an empty vector; Dot and Arrow recurse first and then append this level's id, with form Dot when DOT() is non-zero and Arrow otherwise. */ + QualItemVect *walkQual( qual &Qual ) + { + QualItemVect *qualItemVect = 0; + qual RecQual = Qual._qual(); + switch ( Qual.prodName() ) { + case qual::Dot: + case qual::Arrow: { + qualItemVect = walkQual( RecQual ); + String id = Qual.id().data(); + QualItem::Form form = Qual.DOT() != 0 ?
QualItem::Dot : QualItem::Arrow; + qualItemVect->append( QualItem( form, Qual.id().loc(), id ) ); + break; + } + case qual::Base: { + qualItemVect = new QualItemVect; + break; + }} + return qualItemVect; + } + /* Build a LangVarRef from a var_ref: walks the region qualifier and the qual items, then constructs the reference with the current namespace, struct and scope plus the id. */ + LangVarRef *walkVarRef( var_ref varRef ) + { + NamespaceQual *nspaceQual = walkRegionQual( varRef.region_qual() ); + qual Qual = varRef.qual(); + QualItemVect *qualItemVect = walkQual( Qual ); + String id = varRef.id().data(); + LangVarRef *langVarRef = LangVarRef::cons( varRef.id().loc(), + curNspace(), curStruct(), curScope(), nspaceQual, qualItemVect, id ); + return langVarRef; + } + /* Return a UserLocalType ObjectField named by the capture id, or 0 when optCapture is not the Id production. */ + ObjectField *walkOptCapture( opt_capture optCapture ) + { + ObjectField *objField = 0; + if ( optCapture.prodName() == opt_capture::Id ) { + String id = optCapture.id().data(); + objField = ObjectField::cons( optCapture.id().loc(), + ObjectField::UserLocalType, 0, id ); + } + return objField; + } + + /* + * Constructor + */ + /* Walk one element of a double-quoted constructor literal: ConsData becomes a single InputText ConsItem built from the unescaped text, SubList walks the nested constructor element list. */ + ConsItemList *walkLitConsEl( lit_cons_el litConsEl, TypeRef *consTypeRef ) + { + ConsItemList *list = 0; + switch ( litConsEl.prodName() ) { + case lit_cons_el::ConsData: { + String consData = unescape( litConsEl.lit_dq_data().text().c_str() ); + ConsItem *consItem = ConsItem::cons( litConsEl.lit_dq_data().loc(), + ConsItem::InputText, consData ); + list = ConsItemList::cons( consItem ); + break; + } + case lit_cons_el::SubList: { + list = walkConsElList( litConsEl.ConsElList(), consTypeRef ); + break; + }} + return list; + } + /* Concatenate the lists produced by walkLitConsEl() for each element of a double-quoted constructor literal, then append an InputText item for the unescaped Nl data when Nl is non-zero. */ + ConsItemList *walkLitConsElList( _lrepeat_lit_cons_el litConsElList, + LIT_DQ_NL Nl, TypeRef *consTypeRef ) + { + ConsItemList *list = new ConsItemList; + + RepeatIter<lit_cons_el> litConsElIter( litConsElList ); + while ( !litConsElIter.end() ) { + ConsItemList *tail = walkLitConsEl( litConsElIter.value(), consTypeRef ); + list = consListConcat( list, tail ); + litConsElIter.next(); + } + + if ( Nl != 0 ) { + String consData = unescape( Nl.data() ); + ConsItem *consItem = ConsItem::cons( Nl.loc(), ConsItem::InputText,
consData ); + ConsItemList *tail = ConsItemList::cons( consItem ); + list = consListConcat( list, tail ); + } + + return list; + } + /* Dispatch on the cons_el production. Lit goes to consElLiteral() with the walked region qualifier, Tilde yields one InputText item holding the tilde text with a newline appended, Sq walks the single-quoted data list, CodeExpr wraps the walked expression in an ExprType item with TrimDefault, and Dq walks the double-quoted element list. */ + ConsItemList *walkConsEl( cons_el consEl, TypeRef *consTypeRef ) + { + ConsItemList *list = 0; + switch ( consEl.prodName() ) { + case cons_el::Lit: { + NamespaceQual *nspaceQual = walkRegionQual( consEl.region_qual() ); + String lit = consEl.backtick_lit().data(); + list = consElLiteral( consEl.backtick_lit().loc(), consTypeRef, lit, nspaceQual ); + break; + } + case cons_el::Tilde: { + String consData = consEl.opt_tilde_data().text().c_str(); + consData += '\n'; + ConsItem *consItem = ConsItem::cons( consEl.opt_tilde_data().loc(), + ConsItem::InputText, consData ); + list = ConsItemList::cons( consItem ); + break; + } + case cons_el::Sq: { + list = walkConsSqConsDataList( consEl.SqConsDataList(), + consEl.sq_lit_term().CONS_SQ_NL() ); + break; + } + case cons_el::CodeExpr: { + ConsItem::Trim trim = ConsItem::TrimDefault; + LangExpr *consExpr = walkCodeExpr( consEl.code_expr() ); + ConsItem *consItem = ConsItem::cons( consExpr->loc, + ConsItem::ExprType, consExpr, trim ); + list = ConsItemList::cons( consItem ); + break; + } + case cons_el::Dq: { + list = walkLitConsElList( consEl.LitConsElList(), + consEl.dq_lit_term().LIT_DQ_NL(), consTypeRef ); + break; + }} + return list; + } + /* Concatenate the lists produced by walkConsEl() for each element of the repeat, starting from a new empty ConsItemList. */ + ConsItemList *walkConsElList( _lrepeat_cons_el consElList, TypeRef *consTypeRef ) + { + ConsItemList *list = new ConsItemList; + + RepeatIter<cons_el> consElIter( consElList ); + + while ( !consElIter.end() ) { + ConsItemList *tail = walkConsEl( consElIter.value(), consTypeRef ); + list = consListConcat( list, tail ); + consElIter.next(); + } + return list; + } + /* Walk a top-level constructor element. Dq, Sq and Tilde are handled the same way as the matching cases in walkConsEl(); for any other production the result stays 0. */ + ConsItemList *walkConsTopEl( cons_top_el consTopEl, TypeRef *consTypeRef ) + { + ConsItemList *list = 0; + switch ( consTopEl.prodName() ) { + case cons_top_el::Dq: { + list = walkLitConsElList( consTopEl.LitConsElList(), + consTopEl.dq_lit_term().LIT_DQ_NL(), consTypeRef ); + break; + } +
case cons_top_el::Sq: { + list = walkConsSqConsDataList( consTopEl.SqConsDataList(), + consTopEl.sq_lit_term().CONS_SQ_NL() ); + break; + } + case cons_top_el::Tilde: { + String consData = consTopEl.opt_tilde_data().text().c_str(); + consData += '\n'; + ConsItem *consItem = ConsItem::cons( consTopEl.opt_tilde_data().loc(), + ConsItem::InputText, consData ); + list = ConsItemList::cons( consItem ); + break; + }} + return list; + } + + ConsItemList *walkConsList( cons_list consList, TypeRef *consTypeRef ) + { + Alignment alignment; + ConsItemList *list = new ConsItemList; + RepeatIter<cons_top_el> consTopElIter ( consList ); + while ( !consTopElIter.end() ) { + cons_top_el topEl = consTopElIter.value(); + alignment.check( "constructor", topEl.loc() ); + + ConsItemList *tail = walkConsTopEl( topEl, consTypeRef ); + list = consListConcat( list, tail ); + consTopElIter.next(); + } + return list; + } + + ConsItemList *walkConstructor( constructor Constructor, TypeRef *consTypeRef ) + { + if ( Constructor.prodName() == constructor::TopList ) + return walkConsList( Constructor.cons_list(), consTypeRef ); + else + return walkConsElList( Constructor.ConsElList(), consTypeRef ); + } + + /* + * String + */ + + ConsItemList *walkLitStringEl( lit_string_el litStringEl ) + { + ConsItemList *list = 0; + switch ( litStringEl.prodName() ) { + case lit_string_el::ConsData: { + String consData = unescape( litStringEl.lit_dq_data().text().c_str() ); + ConsItem *stringItem = ConsItem::cons( litStringEl.lit_dq_data().loc(), + ConsItem::InputText, consData ); + list = ConsItemList::cons( stringItem ); + break; + } + case lit_string_el::SubList: { + list = walkStringElList( litStringEl.StringElList() ); + break; + }} + return list; + } + + ConsItemList *walkLitStringElList( _lrepeat_lit_string_el litStringElList, LIT_DQ_NL Nl ) + { + ConsItemList *list = new ConsItemList; + + RepeatIter<lit_string_el> litStringElIter( litStringElList ); + + while ( !litStringElIter.end() ) { + ConsItemList 
*tail = walkLitStringEl( litStringElIter.value() ); + list = consListConcat( list, tail ); + litStringElIter.next(); + } + + if ( Nl != 0 ) { + String consData = unescape( Nl.data() ); + ConsItem *consItem = ConsItem::cons( Nl.loc(), + ConsItem::InputText, consData ); + ConsItemList *tail = ConsItemList::cons( consItem ); + list = consListConcat( list, tail ); + } + return list; + } + + ConsItemList *walkStringEl( string_el stringEl ) + { + ConsItemList *list = 0; + switch ( stringEl.prodName() ) { + case string_el::Dq: { + list = walkLitStringElList( stringEl.LitStringElList(), + stringEl.dq_lit_term().LIT_DQ_NL() ); + break; + } + case string_el::Sq: { + list = walkConsSqConsDataList( stringEl.SqConsDataList(), + stringEl.sq_lit_term().CONS_SQ_NL() ); + break; + } + case string_el::Tilde: { + String consData = stringEl.opt_tilde_data().text().c_str(); + consData += '\n'; + ConsItem *consItem = ConsItem::cons( stringEl.opt_tilde_data().loc(), + ConsItem::InputText, consData ); + list = ConsItemList::cons( consItem ); + break; + } + case string_el::CodeExpr: { + ConsItem::Trim trim = ConsItem::TrimDefault; + LangExpr *consExpr = walkCodeExpr( stringEl.code_expr() ); + consExpr = liftTrim( consExpr, trim ); + ConsItem *consItem = ConsItem::cons( consExpr->loc, + ConsItem::ExprType, consExpr, trim ); + list = ConsItemList::cons( consItem ); + break; + }} + return list; + } + + ConsItemList *walkStringElList( _lrepeat_string_el stringElList ) + { + ConsItemList *list = new ConsItemList; + + RepeatIter<string_el> stringElIter( stringElList ); + + while ( !stringElIter.end() ) { + ConsItemList *tail = walkStringEl( stringElIter.value() ); + list = consListConcat( list, tail ); + stringElIter.next(); + } + return list; + } + + ConsItemList *walkStringTopEl( string_top_el stringTopEl ) + { + ConsItemList *list = 0; + switch ( stringTopEl.prodName() ) { + case string_top_el::Dq: { + list = walkLitStringElList( stringTopEl.LitStringElList(), + 
stringTopEl.dq_lit_term().LIT_DQ_NL() ); + break; + } + case string_top_el::Sq: { + list = walkConsSqConsDataList( stringTopEl.SqConsDataList(), + stringTopEl.sq_lit_term().CONS_SQ_NL() ); + break; + } + case string_top_el::Tilde: { + String consData = stringTopEl.opt_tilde_data().text().c_str(); + consData += '\n'; + ConsItem *consItem = ConsItem::cons( stringTopEl.opt_tilde_data().loc(), + ConsItem::InputText, consData ); + list = ConsItemList::cons( consItem ); + break; + }} + return list; + } +
	/* Walk the top-level elements of a string. Each element's location is
	 * passed to the Alignment checker before the element is walked. */
	ConsItemList *walkStringList( string_list stringList )
	{
		Alignment alignment;
		ConsItemList *result = new ConsItemList;

		for ( RepeatIter<string_top_el> topIter( stringList );
				!topIter.end(); topIter.next() )
		{
			string_top_el topEl = topIter.value();
			alignment.check( "string", topEl.loc() );

			ConsItemList *piece = walkStringTopEl( topEl );
			result = consListConcat( result, piece );
		}
		return result;
	}

	/* A string is either a top-level list or a plain element list. */
	ConsItemList *walkString( string String )
	{
		if ( String.prodName() != string::TopList )
			return walkStringElList( String.StringElList() );

		return walkStringList( String.string_list() );
	}

	/*
	 * Accum
	 */

	/* One element of a double-quoted accumulator literal: unescaped text or
	 * a nested accumulator element list. Returns null otherwise. */
	ConsItemList *walkLitAccumEl( lit_accum_el litAccumEl )
	{
		switch ( litAccumEl.prodName() ) {
		case lit_accum_el::ConsData: {
			String text = unescape( litAccumEl.lit_dq_data().text().c_str() );
			ConsItem *item = ConsItem::cons( litAccumEl.lit_dq_data().loc(),
					ConsItem::InputText, text );
			return ConsItemList::cons( item );
		}
		case lit_accum_el::SubList:
			return walkAccumElList( litAccumEl.AccumElList() );
		}
		return 0;
	}

	/* Concatenate the elements of a double-quoted accumulator literal,
	 * followed by the unescaped newline token when Nl is present. */
	ConsItemList *walkLitAccumElList( _lrepeat_lit_accum_el litAccumElList, LIT_DQ_NL Nl )
	{
		ConsItemList *result = new ConsItemList;

		for ( RepeatIter<lit_accum_el> elIter( litAccumElList );
				!elIter.end(); elIter.next() )
		{
			ConsItemList *piece = walkLitAccumEl( elIter.value() );
			result = consListConcat( result, piece );
		}

		if ( Nl != 0 ) {
			String text = unescape( Nl.data() );
			ConsItem *item = ConsItem::cons( Nl.loc(), ConsItem::InputText, text );
			ConsItemList *piece = ConsItemList::cons( item );
			result = consListConcat( result, piece );
		}

		return result;
	}

	/* Dispatch on an accumulator element: double-quoted, single-quoted,
	 * tilde line or embedded code expression. Returns null otherwise. */
	ConsItemList *walkAccumEl( accum_el accumEl )
	{
		switch ( accumEl.prodName() ) {
		case accum_el::Dq:
			return walkLitAccumElList( accumEl.LitAccumElList(),
					accumEl.dq_lit_term().LIT_DQ_NL() );
		case accum_el::Sq:
			return walkConsSqConsDataList( accumEl.SqConsDataList(),
					accumEl.sq_lit_term().CONS_SQ_NL() );
		case accum_el::Tilde: {
			String text = accumEl.opt_tilde_data().text().c_str();
			text += '\n';
			ConsItem *item = ConsItem::cons( accumEl.opt_tilde_data().loc(),
					ConsItem::InputText, text );
			return ConsItemList::cons( item );
		}
		case accum_el::CodeExpr: {
			ConsItem::Trim trimMode = ConsItem::TrimDefault;
			LangExpr *valueExpr = walkCodeExpr( accumEl.code_expr() );

			/* liftTrim receives both the expression and the trim mode and
			 * hands back the expression to embed. */
			valueExpr = liftTrim( valueExpr, trimMode );
			ConsItem *item = ConsItem::cons( valueExpr->loc,
					ConsItem::ExprType, valueExpr, trimMode );
			return ConsItemList::cons( item );
		}
		}
		return 0;
	}

	/* Walk a repetition of accumulator elements into a single item list. */
	ConsItemList *walkAccumElList( _lrepeat_accum_el accumElList )
	{
		ConsItemList *result = new ConsItemList;

		for ( RepeatIter<accum_el> elIter( accumElList );
				!elIter.end(); elIter.next() )
		{
			ConsItemList *piece = walkAccumEl( elIter.value() );
			result = consListConcat( result, piece );
		}
		return result;
	}

+ ConsItemList *walkAccumTopEl( accum_top_el accumTopEl ) + { + ConsItemList *list = 0; + switch ( accumTopEl.prodName() ) { + case accum_top_el::Dq: { + list = walkLitAccumElList( accumTopEl.LitAccumElList(), + accumTopEl.dq_lit_term().LIT_DQ_NL() ); + break; + } + case accum_top_el::Sq: { + list = 
walkConsSqConsDataList( accumTopEl.SqConsDataList(), + accumTopEl.sq_lit_term().CONS_SQ_NL() ); + break; + } + case accum_top_el::Tilde: { + String consData = accumTopEl.opt_tilde_data().text().c_str(); + consData += '\n'; + ConsItem *consItem = ConsItem::cons( accumTopEl.opt_tilde_data().loc(), + ConsItem::InputText, consData ); + list = ConsItemList::cons( consItem ); + break; + } + case accum_top_el::SubList: { + list = walkAccumElList( accumTopEl.AccumElList() ); + break; + }} + return list; + } +
	/* Walk a right-recursive accumulator list. The head element is checked
	 * against the shared Alignment and walked first; the remainder of the
	 * list (List production) is walked recursively and appended. */
	ConsItemList *walkAccumList( Alignment &alignment, accum_list accumList )
	{
		accum_top_el head = accumList.accum_top_el();
		alignment.check( "accumulator", head.loc() );

		ConsItemList *result = walkAccumTopEl( head );
		if ( accumList.prodName() != accum_list::List )
			return result;

		ConsItemList *rest = walkAccumList( alignment, accumList._accum_list() );
		return consListConcat( result, rest );
	}

	/* Entry point for an accumulate tree: one Alignment instance is shared
	 * by every element of the list. */
	ConsItemList *walkAccumulate( accumulate Accumulate )
	{
		Alignment alignment;
		return walkAccumList( alignment, Accumulate.accum_list() );
	}

	/* Append one field initializer to list; every initializer is created
	 * under the fixed name "_name". */
	void walkFieldInit( FieldInitVect *list, field_init fieldInit )
	{
		LangExpr *valueExpr = walkCodeExpr( fieldInit.code_expr() );
		list->append( FieldInit::cons( valueExpr->loc, "_name", valueExpr ) );
	}

	/* Collect a repetition of field initializers into a new vector. */
	FieldInitVect *walkFieldInit( _lrepeat_field_init fieldInitList )
	{
		FieldInitVect *inits = new FieldInitVect;

		for ( RepeatIter<field_init> initIter( fieldInitList );
				!initIter.end(); initIter.next() )
		{
			walkFieldInit( inits, initIter.value() );
		}
		return inits;
	}

	/* Returns the initializer vector for the Init production, else null. */
	FieldInitVect *walkOptFieldInit( opt_field_init optFieldInit )
	{
		if ( optFieldInit.prodName() != opt_field_init::Init )
			return 0;

		return walkFieldInit( optFieldInit.FieldInitList() );
	}

	/* Translate the statement-like factors (parse, reduce, send, make_tree,
	 * make_token, cons, match, new) into a LangExpr. Returns null for any
	 * production not listed here. */
	LangExpr *walkStmtOrFactor( stmt_or_factor StmtOrFactor )
	{
		switch ( StmtOrFactor.prodName() ) {
		case stmt_or_factor::Parse: {
			/* The type we are parsing. */
			type_ref parsedTree = StmtOrFactor.type_ref();
			TypeRef *parsedType = walkTypeRef( parsedTree );
			ObjectField *capture = walkOptCapture( StmtOrFactor.opt_capture() );
			FieldInitVect *inits = walkOptFieldInit( StmtOrFactor.opt_field_init() );
			ConsItemList *input = walkAccumulate( StmtOrFactor.accumulate() );

			return parseCmd( StmtOrFactor.PARSE().loc(), false, false, capture,
					parsedType, inits, input, true, false, false, "" );
		}
		case stmt_or_factor::ParseTree: {
			/* The type we are parsing. */
			type_ref parsedTree = StmtOrFactor.type_ref();
			TypeRef *parsedType = walkTypeRef( parsedTree );
			ObjectField *capture = walkOptCapture( StmtOrFactor.opt_capture() );
			FieldInitVect *inits = walkOptFieldInit( StmtOrFactor.opt_field_init() );
			ConsItemList *input = walkAccumulate( StmtOrFactor.accumulate() );

			return parseCmd( StmtOrFactor.PARSE_TREE().loc(), true, false, capture,
					parsedType, inits, input, true, false, false, "" );
		}
		case stmt_or_factor::ParseStop: {
			/* The type we are parsing. */
			type_ref parsedTree = StmtOrFactor.type_ref();
			TypeRef *parsedType = walkTypeRef( parsedTree );
			ObjectField *capture = walkOptCapture( StmtOrFactor.opt_capture() );
			FieldInitVect *inits = walkOptFieldInit( StmtOrFactor.opt_field_init() );
			ConsItemList *input = walkAccumulate( StmtOrFactor.accumulate() );

			return parseCmd( StmtOrFactor.PARSE_STOP().loc(), false, true, capture,
					parsedType, inits, input, true, false, false, "" );
		}
		case stmt_or_factor::Reduce: {
			/* The reducer name. */
			String reducerName = StmtOrFactor.id().data();

			/* The type we are parsing. */
			type_ref parsedTree = StmtOrFactor.type_ref();
			TypeRef *parsedType = walkTypeRef( parsedTree );
			FieldInitVect *inits = walkOptFieldInit( StmtOrFactor.opt_field_init() );
			ConsItemList *input = walkAccumulate( StmtOrFactor.accumulate() );

			return parseCmd( StmtOrFactor.REDUCE().loc(), false, false, 0,
					parsedType, inits, input, true, true, false, reducerName );
		}
		case stmt_or_factor::ReadReduce: {
			/* The reducer name. */
			String reducerName = StmtOrFactor.id().data();

			/* The type we are parsing. */
			type_ref parsedTree = StmtOrFactor.type_ref();
			TypeRef *parsedType = walkTypeRef( parsedTree );
			FieldInitVect *inits = walkOptFieldInit( StmtOrFactor.opt_field_init() );
			ConsItemList *input = walkAccumulate( StmtOrFactor.accumulate() );

			return parseCmd( StmtOrFactor.READ_REDUCE().loc(), false, false, 0,
					parsedType, inits, input, true, true, true, reducerName );
		}
		case stmt_or_factor::Send: {
			LangVarRef *target = walkVarRef( StmtOrFactor.var_ref() );
			ConsItemList *input = walkAccumulate( StmtOrFactor.accumulate() );
			bool eof = walkOptEos( StmtOrFactor.opt_eos() );
			return send( StmtOrFactor.SEND().loc(), target, input, eof );
		}
		case stmt_or_factor::SendTree: {
			LangVarRef *target = walkVarRef( StmtOrFactor.var_ref() );
			ConsItemList *input = walkAccumulate( StmtOrFactor.accumulate() );
			bool eof = walkOptEos( StmtOrFactor.opt_eos() );
			return sendTree( StmtOrFactor.SEND_TREE().loc(), target, input, eof );
		}
		case stmt_or_factor::MakeTree: {
			CallArgVect *args = walkCallArgList( StmtOrFactor.call_arg_list() );
			return LangExpr::cons( LangTerm::cons( StmtOrFactor.loc(),
					LangTerm::MakeTreeType, args ) );
		}
		case stmt_or_factor::MakeToken: {
			CallArgVect *args = walkCallArgList( StmtOrFactor.call_arg_list() );
			return LangExpr::cons( LangTerm::cons( StmtOrFactor.loc(),
					LangTerm::MakeTokenType, args ) );
		}
		case stmt_or_factor::Cons: {
			/* The type we are constructing; it is also handed to the
			 * constructor walk. */
			type_ref consTree = StmtOrFactor.type_ref();
			TypeRef *consType = walkTypeRef( consTree );
			ObjectField *capture = walkOptCapture( StmtOrFactor.opt_capture() );
			ConsItemList *items = walkConstructor( StmtOrFactor.constructor(), consType );
			FieldInitVect *inits = walkOptFieldInit( StmtOrFactor.opt_field_init() );

			return construct( StmtOrFactor.CONS().loc(), capture, items, consType, inits );
		}
		case stmt_or_factor::Match: {
			LangVarRef *subject = walkVarRef( StmtOrFactor.var_ref() );
			PatternItemList *pattern = walkPattern( StmtOrFactor.pattern(), subject );
			return match( StmtOrFactor.loc(), subject, pattern );
		}
		case stmt_or_factor::New: {
			TypeRef *newType = walkTypeRef( StmtOrFactor.type_ref() );

			ObjectField *captureField = walkOptCapture( StmtOrFactor.opt_capture() );
			FieldInitVect *inits = walkFieldInit( StmtOrFactor.FieldInitList() );

			LangVarRef *captureVarRef = 0;
			if ( captureField != 0 ) {
				captureVarRef = LangVarRef::cons( captureField->loc,
						curNspace(), curStruct(), curScope(), captureField->name );
			}

			LangExpr *newExpr = LangExpr::cons( LangTerm::consNew(
					StmtOrFactor.loc(), newType, captureVarRef, inits ) );

			if ( captureField == 0 )
				return newExpr;

			/* Check for redeclaration. */
			if ( curScope()->checkRedecl( captureField->name ) != 0 ) {
				error( captureField->loc ) << "variable " <<
						captureField->name << " redeclared" << endp;
			}

			/* Insert it into the field map. */
			captureField->typeRef = newType;
			curScope()->insertField( captureField->name, captureField );
			return newExpr;
		}
		}
		return 0;
	}

+ LangExpr *walkCodeFactor( code_factor codeFactor, bool used = true ) + { + LangExpr *expr = 0; + switch ( codeFactor.prodName() ) { + case code_factor::VarRef: { + LangVarRef *langVarRef = walkVarRef( codeFactor.var_ref() ); + LangTerm *term = LangTerm::cons( langVarRef->loc, + LangTerm::VarRefType, langVarRef ); + expr = LangExpr::cons( term ); + break; + } + case code_factor::Call: { + LangVarRef *langVarRef = walkVarRef( codeFactor.var_ref() ); + CallArgVect *exprVect = walkCallArgList( codeFactor.call_arg_list() ); + LangTerm *term = LangTerm::cons( langVarRef->loc, langVarRef, exprVect ); + expr = LangExpr::cons( term ); + break; + } + case code_factor::Number: { + String number = codeFactor.number().text().c_str(); + LangTerm *term = LangTerm::cons( codeFactor.number().loc(), + LangTerm::NumberType, number ); + expr = LangExpr::cons( term ); + break; + } + case code_factor::StmtOrFactor: { + expr = walkStmtOrFactor( codeFactor.stmt_or_factor() ); + break; + } + case code_factor::Nil: { + expr = LangExpr::cons( LangTerm::cons( codeFactor.NIL().loc(), + LangTerm::NilType ) ); + break; + } + case code_factor::True: { + expr = LangExpr::cons( LangTerm::cons( codeFactor.TRUE().loc(), + LangTerm::TrueType ) ); + break; + } + case code_factor::False: { + expr = LangExpr::cons( LangTerm::cons( codeFactor.FALSE().loc(), + LangTerm::FalseType ) ); + break; + } + case code_factor::Paren: { + expr = walkCodeExpr( codeFactor.code_expr() ); + break; + } + case code_factor::String: { + ConsItemList *list = walkString( codeFactor.string() ); + expr = LangExpr::cons( LangTerm::cons( codeFactor.string().loc(), list ) ); + break; + } + case code_factor::In: { + TypeRef *typeRef = walkTypeRef( codeFactor.type_ref() ); + LangVarRef *varRef = walkVarRef( codeFactor.var_ref() ); + expr = LangExpr::cons( LangTerm::cons( typeRef->loc, + 
LangTerm::SearchType, typeRef, varRef ) ); + break; + } + case code_factor::TypeId: { + TypeRef *typeRef = walkTypeRef( codeFactor.type_ref() ); + expr = LangExpr::cons( LangTerm::cons( codeFactor.loc(), + LangTerm::TypeIdType, typeRef ) ); + break; + } + case code_factor::Cast: { + TypeRef *typeRef = walkTypeRef( codeFactor.type_ref() ); + LangExpr *castExpr = walkCodeFactor( codeFactor._code_factor() ); + expr = LangExpr::cons( LangTerm::cons( codeFactor.loc(), + LangTerm::CastType, typeRef, castExpr ) ); + break; + }} + return expr; + } +
	/* Additive level of the expression grammar. Plus and Minus walk both
	 * operands with the default 'used' value; only the Base production
	 * forwards the caller's 'used' flag. Returns null otherwise. */
	LangExpr *walkCodeAdditive( code_additive additive, bool used = true )
	{
		switch ( additive.prodName() ) {
		case code_additive::Plus: {
			LangExpr *lhs = walkCodeAdditive( additive._code_additive() );
			LangExpr *rhs = walkCodeMultiplicitive( additive.code_multiplicitive() );
			return LangExpr::cons( additive.PLUS().loc(), lhs, '+', rhs );
		}
		case code_additive::Minus: {
			LangExpr *lhs = walkCodeAdditive( additive._code_additive() );
			LangExpr *rhs = walkCodeMultiplicitive( additive.code_multiplicitive() );
			return LangExpr::cons( additive.MINUS().loc(), lhs, '-', rhs );
		}
		case code_additive::Base:
			return walkCodeMultiplicitive( additive.code_multiplicitive(), used );
		}
		return 0;
	}

	/* Unary level of the expression grammar. Each operator production wraps
	 * its factor in a unary LangExpr tagged with a single character; Base
	 * returns the factor itself and forwards 'used'. */
	LangExpr *walkCodeUnary( code_unary unary, bool used = true )
	{
		switch ( unary.prodName() ) {
		case code_unary::Bang: {
			LangExpr *operand = walkCodeFactor( unary.code_factor() );
			return LangExpr::cons( unary.BANG().loc(), '!', operand );
		}
		case code_unary::Dollar: {
			LangExpr *operand = walkCodeFactor( unary.code_factor() );
			return LangExpr::cons( unary.DOLLAR().loc(), '$', operand );
		}
		case code_unary::DollarDollar: {
			/* NOTE(review): the location is read through DOLLAR(), the same
			 * accessor the Dollar case uses -- confirm this is the token the
			 * grammar exposes for the DollarDollar production. */
			LangExpr *operand = walkCodeFactor( unary.code_factor() );
			return LangExpr::cons( unary.DOLLAR().loc(), 'S', operand );
		}
		case code_unary::Caret: {
			LangExpr *operand = walkCodeFactor( unary.code_factor() );
			return LangExpr::cons( unary.CARET().loc(), '^', operand );
		}
		case code_unary::At: {
			LangExpr *operand = walkCodeFactor( unary.code_factor() );
			return LangExpr::cons( unary.AT().loc(), '@', operand );
		}
		case code_unary::Percent: {
			LangExpr *operand = walkCodeFactor( unary.code_factor() );
			return LangExpr::cons( unary.PERCENT().loc(), '%', operand );
		}
		case code_unary::Base:
			return walkCodeFactor( unary.code_factor(), used );
		}
		return 0;
	}

	/* Relational level of the expression grammar. For Base the additive
	 * operand is returned directly with the caller's 'used' flag. For every
	 * other production the left operand is walked first, the right operand
	 * is walked with used = true, and the two are joined by the comparison
	 * operator; an unrecognized production yields null. */
	LangExpr *walkCodeRelational( code_relational codeRelational, bool used = true )
	{
		if ( codeRelational.prodName() == code_relational::Base )
			return walkCodeAdditive( codeRelational.code_additive(), used );

		LangExpr *lhs = walkCodeRelational( codeRelational._code_relational() );
		LangExpr *rhs = walkCodeAdditive( codeRelational.code_additive(), true );

		switch ( codeRelational.prodName() ) {
		case code_relational::EqEq:
			return LangExpr::cons( codeRelational.loc(), lhs, OP_DoubleEql, rhs );
		case code_relational::Neq:
			return LangExpr::cons( codeRelational.loc(), lhs, OP_NotEql, rhs );
		case code_relational::Lt:
			return LangExpr::cons( codeRelational.loc(), lhs, '<', rhs );
		case code_relational::Gt:
			return LangExpr::cons( codeRelational.loc(), lhs, '>', rhs );
		case code_relational::LtEq:
			return LangExpr::cons( codeRelational.loc(), lhs, OP_LessEql, rhs );
		case code_relational::GtEq:
			return LangExpr::cons( codeRelational.loc(), lhs, OP_GrtrEql, rhs );
		}
		return 0;
	}

	/* An expression statement: the expression is walked with used = false
	 * and wrapped in an ExprType statement at the expression's location. */
	LangStmt *walkExprStmt( expr_stmt exprStmt )
	{
		LangExpr *body = walkCodeExpr( exprStmt.code_expr(), false );
		return LangStmt::cons( body->loc, LangStmt::ExprType, body );
	}
+ + ObjectField *walkVarDef( var_def varDef, ObjectField::Type type ) + { + String id = varDef.id().data(); + TypeRef *typeRef = walkTypeRef( varDef.type_ref() ); + return ObjectField::cons( varDef.id().loc(), type, typeRef, id ); + } + + IterCall *walkIterCall( iter_call Tree ) + { + IterCall *iterCall = 0; + switch ( Tree.prodName() ) { + case iter_call::Call: { + LangVarRef *varRef = walkVarRef( Tree.var_ref() ); + CallArgVect *exprVect = walkCallArgList( Tree.call_arg_list() ); + LangTerm *langTerm = LangTerm::cons( varRef->loc, varRef, exprVect ); + iterCall = IterCall::cons( IterCall::Call, langTerm ); + break; + } + case iter_call::Id: { + String tree = Tree.id().data(); + LangVarRef *varRef = LangVarRef::cons( Tree.id().loc(), + curNspace(), curStruct(), curScope(), tree ); + LangTerm *langTerm = LangTerm::cons( Tree.id().loc(), + LangTerm::VarRefType, varRef ); + LangExpr *langExpr = LangExpr::cons( langTerm ); + iterCall = IterCall::cons( IterCall::Expr, langExpr ); + break; + } + case iter_call::Expr: { + LangExpr *langExpr = walkCodeExpr( Tree.code_expr() ); + iterCall = IterCall::cons( IterCall::Expr, langExpr ); + break; + }} + + return iterCall; + } + + LangStmt *walkElsifClause( elsif_clause elsifClause ) + { + pushScope(); + LangExpr *expr = walkCodeExpr( elsifClause.code_expr() ); + StmtList *stmtList = walkBlockOrSingle( elsifClause.block_or_single() ); + LangStmt *stmt = LangStmt::cons( LangStmt::IfType, expr, stmtList, 0 ); + popScope(); + return stmt; + } + + LangStmt *walkOptionalElse( optional_else optionalElse ) + { + LangStmt *stmt = 0; + if ( optionalElse.prodName() == optional_else::Else ) { + pushScope(); + StmtList *stmtList = walkBlockOrSingle( optionalElse.block_or_single() ); + stmt = LangStmt::cons( LangStmt::ElseType, stmtList ); + popScope(); + } + return stmt; + } + + LangStmt *walkElsifList( elsif_list elsifList ) + { + LangStmt *stmt = 0; + switch ( elsifList.prodName() ) { + case elsif_list::Clause: + stmt = walkElsifClause( 
elsifList.elsif_clause() ); + stmt->elsePart = walkElsifList( elsifList._elsif_list() ); + break; + case elsif_list::OptElse: + stmt = walkOptionalElse( elsifList.optional_else() ); + break; + } + return stmt; + } + + LangStmt *walkCaseClause( case_clause CaseClause, var_ref VarRef ) + { + pushScope(); + + LangVarRef *varRef = walkVarRef( VarRef ); + + scopeTop->caseClauseVarRef = varRef; + + LangExpr *expr = 0; + + switch ( CaseClause.prodName() ) { + case case_clause::Pattern: { + /* A match pattern. */ + PatternItemList *list = walkPattern( CaseClause.pattern(), varRef ); + expr = match( CaseClause.loc(), varRef, list ); + break; + } + case case_clause::Id: { + /* An identifier to be interpreted as a production name. */ + String prod = CaseClause.id().text().c_str(); + expr = prodCompare( CaseClause.loc(), varRef, prod, 0 ); + break; + } + case case_clause::IdPat: { + String prod = CaseClause.id().text().c_str(); + PatternItemList *list = walkPattern( CaseClause.pattern(), varRef ); + LangExpr *matchExpr = match( CaseClause.loc(), varRef, list ); + expr = prodCompare( CaseClause.loc(), varRef, prod, matchExpr ); + break; + } + } + + StmtList *stmtList = walkBlockOrSingle( CaseClause.block_or_single() ); + + popScope(); + + LangStmt *stmt = LangStmt::cons( LangStmt::IfType, expr, stmtList ); + + return stmt; + } + + LangStmt *walkCaseClauseList( case_clause_list CaseClauseList, var_ref VarRef ) + { + LangStmt *stmt = 0; + switch ( CaseClauseList.prodName() ) { + case case_clause_list::Recursive: { + stmt = walkCaseClause( CaseClauseList.case_clause(), VarRef ); + + LangStmt *recList = walkCaseClauseList( + CaseClauseList._case_clause_list(), VarRef ); + + stmt->setElsePart( recList ); + break; + } + case case_clause_list::BaseCase: { + stmt = walkCaseClause( CaseClauseList.case_clause(), VarRef ); + break; + } + case case_clause_list::BaseDefault: { + pushScope(); + StmtList *stmtList = walkBlockOrSingle( + CaseClauseList.default_clause().block_or_single() ); + 
popScope(); + stmt = LangStmt::cons( LangStmt::ElseType, stmtList ); + break; + } + } + return stmt; + } + + void walkStructVarDef( struct_var_def StructVarDef ) + { + ObjectField *objField = walkVarDef( StructVarDef.var_def(), + ObjectField::StructFieldType ); + structVarDef( objField->loc, objField ); + } + + TypeRef *walkReferenceTypeRef( reference_type_ref ReferenceTypeRef ) + { + TypeRef *typeRef = walkTypeRef( ReferenceTypeRef.type_ref() ); + return TypeRef::cons( ReferenceTypeRef.REF().loc(), TypeRef::Ref, typeRef ); + } + + ObjectField *walkParamVarDef( param_var_def paramVarDef ) + { + String id = paramVarDef.id().data(); + TypeRef *typeRef = 0; + ObjectField::Type type; + + switch ( paramVarDef.prodName() ) { + case param_var_def::Type: + typeRef = walkTypeRef( paramVarDef.type_ref() ); + type = ObjectField::ParamValType; + break; + case param_var_def::Ref: + typeRef = walkReferenceTypeRef( paramVarDef.reference_type_ref() ); + type = ObjectField::ParamRefType; + break; + } + + return addParam( paramVarDef.id().loc(), type, typeRef, id ); + } + + ParameterList *walkParamVarDefSeq( param_var_def_seq paramVarDefSeq ) + { + ParameterList *paramList = new ParameterList; + while ( paramVarDefSeq != 0 ) { + ObjectField *param = walkParamVarDef( paramVarDefSeq.param_var_def() ); + appendParam( paramList, param ); + paramVarDefSeq = paramVarDefSeq._param_var_def_seq(); + } + return paramList; + } + + ParameterList *walkParamVarDefList( param_var_def_list paramVarDefList ) + { + ParameterList *paramList = walkParamVarDefSeq( + paramVarDefList.param_var_def_seq() ); + return paramList; + } + + bool walkOptExport( opt_export OptExport ) + { + return OptExport.prodName() == opt_export::Export; + } + + void walkFunctionDef( function_def FunctionDef ) + { + ObjectDef *localFrame = blockOpen(); + + bool exprt = walkOptExport( FunctionDef.opt_export() ); + TypeRef *typeRef = walkTypeRef( FunctionDef.type_ref() ); + String id = FunctionDef.id().data(); + ParameterList 
*paramList = walkParamVarDefList( FunctionDef.ParamVarDefList() ); + StmtList *stmtList = walkLangStmtList( FunctionDef.lang_stmt_list() ); + functionDef( stmtList, localFrame, paramList, typeRef, id, exprt ); + + blockClose(); + } + + void walkInHostDef( in_host_def InHostDef ) + { + ObjectDef *localFrame = blockOpen(); + + TypeRef *typeRef = walkTypeRef( InHostDef.type_ref() ); + String id = InHostDef.id().data(); + ParameterList *paramList = walkParamVarDefList( InHostDef.ParamVarDefList() ); + inHostDef( InHostDef.HostFunc().data(), localFrame, paramList, typeRef, id, false ); + + blockClose(); + } + + void walkIterDef( iter_def IterDef ) + { + ObjectDef *localFrame = blockOpen(); + + String id = IterDef.id().data(); + ParameterList *paramList = walkParamVarDefList( IterDef.ParamVarDefList() ); + StmtList *stmtList = walkLangStmtList( IterDef.lang_stmt_list() ); + iterDef( stmtList, localFrame, paramList, id ); + + blockClose(); + } + + void walkStructItem( struct_item structItem ) + { + switch ( structItem.prodName() ) { + case struct_item::Rl: + walkRlDef( structItem.rl_def() ); + break; + case struct_item::StructVar: + walkStructVarDef( structItem.struct_var_def() ); + break; + case struct_item::Token: + walkTokenDef( structItem.token_def() ); + break; + case struct_item::IgnoreCollector: + walkIgnoreCollector( structItem.ic_def() ); + break; + case struct_item::Ignore: + walkIgnoreDef( structItem.ignore_def() ); + break; + case struct_item::Literal: + walkLiteralDef( structItem.literal_def() ); + break; + case struct_item::Cfl: + walkCflDef( structItem.cfl_def() ); + break; + case struct_item::Region: + walkLexRegion( structItem.region_def() ); + break; + case struct_item::Struct: + walkStructDef( structItem.struct_def() ); + break; + case struct_item::Function: + walkFunctionDef( structItem.function_def() ); + break; + case struct_item::InHost: + walkInHostDef( structItem.in_host_def() ); + break; + case struct_item::Iter: + walkIterDef( 
structItem.iter_def() ); + break; + case struct_item::PreEof: + walkPreEof( structItem.pre_eof_def() ); + break; + case struct_item::Export: + walkExportDef( structItem.export_def() ); + break; + case struct_item::Precedence: + walkPrecedenceDef( structItem.precedence_def() ); + break; +// case struct_item::ListEl: +// listElDef( structItem.list_el_def().id().data() ); +// break; +// case struct_item::MapEl: { +// map_el_def Def = structItem.map_el_def(); +// TypeRef *keyTr = walkTypeRef( Def.type_ref() ); +// mapElDef( Def.id().data(), keyTr ); +// break; +// } + case struct_item::Alias: + walkAliasDef( structItem.alias_def() ); + break; + } + } + + void walkStructDef( struct_def structDef ) + { + String name = structDef.id().data(); + structHead( structDef.id().loc(), curNspace(), name, ObjectDef::StructType ); + + _lrepeat_struct_item structItemList = structDef.ItemList(); + + RepeatIter<struct_item> structItemIter( structItemList ); + + while ( !structItemIter.end() ) { + walkStructItem( structItemIter.value() ); + structItemIter.next(); + } + + structStack.pop(); + namespaceStack.pop(); + } + + void walkNamespaceDef( namespace_def NamespaceDef, StmtList *stmtList ) + { + String name = NamespaceDef.id().data(); + createNamespace( NamespaceDef.id().loc(), name ); + walkNamespaceItemList( NamespaceDef.ItemList(), stmtList ); + namespaceStack.pop(); + } + + void walkRedItem( host_item item, ReduceTextItemList &list ) + { + if ( item.RED_LHS() != 0 ) { + ReduceTextItem *rti = new ReduceTextItem; + rti->type = ReduceTextItem::LhsRef; + list.append( rti ); + } + else if ( item.RED_RHS_REF() != 0 ) { + ReduceTextItem *rti = new ReduceTextItem; + rti->type = ReduceTextItem::RhsRef; + rti->txt = item.RED_RHS_REF().text().c_str(); + list.append( rti ); + } + else if ( item.RED_TREE_REF() != 0 ) { + ReduceTextItem *rti = new ReduceTextItem; + rti->type = ReduceTextItem::TreeRef; + rti->txt = item.RED_TREE_REF().text().c_str(); + list.append( rti ); + } + else if ( 
item.RED_RHS_LOC() != 0 ) { + ReduceTextItem *rti = new ReduceTextItem; + rti->type = ReduceTextItem::RhsLoc; + rti->txt = item.RED_RHS_LOC().text().c_str(); + list.append( rti ); + } + else if ( item.RED_RHS_NREF() != 0 ) { + ReduceTextItem *rti = new ReduceTextItem; + rti->type = ReduceTextItem::RhsRef; + rti->n = atoi( item.RED_RHS_NREF().text().c_str() + 1 ); + list.append( rti ); + } + else if ( item.RED_TREE_NREF() != 0 ) { + ReduceTextItem *rti = new ReduceTextItem; + rti->type = ReduceTextItem::TreeRef; + rti->n = atoi( item.RED_TREE_NREF().text().c_str() + 2 ); + list.append( rti ); + } + else if ( item.RED_RHS_NLOC() != 0 ) { + ReduceTextItem *rti = new ReduceTextItem; + rti->type = ReduceTextItem::RhsLoc; + rti->n = atoi( item.RED_RHS_NLOC().text().c_str() + 1 ); + list.append( rti ); + } + else if ( item.RED_OPEN() != 0 ) { + ReduceTextItem *open = new ReduceTextItem; + open->type = ReduceTextItem::Txt; + open->txt = "{"; + list.append( open ); + + walkRedItemList( item.HostItems(), list ); + + ReduceTextItem *close = new ReduceTextItem; + close->type = ReduceTextItem::Txt; + close->txt = "}"; + list.append( close ); + } + else { + if ( list.length() > 0 && list.tail->type == ReduceTextItem::Txt ) { + std::string txt = item.text(); + list.tail->txt.append( txt.c_str(), txt.size() ); + } + else { + ReduceTextItem *rti = new ReduceTextItem; + rti->type = ReduceTextItem::Txt; + rti->txt = item.text().c_str(); + list.append( rti ); + } + } + } + + void walkRedItemList( _lrepeat_host_item itemList, ReduceTextItemList &list ) + { + RepeatIter<host_item> itemIter( itemList ); + + while ( !itemIter.end() ) { + walkRedItem( itemIter.value(), list ); + itemIter.next(); + } + } + + void walkRedNonTerm( red_nonterm RN ) + { + InputLoc loc = RN.RED_OPEN().loc(); + + TypeRef *typeRef = walkTypeRef( RN.type_ref() ); + + ReduceNonTerm *rnt = new ReduceNonTerm( loc, typeRef ); + + walkRedItemList( RN.HostItems(), rnt->itemList ); + + 
curReduction()->reduceNonTerms.append( rnt ); + } + + void walkRedAction( red_action RA ) + { + InputLoc loc = RA.RED_OPEN().loc(); + String text = RA.HostItems().text().c_str(); + + TypeRef *typeRef = walkTypeRef( RA.type_ref() ); + + ReduceAction *ra = new ReduceAction( loc, typeRef, RA.id().data() ); + + walkRedItemList( RA.HostItems(), ra->itemList ); + + curReduction()->reduceActions.append( ra ); + } + + void walkReductionItem( reduction_item reductionItem ) + { + switch ( reductionItem.prodName() ) { + case reduction_item::NonTerm: { + walkRedNonTerm( reductionItem.red_nonterm() ); + break; + } + case reduction_item::Action: { + walkRedAction( reductionItem.red_action() ); + break; + } + } + } + + void walkReductionList( _lrepeat_reduction_item itemList ) + { + RepeatIter<reduction_item> itemIter( itemList ); + + while ( !itemIter.end() ) { + walkReductionItem( itemIter.value() ); + itemIter.next(); + } + } + + void walkRootItem( root_item rootItem, StmtList *stmtList ) + { + switch ( rootItem.prodName() ) { + case root_item::Rl: + walkRlDef( rootItem.rl_def() ); + break; + case root_item::Token: + walkTokenDef( rootItem.token_def() ); + break; + case root_item::IgnoreCollector: + walkIgnoreCollector( rootItem.ic_def() ); + break; + case root_item::Ignore: + walkIgnoreDef( rootItem.ignore_def() ); + break; + case root_item::Literal: + walkLiteralDef( rootItem.literal_def() ); + break; + case root_item::Cfl: + walkCflDef( rootItem.cfl_def() ); + break; + case root_item::Region: + walkLexRegion( rootItem.region_def() ); + break; + case root_item::Statement: { + LangStmt *stmt = walkStatement( rootItem.statement() ); + if ( stmt != 0 ) + stmtList->append( stmt ); + break; + } + case root_item::Struct: + walkStructDef( rootItem.struct_def() ); + break; + case root_item::Namespace: + walkNamespaceDef( rootItem.namespace_def(), stmtList ); + break; + case root_item::Function: + walkFunctionDef( rootItem.function_def() ); + break; + case root_item::InHost: + 
walkInHostDef( rootItem.in_host_def() ); + break; + case root_item::Iter: + walkIterDef( rootItem.iter_def() ); + break; + case root_item::PreEof: + walkPreEof( rootItem.pre_eof_def() ); + break; + case root_item::Export: { + LangStmt *stmt = walkExportDef( rootItem.export_def() ); + if ( stmt != 0 ) + stmtList->append( stmt ); + break; + } + case root_item::Alias: + walkAliasDef( rootItem.alias_def() ); + break; + case root_item::Precedence: + walkPrecedenceDef( rootItem.precedence_def() ); + break; + case root_item::Include: { + StmtList *includeList = walkInclude( rootItem._include() ); + if ( includeList ) + stmtList->append( *includeList ); + break; + } + case root_item::Global: { + LangStmt *stmt = walkGlobalDef( rootItem.global_def() ); + if ( stmt != 0 ) + stmtList->append( stmt ); + break; + } + case root_item::Reduction: { + reduction_def RD = rootItem.reduction_def(); + + InputLoc loc = RD.REDUCTION().loc(); + String id = RD.id().data(); + + createReduction( loc, id ); + + walkReductionList( RD.ItemList() ); + + reductionStack.pop(); + break; + }} + } + + /* Walk one namespace-level item, dispatching on its production name. Only the Namespace, Include and Global cases use stmtList; the others just call the matching walk function. */ + void walkNamespaceItem( namespace_item item, StmtList *stmtList ) + { + switch ( item.prodName() ) { + case namespace_item::Rl: + walkRlDef( item.rl_def() ); + break; + case namespace_item::Token: + walkTokenDef( item.token_def() ); + break; + case namespace_item::IgnoreCollector: + walkIgnoreCollector( item.ic_def() ); + break; + case namespace_item::Ignore: + walkIgnoreDef( item.ignore_def() ); + break; + case namespace_item::Literal: + walkLiteralDef( item.literal_def() ); + break; + case namespace_item::Cfl: + walkCflDef( item.cfl_def() ); + break; + case namespace_item::Region: + walkLexRegion( item.region_def() ); + break; + case namespace_item::Struct: + walkStructDef( item.struct_def() ); + break; + case namespace_item::Namespace: + walkNamespaceDef( item.namespace_def(), stmtList ); + break; + case namespace_item::Function: + walkFunctionDef( item.function_def() ); + break; + case 
namespace_item::InHost: + walkInHostDef( item.in_host_def() ); + break; + case namespace_item::Iter: + walkIterDef( item.iter_def() ); + break; + case namespace_item::PreEof: + walkPreEof( item.pre_eof_def() ); + break; + case namespace_item::Alias: + walkAliasDef( item.alias_def() ); + break; + case namespace_item::Precedence: + walkPrecedenceDef( item.precedence_def() ); + break; + case namespace_item::Include: { + StmtList *includeList = walkInclude( item._include() ); + /* Guard against a null list before dereferencing, matching the check walkRootItem makes on the same call. */ + if ( includeList ) + stmtList->append( *includeList ); + break; + } + case namespace_item::Global: { + LangStmt *stmt = walkGlobalDef( item.global_def() ); + if ( stmt != 0 ) + stmtList->append( stmt ); + break; + }} + } + + bool walkNoIgnoreLeft( no_ignore_left OptNoIngore ) + { + return OptNoIngore.prodName() == no_ignore_left::Ni; + } + + bool walkNoIgnoreRight( no_ignore_right OptNoIngore ) + { + return OptNoIngore.prodName() == no_ignore_right::Ni; + } + + bool walkOptEos( opt_eos OptEos ) + { + opt_eos::prod_name pn = OptEos.prodName(); + return pn == opt_eos::Dot || pn == opt_eos::Eos; + } + + void walkLiteralItem( literal_item literalItem ) + { + bool niLeft = walkNoIgnoreLeft( literalItem.no_ignore_left() ); + bool niRight = walkNoIgnoreRight( literalItem.no_ignore_right() ); + + String lit = literalItem.backtick_lit().data(); + literalDef( literalItem.backtick_lit().loc(), lit, niLeft, niRight ); + } + + void walkLiteralList( literal_list literalList ) + { + if ( literalList.prodName() == literal_list::Item ) + walkLiteralList( literalList._literal_list() ); + walkLiteralItem( literalList.literal_item() ); + } + + void walkLiteralDef( literal_def literalDef ) + { + walkLiteralList( literalDef.literal_list() ); + } + + void walkNamespaceItemList( _lrepeat_namespace_item itemList, StmtList *stmtList ) + { + /* Walk the list of items. 
*/ + RepeatIter<namespace_item> itemIter( itemList ); + while ( !itemIter.end() ) { + walkNamespaceItem( itemIter.value(), stmtList ); + itemIter.next(); + } + } + + StmtList *walkRootItemList( _lrepeat_root_item rootItemList ) + { + StmtList *stmtList = new StmtList; + + /* Walk the list of items. */ + RepeatIter<root_item> rootItemIter( rootItemList ); + while ( !rootItemIter.end() ) { + walkRootItem( rootItemIter.value(), stmtList ); + rootItemIter.next(); + } + return stmtList; + } + + virtual void go( long activeRealm ); +}; + +void LoadColm::go( long activeRealm ) +{ + LoadColm::init(); + + const char *argv[3]; + argv[0] = "load-colm"; + argv[1] = inputFileName; + argv[2] = 0; + + colm_program *program = colm_new_program( &colm_object ); + colm_set_debug( program, activeRealm ); + colm_run_program( program, 2, argv ); + + /* Extract the parse tree. */ + start Start = ColmTree( program ); + str Error = ColmError( program ); + + if ( Start == 0 ) { + gblErrorCount += 1; + InputLoc loc = Error.loc(); + error(loc) << inputFileName << ": parse error: " << Error.text() << std::endl; + return; + } + + StmtList *stmtList = walkRootItemList( Start.RootItemList() ); + pd->streamFileNames.append( colm_extract_fns( program ) ); + colm_delete_program( program ); + + pd->rootCodeBlock = CodeBlock::cons( stmtList, 0 ); +} + +BaseParser *consLoadColm( Compiler *pd, const char *inputFileName ) +{ + return new LoadColm( pd, inputFileName ); +} diff --git a/src/loadfinal.h b/src/loadfinal.h new file mode 100644 index 00000000..0c888f9a --- /dev/null +++ b/src/loadfinal.h @@ -0,0 +1,31 @@ +/* + * Copyright 2013-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell 
copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _COLM_LOADCOLM_H +#define _COLM_LOADCOLM_H + +#include "parser.h" + +BaseParser *consLoadColm( Compiler *pd, const char *inputFileName ); + +#endif /* _COLM_LOADCOLM_H */ + diff --git a/src/loadinit.cc b/src/loadinit.cc new file mode 100644 index 00000000..f5281da3 --- /dev/null +++ b/src/loadinit.cc @@ -0,0 +1,416 @@ +/* + * Copyright 2006-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "loadinit.h" + +#include <string.h> + +#include <iostream> + +#include "gen/if1.h" + +using std::string; + +extern colm_sections colm_object; + +void LoadInit::walkProdElList( String defName, ProdElList *list, prod_el_list &prodElList ) +{ + if ( prodElList.ProdElList() != 0 ) { + prod_el_list RightProdElList = prodElList.ProdElList(); + walkProdElList( defName, list, RightProdElList ); + } + + if ( prodElList.ProdEl() != 0 ) { + prod_el El = prodElList.ProdEl(); + String typeName = El.Id().text().c_str(); + + ObjectField *captureField = 0; + if ( El.OptName().Name() != 0 ) { + /* Has a capture. */ + String fieldName = El.OptName().Name().text().c_str(); + captureField = ObjectField::cons( internal, + ObjectField::RhsNameType, 0, fieldName ); + } + else { + /* Default the capture to the name of the type. 
*/ + String fieldName = typeName; + if ( strcmp( fieldName, defName ) == 0 ) + fieldName = "_" + defName; + captureField = ObjectField::cons( internal, + ObjectField::RhsNameType, 0, fieldName ); + } + + RepeatType repeatType = RepeatNone; + if ( El.OptRepeat().Star() != 0 ) + repeatType = RepeatRepeat; + if ( El.OptRepeat().LeftStar() != 0 ) + repeatType = RepeatLeftRepeat; + + ProdEl *prodEl = prodElName( internal, typeName, + NamespaceQual::cons( curNspace() ), + captureField, repeatType, false ); + + appendProdEl( list, prodEl ); + } +} + +void LoadInit::walkProdList( String defName, LelDefList *outProdList, prod_list &prodList ) +{ + if ( prodList.ProdList() != 0 ) { + prod_list RightProdList = prodList.ProdList(); + walkProdList( defName, outProdList, RightProdList ); + } + + ProdElList *outElList = new ProdElList; + prod_el_list prodElList = prodList.Prod().ProdElList(); + walkProdElList( defName, outElList, prodElList ); + + String name; + if ( prodList.Prod().OptName().Name() != 0 ) + name = prodList.Prod().OptName().Name().text().c_str(); + + bool commit = prodList.Prod().OptCommit().Commit() != 0; + + Production *prod = BaseParser::production( internal, outElList, name, commit, 0, 0 ); + prodAppend( outProdList, prod ); +} + +LexFactor *LoadInit::walkLexFactor( lex_factor &lexFactor ) +{ + LexFactor *factor = 0; + if ( lexFactor.Literal() != 0 ) { + String litString = lexFactor.Literal().text().c_str(); + Literal *literal = Literal::cons( internal, litString, Literal::LitString ); + factor = LexFactor::cons( literal ); + } + if ( lexFactor.Id() != 0 ) { + String id = lexFactor.Id().text().c_str(); + factor = lexRlFactorName( id, internal ); + } + else if ( lexFactor.Expr() != 0 ) { + lex_expr LexExpr = lexFactor.Expr(); + LexExpression *expr = walkLexExpr( LexExpr ); + LexJoin *join = LexJoin::cons( expr ); + factor = LexFactor::cons( join ); + } + else if ( lexFactor.Low() != 0 ) { + String low = lexFactor.Low().text().c_str(); + Literal *lowLit = 
Literal::cons( internal, low, Literal::LitString ); + + String high = lexFactor.High().text().c_str(); + Literal *highLit = Literal::cons( internal, high, Literal::LitString ); + + Range *range = Range::cons( lowLit, highLit ); + factor = LexFactor::cons( range ); + } + return factor; +} + +LexFactorNeg *LoadInit::walkLexFactorNeg( lex_factor_neg &lexFactorNeg ) +{ + if ( lexFactorNeg.FactorNeg() != 0 ) { + lex_factor_neg Rec = lexFactorNeg.FactorNeg(); + LexFactorNeg *recNeg = walkLexFactorNeg( Rec ); + LexFactorNeg *factorNeg = LexFactorNeg::cons( recNeg, LexFactorNeg::CharNegateType ); + return factorNeg; + } + else { + lex_factor LexFactorTree = lexFactorNeg.Factor(); + LexFactor *factor = walkLexFactor( LexFactorTree ); + LexFactorNeg *factorNeg = LexFactorNeg::cons( factor ); + return factorNeg; + } +} + +LexFactorRep *LoadInit::walkLexFactorRep( lex_factor_rep &lexFactorRep ) +{ + LexFactorRep *factorRep = 0; + if ( lexFactorRep.Star() != 0 ) { + lex_factor_rep Rec = lexFactorRep.FactorRep(); + LexFactorRep *recRep = walkLexFactorRep( Rec ); + factorRep = LexFactorRep::cons( internal, recRep, 0, 0, LexFactorRep::StarType ); + } + else if ( lexFactorRep.Plus() != 0 ) { + lex_factor_rep Rec = lexFactorRep.FactorRep(); + LexFactorRep *recRep = walkLexFactorRep( Rec ); + factorRep = LexFactorRep::cons( internal, recRep, 0, 0, LexFactorRep::PlusType ); + } + else { + lex_factor_neg LexFactorNegTree = lexFactorRep.FactorNeg(); + LexFactorNeg *factorNeg = walkLexFactorNeg( LexFactorNegTree ); + factorRep = LexFactorRep::cons( factorNeg ); + } + return factorRep; +} + +LexFactorAug *LoadInit::walkLexFactorAug( lex_factor_rep &lexFactorRep ) +{ + LexFactorRep *factorRep = walkLexFactorRep( lexFactorRep ); + return LexFactorAug::cons( factorRep ); +} + +LexTerm *LoadInit::walkLexTerm( lex_term &lexTerm ) +{ + if ( lexTerm.Term() != 0 ) { + lex_term Rec = lexTerm.Term(); + LexTerm *leftTerm = walkLexTerm( Rec ); + + lex_factor_rep LexFactorRepTree = 
lexTerm.FactorRep(); + LexFactorAug *factorAug = walkLexFactorAug( LexFactorRepTree ); + + LexTerm::Type type = lexTerm.Dot() != 0 ? + LexTerm::ConcatType : LexTerm::RightFinishType; + + LexTerm *term = LexTerm::cons( leftTerm, factorAug, type ); + + return term; + } + else { + lex_factor_rep LexFactorRepTree = lexTerm.FactorRep(); + LexFactorAug *factorAug = walkLexFactorAug( LexFactorRepTree ); + LexTerm *term = LexTerm::cons( factorAug ); + return term; + } +} + +LexExpression *LoadInit::walkLexExpr( lex_expr &LexExprTree ) +{ + if ( LexExprTree.Expr() != 0 ) { + lex_expr Rec = LexExprTree.Expr(); + LexExpression *leftExpr = walkLexExpr( Rec ); + + lex_term lexTerm = LexExprTree.Term(); + LexTerm *term = walkLexTerm( lexTerm ); + LexExpression *expr = LexExpression::cons( leftExpr, term, LexExpression::OrType ); + + return expr; + } + else { + lex_term lexTerm = LexExprTree.Term(); + LexTerm *term = walkLexTerm( lexTerm ); + LexExpression *expr = LexExpression::cons( term ); + return expr; + } +} + +bool walkNoIgnore( opt_ni OptNi ) +{ + return OptNi.Ni() != 0; +} + +void LoadInit::walkTokenList( token_list &tokenList ) +{ + if ( tokenList.TokenList() != 0 ) { + token_list RightTokenList = tokenList.TokenList(); + walkTokenList( RightTokenList ); + } + + if ( tokenList.TokenDef() != 0 ) { + token_def tokenDef = tokenList.TokenDef(); + String name = tokenDef.Id().text().c_str(); + + ObjectDef *objectDef = ObjectDef::cons( ObjectDef::UserType, name, pd->nextObjectId++ ); + + lex_expr LexExpr = tokenDef.Expr(); + LexExpression *expr = walkLexExpr( LexExpr ); + LexJoin *join = LexJoin::cons( expr ); + + bool leftNi = walkNoIgnore( tokenDef.LeftNi() ); + bool rightNi = walkNoIgnore( tokenDef.RightNi() ); + + defineToken( internal, name, join, objectDef, 0, false, leftNi, rightNi ); + } + + if ( tokenList.IgnoreDef() != 0 ) { + ignore_def IgnoreDef = tokenList.IgnoreDef(); + + ObjectDef *objectDef = ObjectDef::cons( ObjectDef::UserType, String(), pd->nextObjectId++ ); 
+ + lex_expr LexExpr = IgnoreDef.Expr(); + LexExpression *expr = walkLexExpr( LexExpr ); + LexJoin *join = LexJoin::cons( expr ); + + defineToken( internal, String(), join, objectDef, 0, true, false, false ); + } +} + +void LoadInit::walkLexRegion( item &LexRegion ) +{ + pushRegionSet( internal ); + + token_list tokenList = LexRegion.TokenList(); + walkTokenList( tokenList ); + + popRegionSet(); +} + +void LoadInit::walkDefinition( item &define ) +{ + prod_list ProdList = define.ProdList(); + + String name = define.DefId().text().c_str(); + + LelDefList *defList = new LelDefList; + walkProdList( name, defList, ProdList ); + + NtDef *ntDef = NtDef::cons( name, curNspace(), curStruct(), false ); + ObjectDef *objectDef = ObjectDef::cons( ObjectDef::UserType, name, + pd->nextObjectId++ ); + cflDef( ntDef, objectDef, defList ); +} + +void LoadInit::consParseStmt( StmtList *stmtList ) +{ + /* Pop argv, this yields the file name . */ + CallArgVect *popArgs = new CallArgVect; + QualItemVect *popQual = new QualItemVect; + popQual->append( QualItem( QualItem::Arrow, internal, String( "argv" ) ) ); + + LangVarRef *popRef = LangVarRef::cons( internal, curNspace(), 0, + curLocalFrame()->rootScope, NamespaceQual::cons( curNspace() ), + popQual, String("pop") ); + LangExpr *pop = LangExpr::cons( LangTerm::cons( InputLoc(), popRef, popArgs ) ); + + TypeRef *typeRef = TypeRef::cons( internal, pd->uniqueTypeStr ); + ObjectField *objField = ObjectField::cons( internal, + ObjectField::UserLocalType, typeRef, "A" ); + + LangStmt *stmt = varDef( objField, pop, LangStmt::AssignType ); + stmtList->append( stmt ); + + /* Construct a literal string 'r', for second arg to open. 
*/ + ConsItem *modeConsItem = ConsItem::cons( internal, + ConsItem::InputText, String("r") ); + ConsItemList *modeCons = new ConsItemList; + modeCons->append( modeConsItem ); + LangExpr *modeExpr = LangExpr::cons( LangTerm::cons( internal, modeCons ) ); + + /* Reference A->value */ + LangVarRef *varRef = LangVarRef::cons( internal, curNspace(), 0, + curLocalFrame()->rootScope, String("A") ); + LangExpr *Avalue = LangExpr::cons( LangTerm::cons( internal, + LangTerm::VarRefType, varRef ) ); + + /* Call open. */ + LangVarRef *openRef = LangVarRef::cons( internal, + curNspace(), 0, curLocalFrame()->rootScope, String("open") ); + CallArgVect *openArgs = new CallArgVect; + openArgs->append( new CallArg(Avalue) ); + openArgs->append( new CallArg(modeExpr) ); + LangExpr *open = LangExpr::cons( LangTerm::cons( InputLoc(), openRef, openArgs ) ); + + /* Construct a list containing the open stream. */ + ConsItem *consItem = ConsItem::cons( internal, ConsItem::ExprType, open, ConsItem::TrimDefault ); + ConsItemList *list = ConsItemList::cons( consItem ); + + /* Will capture the parser to "P" */ + objField = ObjectField::cons( internal, + ObjectField::UserLocalType, 0, String("P") ); + + /* Ref the start def. */ + NamespaceQual *nspaceQual = NamespaceQual::cons( curNspace() ); + typeRef = TypeRef::cons( internal, nspaceQual, + String("start"), RepeatNone ); + + /* Parse the above list. 
*/ + LangExpr *parseExpr = parseCmd( internal, false, false, objField, + typeRef, 0, list, true, false, false, "" ); + LangStmt *parseStmt = LangStmt::cons( internal, LangStmt::ExprType, parseExpr ); + stmtList->append( parseStmt ); +} + +void LoadInit::consExportTree( StmtList *stmtList ) +{ + LangVarRef *varRef = LangVarRef::cons( internal, curNspace(), 0, + curLocalFrame()->rootScope, String("P") ); + LangExpr *expr = LangExpr::cons( LangTerm::cons( internal, + LangTerm::VarRefType, varRef ) ); + + NamespaceQual *nspaceQual = NamespaceQual::cons( curNspace() ); + TypeRef *typeRef = TypeRef::cons( internal, nspaceQual, String("start"), RepeatNone ); + ObjectField *program = ObjectField::cons( internal, + ObjectField::StructFieldType, typeRef, String("ColmTree") ); + LangStmt *programExport = exportStmt( program, LangStmt::AssignType, expr ); + stmtList->append( programExport ); +} + +void LoadInit::consExportError( StmtList *stmtList ) +{ + LangVarRef *varRef = LangVarRef::cons( internal, curNspace(), 0, + curLocalFrame()->rootScope, String("error") ); + LangExpr *expr = LangExpr::cons( LangTerm::cons( internal, + LangTerm::VarRefType, varRef ) ); + + NamespaceQual *nspaceQual = NamespaceQual::cons( curNspace() ); + TypeRef *typeRef = TypeRef::cons( internal, nspaceQual, String("str"), RepeatNone ); + ObjectField *program = ObjectField::cons( internal, + ObjectField::StructFieldType, typeRef, String("ColmError") ); + LangStmt *programExport = exportStmt( program, LangStmt::AssignType, expr ); + stmtList->append( programExport ); +} + +void LoadInit::go( long activeRealm ) +{ + LoadInit::init(); + + StmtList *stmtList = new StmtList; + + const char *argv[3]; + argv[0] = "load-init"; + argv[1] = inputFileName; + argv[2] = 0; + + colm_program *program = colm_new_program( &colm_object ); + colm_set_debug( program, 0 ); + colm_run_program( program, 2, argv ); + + /* Extract the parse tree. 
*/ + start Start = ColmTree( program ); + + if ( Start == 0 ) { + gblErrorCount += 1; + std::cerr << inputFileName << ": parse error" << std::endl; + return; + } + + /* Walk the list of items. */ + _lrepeat_item ItemList = Start.ItemList(); + RepeatIter<item> itemIter( ItemList ); + while ( !itemIter.end() ) { + + item Item = itemIter.value(); + if ( Item.DefId() != 0 ) + walkDefinition( Item ); + else if ( Item.TokenList() != 0 ) + walkLexRegion( Item ); + itemIter.next(); + } + + pd->streamFileNames.append( colm_extract_fns( program ) ); + colm_delete_program( program ); + + consParseStmt( stmtList ); + consExportTree( stmtList ); + consExportError( stmtList ); + + pd->rootCodeBlock = CodeBlock::cons( stmtList, 0 ); +} diff --git a/src/loadinit.h b/src/loadinit.h new file mode 100644 index 00000000..93a18444 --- /dev/null +++ b/src/loadinit.h @@ -0,0 +1,77 @@ +/* + * Copyright 2013-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _COLM_LOADINIT_H +#define _COLM_LOADINIT_H + +#include <iostream> + +#include <avltree.h> + +#include "compiler.h" +#include "parser.h" + +struct lex_factor; +struct lex_factor_neg; +struct lex_factor_rep; +struct lex_term; +struct lex_expr; +struct token_list; +struct prod_el_list; +struct prod_list; +struct item; + +/* BaseParser implementation that keeps the input file name; its walk* methods take the parse-tree node types forward-declared above, its cons* methods build statements into a StmtList, and go() is the entry point. */ +struct LoadInit +: + public BaseParser +{ + LoadInit( Compiler *pd, const char *inputFileName ) + : + BaseParser(pd), + inputFileName(inputFileName) + {} + + const char *inputFileName; + + /* Constructing the colm language data structures from the parse tree. */ + LexFactor *walkLexFactor( lex_factor &LexFactorTree ); + LexFactorNeg *walkLexFactorNeg( lex_factor_neg &LexFactorNegTree ); + LexFactorRep *walkLexFactorRep( lex_factor_rep &LexFactorRepTree ); + LexFactorAug *walkLexFactorAug( lex_factor_rep &LexFactorRepTree ); + LexTerm *walkLexTerm( lex_term &LexTerm ); + LexExpression *walkLexExpr( lex_expr &LexExpr ); + void walkTokenList( token_list &TokenList ); + void walkLexRegion( item &LexRegion ); + void walkProdElList( String defName, ProdElList *list, prod_el_list &prodElList ); + void walkProdList( String defName, LelDefList *list, prod_list &prodList ); + void walkDefinition( item &define ); + + /* Constructing statements needed to parse and export the input. 
*/ + void consParseStmt( StmtList *stmtList ); + void consExportTree( StmtList *stmtList ); + void consExportError( StmtList *stmtList ); + + virtual void go( long activeRealm ); +}; + +#endif /* _COLM_LOADINIT_H */ + diff --git a/src/lookup.cc b/src/lookup.cc new file mode 100644 index 00000000..cb243dc6 --- /dev/null +++ b/src/lookup.cc @@ -0,0 +1,323 @@ +/* + * Copyright 2007-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + + +#include <assert.h> +#include <iostream> +#include "compiler.h" + +/* + * Variable Lookup + */ + +using std::cout; +using std::cerr; +using std::endl; + +ObjectDef *UniqueType::objectDef() +{ + if ( typeId == TYPE_TREE || typeId == TYPE_REF ) { + return langEl->objectDef; + } + else if ( typeId == TYPE_STRUCT ) { + return structEl->structDef->objectDef; + } + else if ( typeId == TYPE_GENERIC ) { + return generic->objDef; + } + + /* This should have generated a compiler error. */ + assert( false ); +} + +/* Recurisve find through a single object def's scope. */ +ObjectField *ObjectDef::findFieldInScope( const NameScope *inScope, + const String &name ) const +{ + FieldMapEl *objDefMapEl = inScope->fieldMap.find( name ); + if ( objDefMapEl != 0 ) + return objDefMapEl->value; + if ( inScope->parentScope != 0 ) + return findFieldInScope( inScope->parentScope, name ); + return 0; +} + +ObjectField *NameScope::findField( const String &name ) const +{ + return owningObj->findFieldInScope( this, name ); +} + +ObjectMethod *NameScope::findMethod( const String &name ) const +{ + MethodMapEl *methodMapEl = methodMap.find( name ); + if ( methodMapEl != 0 ) + return methodMapEl->value; + if ( parentScope != 0 ) + return parentScope->findMethod( name ); + return 0; +} + +VarRefLookup LangVarRef::lookupQualification( Compiler *pd, NameScope *rootScope ) const +{ + int lastPtrInQual = -1; + NameScope *searchScope = rootScope; + int firstConstPart = -1; + + for ( QualItemVect::Iter qi = *qual; qi.lte(); qi++ ) { + /* Lookup the field int the current qualification. */ + ObjectField *el = searchScope->findField( qi->data ); + if ( el == 0 ) + error(qi->loc) << "cannot resolve qualification " << qi->data << endp; + + /* Lookup the type of the field. */ + el->typeRef->resolveType( pd ); + UniqueType *qualUT = el->typeRef->uniqueType; + + /* If we are dealing with an iterator then dereference it. 
*/ + if ( qualUT->typeId == TYPE_ITER ) + qualUT = el->typeRef->searchUniqueType; + + /* Is it const? */ + if ( firstConstPart < 0 && el->isConst ) + firstConstPart = qi.pos(); + + /* Check for references. When loop is done we will have the last one + * present, if any. */ + if ( qualUT->ptr() ) + lastPtrInQual = qi.pos(); + + if ( qi->form == QualItem::Dot ) { + /* Cannot dot a reference. Iterator yes (access of the iterator + * not the current) */ + if ( qualUT->ptr() ) + error(loc) << "dot cannot be used to access a pointer" << endp; + } + else if ( qi->form == QualItem::Arrow ) { + if ( qualUT->typeId == TYPE_ITER ) + qualUT = el->typeRef->searchUniqueType; + } + + ObjectDef *searchObjDef = qualUT->objectDef(); + if ( searchObjDef == 0 ) + error(qi->loc) << "left hand side of qual has no object defintion" << endp; + searchScope = searchObjDef->rootScope; + } + + return VarRefLookup( lastPtrInQual, firstConstPart, searchScope->owningObj, searchScope ); +} + +bool LangVarRef::isLocalRef() const +{ + if ( qual->length() > 0 ) { + if ( scope->findField( qual->data[0].data ) != 0 ) + return true; + } + else if ( scope->findField( name ) != 0 ) + return true; + else if ( scope->findMethod( name ) != 0 ) + return true; + + return false; +} + +/* For accesing production RHS values inside a switch case that limits our + * search to a particular productions. 
*/ +bool LangVarRef::isProdRef( Compiler *pd ) const +{ + if ( scope->caseClauseVarRef != 0 ) { + UniqueType *varUt = scope->caseClauseVarRef->lookup( pd ); + ObjectDef *searchObjDef = varUt->objectDef(); + + if ( qual->length() > 0 ) { + if ( searchObjDef->rootScope->findField( qual->data[0].data ) != 0 ) + return true; + } + else if ( searchObjDef->rootScope->findField( name ) != 0 ) + return true; + else if ( searchObjDef->rootScope->findMethod( name ) != 0 ) + return true; + } + return false; +} + +bool LangVarRef::isStructRef() const +{ + if ( structDef != 0 ) { + if ( qual->length() > 0 ) { + if ( structDef->objectDef->rootScope->findField( qual->data[0].data ) != 0 ) + return true; + } + else if ( structDef->objectDef->rootScope->findField( name ) != 0 ) + return true; + else if ( structDef->objectDef->rootScope->findMethod( name ) != 0 ) + return true; + } + + return false; +} + +bool LangVarRef::isInbuiltObject() const +{ + if ( qual->length() > 0 ) { + ObjectField *field = scope->findField( qual->data[0].data ); + if ( field != 0 && field->isInbuiltObject() ) + return true; + } + else { + ObjectField *field = scope->findField( name ); + if ( field != 0 ) { + if ( field->isInbuiltObject() ) + return true; + } + } + return false; +} + +VarRefLookup LangVarRef::lookupObj( Compiler *pd ) const +{ + NameScope *rootScope; + + if ( nspaceQual != 0 && nspaceQual->qualNames.length() > 0 ) { + Namespace *nspace = pd->rootNamespace->findNamespace( nspaceQual->qualNames[0] ); + rootScope = nspace->rootScope; + } + else if ( isLocalRef() ) + rootScope = scope; + else if ( isProdRef( pd ) ) { + UniqueType *varUt = scope->caseClauseVarRef->lookup( pd ); + ObjectDef *searchObjDef = varUt->objectDef(); + rootScope = searchObjDef->rootScope; + } + else if ( isStructRef() ) + rootScope = structDef->objectDef->rootScope; + else + rootScope = nspace != 0 ? 
nspace->rootScope : pd->rootNamespace->rootScope; + + return lookupQualification( pd, rootScope ); +} + +VarRefLookup LangVarRef::lookupMethodObj( Compiler *pd ) const +{ + NameScope *rootScope; + + if ( nspaceQual != 0 && nspaceQual->qualNames.length() > 0 ) { + Namespace *nspace = pd->rootNamespace->findNamespace( nspaceQual->qualNames[0] ); + rootScope = nspace->rootScope; + } + else if ( isLocalRef() ) + rootScope = scope; + else if ( isStructRef() ) + rootScope = structDef->objectDef->rootScope; + else + rootScope = nspace != 0 ? nspace->rootScope : pd->rootNamespace->rootScope; + + return lookupQualification( pd, rootScope ); +} + + +VarRefLookup LangVarRef::lookupField( Compiler *pd ) const +{ + /* Lookup the object that the field is in. */ + VarRefLookup lookup = lookupObj( pd ); + + /* Lookup the field. */ + ObjectField *field = lookup.inScope->findField( name ); + if ( field == 0 ) + error(loc) << "cannot find name " << name << " in object" << endp; + + lookup.objField = field; + lookup.uniqueType = field->typeRef->uniqueType; + + if ( field->typeRef->searchUniqueType != 0 ) + lookup.iterSearchUT = field->typeRef->searchUniqueType; + + return lookup; +} + +UniqueType *LangVarRef::lookup( Compiler *pd ) const +{ + /* Lookup the loadObj. */ + VarRefLookup lookup = lookupField( pd ); + + ObjectField *el = lookup.objField; + UniqueType *elUT = el->typeRef->resolveType( pd ); + + /* Deref iterators. */ + if ( elUT->typeId == TYPE_ITER ) + elUT = el->typeRef->searchUniqueType; + + return elUT; +} + +VarRefLookup LangVarRef::lookupMethod( Compiler *pd ) const +{ + /* Lookup the object that the field is in. */ + VarRefLookup lookup = lookupMethodObj( pd ); + + /* Find the method. */ + ObjectMethod *method = lookup.inScope->findMethod( name ); + if ( method == 0 ) { + /* Not found as a method, try it as an object on which we will call a + * default function. */ + qual->append( QualItem( QualItem::Dot, loc, name ) ); + + /* Lookup the object that the field is in. 
*/ + VarRefLookup lookup = lookupObj( pd ); + + /* Find the method. */ + method = lookup.inScope->findMethod( "finish" ); + if ( method == 0 ) + error(loc) << "cannot find " << name << "(...) in object" << endp; + } + + lookup.objMethod = method; + lookup.uniqueType = method->returnUT; + + return lookup; +} + +VarRefLookup LangVarRef::lookupIterCall( Compiler *pd ) const +{ + /* Lookup the object that the field is in. */ + VarRefLookup lookup = lookupObj( pd ); + + /* Find the method. */ + ObjectMethod *method = lookup.inScope->findMethod( name ); + if ( method == 0 ) { + /* Not found as a method, try it as an object on which we will call a + * default function. */ + qual->append( QualItem( QualItem::Dot, loc, name ) ); + + /* Lookup the object that the field is in. */ + VarRefLookup lookup = lookupObj( pd ); + + /* Find the method. */ + method = lookup.inScope->findMethod( "finish" ); + if ( method == 0 ) + error(loc) << "cannot find " << name << "(...) in object" << endp; + } + + lookup.objMethod = method; + lookup.uniqueType = method->returnUT; + + return lookup; +} diff --git a/src/main.cc b/src/main.cc new file mode 100644 index 00000000..a3a7d2c9 --- /dev/null +++ b/src/main.cc @@ -0,0 +1,836 @@ +/* + * Copyright 2006-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <assert.h> +#include <stdlib.h> +#include <string.h> +#include <strings.h> +#include <stdio.h> +#include <stdbool.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <iostream> + +#include "debug.h" +#include "pcheck.h" +#include "version.h" +#include "compiler.h" + +#if defined(CONS_INIT) +#include "consinit.h" +#elif defined(LOAD_INIT) +#include "loadinit.h" +#else +#include "loadfinal.h" +#endif + +using std::istream; +using std::ifstream; +using std::ostream; +using std::ios; +using std::cin; +using std::cout; +using std::cerr; +using std::endl; + +/* Graphviz dot file generation. */ +bool genGraphviz = false; + +using std::ostream; +using std::istream; +using std::ifstream; +using std::ofstream; +using std::ios; +using std::cout; +using std::cerr; +using std::cin; +using std::endl; + +InputLoc internal; + +/* Io globals. 
*/ +istream *inStream = 0; +ostream *outStream = 0; +const char *inputFn = 0; +const char *outputFn = 0; +const char *intermedFn = 0; +const char *binaryFn = 0; +const char *exportHeaderFn = 0; +const char *exportCodeFn = 0; +const char *commitCodeFn = 0; +const char *objectName = "colm_object"; +bool exportCode = false; +bool hostAdapters = true; + +bool generateGraphviz = false; +bool verbose = false; +bool logging = false; +bool branchPointInfo = false; +bool run = false; +bool addUniqueEmptyProductions = false; +bool gblLibrary = false; +long gblActiveRealm = 0; +bool outputSpecifiedWithDashP = false; + +ArgsVector includePaths; +ArgsVector libraryPaths; +DefineVector defineArgs; +ArgsVector additionalCodeFiles; + +/* Print version information. */ +void version(); + +/* Total error count. */ +int gblErrorCount = 0; + +/* + * Alphabet Type for the parsing machinery. The trees/strings of parsed data + * all use char type. Currently we can support signed char, unsigned char or + * char. If changing this, the colm_alph_t type needs to change as well. + * Currently, this is a compile time change only. A colm binary currently + * connot be made to work with multiple alphabet types. + */ + +HostType hostTypesC[] = +{ + { "unsigned", "char", false, 0, UCHAR_MAX, sizeof(unsigned char) }, +}; + + +HostLang hostLangC = { hostTypesC, 1, 0 }; +HostLang *hostLang = &hostLangC; + +/* Print the opening to an error in the input, then return the error ostream. */ +ostream &error( const InputLoc &loc ) +{ + /* Keep the error count. */ + gblErrorCount += 1; + + if ( loc.fileName != 0 ) + cerr << loc.fileName << ":"; + else + cerr << "<input>:"; + + if ( loc.line == -1 ) { + cerr << "INT: "; + } + else { + cerr << loc.line << ":" << loc.col << ": "; + } + return cerr; +} + +/* Print the opening to a program error, then return the error stream. 
*/ +ostream &error() +{ + gblErrorCount += 1; + cerr << "error: " PROGNAME ": "; + return cerr; +} + + +/* Print the opening to a warning, then return the error ostream. */ +ostream &warning( ) +{ + cerr << "warning: " << inputFn << ": "; + return cerr; +} + +/* Print the opening to a warning in the input, then return the error ostream. */ +ostream &warning( const InputLoc &loc ) +{ + assert( inputFn != 0 ); + cerr << "warning: " << inputFn << ":" << + loc.line << ":" << loc.col << ": "; + return cerr; +} + +void escapeLineDirectivePath( std::ostream &out, char *path ) +{ + for ( char *pc = path; *pc != 0; pc++ ) { + if ( *pc == '\\' ) + out << "\\\\"; + else + out << *pc; + } +} + +void escapeLineDirectivePath( std::ostream &out, char *path ); +void scan( char *fileName, istream &input ); + +bool printStatistics = false; + +/* Print a summary of the options. */ +void usage() +{ + cout << +"usage: colm [options] file\n" +"general:\n" +" -h, -H, -?, --help print this usage and exit\n" +" -v --version print version information and exit\n" +" -b <ident> use <ident> as name of C object encapulaing the program\n" +" -o <file> if -c given, write C parse object to <file>,\n" +" otherwise write binary to <file>\n" +" -p <file> write C parse object to <file>\n" +" -e <file> write C++ export header to <file>\n" +" -x <file> write C++ export code to <file>\n" +" -m <file> write C++ commit code to <file>\n" +" -a <file> additional code file to include in output program\n" +" -E N=V set a string value available in the program\n" +" -I <path> additional include path for the compiler\n" +" -i activate branchpoint information\n" +" -L <path> additional library path for the linker\n" +" -l activate logging\n" +" -r run output program and replace process\n" +" -c compile only (don't produce binary)\n" +" -V print dot format (graphiz)\n" +" -d print verbose debug information\n" +#if DEBUG +" -D <tag> print more information about <tag>\n" +" 
(BYTECODE|PARSE|MATCH|COMPILE|POOL|PRINT|INPUT|SCAN\n" +#endif + ; +} + +/* Print version information. */ +void version() +{ + cout << "Colm version " COLM_VERSION << " " COLM_PUBDATE << endl << + "Copyright (c) 2007-2019 by Adrian D. Thurston" << endl; +} + +/* Scans a string looking for the file extension. If there is a file + * extension then pointer returned points to inside the string + * passed in. Otherwise returns null. */ +const char *findFileExtension( const char *stemFile ) +{ + const char *ppos = stemFile + strlen(stemFile) - 1; + + /* Scan backwards from the end looking for the first dot. + * If we encounter a '/' before the first dot, then stop the scan. */ + while ( 1 ) { + /* If we found a dot or got to the beginning of the string then + * we are done. */ + if ( ppos == stemFile || *ppos == '.' ) + break; + + /* If we hit a / then there is no extension. Done. */ + if ( *ppos == '/' ) { + ppos = stemFile; + break; + } + ppos--; + } + + /* If we got to the front of the string then bail we + * did not find an extension */ + if ( ppos == stemFile ) + ppos = 0; + + return ppos; +} + +/* Make a file name from a stem. Removes the old filename suffix and + * replaces it with a new one. Returns a newed up string. */ +char *fileNameFromStem( const char *stemFile, const char *suffix ) +{ + int len = strlen( stemFile ); + assert( len > 0 ); + + /* Get the extension. */ + const char *ppos = findFileExtension( stemFile ); + + /* If an extension was found, then shorten what we think the len is. */ + if ( ppos != 0 ) + len = ppos - stemFile; + + int slen = suffix != 0 ? strlen( suffix ) : 0; + char *retVal = new char[ len + slen + 1 ]; + strncpy( retVal, stemFile, len ); + if ( suffix != 0 ) + strcpy( retVal + len, suffix ); + retVal[len+slen] = 0; + + return retVal; +} + +void openOutputCompiled() +{ + /* Start with the fn given by -o option. 
*/ + binaryFn = outputFn; + + if ( binaryFn == 0 ) + binaryFn = fileNameFromStem( inputFn, 0 ); + + if ( intermedFn == 0 ) + intermedFn = fileNameFromStem( binaryFn, ".c" ); + + if ( binaryFn != 0 && inputFn != 0 && + strcmp( inputFn, binaryFn ) == 0 ) + { + error() << "output file \"" << binaryFn << + "\" is the same as the input file" << endl; + } + + if ( intermedFn != 0 && inputFn != 0 && + strcmp( inputFn, intermedFn ) == 0 ) + { + error() << "intermediate file \"" << intermedFn << + "\" is the same as the input file" << endl; + } + + if ( intermedFn != 0 ) { + /* Open the output stream, attaching it to the filter. */ + ofstream *outFStream = new ofstream( intermedFn ); + + if ( !outFStream->is_open() ) { + error() << "error opening " << intermedFn << " for writing" << endl; + exit(1); + } + + outStream = outFStream; + } + else { + /* Writing out ot std out. */ + outStream = &cout; + } +} + +void openOutputLibrary() +{ + if ( outputFn == 0 ) + outputFn = fileNameFromStem( inputFn, ".c" ); + + /* Make sure we are not writing to the same file as the input file. */ + if ( outputFn != 0 && inputFn != 0 && + strcmp( inputFn, outputFn ) == 0 ) + { + error() << "output file \"" << outputFn << + "\" is the same as the input file" << endl; + } + + if ( outputFn != 0 ) { + /* Open the output stream, attaching it to the filter. */ + ofstream *outFStream = new ofstream( outputFn ); + + if ( !outFStream->is_open() ) { + error() << "error opening " << outputFn << " for writing" << endl; + exit(1); + } + + outStream = outFStream; + } + else { + /* Writing out ot std out. */ + outStream = &cout; + } +} + +void openExports( ) +{ + /* Make sure we are not writing to the same file as the input file. */ + if ( inputFn != 0 && exportHeaderFn != 0 && strcmp( inputFn, exportHeaderFn ) == 0 ) { + error() << "output file \"" << exportHeaderFn << + "\" is the same as the input file" << endl; + } + + if ( exportHeaderFn != 0 ) { + /* Open the output stream, attaching it to the filter. 
*/ + ofstream *outFStream = new ofstream( exportHeaderFn ); + + if ( !outFStream->is_open() ) { + error() << "error opening " << exportHeaderFn << " for writing" << endl; + exit(1); + } + + outStream = outFStream; + } + else { + /* Writing out ot std out. */ + outStream = &cout; + } +} + +void openExportsImpl( ) +{ + /* Make sure we are not writing to the same file as the input file. */ + if ( inputFn != 0 && exportCodeFn != 0 && strcmp( inputFn, exportCodeFn ) == 0 ) { + error() << "output file \"" << exportCodeFn << + "\" is the same as the input file" << endl; + } + + if ( exportCodeFn != 0 ) { + /* Open the output stream, attaching it to the filter. */ + ofstream *outFStream = new ofstream( exportCodeFn ); + + if ( !outFStream->is_open() ) { + error() << "error opening " << exportCodeFn << " for writing" << endl; + exit(1); + } + + outStream = outFStream; + } + else { + /* Writing out ot std out. */ + outStream = &cout; + } +} + +void openCommit( ) +{ + /* Make sure we are not writing to the same file as the input file. */ + if ( inputFn != 0 && commitCodeFn != 0 && strcmp( inputFn, commitCodeFn ) == 0 ) { + error() << "output file \"" << commitCodeFn << + "\" is the same as the input file" << endl; + } + + if ( commitCodeFn != 0 ) { + /* Open the output stream, attaching it to the filter. */ + ofstream *outFStream = new ofstream( commitCodeFn ); + + if ( !outFStream->is_open() ) { + error() << "error opening " << commitCodeFn << " for writing" << endl; + exit(1); + } + + outStream = outFStream; + } + else { + /* Writing out ot std out. 
*/ + outStream = &cout; + } +} + +int compileOutputCommand( const char *command ) +{ + if ( verbose ) + cout << "compiling with: '" << command << "'" << endl; + int res = system( command ); + if ( res != 0 ) + error() << "there was a problem compiling the output" << endl; + + return res; +} + +void runOutputProgram() +{ + if ( verbose ) + cout << "running output: '" << binaryFn << "'" << endl; + + execl( binaryFn, binaryFn, NULL ); + /* We shall never return here! */ +} + +void compileOutput( const char *argv0, const bool inSource, char *srcLocation ) +{ + /* Find the location of the colm program that is executing. */ + char *location = strdup( argv0 ); + char *last; + int length = 1024 + strlen( intermedFn ) + strlen( binaryFn ); + if ( inSource ) { + last = strrchr( location, '/' ); + assert( last != 0 ); + last[0] = 0; + length += 3 * strlen( location ); + } + else { + last = location + strlen( location ) - 1; + while ( true ) { + if ( last == location ) { + last[0] = '.'; + last[1] = 0; + break; + } + if ( *last == '/' ) { + last[0] = 0; + break; + } + last -= 1; + } + } + for ( ArgsVector::Iter af = additionalCodeFiles; af.lte(); af++ ) + length += strlen( *af ) + 2; + for ( ArgsVector::Iter ip = includePaths; ip.lte(); ip++ ) + length += strlen( *ip ) + 3; + for ( ArgsVector::Iter lp = libraryPaths; lp.lte(); lp++ ) + length += strlen( *lp ) + 3; +#define COMPILE_COMMAND_STRING "gcc -Wall -Wwrite-strings" \ + " -g" \ + " -o %s" \ + " %s" + char *command = new char[length]; + if ( inSource ) { + sprintf( command, + COMPILE_COMMAND_STRING + " -I%s/../aapl" + " -I%s/include" + " -L%s" + " -Wl,-rpath,%s", + binaryFn, intermedFn, srcLocation, + srcLocation, location, location ); + } + else { + sprintf( command, + COMPILE_COMMAND_STRING + " -I" PREFIX "/include" + " -L" PREFIX "/lib" + " -Wl,-rpath," PREFIX "/lib", + binaryFn, intermedFn ); + } +#undef COMPILE_COMMAND_STRING + for ( ArgsVector::Iter af = additionalCodeFiles; af.lte(); af++ ) { + strcat( command, " 
" ); + strcat( command, *af ); + } + for ( ArgsVector::Iter ip = includePaths; ip.lte(); ip++ ) { + strcat( command, " -I" ); + strcat( command, *ip ); + } + for ( ArgsVector::Iter lp = libraryPaths; lp.lte(); lp++ ) { + strcat( command, " -L" ); + strcat( command, *lp ); + } + strcat( command, " -lcolm" ); + + if( !compileOutputCommand( command ) && run ) + runOutputProgram(); + + delete[] command; +} + +bool inSourceTree( const char *argv0, char *&location ) +{ + const char *lastSlash = strrchr( argv0, '/' ); + if ( lastSlash != 0 ) { + /* Take off the file name. */ + int rootLen = lastSlash - argv0; + + /* Create string for dir. */ + char *mainPath = new char[rootLen + 16]; + memcpy( mainPath, argv0, rootLen ); + mainPath[rootLen] = 0; + + /* If built using ldconfig then there will be a .libs dir. */ + lastSlash = strrchr( mainPath, '/' ); + if ( lastSlash != 0 ) { + if ( strlen( lastSlash ) >= 6 && memcmp( lastSlash, "/.libs", 7 ) == 0 ) { + rootLen = lastSlash - mainPath; + mainPath[rootLen] = 0; + } + } + + strcpy( mainPath + rootLen, "/main.cc" ); + + struct stat sb; + int res = stat( mainPath, &sb ); + if ( res == 0 && S_ISREG( sb.st_mode ) ) { + mainPath[rootLen] = 0; + location = mainPath; + return true; + } + + delete[] mainPath; + } + + return false; +} + +void processArgs( int argc, const char **argv ) +{ + ParamCheck pc( "p:cD:e:x:I:L:vdliro:S:M:vHh?-:sVa:m:b:E:", argc, argv ); + + while ( pc.check() ) { + switch ( pc.state ) { + case ParamCheck::match: + switch ( pc.parameter ) { + case 'I': + includePaths.append( pc.parameterArg ); + break; + case 'v': + version(); + exit(0); + break; + case 'd': + verbose = true; + break; + case 'l': + logging = true; + break; + case 'L': + libraryPaths.append( pc.parameterArg ); + break; + case 'i': + branchPointInfo = true; + break; + case 'r': + run = true; + break; + case 'p': + outputSpecifiedWithDashP = true; + /* fallthrough */ + case 'o': + /* Output. 
*/ + if ( *pc.parameterArg == 0 ) + error() << "a zero length output file name was given" << endl; + else if ( outputFn != 0 ) + error() << "more than one output file name was given" << endl; + else { + /* Ok, remember the output file name. */ + outputFn = pc.parameterArg; + } + break; + + case 'b': + /* object name. */ + if ( *pc.parameterArg == 0 ) + error() << "a zero length object name was given" << endl; + else { + /* Ok, remember the output file name. */ + objectName = pc.parameterArg; + hostAdapters = false; + } + break; + + case 'H': case 'h': case '?': + usage(); + exit(0); + case 's': + printStatistics = true; + break; + case 'V': + generateGraphviz = true; + break; + case '-': + if ( strcasecmp(pc.parameterArg, "help") == 0 ) { + usage(); + exit(0); + } + else if ( strcasecmp(pc.parameterArg, "version") == 0 ) { + version(); + exit(0); + } + else { + error() << "--" << pc.parameterArg << + " is an invalid argument" << endl; + } + break; + case 'c': + gblLibrary = true; + break; + case 'e': + exportHeaderFn = pc.parameterArg; + break; + case 'x': + exportCodeFn = pc.parameterArg; + break; + case 'a': + additionalCodeFiles.append( pc.parameterArg ); + break; + case 'm': + commitCodeFn = pc.parameterArg; + break; + + case 'E': { + const char *eq = strchr( pc.parameterArg, '=' ); + if ( eq == 0 ) + fatal( "-E option argument must contain =" ); + if ( eq == pc.parameterArg ) + fatal( "-E variable name is of zero length" ); + + defineArgs.append( DefineArg( + String( pc.parameterArg, eq-pc.parameterArg ), + String( eq + 1 ) ) ); + + break; + } + + case 'D': +#if DEBUG + // @NOTE: keep this in sync with 'debug.c': 'colm_realm_names' + if ( strcmp( pc.parameterArg, colm_realm_names[0] ) == 0 ) + gblActiveRealm |= REALM_BYTECODE; + else if ( strcmp( pc.parameterArg, colm_realm_names[1] ) == 0 ) + gblActiveRealm |= REALM_PARSE; + else if ( strcmp( pc.parameterArg, colm_realm_names[2] ) == 0 ) + gblActiveRealm |= REALM_MATCH; + else if ( strcmp( pc.parameterArg, 
colm_realm_names[3] ) == 0 ) + gblActiveRealm |= REALM_COMPILE; + else if ( strcmp( pc.parameterArg, colm_realm_names[4] ) == 0 ) + gblActiveRealm |= REALM_POOL; + else if ( strcmp( pc.parameterArg, colm_realm_names[5] ) == 0 ) + gblActiveRealm |= REALM_PRINT; + else if ( strcmp( pc.parameterArg, colm_realm_names[6] ) == 0 ) + gblActiveRealm |= REALM_INPUT; + else if ( strcmp( pc.parameterArg, colm_realm_names[7] ) == 0 ) + gblActiveRealm |= REALM_SCAN; + else + fatal( "unknown argument to -D %s\n", pc.parameterArg ); +#else + fatal( "-D option specified but debugging messsages not compiled in\n" ); +#endif + break; + + } + break; + + case ParamCheck::invalid: + error() << "-" << pc.parameter << " is an invalid argument" << endl; + break; + + case ParamCheck::noparam: + /* It is interpreted as an input file. */ + if ( *pc.curArg == 0 ) + error() << "a zero length input file name was given" << endl; + else if ( inputFn != 0 ) + error() << "more than one input file name was given" << endl; + else { + /* OK, Remember the filename. */ + inputFn = pc.curArg; + } + break; + } + } +} + +bool readCheck( const char *fn ) +{ + int result = true; + + /* Check if we can open the input file for reading. */ + ifstream *inFile = new ifstream( fn ); + if ( ! inFile->is_open() ) + result = false; + + delete inFile; + return result; +} + +/* Main, process args and call yyparse to start scanning input. */ +int main(int argc, const char **argv) +{ + processArgs( argc, argv ); + + if ( verbose ) + gblActiveRealm = 0xffffffff; + + /* Bail on above errors. */ + if ( gblErrorCount > 0 ) + exit(1); + + /* Make sure we are not writing to the same file as the input file. */ + if ( inputFn != 0 && outputFn != 0 && + strcmp( inputFn, outputFn ) == 0 ) + { + error() << "output file \"" << outputFn << + "\" is the same as the input file" << endl; + } + +#if defined(LOAD_INIT) || defined(LOAD_COLM) + /* Open the input file for reading. 
*/ + if ( inputFn == 0 ) { + error() << "colm: no input file given" << endl; + } + else { + /* Check if we can open the input file for reading. */ + if ( ! readCheck( inputFn ) ) + error() << "could not open " << inputFn << " for reading" << endl; + } +#endif + + if ( !gblLibrary && outputSpecifiedWithDashP ) { + error() << "-p option must be used with -c" << endl; + } + + + /* Bail on above errors. */ + if ( gblErrorCount > 0 ) + exit(1); + + Compiler *pd = new Compiler; + +#if defined(CONS_INIT) + BaseParser *parser = new ConsInit( pd ); +#elif defined(LOAD_INIT) + BaseParser *parser = new LoadInit( pd, inputFn ); +#else + BaseParser *parser = consLoadColm( pd, inputFn ); +#endif + + parser->go( gblActiveRealm ); + + /* Parsing complete, check for errors.. */ + if ( gblErrorCount > 0 ) + return 1; + + /* Initiate a compile following a parse. */ + pd->compile(); + + /* + * Write output. + */ + if ( generateGraphviz ) { + outStream = &cout; + pd->writeDotFile(); + } + else { + if ( gblLibrary ) + openOutputLibrary(); + else + openOutputCompiled(); + + pd->generateOutput( gblActiveRealm, ( commitCodeFn == 0 ) ); + if ( outStream != 0 ) + delete outStream; + + if ( !gblLibrary ) { + char *location = 0; + bool inSource = inSourceTree( argv[0], location ); + compileOutput( argv[0], inSource, location ); + } + + if ( exportHeaderFn != 0 ) { + openExports(); + pd->generateExports(); + delete outStream; + } + if ( exportCodeFn != 0 ) { + openExportsImpl(); + pd->generateExportsImpl(); + delete outStream; + } + if ( commitCodeFn != 0 ) { + openCommit(); + pd->writeCommit(); + delete outStream; + } + } + + delete parser; + delete pd; + + /* Bail on above errors. 
*/ + if ( gblErrorCount > 0 ) + exit(1); + + return 0; +} diff --git a/src/map.c b/src/map.c new file mode 100644 index 00000000..052e5445 --- /dev/null +++ b/src/map.c @@ -0,0 +1,876 @@ +/* + * Copyright 2010-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <colm/map.h> + +#include <assert.h> +#include <stdbool.h> + +#include <colm/pdarun.h> +#include <colm/pool.h> +#include <colm/bytecode.h> + +struct colm_struct *colm_map_el_get( struct colm_program *prg, + map_el_t *map_el, word_t gen_id, word_t field ) +{ + struct generic_info *gi = &prg->rtd->generic_info[gen_id]; + map_el_t *result = 0; + switch ( field ) { + case 0: + result = map_el->prev; + break; + case 1: + result = map_el->next; + break; + default: + assert( 0 ); + break; + } + + struct colm_struct *s = result != 0 ? 
+ colm_struct_container( result, gi->el_offset ) : 0; + return s; +} + +struct colm_struct *colm_map_get( struct colm_program *prg, + map_t *map, word_t gen_id, word_t field ) +{ + struct generic_info *gi = &prg->rtd->generic_info[gen_id]; + map_el_t *result = 0; + switch ( field ) { + case 0: + result = map->head; + break; + case 1: + result = map->tail; + break; + default: + assert( 0 ); + break; + } + + struct colm_struct *s = result != 0 ? + colm_struct_container( result, gi->el_offset ) : 0; + return s; +} + +void map_list_abandon( map_t *map ) +{ + map->head = map->tail = 0; +} + +void map_list_add_before( map_t *map, map_el_t *next_el, map_el_t *new_el ) +{ + /* Set the next pointer of the new element to next_el. We do + * this regardless of the state of the list. */ + new_el->next = next_el; + + /* Set reverse pointers. */ + if ( next_el == 0 ) { + /* There is no next elememnt. We are inserting at the tail. */ + new_el->prev = map->tail; + map->tail = new_el; + } + else { + /* There is a next element and we can access next's previous. */ + new_el->prev = next_el->prev; + next_el->prev = new_el; + } + + /* Set forward pointers. */ + if ( new_el->prev == 0 ) { + /* There is no previous element. Set the head pointer.*/ + map->head = new_el; + } + else { + /* There is a previous element, set it's next pointer to new_el. */ + new_el->prev->next = new_el; + } +} + +void map_list_add_after( map_t *map, map_el_t *prev_el, map_el_t *new_el ) +{ + /* Set the previous pointer of new_el to prev_el. We do + * this regardless of the state of the list. */ + new_el->prev = prev_el; + + /* Set forward pointers. */ + if (prev_el == 0) { + /* There was no prev_el, we are inserting at the head. */ + new_el->next = map->head; + map->head = new_el; + } + else { + /* There was a prev_el, we can access previous next. */ + new_el->next = prev_el->next; + prev_el->next = new_el; + } + + /* Set reverse pointers. */ + if (new_el->next == 0) { + /* There is no next element. 
Set the tail pointer. */ + map->tail = new_el; + } + else { + /* There is a next element. Set it's prev pointer. */ + new_el->next->prev = new_el; + } +} + + +map_el_t *map_list_detach( map_t *map, map_el_t *el ) +{ + /* Set forward pointers to skip over el. */ + if ( el->prev == 0 ) + map->head = el->next; + else + el->prev->next = el->next; + + /* Set reverse pointers to skip over el. */ + if ( el->next == 0 ) + map->tail = el->prev; + else + el->next->prev = el->prev; + + /* Update List length and return element we detached. */ + return el; +} + + +/* Once an insertion position is found, attach a element to the tree. */ +void map_attach_rebal( map_t *map, map_el_t *element, map_el_t *parent_el, map_el_t *last_less ) +{ + /* Increment the number of element in the tree. */ + map->tree_size += 1; + + /* Set element's parent. */ + element->parent = parent_el; + + /* New element always starts as a leaf with height 1. */ + element->left = 0; + element->right = 0; + element->height = 1; + + /* Are we inserting in the tree somewhere? */ + if ( parent_el != 0 ) { + /* We have a parent so we are somewhere in the tree. If the parent + * equals lastLess, then the last traversal in the insertion went + * left, otherwise it went right. */ + if ( last_less == parent_el ) { + parent_el->left = element; + + map_list_add_before( map, parent_el, element ); + } + else { + parent_el->right = element; + + map_list_add_after( map, parent_el, element ); + } + } + else { + /* No parent element so we are inserting the root. */ + map->root = element; + + map_list_add_after( map, map->tail, element ); + } + + /* Recalculate the heights. */ + map_recalc_heights( map, parent_el ); + + /* Find the first unbalance. */ + map_el_t *ub = mapFindFirstUnbalGP( map, element ); + + /* rebalance. */ + if ( ub != 0 ) + { + /* We assert that after this single rotation the + * tree is now properly balanced. 
*/ + map_rebalance( map, ub ); + } +} + +#if 0 +/* Recursively delete all the children of a element. */ +void map_delete_children_of( map_t *map, map_el_t *element ) +{ + /* Recurse left. */ + if ( element->left ) { + map_delete_children_of( map, element->left ); + + /* Delete left element. */ + delete element->left; + element->left = 0; + } + + /* Recurse right. */ + if ( element->right ) { + map_delete_children_of( map, element->right ); + + /* Delete right element. */ + delete element->right; + element->left = 0; + } +} + +void map_empty( map_t *map ) +{ + if ( map->root ) { + /* Recursively delete from the tree structure. */ + map_delete_children_of( map, map->root ); + delete map->root; + map->root = 0; + map->tree_size = 0; + + map_list_abandon( map ); + } +} +#endif + +/* rebalance from a element whose gradparent is unbalanced. Only + * call on a element that has a grandparent. */ +map_el_t *map_rebalance( map_t *map, map_el_t *n ) +{ + long lheight, rheight; + map_el_t *a, *b, *c; + map_el_t *t1, *t2, *t3, *t4; + + map_el_t *p = n->parent; /* parent (Non-NUL). L*/ + map_el_t *gp = p->parent; /* Grand-parent (Non-NULL). */ + map_el_t *ggp = gp->parent; /* Great grand-parent (may be NULL). */ + + if (gp->right == p) + { + /* gp + * * p + p + */ + if (p->right == n) + { + /* gp + * * p + p + * * n + n + */ + a = gp; + b = p; + c = n; + t1 = gp->left; + t2 = p->left; + t3 = n->left; + t4 = n->right; + } + else + { + /* gp + * * p + p + * / + * n + */ + a = gp; + b = n; + c = p; + t1 = gp->left; + t2 = n->left; + t3 = n->right; + t4 = p->right; + } + } + else + { + /* gp + * / + * p + */ + if (p->right == n) + { + /* gp + * / + * p + * * n + n + */ + a = p; + b = n; + c = gp; + t1 = p->left; + t2 = n->left; + t3 = n->right; + t4 = gp->right; + } + else + { + /* gp + * / + * p + * / + * n + */ + a = n; + b = p; + c = gp; + t1 = n->left; + t2 = n->right; + t3 = p->right; + t4 = gp->right; + } + } + + /* Perform rotation. + */ + + /* Tie b to the great grandparent. 
*/ + if ( ggp == 0 ) + map->root = b; + else if ( ggp->left == gp ) + ggp->left = b; + else + ggp->right = b; + b->parent = ggp; + + /* Tie a as a leftchild of b. */ + b->left = a; + a->parent = b; + + /* Tie c as a rightchild of b. */ + b->right = c; + c->parent = b; + + /* Tie t1 as a leftchild of a. */ + a->left = t1; + if ( t1 != 0 ) t1->parent = a; + + /* Tie t2 as a rightchild of a. */ + a->right = t2; + if ( t2 != 0 ) t2->parent = a; + + /* Tie t3 as a leftchild of c. */ + c->left = t3; + if ( t3 != 0 ) t3->parent = c; + + /* Tie t4 as a rightchild of c. */ + c->right = t4; + if ( t4 != 0 ) t4->parent = c; + + /* The heights are all recalculated manualy and the great + * grand-parent is passed to recalcHeights() to ensure + * the heights are correct up the tree. + * + * Note that recalcHeights() cuts out when it comes across + * a height that hasn't changed. + */ + + /* Fix height of a. */ + lheight = a->left ? a->left->height : 0; + rheight = a->right ? a->right->height : 0; + a->height = (lheight > rheight ? lheight : rheight) + 1; + + /* Fix height of c. */ + lheight = c->left ? c->left->height : 0; + rheight = c->right ? c->right->height : 0; + c->height = (lheight > rheight ? lheight : rheight) + 1; + + /* Fix height of b. */ + lheight = a->height; + rheight = c->height; + b->height = (lheight > rheight ? lheight : rheight) + 1; + + /* Fix height of b's parents. */ + map_recalc_heights( map, ggp ); + return ggp; +} + +/* Recalculates the heights of all the ancestors of element. */ +void map_recalc_heights( map_t *map, map_el_t *element ) +{ + while ( element != 0 ) + { + long lheight = element->left ? element->left->height : 0; + long rheight = element->right ? element->right->height : 0; + + long new_height = (lheight > rheight ? lheight : rheight) + 1; + + /* If there is no chage in the height, then there will be no + * change in any of the ancestor's height. We can stop going up. + * If there was a change, continue upward. 
*/ + if (new_height == element->height) + return; + else + element->height = new_height; + + element = element->parent; + } +} + +/* Finds the first element whose grandparent is unbalanced. */ +map_el_t *mapFindFirstUnbalGP( map_t *map, map_el_t *element ) +{ + long lheight, rheight, balance_prop; + map_el_t *gp; + + if ( element == 0 || element->parent == 0 || + element->parent->parent == 0 ) + return 0; + + /* Don't do anything if we we have no grandparent. */ + gp = element->parent->parent; + while ( gp != 0 ) + { + lheight = gp->left ? gp->left->height : 0; + rheight = gp->right ? gp->right->height : 0; + balance_prop = lheight - rheight; + + if ( balance_prop < -1 || balance_prop > 1 ) + return element; + + element = element->parent; + gp = gp->parent; + } + return 0; +} + + + +/* Finds the first element that is unbalanced. */ +map_el_t *map_find_first_unbal_el( map_t *map, map_el_t *element ) +{ + if ( element == 0 ) + return 0; + + while ( element != 0 ) + { + long lheight = element->left ? + element->left->height : 0; + long rheight = element->right ? + element->right->height : 0; + long balance_prop = lheight - rheight; + + if ( balance_prop < -1 || balance_prop > 1 ) + return element; + + element = element->parent; + } + return 0; +} + +/* Replace a element in the tree with another element not in the tree. */ +void map_replace_el( map_t *map, map_el_t *element, map_el_t *replacement ) +{ + map_el_t *parent = element->parent, + *left = element->left, + *right = element->right; + + replacement->left = left; + if (left) + left->parent = replacement; + replacement->right = right; + if (right) + right->parent = replacement; + + replacement->parent = parent; + if (parent) + { + if (parent->left == element) + parent->left = replacement; + else + parent->right = replacement; + } + else { + map->root = replacement; + } + + replacement->height = element->height; +} + + +/* Removes a element from a tree and puts filler in it's place. 
+ * Filler should be null or a child of element. Only the parent's child + * link (or map->root when element has no parent) and filler's parent + * pointer are updated; element's own pointers are left as they were. */ +void map_remove_el( map_t *map, map_el_t *element, map_el_t *filler ) +{ + map_el_t *parent = element->parent; + + if ( parent ) + { + if ( parent->left == element ) + parent->left = filler; + else + parent->right = filler; + } + else { + map->root = filler; + } + + if ( filler ) + filler->parent = parent; + + return; +} + +#if 0 +/* Recursive worker for tree copying. + * NOTE(review): this disabled code tests and recurses on new_el->left and + * new_el->right, i.e. on the freshly allocated copy rather than on the + * source element el. It looks like el->left / el->right were intended; + * confirm before re-enabling. */ +map_el_t *map_copy_branch( program_t *prg, map_t *map, map_el_t *el, kid_t *old_next_down, kid_t **new_next_down ) +{ + /* Duplicate element. Either the base element's copy constructor or default + * constructor will get called. Both will suffice for initting the + * pointers to null when they need to be. */ + map_el_t *new_el = map_el_allocate( prg ); + + if ( (kid_t*)el == old_next_down ) + *new_next_down = (kid_t*)new_el; + + /* If the left tree is there, copy it. */ + if ( new_el->left ) { + new_el->left = map_copy_branch( prg, map, new_el->left, old_next_down, new_next_down ); + new_el->left->parent = new_el; + } + + map_list_add_after( map, map->tail, new_el ); + + /* If the right tree is there, copy it. */ + if ( new_el->right ) { + new_el->right = map_copy_branch( prg, map, new_el->right, old_next_down, new_next_down ); + new_el->right->parent = new_el; + } + + return new_el; +} +#endif + +/* Orders two keys. When the map's key_type is TYPE_TREE the comparison is + * delegated to colm_cmp_tree(); otherwise the two pointer values are compared + * as longs, giving -1, 0 or 1. Callers only test the sign of the result. */ +static long map_cmp( program_t *prg, map_t *map, const tree_t *tree1, const tree_t *tree2 ) +{ + if ( map->generic_info->key_type == TYPE_TREE ) { + return colm_cmp_tree( prg, tree1, tree2 ); + } + else { + if ( (long)tree1 < (long)tree2 ) + return -1; + else if ( (long)tree1 > (long)tree2) + return 1; + return 0; + } +} + +/* Inserts element, keyed by element->key, by descending from the root. + * If no equal key exists the element is attached and the tree rebalanced; + * element is stored in *last_found (when last_found is non-null) and returned. + * If an equal key exists nothing is inserted, the existing element is stored + * in *last_found (when non-null) and null is returned. */ +map_el_t *map_insert_el( program_t *prg, map_t *map, map_el_t *element, map_el_t **last_found ) +{ + long key_relation; + map_el_t *cur_el = map->root, *parent_el = 0; + map_el_t *last_less = 0; + + while ( true ) { + if ( cur_el == 0 ) { + /* We are at an external element and did not find the key we were + * looking for. 
Attach underneath the leaf and rebalance. */ + map_attach_rebal( map, element, parent_el, last_less ); + + if ( last_found != 0 ) + *last_found = element; + return element; + } + + key_relation = map_cmp( prg, map, + element->key, cur_el->key ); + + /* Do we go left? */ + if ( key_relation < 0 ) { + parent_el = last_less = cur_el; + cur_el = cur_el->left; + } + /* Do we go right? */ + else if ( key_relation > 0 ) { + parent_el = cur_el; + cur_el = cur_el->right; + } + /* We have hit the target. */ + else { + if ( last_found != 0 ) + *last_found = cur_el; + return 0; + } + } +} + +#if 0 +map_el_t *map_insert_key( program_t *prg, map_t *map, tree_t *key, map_el_t **last_found ) +{ + long key_relation; + map_el_t *cur_el = map->root, *parent_el = 0; + map_el_t *last_less = 0; + + while ( true ) { + if ( cur_el == 0 ) { + /* We are at an external element and did not find the key we were + * looking for. Create the new element, attach it underneath the leaf + * and rebalance. */ + map_el_t *element = map_el_allocate( prg ); + element->key = key; + map_attach_rebal( map, element, parent_el, last_less ); + + if ( last_found != 0 ) + *last_found = element; + return element; + } + + key_relation = map_cmp( prg, map, key, cur_el->key ); + + /* Do we go left? */ + if ( key_relation < 0 ) { + parent_el = last_less = cur_el; + cur_el = cur_el->left; + } + /* Do we go right? */ + else if ( key_relation > 0 ) { + parent_el = cur_el; + cur_el = cur_el->right; + } + /* We have hit the target. 
*/ + else { + if ( last_found != 0 ) + *last_found = cur_el; + return 0; + } + } +} +#endif + +/* Inserts map_el through map_insert_el() without asking for the last-found + * element. Returns map_el when it was attached, null when an element with an + * equal key is already in the map. */ +map_el_t *colm_map_insert( program_t *prg, map_t *map, map_el_t *map_el ) +{ + return map_insert_el( prg, map, map_el, 0 ); +} + +/* Creates a new struct of the map's el_struct_id, stores key at the map's + * el_offset and value in field 0, then inserts the map_el_t located at + * el_offset. Returns whatever colm_map_insert() returns. */ +map_el_t *colm_vmap_insert( program_t *prg, map_t *map, struct_t *key, struct_t *value ) +{ + struct colm_struct *s = colm_struct_new( prg, map->generic_info->el_struct_id ); + + colm_struct_set_field( s, struct_t*, map->generic_info->el_offset, key ); + colm_struct_set_field( s, struct_t*, 0, value ); + + map_el_t *map_el = colm_struct_get_addr( s, map_el_t*, map->generic_info->el_offset ); + + return colm_map_insert( prg, map, map_el ); +} + +/* Looks up key and, if an element is found, detaches it from the map. + * NOTE(review): returns null unconditionally, even when an element was + * detached, so the caller cannot tell whether anything was removed and never + * receives the detached element -- confirm this is intended. */ +map_el_t *colm_vmap_remove( program_t *prg, map_t *map, tree_t *key ) +{ + map_el_t *map_el = colm_map_find( prg, map, key ); + if ( map_el != 0 ) + colm_map_detach( prg, map, map_el ); + return 0; +} + +/* Looks up key and returns the value held in field 0 of the struct that + * contains the matching element, or null when the key is not present. When + * the map's value_type is TYPE_TREE a reference is taken on the value before + * it is returned. */ +tree_t *colm_vmap_find( program_t *prg, map_t *map, tree_t *key ) +{ + map_el_t *map_el = colm_map_find( prg, map, key ); + if ( map_el != 0 ) { + struct_t *s = colm_generic_el_container( prg, map_el, + map->generic_info - prg->rtd->generic_info ); + tree_t *val = colm_struct_get_field( s, tree_t*, 0 ); + + if ( map->generic_info->value_type == TYPE_TREE ) + colm_tree_upref( prg, val ); + + return val; + } + return 0; +} + +/* Public wrapper around map_detach(); the returned element is discarded. */ +void colm_map_detach( program_t *prg, map_t *map, map_el_t *map_el ) +{ + map_detach( prg, map, map_el ); +} + +/* Public wrapper around map_impl_find(). */ +map_el_t *colm_map_find( program_t *prg, map_t *map, tree_t *key ) +{ + return map_impl_find( prg, map, key ); +} + +/** + * \brief Find an element in the tree with the given key. + * + * \returns The element if key exists, null if the key does not exist. + */ +map_el_t *map_impl_find( program_t *prg, map_t *map, tree_t *key ) +{ + map_el_t *cur_el = map->root; + long key_relation; + + while ( cur_el != 0 ) { + key_relation = map_cmp( prg, map, key, cur_el->key ); + + /* Do we go left? */ + if ( key_relation < 0 ) + cur_el = cur_el->left; + /* Do we go right? 
*/ + else if ( key_relation > 0 ) + cur_el = cur_el->right; + /* We have hit the target. */ + else { + return cur_el; + } + } + return 0; +} + + +/** + * \brief Find an element, then detach it from the tree. + * + * The element is not deleted. + * + * \returns The element detached if the key is found, otherwise returns null. + */ +map_el_t *map_detach_by_key( program_t *prg, map_t *map, tree_t *key ) +{ + map_el_t *element = map_impl_find( prg, map, key ); + if ( element ) + map_detach( prg, map, element ); + + return element; +} + +/** + * \brief Detach an element from the tree. + * + * If the element is not in the tree then undefined behaviour results. + * + * \returns The element given. + */ +map_el_t *map_detach( program_t *prg, map_t *map, map_el_t *element ) +{ + map_el_t *replacement, *fixfrom; + long lheight, rheight; + + /* Remove the element from the ordered list. */ + map_list_detach( map, element ); + + /* Update tree_size. */ + map->tree_size--; + + /* Find a replacement element. */ + if (element->right) + { + /* Find the leftmost element of the right subtree. */ + replacement = element->right; + while (replacement->left) + replacement = replacement->left; + + /* If replacing the element with its child then we need to start + * fixing at the replacement, otherwise we start fixing at the + * parent of the replacement. */ + if (replacement->parent == element) + fixfrom = replacement; + else + fixfrom = replacement->parent; + + map_remove_el( map, replacement, replacement->right ); + map_replace_el( map, element, replacement ); + } + else if (element->left) + { + /* Find the rightmost element of the left subtree. */ + replacement = element->left; + while (replacement->right) + replacement = replacement->right; + + /* If replacing the element with its child then we need to start + * fixing at the replacement, otherwise we start fixing at the + * parent of the replacement. 
*/ + if (replacement->parent == element) + fixfrom = replacement; + else + fixfrom = replacement->parent; + + map_remove_el( map, replacement, replacement->left ); + map_replace_el( map, element, replacement ); + } + else + { + /* We need to start fixing at the parent of the element. */ + fixfrom = element->parent; + + /* The element we are deleting is a leaf element. */ + map_remove_el( map, element, 0 ); + } + + /* fixfrom can only be null in the leaf case above, when the element + * had no parent: we just deleted a childless root and the tree is + * now empty, so there is nothing to fix. */ + if ( fixfrom == 0 ) + return element; + + /* Fix the heights after the deletion. */ + map_recalc_heights( map, fixfrom ); + + /* Fix every unbalanced element going up in the tree. */ + map_el_t *ub = map_find_first_unbal_el( map, fixfrom ); + while ( ub ) + { + /* Find the element to rebalance by moving down from the first unbalanced + * element 2 levels in the direction of the greatest heights. On the + * second move down, the heights may be equal ( but not on the first ). + * In which case go in the direction of the first move. */ + lheight = ub->left ? ub->left->height : 0; + rheight = ub->right ? ub->right->height : 0; + assert( lheight != rheight ); + if (rheight > lheight) + { + ub = ub->right; + lheight = ub->left ? + ub->left->height : 0; + rheight = ub->right ? + ub->right->height : 0; + if (rheight > lheight) + ub = ub->right; + else if (rheight < lheight) + ub = ub->left; + else + ub = ub->right; + } + else + { + ub = ub->left; + lheight = ub->left ? + ub->left->height : 0; + rheight = ub->right ? + ub->right->height : 0; + if (rheight > lheight) + ub = ub->right; + else if (rheight < lheight) + ub = ub->left; + else + ub = ub->left; + } + + + /* map_rebalance returns the grandparent of the subtree formed + * by the elements that were rebalanced. + * We must continue upward from there rebalancing. */ + fixfrom = map_rebalance( map, ub ); + + /* Find the next unbalanced element. 
*/ + ub = map_find_first_unbal_el( map, fixfrom ); + } + + return element; +} + + + diff --git a/src/map.cc b/src/map.cc new file mode 100644 index 00000000..4d3bd090 --- /dev/null +++ b/src/map.cc @@ -0,0 +1,27 @@ +/* + * Copyright 2008-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "pdarun.h" +#include <assert.h> + + + diff --git a/src/map.h b/src/map.h new file mode 100644 index 00000000..1d6db2d7 --- /dev/null +++ b/src/map.h @@ -0,0 +1,86 @@ +/* + * Copyright 2010-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef _COLM_MAP_H +#define _COLM_MAP_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include <colm/program.h> +#include <colm/struct.h> + +#include "internal.h" + +void map_list_abandon( map_t *map ); + +void map_list_add_before( map_t *map, map_el_t *next_el, map_el_t *new_el ); +void map_list_add_after( map_t *map, map_el_t *prev_el, map_el_t *new_el ); +map_el_t *map_list_detach( map_t *map, map_el_t *el ); +void map_attach_rebal( map_t *map, map_el_t *element, map_el_t *parent_el, map_el_t *last_less ); +void map_delete_children_of( map_t *map, map_el_t *element ); +void map_empty( map_t *map ); +map_el_t *map_rebalance( map_t *map, map_el_t *n ); +void map_recalc_heights( map_t *map, map_el_t *element ); +map_el_t *mapFindFirstUnbalGP( map_t *map, map_el_t *element ); +map_el_t *map_find_first_unbal_el( map_t *map, map_el_t *element ); +void map_remove_el( map_t *map, map_el_t *element, map_el_t *filler ); +void map_replace_el( map_t *map, map_el_t *element, map_el_t *replacement ); +map_el_t *map_insert_el( program_t *prg, map_t *map, map_el_t *element, map_el_t **last_found ); +map_el_t *map_insert_key( program_t *prg, map_t *map, tree_t *key, map_el_t **last_found ); +map_el_t *map_impl_find( program_t *prg, map_t *map, tree_t *key ); +map_el_t *map_detach_by_key( program_t *prg, map_t *map, tree_t *key ); +map_el_t *map_detach( program_t *prg, map_t *map, map_el_t *element ); +map_el_t *map_copy_branch( program_t *prg, map_t *map, map_el_t *el, + kid_t *old_next_down, kid_t **new_next_down ); + +struct tree_pair map_remove( program_t *prg, map_t *map, tree_t *key ); + +long cmp_tree( program_t *prg, const tree_t *tree1, const tree_t *tree2 ); + +void map_impl_remove_el( program_t *prg, map_t *map, map_el_t *element ); +int map_impl_remove_key( program_t *prg, map_t *map, tree_t *key ); + +tree_t *map_find( program_t *prg, map_t *map, tree_t *key ); +long map_length( map_t *map ); +tree_t *map_unstore( program_t *prg, map_t *map, tree_t 
*key, tree_t *existing ); +int map_insert( program_t *prg, map_t *map, tree_t *key, tree_t *element ); +void map_unremove( program_t *prg, map_t *map, tree_t *key, tree_t *element ); +tree_t *map_uninsert( program_t *prg, map_t *map, tree_t *key ); +tree_t *map_store( program_t *prg, map_t *map, tree_t *key, tree_t *element ); + +map_el_t *colm_map_insert( program_t *prg, map_t *map, map_el_t *map_el ); +void colm_map_detach( program_t *prg, map_t *map, map_el_t *map_el ); +map_el_t *colm_map_find( program_t *prg, map_t *map, tree_t *key ); + +map_el_t *colm_vmap_insert( program_t *prg, map_t *map, struct_t *key, struct_t *value ); +map_el_t *colm_vmap_remove( program_t *prg, map_t *map, tree_t *key ); +tree_t *colm_map_iter_advance( program_t *prg, tree_t ***psp, generic_iter_t *iter ); +tree_t *colm_vmap_find( program_t *prg, map_t *map, tree_t *key ); + +#if defined(__cplusplus) +} +#endif + +#endif /* _COLM_MAP_H */ + diff --git a/src/parser.cc b/src/parser.cc new file mode 100644 index 00000000..a41288b1 --- /dev/null +++ b/src/parser.cc @@ -0,0 +1,1128 @@ +/* + * Copyright 2006-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "parser.h" + +#include <stdbool.h> +#include <stdlib.h> +#include <errno.h> + +#include <iostream> + +using std::endl; + +void BaseParser::listElDef( String name ) +{ + /* + * The unique type. This is a def with a single empty form. + */ + ObjectDef *objectDef = ObjectDef::cons( ObjectDef::UserType, + name, pd->nextObjectId++ ); + + LelDefList *defList = new LelDefList; + + Production *prod = BaseParser::production( InputLoc(), + new ProdElList, String(), false, 0, 0 ); + + prodAppend( defList, prod ); + + NtDef *ntDef = NtDef::cons( name, curNspace(), curStruct(), false ); + BaseParser::cflDef( ntDef, objectDef, defList ); + + /* + * List element with the same name as containing context. + */ + NamespaceQual *nspaceQual = NamespaceQual::cons( curNspace() ); + String id = curStruct()->objectDef->name; + RepeatType repeatType = RepeatNone; + TypeRef *objTr = TypeRef::cons( InputLoc(), nspaceQual, id, repeatType ); + TypeRef *elTr = TypeRef::cons( InputLoc(), TypeRef::ListPtrs, 0, objTr, 0 ); + + ObjectField *of = ObjectField::cons( InputLoc(), + ObjectField::GenericElementType, elTr, name ); + + structVarDef( InputLoc(), of ); +} + +void BaseParser::mapElDef( String name, TypeRef *keyType ) +{ + /* + * The unique type. This is a def with a single empty form. 
+ */ + ObjectDef *objectDef = ObjectDef::cons( ObjectDef::UserType, + name, pd->nextObjectId++ ); + + LelDefList *defList = new LelDefList; + + Production *prod = BaseParser::production( InputLoc(), + new ProdElList, String(), false, 0, 0 ); + prodAppend( defList, prod ); + + NtDef *ntDef = NtDef::cons( name, curNspace(), curStruct(), false ); + BaseParser::cflDef( ntDef, objectDef, defList ); + + /* + * Same name as containing context. + */ + NamespaceQual *nspaceQual = NamespaceQual::cons( curNspace() ); + String id = curStruct()->objectDef->name; + TypeRef *objTr = TypeRef::cons( InputLoc(), nspaceQual, id, RepeatNone ); + TypeRef *elTr = TypeRef::cons( InputLoc(), TypeRef::MapPtrs, 0, objTr, keyType ); + + ObjectField *of = ObjectField::cons( InputLoc(), + ObjectField::GenericElementType, elTr, name ); + structVarDef( InputLoc(), of ); +} + +#if 0 +void BaseParser::argvDecl() +{ + String structName = "argv_el"; + structHead( internal, pd->rootNamespace, structName, ObjectDef::StructType ); + + /* First the argv value. */ + String name = "value"; + String type = "str"; + NamespaceQual *nspaceQual = NamespaceQual::cons( curNspace() ); + TypeRef *typeRef = TypeRef::cons( internal, nspaceQual, type, RepeatNone ); + ObjectField *objField = ObjectField::cons( internal, + ObjectField::StructFieldType, typeRef, name ); + structVarDef( objField->loc, objField ); + + pd->argvEl = objField->context; + + /* Now the list element. */ + listElDef( "el" ); + + structStack.pop(); + namespaceStack.pop(); +} +#endif + +void BaseParser::init() +{ + /* Set up the root namespace. */ + pd->rootNamespace = createRootNamespace(); + + /* Setup the global object. 
*/ + String global = "global"; + pd->globalObjectDef = ObjectDef::cons( ObjectDef::UserType, + global, pd->nextObjectId++ ); + + pd->rootNamespace->rootScope->owningObj = pd->globalObjectDef; + + pd->global = new StructDef( internal, global, pd->globalObjectDef ); + pd->globalSel = declareStruct( pd, 0, global, pd->global ); + + /* Setup the input object. */ + global = "_input"; + ObjectDef *objectDef = ObjectDef::cons( ObjectDef::BuiltinType, + global, pd->nextObjectId++ ); + + pd->input = new StructDef( internal, global, objectDef ); + pd->inputSel = declareStruct( pd, pd->rootNamespace, + pd->input->name, pd->input ); + + /* Setup the stream object. */ + global = "stream"; + objectDef = ObjectDef::cons( ObjectDef::BuiltinType, + global, pd->nextObjectId++ ); + + pd->stream = new StructDef( internal, global, objectDef ); + pd->streamSel = declareStruct( pd, pd->rootNamespace, + pd->stream->name, pd->stream ); + + /* Initialize the dictionary of graphs. This is our symbol table. The + * initialization needs to be done on construction which happens at the + * beginning of a machine spec so any assignment operators can reference + * the builtins. */ + pd->initGraphDict(); + + pd->rootLocalFrame = ObjectDef::cons( ObjectDef::FrameType, + "local", pd->nextObjectId++ ); + localFrameTop = pd->rootLocalFrame; + scopeTop = pd->rootLocalFrame->rootScope; + + + /* Declarations of internal types. They must be declared now because we use + * them directly, rather than via type lookup. */ + pd->declareBaseLangEls(); + pd->initUniqueTypes(); + + //argvDecl(); + + /* Internal variables. */ + addArgvList(); + addStdsList(); +} + +void BaseParser::addRegularDef( const InputLoc &loc, Namespace *nspace, + const String &name, LexJoin *join ) +{ + GraphDictEl *newEl = nspace->rlMap.insert( name ); + if ( newEl != 0 ) { + /* New element in the dict, all good. 
*/ + newEl->value = new LexDefinition( name, join ); + newEl->isInstance = false; + newEl->loc = loc; + } + else { + // Recover by ignoring the duplicate. + error(loc) << "regular definition \"" << name << "\" already exists" << endl; + } +} + +/* Allocates a TokenRegion backed by impl, using the current length of + * pd->regionList as its id, and appends it to pd->regionList. */ +TokenRegion *BaseParser::createRegion( const InputLoc &loc, RegionImpl *impl ) +{ + TokenRegion *tokenRegion = new TokenRegion( loc, + pd->regionList.length(), impl ); + + pd->regionList.append( tokenRegion ); + + return tokenRegion; +} + +/* Creates a new region set -- three region implementations and four token + * regions (token+ignore, token-only, ignore-only, collect-ignore) -- records + * them in the parse data and pushes the set on regionStack. The + * collect-ignore region shares implIgnoreOnly with the ignore-only region. */ +void BaseParser::pushRegionSet( const InputLoc &loc ) +{ + RegionImpl *implTokenIgnore = new RegionImpl; + RegionImpl *implTokenOnly = new RegionImpl; + RegionImpl *implIgnoreOnly = new RegionImpl; + + pd->regionImplList.append( implTokenIgnore ); + pd->regionImplList.append( implTokenOnly ); + pd->regionImplList.append( implIgnoreOnly ); + + TokenRegion *tokenIgnore = createRegion( loc, implTokenIgnore ); + TokenRegion *tokenOnly = createRegion( loc, implTokenOnly ); + TokenRegion *ignoreOnly = createRegion( loc, implIgnoreOnly ); + TokenRegion *collectIgnore = createRegion( loc, implIgnoreOnly ); + + /* NOTE(review): implTokenIgnore is passed twice here and implTokenOnly + * is never passed. The second argument looks like it was meant to be + * implTokenOnly -- confirm against the RegionSet constructor. */ + RegionSet *regionSet = new RegionSet( + implTokenIgnore, implTokenIgnore, implIgnoreOnly, + tokenIgnore, tokenOnly, ignoreOnly, collectIgnore ); + + collectIgnore->ignoreOnly = ignoreOnly; + + pd->regionSetList.append( regionSet ); + regionStack.push( regionSet ); +} + +/* Pops the region set most recently pushed by pushRegionSet(). */ +void BaseParser::popRegionSet() +{ + regionStack.pop(); +} + +/* Creates the root namespace, registers it in pd->namespaceList and pushes + * it on namespaceStack. */ +Namespace *BaseParser::createRootNamespace() +{ + /* Gets id of zero and default name. No parent. */ + Namespace *nspace = new Namespace( internal, + String("___ROOT_NAMESPACE"), 0, 0 ); + + nspace->rootScope->owningObj = pd->globalObjectDef; + + pd->namespaceList.append( nspace ); + namespaceStack.push( nspace ); + + return nspace; +} + +Namespace *BaseParser::createNamespace( const InputLoc &loc, const String &name ) +{ + Namespace *parent = namespaceStack.top(); + + /* Make the new namespace. 
*/ + Namespace *nspace = parent->findNamespace( name ); + + if ( nspace == 0 ) { + nspace = new Namespace( loc, name, + pd->namespaceList.length(), parent ); + + /* Link the new namespace's scope to the parent namespace's scope. */ + nspace->rootScope->parentScope = parent->rootScope; + nspace->rootScope->owningObj = pd->globalObjectDef; + + parent->childNamespaces.append( nspace ); + pd->namespaceList.append( nspace ); + } + + namespaceStack.push( nspace ); + + return nspace; +} + +Reduction *BaseParser::createReduction( const InputLoc loc, const String &name ) +{ + Namespace *parent = namespaceStack.top(); + Reduction *reduction = parent->findReduction( name ); + + if ( reduction == 0 ) { + reduction = new Reduction( loc, name ); + parent->reductions.append( reduction ); + } + + reductionStack.push( reduction ); + + return reduction; +} + +LexJoin *BaseParser::literalJoin( const InputLoc &loc, const String &data ) +{ + Literal *literal = Literal::cons( loc, data, Literal::LitString ); + LexFactor *factor = LexFactor::cons( literal ); + LexFactorNeg *factorNeg = LexFactorNeg::cons( factor ); + LexFactorRep *factorRep = LexFactorRep::cons( factorNeg ); + LexFactorAug *factorAug = LexFactorAug::cons( factorRep ); + LexTerm *term = LexTerm::cons( factorAug ); + LexExpression *expr = LexExpression::cons( term ); + LexJoin *join = LexJoin::cons( expr ); + return join; +} + +void BaseParser::defineToken( const InputLoc &loc, String name, LexJoin *join, + ObjectDef *objectDef, CodeBlock *transBlock, bool ignore, + bool noPreIgnore, bool noPostIgnore ) +{ + bool pushedRegion = false; + if ( !insideRegion() ) { + if ( ignore ) + error(loc) << "ignore tokens can only appear inside scanners" << endp; + + pushedRegion = true; + pushRegionSet( internal ); + } + + /* Check the name if this is a token. */ + if ( !ignore && name == 0 ) + error(loc) << "tokens must have a name" << endp; + + /* Give a default name to ignores. 
*/ + if ( name == 0 ) + name.setAs( 32, "_ignore_%.4x", pd->nextTokenId ); + + Namespace *nspace = curNspace(); + RegionSet *regionSet = regionStack.top(); + + TokenDef *tokenDef = TokenDef::cons( name, String(), false, ignore, join, + transBlock, loc, 0, nspace, regionSet, objectDef, curStruct() ); + + regionSet->tokenDefList.append( tokenDef ); + nspace->tokenDefList.append( tokenDef ); + + tokenDef->noPreIgnore = noPreIgnore; + tokenDef->noPostIgnore = noPostIgnore; + + TokenInstance *tokenInstance = TokenInstance::cons( tokenDef, + join, loc, pd->nextTokenId++, nspace, + regionSet->tokenIgnore ); + + regionSet->tokenIgnore->impl->tokenInstanceList.append( tokenInstance ); + + tokenDef->noPreIgnore = noPreIgnore; + tokenDef->noPostIgnore = noPostIgnore; + + if ( ignore ) { + /* The instance for the ignore-only. */ + TokenInstance *tokenInstanceIgn = TokenInstance::cons( tokenDef, + join, loc, pd->nextTokenId++, nspace, regionSet->ignoreOnly ); + + tokenInstanceIgn->dupOf = tokenInstance; + + regionSet->ignoreOnly->impl->tokenInstanceList.append( tokenInstanceIgn ); + } + else { + /* The instance for the token-only. */ + TokenInstance *tokenInstanceTok = TokenInstance::cons( tokenDef, + join, loc, pd->nextTokenId++, nspace, regionSet->tokenOnly ); + + tokenInstanceTok->dupOf = tokenInstance; + + regionSet->tokenOnly->impl->tokenInstanceList.append( tokenInstanceTok ); + } + + /* This is created and pushed in the name. 
*/ + if ( pushedRegion ) + popRegionSet(); + + if ( join != 0 ) { + /* Create a regular language definition so the token can be used to + * make other tokens */ + addRegularDef( loc, curNspace(), name, join ); + } +} + +void BaseParser::zeroDef( const InputLoc &loc, const String &name ) +{ + if ( !insideRegion() ) + error(loc) << "zero token should be inside token" << endp; + + RegionSet *regionSet = regionStack.top(); + Namespace *nspace = curNspace(); + + LexJoin *join = literalJoin( loc, String("`") ); + + TokenDef *tokenDef = TokenDef::cons( name, String(), false, false, join, + 0, loc, 0, nspace, regionSet, 0, curStruct() ); + + tokenDef->isZero = true; + + regionSet->tokenDefList.append( tokenDef ); + nspace->tokenDefList.append( tokenDef ); + + /* No token instance created. */ +} + +void BaseParser::literalDef( const InputLoc &loc, const String &data, + bool noPreIgnore, bool noPostIgnore ) +{ + /* Create a name for the literal. */ + String name( 32, "_literal_%.4x", pd->nextTokenId ); + + bool pushedRegion = false; + if ( !insideRegion() ) { + pushRegionSet( loc ); + pushedRegion = true; + } + + bool unusedCI; + String interp; + prepareLitString( interp, unusedCI, data, loc ); + + /* Look for the production's associated region. */ + Namespace *nspace = curNspace(); + RegionSet *regionSet = regionStack.top(); + + LiteralDictEl *ldel = nspace->literalDict.find( interp ); + if ( ldel != 0 ) + error( loc ) << "literal already defined in this namespace" << endp; + + LexJoin *join = literalJoin( loc, data ); + + ObjectDef *objectDef = ObjectDef::cons( ObjectDef::UserType, + name, pd->nextObjectId++ ); + + /* The token definition. */ + TokenDef *tokenDef = TokenDef::cons( name, data, true, false, join, + 0, loc, 0, nspace, regionSet, objectDef, 0 ); + + regionSet->tokenDefList.append( tokenDef ); + nspace->tokenDefList.append( tokenDef ); + + /* The instance for the token/ignore region. 
*/ + TokenInstance *tokenInstance = TokenInstance::cons( tokenDef, join, + loc, pd->nextTokenId++, nspace, regionSet->tokenIgnore ); + + regionSet->tokenIgnore->impl->tokenInstanceList.append( tokenInstance ); + + ldel = nspace->literalDict.insert( interp, tokenInstance ); + + /* Make the duplicate for the token-only region. */ + tokenDef->noPreIgnore = noPreIgnore; + tokenDef->noPostIgnore = noPostIgnore; + + /* The instance for the token-only region. */ + TokenInstance *tokenInstanceTok = TokenInstance::cons( tokenDef, + join, loc, pd->nextTokenId++, nspace, + regionSet->tokenOnly ); + + tokenInstanceTok->dupOf = tokenInstance; + + regionSet->tokenOnly->impl->tokenInstanceList.append( tokenInstanceTok ); + + if ( pushedRegion ) + popRegionSet(); +} + +void BaseParser::addArgvList() +{ + TypeRef *valType = TypeRef::cons( internal, pd->uniqueTypeStr ); + TypeRef *elType = TypeRef::cons( internal, TypeRef::ListEl, valType ); + pd->argvTypeRef = TypeRef::cons( internal, TypeRef::List, 0, elType, valType ); +} + +void BaseParser::addStdsList() +{ + TypeRef *valType = TypeRef::cons( internal, pd->uniqueTypeStream ); + TypeRef *elType = TypeRef::cons( internal, TypeRef::ListEl, valType ); + pd->stdsTypeRef = TypeRef::cons( internal, TypeRef::List, 0, elType, valType ); +} + +ObjectDef *BaseParser::blockOpen() +{ + /* Init the object representing the local frame. 
*/ + ObjectDef *frame = ObjectDef::cons( ObjectDef::FrameType, + "local", pd->nextObjectId++ ); + + localFrameTop = frame; + scopeTop = frame->rootScope; + return frame; +} + +void BaseParser::blockClose() +{ + localFrameTop = pd->rootLocalFrame; + scopeTop = pd->rootLocalFrame->rootScope; +} + +void BaseParser::functionDef( StmtList *stmtList, ObjectDef *localFrame, + ParameterList *paramList, TypeRef *typeRef, const String &name, bool exprt ) +{ + CodeBlock *codeBlock = CodeBlock::cons( stmtList, localFrame ); + Function *newFunction = Function::cons( curNspace(), typeRef, name, + paramList, codeBlock, pd->nextFuncId++, false, exprt ); + pd->functionList.append( newFunction ); + newFunction->inContext = curStruct(); +} + +void BaseParser::inHostDef( const String &hostCall, ObjectDef *localFrame, + ParameterList *paramList, TypeRef *typeRef, const String &name, bool exprt ) +{ + Function *newFunction = Function::cons( curNspace(), typeRef, name, + paramList, 0, pd->nextHostId++, false, exprt ); + newFunction->hostCall = hostCall; + newFunction->localFrame = localFrame; + newFunction->inHost = true; + pd->inHostList.append( newFunction ); + newFunction->inContext = curStruct(); +} + +void BaseParser::iterDef( StmtList *stmtList, ObjectDef *localFrame, + ParameterList *paramList, const String &name ) +{ + CodeBlock *codeBlock = CodeBlock::cons( stmtList, localFrame ); + Function *newFunction = Function::cons( curNspace(), 0, name, + paramList, codeBlock, pd->nextFuncId++, true, false ); + pd->functionList.append( newFunction ); +} + +LangStmt *BaseParser::globalDef( ObjectField *objField, LangExpr *expr, + LangStmt::Type assignType ) +{ + LangStmt *stmt = 0; + ObjectDef *object = pd->globalObjectDef; + Namespace *nspace = curNspace(); //pd->rootNamespace; + + if ( nspace->rootScope->checkRedecl( objField->name ) != 0 ) + error(objField->loc) << "object field renamed" << endp; + + object->insertField( nspace->rootScope, objField->name, objField ); + + if ( expr != 0 
) { + LangVarRef *varRef = LangVarRef::cons( objField->loc, + curNspace(), curStruct(), curScope(), objField->name ); + + stmt = LangStmt::cons( objField->loc, assignType, varRef, expr ); + } + + return stmt; +} + +LangStmt *BaseParser::exportStmt( ObjectField *objField, + LangStmt::Type assignType, LangExpr *expr ) +{ + LangStmt *stmt = 0; + + ObjectDef *object = pd->globalObjectDef; + Namespace *nspace = curNspace(); //pd->rootNamespace; + + if ( curStruct() != 0 ) + error(objField->loc) << "cannot export parser context variables" << endp; + + if ( nspace->rootScope->checkRedecl( objField->name ) != 0 ) + error(objField->loc) << "object field renamed" << endp; + + object->insertField( nspace->rootScope, objField->name, objField ); + objField->isExport = true; + + if ( expr != 0 ) { + LangVarRef *varRef = LangVarRef::cons( objField->loc, + curNspace(), 0, curScope(), objField->name ); + + stmt = LangStmt::cons( objField->loc, assignType, varRef, expr ); + } + + return stmt; +} + + +void BaseParser::cflDef( NtDef *ntDef, ObjectDef *objectDef, LelDefList *defList ) +{ + Namespace *nspace = curNspace(); + + ntDef->objectDef = objectDef; + ntDef->defList = defList; + + nspace->ntDefList.append( ntDef ); + + /* Declare the captures in the object. */ + for ( LelDefList::Iter prod = *defList; prod.lte(); prod++ ) { + for ( ProdElList::Iter pel = *prod->prodElList; pel.lte(); pel++ ) { + /* If there is a capture, create the field. */ + if ( pel->captureField != 0 ) { + /* Might already exist. */ + ObjectField *newOf = objectDef->rootScope->checkRedecl( + pel->captureField->name ); + if ( newOf != 0 ) { + /* FIXME: check the types are the same. 
*/ + } + else { + newOf = pel->captureField; + newOf->typeRef = pel->typeRef; + objectDef->rootScope->insertField( newOf->name, newOf ); + } + + newOf->rhsVal.append( RhsVal( pel ) ); + } + } + } +} + +ReOrBlock *BaseParser::lexRegularExprData( ReOrBlock *reOrBlock, ReOrItem *reOrItem ) +{ + ReOrBlock *ret; + + /* An optimization to lessen the tree size. If an or char is directly under + * the left side on the right and the right side is another or char then + * paste them together and return the left side. Otherwise just put the two + * under a new or data node. */ + if ( reOrItem->type == ReOrItem::Data && + reOrBlock->type == ReOrBlock::RecurseItem && + reOrBlock->item->type == ReOrItem::Data ) + { + /* Append the right side to right side of the left and toss the + * right side. */ + reOrBlock->item->data += reOrItem->data; + delete reOrItem; + ret = reOrBlock; + } + else { + /* Can't optimize, put the left and right under a new node. */ + ret = ReOrBlock::cons( reOrBlock, reOrItem ); + } + return ret; +} + +LexFactor *BaseParser::lexRlFactorName( const String &data, const InputLoc &loc ) +{ + LexFactor *factor = 0; + /* Find the named graph. */ + Namespace *nspace = curNspace(); + + while ( nspace != 0 ) { + GraphDictEl *gdNode = nspace->rlMap.find( data ); + if ( gdNode != 0 ) { + if ( gdNode->isInstance ) { + /* Recover by retuning null as the factor node. */ + error(loc) << "references to graph instantiations not allowed " + "in expressions" << endl; + factor = 0; + } + else { + /* Create a factor node that is a lookup of an expression. */ + factor = LexFactor::cons( loc, gdNode->value ); + } + break; + } + + nspace = nspace->parentNamespace; + } + + if ( nspace == 0 ) { + /* Recover by returning null as the factor node. */ + error(loc) << "graph lookup of \"" << data << "\" failed" << endl; + factor = 0; + } + + return factor; +} + +int BaseParser::lexFactorRepNum( const InputLoc &loc, const String &data ) +{ + /* Convert the priority number to a long. 
Check for overflow. */ + errno = 0; + long rep = strtol( data, 0, 10 ); + if ( errno == ERANGE && rep == LONG_MAX ) { + /* Repetition too large. Recover by returing repetition 1. */ + error(loc) << "repetition number " << data << " overflows" << endl; + rep = 1; + } + return rep; +} + +LexFactorAug *BaseParser::lexFactorLabel( const InputLoc &loc, + const String &data, LexFactorAug *factorAug ) +{ + /* Create the object field. */ + TypeRef *typeRef = TypeRef::cons( loc, pd->uniqueTypeStr ); + ObjectField *objField = ObjectField::cons( loc, + ObjectField::LexSubstrType, typeRef, data ); + + /* Create the enter and leaving actions that will mark the substring. */ + Action *enter = Action::cons( MarkMark, pd->nextMatchEndNum++ ); + Action *leave = Action::cons( MarkMark, pd->nextMatchEndNum++ ); + pd->actionList.append( enter ); + pd->actionList.append( leave ); + + /* Add entering and leaving actions. */ + factorAug->actions.append( ParserAction( loc, at_start, 0, enter ) ); + factorAug->actions.append( ParserAction( loc, at_leave, 0, leave ) ); + + factorAug->reCaptureVect.append( ReCapture( enter, leave, objField ) ); + + return factorAug; +} + +LexJoin *BaseParser::lexOptJoin( LexJoin *join, LexJoin *context ) +{ + if ( context != 0 ) { + /* Create the enter and leaving actions that will mark the substring. 
*/ + Action *mark = Action::cons( MarkMark, pd->nextMatchEndNum++ ); + pd->actionList.append( mark ); + + join->context = context; + join->mark = mark; + } + + return join; +} + +LangExpr *BaseParser::send( const InputLoc &loc, LangVarRef *varRef, + ConsItemList *list, bool eof ) +{ + ParserText *parserText = ParserText::cons( loc, + curNspace(), list, true, false, false, "" ); + pd->parserTextList.append( parserText ); + + return LangExpr::cons( LangTerm::consSend( loc, varRef, + parserText, eof ) ); +} + +LangExpr *BaseParser::sendTree( const InputLoc &loc, LangVarRef *varRef, + ConsItemList *list, bool eof ) +{ + ParserText *parserText = ParserText::cons( loc, + curNspace(), list, true, false, false, "" ); + pd->parserTextList.append( parserText ); + + return LangExpr::cons( LangTerm::consSendTree( loc, varRef, + parserText, eof ) ); +} + +LangExpr *BaseParser::parseCmd( const InputLoc &loc, bool tree, bool stop, + ObjectField *objField, TypeRef *typeRef, FieldInitVect *fieldInitVect, + ConsItemList *list, bool used, bool reduce, bool read, const String &reducer ) +{ + LangExpr *expr = 0; + + /* Item list for what we are sending to the parser. */ + ConsItemList *consItemList = new ConsItemList; + + /* The parser may be referenced. */ + LangVarRef *varRef = 0; + if ( objField != 0 ) { + varRef = LangVarRef::cons( objField->loc, + curNspace(), curStruct(), curScope(), objField->name ); + } + + /* The typeref for the parser. */ + TypeRef *parserTypeRef = TypeRef::cons( loc, + TypeRef::Parser, 0, typeRef, 0 ); + + if ( objField != 0 ) + used = true; + + ParserText *parserText = ParserText::cons( loc, curNspace(), + list, used, reduce, read, reducer ); + pd->parserTextList.append( parserText ); + + LangTerm::Type langTermType = stop ? LangTerm::ParseStopType : ( tree ? 
+ LangTerm::ParseTreeType : LangTerm::ParseType ); + + expr = LangExpr::cons( LangTerm::cons( loc, langTermType, + varRef, objField, parserTypeRef, fieldInitVect, consItemList, + parserText ) ); + + /* Check for redeclaration. */ + if ( objField != 0 ) { + if ( curScope()->checkRedecl( objField->name ) != 0 ) { + error( objField->loc ) << "variable " << objField->name << + " redeclared" << endp; + } + + /* Insert it into the field map. */ + objField->typeRef = typeRef; + curScope()->insertField( objField->name, objField ); + } + + return expr; +} + +PatternItemList *BaseParser::consPatternEl( LangVarRef *varRef, PatternItemList *list ) +{ + /* Store the variable reference in the pattern itemm. */ + list->head->varRef = varRef; + + if ( varRef != 0 ) { + if ( curScope()->checkRedecl( varRef->name ) != 0 ) { + error( varRef->loc ) << "variable " << varRef->name << + " redeclared" << endp; + } + + TypeRef *typeRef = list->head->prodEl->typeRef; + ObjectField *objField = ObjectField::cons( InputLoc(), + ObjectField::UserLocalType, typeRef, varRef->name ); + + /* Insert it into the field map. 
*/ + curScope()->insertField( varRef->name, objField ); + } + + return list; +} + +PatternItemList *BaseParser::patternElNamed( const InputLoc &loc, + LangVarRef *parsedVarRef, NamespaceQual *nspaceQual, const String &data, + RepeatType repeatType ) +{ + TypeRef *typeRef = TypeRef::cons( loc, parsedVarRef, nspaceQual, data, repeatType ); + ProdEl *prodEl = new ProdEl( ProdEl::ReferenceType, loc, 0, false, typeRef, 0 ); + PatternItem *patternItem = PatternItem::cons( PatternItem::TypeRefForm, loc, prodEl ); + return PatternItemList::cons( patternItem ); +} + +PatternItemList *BaseParser::patternElType( const InputLoc &loc, + LangVarRef *parsedVarRef, NamespaceQual *nspaceQual, const String &data, + RepeatType repeatType ) +{ + PdaLiteral *literal = new PdaLiteral( loc, data ); + TypeRef *typeRef = TypeRef::cons( loc, parsedVarRef, nspaceQual, literal, repeatType ); + + ProdEl *prodEl = new ProdEl( ProdEl::ReferenceType, loc, 0, false, typeRef, 0 ); + PatternItem *patternItem = PatternItem::cons( PatternItem::TypeRefForm, loc, prodEl ); + return PatternItemList::cons( patternItem ); +} + +ProdElList *BaseParser::appendProdEl( ProdElList *prodElList, ProdEl *prodEl ) +{ + prodEl->pos = prodElList->length(); + prodElList->append( prodEl ); + return prodElList; +} + +PatternItemList *BaseParser::patListConcat( PatternItemList *list1, + PatternItemList *list2 ) +{ + if ( list1 == 0 ) + list1 = new PatternItemList(); + + list1->append( *list2 ); + delete list2; + return list1; +} + +ConsItemList *BaseParser::consListConcat( ConsItemList *list1, + ConsItemList *list2 ) +{ + if ( list1 == 0 ) + list1 = new ConsItemList(); + + list1->append( *list2 ); + delete list2; + return list1; +} + +LangStmt *BaseParser::forScope( const InputLoc &loc, const String &data, + NameScope *scope, TypeRef *typeRef, IterCall *iterCall, StmtList *stmtList ) +{ + /* Check for redeclaration. 
*/ + if ( curScope()->checkRedecl( data ) != 0 ) + error( loc ) << "variable " << data << " redeclared" << endp; + + /* Note that we pass in a null type reference. This type is dependent on + * the result of the iter_call lookup since it must contain a reference to + * the iterator that is called. This lookup is done at compile time. */ + ObjectField *iterField = ObjectField::cons( loc, + ObjectField::UserLocalType, (TypeRef*)0, data ); + curScope()->insertField( data, iterField ); + + LangStmt *stmt = LangStmt::cons( loc, LangStmt::ForIterType, + iterField, typeRef, iterCall, stmtList, curStruct(), scope ); + + return stmt; +} + +void BaseParser::preEof( const InputLoc &loc, StmtList *stmtList, ObjectDef *localFrame ) +{ + if ( !insideRegion() ) + error(loc) << "preeof must be used inside an existing region" << endl; + + CodeBlock *codeBlock = CodeBlock::cons( stmtList, localFrame ); + codeBlock->context = curStruct(); + + RegionSet *regionSet = regionStack.top(); + regionSet->tokenIgnore->preEofBlock = codeBlock; +} + +ProdEl *BaseParser::prodElName( const InputLoc &loc, const String &data, + NamespaceQual *nspaceQual, ObjectField *objField, + RepeatType repeatType, bool commit ) +{ + TypeRef *typeRef = TypeRef::cons( loc, nspaceQual, data, repeatType ); + ProdEl *prodEl = new ProdEl( ProdEl::ReferenceType, loc, objField, commit, typeRef, 0 ); + return prodEl; +} + +ProdEl *BaseParser::prodElLiteral( const InputLoc &loc, const String &data, + NamespaceQual *nspaceQual, ObjectField *objField, RepeatType repeatType, + bool commit ) +{ + /* Create a new prodEl node going to a concat literal. 
*/ + PdaLiteral *literal = new PdaLiteral( loc, data ); + TypeRef *typeRef = TypeRef::cons( loc, nspaceQual, literal, repeatType ); + ProdEl *prodEl = new ProdEl( ProdEl::LiteralType, loc, objField, commit, typeRef, 0 ); + return prodEl; +} + +ConsItemList *BaseParser::consElLiteral( const InputLoc &loc, + TypeRef *consTypeRef, const String &data, NamespaceQual *nspaceQual ) +{ + PdaLiteral *literal = new PdaLiteral( loc, data ); + TypeRef *typeRef = TypeRef::cons( loc, consTypeRef, nspaceQual, literal ); + ProdEl *prodEl = new ProdEl( ProdEl::LiteralType, loc, 0, false, typeRef, 0 ); + ConsItem *consItem = ConsItem::cons( loc, ConsItem::LiteralType, prodEl ); + ConsItemList *list = ConsItemList::cons( consItem ); + return list; +} + +Production *BaseParser::production( const InputLoc &loc, ProdElList *prodElList, + String name, bool commit, CodeBlock *codeBlock, LangEl *predOf ) +{ + Production *prod = Production::cons( loc, 0, prodElList, + name, commit, codeBlock, pd->prodList.length(), 0 ); + prod->predOf = predOf; + + /* Link the production elements back to the production. 
*/ + for ( ProdEl *prodEl = prodElList->head; prodEl != 0; prodEl = prodEl->next ) + prodEl->production = prod; + + pd->prodList.append( prod ); + + return prod; +} + +void BaseParser::objVarDef( ObjectDef *objectDef, ObjectField *objField ) +{ + if ( objectDef->rootScope->checkRedecl( objField->name ) != 0 ) + error() << "object field renamed" << endp; + + objectDef->rootScope->insertField( objField->name, objField ); +} + +LelDefList *BaseParser::prodAppend( LelDefList *defList, Production *definition ) +{ + definition->prodNum = defList->length(); + defList->append( definition ); + return defList; +} + +LangExpr *BaseParser::construct( const InputLoc &loc, ObjectField *objField, + ConsItemList *list, TypeRef *typeRef, FieldInitVect *fieldInitVect ) +{ + Constructor *constructor = Constructor::cons( loc, curNspace(), + list, pd->nextPatConsId++ ); + pd->replList.append( constructor ); + + LangVarRef *varRef = 0; + if ( objField != 0 ) { + varRef = LangVarRef::cons( objField->loc, + curNspace(), curStruct(), curScope(), objField->name ); + } + + LangExpr *expr = LangExpr::cons( LangTerm::cons( loc, LangTerm::ConstructType, + varRef, objField, typeRef, fieldInitVect, constructor ) ); + + /* Check for redeclaration. */ + if ( objField != 0 ) { + if ( curScope()->checkRedecl( objField->name ) != 0 ) { + error( objField->loc ) << "variable " << objField->name << + " redeclared" << endp; + } + + /* Insert it into the field map. 
*/ + objField->typeRef = typeRef; + curScope()->insertField( objField->name, objField ); + } + + return expr; +} + +LangExpr *BaseParser::match( const InputLoc &loc, LangVarRef *varRef, + PatternItemList *list ) +{ + Pattern *pattern = Pattern::cons( loc, curNspace(), + list, pd->nextPatConsId++ ); + pd->patternList.append( pattern ); + + LangExpr *expr = LangExpr::cons( LangTerm::consMatch( + InputLoc(), varRef, pattern ) ); + + return expr; +} + +LangExpr *BaseParser::prodCompare( const InputLoc &loc, LangVarRef *varRef, + const String &prod, LangExpr *matchExpr ) +{ + LangExpr *expr = LangExpr::cons( LangTerm::consProdCompare( + InputLoc(), varRef, prod, matchExpr ) ); + + return expr; +} + +LangStmt *BaseParser::varDef( ObjectField *objField, + LangExpr *expr, LangStmt::Type assignType ) +{ + LangStmt *stmt = 0; + + /* Check for redeclaration. */ + if ( curScope()->checkRedecl( objField->name ) != 0 ) { + error( objField->loc ) << "variable " << objField->name << + " redeclared" << endp; + } + + /* Insert it into the field map. 
*/ + curScope()->insertField( objField->name, objField ); + + //cout << "var def " << $1->objField->name << endl; + + if ( expr != 0 ) { + LangVarRef *varRef = LangVarRef::cons( objField->loc, + curNspace(), curStruct(), curScope(), objField->name ); + + stmt = LangStmt::cons( objField->loc, assignType, varRef, expr ); + } + + return stmt; +} + +LangExpr *BaseParser::require( const InputLoc &loc, + LangVarRef *varRef, PatternItemList *list ) +{ + Pattern *pattern = Pattern::cons( loc, curNspace(), + list, pd->nextPatConsId++ ); + pd->patternList.append( pattern ); + + LangExpr *expr = LangExpr::cons( LangTerm::consMatch( + InputLoc(), varRef, pattern ) ); + return expr; +} + +void BaseParser::structVarDef( const InputLoc &loc, ObjectField *objField ) +{ + ObjectDef *object; + if ( curStruct() == 0 ) + error(loc) << "internal error: no context stack items found" << endp; + + StructDef *structDef = curStruct(); + object = structDef->objectDef; + + if ( object->rootScope->checkRedecl( objField->name ) != 0 ) + error(objField->loc) << "object field renamed" << endp; + + object->rootScope->insertField( objField->name, objField ); +} + +void BaseParser::structHead( const InputLoc &loc, Namespace *inNspace, + const String &data, ObjectDef::Type objectType ) +{ + ObjectDef *objectDef = ObjectDef::cons( objectType, + data, pd->nextObjectId++ ); + + StructDef *context = new StructDef( loc, data, objectDef ); + structStack.push( context ); + + inNspace->structDefList.append( context ); + + /* Make the namespace for the struct. 
*/ + createNamespace( loc, data ); +} + +StmtList *BaseParser::appendStatement( StmtList *stmtList, LangStmt *stmt ) +{ + if ( stmt != 0 ) + stmtList->append( stmt ); + return stmtList; +} + +ParameterList *BaseParser::appendParam( ParameterList *paramList, ObjectField *objField ) +{ + paramList->append( objField ); + return paramList; +} + +ObjectField *BaseParser::addParam( const InputLoc &loc, + ObjectField::Type type, TypeRef *typeRef, const String &name ) +{ + ObjectField *objField = ObjectField::cons( loc, type, typeRef, name ); + return objField; +} + +PredDecl *BaseParser::predTokenName( const InputLoc &loc, NamespaceQual *qual, + const String &data ) +{ + TypeRef *typeRef = TypeRef::cons( loc, qual, data ); + PredDecl *predDecl = new PredDecl( typeRef, pd->predValue ); + return predDecl; +} + +PredDecl *BaseParser::predTokenLit( const InputLoc &loc, const String &data, + NamespaceQual *nspaceQual ) +{ + PdaLiteral *literal = new PdaLiteral( loc, data ); + TypeRef *typeRef = TypeRef::cons( loc, nspaceQual, literal ); + PredDecl *predDecl = new PredDecl( typeRef, pd->predValue ); + return predDecl; +} + +void BaseParser::alias( const InputLoc &loc, const String &data, TypeRef *typeRef ) +{ + Namespace *nspace = curNspace(); + TypeAlias *typeAlias = new TypeAlias( loc, nspace, data, typeRef ); + nspace->typeAliasList.append( typeAlias ); +} + +void BaseParser::precedenceStmt( PredType predType, PredDeclList *predDeclList ) +{ + while ( predDeclList->length() > 0 ) { + PredDecl *predDecl = predDeclList->detachFirst(); + predDecl->predType = predType; + pd->predDeclList.append( predDecl ); + } + pd->predValue++; +} + +void BaseParser::pushScope() +{ + scopeTop = curLocalFrame()->pushScope( curScope() ); +} + +void BaseParser::popScope() +{ + scopeTop = curScope()->parentScope; +} diff --git a/src/parser.h b/src/parser.h new file mode 100644 index 00000000..aafa3f2b --- /dev/null +++ b/src/parser.h @@ -0,0 +1,197 @@ +/* + * Copyright 2013-2018 Adrian Thurston 
<thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _COLM_PARSER_H +#define _COLM_PARSER_H + +#include <iostream> + +#include <avltree.h> + +#include "compiler.h" +#include "parser.h" + +#define PROPERTY_REDUCE_FIRST 0x1 + +struct BaseParser +{ + BaseParser( Compiler *pd ) + : pd(pd), enterRl(false) + {} + + virtual ~BaseParser() {} + + Compiler *pd; + + RegionSetVect regionStack; + NamespaceVect namespaceStack; + ReductionVect reductionStack; + StructStack structStack; + ObjectDef *localFrameTop; + NameScope *scopeTop; + + bool enterRl; + + bool insideRegion() + { return regionStack.length() > 0; } + + StructDef *curStruct() + { return structStack.length() == 0 ? 
0 : structStack.top(); } + + Namespace *curNspace() + { return namespaceStack.top(); } + + NameScope *curScope() + { return scopeTop; } + + ObjectDef *curLocalFrame() + { return localFrameTop; } + + Reduction *curReduction() + { return reductionStack.top(); } + + /* Lexical feedback. */ + + void listElDef( String name ); + void mapElDef( String name, TypeRef *keyType ); + + void argvDecl(); + void init(); + void addRegularDef( const InputLoc &loc, Namespace *nspace, + const String &name, LexJoin *join ); + TokenRegion *createRegion( const InputLoc &loc, RegionImpl *impl ); + Namespace *createRootNamespace(); + Namespace *createNamespace( const InputLoc &loc, const String &name ); + void pushRegionSet( const InputLoc &loc ); + void popRegionSet(); + void addProduction( const InputLoc &loc, const String &name, + ProdElList *prodElList, bool commit, + CodeBlock *redBlock, LangEl *predOf ); + void addArgvList(); + void addStdsList(); + LexJoin *literalJoin( const InputLoc &loc, const String &data ); + + Reduction *createReduction( const InputLoc loc, const String &name ); + + void defineToken( const InputLoc &loc, String name, LexJoin *join, + ObjectDef *objectDef, CodeBlock *transBlock, + bool ignore, bool noPreIgnore, bool noPostIgnore ); + + void zeroDef( const InputLoc &loc, const String &name ); + void literalDef( const InputLoc &loc, const String &data, + bool noPreIgnore, bool noPostIgnore ); + + ObjectDef *blockOpen(); + void blockClose(); + + void inHostDef( const String &hostCall, ObjectDef *localFrame, + ParameterList *paramList, TypeRef *typeRef, + const String &name, bool exprt ); + void functionDef( StmtList *stmtList, ObjectDef *localFrame, + ParameterList *paramList, TypeRef *typeRef, + const String &name, bool exprt ); + + void iterDef( StmtList *stmtList, ObjectDef *localFrame, + ParameterList *paramList, const String &name ); + LangStmt *globalDef( ObjectField *objField, LangExpr *expr, + LangStmt::Type assignType ); + void cflDef( NtDef *ntDef, 
ObjectDef *objectDef, LelDefList *defList ); + ReOrBlock *lexRegularExprData( ReOrBlock *reOrBlock, ReOrItem *reOrItem ); + + int lexFactorRepNum( const InputLoc &loc, const String &data ); + LexFactor *lexRlFactorName( const String &data, const InputLoc &loc ); + LexFactorAug *lexFactorLabel( const InputLoc &loc, const String &data, + LexFactorAug *factorAug ); + LexJoin *lexOptJoin( LexJoin *join, LexJoin *context ); + LangExpr *send( const InputLoc &loc, LangVarRef *varRef, + ConsItemList *list, bool eof ); + LangExpr *sendTree( const InputLoc &loc, LangVarRef *varRef, + ConsItemList *list, bool eof ); + LangExpr *parseCmd( const InputLoc &loc, bool tree, bool stop, ObjectField *objField, + TypeRef *typeRef, FieldInitVect *fieldInitVect, ConsItemList *list, + bool used, bool reduce, bool read, const String &reducer ); + PatternItemList *consPatternEl( LangVarRef *varRef, PatternItemList *list ); + PatternItemList *patternElNamed( const InputLoc &loc, LangVarRef *varRef, + NamespaceQual *nspaceQual, const String &data, RepeatType repeatType ); + PatternItemList *patternElType( const InputLoc &loc, LangVarRef *varRef, + NamespaceQual *nspaceQual, const String &data, RepeatType repeatType ); + PatternItemList *patListConcat( PatternItemList *list1, PatternItemList *list2 ); + ConsItemList *consListConcat( ConsItemList *list1, ConsItemList *list2 ); + LangStmt *forScope( const InputLoc &loc, const String &data, + NameScope *scope, TypeRef *typeRef, IterCall *iterCall, StmtList *stmtList ); + void preEof( const InputLoc &loc, StmtList *stmtList, ObjectDef *localFrame ); + + ProdEl *prodElName( const InputLoc &loc, const String &data, + NamespaceQual *nspaceQual, ObjectField *objField, RepeatType repeatType, + bool commit ); + ProdEl *prodElLiteral( const InputLoc &loc, const String &data, + NamespaceQual *nspaceQual, ObjectField *objField, RepeatType repeatType, + bool commit ); + ConsItemList *consElLiteral( const InputLoc &loc, TypeRef *consTypeRef, + const String 
&data, NamespaceQual *nspaceQual ); + Production *production( const InputLoc &loc, ProdElList *prodElList, + String name, bool commit, CodeBlock *codeBlock, LangEl *predOf ); + void objVarDef( ObjectDef *objectDef, ObjectField *objField ); + LelDefList *prodAppend( LelDefList *defList, Production *definition ); + + LangExpr *construct( const InputLoc &loc, ObjectField *objField, + ConsItemList *list, TypeRef *typeRef, FieldInitVect *fieldInitVect ); + LangExpr *match( const InputLoc &loc, LangVarRef *varRef, + PatternItemList *list ); + LangExpr *prodCompare( const InputLoc &loc, LangVarRef *varRef, + const String &prod, LangExpr *matchExpr ); + LangStmt *varDef( ObjectField *objField, + LangExpr *expr, LangStmt::Type assignType ); + LangStmt *exportStmt( ObjectField *objField, LangStmt::Type assignType, LangExpr *expr ); + + + LangExpr *require( const InputLoc &loc, LangVarRef *varRef, PatternItemList *list ); + void structVarDef( const InputLoc &loc, ObjectField *objField ); + void structHead( const InputLoc &loc, Namespace *inNspace, + const String &data, ObjectDef::Type objectType ); + StmtList *appendStatement( StmtList *stmtList, LangStmt *stmt ); + ParameterList *appendParam( ParameterList *paramList, ObjectField *objField ); + ObjectField *addParam( const InputLoc &loc, + ObjectField::Type type, TypeRef *typeRef, const String &name ); + PredDecl *predTokenName( const InputLoc &loc, NamespaceQual *qual, const String &data ); + PredDecl *predTokenLit( const InputLoc &loc, const String &data, + NamespaceQual *nspaceQual ); + void alias( const InputLoc &loc, const String &data, TypeRef *typeRef ); + void precedenceStmt( PredType predType, PredDeclList *predDeclList ); + ProdElList *appendProdEl( ProdElList *prodElList, ProdEl *prodEl ); + + void pushScope(); + void popScope(); + + virtual void go( long activeRealm ) = 0; + + BstSet<String, ColmCmpStr> genericElDefined; + + NamespaceQual *emptyNspaceQual() + { + return NamespaceQual::cons( curNspace() ); + } + 
+}; + +#endif /* _COLM_PARSER_H */ + diff --git a/src/parsetree.cc b/src/parsetree.cc new file mode 100644 index 00000000..572f0610 --- /dev/null +++ b/src/parsetree.cc @@ -0,0 +1,1495 @@ +/* + * Copyright 2006-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <assert.h> +#include <stdbool.h> + +#include <iostream> + +#include "fsmgraph.h" +#include "compiler.h" +#include "parsetree.h" + +using namespace std; +ostream &operator<<( ostream &out, const NameRef &nameRef ); +ostream &operator<<( ostream &out, const NameInst &nameInst ); +ostream &operator<<( ostream &out, const Token &token ); + +/* Convert the literal string which comes in from the scanner into an array of + * characters with escapes and options interpreted. Also null terminates the + * string. 
Though this null termination should not be relied on for + * interpreting literals in the parser because the string may contain a + * literal string with \0 */ +void prepareLitString( String &result, bool &caseInsensitive, + const String &srcString, const InputLoc &loc ) +{ + result.setAs( String::Fresh(), srcString.length() ); + caseInsensitive = false; + + char *src = srcString.data + 1; + char *end = 0; + bool backtick = srcString[0] == '`'; + + if ( !backtick ) { + end = srcString.data + srcString.length() - 1; + + while ( *end != '\'' && *end != '\"' && *end != '\n' ) { + if ( *end == 'i' ) + caseInsensitive = true; + else { + error( loc ) << "literal string '" << *end << + "' option not supported" << endl; + } + end -= 1; + } + + if ( *end == '\n' ) + end++; + } + else { + end = srcString.data + srcString.length(); + if ( srcString.length() > 2 && *(end-1) == '`' ) + end -= 1; + } + + char *dest = result.data; + int len = 0; + while ( src != end ) { + if ( !backtick && *src == '\\' ) { + switch ( src[1] ) { + case '0': dest[len++] = '\0'; break; + case 'a': dest[len++] = '\a'; break; + case 'b': dest[len++] = '\b'; break; + case 't': dest[len++] = '\t'; break; + case 'n': dest[len++] = '\n'; break; + case 'v': dest[len++] = '\v'; break; + case 'f': dest[len++] = '\f'; break; + case 'r': dest[len++] = '\r'; break; + case '\n': break; + default: dest[len++] = src[1]; break; + } + src += 2; + } + else { + dest[len++] = *src++; + } + } + + result.chop( len ); +} + +int CmpUniqueType::compare( const UniqueType &ut1, const UniqueType &ut2 ) +{ + if ( ut1.typeId < ut2.typeId ) + return -1; + else if ( ut1.typeId > ut2.typeId ) + return 1; + switch ( ut1.typeId ) { + case TYPE_TREE: + case TYPE_REF: + if ( ut1.langEl < ut2.langEl ) + return -1; + else if ( ut1.langEl > ut2.langEl ) + return 1; + break; + case TYPE_ITER: + if ( ut1.iterDef < ut2.iterDef ) + return -1; + else if ( ut1.iterDef > ut2.iterDef ) + return 1; + break; + + case TYPE_NOTYPE: + case TYPE_NIL: + 
case TYPE_INT: + case TYPE_BOOL: + case TYPE_LIST_PTRS: + case TYPE_MAP_PTRS: + case TYPE_VOID: + break; + + case TYPE_STRUCT: + if ( ut1.structEl < ut2.structEl ) + return -1; + else if ( ut1.structEl > ut2.structEl ) + return 1; + break; + case TYPE_GENERIC: + if ( ut1.generic < ut2.generic ) + return -1; + else if ( ut1.generic > ut2.generic ) + return 1; + break; + } + + return 0; +} + +int CmpUniqueRepeat::compare( const UniqueRepeat &ut1, const UniqueRepeat &ut2 ) +{ + if ( ut1.repeatType < ut2.repeatType ) + return -1; + else if ( ut1.repeatType > ut2.repeatType ) + return 1; + else { + if ( ut1.langEl < ut2.langEl ) + return -1; + else if ( ut1.langEl > ut2.langEl ) + return 1; + } + + return 0; +} + +int CmpUniqueGeneric::compare( const UniqueGeneric &ut1, const UniqueGeneric &ut2 ) +{ + if ( ut1.type < ut2.type ) + return -1; + else if ( ut1.type > ut2.type ) + return 1; + else if ( ut1.value < ut2.value ) + return -1; + else if ( ut1.value > ut2.value ) + return 1; + else { + switch ( ut1.type ) { + case UniqueGeneric::List: + case UniqueGeneric::ListEl: + case UniqueGeneric::Parser: + break; + + case UniqueGeneric::Map: + case UniqueGeneric::MapEl: + if ( ut1.key < ut2.key ) + return -1; + else if ( ut1.key > ut2.key ) + return 1; + break; + } + } + return 0; +} + +FsmGraph *LexDefinition::walk( Compiler *pd ) +{ + /* Recurse on the expression. */ + FsmGraph *rtnVal = join->walk( pd ); + + /* If the expression below is a join operation with multiple expressions + * then it just had epsilon transisions resolved. If it is a join + * with only a single expression then run the epsilon op now. */ + if ( join->expr != 0 ) + rtnVal->epsilonOp(); + + return rtnVal; +} + +void RegionImpl::makeNameTree( const InputLoc &loc, Compiler *pd ) +{ + NameInst *nameInst = new NameInst( pd->nextNameId++ ); + pd->nameInstList.append( nameInst ); + + /* Guess we do this now. */ + makeActions( pd ); + + /* Save off the name inst into the token region. 
This is only legal for + * token regions because they are only ever referenced once (near the root + * of the name tree). They cannot have more than one corresponding name + * inst. */ + assert( regionNameInst == 0 ); + regionNameInst = nameInst; +} + +InputLoc TokenInstance::getLoc() +{ + return action != 0 ? action->loc : semiLoc; +} + +/* + * If there are any LMs then all of the following entry points must reset + * tokstart: + * + * 1. fentry(StateRef) + * 2. ftoto(StateRef), fcall(StateRef), fnext(StateRef) + * 3. targt of any transition that has an fcall (the return loc). + * 4. start state of all longest match routines. + */ + +Action *RegionImpl::newAction( Compiler *pd, const InputLoc &loc, + const String &name, InlineList *inlineList ) +{ + Action *action = Action::cons( loc, name, inlineList ); + pd->actionList.append( action ); + action->isLmAction = true; + return action; +} + +void RegionImpl::makeActions( Compiler *pd ) +{ + /* Make actions that set the action id. */ + for ( TokenInstanceListReg::Iter lmi = tokenInstanceList; lmi.lte(); lmi++ ) { + /* For each part create actions for setting the match type. We need + * to do this so that the actions will go into the actionIndex. */ + InlineList *inlineList = InlineList::cons(); + inlineList->append( InlineItem::cons( lmi->getLoc(), this, lmi, + InlineItem::LmSetActId ) ); + char *actName = new char[50]; + sprintf( actName, "store%i", lmi->longestMatchId ); + lmi->setActId = newAction( pd, lmi->getLoc(), actName, inlineList ); + } + + /* Make actions that execute the user action and restart on the last character. */ + for ( TokenInstanceListReg::Iter lmi = tokenInstanceList; lmi.lte(); lmi++ ) { + /* For each part create actions for setting the match type. We need + * to do this so that the actions will go into the actionIndex. 
*/ + InlineList *inlineList = InlineList::cons(); + inlineList->append( InlineItem::cons( lmi->getLoc(), this, lmi, + InlineItem::LmOnLast ) ); + char *actName = new char[50]; + sprintf( actName, "imm%i", lmi->longestMatchId ); + lmi->actOnLast = newAction( pd, lmi->getLoc(), actName, inlineList ); + } + + /* Make actions that execute the user action and restart on the next + * character. These actions will set tokend themselves (it is the current + * char). */ + for ( TokenInstanceListReg::Iter lmi = tokenInstanceList; lmi.lte(); lmi++ ) { + /* For each part create actions for setting the match type. We need + * to do this so that the actions will go into the actionIndex. */ + InlineList *inlineList = InlineList::cons(); + inlineList->append( InlineItem::cons( lmi->getLoc(), this, lmi, + InlineItem::LmOnNext ) ); + char *actName = new char[50]; + sprintf( actName, "lagh%i", lmi->longestMatchId ); + lmi->actOnNext = newAction( pd, lmi->getLoc(), actName, inlineList ); + } + + /* Make actions that execute the user action and restart at tokend. These + * actions execute some time after matching the last char. */ + for ( TokenInstanceListReg::Iter lmi = tokenInstanceList; lmi.lte(); lmi++ ) { + /* For each part create actions for setting the match type. We need + * to do this so that the actions will go into the actionIndex. */ + InlineList *inlineList = InlineList::cons(); + inlineList->append( InlineItem::cons( lmi->getLoc(), this, lmi, + InlineItem::LmOnLagBehind ) ); + char *actName = new char[50]; + sprintf( actName, "lag%i", lmi->longestMatchId ); + lmi->actLagBehind = newAction( pd, lmi->getLoc(), actName, inlineList ); + } + + InputLoc loc; + loc.line = 1; + loc.col = 1; + + /* Create the error action. 
*/ + InlineList *il6 = InlineList::cons(); + il6->append( InlineItem::cons( loc, this, 0, InlineItem::LmSwitch ) ); + lmActSelect = newAction( pd, loc, "lagsel", il6 ); +} + +void RegionImpl::restart( FsmGraph *graph, FsmTrans *trans ) +{ + FsmState *fromState = trans->fromState; + graph->detachTrans( fromState, trans->toState, trans ); + graph->attachTrans( fromState, graph->startState, trans ); +} + +void RegionImpl::runLongestMatch( Compiler *pd, FsmGraph *graph ) +{ + graph->markReachableFromHereStopFinal( graph->startState ); + for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) { + if ( ms->stateBits & SB_ISMARKED ) { + ms->lmItemSet.insert( 0 ); + ms->stateBits &= ~ SB_ISMARKED; + } + } + + /* Transfer the first item of non-empty lmAction tables to the item sets + * of the states that follow. Exclude states that have no transitions out. + * This must happen on a separate pass so that on each iteration of the + * next pass we have the item set entries from all lmAction tables. */ + for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) { + for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) { + if ( trans->lmActionTable.length() > 0 ) { + LmActionTableEl *lmAct = trans->lmActionTable.data; + FsmState *toState = trans->toState; + assert( toState ); + + /* Check if there are transitions out, this may be a very + * close approximation? Out transitions going nowhere? + * FIXME: Check. */ + if ( toState->outList.length() > 0 ) { + /* Fill the item sets. */ + graph->markReachableFromHereStopFinal( toState ); + for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) { + if ( ms->stateBits & SB_ISMARKED ) { + ms->lmItemSet.insert( lmAct->value ); + ms->stateBits &= ~ SB_ISMARKED; + } + } + } + } + } + } + + /* The lmItem sets are now filled, telling us which longest match rules + * can succeed in which states. First determine if we need to make sure + * act is defaulted to zero. 
*/ + int maxItemSetLength = 0; + graph->markReachableFromHereStopFinal( graph->startState ); + for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) { + if ( ms->stateBits & SB_ISMARKED ) { + if ( ms->lmItemSet.length() > maxItemSetLength ) + maxItemSetLength = ms->lmItemSet.length(); + ms->stateBits &= ~ SB_ISMARKED; + } + } + + /* The actions executed on starting to match a token. */ + graph->isolateStartState(); + graph->startState->fromStateActionTable.setAction( pd->setTokStartOrd, pd->setTokStart ); + if ( maxItemSetLength > 1 ) { + /* The longest match action switch may be called when tokens are + * matched, in which case act must be initialized, there must be a + * case to handle the error, and the generated machine will require an + * error state. */ + lmSwitchHandlesError = true; + graph->startState->toStateActionTable.setAction( pd->initActIdOrd, pd->initActId ); + } + + /* The place to store transitions to restart. It maybe possible for the + * restarting to affect the searching through the graph that follows. For + * now take the safe route and save the list of transitions to restart + * until after all searching is done. */ + Vector<FsmTrans*> restartTrans; + + /* Set actions that do immediate token recognition, set the longest match part + * id and set the token ending. */ + for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) { + for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) { + if ( trans->lmActionTable.length() > 0 ) { + LmActionTableEl *lmAct = trans->lmActionTable.data; + FsmState *toState = trans->toState; + assert( toState ); + + /* Check if there are transitions out, this may be a very + * close approximation? Out transitions going nowhere? + * FIXME: Check. */ + if ( toState->outList.length() == 0 ) { + /* Can execute the immediate action for the longest match + * part. Redirect the action to the start state. 
*/ + trans->actionTable.setAction( lmAct->key, + lmAct->value->actOnLast ); + restartTrans.append( trans ); + } + else { + /* Look for non final states that have a non-empty item + * set. If these are present then we need to record the + * end of the token. Also Find the highest item set + * length reachable from here (excluding at transtions to + * final states). */ + bool nonFinalNonEmptyItemSet = false; + maxItemSetLength = 0; + graph->markReachableFromHereStopFinal( toState ); + for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) { + if ( ms->stateBits & SB_ISMARKED ) { + if ( ms->lmItemSet.length() > 0 && !ms->isFinState() ) + nonFinalNonEmptyItemSet = true; + if ( ms->lmItemSet.length() > maxItemSetLength ) + maxItemSetLength = ms->lmItemSet.length(); + ms->stateBits &= ~ SB_ISMARKED; + } + } + + /* If there are reachable states that are not final and + * have non empty item sets or that have an item set + * length greater than one then we need to set tokend + * because the error action that matches the token will + * require it. */ + if ( nonFinalNonEmptyItemSet || maxItemSetLength > 1 ) + trans->actionTable.setAction( pd->setTokEndOrd, pd->setTokEnd ); + + /* Some states may not know which longest match item to + * execute, must set it. */ + if ( maxItemSetLength > 1 ) { + /* There are transitions out, another match may come. */ + trans->actionTable.setAction( lmAct->key, + lmAct->value->setActId ); + } + } + } + } + } + + /* Now that all graph searching is done it certainly safe set the + * restarting. It may be safe above, however this must be verified. */ + for ( Vector<FsmTrans*>::Iter rs = restartTrans; rs.lte(); rs++ ) + restart( graph, *rs ); + + int lmErrActionOrd = pd->curActionOrd++; + + /* Embed the error for recognizing a char. 
*/ + for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) { + if ( st->lmItemSet.length() == 1 && st->lmItemSet[0] != 0 ) { + if ( st->isFinState() ) { + /* On error execute the onActNext action, which knows that + * the last character of the token was one back and restart. */ + graph->setErrorTarget( st, graph->startState, &lmErrActionOrd, + &st->lmItemSet[0]->actOnNext, 1 ); + st->eofActionTable.setAction( lmErrActionOrd, + st->lmItemSet[0]->actOnNext ); + st->eofTarget = graph->startState; + } + else { + graph->setErrorTarget( st, graph->startState, &lmErrActionOrd, + &st->lmItemSet[0]->actLagBehind, 1 ); + st->eofActionTable.setAction( lmErrActionOrd, + st->lmItemSet[0]->actLagBehind ); + st->eofTarget = graph->startState; + } + } + else if ( st->lmItemSet.length() > 1 ) { + /* Need to use the select. Take note of the which items the select + * is needed for so only the necessary actions are included. */ + for ( LmItemSet::Iter plmi = st->lmItemSet; plmi.lte(); plmi++ ) { + if ( *plmi != 0 ) + (*plmi)->inLmSelect = true; + } + /* On error, execute the action select and go to the start state. */ + graph->setErrorTarget( st, graph->startState, &lmErrActionOrd, + &lmActSelect, 1 ); + st->eofActionTable.setAction( lmErrActionOrd, lmActSelect ); + st->eofTarget = graph->startState; + } + } + + /* Finally, the start state should be made final. */ + graph->setFinState( graph->startState ); +} + +void RegionImpl::transferScannerLeavingActions( FsmGraph *graph ) +{ + for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) { + if ( st->outActionTable.length() > 0 ) + graph->setErrorActions( st, st->outActionTable ); + } +} + +FsmGraph *RegionImpl::walk( Compiler *pd ) +{ + /* Make each part of the longest match. */ + int numParts = 0; + FsmGraph **parts = new FsmGraph*[tokenInstanceList.length()]; + for ( TokenInstanceListReg::Iter lmi = tokenInstanceList; lmi.lte(); lmi++ ) { + /* Watch out for patternless tokens. 
*/ + if ( lmi->join != 0 ) { + /* Create the machine and embed the setting of the longest match id. */ + parts[numParts] = lmi->join->walk( pd ); + parts[numParts]->longMatchAction( pd->curActionOrd++, lmi ); + + /* Look for tokens that accept the zero length-word. The first one found + * will be used as the default token. */ + if ( defaultTokenInstance == 0 && parts[numParts]->startState->isFinState() ) + defaultTokenInstance = lmi; + + numParts += 1; + } + } + FsmGraph *retFsm = parts[0]; + + if ( defaultTokenInstance != 0 && defaultTokenInstance->tokenDef->tdLangEl->isIgnore ) + error() << "ignore token cannot be a scanner's zero-length token" << endp; + + /* The region is empty. Return the empty set. */ + if ( numParts == 0 ) { + retFsm = new FsmGraph(); + retFsm->lambdaFsm(); + } + else { + /* Before we union the patterns we need to deal with leaving actions. They + * are transfered to error transitions out of the final states (like local + * error actions) and to eof actions. In the scanner we need to forbid + * on_last for any final state that has an leaving action. */ + for ( int i = 0; i < numParts; i++ ) + transferScannerLeavingActions( parts[i] ); + + /* Union machines one and up with machine zero. */ + FsmGraph *retFsm = parts[0]; + for ( int i = 1; i < numParts; i++ ) { + retFsm->unionOp( parts[i] ); + afterOpMinimize( retFsm ); + } + + runLongestMatch( pd, retFsm ); + delete[] parts; + } + + /* Need the entry point for the region. */ + retFsm->setEntry( regionNameInst->id, retFsm->startState ); + + return retFsm; +} + +/* Walk an expression node. */ +FsmGraph *LexJoin::walk( Compiler *pd ) +{ + FsmGraph *retFsm = expr->walk( pd ); + + /* Maybe the the context. */ + if ( context != 0 ) { + retFsm->leaveFsmAction( pd->curActionOrd++, mark ); + FsmGraph *contextGraph = context->walk( pd ); + retFsm->concatOp( contextGraph ); + } + + return retFsm; +} + +/* Clean up after an expression node. 
*/ +LexExpression::~LexExpression() +{ + switch ( type ) { + case OrType: case IntersectType: case SubtractType: + case StrongSubtractType: + delete expression; + delete term; + break; + case TermType: + delete term; + break; + case BuiltinType: + break; + } +} + +/* Evaluate a single expression node. */ +FsmGraph *LexExpression::walk( Compiler *pd, bool lastInSeq ) +{ + FsmGraph *rtnVal = 0; + switch ( type ) { + case OrType: { + /* Evaluate the expression. */ + rtnVal = expression->walk( pd, false ); + /* Evaluate the term. */ + FsmGraph *rhs = term->walk( pd ); + /* Perform union. */ + rtnVal->unionOp( rhs ); + afterOpMinimize( rtnVal, lastInSeq ); + break; + } + case IntersectType: { + /* Evaluate the expression. */ + rtnVal = expression->walk( pd ); + /* Evaluate the term. */ + FsmGraph *rhs = term->walk( pd ); + /* Perform intersection. */ + rtnVal->intersectOp( rhs ); + afterOpMinimize( rtnVal, lastInSeq ); + break; + } + case SubtractType: { + /* Evaluate the expression. */ + rtnVal = expression->walk( pd ); + /* Evaluate the term. */ + FsmGraph *rhs = term->walk( pd ); + /* Perform subtraction. */ + rtnVal->subtractOp( rhs ); + afterOpMinimize( rtnVal, lastInSeq ); + break; + } + case StrongSubtractType: { + /* Evaluate the expression. */ + rtnVal = expression->walk( pd ); + + /* Evaluate the term and pad it with any* machines. */ + FsmGraph *rhs = dotStarFsm( pd ); + FsmGraph *termFsm = term->walk( pd ); + FsmGraph *trailAnyStar = dotStarFsm( pd ); + rhs->concatOp( termFsm ); + rhs->concatOp( trailAnyStar ); + + /* Perform subtraction. */ + rtnVal->subtractOp( rhs ); + afterOpMinimize( rtnVal, lastInSeq ); + break; + } + case TermType: { + /* Return result of the term. */ + rtnVal = term->walk( pd ); + break; + } + case BuiltinType: { + /* Duplicate the builtin. */ + rtnVal = makeBuiltin( builtin, pd ); + break; + } + } + + return rtnVal; +} + +/* Clean up after a term node. 
*/ +LexTerm::~LexTerm() +{ + switch ( type ) { + case ConcatType: + case RightStartType: + case RightFinishType: + case LeftType: + delete term; + delete factorAug; + break; + case FactorAugType: + delete factorAug; + break; + } +} + +/* Evaluate a term node. */ +FsmGraph *LexTerm::walk( Compiler *pd, bool lastInSeq ) +{ + FsmGraph *rtnVal = 0; + switch ( type ) { + case ConcatType: { + /* Evaluate the Term. */ + rtnVal = term->walk( pd, false ); + /* Evaluate the LexFactorRep. */ + FsmGraph *rhs = factorAug->walk( pd ); + /* Perform concatenation. */ + rtnVal->concatOp( rhs ); + afterOpMinimize( rtnVal, lastInSeq ); + break; + } + case RightStartType: { + /* Evaluate the Term. */ + rtnVal = term->walk( pd ); + + /* Evaluate the LexFactorRep. */ + FsmGraph *rhs = factorAug->walk( pd ); + + /* Set up the priority descriptors. The left machine gets the + * lower priority where as the right get the higher start priority. */ + priorDescs[0].key = pd->nextPriorKey++; + priorDescs[0].priority = 0; + rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] ); + + /* The start transitions right machine get the higher priority. + * Use the same unique key. */ + priorDescs[1].key = priorDescs[0].key; + priorDescs[1].priority = 1; + rhs->startFsmPrior( pd->curPriorOrd++, &priorDescs[1] ); + + /* Perform concatenation. */ + rtnVal->concatOp( rhs ); + afterOpMinimize( rtnVal, lastInSeq ); + break; + } + case RightFinishType: { + /* Evaluate the Term. */ + rtnVal = term->walk( pd ); + + /* Evaluate the LexFactorRep. */ + FsmGraph *rhs = factorAug->walk( pd ); + + /* Set up the priority descriptors. The left machine gets the + * lower priority where as the finishing transitions to the right + * get the higher priority. */ + priorDescs[0].key = pd->nextPriorKey++; + priorDescs[0].priority = 0; + rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] ); + + /* The finishing transitions of the right machine get the higher + * priority. Use the same unique key. 
*/ + priorDescs[1].key = priorDescs[0].key; + priorDescs[1].priority = 1; + rhs->finishFsmPrior( pd->curPriorOrd++, &priorDescs[1] ); + + /* Perform concatenation. */ + rtnVal->concatOp( rhs ); + afterOpMinimize( rtnVal, lastInSeq ); + break; + } + case LeftType: { + /* Evaluate the Term. */ + rtnVal = term->walk( pd ); + + /* Evaluate the LexFactorRep. */ + FsmGraph *rhs = factorAug->walk( pd ); + + /* Set up the priority descriptors. The left machine gets the + * higher priority. */ + priorDescs[0].key = pd->nextPriorKey++; + priorDescs[0].priority = 1; + rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] ); + + /* The right machine gets the lower priority. Since + * startTransPrior might unnecessarily increase the number of + * states during the state machine construction process (due to + * isolation), we use allTransPrior instead, which has the same + * effect. */ + priorDescs[1].key = priorDescs[0].key; + priorDescs[1].priority = 0; + rhs->allTransPrior( pd->curPriorOrd++, &priorDescs[1] ); + + /* Perform concatenation. */ + rtnVal->concatOp( rhs ); + afterOpMinimize( rtnVal, lastInSeq ); + break; + } + case FactorAugType: { + rtnVal = factorAug->walk( pd ); + break; + } + } + return rtnVal; +} + +LexFactorAug::~LexFactorAug() +{ + delete factorRep; +} + +void LexFactorAug::assignActions( Compiler *pd, FsmGraph *graph, int *actionOrd ) +{ + /* Assign actions. */ + for ( int i = 0; i < actions.length(); i++ ) { + switch ( actions[i].type ) { + case at_start: + graph->startFsmAction( actionOrd[i], actions[i].action ); + afterOpMinimize( graph ); + break; + case at_leave: + graph->leaveFsmAction( actionOrd[i], actions[i].action ); + break; + } + } +} + +/* Evaluate a factor with augmentation node. */ +FsmGraph *LexFactorAug::walk( Compiler *pd ) +{ + /* Make the array of function orderings. 
*/ + int *actionOrd = 0; + if ( actions.length() > 0 ) + actionOrd = new int[actions.length()]; + + /* First walk the list of actions, assigning order to all starting + * actions. */ + for ( int i = 0; i < actions.length(); i++ ) { + if ( actions[i].type == at_start ) + actionOrd[i] = pd->curActionOrd++; + } + + /* Evaluate the factor with repetition. */ + FsmGraph *rtnVal = factorRep->walk( pd ); + + /* Compute the remaining action orderings. */ + for ( int i = 0; i < actions.length(); i++ ) { + if ( actions[i].type != at_start ) + actionOrd[i] = pd->curActionOrd++; + } + + assignActions( pd, rtnVal , actionOrd ); + + if ( actionOrd != 0 ) + delete[] actionOrd; + return rtnVal; +} + + +/* Clean up after a factor with repetition node. */ +LexFactorRep::~LexFactorRep() +{ + switch ( type ) { + case StarType: case StarStarType: case OptionalType: case PlusType: + case ExactType: case MaxType: case MinType: case RangeType: + delete factorRep; + break; + case FactorNegType: + delete factorNeg; + break; + } +} + +/* Evaluate a factor with repetition node. */ +FsmGraph *LexFactorRep::walk( Compiler *pd ) +{ + FsmGraph *retFsm = 0; + + switch ( type ) { + case StarType: { + /* Evaluate the LexFactorRep. */ + retFsm = factorRep->walk( pd ); + if ( retFsm->startState->isFinState() ) { + warning(loc) << "applying kleene star to a machine that " + "accepts zero length word" << endl; + } + + /* Shift over the start action orders then do the kleene star. */ + pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd ); + retFsm->starOp( ); + afterOpMinimize( retFsm ); + break; + } + case StarStarType: { + /* Evaluate the LexFactorRep. */ + retFsm = factorRep->walk( pd ); + if ( retFsm->startState->isFinState() ) { + warning(loc) << "applying kleene star to a machine that " + "accepts zero length word" << endl; + } + + /* Set up the prior descs. All gets priority one, whereas leaving gets + * priority zero. 
Make a unique key so that these priorities don't + * interfere with any priorities set by the user. */ + priorDescs[0].key = pd->nextPriorKey++; + priorDescs[0].priority = 1; + retFsm->allTransPrior( pd->curPriorOrd++, &priorDescs[0] ); + + /* Leaveing gets priority 0. Use same unique key. */ + priorDescs[1].key = priorDescs[0].key; + priorDescs[1].priority = 0; + retFsm->leaveFsmPrior( pd->curPriorOrd++, &priorDescs[1] ); + + /* Shift over the start action orders then do the kleene star. */ + pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd ); + retFsm->starOp( ); + afterOpMinimize( retFsm ); + break; + } + case OptionalType: { + /* Make the null fsm. */ + FsmGraph *nu = new FsmGraph(); + nu->lambdaFsm( ); + + /* Evaluate the LexFactorRep. */ + retFsm = factorRep->walk( pd ); + + /* Perform the question operator. */ + retFsm->unionOp( nu ); + afterOpMinimize( retFsm ); + break; + } + case PlusType: { + /* Evaluate the LexFactorRep. */ + retFsm = factorRep->walk( pd ); + if ( retFsm->startState->isFinState() ) { + warning(loc) << "applying plus operator to a machine that " + "accpets zero length word" << endl; + } + + /* Need a duplicated for the star end. */ + FsmGraph *dup = new FsmGraph( *retFsm ); + + /* The start func orders need to be shifted before doing the star. */ + pd->curActionOrd += dup->shiftStartActionOrder( pd->curActionOrd ); + + /* Star the duplicate. */ + dup->starOp( ); + afterOpMinimize( dup ); + + retFsm->concatOp( dup ); + afterOpMinimize( retFsm ); + break; + } + case ExactType: { + /* Get an int from the repetition amount. */ + if ( lowerRep == 0 ) { + /* No copies. Don't need to evaluate the factorRep. + * This Defeats the purpose so give a warning. */ + warning(loc) << "exactly zero repetitions results " + "in the null machine" << endl; + + retFsm = new FsmGraph(); + retFsm->lambdaFsm(); + } + else { + /* Evaluate the first LexFactorRep. 
*/ + retFsm = factorRep->walk( pd ); + if ( retFsm->startState->isFinState() ) { + warning(loc) << "applying repetition to a machine that " + "accepts zero length word" << endl; + } + + /* The start func orders need to be shifted before doing the + * repetition. */ + pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd ); + + /* Do the repetition on the machine. Already guarded against n == 0 */ + retFsm->repeatOp( lowerRep ); + afterOpMinimize( retFsm ); + } + break; + } + case MaxType: { + /* Get an int from the repetition amount. */ + if ( upperRep == 0 ) { + /* No copies. Don't need to evaluate the factorRep. + * This Defeats the purpose so give a warning. */ + warning(loc) << "max zero repetitions results " + "in the null machine" << endl; + + retFsm = new FsmGraph(); + retFsm->lambdaFsm(); + } + else { + /* Evaluate the first LexFactorRep. */ + retFsm = factorRep->walk( pd ); + if ( retFsm->startState->isFinState() ) { + warning(loc) << "applying max repetition to a machine that " + "accepts zero length word" << endl; + } + + /* The start func orders need to be shifted before doing the + * repetition. */ + pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd ); + + /* Do the repetition on the machine. Already guarded against n == 0 */ + retFsm->optionalRepeatOp( upperRep ); + afterOpMinimize( retFsm ); + } + break; + } + case MinType: { + /* Evaluate the repeated machine. */ + retFsm = factorRep->walk( pd ); + if ( retFsm->startState->isFinState() ) { + warning(loc) << "applying min repetition to a machine that " + "accepts zero length word" << endl; + } + + /* The start func orders need to be shifted before doing the repetition + * and the kleene star. */ + pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd ); + + if ( lowerRep == 0 ) { + /* Acts just like a star op on the machine to return. */ + retFsm->starOp( ); + afterOpMinimize( retFsm ); + } + else { + /* Take a duplicate for the plus. 
*/ + FsmGraph *dup = new FsmGraph( *retFsm ); + + /* Do repetition on the first half. */ + retFsm->repeatOp( lowerRep ); + afterOpMinimize( retFsm ); + + /* Star the duplicate. */ + dup->starOp( ); + afterOpMinimize( dup ); + + /* Tak on the kleene star. */ + retFsm->concatOp( dup ); + afterOpMinimize( retFsm ); + } + break; + } + case RangeType: { + /* Check for bogus range. */ + if ( upperRep - lowerRep < 0 ) { + error(loc) << "invalid range repetition" << endl; + + /* Return null machine as recovery. */ + retFsm = new FsmGraph(); + retFsm->lambdaFsm(); + } + else if ( lowerRep == 0 && upperRep == 0 ) { + /* No copies. Don't need to evaluate the factorRep. This + * defeats the purpose so give a warning. */ + warning(loc) << "zero to zero repetitions results " + "in the null machine" << endl; + + retFsm = new FsmGraph(); + retFsm->lambdaFsm(); + } + else { + /* Now need to evaluate the repeated machine. */ + retFsm = factorRep->walk( pd ); + if ( retFsm->startState->isFinState() ) { + warning(loc) << "applying range repetition to a machine that " + "accepts zero length word" << endl; + } + + /* The start func orders need to be shifted before doing both kinds + * of repetition. */ + pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd ); + + if ( lowerRep == 0 ) { + /* Just doing max repetition. Already guarded against n == 0. */ + retFsm->optionalRepeatOp( upperRep ); + afterOpMinimize( retFsm ); + } + else if ( lowerRep == upperRep ) { + /* Just doing exact repetition. Already guarded against n == 0. */ + retFsm->repeatOp( lowerRep ); + afterOpMinimize( retFsm ); + } + else { + /* This is the case that 0 < lowerRep < upperRep. Take a + * duplicate for the optional repeat. */ + FsmGraph *dup = new FsmGraph( *retFsm ); + + /* Do repetition on the first half. */ + retFsm->repeatOp( lowerRep ); + afterOpMinimize( retFsm ); + + /* Do optional repetition on the second half. 
*/ + dup->optionalRepeatOp( upperRep - lowerRep ); + afterOpMinimize( dup ); + + /* Tak on the duplicate machine. */ + retFsm->concatOp( dup ); + afterOpMinimize( retFsm ); + } + } + break; + } + case FactorNegType: { + /* Evaluate the Factor. Pass it up. */ + retFsm = factorNeg->walk( pd ); + break; + }} + return retFsm; +} + + +/* Clean up after a factor with negation node. */ +LexFactorNeg::~LexFactorNeg() +{ + switch ( type ) { + case NegateType: + case CharNegateType: + delete factorNeg; + break; + case FactorType: + delete factor; + break; + } +} + +/* Evaluate a factor with negation node. */ +FsmGraph *LexFactorNeg::walk( Compiler *pd ) +{ + FsmGraph *retFsm = 0; + + switch ( type ) { + case NegateType: { + /* Evaluate the factorNeg. */ + FsmGraph *toNegate = factorNeg->walk( pd ); + + /* Negation is subtract from dot-star. */ + retFsm = dotStarFsm( pd ); + retFsm->subtractOp( toNegate ); + afterOpMinimize( retFsm ); + break; + } + case CharNegateType: { + /* Evaluate the factorNeg. */ + FsmGraph *toNegate = factorNeg->walk( pd ); + + /* CharNegation is subtract from dot. */ + retFsm = dotFsm( pd ); + retFsm->subtractOp( toNegate ); + afterOpMinimize( retFsm ); + break; + } + case FactorType: { + /* Evaluate the Factor. Pass it up. */ + retFsm = factor->walk( pd ); + break; + }} + return retFsm; +} + +/* Clean up after a factor node. */ +LexFactor::~LexFactor() +{ + switch ( type ) { + case LiteralType: + delete literal; + break; + case RangeType: + delete range; + break; + case OrExprType: + delete reItem; + break; + case RegExprType: + delete regExp; + break; + case ReferenceType: + break; + case ParenType: + delete join; + break; + } +} + +/* Evaluate a factor node. 
*/ +FsmGraph *LexFactor::walk( Compiler *pd ) +{ + FsmGraph *rtnVal = 0; + switch ( type ) { + case LiteralType: + rtnVal = literal->walk( pd ); + break; + case RangeType: + rtnVal = range->walk( pd ); + break; + case OrExprType: + rtnVal = reItem->walk( pd, 0 ); + break; + case RegExprType: + rtnVal = regExp->walk( pd, 0 ); + break; + case ReferenceType: + rtnVal = varDef->walk( pd ); + break; + case ParenType: + rtnVal = join->walk( pd ); + break; + } + + return rtnVal; +} + + +/* Clean up a range object. Must delete the two literals. */ +Range::~Range() +{ + delete lowerLit; + delete upperLit; +} + +bool Range::verifyRangeFsm( FsmGraph *rangeEnd ) +{ + /* Must have two states. */ + if ( rangeEnd->stateList.length() != 2 ) + return false; + /* The start state cannot be final. */ + if ( rangeEnd->startState->isFinState() ) + return false; + /* There should be only one final state. */ + if ( rangeEnd->finStateSet.length() != 1 ) + return false; + /* The final state cannot have any transitions out. */ + if ( rangeEnd->finStateSet[0]->outList.length() != 0 ) + return false; + /* The start state should have only one transition out. */ + if ( rangeEnd->startState->outList.length() != 1 ) + return false; + /* The singe transition out of the start state should not be a range. */ + FsmTrans *startTrans = rangeEnd->startState->outList.head; + if ( startTrans->lowKey != startTrans->highKey ) + return false; + return true; +} + +/* Evaluate a range. Gets the lower an upper key and makes an fsm range. */ +FsmGraph *Range::walk( Compiler *pd ) +{ + /* Construct and verify the suitability of the lower end of the range. */ + FsmGraph *lowerFsm = lowerLit->walk( pd ); + if ( !verifyRangeFsm( lowerFsm ) ) { + error(lowerLit->loc) << + "bad range lower end, must be a single character" << endl; + } + + /* Construct and verify the upper end. 
*/ + FsmGraph *upperFsm = upperLit->walk( pd ); + if ( !verifyRangeFsm( upperFsm ) ) { + error(upperLit->loc) << + "bad range upper end, must be a single character" << endl; + } + + /* Grab the keys from the machines, then delete them. */ + Key lowKey = lowerFsm->startState->outList.head->lowKey; + Key highKey = upperFsm->startState->outList.head->lowKey; + delete lowerFsm; + delete upperFsm; + + /* Validate the range. */ + if ( lowKey > highKey ) { + /* Recover by setting upper to lower; */ + error(lowerLit->loc) << "lower end of range is greater then upper end" << endl; + highKey = lowKey; + } + + /* Return the range now that it is validated. */ + FsmGraph *retFsm = new FsmGraph(); + retFsm->rangeFsm( lowKey, highKey ); + return retFsm; +} + +/* Evaluate a literal object. */ +FsmGraph *Literal::walk( Compiler *pd ) +{ + /* FsmGraph to return, is the alphabet signed. */ + FsmGraph *rtnVal = 0; + + switch ( type ) { + case Number: { + /* Make the fsm key in int format. */ + Key fsmKey = makeFsmKeyNum( literal.data, loc, pd ); + /* Make the new machine. */ + rtnVal = new FsmGraph(); + rtnVal->concatFsm( fsmKey ); + break; + } + case LitString: { + /* Make the array of keys in int format. */ + String interp; + bool caseInsensitive; + prepareLitString( interp, caseInsensitive, literal, loc ); + Key *arr = new Key[interp.length()]; + makeFsmKeyArray( arr, interp.data, interp.length(), pd ); + + /* Make the new machine. */ + rtnVal = new FsmGraph(); + if ( caseInsensitive ) + rtnVal->concatFsmCI( arr, interp.length() ); + else + rtnVal->concatFsm( arr, interp.length() ); + delete[] arr; + break; + }} + return rtnVal; +} + +/* Clean up after a regular expression object. */ +RegExpr::~RegExpr() +{ + switch ( type ) { + case RecurseItem: + delete regExp; + delete item; + break; + case Empty: + break; + } +} + +/* Evaluate a regular expression object. */ +FsmGraph *RegExpr::walk( Compiler *pd, RegExpr *rootRegex ) +{ + /* This is the root regex, pass down a pointer to this. 
*/ + if ( rootRegex == 0 ) + rootRegex = this; + + FsmGraph *rtnVal = 0; + switch ( type ) { + case RecurseItem: { + /* Walk both items. */ + FsmGraph *fsm1 = regExp->walk( pd, rootRegex ); + FsmGraph *fsm2 = item->walk( pd, rootRegex ); + if ( fsm1 == 0 ) + rtnVal = fsm2; + else { + fsm1->concatOp( fsm2 ); + rtnVal = fsm1; + } + break; + } + case Empty: { + /* FIXME: Return something here. */ + rtnVal = 0; + break; + } + } + return rtnVal; +} + +/* Clean up after an item in a regular expression. */ +ReItem::~ReItem() +{ + switch ( type ) { + case Data: + case Dot: + break; + case OrBlock: + case NegOrBlock: + delete orBlock; + break; + } +} + +/* Evaluate a regular expression object. */ +FsmGraph *ReItem::walk( Compiler *pd, RegExpr *rootRegex ) +{ + /* The fsm to return, is the alphabet signed? */ + FsmGraph *rtnVal = 0; + + switch ( type ) { + case Data: { + /* Move the data into an integer array and make a concat fsm. */ + Key *arr = new Key[data.length()]; + makeFsmKeyArray( arr, data.data, data.length(), pd ); + + /* Make the concat fsm. */ + rtnVal = new FsmGraph(); + if ( rootRegex != 0 && rootRegex->caseInsensitive ) + rtnVal->concatFsmCI( arr, data.length() ); + else + rtnVal->concatFsm( arr, data.length() ); + delete[] arr; + break; + } + case Dot: { + /* Make the dot fsm. */ + rtnVal = dotFsm( pd ); + break; + } + case OrBlock: { + /* Get the or block and minmize it. */ + rtnVal = orBlock->walk( pd, rootRegex ); + if ( rtnVal == 0 ) { + rtnVal = new FsmGraph(); + rtnVal->lambdaFsm(); + } + rtnVal->minimizePartition2(); + break; + } + case NegOrBlock: { + /* Get the or block and minimize it. */ + FsmGraph *fsm = orBlock->walk( pd, rootRegex ); + fsm->minimizePartition2(); + + /* Make a dot fsm and subtract from it. */ + rtnVal = dotFsm( pd ); + rtnVal->subtractOp( fsm ); + rtnVal->minimizePartition2(); + break; + } + } + + return rtnVal; +} + +/* Clean up after an or block of a regular expression. 
*/ +ReOrBlock::~ReOrBlock() +{ + switch ( type ) { + case RecurseItem: + /* Only the recursive form has a nested block and an item to release. */ + delete orBlock; + delete item; + break; + case Empty: + break; + } +} + + +/* Evaluate an or block of a regular expression. Walks the nested block and + * this node's item and unions the two machines. Returns 0 for an Empty + * block, so callers must be prepared for a null result. */ +FsmGraph *ReOrBlock::walk( Compiler *pd, RegExpr *rootRegex ) +{ + FsmGraph *rtnVal = 0; + switch ( type ) { + case RecurseItem: { + /* Evaluate the two fsm. */ + FsmGraph *fsm1 = orBlock->walk( pd, rootRegex ); + FsmGraph *fsm2 = item->walk( pd, rootRegex ); + if ( fsm1 == 0 ) + rtnVal = fsm2; + else { + fsm1->unionOp( fsm2 ); + rtnVal = fsm1; + } + break; + } + case Empty: { + rtnVal = 0; + break; + } + } + return rtnVal; +} + +/* Evaluate an or block item of a regular expression. A Data item becomes an + * or-machine over the unique keys found in data. A Range item becomes a range + * machine from lower to upper; when the root regex is case-insensitive the + * opposite-case counterpart of every letter covered by the range is unioned + * in as well. */ +FsmGraph *ReOrItem::walk( Compiler *pd, RegExpr *rootRegex ) +{ + /* The return value, is the alphabet signed? */ + FsmGraph *rtnVal = 0; + switch ( type ) { + case Data: { + /* Make the or machine. */ + rtnVal = new FsmGraph(); + + /* Put the or data into an array of ints. Note that we find unique + * keys. Duplicates are silently ignored. The alternative would be to + * issue warning or an error but since we can't with [a0-9a] or 'a' | + * 'a' don't bother here. */ + KeySet keySet; + makeFsmUniqueKeyArray( keySet, data.data, data.length(), + rootRegex != 0 ? rootRegex->caseInsensitive : false, pd ); + + /* Run the or operator. */ + rtnVal->orFsm( keySet.data, keySet.length() ); + break; + } + case Range: { + /* Make the upper and lower keys. */ + Key lowKey = makeFsmKeyChar( lower, pd ); + Key highKey = makeFsmKeyChar( upper, pd ); + + /* Validate the range. */ + if ( lowKey > highKey ) { + /* Recover by setting upper to lower; */ + error(loc) << "lower end of range is greater then upper end" << endl; + highKey = lowKey; + } + + /* Make the range machine. */ + rtnVal = new FsmGraph(); + rtnVal->rangeFsm( lowKey, highKey ); + + if ( rootRegex != 0 && rootRegex->caseInsensitive ) { + /* The upper-case and lower-case overlaps are tested independently. A + * range such as [X-c] covers letters of both cases and needs the + * counterpart of each part added. */ + if ( lowKey <= 'Z' && 'A' <= highKey ) { + Key otherLow = lowKey < 'A' ? 
Key('A') : lowKey; + Key otherHigh = 'Z' < highKey ? Key('Z') : highKey; + + otherLow = 'a' + ( otherLow - 'A' ); + otherHigh = 'a' + ( otherHigh - 'A' ); + + FsmGraph *otherRange = new FsmGraph(); + otherRange->rangeFsm( otherLow, otherHigh ); + rtnVal->unionOp( otherRange ); + rtnVal->minimizePartition2(); + } + if ( lowKey <= 'z' && 'a' <= highKey ) { + Key otherLow = lowKey < 'a' ? Key('a') : lowKey; + Key otherHigh = 'z' < highKey ? Key('z') : highKey; + + otherLow = 'A' + ( otherLow - 'a' ); + otherHigh = 'A' + ( otherHigh - 'a' ); + + FsmGraph *otherRange = new FsmGraph(); + otherRange->rangeFsm( otherLow, otherHigh ); + rtnVal->unionOp( otherRange ); + rtnVal->minimizePartition2(); + } + } + + break; + }} + return rtnVal; +} diff --git a/src/parsetree.h b/src/parsetree.h new file mode 100644 index 00000000..f2d94226 --- /dev/null +++ b/src/parsetree.h @@ -0,0 +1,3607 @@ +/* + * Copyright 2006-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _COLM_PARSETREE_H +#define _COLM_PARSETREE_H + +#include <iostream> +#include <string.h> +#include <string> + +#include <avlbasic.h> +#include <avlmap.h> +#include <bstmap.h> +#include <bstset.h> +#include <vector.h> +#include <dlist.h> +#include <dlistval.h> +#include <dlistmel.h> + +#include "global.h" +#include "cstring.h" +#include "bytecode.h" + + +/* Operators that are represented with single symbol characters. */ +#define OP_DoubleEql 'e' +#define OP_NotEql 'q' +#define OP_LessEql 'l' +#define OP_GrtrEql 'g' +#define OP_LogicalAnd 'a' +#define OP_LogicalOr 'o' +#define OP_Deref 'd' + +#if SIZEOF_LONG != 4 && SIZEOF_LONG != 8 + #error "SIZEOF_LONG contained an unexpected value" +#endif + +struct NameInst; +struct FsmGraph; +struct RedFsm; +struct ObjectDef; +struct ElementOf; +struct UniqueType; +struct ObjectField; +struct TransBlock; +struct CodeBlock; +struct PdaLiteral; +struct TypeAlias; +struct RegionSet; +struct NameScope; +struct IterCall; +struct TemplateType; +struct ObjectMethod; +struct Reduction; +struct Production; +struct LangVarRef; + +/* + * Code Vector: a Vector<code_t> with helpers that write half_t and word_t + * values one masked byte at a time, least-significant byte first. + */ +struct CodeVect : public Vector<code_t> +{ + void appendHalf( half_t half ) + { + /* Appends the low byte, then the high byte. Not optimal. */ + append( half & 0xff ); + append( (half>>8) & 0xff ); + } + + void appendWord( word_t word ) + { + /* Appends four bytes, or eight when SIZEOF_LONG == 8, low byte first. + * Not optimal. */ + append( word & 0xff ); + append( (word>>8) & 0xff ); + append( (word>>16) & 0xff ); + append( (word>>24) & 0xff ); + #if SIZEOF_LONG == 8 + append( (word>>32) & 0xff ); + append( (word>>40) & 0xff ); + append( (word>>48) & 0xff ); + append( (word>>56) & 0xff ); + #endif + } + + void setHalf( long pos, half_t half ) + { + /* Overwrites the existing entries at pos and pos+1 in place. Not optimal. 
*/ + data[pos] = half & 0xff; + data[pos+1] = (half>>8) & 0xff; + } + + void insertHalf( long pos, half_t half ) + { + /* Inserts the low byte at pos and the high byte at pos+1. Not optimal. */ + insert( pos, half & 0xff ); + insert( pos+1, (half>>8) & 0xff ); + } + + void insertWord( long pos, word_t word ) + { + /* Inserts the bytes of word starting at pos, low byte first: four + * bytes, or eight when SIZEOF_LONG == 8. Not at all optimal. */ + insert( pos, word & 0xff ); + insert( pos+1, (word>>8) & 0xff ); + insert( pos+2, (word>>16) & 0xff ); + insert( pos+3, (word>>24) & 0xff ); + #if SIZEOF_LONG == 8 + insert( pos+4, (word>>32) & 0xff ); + insert( pos+5, (word>>40) & 0xff ); + insert( pos+6, (word>>48) & 0xff ); + insert( pos+7, (word>>56) & 0xff ); + #endif + } + /* Inserts the pointer value of tree, cast to word_t, at pos. */ + void insertTree( long pos, tree_t *tree ) + { insertWord( pos, (word_t) tree ); } +}; + + + +/* Types of builtin machines. */ +enum BuiltinMachine +{ + BT_Any, + BT_Ascii, + BT_Extend, + BT_Alpha, + BT_Digit, + BT_Alnum, + BT_Lower, + BT_Upper, + BT_Cntrl, + BT_Graph, + BT_Print, + BT_Punct, + BT_Space, + BT_Xdigit, + BT_Lambda, + BT_Empty +}; + +/* Must match the LI defines in pdarun.h. Numbering starts at 1. */ +enum LocalType +{ + LT_Tree = 1, + LT_Iter, + LT_RevIter, + LT_UserIter +}; + /* One local: its scope number, its LocalType and its offset. */ +struct LocalLoc +{ + LocalLoc( LocalType type, int scope, int offset ) + : scope(scope), type(type), offset(offset) {} + + int scope; + LocalType type; + int offset; +}; + /* A vector of LocalLoc entries, filled through append(). */ +struct Locals +{ + Vector<LocalLoc> locals; + /* Inserts ll after every leading entry whose scope is <= ll.scope, so a + * vector built only through append() is in ascending scope order and + * entries with equal scope keep their insertion order. */ + void append( const LocalLoc &ll ) + { + int pos = 0; + while ( pos < locals.length() && ll.scope >= locals[pos].scope ) + pos += 1; + locals.insert( pos, ll ); + } +}; + +typedef BstSet<char> CharSet; +typedef Vector<unsigned char> UnsignedCharVect; + +struct Compiler; +struct TypeRef; + +/* Leaf type. */ +struct Literal; + +/* tree_t nodes. 
*/ + +struct LexTerm; +struct LexFactorAug; +struct LexFactorRep; +struct LexFactorNeg; +struct LexFactor; +struct LexExpression; +struct LexJoin; +struct JoinOrLm; +struct RegionJoinOrLm; +struct TokenRegion; +struct Namespace; +struct StructDef; +struct TokenDef; +struct TokenDefListReg; +struct TokenDefListNs; +struct TokenInstance; +struct TokenInstanceListReg; +struct Range; +struct LangEl; + /* Tag stored in ParserAction::type. Presumably selects whether an action applies on entering or on leaving a machine; confirm at the use sites. */ +enum AugType +{ + at_start, + at_leave +}; + +struct Action; +struct PriorDesc; +struct RegExpr; +struct ReItem; +struct ReOrBlock; +struct ReOrItem; +struct ExplicitMachine; +struct InlineItem; +struct InlineList; + +/* Reference to a named state. A NameRef is a vector of name strings; + * NameRefList holds NameRef pointers and NameTargList holds NameInst + * pointers. */ +typedef Vector<String> NameRef; +typedef Vector<NameRef*> NameRefList; +typedef Vector<NameInst*> NameTargList; + +/* Structure for storing the location of an epsilon transition together with + * the name reference it targets. The target is stored by value, so the + * constructor copies it. */ +struct EpsilonLink +{ + EpsilonLink( const InputLoc &loc, NameRef &target ) + : loc(loc), target(target) { } + + InputLoc loc; + NameRef target; +}; + /* Bundles an input location, a string and an ObjectField pointer. Presumably a named label bound to an object field; confirm at the use sites. */ +struct Label +{ + Label( const InputLoc &loc, const String &data, ObjectField *objField ) + : loc(loc), data(data), objField(objField) { } + + InputLoc loc; + String data; + ObjectField *objField; +}; + +/* Structure represents an action assigned to some LexFactorAug node. The + * factor with aug will keep an array of these. */ +struct ParserAction +{ + ParserAction( const InputLoc &loc, AugType type, int localErrKey, Action *action ) + : loc(loc), type(type), localErrKey(localErrKey), action(action) { } + + InputLoc loc; + AugType type; + int localErrKey; + Action *action; +}; + /* A string together with an input location. */ +struct Token +{ + String data; + InputLoc loc; +}; + /* Declared here, defined elsewhere. result and caseInsensitive are passed by non-const reference, so they are the outputs computed from srcString; loc is presumably used for error reporting (confirm in the definition). */ +void prepareLitString( String &result, bool &caseInsensitive, + const String &srcString, const InputLoc &loc ); + /* Stream output for a Token; defined elsewhere. */ +std::ostream &operator<<(std::ostream &out, const Token &token ); + /* Map keyed by String with TokenInstance pointer values, ordered by ColmCmpStr, and its element type. */ +typedef AvlMap< String, TokenInstance*, ColmCmpStr > LiteralDict; +typedef AvlMapEl< String, TokenInstance* > LiteralDictEl; + +/* Store the value and type of a priority augmentation. 
*/ +struct PriorityAug +{ + PriorityAug( AugType type, int priorKey, int priorValue ) : + type(type), priorKey(priorKey), priorValue(priorValue) { } + + AugType type; + int priorKey; + int priorValue; +}; + +/* + * A Variable Definition + */ +struct LexDefinition +{ + LexDefinition( const String &name, LexJoin *join ) + : name(name), join(join) { } + + /* Parse tree traversal. */ + FsmGraph *walk( Compiler *pd ); + void makeNameTree( const InputLoc &loc, Compiler *pd ); + + String name; + LexJoin *join; +}; + +typedef Vector<String> StringVect; +typedef CmpTable<String, ColmCmpStr> CmpStrVect; + +struct NamespaceQual +{ + NamespaceQual() + : + cachedNspaceQual(0), + declInNspace(0) + {} + + static NamespaceQual *cons( Namespace *declInNspace ) + { + NamespaceQual *nsq = new NamespaceQual; + nsq->declInNspace = declInNspace; + return nsq; + } + + Namespace *cachedNspaceQual; + Namespace *declInNspace; + + StringVect qualNames; + + Namespace *searchFrom( Namespace *from, StringVect::Iter &qualPart ); + Namespace *getQual( Compiler *pd ); + bool thisOnly() + { return qualNames.length() != 0; } +}; + +struct ReCapture +{ + ReCapture( Action *markEnter, Action *markLeave, ObjectField *objField ) + : markEnter(markEnter), markLeave(markLeave), objField(objField) {} + + Action *markEnter; + Action *markLeave; + ObjectField *objField; +}; + + +typedef Vector<ReCapture> ReCaptureVect; + +struct TokenDefPtr1 +{ + TokenDef *prev, *next; +}; + +struct TokenDefPtr2 +{ + TokenDef *prev, *next; +}; + +struct TokenDef +: + public TokenDefPtr1, + public TokenDefPtr2 +{ + TokenDef() + : + action(0), tdLangEl(0), inLmSelect(false), dupOf(0), + noPostIgnore(false), noPreIgnore(false), isZero(false) + {} + + static TokenDef *cons( const String &name, const String &literal, + bool isLiteral, bool isIgnore, LexJoin *join, CodeBlock *codeBlock, + const InputLoc &semiLoc, int longestMatchId, Namespace *nspace, + RegionSet *regionSet, ObjectDef *objectDef, StructDef *contextIn ) + { + 
TokenDef *t = new TokenDef; + + t->name = name; + t->literal = literal; + t->isLiteral = isLiteral; + t->isIgnore = isIgnore; + t->join = join; + t->action = 0; + t->codeBlock = codeBlock; + t->tdLangEl = 0; + t->semiLoc = semiLoc; + t->longestMatchId = longestMatchId; + t->inLmSelect = false; + t->nspace = nspace; + t->regionSet = regionSet; + t->objectDef = objectDef; + t->contextIn = contextIn; + t->dupOf = 0; + t->noPostIgnore = false; + t->noPreIgnore = false; + t->isZero = false; + + return t; + } + + InputLoc getLoc(); + + String name; + String literal; + bool isLiteral; + bool isIgnore; + LexJoin *join; + Action *action; + CodeBlock *codeBlock; + LangEl *tdLangEl; + InputLoc semiLoc; + + Action *setActId; + Action *actOnLast; + Action *actOnNext; + Action *actLagBehind; + int longestMatchId; + bool inLmSelect; + Namespace *nspace; + RegionSet *regionSet; + ReCaptureVect reCaptureVect; + ObjectDef *objectDef; + StructDef *contextIn; + + TokenDef *dupOf; + bool noPostIgnore; + bool noPreIgnore; + bool isZero; +}; + +struct TokenInstancePtr +{ + TokenInstance *prev, *next; +}; + +struct TokenInstance +: + public TokenInstancePtr +{ + TokenInstance() + : + action(0), + inLmSelect(false), + dupOf(0) + {} + + static TokenInstance *cons( TokenDef *tokenDef, + LexJoin *join, const InputLoc &semiLoc, + int longestMatchId, Namespace *nspace, TokenRegion *tokenRegion ) + { + TokenInstance *t = new TokenInstance; + + t->tokenDef = tokenDef; + t->join = join; + t->action = 0; + t->semiLoc = semiLoc; + t->longestMatchId = longestMatchId; + t->inLmSelect = false; + t->nspace = nspace; + t->tokenRegion = tokenRegion; + t->dupOf = 0; + + return t; + } + + InputLoc getLoc(); + + TokenDef *tokenDef; + LexJoin *join; + Action *action; + InputLoc semiLoc; + + Action *setActId; + Action *actOnLast; + Action *actOnNext; + Action *actLagBehind; + int longestMatchId; + bool inLmSelect; + Namespace *nspace; + TokenRegion *tokenRegion; + + TokenInstance *dupOf; +}; + +struct 
LelDefList; + +struct NtDef +{ + static NtDef *cons( const String &name, Namespace *nspace, + LelDefList *defList, ObjectDef *objectDef, + StructDef *contextIn, bool reduceFirst ) + { + NtDef *nt = new NtDef; + + nt->name = name; + nt->nspace = nspace; + nt->defList = defList; + nt->objectDef = objectDef; + nt->contextIn = contextIn; + nt->reduceFirst = reduceFirst; + + return nt; + } + + static NtDef *cons( const String &name, Namespace *nspace, + StructDef *contextIn, bool reduceFirst ) + { + NtDef *nt = new NtDef; + + nt->name = name; + nt->nspace = nspace; + nt->defList = 0; + nt->objectDef = 0; + nt->contextIn = contextIn; + nt->reduceFirst = reduceFirst; + + return nt; + } + + String name; + Namespace *nspace; + LelDefList *defList; + ObjectDef *objectDef; + StructDef *contextIn; + bool reduceFirst; + + NtDef *prev, *next; +}; + +struct NtDefList : DList<NtDef> {}; + +/* Declare a new type so that ptreetypes.h need not include dlist.h. */ +struct TokenInstanceListReg : DListMel<TokenInstance, TokenInstancePtr> {}; + +/* Declare a new type so that ptreetypes.h need not include dlist.h. */ +struct TokenDefListReg : DListMel<TokenDef, TokenDefPtr1> {}; +struct TokenDefListNs : DListMel<TokenDef, TokenDefPtr2> {}; + +struct StructStack + : public Vector<StructDef*> +{ + StructDef *top() + { return length() > 0 ? 
Vector<StructDef*>::top() : 0; } +}; + +struct StructEl; + +struct StructDef +{ + StructDef( const InputLoc &loc, const String &name, ObjectDef *objectDef ) + : + loc(loc), + name(name), + objectDef(objectDef), + structEl(0) + {} + + InputLoc loc; + String name; + ObjectDef *objectDef; + StructEl *structEl; + + StructDef *prev, *next; +}; + +struct StructEl +{ + StructEl( const String &name, StructDef *structDef ) + : + name(name), + structDef(structDef), + id(-1) + {} + + String name; + StructDef *structDef; + int id; + + StructEl *prev, *next; +}; + +typedef DList<StructEl> StructElList; +struct StructDefList : DList<StructDef> {}; + +struct TypeMapEl + : public AvlTreeEl<TypeMapEl> +{ + enum Type + { + AliasType = 1, + LangElType, + StructType + }; + + const String &getKey() { return key; } + + TypeMapEl( Type type, const String &key, TypeRef *typeRef ) + : type(type), key(key), value(0), typeRef(typeRef), structEl(0) {} + + TypeMapEl( Type type, const String &key, LangEl *value ) + : type(type), key(key), value(value), typeRef(0), structEl(0) {} + + TypeMapEl( Type type, const String &key, StructEl *structEl ) + : type(type), key(key), value(0), typeRef(0), structEl(structEl) {} + + Type type; + String key; + LangEl *value; + TypeRef *typeRef; + StructEl *structEl; + + TypeMapEl *prev, *next; +}; + +/* Symbol Map. */ +typedef AvlTree< TypeMapEl, String, ColmCmpStr > TypeMap; + +typedef Vector<TokenRegion*> RegionVect; + +struct RegionImpl +{ + RegionImpl() + : + regionNameInst(0), + lmActSelect(0), + lmSwitchHandlesError(false), + defaultTokenInstance(0), + wasEmpty(false) + {} + + InputLoc loc; + + /* This gets saved off during the name walk. Can save it off because token + * regions are referenced once only. */ + NameInst *regionNameInst; + + TokenInstanceListReg tokenInstanceList; + Action *lmActSelect; + bool lmSwitchHandlesError; + TokenInstance *defaultTokenInstance; + + /* We alway init empty scanners with a single token. 
If we had to do this + * then wasEmpty is true. */ + bool wasEmpty; + + RegionImpl *prev, *next; + + void runLongestMatch( Compiler *pd, FsmGraph *graph ); + void transferScannerLeavingActions( FsmGraph *graph ); + FsmGraph *walk( Compiler *pd ); + + void restart( FsmGraph *graph, FsmTrans *trans ); + void makeNameTree( const InputLoc &loc, Compiler *pd ); + void makeActions( Compiler *pd ); + Action *newAction( Compiler *pd, const InputLoc &loc, + const String &name, InlineList *inlineList ); +}; + +struct TokenRegion +{ + /* Construct with a list of joins */ + TokenRegion( const InputLoc &loc, int id, RegionImpl *impl ) + : + loc(loc), + id(id), + preEofBlock(0), + zeroLel(0), + ignoreOnly(0), + impl(impl) + { } + + InputLoc loc; + int id; + + CodeBlock *preEofBlock; + + LangEl *zeroLel; + TokenRegion *ignoreOnly; + + RegionImpl *impl; + + TokenRegion *next, *prev; + + /* tree_t traversal. */ + void findName( Compiler *pd ); +}; + +struct RegionSet +{ + RegionSet( RegionImpl *implTokenIgnore, RegionImpl *implTokenOnly, + RegionImpl *implIgnoreOnly, TokenRegion *tokenIgnore, + TokenRegion *tokenOnly, TokenRegion *ignoreOnly, + TokenRegion *collectIgnore ) + : + implTokenIgnore(implTokenIgnore), + implTokenOnly(implTokenOnly), + implIgnoreOnly(implIgnoreOnly), + + tokenIgnore(tokenIgnore), + tokenOnly(tokenOnly), + ignoreOnly(ignoreOnly), + collectIgnore(collectIgnore) + {} + + /* Provides the scanner state machines. We reuse ignore-only. 
*/ + RegionImpl *implTokenIgnore; + RegionImpl *implTokenOnly; + RegionImpl *implIgnoreOnly; + + TokenRegion *tokenIgnore; + TokenRegion *tokenOnly; + TokenRegion *ignoreOnly; + TokenRegion *collectIgnore; + + TokenDefListReg tokenDefList; + + RegionSet *next, *prev; +}; + +typedef Vector<RegionSet*> RegionSetVect; + +typedef DList<RegionSet> RegionSetList; +typedef DList<TokenRegion> RegionList; +typedef DList<RegionImpl> RegionImplList; + +typedef Vector<Namespace*> NamespaceVect; +typedef Vector<Reduction*> ReductionVect; + +/* Generics have runtime-representations, so we must track them as unique + * types. This gives the runtimes some idea of what is contained in the + * structures. */ +struct GenericType + : public DListEl<GenericType> +{ + GenericType( long typeId, long id, TypeRef *elTr, + TypeRef *keyTr, TypeRef *valueTr, ObjectField *el ) + : + typeId(typeId), id(id), + elTr(elTr), keyTr(keyTr), valueTr(valueTr), + elUt(0), keyUt(0), valueUt(0), + objDef(0), el(el), elOffset(0) + {} + + void declare( Compiler *pd, Namespace *nspace ); + + long typeId; + long id; + + TypeRef *elTr; + TypeRef *keyTr; + TypeRef *valueTr; + + UniqueType *elUt; + UniqueType *keyUt; + UniqueType *valueUt; + + ObjectDef *objDef; + ObjectField *el; + long elOffset; +}; + +typedef DList<GenericType> GenericList; + +/* Graph dictionary. */ +struct GraphDictEl +: + public AvlTreeEl<GraphDictEl>, + public DListEl<GraphDictEl> +{ + GraphDictEl( const String &key ) + : key(key), value(0), isInstance(false) { } + + GraphDictEl( const String &key, LexDefinition *value ) + : key(key), value(value), isInstance(false) { } + + const String &getKey() { return key; } + + String key; + LexDefinition *value; + bool isInstance; + + /* Location info of graph definition. Points to variable name of assignment. 
*/ + InputLoc loc; +}; + +typedef AvlTree<GraphDictEl, String, ColmCmpStr> GraphDict; +typedef DList<GraphDictEl> GraphList; + +struct TypeAlias +{ + TypeAlias( const InputLoc &loc, Namespace *nspace, + const String &name, TypeRef *typeRef ) + : + loc(loc), + nspace(nspace), + name(name), + typeRef(typeRef) + {} + + InputLoc loc; + Namespace *nspace; + String name; + TypeRef *typeRef; + + TypeAlias *prev, *next; +}; + +typedef DList<TypeAlias> TypeAliasList; + +typedef AvlMap<String, ObjectField*, ColmCmpStr> FieldMap; +typedef AvlMapEl<String, ObjectField*> FieldMapEl; + +typedef AvlMap<String, ObjectMethod*, ColmCmpStr> MethodMap; +typedef AvlMapEl<String, ObjectMethod*> MethodMapEl; + +/* tree_t of name scopes for an object def. All of the object fields inside this + * tree live in one object def. This is used for scoping names in functions. */ +struct NameScope +{ + NameScope() + : + owningObj(0), + parentScope(0), + childIter(0), + caseClauseVarRef(0) + {} + + ObjectDef *owningObj; + FieldMap fieldMap; + MethodMap methodMap; + + NameScope *parentScope; + DList<NameScope> children; + + /* For iteration after declaration. */ + NameScope *childIter; + LangVarRef *caseClauseVarRef; + + NameScope *prev, *next; + + int depth() + { + int depth = 0; + NameScope *scope = this; + while ( scope != 0 ) { + depth += 1; + scope = scope->parentScope; + } + return depth; + } + + ObjectField *findField( const String &name ) const; + ObjectMethod *findMethod( const String &name ) const; + + ObjectField *checkRedecl( const String &name ); + void insertField( const String &name, ObjectField *value ); + +}; + + +struct Namespace +{ + /* Construct with a list of joins */ + Namespace( const InputLoc &loc, const String &name, int id, + Namespace *parentNamespace ) : + loc(loc), name(name), id(id), + parentNamespace(parentNamespace) + { + rootScope = new NameScope; + } + + /* tree_t traversal. 
*/ + Namespace *findNamespace( const String &name ); + Reduction *findReduction( const String &name ); + + InputLoc loc; + String name; + int id; + + /* Literal patterns and the dictionary mapping literals to the underlying + * tokens. */ + LiteralDict literalDict; + + /* List of tokens defs in the namespace. */ + TokenDefListNs tokenDefList; + + /* List of nonterminal defs in the namespace. */ + NtDefList ntDefList; + + StructDefList structDefList; + + /* Dictionary of symbols within the region. */ + TypeMap typeMap; + GenericList genericList; + + /* regular language definitions. */ + GraphDict rlMap; + + TypeAliasList typeAliasList; + + Namespace *parentNamespace; + NamespaceVect childNamespaces; + + ReductionVect reductions; + + NameScope *rootScope; + + Namespace *next, *prev; + + void declare( Compiler *pd ); +}; + +typedef DList<Namespace> NamespaceList; +typedef BstSet< Namespace*, CmpOrd<Namespace*> > NamespaceSet; + +struct ReduceTextItem +{ + enum Type { + LhsRef, + RhsRef, + TreeRef, + RhsLoc, + Txt + }; + + ReduceTextItem() : n(0) {} + + Type type; + String txt; + int n; + + ReduceTextItem *prev, *next; +}; + +typedef DList<ReduceTextItem> ReduceTextItemList; + +struct ReduceNonTerm +{ + ReduceNonTerm( const InputLoc &loc, TypeRef *nonTerm ) + : + loc(loc), + nonTerm(nonTerm) + {} + + InputLoc loc; + TypeRef *nonTerm; + ReduceTextItemList itemList; + + ReduceNonTerm *prev, *next; +}; + +struct ReduceAction +{ + ReduceAction( const InputLoc &loc, TypeRef *nonTerm, + const String &prod ) + : + loc(loc), nonTerm(nonTerm), + prod(prod), + production(0) + {} + + InputLoc loc; + TypeRef *nonTerm; + String prod; + ReduceTextItemList itemList; + + Production *production; + + ReduceAction *prev, *next; +}; + +typedef DList<ReduceAction> ReduceActionList; +typedef DList<ReduceNonTerm> ReduceNonTermList; + +typedef Vector<ReduceAction*> ReduceActionVect; + +struct Reduction +{ + Reduction( const InputLoc &loc, String name ) + : + loc(loc), name(name), + 
needData(0), needLoc(0), + postfixBased(false), + parserBased(false) + { + static int nextId = 1; + id = nextId++; + var = name.data; + var.data[0] = tolower( var.data[0] ); + } + + InputLoc loc; + String name; + String var; + int id; + + bool *needData; + bool *needLoc; + + bool postfixBased; + bool parserBased; + + ReduceActionList reduceActions; + ReduceNonTermList reduceNonTerms; +}; + +/* + * LexJoin + */ +struct LexJoin +{ + LexJoin() + : + expr(0), + context(0), + mark(0) + {} + + static LexJoin *cons( LexExpression *expr ) + { + LexJoin *j = new LexJoin; + j->expr = expr; + return j; + } + + /* tree_t traversal. */ + FsmGraph *walk( Compiler *pd ); + void makeNameTree( Compiler *pd ); + void varDecl( Compiler *pd, TokenDef *tokenDef ); + + /* Data. */ + LexExpression *expr; + LexJoin *context; + Action *mark; +}; + +/* + * LexExpression + */ +struct LexExpression +{ + enum Type { + OrType, + IntersectType, + SubtractType, + StrongSubtractType, + TermType, + BuiltinType + }; + + LexExpression( ) : + expression(0), term(0), builtin((BuiltinMachine)-1), + type((Type)-1), prev(this), next(this) { } + + /* Construct with an expression on the left and a term on the right. */ + static LexExpression *cons( LexExpression *expression, LexTerm *term, Type type ) + { + LexExpression *ret = new LexExpression; + ret->type = type; + ret->expression = expression; + ret->term = term; + return ret; + } + + /* Construct with only a term. */ + static LexExpression *cons( LexTerm *term ) + { + LexExpression *ret = new LexExpression; + ret->type = TermType; + ret->term = term; + return ret; + } + + /* Construct with a builtin type. */ + static LexExpression *cons( BuiltinMachine builtin ) + { + LexExpression *ret = new LexExpression; + ret->type = BuiltinType; + ret->builtin = builtin; + return ret; + } + + ~LexExpression(); + + /* tree_t traversal. 
*/ + FsmGraph *walk( Compiler *pd, bool lastInSeq = true ); + void makeNameTree( Compiler *pd ); + void varDecl( Compiler *pd, TokenDef *tokenDef ); + + /* Node data. */ + LexExpression *expression; + LexTerm *term; + BuiltinMachine builtin; + Type type; + + LexExpression *prev, *next; +}; + +/* + * LexTerm + */ +struct LexTerm +{ + enum Type { + ConcatType, + RightStartType, + RightFinishType, + LeftType, + FactorAugType + }; + + LexTerm() : + term(0), factorAug(0), type((Type)-1) { } + + static LexTerm *cons( LexTerm *term, LexFactorAug *factorAug ) + { + LexTerm *ret = new LexTerm; + ret->type = ConcatType; + ret->term = term; + ret->factorAug = factorAug; + return ret; + } + + static LexTerm *cons( LexTerm *term, LexFactorAug *factorAug, Type type ) + { + LexTerm *ret = new LexTerm; + ret->type = type; + ret->term = term; + ret->factorAug = factorAug; + return ret; + } + + static LexTerm *cons( LexFactorAug *factorAug ) + { + LexTerm *ret = new LexTerm; + ret->type = FactorAugType; + ret->factorAug = factorAug; + return ret; + } + + ~LexTerm(); + + FsmGraph *walk( Compiler *pd, bool lastInSeq = true ); + void makeNameTree( Compiler *pd ); + void varDecl( Compiler *pd, TokenDef *tokenDef ); + + LexTerm *term; + LexFactorAug *factorAug; + Type type; + + /* Priority descriptor for RightFinish type. */ + PriorDesc priorDescs[2]; +}; + + +/* Third level of precedence. Augmenting nodes with actions and priorities. */ +struct LexFactorAug +{ + LexFactorAug() : + factorRep(0) { } + + static LexFactorAug *cons( LexFactorRep *factorRep ) + { + LexFactorAug *f = new LexFactorAug; + f->factorRep = factorRep; + return f; + } + + ~LexFactorAug(); + + /* tree_t traversal. */ + FsmGraph *walk( Compiler *pd ); + void makeNameTree( Compiler *pd ); + void varDecl( Compiler *pd, TokenDef *tokenDef ); + + void assignActions( Compiler *pd, FsmGraph *graph, int *actionOrd ); + + /* Actions and priorities assigned to the factor node. 
*/ + Vector<ParserAction> actions; + ReCaptureVect reCaptureVect; + + LexFactorRep *factorRep; +}; + +/* Fourth level of precedence. Trailing unary operators. Provide kleen star, + * optional and plus. */ +struct LexFactorRep +{ + enum Type { + StarType, + StarStarType, + OptionalType, + PlusType, + ExactType, + MaxType, + MinType, + RangeType, + FactorNegType + }; + + LexFactorRep() + : + factorRep(0), + factorNeg(0), + lowerRep(0), + upperRep(0), + type((Type)-1) + { } + + static LexFactorRep *cons( const InputLoc &loc, LexFactorRep *factorRep, + int lowerRep, int upperRep, Type type ) + { + LexFactorRep *f = new LexFactorRep; + f->type = type; + f->loc = loc; + f->factorRep = factorRep; + f->factorNeg = 0; + f->lowerRep = lowerRep; + f->upperRep = upperRep; + return f; + } + + static LexFactorRep *cons( LexFactorNeg *factorNeg ) + { + LexFactorRep *f = new LexFactorRep; + f->type = FactorNegType; + f->factorNeg = factorNeg; + return f; + } + + ~LexFactorRep(); + + /* tree_t traversal. */ + FsmGraph *walk( Compiler *pd ); + void makeNameTree( Compiler *pd ); + + InputLoc loc; + LexFactorRep *factorRep; + LexFactorNeg *factorNeg; + int lowerRep, upperRep; + Type type; + + /* Priority descriptor for StarStar type. */ + PriorDesc priorDescs[2]; +}; + +/* Fifth level of precedence. Provides Negation. */ +struct LexFactorNeg +{ + enum Type { + NegateType, + CharNegateType, + FactorType + }; + + LexFactorNeg() + : + factorNeg(0), + factor(0), + type((Type)-1) + {} + + static LexFactorNeg *cons( LexFactorNeg *factorNeg, Type type ) + { + LexFactorNeg *f = new LexFactorNeg; + f->type = type; + f->factorNeg = factorNeg; + f->factor = 0; + return f; + } + + static LexFactorNeg *cons( LexFactor *factor ) + { + LexFactorNeg *f = new LexFactorNeg; + f->type = FactorType; + f->factorNeg = 0; + f->factor = factor; + return f; + } + + ~LexFactorNeg(); + + /* tree_t traversal. 
*/ + FsmGraph *walk( Compiler *pd ); + void makeNameTree( Compiler *pd ); + + LexFactorNeg *factorNeg; + LexFactor *factor; + Type type; +}; + +/* + * LexFactor + */ +struct LexFactor +{ + /* Language elements a factor node can be. */ + enum Type { + LiteralType, + RangeType, + OrExprType, + RegExprType, + ReferenceType, + ParenType + }; + + LexFactor() + : + literal(0), + range(0), + reItem(0), + regExp(0), + varDef(0), + join(0), + lower(0), + upper(0), + type((Type)-1) + {} + + /* Construct with a literal fsm. */ + static LexFactor *cons( Literal *literal ) + { + LexFactor *f = new LexFactor; + f->type = LiteralType; + f->literal = literal; + return f; + } + + /* Construct with a range. */ + static LexFactor *cons( Range *range ) + { + LexFactor *f = new LexFactor; + f->type = RangeType; + f->range = range; + return f; + } + + /* Construct with the or part of a regular expression. */ + static LexFactor *cons( ReItem *reItem ) + { + LexFactor *f = new LexFactor; + f->type = OrExprType; + f->reItem = reItem; + return f; + } + + /* Construct with a regular expression. */ + static LexFactor *cons( RegExpr *regExp ) + { + LexFactor *f = new LexFactor; + f->type = RegExprType; + f->regExp = regExp; + return f; + } + + /* Construct with a reference to a var def. */ + static LexFactor *cons( const InputLoc &loc, LexDefinition *varDef ) + { + LexFactor *f = new LexFactor; + f->type = ReferenceType; + f->loc = loc; + f->varDef = varDef; + return f; + } + + /* Construct with a parenthesized join. */ + static LexFactor *cons( LexJoin *join ) + { + LexFactor *f = new LexFactor; + f->type = ParenType; + f->join = join; + return f; + } + + /* Cleanup. */ + ~LexFactor(); + + /* tree_t traversal. */ + FsmGraph *walk( Compiler *pd ); + void makeNameTree( Compiler *pd ); + + InputLoc loc; + Literal *literal; + Range *range; + ReItem *reItem; + RegExpr *regExp; + LexDefinition *varDef; + LexJoin *join; + int lower, upper; + Type type; +}; + +/* A range machine. 
Only ever composed of two literals. */ +struct Range +{ + /* Allocate a range over the two end-point literals. */ + static Range *cons( Literal *lowerLit, Literal *upperLit ) + { + Range *r = new Range; + r->lowerLit = lowerLit; + r->upperLit = upperLit; + return r; + } + + ~Range(); + /* Builds the range machine running from the key of the lower literal to + * the key of the upper literal. */ + FsmGraph *walk( Compiler *pd ); + bool verifyRangeFsm( FsmGraph *rangeEnd ); + + Literal *lowerLit; + Literal *upperLit; +}; + +/* Some literal machine. Can be a number or literal string. */ +struct Literal +{ + enum LiteralType { Number, LitString }; + + /* Allocate a literal of the given type from its source text. */ + static Literal *cons( const InputLoc &loc, const String &literal, LiteralType type ) + { + Literal *l = new Literal; + l->loc = loc; + l->literal = literal; + l->type = type; + return l; + } + + FsmGraph *walk( Compiler *pd ); + + InputLoc loc; + String literal; + LiteralType type; +}; + +/* Regular expression. Either Empty, or RecurseItem: the expression so far + * (regExp) followed by one more item. caseInsensitive is read from the root + * expression while the items are walked. */ +struct RegExpr +{ + enum RegExpType { RecurseItem, Empty }; + + /* Constructors. */ + static RegExpr *cons() + { + RegExpr *r = new RegExpr; + /* An empty expression has no children. Null the pointers so they are + * never left indeterminate. */ + r->regExp = 0; + r->item = 0; + r->type = Empty; + r->caseInsensitive = false; + return r; + } + + static RegExpr *cons( RegExpr *regExp, ReItem *item ) + { + RegExpr *r = new RegExpr; + r->regExp = regExp; + r->item = item; + r->type = RecurseItem; + r->caseInsensitive = false; + return r; + } + + ~RegExpr(); + FsmGraph *walk( Compiler *pd, RegExpr *rootRegex ); + + RegExpr *regExp; + ReItem *item; + RegExpType type; + bool caseInsensitive; +}; + +/* An item in a regular expression. 
*/ +struct ReItem +{ + enum ReItemType { Data, Dot, OrBlock, NegOrBlock }; + + static ReItem *cons( const String &data ) + { + ReItem *r = new ReItem; + r->data = data; + r->type = Data; + return r; + } + + static ReItem *cons( ReItemType type ) + { + ReItem *r = new ReItem; + r->type = type; + return r; + } + + static ReItem *cons( ReOrBlock *orBlock, ReItemType type ) + { + ReItem *r = new ReItem; + r->orBlock = orBlock; + r->type = type; + return r; + } + + ~ReItem(); + FsmGraph *walk( Compiler *pd, RegExpr *rootRegex ); + + String data; + ReOrBlock *orBlock; + ReItemType type; +}; + +/* An or block item. */ +struct ReOrBlock +{ + enum ReOrBlockType { RecurseItem, Empty }; + + /* Constructors. */ + static ReOrBlock *cons() + { + ReOrBlock *r = new ReOrBlock; + r->type = Empty; + return r; + } + + static ReOrBlock *cons( ReOrBlock *orBlock, ReOrItem *item ) + { + ReOrBlock *r = new ReOrBlock; + r->orBlock = orBlock; + r->item = item; + r->type = RecurseItem; + return r; + } + + ~ReOrBlock(); + FsmGraph *walk( Compiler *pd, RegExpr *rootRegex ); + + ReOrBlock *orBlock; + ReOrItem *item; + ReOrBlockType type; +}; + +/* An item in an or block. 
*/ +struct ReOrItem +{ + enum ReOrItemType { Data, Range }; + + static ReOrItem *cons( const InputLoc &loc, const String &data ) + { + ReOrItem *r = new ReOrItem; + r->loc = loc; + r->data = data; + r->type = Data; + return r; + } + + static ReOrItem *cons( const InputLoc &loc, char lower, char upper ) + { + ReOrItem *r = new ReOrItem; + r->loc = loc; + r->lower = lower; + r->upper = upper; + r->type = Range; + return r; + } + + FsmGraph *walk( Compiler *pd, RegExpr *rootRegex ); + + InputLoc loc; + String data; + char lower; + char upper; + ReOrItemType type; +}; + + +/* + * Inline code tree + */ +struct InlineList; +struct InlineItem +{ + enum Type + { + Text, + LmSwitch, + LmSetActId, + LmSetTokEnd, + LmOnLast, + LmOnNext, + LmOnLagBehind, + LmInitAct, + LmInitTokStart, + LmSetTokStart + }; + + static InlineItem *cons( const InputLoc &loc, const String &data, Type type ) + { + InlineItem *i = new InlineItem; + i->loc = loc; + i->data = data; + i->nameRef = 0; + i->children = 0; + i->type = type; + return i; + } + + static InlineItem *cons( const InputLoc &loc, NameRef *nameRef, Type type ) + { + InlineItem *i = new InlineItem; + i->loc = loc; + i->nameRef = nameRef; + i->children = 0; + i->type = type; + return i; + } + + static InlineItem *cons( const InputLoc &loc, RegionImpl *tokenRegion, + TokenInstance *longestMatchPart, Type type ) + { + InlineItem *i = new InlineItem; + i->loc = loc; + i->nameRef = 0; + i->children = 0; + i->tokenRegion = tokenRegion; + i->longestMatchPart = longestMatchPart; + i->type = type; + return i; + } + + static InlineItem *cons( const InputLoc &loc, NameInst *nameTarg, Type type ) + { + InlineItem *i = new InlineItem; + i->loc = loc; + i->nameRef = 0; + i->nameTarg = nameTarg; + i->children = 0; + i->type = type; + return i; + } + + static InlineItem *cons( const InputLoc &loc, Type type ) + { + InlineItem *i = new InlineItem; + i->loc = loc; + i->nameRef = 0; + i->children = 0; + i->type = type; + return i; + } + + InputLoc loc; 
+ String data; + NameRef *nameRef; + NameInst *nameTarg; + InlineList *children; + RegionImpl *tokenRegion; + TokenInstance *longestMatchPart; + Type type; + + InlineItem *prev, *next; +}; + +struct InlineList +: + public DList<InlineItem> +{ + InlineList( int i ) {} + + static InlineList *cons() + { + return new InlineList( 0 ); + } +}; + + +struct ProdEl; +struct LangVarRef; +struct ObjectField; + +struct PatternItem +{ + enum Form { + TypeRefForm, + InputTextForm + }; + + static PatternItem *cons( Form form, const InputLoc &loc, const String &data ) + { + PatternItem *p = new PatternItem; + p->form = form; + p->loc = loc; + p->prodEl = 0; + p->data = data; + p->region = 0; + p->varRef = 0; + p->bindId = 0; + return p; + } + + static PatternItem *cons( Form form, const InputLoc &loc, ProdEl *prodEl ) + { + PatternItem *p = new PatternItem; + p->form = form; + p->loc = loc; + p->prodEl = prodEl; + p->region = 0; + p->varRef = 0; + p->bindId = 0; + return p; + } + + Form form; + InputLoc loc; + ProdEl *prodEl; + String data; + TokenRegion *region; + LangVarRef *varRef; + long bindId; + PatternItem *prev, *next; +}; + +struct LangExpr; + +struct PatternItemList + : public DList<PatternItem> +{ + static PatternItemList *cons( PatternItem *patternItem ) + { + PatternItemList *list = new PatternItemList; + list->append( patternItem ); + return list; + } +}; + +struct ConsItem +{ + enum Trim { + TrimYes, + TrimNo, + TrimDefault + }; + + enum Type { + InputText, + ExprType, + LiteralType + }; + + ConsItem() + : + type((Type)-1), + expr(0), + langEl(0), + prodEl(0), + bindId(-1), + trim(TrimDefault) + { + } + + static ConsItem *cons( const InputLoc &loc, Type type, const String &data ) + { + ConsItem *r = new ConsItem; + r->loc = loc; + r->type = type; + r->data = data; + return r; + } + + static ConsItem *cons( const InputLoc &loc, Type type, LangExpr *expr, Trim trim ) + { + ConsItem *r = new ConsItem; + r->loc = loc; + r->type = type; + r->expr = expr; + r->trim = 
trim; + return r; + } + + static ConsItem *cons( const InputLoc &loc, Type type, ProdEl *prodEl ) + { + ConsItem *r = new ConsItem; + r->loc = loc; + r->type = type; + r->expr = 0; + r->prodEl = prodEl; + return r; + } + + InputLoc loc; + Type type; + String data; + LangExpr *expr; + LangEl *langEl; + ProdEl *prodEl; + long bindId; + Trim trim; + + ConsItem *prev, *next; +}; + +struct ConsItemList +: + public DList<ConsItem> +{ + static ConsItemList *cons( ConsItem *ci ) + { + ConsItemList *cil = new ConsItemList; + cil->append( ci ); + return cil; + } + + static ConsItemList *cons() + { + return new ConsItemList; + } + + void resolve( Compiler *pd ); + void evaluateSendStream( Compiler *pd, CodeVect &code ); +}; + +struct Pattern +{ + Pattern() + : + nspace(0), + list(0), + patRepId(0), + langEl(0), + pdaRun(0), + nextBindId(1) + {} + + static Pattern *cons( const InputLoc &loc, Namespace *nspace, + PatternItemList *list, int patRepId ) + { + Pattern *p = new Pattern; + p->loc = loc; + p->nspace = nspace; + p->list = list; + p->patRepId = patRepId; + return p; + } + + InputLoc loc; + Namespace *nspace; + PatternItemList *list; + long patRepId; + LangEl *langEl; + struct pda_run *pdaRun; + long nextBindId; + Pattern *prev, *next; +}; + +typedef DList<Pattern> PatList; + +struct Constructor +{ + static Constructor *cons( const InputLoc &loc, Namespace *nspace, + ConsItemList *list, int patRepId ) + { + Constructor *r = new Constructor; + r->loc = loc; + r->nspace = nspace; + r->list = list; + r->patRepId = patRepId; + r->langEl = 0; + r->pdaRun = 0; + r->nextBindId = 1; + r->parse = true; + return r; + } + + InputLoc loc; + Namespace *nspace; + ConsItemList *list; + int patRepId; + LangEl *langEl; + struct pda_run *pdaRun; + long nextBindId; + bool parse; + + Constructor *prev, *next; +}; + +typedef DList<Constructor> ConsList; + +struct ParserText +{ + static ParserText *cons( const InputLoc &loc, + Namespace *nspace, ConsItemList *list, + bool used, bool reduce, 
bool read, + const String &reducer ) + { + ParserText *p = new ParserText; + p->loc = loc; + p->nspace = nspace; + p->list = list; + p->langEl = 0; + p->pdaRun = 0; + p->nextBindId = 1; + p->parse = true; + p->used = used; + p->reduce = reduce; + p->read = read; + p->reducer = reducer; + p->reducerId = -1; + return p; + } + + InputLoc loc; + Namespace *nspace; + ConsItemList *list; + LangEl *langEl; + struct pda_run *pdaRun; + long nextBindId; + bool parse; + bool used; + bool reduce; + bool read; + String reducer; + int reducerId; + + ParserText *prev, *next; +}; + +typedef DList<ParserText> ParserTextList; + +struct Function; + +struct IterDef +{ + enum Type { Tree, Child, RevChild, Repeat, + RevRepeat, User, ListEl, + RevListVal, MapEl }; + + IterDef( Type type, Function *func ); + IterDef( Type type ); + + Type type; + + Function *func; +}; + +struct IterImpl +{ + enum Type { Tree, Child, RevChild, Repeat, + RevRepeat, User, ListEl, ListVal, + RevListVal, MapEl, MapVal }; + + IterImpl( Type type, Function *func ); + IterImpl( Type type ); + + Type type; + + Function *func; + bool useFuncId; + bool useSearchUT; + bool useGenericId; + + code_t inCreateWV; + code_t inCreateWC; + code_t inUnwind; + code_t inDestroy; + code_t inAdvance; + + code_t inGetCurR; + code_t inGetCurWC; + code_t inSetCurWC; + + code_t inRefFromCur; +}; + +struct CmpIterDef +{ + static int compare( const IterDef &id1, const IterDef &id2 ) + { + if ( id1.type < id2.type ) + return -1; + else if ( id1.type > id2.type ) + return 1; + else if ( id1.type == IterDef::User ) { + if ( id1.func < id2.func ) + return -1; + else if ( id1.func > id2.func ) + return 1; + } + + return 0; + } +}; + +typedef AvlSet<IterDef, CmpIterDef> IterDefSet; +typedef AvlSetEl<IterDef> IterDefSetEl; + + +/* + * Unique Types. 
+ */ + +/* + * type_ref -> qualified_name + * type_ref -> '*' type_ref + * type_ref -> '&' type_ref + * type_ref -> list type_ref type_ref + * type_ref -> map type_ref type_ref + * type_ref -> vector type_ref + * type_ref -> parser type_ref + * type_ref -> iter_tree type_ref + * type_ref -> iter_child type_ref + * type_ref -> iter_revchild type_ref + * type_ref -> iter_repeat type_ref + * type_ref -> iter_revrepeat type_ref + * type_ref -> iter_user type_ref + * + * type -> nil + * type -> def term + * type -> def nonterm + * type -> '*' type + * type -> '&' type + * type -> list type + * type -> map type type + * type -> vector type + * type -> parser type + * type -> iter_tree type + * type -> iter_child type + * type -> iter_revchild type + * type -> iter_repeat type + * type -> iter_revrepeat type + * type -> iter_user type + */ + +struct UniqueType : public AvlTreeEl<UniqueType> +{ + UniqueType( enum TYPE typeId ) : + typeId(typeId), + langEl(0), + iterDef(0), + structEl(0), + generic(0) + {} + + UniqueType( enum TYPE typeId, LangEl *langEl ) : + typeId(typeId), + langEl(langEl), + iterDef(0), + structEl(0), + generic(0) + {} + + UniqueType( enum TYPE typeId, IterDef *iterDef ) : + typeId(typeId), + langEl(0), + iterDef(iterDef), + structEl(0), + generic(0) + {} + + UniqueType( enum TYPE typeId, StructEl *structEl ) : + typeId(typeId), + langEl(0), + iterDef(0), + structEl(structEl), + generic(0) + {} + + UniqueType( enum TYPE typeId, GenericType *generic ) : + typeId(typeId), + langEl(0), + iterDef(0), + structEl(0), + generic(generic) + {} + + enum TYPE typeId; + LangEl *langEl; + IterDef *iterDef; + StructEl *structEl; + GenericType *generic; + + ObjectDef *objectDef(); + + bool tree() + { return typeId == TYPE_TREE; } + + bool parser() + { return typeId == TYPE_GENERIC && generic->typeId == GEN_PARSER; } + + bool ptr() + { return typeId == TYPE_STRUCT || typeId == TYPE_GENERIC; } + + bool listOf( UniqueType *ut ) + { return typeId == TYPE_GENERIC && 
generic->typeId == GEN_LIST && generic->valueUt == ut; } + + bool val() { + return typeId == TYPE_STRUCT || + typeId == TYPE_GENERIC || + typeId == TYPE_INT || + typeId == TYPE_BOOL; + } +}; + +struct CmpUniqueType +{ + static int compare( const UniqueType &ut1, const UniqueType &ut2 ); +}; + +typedef AvlBasic< UniqueType, CmpUniqueType > UniqueTypeMap; + +enum RepeatType { + RepeatNone = 1, + RepeatRepeat, + RepeatList, + RepeatOpt, + RepeatLeftRepeat, + RepeatLeftList, +}; + +/* + * Repeat types. + */ + +struct UniqueRepeat + : public AvlTreeEl<UniqueRepeat> +{ + UniqueRepeat( RepeatType repeatType, LangEl *langEl ) : + repeatType(repeatType), + langEl(langEl), declLangEl(0) {} + + RepeatType repeatType; + LangEl *langEl; + LangEl *declLangEl; +}; + +struct CmpUniqueRepeat +{ + static int compare( const UniqueRepeat &ut1, const UniqueRepeat &ut2 ); +}; + +typedef AvlBasic< UniqueRepeat, CmpUniqueRepeat > UniqueRepeatMap; + +/* + * Unique generics. Allows us to do singleton declarations of generic types and + * supporting structures. For example, the list type, but also the list element + * struct created for the list type. 
+ */ + +struct UniqueGeneric + : public AvlTreeEl<UniqueGeneric> +{ + enum Type + { + List, + ListEl, + Map, + MapEl, + Parser + }; + + UniqueGeneric( Type type, UniqueType *value ) + : + type(type), + key(0), + value(value), + generic(0), + structEl(0) + {} + + UniqueGeneric( Type type, UniqueType *key, UniqueType *value ) + : + type(type), + key(key), + value(value), + generic(0), + structEl(0) + {} + + Type type; + UniqueType *key; + UniqueType *value; + + GenericType *generic; + StructEl *structEl; +}; + +struct CmpUniqueGeneric +{ + static int compare( const UniqueGeneric &ut1, + const UniqueGeneric &ut2 ); +}; + +typedef AvlBasic< UniqueGeneric, CmpUniqueGeneric > UniqueGenericMap; + +/* + * + */ + +typedef AvlMap< StringVect, int, CmpStrVect > VectorTypeIdMap; +typedef AvlMapEl< StringVect, int > VectorTypeIdMapEl; + +typedef Vector<TypeRef*> TypeRefVect; + +struct TypeRef +{ + enum Type + { + Unspecified, + Name, + Literal, + Iterator, + List, + ListPtrs, + ListEl, + Map, + MapEl, + MapPtrs, + Parser, + Ref + }; + + TypeRef() + : + type((Type)-1), + nspaceQual(0), + pdaLiteral(0), + iterCall(0), + iterDef(0), + typeRef1(0), + typeRef2(0), + typeRef3(0), + repeatType(RepeatNone), + parsedVarRef(0), + parsedTypeRef(0), + nspace(0), + uniqueType(0), + searchUniqueType(0), + generic(0), + searchTypeRef(0) + {} + + /* Qualification and a type name. These require lookup. */ + static TypeRef *cons( const InputLoc &loc, NamespaceQual *nspaceQual, + const String &typeName ) + { + TypeRef *t = new TypeRef; + t->type = Name; + t->loc = loc; + t->nspaceQual = nspaceQual; + t->typeName = typeName; + t->repeatType = RepeatNone; + return t; + } + + /* Qualification and a type name. These require lookup. 
*/ + static TypeRef *cons( const InputLoc &loc, NamespaceQual *nspaceQual, + String typeName, RepeatType repeatType ) + { + TypeRef *t = cons( loc, nspaceQual, typeName ); + t->repeatType = repeatType; + return t; + } + + static TypeRef *cons( const InputLoc &loc, LangVarRef *parsedVarRef, + NamespaceQual *nspaceQual, String typeName, RepeatType repeatType ) + { + TypeRef *t = cons( loc, nspaceQual, typeName ); + t->parsedVarRef = parsedVarRef; + t->repeatType = repeatType; + return t; + } + + static TypeRef *cons( const InputLoc &loc, TypeRef *parsedTypeRef, + NamespaceQual *nspaceQual, String typeName, RepeatType repeatType ) + { + TypeRef *t = cons( loc, nspaceQual, typeName ); + t->parsedTypeRef = parsedTypeRef; + t->repeatType = repeatType; + return t; + } + + /* Qualification and a type name. These require lookup. */ + static TypeRef *cons( const InputLoc &loc, NamespaceQual *nspaceQual, + PdaLiteral *pdaLiteral ) + { + TypeRef *t = new TypeRef; + t->type = Literal; + t->loc = loc; + t->nspaceQual = nspaceQual; + t->pdaLiteral = pdaLiteral; + t->repeatType = RepeatNone; + return t; + } + + static TypeRef *cons( const InputLoc &loc, TypeRef *parsedTypeRef, + NamespaceQual *nspaceQual, PdaLiteral *pdaLiteral ) + { + TypeRef *t = cons( loc, nspaceQual, pdaLiteral ); + t->parsedTypeRef = parsedTypeRef; + return t; + } + + /* Qualification and a type name. These require lookup. 
*/ + static TypeRef *cons( const InputLoc &loc, NamespaceQual *nspaceQual, + PdaLiteral *pdaLiteral, RepeatType repeatType ) + { + TypeRef *t = cons( loc, nspaceQual, pdaLiteral ); + t->repeatType = repeatType; + return t; + } + + static TypeRef *cons( const InputLoc &loc, LangVarRef *parsedVarRef, + NamespaceQual *nspaceQual, PdaLiteral *pdaLiteral, RepeatType repeatType ) + { + TypeRef *t = cons( loc, nspaceQual, pdaLiteral ); + t->parsedVarRef = parsedVarRef; + t->repeatType = repeatType; + return t; + } + + static TypeRef *cons( const InputLoc &loc, TypeRef *parsedTypeRef, + NamespaceQual *nspaceQual, PdaLiteral *pdaLiteral, RepeatType repeatType ) + { + TypeRef *t = cons( loc, nspaceQual, pdaLiteral ); + t->parsedTypeRef = parsedTypeRef; + t->repeatType = repeatType; + return t; + } + + /* Generics. */ + static TypeRef *cons( const InputLoc &loc, Type type, + NamespaceQual *nspaceQual, TypeRef *typeRef1, TypeRef *typeRef2 ) + { + TypeRef *t = new TypeRef; + t->type = type; + t->loc = loc; + t->nspaceQual = nspaceQual; + t->typeRef1 = typeRef1; + t->typeRef2 = typeRef2; + t->repeatType = RepeatNone; + return t; + } + + static TypeRef *cons( const InputLoc &loc, Type type, + NamespaceQual *nspaceQual, TypeRef *typeRef1, + TypeRef *typeRef2, TypeRef *typeRef3 ) + { + TypeRef *t = new TypeRef; + t->type = type; + t->loc = loc; + t->nspaceQual = nspaceQual; + t->typeRef1 = typeRef1; + t->typeRef2 = typeRef2; + t->typeRef3 = typeRef3; + t->repeatType = RepeatNone; + return t; + } + + /* Pointers and Refs. */ + static TypeRef *cons( const InputLoc &loc, Type type, TypeRef *typeRef1 ) + { + TypeRef *t = new TypeRef; + t->type = type; + t->loc = loc; + t->typeRef1 = typeRef1; + t->repeatType = RepeatNone; + return t; + } + + /* Resolution not needed. */ + + /* Iterator definition. 
*/ + static TypeRef *cons( const InputLoc &loc, TypeRef *typeRef, IterCall *iterCall ) + { + TypeRef *t = new TypeRef; + t->type = Iterator; + t->loc = loc; + t->repeatType = RepeatNone; + t->iterCall = iterCall; + t->searchTypeRef = typeRef; + return t; + } + + /* Unique type is given directly. */ + static TypeRef *cons( const InputLoc &loc, UniqueType *uniqueType ) + { + TypeRef *t = new TypeRef; + t->type = Unspecified; + t->loc = loc; + t->repeatType = RepeatNone; + t->uniqueType = uniqueType; + return t; + } + + void resolveRepeat( Compiler *pd ); + + Namespace *resolveNspace( Compiler *pd ); + UniqueType *resolveIterator( Compiler *pd ); + UniqueType *resolveTypeName( Compiler *pd ); + UniqueType *resolveTypeLiteral( Compiler *pd ); + UniqueType *resolveTypeList( Compiler *pd ); + UniqueType *resolveTypeListEl( Compiler *pd ); + UniqueType *resolveTypeMap( Compiler *pd ); + UniqueType *resolveTypeMapEl( Compiler *pd ); + UniqueType *resolveTypeParser( Compiler *pd ); + UniqueType *resolveType( Compiler *pd ); + UniqueType *resolveTypeRef( Compiler *pd ); + + bool uniqueGeneric( UniqueGeneric *&inMap, + Compiler *pd, const UniqueGeneric &searchKey ); + + StructEl *declareMapElStruct( Compiler *pd, TypeRef *keyType, TypeRef *valType ); + StructEl *declareListEl( Compiler *pd, TypeRef *valType ); + + Type type; + InputLoc loc; + NamespaceQual *nspaceQual; + String typeName; + PdaLiteral *pdaLiteral; + IterCall *iterCall; + IterDef *iterDef; + TypeRef *typeRef1; + TypeRef *typeRef2; + TypeRef *typeRef3; + RepeatType repeatType; + + /* For pattern and constructor context. */ + LangVarRef *parsedVarRef; + TypeRef *parsedTypeRef; + + /* Resolved. 
*/ + Namespace *nspace; + UniqueType *uniqueType; + UniqueType *searchUniqueType; + GenericType *generic; + TypeRef *searchTypeRef; +}; + +typedef DList<ObjectField> ParameterList; + +struct ObjectMethod +{ + enum Type + { + Call, + ParseFinish + }; + + ObjectMethod( TypeRef *returnTypeRef, String name, + int opcodeWV, int opcodeWC, int numParams, + UniqueType **types, ParameterList *paramList, bool isConst ) + : + type(Call), + returnUT(0), + returnTypeRef(returnTypeRef), + returnTypeId(0), + name(name), + opcodeWV(opcodeWV), + opcodeWC(opcodeWC), + numParams(numParams), + paramList(paramList), + isConst(isConst), + funcId(0), + useFuncId(false), + useCallObj(true), + func(0), + iterDef(0), + useFnInstr(false), + useGenericId(false), + generic(0) + { + } + + ObjectMethod( UniqueType *returnUT, String name, + int opcodeWV, int opcodeWC, int numParams, + UniqueType **types, ParameterList *paramList, + bool isConst ) + : + type(Call), + returnUT(returnUT), + returnTypeRef(0), + returnTypeId(0), + name(name), + opcodeWV(opcodeWV), + opcodeWC(opcodeWC), + numParams(numParams), + paramList(paramList), + isConst(isConst), + funcId(0), + useFuncId(false), + useCallObj(true), + func(0), + iterDef(0), + useFnInstr(false), + useGenericId(false), + generic(0) + { + this->paramUTs = new UniqueType*[numParams]; + memcpy( this->paramUTs, types, sizeof(UniqueType*)*numParams ); + } + + Type type; + UniqueType *returnUT; + TypeRef *returnTypeRef; + long returnTypeId; + String name; + long opcodeWV; + long opcodeWC; + long numParams; + UniqueType **paramUTs; + ParameterList *paramList; + bool isConst; + long funcId; + bool useFuncId; + bool useCallObj; + Function *func; + IterDef *iterDef; + bool useFnInstr; + + bool useGenericId; + GenericType *generic; +}; + +struct RhsVal +{ + RhsVal( ProdEl *prodEl ) + : + prodEl(prodEl) + {} + + ProdEl *prodEl; +}; + +struct ObjectField +{ + enum Type + { + UserLocalType = 1, + UserFieldType, + StructFieldType, + LhsElType, + RedRhsType, + 
InbuiltFieldType, + InbuiltOffType, + InbuiltObjectType, + RhsNameType, + ParamValType, + ParamRefType, + LexSubstrType, + GenericElementType, + GenericDependentType + }; + + ObjectField() + : + typeRef(0), + scope(0), + offset(0), + beenReferenced(false), + isConst(false), + refActive(false), + isExport(false), + isConstVal(false), + useGenericId(false), + generic(0), + mapKeyField(0), + dirtyTree(false), + inGetR( IN_HALT ), + inGetWC( IN_HALT ), + inGetWV( IN_HALT ), + inSetWC( IN_HALT ), + inSetWV( IN_HALT ), + inGetValR( IN_HALT ), + inGetValWC( IN_HALT ), + inGetValWV( IN_HALT ), + inSetValWC( IN_HALT ), + inSetValWV( IN_HALT ), + iterImpl( 0 ) + {} + + static ObjectField *cons( const InputLoc &loc, + Type type, TypeRef *typeRef, const String &name ) + { + ObjectField *c = new ObjectField; + c->loc = loc; + c->type = type; + c->typeRef = typeRef; + c->name = name; + c->initField( ); + return c; + } + + void initField(); + + bool isParam() + { return type == ParamValType || type == ParamRefType; } + + bool isLhsEl() + { return type == LhsElType; } + + bool isRhsGet() + { return type == RhsNameType; } + + bool useOffset() + { + return type != RhsNameType && + type != InbuiltFieldType && + type != InbuiltObjectType; + } + + bool isInbuiltObject() + { return type == InbuiltObjectType; } + + bool exists() + { + switch ( type ) { + case ObjectField::LhsElType: + case ObjectField::UserLocalType: + case ObjectField::RedRhsType: + case ObjectField::UserFieldType: + case ObjectField::StructFieldType: + case ObjectField::GenericDependentType: + return true; + default: + return false; + } + } + + InputLoc loc; + Type type; + TypeRef *typeRef; + String name; + NameScope *scope; + long offset; + bool beenReferenced; + /* Declared const. */ + bool isConst; + bool refActive; + bool isExport; + + /* Value is a const thing when that retrieved by the runtime. Requires a + * const val id. 
*/ + bool isConstVal; + int constValId; + String constValArg; + + bool useGenericId; + GenericType *generic; + + ObjectField *mapKeyField; + + /* True if some aspect of the tree has possibly been written to. This does + * not include attributes. This is here so we can optimize the storage of + * old lhs vars. If only a lhs attribute changes we don't need to preserve + * the original for backtracking. */ + bool dirtyTree; + + Vector<RhsVal> rhsVal; + + code_t inGetR; + code_t inGetWC; + code_t inGetWV; + code_t inSetWC; + code_t inSetWV; + code_t inGetValR; + code_t inGetValWC; + code_t inGetValWV; + code_t inSetValWC; + code_t inSetValWV; + + IterImpl *iterImpl; + + ObjectField *prev, *next; +}; + +typedef DListVal<ObjectField*> FieldList; + +typedef DList<ObjectField> ParameterList; + + +struct ObjectDef +{ + enum Type { + UserType, + FrameType, + IterType, + BuiltinType, + StructType + }; + + ObjectDef() + : + nextOffset(0), + firstNonTree(0) + {} + + static ObjectDef *cons( Type type, String name, int id ) + { + ObjectDef *o = new ObjectDef; + + o->type = type; + o->name = name; + o->id = id; + + o->rootScope = new NameScope; + o->rootScope->owningObj = o; + + return o; + } + + Type type; + String name; + FieldList fieldList; + + NameScope *rootScope; + + NameScope *pushScope( NameScope *curScope ); + + long id; + long nextOffset; + long firstNonTree; + + void referenceField( Compiler *pd, ObjectField *field ); + void placeField( Compiler *pd, ObjectField *field ); + void createCode( Compiler *pd, CodeVect &code ); + ObjectField *findFieldInScope( const NameScope *scope, const String &name ) const; + ObjectField *checkRedecl( NameScope *inScope, const String &name ); + void insertField( NameScope *inScope, const String &name, ObjectField *value ); + void resolve( Compiler *pd ); + ObjectField *findFieldNum( long offset ); + ObjectField *findFieldType( Compiler *pd, UniqueType *ut ); + + long size() { return nextOffset; } + long sizeTrees() { return firstNonTree; 
} +}; + +struct CallArg +{ + CallArg( LangExpr *expr ) + : expr(expr), exprUT(0), offTmp(-1), offQualRef(-1) {} + + LangExpr *expr; + UniqueType *exprUT; + int offTmp; + int offQualRef; +}; + +typedef Vector<LangExpr*> ExprVect; +typedef Vector<CallArg*> CallArgVect; +typedef Vector<String> StringVect; + +struct FieldInit +{ + static FieldInit *cons( const InputLoc &loc, String name, LangExpr *expr ) + { + FieldInit *fi = new FieldInit; + fi->loc = loc; + fi->name = name; + fi->expr = expr; + return fi; + } + + InputLoc loc; + String name; + LangExpr *expr; + + UniqueType *exprUT; +}; + +typedef Vector<FieldInit*> FieldInitVect; + +struct VarRefLookup +{ + VarRefLookup( int lastPtrInQual, int firstConstPart, + ObjectDef *inObject, NameScope *inScope ) + : + lastPtrInQual(lastPtrInQual), + firstConstPart(firstConstPart), + inObject(inObject), + inScope(inScope), + objField(0), + objMethod(0), + uniqueType(0), + iterSearchUT(0) + {} + + int lastPtrInQual; + int firstConstPart; + ObjectDef *inObject; + NameScope *inScope; + ObjectField *objField; + ObjectMethod *objMethod; + UniqueType *uniqueType; + UniqueType *iterSearchUT; +}; + +struct QualItem +{ + enum Form { Dot, Arrow }; + + QualItem( Form form, const InputLoc &loc, const String &data ) + : form(form), loc(loc), data(data) {} + + Form form; + InputLoc loc; + String data; +}; + +typedef Vector<QualItem> QualItemVect; + +struct LangVarRef +{ + static LangVarRef *cons( const InputLoc &loc, Namespace *nspace, + StructDef *structDef, NameScope *scope, + NamespaceQual *nspaceQual, QualItemVect *qual, + const String &name ) + { + LangVarRef *l = new LangVarRef; + l->loc = loc; + l->nspace = nspace; + l->structDef = structDef; + l->scope = scope; + l->nspaceQual = nspaceQual; + l->qual = qual; + l->name = name; + return l; + } + + static LangVarRef *cons( const InputLoc &loc, Namespace *nspace, + StructDef *structDef, NameScope *scope, const String &name ) + { + return cons( loc, nspace, structDef, scope, + 
NamespaceQual::cons( nspace ), new QualItemVect, name ); + } + + void resolve( Compiler *pd ) const; + UniqueType *lookup( Compiler *pd ) const; + + UniqueType *loadField( Compiler *pd, CodeVect &code, ObjectDef *inObject, + ObjectField *el, bool forWriting, bool revert ) const; + + VarRefLookup lookupIterCall( Compiler *pd ) const; + VarRefLookup lookupMethod( Compiler *pd ) const; + VarRefLookup lookupField( Compiler *pd ) const; + + VarRefLookup lookupQualification( Compiler *pd, NameScope *rootScope ) const; + VarRefLookup lookupObj( Compiler *pd ) const; + VarRefLookup lookupMethodObj( Compiler *pd ) const; + + bool isInbuiltObject() const; + bool isLocalRef() const; + bool isProdRef( Compiler *pd ) const; + bool isStructRef() const; + void loadQualification( Compiler *pd, CodeVect &code, NameScope *rootScope, + int lastPtrInQual, bool forWriting, bool revert ) const; + void loadInbuiltObject( Compiler *pd, CodeVect &code, + int lastPtrInQual, bool forWriting ) const; + void loadLocalObj( Compiler *pd, CodeVect &code, + int lastPtrInQual, bool forWriting ) const; + void loadContextObj( Compiler *pd, CodeVect &code, + int lastPtrInQual, bool forWriting ) const; + void loadGlobalObj( Compiler *pd, CodeVect &code, + int lastPtrInQual, bool forWriting ) const; + void loadObj( Compiler *pd, CodeVect &code, int lastPtrInQual, bool forWriting ) const; + void loadScopedObj( Compiler *pd, CodeVect &code, + NameScope *scope, int lastPtrInQual, bool forWriting ) const; + + void verifyRefPossible( Compiler *pd, VarRefLookup &lookup ) const; + bool canTakeRef( Compiler *pd, VarRefLookup &lookup ) const; + + void setFieldIter( Compiler *pd, CodeVect &code, ObjectDef *inObject, + ObjectField *objField, UniqueType *objUT, UniqueType *exprType, + bool revert ) const; + void setFieldSearch( Compiler *pd, CodeVect &code, + ObjectDef *inObject, UniqueType *exprType ) const; + void setField( Compiler *pd, CodeVect &code, ObjectDef *inObject, + ObjectField *el, UniqueType *exprUT, 
bool revert ) const; + + void assignValue( Compiler *pd, CodeVect &code, UniqueType *exprUT ) const; + + IterImpl *chooseTriterCall( Compiler *pd, UniqueType *searchUT, CallArgVect *args ); + + /* The deref generics value is for iterator calls with lists and maps as args. */ + ObjectField **evaluateArgs( Compiler *pd, CodeVect &code, + VarRefLookup &lookup, CallArgVect *args ); + + void callOperation( Compiler *pd, CodeVect &code, VarRefLookup &lookup ) const; + UniqueType *evaluateCall( Compiler *pd, CodeVect &code, CallArgVect *args ); + UniqueType *evaluate( Compiler *pd, CodeVect &code, bool forWriting = false ) const; + ObjectField *evaluateRef( Compiler *pd, CodeVect &code, long pushCount ) const; + ObjectField *preEvaluateRef( Compiler *pd, CodeVect &code ) const; + void resetActiveRefs( Compiler *pd, VarRefLookup &lookup, ObjectField **paramRefs ) const; + long loadQualificationRefs( Compiler *pd, CodeVect &code, NameScope *rootScope ) const; + void popRefQuals( Compiler *pd, CodeVect &code, + VarRefLookup &lookup, CallArgVect *args, bool temps ) const; + + bool isFinishCall( VarRefLookup &lookup ) const; + + InputLoc loc; + Namespace *nspace; + StructDef *structDef; + NameScope *scope; + NamespaceQual *nspaceQual; + QualItemVect *qual; + String name; + long argSize; +}; + +/* A term of an expression. The Type tag records which form the term takes; + * the static cons() functions each allocate a term with new and fill in only + * the members that their form is given. */ +struct LangTerm +{ + enum Type { + VarRefType, + MethodCallType, + NumberType, + StringType, + MatchType, + ProdCompareType, + NewType, + ConstructType, + TypeIdType, + SearchType, + NilType, + TrueType, + FalseType, + ParseType, + ParseTreeType, + ParseStopType, + SendType, + SendTreeType, + MakeTreeType, + MakeTokenType, + EmbedStringType, + CastType + }; + + /* Zero every pointer member and eof. The cons() functions assign only a + * subset of the members, so anything not cleared here would be left with + * an indeterminate value. The initializers follow the declaration order + * of the members. */ + LangTerm() + : + varRef(0), + args(0), + nspaceQual(0), + objField(0), + typeRef(0), + pattern(0), + fieldInitArgs(0), + generic(0), + constructor(0), + consItemList(0), + parserText(0), + expr(0), + eof(false) + {} + + /* Term carrying only a variable reference. */ + static LangTerm *cons( const InputLoc &loc, Type type, LangVarRef *varRef ) + { + LangTerm *t = new LangTerm; + t->loc = loc; + t->type = type; + t->varRef = varRef; + return t; + } + + static LangTerm *cons( const InputLoc &loc, LangVarRef 
*varRef, CallArgVect *args ) + { + LangTerm *t = new LangTerm; + t->loc = loc; + t->type = MethodCallType; /* The type is fixed by this overload. */ + t->varRef = varRef; + t->args = args; + return t; + } + /* Term carrying only a call argument vector; varRef and typeRef are not assigned by this overload. */ + static LangTerm *cons( const InputLoc &loc, Type type, CallArgVect *args ) + { + LangTerm *t = new LangTerm; + t->loc = loc; + t->type = type; + t->args = args; + return t; + } + /* Term carrying a String in data; varRef is cleared. */ + static LangTerm *cons( const InputLoc &loc, Type type, String data ) + { + LangTerm *t = new LangTerm; + t->loc = loc; + t->type = type; + t->varRef = 0; + t->data = data; + return t; + } + /* Term with no payload; varRef and typeRef are cleared. */ + static LangTerm *cons( const InputLoc &loc, Type type ) + { + LangTerm *t = new LangTerm; + t->loc = loc; + t->type = type; + t->varRef = 0; + t->typeRef = 0; + return t; + } + /* Term carrying a type reference; varRef is cleared. */ + static LangTerm *cons( const InputLoc &loc, Type type, TypeRef *typeRef ) + { + LangTerm *t = new LangTerm; + t->loc = loc; + t->type = type; + t->varRef = 0; + t->typeRef = typeRef; + return t; + } + /* Term carrying a type reference and an expression; varRef is cleared. */ + static LangTerm *cons( const InputLoc &loc, Type type, TypeRef *typeRef, + LangExpr *langExpr ) + { + LangTerm *t = new LangTerm; + t->loc = loc; + t->type = type; + t->varRef = 0; + t->typeRef = typeRef; + t->expr = langExpr; + return t; + } + /* Builds a MatchType term from a variable reference and a pattern. */ + static LangTerm *consMatch( const InputLoc &loc, + LangVarRef *varRef, Pattern *pattern ) + { + LangTerm *t = new LangTerm; + t->type = MatchType; + t->loc = loc; + t->varRef = varRef; + t->pattern = pattern; + return t; + } + /* Builds a ProdCompareType term; matchExpr is stored in expr. */ + static LangTerm *consProdCompare( const InputLoc &loc, + LangVarRef *varRef, const String &prod, LangExpr *matchExpr ) + { + LangTerm *t = new LangTerm; + t->type = ProdCompareType; + t->loc = loc; + t->varRef = varRef; + t->prod = prod; + t->expr = matchExpr; + return t; + } + /* Term of a caller-chosen type carrying a variable reference and a pattern. */ + static LangTerm *cons( const InputLoc &loc, Type type, LangVarRef *varRef, + Pattern *pattern ) + { + LangTerm *t = new LangTerm; + t->loc = loc; + t->type = type; + t->varRef = varRef; + t->pattern = pattern; + return t; + } + /* Term carrying a type reference and a variable reference. */ + static LangTerm *cons( const InputLoc &loc, Type type, TypeRef *typeRef, + LangVarRef 
*varRef ) + { + LangTerm *t = new LangTerm; + t->loc = loc; + t->type = type; + t->varRef = varRef; + t->typeRef = typeRef; + return t; + } + /* Term carrying varRef, objField, typeRef, field initializers and a Constructor. */ + static LangTerm *cons( const InputLoc &loc, Type type, LangVarRef *varRef, + ObjectField *objField, TypeRef *typeRef, FieldInitVect *fieldInitArgs, + Constructor *constructor ) + { + LangTerm *t = new LangTerm; + t->loc = loc; + t->type = type; + t->varRef = varRef; + t->objField = objField; + t->typeRef = typeRef; + t->fieldInitArgs = fieldInitArgs; + t->constructor = constructor; + return t; + } + /* As above, but carrying a ConsItemList and ParserText instead of a Constructor. */ + static LangTerm *cons( const InputLoc &loc, Type type, LangVarRef *varRef, + ObjectField *objField, TypeRef *typeRef, FieldInitVect *fieldInitArgs, + ConsItemList *consItemList, ParserText *parserText ) + { + LangTerm *t = new LangTerm; + t->loc = loc; + t->type = type; + t->varRef = varRef; + t->objField = objField; + t->typeRef = typeRef; + t->fieldInitArgs = fieldInitArgs; + t->consItemList = consItemList; + t->parserText = parserText; + return t; + } + /* Term wrapping an expression; no other member is assigned by this overload. */ + static LangTerm *cons( const InputLoc &loc, Type type, LangExpr *expr ) + { + LangTerm *t = new LangTerm; + t->loc = loc; + t->type = type; + t->expr = expr; + return t; + } + /* Builds an EmbedStringType term from a ConsItemList. */ + static LangTerm *cons( const InputLoc &loc, ConsItemList *consItemList ) + { + LangTerm *t = new LangTerm; + t->loc = loc; + t->type = EmbedStringType; + t->consItemList = consItemList; + return t; + } + /* Term carrying a variable reference and parser text. */ + static LangTerm *cons( const InputLoc &loc, Type type, LangVarRef *varRef, + ParserText *parserText ) + { + LangTerm *s = new LangTerm; + s->loc = loc; + s->type = type; + s->varRef = varRef; + s->parserText = parserText; + return s; + } + /* Builds a SendType term; the eof flag is recorded on the term. */ + static LangTerm *consSend( const InputLoc &loc, LangVarRef *varRef, + ParserText *parserText, bool eof ) + { + LangTerm *s = new LangTerm; + s->loc = loc; + s->type = SendType; + s->varRef = varRef; + s->parserText = parserText; + s->eof = eof; + return s; + } + /* Builds a SendTreeType term; the eof flag is recorded on the term. */ + static LangTerm *consSendTree( const InputLoc &loc, LangVarRef *varRef, + ParserText 
*parserText, bool eof ) + { + LangTerm *s = new LangTerm; + s->loc = loc; + s->type = SendTreeType; + s->varRef = varRef; + s->parserText = parserText; + s->eof = eof; + return s; + } + /* Builds a NewType term; captureVarRef is stored in varRef. */ + static LangTerm *consNew( const InputLoc &loc, TypeRef *typeRef, + LangVarRef *captureVarRef, FieldInitVect *fieldInitArgs ) + { + LangTerm *s = new LangTerm; + s->type = NewType; + s->loc = loc; + s->typeRef = typeRef; + s->varRef = captureVarRef; + s->fieldInitArgs = fieldInitArgs; + return s; + } + /* NOTE(review): resolveFieldArgs( Compiler* ) is declared here without const + * and again, with const, at the end of this method list - confirm that both + * overloads are intended. */ + void resolveFieldArgs( Compiler *pd ); + void resolve( Compiler *pd ); + /* The two evaluateCapture overloads differ only in their last parameter (UniqueType* versus bool). */ + void evaluateCapture( Compiler *pd, CodeVect &code, UniqueType *valUt ) const; + void evaluateCapture( Compiler *pd, CodeVect &code, bool isTree ) const; + UniqueType *evaluateNew( Compiler *pd, CodeVect &code ) const; + UniqueType *evaluateConstruct( Compiler *pd, CodeVect &code ) const; + + static void parseFrag( Compiler *pd, CodeVect &code, int stopId ); + + UniqueType *evaluateParse( Compiler *pd, CodeVect &code, bool tree, bool stop ) const; + UniqueType *evaluateReadReduce( Compiler *pd, CodeVect &code ) const; + void evaluateSendStream( Compiler *pd, CodeVect &code ) const; + void evaluateSendParser( Compiler *pd, CodeVect &code, bool strings ) const; + UniqueType *evaluateSend( Compiler *pd, CodeVect &code ) const; + UniqueType *evaluateSendTree( Compiler *pd, CodeVect &code ) const; + UniqueType *evaluateMatch( Compiler *pd, CodeVect &code ) const; + UniqueType *evaluateProdCompare( Compiler *pd, CodeVect &code ) const; + UniqueType *evaluate( Compiler *pd, CodeVect &code ) const; + void assignFieldArgs( Compiler *pd, CodeVect &code, UniqueType *replUT ) const; + UniqueType *evaluateMakeToken( Compiler *pd, CodeVect &code ) const; + UniqueType *evaluateMakeTree( Compiler *pd, CodeVect &code ) const; + UniqueType *evaluateEmbedString( Compiler *pd, CodeVect &code ) const; + UniqueType *evaluateSearch( Compiler *pd, CodeVect &code ) const; + UniqueType *evaluateCast( Compiler *pd, CodeVect &code ) 
const; + void resolveFieldArgs( Compiler *pd ) const; + + InputLoc loc; + Type type; + LangVarRef *varRef; + CallArgVect *args; + NamespaceQual *nspaceQual; + String data; + ObjectField *objField; + TypeRef *typeRef; + Pattern *pattern; + String prod; + FieldInitVect *fieldInitArgs; + GenericType *generic; + Constructor *constructor; + ConsItemList *consItemList; + ParserText *parserText; + LangExpr *expr; + bool eof; +}; + +/* An expression node: a binary operation, a unary operation, or a single + * term. The Type tag records the form; left/op/right are used by the operator + * forms and term by the term form. */ +struct LangExpr +{ + enum Type { + BinaryType, + UnaryType, + TermType + }; + + /* Give every scalar and pointer member a defined value. Each cons() + * function below assigns only the members of its own form, so without + * this the members of the other forms would be indeterminate. */ + LangExpr() + : + type((Type)-1), + left(0), + op(0), + right(0), + term(0) + {} + + /* Binary form: left op right. */ + static LangExpr *cons( const InputLoc &loc, LangExpr *left, + char op, LangExpr *right ) + { + LangExpr *e = new LangExpr; + e->loc = loc; + e->type = BinaryType; + e->left = left; + e->op = op; + e->right = right; + return e; + } + + /* Unary form: op right; left is cleared. */ + static LangExpr *cons( const InputLoc &loc, char op, LangExpr *right ) + { + LangExpr *e = new LangExpr; + e->loc = loc; + e->type = UnaryType; + e->left = 0; + e->op = op; + e->right =right; + return e; + } + + /* Term form: wraps a single term. No location is assigned here. */ + static LangExpr *cons( LangTerm *term ) + { + LangExpr *e = new LangExpr; + e->type = TermType; + e->term = term; + return e; + } + + void resolve( Compiler *pd ) const; + + UniqueType *evaluate( Compiler *pd, CodeVect &code ) const; + bool canTakeRef( Compiler *pd ) const; + + InputLoc loc; + Type type; + LangExpr *left; + char op; + LangExpr *right; + LangTerm *term; +}; + +struct LangStmt; +typedef DList<LangStmt> StmtList; + +struct IterCall +{ + enum Form { + Call, + Expr + }; + + IterCall() + : + langTerm(0), + langExpr(0), + wasExpr(false) + {} + + static IterCall *cons( Form form, LangTerm *langTerm ) + { + IterCall *iterCall = new IterCall; + iterCall->form = form; + iterCall->langTerm = langTerm; + return iterCall; + } + + static IterCall *cons( Form form, LangExpr *langExpr ) + { + IterCall *iterCall = new IterCall; + iterCall->form = form; + iterCall->langExpr = langExpr; + return iterCall; + } + + void resolve( Compiler *pd ) const; + + Form form; + LangTerm *langTerm; + LangExpr *langExpr; + bool wasExpr; +}; + 
+struct LangStmt +{ /* A statement node. The Type tag records the kind of statement; the cons() functions allocate a statement with new and assign only the members they are given. */ + enum Type { + AssignType, + ExprType, + IfType, + ElseType, + RejectType, + WhileType, + ReturnType, + YieldType, + ForIterType, + BreakType + }; + /* Clears every pointer member; type starts out as (Type)-1 until a cons() function sets it. */ + LangStmt() + : + type((Type)-1), + varRef(0), + langTerm(0), + objField(0), + typeRef(0), + expr(0), + constructor(0), + parserText(0), + exprPtrVect(0), + fieldInitVect(0), + stmtList(0), + elsePart(0), + iterCall(0), + context(0), + scope(0), + consItemList(0), + + /* Normally you don't need to initialize double list pointers, however, + * we make use of the next pointer for returning a pair of statements + * using one pointer to a LangStmt, so we need to initialize the + * pointers. */ + prev(0), + next(0) + {} + /* Statement carrying a vector of field initializers. */ + static LangStmt *cons( const InputLoc &loc, Type type, FieldInitVect *fieldInitVect ) + { + LangStmt *s = new LangStmt; + s->loc = loc; + s->type = type; + s->fieldInitVect = fieldInitVect; + return s; + } + /* Statement carrying a call argument vector, stored in exprPtrVect. */ + static LangStmt *cons( const InputLoc &loc, Type type, CallArgVect *exprPtrVect ) + { + LangStmt *s = new LangStmt; + s->loc = loc; + s->type = type; + s->exprPtrVect = exprPtrVect; + return s; + } + /* Statement carrying a single expression. */ + static LangStmt *cons( const InputLoc &loc, Type type, LangExpr *expr ) + { + LangStmt *s = new LangStmt; + s->loc = loc; + s->type = type; + s->expr = expr; + return s; + } + /* Statement carrying a variable reference; no location is assigned. */ + static LangStmt *cons( Type type, LangVarRef *varRef ) + { + LangStmt *s = new LangStmt; + s->type = type; + s->varRef = varRef; + return s; + } + /* Statement carrying an object field. */ + static LangStmt *cons( const InputLoc &loc, Type type, ObjectField *objField ) + { + LangStmt *s = new LangStmt; + s->loc = loc; + s->type = type; + s->objField = objField; + return s; + } + /* Statement carrying a variable reference and an expression. */ + static LangStmt *cons( const InputLoc &loc, Type type, LangVarRef *varRef, LangExpr *expr ) + { + LangStmt *s = new LangStmt; + s->loc = loc; + s->type = type; + s->varRef = varRef; + s->expr = expr; + return s; + } + /* Statement carrying an expression and a statement list; no location is assigned. */ + static LangStmt *cons( Type type, LangExpr *expr, StmtList *stmtList ) + { + LangStmt *s = new LangStmt; + s->type = type; + s->expr = expr; + 
s->stmtList = stmtList; + return s; + } + /* As the previous overload, with the else part supplied up front. */ + static LangStmt *cons( Type type, LangExpr *expr, StmtList *stmtList, LangStmt *elsePart ) + { + LangStmt *s = new LangStmt; + s->type = type; + s->expr = expr; + s->stmtList = stmtList; + s->elsePart = elsePart; + return s; + } + /* Replace the else part of an already constructed statement. */ + void setElsePart( LangStmt *elsePart ) + { + this->elsePart = elsePart; + } + /* Statement carrying only a statement list; no location is assigned. */ + static LangStmt *cons( Type type, StmtList *stmtList ) + { + LangStmt *s = new LangStmt; + s->type = type; + s->stmtList = stmtList; + return s; + } + + /* Statement with a location and a type but no payload. */ + static LangStmt *cons( const InputLoc &loc, Type type ) + { + LangStmt *s = new LangStmt; + s->loc = loc; + s->type = type; + return s; + } + /* Statement carrying a variable reference and a Constructor; no location is assigned. */ + static LangStmt *cons( Type type, LangVarRef *varRef, Constructor *constructor ) + { + LangStmt *s = new LangStmt; + s->type = type; + s->varRef = varRef; + s->constructor = constructor; + return s; + } + /* Statement carrying an object field, its type reference, a term and a body statement list. */ + static LangStmt *cons( const InputLoc &loc, Type type, ObjectField *objField, + TypeRef *typeRef, LangTerm *langTerm, StmtList *stmtList ) + { + LangStmt *s = new LangStmt; + s->loc = loc; + s->type = type; + s->langTerm = langTerm; + s->objField = objField; + s->typeRef = typeRef; + s->stmtList = stmtList; + return s; + } + /* Statement carrying an object field, its type reference, an IterCall, a body, and the context and scope it was built in. */ + static LangStmt *cons( const InputLoc &loc, Type type, ObjectField *objField, + TypeRef *typeRef, IterCall *iterCall, StmtList *stmtList, + StructDef *context, NameScope *scope ) + { + LangStmt *s = new LangStmt; + s->loc = loc; + s->type = type; + s->objField = objField; + s->typeRef = typeRef; + s->iterCall = iterCall; + s->stmtList = stmtList; + s->context = context; + s->scope = scope; + return s; + } + /* Statement carrying a ConsItemList. */ + static LangStmt *cons( const InputLoc &loc, Type type, ConsItemList *consItemList ) + { + LangStmt *s = new LangStmt; + s->loc = loc; + s->type = type; + s->consItemList = consItemList; + return s; + } + /* Statement with only a type; neither location nor payload is assigned. */ + static LangStmt *cons( Type type ) + { + LangStmt *s = new LangStmt; + s->type = type; + return s; + } + + void declareForIter( Compiler *pd ) const; + + void declare( Compiler *pd ) 
const; + + void resolveForIter( Compiler *pd ) const; + void resolve( Compiler *pd ) const; + void resolveParserItems( Compiler *pd ) const; + + void chooseDefaultIter( Compiler *pd, IterCall *iterCall ) const; + void compileWhile( Compiler *pd, CodeVect &code ) const; + void compileForIterBody( Compiler *pd, CodeVect &code, UniqueType *iterUT ) const; + void compileForIter( Compiler *pd, CodeVect &code ) const; + void compile( Compiler *pd, CodeVect &code ) const; + + InputLoc loc; + Type type; + LangVarRef *varRef; + LangTerm *langTerm; + ObjectField *objField; + TypeRef *typeRef; + LangExpr *expr; + Constructor *constructor; + ParserText *parserText; + CallArgVect *exprPtrVect; + FieldInitVect *fieldInitVect; + StmtList *stmtList; + /* Either another if, or an else. */ + LangStmt *elsePart; + String name; + IterCall *iterCall; + StructDef *context; + NameScope *scope; + ConsItemList *consItemList; + + /* Normally you don't need to initialize double list pointers, however, we + * make use of the next pointer for returning a pair of statements using + * one pointer to a LangStmt, so we need to initialize it above. */ + LangStmt *prev, *next; +}; +/* A statement list together with its local frame. The constructor presets only frameId (-1) and context (0); cons() supplies stmtList and localFrame. */ +struct CodeBlock +{ + CodeBlock() + : + frameId(-1), + context(0) + {} + /* Allocate a block for the given statements and local frame. */ + static CodeBlock *cons( StmtList *stmtList, ObjectDef *localFrame ) + { + CodeBlock *c = new CodeBlock; + c->stmtList = stmtList; + c->localFrame = localFrame; + return c; + } + + void declare( Compiler *pd ) const; + void resolve( Compiler *pd ) const; + void compile( Compiler *pd, CodeVect &code ) const; + + long frameId; + StmtList *stmtList; + ObjectDef *localFrame; + Locals locals; + StructDef *context; + + /* Each frame has two versions of + * the code: revert and commit. 
*/ + CodeVect codeWV, codeWC; +}; + +/* A function definition. Instances are normally built with cons(), which + * assigns the namespace, return type, name, parameters, body, id and the + * isUserIter/exprt flags. */ +struct Function +{ + /* Give every scalar and pointer member a defined value, in declaration + * order. cons() does not assign transBlock or localFrame, so they must be + * cleared here; funcId starts at -1 until cons() sets it. The prev/next + * list links are left to the containing DList. */ + Function() + : + nspace(0), + transBlock(0), + typeRef(0), + paramList(0), + codeBlock(0), + localFrame(0), + funcId(-1), + isUserIter(false), + paramListSize(0), + paramUTs(0), + inContext(0), + exprt(false), + objMethod(0), + inHost(false) + {} + + static Function *cons( Namespace *nspace, TypeRef *typeRef, const String &name, + ParameterList *paramList, CodeBlock *codeBlock, + int funcId, bool isUserIter, bool exprt ) + { + Function *f = new Function; + + f->nspace = nspace; + f->typeRef = typeRef; + f->name = name; + f->paramList = paramList; + f->codeBlock = codeBlock; + f->funcId = funcId; + f->isUserIter = isUserIter; + f->exprt = exprt; + + return f; + } + + Namespace *nspace; + TransBlock *transBlock; + TypeRef *typeRef; + String name; + String hostCall; + ParameterList *paramList; + CodeBlock *codeBlock; + ObjectDef *localFrame; + long funcId; + bool isUserIter; + long paramListSize; + UniqueType **paramUTs; + StructDef *inContext; + bool exprt; + ObjectMethod *objMethod; + bool inHost; + + Function *prev, *next; +}; + +typedef DList<Function> FunctionList; + +#endif /* _COLM_PARSETREE_H */ + diff --git a/src/pcheck.cc b/src/pcheck.cc new file mode 100644 index 00000000..6f41a7ce --- /dev/null +++ b/src/pcheck.cc @@ -0,0 +1,156 @@ +/* + * Copyright 2006-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "pcheck.h" + +#include <stdbool.h> + +/* Construct a new parameter checker for paramSpec. The spec is a list of + * option characters, each optionally followed by ':' to mark an option that + * takes an argument. Scanning starts at argv[1]. */ +ParamCheck::ParamCheck( const char *paramSpec, int argc, const char **argv ) +: + state(noparam), + argOffset(0), + curArg(0), + iCurArg(1), + paramSpec(paramSpec), + argc(argc), + argv(argv) +{ +} + +/* Check a single option. Returns false once the argument list is exhausted + * and true otherwise. On a true return, state is one of: + * noparam - the current argument is not an option (null, no leading dash, + * or a lone dash); parameter and parameterArg are 0 and curArg + * points at the argument. + * match - parameter is the matched option character and parameterArg is + * its argument, or 0 if the option takes none. + * invalid - parameter is the offending character: either it is not in the + * spec, or it requires an argument and none follows. */ +bool ParamCheck::check() +{ + bool requiresParam; + + if ( iCurArg >= argc ) { /* Off the end of the arg list. */ + state = noparam; + return false; + } + + if ( argOffset != 0 && *argOffset == 0 ) { + /* We are at the end of an arg string. */ + iCurArg += 1; + if ( iCurArg >= argc ) { + state = noparam; + return false; + } + argOffset = 0; + } + + if ( argOffset == 0 ) { + /* Set the current arg. */ + curArg = argv[iCurArg]; + + /* We are at the beginning of an arg string. */ + if ( argv[iCurArg] == 0 || /* Argv[iCurArg] is null. */ + argv[iCurArg][0] != '-' || /* Not a param. */ + argv[iCurArg][1] == 0 ) { /* Only a dash. */ + parameter = 0; + parameterArg = 0; + + iCurArg += 1; + state = noparam; + return true; + } + argOffset = argv[iCurArg] + 1; + } + + /* Get the arg char. */ + char argChar = *argOffset; + + /* Loop over all the params and look for a match. 
*/ + const char *pSpec = paramSpec; + while ( *pSpec != 0 ) { + char pSpecChar = *pSpec; + + /* If there is a ':' following the char then + * it requires a param. If a param is required + * then move ahead two in the param spec. Otherwise + * move ahead one in the param spec. */ + if ( pSpec[1] == ':' ) { + requiresParam = true; + pSpec += 2; + } + else { + requiresParam = false; + pSpec += 1; + } + + /* Do we have a match. */ + if ( argChar == pSpecChar ) { + if ( requiresParam ) { + if ( argOffset[1] == 0 ) { + /* The param must follow. */ + if ( iCurArg + 1 == argc ) { + /* We are the last arg so there + * cannot be a parameter to it. */ + parameter = argChar; + parameterArg = 0; + iCurArg += 1; + argOffset = 0; + state = invalid; + return true; + } + else { + /* the parameter to the arg is the next arg. */ + parameter = pSpecChar; + parameterArg = argv[iCurArg + 1]; + iCurArg += 2; + argOffset = 0; + state = match; + return true; + } + } + else { + /* The param for the arg is built in. */ + parameter = pSpecChar; + parameterArg = argOffset + 1; + iCurArg += 1; + argOffset = 0; + state = match; + return true; + } + } + else { + /* Good, we matched the param and no + * arg is required. Stay inside this arg string so that + * further option characters in it are checked next. */ + parameter = pSpecChar; + parameterArg = 0; + argOffset += 1; + state = match; + return true; + } + } + } + + /* We did not find a match. Bad Argument. 
*/ + parameter = argChar; + parameterArg = 0; + argOffset += 1; + state = invalid; + return true; +} + + diff --git a/src/pcheck.h b/src/pcheck.h new file mode 100644 index 00000000..96746470 --- /dev/null +++ b/src/pcheck.h @@ -0,0 +1,50 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _COLM_PCHECK_H +#define _COLM_PCHECK_H + +class ParamCheck +{ +public: + ParamCheck( const char *paramSpec, int argc, const char **argv ); + + bool check(); + + const char *parameterArg; /* The argument to the parameter. */ + char parameter; /* The parameter matched. */ + enum { match, invalid, noparam } state; /* Outcome of the most recent check(). */ + + const char *argOffset; /* If we are reading params inside an + * arg this points to the offset. */ + + const char *curArg; /* Pointer to the current arg. */ + int iCurArg; /* Index to the current arg. 
*/ + +private: + const char *paramSpec; /* Parameter spec supplied by the coder. */ + int argc; /* Argument data from the command line. */ + const char **argv; +}; + +#endif /* _COLM_PCHECK_H */ + diff --git a/src/pdabuild.cc b/src/pdabuild.cc new file mode 100644 index 00000000..27cd9616 --- /dev/null +++ b/src/pdabuild.cc @@ -0,0 +1,2205 @@ +/* + * Copyright 2006-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#define EOF_REGION 0 + +#include <stdio.h> +#include <string.h> +#include <stdbool.h> +#include <assert.h> + +#include <iostream> + +/* Dumping the fsm. */ +#include <mergesort.h> + +/* Parsing. 
*/ +#include "compiler.h" +#include "pdacodegen.h" + +using std::endl; +using std::cerr; +using std::cout; + +char startDefName[] = "start"; +/* The internal_* functions below are C-linkage definitions with fixed results: this one ignores its arguments and returns 0. */ +extern "C" tree_t **internal_host_call( program_t *prg, long code, tree_t **sp ) +{ + return 0; +} +/* Forwards to commit_clear_parse_tree on the child of pt. */ +extern "C" void internal_commit_reduce_forward( program_t *prg, tree_t **root, + struct pda_run *pda_run, parse_tree_t *pt ) +{ + commit_clear_parse_tree( prg, root, pda_run, pt->child ); +} +/* Always returns 0. */ +extern "C" long internal_commit_union_sz( int reducer ) +{ + return 0; +} +/* Intentionally empty. */ +extern "C" void internal_init_need() +{ +} +/* Always returns 3. NOTE(review): the meaning of 3 is defined by the caller and is not visible here. */ +extern "C" int internal_reducer_need_tok( program_t *prg, struct pda_run *, int id ) +{ + return 3; +} +/* Always returns 3, as for internal_reducer_need_tok. */ +extern "C" int internal_reducer_need_ign( program_t *prg, struct pda_run * ) +{ + return 3; +} + +/* Count the transitions in the fsm by walking the state list. */ +int countTransitions( PdaGraph *fsm ) +{ + int numTrans = 0; + PdaState *state = fsm->stateList.head; + while ( state != 0 ) { + numTrans += state->transMap.length(); + state = state->next; + } + return numTrans; +} +/* Initialize a language element. lit starts out equal to name, id and parserId start at -1, predType at PredNone, and every other flag, counter and pointer at false or 0. */ +LangEl::LangEl( Namespace *nspace, const String &name, Type type ) +: + nspace(nspace), + name(name), + lit(name), + type(type), + id(-1), + numAppearances(0), + commit(false), + isIgnore(false), + reduceFirst(false), + isLiteral(false), + isRepeat(false), + isList(false), + isOpt(false), + parseStop(false), + isEOF(false), + leftRecursive(false), + repeatOf(0), + tokenDef(0), + rootDef(0), + termDup(0), + eofLel(0), + pdaGraph(0), + pdaTables(0), + transBlock(0), + objectDef(0), + thisSize(0), + ofiOffset(0), + parserId(-1), + predType(PredNone), + predValue(0), + contextDef(0), + contextIn(0), + noPreIgnore(false), + noPostIgnore(false), + isZero(false) +{ +} +/* Build a linear machine for this production element list: a start state, then one new state and one shift transition per element (keyed on the element's language element id), with the last state made final. The pd and prod arguments are not used by the live code. */ +PdaGraph *ProdElList::walk( Compiler *pd, Production *prod ) +{ + PdaGraph *prodFsm = new PdaGraph(); + PdaState *last = prodFsm->addState(); + prodFsm->setStartState( last ); + + int prodLength = 0; + for ( Iter prodEl = first(); prodEl.lte(); prodEl++, prodLength++ ) { + 
//PdaGraph *itemFsm = prodEl->walk( pd ); + long value = prodEl->langEl->id; + + PdaState *newState = prodFsm->addState(); + PdaTrans *newTrans = prodFsm->appendNewTrans( last, newState, value, value ); + + newTrans->isShift = true; + newTrans->shiftPrior = prodEl->priorVal; + //cerr << "PRIOR VAL: " << newTrans->shiftPrior << endl; + + if ( prodEl->commit ) { + //cout << "COMMIT: inserting commit of length: " << pd->prodLength << endl; + /* Insert the commit into transitions out of last. At this point + * prodLength is the number of elements that precede this one. */ + for ( TransMap::Iter trans = last->transMap; trans.lte(); trans++ ) + trans->value->commits.insert( prodLength ); + } + + last = newState; + } + + /* Make the last state the final state. */ + prodFsm->setFinState( last ); + return prodFsm; +} + /* Build a one-element production element list that refers to langEl through a TYPE_TREE unique type. */ + +ProdElList *Compiler::makeProdElList( LangEl *langEl ) +{ + ProdElList *prodElList = new ProdElList(); + UniqueType *uniqueType = findUniqueType( TYPE_TREE, langEl ); + TypeRef *typeRef = TypeRef::cons( internal, uniqueType ); + prodElList->append( new ProdEl( internal, typeRef ) ); + prodElList->tail->langEl = langEl; + return prodElList; +} +/* Name each definition "<language element name>-<n>", numbering the definitions of a language element from 1. */ +void Compiler::makeDefinitionNames() +{ + for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { + int prodNum = 1; + for ( LelDefList::Iter def = lel->defList; def.lte(); def++ ) { + def->data.setAs( lel->name.length() + 32, "%s-%i", + lel->name.data, prodNum++ ); + } + } +} + +/* Make sure that there are no language elements whose type is unknown. This + * can happen when an id is used on the rhs of a definition but is not defined + * as anything. */ +void Compiler::noUndefindLangEls() +{ + for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { + if ( lel->type == LangEl::Unknown ) + error() << "'" << lel->name << "' was not defined as anything" << endp; + } +} + +void Compiler::makeLangElIds() +{ + /* The first id 0 is reserved for the stack sentinel. A negative id means + * error to the parsing function, inducing backtracking. 
*/ + nextLelId = 1; + + /* First pass assigns to the user terminals. */ + for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { + /* Must be a term, and not any of the special reserved terminals + * (EOF elements, the error element, the no-token element). */ + if ( lel->type == LangEl::Term && + !lel->isEOF && + lel != errorLangEl && + lel != noTokenLangEl ) + { + lel->id = nextLelId++; + } + } + + //eofLangEl->id = nextLelId++; + for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { + /* Second pass: the EOF language elements take the ids that follow + * the user terminals. */ + if ( lel->isEOF ) + lel->id = nextLelId++; + } + + /* Next assign to the no-token element, which we always create. */ + noTokenLangEl->id = nextLelId++; + + /* Possibly assign to the error language element. */ + if ( errorLangEl != 0 ) + errorLangEl->id = nextLelId++; + + /* Save this for the code generation. */ + firstNonTermId = nextLelId; + + /* A third and final pass assigns to everything else. */ + for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { + /* Anything else not yet assigned gets assigned now. */ + if ( lel->id < 0 ) + lel->id = nextLelId++; + } + /* The built-in pointer, string and ignore elements are expected to land on fixed ids. */ + assert( ptrLangEl->id == LEL_ID_PTR ); + assert( strLangEl->id == LEL_ID_STR ); + assert( ignoreLangEl->id == LEL_ID_IGNORE ); +} +/* Assign ids to the struct elements, continuing from the language element ids, followed by the three built-in struct ids. */ +void Compiler::makeStructElIds() +{ + firstStructElId = nextLelId; + + /* Start at the next lang el id and go up from there. Using disjoint sets + * allows us to verify that a tree is a tree and struct is a struct because + * the ID field is at the same offset. 
*/ + int nextId = nextLelId; + for ( StructElList::Iter sel = structEls; sel.lte(); sel++ ) + sel->id = nextId++; + + structInbuiltId = nextId++; + structInputId = nextId++; + structStreamId = nextId++; +} +/* Prefix lel's refName, declName and xmlTag with the name of nspace and then, recursively, of each enclosing namespace. At the root namespace only refName receives a leading "::" and the recursion stops. */ +void Compiler::refNameSpace( LangEl *lel, Namespace *nspace ) +{ + if ( nspace == rootNamespace ) { + lel->refName = "::" + lel->refName; + return; + } + + lel->refName = nspace->name + "::" + lel->refName; + lel->declName = nspace->name + "::" + lel->declName; + lel->xmlTag = nspace->name + "::" + lel->xmlTag; + refNameSpace( lel, nspace->parentNamespace ); +} +/* Derive the fullName, fullLit, refName, declName and xmlTag of every language element from its name and literal, then qualify them with the enclosing namespaces. */ +void Compiler::makeLangElNames() +{ + for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { + lel->fullName = lel->name; + lel->fullLit = lel->lit; + lel->refName = lel->lit; + lel->declName = lel->lit; + lel->xmlTag = lel->name; + + /* If there is also a namespace next to the type, we add a prefix to + * the type. It's not convenient to name C++ classes the same as a + * namespace in the same scope. We don't want to restrict colm, so we + * add a workaround for the least-common case. The type gets t_ prefix. + * */ + Namespace *nspace = lel->nspace->findNamespace( lel->name ); + if ( nspace != 0 ) { + lel->refName = "t_" + lel->refName; + lel->fullName = "t_" + lel->fullName; + lel->declName = "t_" + lel->declName; + lel->xmlTag = "t_" + lel->xmlTag; + } + + refNameSpace( lel, lel->nspace ); + } +} + +/* Set up dot sets, shift info, and prod sets. */ +void Compiler::makeProdFsms() +{ + /* There are two items in the index for each production (high and low). */ + int indexLen = prodList.length() * 2; + dotItemIndex.setAsNew( indexLen ); + int dsiLow = 0, indexPos = 0; + + /* Build FSMs for all production language elements. */ + for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) + prod->fsm = prod->prodElList->walk( this, prod ); + + makeNonTermFirstSets(); + makeFirstSets(); + + /* Now compute the length, dot items and pending commits of each production. 
*/ + for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) { + if ( addUniqueEmptyProductions ) { + /* This must be re-implemented. */ + assert( false ); + //if ( !prod->isLeftRec && prod->uniqueEmptyLeader != 0 ) { + // PdaGraph *emptyLeader = prod->uniqueEmptyLeader->walk( this ); + // emptyLeader->concatOp( prod->fsm ); + // prod->fsm = emptyLeader; + //} + } + + /* Compute the machine's length. */ + prod->fsmLength = prod->fsm->fsmLength( ); + + /* Productions have a unique production id for each final state. + * This lets us use a production length specific to each final state. + * Start states are always isolated therefore if the start state is + * final then reductions from it will always have a fixed production + * length. This is a simple method for determining the length + * of zero-length derivations when reducing. */ + + /* Number of dot items needed for the production is elements + 1 + * because the dot can be before the first and after the last element. + * NOTE(review): the value used below is the number of states plus one. + * For the linear machines built by ProdElList::walk the state count is + * already elements + 1, so one index slot per production appears to go + * unused - confirm that this is intended. */ + int numForProd = prod->fsm->stateList.length() + 1; + + /* Set up the low and high values in the index for this production. */ + dotItemIndex.data[indexPos].key = dsiLow; + dotItemIndex.data[indexPos].value = prod; + dotItemIndex.data[indexPos+1].key = dsiLow + numForProd - 1; + dotItemIndex.data[indexPos+1].value = prod; + + int dsi = dsiLow; + for ( PdaStateList::Iter state = prod->fsm->stateList; state.lte(); state++, dsi++ ) { + /* All transitions are shifts. */ + for ( TransMap::Iter out = state->transMap; out.lte(); out++ ) + assert( out->value->isShift ); + + state->dotSet.insert( dsi ); + } + + /* Move over the production. 
*/ + dsiLow += numForProd; + indexPos += 2; + + if ( prod->prodCommit ) { + for ( PdaStateSet::Iter fin = prod->fsm->finStateSet; fin.lte(); fin++ ) { + int length = prod->fsmLength; + //cerr << "PENDING COMMIT IN FINAL STATE of " << prod->prodId << + // " with len: " << length << endl; + (*fin)->pendingCommits.insert( ProdIdPair( prod->prodId, length ) ); + } + } + } + + /* Make the final state specific prod id to prod id mapping. */ + prodIdIndex = new Production*[prodList.length()]; + for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) + prodIdIndex[prod->prodId] = prod; +} + +/* Want the first set of over src. If the first set contains epsilon, go over + * it and over tab. If overSrc is the end of the production, find the follow + * from the table, taking only the characters on which the parent is reduced. + * */ +void Compiler::findFollow( AlphSet &result, PdaState *overTab, + PdaState *overSrc, Production *parentDef ) +{ + if ( overSrc->isFinState() ) { + assert( overSrc->transMap.length() == 0 ); + + /* At the end of the production. Turn to the table. */ + long redCode = makeReduceCode( parentDef->prodId, false ); + for ( TransMap::Iter tabTrans = overTab->transMap; tabTrans.lte(); tabTrans++ ) { + for ( ActDataList::Iter adl = tabTrans->value->actions; adl.lte(); adl++ ) { + if ( *adl == redCode ) + result.insert( tabTrans->key ); + } + } + } + else { + /* Get the first set of the item. If the first set contains epsilon + * then move over overSrc and overTab and recurse. */ + assert( overSrc->transMap.length() == 1 ); + TransMap::Iter pastTrans = overSrc->transMap; + + LangEl *langEl = langElIndex[pastTrans->key]; + if ( langEl != 0 && langEl->type == LangEl::NonTerm ) { + bool hasEpsilon = false; + for ( LelDefList::Iter def = langEl->defList; def.lte(); def++ ) { + result.insert( def->firstSet ); + + if ( def->firstSet.find( -1 ) ) + hasEpsilon = true; + } + + /* Find the equivalent state in the parser. 
*/ + if ( hasEpsilon ) { + PdaTrans *tabTrans = overTab->findTrans( pastTrans->key ); + findFollow( result, tabTrans->toState, + pastTrans->value->toState, parentDef ); + } + + /* Now possibly the dup. */ + if ( langEl->termDup != 0 ) + result.insert( langEl->termDup->id ); + } + else { + result.insert( pastTrans->key ); + } + } +} + +PdaState *Compiler::followProd( PdaState *tabState, PdaState *prodState ) +{ + while ( prodState->transMap.length() == 1 ) { + TransMap::Iter prodTrans = prodState->transMap; + PdaTrans *tabTrans = tabState->findTrans( prodTrans->key ); + prodState = prodTrans->value->toState; + tabState = tabTrans->toState; + } + return tabState; +} + +void Compiler::trySetTime( PdaTrans *trans, long code, long &time ) +{ + /* Find the item. */ + for ( ActDataList::Iter adl = trans->actions; adl.lte(); adl++ ) { + if ( *adl == code ) { + /* If the time of the shift is not already set, set it. */ + if ( trans->actOrds[adl.pos()] == 0 ) { + //cerr << "setting time: state = " << tabState->stateNum + // << ", trans = " << tabTrans->lowKey + // << ", time = " << time << endl; + trans->actOrds[adl.pos()] = time++; + } + break; + } + } +} + +/* Go down a definition and then handle the follow actions. */ +void Compiler::pdaOrderFollow( LangEl *rootEl, PdaState *tabState, + PdaTrans *tabTrans, PdaTrans *srcTrans, Production *parentDef, + Production *definition, long &time ) +{ + /* We need the follow from tabState/srcState over the definition we are + * currently processing. */ + PdaState *overTab = tabTrans->toState; + PdaState *overSrc = srcTrans->toState; + + AlphSet alphSet; + if ( parentDef == rootEl->rootDef ) + alphSet.insert( rootEl->eofLel->id ); + else + findFollow( alphSet, overTab, overSrc, parentDef ); + + /* Now follow the production to find out where it expands to. */ + PdaState *expandToState = followProd( tabState, definition->fsm->startState ); + + /* Find the reduce item. 
*/ + long redCode = makeReduceCode( definition->prodId, false ); + + for ( TransMap::Iter tt = expandToState->transMap; tt.lte(); tt++ ) { + if ( alphSet.find( tt->key ) ) { + trySetTime( tt->value, redCode, time ); + + /* If the items token region is not recorded in the state, do it now. */ + addRegion( expandToState, tt->value, tt->key, + tt->value->noPreIgnore, tt->value->noPostIgnore ); + } + } +} + +bool regionVectHas( RegionVect &regVect, TokenRegion *region ) +{ + for ( RegionVect::Iter trvi = regVect; trvi.lte(); trvi++ ) { + if ( *trvi == region ) + return true; + } + return false; +} + +void Compiler::addRegion( PdaState *tabState, PdaTrans *tabTrans, + long pdaKey, bool noPreIgnore, bool noPostIgnore ) +{ + LangEl *langEl = langElIndex[pdaKey]; + if ( langEl != 0 && langEl->type == LangEl::Term ) { + TokenRegion *region = 0; + RegionSet *regionSet = 0; + + /* If it is not the eof, then use the region associated + * with the token definition. */ + if ( langEl->isZero ) { + region = langEl->tokenDef->regionSet->collectIgnore; + regionSet = langEl->tokenDef->regionSet; + } + else if ( !langEl->isEOF && langEl->tokenDef != 0 ) { + region = langEl->tokenDef->regionSet->tokenIgnore; + regionSet = langEl->tokenDef->regionSet; + } + + if ( region != 0 ) { + /* region. 
*/ + TokenRegion *scanRegion = region; + + if ( langEl->noPreIgnore ) + scanRegion = regionSet->tokenOnly; + + if ( !regionVectHas( tabState->regions, scanRegion ) ) + tabState->regions.append( scanRegion ); + + /* Pre-region of to state */ + PdaState *toState = tabTrans->toState; + if ( !langEl->noPostIgnore && + regionSet->ignoreOnly != 0 && + !regionVectHas( toState->preRegions, regionSet->ignoreOnly ) ) + { + toState->preRegions.append( regionSet->ignoreOnly ); + } + } + } +} + +#if 0 + orderState( tabState, prodState, time ): + if not tabState.dotSet.find( prodState.dotID ) + tabState.dotSet.insert( prodState.dotID ) + tabTrans = tabState.findMatchingTransition( prodState.getTransition() ) + + if tabTrans is NonTerminal: + for production in tabTrans.nonTerm.prodList: + orderState( tabState, production.startState, time ) + + for all expandToState in tabTrans.expandToStates: + for all followTrans in expandToState.transList + reduceAction = findAction( production.reduction ) + if reduceAction.time is unset: + reduceAction.time = time++ + end + end + end + end + end + + shiftAction = tabTrans.findAction( shift ) + if shiftAction.time is unset: + shiftAction.time = time++ + end + + orderState( tabTrans.toState, prodTrans.toState, time ) + end + end + + orderState( parseTable.startState, startProduction.startState, 1 ) +#endif + +void Compiler::pdaOrderProd( LangEl *rootEl, PdaState *tabState, + PdaState *srcState, Production *parentDef, long &time ) +{ + assert( srcState->dotSet.length() == 1 ); + if ( tabState->dotSet2.find( srcState->dotSet[0] ) ) + return; + tabState->dotSet2.insert( srcState->dotSet[0] ); + + assert( srcState->transMap.length() == 0 || srcState->transMap.length() == 1 ); + + if ( srcState->transMap.length() == 1 ) { + TransMap::Iter srcTrans = srcState->transMap; + + /* Find the equivalent state in the parser. */ + PdaTrans *tabTrans = tabState->findTrans( srcTrans->key ); + + /* Recurse into the transition if it is a non-terminal. 
*/ + LangEl *langEl = langElIndex[srcTrans->key]; + if ( langEl != 0 ) { + if ( langEl->reduceFirst ) { + /* Use a shortest match ordering for the contents of this + * nonterminal. Does follows for all productions first, then + * goes down the productions. */ + for ( LelDefList::Iter expDef = langEl->defList; expDef.lte(); expDef++ ) { + pdaOrderFollow( rootEl, tabState, tabTrans, srcTrans->value, + parentDef, expDef, time ); + } + for ( LelDefList::Iter expDef = langEl->defList; expDef.lte(); expDef++ ) + pdaOrderProd( rootEl, tabState, expDef->fsm->startState, expDef, time ); + + } + else { + /* The default action ordering. For each prod, goes down the + * prod then sets the follow before going to the next prod. */ + for ( LelDefList::Iter expDef = langEl->defList; expDef.lte(); expDef++ ) { + pdaOrderProd( rootEl, tabState, expDef->fsm->startState, expDef, time ); + + pdaOrderFollow( rootEl, tabState, tabTrans, srcTrans->value, + parentDef, expDef, time ); + } + } + } + + trySetTime( tabTrans, SHIFT_CODE, time ); + + /* Now possibly for the dup. */ + if ( langEl != 0 && langEl->termDup != 0 ) { + PdaTrans *dupTrans = tabState->findTrans( langEl->termDup->id ); + trySetTime( dupTrans, SHIFT_CODE, time ); + } + + /* If the items token region is not recorded in the state, do it now. */ + addRegion( tabState, tabTrans, srcTrans->key, + srcTrans->value->noPreIgnore, srcTrans->value->noPostIgnore ); + + /* Go over one in the production. */ + pdaOrderProd( rootEl, tabTrans->toState, + srcTrans->value->toState, parentDef, time ); + } +} + +void Compiler::pdaActionOrder( PdaGraph *pdaGraph, LangElSet &parserEls ) +{ + for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { + assert( (state->stateBits & SB_ISMARKED) == 0 ); + + /* Traverse the src state's transitions. */ + long last = 0; + for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { + if ( ! 
trans.first() ) + assert( last < trans->key ); + last = trans->key; + } + } + + /* Compute the action orderings, record the max value. */ + long time = 1; + for ( LangElSet::Iter pe = parserEls; pe.lte(); pe++ ) { + PdaState *startState = (*pe)->rootDef->fsm->startState; + pdaOrderProd( *pe, (*pe)->startState, startState, (*pe)->rootDef, time ); + + /* Walk over the start lang el and set the time for shift of + * the eof action that completes the parse. */ + PdaTrans *overStart = (*pe)->startState->findTrans( (*pe)->id ); + PdaTrans *eofTrans = overStart->toState->findTrans( (*pe)->eofLel->id ); + eofTrans->actOrds[0] = time++; + } + + for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { + if ( state->regions.length() == 0 ) { + for ( TransMap::Iter tel = state->transMap; tel.lte(); tel++ ) { + /* There are no regions and EOF leaves the state. Add the eof + * token region. */ + PdaTrans *trans = tel->value; + LangEl *lel = langElIndex[trans->lowKey]; + if ( lel != 0 && lel->isEOF ) + state->regions.append( EOF_REGION ); + } + } + } + + ///* Warn about states with empty token region lists. */ + //for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { + // if ( state->regions.length() == 0 ) { + // warning() << "state has an empty token region, state: " << + // state->stateNum << endl; + // } + //} + + /* Some actions may not have an ordering. I believe these to be actions + * that result in a parse error and they arise because the state tables + * are LALR(1) but the action ordering is LR(1). LALR(1) causes some + * reductions that lead nowhere. */ + for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { + assert( CmpDotSet::compare( state->dotSet, state->dotSet2 ) == 0 ); + for ( TransMap::Iter tel = state->transMap; tel.lte(); tel++ ) { + PdaTrans *trans = tel->value; + /* Check every action has an ordering. 
*/ + for ( ActDataList::Iter adl = trans->actOrds; adl.lte(); adl++ ) { + if ( *adl == 0 ) + *adl = time++; + } + } + } +} + +void Compiler::advanceReductions( PdaGraph *pdaGraph ) +{ + /* Loop all states. */ + for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { + if ( !state->advanceReductions ) + continue; + + bool outHasShift = false; + ReductionMap outReds; + LongSet outCommits; + for ( TransMap::Iter out = state->transMap; out.lte(); out++ ) { + /* Get the transition from the trans el. */ + if ( out->value->isShift ) + outHasShift = true; + outReds.insert( out->value->reductions ); + outCommits.insert( out->value->commits ); + } + + bool inHasShift = false; + ReductionMap inReds; + for ( PdaTransInList::Iter in = state->inRange; in.lte(); in++ ) { + /* Get the transition from the trans el. */ + if ( in->isShift ) + inHasShift = true; + inReds.insert( in->reductions ); + } + + if ( !outHasShift && outReds.length() == 1 && + inHasShift && inReds.length() == 0 ) + { + //cerr << "moving reduction to shift" << endl; + + /* Move the reduction to all in transitions. */ + for ( PdaTransInList::Iter in = state->inRange; in.lte(); in++ ) { + assert( in->actions.length() == 1 ); + assert( in->actions[0] == SHIFT_CODE ); + in->actions[0] = makeReduceCode( outReds[0].key, true ); + in->afterShiftCommits.insert( outCommits ); + } + + /* + * Remove all transitions out of the state. + */ + + /* Detach out range transitions. */ + for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { + pdaGraph->detachTrans( state, trans->value->toState, trans->value ); + delete trans->value; + } + state->transMap.empty(); + + /* Redirect all the in transitions to the actionDestState. */ + pdaGraph->inTransMove( actionDestState, state ); + } + } + + pdaGraph->removeUnreachableStates(); +} + +void Compiler::sortActions( PdaGraph *pdaGraph ) +{ + /* Sort the actions. 
*/ + for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { + assert( CmpDotSet::compare( state->dotSet, state->dotSet2 ) == 0 ); + for ( TransMap::Iter tel = state->transMap; tel.lte(); tel++ ) { + PdaTrans *trans = tel->value; + + /* Sort by the action ords. */ + ActDataList actions( trans->actions ); + ActDataList actOrds( trans->actOrds ); + ActDataList actPriors( trans->actPriors ); + trans->actions.empty(); + trans->actOrds.empty(); + trans->actPriors.empty(); + while ( actOrds.length() > 0 ) { + int min = 0; + for ( int i = 1; i < actOrds.length(); i++ ) { + if ( actPriors[i] > actPriors[min] || + (actPriors[i] == actPriors[min] && + actOrds[i] < actOrds[min] ) ) + { + min = i; + } + } + trans->actions.append( actions[min] ); + trans->actOrds.append( actOrds[min] ); + trans->actPriors.append( actPriors[min] ); + actions.remove(min); + actOrds.remove(min); + actPriors.remove(min); + } + + if ( branchPointInfo && trans->actions.length() > 1 ) { + cerr << "info: branch point" + << " state: " << state->stateNum + << " trans: "; + LangEl *lel = langElIndex[trans->lowKey]; + if ( lel == 0 ) + cerr << (char)trans->lowKey << endl; + else + cerr << lel->lit << endl; + + for ( ActDataList::Iter act = trans->actions; act.lte(); act++ ) { + switch ( *act & 0x3 ) { + case 1: + cerr << " shift" << endl; + break; + case 2: + cerr << " reduce " << + prodIdIndex[(*act >> 2)]->data << endl; + break; + case 3: + cerr << " shift-reduce" << endl; + break; + } + } + } + + /* Verify that shifts of nonterminals don't have any branch + * points or commits. */ + if ( trans->lowKey >= firstNonTermId ) { + if ( trans->actions.length() != 1 || + (trans->actions[0] & 0x3) != 1 ) + { + error() << "TRANS ON NONTERMINAL is something " + "other than a shift" << endl; + } + if ( trans->commits.length() > 0 ) + error() << "TRANS ON NONTERMINAL has a commit" << endl; + } + + /* TODO: Shift-reduces are optimizations. 
Verify that + * shift-reduces exist only if they don't entail a conflict. */ + } + } +} + +void Compiler::reduceActions( PdaGraph *pdaGraph ) +{ + /* Reduce the actions. */ + for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { + for ( TransMap::Iter tel = state->transMap; tel.lte(); tel++ ) { + PdaTrans *trans = tel->value; + PdaActionSetEl *inSet; + + int commitLen = trans->commits.length() > 0 ? + trans->commits[trans->commits.length()-1] : 0; + + if ( trans->afterShiftCommits.length() > 0 ) { + int afterShiftCommit = trans->afterShiftCommits[ + trans->afterShiftCommits.length()-1]; + + if ( commitLen > 0 && commitLen+1 > afterShiftCommit ) + commitLen = ( commitLen + 1 ); + else + commitLen = afterShiftCommit; + } + else { + commitLen = commitLen * -1; + } + + //if ( commitLen != 0 ) { + // cerr << "FINAL ACTION COMMIT LEN: " << commitLen << endl; + //} + + pdaGraph->actionSet.insert( ActionData( trans->toState->stateNum, + trans->actions, commitLen ), &inSet ); + trans->actionSetEl = inSet; + } + } +} + +void Compiler::computeAdvanceReductions( LangEl *langEl, PdaGraph *pdaGraph ) +{ + /* Get the entry into the graph and traverse over the root. The resulting + * state can have eof, nothing else can. */ + PdaState *overStart = pdaGraph->followFsm( + langEl->startState, + langEl->rootDef->fsm ); + + /* The graph must reduce to root all on it's own. It cannot depend on + * require EOF. */ + for ( PdaStateList::Iter st = pdaGraph->stateList; st.lte(); st++ ) { + if ( st == overStart ) + continue; + + for ( TransMap::Iter tr = st->transMap; tr.lte(); tr++ ) { + if ( tr->value->lowKey == langEl->eofLel->id ) + st->advanceReductions = true; + } + } +} + +void Compiler::verifyParseStopGrammar( LangEl *langEl, PdaGraph *pdaGraph ) +{ + /* Get the entry into the graph and traverse over the root. The resulting + * state can have eof, nothing else can. 
*/ + PdaState *overStart = pdaGraph->followFsm( + langEl->startState, + langEl->rootDef->fsm ); + + /* The graph must reduce to root all on it's own. It cannot depend on + * require EOF. */ + for ( PdaStateList::Iter st = pdaGraph->stateList; st.lte(); st++ ) { + if ( st == overStart ) + continue; + + for ( TransMap::Iter tr = st->transMap; tr.lte(); tr++ ) { + if ( tr->value->lowKey == langEl->eofLel->id ) { + /* This needs a better error message. Appears to be voodoo. */ + error() << "grammar is not usable with parse_stop" << endp; + } + } + } +} + +LangEl *Compiler::predOf( PdaTrans *trans, long action ) +{ + LangEl *lel; + if ( action == SHIFT_CODE ) + lel = langElIndex[trans->lowKey]; + else + lel = prodIdIndex[action >> 2]->predOf; + return lel; +} + + +bool Compiler::precedenceSwap( long action1, long action2, LangEl *l1, LangEl *l2 ) +{ + bool swap = false; + if ( l2->predValue > l1->predValue ) + swap = true; + else if ( l1->predValue == l2->predValue ) { + if ( l1->predType == PredLeft && action1 == SHIFT_CODE ) + swap = true; + else if ( l1->predType == PredRight && action2 == SHIFT_CODE ) + swap = true; + } + return swap; +} + +bool Compiler::precedenceRemoveBoth( LangEl *l1, LangEl *l2 ) +{ + if ( l1->predValue == l2->predValue && l1->predType == PredNonassoc ) + return true; + return false; +} + +void Compiler::resolvePrecedence( PdaGraph *pdaGraph ) +{ + for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { + assert( CmpDotSet::compare( state->dotSet, state->dotSet2 ) == 0 ); + + for ( long t = 0; t < state->transMap.length(); /* increment at end */ ) { + PdaTrans *trans = state->transMap[t].value; + +again: + /* Find action with precedence. */ + for ( int i = 0; i < trans->actions.length(); i++ ) { + LangEl *li = predOf( trans, trans->actions[i] ); + + if ( li != 0 && li->predType != PredNone ) { + /* Find another action with precedence. 
*/ + for ( int j = i+1; j < trans->actions.length(); j++ ) { + LangEl *lj = predOf( trans, trans->actions[j] ); + + if ( lj != 0 && lj->predType != PredNone ) { + /* Conflict to check. */ + bool swap = precedenceSwap( trans->actions[i], + trans->actions[j], li, lj ); + + if ( swap ) { + long t = trans->actions[i]; + trans->actions[i] = trans->actions[j]; + trans->actions[j] = t; + } + + trans->actions.remove( j ); + if ( precedenceRemoveBoth( li, lj ) ) + trans->actions.remove( i ); + + goto again; + } + } + } + } + + /* If there are still actions then move to the next one. If not, + * (due to nonassoc) then remove the transition. */ + if ( trans->actions.length() > 0 ) + t += 1; + else + state->transMap.vremove( t ); + } + } +} + +void Compiler::analyzeMachine( PdaGraph *pdaGraph, LangElSet &parserEls ) +{ + pdaGraph->maxState = pdaGraph->stateList.length() - 1; + pdaGraph->maxLelId = nextLelId - 1; + pdaGraph->maxOffset = pdaGraph->stateList.length() * pdaGraph->maxLelId; + + for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { + for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { + if ( trans->value->isShift ) { + trans->value->actions.append( SHIFT_CODE ); + trans->value->actPriors.append( trans->value->shiftPrior ); + } + for ( ReductionMap::Iter red = trans->value->reductions; red.lte(); red++ ) { + trans->value->actions.append( makeReduceCode( red->key, false ) ); + trans->value->actPriors.append( red->value ); + } + trans->value->actOrds.appendDup( 0, trans->value->actions.length() ); + } + } + + pdaActionOrder( pdaGraph, parserEls ); + sortActions( pdaGraph ); + resolvePrecedence( pdaGraph ); + + /* Verify that any type we parse_stop can actually be parsed that way. 
*/ + for ( LangElSet::Iter pe = parserEls; pe.lte(); pe++ ) { + LangEl *lel = *pe; + if ( lel->parseStop ) + computeAdvanceReductions(lel , pdaGraph); + } + + advanceReductions( pdaGraph ); + pdaGraph->setStateNumbers(); + reduceActions( pdaGraph ); + + /* Set the action ids. */ + int actionSetId = 0; + for ( PdaActionSet::Iter asi = pdaGraph->actionSet; asi.lte(); asi++ ) + asi->key.id = actionSetId++; + + /* Get the max index. */ + pdaGraph->maxIndex = actionSetId - 1; + + /* Compute the max prod length. */ + pdaGraph->maxProdLen = 0; + for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) { + if ( (unsigned)prod->fsmLength > pdaGraph->maxProdLen ) + pdaGraph->maxProdLen = prod->fsmLength; + } + + /* Asserts that any transition with a nonterminal has a single action + * which is either a shift or a shift-reduce. */ + for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { + for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { + LangEl *langEl = langElIndex[trans->value->lowKey]; + if ( langEl != 0 && langEl->type == LangEl::NonTerm ) { + assert( trans->value->actions.length() == 1 ); + assert( trans->value->actions[0] == SHIFT_CODE || + (trans->value->actions[0] & 0x3) == SHIFT_REDUCE_CODE ); + } + } + } + + /* Assert that shift reduces always appear on their own. */ + for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { + for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { + for ( ActDataList::Iter act = trans->value->actions; act.lte(); act++ ) { + if ( (*act & 0x3) == SHIFT_REDUCE_CODE ) + assert( trans->value->actions.length() == 1 ); + } + } + } + + /* Verify that any type we parse_stop can actually be parsed that way. 
*/ + for ( LangElSet::Iter pe = parserEls; pe.lte(); pe++ ) { + LangEl *lel = *pe; + if ( lel->parseStop ) + verifyParseStopGrammar(lel , pdaGraph); + } +} + +void Compiler::wrapNonTerminals() +{ + /* Make a language element that will be used to make the root productions. + * These are used for making parsers rooted at any production (including + * the start symbol). */ + rootLangEl = declareLangEl( this, rootNamespace, "_root", LangEl::NonTerm ); + + for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { + /* Make a single production used when the lel is a root. */ + ProdElList *prodElList = makeProdElList( lel ); + lel->rootDef = Production::cons( InputLoc(), rootLangEl, + prodElList, String(), false, 0, + prodList.length(), rootLangEl->defList.length() ); + prodList.append( lel->rootDef ); + rootLangEl->defList.append( lel->rootDef ); + + /* First resolve. */ + for ( ProdElList::Iter prodEl = *prodElList; prodEl.lte(); prodEl++ ) + resolveProdEl( prodEl ); + } +} + +bool Compiler::makeNonTermFirstSetProd( Production *prod, PdaState *state ) +{ + bool modified = false; + for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { + if ( trans->key >= firstNonTermId ) { + long *inserted = prod->nonTermFirstSet.insert( trans->key ); + if ( inserted != 0 ) + modified = true; + + bool hasEpsilon = false; + LangEl *lel = langElIndex[trans->key]; + for ( LelDefList::Iter ldef = lel->defList; ldef.lte(); ldef++ ) { + for ( ProdIdSet::Iter pid = ldef->nonTermFirstSet; + pid.lte(); pid++ ) + { + if ( *pid == -1 ) + hasEpsilon = true; + else { + long *inserted = prod->nonTermFirstSet.insert( *pid ); + if ( inserted != 0 ) + modified = true; + } + } + } + + if ( hasEpsilon ) { + if ( trans->value->toState->isFinState() ) { + long *inserted = prod->nonTermFirstSet.insert( -1 ); + if ( inserted != 0 ) + modified = true; + } + + bool lmod = makeNonTermFirstSetProd( prod, trans->value->toState ); + if ( lmod ) + modified = true; + } + } + } + return modified; +} + + 
+void Compiler::makeNonTermFirstSets() +{ + bool modified = true; + while ( modified ) { + modified = false; + for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) { + if ( prod->fsm->startState->isFinState() ) { + long *inserted = prod->nonTermFirstSet.insert( -1 ); + if ( inserted != 0 ) + modified = true; + } + + bool lmod = makeNonTermFirstSetProd( prod, prod->fsm->startState ); + if ( lmod ) + modified = true; + } + } + + for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) { + if ( prod->nonTermFirstSet.find( prod->prodName->id ) ) + prod->isLeftRec = true; + } +} + +void Compiler::printNonTermFirstSets() +{ + for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) { + cerr << prod->data << ": "; + for ( ProdIdSet::Iter pid = prod->nonTermFirstSet; pid.lte(); pid++ ) + { + if ( *pid < 0 ) + cerr << " <EPSILON>"; + else { + LangEl *lel = langElIndex[*pid]; + cerr << " " << lel->name; + } + } + cerr << endl; + + if ( prod->isLeftRec ) + cerr << "PROD IS LEFT REC: " << prod->data << endl; + } +} + +bool Compiler::makeFirstSetProd( Production *prod, PdaState *state ) +{ + bool modified = false; + for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { + if ( trans->key < firstNonTermId ) { + long *inserted = prod->firstSet.insert( trans->key ); + if ( inserted != 0 ) + modified = true; + } + else { + long *inserted = prod->firstSet.insert( trans->key ); + if ( inserted != 0 ) + modified = true; + + LangEl *klangEl = langElIndex[trans->key]; + if ( klangEl != 0 && klangEl->termDup != 0 ) { + long *inserted2 = prod->firstSet.insert( klangEl->termDup->id ); + if ( inserted2 != 0 ) + modified = true; + } + + bool hasEpsilon = false; + LangEl *lel = langElIndex[trans->key]; + for ( LelDefList::Iter ldef = lel->defList; ldef.lte(); ldef++ ) { + for ( ProdIdSet::Iter pid = ldef->firstSet; + pid.lte(); pid++ ) + { + if ( *pid == -1 ) + hasEpsilon = true; + else { + long *inserted = prod->firstSet.insert( *pid ); + if ( inserted != 0 ) + 
modified = true; + } + } + } + + if ( hasEpsilon ) { + if ( trans->value->toState->isFinState() ) { + long *inserted = prod->firstSet.insert( -1 ); + if ( inserted != 0 ) + modified = true; + } + + bool lmod = makeFirstSetProd( prod, trans->value->toState ); + if ( lmod ) + modified = true; + } + } + } + return modified; +} + + +void Compiler::makeFirstSets() +{ + bool modified = true; + while ( modified ) { + modified = false; + for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) { + if ( prod->fsm->startState->isFinState() ) { + long *inserted = prod->firstSet.insert( -1 ); + if ( inserted != 0 ) + modified = true; + } + + bool lmod = makeFirstSetProd( prod, prod->fsm->startState ); + if ( lmod ) + modified = true; + } + } +} + +void Compiler::printFirstSets() +{ + for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) { + cerr << prod->data << ": "; + for ( ProdIdSet::Iter pid = prod->firstSet; pid.lte(); pid++ ) + { + if ( *pid < 0 ) + cerr << " <EPSILON>"; + else { + LangEl *lel = langElIndex[*pid]; + if ( lel != 0 ) + cerr << endl << " " << lel->name; + else + cerr << endl << " " << *pid; + } + } + cerr << endl; + } +} + +void Compiler::insertUniqueEmptyProductions() +{ + int limit = prodList.length(); + for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) { + if ( prod->prodId == limit ) + break; + + /* Get a language element. 
*/ + char name[20]; + sprintf(name, "U%li", prodList.length()); + LangEl *prodName = addLangEl( this, rootNamespace, name, LangEl::NonTerm ); + Production *newDef = Production::cons( InputLoc(), prodName, + 0, String(), false, 0, prodList.length(), prodName->defList.length() ); + prodName->defList.append( newDef ); + prodList.append( newDef ); + + prod->uniqueEmptyLeader = prodName; + } +} + +struct local_info *Compiler::makeLocalInfo( Locals &locals ) +{ + struct local_info *localInfo = new local_info[locals.locals.length()]; + memset( localInfo, 0, sizeof(struct local_info) * locals.locals.length() ); + + for ( Vector<LocalLoc>::Iter l = locals.locals; l.lte(); l++ ) { + localInfo[l.pos()].type = (int) l->type; + localInfo[l.pos()].offset = l->offset; + } + return localInfo; +} + +short *Compiler::makeTrees( ObjectDef *objectDef, int &numTrees ) +{ + numTrees = 0; + for ( FieldList::Iter of = objectDef->fieldList; of.lte(); of++ ) { + if ( of->value->exists() ) { + UniqueType *ut = of->value->typeRef->resolveType( this ); + if ( ut->typeId == TYPE_TREE ) + numTrees += 1; + } + } + + short *trees = new short[numTrees]; + memset( trees, 0, sizeof(short) * numTrees ); + + short pos = 0; + for ( FieldList::Iter of = objectDef->fieldList; of.lte(); of++ ) { + if ( of->value->exists() ) { + UniqueType *ut = of->value->typeRef->resolveType( this ); + if ( ut->typeId == TYPE_TREE ) { + trees[pos] = of->value->offset; + pos += 1; + } + } + } + + return trees; +} + + +void Compiler::makeRuntimeData() +{ + long count = 0; + + /* + * ProdLengths + * ProdLhsIs + * ProdNames + * ProdCodeBlocks + * ProdCodeBlockLens + */ + + runtimeData->frame_info = new frame_info[nextFrameId]; + runtimeData->num_frames = nextFrameId; + memset( runtimeData->frame_info, 0, sizeof(struct frame_info) * nextFrameId ); + + /* + * Init code block. 
+ */ + if ( rootCodeBlock == 0 ) { + runtimeData->root_code = 0; + runtimeData->root_code_len = 0; + runtimeData->root_frame_id = 0; + } + else { + runtimeData->root_code = rootCodeBlock->codeWC.data; + runtimeData->root_code_len = rootCodeBlock->codeWC.length(); + runtimeData->root_frame_id = rootCodeBlock->frameId; + } + + runtimeData->frame_info[rootCodeBlock->frameId].codeWV = 0; + runtimeData->frame_info[rootCodeBlock->frameId].codeLenWV = 0; + + runtimeData->frame_info[rootCodeBlock->frameId].locals = makeLocalInfo( rootCodeBlock->locals ); + runtimeData->frame_info[rootCodeBlock->frameId].locals_len = rootCodeBlock->locals.locals.length(); + + runtimeData->frame_info[rootCodeBlock->frameId].frame_size = rootLocalFrame->size(); + runtimeData->frame_info[rootCodeBlock->frameId].arg_size = 0; + runtimeData->frame_info[rootCodeBlock->frameId].ret_tree = false; + + /* + * prodInfo + */ + count = prodList.length(); + runtimeData->prod_info = new prod_info[count]; + runtimeData->num_prods = count; + + count = 0; + for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) { + runtimeData->prod_info[count].lhs_id = prod->prodName->id; + runtimeData->prod_info[count].prod_num = prod->prodNum; + runtimeData->prod_info[count].length = prod->fsmLength; + runtimeData->prod_info[count].name = prod->data; + runtimeData->prod_info[count].frame_id = -1; + + CodeBlock *block = prod->redBlock; + if ( block != 0 ) { + runtimeData->prod_info[count].frame_id = block->frameId; + runtimeData->frame_info[block->frameId].codeWV = block->codeWV.data; + runtimeData->frame_info[block->frameId].codeLenWV = block->codeWV.length(); + + runtimeData->frame_info[block->frameId].locals = makeLocalInfo( block->locals ); + runtimeData->frame_info[block->frameId].locals_len = block->locals.locals.length(); + + runtimeData->frame_info[block->frameId].frame_size = block->localFrame->size(); + runtimeData->frame_info[block->frameId].arg_size = 0; + runtimeData->frame_info[block->frameId].ret_tree = 
false; + } + + runtimeData->prod_info[count].lhs_upref = true; + runtimeData->prod_info[count].copy = prod->copy.data; + runtimeData->prod_info[count].copy_len = prod->copy.length() / 2; + count += 1; + } + + /* + * regionInfo + */ + runtimeData->num_regions = regionList.length()+1; + runtimeData->region_info = new region_info[runtimeData->num_regions]; + memset( runtimeData->region_info, 0, + sizeof(struct region_info) * runtimeData->num_regions ); + + runtimeData->region_info[0].default_token = -1; + runtimeData->region_info[0].eof_frame_id = -1; + runtimeData->region_info[0].ci_lel_id = 0; + + for ( RegionList::Iter reg = regionList; reg.lte(); reg++ ) { + long regId = reg->id+1; + runtimeData->region_info[regId].default_token = + reg->impl->defaultTokenInstance == 0 ? + -1 : + reg->impl->defaultTokenInstance->tokenDef->tdLangEl->id; + runtimeData->region_info[regId].eof_frame_id = -1; + runtimeData->region_info[regId].ci_lel_id = reg->zeroLel != 0 ? reg->zeroLel->id : 0; + + CodeBlock *block = reg->preEofBlock; + if ( block != 0 ) { + runtimeData->region_info[regId].eof_frame_id = block->frameId; + runtimeData->frame_info[block->frameId].codeWV = block->codeWV.data; + runtimeData->frame_info[block->frameId].codeLenWV = block->codeWV.length(); + + runtimeData->frame_info[block->frameId].locals = makeLocalInfo( block->locals ); + runtimeData->frame_info[block->frameId].locals_len = block->locals.locals.length(); + + runtimeData->frame_info[block->frameId].frame_size = block->localFrame->size(); + runtimeData->frame_info[block->frameId].arg_size = 0; + runtimeData->frame_info[block->frameId].ret_tree = false; + } + } + + /* + * lelInfo + */ + + count = nextLelId; + runtimeData->lel_info = new lang_el_info[count]; + runtimeData->num_lang_els = count; + memset( runtimeData->lel_info, 0, sizeof(struct lang_el_info)*count ); + + for ( int i = 0; i < nextLelId; i++ ) { + LangEl *lel = langElIndex[i]; + if ( lel != 0 ) { + runtimeData->lel_info[i].name = lel->fullLit; + 
runtimeData->lel_info[i].xml_tag = lel->xmlTag; + runtimeData->lel_info[i].repeat = lel->isRepeat; + runtimeData->lel_info[i].list = lel->isList; + runtimeData->lel_info[i].literal = lel->isLiteral; + runtimeData->lel_info[i].ignore = lel->isIgnore; + runtimeData->lel_info[i].frame_id = -1; + + CodeBlock *block = lel->transBlock; + if ( block != 0 ) { + runtimeData->lel_info[i].frame_id = block->frameId; + runtimeData->frame_info[block->frameId].codeWV = block->codeWV.data; + runtimeData->frame_info[block->frameId].codeLenWV = block->codeWV.length(); + + runtimeData->frame_info[block->frameId].locals = makeLocalInfo( block->locals ); + runtimeData->frame_info[block->frameId].locals_len = block->locals.locals.length(); + + runtimeData->frame_info[block->frameId].frame_size = block->localFrame->size(); + runtimeData->frame_info[block->frameId].arg_size = 0; + runtimeData->frame_info[block->frameId].ret_tree = false; + } + + runtimeData->lel_info[i].object_type_id = + lel->objectDef == 0 ? 0 : lel->objectDef->id; + runtimeData->lel_info[i].ofi_offset = lel->ofiOffset; + runtimeData->lel_info[i].object_length = + lel->objectDef != 0 ? lel->objectDef->size() : 0; + +// runtimeData->lelInfo[i].contextTypeId = 0; +// lel->context == 0 ? 0 : lel->context->contextObjDef->id; +// runtimeData->lelInfo[i].contextLength = 0; //lel->context == 0 ? 0 : +// lel->context->contextObjDef->size(); +// if ( lel->context != 0 ) { +// cout << "type: " << runtimeData->lelInfo[i].contextTypeId << " length: " << +// runtimeData->lelInfo[i].contextLength << endl; +// } + + runtimeData->lel_info[i].term_dup_id = lel->termDup == 0 ? 
0 : lel->termDup->id; + + if ( lel->tokenDef != 0 && lel->tokenDef->join != 0 && + lel->tokenDef->join->context != 0 ) + runtimeData->lel_info[i].mark_id = lel->tokenDef->join->mark->markId; + else + runtimeData->lel_info[i].mark_id = -1; + + runtimeData->lel_info[i].num_capture_attr = 0; + } + else { + memset(&runtimeData->lel_info[i], 0, sizeof(struct lang_el_info) ); + runtimeData->lel_info[i].name = "__UNUSED"; + runtimeData->lel_info[i].xml_tag = "__UNUSED"; + runtimeData->lel_info[i].frame_id = -1; + } + } + + /* + * struct_el_info + */ + + count = structEls.length(); + runtimeData->sel_info = new struct_el_info[count]; + runtimeData->num_struct_els = count; + memset( runtimeData->sel_info, 0, sizeof(struct struct_el_info)*count ); + StructElList::Iter sel = structEls; + for ( int i = 0; i < count; i++, sel++ ) { + int treesLen; + runtimeData->sel_info[i].size = sel->structDef->objectDef->size(); + runtimeData->sel_info[i].trees = makeTrees( sel->structDef->objectDef, treesLen ); + runtimeData->sel_info[i].trees_len = treesLen; + } + + /* + * function_info + */ + count = functionList.length(); + + runtimeData->function_info = new function_info[count]; + runtimeData->num_functions = count; + memset( runtimeData->function_info, 0, sizeof(struct function_info)*count ); + for ( FunctionList::Iter func = functionList; func.lte(); func++ ) { + + runtimeData->function_info[func->funcId].frame_id = -1; + + CodeBlock *block = func->codeBlock; + if ( block != 0 ) { + runtimeData->function_info[func->funcId].frame_id = block->frameId; + + /* Name. */ + runtimeData->frame_info[block->frameId].name = func->name; + + /* Code. */ + runtimeData->frame_info[block->frameId].codeWV = block->codeWV.data; + runtimeData->frame_info[block->frameId].codeLenWV = block->codeWV.length(); + runtimeData->frame_info[block->frameId].codeWC = block->codeWC.data; + runtimeData->frame_info[block->frameId].codeLenWC = block->codeWC.length(); + + /* Locals. 
*/ + runtimeData->frame_info[block->frameId].locals = makeLocalInfo( block->locals ); + runtimeData->frame_info[block->frameId].locals_len = block->locals.locals.length(); + + /* Meta. */ + runtimeData->frame_info[block->frameId].frame_size = func->localFrame->size(); + runtimeData->frame_info[block->frameId].arg_size = func->paramListSize; + + bool retTree = false; + if ( func->typeRef ) { + UniqueType *ut = func->typeRef->resolveType( this ); + retTree = ut->tree(); + } + runtimeData->frame_info[block->frameId].ret_tree = retTree; + } + + runtimeData->function_info[func->funcId].frame_size = func->localFrame->size(); + runtimeData->function_info[func->funcId].arg_size = func->paramListSize; + } + + /* + * pat_cons_info + */ + + /* Filled in later after patterns are parsed. */ + runtimeData->pat_repl_info = new pat_cons_info[nextPatConsId]; + memset( runtimeData->pat_repl_info, 0, sizeof(struct pat_cons_info) * nextPatConsId ); + runtimeData->num_patterns = nextPatConsId; + runtimeData->pat_repl_nodes = 0; + runtimeData->num_pattern_nodes = 0; + + + /* + * generic_info + */ + count = 1; + for ( NamespaceList::Iter nspace = namespaceList; nspace.lte(); nspace++ ) + count += nspace->genericList.length(); + assert( count == nextGenericId ); + + runtimeData->generic_info = new generic_info[count]; + runtimeData->num_generics = count; + memset( &runtimeData->generic_info[0], 0, sizeof(struct generic_info) ); + for ( NamespaceList::Iter nspace = namespaceList; nspace.lte(); nspace++ ) { + for ( GenericList::Iter gen = nspace->genericList; gen.lte(); gen++ ) { + runtimeData->generic_info[gen->id].type = gen->typeId; + + runtimeData->generic_info[gen->id].el_struct_id = + ( gen->typeId == GEN_MAP || gen->typeId == GEN_LIST ) ? + gen->elUt->structEl->id : -1; + runtimeData->generic_info[gen->id].el_offset = + gen->el != 0 ? gen->el->offset : -1; + + runtimeData->generic_info[gen->id].key_type = + gen->keyUt != 0 ? 
gen->keyUt->typeId : TYPE_NOTYPE; + runtimeData->generic_info[gen->id].key_offset = 0; + + runtimeData->generic_info[gen->id].value_type = + gen->valueUt != 0 ? gen->valueUt->typeId : TYPE_NOTYPE; + runtimeData->generic_info[gen->id].value_offset = 0; + + runtimeData->generic_info[gen->id].parser_id = + gen->typeId == GEN_PARSER ? gen->elUt->langEl->parserId : -1; + } + } + + runtimeData->argv_generic_id = argvTypeRef->generic->id; + runtimeData->stds_generic_id = stdsTypeRef->generic->id; + + /* + * Literals + */ + runtimeData->num_literals = literalStrings.length(); + runtimeData->litdata = new const char *[literalStrings.length()]; + runtimeData->litlen = new long [literalStrings.length()]; + runtimeData->literals = 0; + for ( StringMap::Iter el = literalStrings; el.lte(); el++ ) { + /* Data. */ + char *data = new char[el->key.length()+1]; + memcpy( data, el->key.data, el->key.length() ); + data[el->key.length()] = 0; + runtimeData->litdata[el->value] = data; + + /* Length. */ + runtimeData->litlen[el->value] = el->key.length(); + } + + /* Captured attributes. Loop over tokens and count first. 
*/ + long numCapturedAttr = 0; +// for ( RegionList::Iter reg = regionList; reg.lte(); reg++ ) { +// for ( TokenInstanceListReg::Iter td = reg->tokenInstanceList; td.lte(); td++ ) +// numCapturedAttr += td->reCaptureVect.length(); +// } + runtimeData->capture_attr = new CaptureAttr[numCapturedAttr]; + runtimeData->num_captured_attr = numCapturedAttr; + memset( runtimeData->capture_attr, 0, sizeof( CaptureAttr ) * numCapturedAttr ); + + count = 0; +// for ( RegionList::Iter reg = regionList; reg.lte(); reg++ ) { +// for ( TokenInstanceListReg::Iter td = reg->tokenInstanceList; td.lte(); td++ ) { +// runtimeData->lelInfo[td->token->id].captureAttr = count; +// runtimeData->lelInfo[td->token->id].numCaptureAttr = td->reCaptureVect.length(); +// for ( ReCaptureVect::Iter c = td->reCaptureVect; c.lte(); c++ ) { +// runtimeData->captureAttr[count].mark_enter = c->markEnter->markId; +// runtimeData->captureAttr[count].mark_leave = c->markLeave->markId; +// runtimeData->captureAttr[count].offset = c->objField->offset; +// +// count += 1; +// } +// } +// } + + runtimeData->fsm_tables = fsmTables; + runtimeData->pda_tables = pdaTables; + + /* FIXME: need a parser descriptor. */ + runtimeData->start_states = new int[nextParserId]; + runtimeData->eof_lel_ids = new int[nextParserId]; + runtimeData->parser_lel_ids = new int[nextParserId]; + runtimeData->num_parsers = nextParserId; + for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { + if ( lel->parserId >= 0 ) { + runtimeData->start_states[lel->parserId] = lel->startState->stateNum; + runtimeData->eof_lel_ids[lel->parserId] = lel->eofLel->id; + runtimeData->parser_lel_ids[lel->parserId] = lel->id; + } + } + + runtimeData->global_size = globalObjectDef->size(); + + /* + * Boundary between terms and non-terms. + */ + runtimeData->first_non_term_id = firstNonTermId; + + /* + * Boundary between trees and structs + */ + runtimeData->first_struct_el_id = firstStructElId; + + /* Special trees. 
*/ + runtimeData->integer_id = -1; //intLangEl->id; + runtimeData->string_id = strLangEl->id; + runtimeData->any_id = anyLangEl->id; + runtimeData->eof_id = 0; //eofLangEl->id; + runtimeData->no_token_id = noTokenLangEl->id; + runtimeData->global_id = globalSel->id; + runtimeData->argv_el_id = argvElSel->id; + runtimeData->stds_el_id = stdsElSel->id; + runtimeData->struct_inbuilt_id = structInbuiltId; + runtimeData->struct_stream_id = structStreamId; + runtimeData->struct_input_id = structInputId; + + runtimeData->fsm_execute = &internalFsmExecute; + runtimeData->send_named_lang_el = &internalSendNamedLangEl; + runtimeData->init_bindings = &internalInitBindings; + runtimeData->pop_binding = &internalPopBinding; + + runtimeData->host_call = &internal_host_call; + runtimeData->commit_reduce_forward = &internal_commit_reduce_forward; + runtimeData->commit_union_sz = &internal_commit_union_sz; + runtimeData->init_need = &internal_init_need; + runtimeData->reducer_need_tok = &internal_reducer_need_tok; + runtimeData->reducer_need_ign = &internal_reducer_need_ign; +} + +/* Borrow alg->state for mapsTo. */ +/* Add to count the number of node slots needed for kid and for every sibling that follows it: one for each kid, one for each item in its left and right ignore lists and, unless the matching parse tree node is flagged PF_NAMED or PF_ARTIFICIAL, the same again for its children. parseTree is walked in step with kid. Does nothing when kid is null. NOTE(review): the remark above about alg->state and mapsTo does not match anything this function does and looks stale; confirm before relying on it. */ void countNodes( program_t *prg, int &count, parse_tree_t *parseTree, kid_t *kid ) +{ + if ( kid != 0 ) { + /* One slot for the kid itself. */ count += 1; + + /* Shouldn't have to recurse here. 
*/ + /* One slot per left ignore item. */ tree_t *ignoreList = tree_left_ignore( prg, kid->tree ); + if ( ignoreList != 0 ) { + kid_t *ignore = ignoreList->child; + while ( ignore != 0 ) { + count += 1; + ignore = ignore->next; + } + } + + /* One slot per right ignore item. */ ignoreList = tree_right_ignore( prg, kid->tree ); + if ( ignoreList != 0 ) { + kid_t *ignore = ignoreList->child; + while ( ignore != 0 ) { + count += 1; + ignore = ignore->next; + } + } + + //count += prg->rtd->lelInfo[kid->tree->id].numCaptureAttr; + + if ( !( parseTree->flags & PF_NAMED ) && + !( parseTree->flags & PF_ARTIFICIAL ) && + tree_child( prg, kid->tree ) != 0 ) + { + countNodes( prg, count, parseTree->child, tree_child( prg, kid->tree ) ); + } + countNodes( prg, count, parseTree->next, kid->next ); + } +} + +/* Fill nodes[ind] from kid and lay out everything reachable from it in the flat nodes array. Slots for ignore items, the first child and the next sibling are claimed from nextAvail, which is advanced as they are taken; a missing link is stored as -1. parseTree is walked in step with kid, and children are skipped when it is flagged PF_NAMED or PF_ARTIFICIAL. bind_id is taken from bindId, which then advances, whenever parseTree equals the next unconsumed entry of bindings, and is 0 otherwise. Does nothing when kid is null. */ void fillNodes( program_t *prg, int &nextAvail, struct bindings *bindings, long &bindId, + struct pat_cons_node *nodes, parse_tree_t *parseTree, kid_t *kid, int ind ) +{ + if ( kid != 0 ) { + struct pat_cons_node &node = nodes[ind]; + + /* child and ptChild use the same condition, so they are either both set or both null. */ kid_t *child = + !( parseTree->flags & PF_NAMED ) && + !( parseTree->flags & PF_ARTIFICIAL ) && + tree_child( prg, kid->tree ) != 0 + ? + tree_child( prg, kid->tree ) : 0; + + parse_tree_t *ptChild = + !( parseTree->flags & PF_NAMED ) && + !( parseTree->flags & PF_ARTIFICIAL ) && + tree_child( prg, kid->tree ) != 0 + ? + parseTree->child : 0; + + /* Set up the fields. */ + node.id = kid->tree->id; + node.prod_num = kid->tree->prod_num; + node.length = string_length( kid->tree->tokdata ); + node.data = string_data( kid->tree->tokdata ); + + /* Left ignore items. Each one takes the next free slot; node.left_ignore is the index of the first, or -1 when there are none. */ + tree_t *ignoreList = tree_left_ignore( prg, kid->tree ); + kid_t *ignore = ignoreList == 0 ? 0 : ignoreList->child; + node.left_ignore = ignore == 0 ? -1 : nextAvail; + + while ( ignore != 0 ) { + /* This inner node shadows the outer one for the loop body. */ struct pat_cons_node &node = nodes[nextAvail++]; + + memset( &node, 0, sizeof(struct pat_cons_node) ); + node.id = ignore->tree->id; + node.prod_num = ignore->tree->prod_num; + node.next = ignore->next == 0 ? 
-1 : nextAvail; + + node.length = string_length( ignore->tree->tokdata ); + node.data = string_data( ignore->tree->tokdata ); + + ignore = ignore->next; + } + + /* Right ignore items, laid out the same way as the left ones; node.right_ignore is the index of the first, or -1 when there are none. */ + ignoreList = tree_right_ignore( prg, kid->tree ); + ignore = ignoreList == 0 ? 0 : ignoreList->child; + node.right_ignore = ignore == 0 ? -1 : nextAvail; + + while ( ignore != 0 ) { + /* Shadows the outer node again, as in the left ignore loop. */ struct pat_cons_node &node = nodes[nextAvail++]; + + memset( &node, 0, sizeof(struct pat_cons_node) ); + node.id = ignore->tree->id; + node.prod_num = ignore->tree->prod_num; + node.next = ignore->next == 0 ? -1 : nextAvail; + + node.length = string_length( ignore->tree->tokdata ); + node.data = string_data( ignore->tree->tokdata ); + + ignore = ignore->next; + } + + ///* The captured attributes. */ + //for ( int i = 0; i < prg->rtd->lelInfo[kid->tree->id].numCaptureAttr; i++ ) { + // CaptureAttr *cap = prg->rtd->captureAttr + + // prg->rtd->lelInfo[kid->tree->id].captureAttr + i; + // + // tree_t *attr = colm_get_attr( kid->tree, cap->offset ); + // + // struct pat_cons_node &node = nodes[nextAvail++]; + // memset( &node, 0, sizeof(struct pat_cons_node) ); + // + // node.id = attr->id; + // node.prodNum = attr->prodNum; + // node.length = stringLength( attr->tokdata ); + // node.data = stringData( attr->tokdata ); + //} + + node.stop = parseTree->flags & PF_TERM_DUP; + + /* Claim a slot for the first child, if there is one, before recursing into it. */ node.child = child == 0 ? -1 : nextAvail++; + + /* Recurse. */ + fillNodes( prg, nextAvail, bindings, bindId, nodes, ptChild, child, node.child ); + + /* Since the parser is bottom up the bindings are in a bottom up + * traversal order. Check after recursing. */ + node.bind_id = 0; + if ( bindId < bindings->length() && bindings->data[bindId] == parseTree ) { + /* Remember that binding ids are indexed from one. */ + node.bind_id = bindId++; + + //cout << "binding match in " << __PRETTY_FUNCTION__ << endl; + //cout << "bindId: " << node.bindId << endl; + } + + node.next = kid->next == 0 ? -1 : nextAvail++; + + /* Move to the next child. 
*/ + fillNodes( prg, nextAvail, bindings, bindId, nodes, parseTree->next, kid->next, node.next ); + } +} + +/* Build the flat pattern and constructor node table. First countNodes sizes it over every entry of patternList and replList, then pat_repl_nodes is allocated and fillNodes writes each tree into it, recording the starting offset and the binding count of each entry in pat_repl_info. */ void Compiler::fillInPatterns( program_t *prg ) +{ + /* + * patReplNodes + */ + + /* Count is accumulated by countNodes over every pattern and constructor. */ + int count = 0; + for ( PatList::Iter pat = patternList; pat.lte(); pat++ ) { + countNodes( prg, count, + pat->pdaRun->stack_top->next, + pat->pdaRun->stack_top->next->shadow ); + } + + for ( ConsList::Iter repl = replList; repl.lte(); repl++ ) { + countNodes( prg, count, + repl->pdaRun->stack_top->next, + repl->pdaRun->stack_top->next->shadow ); + } + + runtimeData->pat_repl_nodes = new pat_cons_node[count]; + runtimeData->num_pattern_nodes = count; + + int nextAvail = 0; + + for ( PatList::Iter pat = patternList; pat.lte(); pat++ ) { + /* The root of each pattern takes the next free slot. */ int ind = nextAvail++; + runtimeData->pat_repl_info[pat->patRepId].offset = ind; + + /* BindIds are indexed base one. */ + runtimeData->pat_repl_info[pat->patRepId].num_bindings = + pat->pdaRun->bindings->length() - 1; + + /* Binding ids start at one. */ + long bindId = 1; + fillNodes( prg, nextAvail, pat->pdaRun->bindings, bindId, + runtimeData->pat_repl_nodes, + pat->pdaRun->stack_top->next, + pat->pdaRun->stack_top->next->shadow, + ind ); + } + + for ( ConsList::Iter repl = replList; repl.lte(); repl++ ) { + int ind = nextAvail++; + runtimeData->pat_repl_info[repl->patRepId].offset = ind; + + /* BindIds are indexed base one. 
*/ + runtimeData->pat_repl_info[repl->patRepId].num_bindings = + repl->pdaRun->bindings->length() - 1; + + long bindId = 1; + fillNodes( prg, nextAvail, repl->pdaRun->bindings, bindId, + runtimeData->pat_repl_nodes, + repl->pdaRun->stack_top->next, + repl->pdaRun->stack_top->next->shadow, + ind ); + } + + /* The fill pass must consume exactly the slots the count pass reserved. */ assert( nextAvail == count ); +} + + +/* Find the lowest start in [0, curLen) at which every transition of state lands on a slot of pdaTables->owners that is still -1, with transitions spaced by the gaps between their keys. Returns that start, or curLen when every candidate start conflicts. The pdaGraph argument is not used in the body. */ int Compiler::findIndexOff( struct pda_tables *pdaTables, PdaGraph *pdaGraph, PdaState *state, int &curLen ) +{ + for ( int start = 0; start < curLen; ) { + int offset = start; + for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { + if ( pdaTables->owners[offset] != -1 ) + goto next_start; + + offset++; + if ( ! trans.last() ) { + TransMap::Iter next = trans.next(); + offset += next->key - trans->key - 1; + } + } + + /* Got through the whole list without a conflict. */ + return start; + +next_start: + start++; + } + + return curLen; +} + +/* Comparison helper on the key span of a state, meaning the last transition key minus the first, or 0 for a state with no transitions. compare returns 1 when the span of state1 is smaller, -1 when it is larger and 0 when the spans are equal. */ struct CmpSpan +{ + static int compare( PdaState *state1, PdaState *state2 ) + { + int dist1 = 0, dist2 = 0; + + if ( state1->transMap.length() > 0 ) { + TransMap::Iter first1 = state1->transMap.first(); + TransMap::Iter last1 = state1->transMap.last(); + dist1 = last1->key - first1->key; + } + + if ( state2->transMap.length() > 0 ) { + TransMap::Iter first2 = state2->transMap.first(); + TransMap::Iter last2 = state2->transMap.last(); + dist2 = last2->key - first2->key; + } + + if ( dist1 < dist2 ) + return 1; + else if ( dist2 < dist1 ) + return -1; + return 0; + } +}; + +/* Allocate a new PdaGraph, run lalr1GenerateParser, setStateNumbers and analyzeMachine on it for parserEls, and return it. */ PdaGraph *Compiler::makePdaGraph( LangElSet &parserEls ) +{ + //for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) + // cerr << prod->prodId << " " << prod->data << endl; + + PdaGraph *pdaGraph = new PdaGraph(); + lalr1GenerateParser( pdaGraph, parserEls ); + pdaGraph->setStateNumbers(); + analyzeMachine( pdaGraph, parserEls ); + + //cerr << "NUMBER OF STATES: " << pdaGraph->stateList.length() << endl; + + return pdaGraph; +} + +/* Flatten pdaGraph into a newly allocated pda_tables and return it. The tables filled are indices, owners, offsets, keys, targs, act_inds, actions, commit_len, token_region_inds, token_regions and token_pre_regions, each with its matching count field where one is set. */ struct pda_tables *Compiler::makePdaTables( PdaGraph *pdaGraph ) +{ + 
int count, pos; + struct pda_tables *pdaTables = new pda_tables; + + /* + * Counting max indices. Each state with transitions contributes the + * span of its keys (last key - first key + 1), so the total is what + * would be needed if no two states shared any slots. + */ + count = 0; + for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { + for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { + count++; + if ( ! trans.last() ) { + TransMap::Iter next = trans.next(); + count += next->key - trans->key - 1; + } + } + } + + + /* Allocate indices and owners. Both start out as -1, which findIndexOff reads as an unowned slot. */ + pdaTables->num_indices = count; + pdaTables->indices = new int[count]; + pdaTables->owners = new int[count]; + for ( long i = 0; i < count; i++ ) { + pdaTables->indices[i] = -1; + pdaTables->owners[i] = -1; + } + + /* Allocate offsets. */ + int numStates = pdaGraph->stateList.length(); + pdaTables->offsets = new unsigned int[numStates]; + pdaTables->num_states = numStates; + + /* Place transitions into indices/owners */ + PdaState **states = new PdaState*[numStates]; + long ds = 0; + for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) + states[ds++] = state; + + /* Sorting based on span length. Gives an improvement, but incurs a + * cost. Off for now. */ + //MergeSort< PdaState*, CmpSpan > mergeSort; + //mergeSort.sort( states, numStates ); + + int indLen = 0; + for ( int s = 0; s < numStates; s++ ) { + PdaState *state = states[s]; + + int indOff = findIndexOff( pdaTables, pdaGraph, state, indLen ); + pdaTables->offsets[state->stateNum] = indOff; + + for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { + pdaTables->indices[indOff] = trans->value->actionSetEl->key.id; + pdaTables->owners[indOff] = state->stateNum; + indOff++; + + if ( ! trans.last() ) { + TransMap::Iter next = trans.next(); + indOff += next->key - trans->key - 1; + } + } + + if ( indOff > indLen ) + indLen = indOff; + } + + /* We allocated the max, but compression gives us less. 
*/ + pdaTables->num_indices = indLen; + delete[] states; + + + /* + * Keys: two entries per state, the first and the last transition key, + * or 0 and 0 for a state with no transitions. + */ + count = pdaGraph->stateList.length() * 2;; + pdaTables->keys = new int[count]; + pdaTables->num_keys = count; + + count = 0; + for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { + if ( state->transMap.length() == 0 ) { + pdaTables->keys[count+0] = 0; + pdaTables->keys[count+1] = 0; + } + else { + TransMap::Iter first = state->transMap.first(); + TransMap::Iter last = state->transMap.last(); + pdaTables->keys[count+0] = first->key; + pdaTables->keys[count+1] = last->key; + } + count += 2; + } + + /* + * Targs: one entry per action set element, in iteration order. + */ + count = pdaGraph->actionSet.length(); + pdaTables->targs = new unsigned int[count]; + pdaTables->num_targs = count; + + count = 0; + for ( PdaActionSet::Iter asi = pdaGraph->actionSet; asi.lte(); asi++ ) + pdaTables->targs[count++] = asi->key.targ; + + /* + * ActInds: for each action set element, the position in the actions + * table where its list begins. + */ + count = pdaGraph->actionSet.length(); + pdaTables->act_inds = new unsigned int[count]; + pdaTables->num_act_inds = count; + + count = pos = 0; + for ( PdaActionSet::Iter asi = pdaGraph->actionSet; asi.lte(); asi++ ) { + pdaTables->act_inds[count++] = pos; + pos += asi->key.actions.length() + 1; + } + + /* + * Actions: the action lists laid end to end, each followed by a 0 + * terminator, which is why every length has one added to it. + */ + count = 0; + for ( PdaActionSet::Iter asi = pdaGraph->actionSet; asi.lte(); asi++ ) + count += asi->key.actions.length() + 1; + + pdaTables->actions = new unsigned int[count]; + pdaTables->num_actions = count; + + count = 0; + for ( PdaActionSet::Iter asi = pdaGraph->actionSet; asi.lte(); asi++ ) { + for ( ActDataList::Iter ali = asi->key.actions; ali.lte(); ali++ ) + pdaTables->actions[count++] = *ali; + + pdaTables->actions[count++] = 0; + } + + /* + * CommitLen: one entry per action set element. + */ + count = pdaGraph->actionSet.length(); + pdaTables->commit_len = new int[count]; + pdaTables->num_commit_len = count; + + count = 0; + for ( PdaActionSet::Iter asi = pdaGraph->actionSet; asi.lte(); asi++ ) + pdaTables->commit_len[count++] = asi->key.commitLen; + + /* + * tokenRegionInds. 
Start at one so region index 0 is null (unset). + */ + count = 0; + pos = 1; + pdaTables->token_region_inds = new int[pdaTables->num_states]; + for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { + pdaTables->token_region_inds[count++] = pos; + /* Leave room for the regions of this state plus their 0 terminator. */ pos += state->regions.length() + 1; + } + + + /* + * tokenRegions. Build in a null at the beginning. + */ + + count = 1; + for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) + count += state->regions.length() + 1; + + pdaTables->num_region_items = count; + pdaTables->token_regions = new int[pdaTables->num_region_items]; + + count = 0; + pdaTables->token_regions[count++] = 0; + for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { + for ( RegionVect::Iter reg = state->regions; reg.lte(); reg++ ) { + /* Region ids are stored shifted up by one; EOF_REGION is stored as 0. */ int id = ( *reg == EOF_REGION ) ? 0 : (*reg)->id + 1; + pdaTables->token_regions[count++] = id; + } + + /* Terminate the list of this state. */ pdaTables->token_regions[count++] = 0; + } + + /* + * tokenPreRegions. Build in a null at the beginning. 
+ */ + + count = 1; + for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) + count += state->regions.length() + 1; + + pdaTables->num_pre_region_items = count; + pdaTables->token_pre_regions = new int[pdaTables->num_pre_region_items]; + + count = 0; + pdaTables->token_pre_regions[count++] = 0; + for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { + /* One entry per region of the state, so this table lines up with token_regions. Every entry carries the id plus one of the single pre-region of the state, or -1 when there is no pre-region or its impl is marked wasEmpty. */ for ( RegionVect::Iter reg = state->regions; reg.lte(); reg++ ) { + assert( state->preRegions.length() <= 1 ); + if ( state->preRegions.length() == 0 || state->preRegions[0]->impl->wasEmpty ) + pdaTables->token_pre_regions[count++] = -1; + else + pdaTables->token_pre_regions[count++] = state->preRegions[0]->id + 1; + } + + pdaTables->token_pre_regions[count++] = 0; + } + + + return pdaTables; +} + +/* Build the parser graph for parserEls with makePdaGraph, flatten it with makePdaTables, and store the results in pdaGraph and pdaTables. */ void Compiler::makeParser( LangElSet &parserEls ) +{ + pdaGraph = makePdaGraph( parserEls ); + pdaTables = makePdaTables( pdaGraph ); +} + diff --git a/src/pdacodegen.cc b/src/pdacodegen.cc new file mode 100644 index 00000000..d6435ea9 --- /dev/null +++ b/src/pdacodegen.cc @@ -0,0 +1,698 @@ +/* + * Copyright 2006-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <string.h> + +#include <iostream> + +#include "compiler.h" +#include "pdacodegen.h" + +using std::cerr; +using std::endl; + +#define FRESH_BLOCK 8128 +#define act_sb "0x1" +#define act_rb "0x2" +#define lower "0x0000ffff" +#define upper "0xffff0000" + +/* Write length bytes starting at path to out, replacing backslash, double quote, bell, backspace, tab, newline, vertical tab, form feed and carriage return with their two character backslash escapes. Every other byte is written as is. Callers in this file wrap the output in double quotes to form a string literal in the generated code. */ void escapeLiteralString( std::ostream &out, const char *path, int length ) +{ + for ( const char *pc = path, *end = path+length; pc != end; pc++ ) { + switch ( *pc ) { + case '\\': out << "\\\\"; break; + case '"': out << "\\\""; break; + case '\a': out << "\\a"; break; + case '\b': out << "\\b"; break; + case '\t': out << "\\t"; break; + case '\n': out << "\\n"; break; + case '\v': out << "\\v"; break; + case '\f': out << "\\f"; break; + case '\r': out << "\\r"; break; + default: out << *pc; break; + } + } +} + +/* Overload for a NUL terminated string: the length passed on is strlen(path). */ void escapeLiteralString( std::ostream &out, const char *path ) +{ + escapeLiteralString( out, path, strlen(path) ); +} + +/* Write to out an extern declaration of the struct colm_sections object named by objectName, followed by a blank line. */ void PdaCodeGen::defineRuntime() +{ + out << + "extern struct colm_sections " << objectName << ";\n" + "\n"; +} + +void PdaCodeGen::writeRuntimeData( colm_sections *runtimeData, struct pda_tables *pdaTables ) +{ + /* + * Blocks of code in frames. + */ + for ( int i = 0; i < runtimeData->num_frames; i++ ) { + /* FIXME: horrible code cloning going on here. 
*/ + if ( runtimeData->frame_info[i].codeLenWV > 0 ) { + out << "static code_t code_" << i << "_wv[] = {\n\t"; + + code_t *block = runtimeData->frame_info[i].codeWV; + for ( int j = 0; j < runtimeData->frame_info[i].codeLenWV; j++ ) { + out << (unsigned long) block[j]; + + if ( j < runtimeData->frame_info[i].codeLenWV-1 ) { + out << ", "; + if ( (j+1) % 8 == 0 ) + out << "\n\t"; + } + } + out << "\n};\n\n"; + } + + if ( runtimeData->frame_info[i].codeLenWC > 0 ) { + out << "static code_t code_" << i << "_wc[] = {\n\t"; + + code_t *block = runtimeData->frame_info[i].codeWC; + for ( int j = 0; j < runtimeData->frame_info[i].codeLenWC; j++ ) { + out << (unsigned long) block[j]; + + if ( j < runtimeData->frame_info[i].codeLenWC-1 ) { + out << ", "; + if ( (j+1) % 8 == 0 ) + out << "\n\t"; + } + } + out << "\n};\n\n"; + } + + if ( runtimeData->frame_info[i].locals_len > 0 ) { + out << "static struct local_info locals_" << i << "[] = {\n\t"; + + struct local_info *li = runtimeData->frame_info[i].locals; + for ( int j = 0; j < runtimeData->frame_info[i].locals_len; j++ ) { + out << "{ " << (int)li[j].type << ", " << li[j].offset << " }"; + + if ( j < runtimeData->frame_info[i].locals_len-1 ) { + out << ", "; + if ( (j+1) % 8 == 0 ) + out << "\n\t"; + } + } + out << "\n};\n\n"; + } + } + + /* + * Blocks in production info. + */ + for ( int i = 0; i < runtimeData->num_prods; i++ ) { + if ( runtimeData->prod_info[i].copy_len > 0 ) { + out << "static unsigned char copy_" << i << "[] = {\n\t"; + + unsigned char *block = runtimeData->prod_info[i].copy; + for ( int j = 0; j < runtimeData->prod_info[i].copy_len; j++ ) { + out << (long) block[j*2] << ", " << (long) block[j*2+1]; + + if ( j < runtimeData->prod_info[i].copy_len-1 ) { + out << ", "; + if ( (j+1) % 8 == 0 ) + out << "\n\t"; + } + } + out << "\n};\n\n"; + } + } + + /* + * Init code. 
+ */ + out << "static code_t " << rootCode() << "[] = {\n\t"; + code_t *block = runtimeData->root_code ; + for ( int j = 0; j < runtimeData->root_code_len; j++ ) { + out << (unsigned int) block[j]; + + if ( j < runtimeData->root_code_len-1 ) { + out << ", "; + if ( (j+1) % 8 == 0 ) + out << "\n\t"; + } + } + out << "\n};\n\n"; + + /* + * lelInfo + */ + out << "static struct lang_el_info " << lelInfo() << "[] = {\n"; + for ( int i = 0; i < runtimeData->num_lang_els; i++ ) { + struct lang_el_info *el = &runtimeData->lel_info[i]; + out << "\t{"; + + /* Name. */ + out << " \""; + escapeLiteralString( out, el->name ); + out << "\", "; + + /* Name. */ + out << " \""; + escapeLiteralString( out, el->xml_tag ); + out << "\", "; + + /* Repeat, literal, ignore flags. */ + out << (int)el->repeat << ", "; + out << (int)el->list << ", "; + out << (int)el->literal << ", "; + out << (int)el->ignore << ", "; + out << el->frame_id << ", "; + out << el->object_type_id << ", "; + out << el->ofi_offset << ", "; + out << el->object_length << ", "; + out << el->term_dup_id << ", "; + out << el->mark_id << ", "; + out << el->capture_attr << ", "; + out << el->num_capture_attr; + + out << " }"; + + if ( i < runtimeData->num_lang_els-1 ) + out << ",\n"; + } + out << "\n};\n\n"; + + + for ( int i = 0; i < runtimeData->num_struct_els; i++ ) { + struct struct_el_info *el = &runtimeData->sel_info[i]; + if ( el->trees_len > 0 ) { + out << "static short struct_trees_" << i << "[] = {\n\t"; + + short *ti = el->trees; + for ( int j = 0; j < el->trees_len; j++ ) + out << ti[j] << ", "; + out << "\n};\n\n"; + } + } + + /* + * selInfo + */ + out << "static struct struct_el_info " << selInfo() << "[] = {\n"; + for ( int i = 0; i < runtimeData->num_struct_els; i++ ) { + struct struct_el_info *el = &runtimeData->sel_info[i]; + out << "\t{ "; + out << el->size << ", "; + + /* trees. 
*/ + if ( el->trees_len > 0 ) + out << "struct_trees_" << i << ", "; + else + out << "0, "; + out << el->trees_len << ", "; + + out << " },\n"; + } + out << "\n};\n\n"; + + /* + * frameInfo + */ + out << "static struct frame_info " << frameInfo() << "[] = {\n"; + for ( int i = 0; i < runtimeData->num_frames; i++ ) { + out << "\t{ "; + + /* The Name. */ + if ( runtimeData->frame_info[i].name ) + out << "\"" << runtimeData->frame_info[i].name << "\", "; + else + out << "\"\", "; + + if ( runtimeData->frame_info[i].codeLenWV > 0 ) + out << "code_" << i << "_wv, "; + else + out << "0, "; + out << runtimeData->frame_info[i].codeLenWV << ", "; + + if ( runtimeData->frame_info[i].codeLenWC > 0 ) + out << "code_" << i << "_wc, "; + else + out << "0, "; + out << runtimeData->frame_info[i].codeLenWC << ", "; + + /* locals. */ + if ( runtimeData->frame_info[i].locals_len > 0 ) + out << "locals_" << i << ", "; + else + out << "0, "; + + out << runtimeData->frame_info[i].locals_len << ", "; + + out << + runtimeData->frame_info[i].arg_size << ", " << + runtimeData->frame_info[i].frame_size; + + out << " }"; + + if ( i < runtimeData->num_frames-1 ) + out << ",\n"; + } + out << "\n};\n\n"; + + + /* + * prodInfo + */ + out << "static struct prod_info " << prodInfo() << "[] = {\n"; + for ( int i = 0; i < runtimeData->num_prods; i++ ) { + out << "\t{ "; + + out << runtimeData->prod_info[i].lhs_id << ", "; + out << runtimeData->prod_info[i].prod_num << ", "; + out << runtimeData->prod_info[i].length << ", "; + + out << + '"' << runtimeData->prod_info[i].name << "\", " << + runtimeData->prod_info[i].frame_id << ", " << + (int)runtimeData->prod_info[i].lhs_upref << ", "; + + if ( runtimeData->prod_info[i].copy_len > 0 ) + out << "copy_" << i << ", "; + else + out << "0, "; + + out << runtimeData->prod_info[i].copy_len << ", "; + + + out << " }"; + + if ( i < runtimeData->num_prods-1 ) + out << ",\n"; + } + out << "\n};\n\n"; + + /* + * patReplInfo + */ + out << "static struct 
pat_cons_info " << patReplInfo() << "[] = {\n"; + for ( int i = 0; i < runtimeData->num_patterns; i++ ) { + out << " { " << runtimeData->pat_repl_info[i].offset << ", " << + runtimeData->pat_repl_info[i].num_bindings << " },\n"; + } + out << "};\n\n"; + + /* + * patReplNodes + */ + out << "static struct pat_cons_node " << patReplNodes() << "[] = {\n"; + for ( int i = 0; i < runtimeData->num_pattern_nodes; i++ ) { + struct pat_cons_node &node = runtimeData->pat_repl_nodes[i]; + out << " { " << node.id << ", " << + node.prod_num << ", " << node.next << ", " << + node.child << ", " << node.bind_id << ", "; + if ( node.data == 0 ) + out << "0"; + else { + out << '\"'; + escapeLiteralString( out, node.data, node.length ); + out << '\"'; + } + out << ", " << node.length << ", "; + + out << node.left_ignore << ", "; + out << node.right_ignore << ", "; + + out << (int)node.stop << " },\n"; + } + out << "};\n\n"; + + /* + * functionInfo + */ + out << "static struct function_info " << functionInfo() << "[] = {\n"; + for ( int i = 0; i < runtimeData->num_functions; i++ ) { + out << "\t{ " << + runtimeData->function_info[i].frame_id << ", " << + runtimeData->function_info[i].arg_size << ", " << + runtimeData->function_info[i].frame_size; + out << " }"; + + if ( i < runtimeData->num_functions-1 ) + out << ",\n"; + } + out << "\n};\n\n"; + + /* + * regionInfo + */ + out << "static struct region_info " << regionInfo() << "[] = {\n"; + for ( int i = 0; i < runtimeData->num_regions; i++ ) { + out << "\t{ " << runtimeData->region_info[i].default_token << + ", " << runtimeData->region_info[i].eof_frame_id << + ", " << runtimeData->region_info[i].ci_lel_id << + " }"; + + if ( i < runtimeData->num_regions-1 ) + out << ",\n"; + } + out << "\n};\n\n"; + + /* + * genericInfo + */ + out << "static struct generic_info " << genericInfo() << "[] = {\n"; + for ( int i = 0; i < runtimeData->num_generics; i++ ) { + out << "\t{ " << + runtimeData->generic_info[i].type << ", " << + 
runtimeData->generic_info[i].el_struct_id << ", " << + runtimeData->generic_info[i].el_offset << ", " << + runtimeData->generic_info[i].key_type << ", " << + runtimeData->generic_info[i].key_offset << ", " << + runtimeData->generic_info[i].value_type << ", " << + runtimeData->generic_info[i].value_offset << ", " << + runtimeData->generic_info[i].parser_id; + out << " },\n"; + } + out << "};\n\n"; + + /* + * literals + */ + out << "static const char *" << litdata() << "[] = {\n"; + for ( int i = 0; i < runtimeData->num_literals; i++ ) { + out << "\t\""; + escapeLiteralString( out, runtimeData->litdata[i], runtimeData->litlen[i] ); + out << "\",\n"; + } + out << "};\n\n"; + + out << "static long " << litlen() << "[] = {\n\t"; + for ( int i = 0; i < runtimeData->num_literals; i++ ) + out << runtimeData->litlen[i] << ", "; + out << "};\n\n"; + + out << "static head_t *" << literals() << "[] = {\n\t"; + for ( int i = 0; i < runtimeData->num_literals; i++ ) + out << "0, "; + out << "};\n\n"; + + out << "static int startStates[] = {\n\t"; + for ( long i = 0; i < runtimeData->num_parsers; i++ ) { + out << runtimeData->start_states[i] << ", "; + } + out << "};\n\n"; + + out << "static int eofLelIds[] = {\n\t"; + for ( long i = 0; i < runtimeData->num_parsers; i++ ) { + out << runtimeData->eof_lel_ids[i] << ", "; + } + out << "};\n\n"; + + out << "static int parserLelIds[] = {\n\t"; + for ( long i = 0; i < runtimeData->num_parsers; i++ ) { + out << runtimeData->parser_lel_ids[i] << ", "; + } + out << "};\n\n"; + + out << "static CaptureAttr captureAttr[] = {\n"; + for ( long i = 0; i < runtimeData->num_captured_attr; i++ ) { + out << "\t{ " << + runtimeData->capture_attr[i].mark_enter << ", " << + runtimeData->capture_attr[i].mark_leave << ", " << + runtimeData->capture_attr[i].offset << " },\n"; + } + + out << "};\n\n"; + + out << + "tree_t **" << objectName << "_host_call( program_t *prg, long code, tree_t **sp );\n" + "void " << objectName << "_commit_reduce_forward( 
program_t *prg, tree_t **root,\n" + " struct pda_run *pda_run, parse_tree_t *pt );\n" + "long " << objectName << "_commit_union_sz( int reducer );\n" + "void " << objectName << "_init_need();\n" + "int " << objectName << "_reducer_need_tok( program_t *prg, " + "struct pda_run *pda_run, int id );\n" + "int " << objectName << "_reducer_need_ign( program_t *prg, " + "struct pda_run *pda_run );\n" + "void " << objectName << "_read_reduce( program_t *prg, int reducer, input_t *stream );\n" + "\n"; + + out << + "struct colm_sections " << objectName << " = \n" + "{\n" + " " << lelInfo() << ",\n" + " " << runtimeData->num_lang_els << ",\n" + "\n" + " " << selInfo() << ",\n" + " " << runtimeData->num_struct_els << ",\n" + "\n" + " " << prodInfo() << ",\n" + " " << runtimeData->num_prods << ",\n" + "\n" + " " << regionInfo() << ",\n" + " " << runtimeData->num_regions << ",\n" + "\n" + " " << rootCode() << ",\n" + " " << runtimeData->root_code_len << ",\n" + " " << runtimeData->root_frame_id << ",\n" + "\n" + " " << frameInfo() << ",\n" + " " << runtimeData->num_frames << ",\n" + "\n" + " " << functionInfo() << ",\n" + " " << runtimeData->num_functions << ",\n" + "\n" + " " << patReplInfo() << ",\n" + " " << runtimeData->num_patterns << ",\n" + "\n" + " " << patReplNodes() << ",\n" + " " << runtimeData->num_pattern_nodes << ",\n" + "\n" + " " << genericInfo() << ",\n" + " " << runtimeData->num_generics << ",\n" + " " << runtimeData->argv_generic_id << ",\n" + " " << runtimeData->stds_generic_id << ",\n" + "\n" + " " << litdata() << ",\n" + " " << litlen() << ",\n" + " " << literals() << ",\n" + " " << runtimeData->num_literals << ",\n" + "\n" + " captureAttr,\n" + " " << runtimeData->num_captured_attr << ",\n" + "\n" + " &fsmTables_start,\n" + " &pid_0_pdaTables,\n" + " startStates, eofLelIds, parserLelIds, " << runtimeData->num_parsers << ",\n" + "\n" + " " << runtimeData->global_size << ",\n" + "\n" + " " << runtimeData->first_non_term_id << ",\n" + " " << 
runtimeData->first_struct_el_id << ",\n" + " " << runtimeData->integer_id << ",\n" + " " << runtimeData->string_id << ",\n" + " " << runtimeData->any_id << ",\n" + " " << runtimeData->eof_id << ",\n" + " " << runtimeData->no_token_id << ",\n" + " " << runtimeData->global_id << ",\n" + " " << runtimeData->argv_el_id << ",\n" + " " << runtimeData->stds_el_id << ",\n" + " " << runtimeData->struct_inbuilt_id << ",\n" + " " << runtimeData->struct_inbuilt_id << ",\n" + " " << runtimeData->struct_stream_id << ",\n" + " &fsm_execute,\n" + " &sendNamedLangEl,\n" + " &initBindings,\n" + " &popBinding,\n" + " &" << objectName << "_host_call,\n" + " &" << objectName << "_commit_reduce_forward,\n" + " &" << objectName << "_commit_union_sz,\n" + " &" << objectName << "_init_need,\n" + " &" << objectName << "_reducer_need_tok,\n" + " &" << objectName << "_reducer_need_ign,\n" + " &" << objectName << "_read_reduce,\n" + "};\n" + "\n"; +} + +void PdaCodeGen::writeParserData( long id, struct pda_tables *tables ) +{ + String prefix = "pid_" + String(0, "%ld", id) + "_"; + + out << "static int " << prefix << indices() << "[] = {\n\t"; + for ( int i = 0; i < tables->num_indices; i++ ) { + out << tables->indices[i]; + + if ( i < tables->num_indices-1 ) { + out << ", "; + if ( (i+1) % 8 == 0 ) + out << "\n\t"; + } + } + out << "\n};\n\n"; + + out << "static int " << prefix << owners() << "[] = {\n\t"; + for ( int i = 0; i < tables->num_indices; i++ ) { + out << tables->owners[i]; + + if ( i < tables->num_indices-1 ) { + out << ", "; + if ( (i+1) % 8 == 0 ) + out << "\n\t"; + } + } + out << "\n};\n\n"; + + out << "static int " << prefix << keys() << "[] = {\n\t"; + for ( int i = 0; i < tables->num_keys; i++ ) { + out << tables->keys[i]; + + if ( i < tables->num_keys-1 ) { + out << ", "; + if ( (i+1) % 8 == 0 ) + out << "\n\t"; + } + } + out << "\n};\n\n"; + + out << "static unsigned int " << prefix << offsets() << "[] = {\n\t"; + for ( int i = 0; i < tables->num_states; i++ ) { + out << 
tables->offsets[i]; + + if ( i < tables->num_states-1 ) { + out << ", "; + if ( (i+1) % 8 == 0 ) + out << "\n\t"; + } + } + out << "\n};\n\n"; + + out << "static unsigned int " << prefix << targs() << "[] = {\n\t"; + for ( int i = 0; i < tables->num_targs; i++ ) { + out << tables->targs[i]; + + if ( i < tables->num_targs-1 ) { + out << ", "; + if ( (i+1) % 8 == 0 ) + out << "\n\t"; + } + } + out << "\n};\n\n"; + + out << "static unsigned int " << prefix << actInds() << "[] = {\n\t"; + for ( int i = 0; i < tables->num_act_inds; i++ ) { + out << tables->act_inds[i]; + + if ( i < tables->num_act_inds-1 ) { + out << ", "; + if ( (i+1) % 8 == 0 ) + out << "\n\t"; + } + } + out << "\n};\n\n"; + + out << "static unsigned int " << prefix << actions() << "[] = {\n\t"; + for ( int i = 0; i < tables->num_actions; i++ ) { + out << tables->actions[i]; + + if ( i < tables->num_actions-1 ) { + out << ", "; + if ( (i+1) % 8 == 0 ) + out << "\n\t"; + } + } + out << "\n};\n\n"; + + out << "static int " << prefix << commitLen() << "[] = {\n\t"; + for ( int i = 0; i < tables->num_commit_len; i++ ) { + out << tables->commit_len[i]; + + if ( i < tables->num_commit_len-1 ) { + out << ", "; + if ( (i+1) % 8 == 0 ) + out << "\n\t"; + } + } + out << "\n};\n\n"; + + out << "static int " << prefix << tokenRegionInds() << "[] = {\n\t"; + for ( int i = 0; i < tables->num_states; i++ ) { + out << tables->token_region_inds[i]; + + if ( i < tables->num_states-1 ) { + out << ", "; + if ( (i+1) % 8 == 0 ) + out << "\n\t"; + } + } + out << "\n};\n\n"; + + out << "static int " << prefix << tokenRegions() << "[] = {\n\t"; + for ( int i = 0; i < tables->num_region_items; i++ ) { + out << tables->token_regions[i]; + + if ( i < tables->num_region_items-1 ) { + out << ", "; + if ( (i+1) % 8 == 0 ) + out << "\n\t"; + } + } + out << "\n};\n\n"; + + out << "static int " << prefix << tokenPreRegions() << "[] = {\n\t"; + for ( int i = 0; i < tables->num_pre_region_items; i++ ) { + out << 
tables->token_pre_regions[i]; + + if ( i < tables->num_pre_region_items-1 ) { + out << ", "; + if ( (i+1) % 8 == 0 ) + out << "\n\t"; + } + } + out << "\n};\n\n"; + + out << + "static struct pda_tables " << prefix << "pdaTables =\n" + "{\n" + " " << prefix << indices() << ",\n" + " " << prefix << owners() << ",\n" + " " << prefix << keys() << ",\n" + " " << prefix << offsets() << ",\n" + " " << prefix << targs() << ",\n" + " " << prefix << actInds() << ",\n" + " " << prefix << actions() << ",\n" + " " << prefix << commitLen() << ",\n" + + " " << prefix << tokenRegionInds() << ",\n" + " " << prefix << tokenRegions() << ",\n" + " " << prefix << tokenPreRegions() << ",\n" + "\n" + " " << tables->num_indices << ",\n" + " " << tables->num_keys << ",\n" + " " << tables->num_states << ",\n" + " " << tables->num_targs << ",\n" + " " << tables->num_act_inds << ",\n" + " " << tables->num_actions << ",\n" + " " << tables->num_commit_len << ",\n" + " " << tables->num_region_items << ",\n" + " " << tables->num_pre_region_items << "\n" + "};\n" + "\n"; +} + diff --git a/src/pdacodegen.h b/src/pdacodegen.h new file mode 100644 index 00000000..759dd6e0 --- /dev/null +++ b/src/pdacodegen.h @@ -0,0 +1,107 @@ +/* + * Copyright 2007-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _COLM_PDACODEGEN_H +#define _COLM_PDACODEGEN_H + +struct Compiler; + +struct PdaCodeGen +{ + PdaCodeGen( ostream &out ) + : + out(out) + {} + + /* + * Code Generation. + */ + void startCodeGen(); + void endCodeGen( int endLine ); + + void writeReference( Production *prod, char *data ); + void writeUndoReference( Production *prod, char *data ); + void writeFinalReference( Production *prod, char *data ); + void writeFirstLocate( Production *prod ); + void writeRhsLocate( Production *prod ); + + void defineRuntime(); + void writeRuntimeData( colm_sections *runtimeData, struct pda_tables *pdaTables ); + void writeParserData( long id, struct pda_tables *tables ); + + String PARSER() { return "parser_"; } + + String startState() { return PARSER() + "startState"; } + String indices() { return PARSER() + "indices"; } + String owners() { return PARSER() + "owners"; } + String keys() { return PARSER() + "keys"; } + String offsets() { return PARSER() + "offsets"; } + String targs() { return PARSER() + "targs"; } + String actInds() { return PARSER() + "actInds"; } + String actions() { return PARSER() + "actions"; } + String commitLen() { return PARSER() + "commitLen"; } + String fssProdIdIndex() { return PARSER() + "fssProdIdIndex"; } + String prodLengths() { return PARSER() + "prodLengths"; } + String prodLhsIds() { return PARSER() + "prodLhsIds"; } + String prodNames() { return PARSER() + "prodNames"; } + String lelInfo() { return PARSER() + "lelInfo"; } + String selInfo() { 
return PARSER() + "selInfo"; } + String prodInfo() { return PARSER() + "prodInfo"; } + String tokenRegionInds() { return PARSER() + "tokenRegionInds"; } + String tokenRegions() { return PARSER() + "tokenRegions"; } + String tokenPreRegions() { return PARSER() + "tokenPreRegions"; } + String prodCodeBlocks() { return PARSER() + "prodCodeBlocks"; } + String prodCodeBlockLens() { return PARSER() + "prodCodeBlockLens"; } + String rootCode() { return PARSER() + "rootCode"; } + String frameInfo() { return PARSER() + "frameInfo"; } + String functionInfo() { return PARSER() + "functionInfo"; } + String objFieldInfo() { return PARSER() + "objFieldInfo"; } + String patReplInfo() { return PARSER() + "patReplInfo"; } + String patReplNodes() { return PARSER() + "patReplNodes"; } + String regionInfo() { return PARSER() + "regionInfo"; } + String genericInfo() { return PARSER() + "genericInfo"; } + String litdata() { return PARSER() + "litdata"; } + String litlen() { return PARSER() + "litlen"; } + String literals() { return PARSER() + "literals"; } + String fsmTables() { return PARSER() + "fsmTables"; } + + /* + * Graphviz Generation + */ + void writeTransList( PdaState *state ); + void writeDotFile( PdaGraph *graph ); + void writeDotFile( ); + + ostream &out; +}; + +extern "C" +{ + void internalFsmExecute( struct pda_run *pdaRun, struct input_impl *inputStream ); + void internalSendNamedLangEl( program_t *prg, tree_t **sp, + struct pda_run *pdaRun, struct input_impl *is ); + void internalInitBindings( struct pda_run *pdaRun ); + void internalPopBinding( struct pda_run *pdaRun, parse_tree_t *parseTree ); +} + +#endif /* _COLM_PDACODEGEN_H */ + diff --git a/src/pdagraph.cc b/src/pdagraph.cc new file mode 100644 index 00000000..c18c61e1 --- /dev/null +++ b/src/pdagraph.cc @@ -0,0 +1,533 @@ +/* + * Copyright 2006-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated 
documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "pdagraph.h" + +#include <assert.h> +#include <stdbool.h> + +#include <iostream> + +using std::cerr; +using std::endl; + +/* Create a new fsm state. State has not out transitions or in transitions, not + * out out transition data and not number. */ +PdaState::PdaState() +: + /* No in transitions. */ + inRange(), + + /* No entry points, or epsilon trans. */ + pendingCommits(), + + stateSet(0), + + /* Only used during merging. Normally null. */ + stateDictEl(0), + + /* No state identification bits. */ + stateBits(0), + + onClosureQueue(false), + inClosedMap(false), + followMarked(false), + + advanceReductions(false) +{ +} + +/* Copy everything except the action transitions. That is left up to the + * PdaGraph copy constructor. */ +PdaState::PdaState(const PdaState &other) +: + inRange(), + + /* Duplicate the entry id set, epsilon transitions and context sets. These + * are sets of integers and as such need no fixing. 
*/ + pendingCommits(other.pendingCommits), + + stateSet(0), + + /* This is only used during merging. Normally null. */ + stateDictEl(0), + + /* Fsm state data. */ + stateBits(other.stateBits), + + dotSet(other.dotSet), + onClosureQueue(false), + inClosedMap(false), + followMarked(false), + + transMap() +{ + /* Duplicate all the transitions. */ + for ( TransMap::Iter trans = other.transMap; trans.lte(); trans++ ) { + /* Dupicate and store the orginal target in the transition. This will + * be corrected once all the states have been created. */ + PdaTrans *newTrans = new PdaTrans(*trans->value); + newTrans->toState = trans->value->toState; + transMap.append( TransMapEl( newTrans->lowKey, newTrans ) ); + } +} + +/* If there is a state dict element, then delete it. Everything else is left + * up to the FsmGraph destructor. */ +PdaState::~PdaState() +{ + if ( stateDictEl != 0 ) + delete stateDictEl; +} + +/* Graph constructor. */ +PdaGraph::PdaGraph() +: + /* No start state. */ + startState(0) +{ +} + +/* Copy all graph data including transitions. */ +PdaGraph::PdaGraph( const PdaGraph &graph ) +: + /* Lists start empty. Will be filled by copy. */ + stateList(), + misfitList(), + + /* Copy in the entry points, + * pointers will be resolved later. */ + startState(graph.startState), + + /* Will be filled by copy. */ + finStateSet() +{ + /* Create the states and record their map in the original state. */ + PdaStateList::Iter origState = graph.stateList; + for ( ; origState.lte(); origState++ ) { + /* Make the new state. */ + PdaState *newState = new PdaState( *origState ); + + /* Add the state to the list. */ + stateList.append( newState ); + + /* Set the mapsTo item of the old state. */ + origState->stateMap = newState; + } + + /* Derefernce all the state maps. */ + for ( PdaStateList::Iter state = stateList; state.lte(); state++ ) { + for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { + /* The points to the original in the src machine. 
The taget's duplicate + * is in the statemap. */ + PdaState *toState = trans->value->toState != 0 ? + trans->value->toState->stateMap : 0; + + /* Attach The transition to the duplicate. */ + trans->value->toState = 0; + attachTrans( state, toState, trans->value ); + } + } + + /* Fix the start state pointer and the new start state's count of in + * transiions. */ + startState = startState->stateMap; + + /* Build the final state set. */ + PdaStateSet::Iter st = graph.finStateSet; + for ( ; st.lte(); st++ ) + finStateSet.insert((*st)->stateMap); +} + +/* Deletes all transition data then deletes each state. */ +PdaGraph::~PdaGraph() +{ + /* Delete all the transitions. */ + PdaStateList::Iter state = stateList; + for ( ; state.lte(); state++ ) { + for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) + delete trans->value; + } + + /* Delete all the states. */ + stateList.empty(); +} + +/* Set a state final. The state has its isFinState set to true and the state + * is added to the finStateSet. */ +void PdaGraph::setFinState( PdaState *state ) +{ + /* Is it already a fin state. */ + if ( state->stateBits & SB_ISFINAL ) + return; + + state->stateBits |= SB_ISFINAL; + finStateSet.insert( state ); +} + +void PdaGraph::unsetAllFinStates( ) +{ + for ( PdaStateSet::Iter st = finStateSet; st.lte(); st++ ) { + PdaState *state = *st; + state->stateBits &= ~ SB_ISFINAL; + } + finStateSet.empty(); +} + +/* Set and unset a state as the start state. */ +void PdaGraph::setStartState( PdaState *state ) +{ + /* Sould change from unset to set. */ + assert( startState == 0 ); + startState = state; +} + +/* Mark all states reachable from state. Traverses transitions forward. Used + * for removing states that have no path into them. */ +void PdaGraph::markReachableFromHere( PdaState *state ) +{ + /* Base case: return; */ + if ( state->stateBits & SB_ISMARKED ) + return; + + /* Set this state as processed. 
We are going to visit all states that this + * state has a transition to. */ + state->stateBits |= SB_ISMARKED; + + /* Recurse on all out transitions. */ + for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { + if ( trans->value->toState != 0 ) + markReachableFromHere( trans->value->toState ); + } +} + +void PdaGraph::setStateNumbers() +{ + int curNum = 0; + PdaStateList::Iter state = stateList; + for ( ; state.lte(); state++ ) + state->stateNum = curNum++; +} + +/* Insert a transition into an inlist. The head must be supplied. */ +void PdaGraph::attachToInList( PdaState *from, PdaState *to, + PdaTrans *&head, PdaTrans *trans ) +{ + trans->ilnext = head; + trans->ilprev = 0; + + /* If in trans list is not empty, set the head->prev to trans. */ + if ( head != 0 ) + head->ilprev = trans; + + /* Now insert ourselves at the front of the list. */ + head = trans; +}; + +/* Detach a transition from an inlist. The head of the inlist must be supplied. */ +void PdaGraph::detachFromInList( PdaState *from, PdaState *to, + PdaTrans *&head, PdaTrans *trans ) +{ + /* Detach in the inTransList. */ + if ( trans->ilprev == 0 ) + head = trans->ilnext; + else + trans->ilprev->ilnext = trans->ilnext; + + if ( trans->ilnext != 0 ) + trans->ilnext->ilprev = trans->ilprev; +} + +/* Attach states on the default transition, range list or on out/in list key. + * Type of attaching and is controlled by keyType. First makes a new + * transition. If there is already a transition out from fromState on the + * default, then will assertion fail. */ +PdaTrans *PdaGraph::appendNewTrans( PdaState *from, PdaState *to, long lowKey, long ) +{ + /* Make the new transition. */ + PdaTrans *retVal = new PdaTrans(); + + /* The transition is now attached. Remember the parties involved. */ + retVal->fromState = from; + retVal->toState = to; + + /* Make the entry in the out list for the transitions. 
*/ + from->transMap.append( TransMapEl( lowKey, retVal ) ); + + /* Set the the keys of the new trans. */ + retVal->lowKey = lowKey; + + /* Attach using inRange as the head pointer. */ + attachToInList( from, to, to->inRange.head, retVal ); + + return retVal; +} + +PdaTrans *PdaGraph::insertNewTrans( PdaState *from, PdaState *to, long lowKey, long ) +{ + /* Make the new transition. */ + PdaTrans *retVal = new PdaTrans(); + + /* The transition is now attached. Remember the parties involved. */ + retVal->fromState = from; + retVal->toState = to; + + /* Make the entry in the out list for the transitions. */ + from->transMap.insert( lowKey, retVal ); + + /* Set the the keys of the new trans. */ + retVal->lowKey = lowKey; + + /* Attach using inRange as the head pointer. */ + attachToInList( from, to, to->inRange.head, retVal ); + + return retVal; +} + +/* Attach for range lists or for the default transition. Type of attaching is + * controlled by the keyType parameter. This attach should be used when a + * transition already is allocated and must be attached to a target state. + * Does not handle adding the transition into the out list. */ +void PdaGraph::attachTrans( PdaState *from, PdaState *to, PdaTrans *trans ) +{ + assert( trans->fromState == 0 && trans->toState == 0 ); + trans->fromState = from; + trans->toState = to; + + /* Attach using the inRange pointer as the head pointer. */ + attachToInList( from, to, to->inRange.head, trans ); +} + +/* Detach for out/in lists or for default transition. The type of detaching is + * controlled by the keyType parameter. */ +void PdaGraph::detachTrans( PdaState *from, PdaState *to, PdaTrans *trans ) +{ + assert( trans->fromState == from && trans->toState == to ); + trans->fromState = 0; + trans->toState = 0; + + /* Detach using to's inRange pointer as the head. */ + detachFromInList( from, to, to->inRange.head, trans ); +} + + +/* Detach a state from the graph. Detaches and deletes transitions in and out + * of the state. 
Empties inList and outList. Removes the state from the final + * state set. A detached state becomes useless and should be deleted. */ +void PdaGraph::detachState( PdaState *state ) +{ + /* Detach the in transitions from the inRange list of transitions. */ + while ( state->inRange.head != 0 ) { + /* Get pointers to the trans and the state. */ + PdaTrans *trans = state->inRange.head; + PdaState *fromState = trans->fromState; + + /* Detach the transitions from the source state. */ + detachTrans( fromState, state, trans ); + + /* Ok to delete the transition. */ + fromState->transMap.remove( trans->lowKey ); + delete trans; + } + + /* Detach out range transitions. */ + for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { + detachTrans( state, trans->value->toState, trans->value ); + delete trans->value; + } + + /* Delete all of the out range pointers. */ + state->transMap.empty(); + + /* Unset final stateness before detaching from graph. */ + if ( state->stateBits & SB_ISFINAL ) + finStateSet.remove( state ); +} + +/* Move all the transitions that go into src so that they go into dest. */ +void PdaGraph::inTransMove( PdaState *dest, PdaState *src ) +{ + /* Do not try to move in trans to and from the same state. */ + assert( dest != src ); + + /* If src is the start state, dest becomes the start state. */ + assert( src != startState ); + + /* Move the transitions in inRange. */ + while ( src->inRange.head != 0 ) { + /* Get trans and from state. */ + PdaTrans *trans = src->inRange.head; + PdaState *fromState = trans->fromState; + + /* Detach from src, reattach to dest. */ + detachTrans( fromState, src, trans ); + attachTrans( fromState, dest, trans ); + } +} + +void PdaGraph::addInReduction( PdaTrans *dest, long prodId, long prior ) +{ + /* Look for the reduction. If not there insert it, otherwise take + * the max of the priorities. 
*/ + ReductionMapEl *redMapEl = dest->reductions.find( prodId ); + if ( redMapEl == 0 ) + dest->reductions.insert( prodId, prior ); + else if ( prior > redMapEl->value ) + redMapEl->value = prior; +} + +/* Callback invoked when another trans (or possibly this) is added into this + * transition during the merging process. Draw in any properties of srcTrans + * into this transition. AddInTrans is called when a new transitions is made + * that will be a duplicate of another transition or a combination of several + * other transitions. AddInTrans will be called for each transition that the + * new transition is to represent. */ +void PdaGraph::addInTrans( PdaTrans *destTrans, PdaTrans *srcTrans ) +{ + /* Protect against adding in from ourselves. */ + if ( srcTrans != destTrans ) { + + /* Add in the shift priority. */ + if ( destTrans->isShift && srcTrans->isShift ) { + /* Both shifts are set. We want the max of the two. */ + if ( srcTrans->shiftPrior > destTrans->shiftPrior ) + destTrans->shiftPrior = srcTrans->shiftPrior; + } + else if ( srcTrans->isShift ) { + /* Just the source is set, copy the source prior over. */ + destTrans->shiftPrior = srcTrans->shiftPrior; + } + + /* If either is a shift, dest is a shift. */ + destTrans->isShift = destTrans->isShift || srcTrans->isShift; + + /* Add in the reductions. */ + for ( ReductionMap::Iter red = srcTrans->reductions; red.lte(); red++ ) + addInReduction( destTrans, red->key, red->value ); + + /* Add in the commit points. */ + destTrans->commits.insert( srcTrans->commits ); + + if ( srcTrans->toState->advanceReductions ) + destTrans->toState->advanceReductions = true; + + if ( srcTrans->noPreIgnore ) + destTrans->noPreIgnore = true; + if ( srcTrans->noPostIgnore ) + destTrans->noPostIgnore = true; + } +} + +/* NO LONGER USED. */ +void PdaGraph::addInState( PdaState *destState, PdaState *srcState ) +{ + /* Draw in any properties of srcState into destState. 
*/ + if ( srcState != destState ) { + /* Get the epsilons, context, out priorities. */ + destState->pendingCommits.insert( srcState->pendingCommits ); + if ( srcState->pendingCommits.length() > 0 ) + cerr << "THERE ARE PENDING COMMITS DRAWN IN" << endl; + + /* Parser generation data. */ + destState->dotSet.insert( srcState->dotSet ); + + if ( srcState->onClosureQueue && !destState->onClosureQueue ) { + stateClosureQueue.append( destState ); + destState->onClosureQueue = true; + } + } +} + +/* Make a new state. The new state will be put on the graph's + * list of state. The new state can be created final or non final. */ +PdaState *PdaGraph::addState() +{ + /* Make the new state to return. */ + PdaState *state = new PdaState(); + + /* Create the new state. */ + stateList.append( state ); + + return state; +} + + +/* Follow from to the final state of srcFsm. */ +PdaState *PdaGraph::followFsm( PdaState *from, PdaGraph *srcFsm ) +{ + PdaState *followSrc = srcFsm->startState; + + while ( ! followSrc->isFinState() ) { + assert( followSrc->transMap.length() == 1 ); + PdaTrans *followTrans = followSrc->transMap[0].value; + + PdaTrans *inTrans = from->findTrans( followTrans->lowKey ); + assert( inTrans != 0 ); + + from = inTrans->toState; + followSrc = followTrans->toState; + } + + return from; +} + +int PdaGraph::fsmLength( ) +{ + int length = 0; + PdaState *state = startState; + while ( ! state->isFinState() ) { + length += 1; + state = state->transMap[0].value->toState; + } + return length; +} + +/* Remove states that have no path to them from the start state. Recursively + * traverses the graph marking states that have paths into them. Then removes + * all states that did not get marked. */ +void PdaGraph::removeUnreachableStates() +{ + /* Mark all the states that can be reached + * through the existing set of entry points. 
*/ + if ( startState != 0 ) + markReachableFromHere( startState ); + + for ( PdaStateSet::Iter si = entryStateSet; si.lte(); si++ ) + markReachableFromHere( *si ); + + /* Delete all states that are not marked + * and unmark the ones that are marked. */ + PdaState *state = stateList.head; + while ( state ) { + PdaState *next = state->next; + + if ( state->stateBits & SB_ISMARKED ) + state->stateBits &= ~ SB_ISMARKED; + else { + detachState( state ); + stateList.detach( state ); + delete state; + } + + state = next; + } +} diff --git a/src/pdagraph.h b/src/pdagraph.h new file mode 100644 index 00000000..5cfc2a76 --- /dev/null +++ b/src/pdagraph.h @@ -0,0 +1,517 @@ +/* + * Copyright 2006-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef _COLM_PDAGRAPH_H +#define _COLM_PDAGRAPH_H + +#include <assert.h> + +#include <avltree.h> +#include <bstmap.h> +#include <vector.h> +#include <sbstmap.h> +#include <sbstset.h> +#include <sbsttable.h> +#include <bstset.h> +#include <compare.h> +#include <avltree.h> +#include <dlist.h> +#include <avlset.h> +#include <dlistmel.h> + +/* Flags for states. */ +#define SB_ISFINAL 0x04 +#define SB_ISMARKED 0x08 +#define SB_ISSTART 0x10 + +/* Flags for transitions. */ +#define TB_ISMARKED 0x01 + +struct PdaTrans; +struct PdaState; +struct PdaGraph; +struct TokenInstance; +struct Production; +struct LangEl; +struct TokenRegion; + +typedef Vector<TokenRegion*> RegionVect; + +typedef Vector<long> ActDataList; + +struct ActionData +{ + ActionData( int targ, ActDataList &actions, int commitLen ) + : targ(targ), commitLen(commitLen), id(0), actions(actions) { } + + int targ; + int commitLen; + int id; + + ActDataList actions; +}; + + +struct CmpActionData +{ + static int compare( const ActionData &ap1, const ActionData &ap2 ) + { + if ( ap1.targ < ap2.targ ) + return -1; + else if ( ap1.targ > ap2.targ ) + return 1; + else if ( ap1.commitLen < ap2.commitLen ) + return -1; + else if ( ap1.commitLen > ap2.commitLen ) + return 1; + else if ( ap1.id < ap2.id ) + return -1; + else if ( ap1.id > ap2.id ) + return 1; + + return CmpTable< long, CmpOrd<long> >:: + compare( ap1.actions, ap2.actions ); + } +}; + +typedef AvlSet<ActionData, CmpActionData> PdaActionSet; +typedef AvlSetEl<ActionData> PdaActionSetEl; + +/* List pointers for the closure queue. Goes into state. */ +struct ClosureQueueListEl { PdaState *prev, *next; }; + +/* Queue of states, transitions to be closed. 
*/ +typedef DListMel< PdaState, ClosureQueueListEl > StateClosureQueue; +typedef DList<PdaTrans> TransClosureQueue; + +typedef BstSet< Production*, CmpOrd<Production*> > DefSet; +typedef CmpTable< Production*, CmpOrd<Production*> > CmpDefSet; +typedef BstSet< DefSet, CmpDefSet > DefSetSet; + +typedef Vector< Production* > DefVect; +typedef BstSet< long, CmpOrd<long> > AlphSet; + +struct ExpandToEl +{ + ExpandToEl( PdaState *state, int prodId ) + : state(state), prodId(prodId) { } + + PdaState *state; + int prodId; +}; + +struct CmpExpandToEl +{ + static inline int compare( const ExpandToEl &etel1, const ExpandToEl &etel2 ) + { + if ( etel1.state < etel2.state ) + return -1; + else if ( etel1.state > etel2.state ) + return 1; + else if ( etel1.prodId < etel2.prodId ) + return -1; + else if ( etel1.prodId > etel2.prodId ) + return 1; + else + return 0; + } +}; + +typedef BstSet<ExpandToEl, CmpExpandToEl> ExpandToSet; +typedef BstSet< int, CmpOrd<int> > IntSet; +typedef CmpTable< int, CmpOrd<int> > CmpIntSet; + +typedef BstSet< long, CmpOrd<long> > LongSet; +typedef CmpTable< long, CmpOrd<long> > CmpLongSet; + +typedef BstMap< long, long, CmpOrd<long> > LongMap; +typedef BstMapEl< long, long > LongMapEl; + +typedef LongSet ProdIdSet; +typedef CmpLongSet CmpProdIdSet; + +/* Set of states, list of states. 
*/ +typedef BstSet<PdaState*> PdaStateSet; +typedef Vector<PdaState*> StateVect; +typedef DList<PdaState> PdaStateList; + +typedef LongMap FollowToAdd; +typedef LongMap ReductionMap; +typedef LongMapEl ReductionMapEl; + +struct ProdIdPair +{ + ProdIdPair( int onReduce, int length ) + : onReduce(onReduce), length(length) {} + + int onReduce; + int length; +}; + +struct CmpProdIdPair +{ + static inline int compare( const ProdIdPair &pair1, const ProdIdPair &pair2 ) + { + if ( pair1.onReduce < pair2.onReduce ) + return -1; + else if ( pair1.onReduce > pair2.onReduce ) + return 1; + else if ( pair1.length < pair2.length ) + return -1; + else if ( pair1.length > pair2.length ) + return 1; + else + return 0; + } +}; + +typedef BstSet< ProdIdPair, CmpProdIdPair > ProdIdPairSet; + +/* Transition class that implements actions and priorities. */ +struct PdaTrans +{ + PdaTrans() : + fromState(0), + toState(0), + isShift(false), + isShiftReduce(false), + shiftPrior(0), + noPreIgnore(false), + noPostIgnore(false) + { } + + PdaTrans( const PdaTrans &other ) : + lowKey(other.lowKey), + fromState(0), toState(0), + isShift(other.isShift), + isShiftReduce(other.isShiftReduce), + shiftPrior(other.shiftPrior), + reductions(other.reductions), + commits(other.commits), + noPreIgnore(false), + noPostIgnore(false) + { } + + long lowKey; + PdaState *fromState; + PdaState *toState; + + /* Pointers for outlist. */ + PdaTrans *prev, *next; + + /* Pointers for in-list. */ + PdaTrans *ilprev, *ilnext; + + long maxPrior(); + + /* Parse Table construction data. */ + bool isShift, isShiftReduce; + int shiftPrior; + ReductionMap reductions; + ActDataList actions; + ActDataList actOrds; + ActDataList actPriors; + + ExpandToSet expandTo; + + PdaActionSetEl *actionSetEl; + + LongSet commits; + LongSet afterShiftCommits; + + bool noPreIgnore; + bool noPostIgnore; +}; + +/* In transition list. Like DList except only has head pointers, which is all + * that is required. 
Insertion and deletion is handled by the graph. This + * class provides the iterator of a single list. */ +struct PdaTransInList +{ + PdaTransInList() : head(0) { } + + PdaTrans *head; + + struct Iter + { + /* Default construct. */ + Iter() : ptr(0) { } + + /* Construct, assign from a list. */ + Iter( const PdaTransInList &il ) : ptr(il.head) { } + Iter &operator=( const PdaTransInList &dl ) { ptr = dl.head; return *this; } + + /* At the end */ + bool lte() const { return ptr != 0; } + bool end() const { return ptr == 0; } + + /* At the first, last element. */ + bool first() const { return ptr && ptr->ilprev == 0; } + bool last() const { return ptr && ptr->ilnext == 0; } + + /* Cast, dereference, arrow ops. */ + operator PdaTrans*() const { return ptr; } + PdaTrans &operator *() const { return *ptr; } + PdaTrans *operator->() const { return ptr; } + + /* Increment, decrement. */ + inline void operator++(int) { ptr = ptr->ilnext; } + inline void operator--(int) { ptr = ptr->ilprev; } + + /* The iterator is simply a pointer. */ + PdaTrans *ptr; + }; +}; + +typedef DList<PdaTrans> PdaTransList; + +/* A element in a state dict. */ +struct PdaStateDictEl +: + public AvlTreeEl<PdaStateDictEl> +{ + PdaStateDictEl(const PdaStateSet &stateSet) + : stateSet(stateSet) { } + + const PdaStateSet &getKey() { return stateSet; } + PdaStateSet stateSet; + PdaState *targState; +}; + +/* Dictionary mapping a set of states to a target state. */ +typedef AvlTree< PdaStateDictEl, PdaStateSet, CmpTable<PdaState*> > PdaStateDict; + +/* What items does a particular state encompass. */ +typedef BstSet< long, CmpOrd<long> > DotSet; +typedef CmpTable< long, CmpOrd<long> > CmpDotSet; + +/* Map of dot sets to states. */ +typedef AvlTree< PdaState, DotSet, CmpDotSet > DotSetMap; +typedef PdaState DotSetMapEl; + +typedef BstMap< long, PdaTrans* > TransMap; +typedef BstMapEl< long, PdaTrans* > TransMapEl; + +/* State class that implements actions and priorities. 
*/ +struct PdaState +: + public ClosureQueueListEl, + public AvlTreeEl< PdaState > +{ + PdaState(); + PdaState(const PdaState &other); + ~PdaState(); + + /* Is the state final? */ + bool isFinState() { return stateBits & SB_ISFINAL; } + + PdaTrans *findTrans( long key ) + { + TransMapEl *transMapEl = transMap.find( key ); + if ( transMapEl == 0 ) + return 0; + return transMapEl->value; + } + + /* In transition list. */ + PdaTransInList inRange; + + ProdIdPairSet pendingCommits; + + /* When duplicating the fsm we need to map each + * state to the new state representing it. */ + PdaState *stateMap; + + /* When merging states (state machine operations) this next pointer is + * used for the list of states that need to be filled in. */ + PdaState *alg_next; + + PdaStateSet *stateSet; + + /* Identification for printing and stable minimization. */ + int stateNum; + + /* A pointer to a dict element that contains the set of states this state + * represents. This cannot go into alg, because alg.next is used during + * the merging process. */ + PdaStateDictEl *stateDictEl; + + /* Bits controlling the behaviour of the state during collapsing to dfa. */ + int stateBits; + + /* State list elements. */ + PdaState *next, *prev; + + /* For dotset map. */ + DotSet &getKey() { return dotSet; } + + /* Closure management. */ + DotSet dotSet; + DotSet dotSet2; + bool onClosureQueue; + bool inClosedMap; + bool followMarked; + bool onStateList; + + TransMap transMap; + + RegionVect regions; + RegionVect preRegions; + + bool advanceReductions; +}; + +/* Compare lists of epsilon transitions. Entries are name ids of targets. */ +typedef CmpTable< int, CmpOrd<int> > CmpEpsilonTrans; + +/* Compare sets of context values. */ +typedef CmpTable< int, CmpOrd<int> > CmpContextSets; + +/* Graph class that implements actions and priorities. */ +struct PdaGraph +{ + /* Constructors/Destructors. */ + PdaGraph(); + PdaGraph( const PdaGraph &graph ); + ~PdaGraph(); + + /* The list of states. 
*/ + PdaStateList stateList; + PdaStateList misfitList; + + /* The start state. */ + PdaState *startState; + PdaStateSet entryStateSet; + + /* The set of final states. */ + PdaStateSet finStateSet; + + /* Closure queues and maps. */ + DotSetMap closedMap; + StateClosureQueue stateClosureQueue; + StateClosureQueue stateClosedList; + + TransClosureQueue transClosureQueue; + PdaState *stateClosureHead; + + LangEl **langElIndex; + + void setStartState( PdaState *state ); + void unsetStartState( ); + + /* + * Basic attaching and detaching. + */ + + /* Common to attaching/detaching list and default. */ + void attachToInList( PdaState *from, PdaState *to, PdaTrans *&head, PdaTrans *trans ); + void detachFromInList( PdaState *from, PdaState *to, PdaTrans *&head, PdaTrans *trans ); + + /* Attach with a new transition. */ + PdaTrans *appendNewTrans( PdaState *from, PdaState *to, long onChar1, long ); + PdaTrans *insertNewTrans( PdaState *from, PdaState *to, long lowKey, long ); + + /* Attach with an existing transition that already in an out list. */ + void attachTrans( PdaState *from, PdaState *to, PdaTrans *trans ); + + /* Detach a transition from a target state. */ + void detachTrans( PdaState *from, PdaState *to, PdaTrans *trans ); + + /* Detach a state from the graph. */ + void detachState( PdaState *state ); + + /* + * Callbacks. + */ + + /* Add in the properties of srcTrans into this. */ + void addInReduction( PdaTrans *dest, long prodId, long prior ); + void addInTrans( PdaTrans *destTrans, PdaTrans *srcTrans ); + void addInState( PdaState *destState, PdaState *srcState ); + + /* + * Allocation. + */ + + /* New up a state and add it to the graph. */ + PdaState *addState(); + + /* + * Fsm operators. + */ + + /* Follow to the fin state of src fsm. */ + PdaState *followFsm( PdaState *from, PdaGraph *srcFsm ); + + /* + * Final states + */ + + /* Set and Unset a state as final. 
*/ + void setFinState( PdaState *state ); + void unsetFinState( PdaState *state ); + void unsetAllFinStates( ); + + /* Set State numbers starting at 0. */ + void setStateNumbers(); + + /* + * Path pruning + */ + + /* Mark all states reachable from state. */ + void markReachableFromHere( PdaState *state ); + + /* Removes states that cannot be reached by any path in the fsm and are + * thus wasted silicon. */ + void removeUnreachableStates(); + + /* Remove error actions from states on which the error transition will + * never be taken. */ + bool outListCovers( PdaState *state ); + + /* Remove states that are on the misfit list. */ + void removeMisfits(); + + + /* + * Other + */ + + /* Move the in trans into src into dest. */ + void inTransMove(PdaState *dest, PdaState *src); + + int fsmLength( ); + + /* Collected machine information. */ + unsigned long long maxState; + unsigned long long maxAction; + unsigned long long maxLelId; + unsigned long long maxOffset; + unsigned long long maxIndex; + unsigned long long maxProdLen; + + PdaActionSet actionSet; +}; + +#endif /* _COLM_PDAGRAPH_H */ + diff --git a/src/pdarun.c b/src/pdarun.c new file mode 100644 index 00000000..f1885ec6 --- /dev/null +++ b/src/pdarun.c @@ -0,0 +1,2265 @@ +/* + * Copyright 2007-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "pdarun.h" + +#include <errno.h> +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <stdbool.h> +#include <assert.h> + +#include "config.h" +#include "debug.h" +#include "bytecode.h" +#include "tree.h" +#include "pool.h" +#include "internal.h" + +#define act_sb 0x1 +#define act_rb 0x2 + +#define read_word_p( i, p ) do { \ + i = ((word_t) p[0]); \ + i |= ((word_t) p[1]) << 8; \ + i |= ((word_t) p[2]) << 16; \ + i |= ((word_t) p[3]) << 24; \ +} while(0) + +#define read_tree_p( i, p ) do { \ + word_t w; \ + w = ((word_t) p[0]); \ + w |= ((word_t) p[1]) << 8; \ + w |= ((word_t) p[2]) << 16; \ + w |= ((word_t) p[3]) << 24; \ + i = (tree_t*)w; \ +} while(0) + +/* bit 0: data needed. 
bit 1: loc needed */ +#define RN_NONE 0x0 +#define RN_DATA 0x1 +#define RN_LOC 0x2 +#define RN_BOTH 0x3 + + +static void init_fsm_run( program_t *prg, struct pda_run *pda_run ) +{ + pda_run->fsm_tables = prg->rtd->fsm_tables; + + pda_run->consume_buf = 0; + + pda_run->p = pda_run->pe = 0; + pda_run->tokpref = 0; + pda_run->scan_eof = 0; + + pda_run->pre_region = -1; +} + +static void clear_fsm_run( program_t *prg, struct pda_run *pda_run ) +{ + if ( pda_run->consume_buf != 0 ) { + /* Transfer the run buf list to the program */ + struct run_buf *head = pda_run->consume_buf; + struct run_buf *tail = head; + while ( tail->next != 0 ) + tail = tail->next; + + tail->next = prg->alloc_run_buf; + prg->alloc_run_buf = head; + } +} + +void colm_increment_steps( struct pda_run *pda_run ) +{ + pda_run->steps += 1; + //debug( prg, REALM_PARSE, "steps up to %ld\n", pdaRun->steps ); +} + +void colm_decrement_steps( struct pda_run *pda_run ) +{ + pda_run->steps -= 1; + //debug( prg, REALM_PARSE, "steps down to %ld\n", pdaRun->steps ); +} + +head_t *colm_stream_pull( program_t *prg, tree_t **sp, struct pda_run *pda_run, + struct input_impl *is, long length ) +{ + if ( pda_run != 0 ) { + struct run_buf *run_buf = pda_run->consume_buf; + if ( length > ( FSM_BUFSIZE - run_buf->length ) ) { + run_buf = new_run_buf( 0 ); + run_buf->next = pda_run->consume_buf; + pda_run->consume_buf = run_buf; + } + + alph_t *dest = run_buf->data + run_buf->length; + + is->funcs->get_data( prg, is, dest, length ); + location_t *loc = location_allocate( prg ); + is->funcs->consume_data( prg, is, length, loc ); + + run_buf->length += length; + + pda_run->p = pda_run->pe = 0; + pda_run->tokpref = 0; + + head_t *tokdata = colm_string_alloc_pointer( prg, colm_cstr_from_alph( dest ), length ); + tokdata->location = loc; + + return tokdata; + } + else { + head_t *head = init_str_space( length ); + alph_t *dest = (alph_t*)head->data; + + is->funcs->get_data( prg, is, dest, length ); + location_t *loc = 
location_allocate( prg ); + is->funcs->consume_data( prg, is, length, loc ); + head->location = loc; + + return head; + } +} + +/* Should only be sending back whole tokens/ignores, therefore the send back + * should never cross a buffer boundary. Either we slide back data, or we move to + * a previous buffer and slide back data. */ +static void send_back_text( struct colm_program *prg, struct input_impl *is, const alph_t *data, long length ) +{ + //debug( REALM_PARSE, "push back of %ld characters\n", length ); + + if ( length == 0 ) + return; + + //debug( REALM_PARSE, "sending back text: %.*s\n", + // (int)length, data ); + + is->funcs->undo_consume_data( prg, is, data, length ); +} + +static void send_back_tree( struct colm_program *prg, struct input_impl *is, tree_t *tree ) +{ + is->funcs->undo_consume_tree( prg, is, tree, false ); +} + +/* + * Stops on: + * PCR_REVERSE + */ +static void send_back_ignore( program_t *prg, tree_t **sp, + struct pda_run *pda_run, struct input_impl *is, parse_tree_t *parse_tree ) +{ + #ifdef DEBUG + struct lang_el_info *lel_info = prg->rtd->lel_info; + debug( prg, REALM_PARSE, "sending back: %s%s\n", + lel_info[parse_tree->shadow->tree->id].name, + parse_tree->flags & PF_ARTIFICIAL ? " (artificial)" : "" ); + #endif + + head_t *head = parse_tree->shadow->tree->tokdata; + int artificial = parse_tree->flags & PF_ARTIFICIAL; + + if ( head != 0 ) { + if ( artificial ) + send_back_tree( prg, is, parse_tree->shadow->tree ); + else + send_back_text( prg, is, colm_alph_from_cstr( string_data( head ) ), head->length ); + } + + colm_decrement_steps( pda_run ); + + /* Check for reverse code. 
*/ + if ( parse_tree->flags & PF_HAS_RCODE ) { + pda_run->on_deck = true; + parse_tree->flags &= ~PF_HAS_RCODE; + } + + if ( pda_run->steps == pda_run->target_steps ) { + debug( prg, REALM_PARSE, "trigger parse stop, steps = " + "target = %d\n", pda_run->target_steps ); + pda_run->stop = true; + } +} + +static void reset_token( struct pda_run *pda_run ) +{ + /* If there is a token started, but never finished for a lack of data, we + * must first backup over it. */ + if ( pda_run->tokstart != 0 ) { + pda_run->p = pda_run->pe = 0; + pda_run->tokpref = 0; + pda_run->scan_eof = 0; + } +} + +/* Stops on: + * PCR_REVERSE + */ + +static void send_back( program_t *prg, tree_t **sp, struct pda_run *pda_run, + struct input_impl *is, parse_tree_t *parse_tree ) +{ + debug( prg, REALM_PARSE, "sending back: %s\n", + prg->rtd->lel_info[parse_tree->id].name ); + + if ( parse_tree->flags & PF_NAMED ) { + /* Send the named lang el back first, then send back any leading + * whitespace. */ + is->funcs->undo_consume_lang_el( prg, is ); + } + + colm_decrement_steps( pda_run ); + + /* Artifical were not parsed, instead sent in as items. */ + if ( parse_tree->flags & PF_ARTIFICIAL ) { + /* Check for reverse code. */ + if ( parse_tree->flags & PF_HAS_RCODE ) { + debug( prg, REALM_PARSE, "tree has rcode, setting on deck\n" ); + pda_run->on_deck = true; + parse_tree->flags &= ~PF_HAS_RCODE; + } + + colm_tree_upref( prg, parse_tree->shadow->tree ); + + send_back_tree( prg, is, parse_tree->shadow->tree ); + } + else { + /* Check for reverse code. */ + if ( parse_tree->flags & PF_HAS_RCODE ) { + debug( prg, REALM_PARSE, "tree has rcode, setting on deck\n" ); + pda_run->on_deck = true; + parse_tree->flags &= ~PF_HAS_RCODE; + } + + /* Push back the token data. */ + send_back_text( prg, is, colm_alph_from_cstr( string_data( parse_tree->shadow->tree->tokdata ) ), + string_length( parse_tree->shadow->tree->tokdata ) ); + + /* If eof was just sent back remember that it needs to be sent again. 
*/ + if ( parse_tree->id == prg->rtd->eof_lel_ids[pda_run->parser_id] ) + pda_run->eof_term_recvd = false; + + /* If the item is bound then store remove it from the bindings array. */ + prg->rtd->pop_binding( pda_run, parse_tree ); + } + + if ( pda_run->steps == pda_run->target_steps ) { + debug( prg, REALM_PARSE, "trigger parse stop, " + "steps = target = %d\n", pda_run->target_steps ); + pda_run->stop = true; + } + + /* Downref the tree that was sent back and free the kid. */ + colm_tree_downref( prg, sp, parse_tree->shadow->tree ); + kid_free( prg, parse_tree->shadow ); + parse_tree_free( pda_run, parse_tree ); +} + +static void set_region( struct pda_run *pda_run, int empty_ignore, parse_tree_t *tree ) +{ + if ( empty_ignore ) { + /* Recording the next region. */ + tree->retry_region = pda_run->next_region_ind; + if ( pda_run->pda_tables->token_regions[tree->retry_region+1] != 0 ) + pda_run->num_retry += 1; + } +} + +static void ignore_tree( program_t *prg, struct pda_run *pda_run, tree_t *tree ) +{ + int empty_ignore = pda_run->accum_ignore == 0; + + colm_increment_steps( pda_run ); + + parse_tree_t *parse_tree = parse_tree_allocate( pda_run ); + parse_tree->shadow = kid_allocate( prg ); + parse_tree->shadow->tree = tree; + + parse_tree->next = pda_run->accum_ignore; + pda_run->accum_ignore = parse_tree; + + colm_transfer_reverse_code( pda_run, parse_tree ); + + if ( pda_run->pre_region >= 0 ) + parse_tree->flags |= PF_RIGHT_IGNORE; + + set_region( pda_run, empty_ignore, pda_run->accum_ignore ); +} + +static void ignore_tree_art( program_t *prg, struct pda_run *pda_run, tree_t *tree ) +{ + int empty_ignore = pda_run->accum_ignore == 0; + + colm_increment_steps( pda_run ); + + parse_tree_t *parse_tree = parse_tree_allocate( pda_run ); + parse_tree->flags |= PF_ARTIFICIAL; + parse_tree->shadow = kid_allocate( prg ); + parse_tree->shadow->tree = tree; + + parse_tree->next = pda_run->accum_ignore; + pda_run->accum_ignore = parse_tree; + + 
colm_transfer_reverse_code( pda_run, parse_tree ); + + set_region( pda_run, empty_ignore, pda_run->accum_ignore ); +} + +kid_t *make_token_with_data( program_t *prg, struct pda_run *pda_run, + struct input_impl *is, int id, head_t *tokdata ) +{ + /* Make the token object. */ + long object_length = prg->rtd->lel_info[id].object_length; + kid_t *attrs = alloc_attrs( prg, object_length ); + + kid_t *input = 0; + input = kid_allocate( prg ); + input->tree = tree_allocate( prg ); + + debug( prg, REALM_PARSE, "made token %p\n", input->tree ); + + input->tree->refs = 1; + input->tree->id = id; + input->tree->tokdata = tokdata; + + /* No children and ignores get added later. */ + input->tree->child = attrs; + + struct lang_el_info *lel_info = prg->rtd->lel_info; + if ( lel_info[id].num_capture_attr > 0 ) { + int i; + for ( i = 0; i < lel_info[id].num_capture_attr; i++ ) { + CaptureAttr *ca = &prg->rtd->capture_attr[lel_info[id].capture_attr + i]; + head_t *data = string_alloc_full( prg, + colm_cstr_from_alph( pda_run->mark[ca->mark_enter] ), + pda_run->mark[ca->mark_leave] - + pda_run->mark[ca->mark_enter] ); + tree_t *string = construct_string( prg, data ); + colm_tree_upref( prg, string ); + colm_tree_set_field( prg, input->tree, ca->offset, string ); + } + } + + return input; +} + +static void report_parse_error( program_t *prg, tree_t **sp, struct pda_run *pda_run ) +{ + kid_t *kid = pda_run->bt_point; + head_t *deepest = 0; + while ( kid != 0 ) { + head_t *head = kid->tree->tokdata; + if ( head != 0 && head->location != 0 ) { + if ( deepest == 0 || head->location->byte > deepest->location->byte ) + deepest = head; + } + kid = kid->next; + } + + head_t *error_head = 0; + + /* If there are no error points on record assume the error occurred at the + * beginning of the stream. 
*/ + if ( deepest == 0 ) { + error_head = string_alloc_full( prg, "<input>:1:1: parse error", 32 ); + error_head->location = location_allocate( prg ); + error_head->location->line = 1; + error_head->location->column = 1; + } + else { + debug( prg, REALM_PARSE, "deepest location byte: %d\n", + deepest->location->byte ); + + const char *name = deepest->location->name; + long line = deepest->location->line; + long i, column = deepest->location->column; + long byte = deepest->location->byte; + + for ( i = 0; i < deepest->length; i++ ) { + if ( deepest->data[i] != '\n' ) + column += 1; + else { + line += 1; + column = 1; + } + byte += 1; + } + + if ( name == 0 ) + name = "<input>"; + char *formatted = malloc( strlen( name ) + 128 ); + sprintf( formatted, "%s:%ld:%ld: parse error", name, line, column ); + error_head = string_alloc_full( prg, formatted, strlen(formatted) ); + free( formatted ); + + error_head->location = location_allocate( prg ); + + error_head->location->name = deepest->location->name; + error_head->location->line = line; + error_head->location->column = column; + error_head->location->byte = byte; + } + + tree_t *tree = construct_string( prg, error_head ); + colm_tree_downref( prg, sp, pda_run->parse_error_text ); + pda_run->parse_error_text = tree; + colm_tree_upref( prg, pda_run->parse_error_text ); +} + +static void attach_right_ignore( program_t *prg, tree_t **sp, + struct pda_run *pda_run, parse_tree_t *parse_tree ) +{ + if ( pda_run->accum_ignore == 0 ) + return; + + if ( pda_run->stack_top->id > 0 && + pda_run->stack_top->id < prg->rtd->first_non_term_id ) + { + /* OK, do it */ + debug( prg, REALM_PARSE, "attaching right ignore\n" ); + + /* Reset. */ + assert( ! ( parse_tree->flags & PF_RIGHT_IL_ATTACHED ) ); + + parse_tree_t *accum = pda_run->accum_ignore; + + parse_tree_t *stop_at = 0, *use = accum; + while ( use != 0 ) { + if ( ! 
(use->flags & PF_RIGHT_IGNORE) ) + stop_at = use; + use = use->next; + } + + if ( stop_at != 0 ) { + /* Stop at was set. Make it the last item in the igore list. Take + * the rest. */ + accum = stop_at->next; + stop_at->next = 0; + } + else { + /* Stop at was never set. All right ignore. Use it all. */ + pda_run->accum_ignore = 0; + } + + /* The data list needs to be extracted and reversed. The parse tree list + * can remain in stack order. */ + parse_tree_t *child = accum, *last = 0; + kid_t *data_child = 0, *data_last = 0; + + while ( child ) { + data_child = child->shadow; + parse_tree_t *next = child->next; + + /* Reverse the lists. */ + data_child->next = data_last; + child->next = last; + + /* Detach the parse tree from the data tree. */ + child->shadow = 0; + + /* Keep the last for reversal. */ + data_last = data_child; + last = child; + + child = next; + } + + /* Last is now the first. */ + parse_tree->right_ignore = last; + + if ( data_child != 0 ) { + debug( prg, REALM_PARSE, "attaching ignore right\n" ); + + kid_t *ignore_kid = data_last; + + /* Copy the ignore list first if we need to attach it as a right + * ignore. */ + tree_t *right_ignore = 0; + + right_ignore = tree_allocate( prg ); + right_ignore->id = LEL_ID_IGNORE; + right_ignore->child = ignore_kid; + + tree_t *push_to = parse_tree->shadow->tree; + + push_to = push_right_ignore( prg, push_to, right_ignore ); + + parse_tree->shadow->tree = push_to; + + parse_tree->flags |= PF_RIGHT_IL_ATTACHED; + } + } +} + +static void attach_left_ignore( program_t *prg, tree_t **sp, + struct pda_run *pda_run, parse_tree_t *parse_tree ) +{ + /* Reset. */ + assert( ! ( parse_tree->flags & PF_LEFT_IL_ATTACHED ) ); + + parse_tree_t *accum = pda_run->accum_ignore; + pda_run->accum_ignore = 0; + + /* The data list needs to be extracted and reversed. The parse tree list + * can remain in stack order. 
*/ + parse_tree_t *child = accum, *last = 0; + kid_t *data_child = 0, *data_last = 0; + + while ( child ) { + data_child = child->shadow; + parse_tree_t *next = child->next; + + /* Reverse the lists. */ + data_child->next = data_last; + child->next = last; + + /* Detach the parse tree from the data tree. */ + child->shadow = 0; + + /* Keep the last for reversal. */ + data_last = data_child; + last = child; + + child = next; + } + + /* Last is now the first. */ + parse_tree->left_ignore = last; + + if ( data_child != 0 ) { + debug( prg, REALM_PARSE, "attaching left ignore\n" ); + + kid_t *ignore_kid = data_child; + + /* Make the ignore list for the left-ignore. */ + tree_t *left_ignore = tree_allocate( prg ); + left_ignore->id = LEL_ID_IGNORE; + left_ignore->child = ignore_kid; + + tree_t *push_to = parse_tree->shadow->tree; + + push_to = push_left_ignore( prg, push_to, left_ignore ); + + parse_tree->shadow->tree = push_to; + + parse_tree->flags |= PF_LEFT_IL_ATTACHED; + } +} + +/* Not currently used. Need to revive this. WARNING: untested changes here */ +static void detach_right_ignore( program_t *prg, tree_t **sp, + struct pda_run *pda_run, parse_tree_t *parse_tree ) +{ + /* Right ignore are immediately discarded since they are copies of + * left-ignores. */ + tree_t *right_ignore = 0; + if ( parse_tree->flags & PF_RIGHT_IL_ATTACHED ) { + tree_t *pop_from = parse_tree->shadow->tree; + + pop_from = pop_right_ignore( prg, sp, pop_from, &right_ignore ); + + parse_tree->shadow->tree = pop_from; + + parse_tree->flags &= ~PF_RIGHT_IL_ATTACHED; + } + + if ( parse_tree->right_ignore != 0 ) { + assert( right_ignore != 0 ); + + /* Transfer the trees to accumIgnore. 
*/ + parse_tree_t *ignore = parse_tree->right_ignore; + parse_tree->right_ignore = 0; + + kid_t *data_ignore = right_ignore->child; + right_ignore->child = 0; + + parse_tree_t *last = 0; + kid_t *data_last = 0; + while ( ignore != 0 ) { + parse_tree_t *next = ignore->next; + kid_t *data_next = data_ignore->next; + + /* Put the data trees underneath the parse trees. */ + ignore->shadow = data_ignore; + + /* Reverse. */ + ignore->next = last; + data_ignore->next = data_last; + + /* Keep last for reversal. */ + last = ignore; + data_last = data_ignore; + + ignore = next; + data_ignore = data_next; + } + + pda_run->accum_ignore = last; + + colm_tree_downref( prg, sp, right_ignore ); + } +} + +static void detach_left_ignore( program_t *prg, tree_t **sp, + struct pda_run *pda_run, parse_tree_t *parse_tree ) +{ + /* Detach left. */ + tree_t *left_ignore = 0; + if ( parse_tree->flags & PF_LEFT_IL_ATTACHED ) { + tree_t *pop_from = parse_tree->shadow->tree; + + pop_from = pop_left_ignore( prg, sp, pop_from, &left_ignore ); + + parse_tree->shadow->tree = pop_from; + + parse_tree->flags &= ~PF_LEFT_IL_ATTACHED; + } + + if ( parse_tree->left_ignore != 0 ) { + assert( left_ignore != 0 ); + + /* Transfer the trees to accumIgnore. */ + parse_tree_t *ignore = parse_tree->left_ignore; + parse_tree->left_ignore = 0; + + kid_t *data_ignore = left_ignore->child; + left_ignore->child = 0; + + parse_tree_t *last = 0; + kid_t *data_last = 0; + while ( ignore != 0 ) { + parse_tree_t *next = ignore->next; + kid_t *data_next = data_ignore->next; + + /* Put the data trees underneath the parse trees. */ + ignore->shadow = data_ignore; + + /* Reverse. */ + ignore->next = last; + data_ignore->next = data_last; + + /* Keep last for reversal. 
*/ + last = ignore; + data_last = data_ignore; + + ignore = next; + data_ignore = data_next; + } + + pda_run->accum_ignore = last; + } + + colm_tree_downref( prg, sp, left_ignore ); +} + +static int is_parser_stop_finished( struct pda_run *pda_run ) +{ + int done = + pda_run->stack_top->next != 0 && + pda_run->stack_top->next->next == 0 && + pda_run->stack_top->id == pda_run->stop_target; + return done; +} + +static void handle_error( program_t *prg, tree_t **sp, struct pda_run *pda_run ) +{ + /* Check the result. */ + if ( pda_run->parse_error ) { + /* Error occured in the top-level parser. */ + report_parse_error( prg, sp, pda_run ); + } + else { + if ( is_parser_stop_finished( pda_run ) ) { + debug( prg, REALM_PARSE, "stopping the parse\n" ); + pda_run->stop_parsing = true; + } + } +} + +static head_t *extract_match( program_t *prg, tree_t **sp, + struct pda_run *pda_run, struct input_impl *is ) +{ + long length = pda_run->tokend; + + //debug( prg, REALM_PARSE, "extracting token of length: %ld\n", length ); + + struct run_buf *run_buf = pda_run->consume_buf; + if ( run_buf == 0 || length > ( FSM_BUFSIZE - run_buf->length ) ) { + run_buf = new_run_buf( length ); + run_buf->next = pda_run->consume_buf; + pda_run->consume_buf = run_buf; + } + + alph_t *dest = run_buf->data + run_buf->length; + + is->funcs->get_data( prg, is, (alph_t*)dest, length ); + location_t *location = location_allocate( prg ); + is->funcs->consume_data( prg, is, length, location ); + + run_buf->length += length; + + pda_run->p = pda_run->pe = 0; + pda_run->tokpref = 0; + pda_run->tokstart = 0; + + head_t *head = colm_string_alloc_pointer( prg, colm_cstr_from_alph( dest ), length ); + + head->location = location; + + debug( prg, REALM_PARSE, "location byte: %d\n", head->location->byte ); + + return head; +} + +static head_t *extract_no_d( program_t *prg, tree_t **sp, + struct pda_run *pda_run, struct input_impl *is ) +{ + long length = pda_run->tokend; + + /* Just a consume, no data allocate. 
*/ + location_t *location = location_allocate( prg ); + is->funcs->consume_data( prg, is, length, location ); + + pda_run->p = pda_run->pe = 0; + pda_run->tokpref = 0; + pda_run->tokstart = 0; + + head_t *head = colm_string_alloc_pointer( prg, 0, 0 ); + + head->location = location; + + debug( prg, REALM_PARSE, "location byte: %d\n", head->location->byte ); + + return head; +} + +static head_t *extract_no_l( program_t *prg, tree_t **sp, + struct pda_run *pda_run, struct input_impl *is ) +{ + long length = pda_run->tokend; + + //debug( prg, REALM_PARSE, "extracting token of length: %ld\n", length ); + + struct run_buf *run_buf = pda_run->consume_buf; + if ( run_buf == 0 || length > ( FSM_BUFSIZE - run_buf->length ) ) { + run_buf = new_run_buf( length ); + run_buf->next = pda_run->consume_buf; + pda_run->consume_buf = run_buf; + } + + alph_t *dest = run_buf->data + run_buf->length; + + is->funcs->get_data( prg, is, dest, length ); + + /* Using a dummpy location. */ + location_t location; + memset( &location, 0, sizeof( location ) ); + is->funcs->consume_data( prg, is, length, &location ); + + run_buf->length += length; + + pda_run->p = pda_run->pe = 0; + pda_run->tokpref = 0; + pda_run->tokstart = 0; + + head_t *head = colm_string_alloc_pointer( prg, colm_cstr_from_alph( dest ), length ); + + /* Don't pass the location. */ + head->location = 0; + + debug( prg, REALM_PARSE, "location byte: %d\n", location.byte ); + + return head; +} + +static head_t *consume_match( program_t *prg, tree_t **sp, + struct pda_run *pda_run, struct input_impl *is ) +{ + long length = pda_run->tokend; + + /* No data or location returned. We just consume the data. 
*/ + location_t dummy_loc; + memset( &dummy_loc, 0, sizeof(dummy_loc) ); + is->funcs->consume_data( prg, is, length, &dummy_loc ); + + pda_run->p = pda_run->pe = 0; + pda_run->tokpref = 0; + pda_run->tokstart = 0; + + debug( prg, REALM_PARSE, "location byte: %d\n", dummy_loc.byte ); + + return 0; +} + +/* Copies tokend items into the consume buffer without consuming them from the input, and returns a head whose location is filled in by transfer_loc. NOTE(review): a fresh buffer is requested with new_run_buf( 0 ) here, whereas extract_match and extract_no_l pass length -- confirm new_run_buf still provides room for length items. */ +static head_t *peek_match( program_t *prg, struct pda_run *pda_run, struct input_impl *is ) +{ + long length = pda_run->tokend; + + struct run_buf *run_buf = pda_run->consume_buf; + if ( run_buf == 0 || length > ( FSM_BUFSIZE - run_buf->length ) ) { + run_buf = new_run_buf( 0 ); + run_buf->next = pda_run->consume_buf; + pda_run->consume_buf = run_buf; + } + + alph_t *dest = run_buf->data + run_buf->length; + + is->funcs->get_data( prg, is, dest, length ); + + pda_run->p = pda_run->pe = 0; + pda_run->tokpref = 0; + + head_t *head = colm_string_alloc_pointer( prg, colm_cstr_from_alph( dest ), length ); + + head->location = location_allocate( prg ); + is->funcs->transfer_loc( prg, head->location, is ); + + debug( prg, REALM_PARSE, "location byte: %d\n", head->location->byte ); + + return head; +} + +/* Handles a matched ignore token: only consumes it when the reducer needs nothing (RN_NONE), otherwise wraps the matched text in a tree and passes it to ignore_tree(). */ +static void send_ignore( program_t *prg, tree_t **sp, + struct pda_run *pda_run, struct input_impl *is, long id ) +{ + if ( prg->rtd->reducer_need_ign( prg, pda_run ) == RN_NONE ) { + consume_match( prg, sp, pda_run, is ); + } + else { + debug( prg, REALM_PARSE, "ignoring: %s\n", prg->rtd->lel_info[id].name ); + + /* Make the ignore string. */ + head_t *ignore_str = extract_match( prg, sp, pda_run, is ); + + debug( prg, REALM_PARSE, "ignoring: %.*s\n", ignore_str->length, ignore_str->data ); + + tree_t *tree = tree_allocate( prg ); + tree->refs = 1; + tree->id = id; + tree->tokdata = ignore_str; + + /* Send it to the pdaRun. */ + ignore_tree( prg, pda_run, tree ); + } +} +/* Builds the token data according to what the reducer needs for this id, wraps it in a parse tree and queues that as parse_input. */ +static void send_token( program_t *prg, tree_t **sp, + struct pda_run *pda_run, struct input_impl *is, long id ) +{ + int empty_ignore = pda_run->accum_ignore == 0; + + /* Make the token data. 
*/ + head_t *tokdata = 0; + int rn = prg->rtd->reducer_need_tok( prg, pda_run, id ); + + switch ( rn ) { + case RN_NONE: + tokdata = consume_match( prg, sp, pda_run, is ); + break; + case RN_DATA: + tokdata = extract_no_l( prg, sp, pda_run, is ); + break; + case RN_LOC: + tokdata = extract_no_d( prg, sp, pda_run, is ); + break; + case RN_BOTH: + tokdata = extract_match( prg, sp, pda_run, is ); + break; + } + + debug( prg, REALM_PARSE, "token: %s text: %.*s\n", + prg->rtd->lel_info[id].name, + string_length(tokdata), string_data(tokdata) ); + + kid_t *input = make_token_with_data( prg, pda_run, is, id, tokdata ); + + colm_increment_steps( pda_run ); + + parse_tree_t *parse_tree = parse_tree_allocate( pda_run ); + parse_tree->id = input->tree->id; + parse_tree->shadow = input; + + pda_run->parse_input = parse_tree; + + /* Store any alternate scanning region. NOTE(review): input was already dereferenced above, so the input != 0 test looks redundant. */ + if ( input != 0 && pda_run->pda_cs >= 0 ) + set_region( pda_run, empty_ignore, parse_tree ); +} +/* Takes an already-built tree from the input and queues it as parse_input, flagged PF_ARTIFICIAL. */ +static void send_tree( program_t *prg, tree_t **sp, struct pda_run *pda_run, + struct input_impl *is ) +{ + kid_t *input = kid_allocate( prg ); + input->tree = is->funcs->consume_tree( prg, is ); + + colm_increment_steps( pda_run ); + + parse_tree_t *parse_tree = parse_tree_allocate( pda_run ); + parse_tree->id = input->tree->id; + parse_tree->flags |= PF_ARTIFICIAL; + parse_tree->shadow = input; + + pda_run->parse_input = parse_tree; +} +/* Takes a tree from the input and hands it to ignore_tree_art(). */ +static void send_ignore_tree( program_t *prg, tree_t **sp, + struct pda_run *pda_run, struct input_impl *is ) +{ + tree_t *tree = is->funcs->consume_tree( prg, is ); + ignore_tree_art( prg, pda_run, tree ); +} +/* Queues a token of the given id built from a freshly allocated head whose location is set via transfer_loc. */ +static void send_collect_ignore( program_t *prg, tree_t **sp, + struct pda_run *pda_run, struct input_impl *is, int id ) +{ + debug( prg, REALM_PARSE, "token: CI\n" ); + + int empty_ignore = pda_run->accum_ignore == 0; + + /* Make the token data. 
*/ + head_t *tokdata = head_allocate( prg ); + tokdata->location = location_allocate( prg ); + is->funcs->transfer_loc( prg, tokdata->location, is ); + + debug( prg, REALM_PARSE, "token: %s text: %.*s\n", + prg->rtd->lel_info[id].name, + string_length(tokdata), string_data(tokdata) ); + + kid_t *input = make_token_with_data( prg, pda_run, is, id, tokdata ); + + colm_increment_steps( pda_run ); + + parse_tree_t *parse_tree = parse_tree_allocate( pda_run ); + parse_tree->id = input->tree->id; + parse_tree->shadow = input; + + pda_run->parse_input = parse_tree; + + /* Store any alternate scanning region. NOTE(review): input was already dereferenced above, so the input != 0 test looks redundant. */ + if ( input != 0 && pda_run->pda_cs >= 0 ) + set_region( pda_run, empty_ignore, parse_tree ); +} + +/* Returns the token region stored at next_region_ind + offset; a nonzero offset looks at the entries that follow the current one. */ +static int get_next_region( struct pda_run *pda_run, int offset ) +{ + return pda_run->pda_tables->token_regions[pda_run->next_region_ind+offset]; +} +/* Returns the pre-region stored at next_region_ind. */ +static int get_next_pre_region( struct pda_run *pda_run ) +{ + return pda_run->pda_tables->token_pre_regions[pda_run->next_region_ind]; +} +/* Builds the EOF token for this parser, with its location, queues it as parse_input and sets the scanner region state from the parser. */ +static void send_eof( program_t *prg, tree_t **sp, struct pda_run *pda_run, + struct input_impl *is ) +{ + debug( prg, REALM_PARSE, "token: _EOF\n" ); + + colm_increment_steps( pda_run ); + + head_t *head = head_allocate( prg ); + head->location = location_allocate( prg ); + is->funcs->transfer_loc( prg, head->location, is ); + + kid_t *input = kid_allocate( prg ); + input->tree = tree_allocate( prg ); + + input->tree->refs = 1; + input->tree->id = prg->rtd->eof_lel_ids[pda_run->parser_id]; + input->tree->tokdata = head; + + /* Set the state using the state of the parser. 
*/ + pda_run->region = get_next_region( pda_run, 0 ); + pda_run->pre_region = get_next_pre_region( pda_run ); + pda_run->fsm_cs = pda_run->fsm_tables->entry_by_region[pda_run->region]; + + parse_tree_t *parse_tree = parse_tree_allocate( pda_run ); + parse_tree->id = input->tree->id; + parse_tree->shadow = input; + + pda_run->parse_input = parse_tree; +} +/* Resets the scanner variables for a new token and picks the scanner entry state from the parser's current region, starting in the pre-region when there is one. */ +static void new_token( program_t *prg, struct pda_run *pda_run ) +{ + pda_run->p = pda_run->pe = 0; + pda_run->tokpref = 0; + pda_run->scan_eof = 0; + + /* Init the scanner vars. */ + pda_run->act = 0; + pda_run->tokstart = 0; + pda_run->tokend = 0; + pda_run->matched_token = 0; + + /* Set the state using the state of the parser. With a pre-region, scanning starts there and the main region's entry state is kept in next_cs. */ + pda_run->region = get_next_region( pda_run, 0 ); + pda_run->pre_region = get_next_pre_region( pda_run ); + if ( pda_run->pre_region > 0 ) { + pda_run->fsm_cs = pda_run->fsm_tables->entry_by_region[pda_run->pre_region]; + pda_run->next_cs = pda_run->fsm_tables->entry_by_region[pda_run->region]; + } + else { + pda_run->fsm_cs = pda_run->fsm_tables->entry_by_region[pda_run->region]; + } + + + /* Clear the mark array. */ + memset( pda_run->mark, 0, sizeof(pda_run->mark) ); +} +/* Pushes onto bt_point a kid referencing the tree at the head of accum_ignore if there is one, else the tree at the head of token_list; does nothing when neither exists. */ +static void push_bt_point( program_t *prg, struct pda_run *pda_run ) +{ + tree_t *tree = 0; + if ( pda_run->accum_ignore != 0 ) + tree = pda_run->accum_ignore->shadow->tree; + else if ( pda_run->token_list != 0 ) + tree = pda_run->token_list->kid->tree; + + if ( tree != 0 ) { + debug( prg, REALM_PARSE, "pushing bt point with location byte %d\n", + ( tree != 0 && tree->tokdata != 0 && tree->tokdata->location != 0 ) ? 
+ tree->tokdata->location->byte : 0 ); + + kid_t *kid = kid_allocate( prg ); + kid->tree = tree; + colm_tree_upref( prg, tree ); + kid->next = pda_run->bt_point; + pda_run->bt_point = kid; + } +} + +/* Special return values of scan_token(); any other return value is a token id. */ +#define SCAN_UNDO -7 +#define SCAN_IGNORE -6 +#define SCAN_TREE -5 +#define SCAN_TRY_AGAIN_LATER -4 +#define SCAN_ERROR -3 +#define SCAN_LANG_EL -2 +#define SCAN_EOF -1 +/* Fetches input blocks and runs the scanner until it can return a matched token id, a region's default token, or one of the SCAN_* codes. */ +static long scan_token( program_t *prg, struct pda_run *pda_run, struct input_impl *is ) +{ + if ( pda_run->trigger_undo ) + return SCAN_UNDO; + + while ( true ) { + alph_t *pd = 0; + int len = 0; + int tokpref = pda_run->tokpref; + int type = is->funcs->get_parse_block( prg, is, &tokpref, &pd, &len ); + + switch ( type ) { + case INPUT_DATA: + pda_run->p = pd; + pda_run->pe = pd + len; + break; + + case INPUT_EOS: + pda_run->p = pda_run->pe = 0; + if ( pda_run->tokstart != 0 ) + pda_run->scan_eof = 1; + debug( prg, REALM_SCAN, "EOS *******************\n" ); + break; + + case INPUT_EOF: + pda_run->p = pda_run->pe = 0; + if ( pda_run->tokstart != 0 ) + pda_run->scan_eof = 1; + else + return SCAN_EOF; + break; + + case INPUT_EOD: + pda_run->p = pda_run->pe = 0; + return SCAN_TRY_AGAIN_LATER; + + case INPUT_LANG_EL: + if ( pda_run->tokstart != 0 ) + pda_run->scan_eof = 1; + else + return SCAN_LANG_EL; + break; + + case INPUT_TREE: + if ( pda_run->tokstart != 0 ) + pda_run->scan_eof = 1; + else + return SCAN_TREE; + break; + case INPUT_IGNORE: + if ( pda_run->tokstart != 0 ) + pda_run->scan_eof = 1; + else + return SCAN_IGNORE; + break; + } + + prg->rtd->fsm_execute( pda_run, is ); + + /* First check if scanning stopped because we have a token. */ + if ( pda_run->matched_token > 0 ) { + /* If the token has a marker indicating the end (due to trailing + * context) then adjust data now. 
*/ + struct lang_el_info *lel_info = prg->rtd->lel_info; + if ( lel_info[pda_run->matched_token].mark_id >= 0 ) + pda_run->p = pda_run->mark[lel_info[pda_run->matched_token].mark_id]; + + return pda_run->matched_token; + } + + /* Check for error. */ + if ( pda_run->fsm_cs == pda_run->fsm_tables->error_state ) { + /* If a token was started, but not finished (tokstart != 0) then + * restore data to the beginning of that token. */ + if ( pda_run->tokstart != 0 ) + pda_run->p = pda_run->tokstart; + + /* Check for a default token in the region. If one is there + * then send it and continue with the processing loop. */ + if ( prg->rtd->region_info[pda_run->region].default_token >= 0 ) { + pda_run->tokpref = 0; + return prg->rtd->region_info[pda_run->region].default_token; + } + + return SCAN_ERROR; + } + + /* Check for no match on eof (trailing data that partially matches a token). */ + if ( pda_run->scan_eof ) + return SCAN_ERROR; + + /* Got here because the state machine didn't match a token or encounter + * an error. Must be because we got to the end of the buffer data. */ + assert( pda_run->p == pda_run->pe ); + } + + /* Should not be reached. 
*/ + return SCAN_ERROR; +} +/* Returns the tree shadowed by the stack top when stop is set, otherwise by the element beneath it; returns 0 on parse error or when that shadow is missing. */ +tree_t *get_parsed_root( struct pda_run *pda_run, int stop ) +{ + if ( pda_run->parse_error ) + return 0; + else if ( stop ) { + if ( pda_run->stack_top->shadow != 0 ) + return pda_run->stack_top->shadow->tree; + } + else { + if ( pda_run->stack_top->next->shadow != 0 ) + return pda_run->stack_top->next->shadow->tree; + } + return 0; +} +/* Frees pt and every parse tree reachable through next, left_ignore, child and right_ignore. Pending nodes are kept with vm_push_ptree/vm_pop_ptree rather than by recursing. */ +static void clear_parse_tree( program_t *prg, tree_t **sp, + struct pda_run *pda_run, parse_tree_t *pt ) +{ + tree_t **top = vm_ptop(); + + if ( pt == 0 ) + return; + +free_tree: + if ( pt->next != 0 ) { + vm_push_ptree( pt->next ); + } + + if ( pt->left_ignore != 0 ) { + vm_push_ptree( pt->left_ignore ); + } + + if ( pt->child != 0 ) { + vm_push_ptree( pt->child ); + } + + if ( pt->right_ignore != 0 ) { + vm_push_ptree( pt->right_ignore ); + } + + if ( pt->shadow != 0 ) { + colm_tree_downref( prg, sp, pt->shadow->tree ); + kid_free( prg, pt->shadow ); + } + + parse_tree_free( pda_run, pt ); + + /* Any pushed parse trees left? Pop the next one and free it too. */ + if ( sp != top ) { + pt = vm_pop_ptree(); + goto free_tree; + } +} +/* Releases what pda_run holds: the parse stack, token list, bt points, pending ignores and input, reverse code and the parse error text. */ +void colm_pda_clear( program_t *prg, tree_t **sp, struct pda_run *pda_run ) +{ + clear_fsm_run( prg, pda_run ); + + /* Remaining stack and parse trees underneath. */ + clear_parse_tree( prg, sp, pda_run, pda_run->stack_top ); + pda_run->stack_top = 0; + + /* Traverse the token list, freeing each ref (no tree is downreffed here). */ + ref_t *ref = pda_run->token_list; + while ( ref != 0 ) { + ref_t *next = ref->next; + kid_free( prg, (kid_t*)ref ); + ref = next; + } + pda_run->token_list = 0; + + /* Traverse the btPoint list, downreffing each tree and freeing its kid. */ + kid_t *btp = pda_run->bt_point; + while ( btp != 0 ) { + kid_t *next = btp->next; + colm_tree_downref( prg, sp, btp->tree ); + kid_free( prg, (kid_t*)btp ); + btp = next; + } + pda_run->bt_point = 0; + + /* Clear out any remaining ignores. */ + clear_parse_tree( prg, sp, pda_run, pda_run->accum_ignore ); + pda_run->accum_ignore = 0; + + /* Clear the input list (scanned tokens, sent trees). 
*/ + clear_parse_tree( prg, sp, pda_run, pda_run->parse_input ); + pda_run->parse_input = 0; + + colm_rcode_downref_all( prg, sp, &pda_run->reverse_code ); + colm_rt_code_vect_empty( &pda_run->reverse_code ); + colm_rt_code_vect_empty( &pda_run->rcode_collect ); + + colm_tree_downref( prg, sp, pda_run->parse_error_text ); + + if ( pda_run->reducer ) { + long local_lost = pool_alloc_num_lost( &pda_run->local_pool ); + + if ( local_lost ) + message( "warning: reducer local lost parse trees: %ld\n", local_lost ); + pool_alloc_clear( &pda_run->local_pool ); + } +} +/* Zeroes pda_run and sets it up for the given parser: tables, stop target, parse tree pool, start state, a sentinel stack entry with state -1, and the scanner state. */ +void colm_pda_init( program_t *prg, struct pda_run *pda_run, struct pda_tables *tables, + int parser_id, long stop_target, int revert_on, struct_t *context, int reducer ) +{ + memset( pda_run, 0, sizeof(struct pda_run) ); + + pda_run->pda_tables = tables; + pda_run->parser_id = parser_id; + pda_run->stop_target = stop_target; + pda_run->revert_on = revert_on; + pda_run->target_steps = -1; + pda_run->reducer = reducer; + + /* An initial commit shift count of -1 means we won't ever back up to zero + * shifts and think parsing cannot continue. */ + pda_run->shift_count = 0; + pda_run->commit_shift_count = -1; +/* A reducer run allocates parse trees from its own local pool, with each item sized for a parse tree plus the commit union; otherwise the program-wide pool is used. */ + if ( reducer ) { + init_pool_alloc( &pda_run->local_pool, sizeof(parse_tree_t) + + prg->rtd->commit_union_sz(reducer) ); + pda_run->parse_tree_pool = &pda_run->local_pool; + } + else { + pda_run->parse_tree_pool = &prg->parse_tree_pool; + } + + debug( prg, REALM_PARSE, "initializing struct pda_run %s\n", + prg->rtd->lel_info[prg->rtd->parser_lel_ids[parser_id]].name ); + + /* FIXME: need the right one here. */ + pda_run->pda_cs = prg->rtd->start_states[pda_run->parser_id]; + + kid_t *sentinal = kid_allocate( prg ); + sentinal->tree = tree_allocate( prg ); + sentinal->tree->refs = 1; + + /* Init the element allocation variables. 
*/ + pda_run->stack_top = parse_tree_allocate( pda_run ); + pda_run->stack_top->state = -1; + pda_run->stack_top->shadow = sentinal; + + pda_run->num_retry = 0; + pda_run->next_region_ind = pda_run->pda_tables->token_region_inds[pda_run->pda_cs]; + pda_run->stop_parsing = false; + pda_run->accum_ignore = 0; + pda_run->bt_point = 0; + pda_run->check_next = false; + pda_run->check_stop = false; + + prg->rtd->init_bindings( pda_run ); + + init_rt_code_vect( &pda_run->reverse_code ); + init_rt_code_vect( &pda_run->rcode_collect ); + + pda_run->context = context; + pda_run->parse_error = 0; + pda_run->parse_input = 0; + pda_run->trigger_undo = 0; + + pda_run->token_id = 0; + + pda_run->on_deck = false; + pda_run->parsed = 0; + pda_run->reject = false; + + pda_run->rc_block_count = 0; + pda_run->eof_term_recvd = 0; + + init_fsm_run( prg, pda_run ); + new_token( prg, pda_run ); +} +/* Returns the parser's start state when the stack top is the sentinel (state < 0); otherwise the table target reached on the stack top's id from the state recorded in it. */ +static long stack_top_target( program_t *prg, struct pda_run *pda_run ) +{ + long state; + if ( pda_run->stack_top->state < 0 ) + state = prg->rtd->start_states[pda_run->parser_id]; + else { + unsigned shift = pda_run->stack_top->id - + pda_run->pda_tables->keys[pda_run->stack_top->state<<1]; + unsigned offset = pda_run->pda_tables->offsets[pda_run->stack_top->state] + shift; + int index = pda_run->pda_tables->indices[offset]; + state = pda_run->pda_tables->targs[index]; + } + return state; +} + +/* + * shift: retry goes into lower of shifted node. + * reduce: retry goes into upper of reduced node. + * shift-reduce: cannot be a retry + */ + +/* Execution resumes at the case label selected by entry. Stops on: + * PCR_REDUCTION + * PCR_REVERSE + */ +static long parse_token( program_t *prg, tree_t **sp, + struct pda_run *pda_run, struct input_impl *is, long entry ) +{ + int pos; + unsigned int *action; + int rhs_len; + int owner; + int induce_reject; + int ind_pos; + + /* COROUTINE */ + switch ( entry ) { + case PCR_START: + + /* The scanner will send a null token if it can't find a token. 
*/ + if ( pda_run->parse_input == 0 ) + goto parse_error; + + /* This will cause parseInput to be lost. This + * path should be traced. */ + if ( pda_run->pda_cs < 0 ) + return PCR_DONE; + + /* Record the state in the parse tree. */ + pda_run->parse_input->state = pda_run->pda_cs; + +again: + if ( pda_run->parse_input == 0 ) + goto _out; + + pda_run->lel = pda_run->parse_input; + pda_run->cur_state = pda_run->pda_cs; + + if ( pda_run->lel->id < pda_run->pda_tables->keys[pda_run->cur_state<<1] || + pda_run->lel->id > pda_run->pda_tables->keys[(pda_run->cur_state<<1)+1] ) + { + debug( prg, REALM_PARSE, "parse error, no transition 1\n" ); + push_bt_point( prg, pda_run ); + goto parse_error; + } + + ind_pos = pda_run->pda_tables->offsets[pda_run->cur_state] + + (pda_run->lel->id - pda_run->pda_tables->keys[pda_run->cur_state<<1]); + + owner = pda_run->pda_tables->owners[ind_pos]; + if ( owner != pda_run->cur_state ) { + debug( prg, REALM_PARSE, "parse error, no transition 2\n" ); + push_bt_point( prg, pda_run ); + goto parse_error; + } + + pos = pda_run->pda_tables->indices[ind_pos]; + if ( pos < 0 ) { + debug( prg, REALM_PARSE, "parse error, no transition 3\n" ); + push_bt_point( prg, pda_run ); + goto parse_error; + } + + /* Checking complete. */ + + induce_reject = false; + pda_run->pda_cs = pda_run->pda_tables->targs[pos]; + action = pda_run->pda_tables->actions + pda_run->pda_tables->act_inds[pos]; + if ( pda_run->lel->retry_lower ) + action += pda_run->lel->retry_lower; + + /* + * Shift + */ + + if ( *action & act_sb ) { + debug( prg, REALM_PARSE, "shifted: %s\n", + prg->rtd->lel_info[pda_run->lel->id].name ); + /* Consume. */ + pda_run->parse_input = pda_run->parse_input->next; + + pda_run->lel->state = pda_run->cur_state; + + /* If its a token then attach ignores and record it in the token list + * of the next ignore attachment to use. 
*/ + if ( pda_run->lel->id < prg->rtd->first_non_term_id ) { + if ( pda_run->lel->cause_reduce == 0 ) + attach_right_ignore( prg, sp, pda_run, pda_run->stack_top ); + } + + pda_run->lel->next = pda_run->stack_top; + pda_run->stack_top = pda_run->lel; + + /* If its a token then attach ignores and record it in the token list + * of the next ignore attachment to use. */ + if ( pda_run->lel->id < prg->rtd->first_non_term_id ) { + attach_left_ignore( prg, sp, pda_run, pda_run->lel ); + + ref_t *ref = (ref_t*)kid_allocate( prg ); + ref->kid = pda_run->lel->shadow; + //colm_tree_upref( prg, pdaRun->tree ); + ref->next = pda_run->token_list; + pda_run->token_list = ref; + } + + if ( action[1] == 0 ) + pda_run->lel->retry_lower = 0; + else { + debug( prg, REALM_PARSE, "retry: %p\n", pda_run->stack_top ); + pda_run->lel->retry_lower += 1; + assert( pda_run->lel->retry_upper == 0 ); + /* FIXME: Has the retry already been counted? */ + pda_run->num_retry += 1; + } + + pda_run->shift_count += 1; + } + + /* + * Commit + */ + + if ( pda_run->pda_tables->commit_len[pos] != 0 ) { + debug( prg, REALM_PARSE, "commit point\n" ); + pda_run->commit_shift_count = pda_run->shift_count; + + /* Not in a reverting context and the parser result is not used. */ + if ( pda_run->reducer ) + commit_reduce( prg, sp, pda_run ); + + if ( pda_run->fail_parsing ) + goto fail; + + } + + /* + * Reduce + */ + + if ( *action & act_rb ) { + int r, object_length; + parse_tree_t *last, *child; + kid_t *attrs; + kid_t *data_last, *data_child; + + /* If there was shift don't attach again. 
*/ + if ( !( *action & act_sb ) && pda_run->lel->id < prg->rtd->first_non_term_id ) + attach_right_ignore( prg, sp, pda_run, pda_run->stack_top ); + + pda_run->reduction = *action >> 2; + + if ( pda_run->parse_input != 0 ) + pda_run->parse_input->cause_reduce += 1; + + kid_t *value = kid_allocate( prg ); + value->tree = tree_allocate( prg ); + value->tree->refs = 1; + value->tree->id = prg->rtd->prod_info[pda_run->reduction].lhs_id; + value->tree->prod_num = prg->rtd->prod_info[pda_run->reduction].prod_num; + + pda_run->red_lel = parse_tree_allocate( pda_run ); + pda_run->red_lel->id = prg->rtd->prod_info[pda_run->reduction].lhs_id; + pda_run->red_lel->next = 0; + pda_run->red_lel->cause_reduce = 0; + pda_run->red_lel->retry_lower = 0; + pda_run->red_lel->shadow = value; + + /* Transfer. */ + pda_run->red_lel->retry_upper = pda_run->lel->retry_lower; + pda_run->lel->retry_lower = 0; + + /* Allocate the attributes. */ + object_length = prg->rtd->lel_info[pda_run->red_lel->id].object_length; + attrs = alloc_attrs( prg, object_length ); + + /* Build the list of children. We will be giving up a reference when we + * detach parse tree and data tree, but gaining the reference when we + * put the children under the new data tree. No need to alter refcounts + * here. */ + rhs_len = prg->rtd->prod_info[pda_run->reduction].length; + child = last = 0; + data_child = data_last = 0; + for ( r = 0; r < rhs_len; r++ ) { + + /* The child. */ + child = pda_run->stack_top; + data_child = child->shadow; + + /* Pop. */ + pda_run->stack_top = pda_run->stack_top->next; + + /* Detach the parse tree from the data. */ + child->shadow = 0; + + /* Reverse list. */ + child->next = last; + data_child->next = data_last; + + /* Track last for reversal. 
*/ + last = child; + data_last = data_child; + } + + pda_run->red_lel->child = child; + pda_run->red_lel->shadow->tree->child = kid_list_concat( attrs, data_child ); + + debug( prg, REALM_PARSE, "reduced: %s rhsLen %d\n", + prg->rtd->prod_info[pda_run->reduction].name, rhs_len ); + if ( action[1] == 0 ) + pda_run->red_lel->retry_upper = 0; + else { + pda_run->red_lel->retry_upper += 1; + assert( pda_run->lel->retry_lower == 0 ); + pda_run->num_retry += 1; + debug( prg, REALM_PARSE, "retry: %p\n", pda_run->red_lel ); + } + + /* When the production is of zero length we stay in the same state. + * Otherwise we use the state stored in the first child. */ + pda_run->pda_cs = rhs_len == 0 ? pda_run->cur_state : child->state; + + if ( prg->ctx_dep_parsing && prg->rtd->prod_info[pda_run->reduction].frame_id >= 0 ) { + /* Frame info for reduction. */ + pda_run->fi = &prg->rtd->frame_info[prg->rtd->prod_info[pda_run->reduction].frame_id]; + pda_run->frame_id = prg->rtd->prod_info[pda_run->reduction].frame_id; + pda_run->reject = false; + pda_run->parsed = 0; + pda_run->code = pda_run->fi->codeWV; + + /* COROUTINE */ + return PCR_REDUCTION; + case PCR_REDUCTION: + + if ( prg->induce_exit ) + goto fail; + + /* If the lhs was stored and it changed then we need to restore the + * original upon backtracking, otherwise downref since we took a + * copy above. */ + if ( pda_run->parsed != 0 ) { + if ( pda_run->parsed != pda_run->red_lel->shadow->tree ) { + debug( prg, REALM_PARSE, "lhs tree was modified, " + "adding a restore instruction\n" ); +// +// /* Make it into a parse tree. */ +// tree_t *newPt = prepParseTree( prg, sp, pdaRun->redLel->tree ); +// colm_tree_downref( prg, sp, pdaRun->redLel->tree ); +// +// /* Copy it in. */ +// pdaRun->redLel->tree = newPt; +// colm_tree_upref( prg, pdaRun->redLel->tree ); + + /* Add the restore instruct. 
*/ + append_code_val( &pda_run->rcode_collect, IN_RESTORE_LHS ); + append_word( &pda_run->rcode_collect, (word_t)pda_run->parsed ); + append_code_val( &pda_run->rcode_collect, SIZEOF_CODE + SIZEOF_WORD ); + } + else { + /* Not changed. Done with parsed. */ + colm_tree_downref( prg, sp, pda_run->parsed ); + } + pda_run->parsed = 0; + } + + /* Pull out the reverse code, if any. */ + colm_make_reverse_code( pda_run ); + colm_transfer_reverse_code( pda_run, pda_run->red_lel ); + + /* Perhaps the execution environment is telling us we need to + * reject the reduction. */ + induce_reject = pda_run->reject; + } + + /* If the left hand side was replaced then the only parse algorithm + * data that is contained in it will be the PF_HAS_RCODE flag. Everything + * else will be in the original. This requires that we restore first + * when going backwards and when doing a commit. */ + + if ( induce_reject ) { + debug( prg, REALM_PARSE, "error induced during reduction of %s\n", + prg->rtd->lel_info[pda_run->red_lel->id].name ); + pda_run->red_lel->state = pda_run->cur_state; + pda_run->red_lel->next = pda_run->stack_top; + pda_run->stack_top = pda_run->red_lel; + /* FIXME: What is the right argument here? 
*/ + push_bt_point( prg, pda_run ); + goto parse_error; + } + + pda_run->red_lel->next = pda_run->parse_input; + pda_run->parse_input = pda_run->red_lel; + } + + goto again; + +parse_error: + debug( prg, REALM_PARSE, "hit error, backtracking\n" ); + +#if 0 + if ( pda_run->num_retry == 0 ) { + debug( prg, REALM_PARSE, "out of retries failing parse\n" ); + goto fail; + } +#endif + + while ( 1 ) { + if ( pda_run->on_deck ) { + debug( prg, REALM_BYTECODE, "dropping out for reverse code call\n" ); + + pda_run->frame_id = -1; + pda_run->code = colm_pop_reverse_code( &pda_run->reverse_code ); + + /* COROUTINE */ + return PCR_REVERSE; + case PCR_REVERSE: + + colm_decrement_steps( pda_run ); + } + else if ( pda_run->check_next ) { + pda_run->check_next = false; + + if ( pda_run->next > 0 && pda_run->pda_tables->token_regions[pda_run->next] != 0 ) { + debug( prg, REALM_PARSE, "found a new region\n" ); + pda_run->num_retry -= 1; + pda_run->pda_cs = stack_top_target( prg, pda_run ); + pda_run->next_region_ind = pda_run->next; + return PCR_DONE; + } + } + else if ( pda_run->check_stop ) { + pda_run->check_stop = false; + + if ( pda_run->stop ) { + debug( prg, REALM_PARSE, "stopping the backtracking, " + "steps is %d\n", pda_run->steps ); + + pda_run->pda_cs = stack_top_target( prg, pda_run ); + goto _out; + } + } + else if ( pda_run->parse_input != 0 ) { + /* Either we are dealing with a terminal that was shifted or a + * nonterminal that was reduced. */ + if ( pda_run->parse_input->id < prg->rtd->first_non_term_id ) { + /* This is a terminal. */ + assert( pda_run->parse_input->retry_upper == 0 ); + + if ( pda_run->parse_input->retry_lower != 0 ) { + debug( prg, REALM_PARSE, "found retry targ: %p\n", pda_run->parse_input ); + + pda_run->num_retry -= 1; + pda_run->pda_cs = pda_run->parse_input->state; + goto again; + } + + if ( pda_run->parse_input->cause_reduce != 0 ) { + /* The terminal caused a reduce. Unshift the reduced thing + * (will unreduce in the next step. 
*/ + if ( pda_run->shift_count == pda_run->commit_shift_count ) { + debug( prg, REALM_PARSE, "backed up to commit point, " + "failing parse\n" ); + goto fail; + } + pda_run->shift_count -= 1; + + pda_run->undo_lel = pda_run->stack_top; + + /* Check if we've arrived at the stack sentinel. This guard + * is here to allow us to initially set numRetry to one to + * cause the parser to back up all the way to the beginning + * when an error occurs. */ + if ( pda_run->undo_lel->next == 0 ) + break; + + /* The element on top of the stack is expected to be a + * nonterminal here (asserted below). */ + assert( !(pda_run->stack_top->id < prg->rtd->first_non_term_id) ); + + debug( prg, REALM_PARSE, "backing up over non-terminal: %s\n", + prg->rtd->lel_info[pda_run->stack_top->id].name ); + + /* Pop the item from the stack. */ + pda_run->stack_top = pda_run->stack_top->next; + + /* Queue it as next parseInput item. */ + pda_run->undo_lel->next = pda_run->parse_input; + pda_run->parse_input = pda_run->undo_lel; + } + else { + long region = pda_run->parse_input->retry_region; + pda_run->next = region > 0 ? region + 1 : 0; + pda_run->check_next = true; + pda_run->check_stop = true; + + send_back( prg, sp, pda_run, is, pda_run->parse_input ); + + pda_run->parse_input = 0; + } + } + else if ( pda_run->parse_input->flags & PF_HAS_RCODE ) { + debug( prg, REALM_PARSE, "tree has rcode, setting on deck\n" ); + pda_run->on_deck = true; + pda_run->parsed = 0; + + /* Only the RCODE flag was in the replaced lhs. All the rest is in + * the original. We read it after restoring. */ + + pda_run->parse_input->flags &= ~PF_HAS_RCODE; + } + else { + /* Remove it from the input queue. */ + pda_run->undo_lel = pda_run->parse_input; + pda_run->parse_input = pda_run->parse_input->next; + + /* Extract children from the child list. */ + parse_tree_t *first = pda_run->undo_lel->child; + pda_run->undo_lel->child = 0; + + /* This will skip the ignores/attributes, etc. 
*/ + kid_t *data_first = tree_extract_child( prg, pda_run->undo_lel->shadow->tree ); + + /* Walk the child list and push the items onto the parsing + * stack one at a time. */ + while ( first != 0 ) { + /* Get the next item ahead of time. */ + parse_tree_t *next = first->next; + kid_t *data_next = data_first->next; + + /* Push onto the stack. */ + first->next = pda_run->stack_top; + pda_run->stack_top = first; + + /* Reattach the data and the parse tree. */ + first->shadow = data_first; + + first = next; + data_first = data_next; + } + + /* If there is a parseInput queued, this is one less reduction it has + * caused. */ + if ( pda_run->parse_input != 0 ) + pda_run->parse_input->cause_reduce -= 1; + + if ( pda_run->undo_lel->retry_upper != 0 ) { + /* There is always a parseInput item here because reduce + * conflicts only happen on a lookahead character. */ + assert( pda_run->parse_input != pda_run->undo_lel ); + assert( pda_run->parse_input != 0 ); + assert( pda_run->undo_lel->retry_lower == 0 ); + assert( pda_run->parse_input->retry_upper == 0 ); + + /* Transfer the retry from undoLel to parseInput. */ + pda_run->parse_input->retry_lower = pda_run->undo_lel->retry_upper; + pda_run->parse_input->retry_upper = 0; + pda_run->parse_input->state = stack_top_target( prg, pda_run ); + } + + /* Free the reduced item. */ + colm_tree_downref( prg, sp, pda_run->undo_lel->shadow->tree ); + kid_free( prg, pda_run->undo_lel->shadow ); + parse_tree_free( pda_run, pda_run->undo_lel ); + + /* If the stack top had right ignore attached, detach now. */ + if ( pda_run->stack_top->flags & PF_RIGHT_IL_ATTACHED ) + detach_right_ignore( prg, sp, pda_run, pda_run->stack_top ); + } + } + else if ( pda_run->accum_ignore != 0 ) { + debug( prg, REALM_PARSE, "have accumulated ignore to undo\n" ); + + /* Send back any accumulated ignore tokens, then trigger error + * in the parser. 
*/ + parse_tree_t *ignore = pda_run->accum_ignore; + pda_run->accum_ignore = pda_run->accum_ignore->next; + ignore->next = 0; + + long region = ignore->retry_region; + pda_run->next = region > 0 ? region + 1 : 0; + pda_run->check_next = true; + pda_run->check_stop = true; + + send_back_ignore( prg, sp, pda_run, is, ignore ); + + colm_tree_downref( prg, sp, ignore->shadow->tree ); + kid_free( prg, ignore->shadow ); + parse_tree_free( pda_run, ignore ); + } + else { + if ( pda_run->shift_count == pda_run->commit_shift_count ) { + debug( prg, REALM_PARSE, "backed up to commit point, failing parse\n" ); + goto fail; + } + + pda_run->shift_count -= 1; + + /* Now it is time to undo something. Pick an element from the top of + * the stack. */ + pda_run->undo_lel = pda_run->stack_top; + + /* Check if we've arrived at the stack sentinal. This guard is + * here to allow us to initially set numRetry to one to cause the + * parser to backup all the way to the beginning when an error + * occurs. */ + if ( pda_run->undo_lel->next == 0 ) + break; + + /* Either we are dealing with a terminal that was + * shifted or a nonterminal that was reduced. */ + if ( pda_run->stack_top->id < prg->rtd->first_non_term_id ) { + debug( prg, REALM_PARSE, "backing up over effective terminal: %s\n", + prg->rtd->lel_info[pda_run->stack_top->id].name ); + + /* Pop the item from the stack. */ + pda_run->stack_top = pda_run->stack_top->next; + + /* Queue it as next parseInput item. */ + pda_run->undo_lel->next = pda_run->parse_input; + pda_run->parse_input = pda_run->undo_lel; + + /* Pop from the token list. */ + ref_t *ref = pda_run->token_list; + pda_run->token_list = ref->next; + kid_free( prg, (kid_t*)ref ); + + assert( pda_run->accum_ignore == 0 ); + detach_left_ignore( prg, sp, pda_run, pda_run->parse_input ); + } + else { + debug( prg, REALM_PARSE, "backing up over non-terminal: %s\n", + prg->rtd->lel_info[pda_run->stack_top->id].name ); + + /* Pop the item from the stack. 
*/
				pda_run->stack_top = pda_run->stack_top->next;

				/* Queue it as next parseInput item. */
				pda_run->undo_lel->next = pda_run->parse_input;
				pda_run->parse_input = pda_run->undo_lel;
			}

			/* Undo attach of right ignore. */
			if ( pda_run->stack_top->flags & PF_RIGHT_IL_ATTACHED )
				detach_right_ignore( prg, sp, pda_run, pda_run->stack_top );
		}
	}

fail:
	pda_run->pda_cs = -1;
	pda_run->parse_error = 1;

	/* FIXME: do we still need to fall through here? A fail is permanent now,
	 * no longer called into again. */

	return PCR_DONE;

_out:
	pda_run->next_region_ind = pda_run->pda_tables->token_region_inds[pda_run->pda_cs];

	/* COROUTINE */
	case PCR_DONE:
	break; }

	return PCR_DONE;
}

/*
 * colm_parse_loop
 *
 * Scan-and-parse driver written as a coroutine. 'entry' selects where the
 * switch below resumes: PCR_START begins the loop, any other value jumps to
 * the matching case label inside the loop body.
 *
 * Each iteration scans one token id, dispatches on it (EOF, undo, error,
 * lang el, tree, ignore, generation action, plain token), hands the result to
 * parse_token, then tests the stop conditions at the bottom of the loop.
 *
 * Stops on (the value is returned to the caller; the matching case label is
 * the resume point when the caller passes it back in as 'entry'):
 *   PCR_PRE_EOF
 *   PCR_GENERATION
 *   PCR_REDUCTION
 *   PCR_REVERSE
 *
 * Returns PCR_DONE when the loop is left through one of its break statements.
 */

long colm_parse_loop( program_t *prg, tree_t **sp, struct pda_run *pda_run,
		struct input_impl *is, long entry )
{
	struct lang_el_info *lel_info = prg->rtd->lel_info;

	/* COROUTINE */
	switch ( entry ) {
	case PCR_START:

	pda_run->stop = false;

	while ( true ) {
		debug( prg, REALM_PARSE, "parse loop start\n" );

		/* Pull the current scanner from the parser. This can change during
		 * parsing due to inputStream pushes, usually for the purpose of includes.
		 * */
		pda_run->token_id = scan_token( prg, pda_run, is );

		/* Scan error while a pre-region is active: drop the pre-region, move
		 * the scanner to next_cs and scan again. */
		if ( pda_run->token_id == SCAN_ERROR ) {
			if ( pda_run->pre_region >= 0 ) {
				pda_run->pre_region = -1;
				pda_run->fsm_cs = pda_run->next_cs;
				pda_run->tokpref = 0;
				continue;
			}
		}

		/* Scan error in a region that has a collect-ignore element
		 * (ci_lel_id > 0): send that element and go straight to parsing. */
		if ( pda_run->token_id == SCAN_ERROR &&
				( prg->rtd->region_info[pda_run->region].ci_lel_id > 0 ) )
		{
			debug( prg, REALM_PARSE, "sending a collect ignore\n" );
			send_collect_ignore( prg, sp, pda_run, is,
					prg->rtd->region_info[pda_run->region].ci_lel_id );
			goto yes;
		}

		if ( pda_run->token_id == SCAN_TRY_AGAIN_LATER ) {
			debug( prg, REALM_PARSE, "scanner says try again later\n" );
			break;
		}

		/* The explicit store still zeroes parse_input when assert is compiled
		 * out (NDEBUG). */
		assert( pda_run->parse_input == 0 );
		pda_run->parse_input = 0;

		/* Check for EOF. */
		if ( pda_run->token_id == SCAN_EOF ) {
			pda_run->eof_term_recvd = true;
			send_eof( prg, sp, pda_run, is );

			pda_run->frame_id = prg->rtd->region_info[pda_run->region].eof_frame_id;

			if ( prg->ctx_dep_parsing && pda_run->frame_id >= 0 ) {
				debug( prg, REALM_PARSE, "HAVE PRE_EOF BLOCK\n" );

				pda_run->fi = &prg->rtd->frame_info[pda_run->frame_id];
				pda_run->code = pda_run->fi->codeWV;

				/* COROUTINE */
				/* Return with pda_run->code set to the frame's codeWV;
				 * re-entry with PCR_PRE_EOF continues at the label. */
				return PCR_PRE_EOF;
				case PCR_PRE_EOF:

				colm_make_reverse_code( pda_run );
			}
		}
		else if ( pda_run->token_id == SCAN_UNDO ) {
			/* Fall through with parseInput = 0. FIXME: Do we need to send back ignore? */
			debug( prg, REALM_PARSE, "invoking undo from the scanner\n" );
		}
		else if ( pda_run->token_id == SCAN_ERROR ) {
			/* Scanner error, maybe retry. */
			if ( pda_run->accum_ignore == 0 && get_next_region( pda_run, 1 ) != 0 ) {
				debug( prg, REALM_PARSE, "scanner failed, trying next region\n" );

				pda_run->next_region_ind += 1;
				goto skip_send;
			}
			else { // if ( pdaRun->numRetry > 0 ) {
				debug( prg, REALM_PARSE, "invoking parse error from the scanner\n" );

				/* Fall through to send null (error). */
				push_bt_point( prg, pda_run );
			}
#if 0
			else {
				debug( prg, REALM_PARSE, "no alternate scanning regions\n" );

				/* There are no alternative scanning regions to try, nor are
				 * there any alternatives stored in the current parse tree. No
				 * choice but to end the parse. */
				push_bt_point( prg, pda_run );

				report_parse_error( prg, sp, pda_run );
				pda_run->parse_error = 1;
				goto skip_send;
			}
#endif
		}
		else if ( pda_run->token_id == SCAN_LANG_EL ) {
			debug( prg, REALM_PARSE, "sending an named lang el\n" );

			/* A named language element (parsing colm program). */
			prg->rtd->send_named_lang_el( prg, sp, pda_run, is );
		}
		else if ( pda_run->token_id == SCAN_TREE ) {
			debug( prg, REALM_PARSE, "sending a tree\n" );

			/* A tree already built. */
			send_tree( prg, sp, pda_run, is );
		}
		else if ( pda_run->token_id == SCAN_IGNORE ) {
			debug( prg, REALM_PARSE, "sending an ignore token\n" );

			/* A tree to ignore. */
			send_ignore_tree( prg, sp, pda_run, is );
			goto skip_send;
		}
		else if ( prg->ctx_dep_parsing && lel_info[pda_run->token_id].frame_id >= 0 ) {
			/* Has a generation action. */
			debug( prg, REALM_PARSE, "token gen action: %s\n",
					prg->rtd->lel_info[pda_run->token_id].name );

			/* Make the token data. */
			pda_run->tokdata = peek_match( prg, pda_run, is );

			/* Note that we don't update the position now. It is done when the token
			 * data is pulled from the inputStream. */

			pda_run->p = pda_run->pe = 0;
			pda_run->tokpref = 0;
			pda_run->scan_eof = 0;

			pda_run->fi = &prg->rtd->frame_info[prg->rtd->lel_info[pda_run->token_id].frame_id];
			pda_run->frame_id = prg->rtd->lel_info[pda_run->token_id].frame_id;
			pda_run->code = pda_run->fi->codeWV;

			/* COROUTINE */
			/* Return with pda_run->code set to the token's frame codeWV;
			 * re-entry with PCR_GENERATION continues at the label. */
			return PCR_GENERATION;
			case PCR_GENERATION:

			colm_make_reverse_code( pda_run );

			/* Finished with the match text. */
			string_free( prg, pda_run->tokdata );

			goto skip_send;
		}
		else if ( lel_info[pda_run->token_id].ignore ) {
			debug( prg, REALM_PARSE, "sending an ignore token: %s\n",
					prg->rtd->lel_info[pda_run->token_id].name );

			/* Is an ignore token. */
			send_ignore( prg, sp, pda_run, is, pda_run->token_id );
			goto skip_send;
		}
		else {
			debug( prg, REALM_PARSE, "sending a plain old token: %s\n",
					prg->rtd->lel_info[pda_run->token_id].name );

			/* Is a plain token. */
			send_token( prg, sp, pda_run, is, pda_run->token_id );
		}
yes:

		if ( pda_run->parse_input != 0 )
			colm_transfer_reverse_code( pda_run, pda_run->parse_input );

		if ( pda_run->parse_input != 0 ) {
			/* If it's a nonterminal with a termdup then flip the parse tree to
			 * the terminal. */
			if ( pda_run->parse_input->id >= prg->rtd->first_non_term_id ) {
				pda_run->parse_input->id =
						prg->rtd->lel_info[pda_run->parse_input->id].term_dup_id;
				pda_run->parse_input->flags |= PF_TERM_DUP;
			}
		}

		long pcr = parse_token( prg, sp, pda_run, is, PCR_START );

		/* Every non-PCR_DONE result of parse_token is passed up to the
		 * caller. Re-entry with PCR_REDUCTION or PCR_REVERSE jumps to the
		 * labels below, skipping the initialiser of pcr above; pcr is
		 * assigned before the loop condition reads it. */
		while ( pcr != PCR_DONE ) {

			/* COROUTINE */
			return pcr;
			case PCR_REDUCTION:
			case PCR_REVERSE:

			pcr = parse_token( prg, sp, pda_run, is, entry );
		}

		assert( pcr == PCR_DONE );

		handle_error( prg, sp, pda_run );

skip_send:
		new_token( prg, pda_run );

		/* Various stop conditions. This should all be covered by one test
		 * eventually. */

		if ( pda_run->trigger_undo ) {
			debug( prg, REALM_PARSE, "parsing stopped by triggerUndo\n" );
			break;
		}

		if ( pda_run->eof_term_recvd ) {
			debug( prg, REALM_PARSE, "parsing stopped by EOF\n" );
			break;
		}

		if ( pda_run->stop_parsing ) {
			debug( prg, REALM_PARSE, "scanner has been stopped\n" );
			break;
		}

		if ( pda_run->stop ) {
			debug( prg, REALM_PARSE, "parsing has been stopped by consumedCount\n" );
			break;
		}

		if ( prg->induce_exit ) {
			debug( prg, REALM_PARSE, "parsing has been stopped by a call to exit\n" );
			break;
		}

		if ( pda_run->parse_error ) {
			debug( prg, REALM_PARSE, "parsing stopped by a parse error\n" );
			break;
		}

		/* Disregard any alternate parse paths, just go right to failure. */
		if ( pda_run->fail_parsing ) {
			debug( prg, REALM_PARSE, "parsing failed by explicit request\n" );
			break;
		}
	}

	/* COROUTINE */
	case PCR_DONE:
	break; }

	return PCR_DONE;
}


/*
 * colm_parse_frag
 *
 * Coroutine wrapper around colm_parse_loop. On PCR_START the loop is run only
 * when pda_run->parse_error is clear. Any result other than PCR_DONE is
 * returned to the caller; re-entering with that value resumes inside the
 * while loop, which calls colm_parse_loop again with the same entry.
 */
long colm_parse_frag( program_t *prg, tree_t **sp,
		struct pda_run *pda_run, input_t *input, long entry )
{
	/* COROUTINE */
	switch ( entry ) {
	case PCR_START:

	if ( !
pda_run->parse_error ) {
		long pcr = colm_parse_loop( prg, sp, pda_run,
				input_to_impl( input ), entry );

		while ( pcr != PCR_DONE ) {

			/* COROUTINE */
			/* Pass the request up; re-entry lands on the labels below. */
			return pcr;
			case PCR_REDUCTION:
			case PCR_GENERATION:
			case PCR_PRE_EOF:
			case PCR_REVERSE:

			pcr = colm_parse_loop( prg, sp, pda_run,
					input_to_impl( input ), entry );
		}
	}

	/* COROUTINE */
	case PCR_DONE:
	break; }

	return PCR_DONE;
}

/*
 * colm_parse_undo_frag
 *
 * Coroutine that drives colm_parse_loop backwards until pda_run->steps has
 * been reduced to 'steps'. Nothing is undone when steps >= pda_run->steps.
 *
 * Note that the debug message and reset_token() sit above the switch, so they
 * run on every call, including re-entries with a non-START 'entry'.
 *
 * On PCR_START the undo environment is set up (num_retry bumped, target_steps
 * and trigger_undo set). It is torn down only after colm_parse_loop finally
 * yields PCR_DONE, which may be on a later re-entry.
 *
 * Returns the pending PCR_* request while the loop is suspended, PCR_DONE
 * otherwise.
 */
long colm_parse_undo_frag( program_t *prg, tree_t **sp, struct pda_run *pda_run,
		input_t *input, long entry, long steps )
{
	debug( prg, REALM_PARSE,
			"undo parse frag, target steps: %ld, pdarun steps: %ld\n",
			steps, pda_run->steps );

	reset_token( pda_run );

	/* COROUTINE */
	switch ( entry ) {
	case PCR_START:

	if ( steps < pda_run->steps ) {
		/* Setup environment for going backwards until we reduced steps to
		 * what we want. */
		pda_run->num_retry += 1;
		pda_run->target_steps = steps;
		pda_run->trigger_undo = 1;

		/* The parse loop will recognise the situation. */
		long pcr = colm_parse_loop( prg, sp, pda_run, input_to_impl(input), entry );
		while ( pcr != PCR_DONE ) {

			/* COROUTINE */
			/* Re-entry skips the setup above and resumes at these labels. */
			return pcr;
			case PCR_REDUCTION:
			case PCR_GENERATION:
			case PCR_PRE_EOF:
			case PCR_REVERSE:

			pcr = colm_parse_loop( prg, sp, pda_run, input_to_impl(input), entry );
		}

		/* Reset environment. */
		pda_run->trigger_undo = 0;
		pda_run->target_steps = -1;
		pda_run->num_retry -= 1;
	}

	/* COROUTINE */
	case PCR_DONE:
	break; }

	return PCR_DONE;
}

/*
 * colm_parse_reduce_commit
 *
 * Runs commit_reduce for this pda_run when it has a reducer; otherwise does
 * nothing.
 */
void colm_parse_reduce_commit( program_t *prg, tree_t **sp,
		struct pda_run *pda_run )
{
	/* Flush out anything not committed.
*/ + if ( pda_run->reducer ) + commit_reduce( prg, sp, pda_run ); +} + diff --git a/src/pdarun.h b/src/pdarun.h new file mode 100644 index 00000000..4003b9be --- /dev/null +++ b/src/pdarun.h @@ -0,0 +1,471 @@ +/* + * Copyright 2007-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef _COLM_PDARUN_H +#define _COLM_PDARUN_H + +#include <colm/input.h> +#include <colm/defs.h> +#include <colm/tree.h> +#include <colm/struct.h> + +#ifdef __cplusplus +extern "C" { +#endif + +struct colm_program; + +#define MARK_SLOTS 32 + +struct fsm_tables +{ + long *actions; + long *key_offsets; + char *trans_keys; + long *single_lengths; + long *range_lengths; + long *index_offsets; + long *transTargsWI; + long *transActionsWI; + long *to_state_actions; + long *from_state_actions; + long *eof_actions; + long *eof_targs; + long *entry_by_region; + + long num_states; + long num_actions; + long num_trans_keys; + long num_single_lengths; + long num_range_lengths; + long num_index_offsets; + long numTransTargsWI; + long numTransActionsWI; + long num_regions; + + long start_state; + long first_final; + long error_state; + + struct GenAction **action_switch; + long num_action_switch; +}; + +#if SIZEOF_LONG != 4 && SIZEOF_LONG != 8 + #error "SIZEOF_LONG contained an unexpected value" +#endif + +struct colm_execution; + +struct rt_code_vect +{ + code_t *data; + long tab_len; + long alloc_len; + + /* FIXME: leak when freed. */ +}; + +void list_add_after( list_t *list, list_el_t *prev_el, list_el_t *new_el ); +void list_add_before( list_t *list, list_el_t *next_el, list_el_t *new_el ); + +void list_prepend( list_t *list, list_el_t *new_el ); +void list_append( list_t *list, list_el_t *new_el ); + +list_el_t *list_detach( list_t *list, list_el_t *el ); +list_el_t *list_detach_first(list_t *list ); +list_el_t *list_detach_last(list_t *list ); + +long list_length(list_t *list); + +struct function_info +{ + long frame_id; + long arg_size; + long frame_size; +}; + +/* + * Program Data. 
+ */ + +struct pat_cons_info +{ + long offset; + long num_bindings; +}; + +struct pat_cons_node +{ + long id; + long prod_num; + long next; + long child; + long bind_id; + const char *data; + long length; + long left_ignore; + long right_ignore; + + /* Just match nonterminal, don't go inside. */ + unsigned char stop; +}; + +/* FIXME: should have a descriptor for object types to give the length. */ + +struct lang_el_info +{ + const char *name; + const char *xml_tag; + unsigned char repeat; + unsigned char list; + unsigned char literal; + unsigned char ignore; + + long frame_id; + + long object_type_id; + long ofi_offset; + long object_length; + + long term_dup_id; + long mark_id; + long capture_attr; + long num_capture_attr; +}; + +struct struct_el_info +{ + long size; + short *trees; + long trees_len; +}; + +struct prod_info +{ + unsigned long lhs_id; + short prod_num; + long length; + const char *name; + long frame_id; + unsigned char lhs_upref; + unsigned char *copy; + long copy_len; +}; + +/* Must match the LocalType enum. */ +#define LI_Tree 1 +#define LI_Iter 2 +#define LI_RevIter 3 +#define LI_UserIter 4 + +struct local_info +{ + char type; + short offset; +}; + +struct frame_info +{ + const char *name; + code_t *codeWV; + long codeLenWV; + code_t *codeWC; + long codeLenWC; + struct local_info *locals; + long locals_len; + long arg_size; + long frame_size; + char ret_tree; +}; + +struct region_info +{ + long default_token; + long eof_frame_id; + int ci_lel_id; +}; + +typedef struct _CaptureAttr +{ + long mark_enter; + long mark_leave; + long offset; +} CaptureAttr; + +struct pda_tables +{ + /* Parser table data. 
*/ + int *indices; + int *owners; + int *keys; + unsigned int *offsets; + unsigned int *targs; + unsigned int *act_inds; + unsigned int *actions; + int *commit_len; + int *token_region_inds; + int *token_regions; + int *token_pre_regions; + + int num_indices; + int num_keys; + int num_states; + int num_targs; + int num_act_inds; + int num_actions; + int num_commit_len; + int num_region_items; + int num_pre_region_items; +}; + +struct pool_block +{ + void *data; + struct pool_block *next; +}; + +struct pool_item +{ + struct pool_item *next; +}; + +struct pool_alloc +{ + struct pool_block *head; + long nextel; + struct pool_item *pool; + int sizeofT; +}; + +struct pda_run +{ + /* + * Scanning. + */ + struct fsm_tables *fsm_tables; + + struct run_buf *consume_buf; + + long region, pre_region; + long fsm_cs, next_cs, act; + alph_t *start; + alph_t *tokstart; + long tokend; + long tokpref; + alph_t *p, *pe; + char scan_eof; + + char return_result; + char skip_tokpref; + char eof_term_recvd; + + alph_t *mark[MARK_SLOTS]; + long matched_token; + + /* + * Parsing + */ + int num_retry; + parse_tree_t *stack_top; + ref_t *token_list; + int pda_cs; + int next_region_ind; + + struct pda_tables *pda_tables; + int parser_id; + + /* Reused. */ + struct rt_code_vect rcode_collect; + struct rt_code_vect reverse_code; + + int stop_parsing; + long stop_target; + + parse_tree_t *accum_ignore; + + kid_t *bt_point; + + struct bindings *bindings; + + int revert_on; + + struct colm_struct *context; + + int stop; + int parse_error; + + long steps; + long target_steps; + + /* The shift count simply tracks the number of shifts that have happend. + * The commit shift count is the shift count when the last commit occurred. + * If we back up to this number of shifts then we decide we cannot proceed. + * The commit shift count is initialized to -1. */ + long shift_count; + long commit_shift_count; + + int on_deck; + + /* + * Data we added when refactoring the parsing engine into a coroutine. 
+ */ + + parse_tree_t *parse_input; + struct frame_info *fi; + int reduction; + parse_tree_t *red_lel; + int cur_state; + parse_tree_t *lel; + int trigger_undo; + + int token_id; + head_t *tokdata; + int frame_id; + int next; + parse_tree_t *undo_lel; + + int check_next; + int check_stop; + + /* The lhs is sometimes saved before reduction actions in case it is + * replaced and we need to restore it on backtracking */ + tree_t *parsed; + + int reject; + + /* Instruction pointer to use when we stop parsing and execute code. */ + code_t *code; + + int rc_block_count; + + tree_t *parse_error_text; + + /* Zero indicates parsing proper. Nonzero is the reducer id. */ + int reducer; + + parse_tree_t *last_final; + + struct pool_alloc *parse_tree_pool; + struct pool_alloc local_pool; + + /* Disregard any alternate parse paths, just go right to failure. */ + int fail_parsing; +}; + +void colm_pda_init( struct colm_program *prg, struct pda_run *pda_run, + struct pda_tables *tables, int parser_id, long stop_target, + int revert_on, struct colm_struct *context, int reducer ); + +void colm_pda_clear( struct colm_program *prg, struct colm_tree **sp, + struct pda_run *pda_run ); + +void colm_rt_code_vect_replace( struct rt_code_vect *vect, long pos, + const code_t *val, long len ); +void colm_rt_code_vect_empty( struct rt_code_vect *vect ); +void colm_rt_code_vect_remove( struct rt_code_vect *vect, long pos, long len ); + +void init_rt_code_vect( struct rt_code_vect *code_vect ); + +inline static void append_code_val( struct rt_code_vect *vect, const code_t val ); +inline static void append_code_vect( struct rt_code_vect *vect, const code_t *val, long len ); +inline static void append_half( struct rt_code_vect *vect, half_t half ); +inline static void append_word( struct rt_code_vect *vect, word_t word ); + +inline static void append_code_vect( struct rt_code_vect *vect, const code_t *val, long len ) +{ + colm_rt_code_vect_replace( vect, vect->tab_len, val, len ); +} + +inline 
static void append_code_val( struct rt_code_vect *vect, const code_t val ) +{ + colm_rt_code_vect_replace( vect, vect->tab_len, &val, 1 ); +} + +inline static void append_half( struct rt_code_vect *vect, half_t half ) +{ + /* not optimal. */ + append_code_val( vect, half & 0xff ); + append_code_val( vect, (half>>8) & 0xff ); +} + +inline static void append_word( struct rt_code_vect *vect, word_t word ) +{ + /* not optimal. */ + append_code_val( vect, word & 0xff ); + append_code_val( vect, (word>>8) & 0xff ); + append_code_val( vect, (word>>16) & 0xff ); + append_code_val( vect, (word>>24) & 0xff ); + #if SIZEOF_LONG == 8 + append_code_val( vect, (word>>32) & 0xff ); + append_code_val( vect, (word>>40) & 0xff ); + append_code_val( vect, (word>>48) & 0xff ); + append_code_val( vect, (word>>56) & 0xff ); + #endif +} + +void colm_increment_steps( struct pda_run *pda_run ); +void colm_decrement_steps( struct pda_run *pda_run ); + +void colm_clear_stream_impl( struct colm_program *prg, tree_t **sp, struct stream_impl *input_stream ); + +#define PCR_START 1 +#define PCR_DONE 2 +#define PCR_REDUCTION 3 +#define PCR_GENERATION 4 +#define PCR_PRE_EOF 5 +#define PCR_REVERSE 6 + +head_t *colm_stream_pull( struct colm_program *prg, struct colm_tree **sp, + struct pda_run *pda_run, struct input_impl *is, long length ); +head_t *colm_string_alloc_pointer( struct colm_program *prg, const char *data, long length ); + +kid_t *make_token_with_data( struct colm_program *prg, struct pda_run *pda_run, + struct input_impl *input_stream, int id, head_t *tokdata ); + +long colm_parse_loop( struct colm_program *prg, tree_t **sp, struct pda_run *pda_run, + struct input_impl *input_stream, long entry ); + +long colm_parse_frag( struct colm_program *prg, tree_t **sp, + struct pda_run *pda_run, input_t *input, long entry ); +long colm_parse_finish( struct colm_program *prg, tree_t **sp, + struct pda_run *pda_run, stream_t *input, long entry ); +long colm_parse_undo_frag( struct colm_program 
*prg, tree_t **sp, struct pda_run *pda_run, + input_t *input, long entry, long steps ); + +void commit_clear_kid_list( program_t *prg, tree_t **sp, kid_t *kid ); +void commit_clear_parse_tree( program_t *prg, tree_t **sp, + struct pda_run *pda_run, parse_tree_t *pt ); +void commit_reduce( program_t *prg, tree_t **root, + struct pda_run *pda_run ); + +tree_t *get_parsed_root( struct pda_run *pda_run, int stop ); + +void colm_parse_reduce_commit( program_t *prg, tree_t **sp, + struct pda_run *pda_run ); + +#ifdef __cplusplus +} +#endif + +#endif /* _COLM_PDRUN_H */ + diff --git a/src/pool.c b/src/pool.c new file mode 100644 index 00000000..ffb32636 --- /dev/null +++ b/src/pool.c @@ -0,0 +1,248 @@ +/* + * Copyright 2010-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <colm/pool.h> + +#include <assert.h> +#include <string.h> +#include <stdlib.h> + +#include <colm/pdarun.h> +#include <colm/debug.h> + +void init_pool_alloc( struct pool_alloc *pool_alloc, int sizeofT ) +{ + pool_alloc->head = 0; + pool_alloc->nextel = FRESH_BLOCK; + pool_alloc->pool = 0; + pool_alloc->sizeofT = sizeofT; +} + +static void *pool_alloc_allocate( struct pool_alloc *pool_alloc ) +{ + //debug( REALM_POOL, "pool allocation\n" ); + +#ifdef POOL_MALLOC + void *res = malloc( pool_alloc->sizeofT ); + memset( res, 0, pool_alloc->sizeofT ); + return res; +#else + + void *new_el = 0; + if ( pool_alloc->pool == 0 ) { + if ( pool_alloc->nextel == FRESH_BLOCK ) { + struct pool_block *new_block = (struct pool_block*)malloc( sizeof(struct pool_block) ); + new_block->data = malloc( pool_alloc->sizeofT * FRESH_BLOCK ); + new_block->next = pool_alloc->head; + pool_alloc->head = new_block; + pool_alloc->nextel = 0; + } + + new_el = (char*)pool_alloc->head->data + pool_alloc->sizeofT * pool_alloc->nextel++; + } + else { + new_el = pool_alloc->pool; + pool_alloc->pool = pool_alloc->pool->next; + } + memset( new_el, 0, pool_alloc->sizeofT ); + return new_el; +#endif +} + +void pool_alloc_free( struct pool_alloc *pool_alloc, void *el ) +{ + #if 0 + /* Some sanity checking. Best not to normally run with this on. 
*/ + char *p = (char*)el + sizeof(struct pool_item*); + char *pe = (char*)el + sizeof(T); + for ( ; p < pe; p++ ) + assert( *p != 0xcc ); + memset( el, 0xcc, sizeof(T) ); + #endif + +#ifdef POOL_MALLOC + free( el ); +#else + struct pool_item *pi = (struct pool_item*) el; + pi->next = pool_alloc->pool; + pool_alloc->pool = pi; +#endif +} + +void pool_alloc_clear( struct pool_alloc *pool_alloc ) +{ + struct pool_block *block = pool_alloc->head; + while ( block != 0 ) { + struct pool_block *next = block->next; + free( block->data ); + free( block ); + block = next; + } + + pool_alloc->head = 0; + pool_alloc->nextel = 0; + pool_alloc->pool = 0; +} + +long pool_alloc_num_lost( struct pool_alloc *pool_alloc ) +{ + /* Count the number of items allocated. */ + long lost = 0; + struct pool_block *block = pool_alloc->head; + if ( block != 0 ) { + lost = pool_alloc->nextel; + block = block->next; + while ( block != 0 ) { + lost += FRESH_BLOCK; + block = block->next; + } + } + + /* Subtract. Items that are on the free list. 
*/ + struct pool_item *pi = pool_alloc->pool; + while ( pi != 0 ) { + lost -= 1; + pi = pi->next; + } + + return lost; +} + +/* + * kid_t + */ + +kid_t *kid_allocate( program_t *prg ) +{ + return (kid_t*) pool_alloc_allocate( &prg->kid_pool ); +} + +void kid_free( program_t *prg, kid_t *el ) +{ + pool_alloc_free( &prg->kid_pool, el ); +} + +void kid_clear( program_t *prg ) +{ + pool_alloc_clear( &prg->kid_pool ); +} + +long kid_num_lost( program_t *prg ) +{ + return pool_alloc_num_lost( &prg->kid_pool ); +} + +/* + * tree_t + */ + +tree_t *tree_allocate( program_t *prg ) +{ + return (tree_t*) pool_alloc_allocate( &prg->tree_pool ); +} + +void tree_free( program_t *prg, tree_t *el ) +{ + pool_alloc_free( &prg->tree_pool, el ); +} + +void tree_clear( program_t *prg ) +{ + pool_alloc_clear( &prg->tree_pool ); +} + +long tree_num_lost( program_t *prg ) +{ + return pool_alloc_num_lost( &prg->tree_pool ); +} + +/* + * parse_tree_t + */ + +parse_tree_t *parse_tree_allocate( struct pda_run *pda_run ) +{ + return (parse_tree_t*) pool_alloc_allocate( pda_run->parse_tree_pool ); +} + +void parse_tree_free( struct pda_run *pda_run, parse_tree_t *el ) +{ + pool_alloc_free( pda_run->parse_tree_pool, el ); +} + +void parse_tree_clear( struct pool_alloc *pool_alloc ) +{ + pool_alloc_clear( pool_alloc ); +} + +long parse_tree_num_lost( struct pool_alloc *pool_alloc ) +{ + return pool_alloc_num_lost( pool_alloc ); +} + +/* + * head_t + */ + +head_t *head_allocate( program_t *prg ) +{ + return (head_t*) pool_alloc_allocate( &prg->head_pool ); +} + +void head_free( program_t *prg, head_t *el ) +{ + pool_alloc_free( &prg->head_pool, el ); +} + +void head_clear( program_t *prg ) +{ + pool_alloc_clear( &prg->head_pool ); +} + +long head_num_lost( program_t *prg ) +{ + return pool_alloc_num_lost( &prg->head_pool ); +} + +/* + * location_t + */ + +location_t *location_allocate( program_t *prg ) +{ + return (location_t*) pool_alloc_allocate( &prg->location_pool ); +} + +void location_free( 
program_t *prg, location_t *el ) +{ + pool_alloc_free( &prg->location_pool, el ); +} + +void location_clear( program_t *prg ) +{ + pool_alloc_clear( &prg->location_pool ); +} + +long location_num_lost( program_t *prg ) +{ + return pool_alloc_num_lost( &prg->location_pool ); +} diff --git a/src/pool.h b/src/pool.h new file mode 100644 index 00000000..5e8f1de0 --- /dev/null +++ b/src/pool.h @@ -0,0 +1,73 @@ +/* + * Copyright 2010-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _COLM_POOL_H +#define _COLM_POOL_H + +/* Allocation, number of items. 
*/ +#define FRESH_BLOCK 8128 + +#include <colm/pdarun.h> +#include <colm/map.h> +#include <colm/tree.h> + +#ifdef __cplusplus +extern "C" { +#endif + +void init_pool_alloc( struct pool_alloc *pool_alloc, int sizeofT ); + +kid_t *kid_allocate( program_t *prg ); +void kid_free( program_t *prg, kid_t *el ); +void kid_clear( program_t *prg ); +long kid_num_lost( program_t *prg ); + +tree_t *tree_allocate( program_t *prg ); +void tree_free( program_t *prg, tree_t *el ); +void tree_clear( program_t *prg ); +long tree_num_lost( program_t *prg ); + +/* Parse tree allocators go into pda_run structs. */ +parse_tree_t *parse_tree_allocate( struct pda_run *pda_run ); +void parse_tree_free( struct pda_run *pda_run, parse_tree_t *el ); +void parse_tree_clear( struct pool_alloc *pool_alloc ); +long parse_tree_num_lost( struct pool_alloc *pool_alloc ); + +head_t *head_allocate( program_t *prg ); +void head_free( program_t *prg, head_t *el ); +void head_clear( program_t *prg ); +long head_num_lost( program_t *prg ); + +location_t *location_allocate( program_t *prg ); +void location_free( program_t *prg, location_t *el ); +void location_clear( program_t *prg ); +long location_num_lost( program_t *prg ); + +void pool_alloc_clear( struct pool_alloc *pool_alloc ); +long pool_alloc_num_lost( struct pool_alloc *pool_alloc ); + +#ifdef __cplusplus +} +#endif + +#endif /* _COLM_POOL_H */ + diff --git a/src/print.c b/src/print.c new file mode 100644 index 00000000..363a7eea --- /dev/null +++ b/src/print.c @@ -0,0 +1,775 @@ +/* + * Copyright 2007-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * 
furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <string.h> +#include <stdlib.h> +#include <stdbool.h> +#include <stdio.h> +#include <unistd.h> +#include <assert.h> + +#include <colm/tree.h> +#include <colm/pool.h> +#include <colm/bytecode.h> +#include <colm/debug.h> + +#define BUFFER_INITIAL_SIZE 4096 + +static void xml_escape_data( struct colm_print_args *print_args, const char *data, long len ) +{ + int i; + for ( i = 0; i < len; i++ ) { + if ( data[i] == '<' ) + print_args->out( print_args, "<", 4 ); + else if ( data[i] == '>' ) + print_args->out( print_args, ">", 4 ); + else if ( data[i] == '&' ) + print_args->out( print_args, "&", 5 ); + else if ( (32 <= data[i] && data[i] <= 126) || + data[i] == '\t' || data[i] == '\n' || data[i] == '\r' ) + { + print_args->out( print_args, &data[i], 1 ); + } + else { + char out[64]; + sprintf( out, "&#%u;", ((unsigned)data[i]) ); + print_args->out( print_args, out, strlen(out) ); + } + } +} + +void init_str_collect( str_collect_t *collect ) +{ + collect->data = malloc( BUFFER_INITIAL_SIZE ); + collect->allocated = BUFFER_INITIAL_SIZE; + collect->length = 0; + collect->indent.indent = 0; + collect->indent.level = COLM_INDENT_OFF; +} + +void str_collect_destroy( str_collect_t *collect ) +{ + free( collect->data ); +} + +void str_collect_append( str_collect_t *collect, const char 
*data, long len ) +{ + long new_len = collect->length + len; + if ( new_len > collect->allocated ) { + collect->allocated = new_len * 2; + collect->data = realloc( collect->data, collect->allocated ); + } + memcpy( collect->data + collect->length, data, len ); + collect->length += len; +} + +void str_collect_clear( str_collect_t *collect ) +{ + collect->length = 0; +} + +#define INT_SZ 32 + +void print_str( struct colm_print_args *print_args, head_t *str ) +{ + print_args->out( print_args, str->data, str->length ); +} + +void append_collect( struct colm_print_args *args, const char *data, int length ) +{ + str_collect_append( (str_collect_t*) args->arg, data, length ); +} + +void append_file( struct colm_print_args *args, const char *data, int length ) +{ + struct stream_impl_data *impl = (struct stream_impl_data*) args->arg; + fwrite( data, 1, length, impl->file ); +} + +static void out_indent( struct colm_print_args *args, const char *data, int length ) +{ + int level; +restart: + if ( args->indent->indent ) { + /* Consume mode. */ + while ( length > 0 && ( *data == ' ' || *data == '\t' ) ) { + data += 1; + length -= 1; + } + + if ( length > 0 ) { + /* Found some data, print the indentation and turn off indentation + * mode. */ + for ( level = 0; level < args->indent->level; level++ ) + args->out( args, "\t", 1 ); + + args->indent->indent = 0; + + goto restart; + } + } + else { + const char *nl; + if ( args->indent->level != COLM_INDENT_OFF && + (nl = memchr( data, '\n', length )) ) + { + /* Print up to and including the newline. */ + int wl = nl - data + 1; + args->out( args, data, wl ); + + /* Go into consume state. If we see more non-indentation chars we + * will generate the appropriate indentation level. */ + data += wl; + length -= wl; + args->indent->indent = 1; + goto restart; + } + else { + /* Indentation off, or no indent trigger (newline). 
*/ + args->out( args, data, length ); + } + } +} + + +tree_t *tree_trim( struct colm_program *prg, tree_t **sp, tree_t *tree ) +{ + if ( tree == 0 ) + return 0; + + debug( prg, REALM_PARSE, "attaching left ignore\n" ); + + /* Make the ignore list for the left-ignore. */ + tree_t *left_ignore = tree_allocate( prg ); + left_ignore->id = LEL_ID_IGNORE; + left_ignore->flags |= AF_SUPPRESS_RIGHT; + + tree = push_left_ignore( prg, tree, left_ignore ); + + debug( prg, REALM_PARSE, "attaching ignore right\n" ); + + /* Copy the ignore list first if we need to attach it as a right + * ignore. */ + tree_t *right_ignore = 0; + right_ignore = tree_allocate( prg ); + right_ignore->id = LEL_ID_IGNORE; + right_ignore->flags |= AF_SUPPRESS_LEFT; + + tree = push_right_ignore( prg, tree, right_ignore ); + + return tree; +} + +enum ReturnType +{ + Done = 1, + CollectIgnoreLeft, + CollectIgnoreRight, + RecIgnoreList, + ChildPrint +}; + +enum VisitType +{ + IgnoreWrapper, + IgnoreData, + Term, + NonTerm +}; + +#define TF_TERM_SEEN 0x1 + +void print_kid( program_t *prg, tree_t **sp, struct colm_print_args *print_args, kid_t *kid ) +{ + enum ReturnType rt; + kid_t *parent = 0; + kid_t *leading_ignore = 0; + enum VisitType visit_type; + int flags = 0; + + /* Iterate the kids passed in. We are expecting a next, which will allow us + * to print the trailing ignore list. */ + while ( kid != 0 ) { + vm_push_type( enum ReturnType, Done ); + goto rec_call; + rec_return_top: + kid = kid->next; + } + + return; + +rec_call: + if ( kid->tree == 0 ) + goto skip_null; + + /* If not currently skipping ignore data, then print it. Ignore data can + * be associated with terminals and nonterminals. 
*/ + if ( kid->tree->flags & AF_LEFT_IGNORE ) { + vm_push_kid( parent ); + vm_push_kid( kid ); + parent = kid; + kid = tree_left_ignore_kid( prg, kid->tree ); + vm_push_type( enum ReturnType, CollectIgnoreLeft ); + goto rec_call; + rec_return_ign_left: + kid = vm_pop_kid(); + parent = vm_pop_kid(); + } + + if ( kid->tree->id == LEL_ID_IGNORE ) + visit_type = IgnoreWrapper; + else if ( parent != 0 && parent->tree->id == LEL_ID_IGNORE ) + visit_type = IgnoreData; + else if ( kid->tree->id < prg->rtd->first_non_term_id ) + visit_type = Term; + else + visit_type = NonTerm; + + debug( prg, REALM_PRINT, "visit type: %d\n", visit_type ); + + if ( visit_type == IgnoreData ) { + debug( prg, REALM_PRINT, "putting %p on ignore list\n", kid->tree ); + kid_t *new_ignore = kid_allocate( prg ); + new_ignore->next = leading_ignore; + leading_ignore = new_ignore; + leading_ignore->tree = kid->tree; + goto skip_node; + } + + if ( visit_type == IgnoreWrapper ) { + kid_t *new_ignore = kid_allocate( prg ); + new_ignore->next = leading_ignore; + leading_ignore = new_ignore; + leading_ignore->tree = kid->tree; + /* Don't skip. */ + } + + /* print leading ignore? Triggered by terminals. */ + if ( visit_type == Term ) { + /* Reverse the leading ignore list. */ + if ( leading_ignore != 0 ) { + kid_t *ignore = 0, *last = 0; + + /* Reverse the list and take the opportunity to implement the + * suppress left. */ + while ( true ) { + kid_t *next = leading_ignore->next; + leading_ignore->next = last; + + if ( leading_ignore->tree->flags & AF_SUPPRESS_LEFT ) { + /* We are moving left. Chop off the tail. */ + debug( prg, REALM_PRINT, "suppressing left\n" ); + free_kid_list( prg, next ); + break; + } + + if ( next == 0 ) + break; + + last = leading_ignore; + leading_ignore = next; + } + + /* Print the leading ignore list. Also implement the suppress right + * in the process. 
*/ + if ( print_args->comm && (!print_args->trim || + (flags & TF_TERM_SEEN && kid->tree->id > 0)) ) + { + ignore = leading_ignore; + while ( ignore != 0 ) { + if ( ignore->tree->flags & AF_SUPPRESS_RIGHT ) + break; + + if ( ignore->tree->id != LEL_ID_IGNORE ) { + vm_push_type( enum VisitType, visit_type ); + vm_push_kid( leading_ignore ); + vm_push_kid( ignore ); + vm_push_kid( parent ); + vm_push_kid( kid ); + + leading_ignore = 0; + kid = ignore; + parent = 0; + + debug( prg, REALM_PRINT, "rec call on %p\n", kid->tree ); + vm_push_type( enum ReturnType, RecIgnoreList ); + goto rec_call; + rec_return_il: + + kid = vm_pop_kid(); + parent = vm_pop_kid(); + ignore = vm_pop_kid(); + leading_ignore = vm_pop_kid(); + visit_type = vm_pop_type(enum VisitType); + } + + ignore = ignore->next; + } + } + + /* Free the leading ignore list. */ + free_kid_list( prg, leading_ignore ); + leading_ignore = 0; + } + } + + if ( visit_type == Term || visit_type == NonTerm ) { + /* Open the tree. */ + print_args->open_tree( prg, sp, print_args, parent, kid ); + } + + if ( visit_type == Term ) + flags |= TF_TERM_SEEN; + + if ( visit_type == Term || visit_type == IgnoreData ) { + /* Print contents. */ + if ( kid->tree->id < prg->rtd->first_non_term_id ) { + debug( prg, REALM_PRINT, "printing terminal %p\n", kid->tree ); + if ( kid->tree->id != 0 ) + print_args->print_term( prg, sp, print_args, kid ); + } + } + + /* Print children. */ + kid_t *child = print_args->attr ? + tree_attr( prg, kid->tree ) : + tree_child( prg, kid->tree ); + + if ( child != 0 ) { + vm_push_type( enum VisitType, visit_type ); + vm_push_kid( parent ); + vm_push_kid( kid ); + parent = kid; + kid = child; + while ( kid != 0 ) { + vm_push_type( enum ReturnType, ChildPrint ); + goto rec_call; + rec_return: + kid = kid->next; + } + kid = vm_pop_kid(); + parent = vm_pop_kid(); + visit_type = vm_pop_type(enum VisitType); + } + + if ( visit_type == Term || visit_type == NonTerm ) { + /* close the tree. 
*/ + print_args->close_tree( prg, sp, print_args, parent, kid ); + } + +skip_node: + + /* If not currently skipping ignore data, then print it. Ignore data can + * be associated with terminals and nonterminals. */ + if ( kid->tree->flags & AF_RIGHT_IGNORE ) { + debug( prg, REALM_PRINT, "right ignore\n" ); + vm_push_kid( parent ); + vm_push_kid( kid ); + parent = kid; + kid = tree_right_ignore_kid( prg, kid->tree ); + vm_push_type( enum ReturnType, CollectIgnoreRight ); + goto rec_call; + rec_return_ign_right: + kid = vm_pop_kid(); + parent = vm_pop_kid(); + } + +/* For skipping over content on null. */ +skip_null: + + rt = vm_pop_type(enum ReturnType); + switch ( rt ) { + case Done: + debug( prg, REALM_PRINT, "return: done\n" ); + goto rec_return_top; + break; + case CollectIgnoreLeft: + debug( prg, REALM_PRINT, "return: ignore left\n" ); + goto rec_return_ign_left; + case CollectIgnoreRight: + debug( prg, REALM_PRINT, "return: ignore right\n" ); + goto rec_return_ign_right; + case RecIgnoreList: + debug( prg, REALM_PRINT, "return: ignore list\n" ); + goto rec_return_il; + case ChildPrint: + debug( prg, REALM_PRINT, "return: child print\n" ); + goto rec_return; + } +} + +void colm_print_tree_args( program_t *prg, tree_t **sp, + struct colm_print_args *print_args, tree_t *tree ) +{ + if ( tree == 0 ) + out_indent( print_args, "NIL", 3 ); + else { + /* This term tree allows us to print trailing ignores. 
*/ + tree_t term_tree; + memset( &term_tree, 0, sizeof(term_tree) ); + + kid_t kid, term; + term.tree = &term_tree; + term.next = 0; + + kid.tree = tree; + kid.next = &term; + + print_kid( prg, sp, print_args, &kid ); + } +} + +void colm_print_null( program_t *prg, tree_t **sp, + struct colm_print_args *args, kid_t *parent, kid_t *kid ) +{ +} + +void colm_print_term_tree( program_t *prg, tree_t **sp, + struct colm_print_args *args, kid_t *kid ) +{ + debug( prg, REALM_PRINT, "printing term %p\n", kid->tree ); + + if ( kid->tree->id == LEL_ID_PTR ) { + char buf[INT_SZ]; + out_indent( args, "#<", 2 ); + sprintf( buf, "%lx", ((pointer_t*)kid->tree)->value ); + out_indent( args, buf, strlen(buf) ); + out_indent( args, ">", 1 ); + } + else if ( kid->tree->id == LEL_ID_STR ) { + print_str( args, ((str_t*)kid->tree)->value ); + } +// else if ( kid->tree->id == LEL_ID_STREAM ) { +// char buf[INT_SZ]; +// printArgs->out( printArgs, "#", 1 ); +// sprintf( buf, "%p", (void*) ((stream_t*)kid->tree)->in->file ); +// printArgs->out( printArgs, buf, strlen(buf) ); +// } + else if ( kid->tree->tokdata != 0 && + string_length( kid->tree->tokdata ) > 0 ) + { + out_indent( args, string_data( kid->tree->tokdata ), + string_length( kid->tree->tokdata ) ); + } + + struct lang_el_info *lel_info = prg->rtd->lel_info; + if ( strcmp( lel_info[kid->tree->id].name, "_IN_" ) == 0 ) { + if ( args->indent->level == COLM_INDENT_OFF ) { + args->indent->level = 1; + args->indent->indent = 1; + } + else { + args->indent->level += 1; + } + } + + if ( strcmp( lel_info[kid->tree->id].name, "_EX_" ) == 0 ) + args->indent->level -= 1; +} + +void colm_print_tree_collect( program_t *prg, tree_t **sp, + str_collect_t *collect, tree_t *tree, int trim ) +{ + struct colm_print_args print_args = { + collect, true, false, trim, &collect->indent, + &append_collect, &colm_print_null, + &colm_print_term_tree, &colm_print_null + }; + + colm_print_tree_args( prg, sp, &print_args, tree ); +} + +void 
colm_print_tree_collect_a( program_t *prg, tree_t **sp, + str_collect_t *collect, tree_t *tree, int trim ) +{ + struct colm_print_args print_args = { + collect, true, true, trim, &collect->indent, + &append_collect, &colm_print_null, + &colm_print_term_tree, &colm_print_null + }; + + colm_print_tree_args( prg, sp, &print_args, tree ); +} + +void colm_print_tree_file( program_t *prg, tree_t **sp, + struct stream_impl_data *impl, tree_t *tree, int trim ) +{ + struct colm_print_args print_args = { + impl, true, false, trim, &impl->indent, + &append_file, &colm_print_null, + &colm_print_term_tree, &colm_print_null + }; + + colm_print_tree_args( prg, sp, &print_args, tree ); +} + +static void xml_open( program_t *prg, tree_t **sp, struct colm_print_args *args, + kid_t *parent, kid_t *kid ) +{ + /* Skip the terminal that is for forcing trailing ignores out. */ + if ( kid->tree->id == 0 ) + return; + + struct lang_el_info *lel_info = prg->rtd->lel_info; + + /* List flattening: skip the repeats and lists that are a continuation of + * the list. 
*/ + if ( parent != 0 && parent->tree->id == kid->tree->id && kid->next == 0 && + ( lel_info[parent->tree->id].repeat || lel_info[parent->tree->id].list ) ) + { + return; + } + + const char *name = lel_info[kid->tree->id].xml_tag; + args->out( args, "<", 1 ); + args->out( args, name, strlen( name ) ); + args->out( args, ">", 1 ); +} + +static void xml_term( program_t *prg, tree_t **sp, + struct colm_print_args *print_args, kid_t *kid ) +{ + //kid_t *child; + + /*child = */ tree_child( prg, kid->tree ); + if ( kid->tree->id == LEL_ID_PTR ) { + char ptr[INT_SZ]; + sprintf( ptr, "%lx", ((pointer_t*)kid->tree)->value ); + print_args->out( print_args, ptr, strlen(ptr) ); + } + else if ( kid->tree->id == LEL_ID_STR ) { + head_t *head = (head_t*) ((str_t*)kid->tree)->value; + + xml_escape_data( print_args, head->data, head->length ); + } + else if ( 0 < kid->tree->id && kid->tree->id < prg->rtd->first_non_term_id && + kid->tree->id != LEL_ID_IGNORE && + kid->tree->tokdata != 0 && + string_length( kid->tree->tokdata ) > 0 ) + { + xml_escape_data( print_args, string_data( kid->tree->tokdata ), + string_length( kid->tree->tokdata ) ); + } +} + +static void xml_close( program_t *prg, tree_t **sp, + struct colm_print_args *args, kid_t *parent, kid_t *kid ) +{ + /* Skip the terminal that is for forcing trailing ignores out. */ + if ( kid->tree->id == 0 ) + return; + + struct lang_el_info *lel_info = prg->rtd->lel_info; + + /* List flattening: skip the repeats and lists that are a continuation of + * the list. 
*/ + if ( parent != 0 && parent->tree->id == kid->tree->id && kid->next == 0 && + ( lel_info[parent->tree->id].repeat || lel_info[parent->tree->id].list ) ) + { + return; + } + + const char *name = lel_info[kid->tree->id].xml_tag; + args->out( args, "</", 2 ); + args->out( args, name, strlen( name ) ); + args->out( args, ">", 1 ); +} + +void colm_print_xml_stdout( program_t *prg, tree_t **sp, + struct stream_impl_data *impl, tree_t *tree, + int comm_attr, int trim ) +{ + struct colm_print_args print_args = { + impl, comm_attr, comm_attr, trim, &impl->indent, + &append_file, &xml_open, &xml_term, &xml_close }; + colm_print_tree_args( prg, sp, &print_args, tree ); +} + +static void postfix_open( program_t *prg, tree_t **sp, struct colm_print_args *args, + kid_t *parent, kid_t *kid ) +{ +} + +static void postfix_term_data( struct colm_print_args *args, const char *data, long len ) +{ + int i; + for ( i = 0; i < len; i++ ) { + if ( data[i] == '\\' ) + args->out( args, "\\5c", 3 ); + else if ( 33 <= data[i] && data[i] <= 126 ) + args->out( args, &data[i], 1 ); + else { + char out[64]; + sprintf( out, "\\%02x", ((unsigned char)data[i]) ); + args->out( args, out, strlen(out) ); + } + } +} + +static void postfix_term( program_t *prg, tree_t **sp, + struct colm_print_args *args, kid_t *kid ) +{ + //kid_t *child; + + /*child = */ tree_child( prg, kid->tree ); + if ( kid->tree->id == LEL_ID_PTR ) { + //char ptr[INT_SZ]; + //sprintf( ptr, "%lx", ((pointer_t*)kid->tree)->value ); + //args->out( args, ptr, strlen(ptr) ); + args->out( args, "p\n", 2 ); + } + else if ( kid->tree->id == LEL_ID_STR ) { + //head_t *head = (head_t*) ((str_t*)kid->tree)->value; + + //xml_escape_data( args, (char*)(head->data), head->length ); + args->out( args, "s\n", 2 ); + } + else if ( 0 < kid->tree->id && kid->tree->id < prg->rtd->first_non_term_id && + kid->tree->id != LEL_ID_IGNORE //&& + //kid->tree->tokdata != 0 && + //string_length( kid->tree->tokdata ) > 0 ) + ) + { + char buf[512]; + struct 
lang_el_info *lel_info = prg->rtd->lel_info; + const char *name = lel_info[kid->tree->id].xml_tag; + + args->out( args, "t ", 2 ); + args->out( args, name, strlen( name ) ); + + /* id. */ + sprintf( buf, " %d", kid->tree->id ); + args->out( args, buf, strlen( buf ) ); + + /* location. */ + if ( kid->tree->tokdata == 0 ) { + args->out( args, " 0 0 0 -", 8 ); + } + else { + struct colm_data *tokdata = kid->tree->tokdata; + struct colm_location *loc = tokdata->location; + if ( loc == 0 ) { + args->out( args, " 0 0 0 ", 7 ); + } + else { + sprintf( buf, " %ld %ld %ld ", loc->line, loc->column, loc->byte ); + args->out( args, buf, strlen( buf ) ); + } + + if ( string_length( tokdata ) == 0 ) { + args->out( args, "-", 1 ); + } + else { + postfix_term_data( args, string_data( tokdata ), string_length( tokdata ) ); + } + } + + args->out( args, "\n", 1 ); + } +} + +static void postfix_close( program_t *prg, tree_t **sp, + struct colm_print_args *args, kid_t *parent, kid_t *kid ) +{ + /* Skip the terminal that is for forcing trailing ignores out. */ + if ( kid->tree->id == 0 ) + return; + + if ( kid->tree->id >= prg->rtd->first_non_term_id ) { + char buf[512]; + struct lang_el_info *lel_info = prg->rtd->lel_info; + const char *name = lel_info[kid->tree->id].xml_tag; + + args->out( args, "r ", 2 ); + args->out( args, name, strlen( name ) ); + + /* id. */ + sprintf( buf, " %d", kid->tree->id ); + args->out( args, buf, strlen( buf ) ); + + /* Production number. */ + sprintf( buf, " %d", kid->tree->prod_num ); + args->out( args, buf, strlen( buf ) ); + + /* Child count. 
*/ + int children = 0; + kid_t *child = tree_child( prg, kid->tree ); + while ( child != 0 ) { + child = child->next; + children += 1; + } + + sprintf( buf, " %d", children ); + args->out( args, buf, strlen( buf ) ); + args->out( args, "\n", 1 ); + } +} + +void colm_postfix_tree_collect( program_t *prg, tree_t **sp, + str_collect_t *collect, tree_t *tree, int trim ) +{ + struct colm_print_args print_args = { + collect, false, false, false, &collect->indent, + &append_collect, &postfix_open, &postfix_term, &postfix_close + }; + + colm_print_tree_args( prg, sp, &print_args, tree ); +} + +#if 0 +void colm_postfix_tree_file( program_t *prg, tree_t **sp, struct stream_impl *impl, + tree_t *tree, int trim ) +{ + struct colm_print_args print_args = { + impl, false, false, false, &append_file, + &postfix_open, &postfix_term, &postfix_close + }; + + colm_print_tree_args( prg, sp, &print_args, tree ); + + //struct stream_impl *impl = (struct stream_impl*) args->arg; + fflush( impl->file ); +} +#endif + +void colm_print_tree_collect_xml( program_t *prg, tree_t **sp, + str_collect_t *collect, tree_t *tree, int trim ) +{ + struct colm_print_args print_args = { + collect, false, false, trim, &collect->indent, + &append_collect, &xml_open, &xml_term, &xml_close + }; + + colm_print_tree_args( prg, sp, &print_args, tree ); +} + +void colm_print_tree_collect_xml_ac( program_t *prg, tree_t **sp, + str_collect_t *collect, tree_t *tree, int trim ) +{ + struct colm_print_args print_args = { + collect, true, true, trim, &collect->indent, + &append_collect, &xml_open, &xml_term, &xml_close + }; + + colm_print_tree_args( prg, sp, &print_args, tree ); +} + diff --git a/src/prog.lm b/src/prog.lm new file mode 100644 index 00000000..3a11e342 --- /dev/null +++ b/src/prog.lm @@ -0,0 +1,88 @@ +include 'colm.lm' + +export ColmTree: start +export ColmError: str + +A: str = argv->pop() +F: stream = open( A, 'r' ) +parse P: start [ F ] + +alias prod_map map<prod_list, id> +alias unique_prod 
map_el<prod_list, id> + +global PM: prod_map = new prod_map() +global NextId: int = 1 +global Modified: bool = false + +prod_list cons_prod( SLA: prod_sublist ) +{ + if match SLA [Left: prod_sublist BAR prod_el_list] + return cons prod_list[ cons_prod(Left) ' | [ ' SLA.prod_el_list ' ] ' ] + else + return cons prod_list[ '[ ' SLA.prod_el_list ' ]' ] +} + +cfl_def rewrite_cfl_def( CflDef: ref<cfl_def> ) +{ + NewDef: cfl_def + for PE: prod_el in CflDef { + if match PE [ + OptName: opt_prod_el_name POPEN PS: prod_sublist PCLOSE OptRep: opt_repeat] + { + PL: prod_list = cons_prod(PS) + + Name: id = PM->find( PL ) + if ( !Name ) { + Name = parse id + "_sublist_[sprintf("%d", NextId)]" + NextId = NextId + 1 + + PM->insert( PL, Name ) + + NewDef = cons cfl_def + "def [Name] [PL]" + } + + PE = cons prod_el + [OptName Name OptRep " "] + + Modified = true + + # Currently can return only one item. + if ( NewDef ) + break + } + } + return NewDef +} + +void rewrite( P: ref<start> ) +{ + Modified = false + + for RIL: root_item<* in P { + require RIL [Head: root_item<* CflDef: cfl_def] + + NewDef: cfl_def + + NewDef = rewrite_cfl_def( CflDef ) + + if NewDef { + RIL = cons root_item<* [Head NewDef "\n\n" CflDef] + Modified = true + } + else { + RIL = cons root_item<* [Head CflDef] + } + + } + + return Modified +} + +if P { + while ( rewrite( P ) ) {} +} + +ColmTree = P +ColmError = error diff --git a/src/program.c b/src/program.c new file mode 100644 index 00000000..a9459ccf --- /dev/null +++ b/src/program.c @@ -0,0 +1,333 @@ +/* + * Copyright 2006-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is 
+ * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <string.h> +#include <assert.h> +#include <stdlib.h> + +#include <colm/pdarun.h> +#include <colm/tree.h> +#include <colm/bytecode.h> +#include <colm/pool.h> +#include <colm/debug.h> +#include <colm/struct.h> + +#define VM_STACK_SIZE (8192) + +static void colm_alloc_global( program_t *prg ) +{ + /* Alloc the global. */ + prg->global = colm_struct_new( prg, prg->rtd->global_id ) ; +} + +void vm_init( program_t *prg ) +{ + struct stack_block *b = malloc( sizeof(struct stack_block) ); + b->data = malloc( sizeof(tree_t*) * VM_STACK_SIZE ); + b->len = VM_STACK_SIZE; + b->offset = 0; + b->next = 0; + + prg->stack_block = b; + + prg->sb_beg = prg->stack_block->data; + prg->sb_end = prg->stack_block->data + prg->stack_block->len; + + prg->stack_root = prg->sb_end; +} + +tree_t **colm_vm_root( program_t *prg ) +{ + return prg->stack_root; +} + +tree_t **vm_bs_add( program_t *prg, tree_t **sp, int n ) +{ + /* Close off the current block. 
*/ + if ( prg->stack_block != 0 ) { + prg->stack_block->offset = sp - prg->stack_block->data; + prg->sb_total += prg->stack_block->len - prg->stack_block->offset; + } + + if ( prg->reserve != 0 && prg->reserve->len >= n) { + struct stack_block *b = prg->reserve; + b->next = prg->stack_block; + b->offset = 0; + + prg->stack_block = b; + prg->reserve = 0; + } + else { + struct stack_block *b = malloc( sizeof(struct stack_block) ); + int size = VM_STACK_SIZE; + if ( n > size ) + size = n; + b->next = prg->stack_block; + b->data = malloc( sizeof(tree_t*) * size ); + b->len = size; + b->offset = 0; + + prg->stack_block = b; + } + + prg->sb_beg = prg->stack_block->data; + prg->sb_end = prg->stack_block->data + prg->stack_block->len; + + return prg->sb_end; +} + +tree_t **vm_bs_pop( program_t *prg, tree_t **sp, int n ) +{ + while ( 1 ) { + tree_t **end = prg->stack_block->data + prg->stack_block->len; + int remaining = end - sp; + + /* Don't have to free this block. Remaining values to pop leave us + * inside it. */ + if ( n < remaining ) { + sp += n; + return sp; + } + + if ( prg->stack_block->next == 0 ) { + /* Don't delete the sentinel stack block. Returns the end as in the + * creation of the first stack block. */ + return prg->sb_end; + } + + /* Clear any previous reserve. We are going to save this block as the + * reserve. */ + if ( prg->reserve != 0 ) { + free( prg->reserve->data ); + free( prg->reserve ); + } + + /* Pop the stack block. */ + struct stack_block *b = prg->stack_block; + prg->stack_block = prg->stack_block->next; + prg->reserve = b; + + /* Setup the bounds. Note that we restore the full block, which is + * necessary to honour any CONTIGUOUS statements that counted on it + * before a subsequent CONTIGUOUS triggered a new block. */ + prg->sb_beg = prg->stack_block->data; + prg->sb_end = prg->stack_block->data + prg->stack_block->len; + + /* Update the total stack usage. 
*/ + prg->sb_total -= prg->stack_block->len - prg->stack_block->offset; + + n -= remaining; + sp = prg->stack_block->data + prg->stack_block->offset; + } +} + +void vm_clear( program_t *prg ) +{ + while ( prg->stack_block != 0 ) { + struct stack_block *b = prg->stack_block; + prg->stack_block = prg->stack_block->next; + + free( b->data ); + free( b ); + } + + if ( prg->reserve != 0 ) { + free( prg->reserve->data ); + free( prg->reserve ); + } +} + +tree_t *colm_return_val( struct colm_program *prg ) +{ + return prg->return_val; +} + +void colm_set_debug( program_t *prg, long active_realm ) +{ + prg->active_realm = active_realm; +} + +void colm_set_reduce_clean( struct colm_program *prg, unsigned char reduce_clean ) +{ + prg->reduce_clean = reduce_clean; +} + +program_t *colm_new_program( struct colm_sections *rtd ) +{ + program_t *prg = malloc(sizeof(program_t)); + memset( prg, 0, sizeof(program_t) ); + + assert( sizeof(str_t) <= sizeof(tree_t) ); + assert( sizeof(pointer_t) <= sizeof(tree_t) ); + + prg->rtd = rtd; + prg->ctx_dep_parsing = 1; + prg->reduce_clean = 1; + + init_pool_alloc( &prg->kid_pool, sizeof(kid_t) ); + init_pool_alloc( &prg->tree_pool, sizeof(tree_t) ); + init_pool_alloc( &prg->parse_tree_pool, sizeof(parse_tree_t) ); + init_pool_alloc( &prg->head_pool, sizeof(head_t) ); + init_pool_alloc( &prg->location_pool, sizeof(location_t) ); + + prg->true_val = (tree_t*) 1; + prg->false_val = (tree_t*) 0; + + /* Allocate the global variable. */ + colm_alloc_global( prg ); + + /* Allocate the VM stack. */ + vm_init( prg ); + + rtd->init_need(); + + prg->stream_fns = malloc( sizeof(char*) * 1 ); + prg->stream_fns[0] = 0; + return prg; +} + +void colm_run_program2( program_t *prg, int argc, const char **argv, const int *argl ) +{ + if ( prg->rtd->root_code_len == 0 ) + return; + + /* Make the arguments available to the program. 
*/ + prg->argc = argc; + prg->argv = argv; + prg->argl = argl; + + execution_t execution; + memset( &execution, 0, sizeof(execution) ); + execution.frame_id = prg->rtd->root_frame_id; + + colm_execute( prg, &execution, prg->rtd->root_code ); + + /* Clear the arg and stack. */ + prg->argc = 0; + prg->argv = 0; +} + +void colm_run_program( program_t *prg, int argc, const char **argv ) +{ + colm_run_program2( prg, argc, argv, 0 ); +} + +static void colm_clear_heap( program_t *prg, tree_t **sp ) +{ + struct colm_struct *hi = prg->heap.head; + while ( hi != 0 ) { + struct colm_struct *next = hi->next; + colm_struct_delete( prg, sp, hi ); + hi = next; + } +} + +void *colm_get_reduce_ctx( struct colm_program *prg ) +{ + return prg->red_ctx; +} + +void colm_set_reduce_ctx( struct colm_program *prg, void *ctx ) +{ + prg->red_ctx = ctx; +} + +const char **colm_extract_fns( struct colm_program *prg ) +{ + const char **fns = prg->stream_fns; + prg->stream_fns = 0; + return fns; +} + +const char *colm_error( struct colm_program *prg, int *length ) +{ + const char *rtn = 0; + if ( prg->error != 0 ) { + rtn = prg->error->tokdata->data; + if ( length != 0 ) + *length = prg->error->tokdata->length; + } + return rtn; +} + +int colm_delete_program( program_t *prg ) +{ + tree_t **sp = prg->stack_root; + int exit_status = prg->exit_status; + + colm_tree_downref( prg, sp, prg->return_val ); + colm_clear_heap( prg, sp ); + + colm_tree_downref( prg, sp, prg->error ); + +#if DEBUG + long kid_lost = kid_num_lost( prg ); + long tree_lost = tree_num_lost( prg ); + long parse_tree_lost = parse_tree_num_lost( &prg->parse_tree_pool ); + long head_lost = head_num_lost( prg ); + long location_lost = location_num_lost( prg ); + + if ( kid_lost ) + message( "warning: lost kids: %ld\n", kid_lost ); + + if ( tree_lost ) + message( "warning: lost trees: %ld\n", tree_lost ); + + if ( parse_tree_lost ) + message( "warning: lost parse trees: %ld\n", parse_tree_lost ); + + if ( head_lost ) + message( 
"warning: lost heads: %ld\n", head_lost ); + + if ( location_lost ) + message( "warning: lost locations: %ld\n", location_lost ); +#endif + + kid_clear( prg ); + tree_clear( prg ); + head_clear( prg ); + parse_tree_clear( &prg->parse_tree_pool ); + location_clear( prg ); + + struct run_buf *rb = prg->alloc_run_buf; + while ( rb != 0 ) { + struct run_buf *next = rb->next; + free( rb ); + rb = next; + } + + vm_clear( prg ); + + if ( prg->stream_fns ) { + char **ptr = (char**)prg->stream_fns; + while ( *ptr != 0 ) { + free( *ptr ); + ptr += 1; + } + + free( prg->stream_fns ); + } + + free( prg ); + + return exit_status; +} diff --git a/src/program.h b/src/program.h new file mode 100644 index 00000000..8ba716d4 --- /dev/null +++ b/src/program.h @@ -0,0 +1,186 @@ +/* + * Copyright 2007-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef _COLM_PROGRAM_H +#define _COLM_PROGRAM_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include <colm/pdarun.h> + +struct stack_block +{ + tree_t **data; + int len; + int offset; + struct stack_block *next; +}; + +struct colm_sections +{ + struct lang_el_info *lel_info; + long num_lang_els; + + struct struct_el_info *sel_info; + long num_struct_els; + + struct prod_info *prod_info; + long num_prods; + + struct region_info *region_info; + long num_regions; + + code_t *root_code; + long root_code_len; + long root_frame_id; + + struct frame_info *frame_info; + long num_frames; + + struct function_info *function_info; + long num_functions; + + struct pat_cons_info *pat_repl_info; + long num_patterns; + + struct pat_cons_node *pat_repl_nodes; + long num_pattern_nodes; + + struct generic_info *generic_info; + long num_generics; + + long argv_generic_id; + long stds_generic_id; + + const char **litdata; + long *litlen; + head_t **literals; + long num_literals; + + CaptureAttr *capture_attr; + long num_captured_attr; + + struct fsm_tables *fsm_tables; + struct pda_tables *pda_tables; + int *start_states; + int *eof_lel_ids; + int *parser_lel_ids; + long num_parsers; + + long global_size; + + long first_non_term_id; + long first_struct_el_id; + + long integer_id; + long string_id; + long any_id; + long eof_id; + long no_token_id; + long global_id; + long argv_el_id; + long stds_el_id; + long struct_inbuilt_id; + long struct_input_id; + long struct_stream_id; + + void (*fsm_execute)( struct pda_run *pda_run, struct input_impl *input_stream ); + void (*send_named_lang_el)( struct colm_program *prg, tree_t **tree, + struct pda_run *pda_run, struct input_impl *input_stream ); + void (*init_bindings)( struct pda_run *pda_run ); + void (*pop_binding)( struct pda_run *pda_run, parse_tree_t *tree ); + + tree_t **(*host_call)( program_t *prg, long code, tree_t **sp ); + + void (*commit_reduce_forward)( program_t *prg, tree_t **root, struct pda_run *pda_run, 
parse_tree_t *pt ); + long (*commit_union_sz)( int reducer ); + void (*init_need)(); + int (*reducer_need_tok)( program_t *prg, struct pda_run *pda_run, int id ); + int (*reducer_need_ign)( program_t *prg, struct pda_run *pda_run ); + void (*read_reduce)( program_t *prg, int reducer, input_t *input ); +}; + +struct heap_list +{ + struct colm_struct *head; + struct colm_struct *tail; +}; + +struct colm_program +{ + long active_realm; + + int argc; + const char **argv; + const int *argl; + + unsigned char ctx_dep_parsing; + unsigned char reduce_clean; + struct colm_sections *rtd; + struct colm_struct *global; + int induce_exit; + int exit_status; + + struct pool_alloc kid_pool; + struct pool_alloc tree_pool; + struct pool_alloc parse_tree_pool; + struct pool_alloc head_pool; + struct pool_alloc location_pool; + + tree_t *true_val; + tree_t *false_val; + + struct heap_list heap; + + stream_t *stdin_val; + stream_t *stdout_val; + stream_t *stderr_val; + + tree_t *error; + + struct run_buf *alloc_run_buf; + + /* Current stack block limits. Changed when crossing block boundaries. */ + tree_t **sb_beg; + tree_t **sb_end; + long sb_total; + struct stack_block *reserve; + struct stack_block *stack_block; + tree_t **stack_root; + + /* Returned value for main program and any exported functions. */ + tree_t *return_val; + + void *red_ctx; + + /* This can be extracted for ownership transfer before a program is deleted. 
*/ + const char **stream_fns; +}; + +#ifdef __cplusplus +} +#endif + +#endif /* _COLM_PROGRAM_H */ + diff --git a/src/redbuild.cc b/src/redbuild.cc new file mode 100644 index 00000000..7e0396d7 --- /dev/null +++ b/src/redbuild.cc @@ -0,0 +1,562 @@ +/* + * Copyright 2006-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "redbuild.h" + +#include <assert.h> +#include <string.h> +#include <stdbool.h> + +#include <iostream> + +#include "fsmcodegen.h" + +using namespace std; + +RedFsmBuild::RedFsmBuild( Compiler *pd, FsmGraph *fsm ) +: + pd(pd), + fsm(fsm), + nextActionTableId(0), + startState(-1), + errState(-1) +{ +} + +void RedFsmBuild::initActionList( unsigned long length ) +{ + redFsm->allActions = new GenAction[length]; + memset( redFsm->allActions, 0, sizeof(GenAction) * length ); + for ( unsigned long a = 0; a < length; a++ ) + redFsm->genActionList.append( redFsm->allActions+a ); +} + + +void RedFsmBuild::makeActionList() +{ + /* Determine which actions to write. */ + int nextActionId = 0; + for ( ActionList::Iter act = pd->actionList; act.lte(); act++ ) { + if ( act->numRefs() > 0 || act->numCondRefs > 0 ) + act->actionId = nextActionId++; + } + + initActionList( nextActionId ); + curAction = 0; + + for ( ActionList::Iter act = pd->actionList; act.lte(); act++ ) { + if ( act->actionId >= 0 ) + makeAction( act ); + } +} + +void RedFsmBuild::initActionTableList( unsigned long length ) +{ + redFsm->allActionTables = new RedAction[length]; +} + +void RedFsmBuild::initStateList( unsigned long length ) +{ + redFsm->allStates = new RedState[length]; + for ( unsigned long s = 0; s < length; s++ ) + redFsm->stateList.append( redFsm->allStates+s ); + + /* We get the start state as an offset, set the pointer now. */ + assert( startState >= 0 ); + redFsm->startState = redFsm->allStates + startState; + if ( errState >= 0 ) + redFsm->errState = redFsm->allStates + errState; + for ( EntryIdVect::Iter en = redFsm->entryPointIds; en.lte(); en++ ) + redFsm->entryPoints.insert( redFsm->allStates + *en ); + + /* The nextStateId is no longer used to assign state ids (they come in set + * from the frontend now), however generation code still depends on it. + * Should eventually remove this variable. 
*/ + redFsm->nextStateId = redFsm->stateList.length(); +} + +void RedFsmBuild::addEntryPoint( int entryId, unsigned long entryState ) +{ + redFsm->entryPointIds.append( entryState ); + redFsm->redEntryMap.insert( entryId, entryState ); +} + +void RedFsmBuild::addRegionToEntry( int regionId, int entryId ) +{ + assert( regionId == redFsm->regionToEntry.length() ); + redFsm->regionToEntry.append( entryId ); +} + +void RedFsmBuild::initTransList( int snum, unsigned long length ) +{ + /* Could preallocate the out range to save time growing it. For now do + * nothing. */ +} + +void RedFsmBuild::newTrans( int snum, int tnum, Key lowKey, + Key highKey, long targ, long action ) +{ + /* Get the current state and range. */ + RedState *curState = redFsm->allStates + snum; + RedTransList &destRange = curState->outRange; + + if ( curState == redFsm->errState ) + return; + + /* Make the new transitions. */ + RedState *targState = targ >= 0 ? (redFsm->allStates + targ) : + redFsm->wantComplete ? redFsm->getErrorState() : 0; + RedAction *actionTable = action >= 0 ? (redFsm->allActionTables + action) : 0; + RedTrans *trans = redFsm->allocateTrans( targState, actionTable ); + RedTransEl transEl( lowKey, highKey, trans ); + + if ( redFsm->wantComplete ) { + /* If the machine is to be complete then we need to fill any gaps with + * the error transitions. */ + if ( destRange.length() == 0 ) { + /* Range is currently empty. */ + if ( keyOps->minKey < lowKey ) { + /* The first range doesn't start at the low end. */ + Key fillHighKey = lowKey; + fillHighKey.decrement(); + + /* Create the filler with the state's error transition. */ + RedTransEl newTel( keyOps->minKey, fillHighKey, redFsm->getErrorTrans() ); + destRange.append( newTel ); + } + } + else { + /* The range list is not empty, get the the last range. */ + RedTransEl *last = &destRange[destRange.length()-1]; + Key nextKey = last->highKey; + nextKey.increment(); + if ( nextKey < lowKey ) { + /* There is a gap to fill. 
Make the high key. */ + Key fillHighKey = lowKey; + fillHighKey.decrement(); + + /* Create the filler with the state's error transtion. */ + RedTransEl newTel( nextKey, fillHighKey, redFsm->getErrorTrans() ); + destRange.append( newTel ); + } + } + } + + /* Filler taken care of. Append the range. */ + destRange.append( RedTransEl( lowKey, highKey, trans ) ); +} + +void RedFsmBuild::finishTransList( int snum ) +{ + /* Get the current state and range. */ + RedState *curState = redFsm->allStates + snum; + RedTransList &destRange = curState->outRange; + + if ( curState == redFsm->errState ) + return; + + /* If building a complete machine we may need filler on the end. */ + if ( redFsm->wantComplete ) { + /* Check if there are any ranges already. */ + if ( destRange.length() == 0 ) { + /* Fill with the whole alphabet. */ + /* Add the range on the lower and upper bound. */ + RedTransEl newTel( keyOps->minKey, keyOps->maxKey, redFsm->getErrorTrans() ); + destRange.append( newTel ); + } + else { + /* Get the last and check for a gap on the end. */ + RedTransEl *last = &destRange[destRange.length()-1]; + if ( last->highKey < keyOps->maxKey ) { + /* Make the high key. */ + Key fillLowKey = last->highKey; + fillLowKey.increment(); + + /* Create the new range with the error trans and append it. 
*/ + RedTransEl newTel( fillLowKey, keyOps->maxKey, redFsm->getErrorTrans() ); + destRange.append( newTel ); + } + } + } +} + +void RedFsmBuild::setId( int snum, int id ) +{ + RedState *curState = redFsm->allStates + snum; + curState->id = id; +} + +void RedFsmBuild::setEofTrans( int snum, int eofTarget, int actId ) +{ + RedState *curState = redFsm->allStates + snum; + RedState *targState = redFsm->allStates + eofTarget; + RedAction *eofAct = redFsm->allActionTables + actId; + curState->eofTrans = redFsm->allocateTrans( targState, eofAct ); +} + +void RedFsmBuild::setFinal( int snum ) +{ + RedState *curState = redFsm->allStates + snum; + curState->isFinal = true; +} + + +void RedFsmBuild::setStateActions( int snum, long toStateAction, + long fromStateAction, long eofAction ) +{ + RedState *curState = redFsm->allStates + snum; + if ( toStateAction >= 0 ) + curState->toStateAction = redFsm->allActionTables + toStateAction; + if ( fromStateAction >= 0 ) + curState->fromStateAction = redFsm->allActionTables + fromStateAction; + if ( eofAction >= 0 ) + curState->eofAction = redFsm->allActionTables + eofAction; +} + +void RedFsmBuild::closeMachine() +{ +} + + +void RedFsmBuild::initStateCondList( int snum, ulong length ) +{ + /* Could preallocate these, as we could with transitions. */ +} + +void RedFsmBuild::setForcedErrorState() +{ + redFsm->forcedErrorState = true; +} + +Key RedFsmBuild::findMaxKey() +{ + Key maxKey = keyOps->maxKey; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + assert( st->outSingle.length() == 0 ); + assert( st->defTrans == 0 ); + + long rangeLen = st->outRange.length(); + if ( rangeLen > 0 ) { + Key highKey = st->outRange[rangeLen-1].highKey; + if ( highKey > maxKey ) + maxKey = highKey; + } + } + return maxKey; +} + + +void RedFsmBuild::makeActionTableList() +{ + /* Must first order the action tables based on their id. 
*/ + int numTables = nextActionTableId; + RedActionTable **tables = new RedActionTable*[numTables]; + for ( ActionTableMap::Iter at = actionTableMap; at.lte(); at++ ) + tables[at->id] = at; + + initActionTableList( numTables ); + curActionTable = 0; + + for ( int t = 0; t < numTables; t++ ) { + long length = tables[t]->key.length(); + + /* Collect the action table. */ + RedAction *redAct = redFsm->allActionTables + curActionTable; + redAct->actListId = curActionTable; + redAct->key.setAsNew( length ); + + int pos = 0; + for ( ActionTable::Iter atel = tables[t]->key; atel.lte(); atel++ ) { + int actionId = atel->value->actionId; + redAct->key[pos].key = 0; + redAct->key[pos].value = redFsm->allActions+actionId; + pos += 1; + } + + /* Insert into the action table map. */ + redFsm->actionMap.insert( redAct ); + + curActionTable += 1; + + } + + delete[] tables; +} + +void RedFsmBuild::reduceActionTables() +{ + /* Reduce the actions tables to a set. */ + for ( StateList::Iter st = fsm->stateList; st.lte(); st++ ) { + RedActionTable *actionTable = 0; + + /* Reduce To State Actions. */ + if ( st->toStateActionTable.length() > 0 ) { + if ( actionTableMap.insert( st->toStateActionTable, &actionTable ) ) + actionTable->id = nextActionTableId++; + } + + /* Reduce From State Actions. */ + if ( st->fromStateActionTable.length() > 0 ) { + if ( actionTableMap.insert( st->fromStateActionTable, &actionTable ) ) + actionTable->id = nextActionTableId++; + } + + /* Reduce EOF actions. */ + if ( st->eofActionTable.length() > 0 ) { + if ( actionTableMap.insert( st->eofActionTable, &actionTable ) ) + actionTable->id = nextActionTableId++; + } + + /* Loop the transitions and reduce their actions. 
*/ + for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) { + if ( trans->actionTable.length() > 0 ) { + if ( actionTableMap.insert( trans->actionTable, &actionTable ) ) + actionTable->id = nextActionTableId++; + } + } + } +} +/* Keep the range only if the transition has a target state or carries actions. */ +void RedFsmBuild::appendTrans( TransListVect &outList, Key lowKey, + Key highKey, FsmTrans *trans ) +{ + if ( trans->toState != 0 || trans->actionTable.length() > 0 ) + outList.append( TransEl( lowKey, highKey, trans ) ); +} +/* Emit one range through newTrans, passing the target state number and the + * reduced action table id, with -1 standing in for "none". */ +void RedFsmBuild::makeTrans( Key lowKey, Key highKey, FsmTrans *trans ) +{ + /* First look up the reduced action table, if the transition has actions. */ + RedActionTable *actionTable = 0; + if ( trans->actionTable.length() > 0 ) + actionTable = actionTableMap.find( trans->actionTable ); + + long targ = trans->toState == 0 ? -1 : trans->toState->alg.stateNum; + long action = actionTable == 0 ? -1 : actionTable->id; + + newTrans( curState, curTrans++, lowKey, highKey, targ, action ); +} + +void RedFsmBuild::makeTransList( FsmState *state ) +{ + TransListVect outList; + + /* If there are no ranges there is nothing to collect. */ + if ( state->outList.length() > 0 ) { + /* Loop each source range. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + /* Reduce the transition. If it reduced to anything then add it. 
*/ + appendTrans( outList, trans->lowKey, trans->highKey, trans ); + } + } + + long length = outList.length(); + initTransList( curState, length ); + curTrans = 0; + + for ( TransListVect::Iter tvi = outList; tvi.lte(); tvi++ ) + makeTrans( tvi->lowKey, tvi->highKey, tvi->value ); + finishTransList( curState ); +} + +void RedFsmBuild::newAction( int anum, char *name, int line, int col, Action *action ) +{ + redFsm->allActions[anum].actionId = anum; + redFsm->allActions[anum].name = name; + redFsm->allActions[anum].loc.line = line; + redFsm->allActions[anum].loc.col = col; + redFsm->allActions[anum].inlineList = action->inlineList; + redFsm->allActions[anum].objField = action->objField; + redFsm->allActions[anum].markType = action->markType; + redFsm->allActions[anum].markId = action->markId + 1; +} + +void RedFsmBuild::makeAction( Action *action ) +{ + int line = action->loc.line; + int col = action->loc.col; + + char *name = 0; + if ( action->name != 0 ) + name = action->name; + + newAction( curAction++, name, line, col, action ); +} + +/* Write len bytes starting at data to out, replacing the characters that are + * special in XML ('<', '>' and '&') with their predefined entity references. + * All other bytes are written through unchanged. */ +void xmlEscapeHost( std::ostream &out, char *data, int len ) +{ + char *end = data + len; + while ( data != end ) { + switch ( *data ) { + case '<': out << "&lt;"; break; + case '>': out << "&gt;"; break; + case '&': out << "&amp;"; break; + default: out << *data; break; + } + data += 1; + } +} + +void RedFsmBuild::makeStateActions( FsmState *state ) +{ + RedActionTable *toStateActions = 0; + if ( state->toStateActionTable.length() > 0 ) + toStateActions = actionTableMap.find( state->toStateActionTable ); + + RedActionTable *fromStateActions = 0; + if ( state->fromStateActionTable.length() > 0 ) + fromStateActions = actionTableMap.find( state->fromStateActionTable ); + + RedActionTable *eofActions = 0; + if ( state->eofActionTable.length() > 0 ) + eofActions = actionTableMap.find( state->eofActionTable ); + + if ( toStateActions != 0 || fromStateActions != 0 || eofActions != 0 ) { + long toStateAction = -1; + long fromStateAction = -1; + 
long eofAction = -1; + + if ( toStateActions != 0 ) + toStateAction = toStateActions->id; + if ( fromStateActions != 0 ) + fromStateAction = fromStateActions->id; + if ( eofActions != 0 ) + eofAction = eofActions->id; + + setStateActions( curState, toStateAction, + fromStateAction, eofAction ); + } +} + +void RedFsmBuild::makeStateList() +{ + /* Write the list of states. */ + long length = fsm->stateList.length(); + initStateList( length ); + curState = 0; + + for ( StateList::Iter st = fsm->stateList; st.lte(); st++ ) { + /* Both or neither should be set. */ + assert( !( (st->eofTarget != 0) xor (st->eofActionTable.length() > 0) ) ); + + makeStateActions( st ); + makeTransList( st ); + + setId( curState, st->alg.stateNum ); + if ( st->isFinState() ) + setFinal( curState ); + + /* If there is an eof target, make an eof transition. */ + if ( st->eofTarget != 0 ) { + /* Find the eof actions. */ + RedActionTable *eofActions = 0; + eofActions = actionTableMap.find( st->eofActionTable ); + setEofTrans( curState, st->eofTarget->alg.stateNum, eofActions->id ); + } + + curState += 1; + } +} + +void RedFsmBuild::makeEntryPoints() +{ + if ( fsm->lmRequiresErrorState ) + setForcedErrorState(); + + for ( EntryMap::Iter en = fsm->entryPoints; en.lte(); en++ ) { + /* Get the name instantiation from nameIndex. */ + FsmState *state = en->value; + long entry = state->alg.stateNum; + addEntryPoint( en->key, entry ); + } + + for ( RegionList::Iter reg = pd->regionList; reg.lte(); reg++ ) { + assert( reg->impl->regionNameInst != 0 ); + + TokenRegion *use = reg; + + if ( use->zeroLel != 0 ) + use = use->ignoreOnly; + + NameInst *regionName = use->impl->regionNameInst; + addRegionToEntry( reg->id, regionName->id ); + } +} + +void RedFsmBuild::makeMachine() +{ + /* Action tables. */ + reduceActionTables(); + + makeActionList(); + makeActionTableList(); + makeConditions(); + + /* Start state. */ + startState = fsm->startState->alg.stateNum; + + /* Error state. 
*/ + if ( fsm->errState != 0 ) + errState = fsm->errState->alg.stateNum; + + makeEntryPoints(); + makeStateList(); +} + +void RedFsmBuild::makeConditions() +{ +} +/* Allocate a new RedFsm, fill it from the graph with makeMachine, run the + * passes below on it and return it. The result is also left in redFsm. */ +RedFsm *RedFsmBuild::reduceMachine() +{ + redFsm = new RedFsm(); + redFsm->wantComplete = true; + + /* Build the reduced machine. */ + makeMachine(); + + /* Do this before distributing transitions out to singles and defaults; + * it makes life easier. */ + redFsm->maxKey = findMaxKey(); + + redFsm->assignActionLocs(); + + /* Find the first final state (The final state with the lowest id). */ + redFsm->findFirstFinState(); + + /* Choose default transitions and the single transition. */ + redFsm->chooseDefaultSpan(); + + /* Maybe do flat expand, otherwise choose single. */ + redFsm->chooseSingle(); + + /* Set up incoming transitions. */ + redFsm->setInTrans(); + + /* Analyze machine will find the final action reference counts, among + * other things. We will use these in reporting the usage + * of fsm directives in action code. */ + redFsm->analyzeMachine(); + + return redFsm; +} + diff --git a/src/redbuild.h b/src/redbuild.h new file mode 100644 index 00000000..e9ad0465 --- /dev/null +++ b/src/redbuild.h @@ -0,0 +1,161 @@ +/* + * Copyright 2006-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _COLM_FSMREDUCE_H +#define _COLM_FSMREDUCE_H + +#include <iostream> + +#include <avltree.h> + +#include "fsmgraph.h" +#include "compiler.h" + +/* Forwards. */ +struct FsmTrans; +struct FsmGraph; +struct Compiler; +struct FsmCodeGen; +struct RedFsm; +struct GenCondSpace; +struct Condition; + +struct RedActionTable +: + public AvlTreeEl<RedActionTable> +{ + RedActionTable( const ActionTable &key ) + : + key(key), + id(0) + { } + + const ActionTable &getKey() + { return key; } + + ActionTable key; + int id; +}; + +typedef AvlTree<RedActionTable, ActionTable, CmpActionTable> ActionTableMap; + +struct NextRedTrans +{ + Key lowKey, highKey; + FsmTrans *trans; + FsmTrans *next; + + void load() { + if ( trans != 0 ) { + next = trans->next; + lowKey = trans->lowKey; + highKey = trans->highKey; + } + } + + NextRedTrans( FsmTrans *t ) { + trans = t; + load(); + } + + void increment() { + trans = next; + load(); + } +}; + +class RedFsmBuild +{ +public: + RedFsmBuild( Compiler *pd, FsmGraph *fsm ); + RedFsm *reduceMachine( ); + +private: + void appendTrans( TransListVect &outList, Key lowKey, Key highKey, FsmTrans *trans ); + void makeStateActions( FsmState *state ); + void makeStateList(); + void makeStateConditions( FsmState *state ); + + void initActionList( unsigned long length ); + void newAction( int anum, char *name, int line, int col, Action *action ); + void initActionTableList( unsigned long length ); + void initCondSpaceList( ulong length ); + void condSpaceItem( int 
cnum, long condActionId ); + void newCondSpace( int cnum, int condSpaceId, Key baseKey ); + void initStateCondList( int snum, ulong length ); + void addStateCond( int snum, Key lowKey, Key highKey, long condNum ); + void initStateList( unsigned long length ); + void addRegionToEntry( int regionId, int entryId ); + void addEntryPoint( int entryId, unsigned long entryState ); + void setId( int snum, int id ); + void initTransList( int snum, unsigned long length ); + void newTrans( int snum, int tnum, Key lowKey, Key highKey, + long targ, long act ); + void finishTransList( int snum ); + void setFinal( int snum ); + void setEofTrans( int snum, int eofTarget, int actId ); + void setStateActions( int snum, long toStateAction, + long fromStateAction, long eofAction ); + void setForcedErrorState(); + void closeMachine(); + Key findMaxKey(); + + void makeEntryPoints(); + void makeGetKeyExpr(); + void makeAccessExpr(); + void makeCurStateExpr(); + void makeConditions(); + void makeInlineList( InlineList *inlineList, InlineItem *context ); + void makeActionList(); + void makeActionTableList(); + void reduceTrans( FsmTrans *trans ); + void reduceActionTables(); + void makeTransList( FsmState *state ); + void makeTrans( Key lowKey, Key highKey, FsmTrans *defTrans ); + void makeAction( Action *action ); + void makeLmSwitch( InlineItem *item ); + void makeMachine(); + void makeActionExec( InlineItem *item ); + void makeActionExecTE( InlineItem *item ); + + Compiler *pd; + FsmGraph *fsm; + ActionTableMap actionTableMap; + int nextActionTableId; + + int startState; + int errState; + +public: + RedFsm *redFsm; + +private: + int curAction; + int curActionTable; + int curTrans; + int curState; + int curCondSpace; + int curStateCond; +}; + +#endif /* _COLM_FSMREDUCE_H */ + diff --git a/src/redfsm.cc b/src/redfsm.cc new file mode 100644 index 00000000..d79a1e32 --- /dev/null +++ b/src/redfsm.cc @@ -0,0 +1,1049 @@ +/* + * Copyright 2006-2018 Adrian Thurston <thurston@colm.net> + * + * 
Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "redfsm.h" + +#include <assert.h> +#include <stdbool.h> +#include <string.h> +#include <stdio.h> + +#include <sstream> +#include <iostream> + +#include "fsmgraph.h" +#include "parsetree.h" + +using std::ostringstream; + +string nameOrLoc( GenAction *genAction ) +{ + if ( genAction->name != 0 ) + return string(genAction->name); + else { + ostringstream ret; + ret << genAction->loc.line << ":" << genAction->loc.col; + return ret.str(); + } +} + +RedFsm::RedFsm() +: + wantComplete(false), + forcedErrorState(false), + nextActionId(0), + nextTransId(0), + errState(0), + errTrans(0), + firstFinState(0), + numFinStates(0), + allActions(0), + allActionTables(0), + allStates(0), + bAnyToStateActions(false), + bAnyFromStateActions(false), + bAnyRegActions(false), + bAnyEofActions(false), + bAnyActionGotos(false), + bAnyActionCalls(false), + bAnyActionRets(false), + bAnyRegActionRets(false), + bAnyRegActionByValControl(false), + bAnyRegNextStmt(false), + bAnyRegCurStateRef(false), + bAnyRegBreak(false), + bAnyLmSwitchError(false), + bAnyConditions(false) +{ +} + +/* Does the machine have any actions. */ +bool RedFsm::anyActions() +{ + return actionMap.length() > 0; +} + +void RedFsm::depthFirstOrdering( RedState *state ) +{ + /* Nothing to do if the state is already on the list. */ + if ( state->onStateList ) + return; + + /* Doing depth first, put state on the list. */ + state->onStateList = true; + stateList.append( state ); + +// /* At this point transitions should only be in ranges. */ +// assert( state->outSingle.length() == 0 ); +// assert( state->defTrans == 0 ); + + /* Recurse on singles. */ + for ( RedTransList::Iter stel = state->outSingle; stel.lte(); stel++ ) { + if ( stel->value->targ != 0 ) + depthFirstOrdering( stel->value->targ ); + } + + /* Recurse on everything ranges. 
*/ + for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) { + if ( rtel->value->targ != 0 ) + depthFirstOrdering( rtel->value->targ ); + } + + if ( state->defTrans != 0 && state->defTrans->targ != 0 ) + depthFirstOrdering( state->defTrans->targ ); +} + +/* Order the states by a depth-first walk of their transitions. */ +void RedFsm::depthFirstOrdering() +{ + /* Clear the on-state-list flags. */ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) + st->onStateList = false; + + /* Clear out the state list, we will rebuild it. */ + int stateListLen = stateList.length(); + stateList.abandon(); + + /* Add back to the state list from the start state and all other entry + * points. */ + depthFirstOrdering( startState ); + for ( RedStateSet::Iter en = entryPoints; en.lte(); en++ ) + depthFirstOrdering( *en ); + if ( forcedErrorState ) + depthFirstOrdering( errState ); + + /* Make sure we put everything back on. */ + assert( stateListLen == stateList.length() ); +} + +/* Assign state ids by appearance in the state list. */ +void RedFsm::sequentialStateIds() +{ + /* Table based machines depend on the state numbers starting at zero. */ + nextStateId = 0; + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) + st->id = nextStateId++; +} + +/* Stable sort the states by final state status. */ +void RedFsm::sortStatesByFinal() +{ + /* Move forward through the list and throw final states onto the end. The + * tail is captured up front so the walk stops at the original last state + * and does not revisit states that were moved behind it. */ + RedState *state = 0; + RedState *next = stateList.head; + RedState *last = stateList.tail; + while ( state != last ) { + /* Move forward and load up the next. */ + state = next; + next = state->next; + + /* Throw to the end? */ + if ( state->isFinal ) { + stateList.detach( state ); + stateList.append( state ); + } + } +} + +/* Assign state ids by final state status. */ +void RedFsm::sortStateIdsByFinal() +{ + /* Table based machines depend on this starting at zero. */ + nextStateId = 0; + + /* First pass to assign non final ids. 
*/ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + if ( ! st->isFinal ) + st->id = nextStateId++; + } + + /* Second pass to assign final ids. */ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + if ( st->isFinal ) + st->id = nextStateId++; + } +} + +struct CmpStateById +{ + static int compare( RedState *st1, RedState *st2 ) + { + if ( st1->id < st2->id ) + return -1; + else if ( st1->id > st2->id ) + return 1; + else + return 0; + } +}; + +void RedFsm::sortByStateId() +{ + /* Make the array. */ + int pos = 0; + RedState **ptrList = new RedState*[stateList.length()]; + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) + ptrList[pos++] = st; + + MergeSort<RedState*, CmpStateById> mergeSort; + mergeSort.sort( ptrList, stateList.length() ); + + stateList.abandon(); + for ( int st = 0; st < pos; st++ ) + stateList.append( ptrList[st] ); + + delete[] ptrList; +} + +/* Find the final state with the lowest id. */ +void RedFsm::findFirstFinState() +{ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + if ( st->isFinal && (firstFinState == 0 || st->id < firstFinState->id) ) + firstFinState = st; + } +} + +void RedFsm::assignActionLocs() +{ + int nextLocation = 0; + for ( GenActionTableMap::Iter act = actionMap; act.lte(); act++ ) { + /* Store the loc, skip over the array and a null terminator. */ + act->location = nextLocation; + nextLocation += act->key.length() + 1; + } +} + +/* Check if we can extend the current range by displacing any ranges + * ahead to the singles. */ +bool RedFsm::canExtend( const RedTransList &list, int pos ) +{ + /* Get the transition that we want to extend. */ + RedTrans *extendTrans = list[pos].value; + + /* Look ahead in the transition list. */ + for ( int next = pos + 1; next < list.length(); pos++, next++ ) { + /* If they are not continuous then cannot extend. 
*/ + Key nextKey = list[next].lowKey; + nextKey.decrement(); + if ( list[pos].highKey != nextKey ) + break; + + /* Check for the extension property: the same transition shows up + * again further along the list. */ + if ( extendTrans == list[next].value ) + return true; + + /* If the span of the next element is more than one, then don't keep + * checking, it won't be moved to single. */ + unsigned long long nextSpan = keyOps->span( list[next].lowKey, list[next].highKey ); + if ( nextSpan > 1 ) + break; + } + return false; +} + +/* Move ranges to the singles list. */ +void RedFsm::moveTransToSingle( RedState *state ) +{ + RedTransList &range = state->outRange; + RedTransList &single = state->outSingle; + for ( int rpos = 0; rpos < range.length(); ) { + /* Check if this is a range we can extend. */ + if ( canExtend( range, rpos ) ) { + /* Displace the ranges in between to the singles until the range + * with the matching transition is the immediate neighbour. */ + while ( range[rpos].value != range[rpos+1].value ) { + /* Transfer the range to single. */ + single.append( range[rpos+1] ); + range.remove( rpos+1 ); + } + + /* Extend. The position is not advanced, so the widened range is + * examined again on the next pass of the loop. */ + range[rpos].highKey = range[rpos+1].highKey; + range.remove( rpos+1 ); + } + /* Maybe move it to the singles. */ + else if ( keyOps->span( range[rpos].lowKey, range[rpos].highKey ) == 1 ) { + single.append( range[rpos] ); + range.remove( rpos ); + } + else { + /* Keeping it in the ranges. */ + rpos += 1; + } + } +} + +/* Look through ranges and choose suitable single character transitions. */ +void RedFsm::chooseSingle() +{ + /* Loop the states. */ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + /* Rewrite the transition list taking out the suitable single + * transitions. 
*/ + moveTransToSingle( st ); + } +} + +void RedFsm::makeFlat() +{ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + st->condLowKey = 0; + st->condHighKey = 0; + + if ( st->outRange.length() == 0 ) { + st->lowKey = st->highKey = 0; + st->transList = 0; + } + else { + st->lowKey = st->outRange[0].lowKey; + st->highKey = st->outRange[st->outRange.length()-1].highKey; + unsigned long long span = keyOps->span( st->lowKey, st->highKey ); + st->transList = new RedTrans*[ span ]; + memset( st->transList, 0, sizeof(RedTrans*)*span ); + + for ( RedTransList::Iter trans = st->outRange; trans.lte(); trans++ ) { + unsigned long long base, trSpan; + base = keyOps->span( st->lowKey, trans->lowKey )-1; + trSpan = keyOps->span( trans->lowKey, trans->highKey ); + for ( unsigned long long pos = 0; pos < trSpan; pos++ ) + st->transList[base+pos] = trans->value; + } + + /* Fill in the gaps with the default transition. */ + for ( unsigned long long pos = 0; pos < span; pos++ ) { + if ( st->transList[pos] == 0 ) + st->transList[pos] = st->defTrans; + } + } + } +} + + +/* A default transition has been picked, move it from the outRange to the + * default pointer. */ +void RedFsm::moveToDefault( RedTrans *defTrans, RedState *state ) +{ + /* Rewrite the outRange, omitting any ranges that use + * the picked default. */ + RedTransList outRange; + for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) { + /* If it does not take the default, copy it over. */ + if ( rtel->value != defTrans ) + outRange.append( *rtel ); + } + + /* Save off the range we just created into the state's range. */ + state->outRange.transfer( outRange ); + + /* Store the default. */ + state->defTrans = defTrans; +} + +bool RedFsm::alphabetCovered( RedTransList &outRange ) +{ + /* Cannot cover without any out ranges. */ + if ( outRange.length() == 0 ) + return false; + + /* If the first range doesn't start at the the lower bound then the + * alphabet is not covered. 
*/ + RedTransList::Iter rtel = outRange; + if ( keyOps->minKey < rtel->lowKey ) + return false; + + /* Check that every range is next to the previous one. */ + rtel.increment(); + for ( ; rtel.lte(); rtel++ ) { + Key highKey = rtel[-1].highKey; + highKey.increment(); + if ( highKey != rtel->lowKey ) + return false; + } + + /* The last must extend to the upper bound. */ + RedTransEl *last = &outRange[outRange.length()-1]; + if ( last->highKey < keyOps->maxKey ) + return false; + + return true; +} +/* Return the transition that covers the most keys in the state's outRange, + * or 0 if the state has no ranges. */ +RedTrans *RedFsm::chooseDefaultSpan( RedState *state ) +{ + /* Make a set of transitions from the outRange. */ + RedTransPtrSet stateTransSet; + for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) + stateTransSet.insert( rtel->value ); + + /* For each transition in the set, find how many alphabet characters the + * transition spans. */ + unsigned long long *span = new unsigned long long[stateTransSet.length()]; + memset( span, 0, sizeof(unsigned long long) * stateTransSet.length() ); + for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) { + /* Look up the transition in the set; its offset indexes the span array. */ + RedTrans **inSet = stateTransSet.find( rtel->value ); + int pos = inSet - stateTransSet.data; + span[pos] += keyOps->span( rtel->lowKey, rtel->highKey ); + } + + /* Find the max span, choose it for making the default. */ + RedTrans *maxTrans = 0; + unsigned long long maxSpan = 0; + for ( RedTransPtrSet::Iter rtel = stateTransSet; rtel.lte(); rtel++ ) { + if ( span[rtel.pos()] > maxSpan ) { + maxSpan = span[rtel.pos()]; + maxTrans = *rtel; + } + } + + delete[] span; + return maxTrans; +} + +/* Pick default transitions from ranges for the states. */ +void RedFsm::chooseDefaultSpan() +{ + /* Loop the states. */ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + /* Only pick a default transition if the alphabet is covered. This + * avoids any transitions in the out range that go to error and avoids + * the need for an ERR state. 
*/ + if ( alphabetCovered( st->outRange ) ) { + /* Pick a default transition by largest span. */ + RedTrans *defTrans = chooseDefaultSpan( st ); + + /* Rewrite the transition list taking out the transition we picked + * as the default and store the default. */ + moveToDefault( defTrans, st ); + } + } +} + +RedTrans *RedFsm::chooseDefaultGoto( RedState *state ) +{ + /* Make a set of transitions from the outRange. */ + RedTransPtrSet stateTransSet; + for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) { + if ( rtel->value->targ == state->next ) + return rtel->value; + } + return 0; +} + +void RedFsm::chooseDefaultGoto() +{ + /* Loop the states. */ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + /* Pick a default transition. */ + RedTrans *defTrans = chooseDefaultGoto( st ); + if ( defTrans == 0 ) + defTrans = chooseDefaultSpan( st ); + + /* Rewrite the transition list taking out the transition we picked + * as the default and store the default. */ + moveToDefault( defTrans, st ); + } +} + +RedTrans *RedFsm::chooseDefaultNumRanges( RedState *state ) +{ + /* Make a set of transitions from the outRange. */ + RedTransPtrSet stateTransSet; + for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) + stateTransSet.insert( rtel->value ); + + /* For each transition in the find how many ranges use the transition. */ + int *numRanges = new int[stateTransSet.length()]; + memset( numRanges, 0, sizeof(int) * stateTransSet.length() ); + for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) { + /* Lookup the transition in the set. */ + RedTrans **inSet = stateTransSet.find( rtel->value ); + numRanges[inSet - stateTransSet.data] += 1; + } + + /* Find the max number of ranges. 
*/ + RedTrans *maxTrans = 0; + int maxNumRanges = 0; + for ( RedTransPtrSet::Iter rtel = stateTransSet; rtel.lte(); rtel++ ) { + if ( numRanges[rtel.pos()] > maxNumRanges ) { + maxNumRanges = numRanges[rtel.pos()]; + maxTrans = *rtel; + } + } + + delete[] numRanges; + return maxTrans; +} + +void RedFsm::chooseDefaultNumRanges() +{ + /* Loop the states. */ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + /* Pick a default transition. */ + RedTrans *defTrans = chooseDefaultNumRanges( st ); + + /* Rewrite the transition list taking out the transition we picked + * as the default and store the default. */ + moveToDefault( defTrans, st ); + } +} + +RedTrans *RedFsm::getErrorTrans( ) +{ + /* If the error trans has not been made aready, make it. */ + if ( errTrans == 0 ) { + /* This insert should always succeed since no transition created by + * the user can point to the error state. */ + errTrans = new RedTrans( getErrorState(), 0, nextTransId++ ); + RedTrans *inRes = transSet.insert( errTrans ); + assert( inRes != 0 ); + } + return errTrans; +} + +RedState *RedFsm::getErrorState() +{ + /* Something went wrong. An error state is needed but one was not supplied + * by the frontend. */ + assert( errState != 0 ); + return errState; +} + + +RedTrans *RedFsm::allocateTrans( RedState *targ, RedAction *action ) +{ + /* Create a reduced trans and look for it in the transiton set. */ + RedTrans redTrans( targ, action, 0 ); + RedTrans *inDict = transSet.find( &redTrans ); + if ( inDict == 0 ) { + inDict = new RedTrans( targ, action, nextTransId++ ); + transSet.insert( inDict ); + } + return inDict; +} + +void RedFsm::partitionFsm( int nparts ) +{ + /* At this point the states are ordered by a depth-first traversal. We + * will allocate to partitions based on this ordering. 
*/ + this->nParts = nparts; + int partSize = stateList.length() / nparts; + int remainder = stateList.length() % nparts; + int numInPart = partSize; + int partition = 0; + if ( remainder-- > 0 ) + numInPart += 1; + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + st->partition = partition; + + numInPart -= 1; + if ( numInPart == 0 ) { + partition += 1; + numInPart = partSize; + if ( remainder-- > 0 ) + numInPart += 1; + } + } +} + +void RedFsm::setInTrans() +{ + /* First pass counts the number of transitions. */ + for ( RedTransSet::Iter trans = transSet; trans.lte(); trans++ ) + trans->targ->numInTrans += 1; + + /* Pass over states to allocate the needed memory. Reset the counts so we + * can use them as the current size. */ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + st->inTrans = new RedTrans*[st->numInTrans]; + st->numInTrans = 0; + } + + /* Second pass over transitions copies pointers into the in trans list. */ + for ( RedTransSet::Iter trans = transSet; trans.lte(); trans++ ) + trans->targ->inTrans[trans->targ->numInTrans++] = trans; +} + +void RedFsm::setValueLimits() +{ + maxSingleLen = 0; + maxRangeLen = 0; + maxKeyOffset = 0; + maxIndexOffset = 0; + maxActListId = 0; + maxActionLoc = 0; + maxActArrItem = 0; + maxSpan = 0; + maxCondSpan = 0; + maxFlatIndexOffset = 0; + maxCondOffset = 0; + maxCondLen = 0; + maxCondSpaceId = 0; + maxCondIndexOffset = 0; + + /* In both of these cases the 0 index is reserved for no value, so the max + * is one more than it would be if they started at 0. */ + maxIndex = transSet.length(); + maxCond = 0; + + /* The nextStateId - 1 is the last state id assigned. */ + maxState = nextStateId - 1; + + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + /* Maximum single length. */ + if ( st->outSingle.length() > maxSingleLen ) + maxSingleLen = st->outSingle.length(); + + /* Maximum range length. 
*/ + if ( st->outRange.length() > maxRangeLen ) + maxRangeLen = st->outRange.length(); + + /* The key offset index offset for the state after last is not used, skip it.. */ + if ( ! st.last() ) { + maxKeyOffset += st->outSingle.length() + st->outRange.length()*2; + maxIndexOffset += st->outSingle.length() + st->outRange.length() + 1; + } + + /* Max key span. */ + if ( st->transList != 0 ) { + unsigned long long span = keyOps->span( st->lowKey, st->highKey ); + if ( span > maxSpan ) + maxSpan = span; + } + + /* Max flat index offset. */ + if ( ! st.last() ) { + if ( st->transList != 0 ) + maxFlatIndexOffset += keyOps->span( st->lowKey, st->highKey ); + maxFlatIndexOffset += 1; + } + } + + for ( GenActionTableMap::Iter at = actionMap; at.lte(); at++ ) { + /* Maximum id of action lists. */ + if ( at->actListId+1 > maxActListId ) + maxActListId = at->actListId+1; + + /* Maximum location of items in action array. */ + if ( at->location+1 > maxActionLoc ) + maxActionLoc = at->location+1; + + /* Maximum values going into the action array. */ + if ( at->key.length() > maxActArrItem ) + maxActArrItem = at->key.length(); + for ( GenActionTable::Iter item = at->key; item.lte(); item++ ) { + if ( item->value->actionId > maxActArrItem ) + maxActArrItem = item->value->actionId; + } + } +} + +void RedFsm::findFinalActionRefs() +{ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + /* Rerence count out of single transitions. */ + for ( RedTransList::Iter rtel = st->outSingle; rtel.lte(); rtel++ ) { + if ( rtel->value->action != 0 ) { + rtel->value->action->numTransRefs += 1; + for ( GenActionTable::Iter item = rtel->value->action->key; item.lte(); item++ ) + item->value->numTransRefs += 1; + } + } + + /* Reference count out of range transitions. 
*/ + for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) { + if ( rtel->value->action != 0 ) { + rtel->value->action->numTransRefs += 1; + for ( GenActionTable::Iter item = rtel->value->action->key; item.lte(); item++ ) + item->value->numTransRefs += 1; + } + } + + /* Reference count default transition. */ + if ( st->defTrans != 0 && st->defTrans->action != 0 ) { + st->defTrans->action->numTransRefs += 1; + for ( GenActionTable::Iter item = st->defTrans->action->key; item.lte(); item++ ) + item->value->numTransRefs += 1; + } + + /* Reference count to state actions. */ + if ( st->toStateAction != 0 ) { + st->toStateAction->numToStateRefs += 1; + for ( GenActionTable::Iter item = st->toStateAction->key; item.lte(); item++ ) + item->value->numToStateRefs += 1; + } + + /* Reference count from state actions. */ + if ( st->fromStateAction != 0 ) { + st->fromStateAction->numFromStateRefs += 1; + for ( GenActionTable::Iter item = st->fromStateAction->key; item.lte(); item++ ) + item->value->numFromStateRefs += 1; + } + + /* Reference count EOF actions. */ + if ( st->eofAction != 0 ) { + st->eofAction->numEofRefs += 1; + for ( GenActionTable::Iter item = st->eofAction->key; item.lte(); item++ ) + item->value->numEofRefs += 1; + } + } +} + +void RedFsm::analyzeAction( GenAction *act, InlineList *inlineList ) +{ + for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) { + /* Check for various things in regular actions. 
*/ + if ( act->numTransRefs > 0 || act->numToStateRefs > 0 || + act->numFromStateRefs > 0 || act->numEofRefs > 0 ) + { + if ( item->type == InlineItem::LmSwitch && + item->tokenRegion->lmSwitchHandlesError ) + { + bAnyLmSwitchError = true; + } + } + + if ( item->children != 0 ) + analyzeAction( act, item->children ); + } +} + +void RedFsm::analyzeActionList( RedAction *redAct, InlineList *inlineList ) +{ + for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) { + if ( item->children != 0 ) + analyzeActionList( redAct, item->children ); + } +} + +/* Assign ids to referenced actions. */ +void RedFsm::assignActionIds() +{ + int nextActionId = 0; + for ( GenActionList::Iter act = genActionList; act.lte(); act++ ) { + /* Only ever interested in referenced actions. */ + if ( numRefs( act ) > 0 ) + act->actionId = nextActionId++; + } +} + +/* Gather various info on the machine. */ +void RedFsm::analyzeMachine() +{ + /* Find the true count of action references. */ + findFinalActionRefs(); + + /* Check if there are any calls in action code. */ + for ( GenActionList::Iter act = genActionList; act.lte(); act++ ) { + /* Record the occurrence of various kinds of actions. */ + if ( act->numToStateRefs > 0 ) + bAnyToStateActions = true; + if ( act->numFromStateRefs > 0 ) + bAnyFromStateActions = true; + if ( act->numEofRefs > 0 ) + bAnyEofActions = true; + if ( act->numTransRefs > 0 ) + bAnyRegActions = true; + + /* Recurse through the action's parse tree looking for various things. */ + analyzeAction( act, act->inlineList ); + } + + /* Analyze reduced action lists. */ + for ( GenActionTableMap::Iter redAct = actionMap; redAct.lte(); redAct++ ) { + for ( GenActionTable::Iter act = redAct->key; act.lte(); act++ ) + analyzeActionList( redAct, act->value->inlineList ); + } + + /* Find states that have transitions with actions that have next + * statements. */ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + /* Check any actions out of outSinge. 
*/ + for ( RedTransList::Iter rtel = st->outSingle; rtel.lte(); rtel++ ) { + if ( rtel->value->action != 0 && rtel->value->action->anyCurStateRef() ) + st->bAnyRegCurStateRef = true; + } + + /* Check any actions out of outRange. */ + for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) { + if ( rtel->value->action != 0 && rtel->value->action->anyCurStateRef() ) + st->bAnyRegCurStateRef = true; + } + + /* Check any action out of default. */ + if ( st->defTrans != 0 && st->defTrans->action != 0 && + st->defTrans->action->anyCurStateRef() ) + st->bAnyRegCurStateRef = true; + } + + /* Assign ids to actions that are referenced. */ + assignActionIds(); + + /* Set the maximums of various values used for deciding types. */ + setValueLimits(); +} + +int transAction( RedTrans *trans ) +{ + int retAct = 0; + if ( trans->action != 0 ) + retAct = trans->action->location+1; + return retAct; +} + +int toStateAction( RedState *state ) +{ + int act = 0; + if ( state->toStateAction != 0 ) + act = state->toStateAction->location+1; + return act; +} + +int fromStateAction( RedState *state ) +{ + int act = 0; + if ( state->fromStateAction != 0 ) + act = state->fromStateAction->location+1; + return act; +} + +int eofAction( RedState *state ) +{ + int act = 0; + if ( state->eofAction != 0 ) + act = state->eofAction->location+1; + return act; +} + + +fsm_tables *RedFsm::makeFsmTables() +{ + /* The fsm runtime needs states sorted by id. 
*/ + sortByStateId(); + + int pos, curKeyOffset, curIndOffset; + fsm_tables *fsmTables = new fsm_tables; + fsmTables->num_states = stateList.length(); + + /* + * actions + */ + + fsmTables->num_actions = 1; + for ( GenActionTableMap::Iter act = actionMap; act.lte(); act++ ) + fsmTables->num_actions += 1 + act->key.length(); + + pos = 0; + fsmTables->actions = new long[fsmTables->num_actions]; + fsmTables->actions[pos++] = 0; + for ( GenActionTableMap::Iter act = actionMap; act.lte(); act++ ) { + fsmTables->actions[pos++] = act->key.length(); + for ( GenActionTable::Iter item = act->key; item.lte(); item++ ) + fsmTables->actions[pos++] = item->value->actionId; + } + + /* + * keyOffset + */ + pos = 0, curKeyOffset = 0; + fsmTables->key_offsets = new long[fsmTables->num_states]; + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + /* Store the current offset. */ + fsmTables->key_offsets[pos++] = curKeyOffset; + + /* Move the key offset ahead. */ + curKeyOffset += st->outSingle.length() + st->outRange.length()*2; + } + + /* + * transKeys + */ + fsmTables->num_trans_keys = 0; + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + fsmTables->num_trans_keys += st->outSingle.length(); + fsmTables->num_trans_keys += 2 * st->outRange.length(); + } + + pos = 0; + fsmTables->trans_keys = new char[fsmTables->num_trans_keys]; + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + for ( RedTransList::Iter stel = st->outSingle; stel.lte(); stel++ ) + fsmTables->trans_keys[pos++] = stel->lowKey.getVal(); + for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) { + fsmTables->trans_keys[pos++] = rtel->lowKey.getVal(); + fsmTables->trans_keys[pos++] = rtel->highKey.getVal(); + } + } + + /* + * singleLengths + */ + pos = 0; + fsmTables->single_lengths = new long[fsmTables->num_states]; + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) + fsmTables->single_lengths[pos++] = st->outSingle.length(); + + /* + * rangeLengths + */ + pos = 0; 
+ fsmTables->range_lengths = new long[fsmTables->num_states]; + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) + fsmTables->range_lengths[pos++] = st->outRange.length(); + + /* + * indexOffsets + */ + pos = 0, curIndOffset = 0; + fsmTables->index_offsets = new long[fsmTables->num_states]; + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + fsmTables->index_offsets[pos++] = curIndOffset; + + curIndOffset += st->outSingle.length() + st->outRange.length(); + if ( st->defTrans != 0 ) + curIndOffset += 1; + } + + /* + * transTargsWI + */ + fsmTables->numTransTargsWI = 0; + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + fsmTables->numTransTargsWI += st->outSingle.length(); + fsmTables->numTransTargsWI += st->outRange.length(); + if ( st->defTrans != 0 ) + fsmTables->numTransTargsWI += 1; + } + + pos = 0; + fsmTables->transTargsWI = new long[fsmTables->numTransTargsWI]; + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + for ( RedTransList::Iter stel = st->outSingle; stel.lte(); stel++ ) + fsmTables->transTargsWI[pos++] = stel->value->targ->id; + + for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) + fsmTables->transTargsWI[pos++] = rtel->value->targ->id; + + if ( st->defTrans != 0 ) + fsmTables->transTargsWI[pos++] = st->defTrans->targ->id; + } + + /* + * transActionsWI + */ + fsmTables->numTransActionsWI = 0; + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + fsmTables->numTransActionsWI += st->outSingle.length(); + fsmTables->numTransActionsWI += st->outRange.length(); + if ( st->defTrans != 0 ) + fsmTables->numTransActionsWI += 1; + } + + pos = 0; + fsmTables->transActionsWI = new long[fsmTables->numTransActionsWI]; + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + for ( RedTransList::Iter stel = st->outSingle; stel.lte(); stel++ ) + fsmTables->transActionsWI[pos++] = transAction( stel->value ); + + for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) + 
fsmTables->transActionsWI[pos++] = transAction( rtel->value ); + + if ( st->defTrans != 0 ) + fsmTables->transActionsWI[pos++] = transAction( st->defTrans ); + } + + /* + * toStateActions + */ + pos = 0; + fsmTables->to_state_actions = new long[fsmTables->num_states]; + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) + fsmTables->to_state_actions[pos++] = toStateAction( st ); + + /* + * fromStateActions + */ + pos = 0; + fsmTables->from_state_actions = new long[fsmTables->num_states]; + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) + fsmTables->from_state_actions[pos++] = fromStateAction( st ); + + /* + * eofActions + */ + pos = 0; + fsmTables->eof_actions = new long[fsmTables->num_states]; + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) + fsmTables->eof_actions[pos++] = eofAction( st ); + + /* + * eofTargs + */ + pos = 0; + fsmTables->eof_targs = new long[fsmTables->num_states]; + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + int targ = -1; + if ( st->eofTrans != 0 ) + targ = st->eofTrans->targ->id; + fsmTables->eof_targs[pos++] = targ; + } + + /* Start state. */ + fsmTables->start_state = startState->id; + + /* First final state. */ + fsmTables->first_final = ( firstFinState != 0 ) ? + firstFinState->id : nextStateId; + + /* The error state. */ + fsmTables->error_state = ( errState != 0 ) ? + errState->id : -1; + + /* The array pointing to actions. 
*/ + pos = 0; + fsmTables->num_action_switch = genActionList.length(); + fsmTables->action_switch = new GenAction*[fsmTables->num_action_switch]; + for ( GenActionList::Iter act = genActionList; act.lte(); act++ ) + fsmTables->action_switch[pos++] = act; + + /* + * entryByRegion + */ + + fsmTables->num_regions = regionToEntry.length()+1; + fsmTables->entry_by_region = new long[fsmTables->num_regions]; + fsmTables->entry_by_region[0] = fsmTables->error_state; + + pos = 1; + for ( RegionToEntry::Iter en = regionToEntry; en.lte(); en++ ) { + /* Find the entry state from the entry id. */ + RedEntryMapEl *entryMapEl = redEntryMap.find( *en ); + + /* Save it off. */ + fsmTables->entry_by_region[pos++] = entryMapEl != 0 ? entryMapEl->value + : fsmTables->error_state; + } + + return fsmTables; +} + + diff --git a/src/redfsm.h b/src/redfsm.h new file mode 100644 index 00000000..618fbd61 --- /dev/null +++ b/src/redfsm.h @@ -0,0 +1,479 @@ +/* + * Copyright 2006-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _COLM_REDFSM_H +#define _COLM_REDFSM_H + +#include <assert.h> +#include <string.h> + +#include <string> + +#include <avlbasic.h> +#include <avltree.h> +#include <avlmap.h> +#include <bstmap.h> +#include <vector.h> +#include <dlist.h> +#include <bstset.h> +#include <mergesort.h> +#include <sbstmap.h> +#include <sbstset.h> +#include <sbsttable.h> + +#include "keyops.h" +#include "compare.h" +#include "global.h" +#include "pdarun.h" + +#define TRANS_ERR_TRANS 0 +#define STATE_ERR_STATE 0 +#define FUNC_NO_FUNC 0 + +using std::string; + +struct RedState; +struct InlineList; +struct Compiler; +struct ObjectField; + +/* Element in list of actions. Contains the string for the code to exectute. */ +struct GenAction +{ + /* Data collected during parse. */ + InputLoc loc; + char *name; + InlineList *inlineList; + int actionId; + MarkType markType; + ObjectField *objField; + long markId; + + int numTransRefs; + int numToStateRefs; + int numFromStateRefs; + int numEofRefs; + + GenAction *prev, *next; +}; + +typedef DList<GenAction> GenActionList; +string nameOrLoc( GenAction *genAction ); + +/* Number of references in the final machine. */ +inline int numRefs( GenAction *genAction ) +{ + return genAction->numTransRefs + + genAction->numToStateRefs + + genAction->numFromStateRefs + + genAction->numEofRefs; +} + + +/* Forwards. */ +struct RedState; +struct FsmState; + +/* Transistion GenAction Element. */ +typedef SBstMapEl< int, GenAction* > GenActionTableEl; + +/* Transition GenAction Table. 
*/ +struct GenActionTable + : public SBstMap< int, GenAction*, CmpOrd<int> > +{ + void setAction( int ordering, GenAction *action ); + void setActions( int *orderings, GenAction **actions, int nActs ); + void setActions( const GenActionTable &other ); +}; + +/* Compare of a whole action table element (key & value). */ +struct GenCmpActionTableEl +{ + static int compare( const GenActionTableEl &action1, + const GenActionTableEl &action2 ) + { + if ( action1.key < action2.key ) + return -1; + else if ( action1.key > action2.key ) + return 1; + else if ( action1.value < action2.value ) + return -1; + else if ( action1.value > action2.value ) + return 1; + return 0; + } +}; + +/* Compare for GenActionTable. */ +typedef CmpSTable< GenActionTableEl, GenCmpActionTableEl > GenCmpActionTable; + +/* Set of states. */ +typedef BstSet<RedState*> RedStateSet; +typedef BstSet<int> IntSet; + +/* Reduced action. */ +struct RedAction +: + public AvlTreeEl<RedAction> +{ + RedAction( ) + : + key(), + eofRefs(0), + numTransRefs(0), + numToStateRefs(0), + numFromStateRefs(0), + numEofRefs(0), + bAnyNextStmt(false), + bAnyCurStateRef(false), + bAnyBreakStmt(false) + { } + + const GenActionTable &getKey() + { return key; } + + GenActionTable key; + int actListId; + int location; + IntSet *eofRefs; + + /* Number of references in the final machine. */ + bool numRefs() + { return numTransRefs + numToStateRefs + numFromStateRefs + numEofRefs; } + int numTransRefs; + int numToStateRefs; + int numFromStateRefs; + int numEofRefs; + + bool anyNextStmt() { return bAnyNextStmt; } + bool anyCurStateRef() { return bAnyCurStateRef; } + bool anyBreakStmt() { return bAnyBreakStmt; } + + bool bAnyNextStmt; + bool bAnyCurStateRef; + bool bAnyBreakStmt; +}; +typedef AvlTree<RedAction, GenActionTable, GenCmpActionTable> GenActionTableMap; + +/* Reduced transition. 
*/ +struct RedTrans +: + public AvlTreeEl<RedTrans> +{ + RedTrans( RedState *targ, RedAction *action, int id ) + : targ(targ), action(action), id(id), labelNeeded(true) { } + + RedState *targ; + RedAction *action; + int id; + bool partitionBoundary; + bool labelNeeded; +}; + +/* Compare of transitions for the final reduction of transitions. Comparison + * is on target and the pointer to the shared action table. It is assumed that + * when this is used the action tables have been reduced. */ +struct CmpRedTrans +{ + static int compare( const RedTrans &t1, const RedTrans &t2 ) + { + if ( t1.targ < t2.targ ) + return -1; + else if ( t1.targ > t2.targ ) + return 1; + else if ( t1.action < t2.action ) + return -1; + else if ( t1.action > t2.action ) + return 1; + else + return 0; + } +}; + +typedef AvlBasic<RedTrans, CmpRedTrans> RedTransSet; + +/* Element in out range. */ +struct RedTransEl +{ + /* Constructors. */ + RedTransEl( Key lowKey, Key highKey, RedTrans *value ) + : lowKey(lowKey), highKey(highKey), value(value) { } + + Key lowKey, highKey; + RedTrans *value; +}; + +typedef Vector<RedTransEl> RedTransList; +typedef Vector<RedState*> RedStateVect; + +typedef BstMapEl<RedState*, unsigned long long> RedSpanMapEl; +typedef BstMap<RedState*, unsigned long long> RedSpanMap; + +/* Compare used by span map sort. Reverse sorts by the span. */ +struct CmpRedSpanMapEl +{ + static int compare( const RedSpanMapEl &smel1, const RedSpanMapEl &smel2 ) + { + if ( smel1.value > smel2.value ) + return -1; + else if ( smel1.value < smel2.value ) + return 1; + else + return 0; + } +}; + +/* Sorting state-span map entries by span. */ +typedef MergeSort<RedSpanMapEl, CmpRedSpanMapEl> RedSpanMapSort; + +/* Set of entry ids that go into this state. */ +typedef Vector<int> EntryIdVect; +typedef Vector<char*> EntryNameVect; + +/* Maps entry ids (defined by the frontend, to reduced state ids. 
*/ +typedef BstMap<int, int> RedEntryMap; +typedef BstMapEl<int, int> RedEntryMapEl; + +typedef Vector<int> RegionToEntry; + +/* Reduced state. */ +struct RedState +{ + RedState() + : + defTrans(0), + transList(0), + isFinal(false), + labelNeeded(false), + outNeeded(false), + onStateList(false), + toStateAction(0), + fromStateAction(0), + eofAction(0), + eofTrans(0), + id(0), + bAnyRegCurStateRef(false), + partitionBoundary(false), + inTrans(0), + numInTrans(0) + { } + + /* Transitions out. */ + RedTransList outSingle; + RedTransList outRange; + RedTrans *defTrans; + + /* For flat conditions. */ + Key condLowKey, condHighKey; + + /* For flat keys. */ + Key lowKey, highKey; + RedTrans **transList; + + /* The list of states that transitions from this state go to. */ + RedStateVect targStates; + + bool isFinal; + bool labelNeeded; + bool outNeeded; + bool onStateList; + RedAction *toStateAction; + RedAction *fromStateAction; + RedAction *eofAction; + RedTrans *eofTrans; + int id; + + /* Pointers for the list of states. */ + RedState *prev, *next; + + bool anyRegCurStateRef() { return bAnyRegCurStateRef; } + bool bAnyRegCurStateRef; + + int partition; + bool partitionBoundary; + + RedTrans **inTrans; + int numInTrans; +}; + +/* List of states. */ +typedef DList<RedState> RedStateList; + +/* Set of reduced transitons. Comparison is by pointer. */ +typedef BstSet< RedTrans*, CmpOrd<RedTrans*> > RedTransPtrSet; + +/* Next version of the fsm machine. */ +struct RedFsm +{ + RedFsm(); + + bool wantComplete; + bool forcedErrorState; + + int nextActionId; + int nextTransId; + + /* Next State Id doubles as the total number of state ids. 
*/ + int nextStateId; + + RedTransSet transSet; + GenActionTableMap actionMap; + RedStateList stateList; + RedStateSet entryPoints; + RedState *startState; + RedState *errState; + RedTrans *errTrans; + RedTrans *errActionTrans; + RedState *firstFinState; + int numFinStates; + int nParts; + + GenAction *allActions; + RedAction *allActionTables; + RedState *allStates; + GenActionList genActionList; + EntryIdVect entryPointIds; + RedEntryMap redEntryMap; + RegionToEntry regionToEntry; + + bool bAnyToStateActions; + bool bAnyFromStateActions; + bool bAnyRegActions; + bool bAnyEofActions; + bool bAnyActionGotos; + bool bAnyActionCalls; + bool bAnyActionRets; + bool bAnyRegActionRets; + bool bAnyRegActionByValControl; + bool bAnyRegNextStmt; + bool bAnyRegCurStateRef; + bool bAnyRegBreak; + bool bAnyLmSwitchError; + bool bAnyConditions; + + int maxState; + int maxSingleLen; + int maxRangeLen; + int maxKeyOffset; + int maxIndexOffset; + int maxIndex; + int maxActListId; + int maxActionLoc; + int maxActArrItem; + unsigned long long maxSpan; + unsigned long long maxCondSpan; + int maxFlatIndexOffset; + Key maxKey; + int maxCondOffset; + int maxCondLen; + int maxCondSpaceId; + int maxCondIndexOffset; + int maxCond; + + bool anyActions(); + bool anyToStateActions() { return bAnyToStateActions; } + bool anyFromStateActions() { return bAnyFromStateActions; } + bool anyRegActions() { return bAnyRegActions; } + bool anyEofActions() { return bAnyEofActions; } + bool anyActionGotos() { return bAnyActionGotos; } + bool anyActionCalls() { return bAnyActionCalls; } + bool anyActionRets() { return bAnyActionRets; } + bool anyRegActionRets() { return bAnyRegActionRets; } + bool anyRegActionByValControl() { return bAnyRegActionByValControl; } + bool anyRegNextStmt() { return bAnyRegNextStmt; } + bool anyRegCurStateRef() { return bAnyRegCurStateRef; } + bool anyRegBreak() { return bAnyRegBreak; } + bool anyLmSwitchError() { return bAnyLmSwitchError; } + bool anyConditions() { return 
bAnyConditions; } + + /* Is is it possible to extend a range by bumping ranges that span only + * one character to the singles array. */ + bool canExtend( const RedTransList &list, int pos ); + + /* Pick single transitions from the ranges. */ + void moveTransToSingle( RedState *state ); + void chooseSingle(); + + void makeFlat(); + + /* Move a selected transition from ranges to default. */ + void moveToDefault( RedTrans *defTrans, RedState *state ); + + /* Pick a default transition by largest span. */ + RedTrans *chooseDefaultSpan( RedState *state ); + void chooseDefaultSpan(); + + /* Pick a default transition by most number of ranges. */ + RedTrans *chooseDefaultNumRanges( RedState *state ); + void chooseDefaultNumRanges(); + + /* Pick a default transition tailored towards goto driven machine. */ + RedTrans *chooseDefaultGoto( RedState *state ); + void chooseDefaultGoto(); + + /* Ordering states by transition connections. */ + void optimizeStateOrdering( RedState *state ); + void optimizeStateOrdering(); + + /* Ordering states by transition connections. */ + void depthFirstOrdering( RedState *state ); + void depthFirstOrdering(); + + /* Set state ids. */ + void sequentialStateIds(); + void sortStateIdsByFinal(); + + /* Arrange states in by final id. This is a stable sort. */ + void sortStatesByFinal(); + + /* Sorting states by id. */ + void sortByStateId(); + + /* Locating the first final state. This is the final state with the lowest + * id. */ + void findFirstFinState(); + + void assignActionLocs(); + + RedTrans *getErrorTrans(); + RedState *getErrorState(); + + /* Is every char in the alphabet covered? 
*/ + bool alphabetCovered( RedTransList &outRange ); + + RedTrans *allocateTrans( RedState *targState, RedAction *actionTable ); + + void partitionFsm( int nParts ); + + void setInTrans(); + void setValueLimits(); + void assignActionIds(); + void analyzeActionList( RedAction *redAct, InlineList *inlineList ); + void analyzeAction( GenAction *act, InlineList *inlineList ); + void findFinalActionRefs(); + void analyzeMachine(); + + fsm_tables *makeFsmTables(); +}; + +#endif /* _COLM_REDFSM_H */ + diff --git a/src/reduce.cc b/src/reduce.cc new file mode 100644 index 00000000..89a95015 --- /dev/null +++ b/src/reduce.cc @@ -0,0 +1,954 @@ +/* + * Copyright 2015-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <string.h> +#include <stdbool.h> + +#include <iostream> + +#include "fsmcodegen.h" + +void Compiler::writeCommitStub() +{ + *outStream << + "void " << objectName << "_commit_reduce_forward( program_t *prg, tree_t **root,\n" + " struct pda_run *pda_run, parse_tree_t *pt )\n" + "{\n" + " commit_clear_parse_tree( prg, root, pda_run, pt->child );\n" + "}\n" + "\n" + "long " << objectName << "_commit_union_sz( int reducer ) { return 0; }\n" + "void " << objectName << "_init_need() {}\n" + "int " << objectName << "_reducer_need_tok( program_t *prg, " + "struct pda_run *pda_run, int id ) { return COLM_RN_BOTH; }\n" + "int " << objectName << "_reducer_need_ign( program_t *prg, " + "struct pda_run *pda_run ) { return COLM_RN_BOTH; }\n" + "\n" + "void " << objectName << "_read_reduce( program_t *prg, int reducer, input_t *stream ) {}\n" + ; +} + +void Compiler::findRhsRefs( bool &lhsUsed, Vector<ProdEl*> &rhsUsed, Vector<ProdEl*> &treeUsed, + Vector<ProdEl*> &locUsed, Reduction *reduction, Production *production, + const ReduceTextItemList &list ) +{ + ObjectDef *objectDef = production->prodName->objectDef; + + rhsUsed.setAsNew( production->prodElList->length() ); + treeUsed.setAsNew( production->prodElList->length() ); + locUsed.setAsNew( production->prodElList->length() ); + + for ( ReduceTextItemList::Iter i = list; i.lte(); i++ ) { + if ( i->type == ReduceTextItem::LhsRef ) { + lhsUsed = true; + } + + if ( i->type == ReduceTextItem::RhsRef || + i->type == ReduceTextItem::RhsLoc || + i->type == ReduceTextItem::TreeRef ) + { + if ( i->n > 0 ) { + /* Numbered. */ + ProdEl *prodEl = production->prodElList->head; + int adv = i->n - 1; + while ( adv > 0 ) { + prodEl = prodEl->next; + adv -= 1; + } + + if ( i->type == ReduceTextItem::RhsLoc ) + locUsed[i->n-1] = prodEl; + else if ( i->type == ReduceTextItem::TreeRef ) + treeUsed[i->n-1] = prodEl; + else + rhsUsed[i->n-1] = prodEl; + } + else { + /* Named. 
*/ + String name( i->txt.data + 1, i->txt.length() - 1 ); + ObjectField *field = objectDef->rootScope->findField( name ); + if ( field != 0 ) { + for ( Vector<RhsVal>::Iter r = field->rhsVal; r.lte(); r++ ) { + if ( r->prodEl->production == production ) { + if ( i->type == ReduceTextItem::RhsLoc ) + locUsed[r->prodEl->pos] = r->prodEl; + else + rhsUsed[r->prodEl->pos] = r->prodEl; + } + } + } + } + } + } +} + +void Compiler::computeNeeded( Reduction *reduction, Production *production, + const ReduceTextItemList &list ) +{ + bool lhsUsed = false; + Vector<ProdEl*> rhsUsed; + Vector<ProdEl*> treeUsed; + Vector<ProdEl*> locUsed; + + findRhsRefs( lhsUsed, rhsUsed, treeUsed, locUsed, reduction, production, list ); + + /* Same length, can concurrently walk with one test. */ + Vector<ProdEl*>::Iter rhs = rhsUsed; + Vector<ProdEl*>::Iter loc = locUsed; + + for ( ; rhs.lte(); rhs++, loc++ ) { + ProdEl *prodEl = *rhs; + if ( prodEl != 0 ) { + if ( prodEl->production == production && prodEl->langEl->type == LangEl::Term ) + reduction->needData[prodEl->langEl->id] = true; + } + + ProdEl *locEl = *loc; + if ( locEl != 0 && locEl->production == production ) + reduction->needLoc[locEl->langEl->id] = true; + } +} + +void Compiler::loadRefs( Reduction *reduction, Production *production, + const ReduceTextItemList &list, bool read ) +{ + bool lhsUsed = false; + Vector<ProdEl*> rhsUsed; + Vector<ProdEl*> treeUsed; + Vector<ProdEl*> locUsed; + + findRhsRefs( lhsUsed, rhsUsed, treeUsed, locUsed, reduction, production, list ); + + if ( lhsUsed ) { + *outStream << " lel_" << production->prodName->fullName << " *_lhs = "; + + if ( read ) { + *outStream << + "&node->u." << production->prodName->fullName << ";\n"; + } + else { + *outStream << + "&((commit_reduce_union*)(lel+1))->" << production->prodName->fullName << ";\n"; + } + } + + /* + * In the first pass we load using a parse tree cursor. This is for + * nonterms. 
+ */ + bool useCursor = false; + for ( Vector<ProdEl*>::Iter rhs = rhsUsed; rhs.lte(); rhs++ ) { + if ( *rhs != 0 && (*rhs)->production == production && + (*rhs)->langEl->type != LangEl::Term ) + { + useCursor = true; + break; + } + } + + if ( useCursor ) { + int cursorPos = 0; + + if ( read ) { + *outStream << + " struct read_reduce_node *_pt_cursor = node->child;\n"; + } + else { + *outStream << + " struct colm_parse_tree *_pt_cursor = lel->child;\n"; + } + + /* Same length, can concurrently walk with one test. */ + Vector<ProdEl*>::Iter rhs = rhsUsed; + Vector<ProdEl*>::Iter loc = locUsed; + + for ( ; rhs.lte(); rhs++, loc++ ) { + ProdEl *prodEl = *rhs; + if ( prodEl != 0 ) { + while ( cursorPos < rhs.pos() ) { + *outStream << + " _pt_cursor = _pt_cursor->next;\n"; + cursorPos += 1; + } + + if ( prodEl->production == production ) { + if ( prodEl->langEl->type != LangEl::Term ) { + *outStream << + "lel_" << prodEl->langEl->fullName << " *" "_rhs" << rhs.pos() << " = "; + + if ( read ) { + *outStream << "&_pt_cursor->u." << prodEl->langEl->fullName << ";\n"; + } + else { + *outStream << "&((commit_reduce_union*)(_pt_cursor+1))->" << prodEl->langEl->fullName << ";\n"; + } + } + } + + } + } + } + + /* In the second pass we load using a tree cursor. This is for token/tree + * data and locations. 
*/ + + useCursor = false; + for ( Vector<ProdEl*>::Iter rhs = rhsUsed; rhs.lte(); rhs++ ) { + if ( *rhs != 0 && (*rhs)->production == production && + (*rhs)->langEl->type == LangEl::Term ) + { + useCursor = true; + break; + } + } + for ( Vector<ProdEl*>::Iter rhs = treeUsed; rhs.lte(); rhs++ ) { + if ( *rhs != 0 ) { + useCursor = true; + break; + } + } + for ( Vector<ProdEl*>::Iter loc = locUsed; loc.lte(); loc++ ) { + if ( *loc != 0 ) { + useCursor = true; + break; + } + } + + if ( useCursor ) { + int cursorPos = 0; + + if ( read ) { + *outStream << + " read_reduce_node *_tree_cursor = node->child;\n"; + } + else { + *outStream << + " kid_t *_tree_cursor = kid->tree->child;\n"; + } + + /* Same length, can concurrently walk with one test. */ + Vector<ProdEl*>::Iter rhs = rhsUsed; + Vector<ProdEl*>::Iter tree = treeUsed; + Vector<ProdEl*>::Iter loc = locUsed; + + for ( ; rhs.lte(); rhs++, loc++ ) { + + ProdEl *prodEl = *rhs; + if ( prodEl != 0 ) { + if ( prodEl->production == production ) { + if ( prodEl->langEl->type == LangEl::Term ) { + + while ( cursorPos < rhs.pos() ) { + *outStream << + " _tree_cursor = _tree_cursor->next;\n"; + cursorPos += 1; + } + + *outStream << " colm_data *_rhs" << rhs.pos() << " = "; + + if ( read ) { + *outStream << + "&_tree_cursor->data;\n"; + } + else { + *outStream << + "_tree_cursor->tree->tokdata;\n"; + } + } + } + } + + ProdEl *treeEl = *tree; + if ( treeEl != 0 ) { + if ( treeEl->production == production ) { + while ( cursorPos < rhs.pos() ) { + *outStream << + " _tree_cursor = _tree_cursor->next;\n"; + cursorPos += 1; + } + + *outStream << " colm_tree *_tree" << rhs.pos() << " = "; + *outStream << "_tree_cursor->tree;\n"; + } + } + + ProdEl *locEl = *loc; + if ( locEl != 0 ) { + if ( locEl->production == production ) { + + while ( cursorPos < rhs.pos() ) { + *outStream << + " _tree_cursor = _tree_cursor->next;\n"; + cursorPos += 1; + } + + *outStream << + " colm_location *_loc" << loc.pos() << " = "; + + if ( read ) { + 
*outStream << "&_tree_cursor->loc;\n"; + } + else { + *outStream << + "colm_find_location( prg, _tree_cursor->tree );\n"; + } + } + } + } + } +} + +void Compiler::writeRhsRef( Production *production, ReduceTextItem *i ) +{ + if ( i->n > 0 ) { + *outStream << "_rhs" << ( i->n - 1 ); + } + else { + ObjectDef *objectDef = production->prodName->objectDef; + String name( i->txt.data + 1, i->txt.length() - 1 ); + + /* Find the field in the rhsVal using capture field. */ + ObjectField *field = objectDef->rootScope->findField( name ); + if ( field != 0 ) { + for ( Vector<RhsVal>::Iter r = field->rhsVal; + r.lte(); r++ ) + { + if ( r->prodEl->production == production ) + *outStream << "_rhs" << r->prodEl->pos; + } + } + } +} + +void Compiler::writeTreeRef( Production *production, ReduceTextItem *i ) +{ + if ( i->n > 0 ) { + *outStream << "_tree" << ( i->n - 1 ); + } + else { + ObjectDef *objectDef = production->prodName->objectDef; + String name( i->txt.data + 1, i->txt.length() - 1 ); + + /* Find the field in the rhsVal using capture field. */ + ObjectField *field = objectDef->rootScope->findField( name ); + if ( field != 0 ) { + for ( Vector<RhsVal>::Iter r = field->rhsVal; + r.lte(); r++ ) + { + if ( r->prodEl->production == production ) + *outStream << "_tree" << r->prodEl->pos; + } + } + } +} + +void Compiler::writeRhsLoc( Production *production, ReduceTextItem *i ) +{ + if ( i->n > 0 ) { + *outStream << "_loc" << ( i->n - 1 ); + } + else { + ObjectDef *objectDef = production->prodName->objectDef; + String name( i->txt.data + 1, i->txt.length() - 1 ); + + /* Find the field in the rhsVal using capture field. 
*/ + ObjectField *field = objectDef->rootScope->findField( name ); + if ( field != 0 ) { + for ( Vector<RhsVal>::Iter r = field->rhsVal; + r.lte(); r++ ) + { + if ( r->prodEl->production == production ) + *outStream << "_loc" << r->prodEl->pos; + } + } + } +} + +void Compiler::writeLhsRef( Production *production, ReduceTextItem *i ) +{ + *outStream << "_lhs"; +} + +void Compiler::writeHostItemList( Production *production, + const ReduceTextItemList &list ) +{ + for ( ReduceTextItemList::Iter i = list; i.lte(); i++ ) { + switch ( i->type ) { + case ReduceTextItem::LhsRef: + writeLhsRef( production, i ); + break; + case ReduceTextItem::RhsRef: + writeRhsRef( production, i ); + break; + case ReduceTextItem::TreeRef: + writeTreeRef( production, i ); + break; + case ReduceTextItem::RhsLoc: + writeRhsLoc( production, i ); + break; + case ReduceTextItem::Txt: + *outStream << i->txt; + break; + } + } +} + +/* For sorting according to prod name id, then by prod num. */ +struct CmpReduceAction +{ + static int compare( const ReduceAction *ra1 , const ReduceAction *ra2 ) + { + if ( ra1->production->prodName->id < ra2->production->prodName->id ) + return -1; + else if ( ra1->production->prodName->id > ra2->production->prodName->id ) + return 1; + else { + if ( ra1->production->prodNum < ra2->production->prodNum ) + return -1; + else if ( ra1->production->prodNum > ra2->production->prodNum ) + return 1; + } + return 0; + } +}; + +void Compiler::initReductionNeeds( Reduction *reduction ) +{ + reduction->needData = new bool[nextLelId]; + reduction->needLoc = new bool[nextLelId]; + memset( reduction->needData, 0, sizeof(bool)*nextLelId ); + memset( reduction->needLoc, 0, sizeof(bool)*nextLelId ); +} + +void Compiler::writeNeeds() +{ + + *outStream << + "struct reduction_info\n" + "{\n" + " unsigned char need_data[" << nextLelId << "];\n" + " unsigned char need_loc[" << nextLelId << "];\n" + "};\n" + "\n"; + + *outStream << + "static struct reduction_info ri[" << 
rootNamespace->reductions.length() + 1 << "];\n" + "\n"; + + *outStream << + "extern \"C\" void " << objectName << "_init_need()\n" + "{\n"; + + for ( ReductionVect::Iter r = rootNamespace->reductions; r.lte(); r++ ) { + Reduction *reduction = *r; + *outStream << + " memset( ri[" << reduction->id << "]" + ".need_data, 0, sizeof(unsigned char) * " << nextLelId << " );\n" + " memset( ri[" << reduction->id << "]" + ".need_loc, 0, sizeof(unsigned char) * " << nextLelId << " );\n"; + + for ( int i = 0; i < nextLelId; i++ ) { + if ( reduction->needData[i] ) { + *outStream << + " ri[" << reduction->id << "].need_data[" << i << "] = COLM_RN_DATA;\n"; + } + + if ( reduction->needLoc[i] ) { + *outStream << + " ri[" << reduction->id << "].need_loc[" << i << "] = COLM_RN_LOC;\n"; + } + } + } + + *outStream << + "}\n"; + + *outStream << + "extern \"C\" int " << objectName << "_reducer_need_tok( program_t *prg, " + "struct pda_run *pda_run, int id )\n" + "{\n" + " if ( prg->reduce_clean && pda_run->reducer > 0 ) {\n" + /* Note we are forcing the reducer need for data. Enabling requires finding + * a solution for backtracking push. */ + " return COLM_RN_DATA | ri[pda_run->reducer].need_data[id] | \n" + " ri[pda_run->reducer].need_loc[id];\n" + " }\n" + " return COLM_RN_BOTH;\n" + "}\n" + "\n" + "extern \"C\" int " << objectName << "_reducer_need_ign( program_t *prg, struct pda_run *pda_run )\n" + "{\n" + // Using this requires finding a solution for backtracking push back. 
+ //" if ( pda_run->reducer > 0 )\n" + //" return COLM_RN_NEITHER;\n" + " return COLM_RN_BOTH;\n" + "}\n"; +} + +void Compiler::writeReduceStructs() +{ + for ( ReductionVect::Iter r = rootNamespace->reductions; r.lte(); r++ ) { + for ( ReduceNonTermList::Iter rdi = (*r)->reduceNonTerms; rdi.lte(); rdi++ ) { + *outStream << + "struct lel_" << rdi->nonTerm->uniqueType->langEl->fullName << "\n" + "{\n"; + + *outStream << + "#line " << rdi->loc.line << "\"" << rdi->loc.fileName << "\"\n"; + + writeHostItemList( 0, rdi->itemList ); + + *outStream << + "};\n"; + } + } + + *outStream << + "union commit_reduce_union\n" + "{\n"; + + for ( ReductionVect::Iter r = rootNamespace->reductions; r.lte(); r++ ) { + for ( ReduceNonTermList::Iter rdi = (*r)->reduceNonTerms; rdi.lte(); rdi++ ) { + LangEl *langEl = rdi->nonTerm->uniqueType->langEl; + *outStream << + " lel_" << langEl->fullName << " " << langEl->fullName << ";\n"; + } + } + + *outStream << + "};\n" + "\n"; + + *outStream << + "extern \"C\" long " << objectName << "_commit_union_sz( int reducer )\n" + "{\n" + " return sizeof( commit_reduce_union );\n" + "}\n"; + + *outStream << + "struct read_reduce_node\n" + "{\n" + " std::string name;\n" + " int id;\n" + " int prod_num;\n" + " colm_location loc;\n" + " colm_data data;\n" + " commit_reduce_union u;\n" + " read_reduce_node *next;\n" + " read_reduce_node *child;\n" + "};\n" + "\n"; +} + + +void Compiler::writeUnescape() +{ + *outStream << + "static void unescape( colm_data *tokdata )\n" + "{\n" + " unsigned char *src = (unsigned char*)tokdata->data, *dest = (unsigned char*)tokdata->data;\n" + " while ( *src != 0 ) {\n" + " if ( *src == '\\\\' ) {\n" + " unsigned int i;\n" + " char buf[3];\n" + "\n" + " src += 1;\n" + " buf[0] = *src++;\n" + " buf[1] = *src++;\n" + " buf[2] = 0;\n" + "\n" + " sscanf( buf, \"%x\", &i );\n" + " *dest++ = (unsigned char)i;\n" + "\n" + " tokdata->length -= 2;\n" + " }\n" + " else {\n" + " *dest++ = *src++;\n" + " }\n" + " }\n" + " *dest = 
0;\n" + "}\n" + "\n"; +} + +void Compiler::writeReduceDispatchers() +{ + *outStream << + "\n" + "extern \"C\" void " << objectName << "_commit_reduce_forward( program_t *prg, tree_t **root,\n" + " struct pda_run *pda_run, parse_tree_t *pt )\n" + "{\n" + " switch ( pda_run->reducer ) {\n"; + + for ( ReductionVect::Iter r = rootNamespace->reductions; r.lte(); r++ ) { + Reduction *reduction = *r; + if ( reduction->parserBased ) { + *outStream << + " case " << reduction->id << ":\n" + " ((" << reduction->name << "*)prg->red_ctx)->commit_reduce_forward( " + "prg, root, pda_run, pt );\n" + " break;\n"; + } + } + + *outStream << + " }\n" + "}\n" + "\n"; + + *outStream << + "extern \"C\" void " << objectName << "_read_reduce( program_t *prg, int reducer, stream_t *stream )\n" + "{\n" + " switch ( reducer ) {\n"; + + for ( ReductionVect::Iter r = rootNamespace->reductions; r.lte(); r++ ) { + Reduction *reduction = *r; + if ( reduction->postfixBased ) { + *outStream << + " case " << reduction->id << ":\n" + " ((" << reduction->name << "*)prg->red_ctx)->read_reduce_forward( prg, stream->impl->file );\n" + " break;\n"; + } + } + + *outStream << + " }\n" + "}\n" + "\n"; +} + +void Compiler::computeNeeded() +{ + for ( ReductionVect::Iter r = rootNamespace->reductions; r.lte(); r++ ) { + Reduction *reduction = *r; + initReductionNeeds( reduction ); + + for ( ReduceActionList::Iter rdi = reduction->reduceActions; rdi.lte(); rdi++ ) + computeNeeded( reduction, rdi->production, rdi->itemList ); + } +} + +void Compiler::writeParseReduce( Reduction *reduction ) +{ + *outStream << + "void " << reduction->name << "::commit_reduce_forward( program_t *prg, \n" + " tree_t **root, struct pda_run *pda_run, parse_tree_t *pt )\n" + "{\n" + " tree_t **sp = root;\n" + "\n" + " parse_tree_t *lel = pt;\n" + " kid_t *kid = pt->shadow;\n" + "\n" + "recurse:\n" + "\n" + " if ( lel->child != 0 ) {\n" + " /* There are children. Must process all children first. 
*/\n" + " vm_push_ptree( lel );\n" + " vm_push_kid( kid );\n" + "\n" + " lel = lel->child;\n" + " kid = tree_child( prg, kid->tree );\n" + " while ( lel != 0 ) {\n" + " goto recurse;\n" + " resume:\n" + " lel = lel->next;\n" + " kid = kid->next;\n" + " }\n" + "\n" + " kid = vm_pop_kid();\n" + " lel = vm_pop_ptree();\n" + " }\n" + "\n" + " if ( !( lel->flags & PF_COMMITTED ) ) {\n" + " /* Now can execute the reduction action. */\n" + " {\n"; + + + *outStream << + " { switch ( kid->tree->id ) {\n"; + + /* Populate a vector with the reduce actions. */ + Vector<ReduceAction*> actions; + actions.setAsNew( reduction->reduceActions.length() ); + long pos = 0; + for ( ReduceActionList::Iter rdi = reduction->reduceActions; rdi.lte(); rdi++ ) + actions[pos++] = rdi; + + /* Sort it by lhs id, then prod num. */ + MergeSort<ReduceAction*, CmpReduceAction> sortActions; + sortActions.sort( actions.data, actions.length() ); + + ReduceAction *last = 0; + + for ( Vector<ReduceAction*>::Iter rdi = actions; rdi.lte(); rdi++ ) { + ReduceAction *action = *rdi; + int lelId = action->production->prodName->id; + int prodNum = action->production->prodNum; + + /* Maybe close off the last prod. */ + if ( last != 0 && + last->production->prodName != action->production->prodName ) + { + *outStream << + " break;\n" + " }\n"; + + } + + /* Maybe open a new prod. 
*/ + if ( last == 0 || + last->production->prodName != action->production->prodName ) + { + *outStream << + " case " << lelId << ": {\n"; + } + + *outStream << + " if ( kid->tree->prod_num == " << prodNum << " ) {\n"; + + + loadRefs( reduction, action->production, action->itemList, false ); + + *outStream << + "#line " << action->loc.line << " \"" << action->loc.fileName << "\"\n"; + + writeHostItemList( action->production, action->itemList ); + + *outStream << + " }\n"; + + last = action; + } + + if ( last != 0 ) { + *outStream << + " break;\n" + " }\n"; + } + + *outStream << + " } }\n" + " }\n" + " }\n" + "\n" + " commit_clear_parse_tree( prg, sp, pda_run, lel->child );\n" + " if ( prg->reduce_clean ) {\n" + " commit_clear_kid_list( prg, sp, kid->tree->child );\n" + " kid->tree->child = 0;\n" + " kid->tree->flags &= ~( AF_LEFT_IGNORE | AF_RIGHT_IGNORE );\n" + " }\n" + " lel->child = 0;\n" + "\n" + " if ( sp != root )\n" + " goto resume;\n" + " pt->flags |= PF_COMMITTED;\n" + "}\n" + "\n"; + +} + +void Compiler::writeParseReduce() +{ + for ( ReductionVect::Iter r = rootNamespace->reductions; r.lte(); r++ ) { + Reduction *reduction = *r; + if ( reduction->parserBased ) + writeParseReduce( reduction ); + } +} + +void Compiler::writePostfixReduce( Reduction *reduction ) +{ + *outStream << + "void " << reduction->name << "::read_reduce_forward( program_t *prg, FILE *file )\n" + "{\n" + " __gnu_cxx::stdio_filebuf<char> fbuf( file, std::ios::in|std::ios::out|std::ios::app );\n" + " std::iostream in( &fbuf );\n" + " std::string type, tok, text;\n" + " long _id, line, column, byte, prod_num, children;\n" + " read_reduce_node sentinal;\n" + " sentinal.next = 0;\n" + " read_reduce_node *stack = &sentinal, *last = 0;\n" + " while ( in >> type ) {\n" + " /* read. 
*/\n" + " if ( type == \"t\" ) {\n" + " in >> tok >> _id >> line >> column >> byte >> text;\n" + " read_reduce_node *node = new read_reduce_node;\n" + " node->name = tok;\n" + " node->id = _id;\n" + " node->loc.name = \"<>\";\n" + " node->loc.line = line;\n" + " node->loc.column = column;\n" + " node->loc.byte = byte;\n" + " node->data.data = strdup( text.c_str() );\n" + " node->data.length = text.size();\n" + " unescape( &node->data );\n" + "\n" + " node->next = stack;\n" + " node->child = 0;\n" + " stack = node;\n" + " }\n" + " else if ( type == \"r\" ) {\n" + " in >> tok >> _id >> prod_num >> children;\n" + " read_reduce_node *node = new read_reduce_node;\n" + " memset( &node->loc, 0, sizeof(colm_location) );\n" + " memset( &node->data, 0, sizeof(colm_data) );\n" + " node->name = tok;\n" + " node->id = _id;\n" + " node->prod_num = prod_num;\n" + " node->child = 0;\n" + " while ( children-- > 0 ) {\n" + " last = stack;\n" + " stack = stack->next;\n" + " last->next = node->child;\n" + " node->child = last;\n" + " }\n" + "\n" + " node->next = stack;\n" + " stack = node;\n" + "\n" + " { switch ( node->id ) {\n"; + + /* Populate a vector with the reduce actions. */ + Vector<ReduceAction*> actions; + actions.setAsNew( reduction->reduceActions.length() ); + long pos = 0; + for ( ReduceActionList::Iter rdi = reduction->reduceActions; rdi.lte(); rdi++ ) + actions[pos++] = rdi; + + /* Sort it by lhs id, then prod num. */ + MergeSort<ReduceAction*, CmpReduceAction> sortActions; + sortActions.sort( actions.data, actions.length() ); + + ReduceAction *last = 0; + + for ( Vector<ReduceAction*>::Iter rdi = actions; rdi.lte(); rdi++ ) { + ReduceAction *action = *rdi; + int lelId = action->production->prodName->id; + int prodNum = action->production->prodNum; + + /* Maybe close off the last prod. */ + if ( last != 0 && + last->production->prodName != action->production->prodName ) + { + *outStream << + " break;\n" + " }\n"; + } + + /* Maybe open a new prod. 
*/ + if ( last == 0 || + last->production->prodName != action->production->prodName ) + { + *outStream << + " case " << lelId << ": {\n"; + } + + *outStream << + " if ( node->prod_num == " << prodNum << " ) {\n"; + + loadRefs( reduction, action->production, action->itemList, true ); + + *outStream << + "#line " << action->loc.line << "\"" << action->loc.fileName << "\"\n"; + + writeHostItemList( action->production, action->itemList ); + + *outStream << + " }\n"; + + last = action; + } + + if ( last != 0 ) { + *outStream << + " break;\n" + " }\n"; + } + + *outStream << + " } }\n" + " /* delete the children */\n" + " last = node->child;\n" + " while ( last != 0 ) {\n" + " read_reduce_node *next = last->next;\n" + " delete last;\n" + " last = next;\n" + " }\n" + " }\n" + " }\n" + "}\n" + "\n"; +} + +void Compiler::writePostfixReduce() +{ + bool unescape = false; + for ( ReductionVect::Iter r = rootNamespace->reductions; r.lte(); r++ ) { + Reduction *reduction = *r; + if ( reduction->postfixBased ) { + if ( !unescape ) + writeUnescape(); + + writePostfixReduce( reduction ); + } + } +} + +void Compiler::writeCommit() +{ + *outStream << + "#include <colm/pdarun.h>\n" + "#include <colm/bytecode.h>\n" + "#include <colm/defs.h>\n" + "#include <colm/input.h>\n" + "#include <colm/tree.h>\n" + "#include <colm/program.h>\n" + "#include <colm/colm.h>\n" + "\n" + "#include <stdio.h>\n" + "#include <stdlib.h>\n" + "#include <string.h>\n" + "#include <assert.h>\n" + "#include <errno.h>\n" + "\n" + "#include <iostream>\n" + /* Not available on MAC OS. 
*/ + // "#include <ext/stdio_filebuf.h>\n" + "#include <fstream>\n" + "\n" + "using std::endl;\n" + "\n" + "#include \"reducer.h\"\n" + "\n"; + + computeNeeded(); + + writeReduceStructs(); + + writeReduceDispatchers(); + + //writePostfixReduce(); + + writeParseReduce(); + + writeNeeds(); +} diff --git a/src/resolve.cc b/src/resolve.cc new file mode 100644 index 00000000..c1f2cb3d --- /dev/null +++ b/src/resolve.cc @@ -0,0 +1,988 @@ +/* + * Copyright 2009-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <stdbool.h> +#include <iostream> +#include "compiler.h" + +/* + * Type Resolve. 
+ */ + +using std::cout; +using std::cerr; +using std::endl; + +Namespace *TypeRef::resolveNspace( Compiler *pd ) +{ + if ( parsedVarRef != 0 && !nspaceQual->thisOnly() ) { + UniqueType *ut = parsedVarRef->lookup( pd ); + return ut->langEl->nspace; + } + else if ( parsedTypeRef != 0 && !nspaceQual->thisOnly() ) { + UniqueType *ut = parsedTypeRef->resolveType( pd ); + return ut->langEl->nspace; + } + else { + /* Lookup up the qualifiction and then the name. */ + return nspaceQual->getQual( pd ); + } +} + +UniqueType *TypeRef::resolveTypeName( Compiler *pd ) +{ + nspace = resolveNspace( pd ); + + if ( nspace == 0 ) + error(loc) << "do not have region for resolving reference" << endp; + + while ( nspace != 0 ) { + /* Search for the token in the region by typeName. */ + TypeMapEl *inDict = nspace->typeMap.find( typeName ); + + if ( inDict != 0 ) { + switch ( inDict->type ) { + /* Defer to the typeRef we are an alias of. We need to guard + * against loops here. */ + case TypeMapEl::AliasType: { + return inDict->typeRef->resolveType( pd ); + } + + case TypeMapEl::LangElType: { + UniqueType *ut = pd->findUniqueType( TYPE_TREE, inDict->value ); + return ut; + } + case TypeMapEl::StructType: { + UniqueType *ut = pd->findUniqueType( TYPE_STRUCT, inDict->structEl ); + return ut; + } + } + } + + if ( nspaceQual->thisOnly() ) + break; + + nspace = nspace->parentNamespace; + } + + error(loc) << "unknown type " << typeName << " in typeof expression" << endp; + return 0; +} + +UniqueType *TypeRef::resolveTypeLiteral( Compiler *pd ) +{ + /* Lookup up the qualifiction and then the name. */ + nspace = resolveNspace( pd ); + + if ( nspace == 0 ) + error(loc) << "do not have region for resolving reference" << endp; + + /* Interpret escape sequences and remove quotes. 
*/ + bool unusedCI; + String interp; + prepareLitString( interp, unusedCI, pdaLiteral->data, + pdaLiteral->loc ); + + while ( nspace != 0 ) { + LiteralDictEl *ldel = nspace->literalDict.find( interp ); + + if ( ldel != 0 ) + return pd->findUniqueType( TYPE_TREE, ldel->value->tokenDef->tdLangEl ); + + if ( nspaceQual->thisOnly() ) + break; + + nspace = nspace->parentNamespace; + } + + error(loc) << "unknown type " << pdaLiteral->data << " in typeof expression" << endp; + return 0; +} + +bool TypeRef::uniqueGeneric( UniqueGeneric *&inMap, Compiler *pd, + const UniqueGeneric &searchKey ) +{ + bool inserted = false; + inMap = pd->uniqueGenericMap.find( &searchKey ); + if ( inMap == 0 ) { + inserted = true; + inMap = new UniqueGeneric( searchKey ); + pd->uniqueGenericMap.insert( inMap ); + } + return inserted; +} + +StructEl *TypeRef::declareListEl( Compiler *pd, TypeRef *valType ) +{ + static long vlistElId = 1; + String name( 32, "list_el_%d", vlistElId++ ); + ObjectDef *objectDef = ObjectDef::cons( ObjectDef::StructType, + name, pd->nextObjectId++ ); + + StructDef *structDef = new StructDef( loc, name, objectDef ); + + pd->rootNamespace->structDefList.append( structDef ); + + /* Value Element. */ + String id = "value"; + ObjectField *elValObjField = ObjectField::cons( internal, + ObjectField::StructFieldType, valType, id ); + + objectDef->rootScope->insertField( elValObjField->name, elValObjField ); + + /* Typeref for the struct. Used for pointers. */ + NamespaceQual *nspaceQual = NamespaceQual::cons( pd->rootNamespace ); + TypeRef *selfTypeRef = TypeRef::cons( InputLoc(), nspaceQual, name, RepeatNone ); + + /* Type ref for the list pointers psuedo type. 
*/ + TypeRef *elTr = TypeRef::cons( InputLoc(), TypeRef::ListPtrs, 0, selfTypeRef, 0 ); + + ObjectField *of = ObjectField::cons( InputLoc(), + ObjectField::GenericElementType, elTr, name ); + + objectDef->rootScope->insertField( of->name, of ); + + return declareStruct( pd, pd->rootNamespace, name, structDef ); +} + +void ConsItemList::resolve( Compiler *pd ) +{ + /* Types in constructor. */ + for ( ConsItemList::Iter item = first(); item.lte(); item++ ) { + switch ( item->type ) { + case ConsItem::LiteralType: + /* Use pdaFactor reference resolving. */ + pd->resolveProdEl( item->prodEl ); + break; + case ConsItem::InputText: + break; + case ConsItem::ExprType: + item->expr->resolve( pd ); + break; + } + } +} + +UniqueType *TypeRef::resolveTypeListEl( Compiler *pd ) +{ + TypeRef *valTr = typeRef1; + UniqueType *utValue = valTr->resolveType( pd ); + + UniqueGeneric *inMap = 0, searchKey( UniqueGeneric::ListEl, utValue ); + if ( uniqueGeneric( inMap, pd, searchKey ) ) + inMap->structEl = declareListEl( pd, valTr ); + + return pd->findUniqueType( TYPE_STRUCT, inMap->structEl ); +} + +UniqueType *TypeRef::resolveTypeList( Compiler *pd ) +{ + nspace = pd->rootNamespace; + + UniqueType *utValue = typeRef1->resolveType( pd ); + + if ( utValue->typeId != TYPE_STRUCT ) + error( loc ) << "only structs can be list elements" << endp; + + /* Find the list element. 
*/ + ObjectDef *elObjDef = utValue->structEl->structDef->objectDef; + UniqueType *ptrsUt = pd->findUniqueType( TYPE_LIST_PTRS ); + ObjectField *listEl = elObjDef->findFieldType( pd, ptrsUt ); + + if ( !listEl ) + error( loc ) << "could not find list element in type ref" << endp; + + UniqueGeneric *inMap = 0, searchKey( UniqueGeneric::List, utValue ); + if ( uniqueGeneric( inMap, pd, searchKey ) ) { + + GenericType *generic = new GenericType( GEN_LIST, + pd->nextGenericId++, typeRef1, 0, typeRef2, listEl ); + + nspace->genericList.append( generic ); + + generic->declare( pd, nspace ); + + inMap->generic = generic; + } + + generic = inMap->generic; + return pd->findUniqueType( TYPE_GENERIC, inMap->generic ); +} + +StructEl *TypeRef::declareMapElStruct( Compiler *pd, TypeRef *keyType, TypeRef *valType ) +{ + static long vlistElId = 1; + String name( 32, "map_el_%d", vlistElId++ ); + ObjectDef *objectDef = ObjectDef::cons( ObjectDef::StructType, + name, pd->nextObjectId++ ); + + StructDef *structDef = new StructDef( loc, name, objectDef ); + + pd->rootNamespace->structDefList.append( structDef ); + + /* Value Element. */ + String id = "value"; + ObjectField *elValObjField = ObjectField::cons( internal, + ObjectField::StructFieldType, valType, id ); + + objectDef->rootScope->insertField( elValObjField->name, elValObjField ); + + /* Typeref for the pointers. 
*/ + NamespaceQual *nspaceQual = NamespaceQual::cons( pd->rootNamespace ); + TypeRef *selfTypeRef = TypeRef::cons( InputLoc(), nspaceQual, name, RepeatNone ); + + TypeRef *elTr = TypeRef::cons( InputLoc(), TypeRef::MapPtrs, 0, selfTypeRef, keyType ); + + ObjectField *of = ObjectField::cons( InputLoc(), + ObjectField::GenericElementType, elTr, name ); + + objectDef->rootScope->insertField( of->name, of ); + + StructEl *sel = declareStruct( pd, pd->rootNamespace, name, structDef ); + return sel; +} + +UniqueType *TypeRef::resolveTypeMapEl( Compiler *pd ) +{ + TypeRef *keyType = typeRef1; + TypeRef *valType = typeRef2; + + UniqueType *utKey = keyType->resolveType( pd ); + UniqueType *utValue = valType->resolveType( pd ); + + UniqueGeneric *inMap = 0, searchKey( UniqueGeneric::MapEl, utKey, utValue ); + if ( uniqueGeneric( inMap, pd, searchKey ) ) + inMap->structEl = declareMapElStruct( pd, keyType, valType ); + + return pd->findUniqueType( TYPE_STRUCT, inMap->structEl ); +} + + +UniqueType *TypeRef::resolveTypeMap( Compiler *pd ) +{ + nspace = pd->rootNamespace; + + UniqueType *utKey = typeRef1->resolveType( pd ); + UniqueType *utEl = typeRef2->resolveType( pd ); + + if ( utEl->typeId != TYPE_STRUCT ) + error( loc ) << "only structs can be map elements" << endp; + + /* Find the list element. 
*/ + ObjectDef *elObjDef = utEl->structEl->structDef->objectDef; + UniqueType *ptrsUt = pd->findUniqueType( TYPE_MAP_PTRS ); + ObjectField *mapEl = elObjDef->findFieldType( pd, ptrsUt ); + + if ( !mapEl ) + error( loc ) << "could not find map element in type ref" << endp; + + UniqueGeneric *inMap = 0, searchKey( UniqueGeneric::Map, utKey, utEl ); + + if ( uniqueGeneric( inMap, pd, searchKey ) ) { + + GenericType *generic = new GenericType( GEN_MAP, + pd->nextGenericId++, typeRef2, typeRef1, typeRef3, mapEl ); + + nspace->genericList.append( generic ); + + generic->declare( pd, nspace ); + + inMap->generic = generic; + } + + generic = inMap->generic; + return pd->findUniqueType( TYPE_GENERIC, inMap->generic ); +} + +UniqueType *TypeRef::resolveTypeParser( Compiler *pd ) +{ + nspace = pd->rootNamespace; + + UniqueType *utParse = typeRef1->resolveType( pd ); + + UniqueGeneric *inMap = 0, searchKey( UniqueGeneric::Parser, utParse ); + if ( uniqueGeneric( inMap, pd, searchKey ) ) { + GenericType *generic = new GenericType( GEN_PARSER, + pd->nextGenericId++, typeRef1, 0, 0, 0 ); + + nspace->genericList.append( generic ); + + generic->declare( pd, nspace ); + + inMap->generic = generic; + } + + generic = inMap->generic; + return pd->findUniqueType( TYPE_GENERIC, inMap->generic ); +} + + +/* + * End object based list/map + */ + +UniqueType *TypeRef::resolveTypeRef( Compiler *pd ) +{ + typeRef1->resolveType( pd ); + return pd->findUniqueType( TYPE_REF, typeRef1->uniqueType->langEl ); +} + +void TypeRef::resolveRepeat( Compiler *pd ) +{ + if ( uniqueType->typeId != TYPE_TREE ) + error(loc) << "cannot repeat non-tree type" << endp; + + UniqueRepeat searchKey( repeatType, uniqueType->langEl ); + UniqueRepeat *uniqueRepeat = pd->uniqeRepeatMap.find( &searchKey ); + if ( uniqueRepeat == 0 ) { + uniqueRepeat = new UniqueRepeat( repeatType, uniqueType->langEl ); + pd->uniqeRepeatMap.insert( uniqueRepeat ); + + LangEl *declLangEl = 0; + + switch ( repeatType ) { + case 
RepeatRepeat: { + /* If the factor is a repeat, create the repeat element and link the + * factor to it. */ + String repeatName( 128, "_repeat_%s", typeName.data ); + declLangEl = pd->makeRepeatProd( loc, nspace, repeatName, uniqueType, false ); + break; + } + case RepeatLeftRepeat: { + /* If the factor is a repeat, create the repeat element and link the + * factor to it. */ + String repeatName( 128, "_lrepeat_%s", typeName.data ); + declLangEl = pd->makeRepeatProd( loc, nspace, repeatName, uniqueType, true ); + break; + } + case RepeatList: { + /* If the factor is a repeat, create the repeat element and link the + * factor to it. */ + String listName( 128, "_list_%s", typeName.data ); + declLangEl = pd->makeListProd( loc, nspace, listName, uniqueType, false ); + break; + } + case RepeatLeftList: { + /* If the factor is a repeat, create the repeat element and link the + * factor to it. */ + String repeatName( 128, "_llist_%s", typeName.data ); + declLangEl = pd->makeListProd( loc, nspace, repeatName, uniqueType, true ); + break; + } + case RepeatOpt: { + /* If the factor is an opt, create the opt element and link the factor + * to it. */ + String optName( 128, "_opt_%s", typeName.data ); + declLangEl = pd->makeOptProd( loc, nspace, optName, uniqueType ); + break; + } + case RepeatNone: + break; + } + + uniqueRepeat->declLangEl = declLangEl; + declLangEl->repeatOf = uniqueRepeat->langEl; + } + + uniqueType = pd->findUniqueType( TYPE_TREE, uniqueRepeat->declLangEl ); +} + +UniqueType *TypeRef::resolveIterator( Compiler *pd ) +{ + UniqueType *searchUT = searchTypeRef->resolveType( pd ); + + /* Lookup the iterator call. Make sure it is an iterator. */ + VarRefLookup lookup = iterCall->langTerm->varRef->lookupIterCall( pd ); + if ( lookup.objMethod->iterDef == 0 ) { + error(loc) << "attempt to iterate using something " + "that is not an iterator" << endp; + } + + /* Now that we have done the iterator call lookup we can make the type + * reference for the object field. 
*/ + UniqueType *iterUniqueType = pd->findUniqueType( TYPE_ITER, lookup.objMethod->iterDef ); + + iterDef = lookup.objMethod->iterDef; + searchUniqueType = searchUT; + + return iterUniqueType; +} + + +UniqueType *TypeRef::resolveType( Compiler *pd ) +{ + if ( uniqueType != 0 ) + return uniqueType; + + /* Not an iterator. May be a reference. */ + switch ( type ) { + case Name: + uniqueType = resolveTypeName( pd ); + break; + case Literal: + uniqueType = resolveTypeLiteral( pd ); + break; + case Parser: + uniqueType = resolveTypeParser( pd ); + break; + case Ref: + uniqueType = resolveTypeRef( pd ); + break; + case Iterator: + uniqueType = resolveIterator( pd ); + break; + + case List: + uniqueType = resolveTypeList( pd ); + break; + case ListPtrs: + uniqueType = pd->findUniqueType( TYPE_LIST_PTRS ); + break; + case ListEl: + uniqueType = resolveTypeListEl( pd ); + break; + + case Map: + uniqueType = resolveTypeMap( pd ); + break; + case MapPtrs: + uniqueType = pd->findUniqueType( TYPE_MAP_PTRS ); + break; + case MapEl: + uniqueType = resolveTypeMapEl( pd ); + break; + + case Unspecified: + /* No lookup needed, unique type(s) set when constructed. */ + break; + } + + if ( repeatType != RepeatNone ) + resolveRepeat( pd ); + + return uniqueType; +} + +void Compiler::resolveProdEl( ProdEl *prodEl ) +{ + prodEl->typeRef->resolveType( this ); + prodEl->langEl = prodEl->typeRef->uniqueType->langEl; +} + +void LangTerm::resolveFieldArgs( Compiler *pd ) +{ + /* Initialization expressions. */ + if ( fieldInitArgs != 0 ) { + for ( FieldInitVect::Iter pi = *fieldInitArgs; pi.lte(); pi++ ) + (*pi)->expr->resolve( pd ); + } +} + +void LangTerm::resolve( Compiler *pd ) +{ + switch ( type ) { + case ConstructType: + typeRef->resolveType( pd ); + + resolveFieldArgs( pd ); + + /* Types in constructor. 
*/ + constructor->list->resolve( pd ); + break; + + case VarRefType: + break; + + case MakeTreeType: + case MakeTokenType: + case MethodCallType: + if ( args != 0 ) { + for ( CallArgVect::Iter pe = *args; pe.lte(); pe++ ) + (*pe)->expr->resolve( pd ); + } + break; + + case NumberType: + case StringType: + break; + + case ProdCompareType: + /* If it has a match expression go into that. */ + if ( expr != 0 ) + expr->resolve( pd ); + break; + + case MatchType: + for ( PatternItemList::Iter item = *pattern->list; item.lte(); item++ ) { + switch ( item->form ) { + case PatternItem::TypeRefForm: + /* Use pdaFactor reference resolving. */ + pd->resolveProdEl( item->prodEl ); + break; + case PatternItem::InputTextForm: + /* Nothing to do here. */ + break; + } + } + + break; + case NewType: + /* Init args, then the new type. */ + resolveFieldArgs( pd ); + typeRef->resolveType( pd ); + break; + case TypeIdType: + typeRef->resolveType( pd ); + break; + case SearchType: + typeRef->resolveType( pd ); + break; + case NilType: + case TrueType: + case FalseType: + break; + + case ParseType: + case ParseTreeType: + case ParseStopType: + typeRef->resolveType( pd ); + + resolveFieldArgs( pd ); + + parserText->list->resolve( pd ); + break; + + case SendType: +// for ( CallArgVect::Iter pex = exprPtrVect->last(); pex.gtb(); pex-- ) +// (*pex)->expr->resolve( pd ); + parserText->list->resolve( pd ); + break; + case SendTreeType: + case EmbedStringType: + break; + + case CastType: + typeRef->resolveType( pd ); + expr->resolve( pd ); + break; + } +} + +void LangVarRef::resolve( Compiler *pd ) const +{ +} + +void LangExpr::resolve( Compiler *pd ) const +{ + switch ( type ) { + case BinaryType: { + left->resolve( pd ); + right->resolve( pd ); + break; + } + case UnaryType: { + right->resolve( pd ); + break; + } + case TermType: { + term->resolve( pd ); + break; + } + } +} + +void IterCall::resolve( Compiler *pd ) const +{ + switch ( form ) { + case Call: + langTerm->resolve( pd ); + break; 
+ case Expr: + langExpr->resolve( pd ); + break; + } +} + +void LangStmt::resolveForIter( Compiler *pd ) const +{ + iterCall->resolve( pd ); + + /* Search type ref. */ + typeRef->resolveType( pd ); + + /* Iterator type ref. */ + objField->typeRef->resolveType( pd ); + + /* Resolve the statements. */ + for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ ) + stmt->resolve( pd ); +} + +void LangStmt::resolve( Compiler *pd ) const +{ + switch ( type ) { + case ExprType: { + /* Evaluate the exrepssion, then pop it immediately. */ + expr->resolve( pd ); + break; + } + case IfType: { + /* Evaluate the test. */ + expr->resolve( pd ); + + /* Analyze the if true branch. */ + for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ ) + stmt->resolve( pd ); + + if ( elsePart != 0 ) + elsePart->resolve( pd ); + + break; + } + case ElseType: { + for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ ) + stmt->resolve( pd ); + break; + } + case RejectType: + break; + case WhileType: { + expr->resolve( pd ); + + /* Compute the while block. */ + for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ ) + stmt->resolve( pd ); + break; + } + case AssignType: { + /* Evaluate the exrepssion. */ + expr->resolve( pd ); + break; + } + case ForIterType: { + resolveForIter( pd ); + break; + } + case ReturnType: { + /* Evaluate the exrepssion. */ + expr->resolve( pd ); + break; + } + case BreakType: { + break; + } + case YieldType: { + /* take a reference and yield it. Immediately reset the referece. 
*/ + varRef->resolve( pd ); + break; + } + } +} + +void ObjectDef::resolve( Compiler *pd ) +{ + for ( FieldList::Iter fli = fieldList; fli.lte(); fli++ ) { + ObjectField *field = fli->value; + + if ( field->typeRef != 0 ) + field->typeRef->resolveType( pd ); + } +} + +void CodeBlock::resolve( Compiler *pd ) const +{ + if ( localFrame != 0 ) { + localFrame->resolve( pd ); + } + + for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ ) + stmt->resolve( pd ); +} + +void Compiler::resolveFunction( Function *func ) +{ + if ( func->typeRef != 0 ) + func->typeRef->resolveType( this ); + + for ( ParameterList::Iter param = *func->paramList; param.lte(); param++ ) + param->typeRef->resolveType( this ); + + CodeBlock *block = func->codeBlock; + block->resolve( this ); +} + +void Compiler::resolveInHost( Function *func ) +{ + if ( func->typeRef != 0 ) + func->typeRef->resolveType( this ); + + for ( ParameterList::Iter param = *func->paramList; param.lte(); param++ ) + param->typeRef->resolveType( this ); +} + + +void Compiler::resolvePreEof( TokenRegion *region ) +{ + CodeBlock *block = region->preEofBlock; + block->resolve( this ); +} + +void Compiler::resolveRootBlock() +{ + CodeBlock *block = rootCodeBlock; + block->resolve( this ); +} + +void Compiler::resolveTranslateBlock( LangEl *langEl ) +{ + CodeBlock *block = langEl->transBlock; + block->resolve( this ); +} + +void Compiler::resolveReductionCode( Production *prod ) +{ + CodeBlock *block = prod->redBlock; + block->resolve( this ); +} + +void Compiler::resolveParseTree() +{ + /* Compile functions. */ + for ( FunctionList::Iter f = functionList; f.lte(); f++ ) + resolveFunction( f ); + + for ( FunctionList::Iter f = inHostList; f.lte(); f++ ) + resolveInHost( f ); + + /* Compile the reduction code. */ + for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) { + if ( prod->redBlock != 0 ) + resolveReductionCode( prod ); + } + + /* Compile the token translation code. 
*/ + for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { + if ( lel->transBlock != 0 ) + resolveTranslateBlock( lel ); + } + + /* Compile preeof blocks. */ + for ( RegionList::Iter r = regionList; r.lte(); r++ ) { + if ( r->preEofBlock != 0 ) + resolvePreEof( r ); + } + + /* Compile the init code */ + resolveRootBlock( ); + + rootLocalFrame->resolve( this ); + + /* Init all user object fields (need consistent size). */ + for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { + ObjectDef *objDef = lel->objectDef; + if ( objDef != 0 ) { + /* Init all fields of the object. */ + for ( FieldList::Iter f = objDef->fieldList; f.lte(); f++ ) + f->value->typeRef->resolveType( this ); + } + } + + for ( StructElList::Iter sel = structEls; sel.lte(); sel++ ) { + ObjectDef *objDef = sel->structDef->objectDef; + for ( FieldList::Iter f = objDef->fieldList; f.lte(); f++ ) + f->value->typeRef->resolveType( this ); + } + + /* Init all fields of the global object. */ + for ( FieldList::Iter f = globalObjectDef->fieldList; f.lte(); f++ ) { + f->value->typeRef->resolveType( this ); + } +} + +/* Resolves production els and computes the precedence of each prod. */ +void Compiler::resolveProductionEls() +{ + /* NOTE: as we process this list it may be growing! */ + for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) { + /* First resolve. */ + for ( ProdElList::Iter prodEl = *prod->prodElList; prodEl.lte(); prodEl++ ) + resolveProdEl( prodEl ); + + /* If there is no explicit precdence ... */ + if ( prod->predOf == 0 ) { + /* Compute the precedence of the productions. */ + for ( ProdElList::Iter prodEl = prod->prodElList->last(); prodEl.gtb(); prodEl-- ) { + /* Production inherits the precedence of the last terminal with + * precedence. */ + if ( prodEl->langEl->predType != PredNone ) { + prod->predOf = prodEl->langEl; + break; + } + } + } + } +} + +void Compiler::makeTerminalWrappers() +{ + /* Make terminal language elements corresponding to each nonterminal in + * the grammar. 
*/ + for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { + if ( lel->type == LangEl::NonTerm ) { + String name( lel->name.length() + 5, "_T_%s", lel->name.data ); + LangEl *termDup = new LangEl( lel->nspace, name, LangEl::Term ); + + /* Give the dup the attributes of the nonterminal. This ensures + * that the attributes are allocated when patterns and + * constructors are parsed. */ + termDup->objectDef = lel->objectDef; + + langEls.append( termDup ); + lel->termDup = termDup; + termDup->termDup = lel; + } + } +} + +void Compiler::makeEofElements() +{ + /* Make eof language elements for each user terminal. This is a bit excessive and + * need to be reduced to the ones that we need parsers for, but we don't know that yet. + * Another pass before this one is needed. */ + for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { + if ( lel->eofLel == 0 && + lel != eofLangEl && + lel != errorLangEl && + lel != noTokenLangEl /* && + !( lel->tokenInstance == 0 || lel->tokenInstance->dupOf == 0 ) */ ) + { + String name( lel->name.length() + 5, "_eof_%s", lel->name.data ); + LangEl *eofLel = new LangEl( lel->nspace, name, LangEl::Term ); + + langEls.append( eofLel ); + lel->eofLel = eofLel; + eofLel->eofLel = lel; + eofLel->isEOF = true; + } + } +} + +void Compiler::resolvePrecedence() +{ + for ( PredDeclList::Iter predDecl = predDeclList; predDecl != 0; predDecl++ ) { + predDecl->typeRef->resolveType( this ); + + LangEl *langEl = predDecl->typeRef->uniqueType->langEl; + langEl->predType = predDecl->predType; + langEl->predValue = predDecl->predValue; + } +} + +void Compiler::resolveReductionActions() +{ + for ( ReductionVect::Iter r = rootNamespace->reductions; r.lte(); r++ ) { + for ( ReduceNonTermList::Iter rni = (*r)->reduceNonTerms; rni.lte(); rni++ ) + rni->nonTerm->resolveType( this ); + + for ( ReduceActionList::Iter rai = (*r)->reduceActions; rai.lte(); rai++ ) + rai->nonTerm->resolveType( this ); + } +} + +Production *Compiler::findProductionByLabel( LangEl 
*langEl, String label ) +{ + for ( LelDefList::Iter ldi = langEl->defList; ldi.lte(); ldi++ ) { + if ( ldi->_name != 0 && ( strcmp( ldi->_name, label ) == 0 ) ) + return ldi; + } + return 0; +} + +void Compiler::findReductionActionProds() +{ + for ( ReductionVect::Iter r = rootNamespace->reductions; r.lte(); r++ ) { + for ( ReduceActionList::Iter rai = (*r)->reduceActions; rai.lte(); rai++ ) { + rai->nonTerm->resolveType( this ); + LangEl *langEl = rai->nonTerm->uniqueType->langEl; + + Production *prod = findProductionByLabel( langEl, rai->prod ); + + if ( prod == 0 ) { + error(rai->loc) << "could not find production \"" << + rai->prod << "\"" << endp; + } + + rai->production = prod; + } + } +} + +void Compiler::resolveReducers() +{ + for ( ParserTextList::Iter pt = parserTextList; pt.lte(); pt++ ) { + if ( pt->reduce ) { + Reduction *reduction = rootNamespace->findReduction( pt->reducer ); + if ( reduction == 0 ) { + error ( pt->loc ) << "could not locate reduction \"" << + pt->reducer << "\"" << endp; + } + + pt->reducerId = reduction->id; + + /* Indicate which type of reducing we need. Parser based, or + * postfix. */ + if ( pt->read ) + reduction->postfixBased = true; + else + reduction->parserBased = true; + } + } +} + +void Compiler::resolvePass() +{ + /* + * Type Resolving. + */ + + resolvePrecedence(); + + resolveParseTree(); + + UniqueType *argvUT = argvTypeRef->resolveType( this ); + argvElSel = argvUT->generic->elUt->structEl; + + UniqueType *stdsUT = stdsTypeRef->resolveType( this ); + stdsElSel = stdsUT->generic->elUt->structEl; + + resolveReductionActions(); + + /* We must do this as the last step in the type resolution process because + * all type resolves can cause new language elments with associated + * productions. They get tacked onto the end of the list of productions. + * Doing it at the end results processing a growing list. 
*/ + resolveProductionEls(); + + findReductionActionProds(); + + resolveReducers(); +} diff --git a/src/rtvector.h b/src/rtvector.h new file mode 100644 index 00000000..e15d3f2a --- /dev/null +++ b/src/rtvector.h @@ -0,0 +1,35 @@ +/* + * Copyright 2002-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef _COLM_RTVECTOR_H +#define _COLM_RTVECTOR_H + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _COLM_RTVECTOR_H */ + diff --git a/src/stream.c b/src/stream.c new file mode 100644 index 00000000..77779aae --- /dev/null +++ b/src/stream.c @@ -0,0 +1,828 @@ +/* + * Copyright 2007-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <colm/input.h> + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> +#include <unistd.h> +#include <stdbool.h> + +#include <colm/pdarun.h> +#include <colm/debug.h> +#include <colm/program.h> +#include <colm/tree.h> +#include <colm/bytecode.h> +#include <colm/pool.h> +#include <colm/struct.h> + +DEF_STREAM_FUNCS( stream_funcs_data, stream_impl_data ); + +extern struct stream_funcs_data file_funcs; +extern struct stream_funcs_data accum_funcs; + +/* + * Append the line length ll to the ss->line_len stack, which + * stream_impl_pop_line reads back from the top. The array is created with + * 16 slots on first use and doubled whenever lines_cur reaches lines_alloc. + */ +void stream_impl_push_line( struct stream_impl_data *ss, int ll ) +{ + if ( ss->line_len == 0 ) { + ss->lines_cur = 0; + ss->lines_alloc = 16; + ss->line_len = malloc( sizeof(int) * ss->lines_alloc ); + } + else if ( ss->lines_cur == ss->lines_alloc ) { + int lines_alloc_new = ss->lines_alloc * 2; + int *line_len_new = malloc( sizeof(int) * lines_alloc_new ); + memcpy( line_len_new, ss->line_len, sizeof(int) * ss->lines_alloc ); + /* Release the outgrown array; it was previously leaked on every + * doubling. */ + free( ss->line_len ); + ss->lines_alloc = lines_alloc_new; + ss->line_len = line_len_new; + } + + ss->line_len[ ss->lines_cur ] = ll; + ss->lines_cur += 1; +} + +int stream_impl_pop_line( struct stream_impl_data *ss ) +{ + int len = 0; + if ( ss->lines_cur > 0 ) { + ss->lines_cur -= 1; + len = ss->line_len[ss->lines_cur]; + } + return len; +} + +static void dump_contents( struct colm_program *prg, struct stream_impl_data *sid ) +{ + struct run_buf *rb = sid->queue.head; + while ( rb != 0 ) { + debug( prg, REALM_INPUT, " %p contents |%d|%d|%d|%.*s|\n", sid, + rb->offset, rb->length, + rb->length - rb->offset, + (int)rb->length - rb->offset, + rb->data + rb->offset ); + rb = rb->next; + } +} + +static bool loc_set( location_t *loc ) +{ + return loc->line != 0; +} + +static void close_stream_file( FILE *file ) +{ + if ( file != stdin && file != stdout && file != stderr && + fileno(file) != 0 && fileno( file) != 1 && fileno(file) != 2 ) + { + fclose( file ); + } +} + +static void si_data_push_tail( struct stream_impl_data *ss, struct run_buf *run_buf ) +{ + if ( ss->queue.head 
== 0 ) { + run_buf->prev = run_buf->next = 0; + ss->queue.head = ss->queue.tail = run_buf; + } + else { + ss->queue.tail->next = run_buf; + run_buf->prev = ss->queue.tail; + run_buf->next = 0; + ss->queue.tail = run_buf; + } +} + +static struct run_buf *si_data_pop_tail( struct stream_impl_data *ss ) +{ + struct run_buf *ret = ss->queue.tail; + ss->queue.tail = ss->queue.tail->prev; + if ( ss->queue.tail == 0 ) + ss->queue.head = 0; + else + ss->queue.tail->next = 0; + return ret; +} + + +static void si_data_push_head( struct stream_impl_data *ss, struct run_buf *run_buf ) +{ + if ( ss->queue.head == 0 ) { + run_buf->prev = run_buf->next = 0; + ss->queue.head = ss->queue.tail = run_buf; + } + else { + ss->queue.head->prev = run_buf; + run_buf->prev = 0; + run_buf->next = ss->queue.head; + ss->queue.head = run_buf; + } +} + +static struct run_buf *si_data_pop_head( struct stream_impl_data *ss ) +{ + struct run_buf *ret = ss->queue.head; + ss->queue.head = ss->queue.head->next; + if ( ss->queue.head == 0 ) + ss->queue.tail = 0; + else + ss->queue.head->prev = 0; + return ret; +} + + +struct run_buf *new_run_buf( int sz ) +{ + struct run_buf *rb; + if ( sz > FSM_BUFSIZE ) { + int ssz = sizeof(struct run_buf) + sz - FSM_BUFSIZE; + rb = (struct run_buf*) malloc( ssz ); + memset( rb, 0, ssz ); + } + else { + rb = (struct run_buf*) malloc( sizeof(struct run_buf) ); + memset( rb, 0, sizeof(struct run_buf) ); + } + return rb; +} + +/* Keep the position up to date after consuming text. */ +void update_position_data( struct stream_impl_data *is, const alph_t *data, long length ) +{ + int i; + for ( i = 0; i < length; i++ ) { + if ( data[i] == '\n' ) { + stream_impl_push_line( is, is->column ); + is->line += 1; + is->column = 1; + } + else { + is->column += 1; + } + } + + is->byte += length; +} + +/* Keep the position up to date after sending back text. 
*/ +void undo_position_data( struct stream_impl_data *is, const alph_t *data, long length ) +{ + /* FIXME: this needs to fetch the position information from the parsed + * token and restore based on that.. */ + int i; + for ( i = 0; i < length; i++ ) { + if ( data[i] == '\n' ) { + is->line -= 1; + is->column = stream_impl_pop_line( is ); + } + else { + is->column -= 1; + } + } + + is->byte -= length; +} + + +/* + * Interface + */ + +static void data_transfer_loc( struct colm_program *prg, location_t *loc, + struct stream_impl_data *ss ) +{ + loc->name = ss->name; + loc->line = ss->line; + loc->column = ss->column; + loc->byte = ss->byte; +} + +/* + * Data inputs: files, strings, etc. + */ + +static int data_get_data( struct colm_program *prg, struct stream_impl_data *ss, + alph_t *dest, int length ) +{ + int copied = 0; + + /* Move over skip bytes. */ + struct run_buf *buf = ss->queue.head; + while ( true ) { + if ( buf == 0 ) { + /* Got through the in-mem buffers without copying anything. */ + struct run_buf *run_buf = new_run_buf( 0 ); + int received = ss->funcs->get_data_source( prg, + (struct stream_impl*)ss, run_buf->data, FSM_BUFSIZE ); + if ( received == 0 ) { + free( run_buf ); + break; + } + + run_buf->length = received; + si_data_push_tail( ss, run_buf ); + + buf = run_buf; + } + + int avail = buf->length - buf->offset; + + /* Anything available in the current buffer. */ + if ( avail > 0 ) { + /* The source data from the current buffer. */ + alph_t *src = &buf->data[buf->offset]; + + int slen = avail < length ? 
avail : length; + memcpy( dest+copied, src, slen ) ; + copied += slen; + length -= slen; + } + + if ( length == 0 ) { + //debug( REALM_INPUT, "exiting get data\n", length ); + break; + } + + buf = buf->next; + } + + return copied; +} + +static struct stream_impl *data_split_consumed( program_t *prg, struct stream_impl_data *sid ) +{ + struct stream_impl *split_off = 0; + if ( sid->consumed > 0 ) { + debug( prg, REALM_INPUT, "maybe split: consumed is > 0, splitting\n" ); + split_off = colm_impl_consumed( "<text3>", sid->consumed ); + sid->consumed = 0; + } + return split_off; +} + +int data_append_data( struct colm_program *prg, struct stream_impl_data *sid, + const alph_t *data, int length ) +{ + struct run_buf *tail = sid->queue.tail; + if ( tail == 0 || length > (FSM_BUFSIZE - tail->length) ) { + debug( prg, REALM_INPUT, "data_append_data: allocating run buf\n" ); + tail = new_run_buf( length ); + si_data_push_tail( sid, tail ); + } + + debug( prg, REALM_INPUT, "data_append_data: appending to " + "accum tail, offset: %d, length: %d, dlen: %d\n", + tail->offset, tail->length, length ); + + memcpy( tail->data + tail->length, data, length ); + tail->length += length; + +#ifdef DEBUG + dump_contents( prg, sid ); +#endif + + return length; +} + +int data_undo_append_data( struct colm_program *prg, struct stream_impl_data *sid, int length ) +{ + int consumed = 0; + int remaining = length; + + /* Move over skip bytes. */ + while ( true ) { + struct run_buf *buf = sid->queue.tail; + + if ( buf == 0 ) + break; + + /* Anything available in the current buffer. */ + int avail = buf->length - buf->offset; + if ( avail > 0 ) { + /* The source data from the current buffer. */ + int slen = avail <= remaining ? 
avail : remaining; + consumed += slen; + remaining -= slen; + buf->length -= slen; + //sid->consumed += slen; + } + + if ( remaining == 0 ) + break; + + struct run_buf *run_buf = si_data_pop_tail( sid ); + free( run_buf ); + } + + /* The format has four conversions; "now" is given sid->consumed, the same + * value data_consume_data reports under that label. The call previously + * supplied only three arguments. */ + debug( prg, REALM_INPUT, "data_undo_append_data: stream %p " + "ask: %d, consumed: %d, now: %d\n", sid, length, consumed, sid->consumed ); + +#ifdef DEBUG + dump_contents( prg, sid ); +#endif + + return consumed; + +} + +static void data_destructor( program_t *prg, tree_t **sp, struct stream_impl_data *si ) +{ + if ( si->file != 0 ) + close_stream_file( si->file ); + + if ( si->collect != 0 ) { + str_collect_destroy( si->collect ); + free( si->collect ); + } + + struct run_buf *buf = si->queue.head; + while ( buf != 0 ) { + struct run_buf *next = buf->next; + free( buf ); + buf = next; + } + + si->queue.head = 0; + + if ( si->data != 0 ) + free( (char*)si->data ); + + /* FIXME: Need to leak this for now. Until we can return strings to a + * program loader and free them at a later date (after the colm program is + * deleted). */ + // if ( si->name != 0 ) + // free( si->name ); + + free( si ); +} + +static str_collect_t *data_get_collect( struct colm_program *prg, struct stream_impl_data *si ) +{ + return si->collect; +} + +static void data_flush_stream( struct colm_program *prg, struct stream_impl_data *si ) +{ + if ( si->file != 0 ) + fflush( si->file ); +} + +static void data_close_stream( struct colm_program *prg, struct stream_impl_data *si ) +{ + if ( si->file != 0 ) { + close_stream_file( si->file ); + si->file = 0; + } +} + +static int data_get_option( struct colm_program *prg, struct stream_impl_data *si, int option ) +{ + return si->auto_trim; +} + +static void data_set_option( struct colm_program *prg, struct stream_impl_data *si, int option, int value ) +{ + si->auto_trim = value ? 
1 : 0; +} + +static void data_print_tree( struct colm_program *prg, tree_t **sp, + struct stream_impl_data *si, tree_t *tree, int trim ) +{ + if ( si->file != 0 ) + colm_print_tree_file( prg, sp, si, tree, trim ); + else if ( si->collect != 0 ) + colm_print_tree_collect( prg, sp, si->collect, tree, trim ); +} + +static int data_get_parse_block( struct colm_program *prg, struct stream_impl_data *ss, + int *pskip, alph_t **pdp, int *copied ) +{ + int ret = 0; + *copied = 0; + + /* Move over skip bytes. */ + struct run_buf *buf = ss->queue.head; + while ( true ) { + if ( buf == 0 ) { + /* Got through the in-mem buffers without copying anything. */ + struct run_buf *run_buf = new_run_buf( 0 ); + int received = ss->funcs->get_data_source( prg, + (struct stream_impl*)ss, run_buf->data, FSM_BUFSIZE ); + if ( received == 0 ) { + free( run_buf ); + ret = INPUT_EOD; + break; + } + + run_buf->length = received; + si_data_push_tail( ss, run_buf ); + + int slen = received; + *pdp = run_buf->data; + *copied = slen; + ret = INPUT_DATA; + break; + } + + int avail = buf->length - buf->offset; + + /* Anything available in the current buffer. */ + if ( avail > 0 ) { + /* The source data from the current buffer. */ + alph_t *src = &buf->data[buf->offset]; + + /* Need to skip? */ + if ( *pskip > 0 && *pskip >= avail ) { + /* Skipping the the whole source. */ + *pskip -= avail; + } + else { + /* Either skip is zero, or less than slen. Skip goes to zero. + * Some data left over, copy it. */ + src += *pskip; + avail -= *pskip; + *pskip = 0; + + int slen = avail; + *pdp = src; + *copied += slen; + ret = INPUT_DATA; + break; + } + } + + buf = buf->next; + } + + return ret; +} + +static int data_consume_data( struct colm_program *prg, struct stream_impl_data *sid, + int length, location_t *loc ) +{ + int consumed = 0; + int remaining = length; + + /* Move over skip bytes. 
*/ + while ( true ) { + struct run_buf *buf = sid->queue.head; + + if ( buf == 0 ) + break; + + /* Anything available in the current buffer. */ + int avail = buf->length - buf->offset; + if ( avail > 0 ) { + + if ( !loc_set( loc ) ) + data_transfer_loc( prg, loc, sid ); + + /* The source data from the current buffer. */ + int slen = avail <= remaining ? avail : remaining; + consumed += slen; + remaining -= slen; + update_position_data( sid, buf->data + buf->offset, slen ); + buf->offset += slen; + sid->consumed += slen; + } + + if ( remaining == 0 ) + break; + + struct run_buf *run_buf = si_data_pop_head( sid ); + free( run_buf ); + } + + debug( prg, REALM_INPUT, "data_consume_data: stream %p " + "ask: %d, consumed: %d, now: %d\n", sid, length, consumed, sid->consumed ); + +#ifdef DEBUG + dump_contents( prg, sid ); +#endif + + return consumed; +} + +static int data_undo_consume_data( struct colm_program *prg, struct stream_impl_data *sid, + const alph_t *data, int length ) +{ + const alph_t *end = data + length; + int amount = length; + if ( amount > sid->consumed ) + amount = sid->consumed; + + int remaining = amount; + struct run_buf *head = sid->queue.head; + if ( head != 0 && head->offset > 0 ) { + /* Fill into the offset space. */ + int fill = remaining > head->offset ? 
head->offset : remaining; + end -= fill; + remaining -= fill; + + undo_position_data( sid, end, fill ); + memcpy( head->data + (head->offset - fill), end, fill ); + + head->offset -= fill; + sid->consumed -= fill; + } + + if ( remaining > 0 ) { + end -= remaining; + struct run_buf *new_buf = new_run_buf( 0 ); + new_buf->length = remaining; + undo_position_data( sid, end, remaining ); + memcpy( new_buf->data, end, remaining ); + si_data_push_head( sid, new_buf ); + sid->consumed -= amount; + } + + debug( prg, REALM_INPUT, "data_undo_consume_data: stream %p " + "undid consume %d of %d bytes, consumed now %d, \n", + sid, amount, length, sid->consumed ); + +#ifdef DEBUG + dump_contents( prg, sid ); +#endif + + return amount; +} + +/* + * File Inputs + */ + +static int file_get_data_source( struct colm_program *prg, struct stream_impl_data *si, + alph_t *dest, int length ) +{ + return fread( dest, 1, length, si->file ); +} + +/* + * Text inputs + */ + +static int accum_get_data_source( struct colm_program *prg, struct stream_impl_data *si, + alph_t *dest, int want ) +{ + long avail = si->dlen - si->offset; + long take = avail < want ? 
avail : want; + if ( take > 0 ) + memcpy( dest, si->data + si->offset, take ); + si->offset += take; + return take; +} + +char stream_get_eof_sent( struct colm_program *prg, struct input_impl_seq *si ) +{ + return si->eof_sent; +} + +void stream_set_eof_sent( struct colm_program *prg, struct input_impl_seq *si, char eof_sent ) +{ + si->eof_sent = eof_sent; +} + +struct stream_funcs_data file_funcs = +{ + &data_get_parse_block, + &data_get_data, + &file_get_data_source, + + &data_consume_data, + &data_undo_consume_data, + + &data_transfer_loc, + &data_get_collect, + &data_flush_stream, + &data_close_stream, + &data_print_tree, + + &data_split_consumed, + &data_append_data, + &data_undo_append_data, + &data_destructor, + + &data_get_option, + &data_set_option, +}; + +struct stream_funcs_data accum_funcs = +{ + &data_get_parse_block, + &data_get_data, + &accum_get_data_source, + + &data_consume_data, + &data_undo_consume_data, + + &data_transfer_loc, + &data_get_collect, + &data_flush_stream, + &data_close_stream, + &data_print_tree, + + &data_split_consumed, + &data_append_data, + &data_undo_append_data, + &data_destructor, + + &data_get_option, + &data_set_option, +}; + +static void si_data_init( struct stream_impl_data *is, char *name ) +{ + memset( is, 0, sizeof(struct stream_impl_data) ); + + is->type = 'D'; + is->name = name; + is->line = 1; + is->column = 1; + is->byte = 0; + + /* Indentation turned off. 
*/ + is->indent.level = COLM_INDENT_OFF; + is->indent.indent = 0; +} + +struct stream_impl *colm_impl_new_accum( char *name ) +{ + struct stream_impl_data *si = (struct stream_impl_data*) + malloc(sizeof(struct stream_impl_data)); + si_data_init( si, name ); + si->funcs = (struct stream_funcs*)&accum_funcs; + + return (struct stream_impl*)si; +} + +static struct stream_impl *colm_impl_new_file( char *name, FILE *file ) +{ + struct stream_impl_data *ss = (struct stream_impl_data*) + malloc(sizeof(struct stream_impl_data)); + si_data_init( ss, name ); + ss->funcs = (struct stream_funcs*)&file_funcs; + ss->file = file; + return (struct stream_impl*)ss; +} + +static struct stream_impl *colm_impl_new_fd( char *name, long fd ) +{ + struct stream_impl_data *si = (struct stream_impl_data*) + malloc(sizeof(struct stream_impl_data)); + si_data_init( si, name ); + si->funcs = (struct stream_funcs*)&file_funcs; + si->file = fdopen( fd, ( fd == 0 ) ? "r" : "w" ); + return (struct stream_impl*)si; +} + +struct stream_impl *colm_impl_consumed( char *name, int len ) +{ + struct stream_impl_data *si = (struct stream_impl_data*) + malloc(sizeof(struct stream_impl_data)); + si_data_init( si, name ); + si->funcs = (struct stream_funcs*)&accum_funcs; + + si->data = 0; + si->consumed = len; + si->offset = len; + + si->dlen = len; + + return (struct stream_impl*)si; +} + +struct stream_impl *colm_impl_new_text( char *name, struct colm_location *loc, const alph_t *data, int len ) +{ + struct stream_impl_data *si = (struct stream_impl_data*) + malloc(sizeof(struct stream_impl_data)); + si_data_init( si, name ); + si->funcs = (struct stream_funcs*)&accum_funcs; + + alph_t *buf = (alph_t*)malloc( len ); + memcpy( buf, data, len ); + + si->data = buf; + si->dlen = len; + + if ( loc != 0 ) { + si->line = loc->line; + si->column = loc->column; + si->byte = loc->byte; + } + + return (struct stream_impl*)si; +} + +struct stream_impl *colm_impl_new_collect( char *name ) +{ + struct 
stream_impl_data *ss = (struct stream_impl_data*) + malloc(sizeof(struct stream_impl_data)); + si_data_init( ss, name ); + ss->funcs = (struct stream_funcs*)&accum_funcs; + ss->collect = (struct colm_str_collect*) malloc( sizeof( struct colm_str_collect ) ); + init_str_collect( ss->collect ); + return (struct stream_impl*)ss; +} + +struct stream_impl *stream_to_impl( stream_t *ptr ) +{ + return ptr->impl; +} + +str_t *collect_string( program_t *prg, stream_t *s ) +{ + str_collect_t *collect = s->impl->funcs->get_collect( prg, s->impl ); + head_t *head = string_alloc_full( prg, collect->data, collect->length ); + str_t *str = (str_t*)construct_string( prg, head ); + return str; +} + +stream_t *colm_stream_open_fd( program_t *prg, char *name, long fd ) +{ + struct stream_impl *impl = colm_impl_new_fd( colm_filename_add( prg, name ), fd ); + + struct colm_stream *s = colm_stream_new_struct( prg ); + s->impl = impl; + return s; +} + +stream_t *colm_stream_open_file( program_t *prg, tree_t *name, tree_t *mode ) +{ + head_t *head_name = ((str_t*)name)->value; + head_t *head_mode = ((str_t*)mode)->value; + stream_t *stream = 0; + + const char *given_mode = string_data(head_mode); + const char *fopen_mode = 0; + if ( memcmp( given_mode, "r", string_length(head_mode) ) == 0 ) + fopen_mode = "rb"; + else if ( memcmp( given_mode, "w", string_length(head_mode) ) == 0 ) + fopen_mode = "wb"; + else if ( memcmp( given_mode, "a", string_length(head_mode) ) == 0 ) + fopen_mode = "ab"; + else { + fatal( "unknown file open mode: %s\n", given_mode ); + } + + /* Need to make a C-string (null terminated). 
*/ + char *file_name = malloc(string_length(head_name)+1); + memcpy( file_name, string_data(head_name), string_length(head_name) ); + file_name[string_length(head_name)] = 0; + + FILE *file = fopen( file_name, fopen_mode ); + if ( file != 0 ) { + stream = colm_stream_new_struct( prg ); + stream->impl = colm_impl_new_file( colm_filename_add( prg, file_name ), file ); + } + + free( file_name ); + + return stream; +} + + +void colm_stream_destroy( program_t *prg, tree_t **sp, struct_t *s ) +{ + stream_t *stream = (stream_t*) s; + struct stream_impl *si = stream->impl; + si->funcs->destructor( prg, sp, si ); +} + +stream_t *colm_stream_new_struct( program_t *prg ) +{ + size_t memsize = sizeof(struct colm_stream); + struct colm_stream *stream = (struct colm_stream*) malloc( memsize ); + memset( stream, 0, memsize ); + colm_struct_add( prg, (struct colm_struct *)stream ); + stream->id = prg->rtd->struct_stream_id; + stream->destructor = &colm_stream_destroy; + return stream; +} + +stream_t *colm_stream_open_collect( program_t *prg ) +{ + struct stream_impl *impl = colm_impl_new_collect( colm_filename_add( prg, "<internal>" ) ); + struct colm_stream *stream = colm_stream_new_struct( prg ); + stream->impl = impl; + return stream; +} + diff --git a/src/string.c b/src/string.c new file mode 100644 index 00000000..d1d16aa9 --- /dev/null +++ b/src/string.c @@ -0,0 +1,281 @@ +/* + * Copyright 2007-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + 
* copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <assert.h> +#include <string.h> +#include <stdio.h> +#include <stdlib.h> +#include <ctype.h> + +#include <colm/pool.h> +#include <colm/pdarun.h> +#include <colm/bytecode.h> + +str_t *string_prefix( program_t *prg, str_t *str, long len ) +{ + head_t *head = string_alloc_full( prg, str->value->data, len ); + return (str_t*)construct_string( prg, head ); +} + +str_t *string_suffix( program_t *prg, str_t *str, long pos ) +{ + long len = str->value->length - pos; + head_t *head = string_alloc_full( prg, str->value->data + pos, len ); + return (str_t*)construct_string( prg, head ); +} + +tree_t *construct_string( program_t *prg, head_t *s ) +{ + str_t *str = (str_t*) tree_allocate( prg ); + str->id = LEL_ID_STR; + str->value = s; + + return (tree_t*)str; +} + + +/* + * In this system strings are not null terminated. Often strings come from a + * parse, in which case the string is just a pointer into the the data stream. + * A block in a parsed stream can hold many tokens and there is no room + * allocated for nulls. 
+ */ + +head_t *string_copy( program_t *prg, head_t *head ) +{ + head_t *result = 0; + if ( head != 0 ) { + if ( (char*)(head+1) == head->data ) + result = string_alloc_full( prg, head->data, head->length ); + else + result = colm_string_alloc_pointer( prg, head->data, head->length ); + + if ( head->location != 0 ) { + result->location = location_allocate( prg ); + result->location->name = head->location->name; + result->location->line = head->location->line; + result->location->column = head->location->column; + result->location->byte = head->location->byte; + } + } + return result; +} + +void string_free( program_t *prg, head_t *head ) +{ + if ( head != 0 ) { + if ( head->location != 0 ) + location_free( prg, head->location ); + + if ( (char*)(head+1) == head->data ) { + /* Full string allocation. */ + free( head ); + } + else { + /* Just a string head. */ + head_free( prg, head ); + } + } +} + +const char *string_data( head_t *head ) +{ + if ( head == 0 ) + return 0; + return head->data; +} + +long string_length( head_t *head ) +{ + if ( head == 0 ) + return 0; + return head->length; +} + +void string_shorten( head_t *head, long newlen ) +{ + assert( newlen <= head->length ); + head->length = newlen; +} + +head_t *init_str_space( long length ) +{ + /* Find the length and allocate the space for the shared string. */ + head_t *head = (head_t*) malloc( sizeof(head_t) + length ); + + /* Init the header. */ + head->data = (char*)(head+1); + head->length = length; + head->location = 0; + + /* Save the pointer to the data. */ + return head; +} + +/* Create from a c-style string. */ +head_t *string_alloc_full( program_t *prg, const char *data, long length ) +{ + /* Init space for the data. */ + head_t *head = init_str_space( length ); + + /* Copy in the data. */ + memcpy( (head+1), data, length ); + + return head; +} + +/* Create from a c-style string. 
*/ +head_t *colm_string_alloc_pointer( program_t *prg, const char *data, long length ) +{ + /* Find the length and allocate the space for the shared string. */ + head_t *head = head_allocate( prg ); + + /* Init the header. */ + head->data = data; + head->length = length; + + return head; +} + +head_t *concat_str( head_t *s1, head_t *s2 ) +{ + long s1Len = s1->length; + long s2Len = s2->length; + + /* Init space for the data. */ + head_t *head = init_str_space( s1Len + s2Len ); + + /* Copy in the data. */ + memcpy( (head+1), s1->data, s1Len ); + memcpy( (char*)(head+1) + s1Len, s2->data, s2Len ); + + return head; +} + +head_t *string_to_upper( head_t *s ) +{ + /* Init space for the data. */ + long len = s->length; + head_t *head = init_str_space( len ); + + /* Copy in the data. */ + const char *src = s->data; + char *dst = (char*)(head+1); + int i; + for ( i = 0; i < len; i++ ) + *dst++ = toupper( *src++ ); + + return head; +} + +head_t *string_to_lower( head_t *s ) +{ + /* Init space for the data. */ + long len = s->length; + head_t *head = init_str_space( len ); + + /* Copy in the data. */ + const char *src = s->data; + char *dst = (char*)(head+1); + int i; + for ( i = 0; i < len; i++ ) + *dst++ = tolower( *src++ ); + + return head; +} + + +/* Compare two strings. If identical returns 1, otherwise 0. */ +word_t cmp_string( head_t *s1, head_t *s2 ) +{ + if ( s1->length < s2->length ) + return -1; + else if ( s1->length > s2->length ) + return 1; + else { + char *d1 = (char*)(s1->data); + char *d2 = (char*)(s2->data); + return memcmp( d1, d2, s1->length ); + } +} + +word_t str_atoi( head_t *str ) +{ + /* FIXME: need to implement this by hand. There is no null terminator. */ + char *nulled = (char*)malloc( str->length + 1 ); + memcpy( nulled, str->data, str->length ); + nulled[str->length] = 0; + int res = atoi( nulled ); + free( nulled ); + return res; +} + +word_t str_atoo( head_t *str ) +{ + /* FIXME: need to implement this by hand. There is no null terminator. 
*/ + char *nulled = (char*)malloc( str->length + 1 ); + memcpy( nulled, str->data, str->length ); + nulled[str->length] = 0; + int res = strtol( nulled, 0, 8 ); + free( nulled ); + return res; +} + +head_t *int_to_str( program_t *prg, word_t i ) +{ + char data[20]; + sprintf( data, "%ld", i ); + return string_alloc_full( prg, data, strlen(data) ); +} + +word_t str_uord16( head_t *head ) +{ + uchar *data = (uchar*)(head->data); + ulong res; + res = (ulong)data[1]; + res |= ((ulong)data[0]) << 8; + return res; +} + +word_t str_uord8( head_t *head ) +{ + uchar *data = (uchar*)(head->data); + ulong res = (ulong)data[0]; + return res; +} + +head_t *make_literal( program_t *prg, long offset ) +{ + return colm_string_alloc_pointer( prg, + prg->rtd->litdata[offset], + prg->rtd->litlen[offset] ); +} + +head_t *string_sprintf( program_t *prg, str_t *format, long integer ) +{ + head_t *format_head = format->value; + long written = snprintf( 0, 0, (char*)string_data(format_head), integer ); + head_t *head = init_str_space( written+1 ); + written = snprintf( (char*)head->data, written+1, (char*)string_data(format_head), integer ); + head->length -= 1; + return head; +} diff --git a/src/struct.c b/src/struct.c new file mode 100644 index 00000000..5ee58ed3 --- /dev/null +++ b/src/struct.c @@ -0,0 +1,185 @@ +/* + * Copyright 2016-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <stdlib.h> +#include <string.h> +#include <stdbool.h> + +#include <colm/program.h> +#include <colm/struct.h> + +#include "internal.h" +#include "bytecode.h" + +struct colm_tree *colm_get_global( program_t *prg, long pos ) +{ + return colm_struct_get_field( prg->global, tree_t*, pos ); +} + +void colm_struct_add( program_t *prg, struct colm_struct *item ) +{ + if ( prg->heap.head == 0 ) { + prg->heap.head = prg->heap.tail = item; + item->prev = item->next = 0; + } + else { + item->prev = prg->heap.tail; + item->next = 0; + prg->heap.tail->next = item; + prg->heap.tail = item; + } +} + +struct colm_struct *colm_struct_new_size( program_t *prg, int size ) +{ + size_t memsize = sizeof(struct colm_struct) + ( sizeof(tree_t*) * size ); + struct colm_struct *item = (struct colm_struct*) malloc( memsize ); + memset( item, 0, memsize ); + + colm_struct_add( prg, item ); + return item; +} + +struct colm_struct *colm_struct_new( program_t *prg, int id ) +{ + struct colm_struct *s = colm_struct_new_size( prg, prg->rtd->sel_info[id - prg->rtd->num_lang_els].size ); + s->id = id; + return s; +} + +struct struct_el_info *colm_sel_info( program_t *prg, int id ) +{ + return &prg->rtd->sel_info[id - prg->rtd->num_lang_els]; +} + +void colm_struct_delete( program_t *prg, tree_t **sp, struct colm_struct *el ) +{ + if ( el->id == prg->rtd->struct_inbuilt_id || el->id == prg->rtd->struct_stream_id ) { + colm_destructor_t destructor = ((struct colm_inbuilt*)el)->destructor; + if ( 
destructor != 0 ) + (*destructor)( prg, sp, el ); + } + else { + int tree_i; + struct struct_el_info *sel = colm_sel_info( prg, el->id ); + for ( tree_i = 0; tree_i < sel->trees_len; tree_i++ ) { + tree_t *tree = colm_struct_get_field( el, tree_t*, sel->trees[tree_i] ); + colm_tree_downref( prg, sp, tree ); + } + } + free( el ); +} + +void colm_parser_destroy( program_t *prg, tree_t **sp, struct colm_struct *s ) +{ + struct colm_parser *parser = (struct colm_parser*) s; + + /* Free the PDA run. */ + colm_pda_clear( prg, sp, parser->pda_run ); + free( parser->pda_run ); + + /* Free the result. */ + colm_tree_downref( prg, sp, parser->result ); +} + +parser_t *colm_parser_new( program_t *prg, struct generic_info *gi, int stop_id, int reducer ) +{ + struct pda_run *pda_run = malloc( sizeof(struct pda_run) ); + + /* Start off the parsing process. */ + colm_pda_init( prg, pda_run, prg->rtd->pda_tables, + gi->parser_id, stop_id, 0, 0, reducer ); + + size_t memsize = sizeof(struct colm_parser); + struct colm_parser *parser = (struct colm_parser*) malloc( memsize ); + memset( parser, 0, memsize ); + colm_struct_add( prg, (struct colm_struct*) parser ); + + parser->id = prg->rtd->struct_inbuilt_id; + parser->destructor = &colm_parser_destroy; + parser->pda_run = pda_run; + + return parser; +} + +void colm_map_destroy( program_t *prg, tree_t **sp, struct colm_struct *s ) +{ + struct colm_map *map = (struct colm_map*) s; + + map_el_t *el = map->head; + while ( el != 0 ) { + map_el_t *next = el->next; + colm_tree_downref( prg, sp, el->key ); + //mapElFree( prg, el ); + el = next; + } +} + +map_t *colm_map_new( struct colm_program *prg ) +{ + size_t memsize = sizeof(struct colm_map); + struct colm_map *map = (struct colm_map*) malloc( memsize ); + memset( map, 0, memsize ); + colm_struct_add( prg, (struct colm_struct *)map ); + map->id = prg->rtd->struct_inbuilt_id; + return map; +} + +struct_t *colm_construct_generic( program_t *prg, long generic_id, int stop_id ) +{ + struct 
generic_info *generic_info = &prg->rtd->generic_info[generic_id]; + struct_t *new_generic = 0; + switch ( generic_info->type ) { + case GEN_MAP: { + map_t *map = colm_map_new( prg ); + map->generic_info = generic_info; + new_generic = (struct_t*) map; + break; + } + case GEN_LIST: { + list_t *list = colm_list_new( prg ); + list->generic_info = generic_info; + new_generic = (struct_t*) list; + break; + } + case GEN_PARSER: { + parser_t *parser = colm_parser_new( prg, generic_info, stop_id, 0 ); + parser->input = colm_input_new( prg ); + new_generic = (struct_t*) parser; + break; + } + } + + return new_generic; +} + +struct_t *colm_construct_reducer( program_t *prg, long generic_id, int reducer_id ) +{ + struct generic_info *generic_info = &prg->rtd->generic_info[generic_id]; + struct_t *new_generic = 0; + + parser_t *parser = colm_parser_new( prg, generic_info, 0, reducer_id ); + parser->input = colm_input_new( prg ); + new_generic = (struct_t*) parser; + + return new_generic; +} diff --git a/src/struct.h b/src/struct.h new file mode 100644 index 00000000..13f78c40 --- /dev/null +++ b/src/struct.h @@ -0,0 +1,180 @@ +/* + * Copyright 2016-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _COLM_STRUCT_H +#define _COLM_STRUCT_H + +#if defined(__cplusplus) +extern "C" { +#endif + +typedef void (*colm_destructor_t)( struct colm_program *prg, + tree_t **sp, struct colm_struct *s ); + +struct colm_struct +{ + short id; + struct colm_struct *prev, *next; +}; + +/* Must overlay colm_struct. */ +struct colm_inbuilt +{ + short id; + struct colm_struct *prev, *next; + colm_destructor_t destructor; +}; + +/* Must overlay colm_inbuilt. */ +typedef struct colm_parser +{ + short id; + struct colm_struct *prev, *next; + colm_destructor_t destructor; + + struct pda_run *pda_run; + struct colm_input *input; + tree_t *result; +} parser_t; + +/* Must overlay colm_inbuilt. */ +typedef struct colm_input +{ + short id; + struct colm_struct *prev, *next; + colm_destructor_t destructor; + + struct input_impl *impl; +} input_t; + +/* Must overlay colm_inbuilt. */ +typedef struct colm_stream +{ + short id; + struct colm_struct *prev, *next; + colm_destructor_t destructor; + + struct stream_impl *impl; +} stream_t; + +#define COLM_LIST_EL_SIZE 2 +typedef struct colm_list_el +{ + struct colm_list_el *list_next; + struct colm_list_el *list_prev; +} list_el_t; + +/* Must overlay colm_inbuilt. 
*/ +typedef struct colm_list +{ + short id; + struct colm_struct *prev, *next; + colm_destructor_t destructor; + + list_el_t *head, *tail; + long list_len; + struct generic_info *generic_info; +} list_t; + +typedef struct colm_map_el +{ + tree_t *key; + + struct colm_map_el *left, *right, *parent; + long height; + + struct colm_map_el *next, *prev; +} map_el_t; + +#define COLM_MAP_EL_SIZE ( sizeof(colm_map_el) / sizeof(void*) ) + +typedef struct colm_map +{ + short id; + struct colm_struct *prev, *next; + colm_destructor_t destructor; + + struct colm_map_el *head, *tail, *root; + long tree_size; + struct generic_info *generic_info; +} map_t; + +struct colm_struct *colm_struct_new_size( struct colm_program *prg, int size ); +struct colm_struct *colm_struct_new( struct colm_program *prg, int id ); +void colm_struct_add( struct colm_program *prg, struct colm_struct *item ); +void colm_struct_delete( struct colm_program *prg, struct colm_tree **sp, + struct colm_struct *el ); + +struct colm_struct *colm_struct_inbuilt( struct colm_program *prg, int size, + colm_destructor_t destructor ); + +#define colm_struct_get_field( obj, type, field ) \ + (type)(((void**)(((struct colm_struct*)obj)+1))[field]) + +#define colm_struct_set_field( obj, type, field, val ) \ + ((type*)(((struct colm_struct*)obj)+1))[field] = val + +#define colm_struct_get_addr( obj, type, field ) \ + (type)(&(((void **)(((struct colm_struct*)obj)+1))[field])) + +#define colm_struct_container( el, field ) \ + ((void*)el) - (field * sizeof(void*)) - sizeof(struct colm_struct) + +#define colm_generic_el_container( prg, el, genId ) \ + colm_struct_container( el, prg->rtd->generic_info[genId].el_offset ) + +#define colm_struct_to_list_el( prg, obj, genId ) \ + colm_struct_get_addr( obj, list_el_t*, prg->rtd->generic_info[genId].el_offset ) + +#define colm_struct_to_map_el( prg, obj, genId ) \ + colm_struct_get_addr( obj, map_el_t*, prg->rtd->generic_info[genId].el_offset ) + +parser_t *colm_parser_new( 
program_t *prg, struct generic_info *gi, int stop_id, int reducer ); +input_t *colm_input_new( struct colm_program *prg ); +stream_t *colm_stream_new_struct( struct colm_program *prg ); + +list_t *colm_list_new( struct colm_program *prg ); +struct colm_struct *colm_list_get( struct colm_program *prg, list_t *list, + word_t gen_id, word_t field ); +struct colm_struct *colm_list_el_get( struct colm_program *prg, + list_el_t *list_el, word_t gen_id, word_t field ); +list_el_t *colm_list_detach_head( list_t *list ); +list_el_t *colm_list_detach_tail( list_t *list ); +long colm_list_length( list_t *list ); + +map_t *colm_map_new( struct colm_program *prg ); +struct colm_struct *colm_map_el_get( struct colm_program *prg, + map_el_t *map_el, word_t gen_id, word_t field ); +struct colm_struct *colm_map_get( struct colm_program *prg, map_t *map, + word_t gen_id, word_t field ); + +struct colm_struct *colm_construct_generic( struct colm_program *prg, long generic_id, int stop_id ); +struct colm_struct *colm_construct_reducer( struct colm_program *prg, long generic_id, int reducer_id ); +struct input_impl *input_to_impl( input_t *ptr ); +struct stream_impl *stream_to_impl( stream_t *ptr ); + +#if defined(__cplusplus) +} +#endif + +#endif /* _COLM_STRUCT_H */ + diff --git a/src/synthesis.cc b/src/synthesis.cc new file mode 100644 index 00000000..17c2440a --- /dev/null +++ b/src/synthesis.cc @@ -0,0 +1,3370 @@ +/* + * Copyright 2007-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this 
permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <assert.h> +#include <stdbool.h> +#include <iostream> +#include "compiler.h" + +using std::cout; +using std::cerr; +using std::endl; + +bool isStr( UniqueType *ut ) +{ + return ut->typeId == TYPE_TREE && ut->langEl != 0 && ut->langEl->id == LEL_ID_STR; +} + +bool isTree( UniqueType *ut ) +{ + return ut->typeId == TYPE_TREE; +} + +IterDef::IterDef( Type type ) +: + type(type), + func(0) +{ +} + +IterDef::IterDef( Type type, Function *func ) +: + type(type), + func(func) +{} + +IterImpl::IterImpl( Type type ) : + type(type), + func(0), + useFuncId(false), + useSearchUT(false), + useGenericId(false) +{ + switch ( type ) { + case Tree: + inCreateWV = IN_TRITER_FROM_REF; + inCreateWC = IN_TRITER_FROM_REF; + inUnwind = IN_TRITER_UNWIND; + inDestroy = IN_TRITER_DESTROY; + inAdvance = IN_TRITER_ADVANCE; + + inGetCurR = IN_TRITER_GET_CUR_R; + inGetCurWC = IN_TRITER_GET_CUR_WC; + inSetCurWC = IN_TRITER_SET_CUR_WC; + inRefFromCur = IN_TRITER_REF_FROM_CUR; + useSearchUT = true; + break; + + case Child: + inCreateWV = IN_TRITER_FROM_REF; + inCreateWC = IN_TRITER_FROM_REF; + inUnwind = IN_TRITER_UNWIND; + inDestroy = IN_TRITER_DESTROY; + inAdvance = IN_TRITER_NEXT_CHILD; + + inGetCurR = IN_TRITER_GET_CUR_R; + inGetCurWC = IN_TRITER_GET_CUR_WC; + inSetCurWC = IN_TRITER_SET_CUR_WC; + inRefFromCur = IN_TRITER_REF_FROM_CUR; + useSearchUT = true; + break; + + case RevChild: + inCreateWV = 
IN_REV_TRITER_FROM_REF; + inCreateWC = IN_REV_TRITER_FROM_REF; + inUnwind = IN_REV_TRITER_UNWIND; + inDestroy = IN_REV_TRITER_DESTROY; + inAdvance = IN_REV_TRITER_PREV_CHILD; + + inGetCurR = IN_TRITER_GET_CUR_R; + inGetCurWC = IN_TRITER_GET_CUR_WC; + inSetCurWC = IN_TRITER_SET_CUR_WC; + inRefFromCur = IN_TRITER_REF_FROM_CUR; + useSearchUT = true; + break; + + case Repeat: + inCreateWV = IN_TRITER_FROM_REF; + inCreateWC = IN_TRITER_FROM_REF; + inUnwind = IN_TRITER_UNWIND; + inDestroy = IN_TRITER_DESTROY; + inAdvance = IN_TRITER_NEXT_REPEAT; + + inGetCurR = IN_TRITER_GET_CUR_R; + inGetCurWC = IN_TRITER_GET_CUR_WC; + inSetCurWC = IN_TRITER_SET_CUR_WC; + inRefFromCur = IN_TRITER_REF_FROM_CUR; + useSearchUT = true; + break; + + case RevRepeat: + inCreateWV = IN_TRITER_FROM_REF; + inCreateWC = IN_TRITER_FROM_REF; + inUnwind = IN_TRITER_UNWIND; + inDestroy = IN_TRITER_DESTROY; + inAdvance = IN_TRITER_PREV_REPEAT; + + inGetCurR = IN_TRITER_GET_CUR_R; + inGetCurWC = IN_TRITER_GET_CUR_WC; + inSetCurWC = IN_TRITER_SET_CUR_WC; + inRefFromCur = IN_TRITER_REF_FROM_CUR; + useSearchUT = true; + break; + + case ListEl: + inCreateWV = IN_GEN_ITER_FROM_REF; + inCreateWC = IN_GEN_ITER_FROM_REF; + inUnwind = IN_GEN_ITER_UNWIND; + inDestroy = IN_GEN_ITER_DESTROY; + inAdvance = IN_LIST_ITER_ADVANCE; + + inGetCurR = IN_GEN_ITER_GET_CUR_R; +// inGetCurWC = //IN_LIST_ITER_GET_CUR_WC; +// inSetCurWC = //IN_HALT; +// inRefFromCur = //IN_LIST_ITER_REF_FROM_CUR; + useGenericId = true; + break; + + case ListVal: + inCreateWV = IN_GEN_ITER_FROM_REF; + inCreateWC = IN_GEN_ITER_FROM_REF; + inUnwind = IN_GEN_ITER_UNWIND; + inDestroy = IN_GEN_ITER_DESTROY; + inAdvance = IN_LIST_ITER_ADVANCE; + + inGetCurR = IN_GEN_VITER_GET_CUR_R; +// inGetCurWC = //IN_LIST_ITER_GET_CUR_WC; +// inSetCurWC = //IN_HALT; +// inRefFromCur = //IN_LIST_ITER_REF_FROM_CUR; + useGenericId = true; + break; + + case RevListVal: + inCreateWV = IN_GEN_ITER_FROM_REF; + inCreateWC = IN_GEN_ITER_FROM_REF; + inUnwind = 
IN_GEN_ITER_UNWIND; + inDestroy = IN_GEN_ITER_DESTROY; + inAdvance = IN_REV_LIST_ITER_ADVANCE; + + inGetCurR = IN_GEN_VITER_GET_CUR_R; +// inGetCurWC = //IN_LIST_ITER_GET_CUR_WC; +// inSetCurWC = //IN_HALT; +// inRefFromCur = //IN_LIST_ITER_REF_FROM_CUR; + useGenericId = true; + break; + + + case MapVal: + inCreateWV = IN_GEN_ITER_FROM_REF; + inCreateWC = IN_GEN_ITER_FROM_REF; + inUnwind = IN_GEN_ITER_UNWIND; + inDestroy = IN_GEN_ITER_DESTROY; + inAdvance = IN_MAP_ITER_ADVANCE; + + inGetCurR = IN_GEN_VITER_GET_CUR_R; + inGetCurWC = IN_GEN_VITER_GET_CUR_R; //IN_HALT; //IN_LIST_ITER_GET_CUR_WC; +// inSetCurWC = IN_HALT;//IN_HALT; +// inRefFromCur = IN_HALT;//IN_LIST_ITER_REF_FROM_CUR; + useGenericId = true; + break; + + case MapEl: + inCreateWV = IN_GEN_ITER_FROM_REF; + inCreateWC = IN_GEN_ITER_FROM_REF; + inUnwind = IN_GEN_ITER_UNWIND; + inDestroy = IN_GEN_ITER_DESTROY; + inAdvance = IN_MAP_ITER_ADVANCE; + + inGetCurR = IN_GEN_ITER_GET_CUR_R; +// inGetCurWC = //IN_LIST_ITER_GET_CUR_WC; +// inSetCurWC = //IN_HALT; +// inRefFromCur = //IN_LIST_ITER_REF_FROM_CUR; + useGenericId = true; + break; + + case User: + assert(false); + } +} + +IterImpl::IterImpl( Type type, Function *func ) : + type(type), + func(func), + useFuncId(true), + useSearchUT(true), + useGenericId(false), + inCreateWV(IN_UITER_CREATE_WV), + inCreateWC(IN_UITER_CREATE_WC), + inUnwind(IN_UITER_UNWIND), + inDestroy(IN_UITER_DESTROY), + inAdvance(IN_UITER_ADVANCE), + inGetCurR(IN_UITER_GET_CUR_R), + inGetCurWC(IN_UITER_GET_CUR_WC), + inSetCurWC(IN_UITER_SET_CUR_WC), + inRefFromCur(IN_UITER_REF_FROM_CUR) +{} + +IterDef *Compiler::findIterDef( IterDef::Type type, Function *func ) +{ + IterDefSetEl *el = iterDefSet.find( IterDef( type, func ) ); + if ( el == 0 ) + el = iterDefSet.insert( IterDef( type, func ) ); + return &el->key; +} + +IterDef *Compiler::findIterDef( IterDef::Type type ) +{ + IterDefSetEl *el = iterDefSet.find( IterDef( type ) ); + if ( el == 0 ) + el = iterDefSet.insert( IterDef( type ) 
); + return &el->key; +} + +/* Return the unique type for the given key. The key is looked up in + * uniqeTypeMap and, when absent, a new UniqueType is allocated and inserted + * first. The overloads below differ only in the second key component. */ +UniqueType *Compiler::findUniqueType( enum TYPE typeId ) +{ + UniqueType searchKey( typeId ); + UniqueType *uniqueType = uniqeTypeMap.find( &searchKey ); + if ( uniqueType == 0 ) { + uniqueType = new UniqueType( typeId ); + uniqeTypeMap.insert( uniqueType ); + } + return uniqueType; +} + +UniqueType *Compiler::findUniqueType( enum TYPE typeId, LangEl *langEl ) +{ + UniqueType searchKey( typeId, langEl ); + UniqueType *uniqueType = uniqeTypeMap.find( &searchKey ); + if ( uniqueType == 0 ) { + uniqueType = new UniqueType( typeId, langEl ); + uniqeTypeMap.insert( uniqueType ); + } + return uniqueType; +} + +UniqueType *Compiler::findUniqueType( enum TYPE typeId, IterDef *iterDef ) +{ + UniqueType searchKey( typeId, iterDef ); + UniqueType *uniqueType = uniqeTypeMap.find( &searchKey ); + if ( uniqueType == 0 ) { + uniqueType = new UniqueType( typeId, iterDef ); + uniqeTypeMap.insert( uniqueType ); + } + return uniqueType; +} + +UniqueType *Compiler::findUniqueType( enum TYPE typeId, StructEl *structEl ) +{ + UniqueType searchKey( typeId, structEl ); + UniqueType *uniqueType = uniqeTypeMap.find( &searchKey ); + if ( uniqueType == 0 ) { + uniqueType = new UniqueType( typeId, structEl ); + uniqeTypeMap.insert( uniqueType ); + } + return uniqueType; +} + +UniqueType *Compiler::findUniqueType( enum TYPE typeId, GenericType *generic ) +{ + UniqueType searchKey( typeId, generic ); + UniqueType *uniqueType = uniqeTypeMap.find( &searchKey ); + if ( uniqueType == 0 ) { + uniqueType = new UniqueType( typeId, generic ); + uniqeTypeMap.insert( uniqueType ); + } + return uniqueType; +} + +/* Find a field by its 0-based position in fieldList. Returns 0 when offset is + * negative or past the end of the list. */ +ObjectField *ObjectDef::findFieldNum( long offset ) +{ + /* Bounds check, both ends. A negative offset would otherwise skip the + * walk below and dereference the head iterator, which is not valid for an + * empty list. */ + if ( offset < 0 || offset >= fieldList.length() ) + return 0; + + /* Walk the list iterator forward to the requested position. The counter + * has the same type as offset so the comparison cannot truncate. */ + long fn = 0; + FieldList::Iter field = fieldList; + while ( fn < offset ) { + fn++; + field++; + } + + return field->value; +} + +/* Finds the first field by type. 
*/ +ObjectField *ObjectDef::findFieldType( Compiler *pd, UniqueType *ut ) +{ + for ( FieldList::Iter f = fieldList; f.lte(); f++ ) { + UniqueType *fUT = f->value->typeRef->resolveType( pd ); + if ( fUT == ut ) + return f->value; + } + return 0; +} + + +long sizeOfField( UniqueType *fieldUT ) +{ + long size = 0; + switch ( fieldUT->typeId ) { + case TYPE_ITER: + /* Select on the iterator type. */ + switch ( fieldUT->iterDef->type ) { + case IterDef::Tree: + case IterDef::Child: + case IterDef::Repeat: + case IterDef::RevRepeat: + size = sizeof(tree_iter_t) / sizeof(word_t); + break; + + case IterDef::RevChild: + size = sizeof(rev_tree_iter_t) / sizeof(word_t); + break; + + case IterDef::MapEl: + case IterDef::ListEl: + case IterDef::RevListVal: + size = sizeof(generic_iter_t) / sizeof(word_t); + break; + + case IterDef::User: + /* User iterators are just a pointer to the user_iter_t struct. The + * struct needs to go right beneath the call to the user iterator + * so it can be found by a yield. It is therefore allocated on the + * stack right before the call. */ + size = 1; + break; + } + break; + case TYPE_REF: + size = 2; + break; + case TYPE_GENERIC: + size = 1; + break; + case TYPE_LIST_PTRS: + size = COLM_LIST_EL_SIZE; + break; + case TYPE_MAP_PTRS: + size = COLM_MAP_EL_SIZE; + break; + default: + size = 1; + break; + } + + return size; +} + +void ObjectDef::referenceField( Compiler *pd, ObjectField *field ) +{ + field->beenReferenced = true; +} + +UniqueType *LangVarRef::loadField( Compiler *pd, CodeVect &code, + ObjectDef *inObject, ObjectField *el, bool forWriting, bool revert ) const +{ + /* Ensure that the field is referenced. */ + inObject->referenceField( pd, el ); + + UniqueType *elUT = el->typeRef->uniqueType; + + if ( elUT->val() ) { + if ( forWriting ) { + /* The instruction, depends on whether or not we are reverting. 
*/ + if ( pd->revertOn && revert ) + code.append( el->inGetValWV ); + else + code.append( el->inGetValWC ); + } + else { + /* Loading a value for reading. */ + code.append( el->inGetValR ); + } + } + else { + /* Not a value type. The instruction still depends on whether we are + * reading or writing. */ + if ( forWriting ) { + /* Iterators load their current for writing. Otherwise the + * instruction depends on whether or not we are reverting. */ + if ( elUT->typeId == TYPE_ITER ) + code.append( el->iterImpl->inGetCurWC ); + else if ( pd->revertOn && revert ) + code.append( el->inGetWV ); + else + code.append( el->inGetWC ); + } + else { + /* Loading something for reading. */ + if ( elUT->typeId == TYPE_ITER ) + code.append( el->iterImpl->inGetCurR ); + else + code.append( el->inGetR ); + } + } + + if ( el->useGenericId ) + code.appendHalf( el->generic->id ); + + if ( el->useOffset() ) { + /* Gets of locals and fields require offsets. Fake vars like token + * data and lhs don't require it. */ + code.appendHalf( el->offset ); + } + else if ( el->isRhsGet() ) { + /* Need to place the array computing the val. */ + code.append( el->rhsVal.length() ); + for ( Vector<RhsVal>::Iter rg = el->rhsVal; rg.lte(); rg++ ) { + code.append( rg->prodEl->production->prodNum ); + code.append( rg->prodEl->pos ); + } + } + + if ( el->isConstVal ) { + code.appendHalf( el->constValId ); + + if ( el->constValId == CONST_ARG ) { + /* Make sure we have this string. */ + StringMapEl *mapEl = 0; + if ( pd->literalStrings.insert( el->constValArg, &mapEl ) ) + mapEl->value = pd->literalStrings.length()-1; + + code.appendWord( mapEl->value ); + } + } + + /* If we are dealing with an iterator then dereference it. */ + if ( elUT->typeId == TYPE_ITER ) + elUT = el->typeRef->searchUniqueType; + + return elUT; +} + +/* Emit one reference-creating instruction per qualification item and return + * the number of items. The qualification must start at a local frame. There + * cannot be any pointer. */ +long LangVarRef::loadQualificationRefs( Compiler *pd, CodeVect &code, + NameScope *rootScope ) const +{ + long count = 0; + + /* Start the search from the root object. 
*/ + NameScope *searchScope = rootScope; + + for ( QualItemVect::Iter qi = *qual; qi.lte(); qi++ ) { + /* Lookup the field in the current qualification. */ + ObjectField *el = searchScope->findField( qi->data ); + if ( el == 0 ) + error(qi->loc) << "cannot resolve qualification " << qi->data << endp; + + if ( qi.pos() > 0 ) { + if ( el->isRhsGet() ) { + code.append( IN_RHS_REF_FROM_QUAL_REF ); + code.appendHalf( 0 ); + + code.append( el->rhsVal.length() ); + for ( Vector<RhsVal>::Iter rg = el->rhsVal; rg.lte(); rg++ ) { + code.append( rg->prodEl->production->prodNum ); + code.append( rg->prodEl->pos ); + } + } + else { + code.append( IN_REF_FROM_QUAL_REF ); + code.appendHalf( 0 ); + code.appendHalf( el->offset ); + } + } + else if ( el->iterImpl != 0 ) { + code.append( el->iterImpl->inRefFromCur ); + code.appendHalf( el->offset ); + } + else if ( el->typeRef->type == TypeRef::Ref ) { + code.append( IN_REF_FROM_REF ); + code.appendHalf( el->offset ); + } + else { + code.append( IN_REF_FROM_LOCAL ); + code.appendHalf( el->offset ); + } + + UniqueType *elUT = el->typeRef->uniqueType; + if ( elUT->typeId == TYPE_ITER ) + elUT = el->typeRef->searchUniqueType; + + assert( qi->form == QualItem::Dot ); + + ObjectDef *searchObjDef = elUT->objectDef(); + searchScope = searchObjDef->rootScope; + + count += 1; + } + return count; +} + +void LangVarRef::loadQualification( Compiler *pd, CodeVect &code, + NameScope *rootScope, int lastPtrInQual, bool forWriting, bool revert ) const +{ + /* Start the search from the root object. */ + NameScope *searchScope = rootScope; + + for ( QualItemVect::Iter qi = *qual; qi.lte(); qi++ ) { + /* Lookup the field int the current qualification. 
*/ + ObjectField *el = searchScope->findField( qi->data ); + if ( el == 0 ) + error(qi->loc) << "cannot resolve qualification " << qi->data << endp; + + if ( forWriting && el->refActive ) + error(qi->loc) << "reference active, cannot write to object" << endp; + + bool lfForWriting = forWriting; + bool lfRevert = revert; + + /* If there is a pointer in the qualification, we need to compute + * forWriting and revert. */ + if ( lastPtrInQual >= 0 ) { + if ( qi.pos() <= lastPtrInQual ) { + /* If we are before or at the pointer we are strictly read + * only, regardless of the origin. */ + lfForWriting = false; + lfRevert = false; + } + else { + /* If we are past the pointer then we are always reverting + * because the object is global. Forwriting is as passed in. + * */ + lfRevert = true; + } + } + + UniqueType *qualUT = loadField( pd, code, searchScope->owningObj, + el, lfForWriting, lfRevert ); + + if ( qi->form == QualItem::Dot ) { + /* Cannot be a pointer. Iterator yes (access of the iterator, not + * the current). */ + if ( qualUT->ptr() ) + error(loc) << "dot cannot be used to access a pointer" << endp; + } + else if ( qi->form == QualItem::Arrow ) { + if ( qualUT->ptr() ) { + /* This deref instruction exists to capture the pointer for + * reverse execution purposes. */ + if ( pd->revertOn && qi.pos() == lastPtrInQual && forWriting ) { + /* This is like a global load. */ + code.append( IN_PTR_ACCESS_WV ); + } + } + else { + error(loc) << "arrow operator cannot be used to " + "access this type" << endp; + } + } + + ObjectDef *searchObjDef = qualUT->objectDef(); + searchScope = searchObjDef->rootScope; + } +} + /* Emit the context load (read, write-copy or write-revert variant) and then + * the qualification, searching from the root scope of the struct's object. */ +void LangVarRef::loadContextObj( Compiler *pd, CodeVect &code, + int lastPtrInQual, bool forWriting ) const +{ + /* Start the search in the struct's object definition. */ + ObjectDef *rootObj = structDef->objectDef; + + if ( forWriting && lastPtrInQual < 0 ) { + /* If we are writing and no pointer was found in the qualification + * then load the context with a revert. 
*/ + if ( pd->revertOn ) + code.append( IN_LOAD_CONTEXT_WV ); + else + code.append( IN_LOAD_CONTEXT_WC ); + } + else { + /* Either we are reading or we are loading a pointer that will be + * dereferenced. */ + code.append( IN_LOAD_CONTEXT_R ); + } + + loadQualification( pd, code, rootObj->rootScope, lastPtrInQual, forWriting, true ); +} + /* Emit the global load (read, write-copy or write-revert variant) and then + * the qualification. The search starts in nspace's root scope when nspace is + * set, otherwise in the root namespace's root scope. */ +void LangVarRef::loadGlobalObj( Compiler *pd, CodeVect &code, + int lastPtrInQual, bool forWriting ) const +{ + NameScope *scope = nspace != 0 ? nspace->rootScope : pd->rootNamespace->rootScope; + + if ( forWriting && lastPtrInQual < 0 ) { + /* If we are writing and no pointer was found in the qualification + * then load the global with a revert. */ + if ( pd->revertOn ) + code.append( IN_LOAD_GLOBAL_WV ); + else + code.append( IN_LOAD_GLOBAL_WC ); + } + else { + /* Either we are reading or we are loading a pointer that will be + * dereferenced. */ + code.append( IN_LOAD_GLOBAL_R ); + } + + loadQualification( pd, code, scope, lastPtrInQual, forWriting, true ); +} + /* Emits the same global load as loadGlobalObj, but the scope to search is + * supplied by the caller. */ +void LangVarRef::loadScopedObj( Compiler *pd, CodeVect &code, + NameScope *scope, int lastPtrInQual, bool forWriting ) const +{ +// NameScope *scope = nspace != 0 ? nspace->rootScope : pd->rootNamespace->rootScope; + + if ( forWriting && lastPtrInQual < 0 ) { + /* If we are writing and no pointer was found in the qualification + * then load the global with a revert. */ + if ( pd->revertOn ) + code.append( IN_LOAD_GLOBAL_WV ); + else + code.append( IN_LOAD_GLOBAL_WC ); + } + else { + /* Either we are reading or we are loading a pointer that will be + * dereferenced. */ + code.append( IN_LOAD_GLOBAL_R ); + } + + loadQualification( pd, code, scope, lastPtrInQual, forWriting, true ); +} + +void LangVarRef::loadInbuiltObject( Compiler *pd, CodeVect &code, + int lastPtrInQual, bool forWriting ) const +{ + /* Start the search in the local frame. 
*/ + loadQualification( pd, code, scope, lastPtrInQual, forWriting, pd->revertOn ); +} + +void LangVarRef::loadLocalObj( Compiler *pd, CodeVect &code, + int lastPtrInQual, bool forWriting ) const +{ + /* Start the search in the local frame. */ + loadQualification( pd, code, scope, lastPtrInQual, forWriting, false ); +} + +void LangVarRef::loadObj( Compiler *pd, CodeVect &code, + int lastPtrInQual, bool forWriting ) const +{ + if ( nspaceQual != 0 && nspaceQual->qualNames.length() > 0 ) { + Namespace *nspace = pd->rootNamespace->findNamespace( nspaceQual->qualNames[0] ); + loadScopedObj( pd, code, nspace->rootScope, lastPtrInQual, forWriting ); + } + else if ( isInbuiltObject() ) + loadInbuiltObject( pd, code, lastPtrInQual, forWriting ); + else if ( isLocalRef() ) + loadLocalObj( pd, code, lastPtrInQual, forWriting ); + else if ( isProdRef( pd ) ) { + LangVarRef *dup = new LangVarRef( *this ); + dup->qual->prepend( QualItem( QualItem::Dot, InputLoc(), scope->caseClauseVarRef->name ) ); + dup->loadObj( pd, code, lastPtrInQual, forWriting ); + } + else if ( isStructRef() ) + loadContextObj( pd, code, lastPtrInQual, forWriting ); + else + loadGlobalObj( pd, code, lastPtrInQual, forWriting ); +} + + +bool castAssignment( Compiler *pd, CodeVect &code, UniqueType *destUT, + UniqueType *destSearchUT, UniqueType *srcUT ) +{ + if ( destUT == srcUT ) + return true; + + /* Casting trees to any. */ + if ( destUT->typeId == TYPE_TREE && destUT->langEl == pd->anyLangEl && + srcUT->typeId == TYPE_TREE ) + return true; + + /* Setting a reference from a tree. */ + if ( destUT->typeId == TYPE_REF && srcUT->typeId == TYPE_TREE && + destUT->langEl == srcUT->langEl ) + return true; + + /* Setting a tree from a reference. */ + if ( destUT->typeId == TYPE_TREE && srcUT->typeId == TYPE_REF && + destUT->langEl == srcUT->langEl ) + return true; + + /* Setting an iterator from a tree. 
*/ + /* Callers may pass 0 for destSearchUT (argument and field-init checks + * do), so guard the dereference. Without a search type the iterator + * case simply does not match. */ + if ( destUT->typeId == TYPE_ITER && srcUT->typeId == TYPE_TREE && + destSearchUT != 0 && destSearchUT->langEl == srcUT->langEl ) + return true; + + /* Assigning nil to a tree. */ + if ( destUT->typeId == TYPE_TREE && srcUT->typeId == TYPE_NIL ) + return true; + + /* Assigning nil to a struct. */ + if ( destUT->typeId == TYPE_STRUCT && srcUT->typeId == TYPE_NIL ) + return true; + + /* Assigning nil to a generic. */ + if ( destUT->typeId == TYPE_GENERIC && srcUT->typeId == TYPE_NIL ) + return true; + + /* Assigning a tree of the any type to a tree. */ + if ( destUT->typeId == TYPE_TREE && srcUT->typeId == TYPE_TREE && + srcUT->langEl == pd->anyLangEl ) + return true; + + return false; +} + +/* Emit the iterator's set-current instruction followed by the field offset. + * Only code and el are used here. */ +void LangVarRef::setFieldIter( Compiler *pd, CodeVect &code, + ObjectDef *inObject, ObjectField *el, UniqueType *objUT, + UniqueType *exprType, bool revert ) const +{ + code.append( el->iterImpl->inSetCurWC ); + code.appendHalf( el->offset ); +} + +/* Emit the set instruction for a field. The opcode is chosen by whether the + * expression type is a value and by whether the write must be revertable. */ +void LangVarRef::setField( Compiler *pd, CodeVect &code, + ObjectDef *inObject, ObjectField *el, + UniqueType *exprUT, bool revert ) const +{ + /* Ensure that the field is referenced. */ + inObject->referenceField( pd, el ); + + if ( exprUT->val() ) { + if ( pd->revertOn && revert ) + code.append( el->inSetValWV ); + else + code.append( el->inSetValWC ); + } + else { + if ( pd->revertOn && revert ) + code.append( el->inSetWV ); + else + code.append( el->inSetWC ); + } + + /* Maybe write out an offset. */ + if ( el->useOffset() ) + code.appendHalf( el->offset ); + else if ( el->isRhsGet() ) { + /* Need to place the array computing the val. */ + code.append( el->rhsVal.length() ); + for ( Vector<RhsVal>::Iter rg = el->rhsVal; rg.lte(); rg++ ) { + code.append( rg->prodEl->production->prodNum ); + code.append( rg->prodEl->pos ); + } + } +} + + +UniqueType *LangVarRef::evaluate( Compiler *pd, CodeVect &code, bool forWriting ) const +{ + /* Lookup the loadObj. */ + VarRefLookup lookup = lookupField( pd ); + + /* Load the object, if any. */ + loadObj( pd, code, lookup.lastPtrInQual, forWriting ); + + /* Load the field. 
*/ + UniqueType *ut = loadField( pd, code, lookup.inObject, + lookup.objField, forWriting, false ); + + return ut; +} + +bool LangVarRef::canTakeRef( Compiler *pd, VarRefLookup &lookup ) const +{ + bool canTake = false; + + /* If the var is not a local, it must be an attribute accessed + * via a local and attributes. */ + if ( lookup.inObject->type == ObjectDef::FrameType ) + canTake = true; + else if ( isLocalRef() ) { + if ( lookup.lastPtrInQual < 0 && ! lookup.uniqueType->ptr() ) + canTake = true; + } + + return canTake; +} + +void LangVarRef::verifyRefPossible( Compiler *pd, VarRefLookup &lookup ) const +{ + bool canTake = canTakeRef( pd, lookup ); + + if ( !canTake ) { + error(loc) << "can only take references of locals or " + "attributes accessed via a local" << endp; + } + + if ( lookup.objField->refActive ) + error(loc) << "reference currently active, cannot take another" << endp; +} + +bool LangExpr::canTakeRef( Compiler *pd ) const +{ + bool canTake = false; + + if ( type == LangExpr::TermType && term->type == LangTerm::VarRefType ) { + VarRefLookup lookup = term->varRef->lookupField( pd ); + if ( term->varRef->canTakeRef( pd, lookup ) ) + canTake = true; + } + + return canTake; +} + + +/* Return the field referenced. */ +ObjectField *LangVarRef::preEvaluateRef( Compiler *pd, CodeVect &code ) const +{ + VarRefLookup lookup = lookupField( pd ); + + verifyRefPossible( pd, lookup ); + + loadQualificationRefs( pd, code, scope ); + + return lookup.objField; +} + +/* Return the field referenced. */ +ObjectField *LangVarRef::evaluateRef( Compiler *pd, CodeVect &code, long pushCount ) const +{ + VarRefLookup lookup = lookupField( pd ); + + verifyRefPossible( pd, lookup ); + + /* Ensure that the field is referenced. */ + lookup.inObject->referenceField( pd, lookup.objField ); + + /* Note that we could have modified children. 
*/ + if ( qual->length() == 0 ) + lookup.objField->refActive = true; + + /* Whenever we take a reference we have to assume writing and that the + * tree is dirty. */ + lookup.objField->dirtyTree = true; + + if ( qual->length() > 0 ) { + if ( lookup.objField->isRhsGet() ) { + code.append( IN_RHS_REF_FROM_QUAL_REF ); + code.appendHalf( pushCount ); + + ObjectField *el = lookup.objField; + code.append( el->rhsVal.length() ); + for ( Vector<RhsVal>::Iter rg = el->rhsVal; rg.lte(); rg++ ) { + code.append( rg->prodEl->production->prodNum ); + code.append( rg->prodEl->pos ); + } + } + else { + code.append( IN_REF_FROM_QUAL_REF ); + code.appendHalf( pushCount ); + code.appendHalf( lookup.objField->offset ); + } + } + else if ( lookup.objField->iterImpl != 0 ) { + code.append( lookup.objField->iterImpl->inRefFromCur ); + code.appendHalf( lookup.objField->offset ); + } + else if ( lookup.objField->typeRef->type == TypeRef::Ref ) { + code.append( IN_REF_FROM_REF ); + code.appendHalf( lookup.objField->offset ); + } + else { + code.append( IN_REF_FROM_LOCAL ); + code.appendHalf( lookup.objField->offset ); + } + + return lookup.objField; +} + +IterImpl *LangVarRef::chooseTriterCall( Compiler *pd, + UniqueType *searchUT, CallArgVect *args ) +{ + IterImpl *iterImpl = 0; + + /* Evaluate the triter args and choose the triter call based on it. */ + if ( args->length() == 1 ) { + /* Evaluate the expression. 
*/ + CodeVect unused; + CallArgVect::Iter pe = *args; + UniqueType *exprUT = (*pe)->expr->evaluate( pd, unused ); + + if ( exprUT->typeId == TYPE_GENERIC && exprUT->generic->typeId == GEN_LIST ) { + if ( searchUT == exprUT->generic->elUt ) + iterImpl = new IterImpl( IterImpl::ListEl ); + else + iterImpl = new IterImpl( IterImpl::ListVal ); + } + + if ( exprUT->typeId == TYPE_GENERIC && exprUT->generic->typeId == GEN_MAP ) { + if ( searchUT == exprUT->generic->elUt ) + iterImpl = new IterImpl( IterImpl::MapEl ); + else + iterImpl = new IterImpl( IterImpl::MapVal ); + } + } + + if ( iterImpl == 0 ) + iterImpl = new IterImpl( IterImpl::Tree ); + + return iterImpl; +} + +ObjectField **LangVarRef::evaluateArgs( Compiler *pd, CodeVect &code, + VarRefLookup &lookup, CallArgVect *args ) +{ + /* Parameter list is given only for user defined methods. Otherwise it + * will be null. */ + ParameterList *paramList = lookup.objMethod->paramList; + + /* Match the number of arguments. */ + int numArgs = args != 0 ? args->length() : 0; + if ( numArgs != lookup.objMethod->numParams ) + error(loc) << "wrong number of arguments" << endp; + + /* This is for storing the object fields used by references. */ + ObjectField **paramRefs = new ObjectField*[numArgs]; + memset( paramRefs, 0, sizeof(ObjectField*) * numArgs ); + + /* Done now if there are no args. */ + if ( args == 0 ) + return paramRefs; + + /* We use this only if there is a paramter list. */ + ParameterList::Iter p; + long size = 0; + long tempPops = 0; + long pos = 0; + + paramList != 0 && ( p = *paramList ); + for ( CallArgVect::Iter pe = *args; pe.lte(); pe++ ) { + /* Get the expression and the UT for the arg. */ + LangExpr *expression = (*pe)->expr; + UniqueType *paramUT = lookup.objMethod->paramUTs[pe.pos()]; + + if ( paramUT->typeId == TYPE_REF ) { + if ( expression->canTakeRef( pd ) ) { + /* Push object loads for reference parameters. 
*/ + LangVarRef *varRef = expression->term->varRef; + ObjectField *refOf = varRef->preEvaluateRef( pd, code ); + paramRefs[pe.pos()] = refOf; + + size += varRef->qual->length() * 2; + (*pe)->offQualRef = size; + /**/ + + refOf = varRef->evaluateRef( pd, code, 0 ); //(size - (*pe)->offQualRef) ); + paramRefs[pe.pos()] = refOf; + + //size += 2; + } + else { + /* First pass we need to allocate and evaluate temporaries. */ + UniqueType *exprUT = expression->evaluate( pd, code ); + + (*pe)->exprUT = exprUT; + + size += 1; + (*pe)->offTmp = size; + tempPops += 1; + /**/ + code.append( IN_REF_FROM_BACK ); + code.appendHalf( 0 ); //size - (*pe)->offTmp ); + + //size += 2; + } + + if ( lookup.objMethod->func ) { + code.append( IN_STASH_ARG ); + code.appendHalf( pos ); + code.appendHalf( 2 ); + } + + pos += 2; + } + else { + UniqueType *exprUT = expression->evaluate( pd, code ); + // pd->unwindCode.remove( 0, 1 ); + + if ( !castAssignment( pd, code, paramUT, 0, exprUT ) ) + error(loc) << "arg " << pe.pos()+1 << " is of the wrong type" << endp; + + size += 1; + + if ( lookup.objMethod->func && !lookup.objMethod->func->inHost ) { + code.append( IN_STASH_ARG ); + code.appendHalf( pos ); + code.appendHalf( 1 ); + } + + pos += 1; + } + + /* Advance the parameter list iterator if we have it. */ + paramList != 0 && p.increment(); + } + + argSize = tempPops; + + return paramRefs; +} + +void LangVarRef::resetActiveRefs( Compiler *pd, VarRefLookup &lookup, + ObjectField **paramRefs ) const +{ + /* Parameter list is given only for user defined methods. Otherwise it + * will be null. 
*/ + for ( long p = 0; p < lookup.objMethod->numParams; p++ ) { + if ( paramRefs[p] != 0 ) + paramRefs[p]->refActive = false; + } +} + +bool LangVarRef::isFinishCall( VarRefLookup &lookup ) const +{ + return lookup.objMethod->type == ObjectMethod::ParseFinish; +} + +void LangVarRef::callOperation( Compiler *pd, CodeVect &code, VarRefLookup &lookup ) const +{ + /* This is for writing if it is a non-const builtin. */ + bool forWriting = lookup.objMethod->func == 0 && + !lookup.objMethod->isConst; + + if ( lookup.objMethod->useCallObj ) { + /* Load the object, if any. */ + loadObj( pd, code, lookup.lastPtrInQual, forWriting ); + } + + /* Check if we need to revert the function. If it operates on a reference + * or if it is not local then we need to revert it. */ + bool revert = lookup.lastPtrInQual >= 0 || !isLocalRef() || isInbuiltObject(); + bool unwind = false; + + if ( isFinishCall( lookup ) ) { + code.append( IN_SEND_EOF_W ); + + LangTerm::parseFrag( pd, code, 0 ); + + code.append( IN_GET_PARSER_MEM_R ); + code.appendHalf( 0 ); + } + else { + if ( pd->revertOn && revert ) { + if ( lookup.objMethod->opcodeWV == IN_CALL_WV || + lookup.objMethod->opcodeWC == FN_EXIT ) + unwind = true; + + if ( lookup.objMethod->useFnInstr ) + code.append( IN_FN ); + code.append( lookup.objMethod->opcodeWV ); + } + else { + if ( lookup.objMethod->opcodeWC == IN_CALL_WC || + lookup.objMethod->opcodeWC == FN_EXIT ) + unwind = true; + + if ( lookup.objMethod->useFnInstr ) + code.append( IN_FN ); + code.append( lookup.objMethod->opcodeWC ); + } + } + + if ( lookup.objMethod->useFuncId ) + code.appendHalf( lookup.objMethod->funcId ); + + if ( lookup.objMethod->useGenericId ) + code.appendHalf( lookup.objMethod->generic->id ); + + if ( unwind ) { + if ( pd->unwindCode.length() == 0 ) + code.appendHalf( 0 ); + else { + code.appendHalf( pd->unwindCode.length() + 1 ); + code.append( pd->unwindCode ); + code.append( IN_DONE ); + } + } +} + +void LangVarRef::popRefQuals( Compiler *pd, CodeVect 
&code, + VarRefLookup &lookup, CallArgVect *args, bool temps ) const +{ + long popCount = 0; + + /* Count the words to pop for reference args taken from variables, then + * emit the pops. */ + if ( args != 0 ) { + for ( CallArgVect::Iter pe = *args; pe.lte(); pe++ ) { + /* Get the expression and the UT for the arg. */ + LangExpr *expression = (*pe)->expr; + UniqueType *paramUT = lookup.objMethod->paramUTs[pe.pos()]; + + if ( paramUT->typeId == TYPE_REF ) { + if ( expression->canTakeRef( pd ) ) { + LangVarRef *varRef = expression->term->varRef; + popCount += varRef->qual->length() * 2; + } + } + } + + if ( popCount > 0 ) { + code.append( IN_POP_N_WORDS ); + code.appendHalf( (short)popCount ); + } + + if ( temps ) { + /* Reference args that could not be taken from a variable were + * evaluated into temporaries. Pop one tree for each. */ + for ( CallArgVect::Iter pe = *args; pe.lte(); pe++ ) { + /* Get the expression and the UT for the arg. */ + LangExpr *expression = (*pe)->expr; + UniqueType *paramUT = lookup.objMethod->paramUTs[pe.pos()]; + + if ( paramUT->typeId == TYPE_REF ) { + if ( ! expression->canTakeRef( pd ) ) + code.append( IN_POP_TREE ); + } + } + } + } +} + + +/* Emit a method call: argument-space preparation for non-host functions, the + * arguments, the call opcode, then cleanup. Returns the unique type found by + * the method lookup. */ +UniqueType *LangVarRef::evaluateCall( Compiler *pd, CodeVect &code, CallArgVect *args ) +{ + /* Look up the method being called. */ + VarRefLookup lookup = lookupMethod( pd ); + + Function *func = lookup.objMethod->func; + + /* Prepare the contiguous call args space. The half written here is a + * placeholder that is patched once the call has been emitted. asLoc is + * initialized so it is never read indeterminate, even though it is only + * used under the same condition that assigns it. */ + int asLoc = 0; + if ( func != 0 && !func->inHost ) { + code.append( IN_PREP_ARGS ); + asLoc = code.length(); + code.appendHalf( 0 ); + } + + /* Evaluate and push the arguments. */ + ObjectField **paramRefs = evaluateArgs( pd, code, lookup, args ); + + /* Write the call opcode. */ + callOperation( pd, code, lookup ); + + popRefQuals( pd, code, lookup, args, true ); + + resetActiveRefs( pd, lookup, paramRefs); + delete[] paramRefs; + + if ( func != 0 && !func->inHost ) { + code.append( IN_CLEAR_ARGS ); + code.appendHalf( func->paramListSize ); + code.setHalf( asLoc, func->paramListSize ); + } + + if ( func != 0 && !func->inHost ) + code.append( IN_LOAD_RETVAL ); + + /* Return the type to the expression. 
*/ + return lookup.uniqueType; +} + +/* Can match on a tree or a ref. A tree always comes back. */ +UniqueType *LangTerm::evaluateMatch( Compiler *pd, CodeVect &code ) const +{ + /* Add the vars bound by the pattern into the local scope. */ + for ( PatternItemList::Iter item = *pattern->list; item.lte(); item++ ) { + if ( item->varRef != 0 ) + item->bindId = pattern->nextBindId++; + } + + UniqueType *ut = varRef->evaluate( pd, code ); + if ( ut->typeId != TYPE_TREE && ut->typeId != TYPE_REF ) { + error(varRef->loc) << "expected match against a tree/ref type" << endp; + } + + /* Store the language element type in the pattern. This is needed by + * the pattern parser. */ + pattern->langEl = ut->langEl; + + code.append( IN_MATCH ); + code.appendHalf( pattern->patRepId ); + + for ( PatternItemList::Iter item = pattern->list->last(); item.gtb(); item-- ) { + if ( item->varRef != 0 ) { + /* Compute the unique type. */ + UniqueType *exprType = pd->findUniqueType( TYPE_TREE, item->prodEl->langEl ); + + /* Get the type of the variable being assigned to. */ + VarRefLookup lookup = item->varRef->lookupField( pd ); + + item->varRef->loadObj( pd, code, lookup.lastPtrInQual, false ); + item->varRef->setField( pd, code, lookup.inObject, + lookup.objField, exprType, false ); + } + } + + /* The process of matching turns refs into trees. */ + if ( ut->typeId == TYPE_REF ) + ut = pd->findUniqueType( TYPE_TREE, ut->langEl ); + + return ut; +} + +UniqueType *LangTerm::evaluateProdCompare( Compiler *pd, CodeVect &code ) const +{ + UniqueType *ut = varRef->evaluate( pd, code ); + if ( ut->typeId != TYPE_TREE && ut->typeId != TYPE_REF ) { + error(varRef->loc) << "expected match against a tree/ref type" << endp; + } + code.append( IN_PROD_NUM ); + + /* look up the production name. 
*/ + Production *prod = pd->findProductionByLabel( ut->langEl, this->prod ); + + if ( prod == 0 ) { + error( this->loc) << "could not find " + "production label: " << this->prod << endp; + } + + unsigned int n = prod->prodNum; + code.append( IN_LOAD_INT ); + code.appendWord( n ); + + code.append( IN_TST_EQL_VAL ); + + if ( expr != 0 ) { + code.append( IN_DUP_VAL ); + + /* Test: jump past the match if the production test failed. We don't have + * the distance yet. */ + long jumpFalse = code.length(); + code.append( IN_JMP_FALSE_VAL ); + code.appendHalf( 0 ); + + code.append( IN_POP_VAL ); + + expr->evaluate( pd, code ); + + /* Set the jump false distance. */ + long falseDist = code.length() - jumpFalse - 3; + code.setHalf( jumpFalse+1, falseDist ); + + return ut; + } + + return pd->uniqueTypeInt; +} + +void LangTerm::evaluateCapture( Compiler *pd, CodeVect &code, UniqueType *valUt ) const +{ + if ( varRef != 0 ) { + /* Get the type of the variable being assigned to. */ + VarRefLookup lookup = varRef->lookupField( pd ); + + /* Need a copy of the tree. */ + code.append( lookup.uniqueType->tree() ? IN_DUP_TREE : IN_DUP_VAL ); + + varRef->loadObj( pd, code, lookup.lastPtrInQual, false ); + varRef->setField( pd, code, lookup.inObject, lookup.objField, valUt, false ); + } +} + +UniqueType *LangTerm::evaluateNew( Compiler *pd, CodeVect &code ) const +{ + /* What is being newstructed. 
*/ + UniqueType *newUT = typeRef->uniqueType; + + if ( newUT->typeId != TYPE_STRUCT && newUT->typeId != TYPE_GENERIC ) + error(loc) << "can only new a struct or generic" << endp; + + bool context = false; + if ( newUT->typeId == TYPE_GENERIC && + newUT->generic->typeId == GEN_PARSER && + newUT->generic->elUt->langEl->contextIn != 0 ) + { + if ( fieldInitArgs == 0 || fieldInitArgs->length() != 1 ) + error(loc) << "parse command requires just context " << endp; + context = true; + } + + if ( newUT->typeId == TYPE_GENERIC ) { + code.append( IN_CONS_GENERIC ); + code.appendHalf( newUT->generic->id ); + code.appendHalf( 0 ); // stopId + + if ( newUT->generic->typeId == GEN_PARSER ) { + + } + } + else if ( newUT->typeId == TYPE_STRUCT && newUT->structEl == pd->streamSel ) { + code.append( IN_NEW_STREAM ); + } + else { + code.append( IN_NEW_STRUCT ); + code.appendHalf( newUT->structEl->id ); + } + + /* + * First load the context into the parser. + */ + if ( context ) { + for ( int i = 0; i < fieldInitArgs->length(); i++ ) { + /* Eval what we are initializing with. */ + UniqueType *argUT = fieldInitArgs->data[i]->expr->evaluate( pd, code ); + + if ( argUT == pd->uniqueTypeInput ) { + code.append( IN_SET_PARSER_INPUT ); + } + else if ( argUT->typeId == TYPE_STRUCT ) { + code.append( IN_SET_PARSER_CONTEXT ); + } + else { + error(loc) << "cannot initialize parser with this type, context or input only" << endp; + } + } + } + + evaluateCapture( pd, code, newUT ); + + return newUT; +} + +UniqueType *LangTerm::evaluateCast( Compiler *pd, CodeVect &code ) const +{ + expr->evaluate( pd, code ); + code.append( IN_TREE_CAST ); + code.appendHalf( typeRef->uniqueType->langEl->id ); + return typeRef->uniqueType; +} + +void LangTerm::assignFieldArgs( Compiler *pd, CodeVect &code, UniqueType *replUT ) const +{ + /* Now assign the field initializations. Note that we need to do this in + * reverse because the last expression evaluated is at the top of the + * stack. 
*/ + if ( fieldInitArgs != 0 && fieldInitArgs->length() > 0 ) { + ObjectDef *objDef = replUT->objectDef(); + /* Note the reverse traversal. */ + for ( FieldInitVect::Iter pi = fieldInitArgs->last(); pi.gtb(); pi-- ) { + FieldInit *fieldInit = *pi; + ObjectField *field = objDef->findFieldNum( pi.pos() ); + if ( field == 0 ) { + error(fieldInit->loc) << "failed to find init pos " << + pi.pos() << " in object" << endp; + } + + /* Lookup the type of the field and compare it to the type of the + * expression. */ + UniqueType *fieldUT = field->typeRef->uniqueType; + if ( !castAssignment( pd, code, fieldUT, 0, fieldInit->exprUT ) ) + error(fieldInit->loc) << "type mismatch in initialization" << endp; + + /* The set field instruction must leave the object on the top of + * the stack. */ + code.append( IN_SET_FIELD_TREE_LEAVE_WC ); + code.appendHalf( field->offset ); + } + } +} + +UniqueType *LangTerm::evaluateConstruct( Compiler *pd, CodeVect &code ) const +{ + /* Evaluate the initialization expressions. */ + if ( fieldInitArgs != 0 && fieldInitArgs->length() > 0 ) { + for ( FieldInitVect::Iter pi = *fieldInitArgs; pi.lte(); pi++ ) { + FieldInit *fieldInit = *pi; + fieldInit->exprUT = fieldInit->expr->evaluate( pd, code ); + } + } + + /* Assign bind ids to the variables in the replacement. */ + for ( ConsItemList::Iter item = *constructor->list; item.lte(); item++ ) { + if ( item->expr != 0 ) + item->bindId = constructor->nextBindId++; + } + + /* Evaluate variable references. 
*/ + for ( ConsItemList::Iter item = constructor->list->last(); item.gtb(); item-- ) { + if ( item->type == ConsItem::ExprType ) { + UniqueType *ut = item->expr->evaluate( pd, code ); + + if ( ut->typeId != TYPE_TREE ) { + error(constructor->loc) << "variables used in " + "replacements must be trees" << endp; + } + + if ( !isStr( ut ) ) { + if ( item->trim == ConsItem::TrimYes ) + code.append( IN_TREE_TRIM ); + } + + item->langEl = ut->langEl; + } + } + + /* Construct the tree using the tree information stored in the compiled + * code. */ + code.append( IN_CONSTRUCT ); + code.appendHalf( constructor->patRepId ); + + /* Lookup the type of the replacement and store it in the replacement + * object so that replacement parsing has a target. */ + UniqueType *replUT = typeRef->uniqueType; + if ( replUT->typeId != TYPE_TREE ) + error(loc) << "don't know how to construct this type" << endp; + + constructor->langEl = replUT->langEl; + assignFieldArgs( pd, code, replUT ); + + evaluateCapture( pd, code, replUT ); + + return replUT; +} + +void LangTerm::parseFrag( Compiler *pd, CodeVect &code, int stopId ) +{ + code.append( IN_PARSE_FRAG_W ); +} + +UniqueType *LangTerm::evaluateReadReduce( Compiler *pd, CodeVect &code ) const +{ + UniqueType *parserUT = typeRef->uniqueType; + UniqueType *targetUT = parserUT->generic->elUt; + + /* Should be one arg and it should be a stream. */ + + /* Assign bind ids to the variables in the replacement. 
*/ + for ( ConsItemList::Iter item = *parserText->list; item.lte(); item++ ) { + switch ( item->type ) { + case ConsItem::LiteralType: { + break; + } + case ConsItem::InputText: { + break; + } + case ConsItem::ExprType: { + item->expr->evaluate( pd, code ); + break; + }} + } + + code.append( IN_READ_REDUCE ); + code.appendHalf( parserUT->generic->id ); + code.appendHalf( parserText->reducerId ); + + return targetUT; +} + +UniqueType *LangTerm::evaluateParse( Compiler *pd, CodeVect &code, + bool tree, bool stop ) const +{ + if ( parserText->reduce && parserText->read ) { + return evaluateReadReduce( pd, code ); + } + + UniqueType *parserUT = typeRef->uniqueType; + UniqueType *targetUT = parserUT->generic->elUt; + + /* If this is a parse stop then we need to verify that the type is + * compatible with parse stop. */ + if ( stop ) + targetUT->langEl->parseStop = true; + int stopId = stop ? targetUT->langEl->id : 0; + + bool context = false; + if ( fieldInitArgs != 0 ) { + if ( fieldInitArgs == 0 || ( fieldInitArgs->length() != 1 && fieldInitArgs->length() != 2 ) ) + error(loc) << "parse command requires just context and input" << endp; + context = true; + } + + /* Evaluate variable references. */ + for ( ConsItemList::Iter item = consItemList->last(); item.gtb(); item-- ) { + if ( item->type == ConsItem::ExprType ) { + UniqueType *ut = item->expr->evaluate( pd, code ); + + if ( ut->typeId != TYPE_TREE ) + error() << "variables used in replacements must be trees" << endp; + + if ( item->trim == ConsItem::TrimYes ) + code.append( IN_TREE_TRIM ); + + item->langEl = ut->langEl; + } + } + + /* Construct the parser. */ + + if ( parserText->reduce ) { + code.append( IN_CONS_REDUCER ); + code.appendHalf( parserUT->generic->id ); + code.appendHalf( parserText->reducerId ); + } + else { + code.append( IN_CONS_GENERIC ); + code.appendHalf( parserUT->generic->id ); + code.appendHalf( stopId ); + } + + /* + * First load the context into the parser. 
+ */ + if ( context ) { + for ( int i = 0; i < fieldInitArgs->length(); i++ ) { + /* Eval what we are initializing with. */ + UniqueType *argUT = fieldInitArgs->data[i]->expr->evaluate( pd, code ); + + if ( argUT == pd->uniqueTypeInput ) { + code.append( IN_SET_PARSER_INPUT ); + } + else if ( argUT->typeId == TYPE_STRUCT && targetUT->langEl->contextIn != 0 ) { + code.append( IN_SET_PARSER_CONTEXT ); + } + else { + error(loc) << "cannot initialize parser with this type, context or input only" << endp; + } + } + } + + /*****************************/ + + if ( parserText->list->length() == 0 ) { + code.append( IN_SEND_NOTHING ); + + /* Parse instruction, dependent on whether or not we are producing + * revert or commit code. */ + parseFrag( pd, code, stopId ); + } + else { + for ( ConsItemList::Iter item = *parserText->list; item.lte(); item++ ) { + bool isStream = false; + uchar trim = TRIM_DEFAULT; + + switch ( item->type ) { + case ConsItem::LiteralType: { + String result; + bool unusedCI; + prepareLitString( result, unusedCI, + item->prodEl->typeRef->pdaLiteral->data, + item->prodEl->typeRef->pdaLiteral->loc ); + + /* Make sure we have this string. */ + StringMapEl *mapEl = 0; + if ( pd->literalStrings.insert( result, &mapEl ) ) + mapEl->value = pd->literalStrings.length()-1; + + code.append( IN_LOAD_STR ); + code.appendWord( mapEl->value ); + break; + } + case ConsItem::InputText: { + /* Make sure we have this string. */ + StringMapEl *mapEl = 0; + if ( pd->literalStrings.insert( item->data, &mapEl ) ) + mapEl->value = pd->literalStrings.length()-1; + + code.append( IN_LOAD_STR ); + code.appendWord( mapEl->value ); + break; + } + case ConsItem::ExprType: { + UniqueType *ut = item->expr->evaluate( pd, code ); + + if ( ut->typeId == TYPE_VOID ) { + /* Clear it away if return type is void. 
*/ + code.append( IN_POP_VAL ); + continue; + } + + if ( ut->typeId == TYPE_INT || ut->typeId == TYPE_BOOL ) + code.append( IN_INT_TO_STR ); + + if ( ut == pd->uniqueTypeStream ) + isStream = true; + + if ( item->trim == ConsItem::TrimYes ) + trim = TRIM_YES; + else if ( item->trim == ConsItem::TrimNo ) + trim = TRIM_NO; + + break; + }} + + if ( isStream ) + code.append( IN_SEND_STREAM_W ); + else if ( tree ) { + code.append( IN_SEND_TREE_W ); + code.append( trim ); + } + else { + code.append( IN_SEND_TEXT_W ); + code.append( trim ); + } + + /* Parse instruction, dependent on whether or not we are producing + * revert or commit code. */ + parseFrag( pd, code, stopId ); + } + } + + /* + * Finish operation + */ + + if ( !stop ) { + code.append( IN_SEND_EOF_W ); + parseFrag( pd, code, stopId ); + } + + if ( parserText->reduce ) { + code.append( IN_REDUCE_COMMIT ); + } + + /* Pull out the error and save it off. */ + code.append( IN_DUP_VAL ); + code.append( IN_GET_PARSER_MEM_R ); + code.appendHalf( 1 ); + code.append( IN_SET_ERROR ); + + /* Replace the parser with the parsed tree. */ + code.append( IN_GET_PARSER_MEM_R ); + code.appendHalf( 0 ); + + /* Capture to the local var. */ + evaluateCapture( pd, code, targetUT ); + + return targetUT; +} + +void LangTerm::evaluateSendStream( Compiler *pd, CodeVect &code ) const +{ + UniqueType *varUt = varRef->evaluate( pd, code ); + + if ( varUt->listOf( pd->uniqueTypeStream ) ) { + code.append( IN_GET_VLIST_MEM_R ); + code.appendHalf( varUt->generic->id ); + code.appendHalf( 0 ); + } + + /* Assign bind ids to the variables in the replacement. */ + for ( ConsItemList::Iter item = *parserText->list; item.lte(); item++ ) { + uchar trim = TRIM_DEFAULT; + + switch ( item->type ) { + case ConsItem::LiteralType: { + String result; + bool unusedCI; + prepareLitString( result, unusedCI, + item->prodEl->typeRef->pdaLiteral->data, + item->prodEl->typeRef->pdaLiteral->loc ); + + /* Make sure we have this string. 
*/ + StringMapEl *mapEl = 0; + if ( pd->literalStrings.insert( result, &mapEl ) ) + mapEl->value = pd->literalStrings.length()-1; + + code.append( IN_LOAD_STR ); + code.appendWord( mapEl->value ); + break; + } + case ConsItem::InputText: { + /* Make sure we have this string. */ + StringMapEl *mapEl = 0; + if ( pd->literalStrings.insert( item->data, &mapEl ) ) + mapEl->value = pd->literalStrings.length()-1; + + code.append( IN_LOAD_STR ); + code.appendWord( mapEl->value ); + break; + } + case ConsItem::ExprType: { + UniqueType *ut = item->expr->evaluate( pd, code ); + if ( ut->typeId == TYPE_VOID ) { + /* Clear it away if return type is void. */ + code.append( IN_POP_VAL ); + continue; + } + + if ( ut->typeId == TYPE_INT || ut->typeId == TYPE_BOOL ) + code.append( IN_INT_TO_STR ); + + if ( item->trim == ConsItem::TrimYes ) + trim = TRIM_YES; + else if ( item->trim == ConsItem::TrimNo ) + trim = TRIM_NO; + + break; + }} + + code.append( IN_PRINT_TREE ); + code.append( trim ); + } +} + +void LangTerm::evaluateSendParser( Compiler *pd, CodeVect &code, bool strings ) const +{ + UniqueType *varUt = varRef->evaluate( pd, code ); + + if ( varUt->parser() ) { + } + else if ( varUt->listOf( pd->uniqueTypeStream ) ) { + code.append( IN_GET_VLIST_MEM_R ); + code.appendHalf( varUt->generic->id ); + code.appendHalf( 0 ); + } + + if ( parserText->list->length() == 0 ) { + code.append( IN_SEND_NOTHING ); + + /* Parse instruction, dependent on whether or not we are producing + * revert or commit code. */ + parseFrag( pd, code, 0 ); + } + else { + + /* Assign bind ids to the variables in the replacement. 
*/ + for ( ConsItemList::Iter item = *parserText->list; item.lte(); item++ ) { + bool isStream = false; + uchar trim = TRIM_DEFAULT; + + switch ( item->type ) { + case ConsItem::LiteralType: { + String result; + bool unusedCI; + prepareLitString( result, unusedCI, + item->prodEl->typeRef->pdaLiteral->data, + item->prodEl->typeRef->pdaLiteral->loc ); + + /* Make sure we have this string. */ + StringMapEl *mapEl = 0; + if ( pd->literalStrings.insert( result, &mapEl ) ) + mapEl->value = pd->literalStrings.length()-1; + + code.append( IN_LOAD_STR ); + code.appendWord( mapEl->value ); + break; + } + case ConsItem::InputText: { + /* Make sure we have this string. */ + StringMapEl *mapEl = 0; + if ( pd->literalStrings.insert( item->data, &mapEl ) ) + mapEl->value = pd->literalStrings.length()-1; + + code.append( IN_LOAD_STR ); + code.appendWord( mapEl->value ); + break; + } + case ConsItem::ExprType: + UniqueType *ut = item->expr->evaluate( pd, code ); + if ( ut->typeId == TYPE_VOID ) { + /* Clear it away if return type is void. 
*/ + code.append( IN_POP_VAL ); + continue; + } + + if ( ut == pd->uniqueTypeStream ) + isStream = true; + + if ( item->trim == ConsItem::TrimYes ) + trim = TRIM_YES; + else if ( item->trim == ConsItem::TrimNo ) + trim = TRIM_NO; + + if ( ut->typeId == TYPE_INT || ut->typeId == TYPE_BOOL ) + code.append( IN_INT_TO_STR ); + + break; + } + + if ( isStream ) + code.append( IN_SEND_STREAM_W ); + else if ( !strings ) { + code.append( IN_SEND_TREE_W ); + code.append( trim ); + } + else { + code.append( IN_SEND_TEXT_W ); + code.append( trim ); + } + + parseFrag( pd, code, 0 ); + } + } + + if ( eof ) { + code.append( IN_SEND_EOF_W ); + parseFrag( pd, code, 0 ); + } +} + +UniqueType *LangTerm::evaluateSend( Compiler *pd, CodeVect &code ) const +{ + UniqueType *varUt = varRef->lookup( pd ); + + if ( varUt == pd->uniqueTypeStream ) + evaluateSendStream( pd, code ); + else if ( varUt->listOf( pd->uniqueTypeStream ) ) + evaluateSendStream( pd, code ); + else if ( varUt->parser() ) + evaluateSendParser( pd, code, true ); + else + error(loc) << "can only send to parsers and streams" << endl; + + return varUt; +} + + +UniqueType *LangTerm::evaluateSendTree( Compiler *pd, CodeVect &code ) const +{ + UniqueType *varUt = varRef->lookup( pd ); + + if ( varUt->parser() ) + evaluateSendParser( pd, code, false ); + else + error(loc) << "can only send_tree to parsers" << endl; + + return varUt; +} + +UniqueType *LangTerm::evaluateEmbedString( Compiler *pd, CodeVect &code ) const +{ + /* Assign bind ids to the variables in the replacement. */ + for ( ConsItemList::Iter item = *consItemList; item.lte(); item++ ) { + switch ( item->type ) { + case ConsItem::LiteralType: { + String result; + bool unusedCI; + prepareLitString( result, unusedCI, + item->prodEl->typeRef->pdaLiteral->data, + item->prodEl->typeRef->pdaLiteral->loc ); + + /* Make sure we have this string. 
*/ + StringMapEl *mapEl = 0; + if ( pd->literalStrings.insert( result, &mapEl ) ) + mapEl->value = pd->literalStrings.length()-1; + + code.append( IN_LOAD_STR ); + code.appendWord( mapEl->value ); + break; + } + case ConsItem::InputText: { + /* Make sure we have this string. */ + StringMapEl *mapEl = 0; + if ( pd->literalStrings.insert( item->data, &mapEl ) ) + mapEl->value = pd->literalStrings.length()-1; + + code.append( IN_LOAD_STR ); + code.appendWord( mapEl->value ); + break; + } + case ConsItem::ExprType: { + UniqueType *ut = item->expr->evaluate( pd, code ); + + if ( ut->typeId == TYPE_INT || ut->typeId == TYPE_BOOL ) + code.append( IN_INT_TO_STR ); + + if ( ut->typeId == TYPE_TREE && + ut->langEl != pd->strLangEl && ut != pd->uniqueTypeStream ) + { + /* Convert it to a string. */ + code.append( IN_TREE_TO_STR ); + } + break; + }} + } + + /* If there was nothing loaded, load the empty string. We must produce + * something. */ + if ( consItemList->length() == 0 ) { + String result = ""; + + /* Make sure we have this string. */ + StringMapEl *mapEl = 0; + if ( pd->literalStrings.insert( result, &mapEl ) ) + mapEl->value = pd->literalStrings.length()-1; + + code.append( IN_LOAD_STR ); + code.appendWord( mapEl->value ); + } + + long items = consItemList->length(); + for ( long i = 0; i < items-1; i++ ) + code.append( IN_CONCAT_STR ); + + return pd->uniqueTypeStr; +} + +UniqueType *LangTerm::evaluateSearch( Compiler *pd, CodeVect &code ) const +{ + UniqueType *ut = typeRef->uniqueType; + if ( ut->typeId != TYPE_TREE ) + error(loc) << "can only search for tree types" << endp; + + /* Evaluate the expression. */ + UniqueType *treeUT = varRef->evaluate( pd, code ); + if ( treeUT->typeId != TYPE_TREE && treeUT->typeId != TYPE_REF ) + error(loc) << "search can be applied only to tree/ref types" << endp; + + /* Run the search. 
*/ + code.append( IN_TREE_SEARCH ); + code.appendWord( ut->langEl->id ); + return ut; +} + +UniqueType *LangTerm::evaluate( Compiler *pd, CodeVect &code ) const +{ + UniqueType *retUt = 0; + switch ( type ) { + case VarRefType: + retUt = varRef->evaluate( pd, code ); + break; + case MethodCallType: + retUt = varRef->evaluateCall( pd, code, args ); + break; + case NilType: + code.append( IN_LOAD_NIL ); + retUt = pd->uniqueTypeNil; + break; + case TrueType: + code.append( IN_LOAD_TRUE ); + retUt = pd->uniqueTypeBool; + break; + case FalseType: + code.append( IN_LOAD_FALSE ); + retUt = pd->uniqueTypeBool; + break; + case MakeTokenType: + retUt = evaluateMakeToken( pd, code ); + break; + case MakeTreeType: + retUt = evaluateMakeTree( pd, code ); + break; + case NumberType: { + unsigned int n = atoi( data ); + code.append( IN_LOAD_INT ); + code.appendWord( n ); + retUt = pd->uniqueTypeInt; + break; + } + case StringType: { + String interp; + bool unused; + prepareLitString( interp, unused, data, InputLoc() ); + + /* Make sure we have this string. */ + StringMapEl *mapEl = 0; + if ( pd->literalStrings.insert( interp, &mapEl ) ) + mapEl->value = pd->literalStrings.length()-1; + + code.append( IN_LOAD_STR ); + code.appendWord( mapEl->value ); + retUt = pd->uniqueTypeStr; + break; + } + case MatchType: + retUt = evaluateMatch( pd, code ); + break; + case ProdCompareType: + retUt = evaluateProdCompare( pd, code ); + break; + case ParseType: + retUt = evaluateParse( pd, code, false, false ); + break; + case ParseTreeType: + retUt = evaluateParse( pd, code, true, false ); + break; + case ParseStopType: + retUt = evaluateParse( pd, code, false, true ); + break; + case ConstructType: + retUt = evaluateConstruct( pd, code ); + break; + case SendType: + retUt = evaluateSend( pd, code ); + break; + case SendTreeType: + retUt = evaluateSendTree( pd, code ); + break; + case NewType: + retUt = evaluateNew( pd, code ); + break; + case TypeIdType: { + /* Evaluate the expression. 
*/ + UniqueType *ut = typeRef->uniqueType; + if ( ut->typeId != TYPE_TREE ) + error() << "typeid can only be applied to tree types" << endp; + + code.append( IN_LOAD_INT ); + code.appendWord( ut->langEl->id ); + retUt = pd->uniqueTypeInt; + break; + } + case SearchType: + retUt = evaluateSearch( pd, code ); + break; + case EmbedStringType: + retUt = evaluateEmbedString( pd, code ); + break; + case CastType: + retUt = evaluateCast( pd, code ); + break; + } + + // if ( retUt->val() ) + // pd->unwindCode.insert( 0, IN_POP_VAL ); + // else + // pd->unwindCode.insert( 0, IN_POP_TREE ); + + return retUt; +} + +UniqueType *LangExpr::evaluate( Compiler *pd, CodeVect &code ) const +{ + switch ( type ) { + case BinaryType: { + switch ( op ) { + case '+': { + UniqueType *lt = left->evaluate( pd, code ); + UniqueType *rt = right->evaluate( pd, code ); + + // pd->unwindCode.remove( 0, 2 ); + // pd->unwindCode.insert( 0, IN_POP_TREE ); + + if ( lt == pd->uniqueTypeInt && rt == pd->uniqueTypeInt ) { + code.append( IN_ADD_INT ); + return pd->uniqueTypeInt; + } + + if ( lt == pd->uniqueTypeStr && rt == pd->uniqueTypeStr ) { + code.append( IN_CONCAT_STR ); + return pd->uniqueTypeStr; + } + + + error(loc) << "do not have an addition operator for these types" << endp; + break; + } + case '-': { + UniqueType *lt = left->evaluate( pd, code ); + UniqueType *rt = right->evaluate( pd, code ); + + if ( lt == pd->uniqueTypeInt && rt == pd->uniqueTypeInt ) { + code.append( IN_SUB_INT ); + return pd->uniqueTypeInt; + } + + error(loc) << "do not have an addition operator for these types" << endp; + break; + } + case '*': { + UniqueType *lt = left->evaluate( pd, code ); + UniqueType *rt = right->evaluate( pd, code ); + + if ( lt == pd->uniqueTypeInt && rt == pd->uniqueTypeInt ) { + code.append( IN_MULT_INT ); + return pd->uniqueTypeInt; + } + + error(loc) << "do not have an multiplication " + "operator for these types" << endp; + break; + } + case '/': { + UniqueType *lt = left->evaluate( pd, 
code ); + UniqueType *rt = right->evaluate( pd, code ); + + if ( lt == pd->uniqueTypeInt && rt == pd->uniqueTypeInt ) { + code.append( IN_DIV_INT ); + return pd->uniqueTypeInt; + } + + error(loc) << "do not have an division" + "operator for these types" << endp; + break; + } + case OP_DoubleEql: { + UniqueType *lt = left->evaluate( pd, code ); + UniqueType *rt = right->evaluate( pd, code ); + + if ( lt != rt ) + error(loc) << "comparison of different types" << endp; + + if ( lt->val() ) + code.append( IN_TST_EQL_VAL ); + else + code.append( IN_TST_EQL_TREE ); + return pd->uniqueTypeBool; + } + case OP_NotEql: { + UniqueType *lt = left->evaluate( pd, code ); + UniqueType *rt = right->evaluate( pd, code ); + + if ( lt != rt ) + error(loc) << "comparison of different types" << endp; + + if ( lt->val() ) + code.append( IN_TST_NOT_EQL_VAL ); + else + code.append( IN_TST_NOT_EQL_TREE ); + + return pd->uniqueTypeBool; + } + case '<': { + UniqueType *lt = left->evaluate( pd, code ); + UniqueType *rt = right->evaluate( pd, code ); + + if ( lt != rt ) + error(loc) << "comparison of different types" << endp; + + if ( lt->val() ) + code.append( IN_TST_LESS_VAL ); + else + code.append( IN_TST_LESS_TREE ); + return pd->uniqueTypeBool; + } + case '>': { + UniqueType *lt = left->evaluate( pd, code ); + UniqueType *rt = right->evaluate( pd, code ); + + if ( lt != rt ) + error(loc) << "comparison of different types" << endp; + + if ( lt->val() ) + code.append( IN_TST_GRTR_VAL ); + else + code.append( IN_TST_GRTR_TREE ); + + return pd->uniqueTypeBool; + } + case OP_LessEql: { + UniqueType *lt = left->evaluate( pd, code ); + UniqueType *rt = right->evaluate( pd, code ); + + if ( lt != rt ) + error(loc) << "comparison of different types" << endp; + + if ( lt->val() ) + code.append( IN_TST_LESS_EQL_VAL ); + else + code.append( IN_TST_LESS_EQL_TREE ); + + return pd->uniqueTypeBool; + } + case OP_GrtrEql: { + UniqueType *lt = left->evaluate( pd, code ); + UniqueType *rt = right->evaluate( 
pd, code ); + + if ( lt != rt ) + error(loc) << "comparison of different types" << endp; + + if ( lt->val() ) + code.append( IN_TST_GRTR_EQL_VAL ); + else + code.append( IN_TST_GRTR_EQL_TREE ); + + return pd->uniqueTypeBool; + } + case OP_LogicalAnd: { + /* Evaluate the left and duplicate it. */ + UniqueType *lut = left->evaluate( pd, code ); + if ( !lut->val() ) + code.append( IN_TST_NZ_TREE ); + code.append( IN_DUP_VAL ); + + /* Jump over the right if false, leaving the original left + * result on the top of the stack. We don't know the + * distance yet so record the position of the jump. */ + long jump = code.length(); + code.append( IN_JMP_FALSE_VAL ); + code.appendHalf( 0 ); + + /* Evauluate the right, add the test. Store it separately. */ + UniqueType *rut = right->evaluate( pd, code ); + if ( !rut->val() ) + code.append( IN_TST_NZ_TREE ); + + code.append( IN_TST_LOGICAL_AND ); + + /* Set the distance of the jump. */ + long distance = code.length() - jump - 3; + code.setHalf( jump+1, distance ); + + return pd->uniqueTypeInt; + } + case OP_LogicalOr: { + /* Evaluate the left and duplicate it. */ + UniqueType *lut = left->evaluate( pd, code ); + if ( !lut->val() ) + code.append( IN_TST_NZ_TREE ); + code.append( IN_DUP_VAL ); + + /* Jump over the right if true, leaving the original left + * result on the top of the stack. We don't know the + * distance yet so record the position of the jump. */ + long jump = code.length(); + code.append( IN_JMP_TRUE_VAL ); + code.appendHalf( 0 ); + + /* Evauluate the right, add the test. */ + UniqueType *rut = right->evaluate( pd, code ); + if ( !rut->val() ) + code.append( IN_TST_NZ_TREE ); + + code.append( IN_TST_LOGICAL_OR ); + + /* Set the distance of the jump. */ + long distance = code.length() - jump - 3; + code.setHalf( jump+1, distance ); + + return pd->uniqueTypeInt; + } + } + + assert(false); + return 0; + } + case UnaryType: { + switch ( op ) { + case '!': { + /* Evaluate the left and duplicate it. 
*/ + UniqueType *ut = right->evaluate( pd, code ); + if ( ut->val() ) + code.append( IN_NOT_VAL ); + else + code.append( IN_NOT_TREE ); + return pd->uniqueTypeBool; + } + case '$': { + UniqueType *ut = right->evaluate( pd, code ); + + if ( ut->typeId == TYPE_INT || ut->typeId == TYPE_BOOL ) + code.append( IN_INT_TO_STR ); + + code.append( IN_TREE_TO_STR_TRIM ); + return pd->uniqueTypeStr; + + } + case 'S': { + UniqueType *ut = right->evaluate( pd, code ); + + if ( ut->typeId == TYPE_INT || ut->typeId == TYPE_BOOL ) + code.append( IN_INT_TO_STR ); + + code.append( IN_TREE_TO_STR_TRIM_A ); + return pd->uniqueTypeStr; + } + case '%': { + UniqueType *ut = right->evaluate( pd, code ); + if ( ut->typeId == TYPE_INT || ut->typeId == TYPE_BOOL ) + code.append( IN_INT_TO_STR ); + else + code.append( IN_TREE_TO_STR ); + return pd->uniqueTypeStr; + } + case '^': { + UniqueType *rt = right->evaluate( pd, code ); + code.append( IN_TREE_TRIM ); + return rt; + } + case '@': { + UniqueType *rt = right->evaluate( pd, code ); + //code.append( IN_TREE_TRIM ); + return rt; + } + default: + assert(false); + } + return 0; + } + case TermType: { + return term->evaluate( pd, code ); + } + } + return 0; +} + +void LangVarRef::assignValue( Compiler *pd, CodeVect &code, + UniqueType *exprUT ) const +{ + /* Lookup the left hand side of the assignment. */ + VarRefLookup lookup = lookupField( pd ); + + if ( lookup.objField->refActive ) + error(loc) << "reference active, cannot write to object" << endp; + + if ( lookup.firstConstPart >= 0 ) { + error(loc) << "left hand side qualification \"" << + qual->data[lookup.firstConstPart].data << "\" is const" << endp; + } + + if ( lookup.objField->isConst ) + error(loc) << "field \"" << name << "\" is const" << endp; + + /* Writing guarantees the field is dirty. tree is dirty. */ + lookup.objField->dirtyTree = true; + + /* Check the types of the assignment and possibly cast. 
*/ + UniqueType *objUT = lookup.objField->typeRef->uniqueType; + assert( lookup.uniqueType == lookup.objField->typeRef->uniqueType ); + if ( !castAssignment( pd, code, objUT, lookup.iterSearchUT, exprUT ) ) + error(loc) << "type mismatch in assignment" << endp; + + /* Decide if we need to revert the assignment. */ + bool revert = lookup.lastPtrInQual >= 0 || !isLocalRef(); + + /* Load the object and generate the field setting code. */ + loadObj( pd, code, lookup.lastPtrInQual, true ); + + if ( lookup.uniqueType->typeId == TYPE_ITER ) + setFieldIter( pd, code, lookup.inObject, lookup.objField, lookup.uniqueType, exprUT, false ); + else + setField( pd, code, lookup.inObject, lookup.objField, exprUT, revert ); +} + +UniqueType *LangTerm::evaluateMakeToken( Compiler *pd, CodeVect &code ) const +{ +// if ( pd->compileContext != Compiler::CompileTranslation ) +// error(loc) << "make_token can be used only in a translation block" << endp; + + /* Match the number of arguments. */ + int numArgs = args != 0 ? args->length() : 0; + if ( numArgs < 2 ) + error(loc) << "need at least two arguments" << endp; + + for ( CallArgVect::Iter pe = *args; pe.lte(); pe++ ) { + /* Evaluate. */ + UniqueType *exprUT = (*pe)->expr->evaluate( pd, code ); + + if ( pe.pos() == 0 && exprUT != pd->uniqueTypeInt ) + error(loc) << "first arg, id, must be an int" << endp; + + if ( pe.pos() == 1 && exprUT != pd->uniqueTypeStr ) + error(loc) << "second arg, length, must be a string" << endp; + } + + /* The token is now created, send it. */ + code.append( IN_MAKE_TOKEN ); + code.append( args->length() ); + + return pd->uniqueTypeAny; +} + +UniqueType *LangTerm::evaluateMakeTree( Compiler *pd, CodeVect &code ) const +{ +// if ( pd->compileContext != Compiler::CompileTranslation ) +// error(loc) << "make_tree can be used only in a translation block" << endp; + + /* Match the number of arguments. */ + int numArgs = args != 0 ? 
args->length() : 0; + if ( numArgs < 1 ) + error(loc) << "need at least one argument" << endp; + + for ( CallArgVect::Iter pe = *args; pe.lte(); pe++ ) { + /* Evaluate. */ + UniqueType *exprUT = (*pe)->expr->evaluate( pd, code ); + + if ( pe.pos() == 0 && exprUT != pd->uniqueTypeInt ) + error(loc) << "first arg, nonterm id, must be an int" << endp; + } + + /* The token is now created, send it. */ + code.append( IN_MAKE_TREE ); + code.append( args->length() ); + + return pd->uniqueTypeAny; +} + +void LangStmt::compileForIterBody( Compiler *pd, + CodeVect &code, UniqueType *iterUT ) const +{ + /* Remember the top of the loop. */ + long top = code.length(); + + /* Advance */ + code.append( objField->iterImpl->inAdvance ); + code.appendHalf( objField->offset ); + + /* Test: jump past the while block if false. Note that we don't have the + * distance yet. */ + long jumpFalse = code.length(); + code.append( IN_JMP_FALSE_VAL ); + code.appendHalf( 0 ); + + /* + * Set up the loop cleanup code. + */ + + /* Add the cleanup for the current loop. */ + int lcLen = pd->unwindCode.length(); + pd->unwindCode.insertHalf( 0, objField->offset ); + pd->unwindCode.insert( 0, objField->iterImpl->inUnwind ); + + /* Compile the contents. */ + for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ ) + stmt->compile( pd, code ); + + pd->unwindCode.remove( 0, pd->unwindCode.length() - lcLen ); + + /* Jump back to the top to retest. */ + long retestDist = code.length() - top + 3; + code.append( IN_JMP ); + code.appendHalf( -retestDist ); + + /* Set the jump false distance. */ + long falseDist = code.length() - jumpFalse - 3; + code.setHalf( jumpFalse+1, falseDist ); + + /* Compute the jump distance for the break jumps. */ + for ( LongVect::Iter brk = pd->breakJumps; brk.lte(); brk++ ) { + long distance = code.length() - *brk - 3; + code.setHalf( *brk+1, distance ); + } + pd->breakJumps.empty(); + + /* Destroy the iterator. 
*/ + code.append( objField->iterImpl->inDestroy ); + code.appendHalf( objField->offset ); + + /* Clean up any prepush args. */ +} + +void LangStmt::compileForIter( Compiler *pd, CodeVect &code ) const +{ + /* The type we are searching for. */ + UniqueType *searchUT = typeRef->uniqueType; + + /* Lookup the iterator call. Make sure it is an iterator. */ + VarRefLookup lookup = iterCall->langTerm->varRef->lookupIterCall( pd ); + if ( lookup.objMethod->iterDef == 0 ) { + error(loc) << "attempt to iterate using something " + "that is not an iterator" << endp; + } + + /* Prepare the contiguous call args space. */ + Function *func = lookup.objMethod->func; + int asLoc; + if ( func != 0 ) { + code.append( IN_PREP_ARGS ); + asLoc = code.length(); + code.appendHalf( 0 ); + } + + /* + * Create the iterator from the local var. + */ + + UniqueType *iterUT = objField->typeRef->uniqueType; + IterImpl *iterImpl = 0; + + switch ( iterUT->iterDef->type ) { + case IterDef::Tree: + iterImpl = iterCall->langTerm->varRef->chooseTriterCall( pd, + searchUT, iterCall->langTerm->args ); + break; + case IterDef::Child: + iterImpl = new IterImpl( IterImpl::Child ); + break; + case IterDef::RevChild: + iterImpl = new IterImpl( IterImpl::RevChild ); + break; + case IterDef::Repeat: + iterImpl = new IterImpl( IterImpl::Repeat ); + break; + case IterDef::RevRepeat: + iterImpl = new IterImpl( IterImpl::RevRepeat ); + break; + case IterDef::User: + iterImpl = new IterImpl( IterImpl::User, iterUT->iterDef->func ); + break; + case IterDef::ListEl: + iterImpl = new IterImpl( IterImpl::ListEl ); + break; + case IterDef::RevListVal: + iterImpl = new IterImpl( IterImpl::RevListVal ); + break; + case IterDef::MapEl: + iterImpl = new IterImpl( IterImpl::MapEl ); + break; + } + + objField->iterImpl = iterImpl; + + /* Evaluate and push the arguments. 
*/ + ObjectField **paramRefs = iterCall->langTerm->varRef->evaluateArgs( + pd, code, lookup, iterCall->langTerm->args ); + + if ( pd->revertOn ) + code.append( iterImpl->inCreateWV ); + else + code.append( iterImpl->inCreateWC ); + + code.appendHalf( objField->offset ); + + /* Arg size (or func id for user iters). */ + if ( lookup.objMethod->func != 0 ) + code.appendHalf( lookup.objMethod->func->funcId ); + else + code.appendHalf( iterCall->langTerm->varRef->argSize ); + + /* Search type. */ + if ( iterImpl->useSearchUT ) + code.appendHalf( searchUT->langEl->id ); + + if ( iterImpl->useGenericId ) { + CodeVect unused; + UniqueType *ut = + iterCall->langTerm->args->data[0]->expr->evaluate( pd, unused ); + + code.appendHalf( ut->generic->id ); + } + + compileForIterBody( pd, code, iterUT ); + + iterCall->langTerm->varRef->popRefQuals( pd, code, lookup, + iterCall->langTerm->args, false ); + + iterCall->langTerm->varRef->resetActiveRefs( pd, lookup, paramRefs ); + delete[] paramRefs; + + if ( func != 0 ) { + code.append( IN_CLEAR_ARGS ); + code.appendHalf( func->paramListSize ); + code.setHalf( asLoc, func->paramListSize ); + } +} + +void LangStmt::compileWhile( Compiler *pd, CodeVect &code ) const +{ + /* Generate code for the while test. Remember the top. */ + long top = code.length(); + UniqueType *eut = expr->evaluate( pd, code ); + + /* Jump past the while block if false. Note that we don't have the + * distance yet. */ + long jumpFalse = code.length(); + half_t jinstr = eut->tree() ? IN_JMP_FALSE_TREE : IN_JMP_FALSE_VAL; + code.append( jinstr ); + code.appendHalf( 0 ); + + /* Compute the while block. */ + for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ ) + stmt->compile( pd, code ); + + /* Jump back to the top to retest. */ + long retestDist = code.length() - top + 3; + code.append( IN_JMP ); + code.appendHalf( -retestDist ); + + /* Set the jump false distance. 
*/ + long falseDist = code.length() - jumpFalse - 3; + code.setHalf( jumpFalse+1, falseDist ); + + /* Compute the jump distance for the break jumps. */ + for ( LongVect::Iter brk = pd->breakJumps; brk.lte(); brk++ ) { + long distance = code.length() - *brk - 3; + code.setHalf( *brk+1, distance ); + } + pd->breakJumps.empty(); +} + +void LangStmt::compile( Compiler *pd, CodeVect &code ) const +{ + CodeVect block; + + StringMapEl *mapEl = 0; + if ( pd->literalStrings.insert( "unwind code\n", &mapEl ) ) + mapEl->value = pd->literalStrings.length()-1; + + block.append( IN_LOAD_STR ); + block.appendWord( mapEl->value ); + + block.append( IN_POP_TREE ); + + pd->unwindCode.insert( 0, block ); + + switch ( type ) { + case ExprType: { + /* Evaluate the exrepssion, then pop it immediately. */ + UniqueType *exprUt = expr->evaluate( pd, code ); + if ( exprUt->tree() ) + code.append( IN_POP_TREE ); + else + code.append( IN_POP_VAL ); + + // pd->unwindCode.remove( 0, 1 ); + break; + } + case IfType: { + long jumpFalse = 0, jumpPastElse = 0, distance = 0; + + /* Evaluate the test. */ + UniqueType *eut = expr->evaluate( pd, code ); + + /* Jump past the if block if false. We don't know the distance + * yet so store the location of the jump. */ + jumpFalse = code.length(); + half_t jinstr = eut->tree() ? IN_JMP_FALSE_TREE : IN_JMP_FALSE_VAL; + + code.append( jinstr ); + code.appendHalf( 0 ); + + /* Compile the if true branch. */ + for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ ) + stmt->compile( pd, code ); + + if ( elsePart != 0 ) { + /* Jump past the else code for the if true branch. */ + jumpPastElse = code.length(); + code.append( IN_JMP ); + code.appendHalf( 0 ); + } + + /* Set the distance for the jump false case. */ + distance = code.length() - jumpFalse - 3; + code.setHalf( jumpFalse+1, distance ); + + if ( elsePart != 0 ) { + /* Compile the else branch. */ + elsePart->compile( pd, code ); + + /* Set the distance for jump over the else part. 
*/
				distance = code.length() - jumpPastElse - 3;
				code.setHalf( jumpPastElse+1, distance );
			}

			break;
		}
		case ElseType: {
			/* Compile the else branch. */
			for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ )
				stmt->compile( pd, code );
			break;
		}
		case RejectType: {
			code.append( IN_REJECT );
			break;
		}
		case WhileType: {
			compileWhile( pd, code );
			break;
		}
		case AssignType: {
			/* Evaluate the expression. */
			UniqueType *exprUT = expr->evaluate( pd, code );

			/* Do the assignment. */
			varRef->assignValue( pd, code, exprUT );
			break;
		}
		case ForIterType: {
			compileForIter( pd, code );
			break;
		}
		case ReturnType: {
			/* Evaluate the expression. */
			UniqueType *exprUT = expr->evaluate( pd, code );

			if ( pd->curFunction == 0 ) {
				/* In the main function */
				pd->mainReturnUT = exprUT;
			}
			else {
				/* A non-void function result must accept the expression
				 * type; castAssignment may emit conversion code. */
				UniqueType *resUT = pd->curFunction->typeRef->uniqueType;
				if ( resUT != pd->uniqueTypeVoid &&
						!castAssignment( pd, code, resUT, 0, exprUT ) )
					error(loc) << "return value wrong type" << endp;
			}

			code.append( IN_SAVE_RET );

			/* The loop cleanup code. */
			if ( pd->unwindCode.length() > 0 )
				code.append( pd->unwindCode );

			/* Jump to the return label. The distance will be filled in
			 * later. */
			pd->returnJumps.append( code.length() );
			code.append( IN_JMP );
			code.appendHalf( 0 );
			break;
		}
		case BreakType: {
			/* Record where the jump sits; the distance is patched later
			 * from pd->breakJumps. */
			pd->breakJumps.append( code.length() );
			code.append( IN_JMP );
			code.appendHalf( 0 );
			break;
		}
		case YieldType: {
			/* Take a reference and yield it. Immediately reset the reference. 
*/
			varRef->preEvaluateRef( pd, code );
			ObjectField *objField = varRef->evaluateRef( pd, code, 0 );
			code.append( IN_YIELD );

			if ( varRef->qual->length() > 0 ) {
				code.append( IN_POP_N_WORDS );
				code.appendHalf( (short)(varRef->qual->length()*2) );
			}

			objField->refActive = false;
			break;
		}
	}

	/* Take this statement's unwind block back off the front. */
	pd->unwindCode.remove( 0, block.length() );
}

/* Compile every statement of the block, in order, into 'code'. */
void CodeBlock::compile( Compiler *pd, CodeVect &code ) const
{
	for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ )
		stmt->compile( pd, code );
}

/* Fill block->locals with a LocalLoc entry for each frame field that needs
 * one: tree-typed fields that use an offset, are not the lhs element and
 * were referenced or are parameters (LT_Tree), plus every iterator-typed
 * field, classified by the kind of iterator. */
void Compiler::findLocals( ObjectDef *localFrame, CodeBlock *block )
{
	Locals &locals = block->locals;

	for ( FieldList::Iter ol = localFrame->fieldList; ol.lte(); ol++ ) {
		ObjectField *el = ol->value;

		/* FIXME: This test needs to be improved. Match_text was getting
		 * through before useOffset was tested. What will? */
		if ( el->useOffset() && !el->isLhsEl() &&
				( el->beenReferenced || el->isParam() ) )
		{
			UniqueType *ut = el->typeRef->uniqueType;
			if ( ut->tree() ) {
				int depth = el->scope->depth();
				locals.append( LocalLoc( LT_Tree, depth, el->offset ) );
			}
		}

		if ( el->useOffset() ) {
			UniqueType *ut = el->typeRef->uniqueType;
			if ( ut->typeId == TYPE_ITER ) {
				int depth = el->scope->depth();
				LocalType type = LT_Tree;
				switch ( ut->iterDef->type ) {
					case IterDef::Tree:
					case IterDef::Child:
					case IterDef::Repeat:
					case IterDef::RevRepeat:
						type = LT_Iter;
						break;

					case IterDef::MapEl:
					case IterDef::ListEl:
					case IterDef::RevListVal:
						/* ? 
*/
						type = LT_Iter;
						break;

					case IterDef::RevChild:
						type = LT_RevIter;
						break;
					case IterDef::User:
						type = LT_UserIter;
						break;
				}

				locals.append( LocalLoc( type, depth, (int)el->offset ) );
			}
		}
	}
}

/* If the reduction's "lhs" field was referenced, insert an IN_INIT_LHS_EL
 * instruction (with the field offset) at insertPos. insertPos is advanced
 * by the number of bytes inserted. */
void Compiler::addProdLHSLoad( Production *prod, CodeVect &code, long &insertPos )
{
	NameScope *scope = prod->redBlock->localFrame->rootScope;
	ObjectField *lhsField = scope->findField("lhs");
	assert( lhsField != 0 );

	CodeVect loads;
	if ( lhsField->beenReferenced ) {
		loads.append( IN_INIT_LHS_EL );
		loads.appendHalf( lhsField->offset );
	}

	code.insert( insertPos, loads );
	insertPos += loads.length();
}

/* If the reduction's "lhs" field was referenced, append an IN_STORE_LHS_EL
 * instruction (with the field offset) to the end of 'code'.
 * NOTE(review): insertPos is accepted but never used here; unlike the load
 * helpers this one appends rather than inserts. */
void Compiler::addPushBackLHS( Production *prod, CodeVect &code, long &insertPos )
{
	CodeBlock *block = prod->redBlock;

	/* If the lhs tree is dirty then we will need to save off the old lhs
	 * before it gets modified. We want to avoid this for attribute
	 * modifications. The computation of dirtyTree should deal with this for
	 * us. */
	NameScope *scope = block->localFrame->rootScope;
	ObjectField *lhsField = scope->findField("lhs");
	assert( lhsField != 0 );

	if ( lhsField->beenReferenced ) {
		code.append( IN_STORE_LHS_EL );
		code.appendHalf( lhsField->offset );
	}
}

/* Build one IN_INIT_RHS_EL instruction (element position, field offset) for
 * every referenced right-hand-side element of reference type, and insert
 * the lot at insertPos. */
void Compiler::addProdRHSLoads( Production *prod, CodeVect &code, long &insertPos )
{
	CodeVect loads;
	long elPos = 0;
	for ( ProdElList::Iter rhsEl = *prod->prodElList; rhsEl.lte(); rhsEl++, elPos++ ) {
		if ( rhsEl->type == ProdEl::ReferenceType ) {
			if ( rhsEl->rhsElField->beenReferenced ) {
				loads.append ( IN_INIT_RHS_EL );
				loads.appendHalf( elPos );
				loads.appendHalf( rhsEl->rhsElField->offset );
			}
		}
	}

	/* Insert and update the insert position. 
*/
	code.insert( insertPos, loads );
	insertPos += loads.length();
}



/* For each production element that has a capture field, append the pair
 * (capture field offset, element position) to prod->copy. */
void Compiler::makeProdCopies( Production *prod )
{
	int pos = 0;
	for ( ProdElList::Iter pel = *prod->prodElList; pel.lte(); pel++, pos++) {
		if ( pel->captureField != 0 ) {
			prod->copy.append( pel->captureField->offset );
			prod->copy.append( pos );
		}
	}
}

/* Compile a production's reduction block into its revert-enabled code
 * vector (codeWV). The block body is compiled first; the rhs and lhs loads
 * are then inserted in front of it, because which fields were referenced is
 * only known after the body has been compiled. */
void Compiler::compileReductionCode( Production *prod )
{
	CodeBlock *block = prod->redBlock;

	/* Init the compilation context. */
	compileContext = CompileReduction;
	revertOn = true;
	block->frameId = nextFrameId++;

	CodeVect &code = block->codeWV;

	/* Position at which the load instructions get inserted. */
	long afterInit = code.length();

	/* Compile the reduce block. */
	block->compile( this, code );

	/* Might need to load right hand side values. */
	addProdRHSLoads( prod, code, afterInit );

	addProdLHSLoad( prod, code, afterInit );
	addPushBackLHS( prod, code, afterInit );

	code.append( IN_PCR_RET );

	/* Now that compilation is done variables are referenced. Make the local
	 * trees descriptor. */
	findLocals( block->localFrame, block );
}

/* Compile a token's translation block into its revert-enabled code vector.
 * When the token definition has regular-expression captures, an
 * IN_INIT_CAPTURES instruction carrying the capture count is emitted
 * first. */
void Compiler::compileTranslateBlock( LangEl *langEl )
{
	CodeBlock *block = langEl->transBlock;

	/* Set up compilation context. */
	compileContext = CompileTranslation;
	revertOn = true;
	block->frameId = nextFrameId++;

	CodeVect &code = block->codeWV;

	if ( langEl->tokenDef->reCaptureVect.length() > 0 ) {
		code.append( IN_INIT_CAPTURES );
		code.append( langEl->tokenDef->reCaptureVect.length() );
	}

	/* Set the local frame and compile the reduce block. */
	block->compile( this, code );

	code.append( IN_PCR_RET );

	/* Now that compilation is done variables are referenced. Make the local
	 * trees descriptor. */
	findLocals( block->localFrame, block );
}

/* Compile a region's pre-eof block into its revert-enabled code vector. */
void Compiler::compilePreEof( TokenRegion *region )
{
	CodeBlock *block = region->preEofBlock;

	/* Set up compilation context. 
*/
	compileContext = CompileTranslation;
	revertOn = true;
	block->frameId = nextFrameId++;

	addInput( block->localFrame );
	addThis( block->localFrame );

	CodeVect &code = block->codeWV;

	/* Set the local frame and compile the reduce block. */
	block->compile( this, code );

	code.append( IN_PCR_RET );

	/* Now that compilation is done variables are referenced. Make the local
	 * trees descriptor. */
	findLocals( block->localFrame, block );
}

/* Mark the global arg0 field as referenced and return its offset. */
int Compiler::arg0Offset()
{
	globalObjectDef->referenceField( this, arg0 );
	return arg0->offset;
}

/* Mark the global argv field as referenced and return its offset. */
int Compiler::argvOffset()
{
	globalObjectDef->referenceField( this, argv );
	return argv->offset;
}

/* Mark the global stds field as referenced and return its offset. */
int Compiler::stdsOffset()
{
	globalObjectDef->referenceField( this, stds );
	return stds->offset;
}

/* Compile the root code block into its commit-only code vector (codeWC).
 * The emitted program loads arg0, argv and the std streams into their
 * global fields, runs the root statements, then stops. */
void Compiler::compileRootBlock( )
{
	CodeBlock *block = rootCodeBlock;

	/* The root block never needs to be reverted. */

	/* Set up the compile context. No locals are needed for the root code
	 * block, but we need an empty local frame for the compile. */
	compileContext = CompileRoot;
	revertOn = false;

	/* The block needs a frame id. */
	block->frameId = nextFrameId++;

	/* The root block is not reverted. */
	CodeVect &code = block->codeWC;

	code.append( IN_FN );
	code.append( FN_LOAD_ARG0 );
	code.appendHalf( arg0Offset() );

	code.append( IN_FN );
	code.append( FN_LOAD_ARGV );
	code.appendHalf( argvOffset() );

	code.append( IN_FN );
	code.append( FN_INIT_STDS );
	code.appendHalf( stdsOffset() );

	block->compile( this, code );

	code.append( IN_FN );
	code.append( FN_STOP );

	/* Make the local trees descriptor. 
*/
	findLocals( rootLocalFrame, block );
}

/* Select the get/set instruction opcodes for this field according to its
 * kind. Only the members assigned in the matching case are touched; the
 * others keep whatever value they already had. */
void ObjectField::initField()
{
	switch ( type ) {
		case UserLocalType:
		case LhsElType:
		case ParamValType:
		case RedRhsType:
			inGetR = IN_GET_LOCAL_R;
			inGetWC = IN_GET_LOCAL_WC;
			inSetWC = IN_SET_LOCAL_WC;
			/* All three value getters use the same read instruction. */
			inGetValR = IN_GET_LOCAL_VAL_R;
			inGetValWC = IN_GET_LOCAL_VAL_R;
			inGetValWV = IN_GET_LOCAL_VAL_R;
			inSetValWC = IN_SET_LOCAL_VAL_WC;
			break;

		case ParamRefType:
			inGetR = IN_GET_LOCAL_REF_R;
			inGetWC = IN_GET_LOCAL_REF_WC;
			inSetWC = IN_SET_LOCAL_REF_WC;
			break;

		case UserFieldType:
			inGetR = IN_GET_FIELD_TREE_R;
			inGetWC = IN_GET_FIELD_TREE_WC;
			inGetWV = IN_GET_FIELD_TREE_WV;
			inSetWC = IN_SET_FIELD_TREE_WC;
			inSetWV = IN_SET_FIELD_TREE_WV;

			//inGetValR;
			inGetValR = IN_GET_FIELD_VAL_R;
			//inGetValWC = IN_GET_FIELD_VAL_WC;
			//inGetValWV;
			inSetValWC = IN_SET_FIELD_VAL_WC;
			//inSetValWV;
			break;

		case GenericElementType:
		case GenericDependentType:
		case StructFieldType:
			inGetR = IN_GET_STRUCT_R;
			inGetWC = IN_GET_STRUCT_WC;
			inGetWV = IN_GET_STRUCT_WV;
			inSetWC = IN_SET_STRUCT_WC;
			inSetWV = IN_SET_STRUCT_WV;
			/* All three value getters use the same read instruction. */
			inGetValR = IN_GET_STRUCT_VAL_R;
			inGetValWC = IN_GET_STRUCT_VAL_R;
			inGetValWV = IN_GET_STRUCT_VAL_R;
			inSetValWC = IN_SET_STRUCT_VAL_WC;
			inSetValWV = IN_SET_STRUCT_VAL_WV;
			break;

		case RhsNameType:
			inGetR = IN_GET_RHS_VAL_R;
			inGetWC = IN_GET_RHS_VAL_WC;
			inGetWV = IN_GET_RHS_VAL_WV;
			inSetWC = IN_SET_RHS_VAL_WC;
			/* NOTE(review): the WV setter is given the WC opcode, unlike
			 * the other cases that set inSetWV -- confirm this is
			 * intended. */
			inSetWV = IN_SET_RHS_VAL_WC;
			break;

		/* Inbuilts have instructions initialized outside the cons, at place of
		 * call. */
		case InbuiltFieldType:
		case InbuiltObjectType:
		case InbuiltOffType:
			break;

		/* Out of date impl. 
*/
		case LexSubstrType:
			break;
	}
}

/* Assign an offset to 'field' within this object and advance the running
 * nextOffset by the field's size. Local-frame kinds get negative offsets;
 * object and struct kinds get increasing non-negative offsets. Kinds listed
 * in the final group are left unplaced. */
void ObjectDef::placeField( Compiler *pd, ObjectField *field )
{
	UniqueType *fieldUT = field->typeRef->uniqueType;

	switch ( field->type ) {
		case ObjectField::LhsElType:
		case ObjectField::UserLocalType:
		case ObjectField::RedRhsType:

			/* Local frame fields. Move the running offset first since this is
			 * a negative off from the end. */
			nextOffset += sizeOfField( fieldUT );
			field->offset = -nextOffset;
			break;


		case ObjectField::GenericElementType: {

			/* Tree object frame fields. Record the position, then move the
			 * running offset. */
			field->offset = nextOffset;
			nextOffset += sizeOfField( fieldUT );

			/* A map-pointers element shares its offset with the
			 * associated key field, when there is one. */
			if ( fieldUT->typeId == TYPE_MAP_PTRS ) {
				if ( field->mapKeyField != 0 )
					field->mapKeyField->offset = field->offset;
			}

			break;
		}

		case ObjectField::UserFieldType:

			/* Tree object frame fields. Record the position, then move the
			 * running offset. */
			field->offset = nextOffset;
			nextOffset += sizeOfField( fieldUT );
			break;

		case ObjectField::StructFieldType:
			field->offset = nextOffset;
			nextOffset += sizeOfField( fieldUT );
			break;

		case ObjectField::GenericDependentType:
			/* There is an object field that this type depends on. When it is
			 * placed, this one will be placed as well. Nothing to do now. */

		case ObjectField::InbuiltFieldType:
		case ObjectField::InbuiltOffType:
		case ObjectField::InbuiltObjectType:
		case ObjectField::RhsNameType:
		case ObjectField::LexSubstrType:

		case ObjectField::ParamValType:
		case ObjectField::ParamRefType:
			break;
	}
}

/* Place the fields of every language element that has an object
 * definition. */
void Compiler::placeAllLanguageObjects()
{
	/* Init all user object fields (need consistent size). */
	for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
		ObjectDef *objDef = lel->objectDef;
		if ( objDef != 0 ) {
			/* Init all fields of the object. 
*/
			for ( FieldList::Iter f = objDef->fieldList; f.lte(); f++ )
				objDef->placeField( this, f->value );
		}
	}
}

/* Place the fields of every struct element's object definition. */
void Compiler::placeAllStructObjects()
{
	for ( StructElList::Iter s = structEls; s.lte(); s++ ) {
		ObjectDef *objectDef = s->structDef->objectDef;
		for ( FieldList::Iter f = objectDef->fieldList; f.lte(); f++ )
			objectDef->placeField( this, f->value );
	}
}

/* Place every field of one local frame. */
void Compiler::placeFrameFields( ObjectDef *localFrame )
{
	for ( FieldList::Iter f = localFrame->fieldList; f.lte(); f++ )
		localFrame->placeField( this, f->value );
}

/* Place the local-frame fields of every compiled block: functions (both
 * lists), reduction blocks, token translation blocks, pre-eof blocks and
 * the root block. */
void Compiler::placeAllFrameObjects()
{
	/* Functions. */
	for ( FunctionList::Iter f = functionList; f.lte(); f++ )
		placeFrameFields( f->localFrame );

	for ( FunctionList::Iter f = inHostList; f.lte(); f++ )
		placeFrameFields( f->localFrame );

	/* Reduction code. */
	for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) {
		if ( prod->redBlock != 0 )
			placeFrameFields( prod->redBlock->localFrame );
	}

	/* Token translation code. */
	for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
		if ( lel->transBlock != 0 ) {
			ObjectDef *localFrame = lel->transBlock->localFrame;
			/* NOTE(review): the first reCaptureVect.length() fields are
			 * placed here and then placed again by placeFrameFields
			 * below, which walks the whole field list -- confirm the
			 * double placement is intended. */
			if ( lel->tokenDef->reCaptureVect.length() > 0 ) {
				FieldList::Iter f = localFrame->fieldList;
				for ( int i = 0; i < lel->tokenDef->reCaptureVect.length(); i++, f++ )
					localFrame->placeField( this, f->value );
			}

			placeFrameFields( localFrame );
		}
	}

	/* Preeof blocks. */
	for ( RegionList::Iter r = regionList; r.lte(); r++ ) {
		if ( r->preEofBlock != 0 )
			placeFrameFields( r->preEofBlock->localFrame );
	}

	/* Root code. */
	placeFrameFields( rootLocalFrame );
}

/* Compute parameter offsets, the total parameter size and the parameter /
 * return type tables for a user function or user iterator. */
void Compiler::placeUserFunction( Function *func, bool isUserIter )
{
	/* Set up the parameters. 
*/ + long paramPos = 0, paramListSize = 0, paramOffset = 0; + UniqueType **paramUTs = new UniqueType*[func->paramList->length()]; + for ( ParameterList::Iter param = *func->paramList; param.lte(); param++, paramPos++ ) { + paramUTs[paramPos] = param->typeRef->uniqueType; + paramListSize += sizeOfField( paramUTs[paramPos] ); + } + + /* Param offset is relative to one past the last item in the array of + * words containing the args. */ + paramOffset = 0; + paramPos = 0; + for ( ParameterList::Iter param = *func->paramList; param.lte(); param++, paramPos++ ) { + /* How much space do we need to make for call overhead. */ + long frameAfterArgs = isUserIter ? IFR_AA : FR_AA; + + param->offset = frameAfterArgs + paramOffset; + + paramOffset += sizeOfField( paramUTs[paramPos] ); + } + + func->paramListSize = paramListSize; + func->paramUTs = paramUTs; + + func->objMethod->paramUTs = paramUTs; + + /* Insert the function into the global function map. */ + UniqueType *returnUT = func->typeRef != 0 ? + func->typeRef->uniqueType : uniqueTypeInt; + func->objMethod->returnUT = returnUT; + + func->objMethod->paramUTs = new UniqueType*[func->paramList->length()]; + memcpy( func->objMethod->paramUTs, paramUTs, + sizeof(UniqueType*) * func->paramList->length() ); +} + +void Compiler::placeAllFunctions() +{ + for ( FunctionList::Iter f = functionList; f.lte(); f++ ) + placeUserFunction( f, f->isUserIter ); + + for ( FunctionList::Iter f = inHostList; f.lte(); f++ ) + placeUserFunction( f, false ); +} + + +void Compiler::compileUserIter( Function *func, CodeVect &code ) +{ + CodeBlock *block = func->codeBlock; + + /* Compile the block. */ + block->compile( this, code ); + + /* Always yeild a nil at the end. This causes iteration to stop. */ + code.append( IN_LOAD_NIL ); + code.append( IN_YIELD ); +} + +void Compiler::compileUserIter( Function *func ) +{ + CodeBlock *block = func->codeBlock; + + /* Set up the context. 
*/
	compileContext = CompileFunction;
	curFunction = func;
	block->frameId = nextFrameId++;

	/* Compile for revert and commit. */
	revertOn = true;
	compileUserIter( func, block->codeWV );

	revertOn = false;
	compileUserIter( func, block->codeWC );

	/* Now that compilation is done variables are referenced. Make the local
	 * trees descriptor. */
	findLocals( block->localFrame, block );

	/* FIXME: Need to deal with the freeing of local trees. */
}

/* Called for each type of function compile: revert and commit.
 * Compiles the body, supplies a nil return value when the last statement is
 * not a return, patches every pending return jump to land on the final
 * IN_RET, and emits that IN_RET. */
void Compiler::compileFunction( Function *func, CodeVect &code )
{
	CodeBlock *block = func->codeBlock;

	/* Compile the block. */
	block->compile( this, code );

	/* Check for a return statement. */
	if ( block->stmtList->length() == 0 ||
			block->stmtList->tail->type != LangStmt::ReturnType )
	{
		/* Push the return value. */
		code.append( IN_LOAD_NIL );
		code.append( IN_SAVE_RET );
	}

	/* Compute the jump distance for the return jumps. */
	for ( LongVect::Iter rj = returnJumps; rj.lte(); rj++ ) {
		long distance = code.length() - *rj - 3;
		code.setHalf( *rj+1, distance );
	}

	/* Reset the vector of return jumps. */
	returnJumps.empty();

	/* Return cleans up the stack (including the args) and leaves the return
	 * value on the top. */
	code.append( IN_RET );
}

/* Compile a function twice, with revert on (codeWV) and off (codeWC), then
 * build its locals descriptor. */
void Compiler::compileFunction( Function *func )
{
	CodeBlock *block = func->codeBlock;

	/* Set up the compilation context. */
	compileContext = CompileFunction;
	curFunction = func;

	/* Assign a frame Id. */
	block->frameId = nextFrameId++;

	/* Compile once for revert. */
	revertOn = true;
	compileFunction( func, block->codeWV );

	/* Compile once for commit. */
	revertOn = false;
	compileFunction( func, block->codeWC );

	/* Now that compilation is done variables are referenced. Make the local
	 * trees descriptor. 
*/
	findLocals( block->localFrame, block );
}

/* Detach from replList every constructor whose 'parse' member is false.
 * The iterator is advanced before the element is detached so that the walk
 * stays valid. */
void Compiler::removeNonUnparsableRepls()
{
	for ( ConsList::Iter repl = replList; repl.lte(); ) {
		Constructor *maybeDel = repl++;
		if ( !maybeDel->parse )
			replList.detach( maybeDel );
	}
}

/* Compile everything that carries code, in this order: functions and user
 * iterators, reduction blocks (after recording each production's capture
 * copies), token translation blocks, pre-eof blocks, and finally the root
 * block. */
void Compiler::compileByteCode()
{
	/* Compile functions. */
	for ( FunctionList::Iter f = functionList; f.lte(); f++ ) {
		if ( f->isUserIter )
			compileUserIter( f );
		else
			compileFunction( f );
	}

	/* Compile the reduction code. */
	for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) {
		makeProdCopies( prod );
		if ( prod->redBlock != 0 )
			compileReductionCode( prod );
	}

	/* Compile the token translation code. */
	for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
		if ( lel->transBlock != 0 )
			compileTranslateBlock( lel );
	}

	/* Compile preeof blocks. */
	for ( RegionList::Iter r = regionList; r.lte(); r++ ) {
		if ( r->preEofBlock != 0 )
			compilePreEof( r );
	}

	/* Compile the init code */
	compileRootBlock( );
	removeNonUnparsableRepls();
}
diff --git a/src/tree.c b/src/tree.c
new file mode 100644
index 00000000..e05681b9
--- /dev/null
+++ b/src/tree.c
@@ -0,0 +1,1655 @@
/*
 * Copyright 2007-2018 Adrian Thurston <thurston@colm.net>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software. 
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <string.h>
#include <stdlib.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>
#include <assert.h>

#include <colm/tree.h>
#include <colm/pool.h>
#include <colm/bytecode.h>
#include <colm/debug.h>

/* Allocate a singly linked list of 'length' kids and return its head, or 0
 * when length is not positive. Only the 'next' links are assigned here. */
kid_t *alloc_attrs( program_t *prg, long length )
{
	kid_t *cur = 0;
	long i;
	for ( i = 0; i < length; i++ ) {
		/* Each new kid is pushed on the front of the list. */
		kid_t *next = cur;
		cur = kid_allocate( prg );
		cur->next = next;
	}
	return cur;
}

/* Free every kid on the list starting at 'attrs'. The trees the kids point
 * at are not touched. */
void free_attrs( program_t *prg, kid_t *attrs )
{
	kid_t *cur = attrs;
	while ( cur != 0 ) {
		kid_t *next = cur->next;
		kid_free( prg, cur );
		cur = next;
	}
}

/* Free every kid on the list starting at 'kid'. The trees the kids point at
 * are not touched. */
void free_kid_list( program_t *prg, kid_t *kid )
{
	while ( kid != 0 ) {
		kid_t *next = kid->next;
		kid_free( prg, kid );
		kid = next;
	}
}

/* Store 'val' in the attribute slot at index 'pos'. Attribute slots follow
 * the ignore kids at the head of the child list (one kid is skipped per
 * ignore flag that is set). No reference counting is done here. */
static void colm_tree_set_attr( tree_t *tree, long pos, tree_t *val )
{
	long i;
	kid_t *kid = tree->child;

	if ( tree->flags & AF_LEFT_IGNORE )
		kid = kid->next;
	if ( tree->flags & AF_RIGHT_IGNORE )
		kid = kid->next;

	for ( i = 0; i < pos; i++ )
		kid = kid->next;
	kid->tree = val;
}

/* Return the tree held in the attribute slot at index 'pos', skipping one
 * kid per ignore flag that is set. */
tree_t *colm_get_attr( tree_t *tree, long pos )
{
	long i;
	kid_t *kid = tree->child;

	if ( tree->flags & AF_LEFT_IGNORE )
		kid = kid->next;
	if ( tree->flags & AF_RIGHT_IGNORE )
		kid = kid->next;

	for ( i = 0; i < pos; i++ )
		kid = kid->next;
	return kid->tree;
}


/* Return the tree of the second kid after the ignore kids. */
tree_t *colm_get_repeat_next( tree_t *tree )
{
	kid_t *kid = tree->child;

	if ( tree->flags & AF_LEFT_IGNORE )
		kid = kid->next;
	if ( 
tree->flags & AF_RIGHT_IGNORE )
		kid = kid->next;

	return kid->next->tree;
}

/* Return the tree of the first kid after the ignore kids. */
tree_t *colm_get_repeat_val( tree_t *tree )
{
	kid_t *kid = tree->child;

	if ( tree->flags & AF_LEFT_IGNORE )
		kid = kid->next;
	if ( tree->flags & AF_RIGHT_IGNORE )
		kid = kid->next;

	return kid->tree;
}

/* Left-recursive counterpart of colm_get_repeat_next: the tree of the first
 * kid after the ignore kids. */
tree_t *colm_get_left_repeat_next( tree_t *tree )
{
	kid_t *kid = tree->child;

	if ( tree->flags & AF_LEFT_IGNORE )
		kid = kid->next;
	if ( tree->flags & AF_RIGHT_IGNORE )
		kid = kid->next;

	return kid->tree;
}

/* Left-recursive counterpart of colm_get_repeat_val: the tree of the second
 * kid after the ignore kids. */
tree_t *colm_get_left_repeat_val( tree_t *tree )
{
	kid_t *kid = tree->child;

	if ( tree->flags & AF_LEFT_IGNORE )
		kid = kid->next;
	if ( tree->flags & AF_RIGHT_IGNORE )
		kid = kid->next;

	return kid->next->tree;
}

/* Return nonzero when the tree has no kids after its ignore kids. */
int colm_repeat_end( tree_t *tree )
{
	kid_t *kid = tree->child;

	if ( tree->flags & AF_LEFT_IGNORE )
		kid = kid->next;
	if ( tree->flags & AF_RIGHT_IGNORE )
		kid = kid->next;

	return kid == 0;
}

/* Return nonzero when the first kid after the ignore kids has no
 * successor. That kid is dereferenced, so it must exist. */
int colm_list_last( tree_t *tree )
{
	kid_t *kid = tree->child;

	if ( tree->flags & AF_LEFT_IGNORE )
		kid = kid->next;
	if ( tree->flags & AF_RIGHT_IGNORE )
		kid = kid->next;

	return kid->next == 0;
}

/* Return the kid (rather than its tree) of the attribute slot at index
 * 'pos', skipping one kid per ignore flag that is set. */
kid_t *get_attr_kid( tree_t *tree, long pos )
{
	long i;
	kid_t *kid = tree->child;

	if ( tree->flags & AF_LEFT_IGNORE )
		kid = kid->next;
	if ( tree->flags & AF_RIGHT_IGNORE )
		kid = kid->next;

	for ( i = 0; i < pos; i++ )
		kid = kid->next;
	return kid;
}

/* Append list2 to the end of list1 and return the head of the result. If
 * either list is empty the other one is returned unchanged. */
kid_t *kid_list_concat( kid_t *list1, kid_t *list2 )
{
	if ( list1 == 0 )
		return list2;
	else if ( list2 == 0 )
		return list1;

	/* Walk to the last kid of list1 and link list2 after it. */
	kid_t *dest = list1;
	while ( dest->next != 0 )
		dest = dest->next;
	dest->next = list2;
	return list1;
}

/* Allocate a tree-sized object, fill it in as a pointer_t with id
 * LEL_ID_PTR and the given value, and return it as a tree_t. */
tree_t *colm_construct_pointer( program_t *prg, value_t value )
{
	pointer_t *pointer = (pointer_t*) tree_allocate( prg );
	pointer->id = LEL_ID_PTR;
	pointer->value = value;

	return (tree_t*)pointer;
}

/* Return the value stored in a pointer tree. */
value_t colm_get_pointer_val( tree_t *ptr )
{
	return 
((pointer_t*)ptr)->value;
}


/* Allocate a tree for language element 'id' carrying 'tokdata', with one
 * attribute kid per object_length entry of that element.
 * NOTE(review): refs starts at 0 here, whereas the other constructors in
 * this file start at 1 -- confirm callers take the first reference. */
tree_t *colm_construct_term( program_t *prg, word_t id, head_t *tokdata )
{
	struct lang_el_info *lel_info = prg->rtd->lel_info;

	tree_t *tree = tree_allocate( prg );
	tree->id = id;
	tree->refs = 0;
	tree->tokdata = tokdata;

	int object_length = lel_info[tree->id].object_length;
	tree->child = alloc_attrs( prg, object_length );

	return tree;
}


kid_t *construct_kid( program_t *prg, tree_t **bindings, kid_t *prev, long pat );

/* Build a kid list from the chain of pattern nodes that starts at
 * 'ignore_ind' and follows each node's 'next' index until it goes negative.
 * Every kid gets a fresh tree (refs 1) holding the node's id and a string
 * head made from the node's data. Returns the first kid, or 0 for an empty
 * chain. */
static kid_t *construct_ignore_list( program_t *prg, long ignore_ind )
{
	struct pat_cons_node *nodes = prg->rtd->pat_repl_nodes;

	kid_t *first = 0, *last = 0;
	while ( ignore_ind >= 0 ) {
		head_t *ignore_data = colm_string_alloc_pointer( prg, nodes[ignore_ind].data,
				nodes[ignore_ind].length );

		tree_t *ign_tree = tree_allocate( prg );
		ign_tree->refs = 1;
		ign_tree->id = nodes[ignore_ind].id;
		ign_tree->tokdata = ignore_data;

		kid_t *ign_kid = kid_allocate( prg );
		ign_kid->tree = ign_tree;
		ign_kid->next = 0;

		/* Append to the list being built. */
		if ( last == 0 )
			first = ign_kid;
		else
			last->next = ign_kid;

		ignore_ind = nodes[ignore_ind].next;
		last = ign_kid;
	}

	return first;
}

/* Build the ignore list named by the left_ignore index of pattern node
 * 'pat'. */
static kid_t *construct_left_ignore_list( program_t *prg, long pat )
{
	struct pat_cons_node *nodes = prg->rtd->pat_repl_nodes;
	return construct_ignore_list( prg, nodes[pat].left_ignore );
}

/* Build the ignore list named by the right_ignore index of pattern node
 * 'pat'. */
static kid_t *construct_right_ignore_list( program_t *prg, long pat )
{
	struct pat_cons_node *nodes = prg->rtd->pat_repl_nodes;
	return construct_ignore_list( prg, nodes[pat].right_ignore );
}

/* Give 'tree' a left ignore: a new kid referring to 'ignore_list' (which is
 * upreffed) becomes the first child and AF_LEFT_IGNORE is set. The tree
 * must not already have a left ignore. */
static void ins_left_ignore( program_t *prg, tree_t *tree, tree_t *ignore_list )
{
	assert( ! (tree->flags & AF_LEFT_IGNORE) );

	/* Allocate. */
	kid_t *kid = kid_allocate( prg );
	kid->tree = ignore_list;
	colm_tree_upref( prg, ignore_list );

	/* Attach it. 
*/
	kid->next = tree->child;
	tree->child = kid;

	tree->flags |= AF_LEFT_IGNORE;
}

/* Give 'tree' a right ignore: a new kid referring to 'ignore_list' (which
 * is upreffed) is linked in after the left-ignore kid when there is one,
 * otherwise at the head of the child list, and AF_RIGHT_IGNORE is set. The
 * tree must not already have a right ignore. */
static void ins_right_ignore( program_t *prg, tree_t *tree, tree_t *ignore_list )
{
	assert( ! (tree->flags & AF_RIGHT_IGNORE) );

	/* Insert an ignore head in the child list. */
	kid_t *kid = kid_allocate( prg );
	kid->tree = ignore_list;
	colm_tree_upref( prg, ignore_list );

	/* Attach it. */
	if ( tree->flags & AF_LEFT_IGNORE ) {
		kid->next = tree->child->next;
		tree->child->next = kid;
	}
	else {
		kid->next = tree->child;
		tree->child = kid;
	}

	tree->flags |= AF_RIGHT_IGNORE;
}

/* Attach 'right_ignore' as the right ignore of 'push_to'. The tree is split
 * first, and the (possibly new) tree is returned. If a right ignore is
 * already present it is kept by hanging it off the new list as that list's
 * left ignore. */
tree_t *push_right_ignore( program_t *prg, tree_t *push_to, tree_t *right_ignore )
{
	/* About to alter the data tree. Split first. */
	push_to = split_tree( prg, push_to );

	if ( push_to->flags & AF_RIGHT_IGNORE ) {
		/* The previous token already has a right ignore. Merge by
		 * attaching it as a left ignore of the new list. */
		kid_t *cur_ignore = tree_right_ignore_kid( prg, push_to );
		ins_left_ignore( prg, right_ignore, cur_ignore->tree );

		/* Replace the current ignore. Safe to access refs here because we just
		 * upreffed it in ins_left_ignore. */
		cur_ignore->tree->refs -= 1;
		cur_ignore->tree = right_ignore;
		colm_tree_upref( prg, right_ignore );
	}
	else {
		/* Attach the ignore list. */
		ins_right_ignore( prg, push_to, right_ignore );
	}

	return push_to;
}

/* Attach 'left_ignore' as the left ignore of 'push_to'. The tree is split
 * first, and the (possibly new) tree is returned. If a left ignore is
 * already present it is kept by hanging it off the new list as that list's
 * right ignore. */
tree_t *push_left_ignore( program_t *prg, tree_t *push_to, tree_t *left_ignore )
{
	push_to = split_tree( prg, push_to );

	/* Attach as left ignore to the token we are sending. */
	if ( push_to->flags & AF_LEFT_IGNORE ) {
		/* The token already has a left-ignore. Merge by attaching it as a
		 * right ignore of the new list. */
		kid_t *cur_ignore = tree_left_ignore_kid( prg, push_to );
		ins_right_ignore( prg, left_ignore, cur_ignore->tree );

		/* Replace the current ignore. Safe to access refs here because we just
		 * upreffed it in ins_right_ignore. 
*/ + cur_ignore->tree->refs -= 1; + cur_ignore->tree = left_ignore; + colm_tree_upref( prg, left_ignore ); + } + else { + /* Attach the ignore list. */ + ins_left_ignore( prg, push_to, left_ignore ); + } + + return push_to; +} + +static void rem_left_ignore( program_t *prg, tree_t **sp, tree_t *tree ) +{ + assert( tree->flags & AF_LEFT_IGNORE ); + + kid_t *next = tree->child->next; + colm_tree_downref( prg, sp, tree->child->tree ); + kid_free( prg, tree->child ); + tree->child = next; + + tree->flags &= ~AF_LEFT_IGNORE; +} + +static void rem_right_ignore( program_t *prg, tree_t **sp, tree_t *tree ) +{ + assert( tree->flags & AF_RIGHT_IGNORE ); + + if ( tree->flags & AF_LEFT_IGNORE ) { + kid_t *next = tree->child->next->next; + colm_tree_downref( prg, sp, tree->child->next->tree ); + kid_free( prg, tree->child->next ); + tree->child->next = next; + } + else { + kid_t *next = tree->child->next; + colm_tree_downref( prg, sp, tree->child->tree ); + kid_free( prg, tree->child ); + tree->child = next; + } + + tree->flags &= ~AF_RIGHT_IGNORE; +} + +tree_t *pop_right_ignore( program_t *prg, tree_t **sp, tree_t *pop_from, tree_t **right_ignore ) +{ + /* Modifying the tree we are detaching from. */ + pop_from = split_tree( prg, pop_from ); + + kid_t *ri_kid = tree_right_ignore_kid( prg, pop_from ); + + /* If the right ignore has a left ignore, then that was the original + * right ignore. */ + kid_t *li = tree_left_ignore_kid( prg, ri_kid->tree ); + if ( li != 0 ) { + colm_tree_upref( prg, li->tree ); + rem_left_ignore( prg, sp, ri_kid->tree ); + *right_ignore = ri_kid->tree; + colm_tree_upref( prg, *right_ignore ); + ri_kid->tree = li->tree; + } + else { + *right_ignore = ri_kid->tree; + colm_tree_upref( prg, *right_ignore ); + rem_right_ignore( prg, sp, pop_from ); + } + + return pop_from; +} + +tree_t *pop_left_ignore( program_t *prg, tree_t **sp, tree_t *pop_from, tree_t **left_ignore ) +{ + /* Modifying, make the write safe. 
*/ + pop_from = split_tree( prg, pop_from ); + + kid_t *li_kid = tree_left_ignore_kid( prg, pop_from ); + + /* If the left ignore has a right ignore, then that was the original + * left ignore. */ + kid_t *ri = tree_right_ignore_kid( prg, li_kid->tree ); + if ( ri != 0 ) { + colm_tree_upref( prg, ri->tree ); + rem_right_ignore( prg, sp, li_kid->tree ); + *left_ignore = li_kid->tree; + colm_tree_upref( prg, *left_ignore ); + li_kid->tree = ri->tree; + } + else { + *left_ignore = li_kid->tree; + colm_tree_upref( prg, *left_ignore ); + rem_left_ignore( prg, sp, pop_from ); + } + + return pop_from; +} + +tree_t *colm_construct_object( program_t *prg, kid_t *kid, tree_t **bindings, long lang_el_id ) +{ + struct lang_el_info *lel_info = prg->rtd->lel_info; + tree_t *tree = 0; + + tree = tree_allocate( prg ); + tree->id = lang_el_id; + tree->refs = 1; + tree->tokdata = 0; + tree->prod_num = 0; + + int object_length = lel_info[tree->id].object_length; + + kid_t *attrs = alloc_attrs( prg, object_length ); + kid_t *child = 0; + + tree->child = kid_list_concat( attrs, child ); + + return tree; +} + +/* Returns an uprefed tree. Saves us having to downref and bindings to zero to + * return a zero-ref tree. */ +tree_t *colm_construct_tree( program_t *prg, kid_t *kid, tree_t **bindings, long pat ) +{ + struct pat_cons_node *nodes = prg->rtd->pat_repl_nodes; + struct lang_el_info *lel_info = prg->rtd->lel_info; + tree_t *tree = 0; + + if ( nodes[pat].bind_id > 0 ) { + /* All bindings have been uprefed. 
*/
		tree = bindings[nodes[pat].bind_id];

		/* Note that 'ignore' is declared twice below: the outer long holds
		 * the node's ignore index, the inner kid_t* (which shadows it)
		 * holds the list that was built. */
		long ignore = nodes[pat].left_ignore;
		tree_t *left_ignore = 0;
		if ( ignore >= 0 ) {
			kid_t *ignore = construct_left_ignore_list( prg, pat );

			/* Wrap the list in an ignore tree. Its refs field is not
			 * assigned here; push_left_ignore is what attaches it. */
			left_ignore = tree_allocate( prg );
			left_ignore->id = LEL_ID_IGNORE;
			left_ignore->child = ignore;

			tree = push_left_ignore( prg, tree, left_ignore );
		}

		ignore = nodes[pat].right_ignore;
		tree_t *right_ignore = 0;
		if ( ignore >= 0 ) {
			kid_t *ignore = construct_right_ignore_list( prg, pat );

			right_ignore = tree_allocate( prg );
			right_ignore->id = LEL_ID_IGNORE;
			right_ignore->child = ignore;

			tree = push_right_ignore( prg, tree, right_ignore );
		}
	}
	else {
		/* Not a binding: build a new tree from the pattern node. */
		tree = tree_allocate( prg );
		tree->id = nodes[pat].id;
		tree->refs = 1;
		tree->tokdata = nodes[pat].length == 0 ? 0 :
				colm_string_alloc_pointer( prg,
				nodes[pat].data, nodes[pat].length );
		tree->prod_num = nodes[pat].prod_num;

		int object_length = lel_info[tree->id].object_length;

		/* Attribute kids come first, then the constructed children. */
		kid_t *attrs = alloc_attrs( prg, object_length );
		kid_t *child = construct_kid( prg, bindings,
				0, nodes[pat].child );

		tree->child = kid_list_concat( attrs, child );

		/* Right first, then left. 
*/
		/* Each ignore is pushed on the head of the child list, so doing
		 * the right one first leaves the left one in front of it. */
		kid_t *ignore = construct_right_ignore_list( prg, pat );
		if ( ignore != 0 ) {
			tree_t *ignore_list = tree_allocate( prg );
			ignore_list->id = LEL_ID_IGNORE;
			ignore_list->refs = 1;
			ignore_list->child = ignore;

			kid_t *ignore_head = kid_allocate( prg );
			ignore_head->tree = ignore_list;
			ignore_head->next = tree->child;
			tree->child = ignore_head;

			tree->flags |= AF_RIGHT_IGNORE;
		}

		ignore = construct_left_ignore_list( prg, pat );
		if ( ignore != 0 ) {
			tree_t *ignore_list = tree_allocate( prg );
			ignore_list->id = LEL_ID_IGNORE;
			ignore_list->refs = 1;
			ignore_list->child = ignore;

			kid_t *ignore_head = kid_allocate( prg );
			ignore_head->tree = ignore_list;
			ignore_head->next = tree->child;
			tree->child = ignore_head;

			tree->flags |= AF_LEFT_IGNORE;
		}

		/* Capture attributes are read from the pattern nodes that
		 * directly follow 'pat' (pat+1, pat+2, ...), one per capture
		 * attribute of this language element. */
		int i;
		for ( i = 0; i < lel_info[tree->id].num_capture_attr; i++ ) {
			long ci = pat+1+i;
			CaptureAttr *ca = prg->rtd->capture_attr + lel_info[tree->id].capture_attr + i;
			tree_t *attr = tree_allocate( prg );
			attr->id = nodes[ci].id;
			attr->refs = 1;
			attr->tokdata = nodes[ci].length == 0 ? 0 :
					colm_string_alloc_pointer( prg,
					nodes[ci].data, nodes[ci].length );

			colm_tree_set_attr( tree, ca->offset, attr );
		}
	}

	return tree;
}

/* Build the kid list for pattern node 'pat' and its 'next' siblings,
 * constructing each kid's tree with colm_construct_tree. Returns 0 when pat
 * is -1. The recursion goes one level deeper per sibling. The 'prev'
 * parameter is not used by this function. */
kid_t *construct_kid( program_t *prg, tree_t **bindings, kid_t *prev, long pat )
{
	struct pat_cons_node *nodes = prg->rtd->pat_repl_nodes;
	kid_t *kid = 0;

	if ( pat != -1 ) {
		kid = kid_allocate( prg );
		kid->tree = colm_construct_tree( prg, kid, bindings, pat );

		/* Recurse down next. 
*/ + kid_t *next = construct_kid( prg, bindings, + kid, nodes[pat].next ); + + kid->next = next; + } + + return kid; +} + +tree_t *colm_construct_token( program_t *prg, tree_t **args, long nargs ) +{ + value_t id_int = (value_t)args[0]; + str_t *text_str = (str_t*)args[1]; + + long id = (long)id_int; + head_t *tokdata = string_copy( prg, text_str->value ); + + struct lang_el_info *lel_info = prg->rtd->lel_info; + tree_t *tree; + + if ( lel_info[id].ignore ) { + tree = tree_allocate( prg ); + tree->refs = 1; + tree->id = id; + tree->tokdata = tokdata; + } + else { + long object_length = lel_info[id].object_length; + assert( nargs-2 <= object_length ); + + kid_t *attrs = alloc_attrs( prg, object_length ); + + tree = tree_allocate( prg ); + tree->id = id; + tree->refs = 1; + tree->tokdata = tokdata; + + tree->child = attrs; + + long i; + for ( i = 2; i < nargs; i++ ) { + colm_tree_set_attr( tree, i-2, args[i] ); + colm_tree_upref( prg, colm_get_attr( tree, i-2 ) ); + } + } + return tree; +} + +tree_t *cast_tree( program_t *prg, int lang_el_id, tree_t *tree ) +{ + struct lang_el_info *lel_info = prg->rtd->lel_info; + + /* Need to keep a lookout for next down. If + * copying it, return the copy. */ + tree_t *new_tree = tree_allocate( prg ); + + new_tree->id = lang_el_id; + new_tree->tokdata = string_copy( prg, tree->tokdata ); + + /* Invalidate the production number. */ + new_tree->prod_num = -1; + + /* Copy the child list. Start with ignores, then the list. */ + kid_t *child = tree->child, *last = 0; + + /* Flags we are interested in. */ + new_tree->flags |= tree->flags & ( AF_LEFT_IGNORE | AF_RIGHT_IGNORE ); + + int ignores = 0; + if ( tree->flags & AF_LEFT_IGNORE ) + ignores += 1; + if ( tree->flags & AF_RIGHT_IGNORE ) + ignores += 1; + + /* Igores. */ + while ( ignores-- > 0 ) { + kid_t *new_kid = kid_allocate( prg ); + + new_kid->tree = child->tree; + new_kid->next = 0; + new_kid->tree->refs += 1; + + /* Store the first child. 
*/ + if ( last == 0 ) + new_tree->child = new_kid; + else + last->next = new_kid; + + child = child->next; + last = new_kid; + } + + /* Skip over the source's attributes. */ + int object_length = lel_info[tree->id].object_length; + while ( object_length-- > 0 ) + child = child->next; + + /* Allocate the target type's kids. */ + object_length = lel_info[lang_el_id].object_length; + while ( object_length-- > 0 ) { + kid_t *new_kid = kid_allocate( prg ); + + new_kid->tree = 0; + new_kid->next = 0; + + /* Store the first child. */ + if ( last == 0 ) + new_tree->child = new_kid; + else + last->next = new_kid; + + last = new_kid; + } + + /* Copy the source's children. */ + while ( child != 0 ) { + kid_t *new_kid = kid_allocate( prg ); + + new_kid->tree = child->tree; + new_kid->next = 0; + new_kid->tree->refs += 1; + + /* Store the first child. */ + if ( last == 0 ) + new_tree->child = new_kid; + else + last->next = new_kid; + + child = child->next; + last = new_kid; + } + + return new_tree; +} + +tree_t *make_tree( program_t *prg, tree_t **args, long nargs ) +{ + value_t id_int = (value_t)args[0]; + + long id = (long)id_int; + struct lang_el_info *lel_info = prg->rtd->lel_info; + + tree_t *tree = tree_allocate( prg ); + tree->id = id; + tree->refs = 1; + + long object_length = lel_info[id].object_length; + kid_t *attrs = alloc_attrs( prg, object_length ); + + kid_t *last = 0, *child = 0; + for ( id = 1; id < nargs; id++ ) { + kid_t *kid = kid_allocate( prg ); + kid->tree = args[id]; + colm_tree_upref( prg, kid->tree ); + + if ( last == 0 ) + child = kid; + else + last->next = kid; + + last = kid; + } + + tree->child = kid_list_concat( attrs, child ); + + return tree; +} + +int test_false( program_t *prg, tree_t *tree ) +{ + int flse = ( + tree == 0 || + tree == prg->false_val + ); + return flse; +} + +kid_t *copy_ignore_list( program_t *prg, kid_t *ignore_header ) +{ + kid_t *new_header = kid_allocate( prg ); + kid_t *last = 0, *ic = (kid_t*)ignore_header->tree; + while 
( ic != 0 ) { + kid_t *new_ic = kid_allocate( prg ); + + new_ic->tree = ic->tree; + new_ic->tree->refs += 1; + + /* List pointers. */ + if ( last == 0 ) + new_header->tree = (tree_t*)new_ic; + else + last->next = new_ic; + + ic = ic->next; + last = new_ic; + } + return new_header; +} + +kid_t *copy_kid_list( program_t *prg, kid_t *kid_list ) +{ + kid_t *new_list = 0, *last = 0, *ic = kid_list; + + while ( ic != 0 ) { + kid_t *new_ic = kid_allocate( prg ); + + new_ic->tree = ic->tree; + colm_tree_upref( prg, new_ic->tree ); + + /* List pointers. */ + if ( last == 0 ) + new_list = new_ic; + else + last->next = new_ic; + + ic = ic->next; + last = new_ic; + } + return new_list; +} + +/* New tree has zero ref. */ +tree_t *copy_real_tree( program_t *prg, tree_t *tree, kid_t *old_next_down, kid_t **new_next_down ) +{ + /* Need to keep a lookout for next down. If + * copying it, return the copy. */ + tree_t *new_tree = tree_allocate( prg ); + + new_tree->id = tree->id; + new_tree->tokdata = string_copy( prg, tree->tokdata ); + new_tree->prod_num = tree->prod_num; + + /* Copy the child list. Start with ignores, then the list. */ + kid_t *child = tree->child, *last = 0; + + /* Left ignores. */ + if ( tree->flags & AF_LEFT_IGNORE ) { + new_tree->flags |= AF_LEFT_IGNORE; +// kid_t *newHeader = copyIgnoreList( prg, child ); +// +// /* Always the head. */ +// newTree->child = newHeader; +// +// child = child->next; +// last = newHeader; + } + + /* Right ignores. */ + if ( tree->flags & AF_RIGHT_IGNORE ) { + new_tree->flags |= AF_RIGHT_IGNORE; +// kid_t *newHeader = copyIgnoreList( prg, child ); +// if ( last == 0 ) +// newTree->child = newHeader; +// else +// last->next = newHeader; +// child = child->next; +// last = newHeader; + } + + /* Attributes and children. */ + while ( child != 0 ) { + kid_t *new_kid = kid_allocate( prg ); + + /* Watch out for next down. 
*/ + if ( child == old_next_down ) + *new_next_down = new_kid; + + new_kid->tree = child->tree; + new_kid->next = 0; + + /* May be an attribute. */ + if ( new_kid->tree != 0 ) + new_kid->tree->refs += 1; + + /* Store the first child. */ + if ( last == 0 ) + new_tree->child = new_kid; + else + last->next = new_kid; + + child = child->next; + last = new_kid; + } + + return new_tree; +} + + +tree_t *colm_copy_tree( program_t *prg, tree_t *tree, kid_t *old_next_down, kid_t **new_next_down ) +{ + assert( tree->id != LEL_ID_PTR && tree->id != LEL_ID_STR ); + + tree = copy_real_tree( prg, tree, old_next_down, new_next_down ); + + assert( tree->refs == 0 ); + + return tree; +} + +tree_t *split_tree( program_t *prg, tree_t *tree ) +{ + if ( tree != 0 ) { + assert( tree->refs >= 1 ); + + if ( tree->refs > 1 ) { + kid_t *old_next_down = 0, *new_next_down = 0; + tree_t *new_tree = colm_copy_tree( prg, tree, old_next_down, &new_next_down ); + colm_tree_upref( prg, new_tree ); + + /* Downref the original. Don't need to consider freeing because + * refs were > 1. */ + tree->refs -= 1; + + tree = new_tree; + } + + assert( tree->refs == 1 ); + } + return tree; +} + +/* We can't make recursive calls here since the tree we are freeing may be + * very large. Need the VM stack. */ +void tree_free_rec( program_t *prg, tree_t **sp, tree_t *tree ) +{ + tree_t **top = vm_ptop(); + +free_tree: + switch ( tree->id ) { + case LEL_ID_PTR: + tree_free( prg, tree ); + break; + case LEL_ID_STR: { + str_t *str = (str_t*) tree; + string_free( prg, str->value ); + tree_free( prg, tree ); + break; + } + default: { + if ( tree->id != LEL_ID_IGNORE ) + string_free( prg, tree->tokdata ); + + /* Attributes and grammar-based children. */ + kid_t *child = tree->child; + while ( child != 0 ) { + kid_t *next = child->next; + vm_push_tree( child->tree ); + kid_free( prg, child ); + child = next; + } + + tree_free( prg, tree ); + break; + }} + + /* Any trees to downref? 
*/ + while ( sp != top ) { + tree = vm_pop_tree(); + if ( tree != 0 ) { + assert( tree->refs > 0 ); + tree->refs -= 1; + if ( tree->refs == 0 ) + goto free_tree; + } + } +} + +void colm_tree_upref( program_t *prg, tree_t *tree ) +{ + if ( tree != 0 ) { + assert( tree->id < prg->rtd->first_struct_el_id ); + tree->refs += 1; + } +} + +void colm_tree_downref( program_t *prg, tree_t **sp, tree_t *tree ) +{ + if ( tree != 0 ) { + assert( tree->id < prg->rtd->first_struct_el_id ); + assert( tree->refs > 0 ); + tree->refs -= 1; + if ( tree->refs == 0 ) + tree_free_rec( prg, sp, tree ); + } +} + +/* We can't make recursive calls here since the tree we are freeing may be + * very large. Need the VM stack. */ +void object_free_rec( program_t *prg, tree_t **sp, tree_t *tree ) +{ + tree_t **top = vm_ptop(); + +free_tree: + + switch ( tree->id ) { + case LEL_ID_STR: { + str_t *str = (str_t*) tree; + string_free( prg, str->value ); + tree_free( prg, tree ); + break; + } + case LEL_ID_PTR: { + tree_free( prg, tree ); + break; + } + default: { + if ( tree->id != LEL_ID_IGNORE ) + string_free( prg, tree->tokdata ); + + /* Attributes and grammar-based children. */ + kid_t *child = tree->child; + while ( child != 0 ) { + kid_t *next = child->next; + vm_push_tree( child->tree ); + kid_free( prg, child ); + child = next; + } + + tree_free( prg, tree ); + break; + }} + + /* Any trees to downref? */ + while ( sp != top ) { + tree = vm_pop_tree(); + if ( tree != 0 ) { + assert( tree->refs > 0 ); + tree->refs -= 1; + if ( tree->refs == 0 ) + goto free_tree; + } + } +} + +void object_downref( program_t *prg, tree_t **sp, tree_t *tree ) +{ + if ( tree != 0 ) { + assert( tree->refs > 0 ); + tree->refs -= 1; + if ( tree->refs == 0 ) + object_free_rec( prg, sp, tree ); + } +} + +/* Find the first child of a tree. 
*/ +kid_t *tree_child( program_t *prg, const tree_t *tree ) +{ + struct lang_el_info *lel_info = prg->rtd->lel_info; + kid_t *kid = tree->child; + + if ( tree->flags & AF_LEFT_IGNORE ) + kid = kid->next; + if ( tree->flags & AF_RIGHT_IGNORE ) + kid = kid->next; + + /* Skip over attributes. */ + long object_length = lel_info[tree->id].object_length; + long a; + for ( a = 0; a < object_length; a++ ) + kid = kid->next; + + return kid; +} + +/* Detach at the first real child of a tree. */ +kid_t *tree_extract_child( program_t *prg, tree_t *tree ) +{ + struct lang_el_info *lel_info = prg->rtd->lel_info; + kid_t *kid = tree->child, *last = 0; + + if ( tree->flags & AF_LEFT_IGNORE ) + kid = kid->next; + if ( tree->flags & AF_RIGHT_IGNORE ) + kid = kid->next; + + /* Skip over attributes. */ + long a, object_length = lel_info[tree->id].object_length; + for ( a = 0; a < object_length; a++ ) { + last = kid; + kid = kid->next; + } + + if ( last == 0 ) + tree->child = 0; + else + last->next = 0; + + return kid; +} + + +/* Find the first child of a tree. 
*/ +kid_t *tree_attr( program_t *prg, const tree_t *tree ) +{ + kid_t *kid = tree->child; + + if ( tree->flags & AF_LEFT_IGNORE ) + kid = kid->next; + if ( tree->flags & AF_RIGHT_IGNORE ) + kid = kid->next; + + return kid; +} + +tree_t *tree_left_ignore( program_t *prg, tree_t *tree ) +{ + if ( tree->flags & AF_LEFT_IGNORE ) + return tree->child->tree; + return 0; +} + +tree_t *tree_right_ignore( program_t *prg, tree_t *tree ) +{ + if ( tree->flags & AF_RIGHT_IGNORE ) { + if ( tree->flags & AF_LEFT_IGNORE ) + return tree->child->next->tree; + else + return tree->child->tree; + } + return 0; +} + +kid_t *tree_left_ignore_kid( program_t *prg, tree_t *tree ) +{ + if ( tree->flags & AF_LEFT_IGNORE ) + return tree->child; + return 0; +} + +kid_t *tree_right_ignore_kid( program_t *prg, tree_t *tree ) +{ + if ( tree->flags & AF_RIGHT_IGNORE ) { + if ( tree->flags & AF_LEFT_IGNORE ) + return tree->child->next; + else + return tree->child; + } + return 0; +} + +void ref_set_value( program_t *prg, tree_t **sp, ref_t *ref, tree_t *v ) +{ + colm_tree_downref( prg, sp, ref->kid->tree ); + ref->kid->tree = v; +} + +tree_t *get_rhs_el( program_t *prg, tree_t *lhs, long position ) +{ + kid_t *pos = tree_child( prg, lhs ); + while ( position > 0 ) { + pos = pos->next; + position -= 1; + } + return pos->tree; +} + +void set_rhs_el( program_t *prg, tree_t *lhs, long position, tree_t *value ) +{ + kid_t *pos = tree_child( prg, lhs ); + while ( position > 0 ) { + pos = pos->next; + position -= 1; + } + pos->tree = value; +} + + +kid_t *get_rhs_el_kid( program_t *prg, tree_t *lhs, long position ) +{ + kid_t *pos = tree_child( prg, lhs ); + while ( position > 0 ) { + pos = pos->next; + position -= 1; + } + return pos; +} + +parse_tree_t *get_rhs_parse_tree( program_t *prg, parse_tree_t *lhs, long position ) +{ + parse_tree_t *pos = lhs->child; + while ( position > 0 ) { + pos = pos->next; + position -= 1; + } + return pos; +} + +tree_t *colm_get_rhs_val( program_t *prg, tree_t *tree, int 
*a ) +{ + int i, len = a[0]; + for ( i = 0; i < len; i++ ) { + int prod_num = a[1 + i * 2]; + int child_num = a[1 + i * 2 + 1]; + if ( tree->prod_num == prod_num ) + return get_rhs_el( prg, tree, child_num ); + } + return 0; +} + +void colm_tree_set_field( program_t *prg, tree_t *tree, long field, tree_t *value ) +{ + assert( tree->refs == 1 ); + if ( value != 0 ) + assert( value->refs >= 1 ); + colm_tree_set_attr( tree, field, value ); +} + +tree_t *colm_tree_get_field( tree_t *tree, word_t field ) +{ + return colm_get_attr( tree, field ); +} + +kid_t *get_field_kid( tree_t *tree, word_t field ) +{ + return get_attr_kid( tree, field ); +} + +tree_t *get_field_split( program_t *prg, tree_t *tree, word_t field ) +{ + tree_t *val = colm_get_attr( tree, field ); + tree_t *split = split_tree( prg, val ); + colm_tree_set_attr( tree, field, split ); + return split; +} + +/* This must traverse in the same order that the bindId assignments are done + * in. */ +int match_pattern( tree_t **bindings, program_t *prg, long pat, kid_t *kid, int check_next ) +{ + struct pat_cons_node *nodes = prg->rtd->pat_repl_nodes; + + /* match node, recurse on children. */ + if ( pat != -1 && kid != 0 ) { + if ( nodes[pat].id == kid->tree->id ) { + /* If the pattern node has data, then this means we need to match + * the data against the token data. */ + if ( nodes[pat].data != 0 ) { + /* Check the length of token text. */ + if ( nodes[pat].length != string_length( kid->tree->tokdata ) ) + return false; + + /* Check the token text data. */ + if ( nodes[pat].length > 0 && memcmp( nodes[pat].data, + string_data( kid->tree->tokdata ), nodes[pat].length ) != 0 ) + return false; + } + + /* No failure, all okay. */ + if ( nodes[pat].bind_id > 0 ) { + bindings[nodes[pat].bind_id] = kid->tree; + } + + /* If we didn't match a terminal duplicate of a nonterm then check + * down the children. */ + if ( !nodes[pat].stop ) { + /* Check for failure down child branch. 
*/ + int child_check = match_pattern( bindings, prg, + nodes[pat].child, tree_child( prg, kid->tree ), true ); + if ( ! child_check ) + return false; + } + + /* If checking next, then look for failure there. */ + if ( check_next ) { + int next_check = match_pattern( bindings, prg, + nodes[pat].next, kid->next, true ); + if ( ! next_check ) + return false; + } + + return true; + } + } + else if ( pat == -1 && kid == 0 ) { + /* Both null is a match. */ + return 1; + } + + return false; +} + + +long colm_cmp_tree( program_t *prg, const tree_t *tree1, const tree_t *tree2 ) +{ + long cmpres = 0; + if ( tree1 == 0 ) { + if ( tree2 == 0 ) + return 0; + else + return -1; + } + else if ( tree2 == 0 ) + return 1; + else if ( tree1->id < tree2->id ) + return -1; + else if ( tree1->id > tree2->id ) + return 1; + else if ( tree1->id == LEL_ID_PTR ) { + if ( ((pointer_t*)tree1)->value < ((pointer_t*)tree2)->value ) + return -1; + else if ( ((pointer_t*)tree1)->value > ((pointer_t*)tree2)->value ) + return 1; + } + else if ( tree1->id == LEL_ID_STR ) { + cmpres = cmp_string( ((str_t*)tree1)->value, ((str_t*)tree2)->value ); + if ( cmpres != 0 ) + return cmpres; + } + else { + if ( tree1->tokdata == 0 && tree2->tokdata != 0 ) + return -1; + else if ( tree1->tokdata != 0 && tree2->tokdata == 0 ) + return 1; + else if ( tree1->tokdata != 0 && tree2->tokdata != 0 ) { + cmpres = cmp_string( tree1->tokdata, tree2->tokdata ); + if ( cmpres != 0 ) + return cmpres; + } + } + + kid_t *kid1 = tree_child( prg, tree1 ); + kid_t *kid2 = tree_child( prg, tree2 ); + + while ( true ) { + if ( kid1 == 0 && kid2 == 0 ) + return 0; + else if ( kid1 == 0 && kid2 != 0 ) + return -1; + else if ( kid1 != 0 && kid2 == 0 ) + return 1; + else { + cmpres = colm_cmp_tree( prg, kid1->tree, kid2->tree ); + if ( cmpres != 0 ) + return cmpres; + } + kid1 = kid1->next; + kid2 = kid2->next; + } +} + + +void split_ref( program_t *prg, tree_t ***psp, ref_t *from_ref ) +{ + /* Go up the chain of kids, turing the 
pointers down. */ + ref_t *last = 0, *ref = from_ref, *next = 0; + while ( ref->next != 0 ) { + next = ref->next; + ref->next = last; + last = ref; + ref = next; + } + ref->next = last; + + /* Now traverse the list, which goes down. */ + while ( ref != 0 ) { + if ( ref->kid->tree->refs > 1 ) { + ref_t *next_down = ref->next; + while ( next_down != 0 && next_down->kid == ref->kid ) + next_down = next_down->next; + + kid_t *old_next_kid_down = next_down != 0 ? next_down->kid : 0; + kid_t *new_next_kid_down = 0; + + tree_t *new_tree = colm_copy_tree( prg, ref->kid->tree, + old_next_kid_down, &new_next_kid_down ); + colm_tree_upref( prg, new_tree ); + + /* Downref the original. Don't need to consider freeing because + * refs were > 1. */ + ref->kid->tree->refs -= 1; + + while ( ref != 0 && ref != next_down ) { + next = ref->next; + ref->next = 0; + + ref->kid->tree = new_tree; + ref = next; + } + + /* Correct kid pointers down from ref. */ + while ( next_down != 0 && next_down->kid == old_next_kid_down ) { + next_down->kid = new_next_kid_down; + next_down = next_down->next; + } + } + else { + /* Reset the list as we go down. 
*/ + next = ref->next; + ref->next = 0; + ref = next; + } + } +} + +tree_t *set_list_mem( list_t *list, half_t field, tree_t *value ) +{ + if ( value != 0 ) + assert( value->refs >= 1 ); + + tree_t *existing = 0; + switch ( field ) { + case 0: +// existing = list->head->value; +// list->head->value = value; + break; + case 1: +// existing = list->tail->value; +// list->tail->value = value; + break; + default: + assert( false ); + break; + } + return existing; +} + +struct tree_pair map_remove( program_t *prg, map_t *map, tree_t *key ) +{ + map_el_t *map_el = map_impl_find( prg, map, key ); + struct tree_pair result = { 0, 0 }; + if ( map_el != 0 ) { + map_detach( prg, map, map_el ); + result.key = map_el->key; + //mapElFree( prg, mapEl ); + } + + return result; +} + +#if 0 +tree_t *map_unstore( program_t *prg, map_t *map, tree_t *key, tree_t *existing ) +{ + tree_t *stored = 0; + if ( existing == 0 ) { + map_el_t *map_el = map_detach_by_key( prg, map, key ); + // stored = mapEl->tree; + map_el_free( prg, map_el ); + } + else { + map_el_t *map_el = map_impl_find( prg, map, key ); + // stored = mapEl->tree; + //mapEl->tree = existing; + } + return stored; +} +#endif + +tree_t *map_find( program_t *prg, map_t *map, tree_t *key ) +{ +// map_el_t *mapEl = mapImplFind( prg, map, key ); +// return mapEl == 0 ? 
0 : mapEl->tree; + return 0; +} + +long map_length( map_t *map ) +{ + return map->tree_size; +} + +void list_push_tail( program_t *prg, list_t *list, tree_t *val ) +{ +// if ( val != 0 ) +// assert( val->refs >= 1 ); +// list_el_t *listEl = colm_list_el_new( prg ); +// listEl->value = val; +// listAppend( list, listEl ); +} + +void list_push_head( program_t *prg, list_t *list, tree_t *val ) +{ +// if ( val != 0 ) +// assert( val->refs >= 1 ); +// list_el_t *listEl = listElAllocate( prg ); +// listEl->value = val; +// listPrepend( list, listEl ); +} + +tree_t *list_remove_end( program_t *prg, list_t *list ) +{ +// tree_t *tree = list->tail->value; +// listElFree( prg, listDetachLast( list ) ); +// return tree; + return 0; +} + +tree_t *list_remove_head( program_t *prg, list_t *list ) +{ +// tree_t *tree = list->head; +// listDetachFirst( list ); +// return tree; + return 0; +} + +tree_t *get_parser_mem( parser_t *parser, word_t field ) +{ + tree_t *result = 0; + switch ( field ) { + case 0: { + tree_t *tree = get_parsed_root( parser->pda_run, parser->pda_run->stop_target > 0 ); + result = tree; + break; + } + case 1: { + struct pda_run *pda_run = parser->pda_run; + result = pda_run->parse_error_text; + break; + } + default: { + assert( false ); + break; + } + } + return result; +} + +tree_t *get_list_mem_split( program_t *prg, list_t *list, word_t field ) +{ + tree_t *sv = 0; + switch ( field ) { + case 0: +// sv = splitTree( prg, list->head->value ); +// list->head->value = sv; + break; + case 1: +// sv = splitTree( prg, list->tail->value ); +// list->tail->value = sv; + break; + default: + assert( false ); + break; + } + return sv; +} + + +#if 0 +int map_insert( program_t *prg, map_t *map, tree_t *key, tree_t *element ) +{ + map_el_t *map_el = map_insert_key( prg, map, key, 0 ); + + if ( map_el != 0 ) { + //mapEl->tree = element; + return true; + } + + return false; +} +#endif + +#if 0 +void map_unremove( program_t *prg, map_t *map, tree_t *key, tree_t *element ) 
+{ + map_el_t *map_el = map_insert_key( prg, map, key, 0 ); + assert( map_el != 0 ); + //mapEl->tree = element; +} +#endif + +#if 0 +tree_t *map_uninsert( program_t *prg, map_t *map, tree_t *key ) +{ + map_el_t *el = map_detach_by_key( prg, map, key ); +// tree_t *val = el->tree; + map_el_free( prg, el ); +// return val; + return 0; +} +#endif + +#if 0 +tree_t *map_store( program_t *prg, map_t *map, tree_t *key, tree_t *element ) +{ + tree_t *old_tree = 0; + map_el_t *el_in_tree = 0; + map_el_t *map_el = map_insert_key( prg, map, key, &el_in_tree ); + +// if ( mapEl != 0 ) +// mapEl->tree = element; +// else { +// /* Element with key exists. Overwriting the value. */ +// oldTree = elInTree->tree; +// elInTree->tree = element; +// } + + return old_tree; +} +#endif + +static tree_t *tree_search_kid( program_t *prg, kid_t *kid, long id ) +{ + /* This node the one? */ + if ( kid->tree->id == id ) + return kid->tree; + + tree_t *res = 0; + + /* Search children. */ + kid_t *child = tree_child( prg, kid->tree ); + if ( child != 0 ) + res = tree_search_kid( prg, child, id ); + + /* Search siblings. */ + if ( res == 0 && kid->next != 0 ) + res = tree_search_kid( prg, kid->next, id ); + + return res; +} + +tree_t *tree_search( program_t *prg, tree_t *tree, long id ) +{ + tree_t *res = 0; + if ( tree->id == id ) + res = tree; + else { + kid_t *child = tree_child( prg, tree ); + if ( child != 0 ) + res = tree_search_kid( prg, child, id ); + } + return res; +} + +static location_t *loc_search_kid( program_t *prg, kid_t *kid ) +{ + /* This node the one? */ + if ( kid->tree->tokdata != 0 && kid->tree->tokdata->location != 0 ) + return kid->tree->tokdata->location; + + location_t *res = 0; + + /* Search children. */ + kid_t *child = tree_child( prg, kid->tree ); + if ( child != 0 ) + res = loc_search_kid( prg, child ); + + /* Search siblings. 
*/ + if ( res == 0 && kid->next != 0 ) + res = loc_search_kid( prg, kid->next ); + + return res; +} + +static location_t *loc_search( program_t *prg, tree_t *tree ) +{ + location_t *res = 0; + if ( tree->tokdata != 0 && tree->tokdata->location != 0 ) + return tree->tokdata->location; + + kid_t *child = tree_child( prg, tree ); + if ( child != 0 ) + res = loc_search_kid( prg, child ); + + return res; +} + +struct colm_location *colm_find_location( program_t *prg, tree_t *tree ) +{ + return loc_search( prg, tree ); +} + +head_t *tree_to_str( program_t *prg, tree_t **sp, tree_t *tree, int trim, int attrs ) +{ + /* Collect the tree data. */ + str_collect_t collect; + init_str_collect( &collect ); + + if ( attrs ) + colm_print_tree_collect_a( prg, sp, &collect, tree, trim ); + else + colm_print_tree_collect( prg, sp, &collect, tree, trim ); + + /* Set up the input stream. */ + head_t *ret = string_alloc_full( prg, collect.data, collect.length ); + + str_collect_destroy( &collect ); + + return ret; +} + diff --git a/src/tree.h b/src/tree.h new file mode 100644 index 00000000..97833c6f --- /dev/null +++ b/src/tree.h @@ -0,0 +1,401 @@ +/* + * Copyright 2010-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _COLM_TREE_H +#define _COLM_TREE_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include <colm/colm.h> +#include <colm/type.h> +#include <colm/input.h> +#include <colm/internal.h> +#include <colm/defs.h> + +#define COLM_INDENT_OFF -1 + +typedef unsigned char code_t; +#if SIZEOF_UNSIGNED_LONG == SIZEOF_VOID_P + typedef unsigned long word_t; +#elif SIZEOF_UNSIGNED_LONG_LONG == SIZEOF_VOID_P + typedef unsigned long long word_t; +#else + #error "The type word_t was not declared" +#endif +typedef unsigned long half_t; + +struct bindings; +struct function_info; + +typedef struct colm_tree tree_t; +#include <colm/struct.h> + +typedef struct colm_location +{ + const char *name; + long line; + long column; + long byte; +} location_t; + +/* Header located just before string data. */ +typedef struct colm_data +{ + const char *data; + long length; + struct colm_location *location; +} head_t; + +/* Kid: used to implement a list of child trees. Kids are never shared. The + * trees they point to may be shared. This struct is also used on the stack by + * pushing two words and taking a pointer. We use it to take references to + * trees. Do not modify this struct. */ +typedef struct colm_kid +{ + struct colm_tree *tree; + struct colm_kid *next; +} kid_t; + +/* Reference chains. Allocated on the stack. The chain goes up the list of kids + * to the root of the reference and tells us which trees we need to split so + * they are not shared before we can modify a node in a tree. 
Do not change + * this struct. */ +typedef struct colm_ref +{ + struct colm_kid *kid; + struct colm_ref *next; +} ref_t; + +struct tree_pair +{ + tree_t *key; + tree_t *val; +}; + +typedef struct colm_parse_tree +{ + short id; + unsigned short flags; + + struct colm_parse_tree *child; + struct colm_parse_tree *next; + struct colm_parse_tree *left_ignore; + struct colm_parse_tree *right_ignore; + kid_t *shadow; + + /* Parsing algorithm. */ + long state; + short cause_reduce; + + /* Retry vars. Might be able to unify lower and upper. */ + long retry_region; + char retry_lower; + char retry_upper; +} parse_tree_t; + +typedef struct colm_pointer +{ + /* Must overlay tree_t. */ + short id; + unsigned short flags; + long refs; + kid_t *child; + + colm_value_t value; +} pointer_t; + +typedef struct colm_str +{ + /* Must overlay tree_t. */ + short id; + unsigned short flags; + long refs; + kid_t *child; + + head_t *value; +} str_t; + +/* + * Maps + */ +struct generic_info +{ + long type; + + long el_struct_id; + long el_offset; + + enum TYPE key_type; + long key_offset; + + enum TYPE value_type; + long value_offset; + + long parser_id; +}; + +enum IterType +{ + IT_Tree = 1, + IT_RevTree, + IT_User +}; + +typedef struct colm_tree_iter +{ + enum IterType type; + ref_t root_ref; + ref_t ref; + long search_id; + tree_t **stack_root; + long arg_size; + long yield_size; + long root_size; +} tree_iter_t; + +typedef struct colm_generic_iter +{ + enum IterType type; + ref_t root_ref; + ref_t ref; + tree_t **stack_root; + long arg_size; + long yield_size; + long root_size; + long generic_id; +} generic_iter_t; + +/* This must overlay tree iter because some of the same bytecodes are used. */ +typedef struct colm_rev_tree_iter +{ + enum IterType type; + ref_t root_ref; + ref_t ref; + long search_id; + tree_t **stack_root; + long arg_size; + long yield_size; + long root_size; + + /* For detecting a split at the leaf. 
*/ + kid_t *kid_at_yield; + long children; +} rev_tree_iter_t; + +typedef struct colm_user_iter +{ + enum IterType type; + /* The current item. */ + ref_t ref; + tree_t **stack_root; + long arg_size; + long yield_size; + long root_size; + + code_t *resume; + tree_t **frame; + long search_id; +} user_iter_t; + +void colm_tree_upref_( tree_t *tree ); +void colm_tree_upref( struct colm_program *prg, tree_t *tree ); +void colm_tree_downref( struct colm_program *prg, tree_t **sp, tree_t *tree ); +long colm_cmp_tree( struct colm_program *prg, const tree_t *tree1, const tree_t *tree2 ); + +tree_t *push_right_ignore( struct colm_program *prg, tree_t *push_to, tree_t *right_ignore ); +tree_t *push_left_ignore( struct colm_program *prg, tree_t *push_to, tree_t *left_ignore ); +tree_t *pop_right_ignore( struct colm_program *prg, tree_t **sp, + tree_t *pop_from, tree_t **right_ignore ); +tree_t *pop_left_ignore( struct colm_program *prg, tree_t **sp, + tree_t *pop_from, tree_t **left_ignore ); +tree_t *tree_left_ignore( struct colm_program *prg, tree_t *tree ); +tree_t *tree_right_ignore( struct colm_program *prg, tree_t *tree ); +kid_t *tree_left_ignore_kid( struct colm_program *prg, tree_t *tree ); +kid_t *tree_right_ignore_kid( struct colm_program *prg, tree_t *tree ); +kid_t *tree_child( struct colm_program *prg, const tree_t *tree ); +kid_t *tree_attr( struct colm_program *prg, const tree_t *tree ); +kid_t *kid_list_concat( kid_t *list1, kid_t *list2 ); +kid_t *tree_extract_child( struct colm_program *prg, tree_t *tree ); +kid_t *reverse_kid_list( kid_t *kid ); + +tree_t *colm_construct_pointer( struct colm_program *prg, colm_value_t value ); +tree_t *colm_construct_term( struct colm_program *prg, word_t id, head_t *tokdata ); +tree_t *colm_construct_tree( struct colm_program *prg, kid_t *kid, + tree_t **bindings, long pat ); +tree_t *colm_construct_object( struct colm_program *prg, kid_t *kid, + tree_t **bindings, long lang_el_id ); +tree_t *colm_construct_token( struct 
colm_program *prg, tree_t **args, long nargs ); + +int test_false( struct colm_program *prg, tree_t *tree ); +tree_t *make_tree( struct colm_program *prg, tree_t **args, long nargs ); +stream_t *open_file( struct colm_program *prg, tree_t *name, tree_t *mode ); +stream_t *colm_stream_open_file( struct colm_program *prg, tree_t *name, tree_t *mode ); +stream_t *colm_stream_open_fd( struct colm_program *prg, char *name, long fd ); +kid_t *copy_ignore_list( struct colm_program *prg, kid_t *ignore_header ); +kid_t *copy_kid_list( struct colm_program *prg, kid_t *kid_list ); +void colm_stream_free( struct colm_program *prg, stream_t *s ); +tree_t *colm_copy_tree( struct colm_program *prg, tree_t *tree, + kid_t *old_next_down, kid_t **new_next_down ); + +colm_value_t colm_get_pointer_val( tree_t *pointer ); +tree_t *colm_tree_get_field( tree_t *tree, word_t field ); +tree_t *get_field_split( struct colm_program *prg, tree_t *tree, word_t field ); +tree_t *get_rhs_el( struct colm_program *prg, tree_t *lhs, long position ); +void set_rhs_el( program_t *prg, tree_t *lhs, long position, tree_t *value ); +kid_t *get_rhs_el_kid( struct colm_program *prg, tree_t *lhs, long position ); +parse_tree_t *get_rhs_parse_tree( struct colm_program *prg, + parse_tree_t *lhs, long position ); +void colm_tree_set_field( struct colm_program *prg, tree_t *tree, long field, tree_t *value ); + +void set_triter_cur( struct colm_program *prg, tree_iter_t *iter, tree_t *tree ); +void set_uiter_cur( struct colm_program *prg, user_iter_t *uiter, tree_t *tree ); +void ref_set_value( struct colm_program *prg, tree_t **sp, ref_t *ref, tree_t *v ); +tree_t *tree_search( struct colm_program *prg, tree_t *tree, long id ); + +int match_pattern( tree_t **bindings, struct colm_program *prg, + long pat, kid_t *kid, int check_next ); +tree_t *tree_iter_deref_cur( tree_iter_t *iter ); + +/* For making references of attributes. 
*/ +kid_t *get_field_kid( tree_t *tree, word_t field ); + +tree_t *copy_real_tree( struct colm_program *prg, tree_t *tree, + kid_t *old_next_down, kid_t **new_next_down ); +void split_iter_cur( struct colm_program *prg, tree_t ***psp, tree_iter_t *iter ); +tree_t *set_list_mem( list_t *list, half_t field, tree_t *value ); + +void list_push_tail( struct colm_program *prg, list_t *list, tree_t *val ); +void list_push_head( struct colm_program *prg, list_t *list, tree_t *val ); +tree_t *list_remove_end( struct colm_program *prg, list_t *list ); +tree_t *list_remove_head( struct colm_program *prg, list_t *list ); +tree_t *get_list_mem_split( struct colm_program *prg, list_t *list, word_t field ); +tree_t *get_parser_mem( parser_t *parser, word_t field ); + +tree_t *tree_iter_advance( struct colm_program *prg, tree_t ***psp, tree_iter_t *iter ); +tree_t *tree_iter_next_child( struct colm_program *prg, tree_t ***psp, tree_iter_t *iter ); +tree_t *tree_rev_iter_prev_child( struct colm_program *prg, tree_t ***psp, rev_tree_iter_t *iter ); +tree_t *tree_iter_next_repeat( struct colm_program *prg, tree_t ***psp, tree_iter_t *iter ); +tree_t *tree_iter_prev_repeat( struct colm_program *prg, tree_t ***psp, tree_iter_t *iter ); + +/* An automatically grown buffer for collecting tokens. Always reuses space; + * never down resizes. 
*/ +typedef struct colm_str_collect +{ + char *data; + int allocated; + int length; + struct indent_impl indent; +} str_collect_t; + +void init_str_collect( str_collect_t *collect ); +void str_collect_destroy( str_collect_t *collect ); +void str_collect_append( str_collect_t *collect, const char *data, long len ); +void str_collect_clear( str_collect_t *collect ); +tree_t *tree_trim( struct colm_program *prg, tree_t **sp, tree_t *tree ); + +void colm_print_tree_collect( struct colm_program *prg, tree_t **sp, + str_collect_t *collect, tree_t *tree, int trim ); + +void colm_print_tree_collect_a( struct colm_program *prg, tree_t **sp, + str_collect_t *collect, tree_t *tree, int trim ); + +void colm_print_tree_file( struct colm_program *prg, tree_t **sp, + struct stream_impl_data *impl, tree_t *tree, int trim ); +void colm_print_xml_stdout( struct colm_program *prg, tree_t **sp, + struct stream_impl_data *impl, tree_t *tree, int comm_attr, int trim ); + +void colm_postfix_tree_collect( struct colm_program *prg, tree_t **sp, + str_collect_t *collect, tree_t *tree, int trim ); +void colm_postfix_tree_file( struct colm_program *prg, tree_t **sp, + struct stream_impl *impl, tree_t *tree, int trim ); + +/* + * Iterators. 
+ */ + +user_iter_t *colm_uiter_create( struct colm_program *prg, tree_t ***psp, + struct function_info *fi, long search_id ); +void uiter_init( struct colm_program *prg, tree_t **sp, user_iter_t *uiter, + struct function_info *fi, int revert_on ); + +void colm_init_tree_iter( tree_iter_t *tree_iter, tree_t **stack_root, + long arg_size, long root_size, const ref_t *root_ref, int search_id ); +void colm_init_rev_tree_iter( rev_tree_iter_t *rev_triter, tree_t **stack_root, + long arg_size, long root_size, const ref_t *root_ref, int search_id, int children ); +void colm_init_user_iter( user_iter_t *user_iter, tree_t **stack_root, long root_size, + long arg_size, long search_id ); + +void colm_tree_iter_destroy( struct colm_program *prg, + tree_t ***psp, tree_iter_t *iter ); + +void colm_rev_tree_iter_destroy( struct colm_program *prg, + tree_t ***psp, rev_tree_iter_t *iter ); + +void colm_uiter_destroy( struct colm_program *prg, tree_t ***psp, user_iter_t *uiter ); +void colm_uiter_unwind( struct colm_program *prg, tree_t ***psp, user_iter_t *uiter ); + +tree_t *cast_tree( struct colm_program *prg, int lang_el_id, tree_t *tree ); + +void colm_init_list_iter( generic_iter_t *list_iter, tree_t **stack_root, + long arg_size, long root_size, const ref_t *root_ref, int generic_id ); +void colm_list_iter_destroy( struct colm_program *prg, + tree_t ***psp, generic_iter_t *iter ); + +tree_t *colm_list_iter_advance( struct colm_program *prg, + tree_t ***psp, generic_iter_t *iter ); +tree_t *colm_rev_list_iter_advance( struct colm_program *prg, + tree_t ***psp, generic_iter_t *iter ); + +tree_t *colm_list_iter_deref_cur( struct colm_program *prg, generic_iter_t *iter ); +void colm_list_append( struct colm_list *list, struct colm_list_el *new_el ); +void colm_list_prepend( struct colm_list *list, struct colm_list_el *new_el ); + +void colm_vlist_append( struct colm_program *prg, list_t *list, value_t value ); +void colm_vlist_prepend( struct colm_program *prg, list_t *list, 
value_t value ); +value_t colm_vlist_detach_head( struct colm_program *prg, list_t *list ); +value_t colm_vlist_detach_tail( struct colm_program *prg, list_t *list ); + +value_t colm_viter_deref_cur( struct colm_program *prg, generic_iter_t *iter ); + +str_t *string_prefix( program_t *prg, str_t *str, long len ); +str_t *string_suffix( program_t *prg, str_t *str, long pos ); +head_t *string_alloc_full( struct colm_program *prg, const char *data, long length ); +tree_t *construct_string( struct colm_program *prg, head_t *s ); + +void free_kid_list( program_t *prg, kid_t *kid ); + +void colm_print_tree_collect_xml( program_t *prg, tree_t **sp, + str_collect_t *collect, tree_t *tree, int trim ); + +void colm_print_tree_collect_xml_ac( program_t *prg, tree_t **sp, + str_collect_t *collect, tree_t *tree, int trim ); + +head_t *tree_to_str( program_t *prg, tree_t **sp, tree_t *tree, int trim, int attrs ); + +#if defined(__cplusplus) +} +#endif + +#endif /* COLM_TREE_H */ + diff --git a/src/type.h b/src/type.h new file mode 100644 index 00000000..dca8f2ad --- /dev/null +++ b/src/type.h @@ -0,0 +1,43 @@ +/* + * Copyright 2007-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _COLM_TYPE_H +#define _COLM_TYPE_H + +enum TYPE +{ + TYPE_NOTYPE = 0x00, + TYPE_NIL = 0x01, + TYPE_TREE = 0x02, + TYPE_REF = 0x03, + TYPE_ITER = 0x04, + TYPE_STRUCT = 0x05, + TYPE_GENERIC = 0x06, + TYPE_INT = 0x07, + TYPE_BOOL = 0x08, + TYPE_LIST_PTRS = 0x09, + TYPE_MAP_PTRS = 0x0a, + TYPE_VOID = 0x0b +}; + +#endif /* _COLM_TYPE_H */ + diff --git a/src/version.h.cmake.in b/src/version.h.cmake.in new file mode 100644 index 00000000..0b45a8f0 --- /dev/null +++ b/src/version.h.cmake.in @@ -0,0 +1,9 @@ +/* version.h Generated from version.h.cmake.in by cmake */ + +#ifndef _COLM_VERSION_H +#define _COLM_VERSION_H + +#cmakedefine COLM_VERSION "@COLM_VERSION@" +#cmakedefine COLM_PUBDATE "@COLM_PUBDATE@" + +#endif /* _COLM_VERSION_H */ |