diff options
author | Adrian Thurston <thurston@colm.net> | 2019-09-08 21:11:17 -0600 |
---|---|---|
committer | Adrian Thurston <thurston@colm.net> | 2019-09-08 21:11:17 -0600 |
commit | c860c61607117582abd8f23881eed87957197484 (patch) | |
tree | 4d4e65dddc710e15f008189a9308d95924350c3f /src | |
parent | f37c916aed2600951b8966a86020406b0b0542cf (diff) | |
download | colm-c860c61607117582abd8f23881eed87957197484.tar.gz |
moved the original colm src dir to /colm
Diffstat (limited to 'src')
-rw-r--r-- | src/.gitignore | 39 | ||||
-rw-r--r-- | src/CMakeLists.txt | 175 | ||||
-rw-r--r-- | src/Makefile.am | 127 | ||||
-rw-r--r-- | src/buffer.h | 57 | ||||
-rw-r--r-- | src/bytecode.c | 4938 | ||||
-rw-r--r-- | src/bytecode.h | 678 | ||||
-rw-r--r-- | src/closure.cc | 458 | ||||
-rw-r--r-- | src/codegen.cc | 63 | ||||
-rw-r--r-- | src/codevect.c | 183 | ||||
-rw-r--r-- | src/colm-config.cmake.in | 3 | ||||
-rw-r--r-- | src/colm.h | 153 | ||||
-rw-r--r-- | src/colm.lm | 892 | ||||
-rw-r--r-- | src/commit.c | 111 | ||||
-rw-r--r-- | src/compiler.cc | 1247 | ||||
-rw-r--r-- | src/compiler.h | 1155 | ||||
-rw-r--r-- | src/config.h.cmake.in | 14 | ||||
-rw-r--r-- | src/consinit.cc | 889 | ||||
-rw-r--r-- | src/consinit.h | 110 | ||||
-rw-r--r-- | src/cstring.h | 862 | ||||
-rw-r--r-- | src/ctinput.cc | 550 | ||||
-rw-r--r-- | src/debug.c | 82 | ||||
-rw-r--r-- | src/debug.h | 65 | ||||
-rw-r--r-- | src/declare.cc | 1623 | ||||
-rw-r--r-- | src/defs.h.cmake.in | 11 | ||||
-rw-r--r-- | src/defs.h.in | 40 | ||||
-rw-r--r-- | src/dotgen.cc | 117 | ||||
-rw-r--r-- | src/dotgen.h | 52 | ||||
-rw-r--r-- | src/exports.cc | 307 | ||||
-rw-r--r-- | src/fsmap.cc | 806 | ||||
-rw-r--r-- | src/fsmattach.cc | 427 | ||||
-rw-r--r-- | src/fsmbase.cc | 603 | ||||
-rw-r--r-- | src/fsmcodegen.cc | 911 | ||||
-rw-r--r-- | src/fsmcodegen.h | 210 | ||||
-rw-r--r-- | src/fsmexec.cc | 220 | ||||
-rw-r--r-- | src/fsmgraph.cc | 981 | ||||
-rw-r--r-- | src/fsmgraph.h | 1321 | ||||
-rw-r--r-- | src/fsmmin.cc | 737 | ||||
-rw-r--r-- | src/fsmstate.cc | 441 | ||||
-rw-r--r-- | src/global.h | 110 | ||||
-rw-r--r-- | src/input.c | 740 | ||||
-rw-r--r-- | src/input.h | 230 | ||||
-rw-r--r-- | src/internal.h | 33 | ||||
-rw-r--r-- | src/iter.c | 648 | ||||
-rw-r--r-- | src/keyops.h | 199 | ||||
-rw-r--r-- | src/list.c | 255 | ||||
-rw-r--r-- | src/lmparse.kh | 86 | ||||
-rw-r--r-- | src/lmparse.kl | 2139 | ||||
-rw-r--r-- | src/lmscan.h | 104 | ||||
-rw-r--r-- | src/lmscan.rl | 637 | ||||
-rw-r--r-- | src/loadcolm.cc | 2851 | ||||
-rw-r--r-- | src/loadcolm.h | 31 | ||||
-rw-r--r-- | src/loadinit.cc | 413 | ||||
-rw-r--r-- | src/loadinit.h | 77 | ||||
-rw-r--r-- | src/lookup.cc | 323 | ||||
-rw-r--r-- | src/main.cc | 798 | ||||
-rw-r--r-- | src/map.c | 876 | ||||
-rw-r--r-- | src/map.cc | 27 | ||||
-rw-r--r-- | src/map.h | 86 | ||||
-rw-r--r-- | src/parser.cc | 1122 | ||||
-rw-r--r-- | src/parser.h | 197 | ||||
-rw-r--r-- | src/parsetree.cc | 1493 | ||||
-rw-r--r-- | src/parsetree.h | 3605 | ||||
-rw-r--r-- | src/pcheck.cc | 156 | ||||
-rw-r--r-- | src/pcheck.h | 50 | ||||
-rw-r--r-- | src/pdabuild.cc | 2204 | ||||
-rw-r--r-- | src/pdacodegen.cc | 698 | ||||
-rw-r--r-- | src/pdacodegen.h | 107 | ||||
-rw-r--r-- | src/pdagraph.cc | 533 | ||||
-rw-r--r-- | src/pdagraph.h | 517 | ||||
-rw-r--r-- | src/pdarun.c | 2290 | ||||
-rw-r--r-- | src/pdarun.h | 477 | ||||
-rw-r--r-- | src/pool.c | 248 | ||||
-rw-r--r-- | src/pool.h | 73 | ||||
-rw-r--r-- | src/print.c | 778 | ||||
-rw-r--r-- | src/program.c | 337 | ||||
-rw-r--r-- | src/program.h | 186 | ||||
-rw-r--r-- | src/redbuild.cc | 562 | ||||
-rw-r--r-- | src/redbuild.h | 161 | ||||
-rw-r--r-- | src/redfsm.cc | 1049 | ||||
-rw-r--r-- | src/redfsm.h | 479 | ||||
-rw-r--r-- | src/reduce.cc | 954 | ||||
-rw-r--r-- | src/resolve.cc | 976 | ||||
-rw-r--r-- | src/rtvector.h | 35 | ||||
-rw-r--r-- | src/stream.c | 805 | ||||
-rw-r--r-- | src/string.c | 281 | ||||
-rw-r--r-- | src/struct.c | 185 | ||||
-rw-r--r-- | src/struct.h | 180 | ||||
-rw-r--r-- | src/synthesis.cc | 3362 | ||||
-rw-r--r-- | src/tree.c | 1620 | ||||
-rw-r--r-- | src/tree.h | 396 | ||||
-rw-r--r-- | src/type.h | 43 | ||||
-rw-r--r-- | src/version.h.cmake.in | 9 |
92 files changed, 0 insertions, 58489 deletions
diff --git a/src/.gitignore b/src/.gitignore deleted file mode 100644 index c633440c..00000000 --- a/src/.gitignore +++ /dev/null @@ -1,39 +0,0 @@ -/*.o -/*.lo -/Makefile.in -/Makefile -/.*.d -/colm -/config.h.in -/config.h.in~ -/config.h -/defs.h -/version.h -/tags -/.deps -/libcolm.a -/libcolm.la -/.libs -/stamp-h1 -/stamp-h2 - -/include - -/bootstrap0 -/bootstrap1 -/gen - -/libprog.a - -/CMakeFiles -/cmake_install.cmake -/*.exe - -# Common testing files. -/tmp.lm -/tmp.c -/tmp -/input[0-9] -/input -/output -/log diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt deleted file mode 100644 index b961f9af..00000000 --- a/src/CMakeLists.txt +++ /dev/null @@ -1,175 +0,0 @@ -# Check type size -include(CheckTypeSize) -check_type_size("long" SIZEOF_LONG) -check_type_size("unsigned long" SIZEOF_UNSIGNED_LONG) -check_type_size("unsigned long long" SIZEOF_UNSIGNED_LONG_LONG) -check_type_size("void *" SIZEOF_VOID_P) - -# Check system headers -include(CheckIncludeFile) -check_include_file(sys/mman.h HAVE_SYS_MMAN_H) -check_include_file(sys/wait.h HAVE_SYS_WAIT_H) -check_include_file(unistd.h HAVE_UNISTD_H) - -# Prepare settings -string(TOLOWER ${PROJECT_NAME} _PACKAGE_NAME) -if("${CMAKE_BUILD_TYPE}" MATCHES "[Dd][Ee][Bb]") - set(DEBUG 1) -endif() -set(VERSION "${PROJECT_VERSION}") -set(PUBDATE "${PROJECT_PUBDATE}") - -set(common_COMPILE_DEFINITIONS PREFIX="${CMAKE_INSTALL_PREFIX}") - -# Generate headers -configure_file(version.h.cmake.in version.h @ONLY) -configure_file(config.h.cmake.in config.h @ONLY) -configure_file(defs.h.cmake.in defs.h @ONLY) -configure_file(colm-config.cmake.in - "${PROJECT_BINARY_DIR}/${_PACKAGE_NAME}-config.cmake" @ONLY) - -# Runtime headers -set(RUNTIME_HDR - bytecode.h debug.h pool.h input.h - pdarun.h map.h type.h tree.h struct.h program.h colm.h internal.h) - -# Buildtime headers stub -foreach(_hdr ${RUNTIME_HDR} rtvector.h) - file(GENERATE - OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/include/colm/${_hdr}" - CONTENT "#include 
\"${CMAKE_CURRENT_LIST_DIR}/${_hdr}\"\n") -endforeach() -foreach(_hdr config.h defs.h) - file(GENERATE - OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/include/colm/${_hdr}" - CONTENT "#include \"../../${_hdr}\"\n") - list(APPEND RUNTIME_HDR "${CMAKE_CURRENT_BINARY_DIR}/${_hdr}") -endforeach() - -# Other CMake modules -include(GNUInstallDirs) - -# libcolm - -add_library(libcolm - map.c pdarun.c list.c input.c stream.c debug.c - codevect.c pool.c string.c tree.c iter.c - bytecode.c program.c struct.c commit.c - print.c) - -target_include_directories(libcolm - PUBLIC - $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}> - $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}/include> - $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>) - -set_target_properties(libcolm PROPERTIES - OUTPUT_NAME colm) - -# libprog - -add_library(libprog - buffer.h bytecode.h colm.h debug.h dotgen.h fsmcodegen.h fsmgraph.h - input.h keyops.h map.h compiler.h - parsetree.h pcheck.h pdacodegen.h pdagraph.h pdarun.h pool.h redbuild.h - redfsm.h rtvector.h tree.h global.h colm.h parser.h cstring.h - internal.h - resolve.cc lookup.cc synthesis.cc parsetree.cc - fsmstate.cc fsmbase.cc fsmattach.cc fsmmin.cc - fsmgraph.cc pdagraph.cc pdabuild.cc pdacodegen.cc fsmcodegen.cc - redfsm.cc fsmexec.cc redbuild.cc closure.cc fsmap.cc - dotgen.cc pcheck.cc ctinput.cc declare.cc codegen.cc - exports.cc compiler.cc parser.cc reduce.cc) - -target_include_directories(libprog - PUBLIC - $<BUILD_INTERFACE:${CMAKE_CURRENT_LIST_DIR}> - $<BUILD_INTERFACE:${CMAKE_CURRENT_LIST_DIR}/../aapl> - $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}> - $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}/include>) - -set_target_properties(libprog PROPERTIES - OUTPUT_NAME prog) - -# bootstrap0 - -add_executable(bootstrap0 - consinit.cc consinit.h main.cc) - -target_link_libraries(bootstrap0 libprog libcolm) - -set_property(TARGET bootstrap0 APPEND PROPERTY - COMPILE_DEFINITIONS CONS_INIT ${common_COMPILE_DEFINITIONS}) - -set_property(TARGET bootstrap0 
APPEND PROPERTY - COMPILE_FLAGS -fpermissive) - -# bootstrap1 - -make_directory("${CMAKE_CURRENT_BINARY_DIR}/gen") - -add_custom_command(OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/gen/parse1.c" - "${CMAKE_CURRENT_BINARY_DIR}/gen/if1.h" - "${CMAKE_CURRENT_BINARY_DIR}/gen/if1.cc" - COMMAND bootstrap0 - ARGS -c -o parse1.c -e if1.h -x if1.cc - WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/gen") - -add_executable(bootstrap1 - loadinit.h loadinit.cc main.cc - "${CMAKE_CURRENT_BINARY_DIR}/gen/parse1.c" - "${CMAKE_CURRENT_BINARY_DIR}/gen/if1.cc") - -target_link_libraries(bootstrap1 libprog libcolm) - -set_property(TARGET bootstrap1 APPEND PROPERTY - COMPILE_DEFINITIONS LOAD_INIT ${common_COMPILE_DEFINITIONS}) - -# colm - -add_custom_command(OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/gen/parse2.c" - "${CMAKE_CURRENT_BINARY_DIR}/gen/if2.h" - "${CMAKE_CURRENT_BINARY_DIR}/gen/if2.cc" - COMMAND bootstrap1 - ARGS -c -o parse2.c -e if2.h -x if2.cc "${CMAKE_CURRENT_LIST_DIR}/colm.lm" - WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/gen") - -add_executable(colm - loadcolm.h loadcolm.cc main.cc - "${CMAKE_CURRENT_BINARY_DIR}/gen/parse2.c" - "${CMAKE_CURRENT_BINARY_DIR}/gen/if2.cc") - -target_link_libraries(colm libprog libcolm) - -set_property(TARGET colm APPEND PROPERTY - COMPILE_DEFINITIONS LOAD_COLM ${common_COMPILE_DEFINITIONS}) - -if(${PROJECT_NAME}_MAKE_INSTALL) - if(NOT DEFINED CMAKE_INSTALL_CMAKEDIR) - set(CMAKE_INSTALL_CMAKEDIR - "${CMAKE_INSTALL_LIBDIR}/cmake/${_PACKAGE_NAME}" - CACHE STRING "CMake packages") - endif() - install(FILES ${RUNTIME_HDR} - DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/colm") - install(TARGETS libcolm colm - EXPORT ${_PACKAGE_NAME}-targets - RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" - LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" - ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}") - install(EXPORT ${_PACKAGE_NAME}-targets - NAMESPACE ${_PACKAGE_NAME}:: - DESTINATION "${CMAKE_INSTALL_CMAKEDIR}") - export(EXPORT ${_PACKAGE_NAME}-targets - NAMESPACE 
${_PACKAGE_NAME}:: - FILE "${PROJECT_BINARY_DIR}/${_PACKAGE_NAME}-targets.cmake") - include(CMakePackageConfigHelpers) - write_basic_package_version_file( - "${PROJECT_BINARY_DIR}/${_PACKAGE_NAME}-config-version.cmake" - VERSION ${PROJECT_VERSION} - COMPATIBILITY AnyNewerVersion) - install(FILES - "${PROJECT_BINARY_DIR}/${_PACKAGE_NAME}-config.cmake" - "${PROJECT_BINARY_DIR}/${_PACKAGE_NAME}-config-version.cmake" - DESTINATION "${CMAKE_INSTALL_CMAKEDIR}") -endif() diff --git a/src/Makefile.am b/src/Makefile.am deleted file mode 100644 index a9b48a70..00000000 --- a/src/Makefile.am +++ /dev/null @@ -1,127 +0,0 @@ -# -# Copyright 2007-2018 Adrian Thurston <thurston@colm.net> -# - -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to -# deal in the Software without restriction, including without limitation the -# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or -# sell copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. 
- -AM_CPPFLAGS = -I$(top_srcdir)/aapl -Iinclude - -AUTOMAKE_OPTIONS = subdir-objects - -bin_PROGRAMS = colm -noinst_PROGRAMS = bootstrap0 bootstrap1 - -RUNTIME_SRC = \ - map.c pdarun.c list.c input.c stream.c debug.c \ - codevect.c pool.c string.c tree.c iter.c \ - bytecode.c program.c struct.c commit.c \ - print.c - -RUNTIME_HDR = \ - bytecode.h config.h defs.h debug.h pool.h input.h \ - pdarun.h map.h type.h tree.h struct.h program.h colm.h internal.h - -lib_LTLIBRARIES = libcolm.la - -noinst_LIBRARIES = libprog.a - -libcolm_la_SOURCES = $(RUNTIME_SRC) -libcolm_la_LDFLAGS = -release ${PACKAGE_VERSION} -no-undefined - -if LINKER_NO_UNDEFINED -libcolm_la_LDFLAGS += -Wl,--no-undefined -endif - -common_CFLAGS = \ - -Wall \ - -DPREFIX='"$(prefix)"' \ - -Iinclude - -libprog_a_SOURCES = \ - buffer.h bytecode.h colm.h debug.h dotgen.h fsmcodegen.h fsmgraph.h \ - input.h keyops.h map.h compiler.h \ - parsetree.h pcheck.h pdacodegen.h pdagraph.h pdarun.h pool.h redbuild.h \ - redfsm.h rtvector.h tree.h version.h global.h colm.h parser.h cstring.h \ - internal.h \ - \ - resolve.cc lookup.cc synthesis.cc parsetree.cc \ - fsmstate.cc fsmbase.cc fsmattach.cc fsmmin.cc \ - fsmgraph.cc pdagraph.cc pdabuild.cc pdacodegen.cc fsmcodegen.cc \ - redfsm.cc fsmexec.cc redbuild.cc closure.cc fsmap.cc \ - dotgen.cc pcheck.cc ctinput.cc declare.cc codegen.cc \ - exports.cc compiler.cc parser.cc reduce.cc - -libprog_a_CXXFLAGS = $(common_CFLAGS) - -bootstrap0_CXXFLAGS = $(common_CFLAGS) -DCONS_INIT -bootstrap0_SOURCES = consinit.cc consinit.h main.cc -bootstrap0_LDADD = libprog.a libcolm.la - -bootstrap1_CXXFLAGS = $(common_CFLAGS) -DLOAD_INIT -bootstrap1_CFLAGS = $(common_CFLAGS) -bootstrap1_SOURCES = \ - loadinit.h loadinit.cc main.cc -nodist_bootstrap1_SOURCES = \ - gen/if1.h gen/if1.cc gen/parse1.c -bootstrap1_LDADD = libprog.a libcolm.la - -colm_CXXFLAGS = $(common_CFLAGS) -DLOAD_COLM -colm_CFLAGS = $(common_CFLAGS) -colm_SOURCES = \ - loadcolm.h loadcolm.cc main.cc 
-nodist_colm_SOURCES = \ - gen/if2.h gen/if2.cc gen/parse2.c -colm_LDADD = libprog.a libcolm.la - -colmincdir = $(includedir)/colm - -colminc_HEADERS = $(RUNTIME_HDR) - -gen/parse1.c: bootstrap0$(EXEEXT) - mkdir -p gen - $(builddir)/bootstrap0 -c -o gen/parse1.c -e gen/if1.h -x gen/if1.cc - -gen/if1.h: gen/parse1.c -gen/if1.cc: gen/parse1.c - -gen/parse2.c: bootstrap1$(EXEEXT) colm.lm - mkdir -p gen - $(builddir)/bootstrap1 -c -o gen/parse2.c -e gen/if2.h -x gen/if2.cc colm.lm - -gen/if2.h: gen/parse2.c -gen/if2.cc: gen/parse2.c - -gen/bootstrap1-if1.$(OBJEXT): gen/if1.h gen/if1.cc gen/parse1.c -bootstrap1-loadinit.$(OBJEXT): gen/if1.h gen/if1.cc gen/parse1.c - -gen/colm-if2.$(OBJEXT): gen/if2.h gen/if2.cc gen/parse2.c -colm-loadcolm.$(OBJEXT): gen/if2.h gen/if2.cc gen/parse2.c - -BUILT_SOURCES = version.h include/colm - -include/colm: - mkdir -p include - ln -s .. include/colm - -version.h: Makefile - echo '#define VERSION "$(PACKAGE_VERSION)"' > version.h - echo '#define PUBDATE "$(PUBDATE)"' >> version.h - -CLEANFILES = $(BUILT_SOURCES) -EXTRA_DIST = colm.lm diff --git a/src/buffer.h b/src/buffer.h deleted file mode 100644 index 58db85de..00000000 --- a/src/buffer.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright 2003-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef _COLM_BUFFER_H -#define _COLM_BUFFER_H - -#define BUFFER_INITIAL_SIZE 4096 - -/* An automatically grown buffer for collecting tokens. Always reuses space; - * never down resizes. */ -struct Buffer -{ - Buffer() - { - data = (char*) malloc( BUFFER_INITIAL_SIZE ); - allocated = BUFFER_INITIAL_SIZE; - length = 0; - } - ~Buffer() { free(data); } - - void append( char p ) - { - if ( length == allocated ) { - allocated *= 2; - data = (char*) realloc( data, allocated ); - } - data[length++] = p; - } - - void clear() { length = 0; } - - char *data; - int allocated; - int length; -}; - -#endif /* _COLM_BUFFER_H */ - diff --git a/src/bytecode.c b/src/bytecode.c deleted file mode 100644 index fe0026fc..00000000 --- a/src/bytecode.c +++ /dev/null @@ -1,4938 +0,0 @@ -/* - * Copyright 2006-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <colm/bytecode.h> - -#include <sys/types.h> -#if defined(HAVE_SYS_WAIT_H) -#include <sys/wait.h> -#endif -#include <assert.h> -#include <string.h> -#include <stdbool.h> -#include <stdio.h> -#include <stdlib.h> -#include <signal.h> - -#include <colm/pool.h> -#include <colm/debug.h> - -#define TRUE_VAL 1 -#define FALSE_VAL 0 - -#if SIZEOF_LONG != 4 && SIZEOF_LONG != 8 - #error "SIZEOF_LONG contained an unexpected value" -#endif - -#define read_byte( i ) do { \ - i = ((uchar) *instr++); \ -} while(0) - -#define read_half( i ) do { \ - i = ((word_t) *instr++); \ - i |= ((word_t) *instr++) << 8; \ -} while(0) - -/* There are better ways. 
*/ -#if SIZEOF_LONG == 4 - - #define read_type( type, i ) do { \ - word_t w; \ - w = ((word_t) *instr++); \ - w |= ((word_t) *instr++) << 8; \ - w |= ((word_t) *instr++) << 16; \ - w |= ((word_t) *instr++) << 24; \ - i = (type) w; \ - } while(0) - - #define read_type_p( Type, i, p ) do { \ - i = ((Type) p[0]); \ - i |= ((Type) p[1]) << 8; \ - i |= ((Type) p[2]) << 16; \ - i |= ((Type) p[3]) << 24; \ - } while(0) - - #define consume_word() instr += 4 - -#else - - #define read_type( type, i ) do { \ - word_t _w; \ - _w = ((word_t) *instr++); \ - _w |= ((word_t) *instr++) << 8; \ - _w |= ((word_t) *instr++) << 16; \ - _w |= ((word_t) *instr++) << 24; \ - _w |= ((word_t) *instr++) << 32; \ - _w |= ((word_t) *instr++) << 40; \ - _w |= ((word_t) *instr++) << 48; \ - _w |= ((word_t) *instr++) << 56; \ - i = (type) _w; \ - } while(0) - - #define read_type_p( type, i, p ) do { \ - i = ((type) p[0]); \ - i |= ((type) p[1]) << 8; \ - i |= ((type) p[2]) << 16; \ - i |= ((type) p[3]) << 24; \ - i |= ((type) p[4]) << 32; \ - i |= ((type) p[5]) << 40; \ - i |= ((type) p[6]) << 48; \ - i |= ((type) p[7]) << 56; \ - } while(0) - - #define consume_word() instr += 8 -#endif - -#define read_tree( i ) read_type( tree_t*, i ) -#define read_parser( i ) read_type( parser_t*, i ) -#define read_word( i ) read_type( word_t, i ) -#define read_stream( i ) read_type( stream_t*, i ) -#define read_input( i ) read_type( input_t*, i ) - -#define read_word_p( i, p ) read_type_p( word_t, i, p ) - -#define consume_byte() instr += 1 -#define consume_half() instr += 2 - -static void rcode_downref( program_t *prg, tree_t **sp, code_t *instr ); - -static void make_stdin( program_t *prg ) -{ - if ( prg->stdin_val == 0 ) - prg->stdin_val = colm_stream_open_fd( prg, "<stdin>", 0 ); -} - -static void make_stdout( program_t *prg ) -{ - if ( prg->stdout_val == 0 ) - prg->stdout_val = colm_stream_open_fd( prg, "<stdout>", 1 ); -} - -static void make_stderr( program_t *prg ) -{ - if ( prg->stderr_val == 0 ) - 
prg->stderr_val = colm_stream_open_fd( prg, "<stderr>", 2 ); -} - -static void flush_streams( program_t *prg ) -{ - if ( prg->stdout_val != 0 ) { - struct stream_impl *si = prg->stdout_val->impl; - si->funcs->flush_stream( prg, si ); - } - - if ( prg->stderr_val != 0 ) { - struct stream_impl *si = prg->stderr_val->impl; - si->funcs->flush_stream( prg, si ); - } -} - -void colm_parser_set_context( program_t *prg, tree_t **sp, parser_t *parser, struct_t *val ) -{ - parser->pda_run->context = val; -} - -static head_t *tree_to_str_xml( program_t *prg, tree_t **sp, tree_t *tree, int trim, int attrs ) -{ - /* Collect the tree data. */ - str_collect_t collect; - init_str_collect( &collect ); - - colm_print_tree_collect_xml( prg, sp, &collect, tree, trim ); - - /* Set up the input stream. */ - head_t *ret = string_alloc_full( prg, collect.data, collect.length ); - - str_collect_destroy( &collect ); - - return ret; -} - -static head_t *tree_to_str_xml_ac( program_t *prg, tree_t **sp, tree_t *tree, int trim, int attrs ) -{ - /* Collect the tree data. */ - str_collect_t collect; - init_str_collect( &collect ); - - colm_print_tree_collect_xml_ac( prg, sp, &collect, tree, trim ); - - /* Set up the input stream. */ - head_t *ret = string_alloc_full( prg, collect.data, collect.length ); - - str_collect_destroy( &collect ); - - return ret; -} - -static head_t *tree_to_str_postfix( program_t *prg, tree_t **sp, tree_t *tree, int trim, int attrs ) -{ - /* Collect the tree data. */ - str_collect_t collect; - init_str_collect( &collect ); - - colm_postfix_tree_collect( prg, sp, &collect, tree, trim ); - - /* Set up the input stream. 
*/ - head_t *ret = string_alloc_full( prg, collect.data, collect.length ); - - str_collect_destroy( &collect ); - - return ret; -} - - -static word_t stream_append_text( program_t *prg, tree_t **sp, input_t *dest, tree_t *input, int trim ) -{ - long length = 0; - struct input_impl *impl = input_to_impl( dest ); - - if ( input->id == LEL_ID_PTR ) { - assert(false); - } - else { - /* Collect the tree data. */ - str_collect_t collect; - init_str_collect( &collect ); - colm_print_tree_collect( prg, sp, &collect, input, trim ); - - /* Load it into the input. */ - impl->funcs->append_data( prg, impl, collect.data, collect.length ); - length = collect.length; - str_collect_destroy( &collect ); - } - - return length; -} - -static word_t stream_append_tree( program_t *prg, tree_t **sp, input_t *dest, tree_t *to_append ) -{ - long length = 0; - struct input_impl *impl = input_to_impl( dest ); - - if ( to_append->id == LEL_ID_PTR ) { - assert(false); - } - else if ( to_append->id == LEL_ID_STR ) { - /* Collect the tree data. */ - str_collect_t collect; - init_str_collect( &collect ); - colm_print_tree_collect( prg, sp, &collect, to_append, false ); - - /* Load it into the to_append. 
*/ - impl->funcs->append_data( prg, impl, collect.data, collect.length ); - length = collect.length; - str_collect_destroy( &collect ); - } - else { - colm_tree_upref( prg, to_append ); - impl->funcs->append_tree( prg, impl, to_append ); - } - - return length; -} - -static word_t stream_append_stream( program_t *prg, tree_t **sp, input_t *dest, stream_t *stream ) -{ - long length = 0; - - struct input_impl *impl = input_to_impl( dest ); - impl->funcs->append_stream( prg, impl, stream ); - - return length; -} - -static void stream_undo_append( program_t *prg, tree_t **sp, - struct input_impl *is, tree_t *input, long length ) -{ - if ( input->id == LEL_ID_PTR ) - assert(false); - else if ( input->id == LEL_ID_STR ) - is->funcs->undo_append_data( prg, is, length ); - else { - is->funcs->undo_append_data( prg, is, length ); - } -} - -static void stream_undo_append_stream( program_t *prg, tree_t **sp, struct input_impl *is, - tree_t *input, long length ) -{ - is->funcs->undo_append_stream( prg, is ); -} - -static tree_t *stream_pull_bc( program_t *prg, tree_t **sp, struct pda_run *pda_run, - input_t *input, tree_t *length ) -{ - long len = ((long)length); - struct input_impl *impl = input_to_impl( input ); - head_t *tokdata = colm_stream_pull( prg, sp, pda_run, impl, len ); - return construct_string( prg, tokdata ); -} - - -static void undo_stream_pull( struct colm_program *prg, struct input_impl *is, const char *data, long length ) -{ - //debug( REALM_PARSE, "undoing stream pull\n" ); - is->funcs->undo_consume_data( prg, is, data, length ); -} - -static void undo_pull( program_t *prg, input_t *input, tree_t *str ) -{ - struct input_impl *impl = input_to_impl( input ); - const char *data = string_data( ( (str_t*)str )->value ); - long length = string_length( ( (str_t*)str )->value ); - undo_stream_pull( prg, impl, data, length ); -} - -static long stream_push( program_t *prg, tree_t **sp, struct input_impl *in, tree_t *tree, int ignore ) -{ - long length = -1; - if ( 
tree->id == LEL_ID_PTR ) { - assert(false); - } - else if ( tree->id == LEL_ID_STR ) { - /* This should become a compile error. If it's text, it's up to the - * scanner to decide. Want to force it then send a token. */ - assert( !ignore ); - - /* Collect the tree data. */ - str_collect_t collect; - init_str_collect( &collect ); - colm_print_tree_collect( prg, sp, &collect, tree, false ); - - colm_stream_push_text( prg, in, collect.data, collect.length ); - length = collect.length; - str_collect_destroy( &collect ); - - } - else { - colm_tree_upref( prg, tree ); - colm_stream_push_tree( prg, in, tree, ignore ); - } - - return length; -} - -static long stream_push_stream( program_t *prg, tree_t **sp, - struct input_impl *in, stream_t *stream ) -{ - colm_stream_push_stream( prg, in, stream ); - return -1; -} - -static void set_local( execution_t *exec, long field, tree_t *tree ) -{ - if ( tree != 0 ) - assert( tree->refs >= 1 ); - vm_set_local( exec, field, tree ); -} - -static tree_t *get_local_split( program_t *prg, execution_t *exec, long field ) -{ - tree_t *val = vm_get_local( exec, field ); - tree_t *split = split_tree( prg, val ); - vm_set_local( exec, field, split ); - return split; -} - -static void downref_local_trees( program_t *prg, tree_t **sp, - execution_t *exec, struct local_info *locals, long locals_len ) -{ - long i; - for ( i = locals_len-1; i >= 0; i-- ) { - if ( locals[i].type == LI_Tree ) { - debug( prg, REALM_BYTECODE, "local tree downref: %ld\n", - (long)locals[i].offset ); - - tree_t *tree = (tree_t*) vm_get_local( exec, (long)locals[i].offset ); - colm_tree_downref( prg, sp, tree ); - } - } -} - -static void downref_locals( program_t *prg, tree_t ***psp, - execution_t *exec, struct local_info *locals, long locals_len ) -{ - long i; - for ( i = locals_len-1; i >= 0; i-- ) { - switch ( locals[i].type ) { - case LI_Tree: { - debug( prg, REALM_BYTECODE, "local tree downref: %ld\n", - (long)locals[i].offset ); - tree_t *tree = (tree_t*) 
vm_get_local( exec, (long)locals[i].offset ); - colm_tree_downref( prg, *psp, tree ); - break; - } - case LI_Iter: { - debug( prg, REALM_BYTECODE, "local iter downref: %ld\n", - (long)locals[i].offset ); - tree_iter_t *iter = (tree_iter_t*) vm_get_plocal( exec, (long)locals[i].offset ); - colm_tree_iter_destroy( prg, psp, iter ); - break; - } - case LI_RevIter: { - debug( prg, REALM_BYTECODE, "local rev iter downref: %ld\n", - (long)locals[i].offset ); - rev_tree_iter_t *riter = (rev_tree_iter_t*) vm_get_plocal( exec, - (long)locals[i].offset ); - colm_rev_tree_iter_destroy( prg, psp, riter ); - break; - } - case LI_UserIter: { - debug( prg, REALM_BYTECODE, "local user iter downref: %ld\n", - (long)locals[i].offset ); - user_iter_t *uiter = (user_iter_t*) vm_get_local( exec, locals[i].offset ); - colm_uiter_unwind( prg, psp, uiter ); - break; - } - } - } -} - -static tree_t *construct_arg0( program_t *prg, int argc, const char **argv, const int *argl ) -{ - tree_t *arg0 = 0; - if ( argc > 0 ) { - size_t len = argl != 0 ? argl[0] : strlen(argv[0]); - head_t *head = colm_string_alloc_pointer( prg, argv[0], len ); - arg0 = construct_string( prg, head ); - colm_tree_upref( prg, arg0 ); - } - return arg0; -} - -static list_t *construct_argv( program_t *prg, int argc, const char **argv, const int *argl ) -{ - list_t *list = (list_t*)colm_construct_generic( prg, prg->rtd->argv_generic_id, 0 ); - int i; - for ( i = 1; i < argc; i++ ) { - size_t len = argl != 0 ? 
argl[i] : strlen(argv[i]); - head_t *head = colm_string_alloc_pointer( prg, argv[i], len ); - tree_t *arg = construct_string( prg, head ); - colm_tree_upref( prg, arg ); - - struct_t *strct = colm_struct_new_size( prg, 16 ); - strct->id = prg->rtd->argv_el_id; - colm_struct_set_field( strct, tree_t*, 0, arg ); - list_el_t *list_el = colm_struct_get_addr( strct, list_el_t*, 1 ); - colm_list_append( list, list_el ); - } - - return list; -} - - -static list_t *construct_stds( program_t *prg ) -{ - make_stdout( prg ); - - list_t *list = (list_t*)colm_construct_generic( prg, prg->rtd->stds_generic_id, 0 ); - - struct_t *strct = colm_struct_new_size( prg, 16 ); - strct->id = prg->rtd->stds_el_id; - colm_struct_set_field( strct, stream_t*, 0, prg->stdout_val ); - list_el_t *list_el = colm_struct_get_addr( strct, list_el_t*, 1 ); - colm_list_append( list, list_el ); - - return list; -} - -/* - * Execution environment - */ - -void colm_rcode_downref_all( program_t *prg, tree_t **sp, struct rt_code_vect *rev ) -{ - while ( rev->tab_len > 0 ) { - /* Read the length */ - code_t *prcode = rev->data + rev->tab_len - SIZEOF_WORD; - word_t len; - read_word_p( len, prcode ); - - /* Find the start of block. */ - long start = rev->tab_len - len - SIZEOF_WORD; - prcode = rev->data + start; - - /* Execute it. */ - rcode_downref( prg, sp, prcode ); - - /* Backup over it. 
*/ - rev->tab_len -= len + SIZEOF_WORD; - } -} - -static code_t *pcr_call( program_t *prg, execution_t *exec, tree_t ***psp, code_t *instr, parser_t *parser ) -{ - tree_t **sp = *psp; - - int frame_size = 0; - if ( parser->pda_run->frame_id >= 0 ) { - struct frame_info *fi = &prg->rtd->frame_info[parser->pda_run->frame_id]; - frame_size = fi->frame_size; - } - - vm_contiguous( 8 + frame_size ); - - vm_push_type( tree_t**, exec->frame_ptr ); - vm_push_type( tree_t**, exec->iframe_ptr ); - vm_push_type( long, exec->frame_id ); - vm_push_type( word_t, exec->steps ); - vm_push_type( word_t, exec->pcr ); - vm_push_parser( exec->parser ); - vm_push_type( word_t, exec->WV ); - - /* Return back to this instruction. We are alternating between - * parsing and calling instructions. */ - code_t *return_to = instr - SIZEOF_CODE; - vm_push_type( code_t*, return_to ); - - exec->frame_ptr = 0; - exec->iframe_ptr = 0; - exec->frame_id = 0; - exec->steps = 0; - exec->parser = parser; - - instr = parser->pda_run->code; - exec->WV = 1; - - exec->frame_id = parser->pda_run->frame_id; - - if ( parser->pda_run->frame_id >= 0 ) { - struct frame_info *fi = &prg->rtd->frame_info[parser->pda_run->frame_id]; - - exec->frame_ptr = vm_ptop(); - vm_pushn( fi->frame_size ); - memset( vm_ptop(), 0, sizeof(word_t) * fi->frame_size ); - } - - *psp = sp; - return instr; -} - -void colm_execute( program_t *prg, execution_t *exec, code_t *code ) -{ - tree_t **sp = prg->stack_root; - - struct frame_info *fi = &prg->rtd->frame_info[prg->rtd->root_frame_id]; - - /* Set up the stack as if we have - * called. We allow a return value. */ - - long stretch = FR_AA + fi->frame_size; - vm_contiguous( stretch ); - - vm_push_tree( 0 ); - vm_push_tree( 0 ); - vm_push_tree( 0 ); - vm_push_tree( 0 ); - vm_push_tree( 0 ); - - exec->frame_ptr = vm_ptop(); - vm_pushn( fi->frame_size ); - memset( vm_ptop(), 0, sizeof(word_t) * fi->frame_size ); - - /* Execution loop. 
*/ - sp = colm_execute_code( prg, exec, sp, code ); - - downref_locals( prg, &sp, exec, fi->locals, fi->locals_len ); - vm_popn( fi->frame_size ); - - vm_pop_ignore(); - vm_pop_ignore(); - colm_tree_downref( prg, sp, prg->return_val ); - prg->return_val = vm_pop_tree(); - vm_pop_ignore(); - - prg->stack_root = sp; -} - /* colm_run_func: runs the function whose frame is frame_id, passing param_count C strings as arguments. prg->argc, argv and argl are cleared. One zeroed stack slot is pushed per parameter and recorded as execution.call_args; each non-null params[p] is turned into a string tree (upref-ed) and stored in its slot, a null entry is stored as 0. A call header whose first slot is the call_args pointer, followed by four null slots and a zero-filled local frame, is pushed before fi->codeWC is run through colm_execute_code(). On return the previous prg->return_val is downref-ed and replaced by execution.ret_val, the parameter slots are popped, and the stack pointer is asserted to equal prg->stack_root. Returns prg->return_val. */ -tree_t *colm_run_func( struct colm_program *prg, int frame_id, - const char **params, int param_count ) -{ - /* Make the arguments available to the program. */ - prg->argc = 0; - prg->argv = 0; - prg->argl = 0; - - execution_t execution; - memset( &execution, 0, sizeof(execution) ); - - tree_t **sp = prg->stack_root; - - struct frame_info *fi = &prg->rtd->frame_info[frame_id]; - code_t *code = fi->codeWC; - - vm_pushn( param_count ); - execution.call_args = vm_ptop(); - memset( vm_ptop(), 0, sizeof(word_t) * param_count ); - - int p; - for ( p = 0; p < param_count; p++ ) { - if ( params[p] == 0 ) { - ((value_t*)execution.call_args)[p] = 0; - } - else { - head_t *head = colm_string_alloc_pointer( prg, params[p], strlen(params[p]) ); - tree_t *tree = construct_string( prg, head ); - colm_tree_upref( prg, tree ); - ((tree_t**)execution.call_args)[p] = tree; - } - } - - long stretch = FR_AA + fi->frame_size; - vm_contiguous( stretch ); - - /* Set up the stack as if we have called. We allow a return value. */ - vm_push_tree( (tree_t*)execution.call_args ); - vm_push_tree( 0 ); - vm_push_tree( 0 ); - vm_push_tree( 0 ); - vm_push_tree( 0 ); - - execution.frame_id = frame_id; - - execution.frame_ptr = vm_ptop(); - vm_pushn( fi->frame_size ); - memset( vm_ptop(), 0, sizeof(word_t) * fi->frame_size ); - - /* Execution loop. 
*/ - sp = colm_execute_code( prg, &execution, sp, code ); - - colm_tree_downref( prg, sp, prg->return_val ); - prg->return_val = execution.ret_val; - - vm_popn( param_count ); - - assert( sp == prg->stack_root ); - - return prg->return_val; -}; /* NOTE(review): the ';' after this closing brace is a stray empty declaration at file scope. */ - /* colm_make_reverse_code: moves the reverse code gathered in pda_run->rcode_collect onto pda_run->reverse_code as one block. Returns false without touching anything when rcode_collect is empty. When rc_block_count is zero, a separate block made of IN_PCR_END_DECK and IN_PCR_RET followed by a length word of 2 is appended first, and it is counted like any other block. The collected units are then copied in reverse order: each unit in rcode_collect is followed by a single code value holding its length, which is read to step back to the unit start. IN_PCR_RET and a word holding the total length appended since start_length finish the block. rcode_collect is emptied, and rc_block_count and the step count are incremented once per block written. Returns true. */ -int colm_make_reverse_code( struct pda_run *pda_run ) -{ - struct rt_code_vect *reverse_code = &pda_run->reverse_code; - struct rt_code_vect *rcode_collect = &pda_run->rcode_collect; - - /* Do we need to revert the left hand side? */ - - /* Check if there was anything generated. */ - if ( rcode_collect->tab_len == 0 ) - return false; - - if ( pda_run->rc_block_count == 0 ) { - /* One reverse code run for the DECK terminator. */ - append_code_val( reverse_code, IN_PCR_END_DECK ); - append_code_val( reverse_code, IN_PCR_RET ); - append_word( reverse_code, 2 ); - pda_run->rc_block_count += 1; - colm_increment_steps( pda_run ); - } - - long start_length = reverse_code->tab_len; - - /* Go backwards, group by group, through the reverse code. Push each group - * to the global reverse code stack. */ - code_t *p = rcode_collect->data + rcode_collect->tab_len; - while ( p != rcode_collect->data ) { - p--; - long len = *p; - p = p - len; - append_code_vect( reverse_code, p, len ); - } - - /* Stop, then place a total length in the global stack. */ - append_code_val( reverse_code, IN_PCR_RET ); - long length = reverse_code->tab_len - start_length; - append_word( reverse_code, length ); - - /* Clear the reverse code buffer. 
*/ - rcode_collect->tab_len = 0; - - pda_run->rc_block_count += 1; - colm_increment_steps( pda_run ); - - return true; -} - /* colm_transfer_reverse_code: when pda_run has recorded at least one reverse-code block (rc_block_count > 0), sets PF_HAS_RCODE in parse_tree->flags and resets the block count to zero. Does nothing otherwise. */ -void colm_transfer_reverse_code( struct pda_run *pda_run, parse_tree_t *parse_tree ) -{ - if ( pda_run->rc_block_count > 0 ) { - //debug( REALM_PARSE, "attaching reverse code to token\n" ); - parse_tree->flags |= PF_HAS_RCODE; - pda_run->rc_block_count = 0; - } -} - /* rcode_unit_term: closes the current reverse-code unit by appending its accumulated length (exec->rcode_unit_len) as a single code value to the parser's rcode_collect, then resets the running length to zero. */ -static void rcode_unit_term( execution_t *exec ) -{ - append_code_val( &exec->parser->pda_run->rcode_collect, exec->rcode_unit_len ); - exec->rcode_unit_len = 0; -} - /* rcode_unit_start: begins a new reverse-code unit by zeroing the running unit length. */ -static void rcode_unit_start( execution_t *exec ) -{ - exec->rcode_unit_len = 0; -} - /* rcode_code: appends one code value to the parser's rcode_collect and adds SIZEOF_CODE to the running unit length. */ -static void rcode_code( execution_t *exec, const code_t code ) -{ - append_code_val( &exec->parser->pda_run->rcode_collect, code ); - exec->rcode_unit_len += SIZEOF_CODE; -} - /* rcode_half: appends one half value to the parser's rcode_collect and adds SIZEOF_HALF to the running unit length. */ -static void rcode_half( execution_t *exec, const half_t half ) -{ - append_half( &exec->parser->pda_run->rcode_collect, half ); - exec->rcode_unit_len += SIZEOF_HALF; -} - /* rcode_word: appends one word value to the parser's rcode_collect and adds SIZEOF_WORD to the running unit length. */ -static void rcode_word( execution_t *exec, const word_t word ) -{ - append_word( &exec->parser->pda_run->rcode_collect, word ); - exec->rcode_unit_len += SIZEOF_WORD; -} - /* colm_pop_reverse_code: removes the last block from all_rev. Reads the length word stored after the block, shrinks all_rev->tab_len by the block plus that word, and returns a pointer to the start of the block inside all_rev->data. The vector is assumed to be non-empty; no check is made here. */ -code_t *colm_pop_reverse_code( struct rt_code_vect *all_rev ) -{ - /* Read the length */ - code_t *prcode = all_rev->data + all_rev->tab_len - SIZEOF_WORD; - word_t len; - read_word_p( len, prcode ); - - /* Find the start of block. */ - long start = all_rev->tab_len - len - SIZEOF_WORD; - prcode = all_rev->data + start; - - /* Backup over it. */ - all_rev->tab_len -= len + SIZEOF_WORD; - return prcode; -} - -tree_t **colm_execute_code( program_t *prg, execution_t *exec, tree_t **sp, code_t *instr ) -{ - /* When we exit we are going to verify that we did not eat up any stack - * space. 
*/ - tree_t **root = sp; - code_t c; - -again: - c = *instr++; - //debug( REALM_BYTECODE, "--in 0x%x\n", c ); - - switch ( c ) { - case IN_RESTORE_LHS: { - tree_t *restore; - read_tree( restore ); - - debug( prg, REALM_BYTECODE, "IN_RESTORE_LHS\n" ); - colm_tree_downref( prg, sp, exec->parser->pda_run->parse_input->shadow->tree ); - exec->parser->pda_run->parse_input->shadow->tree = restore; - break; - } - case IN_LOAD_NIL: { - debug( prg, REALM_BYTECODE, "IN_LOAD_NIL\n" ); - vm_push_tree( 0 ); - break; - } - case IN_LOAD_TREE: { - tree_t *tree; - read_tree( tree ); - vm_push_tree( tree ); - debug( prg, REALM_BYTECODE, "IN_LOAD_TREE %p id: %d refs: %d\n", - tree, tree->id, tree->refs ); - break; - } - case IN_LOAD_WORD: { - debug( prg, REALM_BYTECODE, "IN_LOAD_WORD\n" ); - word_t w; - read_word( w ); - vm_push_type( word_t, w ); - break; - } - case IN_LOAD_TRUE: { - debug( prg, REALM_BYTECODE, "IN_LOAD_TRUE\n" ); - //colm_tree_upref( prg, prg->trueVal ); - vm_push_tree( prg->true_val ); - break; - } - case IN_LOAD_FALSE: { - debug( prg, REALM_BYTECODE, "IN_LOAD_FALSE\n" ); - //colm_tree_upref( prg, prg->falseVal ); - vm_push_tree( prg->false_val ); - break; - } - case IN_LOAD_INT: { - word_t i; - read_word( i ); - - debug( prg, REALM_BYTECODE, "IN_LOAD_INT %d\n", i ); - - value_t value = i; - vm_push_value( value ); - break; - } - case IN_LOAD_STR: { - word_t offset; - read_word( offset ); - - debug( prg, REALM_BYTECODE, "IN_LOAD_STR %d\n", offset ); - - head_t *lit = make_literal( prg, offset ); - tree_t *tree = construct_string( prg, lit ); - colm_tree_upref( prg, tree ); - vm_push_tree( tree ); - break; - } - case IN_READ_REDUCE: { - half_t generic_id; - half_t reducer_id; - read_half( generic_id ); - read_half( reducer_id ); - - input_t *input = vm_pop_input(); - - debug( prg, REALM_BYTECODE, "IN_READ_REDUCE %hd %hd\n", generic_id, reducer_id ); - - prg->rtd->read_reduce( prg, reducer_id, input ); - - vm_push_tree( 0 ); - - break; - } - - /* - * LOAD_GLOBAL - 
*/ - case IN_LOAD_GLOBAL_R: { - debug( prg, REALM_BYTECODE, "IN_LOAD_GLOBAL_R\n" ); - - vm_push_struct( prg->global ); - break; - } - case IN_LOAD_GLOBAL_WV: { - debug( prg, REALM_BYTECODE, "IN_LOAD_GLOBAL_WV\n" ); - - assert( exec->WV ); - - vm_push_struct( prg->global ); - - /* Set up the reverse instruction. */ - rcode_unit_start( exec ); - rcode_code( exec, IN_LOAD_GLOBAL_BKT ); - break; - } - case IN_LOAD_GLOBAL_WC: { - debug( prg, REALM_BYTECODE, "IN_LOAD_GLOBAL_WC\n" ); - - assert( !exec->WV ); - - /* This is identical to the _R version, but using it for writing - * would be confusing. */ - vm_push_struct( prg->global ); - break; - } - case IN_LOAD_GLOBAL_BKT: { - debug( prg, REALM_BYTECODE, "IN_LOAD_GLOBAL_BKT\n" ); - - vm_push_struct( prg->global ); - break; - } - - case IN_LOAD_INPUT_R: { - debug( prg, REALM_BYTECODE, "IN_LOAD_INPUT_R\n" ); - - assert( exec->parser != 0 ); - vm_push_input( exec->parser->input ); - break; - } - case IN_LOAD_INPUT_WV: { - debug( prg, REALM_BYTECODE, "IN_LOAD_INPUT_WV\n" ); - - assert( exec->WV ); - - assert( exec->parser != 0 ); - vm_push_input( exec->parser->input ); - - /* Set up the reverse instruction. 
*/ - rcode_unit_start( exec ); - rcode_code( exec, IN_LOAD_INPUT_BKT ); - rcode_word( exec, (word_t)exec->parser->input ); - break; - } - case IN_LOAD_INPUT_WC: { - debug( prg, REALM_BYTECODE, "IN_LOAD_INPUT_WC\n" ); - - assert( !exec->WV ); - - assert( exec->parser != 0 ); - vm_push_input( exec->parser->input ); - break; - } - case IN_LOAD_INPUT_BKT: { - tree_t *accum_stream; - read_tree( accum_stream ); - - debug( prg, REALM_BYTECODE, "IN_LOAD_INPUT_BKT\n" ); - - colm_tree_upref( prg, accum_stream ); - vm_push_tree( accum_stream ); - break; - } - - case IN_LOAD_CONTEXT_R: { - debug( prg, REALM_BYTECODE, "IN_LOAD_CONTEXT_R\n" ); - - vm_push_type( struct_t*, exec->parser->pda_run->context ); - break; - } - case IN_LOAD_CONTEXT_WV: { - debug( prg, REALM_BYTECODE, "IN_LOAD_CONTEXT_WV\n" ); - - assert( exec->WV ); - - vm_push_type( struct_t *, exec->parser->pda_run->context ); - - /* Set up the reverse instruction. */ - rcode_unit_start( exec ); - rcode_code( exec, IN_LOAD_CONTEXT_BKT ); - break; - } - case IN_LOAD_CONTEXT_WC: { - debug( prg, REALM_BYTECODE, "IN_LOAD_CONTEXT_WC\n" ); - - assert( !exec->WV ); - - /* This is identical to the _R version, but using it for writing - * would be confusing. 
*/ - vm_push_type( struct_t *, exec->parser->pda_run->context ); - break; - } - case IN_LOAD_CONTEXT_BKT: { - debug( prg, REALM_BYTECODE, "IN_LOAD_CONTEXT_BKT\n" ); - - vm_push_type( struct_t *, exec->parser->pda_run->context ); - break; - } - - case IN_SET_PARSER_CONTEXT: { - debug( prg, REALM_BYTECODE, "IN_SET_PARSER_CONTEXT\n" ); - - struct_t *strct = vm_pop_struct(); - parser_t *parser = vm_pop_parser(); - - colm_parser_set_context( prg, sp, parser, strct ); - - vm_push_parser( parser ); - break; - } - - case IN_SET_PARSER_INPUT: { - debug( prg, REALM_BYTECODE, "IN_SET_PARSER_INPUT\n" ); - - input_t *to_replace_with = vm_pop_input(); - parser_t *parser = vm_pop_parser(); - - parser->input = to_replace_with; - - vm_push_parser( parser ); - - break; - } - - case IN_INIT_CAPTURES: { - consume_byte(); - - debug( prg, REALM_BYTECODE, "IN_INIT_CAPTURES\n" ); - - /* If there are captures (this is a translate block) then copy them into - * the local frame now. */ - struct lang_el_info *lel_info = prg->rtd->lel_info; - struct pda_run *pda_run = exec->parser->pda_run; - char **mark = pda_run->mark; - - int i, num_capture_attr = lel_info[pda_run->token_id].num_capture_attr; - for ( i = 0; i < num_capture_attr; i++ ) { - struct lang_el_info *lei = &lel_info[exec->parser->pda_run->token_id]; - CaptureAttr *ca = &prg->rtd->capture_attr[lei->capture_attr + i]; - head_t *data = string_alloc_full( prg, mark[ca->mark_enter], - mark[ca->mark_leave] - mark[ca->mark_enter] ); - tree_t *string = construct_string( prg, data ); - colm_tree_upref( prg, string ); - set_local( exec, -1 - i, string ); - } - break; - } - case IN_INIT_RHS_EL: { - half_t position; - short field; - read_half( position ); - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_INIT_RHS_EL %hd\n", field ); - - tree_t *val = get_rhs_el( prg, exec->parser->pda_run->red_lel->shadow->tree, position ); - colm_tree_upref( prg, val ); - vm_set_local(exec, field, val); - break; - } - - case IN_INIT_LHS_EL: { - short 
field; - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_INIT_LHS_EL %hd\n", field ); - - /* We transfer it to to the local field. Possibly take a copy. */ - tree_t *val = exec->parser->pda_run->red_lel->shadow->tree; - - /* Save it. */ - colm_tree_upref( prg, val ); - exec->parser->pda_run->parsed = val; - - exec->parser->pda_run->red_lel->shadow->tree = 0; - vm_set_local(exec, field, val); - break; - } - case IN_STORE_LHS_EL: { - short field; - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_STORE_LHS_EL %hd\n", field ); - - tree_t *val = vm_get_local(exec, field); - vm_set_local(exec, field, 0); - exec->parser->pda_run->red_lel->shadow->tree = val; - break; - } - case IN_UITER_ADVANCE: { - short field; - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_UITER_ADVANCE\n" ); - - /* Get the iterator. */ - user_iter_t *uiter = (user_iter_t*) vm_get_local(exec, field); - - long yield_size = vm_ssize() - uiter->root_size; - assert( uiter->yield_size == yield_size ); - - /* Fix the return instruction pointer. 
*/ - uiter->stack_root[-IFR_AA + IFR_RIN] = (SW)instr; - - instr = uiter->resume; - exec->frame_ptr = uiter->frame; - exec->iframe_ptr = &uiter->stack_root[-IFR_AA]; - break; - } - case IN_UITER_GET_CUR_R: { - short field; - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_UITER_GET_CUR_R\n" ); - - user_iter_t *uiter = (user_iter_t*) vm_get_local(exec, field); - tree_t *val = uiter->ref.kid->tree; - colm_tree_upref( prg, val ); - vm_push_tree( val ); - break; - } - case IN_UITER_GET_CUR_WC: { - short field; - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_UITER_GET_CUR_WC\n" ); - - user_iter_t *uiter = (user_iter_t*) vm_get_local(exec, field); - split_ref( prg, &sp, &uiter->ref ); - tree_t *split = uiter->ref.kid->tree; - colm_tree_upref( prg, split ); - vm_push_tree( split ); - break; - } - case IN_UITER_SET_CUR_WC: { - short field; - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_UITER_SET_CUR_WC\n" ); - - tree_t *t = vm_pop_tree(); - user_iter_t *uiter = (user_iter_t*) vm_get_local(exec, field); - split_ref( prg, &sp, &uiter->ref ); - tree_t *old = uiter->ref.kid->tree; - set_uiter_cur( prg, uiter, t ); - colm_tree_downref( prg, sp, old ); - break; - } - case IN_GET_LOCAL_R: { - short field; - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_GET_LOCAL_R %hd\n", field ); - - tree_t *val = vm_get_local(exec, field); - colm_tree_upref( prg, val ); - vm_push_tree( val ); - break; - } - case IN_GET_LOCAL_WC: { - short field; - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_GET_LOCAL_WC %hd\n", field ); - - tree_t *split = get_local_split( prg, exec, field ); - colm_tree_upref( prg, split ); - vm_push_tree( split ); - break; - } - case IN_SET_LOCAL_WC: { - short field; - read_half( field ); - debug( prg, REALM_BYTECODE, "IN_SET_LOCAL_WC %hd\n", field ); - - tree_t *val = vm_pop_tree(); - colm_tree_downref( prg, sp, vm_get_local(exec, field) ); - set_local( exec, field, val ); - break; - } - case IN_GET_LOCAL_VAL_R: { - short 
field; - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_GET_LOCAL_VAL_R %hd\n", field ); - - tree_t *val = vm_get_local(exec, field); - vm_push_tree( val ); - break; - } - case IN_SET_LOCAL_VAL_WC: { - short field; - read_half( field ); - debug( prg, REALM_BYTECODE, "IN_SET_LOCAL_VAL_WC %hd\n", field ); - - tree_t *val = vm_pop_tree(); - vm_set_local(exec, field, val); - break; - } - case IN_SAVE_RET: { - debug( prg, REALM_BYTECODE, "IN_SAVE_RET\n" ); - - value_t val = vm_pop_value(); - vm_set_local(exec, FR_RV, (tree_t*)val); - break; - } - case IN_GET_LOCAL_REF_R: { - short field; - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_GET_LOCAL_REF_R\n" ); - - ref_t *ref = (ref_t*) vm_get_plocal(exec, field); - tree_t *val = ref->kid->tree; - colm_tree_upref( prg, val ); - vm_push_tree( val ); - break; - } - case IN_GET_LOCAL_REF_WC: { - short field; - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_GET_LOCAL_REF_WC\n" ); - - ref_t *ref = (ref_t*) vm_get_plocal(exec, field); - split_ref( prg, &sp, ref ); - tree_t *val = ref->kid->tree; - colm_tree_upref( prg, val ); - vm_push_tree( val ); - break; - } - case IN_SET_LOCAL_REF_WC: { - short field; - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_SET_LOCAL_REF_WC\n" ); - - tree_t *val = vm_pop_tree(); - ref_t *ref = (ref_t*) vm_get_plocal(exec, field); - split_ref( prg, &sp, ref ); - ref_set_value( prg, sp, ref, val ); - break; - } - case IN_GET_FIELD_TREE_R: { - short field; - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_GET_FIELD_TREE_R %d\n", field ); - - tree_t *obj = vm_pop_tree(); - colm_tree_downref( prg, sp, obj ); - - tree_t *val = colm_tree_get_field( obj, field ); - colm_tree_upref( prg, val ); - vm_push_tree( val ); - break; - } - case IN_GET_FIELD_TREE_WC: { - short field; - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_GET_FIELD_TREE_WC %d\n", field ); - - tree_t *obj = vm_pop_tree(); - colm_tree_downref( prg, sp, obj ); - - tree_t *split = 
get_field_split( prg, obj, field ); - colm_tree_upref( prg, split ); - vm_push_tree( split ); - break; - } - case IN_GET_FIELD_TREE_WV: { - short field; - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_GET_FIELD_TREE_WV\n" ); - - tree_t *obj = vm_pop_tree(); - colm_tree_downref( prg, sp, obj ); - - tree_t *split = get_field_split( prg, obj, field ); - colm_tree_upref( prg, split ); - vm_push_tree( split ); - - /* Set up the reverse instruction. */ - rcode_code( exec, IN_GET_FIELD_TREE_BKT ); - rcode_half( exec, field ); - break; - } - case IN_GET_FIELD_TREE_BKT: { - short field; - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_GET_FIELD_TREE_BKT\n" ); - - tree_t *obj = vm_pop_tree(); - colm_tree_downref( prg, sp, obj ); - - tree_t *split = get_field_split( prg, obj, field ); - colm_tree_upref( prg, split ); - vm_push_tree( split ); - break; - } - case IN_SET_FIELD_TREE_WC: { - short field; - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_SET_FIELD_TREE_WC %d\n", field ); - - tree_t *obj = vm_pop_tree(); - tree_t *val = vm_pop_tree(); - colm_tree_downref( prg, sp, obj ); - - /* Downref the old value. */ - tree_t *prev = colm_tree_get_field( obj, field ); - colm_tree_downref( prg, sp, prev ); - - colm_tree_set_field( prg, obj, field, val ); - break; - } - case IN_SET_FIELD_TREE_WV: { - short field; - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_SET_FIELD_TREE_WV %d\n", field ); - - tree_t *obj = vm_pop_tree(); - tree_t *val = vm_pop_tree(); - colm_tree_downref( prg, sp, obj ); - - /* Save the old value, then set the field. */ - tree_t *prev = colm_tree_get_field( obj, field ); - colm_tree_set_field( prg, obj, field, val ); - - /* Set up the reverse instruction. 
*/ - rcode_code( exec, IN_SET_FIELD_TREE_BKT ); - rcode_half( exec, field ); - rcode_word( exec, (word_t)prev ); - rcode_unit_term( exec ); - break; - } - case IN_SET_FIELD_TREE_BKT: { - short field; - tree_t *val; - read_half( field ); - read_tree( val ); - - debug( prg, REALM_BYTECODE, "IN_SET_FIELD_TREE_BKT\n" ); - - tree_t *obj = vm_pop_tree(); - colm_tree_downref( prg, sp, obj ); - - /* Downref the old value. */ - tree_t *prev = colm_tree_get_field( obj, field ); - colm_tree_downref( prg, sp, prev ); - - colm_tree_set_field( prg, obj, field, val ); - break; - } - case IN_SET_FIELD_TREE_LEAVE_WC: { - short field; - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_SET_FIELD_TREE_LEAVE_WC\n" ); - - /* Note that we don't downref the object here because we are - * leaving it on the stack. */ - tree_t *obj = vm_pop_tree(); - tree_t *val = vm_pop_tree(); - - /* Downref the old value. */ - tree_t *prev = colm_tree_get_field( obj, field ); - colm_tree_downref( prg, sp, prev ); - - /* Set the field. */ - colm_tree_set_field( prg, obj, field, val ); - - /* Leave the object on the top of the stack. */ - vm_push_tree( obj ); - break; - } - case IN_GET_FIELD_VAL_R: { - short field; - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_GET_FIELD_VAL_R %d\n", field ); - - tree_t *obj = vm_pop_tree(); - colm_tree_downref( prg, sp, obj ); - - tree_t *pointer = colm_tree_get_field( obj, field ); - value_t value = 0; - if ( pointer != 0 ) - value = colm_get_pointer_val( pointer ); - vm_push_value( value ); - break; - } - case IN_SET_FIELD_VAL_WC: { - short field; - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_SET_FIELD_VAL_WC %d\n", field ); - - tree_t *obj = vm_pop_tree(); - value_t value = vm_pop_value(); - colm_tree_downref( prg, sp, obj ); - - /* Downref the old value. */ - tree_t *prev = colm_tree_get_field( obj, field ); - colm_tree_downref( prg, sp, prev ); - - /* Make it into a pointer. 
*/ - tree_t *pointer = colm_construct_pointer( prg, value ); - colm_tree_upref( prg, pointer ); - - colm_tree_set_field( prg, obj, field, pointer ); - break; - } - case IN_NEW_STRUCT: { - short id; - read_half( id ); - - debug( prg, REALM_BYTECODE, "IN_NEW_STRUCT %hd\n", id ); - struct_t *item = colm_struct_new( prg, id ); - vm_push_struct( item ); - break; - } - case IN_NEW_STREAM: { - debug( prg, REALM_BYTECODE, "IN_NEW_STREAM\n" ); - stream_t *item = colm_stream_open_collect( prg ); - vm_push_stream( item ); - break; - } - case IN_GET_COLLECT_STRING: { - debug( prg, REALM_BYTECODE, "IN_GET_COLLECT_STRING\n" ); - stream_t *stream = vm_pop_stream(); - str_t *str = collect_string( prg, stream ); - colm_tree_upref( prg, (tree_t*)str ); - vm_push_string( str ); - break; - } - case IN_GET_STRUCT_R: { - short field; - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_GET_STRUCT_R %d\n", field ); - - tree_t *obj = vm_pop_tree(); - tree_t *val = colm_struct_get_field( obj, tree_t*, field ); - colm_tree_upref( prg, val ); - vm_push_tree( val ); - break; - } - case IN_GET_STRUCT_WC: { - short field; - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_GET_STRUCT_WC %d\n", field ); - - tree_t *obj = vm_pop_tree(); - tree_t *val = colm_struct_get_field( obj, tree_t*, field ); - colm_tree_upref( prg, val ); - vm_push_tree( val ); - - break; - } - case IN_GET_STRUCT_WV: { - short field; - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_GET_STRUCT_WV\n" ); - - tree_t *obj = vm_pop_tree(); - tree_t *val = colm_struct_get_field( obj, tree_t*, field ); - colm_tree_upref( prg, val ); - vm_push_tree( val ); - - /* Set up the reverse instruction. 
*/ - rcode_code( exec, IN_GET_STRUCT_BKT ); - rcode_half( exec, field ); - break; - } - case IN_GET_STRUCT_BKT: { - short field; - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_GET_STRUCT_BKT\n" ); - - tree_t *obj = vm_pop_tree(); - colm_tree_downref( prg, sp, obj ); - - tree_t *split = get_field_split( prg, obj, field ); - colm_tree_upref( prg, split ); - vm_push_tree( split ); - break; - } - case IN_SET_STRUCT_WC: { - short field; - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_SET_STRUCT_WC %d\n", field ); - - tree_t *obj = vm_pop_tree(); - tree_t *val = vm_pop_tree(); - - /* Downref the old value. */ - tree_t *prev = colm_struct_get_field( obj, tree_t*, field ); - colm_tree_downref( prg, sp, prev ); - colm_struct_set_field( obj, tree_t*, field, val ); - break; - } - case IN_SET_STRUCT_WV: { - short field; - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_SET_STRUCT_WV %d\n", field ); - - struct_t *obj = vm_pop_struct(); - tree_t *val = vm_pop_tree(); - - /* Save the old value, then set the field. */ - tree_t *prev = colm_struct_get_field( obj, tree_t*, field ); - colm_struct_set_field( obj, tree_t*, field, val ); - - /* Set up the reverse instruction. */ - rcode_code( exec, IN_SET_STRUCT_BKT ); - rcode_half( exec, field ); - rcode_word( exec, (word_t)prev ); - rcode_unit_term( exec ); - break; - } - case IN_SET_STRUCT_BKT: { - short field; - tree_t *val; - read_half( field ); - read_tree( val ); - - debug( prg, REALM_BYTECODE, "IN_SET_STRUCT_BKT\n" ); - - tree_t *obj = vm_pop_tree(); - - /* Downref the old value. 
*/ - tree_t *prev = colm_struct_get_field( obj, tree_t*, field ); - colm_tree_downref( prg, sp, prev ); - - colm_struct_set_field( obj, tree_t*, field, val ); - break; - } - case IN_GET_STRUCT_VAL_R: { - short field; - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_GET_STRUCT_VAL_R %d\n", field ); - - tree_t *obj = vm_pop_tree(); - tree_t *val = colm_struct_get_field( obj, tree_t*, field ); - vm_push_tree( val ); - break; - } - case IN_SET_STRUCT_VAL_WC: { - short field; - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_SET_STRUCT_VAL_WC %d\n", field ); - - struct_t *strct = vm_pop_struct(); - tree_t *val = vm_pop_tree(); - - colm_struct_set_field( strct, tree_t*, field, val ); - break; - } - case IN_SET_STRUCT_VAL_WV: { - short field; - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_SET_STRUCT_VAL_WV %d\n", field ); - - struct_t *strct = vm_pop_struct(); - tree_t *val = vm_pop_tree(); - - tree_t *prev = colm_struct_get_field( strct, tree_t*, field ); - colm_struct_set_field( strct, tree_t*, field, val ); - - rcode_code( exec, IN_SET_STRUCT_VAL_BKT ); - rcode_half( exec, field ); - rcode_word( exec, (word_t)prev ); - rcode_unit_term( exec ); - break; - } - case IN_SET_STRUCT_VAL_BKT: { - short field; - tree_t *val; - read_half( field ); - read_tree( val ); - - debug( prg, REALM_BYTECODE, "IN_SET_STRUCT_VAL_BKT\n" ); - - tree_t *obj = vm_pop_tree(); - - colm_struct_set_field( obj, tree_t*, field, val ); - break; - } - case IN_GET_RHS_VAL_R: { - debug( prg, REALM_BYTECODE, "IN_GET_RHS_VAL_R\n" ); - int i, done = 0; - uchar len; - - tree_t *obj = vm_pop_tree(), *val = 0; - colm_tree_downref( prg, sp, obj ); - - read_byte( len ); - for ( i = 0; i < len; i++ ) { - uchar prod_num, child_num; - read_byte( prod_num ); - read_byte( child_num ); - if ( !done && obj->prod_num == prod_num ) { - val = get_rhs_el( prg, obj, child_num ); - done = 1; - } - } - - colm_tree_upref( prg, val ); - vm_push_tree( val ); - break; - } - case IN_POP_TREE: { - 
debug( prg, REALM_BYTECODE, "IN_POP_TREE\n" ); - - tree_t *val = vm_pop_tree(); - colm_tree_downref( prg, sp, val ); - break; - } - case IN_POP_VAL: { - debug( prg, REALM_BYTECODE, "IN_POP_VAL\n" ); - - vm_pop_tree(); - break; - } - case IN_POP_N_WORDS: { - short n; - read_half( n ); - - debug( prg, REALM_BYTECODE, "IN_POP_N_WORDS %hd\n", n ); - - vm_popn( n ); - break; - } - case IN_INT_TO_STR: { - debug( prg, REALM_BYTECODE, "IN_INT_TO_STR\n" ); - - value_t i = vm_pop_value(); - head_t *res = int_to_str( prg, (long)i ); - tree_t *str = construct_string( prg, res ); - colm_tree_upref( prg, str ); - vm_push_tree( str ); - break; - } - case IN_TREE_TO_STR_XML: { - debug( prg, REALM_BYTECODE, "IN_TREE_TO_STR_XML_AC\n" ); - - tree_t *tree = vm_pop_tree(); - head_t *res = tree_to_str_xml( prg, sp, tree, false, false ); - tree_t *str = construct_string( prg, res ); - colm_tree_upref( prg, str ); - vm_push_tree( str ); - colm_tree_downref( prg, sp, tree ); - break; - } - case IN_TREE_TO_STR_XML_AC: { - debug( prg, REALM_BYTECODE, "IN_TREE_TO_STR_XML_AC\n" ); - - tree_t *tree = vm_pop_tree(); - head_t *res = tree_to_str_xml_ac( prg, sp, tree, false, false ); - tree_t *str = construct_string( prg, res ); - colm_tree_upref( prg, str ); - vm_push_tree( str ); - colm_tree_downref( prg, sp, tree ); - break; - } - case IN_TREE_TO_STR_POSTFIX: { - debug( prg, REALM_BYTECODE, "IN_TREE_TO_STR_XML_AC\n" ); - - tree_t *tree = vm_pop_tree(); - head_t *res = tree_to_str_postfix( prg, sp, tree, false, false ); - tree_t *str = construct_string( prg, res ); - colm_tree_upref( prg, str ); - vm_push_tree( str ); - colm_tree_downref( prg, sp, tree ); - break; - } - case IN_TREE_TO_STR: { - debug( prg, REALM_BYTECODE, "IN_TREE_TO_STR\n" ); - - tree_t *tree = vm_pop_tree(); - head_t *res = tree_to_str( prg, sp, tree, false, false ); - tree_t *str = construct_string( prg, res ); - colm_tree_upref( prg, str ); - vm_push_tree( str ); - colm_tree_downref( prg, sp, tree ); - break; - } - case 
IN_TREE_TO_STR_TRIM: { - debug( prg, REALM_BYTECODE, "IN_TREE_TO_STR_TRIM\n" ); - - tree_t *tree = vm_pop_tree(); - head_t *res = tree_to_str( prg, sp, tree, true, false ); - tree_t *str = construct_string( prg, res ); - colm_tree_upref( prg, str ); - vm_push_tree( str ); - colm_tree_downref( prg, sp, tree ); - break; - } - case IN_TREE_TO_STR_TRIM_A: { - debug( prg, REALM_BYTECODE, "IN_TREE_TO_STR_TRIM_A\n" ); - - tree_t *tree = vm_pop_tree(); - head_t *res = tree_to_str( prg, sp, tree, true, true ); - tree_t *str = construct_string( prg, res ); - colm_tree_upref( prg, str ); - vm_push_tree( str ); - colm_tree_downref( prg, sp, tree ); - break; - } - case IN_TREE_TRIM: { - debug( prg, REALM_BYTECODE, "IN_TREE_TRIM\n" ); - - tree_t *tree = vm_pop_tree(); - tree_t *trimmed = tree_trim( prg, sp, tree ); - vm_push_tree( trimmed ); - break; - } - case IN_CONCAT_STR: { - debug( prg, REALM_BYTECODE, "IN_CONCAT_STR\n" ); - - str_t *s2 = vm_pop_string(); - str_t *s1 = vm_pop_string(); - head_t *res = concat_str( s1->value, s2->value ); - tree_t *str = construct_string( prg, res ); - colm_tree_upref( prg, str ); - colm_tree_downref( prg, sp, (tree_t*)s1 ); - colm_tree_downref( prg, sp, (tree_t*)s2 ); - vm_push_tree( str ); - break; - } - - case IN_STR_LENGTH: { - debug( prg, REALM_BYTECODE, "IN_STR_LENGTH\n" ); - - str_t *str = vm_pop_string(); - long len = string_length( str->value ); - value_t res = len; - vm_push_value( res ); - colm_tree_downref( prg, sp, (tree_t*)str ); - break; - } - case IN_JMP_FALSE_TREE: { - short dist; - read_half( dist ); - - debug( prg, REALM_BYTECODE, "IN_JMP_FALSE_TREE %d\n", dist ); - - tree_t *tree = vm_pop_tree(); - if ( test_false( prg, tree ) ) - instr += dist; - colm_tree_downref( prg, sp, tree ); - break; - } - case IN_JMP_TRUE_TREE: { - short dist; - read_half( dist ); - - debug( prg, REALM_BYTECODE, "IN_JMP_TRUE_TREE %d\n", dist ); - - tree_t *tree = vm_pop_tree(); - if ( !test_false( prg, tree ) ) - instr += dist; - 
colm_tree_downref( prg, sp, tree ); - break; - } - case IN_JMP_FALSE_VAL: { - short dist; - read_half( dist ); - - debug( prg, REALM_BYTECODE, "IN_JMP_FALSE_VAL %d\n", dist ); - - tree_t *tree = vm_pop_tree(); - if ( tree == 0 ) - instr += dist; - break; - } - case IN_JMP_TRUE_VAL: { - short dist; - read_half( dist ); - - debug( prg, REALM_BYTECODE, "IN_JMP_TRUE_VAL %d\n", dist ); - - tree_t *tree = vm_pop_tree(); - if ( tree != 0 ) - instr += dist; - break; - } - case IN_JMP: { - short dist; - read_half( dist ); - - debug( prg, REALM_BYTECODE, "IN_JMP\n" ); - - instr += dist; - break; - } - case IN_REJECT: { - debug( prg, REALM_BYTECODE, "IN_REJECT\n" ); - exec->parser->pda_run->reject = true; - break; - } - - /* - * Binary comparison operators. - */ - case IN_TST_EQL_TREE: { - debug( prg, REALM_BYTECODE, "IN_TST_EQL_TREE\n" ); - - tree_t *o2 = vm_pop_tree(); - tree_t *o1 = vm_pop_tree(); - long r = colm_cmp_tree( prg, o1, o2 ); - value_t val = r == 0 ? TRUE_VAL : FALSE_VAL; - vm_push_value( val ); - colm_tree_downref( prg, sp, o1 ); - colm_tree_downref( prg, sp, o2 ); - break; - } - case IN_TST_EQL_VAL: { - debug( prg, REALM_BYTECODE, "IN_TST_EQL_VAL\n" ); - - value_t o2 = vm_pop_value(); - value_t o1 = vm_pop_value(); - value_t val = o1 == o2 ? TRUE_VAL : FALSE_VAL; - vm_push_value( val ); - break; - } - case IN_TST_NOT_EQL_TREE: { - debug( prg, REALM_BYTECODE, "IN_TST_NOT_EQL_TREE\n" ); - - tree_t *o2 = vm_pop_tree(); - tree_t *o1 = vm_pop_tree(); - long r = colm_cmp_tree( prg, o1, o2 ); - value_t val = r != 0 ? TRUE_VAL : FALSE_VAL; - vm_push_value( val ); - colm_tree_downref( prg, sp, o1 ); - colm_tree_downref( prg, sp, o2 ); - break; - } - case IN_TST_NOT_EQL_VAL: { - debug( prg, REALM_BYTECODE, "IN_TST_NOT_EQL_VAL\n" ); - - value_t o2 = vm_pop_value(); - value_t o1 = vm_pop_value(); - value_t val = o1 != o2 ? 
TRUE_VAL : FALSE_VAL; - vm_push_value( val ); - break; - } - case IN_TST_LESS_VAL: { - debug( prg, REALM_BYTECODE, "IN_TST_LESS_VAL\n" ); - - value_t o2 = vm_pop_value(); - value_t o1 = vm_pop_value(); - value_t res = (long)o1 < (long)o2 ? TRUE_VAL : FALSE_VAL; - vm_push_value( res ); - break; - } - case IN_TST_LESS_TREE: { - debug( prg, REALM_BYTECODE, "IN_TST_LESS_TREE\n" ); - - tree_t *o2 = vm_pop_tree(); - tree_t *o1 = vm_pop_tree(); - long r = colm_cmp_tree( prg, o1, o2 ); - value_t val = r < 0 ? TRUE_VAL : FALSE_VAL; - vm_push_value( val ); - colm_tree_downref( prg, sp, o1 ); - colm_tree_downref( prg, sp, o2 ); - break; - } - case IN_TST_LESS_EQL_VAL: { - debug( prg, REALM_BYTECODE, "IN_TST_LESS_EQL_VAL\n" ); - - value_t o2 = vm_pop_value(); - value_t o1 = vm_pop_value(); - value_t val = (long)o1 <= (long)o2 ? TRUE_VAL : FALSE_VAL; - vm_push_value( val ); - break; - } - case IN_TST_LESS_EQL_TREE: { - debug( prg, REALM_BYTECODE, "IN_TST_LESS_EQL_TREE\n" ); - - tree_t *o2 = vm_pop_tree(); - tree_t *o1 = vm_pop_tree(); - long r = colm_cmp_tree( prg, o1, o2 ); - value_t val = r <= 0 ? TRUE_VAL : FALSE_VAL; - vm_push_value( val ); - colm_tree_downref( prg, sp, o1 ); - colm_tree_downref( prg, sp, o2 ); - break; - } - case IN_TST_GRTR_VAL: { - debug( prg, REALM_BYTECODE, "IN_TST_GRTR_VAL\n" ); - - value_t o2 = vm_pop_value(); - value_t o1 = vm_pop_value(); - value_t val = (long)o1 > (long)o2 ? TRUE_VAL : FALSE_VAL; - vm_push_value( val ); - break; - } - case IN_TST_GRTR_TREE: { - debug( prg, REALM_BYTECODE, "IN_TST_GRTR_TREE\n" ); - - tree_t *o2 = vm_pop_tree(); - tree_t *o1 = vm_pop_tree(); - long r = colm_cmp_tree( prg, o1, o2 ); - value_t val = r > 0 ? 
TRUE_VAL : FALSE_VAL; - vm_push_value( val ); - colm_tree_downref( prg, sp, o1 ); - colm_tree_downref( prg, sp, o2 ); - break; - } - case IN_TST_GRTR_EQL_VAL: { - debug( prg, REALM_BYTECODE, "IN_TST_GRTR_EQL_VAL\n" ); - - value_t o2 = vm_pop_value(); - value_t o1 = vm_pop_value(); - - value_t val = (long)o1 >= (long)o2 ? TRUE_VAL : FALSE_VAL; - vm_push_value( val ); - break; - } - case IN_TST_GRTR_EQL_TREE: { - debug( prg, REALM_BYTECODE, "IN_TST_GRTR_EQL_TREE\n" ); - - tree_t *o2 = vm_pop_tree(); - tree_t *o1 = vm_pop_tree(); - long r = colm_cmp_tree( prg, o1, o2 ); - value_t val = r >= 0 ? TRUE_VAL : FALSE_VAL; - vm_push_value( val ); - colm_tree_downref( prg, sp, o1 ); - colm_tree_downref( prg, sp, o2 ); - break; - } - case IN_TST_LOGICAL_AND: { - debug( prg, REALM_BYTECODE, "IN_TST_LOGICAL_AND\n" ); - - value_t o2 = vm_pop_value(); - value_t o1 = vm_pop_value(); - value_t val = o1 && o2 ? TRUE_VAL : FALSE_VAL; - vm_push_value( val ); - break; - } - case IN_TST_LOGICAL_OR: { - debug( prg, REALM_BYTECODE, "IN_TST_LOGICAL_OR\n" ); - - value_t o2 = vm_pop_value(); - value_t o1 = vm_pop_value(); - value_t val = o1 || o2 ? TRUE_VAL : FALSE_VAL; - vm_push_value( val ); - break; - } - - case IN_TST_NZ_TREE: { - debug( prg, REALM_BYTECODE, "IN_TST_NZ_TREE\n" ); - - tree_t *tree = vm_pop_tree(); - long r = !test_false( prg, tree ); - colm_tree_downref( prg, sp, tree ); - vm_push_value( r ); - break; - } - - case IN_NOT_VAL: { - debug( prg, REALM_BYTECODE, "IN_NOT_VAL\n" ); - - value_t o1 = vm_pop_value(); - value_t val = o1 == 0 ? TRUE_VAL : FALSE_VAL; - vm_push_value( val ); - break; - } - - case IN_NOT_TREE: { - debug( prg, REALM_BYTECODE, "IN_NOT_TREE\n" ); - - tree_t *tree = vm_pop_tree(); - long r = test_false( prg, tree ); - value_t val = r ? 
TRUE_VAL : FALSE_VAL; - vm_push_value( val ); - colm_tree_downref( prg, sp, tree ); - break; - } - - case IN_ADD_INT: { - debug( prg, REALM_BYTECODE, "IN_ADD_INT\n" ); - - value_t o2 = vm_pop_value(); - value_t o1 = vm_pop_value(); - long r = (long)o1 + (long)o2; - value_t val = r; - vm_push_value( val ); - break; - } - case IN_MULT_INT: { - debug( prg, REALM_BYTECODE, "IN_MULT_INT\n" ); - - value_t o2 = vm_pop_value(); - value_t o1 = vm_pop_value(); - long r = (long)o1 * (long)o2; - value_t val = r; - vm_push_value( val ); - break; - } - case IN_DIV_INT: { - debug( prg, REALM_BYTECODE, "IN_DIV_INT\n" ); - - value_t o2 = vm_pop_value(); - value_t o1 = vm_pop_value(); - long r = (long)o1 / (long)o2; - value_t val = r; - vm_push_value( val ); - break; - } - case IN_SUB_INT: { - debug( prg, REALM_BYTECODE, "IN_SUB_INT\n" ); - - value_t o2 = vm_pop_value(); - value_t o1 = vm_pop_value(); - long r = (long)o1 - (long)o2; - value_t val = r; - vm_push_value( val ); - break; - } - case IN_DUP_VAL: { - debug( prg, REALM_BYTECODE, "IN_DUP_VAL\n" ); - - word_t val = (word_t)vm_top(); - vm_push_type( word_t, val ); - break; - } - case IN_DUP_TREE: { - debug( prg, REALM_BYTECODE, "IN_DUP_TREE\n" ); - - tree_t *val = vm_top(); - colm_tree_upref( prg, val ); - vm_push_tree( val ); - break; - } - case IN_TRITER_FROM_REF: { - short field; - half_t arg_size; - half_t search_type_id; - read_half( field ); - read_half( arg_size ); - read_half( search_type_id ); - - debug( prg, REALM_BYTECODE, "IN_TRITER_FROM_REF " - "%hd %hd %hd\n", field, arg_size, search_type_id ); - - ref_t root_ref; - root_ref.kid = vm_pop_kid(); - root_ref.next = vm_pop_ref(); - void *mem = vm_get_plocal(exec, field); - - tree_t **stack_root = vm_ptop(); - long root_size = vm_ssize(); - - colm_init_tree_iter( (tree_iter_t*)mem, stack_root, - arg_size, root_size, &root_ref, search_type_id ); - break; - } - case IN_TRITER_UNWIND: - case IN_TRITER_DESTROY: { - short field; - read_half( field ); - - tree_iter_t *iter 
= (tree_iter_t*) vm_get_plocal(exec, field); - debug( prg, REALM_BYTECODE, "IN_TRITER_DESTROY %hd %d\n", - field, iter->yield_size ); - colm_tree_iter_destroy( prg, &sp, iter ); - break; - } - case IN_REV_TRITER_FROM_REF: { - short field; - half_t arg_size; - half_t search_type_id; - read_half( field ); - read_half( arg_size ); - read_half( search_type_id ); - - debug( prg, REALM_BYTECODE, "IN_REV_TRITER_FROM_REF " - "%hd %hd %hd\n", field, arg_size, search_type_id ); - - ref_t root_ref; - root_ref.kid = vm_pop_kid(); - root_ref.next = vm_pop_ref(); - - tree_t **stack_root = vm_ptop(); - long root_size = vm_ssize(); - - int children = 0; - kid_t *kid = tree_child( prg, root_ref.kid->tree ); - while ( kid != 0 ) { - vm_push_kid( kid ); - kid = kid->next; - children++; - } - - void *mem = vm_get_plocal(exec, field); - colm_init_rev_tree_iter( (rev_tree_iter_t*)mem, stack_root, - arg_size, root_size, &root_ref, search_type_id, children ); - break; - } - case IN_REV_TRITER_UNWIND: - case IN_REV_TRITER_DESTROY: { - short field; - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_REV_TRITER_DESTROY\n" ); - - rev_tree_iter_t *iter = (rev_tree_iter_t*) vm_get_plocal(exec, field); - colm_rev_tree_iter_destroy( prg, &sp, iter ); - break; - } - case IN_TREE_SEARCH: { - word_t id; - read_word( id ); - - debug( prg, REALM_BYTECODE, "IN_TREE_SEARCH\n" ); - - tree_t *tree = vm_pop_tree(); - tree_t *res = tree_search( prg, tree, id ); - colm_tree_upref( prg, res ); - vm_push_tree( res ); - colm_tree_downref( prg, sp, tree ); - break; - } - case IN_TRITER_ADVANCE: { - short field; - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_TRITER_ADVANCE\n" ); - - tree_iter_t *iter = (tree_iter_t*) vm_get_plocal(exec, field); - tree_t *res = tree_iter_advance( prg, &sp, iter ); - //colm_tree_upref( prg, res ); - vm_push_tree( res ); - break; - } - case IN_TRITER_NEXT_CHILD: { - short field; - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_TRITER_NEXT_CHILD\n" ); - - 
tree_iter_t *iter = (tree_iter_t*) vm_get_plocal(exec, field); - tree_t *res = tree_iter_next_child( prg, &sp, iter ); - //colm_tree_upref( prg, res ); - vm_push_tree( res ); - break; - } - case IN_REV_TRITER_PREV_CHILD: { - short field; - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_REV_TRITER_PREV_CHILD\n" ); - - rev_tree_iter_t *iter = (rev_tree_iter_t*) vm_get_plocal(exec, field); - tree_t *res = tree_rev_iter_prev_child( prg, &sp, iter ); - //colm_tree_upref( prg, res ); - vm_push_tree( res ); - break; - } - case IN_TRITER_NEXT_REPEAT: { - short field; - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_TRITER_NEXT_REPEAT\n" ); - - tree_iter_t *iter = (tree_iter_t*) vm_get_plocal(exec, field); - tree_t *res = tree_iter_next_repeat( prg, &sp, iter ); - //colm_tree_upref( prg, res ); - vm_push_tree( res ); - break; - } - case IN_TRITER_PREV_REPEAT: { - short field; - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_TRITER_PREV_REPEAT\n" ); - - tree_iter_t *iter = (tree_iter_t*) vm_get_plocal(exec, field); - tree_t *res = tree_iter_prev_repeat( prg, &sp, iter ); - //colm_tree_upref( prg, res ); - vm_push_tree( res ); - break; - } - case IN_TRITER_GET_CUR_R: { - short field; - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_TRITER_GET_CUR_R\n" ); - - tree_iter_t *iter = (tree_iter_t*) vm_get_plocal(exec, field); - tree_t *tree = tree_iter_deref_cur( iter ); - colm_tree_upref( prg, tree ); - vm_push_tree( tree ); - break; - } - case IN_TRITER_GET_CUR_WC: { - short field; - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_TRITER_GET_CUR_WC\n" ); - - tree_iter_t *iter = (tree_iter_t*) vm_get_plocal(exec, field); - split_iter_cur( prg, &sp, iter ); - tree_t *tree = tree_iter_deref_cur( iter ); - colm_tree_upref( prg, tree ); - vm_push_tree( tree ); - break; - } - case IN_TRITER_SET_CUR_WC: { - short field; - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_TRITER_SET_CUR_WC\n" ); - - tree_t *tree = vm_pop_tree(); - 
tree_iter_t *iter = (tree_iter_t*) vm_get_plocal(exec, field); - split_iter_cur( prg, &sp, iter ); - tree_t *old = tree_iter_deref_cur( iter ); - set_triter_cur( prg, iter, tree ); - colm_tree_downref( prg, sp, old ); - break; - } - case IN_GEN_ITER_FROM_REF: { - short field; - half_t arg_size; - half_t generic_id; - read_half( field ); - read_half( arg_size ); - read_half( generic_id ); - - debug( prg, REALM_BYTECODE, "IN_GEN_ITER_FROM_REF " - "%hd %hd %hd\n", field, arg_size, generic_id ); - - ref_t root_ref; - root_ref.kid = vm_pop_kid(); - root_ref.next = vm_pop_ref(); - void *mem = vm_get_plocal(exec, field); - - tree_t **stack_root = vm_ptop(); - long root_size = vm_ssize(); - - colm_init_list_iter( (generic_iter_t*)mem, stack_root, arg_size, - root_size, &root_ref, generic_id ); - break; - } - case IN_GEN_ITER_UNWIND: - case IN_GEN_ITER_DESTROY: { - short field; - read_half( field ); - - generic_iter_t *iter = (generic_iter_t*) vm_get_plocal(exec, field); - - debug( prg, REALM_BYTECODE, "IN_LIST_ITER_DESTROY %d\n", iter->yield_size ); - - colm_list_iter_destroy( prg, &sp, iter ); - break; - } - case IN_LIST_ITER_ADVANCE: { - short field; - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_LIST_ITER_ADVANCE\n" ); - - generic_iter_t *iter = (generic_iter_t*) vm_get_plocal(exec, field); - tree_t *res = colm_list_iter_advance( prg, &sp, iter ); - //colm_tree_upref( prg, res ); - vm_push_tree( res ); - break; - } - case IN_REV_LIST_ITER_ADVANCE: { - short field; - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_REV_LIST_ITER_ADVANCE\n" ); - - generic_iter_t *iter = (generic_iter_t*) vm_get_plocal(exec, field); - tree_t *res = colm_rev_list_iter_advance( prg, &sp, iter ); - //colm_tree_upref( prg, res ); - vm_push_tree( res ); - break; - } - case IN_MAP_ITER_ADVANCE: { - short field; - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_MAP_ITER_ADVANCE\n" ); - - generic_iter_t *iter = (generic_iter_t*) vm_get_plocal(exec, field); - tree_t *res = 
colm_map_iter_advance( prg, &sp, iter ); - //colm_tree_upref( prg, res ); - vm_push_tree( res ); - break; - } - case IN_GEN_ITER_GET_CUR_R: { - short field; - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_GEN_ITER_GET_CUR_R\n" ); - - generic_iter_t *iter = (generic_iter_t*) vm_get_plocal(exec, field); - tree_t *tree = colm_list_iter_deref_cur( prg, iter ); - //colm_tree_upref( prg, tree ); - vm_push_tree( tree ); - break; - } - case IN_GEN_VITER_GET_CUR_R: { - short field; - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_GEN_VITER_GET_CUR_R\n" ); - - generic_iter_t *iter = (generic_iter_t*) vm_get_plocal(exec, field); - value_t value = colm_viter_deref_cur( prg, iter ); - vm_push_value( value ); - break; - } - case IN_MATCH: { - half_t pattern_id; - read_half( pattern_id ); - - debug( prg, REALM_BYTECODE, "IN_MATCH\n" ); - - tree_t *tree = vm_pop_tree(); - - /* Run the match, push the result. */ - int root_node = prg->rtd->pat_repl_info[pattern_id].offset; - - /* Bindings are indexed starting at 1. Zero bindId to represent no - * binding. We make a space for it here rather than do math at - * access them. */ - long num_bindings = prg->rtd->pat_repl_info[pattern_id].num_bindings; - tree_t *bindings[1+num_bindings]; - memset( bindings, 0, sizeof(tree_t*)*(1+num_bindings) ); - - kid_t kid; - kid.tree = tree; - kid.next = 0; - int matched = match_pattern( bindings, prg, root_node, &kid, false ); - - if ( !matched ) - memset( bindings, 0, sizeof(tree_t*)*(1+num_bindings) ); - else { - int b; - for ( b = 1; b <= num_bindings; b++ ) - assert( bindings[b] != 0 ); - } - - tree_t *result = matched ? tree : 0; - colm_tree_upref( prg, result ); - vm_push_tree( result ? 
tree : 0 ); - int b; - for ( b = 1; b <= num_bindings; b++ ) { - colm_tree_upref( prg, bindings[b] ); - vm_push_tree( bindings[b] ); - } - - colm_tree_downref( prg, sp, tree ); - break; - } - - case IN_PROD_NUM: { - debug( prg, REALM_BYTECODE, "IN_PROD_NUM\n" ); - - tree_t *tree = vm_pop_tree(); - colm_tree_downref( prg, sp, tree ); - - value_t v = tree->prod_num; - vm_push_value( v ); - break; - } - - case IN_PRINT_TREE: { - uchar trim; - read_byte( trim ); - - debug( prg, REALM_BYTECODE, "IN_PRINT_TREE %d\n", (int)trim ); - - tree_t *to_send = vm_pop_tree(); - stream_t *stream = vm_pop_stream(); - - struct stream_impl *si = stream_to_impl( stream ); - - int auto_trim; - if ( trim == TRIM_YES ) - auto_trim = true; - else if ( trim == TRIM_NO ) - auto_trim = false; - else - auto_trim = si->funcs->get_option( prg, si, 0 ); - - si->funcs->print_tree( prg, sp, si, to_send, auto_trim ); - vm_push_stream( stream ); - colm_tree_downref( prg, sp, to_send ); - break; - } - - case IN_SEND_TEXT_W: { - uchar trim; - read_byte( trim ); - - debug( prg, REALM_BYTECODE, "IN_SEND_TEXT_W %d\n", (int)trim ); - - tree_t *to_send = vm_pop_tree(); - parser_t *parser = vm_pop_parser(); - - struct input_impl *si = input_to_impl( parser->input ); - - int auto_trim; - if ( trim == TRIM_YES ) - auto_trim = true; - else if ( trim == TRIM_NO ) - auto_trim = false; - else - auto_trim = si->funcs->get_option( prg, si, 0 ); - - word_t len = stream_append_text( prg, sp, parser->input, to_send, auto_trim ); - - vm_push_parser( parser ); - - if ( !exec->WV ) - colm_tree_downref( prg, sp, to_send ); - else { - rcode_unit_start( exec ); - rcode_code( exec, IN_SEND_TEXT_BKT ); - rcode_word( exec, (word_t) parser ); - rcode_word( exec, (word_t) to_send ); - rcode_word( exec, (word_t) len ); - rcode_unit_term( exec ); - } - - exec->steps = parser->pda_run->steps; - exec->pcr = PCR_START; - break; - } - - case IN_SEND_TEXT_BKT: { - parser_t *parser; - tree_t *sent; - word_t len; - read_parser( parser ); 
- read_tree( sent ); - read_word( len ); - - debug( prg, REALM_BYTECODE, "IN_SEND_TEXT_BKT\n" ); - - struct input_impl *si = input_to_impl( parser->input ); - stream_undo_append( prg, sp, si, sent, len ); - - colm_tree_downref( prg, sp, sent ); - break; - } - - case IN_SEND_TREE_W: { - uchar trim; - read_byte( trim ); - - debug( prg, REALM_BYTECODE, "IN_SEND_TREE_W %d\n", (int)trim ); - - tree_t *to_send = vm_pop_tree(); - parser_t *parser = vm_pop_parser(); - - struct input_impl *si = input_to_impl( parser->input ); - - int auto_trim; - if ( trim == TRIM_YES ) - auto_trim = true; - else if ( trim == TRIM_NO ) - auto_trim = false; - else - auto_trim = si->funcs->get_option( prg, si, 0 ); - - if ( auto_trim ) - to_send = tree_trim( prg, sp, to_send ); - - word_t len = stream_append_tree( prg, sp, parser->input, to_send ); - - vm_push_parser( parser ); - - if ( !exec->WV ) - colm_tree_downref( prg, sp, to_send ); - else { - rcode_unit_start( exec ); - rcode_code( exec, IN_SEND_TREE_BKT ); - rcode_word( exec, (word_t) parser ); - rcode_word( exec, (word_t) to_send ); - rcode_word( exec, (word_t) len ); - rcode_unit_term( exec ); - } - - exec->steps = parser->pda_run->steps; - exec->pcr = PCR_START; - break; - } - - case IN_SEND_TREE_BKT: { - parser_t *parser; - tree_t *sent; - word_t len; - read_parser( parser ); - read_tree( sent ); - read_word( len ); - - debug( prg, REALM_BYTECODE, "IN_SEND_TREE_BKT\n" ); - - struct input_impl *si = input_to_impl( parser->input ); - stream_undo_append( prg, sp, si, sent, len ); - - colm_tree_downref( prg, sp, sent ); - break; - } - - case IN_SEND_NOTHING: { - parser_t *parser = vm_pop_parser(); - vm_push_parser( parser ); - exec->steps = parser->pda_run->steps; - exec->pcr = PCR_START; - break; - } - case IN_SEND_STREAM_W: { - debug( prg, REALM_BYTECODE, "IN_SEND_STREAM_W\n" ); - - stream_t *to_send = vm_pop_stream(); - parser_t *parser = vm_pop_parser(); - - word_t len = stream_append_stream( prg, sp, parser->input, to_send ); - - 
vm_push_parser( parser ); - - if ( exec->WV ) { - rcode_unit_start( exec ); - rcode_code( exec, IN_SEND_STREAM_BKT ); - rcode_word( exec, (word_t) parser ); - rcode_word( exec, (word_t) to_send ); - rcode_word( exec, (word_t) len ); - rcode_unit_term( exec ); - } - - exec->steps = parser->pda_run->steps; - exec->pcr = PCR_START; - - break; - } - - case IN_SEND_STREAM_BKT: { - parser_t *parser; - tree_t *sent; - word_t len; - read_parser( parser ); - read_tree( sent ); - read_word( len ); - - debug( prg, REALM_BYTECODE, "IN_SEND_STREAM_BKT\n" ); - - struct input_impl *si = input_to_impl( parser->input ); - stream_undo_append_stream( prg, sp, si, sent, len ); - break; - } - - case IN_SEND_EOF_W: { - struct input_impl *si; - - debug( prg, REALM_BYTECODE, "IN_SEND_EOF_W\n" ); - parser_t *parser = vm_pop_parser(); - vm_push_parser( parser ); - - si = input_to_impl( parser->input ); - si->funcs->set_eof_mark( prg, si, true ); - - if ( exec->WV ) { - rcode_unit_start( exec ); - rcode_code( exec, IN_SEND_EOF_BKT ); - rcode_word( exec, (word_t) parser ); - rcode_unit_term( exec ); - } - - exec->steps = parser->pda_run->steps; - exec->pcr = PCR_START; - break; - } - - case IN_SEND_EOF_BKT: { - parser_t *parser; - read_parser( parser ); - - debug( prg, REALM_BYTECODE, "IN_SEND_EOF_BKT\n" ); - - struct input_impl *si = input_to_impl( parser->input ); - si->funcs->set_eof_mark( prg, si, false ); - break; - } - - case IN_INPUT_CLOSE_WC: { - debug( prg, REALM_BYTECODE, "IN_INPUT_CLOSE_WC\n" ); - - stream_t *stream = vm_pop_stream(); - struct stream_impl *si = stream->impl; - - si->funcs->close_stream( prg, si ); - - vm_push_stream( stream ); - break; - } - case IN_INPUT_AUTO_TRIM_WC: { - debug( prg, REALM_BYTECODE, "IN_INPUT_AUTO_TRIM_WC\n" ); - - stream_t *stream = vm_pop_stream(); - value_t auto_trim = vm_pop_value(); - struct stream_impl *si = stream->impl; - - si->funcs->set_option( prg, si, 0, (long) auto_trim ); - - vm_push_stream( stream ); - break; - } - case 
IN_IINPUT_AUTO_TRIM_WC: { - debug( prg, REALM_BYTECODE, "IN_INPUT_AUTO_TRIM_WC\n" ); - - input_t *input = vm_pop_input(); - value_t auto_trim = vm_pop_value(); - struct input_impl *ii = input->impl; - - ii->funcs->set_option( prg, ii, 0, (long) auto_trim ); - - vm_push_input( input ); - break; - } - - case IN_SET_ERROR: { - debug( prg, REALM_BYTECODE, "IN_SET_ERROR\n" ); - - tree_t *error = vm_pop_tree(); - colm_tree_downref( prg, sp, prg->error ); - prg->error = error; - break; - } - - case IN_GET_ERROR: { - debug( prg, REALM_BYTECODE, "IN_GET_ERROR\n" ); - - vm_pop_tree(); - colm_tree_upref( prg, prg->error ); - vm_push_tree( prg->error ); - break; - } - - /* stream: - * Push value and stash current on IN_PCR_CALL. The instructions - * exectued by a call need access to the stream the parser was called - * with. We need to preserver the stream for the caller, so we push - * first set it to the current stream. - * pcr: - * Need to preserve the pcr value between pda run invocations. Push - * current pcr value and start fresh with a new value on PCR_CALL. - * steps: - * Init from the PDA run when we start to parse. Need to preserve the - * starting steps value from the start of parsing to the moment we - * write the backtrack instruction. Start fresh with a private value - * on a PCR_CALL by pushing and initializing. 
*/ - - case IN_PARSE_INIT_BKT: { - debug( prg, REALM_BYTECODE, "IN_PARSE_INIT_BKT\n" ); - - parser_t *parser; - word_t steps; - - read_parser( parser ); - read_word( steps ); - - vm_push_parser( parser ); - - exec->steps = steps; - exec->pcr = PCR_START; - break; - } - - case IN_LOAD_RETVAL: { - debug( prg, REALM_BYTECODE, "IN_LOAD_RETVAL\n" ); - vm_push_tree( exec->ret_val ); - break; - } - - case IN_PCR_RET: { - debug( prg, REALM_BYTECODE, "IN_PCR_RET\n" ); - - if ( exec->frame_id >= 0 ) { - struct frame_info *fi = &prg->rtd->frame_info[exec->frame_id]; - downref_local_trees( prg, sp, exec, fi->locals, fi->locals_len ); - debug( prg, REALM_BYTECODE, "RET: %d\n", fi->frame_size ); - - vm_popn( fi->frame_size ); - } - - instr = vm_pop_type(code_t*); - - exec->WV = vm_pop_type(word_t); - exec->parser = vm_pop_parser(); - exec->pcr = vm_pop_type(word_t); - exec->steps = vm_pop_type(word_t); - exec->frame_id = vm_pop_type(long); - exec->iframe_ptr = vm_pop_type(tree_t**); - exec->frame_ptr = vm_pop_type(tree_t**); - - assert( instr != 0 ); - break; - } - - case IN_PCR_END_DECK: { - debug( prg, REALM_BYTECODE, "IN_PCR_END_DECK\n" ); - exec->parser->pda_run->on_deck = false; - break; - } - - case IN_PARSE_FRAG_W: { - parser_t *parser = vm_pop_parser(); - vm_push_parser( parser ); - - debug( prg, REALM_BYTECODE, "IN_PARSE_FRAG_W\n" ); - - exec->pcr = colm_parse_frag( prg, sp, parser->pda_run, - parser->input, exec->pcr ); - - /* If done, jump to the terminating instruction, otherwise fall - * through to call some code, then jump back here. 
*/ - if ( exec->pcr != PCR_DONE ) - instr = pcr_call( prg, exec, &sp, instr, parser ); - else { - if ( exec->WV ) { - rcode_unit_start( exec ); - - rcode_code( exec, IN_PARSE_INIT_BKT ); - rcode_word( exec, (word_t)parser ); - rcode_word( exec, (word_t)exec->steps ); - rcode_code( exec, IN_PARSE_FRAG_BKT ); - rcode_unit_term( exec ); - } - - if ( prg->induce_exit ) - goto out; - } - break; - } - - case IN_PARSE_FRAG_BKT: { - parser_t *parser = vm_pop_parser(); - vm_push_parser( parser ); - - debug( prg, REALM_BYTECODE, "IN_PARSE_FRAG_BKT\n" ); - - exec->pcr = colm_parse_undo_frag( prg, sp, parser->pda_run, - parser->input, exec->pcr, exec->steps ); - - if ( exec->pcr != PCR_DONE ) - instr = pcr_call( prg, exec, &sp, instr, parser ); - else { - vm_pop_parser(); - } - break; - } - - case IN_REDUCE_COMMIT: { - parser_t *parser = vm_pop_parser(); - vm_push_parser( parser ); - - debug( prg, REALM_BYTECODE, "IN_REDUCE_COMMIT\n" ); - - colm_parse_reduce_commit( prg, sp, parser->pda_run ); - break; - } - - - case IN_INPUT_PULL_WV: { - debug( prg, REALM_BYTECODE, "IN_INPUT_PULL_WV\n" ); - - input_t *input = vm_pop_input(); - tree_t *len = vm_pop_tree(); - tree_t *string = stream_pull_bc( prg, sp, 0, input, len ); - colm_tree_upref( prg, string ); - vm_push_tree( string ); - - /* Single unit. 
*/ - colm_tree_upref( prg, string ); - rcode_code( exec, IN_INPUT_PULL_BKT ); - rcode_word( exec, (word_t) string ); - rcode_unit_term( exec ); - - //colm_tree_downref( prg, sp, len ); - break; - } - - case IN_INPUT_PULL_WC: { - debug( prg, REALM_BYTECODE, "IN_INPUT_PULL_WC\n" ); - - input_t *input = vm_pop_input(); - tree_t *len = vm_pop_tree(); - tree_t *string = stream_pull_bc( prg, sp, 0, input, len ); - colm_tree_upref( prg, string ); - vm_push_tree( string ); - - //colm_tree_downref( prg, sp, len ); - break; - } - case IN_INPUT_PULL_BKT: { - tree_t *string; - read_tree( string ); - - input_t *input = vm_pop_input(); - - debug( prg, REALM_BYTECODE, "IN_INPUT_PULL_BKT\n" ); - - undo_pull( prg, input, string ); - colm_tree_downref( prg, sp, string ); - break; - } - case IN_INPUT_PUSH_WV: { - debug( prg, REALM_BYTECODE, "IN_INPUT_PUSH_WV\n" ); - - input_t *input = vm_pop_input(); - tree_t *tree = vm_pop_tree(); - long len = stream_push( prg, sp, input_to_impl( input ), tree, false ); - vm_push_tree( 0 ); - - /* Single unit. */ - rcode_code( exec, IN_INPUT_PUSH_BKT ); - rcode_word( exec, len ); - rcode_unit_term( exec ); - - colm_tree_downref( prg, sp, tree ); - break; - } - case IN_INPUT_PUSH_IGNORE_WV: { - debug( prg, REALM_BYTECODE, "IN_INPUT_PUSH_IGNORE_WV\n" ); - - input_t *input = vm_pop_input(); - tree_t *tree = vm_pop_tree(); - long len = stream_push( prg, sp, input_to_impl( input ), tree, true ); - vm_push_tree( 0 ); - - /* Single unit. 
*/ - rcode_code( exec, IN_INPUT_PUSH_BKT ); - rcode_word( exec, len ); - rcode_unit_term( exec ); - - colm_tree_downref( prg, sp, tree ); - break; - } - case IN_INPUT_PUSH_BKT: { - word_t len; - read_word( len ); - - debug( prg, REALM_BYTECODE, "IN_INPUT_PUSH_BKT %d\n", len ); - - input_t *input = vm_pop_input(); - colm_undo_stream_push( prg, sp, input_to_impl( input ), len ); - break; - } - case IN_INPUT_PUSH_STREAM_WV: { - debug( prg, REALM_BYTECODE, "IN_INPUT_PUSH_STREAM_WV\n" ); - - input_t *input = vm_pop_input(); - stream_t *to_push = vm_pop_stream(); - long len = stream_push_stream( prg, sp, input_to_impl( input ), to_push ); - vm_push_tree( 0 ); - - /* Single unit. */ - rcode_code( exec, IN_INPUT_PUSH_BKT ); - rcode_word( exec, len ); - rcode_unit_term( exec ); - break; - } - case IN_INPUT_PUSH_STREAM_BKT: { - word_t len; - read_word( len ); - - debug( prg, REALM_BYTECODE, "IN_INPUT_PUSH_STREAM_BKT %d\n", len ); - - input_t *input = vm_pop_input(); - colm_undo_stream_push( prg, sp, input_to_impl( input ), len ); - break; - } - case IN_CONS_GENERIC: { - half_t generic_id; - half_t stop_id; - read_half( generic_id ); - read_half( stop_id ); - - debug( prg, REALM_BYTECODE, "IN_CONS_GENERIC %hd %hd\n", generic_id, stop_id ); - - struct_t *gen = colm_construct_generic( prg, generic_id, stop_id ); - vm_push_struct( gen ); - break; - } - case IN_CONS_REDUCER: { - half_t generic_id; - half_t reducer_id; - read_half( generic_id ); - read_half( reducer_id ); - - debug( prg, REALM_BYTECODE, "IN_CONS_REDUCER %hd\n", generic_id ); - - struct_t *gen = colm_construct_reducer( prg, generic_id, reducer_id ); - vm_push_struct( gen ); - break; - } - case IN_CONS_OBJECT: { - half_t lang_el_id; - read_half( lang_el_id ); - - debug( prg, REALM_BYTECODE, "IN_CONS_OBJECT %hd\n", lang_el_id ); - - tree_t *repl_tree = colm_construct_object( prg, 0, 0, lang_el_id ); - vm_push_tree( repl_tree ); - break; - } - case IN_CONSTRUCT: { - half_t pattern_id; - read_half( pattern_id ); - - 
debug( prg, REALM_BYTECODE, "IN_CONSTRUCT\n" ); - - //struct lang_el_info *lelInfo = prg->rtd->lelInfo; - //struct pat_cons_node *nodes = prg->rtd->patReplNodes; - int root_node = prg->rtd->pat_repl_info[pattern_id].offset; - - /* Note that bindIds are indexed at one. Add one spot for them. */ - int num_bindings = prg->rtd->pat_repl_info[pattern_id].num_bindings; - tree_t *bindings[1+num_bindings]; - - int b; - for ( b = 1; b <= num_bindings; b++ ) { - bindings[b] = vm_pop_tree(); - assert( bindings[b] != 0 ); - } - - tree_t *repl_tree = colm_construct_tree( prg, 0, bindings, root_node ); - - vm_push_tree( repl_tree ); - break; - } - case IN_CONSTRUCT_TERM: { - half_t token_id; - read_half( token_id ); - - debug( prg, REALM_BYTECODE, "IN_CONSTRUCT_TERM\n" ); - - /* Pop the string we are constructing the token from. */ - str_t *str = vm_pop_string(); - tree_t *res = colm_construct_term( prg, token_id, str->value ); - colm_tree_upref( prg, res ); - vm_push_tree( res ); - break; - } - case IN_MAKE_TOKEN: { - uchar nargs; - int i; - read_byte( nargs ); - - debug( prg, REALM_BYTECODE, "IN_MAKE_TOKEN\n" ); - - tree_t *arg[nargs]; - for ( i = nargs-1; i >= 0; i-- ) - arg[i] = vm_pop_tree(); - - tree_t *result = colm_construct_token( prg, arg, nargs ); - for ( i = 1; i < nargs; i++ ) - colm_tree_downref( prg, sp, arg[i] ); - vm_push_tree( result ); - break; - } - case IN_MAKE_TREE: { - uchar nargs; - int i; - read_byte( nargs ); - - debug( prg, REALM_BYTECODE, "IN_MAKE_TREE\n" ); - - tree_t *arg[nargs]; - for ( i = nargs-1; i >= 0; i-- ) - arg[i] = vm_pop_tree(); - - tree_t *result = make_tree( prg, arg, nargs ); - for ( i = 1; i < nargs; i++ ) - colm_tree_downref( prg, sp, arg[i] ); - - vm_push_tree( result ); - break; - } - case IN_TREE_CAST: { - half_t lang_el_id; - read_half( lang_el_id ); - - debug( prg, REALM_BYTECODE, "IN_TREE_CAST %hd\n", lang_el_id ); - - tree_t *tree = vm_pop_tree(); - tree_t *res = cast_tree( prg, lang_el_id, tree ); - colm_tree_upref( prg, res 
); - colm_tree_downref( prg, sp, tree ); - vm_push_tree( res ); - break; - } - case IN_PTR_ACCESS_WV: { - debug( prg, REALM_BYTECODE, "IN_PTR_ACCESS_WV\n" ); - - struct_t *ptr = vm_pop_struct(); - vm_push_struct( ptr ); - - /* This is an initial global load. Need to reverse execute it. */ - rcode_unit_start( exec ); - rcode_code( exec, IN_PTR_ACCESS_BKT ); - rcode_word( exec, (word_t) ptr ); - break; - } - case IN_PTR_ACCESS_BKT: { - word_t p; - read_word( p ); - - debug( prg, REALM_BYTECODE, "IN_PTR_ACCESS_BKT\n" ); - - struct_t *ptr = (struct_t*)p; - vm_push_type( struct_t *, ptr ); - break; - } - case IN_REF_FROM_LOCAL: { - short int field; - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_REF_FROM_LOCAL %hd\n", field ); - - /* First push the null next pointer, then the kid pointer. */ - kid_t *kid = (kid_t*)vm_get_plocal(exec, field); - vm_contiguous( 2 ); - vm_push_ref( 0 ); - vm_push_kid( kid ); - break; - } - case IN_REF_FROM_REF: { - short int field; - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_REF_FROM_REF %hd\n", field ); - - ref_t *ref = (ref_t*)vm_get_plocal(exec, field); - vm_contiguous( 2 ); - vm_push_ref( ref ); - vm_push_kid( ref->kid ); - break; - } - case IN_REF_FROM_QUAL_REF: { - short int back; - short int field; - read_half( back ); - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_REF_FROM_QUAL_REF\n" ); - - ref_t *ref = (ref_t*)(sp + back); - - tree_t *obj = ref->kid->tree; - kid_t *attr_kid = get_field_kid( obj, field ); - - vm_contiguous( 2 ); - vm_push_ref( ref ); - vm_push_kid( attr_kid ); - break; - } - case IN_RHS_REF_FROM_QUAL_REF: { - short int back; - int i, done = 0; - uchar len; - - read_half( back ); - - debug( prg, REALM_BYTECODE, "IN_RHS_REF_FROM_QUAL_REF\n" ); - - ref_t *ref = (ref_t*)(sp + back); - - tree_t *obj = ref->kid->tree; - kid_t *attr_kid = 0; - - read_byte( len ); - for ( i = 0; i < len; i++ ) { - uchar prod_num, child_num; - read_byte( prod_num ); - read_byte( child_num ); - if ( !done 
&& obj->prod_num == prod_num ) { - attr_kid = get_rhs_el_kid( prg, obj, child_num ); - done = 1; - } - } - - vm_contiguous( 2 ); - vm_push_ref( ref ); - vm_push_kid( attr_kid ); - break; - } - case IN_REF_FROM_BACK: { - short int back; - read_half( back ); - - debug( prg, REALM_BYTECODE, "IN_REF_FROM_BACK %hd\n", back ); - - kid_t *ptr = (kid_t*)(sp + back); - - vm_contiguous( 2 ); - vm_push_ref( 0 ); - vm_push_kid( ptr ); - break; - } - case IN_TRITER_REF_FROM_CUR: { - short int field; - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_TRITER_REF_FROM_CUR\n" ); - - /* Push the next pointer first, then the kid. */ - tree_iter_t *iter = (tree_iter_t*) vm_get_plocal(exec, field); - ref_t *ref = &iter->ref; - vm_contiguous( 2 ); - vm_push_ref( ref ); - vm_push_kid( iter->ref.kid ); - break; - } - case IN_UITER_REF_FROM_CUR: { - short int field; - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_UITER_REF_FROM_CUR\n" ); - - /* Push the next pointer first, then the kid. */ - user_iter_t *uiter = (user_iter_t*) vm_get_local(exec, field); - vm_contiguous( 2 ); - vm_push_ref( uiter->ref.next ); - vm_push_kid( uiter->ref.kid ); - break; - } - case IN_GET_TOKEN_DATA_R: { - debug( prg, REALM_BYTECODE, "IN_GET_TOKEN_DATA_R\n" ); - - tree_t *tree = vm_pop_tree(); - head_t *data = string_copy( prg, tree->tokdata ); - tree_t *str = construct_string( prg, data ); - colm_tree_upref( prg, str ); - vm_push_tree( str ); - colm_tree_downref( prg, sp, tree ); - break; - } - case IN_SET_TOKEN_DATA_WC: { - debug( prg, REALM_BYTECODE, "IN_SET_TOKEN_DATA_WC\n" ); - - tree_t *tree = vm_pop_tree(); - tree_t *val = vm_pop_tree(); - head_t *head = string_copy( prg, ((str_t*)val)->value ); - string_free( prg, tree->tokdata ); - tree->tokdata = head; - - colm_tree_downref( prg, sp, tree ); - colm_tree_downref( prg, sp, val ); - break; - } - case IN_SET_TOKEN_DATA_WV: { - debug( prg, REALM_BYTECODE, "IN_SET_TOKEN_DATA_WV\n" ); - - tree_t *tree = vm_pop_tree(); - tree_t *val = 
vm_pop_tree(); - - head_t *oldval = tree->tokdata; - head_t *head = string_copy( prg, ((str_t*)val)->value ); - tree->tokdata = head; - - /* Set up reverse code. Needs no args. */ - rcode_code( exec, IN_SET_TOKEN_DATA_BKT ); - rcode_word( exec, (word_t)oldval ); - rcode_unit_term( exec ); - - colm_tree_downref( prg, sp, tree ); - colm_tree_downref( prg, sp, val ); - break; - } - case IN_SET_TOKEN_DATA_BKT: { - debug( prg, REALM_BYTECODE, "IN_SET_TOKEN_DATA_BKT \n" ); - - word_t oldval; - read_word( oldval ); - - tree_t *tree = vm_pop_tree(); - head_t *head = (head_t*)oldval; - string_free( prg, tree->tokdata ); - tree->tokdata = head; - colm_tree_downref( prg, sp, tree ); - break; - } - case IN_GET_TOKEN_FILE_R: { - debug( prg, REALM_BYTECODE, "IN_GET_TOKEN_FILE_R\n" ); - tree_t *tree = vm_pop_tree(); - tree_t *str = 0; - if ( tree->tokdata->location ) { - const char *fn = tree->tokdata->location->name; - head_t *data = string_alloc_full( prg, fn, strlen(fn) ); - str = construct_string( prg, data ); - colm_tree_upref( prg, str ); - } - vm_push_tree( str ); - colm_tree_downref( prg, sp, tree ); - break; - } - case IN_GET_TOKEN_LINE_R: { - debug( prg, REALM_BYTECODE, "IN_GET_TOKEN_LINE_R\n" ); - - tree_t *tree = vm_pop_tree(); - value_t integer = 0; - if ( tree->tokdata->location ) - integer = tree->tokdata->location->line; - vm_push_value( integer ); - colm_tree_downref( prg, sp, tree ); - break; - } - case IN_GET_TOKEN_COL_R: { - debug( prg, REALM_BYTECODE, "IN_GET_TOKEN_COL_R\n" ); - - tree_t *tree = vm_pop_tree(); - value_t integer = 0; - if ( tree->tokdata->location ) - integer = tree->tokdata->location->column; - vm_push_value( integer ); - colm_tree_downref( prg, sp, tree ); - break; - } - case IN_GET_TOKEN_POS_R: { - debug( prg, REALM_BYTECODE, "IN_GET_TOKEN_POS_R\n" ); - - tree_t *tree = vm_pop_tree(); - value_t integer = 0; - if ( tree->tokdata->location ) - integer = tree->tokdata->location->byte; - vm_push_value( integer ); - colm_tree_downref( prg, sp, 
tree ); - break; - } - case IN_GET_MATCH_LENGTH_R: { - debug( prg, REALM_BYTECODE, "IN_GET_MATCH_LENGTH_R\n" ); - - value_t integer = string_length(exec->parser->pda_run->tokdata); - vm_push_value( integer ); - break; - } - case IN_GET_MATCH_TEXT_R: { - debug( prg, REALM_BYTECODE, "IN_GET_MATCH_TEXT_R\n" ); - - head_t *s = string_copy( prg, exec->parser->pda_run->tokdata ); - tree_t *tree = construct_string( prg, s ); - colm_tree_upref( prg, tree ); - vm_push_tree( tree ); - break; - } - case IN_LIST_LENGTH: { - debug( prg, REALM_BYTECODE, "IN_LIST_LENGTH\n" ); - - list_t *list = vm_pop_list(); - long len = colm_list_length( list ); - value_t res = len; - vm_push_value( res ); - break; - } - case IN_GET_LIST_EL_MEM_R: { - short gen_id, field; - read_half( gen_id ); - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_GET_LIST_EL_MEM_R\n" ); - - struct_t *s = vm_pop_struct(); - - list_el_t *list_el = colm_struct_to_list_el( prg, s, gen_id ); - struct_t *val = colm_list_el_get( prg, list_el, gen_id, field ); - vm_push_struct( val ); - break; - } - case IN_GET_LIST_MEM_R: { - short gen_id, field; - read_half( gen_id ); - read_half( field ); - - debug( prg, REALM_BYTECODE, - "IN_GET_LIST_MEM_R %hd %hd\n", gen_id, field ); - - list_t *list = vm_pop_list(); - struct_t *val = colm_list_get( prg, list, gen_id, field ); - vm_push_struct( val ); - break; - } - case IN_GET_LIST_MEM_WC: { - short field; - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_GET_LIST_MEM_WC\n" ); - - tree_t *obj = vm_pop_tree(); - colm_tree_downref( prg, sp, obj ); - - tree_t *val = get_list_mem_split( prg, (list_t*)obj, field ); - colm_tree_upref( prg, val ); - vm_push_tree( val ); - break; - } - case IN_GET_LIST_MEM_WV: { - short field; - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_GET_LIST_MEM_WV\n" ); - - tree_t *obj = vm_pop_tree(); - colm_tree_downref( prg, sp, obj ); - - tree_t *val = get_list_mem_split( prg, (list_t*)obj, field ); - colm_tree_upref( prg, val ); - 
vm_push_tree( val ); - - /* Set up the reverse instruction. */ - rcode_code( exec, IN_GET_LIST_MEM_BKT ); - rcode_half( exec, field ); - break; - } - case IN_GET_LIST_MEM_BKT: { - short field; - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_GET_LIST_MEM_BKT\n" ); - - tree_t *obj = vm_pop_tree(); - colm_tree_downref( prg, sp, obj ); - - tree_t *res = get_list_mem_split( prg, (list_t*)obj, field ); - colm_tree_upref( prg, res ); - vm_push_tree( res ); - break; - } - case IN_GET_VLIST_MEM_R: { - short gen_id, field; - read_half( gen_id ); - read_half( field ); - - debug( prg, REALM_BYTECODE, - "IN_GET_VLIST_MEM_R %hd %hd\n", gen_id, field ); - - list_t *list = vm_pop_list(); - struct_t *el = colm_list_get( prg, list, gen_id, field ); - - value_t val = colm_struct_get_field( el, value_t, 0 ); - vm_push_value( val ); - break; - } - case IN_GET_VLIST_MEM_WC: { - short field; - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_GET_VLIST_MEM_WC\n" ); - - tree_t *obj = vm_pop_tree(); - colm_tree_downref( prg, sp, obj ); - - tree_t *val = get_list_mem_split( prg, (list_t*)obj, field ); - colm_tree_upref( prg, val ); - vm_push_tree( val ); - break; - } - case IN_GET_VLIST_MEM_WV: { - short field; - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_GET_VLIST_MEM_WV\n" ); - - tree_t *obj = vm_pop_tree(); - colm_tree_downref( prg, sp, obj ); - - tree_t *val = get_list_mem_split( prg, (list_t*)obj, field ); - colm_tree_upref( prg, val ); - vm_push_tree( val ); - - /* Set up the reverse instruction. 
*/ - rcode_code( exec, IN_GET_LIST_MEM_BKT ); - rcode_half( exec, field ); - break; - } - case IN_GET_VLIST_MEM_BKT: { - short field; - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_GET_VLIST_MEM_BKT\n" ); - - tree_t *obj = vm_pop_tree(); - colm_tree_downref( prg, sp, obj ); - - tree_t *res = get_list_mem_split( prg, (list_t*)obj, field ); - colm_tree_upref( prg, res ); - vm_push_tree( res ); - break; - } - case IN_GET_PARSER_STREAM: { - debug( prg, REALM_BYTECODE, "IN_GET_PARSER_STREAM\n" ); - parser_t *parser = vm_pop_parser(); - vm_push_input( parser->input ); - break; - } - case IN_GET_PARSER_MEM_R: { - short field; - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_GET_PARSER_MEM_R %hd\n", field ); - - parser_t *parser = vm_pop_parser(); - - tree_t *val = get_parser_mem( parser, field ); - - colm_tree_upref( prg, val ); - vm_push_tree( val ); - break; - } - - case IN_GET_MAP_EL_MEM_R: { - short gen_id, field; - read_half( gen_id ); - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_GET_MAP_EL_MEM_R\n" ); - - struct_t *strct = vm_pop_struct(); - - map_el_t *map_el = colm_struct_to_map_el( prg, strct, gen_id ); - struct_t *val = colm_map_el_get( prg, map_el, gen_id, field ); - vm_push_struct( val ); - break; - } - case IN_MAP_LENGTH: { - debug( prg, REALM_BYTECODE, "IN_MAP_LENGTH\n" ); - - tree_t *obj = vm_pop_tree(); - long len = map_length( (map_t*)obj ); - value_t res = len; - vm_push_value( res ); - break; - } - case IN_GET_MAP_MEM_R: { - short gen_id, field; - read_half( gen_id ); - read_half( field ); - - debug( prg, REALM_BYTECODE, - "IN_GET_MAP_MEM_R %hd %hd\n", gen_id, field ); - - map_t *map = vm_pop_map(); - struct_t *val = colm_map_get( prg, map, gen_id, field ); - vm_push_struct( val ); - break; - } - case IN_GET_MAP_MEM_WC: { - short field; - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_GET_MAP_MEM_WC\n" ); - - tree_t *obj = vm_pop_tree(); - colm_tree_downref( prg, sp, obj ); - - tree_t *val = get_list_mem_split( 
prg, (list_t*)obj, field ); - colm_tree_upref( prg, val ); - vm_push_tree( val ); - break; - } - case IN_GET_MAP_MEM_WV: { - short field; - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_GET_MAP_MEM_WV\n" ); - - tree_t *obj = vm_pop_tree(); - colm_tree_downref( prg, sp, obj ); - - tree_t *val = get_list_mem_split( prg, (list_t*)obj, field ); - colm_tree_upref( prg, val ); - vm_push_tree( val ); - - /* Set up the reverse instruction. */ - rcode_code( exec, IN_GET_MAP_MEM_BKT ); - rcode_half( exec, field ); - break; - } - case IN_GET_MAP_MEM_BKT: { - short field; - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_GET_MAP_MEM_BKT\n" ); - - tree_t *obj = vm_pop_tree(); - colm_tree_downref( prg, sp, obj ); - - tree_t *res = get_list_mem_split( prg, (list_t*)obj, field ); - colm_tree_upref( prg, res ); - vm_push_tree( res ); - break; - } - - case IN_STASH_ARG: { - half_t pos; - half_t size; - read_half( pos ); - read_half( size ); - - debug( prg, REALM_BYTECODE, "IN_STASH_ARG %hd %hd\n", pos, size ); - - while ( size > 0 ) { - value_t v = vm_pop_value(); - ((value_t*)exec->call_args)[pos] = v; - size -= 1; - pos += 1; - } - - break; - } - - case IN_PREP_ARGS: { - half_t size; - read_half( size ); - - debug( prg, REALM_BYTECODE, "IN_PREP_ARGS %hd\n", size ); - - vm_push_type( tree_t**, exec->call_args ); - vm_pushn( size ); - exec->call_args = vm_ptop(); - memset( vm_ptop(), 0, sizeof(word_t) * size ); - break; - } - - case IN_CLEAR_ARGS: { - half_t size; - read_half( size ); - - debug( prg, REALM_BYTECODE, "IN_CLEAR_ARGS %hd\n", size ); - - vm_popn( size ); - exec->call_args = vm_pop_type( tree_t** ); - break; - } - - case IN_HOST: { - half_t func_id; - read_half( func_id ); - - debug( prg, REALM_BYTECODE, "IN_HOST %hd\n", func_id ); - - sp = prg->rtd->host_call( prg, func_id, sp ); - break; - } - case IN_CALL_WV: { - half_t func_id; - read_half( func_id ); - - struct function_info *fi = &prg->rtd->function_info[func_id]; - struct frame_info *fr = 
&prg->rtd->frame_info[fi->frame_id]; - - debug( prg, REALM_BYTECODE, "IN_CALL_WV %s\n", fr->name ); - - vm_contiguous( FR_AA + fi->frame_size ); - - vm_push_type( tree_t**, exec->call_args ); - vm_push_value( 0 ); /* Return value. */ - vm_push_type( code_t*, instr ); - vm_push_type( tree_t**, exec->frame_ptr ); - vm_push_type( long, exec->frame_id ); - - instr = fr->codeWV; - exec->frame_id = fi->frame_id; - - exec->frame_ptr = vm_ptop(); - vm_pushn( fr->frame_size ); - memset( vm_ptop(), 0, sizeof(word_t) * fr->frame_size ); - break; - } - case IN_CALL_WC: { - half_t func_id; - read_half( func_id ); - - struct function_info *fi = &prg->rtd->function_info[func_id]; - struct frame_info *fr = &prg->rtd->frame_info[fi->frame_id]; - - debug( prg, REALM_BYTECODE, "IN_CALL_WC %s %d\n", fr->name, fr->frame_size ); - - vm_contiguous( FR_AA + fi->frame_size ); - - vm_push_type( tree_t**, exec->call_args ); - vm_push_value( 0 ); /* Return value. */ - vm_push_type( code_t*, instr ); - vm_push_type( tree_t**, exec->frame_ptr ); - vm_push_type( long, exec->frame_id ); - - instr = fr->codeWC; - exec->frame_id = fi->frame_id; - - exec->frame_ptr = vm_ptop(); - vm_pushn( fr->frame_size ); - memset( vm_ptop(), 0, sizeof(word_t) * fr->frame_size ); - break; - } - case IN_YIELD: { - debug( prg, REALM_BYTECODE, "IN_YIELD\n" ); - - kid_t *kid = vm_pop_kid(); - ref_t *next = vm_pop_ref(); - user_iter_t *uiter = (user_iter_t*) vm_plocal_iframe( IFR_AA ); - - if ( kid == 0 || kid->tree == 0 || - kid->tree->id == uiter->search_id || - uiter->search_id == prg->rtd->any_id ) - { - /* Store the yeilded value. */ - uiter->ref.kid = kid; - uiter->ref.next = next; - uiter->yield_size = vm_ssize() - uiter->root_size; - uiter->resume = instr; - uiter->frame = exec->frame_ptr; - - /* Restore the instruction and frame pointer. 
*/ - instr = (code_t*) vm_local_iframe(IFR_RIN); - exec->frame_ptr = (tree_t**) vm_local_iframe(IFR_RFR); - exec->iframe_ptr = (tree_t**) vm_local_iframe(IFR_RIF); - - /* Return the yield result on the top of the stack. */ - tree_t *result = uiter->ref.kid != 0 ? prg->true_val : prg->false_val; - //colm_tree_upref( prg, result ); - vm_push_tree( result ); - } - break; - } - case IN_UITER_CREATE_WV: { - short field; - half_t func_id, search_id; - read_half( field ); - read_half( func_id ); - read_half( search_id ); - - debug( prg, REALM_BYTECODE, "IN_UITER_CREATE_WV\n" ); - - struct function_info *fi = prg->rtd->function_info + func_id; - - vm_contiguous( (sizeof(user_iter_t) / sizeof(word_t)) + FR_AA + fi->frame_size ); - - user_iter_t *uiter = colm_uiter_create( prg, &sp, fi, search_id ); - vm_set_local(exec, field, (SW) uiter); - - /* This is a setup similar to as a call, only the frame structure - * is slightly different for user iterators. We aren't going to do - * the call. We don't need to set up the return ip because the - * uiter advance will set it. The frame we need to do because it - * is set once for the lifetime of the iterator. */ - vm_push_type( tree_t**, exec->call_args ); - vm_push_value( 0 ); - - vm_push_type( code_t*, 0 ); /* Return instruction pointer, */ - vm_push_type( tree_t**, exec->iframe_ptr ); /* Return iframe. */ - vm_push_type( tree_t**, exec->frame_ptr ); /* Return frame. 
*/ - - uiter->frame = vm_ptop(); - vm_pushn( fi->frame_size ); - memset( vm_ptop(), 0, sizeof(word_t) * fi->frame_size ); - - uiter_init( prg, sp, uiter, fi, true ); - break; - } - case IN_UITER_CREATE_WC: { - short field; - half_t func_id, search_id; - read_half( field ); - read_half( func_id ); - read_half( search_id ); - - debug( prg, REALM_BYTECODE, "IN_UITER_CREATE_WC\n" ); - - struct function_info *fi = prg->rtd->function_info + func_id; - - vm_contiguous( (sizeof(user_iter_t) / sizeof(word_t)) + FR_AA + fi->frame_size ); - - user_iter_t *uiter = colm_uiter_create( prg, &sp, fi, search_id ); - vm_set_local(exec, field, (SW) uiter); - - /* This is a setup similar to as a call, only the frame structure - * is slightly different for user iterators. We aren't going to do - * the call. We don't need to set up the return ip because the - * uiter advance will set it. The frame we need to do because it - * is set once for the lifetime of the iterator. */ - vm_push_type( tree_t**, exec->call_args ); - vm_push_value( 0 ); - - vm_push_type( code_t*, 0 ); /* Return instruction pointer, */ - vm_push_type( tree_t**, exec->iframe_ptr ); /* Return iframe. */ - vm_push_type( tree_t**, exec->frame_ptr ); /* Return frame. 
*/ - - uiter->frame = vm_ptop(); - vm_pushn( fi->frame_size ); - memset( vm_ptop(), 0, sizeof(word_t) * fi->frame_size ); - - uiter_init( prg, sp, uiter, fi, false ); - break; - } - case IN_UITER_DESTROY: { - short field; - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_UITER_DESTROY %hd\n", field ); - - user_iter_t *uiter = (user_iter_t*) vm_get_local(exec, field); - colm_uiter_destroy( prg, &sp, uiter ); - break; - } - - case IN_UITER_UNWIND: { - short field; - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_UITER_UNWIND %hd\n", field ); - - user_iter_t *uiter = (user_iter_t*) vm_get_local(exec, field); - colm_uiter_unwind( prg, &sp, uiter ); - break; - } - - case IN_RET: { - struct frame_info *fi = &prg->rtd->frame_info[exec->frame_id]; - downref_local_trees( prg, sp, exec, fi->locals, fi->locals_len ); - vm_popn( fi->frame_size ); - - exec->frame_id = vm_pop_type(long); - exec->frame_ptr = vm_pop_type(tree_t**); - instr = vm_pop_type(code_t*); - exec->ret_val = vm_pop_tree(); - vm_pop_value(); - //vm_popn( fi->argSize ); - - fi = &prg->rtd->frame_info[exec->frame_id]; - debug( prg, REALM_BYTECODE, "IN_RET %s\n", fi->name ); - - /* This if for direct calls of functions. */ - if ( instr == 0 ){ - //assert( sp == root ); - return sp; - } - - /* Might be some unwind code. 
*/ - { - short unwind_len; - read_half( unwind_len ); - if ( unwind_len > 0 ) { - instr += unwind_len; - debug( prg, REALM_BYTECODE, - "skipping unwind code length: %hd\n", unwind_len ); - } - } - - break; - } - case IN_TO_UPPER: { - debug( prg, REALM_BYTECODE, "IN_TO_UPPER\n" ); - - tree_t *in = vm_pop_tree(); - head_t *head = string_to_upper( in->tokdata ); - tree_t *upper = construct_string( prg, head ); - colm_tree_upref( prg, upper ); - vm_push_tree( upper ); - colm_tree_downref( prg, sp, in ); - break; - } - case IN_TO_LOWER: { - debug( prg, REALM_BYTECODE, "IN_TO_LOWER\n" ); - - tree_t *in = vm_pop_tree(); - head_t *head = string_to_lower( in->tokdata ); - tree_t *lower = construct_string( prg, head ); - colm_tree_upref( prg, lower ); - vm_push_tree( lower ); - colm_tree_downref( prg, sp, in ); - break; - } - case IN_OPEN_FILE: { - debug( prg, REALM_BYTECODE, "IN_OPEN_FILE\n" ); - - tree_t *mode = vm_pop_tree(); - tree_t *name = vm_pop_tree(); - stream_t *res = colm_stream_open_file( prg, name, mode ); - vm_push_stream( res ); - colm_tree_downref( prg, sp, name ); - colm_tree_downref( prg, sp, mode ); - break; - } - case IN_GET_CONST: { - short constValId; - read_half( constValId ); - - switch ( constValId ) { - case CONST_STDIN: { - debug( prg, REALM_BYTECODE, "CONST_STDIN\n" ); - - /* Pop the root object. */ - vm_pop_tree(); - - make_stdin( prg ); - - vm_push_stream( prg->stdin_val ); - break; - } - case CONST_STDOUT: { - debug( prg, REALM_BYTECODE, "CONST_STDOUT\n" ); - - /* Pop the root object. */ - vm_pop_tree(); - make_stdout( prg ); - - vm_push_stream( prg->stdout_val ); - break; - } - case CONST_STDERR: { - debug( prg, REALM_BYTECODE, "CONST_STDERR\n" ); - - /* Pop the root object. */ - vm_pop_tree(); - - make_stderr( prg ); - - vm_push_stream( prg->stderr_val ); - break; - } - case CONST_ARG: { - word_t offset; - read_word( offset ); - - debug( prg, REALM_BYTECODE, "CONST_ARG %d\n", offset ); - - /* Pop the root object. 
*/ - vm_pop_tree(); - - head_t *lit = make_literal( prg, offset ); - tree_t *tree = construct_string( prg, lit ); - colm_tree_upref( prg, tree ); - vm_push_tree( tree ); - break; - } - } - break; - } - case IN_SYSTEM: { - debug( prg, REALM_BYTECODE, "IN_SYSTEM\n" ); - - vm_pop_tree(); - str_t *cmd = vm_pop_string(); - - char *cmd0 = malloc( cmd->value->length + 1 ); - memcpy( cmd0, cmd->value->data, cmd->value->length ); - cmd0[cmd->value->length] = 0; - - int res = system( cmd0 ); - - free( cmd0 ); - -#if defined(HAVE_SYS_WAIT_H) - if ( WIFSIGNALED( res ) ) - raise( WTERMSIG( res ) ); - res = WEXITSTATUS( res ); -#else - // WARNING: Check result -#endif - - colm_tree_downref( prg, sp, (tree_t*)cmd ); - - value_t val = res; - vm_push_value( val ); - break; - } - - case IN_DONE: - return sp; - - case IN_FN: { - c = *instr++; - switch ( c ) { - case FN_STR_ATOI: { - debug( prg, REALM_BYTECODE, "FN_STR_ATOI\n" ); - - str_t *str = vm_pop_string(); - word_t res = str_atoi( str->value ); - value_t integer = res; - vm_push_value( integer ); - colm_tree_downref( prg, sp, (tree_t*)str ); - break; - } - case FN_STR_ATOO: { - debug( prg, REALM_BYTECODE, "FN_STR_ATOO\n" ); - - str_t *str = vm_pop_string(); - word_t res = str_atoo( str->value ); - value_t integer = res; - vm_push_value( integer ); - colm_tree_downref( prg, sp, (tree_t*)str ); - break; - } - case FN_STR_UORD8: { - debug( prg, REALM_BYTECODE, "FN_STR_UORD8\n" ); - - str_t *str = vm_pop_string(); - word_t res = str_uord8( str->value ); - value_t integer = res; - vm_push_value( integer ); - colm_tree_downref( prg, sp, (tree_t*)str ); - break; - } - case FN_STR_UORD16: { - debug( prg, REALM_BYTECODE, "FN_STR_UORD16\n" ); - - str_t *str = vm_pop_string(); - word_t res = str_uord16( str->value ); - value_t integer = res; - vm_push_value( integer ); - colm_tree_downref( prg, sp, (tree_t*)str ); - break; - } - case FN_STR_PREFIX: { - debug( prg, REALM_BYTECODE, "FN_STR_PREFIX\n" ); - - str_t *str = vm_pop_string(); - 
value_t len = vm_pop_value(); - - str_t *res = string_prefix( prg, str, (long) len ); - colm_tree_upref( prg, (tree_t*) res ); - vm_push_string( res ); - colm_tree_downref( prg, sp, (tree_t*)str ); - break; - } - case FN_STR_SUFFIX: { - debug( prg, REALM_BYTECODE, "FN_STR_SUFFIX\n" ); - - str_t *str = vm_pop_string(); - value_t pos = vm_pop_value(); - - str_t *res = string_suffix( prg, str, (long) pos ); - colm_tree_upref( prg, (tree_t*) res ); - vm_push_string( res ); - colm_tree_downref( prg, sp, (tree_t*)str ); - break; - } - case FN_PREFIX: { - debug( prg, REALM_BYTECODE, "FN_PREFIX\n" ); - - value_t len = vm_pop_value(); - str_t *str = vm_pop_string(); - - str_t *res = string_prefix( prg, str, (long) len ); - colm_tree_upref( prg, (tree_t*) res ); - vm_push_string( res ); - colm_tree_downref( prg, sp, (tree_t*)str ); - break; - } - case FN_SUFFIX: { - debug( prg, REALM_BYTECODE, "FN_SUFFIX\n" ); - - value_t pos = vm_pop_value(); - str_t *str = vm_pop_string(); - - str_t *res = string_suffix( prg, str, (long) pos ); - colm_tree_upref( prg, (tree_t*) res ); - vm_push_string( res ); - colm_tree_downref( prg, sp, (tree_t*)str ); - break; - } - case FN_SPRINTF: { - debug( prg, REALM_BYTECODE, "FN_SPRINTF\n" ); - - vm_pop_tree(); - value_t integer = vm_pop_value(); - str_t *format = vm_pop_string(); - head_t *res = string_sprintf( prg, format, (long)integer ); - str_t *str = (str_t*)construct_string( prg, res ); - colm_tree_upref( prg, (tree_t*)str ); - vm_push_string( str ); - colm_tree_downref( prg, sp, (tree_t*)format ); - break; - } - case FN_LOAD_ARG0: { - half_t field; - read_half( field ); - debug( prg, REALM_BYTECODE, "FN_LOAD_ARG0 %lu\n", field ); - - /* tree_t comes back upreffed. 
*/ - tree_t *tree = construct_arg0( prg, prg->argc, prg->argv, prg->argl ); - tree_t *prev = colm_struct_get_field( prg->global, tree_t*, field ); - colm_tree_downref( prg, sp, prev ); - colm_struct_set_field( prg->global, tree_t*, field, tree ); - break; - } - case FN_LOAD_ARGV: { - half_t field; - read_half( field ); - debug( prg, REALM_BYTECODE, "FN_LOAD_ARGV %lu\n", field ); - - list_t *list = construct_argv( prg, prg->argc, prg->argv, prg->argl ); - colm_struct_set_field( prg->global, list_t*, field, list ); - break; - } - case FN_INIT_STDS: { - half_t field; - read_half( field ); - debug( prg, REALM_BYTECODE, "FN_INIT_STDS %lu\n", field ); - - list_t *list = construct_stds( prg ); - colm_struct_set_field( prg->global, list_t*, field, list ); - break; - } - case FN_STOP: { - debug( prg, REALM_BYTECODE, "FN_STOP\n" ); - - flush_streams( prg ); - goto out; - } - - case FN_LIST_PUSH_HEAD_WC: { - short gen_id; - read_half( gen_id ); - - debug( prg, REALM_BYTECODE, "FN_LIST_PUSH_HEAD_WC\n" ); - - list_t *list = vm_pop_list(); - struct_t *s = vm_pop_struct(); - - list_el_t *list_el = colm_struct_to_list_el( prg, s, gen_id ); - colm_list_prepend( list, list_el ); - - //colm_tree_upref( prg, prg->trueVal ); - vm_push_tree( prg->true_val ); - break; - } - case FN_LIST_PUSH_HEAD_WV: { - short gen_id; - read_half( gen_id ); - - debug( prg, REALM_BYTECODE, "FN_LIST_PUSH_HEAD_WV\n" ); - - list_t *list = vm_pop_list(); - struct_t *s = vm_pop_struct(); - - list_el_t *list_el = colm_struct_to_list_el( prg, s, gen_id ); - colm_list_prepend( list, list_el ); - - //colm_tree_upref( prg, prg->trueVal ); - vm_push_tree( prg->true_val ); - - /* Set up reverse code. Needs no args. 
*/ - rcode_code( exec, IN_FN ); - rcode_code( exec, FN_LIST_PUSH_HEAD_BKT ); - rcode_unit_term( exec ); - break; - } - case FN_LIST_PUSH_HEAD_BKT: { - debug( prg, REALM_BYTECODE, "FN_LIST_PUSH_HEAD_BKT\n" ); - - list_t *list = vm_pop_list(); - colm_list_detach_head( list ); - break; - } - case FN_LIST_PUSH_TAIL_WC: { - short gen_id; - read_half( gen_id ); - - debug( prg, REALM_BYTECODE, "FN_LIST_PUSH_TAIL_WC\n" ); - - list_t *list = vm_pop_list(); - struct_t *s = vm_pop_struct(); - - list_el_t *list_el = colm_struct_to_list_el( prg, s, gen_id ); - colm_list_append( list, list_el ); - - //colm_tree_upref( prg, prg->trueVal ); - vm_push_tree( prg->true_val ); - break; - } - case FN_LIST_PUSH_TAIL_WV: { - short gen_id; - read_half( gen_id ); - - debug( prg, REALM_BYTECODE, "FN_LIST_PUSH_TAIL_WV\n" ); - - list_t *list = vm_pop_list(); - struct_t *s = vm_pop_struct(); - - list_el_t *list_el = colm_struct_to_list_el( prg, s, gen_id ); - colm_list_append( list, list_el ); - - //colm_tree_upref( prg, prg->trueVal ); - vm_push_tree( prg->true_val ); - - /* Set up reverse code. Needs no args. 
*/ - rcode_code( exec, IN_FN ); - rcode_code( exec, FN_LIST_PUSH_TAIL_BKT ); - rcode_unit_term( exec ); - break; - } - case FN_LIST_PUSH_TAIL_BKT: { - debug( prg, REALM_BYTECODE, "FN_LIST_PUSH_TAIL_BKT\n" ); - - list_t *list = vm_pop_list(); - colm_list_detach_tail( list ); - break; - } - case FN_LIST_POP_TAIL_WC: { - short gen_id; - read_half( gen_id ); - - debug( prg, REALM_BYTECODE, "FN_LIST_POP_TAIL_WC\n" ); - - list_t *list = vm_pop_list(); - - list_el_t *tail = list->tail; - colm_list_detach_tail( list ); - struct_t *s = colm_generic_el_container( prg, tail, gen_id ); - - vm_push_struct( s ); - break; - } - case FN_LIST_POP_TAIL_WV: { - short gen_id; - read_half( gen_id ); - - debug( prg, REALM_BYTECODE, "FN_LIST_POP_TAIL_WV\n" ); - - list_t *list = vm_pop_list(); - - list_el_t *tail = list->tail; - colm_list_detach_tail( list ); - struct_t *s = colm_generic_el_container( prg, tail, gen_id ); - - vm_push_struct( s ); - - /* Set up reverse. */ - rcode_code( exec, IN_FN ); - rcode_code( exec, FN_LIST_POP_TAIL_BKT ); - rcode_half( exec, gen_id ); - rcode_word( exec, (word_t)s ); - rcode_unit_term( exec ); - break; - } - case FN_LIST_POP_TAIL_BKT: { - short gen_id; - tree_t *val; - read_half( gen_id ); - read_tree( val ); - - debug( prg, REALM_BYTECODE, "FN_LIST_POP_TAIL_BKT\n" ); - - list_t *list = vm_pop_list(); - struct_t *s = (struct_t*) val; - - list_el_t *list_el = colm_struct_to_list_el( prg, s, gen_id ); - - colm_list_append( list, list_el ); - break; - } - case FN_LIST_POP_HEAD_WC: { - short gen_id; - read_half( gen_id ); - - debug( prg, REALM_BYTECODE, "FN_LIST_POP_HEAD_WC\n" ); - - list_t *list = vm_pop_list(); - - list_el_t *head = list->head; - colm_list_detach_head( list ); - struct_t *s = colm_generic_el_container( prg, head, gen_id ); - - vm_push_struct( s ); - break; - } - case FN_LIST_POP_HEAD_WV: { - short gen_id; - read_half( gen_id ); - - debug( prg, REALM_BYTECODE, "FN_LIST_POP_HEAD_WV\n" ); - - list_t *list = vm_pop_list(); - - list_el_t 
*head = list->head; - colm_list_detach_head( list ); - struct_t *s = colm_generic_el_container( prg, head, gen_id ); - - vm_push_struct( s ); - - /* Set up reverse. The result comes off the list downrefed. - * Need it up referenced for the reverse code too. */ - rcode_code( exec, IN_FN ); - rcode_code( exec, FN_LIST_POP_HEAD_BKT ); - rcode_half( exec, gen_id ); - rcode_word( exec, (word_t)s ); - rcode_unit_term( exec ); - break; - } - case FN_LIST_POP_HEAD_BKT: { - short gen_id; - tree_t *val; - read_half( gen_id ); - read_tree( val ); - - debug( prg, REALM_BYTECODE, "FN_LIST_POP_HEAD_BKT\n" ); - - list_t *list = vm_pop_list(); - struct_t *s = (struct_t*) val; - - list_el_t *list_el = colm_struct_to_list_el( prg, s, gen_id ); - - colm_list_prepend( list, list_el ); - break; - } - case FN_MAP_FIND: { - short gen_id; - read_half( gen_id ); - - debug( prg, REALM_BYTECODE, "FN_MAP_FIND %hd\n", gen_id ); - - map_t *map = vm_pop_map(); - tree_t *key = vm_pop_tree(); - - map_el_t *map_el = colm_map_find( prg, map, key ); - - struct colm_struct *strct = map_el != 0 ? 
- colm_generic_el_container( prg, map_el, gen_id ) : 0; - - vm_push_struct( strct ); - - if ( map->generic_info->key_type == TYPE_TREE ) - colm_tree_downref( prg, sp, key ); - break; - } - case FN_MAP_INSERT_WC: { - short gen_id; - read_half( gen_id ); - - debug( prg, REALM_BYTECODE, "FN_MAP_INSERT_WC %hd\n", gen_id ); - - map_t *map = vm_pop_map(); - struct_t *s = vm_pop_struct(); - - map_el_t *map_el = colm_struct_to_map_el( prg, s, gen_id ); - - colm_map_insert( prg, map, map_el ); - - vm_push_tree( prg->true_val ); - break; - } - case FN_MAP_INSERT_WV: { - short gen_id; - read_half( gen_id ); - - debug( prg, REALM_BYTECODE, "FN_MAP_INSERT_WV %hd\n", gen_id ); - - map_t *map = vm_pop_map(); - struct_t *s = vm_pop_struct(); - - map_el_t *map_el = colm_struct_to_map_el( prg, s, gen_id ); - - map_el_t *inserted = colm_map_insert( prg, map, map_el ); - - //colm_tree_upref( prg, prg->trueVal ); - vm_push_tree( prg->true_val ); - - rcode_code( exec, IN_FN ); - rcode_code( exec, FN_MAP_INSERT_BKT ); - rcode_half( exec, gen_id ); - rcode_code( exec, inserted != 0 ? 
1 : 0 ); - rcode_word( exec, (word_t)map_el ); - rcode_unit_term( exec ); - break; - } - - case FN_MAP_INSERT_BKT: { - short gen_id; - uchar inserted; - word_t wmap_el; - - read_half( gen_id ); - read_byte( inserted ); - read_word( wmap_el ); - - map_el_t *map_el = (map_el_t*)wmap_el; - - debug( prg, REALM_BYTECODE, "FN_MAP_INSERT_BKT %d\n", - (int)inserted ); - - map_t *map = vm_pop_map(); - - if ( inserted ) - colm_map_detach( prg, map, map_el ); - break; - } - case FN_MAP_DETACH_WC: { - short gen_id; - read_half( gen_id ); - - debug( prg, REALM_BYTECODE, "FN_MAP_DETACH_WC %hd\n", gen_id ); - - map_t *map = vm_pop_map(); - struct_t *s = vm_pop_struct(); - - map_el_t *map_el = colm_struct_to_map_el( prg, s, gen_id ); - - colm_map_detach( prg, map, map_el ); - - //colm_tree_upref( prg, prg->trueVal ); - vm_push_tree( prg->true_val ); - break; - } - case FN_MAP_DETACH_WV: { - debug( prg, REALM_BYTECODE, "FN_MAP_DETACH_WV\n" ); - - tree_t *obj = vm_pop_tree(); - tree_t *key = vm_pop_tree(); - struct tree_pair pair = map_remove( prg, (map_t*)obj, key ); - - colm_tree_upref( prg, pair.val ); - vm_push_tree( pair.val ); - - /* Reverse instruction. */ - rcode_code( exec, IN_FN ); - rcode_code( exec, FN_MAP_DETACH_BKT ); - rcode_word( exec, (word_t)pair.key ); - rcode_word( exec, (word_t)pair.val ); - rcode_unit_term( exec ); - - colm_tree_downref( prg, sp, obj ); - colm_tree_downref( prg, sp, key ); - break; - } - case FN_MAP_DETACH_BKT: { - tree_t *key, *val; - read_tree( key ); - read_tree( val ); - - debug( prg, REALM_BYTECODE, "FN_MAP_DETACH_BKT\n" ); - - /* Either both or neither. 
*/ - assert( ( key == 0 ) ^ ( val != 0 ) ); - - tree_t *obj = vm_pop_tree(); - #if 0 - if ( key != 0 ) - map_unremove( prg, (map_t*)obj, key, val ); - #endif - - colm_tree_downref( prg, sp, obj ); - break; - } - case FN_VMAP_INSERT_WC: { - short gen_id; - read_half( gen_id ); - - debug( prg, REALM_BYTECODE, "FN_VMAP_INSERT_WC %hd\n", gen_id ); - - map_t *map = vm_pop_map(); - struct_t *value = vm_pop_struct(); - struct_t *key = vm_pop_struct(); - - colm_vmap_insert( prg, map, key, value ); - - //colm_tree_upref( prg, prg->trueVal ); - vm_push_tree( prg->true_val ); - break; - } - case FN_VMAP_INSERT_WV: { - short gen_id; - read_half( gen_id ); - - debug( prg, REALM_BYTECODE, "FN_VMAP_INSERT_WV %hd\n", gen_id ); - - map_t *map = vm_pop_map(); - struct_t *value = vm_pop_struct(); - struct_t *key = vm_pop_struct(); - - map_el_t *inserted = colm_vmap_insert( prg, map, key, value ); - - //colm_tree_upref( prg, prg->trueVal ); - vm_push_tree( prg->true_val ); - - rcode_code( exec, IN_FN ); - rcode_code( exec, FN_VMAP_INSERT_BKT ); - rcode_half( exec, gen_id ); - rcode_code( exec, inserted != 0 ? 
1 : 0 ); - rcode_word( exec, (word_t)inserted ); - rcode_unit_term( exec ); - break; - } - case FN_VMAP_INSERT_BKT: { - short gen_id; - uchar inserted; - word_t wmap_el; - - read_half( gen_id ); - read_byte( inserted ); - read_word( wmap_el ); - - map_el_t *map_el = (map_el_t*)wmap_el; - - debug( prg, REALM_BYTECODE, "FN_VMAP_INSERT_BKT %d\n", - (int)inserted ); - - map_t *map = vm_pop_map(); - - if ( inserted ) - colm_map_detach( prg, map, map_el ); - break; - } - case FN_VMAP_REMOVE_WC: { - short gen_id; - read_half( gen_id ); - - debug( prg, REALM_BYTECODE, "FN_VMAP_REMOVE_WC %hd\n", gen_id ); - - map_t *map = vm_pop_map(); - tree_t *key = vm_pop_tree(); - - colm_vmap_remove( prg, map, key ); - - //colm_tree_upref( prg, prg->trueVal ); - vm_push_tree( prg->true_val ); - break; - } - case FN_VMAP_FIND: { - short gen_id; - read_half( gen_id ); - - debug( prg, REALM_BYTECODE, "FN_VMAP_FIND %hd\n", gen_id ); - - map_t *map = vm_pop_map(); - tree_t *key = vm_pop_tree(); - - tree_t *result = colm_vmap_find( prg, map, key ); - - vm_push_tree( result ); - - if ( map->generic_info->key_type == TYPE_TREE ) - colm_tree_downref( prg, sp, key ); - break; - } - case FN_VLIST_PUSH_TAIL_WC: { - short gen_id; - read_half( gen_id ); - - debug( prg, REALM_BYTECODE, "FN_VLIST_PUSH_TAIL_WC %hd\n", gen_id ); - - list_t *list = vm_pop_list(); - value_t value = vm_pop_value(); - - colm_vlist_append( prg, list, value ); - - vm_push_tree( prg->true_val ); - break; - } - case FN_VLIST_PUSH_TAIL_WV: { - short gen_id; - read_half( gen_id ); - - debug( prg, REALM_BYTECODE, "FN_VLIST_PUSH_TAIL_WV %hd\n", gen_id ); - - list_t *list = vm_pop_list(); - value_t value = vm_pop_value(); - - colm_vlist_append( prg, list, value ); - - vm_push_tree( prg->true_val ); - - /* Set up reverse code. Needs no args. 
*/ - rcode_code( exec, IN_FN ); - rcode_code( exec, FN_VLIST_PUSH_TAIL_BKT ); - rcode_unit_term( exec ); - break; - } - case FN_VLIST_PUSH_TAIL_BKT: { - debug( prg, REALM_BYTECODE, "FN_VLIST_PUSH_TAIL_BKT\n" ); - - list_t *list = vm_pop_list(); - colm_list_detach_tail( list ); - break; - } - case FN_VLIST_PUSH_HEAD_WC: { - short gen_id; - read_half( gen_id ); - - debug( prg, REALM_BYTECODE, "FN_VLIST_PUSH_HEAD_WC %hd\n", gen_id ); - - list_t *list = vm_pop_list(); - value_t value = vm_pop_value(); - - colm_vlist_prepend( prg, list, value ); - - vm_push_tree( prg->true_val ); - break; - } - case FN_VLIST_PUSH_HEAD_WV: { - short gen_id; - read_half( gen_id ); - - debug( prg, REALM_BYTECODE, "FN_VLIST_PUSH_HEAD_WV %hd\n", gen_id ); - - list_t *list = vm_pop_list(); - value_t value = vm_pop_value(); - - colm_vlist_prepend( prg, list, value ); - - vm_push_tree( prg->true_val ); - - /* Set up reverse code. Needs no args. */ - rcode_code( exec, IN_FN ); - rcode_code( exec, FN_VLIST_PUSH_HEAD_BKT ); - rcode_unit_term( exec ); - break; - } - case FN_VLIST_PUSH_HEAD_BKT: { - debug( prg, REALM_BYTECODE, "FN_VLIST_PUSH_HEAD_BKT\n" ); - - list_t *list = vm_pop_list(); - colm_list_detach_head( list ); - break; - } - case FN_VLIST_POP_HEAD_WC: { - short gen_id; - read_half( gen_id ); - - debug( prg, REALM_BYTECODE, "FN_VLIST_POP_HEAD_WC %hd\n", gen_id ); - - list_t *list = vm_pop_list(); - - value_t result = colm_vlist_detach_head( prg, list ); - vm_push_value( result ); - break; - } - case FN_VLIST_POP_HEAD_WV: { - short gen_id; - read_half( gen_id ); - - debug( prg, REALM_BYTECODE, "FN_VLIST_POP_HEAD_WV %hd\n", gen_id ); - - list_t *list = vm_pop_list(); - - value_t result = colm_vlist_detach_head( prg, list ); - vm_push_value( result ); - - /* Set up reverse. 
*/ - rcode_code( exec, IN_FN ); - rcode_code( exec, FN_VLIST_POP_HEAD_BKT ); - rcode_half( exec, gen_id ); - rcode_word( exec, (word_t)result ); - rcode_unit_term( exec ); - break; - } - case FN_VLIST_POP_HEAD_BKT: { - short gen_id; - tree_t *val; - read_half( gen_id ); - read_tree( val ); - - debug( prg, REALM_BYTECODE, "FN_VLIST_POP_HEAD_BKT\n" ); - - list_t *list = vm_pop_list(); - - colm_vlist_prepend( prg, list, (value_t)val ); - break; - } - case FN_VLIST_POP_TAIL_WC: { - short gen_id; - read_half( gen_id ); - - debug( prg, REALM_BYTECODE, "FN_VLIST_POP_TAIL_WC %hd\n", gen_id ); - - list_t *list = vm_pop_list(); - - value_t result = colm_vlist_detach_tail( prg, list ); - vm_push_value( result ); - break; - } - case FN_VLIST_POP_TAIL_WV: { - short gen_id; - read_half( gen_id ); - - debug( prg, REALM_BYTECODE, "FN_VLIST_POP_TAIL_WV %hd\n", gen_id ); - - list_t *list = vm_pop_list(); - - value_t result = colm_vlist_detach_tail( prg, list ); - vm_push_value( result ); - - /* Set up reverse. */ - rcode_code( exec, IN_FN ); - rcode_code( exec, FN_VLIST_POP_TAIL_BKT ); - rcode_half( exec, gen_id ); - rcode_word( exec, (word_t)result ); - rcode_unit_term( exec ); - break; - } - case FN_VLIST_POP_TAIL_BKT: { - short gen_id; - tree_t *val; - read_half( gen_id ); - read_tree( val ); - - debug( prg, REALM_BYTECODE, "FN_VLIST_POP_TAIL_BKT\n" ); - - list_t *list = vm_pop_list(); - - colm_vlist_append( prg, list, (value_t)val ); - break; - } - - case FN_EXIT_HARD: { - debug( prg, REALM_BYTECODE, "FN_EXIT\n" ); - - vm_pop_tree(); - prg->exit_status = vm_pop_type(long); - prg->induce_exit = 1; - exit( prg->exit_status ); - } - case FN_EXIT: { - /* The unwind code follows the exit call (exception, see - * synthesis). 
*/ - short unwind_len; - read_half( unwind_len ); - - debug( prg, REALM_BYTECODE, "FN_EXIT, unwind len: %hd\n", unwind_len ); - - vm_pop_tree(); - prg->exit_status = vm_pop_type(long); - prg->induce_exit = 1; - - while ( true ) { - /* We stop on the root, leaving the psuedo-call setup on the - * stack. Note we exclude the local data. */ - if ( exec->frame_id == prg->rtd->root_frame_id ) - break; - - struct frame_info *fi = &prg->rtd->frame_info[exec->frame_id]; - - debug( prg, REALM_BYTECODE, "FN_EXIT, popping frame %s, " - "unwind-len %hd, arg-size %ld\n", - ( fi->name != 0 ? fi->name : "<no-name>" ), - unwind_len, fi->arg_size ); - - if ( unwind_len > 0 ) - sp = colm_execute_code( prg, exec, sp, instr ); - - downref_locals( prg, &sp, exec, fi->locals, fi->locals_len ); - vm_popn( fi->frame_size ); - - /* Call layout. */ - exec->frame_id = vm_pop_type(long); - exec->frame_ptr = vm_pop_type(tree_t**); - instr = vm_pop_type(code_t*); - - tree_t *ret_val = vm_pop_tree(); - vm_pop_value(); - - /* The IN_PREP_ARGS stack data. */ - vm_popn( fi->arg_size ); - vm_pop_value(); - - if ( fi->ret_tree ) { - /* Problem here. */ - colm_tree_downref( prg, sp, ret_val ); - } - - read_half( unwind_len ); - } - - goto out; - } - default: { - fatal( "UNKNOWN FUNCTION: 0x%02x -- something is wrong\n", c ); - break; - }} - break; - } - - /* Halt is a default instruction given by the compiler when it is - * asked to generate and instruction it doesn't have. It is deliberate - * and can represent "not implemented" or "compiler error" because a - * variable holding instructions was not properly initialize. */ - case IN_HALT: { - fatal( "IN_HALT -- compiler did something wrong\n" ); - exit(1); - break; - } - default: { - fatal( "UNKNOWN INSTRUCTION: 0x%02x -- something is wrong\n", *(instr-1) ); - assert(false); - break; - } - } - goto again; - -out: - if ( ! prg->induce_exit ) - assert( sp == root ); - return sp; -} - -/* - * Deleteing rcode required downreffing any trees held by it. 
- */ -static void rcode_downref( program_t *prg, tree_t **sp, code_t *instr ) -{ -again: - switch ( *instr++ ) { - case IN_PARSE_INIT_BKT: { - debug( prg, REALM_BYTECODE, "IN_PARSE_INIT_BKT\n" ); - - consume_word(); //( parser ); - consume_word(); //( steps ); - - break; - } - case IN_SEND_EOF_BKT: { - debug( prg, REALM_BYTECODE, "IN_SEND_EOF_BKT\n" ); - consume_word(); //( parser ); - break; - } - - case IN_LOAD_TREE: { - tree_t *w; - read_tree( w ); - debug( prg, REALM_BYTECODE, "IN_LOAD_TREE %p\n", w ); - colm_tree_downref( prg, sp, w ); - break; - } - case IN_LOAD_WORD: { - consume_word(); - debug( prg, REALM_BYTECODE, "IN_LOAD_WORD\n" ); - break; - } - case IN_RESTORE_LHS: { - tree_t *restore; - read_tree( restore ); - debug( prg, REALM_BYTECODE, "IN_RESTORE_LHS\n" ); - colm_tree_downref( prg, sp, restore ); - break; - } - - case IN_PARSE_FRAG_BKT: { - debug( prg, REALM_BYTECODE, "IN_PARSE_FRAG_BKT\n" ); - break; - } - case IN_PCR_RET: { - debug( prg, REALM_BYTECODE, "IN_PCR_RET\n" ); - return; - } - case IN_PCR_END_DECK: { - debug( prg, REALM_BYTECODE, "IN_PCR_END_DECK\n" ); - return; - } - case IN_SEND_TEXT_BKT: { - tree_t *input; - - consume_word(); //( parser ); - read_tree( input ); - consume_word(); //( len ); - - debug( prg, REALM_BYTECODE, "IN_SEND_TEXT_BKT\n" ); - - colm_tree_downref( prg, sp, input ); - break; - } - case IN_SEND_TREE_BKT: { - tree_t *input; - - consume_word(); //( parser ); - read_tree( input ); - consume_word(); //( len ); - - debug( prg, REALM_BYTECODE, "IN_SEND_TREE_BKT\n" ); - - colm_tree_downref( prg, sp, input ); - break; - } - case IN_SEND_STREAM_BKT: { - consume_word(); //( sptr ); - consume_word(); //( input ); - consume_word(); //( len ); - - debug( prg, REALM_BYTECODE, "IN_SEND_STREAM_BKT\n" ); - break; - } - - case IN_INPUT_PULL_BKT: { - tree_t *string; - read_tree( string ); - - debug( prg, REALM_BYTECODE, "IN_INPUT_PULL_BKT\n" ); - - colm_tree_downref( prg, sp, string ); - break; - } - case IN_INPUT_PUSH_BKT: { - 
consume_word(); //( len ); - - debug( prg, REALM_BYTECODE, "IN_INPUT_PUSH_BKT\n" ); - break; - } - case IN_LOAD_GLOBAL_BKT: { - debug( prg, REALM_BYTECODE, "IN_LOAD_GLOBAL_BKT\n" ); - break; - } - case IN_LOAD_CONTEXT_BKT: { - debug( prg, REALM_BYTECODE, "IN_LOAD_CONTEXT_BKT\n" ); - break; - } - case IN_LOAD_INPUT_BKT: { - consume_word(); //( input ); - debug( prg, REALM_BYTECODE, "IN_LOAD_INPUT_BKT\n" ); - break; - } - case IN_GET_FIELD_TREE_BKT: { - short field; - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_GET_FIELD_TREE_BKT %hd\n", field ); - break; - } - case IN_SET_FIELD_TREE_BKT: { - short field; - tree_t *val; - read_half( field ); - read_tree( val ); - - debug( prg, REALM_BYTECODE, "IN_SET_FIELD_TREE_BKT %hd\n", field ); - - colm_tree_downref( prg, sp, val ); - break; - } - case IN_SET_STRUCT_BKT: { - short field; - tree_t *val; - read_half( field ); - read_tree( val ); - - debug( prg, REALM_BYTECODE, "IN_SET_STRUCT_BKT %hd\n", field ); - - colm_tree_downref( prg, sp, val ); - break; - } - case IN_SET_STRUCT_VAL_BKT: { - consume_half(); //( field ); - consume_word(); //( val ); - - debug( prg, REALM_BYTECODE, "IN_SET_STRUCT_VAL_BKT\n" ); - break; - } - case IN_PTR_ACCESS_BKT: { - consume_word(); //( ptr ); - - debug( prg, REALM_BYTECODE, "IN_PTR_ACCESS_BKT\n" ); - break; - } - case IN_SET_TOKEN_DATA_BKT: { - word_t oldval; - read_word( oldval ); - - debug( prg, REALM_BYTECODE, "IN_SET_TOKEN_DATA_BKT\n" ); - - head_t *head = (head_t*)oldval; - string_free( prg, head ); - break; - } - case IN_GET_LIST_MEM_BKT: { - short field; - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_GET_LIST_MEM_BKT %hd\n", field ); - break; - } - case IN_GET_MAP_MEM_BKT: { - short field; - read_half( field ); - - debug( prg, REALM_BYTECODE, "IN_GET_MAP_MEM_BKT %hd\n", field ); - break; - } - case IN_FN: { - switch ( *instr++ ) { - case FN_LIST_PUSH_HEAD_BKT: { - debug( prg, REALM_BYTECODE, "FN_LIST_PUSH_HEAD_BKT\n" ); - break; - } - case FN_LIST_POP_HEAD_BKT: { 
- consume_half(); //( genId ); - consume_word(); //( val ); - - debug( prg, REALM_BYTECODE, "FN_LIST_POP_HEAD_BKT\n" ); - - break; - } - case FN_LIST_PUSH_TAIL_BKT: { - debug( prg, REALM_BYTECODE, "FN_LIST_PUSH_TAIL_BKT\n" ); - break; - } - case FN_LIST_POP_TAIL_BKT: { - consume_half(); //( genId ); - consume_word(); //( val ); - - debug( prg, REALM_BYTECODE, "FN_LIST_POP_TAIL_BKT\n" ); - - break; - } - case FN_MAP_INSERT_BKT: { - uchar inserted; - - consume_half(); //( genId ); - read_byte( inserted ); - consume_word(); //( wmapEl ); - - debug( prg, REALM_BYTECODE, "FN_MAP_INSERT_BKT %d\n", - (int)inserted ); - break; - } - case FN_VMAP_INSERT_BKT: { - short gen_id; - uchar inserted; - //word_t wmap_el; - - read_half( gen_id ); - read_byte( inserted ); - consume_word(); //read_word( wmap_el ); - - //map_el_t *map_el = (map_el_t*)wmap_el; - - debug( prg, REALM_BYTECODE, "FN_VMAP_INSERT_BKT %d\n", - (int)inserted ); - - break; - } - case FN_MAP_DETACH_BKT: { - tree_t *key, *val; - read_tree( key ); - read_tree( val ); - - debug( prg, REALM_BYTECODE, "FN_MAP_DETACH_BKT\n" ); - - colm_tree_downref( prg, sp, key ); - colm_tree_downref( prg, sp, val ); - break; - } - - case FN_VLIST_PUSH_TAIL_BKT: { - break; - } - - case FN_VLIST_PUSH_HEAD_BKT: { - break; - } - - case FN_VLIST_POP_HEAD_BKT: { - short gen_id; - //word_t result; - read_half( gen_id ); - consume_word(); //read_word( result ); - break; - } - - case FN_VLIST_POP_TAIL_BKT: { - short gen_id; - //word_t result; - read_half( gen_id ); - consume_word(); //read_word( result ); - break; - } - - default: { - fatal( "UNKNOWN FUNCTION 0x%02x: -- reverse code downref\n", *(instr-1)); - assert(false); - }} - break; - } - default: { - fatal( "UNKNOWN INSTRUCTION 0x%02x: -- reverse code downref\n", *(instr-1)); - assert(false); - break; - } - } - goto again; -} - - diff --git a/src/bytecode.h b/src/bytecode.h deleted file mode 100644 index 02cd78f4..00000000 --- a/src/bytecode.h +++ /dev/null @@ -1,678 +0,0 @@ -/* - * 
Copyright 2007-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef _COLM_BYTECODE_H -#define _COLM_BYTECODE_H - -#include <colm/pdarun.h> -#include <colm/type.h> -#include <colm/tree.h> - -#ifdef __cplusplus -extern "C" { -#endif - -#if SIZEOF_LONG != 4 && SIZEOF_LONG != 8 - #error "SIZEOF_LONG contained an unexpected value" -#endif - -typedef unsigned long ulong; -typedef unsigned char uchar; - -#define IN_NONE 0x00 -#define IN_LOAD_INT 0x01 -#define IN_LOAD_STR 0x02 -#define IN_LOAD_NIL 0x03 -#define IN_LOAD_TRUE 0x04 -#define IN_LOAD_FALSE 0x05 -#define IN_LOAD_TREE 0x06 -#define IN_LOAD_WORD 0x07 - -#define IN_ADD_INT 0x08 -#define IN_SUB_INT 0x09 -#define IN_MULT_INT 0x0a -#define IN_DIV_INT 0x0b - -#define IN_TST_EQL_VAL 0x59 -#define IN_TST_EQL_TREE 0x0c -#define IN_TST_NOT_EQL_TREE 0x0d -#define IN_TST_NOT_EQL_VAL 0x5f -#define IN_TST_LESS_VAL 0x0e -#define IN_TST_LESS_TREE 0xbd -#define IN_TST_GRTR_VAL 0x0f -#define IN_TST_GRTR_TREE 0xbf -#define IN_TST_LESS_EQL_VAL 0x10 -#define IN_TST_LESS_EQL_TREE 0xc0 -#define IN_TST_GRTR_EQL_VAL 0x11 -#define IN_TST_GRTR_EQL_TREE 0xcd -#define IN_TST_LOGICAL_AND 0x12 -#define IN_TST_LOGICAL_OR 0x13 - -#define IN_TST_NZ_TREE 0xd1 - -#define IN_LOAD_RETVAL 0xd4 - -#define IN_STASH_ARG 0x20 -#define IN_PREP_ARGS 0xe8 -#define IN_CLEAR_ARGS 0xe9 - -#define IN_GEN_ITER_FROM_REF 0xd3 -#define IN_GEN_ITER_DESTROY 0xd5 -#define IN_GEN_ITER_UNWIND 0x74 -#define IN_GEN_ITER_GET_CUR_R 0xdf -#define IN_GEN_VITER_GET_CUR_R 0xe7 -#define IN_LIST_ITER_ADVANCE 0xde -#define IN_REV_LIST_ITER_ADVANCE 0x77 -#define IN_MAP_ITER_ADVANCE 0xe6 - -#define IN_NOT_VAL 0x14 -#define IN_NOT_TREE 0xd2 - -#define IN_JMP 0x15 -#define IN_JMP_FALSE_TREE 0x16 -#define IN_JMP_TRUE_TREE 0x17 -#define IN_JMP_FALSE_VAL 0xb8 -#define IN_JMP_TRUE_VAL 0xed - -#define IN_STR_LENGTH 0x19 -#define IN_CONCAT_STR 0x1a -#define IN_TREE_TRIM 0x1b - -#define IN_POP_TREE 0x1d -#define IN_POP_N_WORDS 0x1e -#define IN_POP_VAL 0xbe -#define IN_DUP_VAL 0x1f -#define IN_DUP_TREE 0xf2 - -#define IN_REJECT 0x21 -#define 
IN_MATCH 0x22 -#define IN_PROD_NUM 0x6a -#define IN_CONSTRUCT 0x23 -#define IN_CONS_OBJECT 0xf0 -#define IN_CONS_GENERIC 0xf1 -#define IN_TREE_CAST 0xe4 - -#define IN_GET_LOCAL_R 0x25 -#define IN_GET_LOCAL_WC 0x26 -#define IN_SET_LOCAL_WC 0x27 - -#define IN_GET_LOCAL_REF_R 0x28 -#define IN_GET_LOCAL_REF_WC 0x29 -#define IN_SET_LOCAL_REF_WC 0x2a - -#define IN_SAVE_RET 0x2b - -#define IN_GET_FIELD_TREE_R 0x2c -#define IN_GET_FIELD_TREE_WC 0x2d -#define IN_GET_FIELD_TREE_WV 0x2e -#define IN_GET_FIELD_TREE_BKT 0x2f - -#define IN_SET_FIELD_TREE_WV 0x30 -#define IN_SET_FIELD_TREE_WC 0x31 -#define IN_SET_FIELD_TREE_BKT 0x32 -#define IN_SET_FIELD_TREE_LEAVE_WC 0x33 - -#define IN_GET_FIELD_VAL_R 0x5e -#define IN_SET_FIELD_VAL_WC 0x60 - -#define IN_GET_MATCH_LENGTH_R 0x34 -#define IN_GET_MATCH_TEXT_R 0x35 - -#define IN_GET_TOKEN_DATA_R 0x36 -#define IN_SET_TOKEN_DATA_WC 0x37 -#define IN_SET_TOKEN_DATA_WV 0x38 -#define IN_SET_TOKEN_DATA_BKT 0x39 - -#define IN_GET_TOKEN_FILE_R 0x80 -#define IN_GET_TOKEN_LINE_R 0x3b -#define IN_GET_TOKEN_POS_R 0x3a -#define IN_GET_TOKEN_COL_R 0x81 - -#define IN_INIT_RHS_EL 0x3c -#define IN_INIT_LHS_EL 0x3d -#define IN_INIT_CAPTURES 0x3e -#define IN_STORE_LHS_EL 0x3f -#define IN_RESTORE_LHS 0x40 - -#define IN_TRITER_FROM_REF 0x41 -#define IN_TRITER_ADVANCE 0x42 -#define IN_TRITER_NEXT_CHILD 0x43 -#define IN_TRITER_GET_CUR_R 0x44 -#define IN_TRITER_GET_CUR_WC 0x45 -#define IN_TRITER_SET_CUR_WC 0x46 -#define IN_TRITER_UNWIND 0x73 -#define IN_TRITER_DESTROY 0x47 -#define IN_TRITER_NEXT_REPEAT 0x48 -#define IN_TRITER_PREV_REPEAT 0x49 - -#define IN_REV_TRITER_FROM_REF 0x4a -#define IN_REV_TRITER_DESTROY 0x4b -#define IN_REV_TRITER_UNWIND 0x75 -#define IN_REV_TRITER_PREV_CHILD 0x4c - -#define IN_UITER_DESTROY 0x4d -#define IN_UITER_UNWIND 0x71 -#define IN_UITER_CREATE_WV 0x4e -#define IN_UITER_CREATE_WC 0x4f -#define IN_UITER_ADVANCE 0x50 -#define IN_UITER_GET_CUR_R 0x51 -#define IN_UITER_GET_CUR_WC 0x52 -#define IN_UITER_SET_CUR_WC 0x53 - -#define 
IN_TREE_SEARCH 0x54 - -#define IN_LOAD_GLOBAL_R 0x55 -#define IN_LOAD_GLOBAL_WV 0x56 -#define IN_LOAD_GLOBAL_WC 0x57 -#define IN_LOAD_GLOBAL_BKT 0x58 - -#define IN_PTR_ACCESS_WV 0x5a -#define IN_PTR_ACCESS_BKT 0x61 - -#define IN_REF_FROM_LOCAL 0x62 -#define IN_REF_FROM_REF 0x63 -#define IN_REF_FROM_QUAL_REF 0x64 -#define IN_RHS_REF_FROM_QUAL_REF 0xee -#define IN_REF_FROM_BACK 0xe3 -#define IN_TRITER_REF_FROM_CUR 0x65 -#define IN_UITER_REF_FROM_CUR 0x66 - -#define IN_GET_MAP_EL_MEM_R 0x6c - -#define IN_MAP_LENGTH 0x67 - -#define IN_LIST_LENGTH 0x72 - -#define IN_GET_LIST_MEM_R 0x79 -#define IN_GET_LIST_MEM_WC 0x7a -#define IN_GET_LIST_MEM_WV 0x7b -#define IN_GET_LIST_MEM_BKT 0x7c - -#define IN_GET_VLIST_MEM_R 0xeb -#define IN_GET_VLIST_MEM_WC 0xec -#define IN_GET_VLIST_MEM_WV 0x70 -#define IN_GET_VLIST_MEM_BKT 0x5c - -#define IN_CONS_REDUCER 0x76 -#define IN_READ_REDUCE 0x69 - -#define IN_DONE 0x78 - -#define IN_GET_LIST_EL_MEM_R 0xf5 - -#define IN_GET_MAP_MEM_R 0x6d -#define IN_GET_MAP_MEM_WV 0x7d -#define IN_GET_MAP_MEM_WC 0x7e -#define IN_GET_MAP_MEM_BKT 0x7f - -#define IN_TREE_TO_STR_XML 0x6e -#define IN_TREE_TO_STR_XML_AC 0x6f -#define IN_TREE_TO_STR_POSTFIX 0xb6 - -#define IN_HOST 0xea - -#define IN_CALL_WC 0x8c -#define IN_CALL_WV 0x8d -#define IN_RET 0x8e -#define IN_YIELD 0x8f -#define IN_HALT 0x8b - -#define IN_INT_TO_STR 0x97 -#define IN_TREE_TO_STR 0x98 -#define IN_TREE_TO_STR_TRIM 0x99 -#define IN_TREE_TO_STR_TRIM_A 0x18 - -#define IN_CREATE_TOKEN 0x9a -#define IN_MAKE_TOKEN 0x9b -#define IN_MAKE_TREE 0x9c -#define IN_CONSTRUCT_TERM 0x9d - -#define IN_INPUT_PULL_WV 0x9e -#define IN_INPUT_PULL_WC 0xe1 -#define IN_INPUT_PULL_BKT 0x9f - -#define IN_INPUT_CLOSE_WC 0xef -#define IN_INPUT_AUTO_TRIM_WC 0x82 -#define IN_IINPUT_AUTO_TRIM_WC 0x83 - -#define IN_PARSE_FRAG_W 0xa2 -#define IN_PARSE_INIT_BKT 0xa1 -#define IN_PARSE_FRAG_BKT 0xa6 - -#define IN_SEND_NOTHING 0xa0 -#define IN_SEND_TEXT_W 0x89 -#define IN_SEND_TEXT_BKT 0x8a - -#define IN_PRINT_TREE 0xa3 - 
-#define IN_SEND_TREE_W 0xa9 -#define IN_SEND_TREE_BKT 0xaa - -#define IN_REPLACE_STREAM 0x88 - -#define IN_SEND_STREAM_W 0x90 -#define IN_SEND_STREAM_BKT 0x1c - -#define IN_SEND_EOF_W 0x87 -#define IN_SEND_EOF_BKT 0xa4 - -#define IN_REDUCE_COMMIT 0xa5 - -#define IN_PCR_RET 0xb2 -#define IN_PCR_END_DECK 0xb3 - -#define IN_OPEN_FILE 0xb4 - -#define IN_GET_CONST 0xb5 - -#define IN_TO_UPPER 0xb9 -#define IN_TO_LOWER 0xba - -#define IN_LOAD_INPUT_R 0xc1 -#define IN_LOAD_INPUT_WV 0xc2 -#define IN_LOAD_INPUT_WC 0xc3 -#define IN_LOAD_INPUT_BKT 0xc4 - -#define IN_INPUT_PUSH_WV 0xc5 -#define IN_INPUT_PUSH_BKT 0xc6 -#define IN_INPUT_PUSH_IGNORE_WV 0xc7 - -#define IN_INPUT_PUSH_STREAM_WV 0xf3 -#define IN_INPUT_PUSH_STREAM_BKT 0xf4 - -#define IN_LOAD_CONTEXT_R 0xc8 -#define IN_LOAD_CONTEXT_WV 0xc9 -#define IN_LOAD_CONTEXT_WC 0xca -#define IN_LOAD_CONTEXT_BKT 0xcb - -#define IN_SET_PARSER_CONTEXT 0xd0 -#define IN_SET_PARSER_INPUT 0x96 - -#define IN_GET_RHS_VAL_R 0xd7 -#define IN_GET_RHS_VAL_WC 0xd8 -#define IN_GET_RHS_VAL_WV 0xd9 -#define IN_GET_RHS_VAL_BKT 0xda -#define IN_SET_RHS_VAL_WC 0xdb -#define IN_SET_RHS_VAL_WV 0xdc -#define IN_SET_RHS_VAL_BKT 0xdd - -#define IN_GET_PARSER_MEM_R 0x5b - -#define IN_GET_STREAM_MEM_R 0xb7 - -#define IN_GET_PARSER_STREAM 0x6b - -#define IN_GET_ERROR 0xcc -#define IN_SET_ERROR 0xe2 - -#define IN_SYSTEM 0xe5 - -#define IN_GET_STRUCT_R 0xf7 -#define IN_GET_STRUCT_WC 0xf8 -#define IN_GET_STRUCT_WV 0xf9 -#define IN_GET_STRUCT_BKT 0xfa -#define IN_SET_STRUCT_WC 0xfb -#define IN_SET_STRUCT_WV 0xfc -#define IN_SET_STRUCT_BKT 0xfd -#define IN_GET_STRUCT_VAL_R 0x93 -#define IN_SET_STRUCT_VAL_WV 0x94 -#define IN_SET_STRUCT_VAL_WC 0x95 -#define IN_SET_STRUCT_VAL_BKT 0x5d -#define IN_NEW_STRUCT 0xfe - -#define IN_GET_LOCAL_VAL_R 0x91 -#define IN_SET_LOCAL_VAL_WC 0x92 - -#define IN_NEW_STREAM 0x24 -#define IN_GET_COLLECT_STRING 0x68 - -/* - * Const things to get. 
- */ -#define CONST_STDIN 0x10 -#define CONST_STDOUT 0x11 -#define CONST_STDERR 0x12 -#define CONST_ARG 0x13 - - - -/* - * IN_FN instructions. - */ - -#define IN_FN 0xff -#define FN_NONE 0x00 -#define FN_STOP 0x0a - -#define FN_STR_ATOI 0x1d -#define FN_STR_ATOO 0x38 -#define FN_STR_UORD8 0x01 -#define FN_STR_SORD8 0x02 -#define FN_STR_UORD16 0x03 -#define FN_STR_SORD16 0x04 -#define FN_STR_UORD32 0x05 -#define FN_STR_SORD32 0x06 -#define FN_STR_PREFIX 0x36 -#define FN_STR_SUFFIX 0x37 -#define FN_SPRINTF 0xd6 -#define FN_LOAD_ARGV 0x07 -#define FN_LOAD_ARG0 0x08 -#define FN_INIT_STDS 0x3e - - -#define FN_LIST_PUSH_TAIL_WV 0x11 -#define FN_LIST_PUSH_TAIL_WC 0x12 -#define FN_LIST_PUSH_TAIL_BKT 0x13 -#define FN_LIST_POP_TAIL_WV 0x14 -#define FN_LIST_POP_TAIL_WC 0x15 -#define FN_LIST_POP_TAIL_BKT 0x16 -#define FN_LIST_PUSH_HEAD_WV 0x17 -#define FN_LIST_PUSH_HEAD_WC 0x18 -#define FN_LIST_PUSH_HEAD_BKT 0x19 -#define FN_LIST_POP_HEAD_WV 0x1a -#define FN_LIST_POP_HEAD_WC 0x1b -#define FN_LIST_POP_HEAD_BKT 0x1c - -#define FN_MAP_FIND 0x24 -#define FN_MAP_INSERT_WV 0x1e -#define FN_MAP_INSERT_WC 0x1f -#define FN_MAP_INSERT_BKT 0x20 -#define FN_MAP_DETACH_WV 0x21 -#define FN_MAP_DETACH_WC 0x22 -#define FN_MAP_DETACH_BKT 0x23 - -#define FN_VMAP_FIND 0x29 -#define FN_VMAP_INSERT_WC 0x25 -#define FN_VMAP_INSERT_WV 0x26 -#define FN_VMAP_INSERT_BKT 0x3d -#define FN_VMAP_REMOVE_WC 0x27 -#define FN_VMAP_REMOVE_WV 0x28 - -#define FN_VLIST_PUSH_TAIL_WV 0x2a -#define FN_VLIST_PUSH_TAIL_WC 0x2b -#define FN_VLIST_PUSH_TAIL_BKT 0x2c -#define FN_VLIST_POP_TAIL_WV 0x2d -#define FN_VLIST_POP_TAIL_WC 0x2e -#define FN_VLIST_POP_TAIL_BKT 0x2f -#define FN_VLIST_PUSH_HEAD_WV 0x30 -#define FN_VLIST_PUSH_HEAD_WC 0x31 -#define FN_VLIST_PUSH_HEAD_BKT 0x32 -#define FN_VLIST_POP_HEAD_WV 0x33 -#define FN_VLIST_POP_HEAD_WC 0x34 -#define FN_VLIST_POP_HEAD_BKT 0x35 -#define FN_EXIT 0x39 -#define FN_EXIT_HARD 0x3a -#define FN_PREFIX 0x3b -#define FN_SUFFIX 0x3c - -#define TRIM_DEFAULT 0x01 -#define TRIM_YES 
0x02 -#define TRIM_NO 0x03 - -/* Types of Generics. */ -enum GEN { - GEN_PARSER = 0x14, - GEN_LIST = 0x15, - GEN_MAP = 0x16 -}; - -/* Known language element ids. */ -enum LEL_ID { - LEL_ID_PTR = 1, - LEL_ID_STR = 2, - LEL_ID_IGNORE = 3 -}; - -/* - * Flags - */ - -/* A tree that has been generated by a termDup. */ -#define PF_TERM_DUP 0x0001 - -/* Has been processed by the commit function. All children have also been - * processed. */ -#define PF_COMMITTED 0x0002 - -/* Created by a token generation action, not made from the input. */ -#define PF_ARTIFICIAL 0x0004 - -/* Named node from a pattern or constructor. */ -#define PF_NAMED 0x0008 - -/* There is reverse code associated with this tree node. */ -#define PF_HAS_RCODE 0x0010 - -#define PF_RIGHT_IGNORE 0x0020 - -#define PF_LEFT_IL_ATTACHED 0x0400 -#define PF_RIGHT_IL_ATTACHED 0x0800 - -#define AF_LEFT_IGNORE 0x0100 -#define AF_RIGHT_IGNORE 0x0200 - -#define AF_SUPPRESS_LEFT 0x4000 -#define AF_SUPPRESS_RIGHT 0x8000 - -/* - * Call stack. - */ - -/* Number of spots in the frame, after the args. */ -#define FR_AA 5 - -/* Positions relative to the frame pointer. */ -#define FR_CA 4 /* call args */ -#define FR_RV 3 /* return value */ -#define FR_RI 2 /* return instruction */ -#define FR_RFP 1 /* return frame pointer */ -#define FR_RFD 0 /* return frame id. */ - -/* - * Calling Convention: - * a1 - * a2 - * a3 - * ... - * return value FR_RV - * return instr FR_RI - * return frame ptr FR_RFP - * return frame id FR_RFD - */ - -/* - * User iterator call stack. - * Adds an iframe pointer, removes the return value. - */ - -/* Number of spots in the frame, after the args. */ -#define IFR_AA 5 - -/* Positions relative to the frame pointer. */ -#define IFR_RIN 2 /* return instruction */ -#define IFR_RIF 1 /* return iframe pointer */ -#define IFR_RFR 0 /* return frame pointer */ - -#define vm_push_type(type, i) \ - ( ( sp == prg->sb_beg ? 
(sp = vm_bs_add(prg, sp, 1)) : 0 ), (*((type*)(--sp)) = (i)) ) - -#define vm_pushn(n) \ - ( ( (sp-(n)) < prg->sb_beg ? (sp = vm_bs_add(prg, sp, n)) : 0 ), (sp -= (n)) ) - -#define vm_pop_type(type) \ - ({ SW r = *sp; (sp+1) >= prg->sb_end ? (sp = vm_bs_pop(prg, sp, 1)) : (sp += 1); (type)r; }) - -#define vm_push_tree(i) vm_push_type(tree_t*, i) -#define vm_push_input(i) vm_push_type(input_t*, i) -#define vm_push_stream(i) vm_push_type(stream_t*, i) -#define vm_push_struct(i) vm_push_type(struct_t*, i) -#define vm_push_parser(i) vm_push_type(parser_t*, i) -#define vm_push_value(i) vm_push_type(value_t, i) -#define vm_push_string(i) vm_push_type(str_t*, i) -#define vm_push_kid(i) vm_push_type(kid_t*, i) -#define vm_push_ref(i) vm_push_type(ref_t*, i) -#define vm_push_string(i) vm_push_type(str_t*, i) -#define vm_push_ptree(i) vm_push_type(parse_tree_t*, i) - -#define vm_pop_tree() vm_pop_type(tree_t*) -#define vm_pop_input() vm_pop_type(input_t*) -#define vm_pop_stream() vm_pop_type(stream_t*) -#define vm_pop_struct() vm_pop_type(struct_t*) -#define vm_pop_parser() vm_pop_type(parser_t*) -#define vm_pop_list() vm_pop_type(list_t*) -#define vm_pop_map() vm_pop_type(map_t*) -#define vm_pop_value() vm_pop_type(value_t) -#define vm_pop_string() vm_pop_type(str_t*) -#define vm_pop_kid() vm_pop_type(kid_t*) -#define vm_pop_ref() vm_pop_type(ref_t*) -#define vm_pop_ptree() vm_pop_type(parse_tree_t*) - -#define vm_pop_ignore() \ - ({ (sp+1) >= prg->sb_end ? (sp = vm_bs_pop(prg, sp, 1)) : (sp += 1); }) - -#define vm_popn(n) \ - ({ (sp+(n)) >= prg->sb_end ? (sp = vm_bs_pop(prg, sp, n)) : (sp += (n)); }) - -#define vm_contiguous(n) \ - ( ( (sp-(n)) < prg->sb_beg ? 
(sp = vm_bs_add(prg, sp, n)) : 0 ) ) - -#define vm_top() (*sp) -#define vm_ptop() (sp) - -#define vm_ssize() ( prg->sb_total + (prg->sb_end - sp) ) - -#define vm_local_iframe(o) (exec->iframe_ptr[o]) -#define vm_plocal_iframe(o) (&exec->iframe_ptr[o]) - -void vm_init( struct colm_program * ); -tree_t** vm_bs_add( struct colm_program *, tree_t **, int ); -tree_t** vm_bs_pop( struct colm_program *, tree_t **, int ); -void vm_clear( struct colm_program * ); - -typedef tree_t *SW; -typedef tree_t **StackPtr; - -/* Can't use sizeof() because we have used types that are bigger than the - * serial representation. */ -#define SIZEOF_CODE 1 -#define SIZEOF_HALF 2 -#define SIZEOF_WORD sizeof(word_t) - -typedef struct colm_execution -{ - tree_t **frame_ptr; - tree_t **iframe_ptr; - long frame_id; - tree_t **call_args; - - long rcode_unit_len; - - parser_t *parser; - long steps; - long pcr; - tree_t *ret_val; - char WV; -} execution_t; - -struct colm_execution; - -static inline tree_t **vm_get_plocal( struct colm_execution *exec, int o ) -{ - if ( o >= FR_AA ) { - tree_t **call_args = (tree_t**)exec->frame_ptr[FR_CA]; - return &call_args[o - FR_AA]; - } - else { - return &exec->frame_ptr[o]; - } -} - -static inline tree_t *vm_get_local( struct colm_execution *exec, int o ) -{ - if ( o >= FR_AA ) { - tree_t **call_args = (tree_t**)exec->frame_ptr[FR_CA]; - return call_args[o - FR_AA]; - } - else { - return exec->frame_ptr[o]; - } -} - -static inline void vm_set_local( struct colm_execution *exec, int o, tree_t* v ) -{ - if ( o >= FR_AA ) { - tree_t **call_args = (tree_t**)exec->frame_ptr[FR_CA]; - call_args[o - FR_AA] = v; - } - else { - exec->frame_ptr[o] = v; - } -} - - -long string_length( head_t *str ); -const char *string_data( head_t *str ); -head_t *init_str_space( long length ); -head_t *string_copy( struct colm_program *prg, head_t *head ); -void string_free( struct colm_program *prg, head_t *head ); -void string_shorten( head_t *tokdata, long newlen ); -head_t 
*concat_str( head_t *s1, head_t *s2 ); -word_t str_atoi( head_t *str ); -word_t str_atoo( head_t *str ); -word_t str_uord16( head_t *head ); -word_t str_uord8( head_t *head ); -word_t cmp_string( head_t *s1, head_t *s2 ); -head_t *string_to_upper( head_t *s ); -head_t *string_to_lower( head_t *s ); -head_t *string_sprintf( program_t *prg, str_t *format, long integer ); - -head_t *make_literal( struct colm_program *prg, long litoffset ); -head_t *int_to_str( struct colm_program *prg, word_t i ); - -void colm_execute( struct colm_program *prg, execution_t *exec, code_t *code ); -void reduction_execution( execution_t *exec, tree_t **sp ); -void generation_execution( execution_t *exec, tree_t **sp ); -void reverse_execution( execution_t *exec, tree_t **sp, struct rt_code_vect *all_rev ); - -kid_t *alloc_attrs( struct colm_program *prg, long length ); -void free_attrs( struct colm_program *prg, kid_t *attrs ); -kid_t *get_attr_kid( tree_t *tree, long pos ); - -tree_t *split_tree( struct colm_program *prg, tree_t *t ); - -void colm_rcode_downref_all( struct colm_program *prg, tree_t **sp, struct rt_code_vect *cv ); -int colm_make_reverse_code( struct pda_run *pda_run ); -void colm_transfer_reverse_code( struct pda_run *pda_run, parse_tree_t *tree ); - -void split_ref( struct colm_program *prg, tree_t ***sp, ref_t *from_ref ); - -void alloc_global( struct colm_program *prg ); -tree_t **colm_execute_code( struct colm_program *prg, - execution_t *exec, tree_t **sp, code_t *instr ); -code_t *colm_pop_reverse_code( struct rt_code_vect *all_rev ); - -#ifdef __cplusplus -} -#endif - -#endif /* _COLM_BYTECODE_H */ - diff --git a/src/closure.cc b/src/closure.cc deleted file mode 100644 index 066bf12b..00000000 --- a/src/closure.cc +++ /dev/null @@ -1,458 +0,0 @@ -/* - * Copyright 2005-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the 
"Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <assert.h> -#include <stdbool.h> - -#include <iostream> - -#include "compiler.h" - -using std::endl; -using std::cerr; - -void Compiler::lr0BringInItem( PdaGraph *pdaGraph, PdaState *dest, PdaState *prodState, - PdaTrans *expandFrom, Production *prod ) -{ - /* We use dot sets for finding unique states. In the future, should merge - * dots sets with the stateSet pointer (only need one of these). */ - assert( dest != prodState ); - dest->dotSet.insert( prodState->dotSet ); - - /* Get the epsilons, context, out priorities. */ - dest->pendingCommits.insert( prodState->pendingCommits ); - //if ( prodState->pendingCommits.length() > 0 ) - // cerr << "THERE ARE PENDING COMMITS DRAWN IN" << endl; - - if ( prodState->transMap.length() > 0 ) { - assert( prodState->transMap.length() == 1 ); - PdaTrans *srcTrans = prodState->transMap[0].value; - - /* Look for the source in the destination. */ - TransMapEl *destTel = dest->transMap.find( srcTrans->lowKey ); - if ( destTel == 0 ) { - /* Make a new state and transition to it. 
*/ - PdaState *newState = pdaGraph->addState(); - PdaTrans *newTrans = new PdaTrans(); - - /* Attach the new transition to the new state. */ - newTrans->lowKey = srcTrans->lowKey; - pdaGraph->attachTrans( dest, newState, newTrans ); - pdaGraph->addInTrans( newTrans, srcTrans ); - - /* The transitions we make during lr0 closure are all shifts. */ - assert( newTrans->isShift ); - assert( srcTrans->isShift ); - - /* The new state must have its state set setup. */ - newState->stateSet = new PdaStateSet; - newState->stateSet->insert( srcTrans->toState ); - - /* Insert the transition into the map. Be sure to set destTel, it - * is needed below. */ - dest->transMap.insert( srcTrans->lowKey, newTrans, &destTel ); - - /* If the item is a non-term, queue it for closure. */ - LangEl *langEl = langElIndex[srcTrans->lowKey]; - if ( langEl != 0 && langEl->type == LangEl::NonTerm ) { - pdaGraph->transClosureQueue.append( newTrans ); - //cerr << "put to trans closure queue" << endl; - } - } - else { - //cerr << "merging transitions" << endl; - destTel->value->toState->stateSet->insert( srcTrans->toState ); - pdaGraph->addInTrans( destTel->value, srcTrans ); - } - - /* If this is an expansion then we may need to bring in commits. */ - if ( expandFrom != 0 && expandFrom->commits.length() > 0 ) { - //cerr << "SETTING COMMIT ON CLOSURE ROUND" << endl; - destTel->value->commits.insert( expandFrom->commits ); - - expandFrom->commits.empty(); - } - } - else { - /* ProdState does not have any transitions out. It is at the end of a - * production. */ - if ( expandFrom != 0 && expandFrom->commits.length() > 0 ) { - //cerr << "SETTING COMMIT IN PENDING LOOKAHEAD" << endl; - for ( LongSet::Iter len = expandFrom->commits; len.lte(); len++ ) - dest->pendingCommits.insert( ProdIdPair( prod->prodId, *len ) ); - - expandFrom->commits.empty(); - } - } -} - -void Compiler::lr0InvokeClosure( PdaGraph *pdaGraph, PdaState *state ) -{ - /* State should not already be closed. 
*/ - assert( !state->inClosedMap ); - - /* This is used each time we invoke closure, it must be cleared. */ - pdaGraph->transClosureQueue.abandon(); - - /* Drag in the core items. */ - for ( PdaStateSet::Iter ssi = *state->stateSet; ssi.lte(); ssi++ ) - lr0BringInItem( pdaGraph, state, *ssi, 0, 0 ); - - /* Now bring in the derived items. */ - while ( pdaGraph->transClosureQueue.length() > 0 ) { - PdaTrans *toClose = pdaGraph->transClosureQueue.detachFirst(); - //cerr << "have a transition to derive" << endl; - - /* Get the langEl. */ - LangEl *langEl = langElIndex[toClose->lowKey]; - - /* Make graphs for all of the productions that the non - * terminal goes to that are not already in the state's dotSet. */ - for ( LelDefList::Iter prod = langEl->defList; prod.lte(); prod++ ) { - /* Bring in the start state of the production. */ - lr0BringInItem( pdaGraph, state, prod->fsm->startState, toClose, prod ); - } - } - - /* Try and insert into the closed dict. */ - DotSetMapEl *lastFound; - if ( pdaGraph->closedMap.insert( state, &lastFound ) ) { - /* Insertion into closed dict succeeded. There is no state with the - * same dot set. The state is now closed. It is guaranteed a spot in - * the closed dict and it will never go away (states never deleted - * during closure). */ - pdaGraph->stateClosedList.append( state ); - state->inClosedMap = true; - - /* Add all of the states in the out transitions to the closure queue. - * This will give us a depth first search of the graph. */ - for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { - /* Get the state the transEl goes to. */ - PdaState *targ = trans->value->toState; - - /* If the state on this tranisition has not already been slated - * for closure, then add it to the queue. */ - if ( !targ->onClosureQueue && !targ->inClosedMap ) { - pdaGraph->stateClosureQueue.append( targ ); - targ->onClosureQueue = true; - } - } - } - else { - /* Insertion into closed dict failed. 
There is an existing state - * with the same dot set. Get the existing state. */ - pdaGraph->inTransMove( lastFound, state ); - for ( TransMap::Iter tel = state->transMap; tel.lte(); tel++ ) { - pdaGraph->stateList.detach( tel->value->toState ); - delete tel->value->toState; - delete tel->value; - } - pdaGraph->stateList.detach( state ); - delete state; - } -} - -/* Invoke cloure on the graph. We use a queue here to achieve a breadth - * first search of the tree we build. Note, there are back edges in this - * tree. They are the edges made when upon closure, a dot set exists - * already. */ -void Compiler::lr0CloseAllStates( PdaGraph *pdaGraph ) -{ - /* While there are items on the closure queue. */ - while ( pdaGraph->stateClosureQueue.length() > 0 ) { - /* Pop the first item off. */ - PdaState *state = pdaGraph->stateClosureQueue.detachFirst(); - state->onClosureQueue = false; - - /* Invoke closure upon the state. */ - lr0InvokeClosure( pdaGraph, state ); - } -} - -void Compiler::transferCommits( PdaGraph *pdaGraph, PdaTrans *trans, - PdaState *state, long prodId ) -{ - ProdIdPairSet &pendingCommits = state->pendingCommits; - for ( ProdIdPairSet::Iter pi = pendingCommits; pi.lte(); pi++ ) { - if ( pi->onReduce == prodId ) - trans->commits.insert( pi->length ); - } -} - -void Compiler::lalr1AddFollow2( PdaGraph *pdaGraph, PdaTrans *trans, FollowToAdd &followKeys ) -{ - for ( ExpandToSet::Iter ets = trans->expandTo; ets.lte(); ets++ ) { - int prodId = ets->prodId; - PdaState *expandTo = ets->state; - - for ( FollowToAdd::Iter fkey = followKeys; fkey.lte(); fkey++ ) { - TransMapEl *transEl = expandTo->transMap.find( fkey->key ); - - if ( transEl != 0 ) { - /* Set up the follow transition. */ - PdaTrans *destTrans = transEl->value; - - transferCommits( pdaGraph, destTrans, expandTo, prodId ); - - pdaGraph->addInReduction( destTrans, prodId, fkey->value ); - } - else { - /* Set up the follow transition. 
*/ - PdaTrans *followTrans = new PdaTrans; - followTrans->lowKey = fkey->key; - followTrans->isShift = false; - followTrans->reductions.insert( prodId, fkey->value ); - - transferCommits( pdaGraph, followTrans, expandTo, prodId ); - - pdaGraph->attachTrans( expandTo, actionDestState, followTrans ); - expandTo->transMap.insert( followTrans->lowKey, followTrans ); - pdaGraph->transClosureQueue.append( followTrans ); - } - } - } -} - -long PdaTrans::maxPrior() -{ - long prior = LONG_MIN; - if ( isShift && shiftPrior > prior ) - prior = shiftPrior; - for ( ReductionMap::Iter red = reductions; red.lte(); red++ ) { - if ( red->value > prior ) - prior = red->value; - } - return prior; -} - -void Compiler::lalr1AddFollow1( PdaGraph *pdaGraph, PdaState *state ) -{ - /* Finding non-terminals into the state. */ - for ( PdaTransInList::Iter in = state->inRange; in.lte(); in++ ) { - long key = in->lowKey; - LangEl *langEl = langElIndex[key]; - if ( langEl != 0 && langEl->type == LangEl::NonTerm ) { - /* Finding the following transitions. */ - FollowToAdd followKeys; - for ( TransMap::Iter fout = state->transMap; fout.lte(); fout++ ) { - int fkey = fout->key; - LangEl *flel = langElIndex[fkey]; - if ( flel == 0 || flel->type == LangEl::Term ) { - long prior = fout->value->maxPrior(); - followKeys.insert( fkey, prior ); - } - } - - if ( followKeys.length() > 0 ) - lalr1AddFollow2( pdaGraph, in, followKeys ); - } - } -} - -void Compiler::lalr1AddFollow2( PdaGraph *pdaGraph, PdaTrans *trans, - long followKey, long prior ) -{ - for ( ExpandToSet::Iter ets = trans->expandTo; ets.lte(); ets++ ) { - int prodId = ets->prodId; - PdaState *expandTo = ets->state; - - TransMapEl *transEl = expandTo->transMap.find( followKey ); - if ( transEl != 0 ) { - /* Add in the reductions, or in the shift. 
*/ - PdaTrans *destTrans = transEl->value; - - transferCommits( pdaGraph, destTrans, expandTo, prodId ); - - pdaGraph->addInReduction( destTrans, prodId, prior ); - } - else { - /* Set up the follow transition. */ - PdaTrans *followTrans = new PdaTrans; - followTrans->lowKey = followKey; - followTrans->isShift = false; - followTrans->reductions.insert( prodId, prior ); - - transferCommits( pdaGraph, followTrans, expandTo, prodId ); - - pdaGraph->attachTrans( expandTo, actionDestState, followTrans ); - expandTo->transMap.insert( followTrans->lowKey, followTrans ); - pdaGraph->transClosureQueue.append( followTrans ); - } - } -} - -void Compiler::lalr1AddFollow1( PdaGraph *pdaGraph, PdaTrans *trans ) -{ - PdaState *state = trans->fromState; - int fkey = trans->lowKey; - LangEl *flel = langElIndex[fkey]; - if ( flel == 0 || flel->type == LangEl::Term ) { - /* Finding non-terminals into the state. */ - for ( PdaTransInList::Iter in = state->inRange; in.lte(); in++ ) { - long key = in->lowKey; - LangEl *langEl = langElIndex[key]; - if ( langEl != 0 && langEl->type == LangEl::NonTerm ) { - //cerr << "FOLLOW PRIOR TRANSFER 2: " << prior << endl; - long prior = trans->maxPrior(); - lalr1AddFollow2( pdaGraph, in, fkey, prior ); - } - } - } -} - -/* Add follow sets to an LR(0) graph to make it LALR(1). */ -void Compiler::lalr1AddFollowSets( PdaGraph *pdaGraph, LangElSet &parserEls ) -{ - /* Make the state that all reduction actions go to. Since a reduction pops - * states of the stack and sets the new target state, this state is - * actually never reached. Just here to link the trans to. */ - actionDestState = pdaGraph->addState(); - pdaGraph->setFinState( actionDestState ); - - for ( LangElSet::Iter pe = parserEls; pe.lte(); pe++ ) { - /* Get the entry into the graph and traverse over start. */ - PdaState *overStart = pdaGraph->followFsm( (*pe)->startState, (*pe)->rootDef->fsm ); - - /* Add _eof after the initial _start. 
*/ - PdaTrans *eofTrans = pdaGraph->insertNewTrans( overStart, actionDestState, - (*pe)->eofLel->id, (*pe)->eofLel->id ); - eofTrans->isShift = true; - } - - /* This was used during lr0 table construction. */ - pdaGraph->transClosureQueue.abandon(); - - /* Need to pass over every state initially. */ - for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) - lalr1AddFollow1( pdaGraph, state ); - - /* While the closure queue has items, pop them off and add follow - * characters. */ - while ( pdaGraph->transClosureQueue.length() > 0 ) { - /* Pop the first item off and add Follow for it . */ - PdaTrans *trans = pdaGraph->transClosureQueue.detachFirst(); - lalr1AddFollow1( pdaGraph, trans ); - } -} - -void Compiler::linkExpansions( PdaGraph *pdaGraph ) -{ - pdaGraph->setStateNumbers(); - for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { - /* Find transitions out on non terminals. */ - for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { - long key = trans->key; - LangEl *langEl = langElIndex[key]; - if ( langEl != 0 && langEl->type == LangEl::NonTerm ) { - /* For each production that the non terminal expand to ... */ - for ( LelDefList::Iter prod = langEl->defList; prod.lte(); prod++ ) { - /* Follow the production and add to the trans's expand to set. */ - PdaState *followRes = pdaGraph->followFsm( state, prod->fsm ); - - //LangEl *lel = langElIndex[key]; - //cerr << state->stateNum << ", "; - //if ( lel != 0 ) - // cerr << lel->data; - //else - // cerr << (char)key; - //cerr << " -> " << (*fto)->stateNum << " on " << - // prod->data << " (fss = " << fin.pos() << ")" << endl; - trans->value->expandTo.insert( ExpandToEl( followRes, prod->prodId ) ); - } - } - } - } -} - -/* Add terminal versions of all nonterminal transitions. 
*/ -void Compiler::addDupTerms( PdaGraph *pdaGraph ) -{ - for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { - PdaTransList newTranitions; - for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { - LangEl *lel = langElIndex[trans->value->lowKey]; - if ( lel->type == LangEl::NonTerm ) { - PdaTrans *dupTrans = new PdaTrans; - dupTrans->lowKey = lel->termDup->id; - dupTrans->isShift = true; - - /* Save the target state in to state. In the next loop when we - * attach the transition we must clear this because the - * attaching code requires the transition to be unattached. */ - dupTrans->toState = trans->value->toState; - newTranitions.append( dupTrans ); - - /* Commit code used? */ - //transferCommits( pdaGraph, followTrans, expandTo, prodId ); - } - } - - for ( PdaTrans *dup = newTranitions.head; dup != 0; ) { - PdaTrans *next = dup->next; - PdaState *toState = dup->toState; - dup->toState = 0; - pdaGraph->attachTrans( state, toState, dup ); - state->transMap.insert( dup->lowKey, dup ); - dup = next; - } - } -} - -/* Generate a LALR(1) graph. */ -void Compiler::lalr1GenerateParser( PdaGraph *pdaGraph, LangElSet &parserEls ) -{ - /* Make the intial graph. */ - pdaGraph->langElIndex = langElIndex; - - for ( Vector<LangEl*>::Iter r = parserEls; r.lte(); r++ ) { - /* Create the entry point. */ - PdaState *rs = pdaGraph->addState(); - pdaGraph->entryStateSet.insert( rs ); - - /* State set of just one state. */ - rs->stateSet = new PdaStateSet; - rs->stateSet->insert( (*r)->rootDef->fsm->startState ); - - /* Queue the start state for closure. */ - rs->onClosureQueue = true; - pdaGraph->stateClosureQueue.append( rs ); - - (*r)->startState = rs; - } - - /* Run the lr0 closure. */ - lr0CloseAllStates( pdaGraph ); - - /* Add terminal versions of all nonterminal transitions. */ - addDupTerms( pdaGraph ); - - /* Link production expansions to the place they expand to. 
*/ - linkExpansions( pdaGraph ); - - /* Walk the graph adding follow sets to the LR(0) graph. */ - lalr1AddFollowSets( pdaGraph, parserEls ); - -// /* Set the commit on the final eof shift. */ -// PdaTrans *overStart = pdaGraph->startState->findTrans( rootEl->id ); -// PdaTrans *eofTrans = overStart->toState->findTrans( eofLangEl->id ); -// eofTrans->afterShiftCommits.insert( 2 ); -} diff --git a/src/codegen.cc b/src/codegen.cc deleted file mode 100644 index 219c6ec2..00000000 --- a/src/codegen.cc +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright 2006-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include <iostream> -#include "fsmcodegen.h" - -void FsmCodeGen::writeIncludes() -{ - out << - "#include <stdio.h>\n" - "#include <stdlib.h>\n" - "#include <string.h>\n" - "#include <assert.h>\n" - "\n" - "#include <colm/pdarun.h>\n" - "#include <colm/debug.h>\n" - "#include <colm/bytecode.h>\n" - "#include <colm/config.h>\n" - "#include <colm/defs.h>\n" - "#include <colm/input.h>\n" - "#include <colm/tree.h>\n" - "#include <colm/program.h>\n" - "#include <colm/colm.h>\n" - "\n"; -} - -void FsmCodeGen::writeMain( long activeRealm ) -{ - out << - "int main( int argc, const char **argv )\n" - "{\n" - " struct colm_program *prg;\n" - " int exit_status;\n" - "\n" - " prg = colm_new_program( &" << objectName << " );\n" - " colm_set_debug( prg, " << activeRealm << " );\n" - " colm_run_program( prg, argc, argv );\n" - " exit_status = colm_delete_program( prg );\n" - " return exit_status;\n" - "}\n" - "\n"; - - out.flush(); -} diff --git a/src/codevect.c b/src/codevect.c deleted file mode 100644 index 50b86336..00000000 --- a/src/codevect.c +++ /dev/null @@ -1,183 +0,0 @@ -/* - * Copyright 2010-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <string.h> -#include <stdlib.h> - -#include <colm/rtvector.h> -#include <colm/pdarun.h> - -void init_rt_code_vect( struct rt_code_vect *vect ) -{ - vect->data = 0; - vect->tab_len = 0; - vect->alloc_len = 0; -} - -static long new_size_up( long existing, long needed ) -{ - return needed > existing ? (needed<<1) : existing; -} - -static long new_size_down( long existing, long needed ) -{ - return needed < (existing>>2) ? (needed<<1) : existing; -} - -/* Up resize the data for len elements using Resize::upResize to tell us the - * new tabLen. Reads and writes allocLen. Does not read or write tabLen. */ -static void up_resize( struct rt_code_vect *vect, long len ) -{ - /* Ask the resizer what the new tabLen will be. */ - long new_len = new_size_up(vect->alloc_len, len); - - /* Did the data grow? */ - if ( new_len > vect->alloc_len ) { - vect->alloc_len = new_len; - if ( vect->data != 0 ) { - /* Table exists already, resize it up. */ - vect->data = (code_t*) realloc( vect->data, sizeof(code_t) * new_len ); - //if ( vect->data == 0 ) - // throw std::bad_alloc(); - } - else { - /* Create the data. */ - vect->data = (code_t*) malloc( sizeof(code_t) * new_len ); - //if ( vect->data == 0 ) - // throw std::bad_alloc(); - } - } -} - -/* Down resize the data for len elements using Resize::downResize to determine - * the new tabLen. Reads and writes allocLen. Does not read or write tabLen. */ -static void down_resize( struct rt_code_vect *vect, long len) -{ - /* Ask the resizer what the new tabLen will be. */ - long new_len = new_size_down( vect->alloc_len, len ); - - /* Did the data shrink? 
*/ - if ( new_len < vect->alloc_len ) { - vect->alloc_len = new_len; - if ( new_len == 0 ) { - /* Simply free the data. */ - free( vect->data ); - vect->data = 0; - } - else { - /* Not shrinking to size zero, realloc it to the smaller size. */ - vect->data = (code_t*) realloc( vect->data, sizeof(code_t) * new_len ); - //if ( vect->data == 0 ) - // throw std::bad_alloc(); - } - } -} - - -void colm_rt_code_vect_empty( struct rt_code_vect *vect ) -{ - if ( vect->data != 0 ) { - /* Free the data space. */ - free( vect->data ); - vect->data = 0; - vect->tab_len = vect->alloc_len = 0; - } -} - -void colm_rt_code_vect_replace( struct rt_code_vect *vect, long pos, - const code_t *val, long len ) -{ - long end_pos, i; - //code_t *item; - - /* If we are given a negative position to replace at then - * treat it as a position relative to the length. */ - if ( pos < 0 ) - pos = vect->tab_len + pos; - - /* The end is the one past the last item that we want - * to write to. */ - end_pos = pos + len; - - /* Make sure we have enough space. */ - if ( end_pos > vect->tab_len ) { - up_resize( vect, end_pos ); - - /* Delete any objects we need to delete. */ - //item = vect->data + pos; - //for ( i = pos; i < vect->tabLen; i++, item++ ) - // item->~code_t(); - - /* We are extending the vector, set the new data length. */ - vect->tab_len = end_pos; - } - else { - /* Delete any objects we need to delete. */ - //item = vect->data + pos; - //for ( i = pos; i < endPos; i++, item++ ) - // item->~code_t(); - } - - /* Copy data in using copy constructor. */ - code_t *dst = vect->data + pos; - const code_t *src = val; - for ( i = 0; i < len; i++, dst++, src++ ) - *dst = *src; -} - -void colm_rt_code_vect_remove( struct rt_code_vect *vect, long pos, long len ) -{ - long new_len, len_to_slide_over, end_pos; - code_t *dst;//, *item; - - /* If we are given a negative position to remove at then - * treat it as a position relative to the length. 
*/ - if ( pos < 0 ) - pos = vect->tab_len + pos; - - /* The first position after the last item deleted. */ - end_pos = pos + len; - - /* The new data length. */ - new_len = vect->tab_len - len; - - /* The place in the data we are deleting at. */ - dst = vect->data + pos; - - /* Call Destructors. */ - //item = dst; - //for ( long i = 0; i < len; i += 1, item += 1 ) - // item->~code_t(); - - /* Shift data over if necessary. */ - len_to_slide_over = vect->tab_len - end_pos; - if ( len > 0 && len_to_slide_over > 0 ) - memmove(dst, dst + len, sizeof(code_t)*len_to_slide_over); - - /* Shrink the data if necessary. */ - down_resize( vect, new_len ); - - /* Set the new data length. */ - vect->tab_len = new_len; -} - - diff --git a/src/colm-config.cmake.in b/src/colm-config.cmake.in deleted file mode 100644 index 7e3b8f27..00000000 --- a/src/colm-config.cmake.in +++ /dev/null @@ -1,3 +0,0 @@ -# @_PACKAGE_NAME@-config.cmake Generated from colm-config.cmake.in by cmake - -include("${CMAKE_CURRENT_LIST_DIR}/@_PACKAGE_NAME@-targets.cmake") diff --git a/src/colm.h b/src/colm.h deleted file mode 100644 index 55368840..00000000 --- a/src/colm.h +++ /dev/null @@ -1,153 +0,0 @@ -/* - * Copyright 2006-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef _COLM_COLM_H -#define _COLM_COLM_H - -#ifdef __cplusplus -extern "C" { -#endif - -struct colm_data; -struct colm_tree; -struct colm_kid; -struct colm_program; -struct colm_sections; -struct colm_tree; -struct colm_location; - -struct indent_impl -{ - /* Indentation. */ - int level; - int indent; -}; - -extern struct colm_sections colm_object; - -typedef unsigned long colm_value_t; - -struct colm_tree -{ - /* First four will be overlaid in other structures. */ - short id; - unsigned short flags; - long refs; - struct colm_kid *child; - - struct colm_data *tokdata; - - /* FIXME: this needs to go somewhere else. Will do for now. 
*/ - unsigned short prod_num; -}; - -struct colm_print_args -{ - void *arg; - int comm; - int attr; - int trim; - struct indent_impl *indent; - - void (*out)( struct colm_print_args *args, const char *data, int length ); - void (*open_tree)( struct colm_program *prg, struct colm_tree **sp, - struct colm_print_args *args, struct colm_kid *parent, struct colm_kid *kid ); - void (*print_term)( struct colm_program *prg, struct colm_tree **sp, - struct colm_print_args *args, struct colm_kid *kid ); - void (*close_tree)( struct colm_program *prg, struct colm_tree **sp, - struct colm_print_args *args, struct colm_kid *parent, struct colm_kid *kid ); -}; - -void colm_print_null( struct colm_program *prg, struct colm_tree **sp, - struct colm_print_args *args, struct colm_kid *parent, struct colm_kid *kid ); -void colm_print_term_tree( struct colm_program *prg, struct colm_tree **sp, - struct colm_print_args *print_args, struct colm_kid *kid ); - -struct colm_tree **colm_vm_root( struct colm_program *prg ); -struct colm_tree *colm_return_val( struct colm_program *prg ); -void colm_print_tree_args( struct colm_program *prg, struct colm_tree **sp, - struct colm_print_args *print_args, struct colm_tree *tree ); - -int colm_repeat_end( struct colm_tree *tree ); -int colm_list_last( struct colm_tree *tree ); - -struct colm_tree *colm_get_rhs_val( struct colm_program *prg, struct colm_tree *tree, int *a ); -struct colm_tree *colm_get_attr( struct colm_tree *tree, long pos ); -struct colm_tree *colm_get_global( struct colm_program *prg, long pos ); -struct colm_tree *colm_get_repeat_next( struct colm_tree *tree ); -struct colm_tree *colm_get_repeat_val( struct colm_tree *tree ); -struct colm_location *colm_find_location( struct colm_program *prg, struct colm_tree *tree ); - -/* Debug realms. To turn on, pass to colm_set_debug before invocation. 
*/ -#define COLM_DBG_BYTECODE 0x00000001 -#define COLM_DBG_PARSE 0x00000002 -#define COLM_DBG_MATCH 0x00000004 -#define COLM_DBG_COMPILE 0x00000008 -#define COLM_DBG_POOL 0x00000010 -#define COLM_DBG_PRINT 0x00000020 -#define COLM_DBG_INPUT 0x00000040 -#define COLM_DBG_SCAN 0x00000080 - -#define COLM_RN_NEITHER 0x00 -#define COLM_RN_DATA 0x01 -#define COLM_RN_LOC 0x02 -#define COLM_RN_BOTH 0x03 - -/* - * Primary Interface. - */ - -/* Allocate a program. Takes program static data as arg. Normally this is - * &colm_object. */ -struct colm_program *colm_new_program( struct colm_sections *rtd ); - -/* Enable debug realms for a program. */ -void colm_set_debug( struct colm_program *prg, long active_realm ); - -/* Run a top-level colm program. */ -void colm_run_program( struct colm_program *prg, int argc, const char **argv ); - -/* Run a top-level colm program, with argument lengths (allows binary data). */ -void colm_run_program2( struct colm_program *prg, int argc, const char **argv, const int *argl ); - -/* Run a single exported colm function. */ -struct colm_tree *colm_run_func( struct colm_program *prg, int frame_id, - const char **params, int param_count ); - -/* Delete a colm program. Clears all memory. */ -int colm_delete_program( struct colm_program *prg ); - -/* Set the pointer to the reduce struct used. */ -void *colm_get_reduce_ctx( struct colm_program *prg ); -void colm_set_reduce_ctx( struct colm_program *prg, void *ctx ); -void colm_set_reduce_clean( struct colm_program *prg, unsigned char reduce_clean ); - -const char *colm_error( struct colm_program *prg, int *length ); - -const char **colm_extract_fns( struct colm_program *prg ); - -#ifdef __cplusplus -} -#endif - -#endif /* _COLM_COLM_H */ - diff --git a/src/colm.lm b/src/colm.lm deleted file mode 100644 index 3875babd..00000000 --- a/src/colm.lm +++ /dev/null @@ -1,892 +0,0 @@ -# Main region. 
-lex - token DEF / 'def' / - token LEX / 'lex' / - token END / 'end' / - token TOKEN / 'token' / - token RL / 'rl' / - token IGNORE / 'ignore' / - token PRINT / 'print' / - token PRINTS / 'prints' / - token PARSE / 'parse' / - token REDUCE / 'reduce' / - token READ_REDUCE /'read_reduce'/ - token PARSE_TREE / 'parse_tree' / - token PARSE_STOP / 'parse_stop' / - token CONS / 'construct' | 'cons' / - token MATCH / 'match' / - token REQUIRE / 'require' / - token SEND / 'send' / - token SEND_TREE / 'send_tree' / - token NAMESPACE / 'namespace' / - token REDUCTION / 'reduction' / - token FOR / 'for' / - token IF / 'if' / - token YIELD / 'yield' / - token WHILE / 'while' / - token ELSIF / 'elsif' / - token ELSE / 'else' / - token IN / 'in' / - token PARSER / 'parser' | 'accum' / - token LIST / 'list' / - token LIST_EL / 'list_el' / - token MAP / 'map' / - token MAP_EL / 'map_el' / - token PTR / 'ptr' / - token ITER / 'iter' / - token REF / 'ref' / - token EXPORT / 'export' / - token RETURN / 'return' / - token BREAK / 'break' / - token REJECT / 'reject' / - token REDUCEFIRST / 'reducefirst' / - token ALIAS / 'alias' / - token COMMIT / 'commit' / - token NEW / 'new' / - token PREEOF / 'preeof' / - token GLOBAL / 'global' / - token EOS / 'eos' / - token CAST / 'cast' / - token SWITCH / 'switch' / - token CASE / 'case' / - token DEFAULT / 'default' / - token INT / 'int' / - token BOOL / 'bool' / - token VOID / 'void' / - - token MAKE_TOKEN / 'make_token' / - token MAKE_TREE / 'make_tree' / - - token TYPEID / 'typeid' / - - token LITERAL / 'literal' / - token CONTEXT / 'context' / - token STRUCT / 'struct' / - token NI /'ni'/ - - token NIL / 'nil' / - token TRUE / 'true' / - token FALSE / 'false' / - - token LEFT /'left'/ - token RIGHT /'right'/ - token NONASSOC /'nonassoc'/ - - token INCLUDE /'include'/ - - token id / - ( 'a' .. 'z' | 'A' .. 'Z' | '_' ) . - ( 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' | '_' ) * - / - - token number - / ( '0' .. 
'9' ) + / - - token backtick_lit - / '`' . ^( ' ' | '\n' | '\t' | ']' )+ | '`]' / - - token DQ / '\"' / ni - token SQ / '\'' / ni - token TILDE / '~' / ni - - token SQOPEN /'['/ - token SQCLOSE /']'/ - token BAR /'|'/ - token FSLASH /'/'/ - token COLON /':'/ - token DOUBLE_COLON /'::'/ - token DOT /'.'/ - token ARROW /'->'/ - token POPEN /'('/ - token PCLOSE /')'/ - token COPEN /'{'/ - token CCLOSE /'}'/ - token STAR /'*'/ - token QUESTION /'?'/ - token EQUALS /'='/ - token EQEQ /'=='/ - token NEQ /'!='/ - token COMMA /','/ - token LT /'<'/ - token GT /'>'/ - token LTEQ /'<='/ - token GTEQ /'>='/ - token BANG /'!'/ - token DOLLAR /'$'/ - token CARET /'^'/ - token AT /'@'/ - token PERCENT /'%'/ - token PLUS /'+'/ - token MINUS /'-'/ - token AMPAMP /'&&'/ - token BARBAR /'||'/ - - ignore / ( '\n' | '\t' | ' ' )+ / - ignore / '#' . ( ^'\n' )* . '\n' / -end - -lex - token LIT_DQ / '\"' / - token LIT_DQ_NL / '\n' / - token LIT_SQOPEN / '[' / - token LIT_SQCLOSE / ']' / - - token lit_dq_data - / ( ^( '\n' | '\"' | '[' | ']' | '\\' ) | '\\' . any )+ / -end - -lex - token CONS_SQ / '\'' / - token CONS_SQ_NL / '\n' / - - token sq_cons_data - / ( ^( '\n' | '\'' | '\\' ) | '\\' . any )+ / -end - -lex - token TILDE_NL / '\n' / - token tilde_data - / ( ^'\n' )+ / -end - -lex - token lex_id / - ( 'a' .. 'z' | 'A' .. 'Z' | '_' ) . - ( 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' | '_' ) * - / - - token lex_uint - / ( '0' .. '9' )+ / - - token lex_hex - / '0x' . ( '0' .. '9' | 'a' .. 'f' | 'A' .. 'F' )+ / - - token lex_lit / - '\'' . ( ^( '\'' | '\\' ) | '\\' . any )* . ( '\'' | '\'i' ) | - '\"' . ( ^( '\"' | '\\' ) | '\\' . any )* . 
( '\"' | '\"i' ) - / - - token LEX_DOT /'.'/ - token LEX_BAR /'|'/ - token LEX_AMP /'&'/ - token LEX_DASH /'-'/ - token LEX_POPEN /'('/ - token LEX_PCLOSE /')'/ - token LEX_STAR /'*'/ - token LEX_STARSTAR /'**'/ - token LEX_QUESTION /'?'/ - token LEX_PLUS /'+'/ - token LEX_CARET /'^'/ - token LEX_DOTDOT /'..'/ - token LEX_SQOPEN_POS /'['/ ni - token LEX_SQOPEN_NEG /'[^'/ ni - token LEX_FSLASH /'/'/ - - token LEX_DASHDASH /'--'/ - token LEX_COLON_GT /':>'/ - token LEX_COLON_GTGT /':>>'/ - token LEX_LT_COLON /'<:'/ - - ignore / - ( '\n' | '\t' | ' ' ) . - ( '\n' | '\t' | ' ' )* - / - - ignore / '#' . ( ^'\n' )* . '\n' / -end - -lex - token RE_DASH / '-' / - token RE_CHAR / ^( '\\' | '-' | ']' ) | '\\' . any / - token RE_SQCLOSE / ']' / -end - -def start - [RootItemList: root_item*] - -def root_item - [rl_def] :Rl commit -| [literal_def] :Literal commit -| [token_def] :Token commit -| [ic_def] :IgnoreCollector commit -| [ignore_def] :Ignore commit -| [cfl_def] :Cfl commit -| [region_def] :Region commit -| [struct_def] :Struct commit -| [namespace_def] :Namespace commit -| [function_def] :Function commit -| [in_host_def] :InHost commit -| [iter_def] :Iter commit -| [statement] :Statement commit -| [global_def] :Global commit -| [export_def] :Export commit -| [pre_eof_def] :PreEof commit -| [precedence_def] :Precedence commit -| [alias_def] :Alias commit -| [include] :Include commit -| [reduction_def] :Reduction commit - -def include - [INCLUDE SQ SqConsDataList: sq_cons_data* sq_lit_term] - -def precedence_def - [pred_type pred_token_list] - -def pred_type - [LEFT] :Left -| [RIGHT] :Right -| [NONASSOC] :NonAssoc - -def pred_token_list - [pred_token_list COMMA pred_token] :List -| [pred_token] :Base - -def pred_token - [region_qual id] :Id -| [region_qual backtick_lit] :Lit - -def pre_eof_def - [PREEOF COPEN lang_stmt_list CCLOSE] - -def alias_def - [ALIAS id type_ref] - -def struct_item - [struct_var_def] :StructVar commit -| [literal_def] :Literal commit -| [rl_def] 
:Rl commit -| [token_def] :Token commit -| [ic_def] :IgnoreCollector commit -| [ignore_def] :Ignore commit -| [cfl_def] :Cfl commit -| [region_def] :Region commit -| [struct_def] :Struct commit -| [function_def] :Function commit -| [in_host_def] :InHost commit -| [iter_def] :Iter commit -| [export_def] :Export commit -| [pre_eof_def] :PreEof commit -| [precedence_def] :Precedence commit -| [alias_def] :Alias commit - -def export_def - [EXPORT var_def opt_def_init] - -def global_def - [GLOBAL var_def opt_def_init] - -def iter_def - [ITER id POPEN ParamVarDefList: param_var_def_list PCLOSE - COPEN lang_stmt_list CCLOSE] - -def reference_type_ref - [REF LT type_ref GT] - -def param_var_def_seq - [param_var_def COMMA param_var_def_seq] -| [param_var_def] - -def param_var_def_list - [param_var_def_seq] -| [] - -def param_var_def - [id COLON type_ref] :Type -| [id COLON reference_type_ref] :Ref - -def opt_export - [EXPORT] :Export -| [] - -def function_def - [opt_export type_ref id - POPEN ParamVarDefList: param_var_def_list PCLOSE - COPEN lang_stmt_list CCLOSE] - -def in_host_def - [opt_export type_ref id - POPEN ParamVarDefList: param_var_def_list PCLOSE - EQUALS HostFunc: id] - -def struct_var_def - [var_def] - -def struct_key - [STRUCT] | [CONTEXT] - -def struct_def - [struct_key id ItemList: struct_item* END] - -def literal_def - [LITERAL literal_list] - -def literal_list - [literal_list literal_item] :Item -| [literal_item] :Base - -def literal_item - [no_ignore_left backtick_lit no_ignore_right] - -def no_ignore_left - [NI MINUS] :Ni -| [] - -def no_ignore_right - [MINUS NI] :Ni -| [] - -def reduction_def - [REDUCTION id ItemList: reduction_item* END] - -lex - token RED_OPEN / '{' / - token RED_CLOSE / '}' / - - token red_id / - ( 'a' .. 'z' | 'A' .. 'Z' | '_' ) . - ( 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' | '_' ) * - / - - token red_comment / - '//' . ( ^'\n' )* . '\n' | - '/*' . 
any* :> '*/' - / - - token red_ws / - ( '\n' | '\t' | ' ' )+ - / - - token red_lit / - '\'' . ( ^( '\'' | '\\' ) | '\\' . any )* . ( '\'' | '\'i' ) | - '\"' . ( ^( '\"' | '\\' ) | '\\' . any )* . ( '\"' | '\"i' ) - / - - token RED_LHS / '$' . '$' / - token RED_RHS_REF / '$' . red_id / - token RED_RHS_LOC / '@' . red_id / - token RED_TREE_REF / '$*' . red_id / - - token RED_RHS_NREF / '$' . ('1' .. '9') . ('0' .. '9')* / - token RED_RHS_NLOC / '@' . ('1' .. '9') . ('0' .. '9')* / - token RED_TREE_NREF / '$*' . ('1' .. '9') . ('0' .. '9')* / - - token red_any / any / -end - -def red_nonterm - [type_ref RED_OPEN HostItems: host_item* RED_CLOSE] - -def red_action - [type_ref COLON id RED_OPEN HostItems: host_item* RED_CLOSE] - -def host_item - [red_id] -| [red_lit] -| [red_comment] -| [red_ws] -| [red_any] -| [RED_LHS] -| [RED_RHS_REF] -| [RED_TREE_REF] -| [RED_RHS_LOC] -| [RED_RHS_NREF] -| [RED_TREE_NREF] -| [RED_RHS_NLOC] -| [RED_OPEN HostItems: host_item* RED_CLOSE] - -def reduction_item - [red_nonterm] :NonTerm commit -| [red_action] :Action commit - -def namespace_def - [NAMESPACE id ItemList: namespace_item* END] - -def namespace_item - [rl_def] :Rl commit -| [literal_def] :Literal commit -| [token_def] :Token commit -| [ic_def] :IgnoreCollector commit -| [ignore_def] :Ignore commit -| [cfl_def] :Cfl commit -| [region_def] :Region commit -| [struct_def] :Struct commit -| [namespace_def] :Namespace commit -| [function_def] :Function commit -| [in_host_def] :InHost commit -| [iter_def] :Iter commit -| [pre_eof_def] :PreEof commit -| [precedence_def] :Precedence commit -| [alias_def] :Alias commit -| [include] :Include commit -| [global_def] :Global commit - -def obj_var_list - [] - -def opt_reduce_first - [REDUCEFIRST] -| [] - -def cfl_def - [DEF id - VarDefList: var_def* - opt_reduce_first - prod_list] - -def region_def - [LEX RootItemList: root_item* END] - -def rl_def - [RL id LEX_FSLASH lex_expr LEX_FSLASH] - -def opt_lex_expr - [lex_expr] -| [] - -def 
token_def - [TOKEN id VarDefList: var_def* - no_ignore_left - LEX_FSLASH opt_lex_expr LEX_FSLASH - no_ignore_right - opt_translate] - -def ic_def - [TOKEN id MINUS] - -def opt_translate - [COPEN lang_stmt_list CCLOSE] :Translate -| [] - -def opt_id - [id] :Id -| [] - -def ignore_def - [IGNORE opt_id LEX_FSLASH opt_lex_expr LEX_FSLASH] - -def prod_el - [opt_prod_el_name region_qual id opt_repeat] :Id -| [opt_prod_el_name region_qual backtick_lit opt_repeat] :Lit - -def opt_prod_el_name - [id COLON] :Name -| [] - -def prod_el_list - [prod_el_list prod_el] :List -| [] - -def opt_commit - [COMMIT] :Commit -| [] - -def opt_prod_name - [COLON id] :Name -| [] - -def prod - [SQOPEN prod_el_list SQCLOSE - opt_prod_name - opt_commit - opt_reduce] - -def opt_reduce - [COPEN lang_stmt_list CCLOSE] :Reduce -| [] - -def prod_list - [prod_list BAR prod] :List -| [prod] :Base - -def case_clause - [CASE pattern block_or_single] :Pattern commit -| [CASE id block_or_single] :Id commit -| [CASE id pattern block_or_single] :IdPat commit - -def default_clause - [DEFAULT block_or_single] commit - -def case_clause_list - [case_clause case_clause_list] :Recursive -| [case_clause] :BaseCase -| [default_clause] :BaseDefault - -# Note a commit on switch doesn't work because the default clause in -# case_clause follow sets cause a premature commit. We could use a proper list -# for case clauses, followed by an optional default, but just move the commits -# to the clauses, which is is a better commit strategy anyways. Gives more -# regular commits. 
- -def statement - [print_stmt] :Print commit -| [var_def opt_def_init] :VarDef commit -| [FOR id COLON type_ref IN iter_call block_or_single] :For commit -| [IF code_expr block_or_single elsif_list] :If commit -| [SWITCH var_ref case_clause_list] :SwitchUnder -| [SWITCH var_ref COPEN case_clause_list CCLOSE] :SwitchBlock -| [WHILE code_expr block_or_single] :While commit -| [var_ref EQUALS code_expr] :LhsVarRef commit -| [YIELD var_ref] :Yield commit -| [RETURN code_expr] :Return commit -| [BREAK] :Break commit -| [REJECT] :Reject commit -| [var_ref POPEN call_arg_list PCLOSE] :Call -| [stmt_or_factor] :StmtOrFactor -| [accumulate opt_eos] :BareSend - -def elsif_list - [elsif_clause elsif_list] :Clause -| [optional_else] :OptElse - -def elsif_clause - [ELSIF code_expr block_or_single] - -def optional_else - [ELSE block_or_single] :Else -| [] - -def call_arg_seq - [code_expr COMMA call_arg_seq] -| [code_expr] - -def call_arg_list - [call_arg_seq] -| [] - -def iter_call - [E1 var_ref POPEN call_arg_list PCLOSE] :Call -| [E2 id] :Id -| [E3 code_expr] :Expr - -def block_or_single - [COPEN lang_stmt_list CCLOSE] :Block -| [statement] :Single - -def require_pattern - [REQUIRE var_ref pattern] - -def opt_require_stmt - [require_pattern lang_stmt_list] :Require -| [] :Base - -def lang_stmt_list - [StmtList: statement* opt_require_stmt] - -def opt_def_init - [EQUALS code_expr] :Init -| [] :Base - -def var_def - [id COLON type_ref] - -def print_stmt - [PRINT POPEN call_arg_list PCLOSE] :Tree -| [PRINTS POPEN var_ref COMMA call_arg_list PCLOSE] :PrintStream -| [PRINT accumulate] :Accum - -def expr_stmt - [code_expr] - -def code_expr - [code_expr AMPAMP code_relational] :AmpAmp -| [code_expr BARBAR code_relational] :BarBar -| [code_relational] :Base - -def code_relational - [code_relational EQEQ code_additive] :EqEq -| [code_relational NEQ code_additive] :Neq -| [code_relational LT code_additive] :Lt -| [code_relational GT code_additive] :Gt -| [code_relational LTEQ 
code_additive] :LtEq -| [code_relational GTEQ code_additive] :GtEq -| [code_additive] :Base - -def code_additive - [code_additive PLUS code_multiplicitive] :Plus -| [code_additive MINUS code_multiplicitive] :Minus -| [code_multiplicitive] :Base - -def code_multiplicitive - [code_multiplicitive STAR code_unary] :Star -| [code_multiplicitive FSLASH code_unary] :Fslash -| [code_unary] :Base - -def code_unary - [BANG code_factor] :Bang -| [DOLLAR code_factor] :Dollar -| [DOLLAR DOLLAR code_factor] :DollarDollar -| [CARET code_factor] :Caret -| [AT code_factor] :At -| [PERCENT code_factor] :Percent -| [code_factor] :Base - -def opt_eos - [DOT] :Dot -| [EOS] :Eos -| [] - -def code_factor - [number] :Number -| [var_ref POPEN call_arg_list PCLOSE] :Call -| [var_ref] :VarRef -| [NIL] :Nil -| [TRUE] :True -| [FALSE] :False -| [POPEN code_expr PCLOSE] :Paren -| [string] :String -| [type_ref IN var_ref] :In -| [TYPEID LT type_ref GT] :TypeId -| [CAST LT type_ref GT code_factor] :Cast -| [stmt_or_factor] :StmtOrFactor - -def type_ref - [region_qual id opt_repeat] :Id -| [INT] :Int -| [BOOL] :Bool -| [VOID] :Void -| [PARSER LT type_ref GT] :Parser -| [LIST LT type_ref GT] :List -| [MAP LT KeyType: type_ref COMMA ValType: type_ref GT] :Map -| [LIST_EL LT type_ref GT] :ListEl -| [MAP_EL LT KeyType: type_ref COMMA ValType: type_ref GT] :MapEl - -def region_qual - [region_qual id DOUBLE_COLON] :Qual -| [] :Base - -def opt_repeat - [STAR] :Star -| [PLUS] :Plus -| [QUESTION] :Question -| [] - -def opt_capture - [id COLON] :Id -| [] - -def opt_field_init - [POPEN FieldInitList: field_init* PCLOSE] :Init -| [] :Base - -def field_init - [code_expr] - -def stmt_or_factor - [PARSE opt_capture type_ref opt_field_init accumulate] :Parse -| [PARSE_TREE opt_capture type_ref opt_field_init accumulate] :ParseTree -| [PARSE_STOP opt_capture type_ref opt_field_init accumulate] :ParseStop -| [REDUCE id type_ref opt_field_init accumulate] :Reduce -| [READ_REDUCE id type_ref opt_field_init 
accumulate] :ReadReduce -| [SEND var_ref accumulate opt_eos] :Send -| [SEND_TREE var_ref accumulate opt_eos] :SendTree -| [MAKE_TREE POPEN call_arg_list PCLOSE] :MakeTree -| [MAKE_TOKEN POPEN call_arg_list PCLOSE] :MakeToken -| [CONS opt_capture type_ref opt_field_init constructor] :Cons -| [MATCH var_ref pattern] :Match -| [NEW opt_capture type_ref POPEN FieldInitList: field_init* PCLOSE] :New - -# -# Pattern -# - -def opt_label - [id COLON] :Id -| [] - -def dq_lit_term - [LIT_DQ] | [LIT_DQ_NL] - -def sq_lit_term - [CONS_SQ] | [CONS_SQ_NL] - -def opt_tilde_data - [tilde_data] -| [] - -def pattern_el_lel - [region_qual id opt_repeat] :Id -| [region_qual backtick_lit opt_repeat] :Lit - -def pattern_el - [opt_label pattern_el_lel] :PatternEl -| [DQ LitpatElList: litpat_el* dq_lit_term] :Dq -| [SQ SqConsDataList: sq_cons_data* sq_lit_term] :Sq -| [TILDE opt_tilde_data TILDE_NL] :Tilde - -def litpat_el - [lit_dq_data] :ConsData -| [LIT_SQOPEN PatternElList: pattern_el* LIT_SQCLOSE] :SubList - -def pattern_top_el - [DQ LitpatElList: litpat_el* dq_lit_term] :Dq -| [SQ SqConsDataList: sq_cons_data* sq_lit_term] :Sq -| [TILDE opt_tilde_data TILDE_NL] :Tilde -| [SQOPEN PatternElList: pattern_el* SQCLOSE] :SubList - -def pattern_list - [pattern_top_el] :Base - -def pattern - [pattern_list] - -# -# Constructor List -# - -def E1 [] -def E2 [] -def E3 [] -def E4 [] - -def cons_el - [E1 region_qual backtick_lit] :Lit -| [E1 DQ LitConsElList: lit_cons_el* dq_lit_term] :Dq -| [E1 SQ SqConsDataList: sq_cons_data* sq_lit_term] :Sq -| [E1 TILDE opt_tilde_data TILDE_NL] :Tilde -| [E2 code_expr] :CodeExpr - -def lit_cons_el - [lit_dq_data] :ConsData -| [LIT_SQOPEN ConsElList: cons_el* LIT_SQCLOSE] :SubList - -def cons_top_el - [DQ LitConsElList: lit_cons_el* dq_lit_term] :Dq -| [SQ SqConsDataList: sq_cons_data* sq_lit_term] :Sq -| [TILDE opt_tilde_data TILDE_NL] :Tilde -| [SQOPEN ConsElList: cons_el* SQCLOSE] :SubList - -def cons_list - [cons_top_el] :Base - -def constructor - 
[cons_list] - -# -# Accumulate -# - -def accum_el - [E1 DQ LitAccumElList: lit_accum_el* dq_lit_term] :Dq -| [E1 SQ SqConsDataList: sq_cons_data* sq_lit_term] :Sq -| [E1 TILDE opt_tilde_data TILDE_NL] :Tilde -| [E2 code_expr] :CodeExpr - -def lit_accum_el - [lit_dq_data] :ConsData -| [LIT_SQOPEN AccumElList: accum_el* LIT_SQCLOSE] :SubList - -def accum_top_el - [DQ LitAccumElList: lit_accum_el* dq_lit_term] :Dq -| [SQ SqConsDataList: sq_cons_data* sq_lit_term] :Sq -| [TILDE opt_tilde_data TILDE_NL] :Tilde -| [SQOPEN AccumElList: accum_el* SQCLOSE] :SubList - -def accum_list - [accum_top_el accum_list] :List -| [accum_top_el] :Base - -def accumulate - [accum_list] - -# -# String List -# - -def string_el - [E1 DQ LitStringElList: lit_string_el* dq_lit_term] :Dq -| [E1 SQ SqConsDataList: sq_cons_data* sq_lit_term] :Sq -| [E1 TILDE opt_tilde_data TILDE_NL] :Tilde -| [E2 code_expr] :CodeExpr - -def lit_string_el - [lit_dq_data] :ConsData -| [LIT_SQOPEN StringElList: string_el* LIT_SQCLOSE] :SubList - -def string_top_el - [DQ LitStringElList: lit_string_el* dq_lit_term] :Dq -| [SQ SqConsDataList: sq_cons_data* sq_lit_term] :Sq -| [TILDE opt_tilde_data TILDE_NL] :Tilde -| [SQOPEN StringElList: string_el* SQCLOSE] :SubList - -def string_list - [string_top_el] :Base - -def string - [string_list] - -# -# Variable References -# - -def var_ref - [region_qual qual id] - -def qual - [qual id DOT] :Dot -| [qual id ARROW] :Arrow -| [] :Base - -# -# Lexical analysis. 
-# - -def lex_expr - [lex_expr LEX_BAR lex_term] :Bar -| [lex_expr LEX_AMP lex_term] :Amp -| [lex_expr LEX_DASH lex_term] :Dash -| [lex_expr LEX_DASHDASH lex_term] :DashDash -| [lex_term] :Base - -def opt_lex_dot - [LEX_DOT] -| [] - -def lex_term - [lex_term opt_lex_dot lex_factor_rep] :Dot -| [lex_term LEX_COLON_GT lex_factor_rep] :ColonGt -| [lex_term LEX_COLON_GTGT lex_factor_rep] :ColonGtGt -| [lex_term LEX_LT_COLON lex_factor_rep] :LtColon -| [lex_factor_rep] :Base - -def lex_factor_rep - [lex_factor_rep LEX_STAR] :Star -| [lex_factor_rep LEX_STARSTAR] :StarStar -| [lex_factor_rep LEX_PLUS] :Plus -| [lex_factor_rep LEX_QUESTION] :Question -| [lex_factor_rep COPEN lex_uint CCLOSE ] :Exact -| [lex_factor_rep COPEN COMMA lex_uint CCLOSE ] :Max -| [lex_factor_rep COPEN lex_uint COMMA CCLOSE ] :Min -| [lex_factor_rep COPEN Low: lex_uint COMMA High: lex_uint CCLOSE ] :Range -| [lex_factor_neg] :Base - -def lex_factor_neg - [LEX_CARET lex_factor_neg] :Caret -| [lex_factor] :Base - -def lex_range_lit - [lex_lit] :Lit -| [lex_num] :Number - -def lex_num - [lex_uint] -| [lex_hex] - -#| [LEX_DASH num] - -def lex_factor - [lex_lit] :Literal -| [lex_id] :Id -| [lex_uint] :Number -| [lex_hex] :Hex -| [Low: lex_range_lit LEX_DOTDOT High: lex_range_lit] :Range -| [LEX_SQOPEN_POS reg_or_data RE_SQCLOSE] :PosOrBlock -| [LEX_SQOPEN_NEG reg_or_data RE_SQCLOSE] :NegOrBlock -| [LEX_POPEN lex_expr LEX_PCLOSE] :Paren - -def reg_or_data - [reg_or_data reg_or_char] :Data -| [] :Base - -def reg_or_char - [RE_CHAR] :Char -| [Low: RE_CHAR RE_DASH High: RE_CHAR] :Range diff --git a/src/commit.c b/src/commit.c deleted file mode 100644 index 28da8c1d..00000000 --- a/src/commit.c +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright 2015-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including 
without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <errno.h> -#include <stdio.h> -#include <string.h> -#include <stdlib.h> -#include <assert.h> - -#include "config.h" -#include "debug.h" -#include "pdarun.h" -#include "bytecode.h" -#include "tree.h" -#include "pool.h" -#include "internal.h" - -void commit_clear_kid_list( program_t *prg, tree_t **sp, kid_t *kid ) -{ - kid_t *next; - while ( kid ) { - colm_tree_downref( prg, sp, kid->tree ); - next = kid->next; - kid_free( prg, kid ); - kid = next; - } -} - -void commit_clear_parse_tree( program_t *prg, tree_t **sp, - struct pda_run *pda_run, parse_tree_t *pt ) -{ - tree_t **top = vm_ptop(); - - if ( pt == 0 ) - return; - -free_tree: - if ( pt->next != 0 ) { - vm_push_ptree( pt->next ); - } - - if ( pt->left_ignore != 0 ) { - vm_push_ptree( pt->left_ignore ); - } - - if ( pt->child != 0 ) { - vm_push_ptree( pt->child ); - } - - if ( pt->right_ignore != 0 ) { - vm_push_ptree( pt->right_ignore ); - } - - /* Only the root level of the stack has tree - * shadows and we are below that. */ - assert( pt->shadow == 0 ); - parse_tree_free( pda_run, pt ); - - /* Any trees to downref? 
*/ - if ( sp != top ) { - pt = vm_pop_ptree(); - goto free_tree; - } -} - -static int been_committed( parse_tree_t *parse_tree ) -{ - return parse_tree->flags & PF_COMMITTED; -} - -void commit_reduce( program_t *prg, tree_t **root, struct pda_run *pda_run ) -{ - tree_t **sp = root; - parse_tree_t *pt = pda_run->stack_top; - - /* The top level of the stack is linked right to left. This is the - * traversal order we need for committing. */ - while ( pt != 0 && !been_committed( pt ) ) { - vm_push_ptree( pt ); - pt = pt->next; - } - - while ( sp != root ) { - pt = vm_pop_ptree(); - - prg->rtd->commit_reduce_forward( prg, sp, pda_run, pt ); - pt->child = 0; - - pt->flags |= PF_COMMITTED; - pt = pt->next; - } -} diff --git a/src/compiler.cc b/src/compiler.cc deleted file mode 100644 index 72cf99fa..00000000 --- a/src/compiler.cc +++ /dev/null @@ -1,1247 +0,0 @@ -/* - * Copyright 2006-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "compiler.h" - -#include <errno.h> -#include <stdlib.h> -#include <string.h> -#include <stdbool.h> -#include <unistd.h> -#include <assert.h> -#include <iostream> - -#include "redbuild.h" -#include "pdacodegen.h" -#include "fsmcodegen.h" -#include "colm.h" - -using std::ostringstream; -using std::cout; -using std::cerr; -using std::endl; - -char machineMain[] = "main"; -exit_object endp; -void operator<<( ostream &out, exit_object & ) -{ - out << endl; - exit(1); -} - -/* Perform minimization after an operation according - * to the command line args. */ -void afterOpMinimize( FsmGraph *fsm, bool lastInSeq ) -{ - /* Switch on the prefered minimization algorithm. */ - if ( lastInSeq ) { - /* First clean up the graph. FsmGraph operations may leave these - * lying around. There should be no dead end states. The subtract - * intersection operators are the only places where they may be - * created and those operators clean them up. */ - fsm->removeUnreachableStates(); - fsm->minimizePartition2(); - } -} - -/* Count the transitions in the fsm by walking the state list. */ -int countTransitions( FsmGraph *fsm ) -{ - int numTrans = 0; - FsmState *state = fsm->stateList.head; - while ( state != 0 ) { - numTrans += state->outList.length(); - state = state->next; - } - return numTrans; -} - -Key makeFsmKeyHex( char *str, const InputLoc &loc, Compiler *pd ) -{ - /* Reset errno so we can check for overflow or underflow. In the event of - * an error, sets the return val to the upper or lower bound being tested - * against. 
*/ - errno = 0; - unsigned int size = keyOps->alphType->size; - bool unusedBits = size < sizeof(unsigned long); - - unsigned long ul = strtoul( str, 0, 16 ); - - if ( errno == ERANGE || (unusedBits && ul >> (size * 8)) ) { - error(loc) << "literal " << str << " overflows the alphabet type" << endl; - ul = 1 << (size * 8); - } - - if ( unusedBits && ul >> (size * 8 - 1) ) - ul |= (ULONG_MAX >> (size*8 ) ) << (size*8); - - return Key( (long)ul ); -} - -Key makeFsmKeyDec( char *str, const InputLoc &loc, Compiler *pd ) -{ - /* Convert the number to a decimal. First reset errno so we can check - * for overflow or underflow. */ - errno = 0; - long long minVal = keyOps->alphType->minVal; - long long maxVal = keyOps->alphType->maxVal; - - long long ll = strtoll( str, 0, 10 ); - - /* Check for underflow. */ - if ( (errno == ERANGE && ll < 0) || ll < minVal) { - error(loc) << "literal " << str << " underflows the alphabet type" << endl; - ll = minVal; - } - /* Check for overflow. */ - else if ( (errno == ERANGE && ll > 0) || ll > maxVal ) { - error(loc) << "literal " << str << " overflows the alphabet type" << endl; - ll = maxVal; - } - - return Key( (long)ll ); -} - -/* Make an fsm key in int format (what the fsm graph uses) from an alphabet - * number returned by the parser. Validates that the number doesn't overflow - * the alphabet type. */ -Key makeFsmKeyNum( char *str, const InputLoc &loc, Compiler *pd ) -{ - /* Switch on hex/decimal format. */ - if ( str[0] == '0' && str[1] == 'x' ) - return makeFsmKeyHex( str, loc, pd ); - else - return makeFsmKeyDec( str, loc, pd ); -} - -/* Make an fsm int format (what the fsm graph uses) from a single character. - * Performs proper conversion depending on signed/unsigned property of the - * alphabet. */ -Key makeFsmKeyChar( char c, Compiler *pd ) -{ - /* Copy from a char type. */ - return Key( c ); -} - -/* Make an fsm key array in int format (what the fsm graph uses) from a string - * of characters. 
Performs proper conversion depending on signed/unsigned - * property of the alphabet. */ -void makeFsmKeyArray( Key *result, char *data, int len, Compiler *pd ) -{ - /* Copy from a char star type. */ - char *src = data; - for ( int i = 0; i < len; i++ ) - result[i] = Key(src[i]); -} - -/* Like makeFsmKeyArray except the result has only unique keys. They ordering - * will be changed. */ -void makeFsmUniqueKeyArray( KeySet &result, char *data, int len, - bool caseInsensitive, Compiler *pd ) -{ - /* Copy from a char star type. */ - char *src = data; - for ( int si = 0; si < len; si++ ) { - Key key( src[si] ); - result.insert( key ); - if ( caseInsensitive ) { - if ( key.isLower() ) - result.insert( key.toUpper() ); - else if ( key.isUpper() ) - result.insert( key.toLower() ); - } - } -} - -FsmGraph *dotFsm( Compiler *pd ) -{ - FsmGraph *retFsm = new FsmGraph(); - retFsm->rangeFsm( keyOps->minKey, keyOps->maxKey ); - return retFsm; -} - -FsmGraph *dotStarFsm( Compiler *pd ) -{ - FsmGraph *retFsm = new FsmGraph(); - retFsm->rangeStarFsm( keyOps->minKey, keyOps->maxKey ); - return retFsm; -} - -/* Make a builtin type. Depends on the signed nature of the alphabet type. */ -FsmGraph *makeBuiltin( BuiltinMachine builtin, Compiler *pd ) -{ - /* FsmGraph created to return. */ - FsmGraph *retFsm = 0; - - switch ( builtin ) { - case BT_Any: { - /* All characters. */ - retFsm = dotFsm( pd ); - break; - } - case BT_Ascii: { - /* Ascii characters 0 to 127. */ - retFsm = new FsmGraph(); - retFsm->rangeFsm( 0, 127 ); - break; - } - case BT_Extend: { - /* Ascii extended characters. This is the full byte range. Dependent - * on signed, vs no signed. If the alphabet is one byte then just use - * dot fsm. */ - retFsm = new FsmGraph(); - retFsm->rangeFsm( -128, 127 ); - break; - } - case BT_Alpha: { - /* Alpha [A-Za-z]. 
*/ - FsmGraph *upper = new FsmGraph(), *lower = new FsmGraph(); - upper->rangeFsm( 'A', 'Z' ); - lower->rangeFsm( 'a', 'z' ); - upper->unionOp( lower ); - upper->minimizePartition2(); - retFsm = upper; - break; - } - case BT_Digit: { - /* Digits [0-9]. */ - retFsm = new FsmGraph(); - retFsm->rangeFsm( '0', '9' ); - break; - } - case BT_Alnum: { - /* Alpha numerics [0-9A-Za-z]. */ - FsmGraph *digit = new FsmGraph(), *lower = new FsmGraph(); - FsmGraph *upper = new FsmGraph(); - digit->rangeFsm( '0', '9' ); - upper->rangeFsm( 'A', 'Z' ); - lower->rangeFsm( 'a', 'z' ); - digit->unionOp( upper ); - digit->unionOp( lower ); - digit->minimizePartition2(); - retFsm = digit; - break; - } - case BT_Lower: { - /* Lower case characters. */ - retFsm = new FsmGraph(); - retFsm->rangeFsm( 'a', 'z' ); - break; - } - case BT_Upper: { - /* Upper case characters. */ - retFsm = new FsmGraph(); - retFsm->rangeFsm( 'A', 'Z' ); - break; - } - case BT_Cntrl: { - /* Control characters. */ - FsmGraph *cntrl = new FsmGraph(); - FsmGraph *highChar = new FsmGraph(); - cntrl->rangeFsm( 0, 31 ); - highChar->concatFsm( 127 ); - cntrl->unionOp( highChar ); - cntrl->minimizePartition2(); - retFsm = cntrl; - break; - } - case BT_Graph: { - /* Graphical ascii characters [!-~]. */ - retFsm = new FsmGraph(); - retFsm->rangeFsm( '!', '~' ); - break; - } - case BT_Print: { - /* Printable characters. Same as graph except includes space. */ - retFsm = new FsmGraph(); - retFsm->rangeFsm( ' ', '~' ); - break; - } - case BT_Punct: { - /* Punctuation. 
*/ - FsmGraph *range1 = new FsmGraph(); - FsmGraph *range2 = new FsmGraph(); - FsmGraph *range3 = new FsmGraph(); - FsmGraph *range4 = new FsmGraph(); - range1->rangeFsm( '!', '/' ); - range2->rangeFsm( ':', '@' ); - range3->rangeFsm( '[', '`' ); - range4->rangeFsm( '{', '~' ); - range1->unionOp( range2 ); - range1->unionOp( range3 ); - range1->unionOp( range4 ); - range1->minimizePartition2(); - retFsm = range1; - break; - } - case BT_Space: { - /* Whitespace: [\t\v\f\n\r ]. */ - FsmGraph *cntrl = new FsmGraph(); - FsmGraph *space = new FsmGraph(); - cntrl->rangeFsm( '\t', '\r' ); - space->concatFsm( ' ' ); - cntrl->unionOp( space ); - cntrl->minimizePartition2(); - retFsm = cntrl; - break; - } - case BT_Xdigit: { - /* Hex digits [0-9A-Fa-f]. */ - FsmGraph *digit = new FsmGraph(); - FsmGraph *upper = new FsmGraph(); - FsmGraph *lower = new FsmGraph(); - digit->rangeFsm( '0', '9' ); - upper->rangeFsm( 'A', 'F' ); - lower->rangeFsm( 'a', 'f' ); - digit->unionOp( upper ); - digit->unionOp( lower ); - digit->minimizePartition2(); - retFsm = digit; - break; - } - case BT_Lambda: { - retFsm = new FsmGraph(); - retFsm->lambdaFsm(); - break; - } - case BT_Empty: { - retFsm = new FsmGraph(); - retFsm->emptyFsm(); - break; - }} - - return retFsm; -} - -/* - * Compiler - */ - -/* Initialize the structure that will collect info during the parse of a - * machine. 
*/ -Compiler::Compiler( ) -: - nextPriorKey(0), - nextNameId(0), - alphTypeSet(false), - getKeyExpr(0), - accessExpr(0), - curStateExpr(0), - lowerNum(0), - upperNum(0), - errorCount(0), - curActionOrd(0), - curPriorOrd(0), - nextEpsilonResolvedLink(0), - nextTokenId(1), - rootCodeBlock(0), - mainReturnUT(0), - //access(0), - //tokenStruct(0), - - ptrLangEl(0), - strLangEl(0), - anyLangEl(0), - rootLangEl(0), - noTokenLangEl(0), - eofLangEl(0), - errorLangEl(0), - ignoreLangEl(0), - - firstNonTermId(0), - prodIdIndex(0), - - global(0), - globalSel(0), - globalObjectDef(0), - arg0(0), - argv(0), - - stream(0), - inputSel(0), - streamSel(0), - - uniqueTypeNil(0), - uniqueTypePtr(0), - uniqueTypeBool(0), - uniqueTypeInt(0), - uniqueTypeStr(0), - uniqueTypeIgnore(0), - uniqueTypeAny(0), - uniqueTypeInput(0), - uniqueTypeStream(0), - nextPatConsId(0), - nextGenericId(1), - nextFuncId(0), - nextHostId(0), - nextObjectId(1), /* 0 is reserved for no object. */ - nextFrameId(0), - nextParserId(0), - revertOn(true), - predValue(0), - nextMatchEndNum(0), - argvTypeRef(0), - inContiguous(false), - contiguousOffset(0), - contiguousStretch(0) -{ -} - -/* Clean up the data collected during a parse. */ -Compiler::~Compiler() -{ - /* Delete all the nodes in the action list. Will cause all the - * string data that represents the actions to be deallocated. */ - actionList.empty(); - - for ( CharVectVect::Iter fns = streamFileNames; fns.lte(); fns++ ) { - const char **ptr = *fns; - while ( *ptr != 0 ) { - ::free( (void*)*ptr ); - ptr += 1; - } - free( (void*) *fns ); - } -} - -ostream &operator<<( ostream &out, const Token &token ) -{ - out << token.data; - return out; -} - -/* Write out a name reference. 
*/ -ostream &operator<<( ostream &out, const NameRef &nameRef ) -{ - int pos = 0; - if ( nameRef[pos] == 0 ) { - out << "::"; - pos += 1; - } - out << nameRef[pos++]; - for ( ; pos < nameRef.length(); pos++ ) - out << "::" << nameRef[pos]; - return out; -} - -NameInst **Compiler::makeNameIndex() -{ - /* The number of nodes in the tree can now be given by nextNameId. Put a - * null pointer on the end of the list to terminate it. */ - NameInst **nameIndex = new NameInst*[nextNameId+1]; - memset( nameIndex, 0, sizeof(NameInst*)*(nextNameId+1) ); - - for ( NameInstList::Iter ni = nameInstList; ni.lte(); ni++ ) - nameIndex[ni->id] = ni; - - return nameIndex; -} - -void Compiler::createBuiltin( const char *name, BuiltinMachine builtin ) -{ - LexExpression *expression = LexExpression::cons( builtin ); - LexJoin *join = LexJoin::cons( expression ); - LexDefinition *varDef = new LexDefinition( name, join ); - GraphDictEl *graphDictEl = new GraphDictEl( name, varDef ); - rootNamespace->rlMap.insert( graphDictEl ); -} - -/* Initialize the graph dict with builtin types. */ -void Compiler::initGraphDict( ) -{ - createBuiltin( "any", BT_Any ); - createBuiltin( "ascii", BT_Ascii ); - createBuiltin( "extend", BT_Extend ); - createBuiltin( "alpha", BT_Alpha ); - createBuiltin( "digit", BT_Digit ); - createBuiltin( "alnum", BT_Alnum ); - createBuiltin( "lower", BT_Lower ); - createBuiltin( "upper", BT_Upper ); - createBuiltin( "cntrl", BT_Cntrl ); - createBuiltin( "graph", BT_Graph ); - createBuiltin( "print", BT_Print ); - createBuiltin( "punct", BT_Punct ); - createBuiltin( "space", BT_Space ); - createBuiltin( "xdigit", BT_Xdigit ); - createBuiltin( "null", BT_Lambda ); - createBuiltin( "zlen", BT_Lambda ); - createBuiltin( "empty", BT_Empty ); -} - -/* Initialize the key operators object that will be referenced by all fsms - * created. */ -void Compiler::initKeyOps( ) -{ - /* Signedness and bounds. */ - HostType *alphType = alphTypeSet ? 
userAlphType : hostLang->defaultAlphType; - thisKeyOps.setAlphType( alphType ); - - if ( lowerNum != 0 ) { - /* If ranges are given then interpret the alphabet type. */ - thisKeyOps.minKey = makeFsmKeyNum( lowerNum, rangeLowLoc, this ); - thisKeyOps.maxKey = makeFsmKeyNum( upperNum, rangeHighLoc, this ); - } -} - -/* Remove duplicates of unique actions from an action table. */ -void Compiler::removeDups( ActionTable &table ) -{ - /* Scan through the table looking for unique actions to - * remove duplicates of. */ - for ( int i = 0; i < table.length(); i++ ) { - /* Remove any duplicates ahead of i. */ - for ( int r = i+1; r < table.length(); ) { - if ( table[r].value == table[i].value ) - table.vremove(r); - else - r += 1; - } - } -} - -/* Remove duplicates from action lists. This operates only on transition and - * eof action lists and so should be called once all actions have been - * transfered to their final resting place. */ -void Compiler::removeActionDups( FsmGraph *graph ) -{ - /* Loop all states. */ - for ( StateList::Iter state = graph->stateList; state.lte(); state++ ) { - /* Loop all transitions. */ - for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) - removeDups( trans->actionTable ); - removeDups( state->toStateActionTable ); - removeDups( state->fromStateActionTable ); - removeDups( state->eofActionTable ); - } -} - -Action *Compiler::newAction( const String &name, InlineList *inlineList ) -{ - InputLoc loc; - loc.line = 1; - loc.col = 1; - loc.fileName = 0; - - Action *action = Action::cons( loc, name, inlineList ); - actionList.append( action ); - return action; -} - -void Compiler::initLongestMatchData() -{ - if ( regionSetList.length() > 0 ) { - /* The initActId action gives act a default value. */ - InlineList *il4 = InlineList::cons(); - il4->append( InlineItem::cons( InputLoc(), InlineItem::LmInitAct ) ); - initActId = newAction( "initact", il4 ); - initActId->isLmAction = true; - - /* The setTokStart action sets tokstart. 
*/ - InlineList *il5 = InlineList::cons(); - il5->append( InlineItem::cons( InputLoc(), InlineItem::LmSetTokStart ) ); - setTokStart = newAction( "tokstart", il5 ); - setTokStart->isLmAction = true; - - /* The setTokEnd action sets tokend. */ - InlineList *il3 = InlineList::cons(); - il3->append( InlineItem::cons( InputLoc(), InlineItem::LmSetTokEnd ) ); - setTokEnd = newAction( "tokend", il3 ); - setTokEnd->isLmAction = true; - - /* The action will also need an ordering: ahead of all user action - * embeddings. */ - initActIdOrd = curActionOrd++; - setTokStartOrd = curActionOrd++; - setTokEndOrd = curActionOrd++; - } -} - -void Compiler::finishGraphBuild( FsmGraph *graph ) -{ - /* Resolve any labels that point to multiple states. Any labels that are - * still around are referenced only by gotos and calls and they need to be - * made into deterministic entry points. */ - graph->deterministicEntry(); - - /* - * All state construction is now complete. - */ - - /* Transfer global error actions. */ - for ( StateList::Iter state = graph->stateList; state.lte(); state++ ) - graph->transferErrorActions( state, 0 ); - - removeActionDups( graph ); - - /* Remove unreachable states. There should be no dead end states. The - * subtract and intersection operators are the only places where they may - * be created and those operators clean them up. */ - graph->removeUnreachableStates(); - - /* No more fsm operations are to be done. Action ordering numbers are - * no longer of use and will just hinder minimization. Clear them. */ - graph->nullActionKeys(); - - /* Transition priorities are no longer of use. We can clear them - * because they will just hinder minimization as well. Clear them. */ - graph->clearAllPriorities(); - - /* Minimize here even if we minimized at every op. Now that function - * keys have been cleared we may get a more minimal fsm. */ - graph->minimizePartition2(); - graph->compressTransitions(); -} - -/* Build the name tree and supporting data structures. 
*/ -NameInst *Compiler::makeNameTree() -{ - /* Create the root name. */ - nextNameId = 1; - - /* First make the name tree. */ - for ( RegionImplList::Iter rel = regionImplList; rel.lte(); rel++ ) { - /* Recurse on the instance. */ - rel->makeNameTree( rel->loc, this ); - } - - return 0; -} - -FsmGraph *Compiler::makeAllRegions() -{ - /* Build the name tree and supporting data structures. */ - makeNameTree(); - NameInst **nameIndex = makeNameIndex(); - - int numGraphs = 0; - FsmGraph **graphs = new FsmGraph*[regionImplList.length()]; - - /* Make all the instantiations, we know that main exists in this list. */ - for ( RegionImplList::Iter rel = regionImplList; rel.lte(); rel++ ) { - /* Build the graph from a walk of the parse tree. */ - FsmGraph *newGraph = rel->walk( this ); - - /* Wrap up the construction. */ - finishGraphBuild( newGraph ); - - /* Save off the new graph. */ - graphs[numGraphs++] = newGraph; - } - - /* NOTE: If putting in minimization here we need to include eofTarget - * into the minimization algorithm. It is currently set by the longest - * match operator and not considered anywhere else. */ - - FsmGraph *all; - if ( numGraphs == 0 ) { - all = new FsmGraph; - all->lambdaFsm(); - } - else { - /* Add all the other graphs into the first. */ - all = graphs[0]; - all->globOp( graphs+1, numGraphs-1 ); - delete[] graphs; - } - - /* Go through all the token regions and check for lmRequiresErrorState. */ - for ( RegionImplList::Iter reg = regionImplList; reg.lte(); reg++ ) { - if ( reg->lmSwitchHandlesError ) - all->lmRequiresErrorState = true; - } - - all->nameIndex = nameIndex; - - return all; -} - -void Compiler::analyzeAction( Action *action, InlineList *inlineList ) -{ - /* FIXME: Actions used as conditions should be very constrained. 
*/ - for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) { - //if ( item->type == InlineItem::Call || item->type == InlineItem::CallExpr ) - // action->anyCall = true; - - /* Need to recurse into longest match items. */ - if ( item->type == InlineItem::LmSwitch ) { - RegionImpl *lm = item->tokenRegion; - for ( TokenInstanceListReg::Iter lmi = lm->tokenInstanceList; lmi.lte(); lmi++ ) { - if ( lmi->action != 0 ) - analyzeAction( action, lmi->action->inlineList ); - } - } - - if ( item->type == InlineItem::LmOnLast || - item->type == InlineItem::LmOnNext || - item->type == InlineItem::LmOnLagBehind ) - { - TokenInstance *lmi = item->longestMatchPart; - if ( lmi->action != 0 ) - analyzeAction( action, lmi->action->inlineList ); - } - - if ( item->children != 0 ) - analyzeAction( action, item->children ); - } -} - -void Compiler::analyzeGraph( FsmGraph *graph ) -{ - for ( ActionList::Iter act = actionList; act.lte(); act++ ) - analyzeAction( act, act->inlineList ); - - for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) { - /* The transition list. */ - for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) { - for ( ActionTable::Iter at = trans->actionTable; at.lte(); at++ ) - at->value->numTransRefs += 1; - } - - for ( ActionTable::Iter at = st->toStateActionTable; at.lte(); at++ ) - at->value->numToStateRefs += 1; - - for ( ActionTable::Iter at = st->fromStateActionTable; at.lte(); at++ ) - at->value->numFromStateRefs += 1; - - for ( ActionTable::Iter at = st->eofActionTable; at.lte(); at++ ) - at->value->numEofRefs += 1; - } -} - -FsmGraph *Compiler::makeScanner() -{ - /* Make the graph, do minimization. */ - FsmGraph *fsmGraph = makeAllRegions(); - - /* If any errors have occured in the input file then don't write anything. */ - if ( gblErrorCount > 0 ) - return 0; - - analyzeGraph( fsmGraph ); - - /* Decide if an error state is necessary. - * 1. There is an error transition - * 2. There is a gap in the transitions - * 3. 
The longest match operator requires it. */ - if ( fsmGraph->lmRequiresErrorState || fsmGraph->hasErrorTrans() ) - fsmGraph->errState = fsmGraph->addState(); - - /* State numbers need to be assigned such that all final states have a - * larger state id number than all non-final states. This enables the - * first_final mechanism to function correctly. We also want states to be - * ordered in a predictable fashion. So we first apply a depth-first - * search, then do a stable sort by final state status, then assign - * numbers. */ - - fsmGraph->depthFirstOrdering(); - fsmGraph->sortStatesByFinal(); - fsmGraph->setStateNumbers( 0 ); - - return fsmGraph; -} - -LangEl *Compiler::makeRepeatProd( const InputLoc &loc, Namespace *nspace, - const String &repeatName, UniqueType *ut ) -{ - LangEl *prodName = addLangEl( this, nspace, repeatName, LangEl::NonTerm ); - prodName->isRepeat = true; - - ProdElList *prodElList1 = new ProdElList; - - /* Build the first production of the repeat. */ - TypeRef *typeRef1 = TypeRef::cons( loc, ut ); - ProdEl *factor1 = new ProdEl( ProdEl::ReferenceType, - InputLoc(), 0, false, typeRef1, 0 ); - - UniqueType *prodNameUT = findUniqueType( TYPE_TREE, prodName ); - TypeRef *typeRef2 = TypeRef::cons( loc, prodNameUT ); - ProdEl *factor2 = new ProdEl( ProdEl::ReferenceType, - InputLoc(), 0, false, typeRef2, 0 ); - - prodElList1->append( factor1 ); - prodElList1->append( factor2 ); - - Production *newDef1 = Production::cons( InputLoc(), - prodName, prodElList1, String(), false, 0, - prodList.length(), prodName->defList.length() ); - - prodName->defList.append( newDef1 ); - prodList.append( newDef1 ); - - /* Build the second production of the repeat. 
*/ - ProdElList *prodElList2 = new ProdElList; - - Production *newDef2 = Production::cons( InputLoc(), - prodName, prodElList2, String(), false, 0, - prodList.length(), prodName->defList.length() ); - - prodName->defList.append( newDef2 ); - prodList.append( newDef2 ); - - return prodName; -} - -LangEl *Compiler::makeListProd( const InputLoc &loc, Namespace *nspace, - const String &listName, UniqueType *ut ) -{ - LangEl *prodName = addLangEl( this, nspace, listName, LangEl::NonTerm ); - prodName->isList = true; - - /* Build the first production of the list. */ - TypeRef *typeRef1 = TypeRef::cons( loc, ut ); - ProdEl *factor1 = new ProdEl( ProdEl::ReferenceType, InputLoc(), 0, false, typeRef1, 0 ); - - UniqueType *prodNameUT = findUniqueType( TYPE_TREE, prodName ); - TypeRef *typeRef2 = TypeRef::cons( loc, prodNameUT ); - ProdEl *factor2 = new ProdEl( ProdEl::ReferenceType, loc, 0, false, typeRef2, 0 ); - - ProdElList *prodElList1 = new ProdElList; - prodElList1->append( factor1 ); - prodElList1->append( factor2 ); - - Production *newDef1 = Production::cons( loc, - prodName, prodElList1, String(), false, 0, - prodList.length(), prodName->defList.length() ); - - prodName->defList.append( newDef1 ); - prodList.append( newDef1 ); - - /* Build the second production of the list. 
*/ - TypeRef *typeRef3 = TypeRef::cons( loc, ut ); - ProdEl *factor3 = new ProdEl( ProdEl::ReferenceType, loc, 0, false, typeRef3, 0 ); - - ProdElList *prodElList2 = new ProdElList; - prodElList2->append( factor3 ); - - Production *newDef2 = Production::cons( loc, - prodName, prodElList2, String(), false, 0, - prodList.length(), prodName->defList.length() ); - - prodName->defList.append( newDef2 ); - prodList.append( newDef2 ); - - return prodName; -} - -LangEl *Compiler::makeOptProd( const InputLoc &loc, Namespace *nspace, - const String &optName, UniqueType *ut ) -{ - LangEl *prodName = addLangEl( this, nspace, optName, LangEl::NonTerm ); - prodName->isOpt = true; - - ProdElList *prodElList1 = new ProdElList; - - /* Build the first production of the repeat. */ - TypeRef *typeRef1 = TypeRef::cons( loc, ut ); - ProdEl *factor1 = new ProdEl( ProdEl::ReferenceType, loc, 0, false, typeRef1, 0 ); - prodElList1->append( factor1 ); - - Production *newDef1 = Production::cons( loc, - prodName, prodElList1, String(), false, 0, - prodList.length(), prodName->defList.length() ); - - prodName->defList.append( newDef1 ); - prodList.append( newDef1 ); - - /* Build the second production of the repeat. */ - ProdElList *prodElList2 = new ProdElList; - - Production *newDef2 = Production::cons( loc, - prodName, prodElList2, String(), false, 0, - prodList.length(), prodName->defList.length() ); - - prodName->defList.append( newDef2 ); - prodList.append( newDef2 ); - - return prodName; -} - -Namespace *Namespace::findNamespace( const String &name ) -{ - for ( NamespaceVect::Iter c = childNamespaces; c.lte(); c++ ) { - if ( strcmp( name, (*c)->name ) == 0 ) - return *c; - } - return 0; -} - -Reduction *Namespace::findReduction( const String &name ) -{ - for ( ReductionVect::Iter r = reductions; r.lte(); r++ ) { - if ( strcmp( name, (*r)->name ) == 0 ) - return *r; - } - return 0; -} - -/* Search from a previously resolved qualification. (name 1+ in a qual list). 
*/ -Namespace *NamespaceQual::searchFrom( Namespace *from, StringVect::Iter &qualPart ) -{ - /* While there are still parts in the qualification. */ - while ( qualPart.lte() ) { - Namespace *child = from->findNamespace( *qualPart ); - if ( child == 0 ) - return 0; - - from = child; - qualPart.increment(); - } - - return from; -} - -Namespace *NamespaceQual::getQual( Compiler *pd ) -{ - /* Do the search only once. */ - if ( cachedNspaceQual != 0 ) - return cachedNspaceQual; - - if ( qualNames.length() == 0 ) { - /* No qualification, use the region the qualification was - * declared in. */ - cachedNspaceQual = declInNspace; - } - else if ( strcmp( qualNames[0], "root" ) == 0 ) { - /* First item is "root." Start the downward search from there. */ - StringVect::Iter qualPart = qualNames; - qualPart.increment(); - cachedNspaceQual = searchFrom( pd->rootNamespace, qualPart ); - return cachedNspaceQual; - } - else { - /* Have a qualification. Move upwards through the declared - * regions looking for the first part. */ - StringVect::Iter qualPart = qualNames; - Namespace *parentNamespace = declInNspace; - while ( parentNamespace != 0 ) { - /* Search for the first part underneath the current parent. */ - Namespace *child = parentNamespace->findNamespace( *qualPart ); - - if ( child != 0 ) { - /* Found the first part. Start going below the result. */ - qualPart.increment(); - cachedNspaceQual = searchFrom( child, qualPart ); - return cachedNspaceQual; - } - - /* Not found, move up to the parent. */ - parentNamespace = parentNamespace->parentNamespace; - } - - /* Failed to find the place to start from. 
*/ - cachedNspaceQual = 0; - } - - return cachedNspaceQual; -} - -void Compiler::initEmptyScanner( RegionSet *regionSet, TokenRegion *reg ) -{ - if ( reg != 0 && reg->impl->tokenInstanceList.length() == 0 ) { - reg->impl->wasEmpty = true; - - static int def = 1; - String name( 64, "__%p_DEF_PAT_%d", reg, def++ ); - - LexJoin *join = LexJoin::cons( LexExpression::cons( BT_Any ) ); - - TokenDef *tokenDef = TokenDef::cons( name, String(), false, false, - join, 0, internal, nextTokenId++, rootNamespace, - regionSet, 0, 0 ); - - TokenInstance *tokenInstance = TokenInstance::cons( tokenDef, - join, internal, nextTokenId++, - rootNamespace, reg ); - - reg->impl->tokenInstanceList.append( tokenInstance ); - - /* These do not go in the namespace so so they cannot get declared - * in the declare pass. */ - LangEl *lel = addLangEl( this, rootNamespace, name, LangEl::Term ); - - tokenInstance->tokenDef->tdLangEl = lel; - lel->tokenDef = tokenDef; - } -} - -void Compiler::initEmptyScanners() -{ - for ( RegionSetList::Iter regionSet = regionSetList; regionSet.lte(); regionSet++ ) { - initEmptyScanner( regionSet, regionSet->tokenIgnore ); - initEmptyScanner( regionSet, regionSet->tokenOnly ); - initEmptyScanner( regionSet, regionSet->ignoreOnly ); - initEmptyScanner( regionSet, regionSet->collectIgnore ); - } -} - -pda_run *Compiler::parsePattern( program_t *prg, tree_t **sp, const InputLoc &loc, - int parserId, struct input_impl *sourceStream ) -{ - struct pda_run *pdaRun = new pda_run; - colm_pda_init( prg, pdaRun, pdaTables, parserId, 0, false, 0, false ); - - long pcr = colm_parse_loop( prg, sp, pdaRun, sourceStream, PCR_START ); - assert( pcr == PCR_DONE ); - if ( pdaRun->parse_error ) { - cerr << ( loc.fileName != 0 ? 
loc.fileName : "<input>" ) << - ":" << loc.line << ":" << loc.col; - - if ( pdaRun->parse_error_text != 0 ) { - colm_data *tokdata = pdaRun->parse_error_text->tokdata; - cerr << ": relative error: "; - cerr.write( tokdata->data, tokdata->length ); - } - else { - cerr << ": parse error"; - } - - cerr << endl; - gblErrorCount += 1; - } - - return pdaRun; -} - -void Compiler::parsePatterns() -{ - program_t *prg = colm_new_program( runtimeData ); - - colm_set_debug( prg, gblActiveRealm ); - - /* Turn off context-dependent parsing. */ - prg->ctx_dep_parsing = 0; - - tree_t **sp = prg->stack_root; - - for ( ConsList::Iter cons = replList; cons.lte(); cons++ ) { - if ( cons->langEl != 0 ) { - struct input_impl *in = colm_impl_new_cons( strdup("<internal>"), cons ); - cons->pdaRun = parsePattern( prg, sp, cons->loc, cons->langEl->parserId, in ); - } - } - - for ( PatList::Iter pat = patternList; pat.lte(); pat++ ) { - struct input_impl *in = colm_impl_new_pat( strdup("<internal>"), pat ); - pat->pdaRun = parsePattern( prg, sp, pat->loc, pat->langEl->parserId, in ); - } - - /* Bail on above errors. */ - if ( gblErrorCount > 0 ) - exit(1); - - fillInPatterns( prg ); -} - -void Compiler::collectParserEls( BstSet<LangEl*> &parserEls ) -{ - for ( PatList::Iter pat = patternList; pat.lte(); pat++ ) { - /* We assume the reduction action compilation phase was run before - * pattern parsing and it decorated the pattern with the target type. */ - assert( pat->langEl != 0 ); - if ( pat->langEl->type != LangEl::NonTerm ) - error(pat->loc) << "pattern type is not a non-terminal" << endp; - - if ( pat->langEl->parserId < 0 ) { - /* Make a parser for the language element. */ - parserEls.insert( pat->langEl ); - pat->langEl->parserId = nextParserId++; - } - } - - for ( ConsList::Iter repl = replList; repl.lte(); repl++ ) { - /* We need the the language element from the compilation process. 
*/ - assert( repl->langEl != 0 ); - - if ( repl->langEl->parserId < 0 ) { - /* Make a parser for the language element. */ - parserEls.insert( repl->langEl ); - repl->langEl->parserId = nextParserId++; - } - } - - /* Make parsers that we need. */ - for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { - if ( lel->parserId >= 0 ) - parserEls.insert( lel ); - } -} - -void Compiler::writeHostCall() -{ - /* - * Host Call - */ - for ( FunctionList::Iter hc = inHostList; hc.lte(); hc++ ) { - *outStream << - "value_t " << hc->hostCall << "( program_t *prg, tree_t **sp"; - for ( ParameterList::Iter p = *hc->paramList; p.lte(); p++ ) { - *outStream << - ", value_t"; - } - *outStream << " );\n"; - } - - *outStream << - "tree_t **" << objectName << "_host_call( program_t *prg, long code, tree_t **sp )\n" - "{\n" - " value_t rtn = 0;\n" - " switch ( code ) {\n"; - - for ( FunctionList::Iter hc = inHostList; hc.lte(); hc++ ) { - *outStream << - " case " << hc->funcId << ": {\n"; - - int pos = hc->paramList->length() - 1; - for ( ParameterList::Iter p = *hc->paramList; p.lte(); p++, pos-- ) { - *outStream << - " value_t p" << pos << " = vm_pop_value();\n"; - } - - *outStream << - " rtn = " << hc->hostCall << "( prg, sp"; - - pos = 0; - for ( ParameterList::Iter p = *hc->paramList; p.lte(); p++, pos++ ) { - *outStream << - ", p" << pos; - } - *outStream << " );\n" - " break;\n" - " }\n"; - } - - *outStream << - " }\n" - " vm_push_value( rtn );\n" - " return sp;\n" - "}\n"; - -} - -void Compiler::generateOutput( long activeRealm, bool includeCommit ) -{ - FsmCodeGen *fsmGen = new FsmCodeGen( *outStream, redFsm, fsmTables ); - - PdaCodeGen *pdaGen = new PdaCodeGen( *outStream ); - - fsmGen->writeIncludes(); - pdaGen->defineRuntime(); - fsmGen->writeCode(); - - /* Make parsers that we need. */ - pdaGen->writeParserData( 0, pdaTables ); - - /* Write the runtime data. 
*/ - pdaGen->writeRuntimeData( runtimeData, pdaTables ); - - writeHostCall(); - - if ( includeCommit ) - writeCommitStub(); - - if ( !gblLibrary ) - fsmGen->writeMain( activeRealm ); - - outStream->flush(); -} - - -void Compiler::prepGrammar() -{ - /* This will create language elements. */ - wrapNonTerminals(); - - makeLangElIds(); - makeStructElIds(); - makeLangElNames(); - makeDefinitionNames(); - noUndefindLangEls(); - - /* Put the language elements in an index by language element id. */ - langElIndex = new LangEl*[nextLelId+1]; - memset( langElIndex, 0, sizeof(LangEl*)*(nextLelId+1) ); - for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) - langElIndex[lel->id] = lel; - - makeProdFsms(); - - /* Allocate the Runtime data now. Every PdaTable that we make - * will reference it, but it will be filled in after all the tables are - * built. */ - runtimeData = new colm_sections; -} - -void Compiler::compile() -{ - beginProcessing(); - initKeyOps(); - - /* Declare types. */ - declarePass(); - - /* Resolve type references. */ - resolvePass(); - - makeTerminalWrappers(); - makeEofElements(); - - /* - * Parsers - */ - - /* Init the longest match data */ - initLongestMatchData(); - FsmGraph *fsmGraph = makeScanner(); - - prepGrammar(); - - placeAllLanguageObjects(); - placeAllStructObjects(); - placeAllFrameObjects(); - placeAllFunctions(); - - /* Compile bytecode. */ - compileByteCode(); - - /* Make the reduced scanner. */ - RedFsmBuild reduce( this, fsmGraph ); - redFsm = reduce.reduceMachine(); - - BstSet<LangEl*> parserEls; - collectParserEls( parserEls ); - - makeParser( parserEls ); - - /* Make the scanner tables. */ - fsmTables = redFsm->makeFsmTables(); - - /* Now that all parsers are built, make the global runtimeData. */ - makeRuntimeData(); - - /* - * All compilation is now complete. - */ - - /* Parse constructors and patterns. 
*/ - parsePatterns(); -} - diff --git a/src/compiler.h b/src/compiler.h deleted file mode 100644 index 1b03504f..00000000 --- a/src/compiler.h +++ /dev/null @@ -1,1155 +0,0 @@ -/* - * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef _COLM_PARSEDATA_H -#define _COLM_PARSEDATA_H - -#include <limits.h> - -#include <iostream> - -#include <avlmap.h> -#include <avlset.h> -#include <bstmap.h> -#include <vector.h> -#include <bstset.h> -#include <dlist.h> -#include <dlistmel.h> -#include <fsmgraph.h> -#include <compare.h> - -#include "global.h" -#include "keyops.h" -#include "parsetree.h" -#include "cstring.h" -#include "pdagraph.h" -#include "pdarun.h" -#include "bytecode.h" -#include "program.h" -#include "internal.h" - -using std::ostream; - -struct exit_object { }; -extern exit_object endp; -void operator<<( std::ostream &out, exit_object & ); -extern const char *objectName; -extern bool hostAdapters; - -/* Forwards. */ -struct RedFsm; -struct LangEl; -struct Compiler; -struct PdaCodeGen; -struct FsmCodeGen; - -#define SHIFT_CODE 0x1 -#define REDUCE_CODE 0x2 -#define SHIFT_REDUCE_CODE 0x3 - -typedef Vector<const char**> CharVectVect; - -/* This is used for tracking the current stack of include file/machine pairs. It is - * is used to detect and recursive include structure. */ -struct IncludeStackItem -{ - IncludeStackItem( const char *fileName ) - : fileName(fileName) {} - - const char *fileName; -}; - -typedef Vector<IncludeStackItem> IncludeStack; -typedef Vector<const char *> ArgsVector; - -struct DefineArg -{ - DefineArg( String name, String value ) - : name(name), value(value) {} - - String name; - String value; -}; - -typedef Vector<DefineArg> DefineVector; - -extern DefineVector defineArgs; -extern ArgsVector includePaths; - -inline long makeReduceCode( long reduction, bool isShiftReduce ) -{ - return ( isShiftReduce ? SHIFT_REDUCE_CODE : REDUCE_CODE ) | - ( reduction << 2 ); -} - -struct ProdEl; -struct ProdElList; -struct PdaLiteral; -struct Production; - -/* A pointer to this is in struct pda_run, but it's specification is not known by the - * runtime code. 
The runtime functions that access it are defined in - * ctinput.cpp and stubbed in fsmcodegen.cpp */ -struct bindings - : public Vector<parse_tree_t*> -{}; - -struct DefListEl { Production *prev, *next; }; -struct LelDefListEl { Production *prev, *next; }; -typedef Vector< LangEl* > LangElVect; -typedef Vector< ProdEl* > FactorVect; - -typedef AvlMap<String, long, ColmCmpStr> StringMap; -typedef AvlMapEl<String, long> StringMapEl; - -enum PredType { - PredLeft, - PredRight, - PredNonassoc, - PredNone -}; - -struct PredDecl -{ - PredDecl( TypeRef *typeRef, long predValue ) - : typeRef(typeRef), predValue(predValue) - {} - - TypeRef *typeRef; - PredType predType; - long predValue; - - PredDecl *prev, *next; -}; - -typedef DList<PredDecl> PredDeclList; - -/* Graph dictionary. */ -struct Production -: - public DefListEl, public LelDefListEl -{ - Production() - : - prodName(0), prodElList(0), prodCommit(false), redBlock(0), - prodId(0), prodNum(0), fsm(0), fsmLength(0), uniqueEmptyLeader(0), - isLeftRec(false), localFrame(0), lhsField(0), predOf(0) - {} - - static Production* cons( const InputLoc &loc, LangEl *prodName, ProdElList *prodElList, - String name, bool prodCommit, CodeBlock *redBlock, int prodId, int prodNum ) - { - Production *p = new Production; - p->loc = loc; - p->prodName = prodName; - p->_name = name; - p->prodElList = prodElList; - p->prodCommit = prodCommit; - p->redBlock = redBlock; - p->prodId = prodId; - p->prodNum = prodNum; - return p; - } - - InputLoc loc; - LangEl *prodName; - ProdElList *prodElList; - String _name; - bool prodCommit; - - CodeBlock *redBlock; - - int prodId; - int prodNum; - - PdaGraph *fsm; - int fsmLength; - String data; - LongSet reducesTo; - - LangEl *uniqueEmptyLeader; - - ProdIdSet nonTermFirstSet; - AlphSet firstSet; - bool isLeftRec; - - ObjectDef *localFrame; - ObjectField *lhsField; - - LangEl *predOf; - - UnsignedCharVect copy; -}; - -struct CmpDefById -{ - static int compare( Production *d1, Production *d2 ) - { - 
if ( d1->prodId < d2->prodId ) - return -1; - else if ( d1->prodId > d2->prodId ) - return 1; - else - return 0; - } -}; - - -/* Map dotItems to productions. */ -typedef BstMap< int, Production*, CmpOrd<int> > DotItemIndex; -typedef BstMapEl< int, Production*> DotItemIndexEl; - -struct DefList -: - public DListMel<Production, DefListEl> -{}; - -/* A vector of production vectors. Each non terminal can have many productions. */ -struct LelDefList -: - public DListMel<Production, LelDefListEl> -{}; - -/* A set of machines made during a closure round. */ -typedef Vector< PdaGraph* > Machines; - -/* List of language elements. */ -typedef DList<LangEl> LelList; - -typedef Vector< TokenInstance* > TokenInstanceVect; - -struct UniqueType; - -typedef Vector<LangEl*> LangElVect; -typedef BstSet<LangEl*> LangElSet; - -/* A language element class. Can be a nonTerm or a term. */ -struct LangEl : public DListEl<LangEl> -{ - enum Type { Unknown, Term, NonTerm }; - - LangEl( Namespace *nspace, const String &name, Type type ); - ~LangEl(); - - /* The region the language element was defined in. */ - Namespace *nspace; - - String name; - String lit; - - String fullName; - String fullLit; - - /* For referencing the type. */ - String refName; - - /* For declaring things inside the type. */ - String declName; - - String xmlTag; - - Type type; - long id; - String displayString; - long numAppearances; - bool commit; - bool isIgnore; - bool reduceFirst; - bool isLiteral; - bool isRepeat; - bool isList; - bool isOpt; - bool parseStop; - bool isEOF; - - LangEl *repeatOf; - - /* Productions from the language element if it is a non-terminal. 
*/ - LelDefList defList; - - TokenDef *tokenDef; - Production *rootDef; - LangEl *termDup; - LangEl *eofLel; - - PdaGraph *pdaGraph; - struct pda_tables *pdaTables; - - PdaState *startState; - - CodeBlock *transBlock; - - ObjectDef *objectDef; - - long thisSize; - long ofiOffset; - - long parserId; - - PredType predType; - long predValue; - - StructDef *contextDef; - StructDef *contextIn; - bool noPreIgnore; - bool noPostIgnore; - bool isZero; - RegionSet *regionSet; -}; - -struct ProdEl -{ - /* Language elements a factor node can be. */ - enum Type { - LiteralType, - ReferenceType - }; - - /* Construct with a reference to a var def. */ - ProdEl( Type type, const InputLoc &loc, ObjectField *captureField, - bool commit, TypeRef *typeRef, int priorVal ) - : - type(type), - production(0), - captureField(captureField), - rhsElField(0), - commit(commit), - typeRef(typeRef), - langEl(0), - priorVal(priorVal) - {} - - ProdEl( const InputLoc &loc, TypeRef *typeRef ) - : - type(ReferenceType), - production(0), - captureField(0), - rhsElField(0), - commit(false), - typeRef(typeRef), - langEl(0), - priorVal(0) - {} - - Type type; - Production *production; - int pos; - ObjectField *captureField; - ObjectField *rhsElField; - bool commit; - TypeRef *typeRef; - LangEl *langEl; - int priorVal; - - ProdEl *prev, *next; -}; - -struct ProdElList : public DList<ProdEl> -{ - PdaGraph *walk( Compiler *pd, Production *prod ); -}; - -/* This should be renamed. It is a literal string in a type reference. */ -struct PdaLiteral -{ - PdaLiteral( const InputLoc &loc, const String &data ) - : loc(loc), data(data), value(0) { } - - InputLoc loc; - String data; - long value; -}; - -/* Nodes in the tree that use this action. */ -typedef Vector<NameInst*> ActionRefs; - -/* Element in list of actions. Contains the string for the code to exectute. 
*/ -struct Action -: - public DListEl<Action>, - public AvlTreeEl<Action> -{ -public: - - static Action *cons( const InputLoc &loc, const String &name, InlineList *inlineList ) - { - Action *a = new Action; - a->loc = (loc); - a->name = (name); - a->markType = (MarkNone); - a->objField = (0); - a->markId = (-1); - a->inlineList = (inlineList); - a->actionId = (-1); - a->numTransRefs = (0); - a->numToStateRefs = (0); - a->numFromStateRefs = (0); - a->numEofRefs = (0); - a->numCondRefs = (0); - a->anyCall = (false); - a->isLmAction = (false); - return a; - } - - static Action *cons( MarkType markType, long markId ) - { - Action *a = new Action; - a->name = ("mark"); - a->markType = (markType); - a->objField = (0); - a->markId = (markId); - a->inlineList = (InlineList::cons()); - a->actionId = (-1); - a->numTransRefs = (0); - a->numToStateRefs = (0); - a->numFromStateRefs = (0); - a->numEofRefs = (0); - a->numCondRefs = (0); - a->anyCall = (false); - a->isLmAction = (false); - return a; - } - - /* Key for action dictionary. */ - const String &getKey() const { return name; } - - /* Data collected during parse. */ - InputLoc loc; - String name; - - MarkType markType; - ObjectField *objField; - long markId; - - InlineList *inlineList; - int actionId; - - void actionName( ostream &out ) - { - if ( name != 0 ) - out << name; - else - out << loc.line << ":" << loc.col; - } - - /* Places in the input text that reference the action. */ - ActionRefs actionRefs; - - /* Number of references in the final machine. */ - bool numRefs() - { return numTransRefs + numToStateRefs + numFromStateRefs + numEofRefs; } - int numTransRefs; - int numToStateRefs; - int numFromStateRefs; - int numEofRefs; - int numCondRefs; - bool anyCall; - - bool isLmAction; -}; - -/* A list of actions. 
*/ -typedef DList<Action> ActionList; - -struct VarDef; -struct LexJoin; -struct LexTerm; -struct FactorAug; -struct FactorLabel; -struct FactorRep; -struct FactorNeg; -struct Factor; -struct Literal; -struct Range; -struct RegExpr; -struct ReItem; -struct ReOrBlock; -struct ReOrItem; -struct TokenRegion; - -/* tree_t of instantiated names. */ -typedef BstMapEl<String, NameInst*> NameMapEl; -typedef BstMap<String, NameInst*, ColmCmpStr> NameMap; -typedef Vector<NameInst*> NameVect; -typedef BstSet<NameInst*> NameSet; - -/* Node in the tree of instantiated names. */ -struct NameInst -{ - NameInst( int id ) - : id(id) {} - - int id; - - /* Pointers for the name search queue. */ - NameInst *prev, *next; -}; - -typedef DList<NameInst> NameInstList; - -/* Stack frame used in walking the name tree. */ -struct NameFrame -{ - NameInst *prevNameInst; - int prevNameChild; - NameInst *prevLocalScope; -}; - -/* Class to collect information about the machine during the - * parse of input. */ -struct Compiler -{ - /* Create a new parse data object. This is done at the beginning of every - * fsm specification. */ - Compiler(); - ~Compiler(); - - /* - * Setting up the graph dict. - */ - - void compileLiteralTokens(); - void initEmptyScanners(); - void initEmptyScanner( RegionSet *regionSet, TokenRegion *reg ); - void initUniqueTypes(); - - /* Initialize a graph dict with the basic fsms. */ - void initGraphDict(); - void createBuiltin( const char *name, BuiltinMachine builtin ); - - /* Make a name id in the current name instantiation scope if it is not - * already there. */ - NameInst *makeJoinNameTree( LexJoin *join ); - NameInst *makeNameTree(); - NameInst **makeNameIndex(); - - void printNameTree( NameInst *rootName ); - void printNameIndex( NameInst **nameIndex ); - - /* Resove name references in action code and epsilon transitions. 
*/ - NameSet resolvePart( NameInst *refFrom, const char *data, bool recLabelsOnly ); - void resolveFrom( NameSet &result, NameInst *refFrom, - const NameRef &nameRef, int namePos ); - - /* Set the alphabet type. If type types are not valid returns false. */ - bool setAlphType( char *s1, char *s2 ); - bool setAlphType( char *s1 ); - - /* Unique actions. */ - void removeDups( ActionTable &actionTable ); - void removeActionDups( FsmGraph *graph ); - - /* Dumping the name instantiation tree. */ - void printNameInst( NameInst *nameInst, int level ); - - /* Make the graph from a graph dict node. Does minimization. */ - void finishGraphBuild( FsmGraph *graph ); - FsmGraph *makeAllRegions(); - FsmGraph *makeScanner(); - - void analyzeAction( Action *action, InlineList *inlineList ); - void analyzeGraph( FsmGraph *graph ); - void resolvePrecedence( PdaGraph *pdaGraph ); - LangEl *predOf( PdaTrans *trans, long action ); - bool precedenceSwap( long action1, long action2, LangEl *l1, LangEl *l2 ); - bool precedenceRemoveBoth( LangEl *l1, LangEl *l2 ); - - void placeFrameFields( ObjectDef *localFrame ); - void placeUserFunction( Function *func, bool isUserIter ); - void placeAllStructObjects(); - void placeAllLanguageObjects(); - void placeAllFrameObjects(); - void placeAllFunctions(); - - void initKeyOps(); - - /* - * Data collected during the parse. - */ - - /* List of actions. Will be pasted into a switch statement. */ - ActionList actionList; - - /* The id of the next priority name and label. */ - int nextPriorKey, nextNameId; - - /* Alphabet type. */ - HostType *userAlphType; - bool alphTypeSet; - - /* Element type and get key expression. */ - InlineList *getKeyExpr; - InlineList *accessExpr; - InlineList *curStateExpr; - - /* The alphabet range. */ - char *lowerNum, *upperNum; - Key lowKey, highKey; - InputLoc rangeLowLoc, rangeHighLoc; - - /* Number of errors encountered parsing the fsm spec. */ - int errorCount; - - /* Counting the action and priority ordering. 
*/ - int curActionOrd; - int curPriorOrd; - - /* Root of the name tree. */ - NameInst *curNameInst; - int curNameChild; - NameInstList nameInstList; - - /* The place where resolved epsilon transitions go. These cannot go into - * the parse tree because a single epsilon op can resolve more than once - * to different nameInsts if the machine it's in is used more than once. */ - NameVect epsilonResolvedLinks; - int nextEpsilonResolvedLink; - - /* Root of the name tree used for doing local name searches. */ - NameInst *localNameScope; - - void setLmInRetLoc( InlineList *inlineList ); - void initLongestMatchData(); - - /* Counter for assigning ids to longest match items. */ - int nextTokenId; - - RegionImplList regionImplList; - RegionList regionList; - RegionSetList regionSetList; - - NamespaceList namespaceList; - - Action *newAction( const String &name, InlineList *inlineList ); - - Action *setTokStart; - int setTokStartOrd; - - Action *initActId; - int initActIdOrd; - - Action *setTokEnd; - int setTokEndOrd; - - CodeBlock *rootCodeBlock; - - void beginProcessing() - { - ::keyOps = &thisKeyOps; - } - - KeyOps thisKeyOps; - - UniqueType *mainReturnUT; - - CharVectVect streamFileNames; - - /* CONTEXT FREE */ - ProdElList *makeProdElList( LangEl *langEl ); - void wrapNonTerminals(); - void makeDefinitionNames(); - void noUndefindLangEls(); - void declareBaseLangEls(); - void makeLangElIds(); - void makeStructElIds(); - void makeLangElNames(); - void makeTerminalWrappers(); - void makeEofElements(); - void makeIgnoreCollectors(); - void resolvePrecedence(); - void resolveReductionActions(); - void findReductionActionProds(); - void resolveReducers(); - - Production *findProductionByLabel( LangEl *langEl, String label ); - - void declarePass(); - void resolvePass(); - - /* Parser generation. 
*/ - void advanceReductions( PdaGraph *pdaGraph ); - void sortActions( PdaGraph *pdaGraph ); - void addDupTerms( PdaGraph *pdaGraph ); - void linkExpansions( PdaGraph *pdaGraph ); - void lalr1FollowEpsilonOp( PdaGraph *pdaGraph ); - - void transferCommits( PdaGraph *pdaGraph, PdaTrans *trans, PdaState *state, long prodId ); - - void lalr1AddFollow2( PdaGraph *pdaGraph, PdaTrans *trans, FollowToAdd &followKeys ); - void lalr1AddFollow1( PdaGraph *pdaGraph, PdaState *state ); - - void lalr1AddFollow2( PdaGraph *pdaGraph, PdaTrans *trans, long followKey, long prior ); - void lalr1AddFollow1( PdaGraph *pdaGraph, PdaTrans *trans ); - - void lalr1AddFollowSets( PdaGraph *pdaGraph, LangElSet &parserEls ); - - void lr0BringInItem( PdaGraph *pdaGraph, PdaState *dest, PdaState *prodState, - PdaTrans *expandFrom, Production *prod ); - void lr0InvokeClosure( PdaGraph *pdaGraph, PdaState *state ); - void lr0CloseAllStates( PdaGraph *pdaGraph ); - - void lalr1GenerateParser( PdaGraph *pdaGraph, LangElSet &parserEls ); - - void reduceActions( PdaGraph *pdaGraph ); - - bool makeNonTermFirstSetProd( Production *prod, PdaState *state ); - void makeNonTermFirstSets(); - - bool makeFirstSetProd( Production *prod, PdaState *state ); - void makeFirstSets(); - - int findIndexOff( struct pda_tables *pdaTables, PdaGraph *pdaGraph, - PdaState *state, int &currLen ); - void trySetTime( PdaTrans *trans, long code, long &time ); - void addRegion( PdaState *tabState, PdaTrans *pdaTrans, long pdaKey, - bool noPreIgnore, bool noPostIgnore ); - PdaState *followProd( PdaState *tabState, PdaState *prodState ); - void findFollow( AlphSet &result, PdaState *overTab, - PdaState *overSrc, Production *parentDef ); - void pdaActionOrder( PdaGraph *pdaGraph, LangElSet &parserEls ); - void pdaOrderFollow( LangEl *rootEl, PdaState *tabState, - PdaTrans *tabTrans, PdaTrans *srcTrans, - Production *parentDef, Production *definition, long &time ); - void pdaOrderProd( LangEl *rootEl, PdaState *tabState, - 
PdaState *srcState, Production *parentDef, long &time ); - void analyzeMachine( PdaGraph *pdaGraph, LangElSet &parserEls ); - - void makeProdFsms(); - void insertUniqueEmptyProductions(); - void printNonTermFirstSets(); - void printFirstSets(); - - LangEl *makeRepeatProd( const InputLoc &loc, Namespace *nspace, - const String &repeatName, UniqueType *ut ); - LangEl *makeListProd( const InputLoc &loc, Namespace *nspace, - const String &listName, UniqueType *ut ); - LangEl *makeOptProd( const InputLoc &loc, Namespace *nspace, - const String &optName, UniqueType *ut ); - void resolveProdEl( ProdEl *prodEl ); - void resolveProductionEls(); - - void addMatchText( ObjectDef *frame, LangEl *lel ); - void addMatchLength( ObjectDef *frame, LangEl *lel ); - void addInput( ObjectDef *frame ); - void addThis( ObjectDef *frame ); - void addTransTokVar( ObjectDef *frame, LangEl *lel ); - void addProdRHSVars( ObjectDef *localFrame, ProdElList *prodElList ); - void addProdRedObjectVar( ObjectDef *localFrame, LangEl *langEl ); - void addProdObjects(); - - void addProdRHSLoads( Production *prod, CodeVect &code, long &insertPos ); - void addProdLHSLoad( Production *prod, CodeVect &code, long &insertPos ); - void addPushBackLHS( Production *prod, CodeVect &code, long &insertPos ); - - void prepGrammar(); - struct pda_run *parsePattern( program_t *prg, tree_t **sp, const InputLoc &loc, - int parserId, struct input_impl *sourceStream ); - void parsePatterns(); - - void collectParserEls( LangElSet &parserEls ); - void makeParser( LangElSet &parserEls ); - PdaGraph *makePdaGraph( BstSet<LangEl*> &parserEls ); - struct pda_tables *makePdaTables( PdaGraph *pdaGraph ); - - void fillInPatterns( program_t *prg ); - void makeRuntimeData(); - - /* Generate and write out the fsm. 
*/ - void generateGraphviz(); - - void verifyParseStopGrammar( LangEl *langEl, PdaGraph *pdaGraph ); - void computeAdvanceReductions( LangEl *langEl, PdaGraph *pdaGraph ); - - void initListElField( GenericType *gen, const char *name, int offset ); - void initListFieldEl( GenericType *gen, const char *name, int offset ); - void initListFieldVal( GenericType *gen, const char *name, int offset ); - - void initListFields( GenericType *gen ); - void initListFunctions( GenericType *gen ); - - void initMapElKey( GenericType *gen, const char *name, int offset ); - void initMapElField( GenericType *gen, const char *name, int offset ); - void initMapField( GenericType *gen, const char *name, int offset ); - - void initMapFields( GenericType *gen ); - void initMapFunctions( GenericType *gen ); - - void initVectorFunctions( GenericType *gen ); - void initParserField( GenericType *gen, const char *name, - int offset, TypeRef *typeRef ); - void initParserFunctions( GenericType *gen ); - void initParserFields( GenericType *gen ); - - void addStdin(); - void addStdout(); - void addStderr(); - void addArgv(); - void addStds(); - void addError(); - void addDefineArgs(); - int argvOffset(); - int arg0Offset(); - int stdsOffset(); - void makeDefaultIterators(); - void addLengthField( ObjectDef *objDef, code_t getLength ); - ObjectDef *findObject( const String &name ); - void resolveListElementOf( ObjectDef *container, ObjectDef *obj, ElementOf *elof ); - void resolveMapElementOf( ObjectDef *container, ObjectDef *obj, ElementOf *elof ); - void resolveElementOf( ObjectDef *obj ); - void makeFuncVisible( Function *func, bool isUserIter ); - void makeInHostVisible( Function *func ); - - void declareFunction( Function *func ); - void declareReductionCode( Production *prod ); - void declareTranslateBlock( LangEl *langEl ); - void declarePreEof( TokenRegion *region ); - void declareRootBlock(); - void declareByteCode(); - - void resolveFunction( Function *func ); - void resolveInHost( 
Function *func ); - void resolvePreEof( TokenRegion *region ); - void resolveRootBlock(); - void resolveTranslateBlock( LangEl *langEl ); - void resolveReductionCode( Production *prod ); - void resolveParseTree(); - - void compileFunction( Function *func, CodeVect &code ); - void compileFunction( Function *func ); - void compileUserIter( Function *func, CodeVect &code ); - void compileUserIter( Function *func ); - void compilePreEof( TokenRegion *region ); - void compileRootBlock(); - void compileTranslateBlock( LangEl *langEl ); - void findLocals( ObjectDef *localFrame, CodeBlock *block ); - void makeProdCopies( Production *prod ); - void compileReductionCode( Production *prod ); - void removeNonUnparsableRepls(); - void compileByteCode(); - - void resolveUses(); - void generateOutput( long activeRealm, bool includeCommit ); - void compile(); - - void openNameSpace( ostream &out, Namespace *nspace ); - void closeNameSpace( ostream &out, Namespace *nspace ); - void refNameSpace( LangEl *lel, Namespace *nspace ); - void generateExports(); - void generateExportsImpl(); - - struct local_info *makeLocalInfo( Locals &locals ); - short *makeTrees( ObjectDef *objectDef, int &numTrees ); - - /* - * Graphviz Generation - */ - void writeTransList( PdaState *state ); - void writeDotFile( PdaGraph *graph ); - void writeDotFile( ); - - - /* - * Data collected during the parse. - */ - - LelList langEls; - StructElList structEls; - DefList prodList; - - /* Dumping. */ - DotItemIndex dotItemIndex; - - PredDeclList predDeclList; - - /* The name of the file the fsm is from, and the spec name. */ - // EXISTS IN RL: char *fileName; - String parserName; - // EXISTS IN RL: InputLoc sectionLoc; - - /* How to access the instance data. */ - String access; - - /* The name of the token structure. 
*/ - String tokenStruct; - - GenericType *anyList; - GenericType *anyMap; - GenericType *anyVector; - - LangEl *ptrLangEl; - LangEl *strLangEl; - LangEl *anyLangEl; - LangEl *rootLangEl; - LangEl *noTokenLangEl; - LangEl *eofLangEl; - LangEl *errorLangEl; - LangEl *ignoreLangEl; - - Namespace *rootNamespace; - - int nextLelId; - int firstNonTermId; - int firstStructElId; - int structInbuiltId; - int structInputId; - int structStreamId; - - LangEl **langElIndex; - PdaState *actionDestState; - DefSetSet prodSetSet; - - Production **prodIdIndex; - AlphSet literalSet; - - PatList patternList; - ConsList replList; - ParserTextList parserTextList; - - StructDef *global; - StructEl *globalSel; - ObjectDef *globalObjectDef; - ObjectField *arg0; - ObjectField *argv; - ObjectField *stds; - StructDef *argvEl; - StructEl *argvElSel; - StructEl *stdsElSel; - - StructDef *input; - StructDef *stream; - StructEl *inputSel; - StructEl *streamSel; - - VectorTypeIdMap vectorTypeIdMap; - - UniqueType *findUniqueType( enum TYPE typeId ); - UniqueType *findUniqueType( enum TYPE typeId, LangEl *langEl ); - UniqueType *findUniqueType( enum TYPE typeId, IterDef *iterDef ); - UniqueType *findUniqueType( enum TYPE typeId, StructEl *structEl ); - UniqueType *findUniqueType( enum TYPE typeId, GenericType *generic ); - - UniqueGeneric *findUniqueGeneric( UniqueGeneric::Type type, - UniqueType *utKey, UniqueType *utValue ); - UniqueGeneric *findUniqueGeneric( UniqueGeneric::Type type, - UniqueType *utValue ); - - UniqueType *uniqueTypeNil; - UniqueType *uniqueTypeVoid; - UniqueType *uniqueTypePtr; - UniqueType *uniqueTypeBool; - UniqueType *uniqueTypeInt; - UniqueType *uniqueTypeStr; - UniqueType *uniqueTypeIgnore; - UniqueType *uniqueTypeAny; - - UniqueType *uniqueTypeInput; - UniqueType *uniqueTypeStream; - - UniqueTypeMap uniqeTypeMap; - UniqueRepeatMap uniqeRepeatMap; - UniqueGenericMap uniqueGenericMap; - - void declareGlobalFields(); - void declareStrFields(); - - void declareInputField( 
ObjectDef *objDef, code_t getLength ); - void declareInputFields(); - - void declareStreamField( ObjectDef *objDef, code_t getLength ); - void declareStreamFields(); - - void declareIntFields(); - void declareTokenFields(); - - ObjectDef *intObj; - ObjectDef *strObj; - ObjectDef *inputObj; - ObjectDef *streamObj; - - struct fsm_tables *fsmTables; - struct colm_sections *runtimeData; - - int nextPatConsId; - int nextGenericId; - - FunctionList functionList; - FunctionList inHostList; - int nextFuncId; - int nextHostId; - - enum CompileContext { - CompileTranslation, - CompileReduction, - CompileFunction, - CompileRoot - }; - - CompileContext compileContext; - LongVect returnJumps; - LongVect breakJumps; - Function *curFunction; - - /* For stack unwinding. Used at exits, returns, iterator destroy, etc. */ - CodeVect unwindCode; - - ObjectField *makeDataEl(); - ObjectField *makeFileEl(); - ObjectField *makeLineEl(); - ObjectField *makeColEl(); - ObjectField *makePosEl(); - - IterDef *findIterDef( IterDef::Type type, GenericType *generic ); - IterDef *findIterDef( IterDef::Type type, Function *func ); - IterDef *findIterDef( IterDef::Type type ); - IterDefSet iterDefSet; - - enum GeneratesType { GenToken, GenIgnore, GenCfl }; - - int nextObjectId; - GeneratesType generatesType; - bool generatesIgnore; - - StringMap literalStrings; - - long nextFrameId; - long nextParserId; - - ObjectDef *rootLocalFrame; - - bool revertOn; - - RedFsm *redFsm; - - PdaGraph *pdaGraph; - struct pda_tables *pdaTables; - - long predValue; - long nextMatchEndNum; - - TypeRef *argvTypeRef; - TypeRef *stdsTypeRef; - - bool inContiguous; - int contiguousOffset; - int contiguousStretch; - - void declareReVars(); - - void initReductionNeeds( Reduction *reduction ); - - void findRhsRefs( bool &lhsUsed, Vector<ProdEl*> &rhsUsed, Vector<ProdEl*> &treeUsed, - Vector<ProdEl*> &locUsed, Reduction *reduction, Production *production, - const ReduceTextItemList &list ); - - void computeNeeded( Reduction 
*reduction, Production *production, - const ReduceTextItemList &list ); - void computeNeeded(); - - void loadRefs( Reduction *reduction, Production *production, - const ReduceTextItemList &list, bool read ); - - void writePostfixReduce( Reduction *reduction ); - void writeParseReduce( Reduction *reduction ); - - void writeParseReduce(); - void writePostfixReduce(); - - void writeHostCall(); - void writeNeeds(); - void writeCommit(); - void writeReduceStructs(); - void writeReduceDispatchers(); - void writeUnescape(); - - void writeLhsRef( Production *production, ReduceTextItem *i ); - void writeRhsRef( Production *production, ReduceTextItem *i ); - void writeTreeRef( Production *production, ReduceTextItem *i ); - void writeRhsLoc( Production *production, ReduceTextItem *i ); - void writeHostItemList( Production *production, const ReduceTextItemList &list ); - void writeCommitStub(); -}; - -void afterOpMinimize( FsmGraph *fsm, bool lastInSeq = true ); -Key makeFsmKeyHex( char *str, const InputLoc &loc, Compiler *pd ); -Key makeFsmKeyDec( char *str, const InputLoc &loc, Compiler *pd ); -Key makeFsmKeyNum( char *str, const InputLoc &loc, Compiler *pd ); -Key makeFsmKeyChar( char c, Compiler *pd ); -void makeFsmKeyArray( Key *result, char *data, int len, Compiler *pd ); -void makeFsmUniqueKeyArray( KeySet &result, char *data, int len, - bool caseInsensitive, Compiler *pd ); -FsmGraph *makeBuiltin( BuiltinMachine builtin, Compiler *pd ); -FsmGraph *dotFsm( Compiler *pd ); -FsmGraph *dotStarFsm( Compiler *pd ); - -void errorStateLabels( const NameSet &locations ); - -struct ColmParser; - -typedef AvlMap<String, ColmParser *, ColmCmpStr> ParserDict; -typedef AvlMapEl<String, ColmParser *> ParserDictEl; - -LangEl *declareLangEl( Compiler *pd, Namespace *nspace, - const String &data, LangEl::Type type ); -LangEl *addLangEl( Compiler *pd, Namespace *nspace, - const String &data, LangEl::Type type ); - -StructEl *declareStruct( Compiler *pd, Namespace *nspace, - const String 
&data, StructDef *context ); - -void declareTypeAlias( Compiler *pd, Namespace *nspace, - const String &data, TypeRef *typeRef ); - -LangEl *findType( Compiler *pd, Namespace *nspace, const String &data ); - -ObjectMethod *initFunction( UniqueType *retType, ObjectDef *obj, - ObjectMethod::Type type, const String &name, int methIdWV, int methIdWC, - bool isConst, bool useFnInstr = false, GenericType *useGeneric = 0 ); - -ObjectMethod *initFunction( UniqueType *retType, ObjectDef *obj, - ObjectMethod::Type type, const String &name, int methIdWV, int methIdWC, - UniqueType *arg1, bool isConst, bool useFnInstr = false, - GenericType *useGeneric = 0 ); - -ObjectMethod *initFunction( UniqueType *retType, ObjectDef *obj, - ObjectMethod::Type type, const String &name, int methIdWV, int methIdWC, - UniqueType *arg1, UniqueType *arg2, bool isConst, - bool useFnInstr = false, GenericType *useGeneric = 0 ); - -ObjectMethod *initFunction( UniqueType *retType, Namespace *nspace, ObjectDef *obj, - ObjectMethod::Type type, const String &name, int methIdWV, int methIdWC, - bool isConst, bool useFnInstr = false, GenericType *useGeneric = 0 ); - -ObjectMethod *initFunction( UniqueType *retType, Namespace *nspace, ObjectDef *obj, - ObjectMethod::Type type, const String &name, int methIdWV, int methIdWC, - UniqueType *arg1, bool isConst, bool useFnInstr = false, - GenericType *useGeneric = 0 ); - -ObjectMethod *initFunction( UniqueType *retType, Namespace *nspace, ObjectDef *obj, - ObjectMethod::Type type, const String &name, int methIdWV, int methIdWC, - UniqueType *arg1, UniqueType *arg2, bool isConst, - bool useFnInstr = false, GenericType *useGeneric = 0 ); - -extern "C" struct input_impl *colm_impl_new_pat( char *name, struct Pattern *pattern ); -extern "C" struct input_impl *colm_impl_new_cons( char *name, struct Constructor *constructor ); - -#endif /* _COLM_PARSEDATA_H */ - diff --git a/src/config.h.cmake.in b/src/config.h.cmake.in deleted file mode 100644 index 
455cffe7..00000000 --- a/src/config.h.cmake.in +++ /dev/null @@ -1,14 +0,0 @@ -/* config.h Generated from config.h.cmake.in by cmake */ - -#ifndef _COLM_CONFIG_H -#define _COLM_CONFIG_H - -#cmakedefine DEBUG 1 - -#cmakedefine HAVE_SYS_MMAN_H 1 -#cmakedefine HAVE_SYS_WAIT_H 1 -#cmakedefine HAVE_UNISTD_H 1 - -#cmakedefine SIZEOF_LONG @SIZEOF_LONG@ - -#endif /* _COLM_CONFIG_H */ diff --git a/src/consinit.cc b/src/consinit.cc deleted file mode 100644 index 995e24f1..00000000 --- a/src/consinit.cc +++ /dev/null @@ -1,889 +0,0 @@ -/* - * Copyright 2006-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "consinit.h" - -#include <iostream> - -using std::cout; -using std::cerr; -using std::endl; - -LexTerm *rangeTerm( const char *low, const char *high ) -{ - Literal *lowLit = Literal::cons( internal, String( low ), Literal::LitString ); - Literal *highLit = Literal::cons( internal, String( high ), Literal::LitString ); - Range *range = Range::cons( lowLit, highLit ); - LexFactor *factor = LexFactor::cons( range ); - LexFactorNeg *factorNeg = LexFactorNeg::cons( factor ); - LexFactorRep *factorRep = LexFactorRep::cons( factorNeg ); - LexFactorAug *factorAug = LexFactorAug::cons( factorRep ); - LexTerm *term = LexTerm::cons( factorAug ); - return term; -} - -LexFactorNeg *litFactorNeg( const char *str ) -{ - Literal *lit = Literal::cons( internal, String( str ), Literal::LitString ); - LexFactor *factor = LexFactor::cons( lit ); - LexFactorNeg *factorNeg = LexFactorNeg::cons( factor ); - return factorNeg; -} - -LexFactorAug *litFactorAug( const char *str ) -{ - Literal *lit = Literal::cons( internal, String( str ), Literal::LitString ); - LexFactor *factor = LexFactor::cons( lit ); - LexFactorNeg *factorNeg = LexFactorNeg::cons( factor ); - LexFactorRep *factorRep = LexFactorRep::cons( factorNeg ); - LexFactorAug *factorAug = LexFactorAug::cons( factorRep ); - return factorAug; -} - -LexTerm *litTerm( const char *str ) -{ - Literal *lit = Literal::cons( internal, String( str ), Literal::LitString ); - LexFactor *factor = LexFactor::cons( lit ); - LexFactorNeg *factorNeg = LexFactorNeg::cons( factor ); - LexFactorRep *factorRep = LexFactorRep::cons( factorNeg ); - LexFactorAug *factorAug = LexFactorAug::cons( factorRep ); - LexTerm *term = LexTerm::cons( factorAug ); - return term; -} - -LexExpression *litExpr( const char *str ) -{ - LexTerm *term = litTerm( str ); - LexExpression *expr = LexExpression::cons( term ); - return expr; -} - -LexExpression *orExpr( LexTerm *term1, LexTerm *term2 ) -{ - LexExpression *expr1 = LexExpression::cons( term1 ); - 
return LexExpression::cons( expr1, term2, LexExpression::OrType ); -} - -/* Union of three terms, built left to right. */ -LexExpression *orExpr( LexTerm *term1, LexTerm *term2, LexTerm *term3 ) -{ - LexExpression *expr1 = LexExpression::cons( term1 ); - LexExpression *expr2 = LexExpression::cons( expr1, term2, LexExpression::OrType ); - LexExpression *expr3 = LexExpression::cons( expr2, term3, LexExpression::OrType ); - return expr3; -} - -/* Union of four terms, built left to right. */ -LexExpression *orExpr( LexTerm *term1, LexTerm *term2, LexTerm *term3, LexTerm *term4 ) -{ - LexExpression *expr1 = LexExpression::cons( term1 ); - LexExpression *expr2 = LexExpression::cons( expr1, term2, LexExpression::OrType ); - LexExpression *expr3 = LexExpression::cons( expr2, term3, LexExpression::OrType ); - LexExpression *expr4 = LexExpression::cons( expr3, term4, LexExpression::OrType ); - return expr4; -} - -/* Union of six terms, built left to right. All six terms must be chained - * into the result: wsIgnore passes the carriage-return and form-feed - * literals as term5 and term6, and stopping the chain at term4 drops them - * from the whitespace token. */ -LexExpression *orExpr( LexTerm *term1, LexTerm *term2, LexTerm *term3, - LexTerm *term4, LexTerm *term5, LexTerm *term6 ) -{ - LexExpression *expr1 = LexExpression::cons( term1 ); - LexExpression *expr2 = LexExpression::cons( expr1, term2, LexExpression::OrType ); - LexExpression *expr3 = LexExpression::cons( expr2, term3, LexExpression::OrType ); - LexExpression *expr4 = LexExpression::cons( expr3, term4, LexExpression::OrType ); - LexExpression *expr5 = LexExpression::cons( expr4, term5, LexExpression::OrType ); - LexExpression *expr6 = LexExpression::cons( expr5, term6, LexExpression::OrType ); - return expr6; -} - -LexFactorAug *starFactorAug( LexExpression *expr ) -{ - LexJoin *join = LexJoin::cons( expr ); - LexFactor *factor = LexFactor::cons( join ); - LexFactorNeg *factorNeg = LexFactorNeg::cons( factor ); - LexFactorRep *factorRep = LexFactorRep::cons( factorNeg ); - LexFactorRep *staredRep = LexFactorRep::cons( internal, - factorRep, 0, 0, LexFactorRep::StarType ); - LexFactorAug *factorAug = LexFactorAug::cons( staredRep ); - return factorAug; -} - -LexFactorAug *starFactorAug( LexTerm *term ) -{ - LexExpression *expr = LexExpression::cons( term ); - return starFactorAug( expr ); -} - -LexFactorAug *starFactorAug( LexFactorAug *factorAug ) -{ - LexTerm *term = LexTerm::cons( factorAug ); - return starFactorAug( term 
); -} - -LexFactorAug *plusFactorAug( LexExpression *expr ) -{ - LexJoin *join = LexJoin::cons( expr ); - LexFactor *factor = LexFactor::cons( join ); - LexFactorNeg *factorNeg = LexFactorNeg::cons( factor ); - LexFactorRep *factorRep = LexFactorRep::cons( factorNeg ); - LexFactorRep *staredRep = LexFactorRep::cons( internal, factorRep, 0, 0, LexFactorRep::PlusType ); - LexFactorAug *factorAug = LexFactorAug::cons( staredRep ); - return factorAug; -} - -LexTerm *concatTerm( LexFactorAug *fa1, LexFactorAug *fa2 ) -{ - LexTerm *term1 = LexTerm::cons( fa1 ); - LexTerm *term2 = LexTerm::cons( term1, fa2, LexTerm::ConcatType ); - return term2; -} - -LexTerm *concatTerm( LexFactorAug *fa1, LexFactorAug *fa2, LexFactorAug *fa3 ) -{ - LexTerm *term1 = LexTerm::cons( fa1 ); - LexTerm *term2 = LexTerm::cons( term1, fa2, LexTerm::ConcatType ); - LexTerm *term3 = LexTerm::cons( term2, fa3, LexTerm::ConcatType ); - return term3; -} - -LexFactorAug *parensFactorAug( LexExpression *expr ) -{ - LexJoin *join = LexJoin::cons( expr ); - LexFactor *factor = LexFactor::cons( join ); - LexFactorNeg *factorNeg = LexFactorNeg::cons( factor ); - LexFactorRep *factorRep = LexFactorRep::cons( factorNeg ); - LexFactorAug *factorAug = LexFactorAug::cons( factorRep ); - return factorAug; -} - -LexFactorNeg *parensFactorNeg( LexExpression *expr ) -{ - LexJoin *join = LexJoin::cons( expr ); - LexFactor *factor = LexFactor::cons( join ); - LexFactorNeg *factorNeg = LexFactorNeg::cons( factor ); - return factorNeg; -} - -LexFactorAug *parensFactorAug( LexTerm *term ) -{ - LexExpression *expr = LexExpression::cons( term ); - LexJoin *join = LexJoin::cons( expr ); - LexFactor *factor = LexFactor::cons( join ); - LexFactorNeg *factorNeg = LexFactorNeg::cons( factor ); - LexFactorRep *factorRep = LexFactorRep::cons( factorNeg ); - LexFactorAug *factorAug = LexFactorAug::cons( factorRep ); - return factorAug; -} - -LexFactorAug *charNegFactorAug( LexExpression *expr ) -{ - LexFactorNeg *factorNeg = 
parensFactorNeg( expr ); - LexFactorNeg *charNeg = LexFactorNeg::cons( factorNeg, LexFactorNeg::CharNegateType ); - LexFactorRep *factorRep = LexFactorRep::cons( charNeg ); - LexFactorAug *factorAug = LexFactorAug::cons( factorRep ); - return factorAug; -} - -LexTerm *charNegTerm( LexExpression *expr ) -{ - LexFactorAug *factorAug = charNegFactorAug( expr ); - LexTerm *term = LexTerm::cons( factorAug ); - return term; -} - -LexTerm *parensTerm( LexExpression *expr ) -{ - LexFactorAug *factorAug = parensFactorAug( expr ); - return LexTerm::cons( factorAug ); -} - -void ConsInit::wsIgnore() -{ - ObjectDef *objectDef = ObjectDef::cons( ObjectDef::UserType, String(), pd->nextObjectId++ ); - - LexTerm *r1 = litTerm( "' '" ); - LexTerm *r2 = litTerm( "'\t'" ); - LexTerm *r3 = litTerm( "'\v'" ); - LexTerm *r4 = litTerm( "'\n'" ); - LexTerm *r5 = litTerm( "'\r'" ); - LexTerm *r6 = litTerm( "'\f'" ); - - LexExpression *whitespace = orExpr( r1, r2, r3, r4, r5, r6 ); - LexFactorAug *whitespaceRep = plusFactorAug( whitespace ); - - LexTerm *term = LexTerm::cons( whitespaceRep ); - LexExpression *expr = LexExpression::cons( term ); - LexJoin *join = LexJoin::cons( expr ); - - defineToken( internal, String(), join, objectDef, 0, true, false, false ); -} - -void ConsInit::commentIgnore() -{ - ObjectDef *objectDef = ObjectDef::cons( ObjectDef::UserType, String(), pd->nextObjectId++ ); - - LexFactorAug *pound = litFactorAug( "'#'" ); - LexExpression *newline = litExpr( "'\\n'" ); - - LexFactorAug *commChars = charNegFactorAug( newline ); - LexFactorAug *restOfLine = starFactorAug( commChars ); - - LexFactorAug *termNewline = litFactorAug( "'\\n'" ); - - LexTerm *concat = concatTerm( pound, restOfLine, termNewline ); - LexExpression *expr = LexExpression::cons( concat ); - - LexJoin *join = LexJoin::cons( expr ); - - defineToken( internal, String(), join, objectDef, 0, true, false, false ); -} - -void ConsInit::idToken() -{ - String hello( "id" ); - - ObjectDef *objectDef = 
ObjectDef::cons( ObjectDef::UserType, hello, pd->nextObjectId++ ); - - LexTerm *r1 = rangeTerm( "'a'", "'z'" ); - LexTerm *r2 = rangeTerm( "'A'", "'Z'" ); - LexTerm *r3 = litTerm( "'_'" ); - LexFactorAug *first = parensFactorAug( orExpr( r1, r2, r3 ) ); - - LexTerm *r4 = rangeTerm( "'a'", "'z'" ); - LexTerm *r5 = rangeTerm( "'A'", "'Z'" ); - LexTerm *r6 = litTerm( "'_'" ); - LexTerm *r7 = rangeTerm( "'0'", "'9'" ); - LexExpression *second = orExpr( r4, r5, r6, r7 ); - LexFactorAug *secondStar = starFactorAug( second ); - - LexTerm *concat = concatTerm( first, secondStar ); - - LexExpression *expr = LexExpression::cons( concat ); - LexJoin *join = LexJoin::cons( expr ); - - defineToken( internal, hello, join, objectDef, 0, false, false, false ); -} - -void ConsInit::literalToken() -{ - String hello( "literal" ); - - ObjectDef *objectDef = ObjectDef::cons( ObjectDef::UserType, hello, pd->nextObjectId++ ); - - LexFactorAug *r1 = litFactorAug( "'\\''" ); - - /* [^'\\] */ - LexExpression *singleQuoteBackSlash = orExpr( - litTerm( "'\\''" ), - litTerm( "'\\\\'" ) ); - - LexTerm *freeChars = charNegTerm( singleQuoteBackSlash ); - - /* '\\' any */ - LexFactorAug *backSlash = litFactorAug( "'\\\\'" ); - LexExpression *any = LexExpression::cons( BT_Any ); - LexTerm *escape = concatTerm( backSlash, parensFactorAug( any ) ); - - /* Union and repeat. 
*/ - LexExpression *charOrEscape = orExpr( freeChars, escape ); - LexFactorAug *r2 = starFactorAug( charOrEscape ); - - LexFactorAug *r3 = litFactorAug( "'\''" ); - - LexTerm *concat = concatTerm( r1, r2, r3 ); - LexExpression *expr = LexExpression::cons( concat ); - LexJoin *join = LexJoin::cons( expr ); - - defineToken( internal, hello, join, objectDef, 0, false, false, false ); -} - -void ConsInit::keyword( const String &name, const String &lit ) -{ - ObjectDef *objectDef = ObjectDef::cons( ObjectDef::UserType, name, pd->nextObjectId++ ); - LexTerm *term = litTerm( lit ); - LexExpression *expr = LexExpression::cons( term ); - LexJoin *join = LexJoin::cons( expr ); - defineToken( internal, name, join, objectDef, 0, false, false, false ); -} - -void ConsInit::keyword( const String &kw ) -{ - literalDef( internal, kw, false, false ); -} - -ProdEl *ConsInit::prodRefName( const String &name ) -{ - ProdEl *prodEl = prodElName( internal, name, - NamespaceQual::cons( curNspace() ), 0, - RepeatNone, false ); - return prodEl; -} - -ProdEl *ConsInit::prodRefName( const String &capture, const String &name ) -{ - ObjectField *captureField = ObjectField::cons( internal, - ObjectField::RhsNameType, 0, capture ); - ProdEl *prodEl = prodElName( internal, name, - NamespaceQual::cons( curNspace() ), captureField, - RepeatNone, false ); - return prodEl; -} - -ProdEl *ConsInit::prodRefNameRepeat( const String &name ) -{ - ProdEl *prodEl = prodElName( internal, name, - NamespaceQual::cons( curNspace() ), 0, - RepeatRepeat, false ); - return prodEl; -} - -ProdEl *ConsInit::prodRefNameRepeat( const String &capture, const String &name ) -{ - ObjectField *captureField = ObjectField::cons( internal, - ObjectField::RhsNameType, 0, capture ); - ProdEl *prodEl = prodElName( internal, name, - NamespaceQual::cons( curNspace() ), captureField, - RepeatRepeat, false ); - return prodEl; -} - -ProdEl *ConsInit::prodRefLit( const String &lit ) -{ - ProdEl *prodEl = prodElLiteral( internal, lit, - 
NamespaceQual::cons( curNspace() ), 0, - RepeatNone, false ); - return prodEl; -} - -Production *ConsInit::production() -{ - ProdElList *prodElList = new ProdElList; - return BaseParser::production( internal, prodElList, String(), false, 0, 0 ); -} - -Production *ConsInit::production( ProdEl *prodEl1 ) -{ - ProdElList *prodElList = new ProdElList; - appendProdEl( prodElList, prodEl1 ); - return BaseParser::production( internal, prodElList, String(), false, 0, 0 ); -} - -Production *ConsInit::production( ProdEl *prodEl1, ProdEl *prodEl2 ) -{ - ProdElList *prodElList = new ProdElList; - appendProdEl( prodElList, prodEl1 ); - appendProdEl( prodElList, prodEl2 ); - return BaseParser::production( internal, prodElList, String(), false, 0, 0 ); -} - -Production *ConsInit::production( ProdEl *prodEl1, ProdEl *prodEl2, - ProdEl *prodEl3 ) -{ - ProdElList *prodElList = new ProdElList; - appendProdEl( prodElList, prodEl1 ); - appendProdEl( prodElList, prodEl2 ); - appendProdEl( prodElList, prodEl3 ); - return BaseParser::production( internal, prodElList, String(), false, 0, 0 ); -} - -Production *ConsInit::production( ProdEl *prodEl1, ProdEl *prodEl2, - ProdEl *prodEl3, ProdEl *prodEl4 ) -{ - ProdElList *prodElList = new ProdElList; - appendProdEl( prodElList, prodEl1 ); - appendProdEl( prodElList, prodEl2 ); - appendProdEl( prodElList, prodEl3 ); - appendProdEl( prodElList, prodEl4 ); - return BaseParser::production( internal, prodElList, String(), false, 0, 0 ); -} - -Production *ConsInit::production( ProdEl *prodEl1, ProdEl *prodEl2, - ProdEl *prodEl3, ProdEl *prodEl4, ProdEl *prodEl5 ) -{ - ProdElList *prodElList = new ProdElList; - appendProdEl( prodElList, prodEl1 ); - appendProdEl( prodElList, prodEl2 ); - appendProdEl( prodElList, prodEl3 ); - appendProdEl( prodElList, prodEl4 ); - appendProdEl( prodElList, prodEl5 ); - return BaseParser::production( internal, prodElList, String(), false, 0, 0 ); -} - -Production *ConsInit::production( ProdEl *prodEl1, ProdEl 
*prodEl2, - ProdEl *prodEl3, ProdEl *prodEl4, ProdEl *prodEl5, - ProdEl *prodEl6, ProdEl *prodEl7 ) -{ - ProdElList *prodElList = new ProdElList; - appendProdEl( prodElList, prodEl1 ); - appendProdEl( prodElList, prodEl2 ); - appendProdEl( prodElList, prodEl3 ); - appendProdEl( prodElList, prodEl4 ); - appendProdEl( prodElList, prodEl5 ); - appendProdEl( prodElList, prodEl6 ); - appendProdEl( prodElList, prodEl7 ); - return BaseParser::production( internal, prodElList, String(), false, 0, 0 ); -} - -void ConsInit::definition( const String &name, Production *prod1, Production *prod2, - Production *prod3, Production *prod4 ) -{ - LelDefList *defList = new LelDefList; - prodAppend( defList, prod1 ); - prodAppend( defList, prod2 ); - prodAppend( defList, prod3 ); - prodAppend( defList, prod4 ); - - NtDef *ntDef = NtDef::cons( name, curNspace(), curStruct(), false ); - ObjectDef *objectDef = ObjectDef::cons( ObjectDef::UserType, - name, pd->nextObjectId++ ); - cflDef( ntDef, objectDef, defList ); -} - -void ConsInit::definition( const String &name, Production *prod1, - Production *prod2, Production *prod3 ) -{ - LelDefList *defList = new LelDefList; - prodAppend( defList, prod1 ); - prodAppend( defList, prod2 ); - prodAppend( defList, prod3 ); - - NtDef *ntDef = NtDef::cons( name, curNspace(), curStruct(), false ); - ObjectDef *objectDef = ObjectDef::cons( ObjectDef::UserType, - name, pd->nextObjectId++ ); - cflDef( ntDef, objectDef, defList ); -} - -void ConsInit::definition( const String &name, Production *prod1, Production *prod2 ) -{ - LelDefList *defList = new LelDefList; - prodAppend( defList, prod1 ); - prodAppend( defList, prod2 ); - - NtDef *ntDef = NtDef::cons( name, curNspace(), curStruct(), false ); - ObjectDef *objectDef = ObjectDef::cons( ObjectDef::UserType, - name, pd->nextObjectId++ ); - cflDef( ntDef, objectDef, defList ); -} - -void ConsInit::definition( const String &name, Production *prod ) -{ - LelDefList *defList = new LelDefList; - prodAppend( 
defList, prod ); - - NtDef *ntDef = NtDef::cons( name, curNspace(), curStruct(), false ); - ObjectDef *objectDef = ObjectDef::cons( ObjectDef::UserType, - name, pd->nextObjectId++ ); - cflDef( ntDef, objectDef, defList ); -} - -void ConsInit::lexFactor() -{ - ProdEl *prodEl1 = prodRefName( "Literal", "literal" ); - Production *prod1 = production( prodEl1 ); - - ProdEl *prodEl8 = prodRefName( "Id", "id" ); - Production *prod4 = production( prodEl8 ); - - ProdEl *prodEl2 = prodRefLit( "'('" ); - ProdEl *prodEl3 = prodRefName( "Expr", "lex_expr" ); - ProdEl *prodEl4 = prodRefLit( "')'" ); - Production *prod2 = production( prodEl2, prodEl3, prodEl4 ); - - ProdEl *prodEl5 = prodRefName( "Low", "literal" ); - ProdEl *prodEl6 = prodRefLit( "'..'" ); - ProdEl *prodEl7 = prodRefName( "High", "literal" ); - Production *prod3 = production( prodEl5, prodEl6, prodEl7 ); - - definition( "lex_factor", prod1, prod2, prod3, prod4 ); -} - -void ConsInit::lexFactorNeg() -{ - ProdEl *prodEl1 = prodRefLit( "'^'" ); - ProdEl *prodEl2 = prodRefName( "FactorNeg", "lex_factor_neg" ); - Production *prod1 = production( prodEl1, prodEl2 ); - - ProdEl *prodEl3 = prodRefName( "Factor", "lex_factor" ); - Production *prod2 = production( prodEl3 ); - - definition( "lex_factor_neg", prod1, prod2 ); -} - -void ConsInit::lexFactorRep() -{ - ProdEl *prodEl1 = prodRefName( "FactorRep", "lex_factor_rep" ); - ProdEl *prodEl2 = prodRefName( "Star", "STAR" ); - Production *prod1 = production( prodEl1, prodEl2 ); - - ProdEl *prodEl3 = prodRefName( "FactorRep", "lex_factor_rep" ); - ProdEl *prodEl4 = prodRefName( "Plus", "PLUS" ); - Production *prod2 = production( prodEl3, prodEl4 ); - - ProdEl *prodEl5 = prodRefName( "FactorNeg", "lex_factor_neg" ); - Production *prod3 = production( prodEl5 ); - - definition( "lex_factor_rep", prod1, prod2, prod3 ); -} - -void ConsInit::lexTerm() -{ - ProdEl *prodEl1 = prodRefName( "Term", "lex_term" ); - ProdEl *prodEl2 = prodRefName( "Dot", "DOT" ); - ProdEl *prodEl3 = 
prodRefName( "FactorRep", "lex_factor_rep" ); - Production *prod1 = production( prodEl1, prodEl2, prodEl3 ); - - ProdEl *prodEl4 = prodRefName( "Term", "lex_term" ); - ProdEl *prodEl5 = prodRefName( "ColonLt", "COLON_LT" ); - ProdEl *prodEl6 = prodRefName( "FactorRep", "lex_factor_rep" ); - Production *prod2 = production( prodEl4, prodEl5, prodEl6 ); - - ProdEl *prodEl7 = prodRefName( "FactorRep", "lex_factor_rep" ); - Production *prod3 = production( prodEl7 ); - - definition( "lex_term", prod1, prod2, prod3 ); -} - -void ConsInit::lexExpr() -{ - ProdEl *prodEl1 = prodRefName( "Expr", "lex_expr" ); - ProdEl *prodEl2 = prodRefLit( "'|'" ); - ProdEl *prodEl3 = prodRefName( "Term", "lex_term" ); - Production *prod1 = production( prodEl1, prodEl2, prodEl3 ); - - ProdEl *prodEl4 = prodRefName( "Term", "lex_term" ); - Production *prod2 = production( prodEl4 ); - - definition( "lex_expr", prod1, prod2 ); -} - -void ConsInit::token() -{ - ProdEl *prodEl1 = prodRefLit( "'token'" ); - ProdEl *prodEl2 = prodRefName( "Id", "id" ); - ProdEl *prodEl3 = prodRefName( "LeftNi", "opt_ni" ); - ProdEl *prodEl4 = prodRefLit( "'/'" ); - ProdEl *prodEl5 = prodRefName( "Expr", "lex_expr" ); - ProdEl *prodEl6 = prodRefLit( "'/'" ); - ProdEl *prodEl7 = prodRefName( "RightNi", "opt_ni" ); - Production *prod1 = production( prodEl1, prodEl2, prodEl3, - prodEl4, prodEl5, prodEl6, prodEl7 ); - definition( "token_def", prod1 ); -} - -void ConsInit::ignore() -{ - ProdEl *prodEl1 = prodRefLit( "'ignore'" ); - ProdEl *prodEl2 = prodRefLit( "'/'" ); - ProdEl *prodEl3 = prodRefName( "Expr", "lex_expr" ); - ProdEl *prodEl4 = prodRefLit( "'/'" ); - Production *prod1 = production( prodEl1, prodEl2, prodEl3, prodEl4 ); - definition( "ignore_def", prod1 ); -} - -void ConsInit::tokenList() -{ - ProdEl *prodEl1 = prodRefName( "TokenList", "token_list" ); - ProdEl *prodEl2 = prodRefName( "TokenDef", "token_def" ); - Production *prod1 = production( prodEl1, prodEl2 ); - - ProdEl *prodEl3 = prodRefName( 
"TokenList", "token_list" ); - ProdEl *prodEl4 = prodRefName( "IgnoreDef", "ignore_def" ); - Production *prod2 = production( prodEl3, prodEl4 ); - - Production *prod3 = production(); - - definition( "token_list", prod1, prod2, prod3 ); -} - -Production *ConsInit::prodLex() -{ - ProdEl *prodEl1 = prodRefLit( "'lex'" ); - ProdEl *prodEl2 = prodRefName( "TokenList", "token_list" ); - ProdEl *prodEl3 = prodRefLit( "'end'" ); - - return production( prodEl1, prodEl2, prodEl3 ); -} - -void ConsInit::optProdElName() -{ - ProdEl *prodEl1 = prodRefName( "Name", "id" ); - ProdEl *prodEl2 = prodRefLit( "':'" ); - Production *prod1 = production( prodEl1, prodEl2 ); - - Production *prod2 = production(); - - definition( "opt_prod_el_name", prod1, prod2 ); -} - -void ConsInit::optNi() -{ - ProdEl *prodEl1 = prodRefName( "Ni", "NI" ); - Production *prod1 = production( prodEl1 ); - - Production *prod2 = production(); - - definition( "opt_ni", prod1, prod2 ); -} - -void ConsInit::optRepeat() -{ - ProdEl *prodEl1 = prodRefName( "Star", "STAR" ); - Production *prod1 = production( prodEl1 ); - - Production *prod2 = production(); - - definition( "opt_prod_repeat", prod1, prod2 ); -} - -void ConsInit::prodEl() -{ - ProdEl *prodEl1 = prodRefName( "OptName", "opt_prod_el_name" ); - ProdEl *prodEl2 = prodRefName( "Id", "id" ); - ProdEl *prodEl3 = prodRefName( "OptRepeat", "opt_prod_repeat" ); - Production *prod1 = production( prodEl1, prodEl2, prodEl3 ); - - definition( "prod_el", prod1 ); -} - -void ConsInit::prodElList() -{ - ProdEl *prodEl1 = prodRefName( "ProdElList", "prod_el_list" ); - ProdEl *prodEl2 = prodRefName( "ProdEl", "prod_el" ); - Production *prod1 = production( prodEl1, prodEl2 ); - - Production *prod2 = production(); - - definition( "prod_el_list", prod1, prod2 ); -} - -void ConsInit::optCommit() -{ - ProdEl *prodEl1 = prodRefName( "Commit", "COMMIT" ); - Production *prod1 = production( prodEl1 ); - - Production *prod2 = production(); - - definition( "opt_commit", prod1, 
prod2 ); -} - -void ConsInit::optProdName() -{ - ProdEl *prodEl1 = prodRefLit( "':'" ); - ProdEl *prodEl2 = prodRefName( "Name", "id" ); - Production *prod1 = production( prodEl1, prodEl2 ); - - Production *prod2 = production(); - - definition( "opt_prod_name", prod1, prod2 ); -} - -void ConsInit::prod() -{ - ProdEl *prodEl1 = prodRefLit( "'['" ); - ProdEl *prodEl2 = prodRefName( "ProdElList", "prod_el_list" ); - ProdEl *prodEl3 = prodRefLit( "']'" ); - ProdEl *prodEl4 = prodRefName( "OptName", "opt_prod_name" ); - ProdEl *prodEl5 = prodRefName( "OptCommit", "opt_commit" ); - Production *prod1 = production( prodEl1, prodEl2, prodEl3, prodEl4, prodEl5 ); - - definition( "prod", prod1 ); -} - -void ConsInit::prodList() -{ - ProdEl *prodEl1 = prodRefName( "ProdList", "prod_list" ); - ProdEl *prodEl2 = prodRefLit( "'|'" ); - ProdEl *prodEl3 = prodRefName( "Prod", "prod" ); - Production *prod1 = production( prodEl1, prodEl2, prodEl3 ); - - ProdEl *prodEl4 = prodRefName( "Prod", "prod" ); - Production *prod2 = production( prodEl4 ); - - definition( "prod_list", prod1, prod2 ); -} - -Production *ConsInit::prodProd() -{ - ProdEl *prodEl1 = prodRefLit( "'def'" ); - ProdEl *prodEl2 = prodRefName( "DefId", "id" ); - ProdEl *prodEl3 = prodRefName( "ProdList", "prod_list" ); - - return production( prodEl1, prodEl2, prodEl3 ); -} - -void ConsInit::item() -{ - Production *prod1 = prodLex(); - Production *prod2 = prodProd(); - definition( "item", prod1, prod2 ); -} - -void ConsInit::startProd() -{ - ProdEl *prodEl1 = prodRefNameRepeat( "ItemList", "item" ); - Production *prod1 = production( prodEl1 ); - - definition( "start", prod1 ); -} - -void ConsInit::parseInput( StmtList *stmtList ) -{ - /* Pop argv, this yields the file name . 
*/ - CallArgVect *popArgs = new CallArgVect; - QualItemVect *popQual = new QualItemVect; - popQual->append( QualItem( QualItem::Arrow, internal, String( "argv" ) ) ); - - LangVarRef *popRef = LangVarRef::cons( internal, - curNspace(), 0, curLocalFrame()->rootScope, - NamespaceQual::cons( curNspace() ), popQual, String("pop") ); - LangExpr *pop = LangExpr::cons( LangTerm::cons( InputLoc(), popRef, popArgs ) ); - - TypeRef *typeRef = TypeRef::cons( internal, pd->uniqueTypeStr ); - ObjectField *objField = ObjectField::cons( internal, - ObjectField::UserLocalType, typeRef, "A" ); - - LangStmt *stmt = varDef( objField, pop, LangStmt::AssignType ); - stmtList->append( stmt ); - - /* Construct a literal string 'r', for second arg to open. */ - ConsItem *modeConsItem = ConsItem::cons( internal, - ConsItem::InputText, String("r") ); - ConsItemList *modeCons = new ConsItemList; - modeCons->append( modeConsItem ); - LangExpr *modeExpr = LangExpr::cons( LangTerm::cons( internal, modeCons ) ); - - /* Reference A->value */ - QualItemVect *qual = new QualItemVect; - LangVarRef *varRef = LangVarRef::cons( internal, curNspace(), 0, - curLocalFrame()->rootScope, NamespaceQual::cons( curNspace() ), - qual, String("A") ); - LangExpr *Avalue = LangExpr::cons( LangTerm::cons( internal, - LangTerm::VarRefType, varRef ) ); - - /* Call open. */ - QualItemVect *openQual = new QualItemVect; - LangVarRef *openRef = LangVarRef::cons( internal, - 0, 0, curLocalFrame()->rootScope, - NamespaceQual::cons( curNspace() ), openQual, String("open") ); - CallArgVect *openArgs = new CallArgVect; - openArgs->append( new CallArg(Avalue) ); - openArgs->append( new CallArg(modeExpr) ); - LangExpr *open = LangExpr::cons( LangTerm::cons( InputLoc(), openRef, openArgs ) ); - - /* Construct a list containing the open stream. 
*/ - ConsItem *consItem = ConsItem::cons( internal, ConsItem::ExprType, open, ConsItem::TrimDefault ); - ConsItemList *list = ConsItemList::cons( consItem ); - - /* Will capture the parser to "P" */ - objField = ObjectField::cons( internal, - ObjectField::UserLocalType, 0, String("P") ); - - /* Parse the "start" def. */ - NamespaceQual *nspaceQual = NamespaceQual::cons( curNspace() ); - typeRef = TypeRef::cons( internal, nspaceQual, - String("start"), RepeatNone ); - - /* Parse the above list. */ - LangExpr *parseExpr = parseCmd( internal, false, false, objField, - typeRef, 0, list, true, false, false, "" ); - LangStmt *parseStmt = LangStmt::cons( internal, LangStmt::ExprType, parseExpr ); - stmtList->append( parseStmt ); -} - -void ConsInit::exportTree( StmtList *stmtList ) -{ - /* reference P */ - QualItemVect *qual = new QualItemVect; - LangVarRef *varRef = LangVarRef::cons( internal, curNspace(), 0, - curLocalFrame()->rootScope, NamespaceQual::cons( curNspace() ), qual, String("P") ); - LangExpr *expr = LangExpr::cons( LangTerm::cons( internal, - LangTerm::VarRefType, varRef ) ); - - /* Assign P to ColmTree */ - NamespaceQual *nspaceQual = NamespaceQual::cons( curNspace() ); - TypeRef *typeRef = TypeRef::cons( internal, nspaceQual, String("start"), RepeatNone ); - ObjectField *program = ObjectField::cons( internal, - ObjectField::StructFieldType, typeRef, String("ColmTree") ); - LangStmt *programExport = exportStmt( program, LangStmt::AssignType, expr ); - stmtList->append( programExport ); -} - -void ConsInit::go( long activeRealm ) -{ - ConsInit::init(); - - StmtList *stmtList = new StmtList; - - /* The token region */ - pushRegionSet( internal ); - - wsIgnore(); - commentIgnore(); - - keyword( "'def'" ); - keyword( "'lex'" ); - keyword( "'end'" ); - keyword( "'token'" ); - keyword( "'ignore'" ); - keyword( "NI", "'ni'" ); - keyword( "COMMIT", "'commit'" ); - - idToken(); - literalToken(); - - keyword( "STAR", "'*'"); - keyword( "PLUS", "'+'"); - keyword( 
"'['" ); - keyword( "']'" ); - keyword( "'|'" ); - keyword( "'/'" ); - keyword( "':'" ); - keyword( "DOT", "'.'" ); - keyword( "COLON_LT", "':>'" ); - keyword( "'('" ); - keyword( "')'" ); - keyword( "'..'" ); - keyword( "'^'" ); - - popRegionSet(); - - lexFactor(); - lexFactorNeg(); - lexFactorRep(); - lexTerm(); - lexExpr(); - - optNi(); - optRepeat(); - optProdElName(); - prodEl(); - prodElList(); - optCommit(); - optProdName(); - prod(); - prodList(); - ignore(); - token(); - tokenList(); - item(); - startProd(); - - parseInput( stmtList ); - exportTree( stmtList ); - - pd->rootCodeBlock = CodeBlock::cons( stmtList, 0 ); -} diff --git a/src/consinit.h b/src/consinit.h deleted file mode 100644 index 76ccabdf..00000000 --- a/src/consinit.h +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Copyright 2013-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include <iostream> - -#include <avltree.h> - -#include "compiler.h" -#include "parser.h" - -#ifndef _COLM_CONSINIT_H -#define _COLM_CONSINIT_H - -struct ConsInit -: - public BaseParser -{ - ConsInit( Compiler *pd ) - : - BaseParser(pd) - {} - - ProdEl *prodRefName( const String &name ); - ProdEl *prodRefName( const String &capture, const String &name ); - ProdEl *prodRefNameRepeat( const String &name ); - ProdEl *prodRefNameRepeat( const String &capture, const String &name ); - ProdEl *prodRefLit( const String &lit ); - - Production *production(); - Production *production( ProdEl *prodEl1 ); - Production *production( ProdEl *prodEl1, ProdEl *prodEl2 ); - Production *production( ProdEl *prodEl1, ProdEl *prodEl2, - ProdEl *prodEl3 ); - Production *production( ProdEl *prodEl1, ProdEl *prodEl2, - ProdEl *prodEl3, ProdEl *prodEl4 ); - Production *production( ProdEl *prodEl1, ProdEl *prodEl2, - ProdEl *prodEl3, ProdEl *prodEl4, ProdEl *prodEl5 ); - Production *production( ProdEl *prodEl1, ProdEl *prodEl2, - ProdEl *prodEl3, ProdEl *prodEl4, ProdEl *prodEl5, - ProdEl *prodEl6, ProdEl *prodEl7 ); - - void definition( const String &name, Production *prod ); - void definition( const String &name, Production *prod1, Production *prod2 ); - void definition( const String &name, Production *prod1, Production *prod2, Production *prod3 ); - void definition( const String &name, Production *prod1, Production *prod2, - Production *prod3, Production *prod4 ); - - void keyword( const String &name, const String &lit ); - void keyword( const String &kw ); - - void printParseTree( StmtList *stmtList ); - void printParseTree(); - - void literalToken(); - void commentIgnore(); - void wsIgnore(); - void idToken(); - - void token(); - void ignore(); - void tokenList(); - - void lexFactor(); - void lexFactorNeg(); - void lexFactorRep(); - void lexExpr(); - void lexTerm(); - - Production *prodProd(); - Production *prodLex(); - - void optNi(); - void optRepeat(); - void optProdElName(); 
- void prodEl(); - void prodElList(); - void item(); - void prodList(); - void optProdName(); - void prod(); - void startProd(); - void optCommit(); - - void parseInput( StmtList *stmtList ); - void exportTree( StmtList *stmtList ); - - virtual void go( long activeRealm ); -}; - -#endif /* _COLM_CONSINIT_H */ - diff --git a/src/cstring.h b/src/cstring.h deleted file mode 100644 index 3c285153..00000000 --- a/src/cstring.h +++ /dev/null @@ -1,862 +0,0 @@ -/* - * Copyright 2002-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef _AAPL_ASTRING_H -#define _AAPL_ASTRING_H - -#include <stdlib.h> -#include <stdarg.h> -#include <stdio.h> -#include <string.h> -#include <assert.h> - -#include <new> -#include <iostream> - -#include "tree.h" - -struct colm_data; - -#ifdef AAPL_NAMESPACE -namespace Aapl { -#endif - -#ifdef AAPL_DOCUMENTATION - -/** - * \defgroup astring String - * \brief Implicitly shared copy-on-write string. - * - * @{ - */ - -/** - * \class String - * \brief Implicitly shared copy-on-write string. - */ - -/*@}*/ - -class String -{ -public: - /** - * \brief Create a null string. Data points to NULL. - */ - String(); - - /** - * \brief Construct a string from a c-style string. - * - * A new buffer is allocated for the c string. Initially, this string will - * be the only String class referencing the data. - */ - String( const char *s ); - - /** - * \brief Construct a string from a c-style string of specific length. - * - * A new buffer is allocated for the c string. Initially, this string will - * be the only String class referencing the data. - */ - String( const char *s, long len ); - - /** - * \brief Construct a string from another String. - * - * A refernce to the buffer allocated for s is taken. A new buffer is - * not allocated. - */ - String( const String &s ); - - /** - * \brief Construct a string using snprintf. - * - * Requires a maximum length for the resulting string. If the formatting - * (not including trailing null) requires more space than maxLen, the - * result will be truncated to maxLen long. Only the length actually - * written will be used by the new string. This string will be the only - * String class referencing the data. - */ - String( long maxLen, const char *format, ... ) - - /** - * \brief Clean up the string. - * - * If the string is not null, the referenced data is detached. If no other - * string refernces the detached data, it is deleted. - */ - ~String(); - - /** - * \brief Set the string from a c-style string. 
- * - * If this string is not null, the current buffer is dereferenced and - * possibly deleted. A new buffer is allocated (or possibly the old buffer - * reused) for the string. Initially, this string will be the only String - * class referencing the data. - * - * If s is null, then this string becomes a null ptr. - * - * \returns A reference to this. - */ - String &operator=( const char *s ); - - /** - * \brief Set the string from a c-style of specific length. - * - * If this string is not null, the current buffer is dereferenced and - * possibly deleted. A new buffer is allocated (or possibly the old buffer - * reused) for the string. Initially, this string will be the only String - * class referencing the data. - * - * If s is null, then this string becomes a null ptr. - * - * \returns A reference to this. - */ - void setAs( const char *s, long len ); - - /** - * \brief Set the string from a single char. - * - * The current buffer is dereferenced and possibly deleted. A new buffer - * is allocated (or possibly the old buffer reused) for the string. - * Initially, this string will be the only String class referencing the - * data. - * - * If s is null, then this string becomes a null ptr. - * - * \returns A reference to this. - */ - String &operator=( const char c ); - - - /** - * \brief Set the string from another String. - * - * If this string is not null, the current buffer is dereferenced and - * possibly deleted. A reference to the buffer allocated for s is taken. - * A new buffer is not allocated. - * - * If s is null, then this string becomes a null ptr. - * - * \returns a reference to this. - */ - String &operator=( const String &s ); - - /** - * \brief Append a c string to the end of this string. - * - * If this string shares its allocation with another, a copy is first - * taken. The buffer for this string is grown and s is appended to the - * end. - * - * If s is null nothing happens. - * - * \returns a reference to this. 
- */ - String &operator+=( const char *s ); - - /** - * \brief Append a c string of specific length to the end of this string. - * - * If this string shares its allocation with another, a copy is first - * taken. The buffer for this string is grown and s is appended to the - * end. - * - * If s is null nothing happens. - * - * \returns a reference to this. - */ - void append( const char *s, long len ); - - /** - * \brief Append a single char to the end of this string. - * - * If this string shares its allocation with another, a copy is first - * taken. The buffer for this string is grown and s is appended to the - * end. - * - * \returns a reference to this. - */ - String &operator+=( const char c ); - - /** - * \brief Append a String to the end of this string. - * - * If this string shares its allocation with another, a copy is first - * taken. The buffer for this string is grown and the data of s is - * appeneded to the end. - * - * If s is null nothing happens. - * - * returns a reference to this. - */ - String &operator+=( const String &s ); - - /** - * \brief Cast to a char star. - * - * \returns the string data. A null string returns 0. - */ - operator char*() const; - - /** - * \brief Get a pointer to the data. - * - * \returns the string Data - */ - char *get() const; - - /** - * \brief Get the length of the string - * - * If the string is null, then undefined behaviour results. - * - * \returns the length of the string. - */ - long length() const; - - /** - * \brief Pointer to the data. - * - * Publically accessible pointer to the data. Immediately in front of the - * string data block is the string header which stores the refcount and - * length. Consequently, care should be taken if modifying this pointer. - */ - char *data; -}; - -/** - * \relates String - * \brief Concatenate a c-style string and a String. - * - * \returns The concatenation of the two strings in a String. 
- */ -String operator+( const String &s1, const char *s2 ); - -/** - * \relates String - * \brief Concatenate a String and a c-style string. - * - * \returns The concatenation of the two strings in a String. - */ -String operator+( const char *s1, const String &s2 ); - -/** - * \relates String - * \brief Concatenate two String classes. - * - * \returns The concatenation of the two strings in a String. - */ -String operator+( const String &s1, const String &s2 ); - -#endif - -template<class T> class StrTmpl -{ -public: - class Fresh {}; - - /* Header located just before string data. Keeps the length and a refcount on - * the data. */ - struct Head - { - long refCount; - long length; - }; - - /** - * \brief Create a null string. - */ - StrTmpl() : data(0) { } - - /* Clean up the string. */ - ~StrTmpl(); - - /* Construct a string from a c-style string. */ - StrTmpl( const char *s ); - - /* Construct a string from a c-style string of specific len. */ - StrTmpl( const char *s, long len ); - - /* Allocate len spaces. */ - StrTmpl( const Fresh &, long len ); - - /* Construct a string from another StrTmpl. */ - StrTmpl( const StrTmpl &s ); - - /* Construct a string from with, sprintf. */ - StrTmpl( long lenGuess, const char *format, ... ); - - /* Construct a string from with, sprintf. */ - StrTmpl( const colm_data *cd ); - - /* Set the string from a c-style string. */ - StrTmpl &operator=( const char *s ); - - /* Set the string from a c-style string of specific len. */ - void setAs( const char *s, long len ); - - /* Allocate len spaces. */ - void setAs( const Fresh &, long len ); - - void chop( long len ); - - /* Construct a string from with, sprintf. */ - void setAs( long lenGuess, const char *format, ... ); - - /* Set the string from a single char. */ - StrTmpl &operator=( const char c ); - - /* Set the string from another StrTmpl. */ - StrTmpl &operator=( const StrTmpl &s ); - - /* Append a c string to the end of this string. 
*/ - StrTmpl &operator+=( const char *s ); - - /* Append a c string to the end of this string of specifi len. */ - void append( const char *s, long len ); - - /* Append a single char to the end of this string. */ - StrTmpl &operator+=( const char c ); - - /* Append an StrTmpl to the end of this string. */ - StrTmpl &operator+=( const StrTmpl &s ); - - /* Cast to a char star. */ - operator char*() const { return data; } - - /* Get a pointer to the data. */ - char *get() const { return data; } - - /* Return the length of the string. Must check for null data pointer. */ - long length() const { return data ? (((Head*)data)-1)->length : 0; } - - /** - * \brief Pointer to the data. - */ - char *data; - -protected: - /* Make space for a string of length len to be appended. */ - char *appendSpace( long len ); - void initSpace( long length ); - void setSpace( long length ); - - template <class FT> friend StrTmpl<FT> operator+( - const StrTmpl<FT> &s1, const char *s2 ); - template <class FT> friend StrTmpl<FT> operator+( - const char *s1, const StrTmpl<FT> &s2 ); - template <class FT> friend StrTmpl<FT> operator+( - const StrTmpl<FT> &s1, const StrTmpl<FT> &s2 ); - -private: - /* A dummy struct solely to make a constructor that will never be - * ambiguous with the public constructors. */ - struct DisAmbig { }; - StrTmpl( char *data, const DisAmbig & ) : data(data) { } -}; - -/* Free all mem used by the string. */ -template<class T> StrTmpl<T>::~StrTmpl() -{ - if ( data != 0 ) { - /* If we are the only ones referencing the string, then delete it. */ - Head *head = ((Head*) data) - 1; - head->refCount -= 1; - if ( head->refCount == 0 ) - free( head ); - } -} - -/* Create from a c-style string. */ -template<class T> StrTmpl<T>::StrTmpl( const char *s ) -{ - if ( s == 0 ) - data = 0; - else { - /* Find the length and allocate the space for the shared string. */ - long length = strlen( s ); - - /* Init space for the data. */ - initSpace( length ); - - /* Copy in the data. 
*/ - memcpy( data, s, length+1 ); - } -} - -/* Create from a c-style string. */ -template<class T> StrTmpl<T>::StrTmpl( const char *s, long length ) -{ - if ( s == 0 ) - data = 0; - else { - /* Init space for the data. */ - initSpace( length ); - - /* Copy in the data. */ - memcpy( data, s, length ); - data[length] = 0; - } -} - -/* Create from a c-style string. */ -template<class T> StrTmpl<T>::StrTmpl( const Fresh &, long length ) -{ - /* Init space for the data. */ - initSpace( length ); - data[length] = 0; -} - -/* Create from another string class. */ -template<class T> StrTmpl<T>::StrTmpl( const StrTmpl &s ) -{ - if ( s.data == 0 ) - data = 0; - else { - /* Take a reference to the string. */ - Head *strHead = ((Head*)s.data) - 1; - strHead->refCount += 1; - data = (char*) (strHead+1); - } -} - -/* Construct a string from with, sprintf. */ -template<class T> StrTmpl<T>::StrTmpl( long lenGuess, const char *format, ... ) -{ - /* Set the string for len. */ - initSpace( lenGuess ); - - va_list args; - - va_start( args, format ); - long written = vsnprintf( data, lenGuess+1, format, args ); - va_end( args ); - - if ( written > lenGuess ) { - setSpace( written ); - va_start( args, format ); - written = vsnprintf( data, written+1, format, args ); - va_end( args ); - } - chop( written ); - - va_end( args ); -} - -/* Create from another string class. */ -template<class T> StrTmpl<T>::StrTmpl( const colm_data *cd ) -{ - if ( cd->data == 0 ) - data = 0; - else { - /* Init space for the data. */ - initSpace( cd->length ); - - /* Copy in the data. */ - memcpy( data, cd->data, cd->length ); - data[cd->length] = 0; - } -} - - - -/* Construct a string from with, sprintf. */ -template<class T> void StrTmpl<T>::setAs( long lenGuess, const char *format, ... ) -{ - /* Set the string for len. */ - setSpace( lenGuess ); - - va_list args; - - /* Write to the temporary buffer. 
*/ - va_start( args, format ); - - long written = vsnprintf( data, lenGuess+1, format, args ); - if ( written > lenGuess ) { - setSpace( written ); - written = vsnprintf( data, written+1, format, args ); - } - chop( written ); - - va_end( args ); -} - -template<class T> void StrTmpl<T>::initSpace( long length ) -{ - /* Find the length and allocate the space for the shared string. */ - Head *head = (Head*) malloc( sizeof(Head) + length+1 ); - if ( head == 0 ) - throw std::bad_alloc(); - - /* Init the header. */ - head->refCount = 1; - head->length = length; - - /* Save the pointer to the data. */ - data = (char*) (head+1); -} - - -/* Set this string to be the c string exactly. The old string is discarded. - * Returns a reference to this. */ -template<class T> StrTmpl<T> &StrTmpl<T>::operator=( const char *s ) -{ - if ( s == 0 ) { - /* Just free the data, we are being set to null. */ - if ( data != 0 ) { - Head *head = ((Head*)data) - 1; - head->refCount -= 1; - if ( head->refCount == 0 ) - free(head); - data = 0; - } - } - else { - /* Find the length of the string we are setting. */ - long length = strlen( s ); - - /* Set the string for len. */ - setSpace( length ); - - /* Copy in the data. */ - memcpy( data, s, length+1 ); - } - return *this; -} - -/* Set this string to be the c string exactly. The old string is discarded. - * Returns a reference to this. */ -template<class T> void StrTmpl<T>::setAs( const char *s, long length ) -{ - if ( s == 0 ) { - /* Just free the data, we are being set to null. */ - if ( data != 0 ) { - Head *head = ((Head*)data) - 1; - head->refCount -= 1; - if ( head->refCount == 0 ) - free(head); - data = 0; - } - } - else { - /* Set the string for len. */ - setSpace( length ); - - /* Copy in the data. 
*/ - memcpy( data, s, length ); - data[length] = 0; - } -} - -template<class T> void StrTmpl<T>::chop( long length ) -{ - Head *head = ((Head*)data) - 1; - assert( head->refCount == 1 ); - assert( length <= head->length ); - head->length = length; - data[length] = 0; -} - -/* Set this string to be the c string exactly. The old string is discarded. - * Returns a reference to this. */ -template<class T> void StrTmpl<T>::setAs( const Fresh &, long length ) -{ - setSpace( length ); - data[length] = 0; -} - -/* Set this string to be the single char exactly. The old string is discarded. - * Returns a reference to this. */ -template<class T> StrTmpl<T> &StrTmpl<T>::operator=( const char c ) -{ - /* Set to length 1. */ - setSpace( 1 ); - - /* Copy in the data. */ - data[0] = c; - data[1] = 0; - - /* Return ourselves. */ - return *this; -} - -/* Set this string to be the StrTmpl s exactly. The old string is - * discarded. */ -template<class T> StrTmpl<T> &StrTmpl<T>::operator=( const StrTmpl &s ) -{ - /* Detach from the existing string. */ - if ( data != 0 ) { - Head *head = ((Head*)data) - 1; - head->refCount -= 1; - if ( head->refCount == 0 ) - free( head ); - } - - if ( s.data != 0 ) { - /* Take a reference to the string. */ - Head *strHead = ((Head*)s.data) - 1; - strHead->refCount += 1; - data = (char*)(strHead+1); - } - else { - /* Setting from a null string, just null our pointer. */ - data = 0; - } - return *this; -} - -/* Prepare the string to be set to something else of the given length. */ -template<class T> void StrTmpl<T>::setSpace( long length ) -{ - /* Detach from the existing string. */ - Head *head = ((Head*)data) - 1; - if ( data != 0 && --head->refCount == 0 ) { - /* Resuse the space. */ - head = (Head*) realloc( head, sizeof(Head) + length+1 ); - } - else { - /* Need to make new space, there is no usable old space. */ - head = (Head*) malloc( sizeof(Head) + length+1 ); - } - if ( head == 0 ) - throw std::bad_alloc(); - - /* Init the header. 
*/ - head->refCount = 1; - head->length = length; - - /* Copy in the data and save the pointer to it. */ - data = (char*) (head+1); -} - - -/* Append a c-style string to the end of this string. Returns a reference to - * this */ -template<class T> StrTmpl<T> &StrTmpl<T>::operator+=( const char *s ) -{ - /* Find the length of the string appended. */ - if ( s != 0 ) { - /* Get the string length and make space on the end. */ - long addedLen = strlen( s ); - char *dest = appendSpace( addedLen ); - - /* Copy the data in. Plus one for the null. */ - memcpy( dest, s, addedLen+1 ); - } - return *this; -} - -/* Append a c-style string of specific length to the end of this string. - * Returns a reference to this */ -template<class T> void StrTmpl<T>::append( const char *s, long length ) -{ - /* Find the length of the string appended. */ - if ( s != 0 ) { - /* Make space on the end. */ - char *dest = appendSpace( length ); - - /* Copy the data in. Plus one for the null. */ - memcpy( dest, s, length ); - dest[length] = 0; - } -} - -/* Append a single char to the end of this string. Returns a reference to - * this */ -template<class T> StrTmpl<T> &StrTmpl<T>::operator+=( const char c ) -{ - /* Grow on the end. */ - char *dst = appendSpace( 1 ); - - /* Append a single charachter. */ - dst[0] = c; - dst[1] = 0; - return *this; -} - - -/* Append an StrTmpl string to the end of this string. Returns a reference - * to this */ -template<class T> StrTmpl<T> &StrTmpl<T>::operator+=( const StrTmpl &s ) -{ - /* Find the length of the string appended. */ - if ( s.data != 0 ) { - /* Find the length to append. */ - long addedLen = (((Head*)s.data) - 1)->length; - - /* Make space on the end to put the string. */ - char *dest = appendSpace( addedLen ); - - /* Append the data, add one for the null. */ - memcpy( dest, s.data, addedLen+1 ); - } - return *this; -} - -/* Make space for a string of length len to be appended. 
*/ -template<class T> char *StrTmpl<T>::appendSpace( long len ) -{ - /* Find the length of this and the string appended. */ - Head *head = (((Head*)data) - 1); - long thisLen = head->length; - - if ( head->refCount == 1 ) { - /* No other string is using the space, grow this space. */ - head = (Head*) realloc( head, - sizeof(Head) + thisLen + len + 1 ); - if ( head == 0 ) - throw std::bad_alloc(); - data = (char*) (head+1); - - /* Adjust the length. */ - head->length += len; - } - else { - /* Another string is using this space, make new space. */ - head->refCount -= 1; - Head *newHead = (Head*) malloc( - sizeof(Head) + thisLen + len + 1 ); - if ( newHead == 0 ) - throw std::bad_alloc(); - data = (char*) (newHead+1); - - /* Set the new header and data from this. */ - newHead->refCount = 1; - newHead->length = thisLen + len; - memcpy( data, head+1, thisLen ); - } - - /* Return writing position. */ - return data + thisLen; -} - -/* Concatenate a String and a c-style string. */ -template<class T> StrTmpl<T> operator+( const StrTmpl<T> &s1, const char *s2 ) -{ - /* Find s2 length and alloc the space for the result. */ - long str1Len = (((typename StrTmpl<T>::Head*)(s1.data)) - 1)->length; - long str2Len = strlen( s2 ); - - typename StrTmpl<T>::Head *head = (typename StrTmpl<T>::Head*) - malloc( sizeof(typename StrTmpl<T>::Head) + str1Len + str2Len + 1 ); - if ( head == 0 ) - throw std::bad_alloc(); - - /* Set up the header. */ - head->refCount = 1; - head->length = str1Len + str2Len; - - /* Save the pointer to data and copy the data in. */ - char *data = (char*) (head+1); - memcpy( data, s1.data, str1Len ); - memcpy( data + str1Len, s2, str2Len + 1 ); - return StrTmpl<T>( data, typename StrTmpl<T>::DisAmbig() ); -} - -/* Concatenate a c-style string and a String. */ -template<class T> StrTmpl<T> operator+( const char *s1, const StrTmpl<T> &s2 ) -{ - /* Find s2 length and alloc the space for the result. 
*/ - long str1Len = strlen( s1 ); - long str2Len = (((typename StrTmpl<T>::Head*)(s2.data)) - 1)->length; - - typename StrTmpl<T>::Head *head = (typename StrTmpl<T>::Head*) - malloc( sizeof(typename StrTmpl<T>::Head) + str1Len + str2Len + 1 ); - if ( head == 0 ) - throw std::bad_alloc(); - - /* Set up the header. */ - head->refCount = 1; - head->length = str1Len + str2Len; - - /* Save the pointer to data and copy the data in. */ - char *data = (char*) (head+1); - memcpy( data, s1, str1Len ); - memcpy( data + str1Len, s2.data, str2Len + 1 ); - return StrTmpl<T>( data, typename StrTmpl<T>::DisAmbig() ); -} - -/* Add two StrTmpl strings. */ -template<class T> StrTmpl<T> operator+( const StrTmpl<T> &s1, const StrTmpl<T> &s2 ) -{ - /* Find s2 length and alloc the space for the result. */ - long str1Len = (((typename StrTmpl<T>::Head*)(s1.data)) - 1)->length; - long str2Len = (((typename StrTmpl<T>::Head*)(s2.data)) - 1)->length; - typename StrTmpl<T>::Head *head = (typename StrTmpl<T>::Head*) - malloc( sizeof(typename StrTmpl<T>::Head) + str1Len + str2Len + 1 ); - if ( head == 0 ) - throw std::bad_alloc(); - - /* Set up the header. */ - head->refCount = 1; - head->length = str1Len + str2Len; - - /* Save the pointer to data and copy the data in. */ - char *data = (char*) (head+1); - memcpy( data, s1.data, str1Len ); - memcpy( data + str1Len, s2.data, str2Len + 1 ); - return StrTmpl<T>( data, typename StrTmpl<T>::DisAmbig() ); -} - -/* Operator used in case the compiler does not support the conversion. */ -template <class T> inline std::ostream &operator<<( std::ostream &o, const StrTmpl<T> &s ) -{ - return o.write( s.data, s.length() ); -} - -typedef StrTmpl<char> String; - -/** - * \brief Compare two null terminated character sequences. - * - * This comparision class is a wrapper for strcmp. - */ -template<class T> struct CmpStrTmpl -{ - /** - * \brief Compare two null terminated string types. 
- */ - static inline long compare( const char *k1, const char *k2 ) - { return strcmp(k1, k2); } - - static int compare( const StrTmpl<T> &s1, const StrTmpl<T> &s2 ) - { - if ( s1.length() < s2.length() ) - return -1; - else if ( s1.length() > s2.length() ) - return 1; - else - return memcmp( s1.data, s2.data, s1.length() ); - } -}; - -typedef CmpStrTmpl<char> ColmCmpStr; - -#ifdef AAPL_NAMESPACE -} -#endif - -#endif /* _AAPL_ASTRING_H */ - diff --git a/src/ctinput.cc b/src/ctinput.cc deleted file mode 100644 index 2adf2a94..00000000 --- a/src/ctinput.cc +++ /dev/null @@ -1,550 +0,0 @@ -/* - * Copyright 2007-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include <stdlib.h> -#include <string.h> -#include <unistd.h> -#include <assert.h> - -#include <iostream> - -#include "compiler.h" -#include "pool.h" -//#include "debug.h" - -using std::cerr; -using std::endl; - -DEF_INPUT_FUNCS( input_funcs_ct, input_impl_ct ); - -extern input_funcs_ct pat_funcs; -extern input_funcs_ct repl_funcs; - -struct input_impl_ct -{ - struct input_funcs *funcs; - - char *name; - long line; - long column; - long byte; - - struct Pattern *pattern; - struct PatternItem *pat_item; - struct Constructor *constructor; - struct ConsItem *cons_item; - - char eof_mark; - char eof_sent; - - int offset; -}; - -void ct_destructor( program_t *prg, tree_t **sp, struct input_impl_ct *ss ) -{ -} - -char ct_get_eof_sent( struct colm_program *prg, struct input_impl_ct *si ) -{ - return si->eof_sent; -} - -void ct_set_eof_sent( struct colm_program *prg, struct input_impl_ct *si, char eof_sent ) -{ - si->eof_sent = eof_sent; -} - -/* - * Pattern - */ - -struct input_impl *colm_impl_new_pat( char *name, Pattern *pattern ) -{ - struct input_impl_ct *ss = (struct input_impl_ct*)malloc(sizeof(struct input_impl_ct)); - memset( ss, 0, sizeof(struct input_impl_ct) ); - ss->pattern = pattern; - ss->pat_item = pattern->list->head; - ss->funcs = (struct input_funcs*)&pat_funcs; - return (struct input_impl*) ss; -} - -int pat_get_parse_block( struct colm_program *prg, struct input_impl_ct *ss, int *pskip, - char **pdp, int *copied ) -{ - *copied = 0; - - PatternItem *buf = ss->pat_item; - int offset = ss->offset; - - while ( true ) { - if ( buf == 0 ) - return INPUT_EOF; - - if ( buf->form == PatternItem::TypeRefForm ) - return INPUT_LANG_EL; - - assert ( buf->form == PatternItem::InputTextForm ); - int avail = buf->data.length() - offset; - - if ( avail > 0 ) { - /* The source data from the current buffer. */ - char *src = &buf->data[offset]; - int slen = avail; - - /* Need to skip? */ - if ( *pskip > 0 && slen <= *pskip ) { - /* Skipping the the whole source. 
*/ - *pskip -= slen; - } - else { - /* Either skip is zero, or less than slen. Skip goes to zero. - * Some data left over, copy it. */ - src += *pskip; - slen -= *pskip; - *pskip = 0; - - *pdp = src; - *copied += slen; - break; - } - } - - buf = buf->next; - offset = 0; - } - - return INPUT_DATA; -} - - -int pat_get_data( struct colm_program *prg, struct input_impl_ct *ss, char *dest, int length ) -{ - int copied = 0; - - PatternItem *buf = ss->pat_item; - int offset = ss->offset; - - while ( true ) { - if ( buf == 0 ) - break; - - if ( buf->form == PatternItem::TypeRefForm ) - break; - - assert ( buf->form == PatternItem::InputTextForm ); - int avail = buf->data.length() - offset; - - if ( avail > 0 ) { - /* The source data from the current buffer. */ - char *src = &buf->data[offset]; - int slen = avail <= length ? avail : length; - - memcpy( dest+copied, src, slen ) ; - copied += slen; - length -= slen; - } - - if ( length == 0 ) - break; - - buf = buf->next; - offset = 0; - } - - return copied; -} - -void pat_backup( struct input_impl_ct *ss ) -{ - if ( ss->pat_item == 0 ) - ss->pat_item = ss->pattern->list->tail; - else - ss->pat_item = ss->pat_item->prev; -} - -int pat_consume_data( struct colm_program *prg, struct input_impl_ct *ss, int length, location_t *loc ) -{ - //debug( REALM_INPUT, "consuming %ld bytes\n", length ); - - int consumed = 0; - - while ( true ) { - if ( ss->pat_item == 0 ) - break; - - int avail = ss->pat_item->data.length() - ss->offset; - - if ( length >= avail ) { - /* Read up to the end of the data. Advance the - * pattern item. 
*/ - ss->pat_item = ss->pat_item->next; - ss->offset = 0; - - length -= avail; - consumed += avail; - - if ( length == 0 ) - break; - } - else { - ss->offset += length; - consumed += length; - break; - } - } - - return consumed; -} - -int pat_undo_consume_data( struct colm_program *prg, struct input_impl_ct *ss, const char *data, int length ) -{ - ss->offset -= length; - return length; -} - -LangEl *pat_consume_lang_el( struct colm_program *prg, struct input_impl_ct *ss, long *bindId, - char **data, long *length ) -{ - LangEl *klangEl = ss->pat_item->prodEl->langEl; - *bindId = ss->pat_item->bindId; - *data = 0; - *length = 0; - - ss->pat_item = ss->pat_item->next; - ss->offset = 0; - return klangEl; -} - -void pat_undo_consume_lang_el( struct colm_program *prg, struct input_impl_ct *ss ) -{ - pat_backup( ss ); - ss->offset = ss->pat_item->data.length(); -} - -void ct_set_eof_mark( struct colm_program *prg, struct input_impl_ct *si, char eof_mark ) -{ - si->eof_mark = eof_mark; -} - -void ct_transfer_loc_seq( struct colm_program *prg, location_t *loc, struct input_impl_ct *ss ) -{ - loc->name = ss->name; - loc->line = ss->line; - loc->column = ss->column; - loc->byte = ss->byte; -} - -input_funcs_ct pat_funcs = -{ - &pat_get_parse_block, - &pat_get_data, - - &pat_consume_data, - &pat_undo_consume_data, - - 0, /* consume_tree */ - 0, /* undo_consume_tree */ - - &pat_consume_lang_el, - &pat_undo_consume_lang_el, - - 0, 0, 0, 0, 0, 0, /* prepend funcs. 
*/ - 0, 0, 0, 0, 0, 0, /* append funcs */ - - &ct_set_eof_mark, - - &ct_transfer_loc_seq, - &ct_destructor, - - 0, 0 -}; - - -/* - * Replacements - */ - -struct input_impl *colm_impl_new_cons( char *name, Constructor *constructor ) -{ - struct input_impl_ct *ss = (struct input_impl_ct*)malloc(sizeof(struct input_impl_ct)); - memset( ss, 0, sizeof(struct input_impl_ct) ); - ss->constructor = constructor; - ss->cons_item = constructor->list->head; - ss->funcs = (struct input_funcs*)&repl_funcs; - return (struct input_impl*)ss; -} - -LangEl *repl_consume_lang_el( struct colm_program *prg, struct input_impl_ct *ss, long *bindId, char **data, long *length ) -{ - LangEl *klangEl = ss->cons_item->type == ConsItem::ExprType ? - ss->cons_item->langEl : ss->cons_item->prodEl->langEl; - *bindId = ss->cons_item->bindId; - - *data = 0; - *length = 0; - - if ( ss->cons_item->type == ConsItem::LiteralType ) { - if ( ss->cons_item->prodEl->typeRef->pdaLiteral != 0 ) { - bool unusedCI; - prepareLitString( ss->cons_item->data, unusedCI, - ss->cons_item->prodEl->typeRef->pdaLiteral->data, - ss->cons_item->prodEl->typeRef->pdaLiteral->loc ); - - *data = ss->cons_item->data; - *length = ss->cons_item->data.length(); - } - } - - ss->cons_item = ss->cons_item->next; - ss->offset = 0; - return klangEl; -} - -int repl_get_parse_block( struct colm_program *prg, struct input_impl_ct *ss, - int *pskip, char **pdp, int *copied ) -{ - *copied = 0; - - ConsItem *buf = ss->cons_item; - int offset = ss->offset; - - while ( true ) { - if ( buf == 0 ) - return INPUT_EOF; - - if ( buf->type == ConsItem::ExprType || buf->type == ConsItem::LiteralType ) - return INPUT_LANG_EL; - - assert ( buf->type == ConsItem::InputText ); - int avail = buf->data.length() - offset; - - if ( avail > 0 ) { - /* The source data from the current buffer. */ - char *src = &buf->data[offset]; - int slen = avail; - - /* Need to skip? */ - if ( *pskip > 0 && slen <= *pskip ) { - /* Skipping the the whole source. 
*/ - *pskip -= slen; - } - else { - /* Either skip is zero, or less than slen. Skip goes to zero. - * Some data left over, copy it. */ - src += *pskip; - slen -= *pskip; - *pskip = 0; - - *pdp = src; - *copied += slen; - break; - } - } - - buf = buf->next; - offset = 0; - } - - return INPUT_DATA; -} - -int repl_get_data( struct colm_program *prg, struct input_impl_ct *ss, char *dest, int length ) -{ - int copied = 0; - - ConsItem *buf = ss->cons_item; - int offset = ss->offset; - - while ( true ) { - if ( buf == 0 ) - break; - - if ( buf->type == ConsItem::ExprType || buf->type == ConsItem::LiteralType ) - break; - - assert ( buf->type == ConsItem::InputText ); - int avail = buf->data.length() - offset; - - if ( avail > 0 ) { - /* The source data from the current buffer. */ - char *src = &buf->data[offset]; - int slen = avail <= length ? avail : length; - - memcpy( dest+copied, src, slen ) ; - copied += slen; - length -= slen; - } - - if ( length == 0 ) - break; - - buf = buf->next; - offset = 0; - } - - return copied; -} - -void repl_backup( struct input_impl_ct *ss ) -{ - if ( ss->cons_item == 0 ) - ss->cons_item = ss->constructor->list->tail; - else - ss->cons_item = ss->cons_item->prev; -} - -void repl_undo_consume_lang_el( struct colm_program *prg, struct input_impl_ct *ss ) -{ - repl_backup( ss ); - ss->offset = ss->cons_item->data.length(); -} - - -int repl_consume_data( struct colm_program *prg, struct input_impl_ct *ss, int length, location_t *loc ) -{ - int consumed = 0; - - while ( true ) { - if ( ss->cons_item == 0 ) - break; - - int avail = ss->cons_item->data.length() - ss->offset; - - if ( length >= avail ) { - /* Read up to the end of the data. Advance the - * pattern item. 
*/ - ss->cons_item = ss->cons_item->next; - ss->offset = 0; - - length -= avail; - consumed += avail; - - if ( length == 0 ) - break; - } - else { - ss->offset += length; - consumed += length; - break; - } - } - - return consumed; -} - -int repl_undo_consume_data( struct colm_program *prg, struct input_impl_ct *ss, const char *data, int length ) -{ - int origLen = length; - while ( true ) { - int avail = ss->offset; - - /* Okay to go up to the front of the buffer. */ - if ( length > avail ) { - ss->cons_item= ss->cons_item->prev; - ss->offset = ss->cons_item->data.length(); - length -= avail; - } - else { - ss->offset -= length; - break; - } - } - - return origLen; -} - -input_funcs_ct repl_funcs = -{ - &repl_get_parse_block, - &repl_get_data, - - &repl_consume_data, - &repl_undo_consume_data, - - 0, /* consume_tree */ - 0, /* undo_consume_tree. */ - - &repl_consume_lang_el, - &repl_undo_consume_lang_el, - - 0, 0, 0, 0, 0, 0, /* prepend. */ - 0, 0, 0, 0, 0, 0, /* append. */ - - &ct_set_eof_mark, - - &ct_transfer_loc_seq, - &ct_destructor, - - 0, 0 -}; - -void pushBinding( pda_run *pdaRun, parse_tree_t *parseTree ) -{ - /* If the item is bound then store it in the bindings array. */ - pdaRun->bindings->push( parseTree ); -} - -extern "C" void internalSendNamedLangEl( program_t *prg, tree_t **sp, - struct pda_run *pdaRun, struct input_impl *is ) -{ - /* All three set by consumeLangEl. */ - long bindId; - char *data; - long length; - - LangEl *klangEl = is->funcs->consume_lang_el( prg, is, &bindId, &data, &length ); - - //cerr << "named langEl: " << prg->rtd->lelInfo[klangEl->id].name << endl; - - /* Copy the token data. 
*/ - head_t *tokdata = 0; - if ( data != 0 ) - tokdata = string_alloc_full( prg, data, length ); - - kid_t *input = make_token_with_data( prg, pdaRun, is, klangEl->id, tokdata ); - - colm_increment_steps( pdaRun ); - - parse_tree_t *parseTree = parse_tree_allocate( pdaRun ); - parseTree->id = input->tree->id; - parseTree->flags |= PF_NAMED; - parseTree->shadow = input; - - if ( bindId > 0 ) - pushBinding( pdaRun, parseTree ); - - pdaRun->parse_input = parseTree; -} - -extern "C" void internalInitBindings( pda_run *pdaRun ) -{ - /* Bindings are indexed at 1. Need a no-binding. */ - pdaRun->bindings = new bindings; - pdaRun->bindings->push(0); -} - -extern "C" void internalPopBinding( pda_run *pdaRun, parse_tree_t *parseTree ) -{ - parse_tree_t *lastBound = pdaRun->bindings->top(); - if ( lastBound == parseTree ) - pdaRun->bindings->pop(); -} diff --git a/src/debug.c b/src/debug.c deleted file mode 100644 index 1cfd24d7..00000000 --- a/src/debug.c +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Copyright 2010-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <colm/debug.h> - -#include <stdarg.h> -#include <stdio.h> -#include <stdlib.h> - -#include <colm/program.h> - -const char *const colm_realm_names[REALMS] = - // @NOTE: keep this in sync with 'main.cc': 'processArgs()' '-D' option - { - "BYTECODE", - "PARSE", - "MATCH", - "COMPILE", - "POOL", - "PRINT", - "INPUT", - "SCAN", - }; - -int _debug( struct colm_program *prg, long realm, const char *fmt, ... ) -{ - int result = 0; - if ( prg->active_realm & realm ) { - /* Compute the index by shifting. */ - int ind = 0; - while ( (realm & 0x1) != 0x1 ) { - realm >>= 1; - ind += 1; - } - - fprintf( stderr, "%s: ", colm_realm_names[ind] ); - va_list args; - va_start( args, fmt ); - result = vfprintf( stderr, fmt, args ); - va_end( args ); - } - - return result; -} - -void fatal( const char *fmt, ... ) -{ - va_list args; - fprintf( stderr, "fatal: " ); - va_start( args, fmt ); - vfprintf( stderr, fmt, args ); - va_end( args ); - exit(1); -} - -void message( const char *fmt, ... 
) -{ - va_list args; - fprintf( stderr, "message: " ); - va_start( args, fmt ); - vfprintf( stderr, fmt, args ); - va_end( args ); -} diff --git a/src/debug.h b/src/debug.h deleted file mode 100644 index c7072a44..00000000 --- a/src/debug.h +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright 2010-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef _COLM_DEBUG_H -#define _COLM_DEBUG_H - -#ifdef __cplusplus -extern "C" { -#endif - -#include "config.h" -#include "colm.h" - -void fatal( const char *fmt, ... ); - -#ifdef DEBUG -#define debug( prg, realm, ... ) _debug( prg, realm, __VA_ARGS__ ) -#define check_realm( realm ) _check_realm( realm ) -#else -#define debug( prg, realm, ... ) -#define check_realm( realm ) -#endif - -int _debug( struct colm_program *prg, long realm, const char *fmt, ... ); - -void message( const char *fmt, ... 
); - -#define REALM_BYTECODE COLM_DBG_BYTECODE -#define REALM_PARSE COLM_DBG_PARSE -#define REALM_MATCH COLM_DBG_MATCH -#define REALM_COMPILE COLM_DBG_COMPILE -#define REALM_POOL COLM_DBG_POOL -#define REALM_PRINT COLM_DBG_PRINT -#define REALM_INPUT COLM_DBG_INPUT -#define REALM_SCAN COLM_DBG_SCAN - -#define REALMS 32 - -extern const char *const colm_realm_names[REALMS]; - -#ifdef __cplusplus -} -#endif - -#endif /* _COLM_DEBUG_H */ - diff --git a/src/declare.cc b/src/declare.cc deleted file mode 100644 index 884c446a..00000000 --- a/src/declare.cc +++ /dev/null @@ -1,1623 +0,0 @@ -/* - * Copyright 2012-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include <stdbool.h> -#include <assert.h> - -#include <iostream> - -#include "compiler.h" - -void Compiler::initUniqueTypes( ) -{ - uniqueTypeNil = new UniqueType( TYPE_NIL ); - uniqueTypeVoid = new UniqueType( TYPE_VOID ); - uniqueTypePtr = new UniqueType( TYPE_TREE, ptrLangEl ); - uniqueTypeBool = new UniqueType( TYPE_BOOL ); - uniqueTypeInt = new UniqueType( TYPE_INT ); - uniqueTypeStr = new UniqueType( TYPE_TREE, strLangEl ); - uniqueTypeIgnore = new UniqueType( TYPE_TREE, ignoreLangEl ); - uniqueTypeAny = new UniqueType( TYPE_TREE, anyLangEl ); - - uniqueTypeInput = new UniqueType( TYPE_STRUCT, inputSel ); - uniqueTypeStream = new UniqueType( TYPE_STRUCT, streamSel ); - - uniqeTypeMap.insert( uniqueTypeNil ); - uniqeTypeMap.insert( uniqueTypeVoid ); - uniqeTypeMap.insert( uniqueTypePtr ); - uniqeTypeMap.insert( uniqueTypeBool ); - uniqeTypeMap.insert( uniqueTypeInt ); - uniqeTypeMap.insert( uniqueTypeStr ); - uniqeTypeMap.insert( uniqueTypeIgnore ); - uniqeTypeMap.insert( uniqueTypeAny ); - - uniqeTypeMap.insert( uniqueTypeInput ); - uniqeTypeMap.insert( uniqueTypeStream ); -} - -ObjectMethod *initFunction( UniqueType *retType, Namespace *nspace, ObjectDef *obj, - ObjectMethod::Type type, const String &name, int methIdWV, int methIdWC, - int nargs, UniqueType **args, bool isConst, bool useFnInstr, - GenericType *useGeneric ) -{ - ObjectMethod *objMethod = new ObjectMethod( retType, name, - methIdWV, methIdWC, nargs, args, 0, isConst ); - objMethod->type = type; - objMethod->useFnInstr = useFnInstr; - - if ( nspace != 0 ) - nspace->rootScope->methodMap.insert( name, objMethod ); - else - obj->rootScope->methodMap.insert( name, objMethod ); - - if ( useGeneric ) { - objMethod->useGenericId = true; - objMethod->generic = useGeneric; - } - - return objMethod; -} - -ObjectMethod *initFunction( UniqueType *retType, ObjectDef *obj, - ObjectMethod::Type type, const String &name, int methIdWV, int methIdWC, bool isConst, - bool useFnInstr, GenericType 
*useGeneric ) -{ - return initFunction( retType, 0, obj, type, name, methIdWV, methIdWC, - 0, 0, isConst, useFnInstr, useGeneric ); -} - -ObjectMethod *initFunction( UniqueType *retType, ObjectDef *obj, - ObjectMethod::Type type, const String &name, int methIdWV, int methIdWC, UniqueType *arg1, - bool isConst, bool useFnInstr, GenericType *useGeneric ) -{ - UniqueType *args[] = { arg1 }; - return initFunction( retType, 0, obj, type, name, methIdWV, methIdWC, - 1, args, isConst, useFnInstr, useGeneric ); -} - -ObjectMethod *initFunction( UniqueType *retType, ObjectDef *obj, - ObjectMethod::Type type, const String &name, int methIdWV, int methIdWC, - UniqueType *arg1, UniqueType *arg2, - bool isConst, bool useFnInstr, GenericType *useGeneric ) -{ - UniqueType *args[] = { arg1, arg2 }; - return initFunction( retType, 0, obj, type, name, methIdWV, methIdWC, - 2, args, isConst, useFnInstr, useGeneric ); -} - -/* - * With namespace supplied. Global functions. - */ - -ObjectMethod *initFunction( UniqueType *retType, Namespace *nspace, ObjectDef *obj, - ObjectMethod::Type type, const String &name, int methIdWV, int methIdWC, bool isConst, - bool useFnInstr, GenericType *useGeneric ) -{ - return initFunction( retType, nspace, obj, type, name, methIdWV, methIdWC, - 0, 0, isConst, useFnInstr, useGeneric ); -} - -ObjectMethod *initFunction( UniqueType *retType, Namespace *nspace, ObjectDef *obj, - ObjectMethod::Type type, const String &name, int methIdWV, int methIdWC, UniqueType *arg1, - bool isConst, bool useFnInstr, GenericType *useGeneric ) -{ - UniqueType *args[] = { arg1 }; - return initFunction( retType, nspace, obj, type, name, methIdWV, methIdWC, - 1, args, isConst, useFnInstr, useGeneric ); -} - -ObjectMethod *initFunction( UniqueType *retType, Namespace *nspace, ObjectDef *obj, - ObjectMethod::Type type, const String &name, int methIdWV, int methIdWC, - UniqueType *arg1, UniqueType *arg2, - bool isConst, bool useFnInstr, GenericType *useGeneric ) -{ - UniqueType 
*args[] = { arg1, arg2 }; - return initFunction( retType, nspace, obj, type, name, methIdWV, methIdWC, - 2, args, isConst, useFnInstr, useGeneric ); -} - -ObjectField *NameScope::checkRedecl( const String &name ) -{ - return owningObj->checkRedecl( this, name ); -} - -void NameScope::insertField( const String &name, ObjectField *value ) -{ - return owningObj->insertField( this, name, value ); -} - -ObjectField *ObjectDef::checkRedecl( NameScope *inScope, const String &name ) -{ - FieldMapEl *objDefMapEl = inScope->fieldMap.find( name ); - if ( objDefMapEl != 0 ) - return objDefMapEl->value; - return 0; -} - -void ObjectDef::insertField( NameScope *inScope, const String &name, ObjectField *value ) -{ - inScope->fieldMap.insert( name, value ); - fieldList.append( value ); - value->scope = inScope; -} - -NameScope *ObjectDef::pushScope( NameScope *curScope ) -{ - NameScope *newScope = new NameScope; - - newScope->owningObj = this; - newScope->parentScope = curScope; - curScope->children.append( newScope ); - - return newScope; -} - -void LexJoin::varDecl( Compiler *pd, TokenDef *tokenDef ) -{ - expr->varDecl( pd, tokenDef ); -} - -void LexExpression::varDecl( Compiler *pd, TokenDef *tokenDef ) -{ - switch ( type ) { - case OrType: case IntersectType: case SubtractType: - case StrongSubtractType: - expression->varDecl( pd, tokenDef ); - term->varDecl( pd, tokenDef ); - break; - case TermType: - term->varDecl( pd, tokenDef ); - break; - case BuiltinType: - break; - } -} - -void LexTerm::varDecl( Compiler *pd, TokenDef *tokenDef ) -{ - switch ( type ) { - case ConcatType: - case RightStartType: - case RightFinishType: - case LeftType: - term->varDecl( pd, tokenDef ); - factorAug->varDecl( pd, tokenDef ); - break; - case FactorAugType: - factorAug->varDecl( pd, tokenDef ); - break; - } -} - -void LexFactorAug::varDecl( Compiler *pd, TokenDef *tokenDef ) -{ - for ( ReCaptureVect::Iter re = reCaptureVect; re.lte(); re++ ) { - if ( 
tokenDef->objectDef->rootScope->checkRedecl( re->objField->name ) != 0 ) { - error(re->objField->loc) << "label name \"" << - re->objField->name << "\" already in use" << endp; - } - - /* Insert it into the map. */ - tokenDef->objectDef->rootScope->insertField( re->objField->name, re->objField ); - - /* Store it in the TokenDef. */ - tokenDef->reCaptureVect.append( *re ); - } -} - -void Compiler::declareReVars() -{ - for ( NamespaceList::Iter n = namespaceList; n.lte(); n++ ) { - for ( TokenDefListNs::Iter tok = n->tokenDefList; tok.lte(); tok++ ) { - if ( tok->join != 0 ) - tok->join->varDecl( this, tok ); - } - } - - /* FIXME: declare RE captures in token generation actions. */ -#if 0 - /* Add captures to the local frame. We Depend on these becoming the - * first local variables so we can compute their location. */ - - /* Make local variables corresponding to the local capture vector. */ - for ( ReCaptureVect::Iter c = reCaptureVect; c.lte(); c++ ) - { - ObjectField *objField = ObjectField::cons( c->objField->loc, - c->objField->typeRef, c->objField->name ); - - /* Insert it into the field map. */ - pd->curLocalFrame->insertField( objField->name, objField ); - } -#endif -} - -LangEl *declareLangEl( Compiler *pd, Namespace *nspace, - const String &data, LangEl::Type type ) -{ - /* If the id is already in the dict, it will be placed in last found. If - * it is not there then it will be inserted and last found will be set to it. */ - TypeMapEl *inDict = nspace->typeMap.find( data ); - if ( inDict != 0 ) - error() << "language element '" << data << "' already defined as something else" << endp; - - /* Language element not there. Make the new lang el and insert.. 
*/ - LangEl *langEl = new LangEl( nspace, data, type ); - TypeMapEl *typeMapEl = new TypeMapEl( TypeMapEl::LangElType, data, langEl ); - nspace->typeMap.insert( typeMapEl ); - pd->langEls.append( langEl ); - - return langEl; -} - -StructEl *declareStruct( Compiler *pd, Namespace *inNspace, - const String &data, StructDef *structDef ) -{ - if ( inNspace != 0 ) { - TypeMapEl *inDict = inNspace->typeMap.find( data ); - if ( inDict != 0 ) - error() << "struct '" << data << "' already defined as something else" << endp; - } - - StructEl *structEl = new StructEl( data, structDef ); - pd->structEls.append( structEl ); - structDef->structEl = structEl; - - if ( inNspace ) { - TypeMapEl *typeMapEl = new TypeMapEl( TypeMapEl::StructType, data, structEl ); - inNspace->typeMap.insert( typeMapEl ); - } - - return structEl; -} - -/* Does not map the new language element. */ -LangEl *addLangEl( Compiler *pd, Namespace *inNspace, - const String &data, LangEl::Type type ) -{ - LangEl *langEl = new LangEl( inNspace, data, type ); - pd->langEls.append( langEl ); - return langEl; -} - -void declareTypeAlias( Compiler *pd, Namespace *nspace, - const String &data, TypeRef *typeRef ) -{ - /* If the id is already in the dict, it will be placed in last found. If - * it is not there then it will be inserted and last found will be set to it. */ - TypeMapEl *inDict = nspace->typeMap.find( data ); - if ( inDict != 0 ) - error() << "alias '" << data << "' already defined as something else" << endp; - - /* Language element not there. Make the new lang el and insert. */ - TypeMapEl *typeMapEl = new TypeMapEl( TypeMapEl::AliasType, data, typeRef ); - nspace->typeMap.insert( typeMapEl ); -} - -LangEl *findType( Compiler *pd, Namespace *nspace, const String &data ) -{ - /* If the id is already in the dict, it will be placed in last found. If - * it is not there then it will be inserted and last found will be set to it. 
*/ - TypeMapEl *inDict = nspace->typeMap.find( data ); - - if ( inDict == 0 ) - error() << "'" << data << "' not declared as anything" << endp; - - return inDict->value; -} - - -void Compiler::declareBaseLangEls() -{ - /* Order here is important because we make assumptions about the inbuilt - * language elements in the runtime. Note tokens are have identifiers set - * in an initial pass. */ - - /* Make a "_notoken" language element. This element is used when a - * generation action fails to generate anything, but there is reverse code - * that needs to be associated with a language element. This allows us to - * always associate reverse code with the first language element produced - * after a generation action. */ - noTokenLangEl = declareLangEl( this, rootNamespace, "_notoken", LangEl::Term ); - noTokenLangEl->isIgnore = true; - - ptrLangEl = declareLangEl( this, rootNamespace, "ptr", LangEl::Term ); - strLangEl = declareLangEl( this, rootNamespace, "str", LangEl::Term ); - ignoreLangEl = declareLangEl( this, rootNamespace, "il", LangEl::Term ); - - /* Make the EOF language element. */ - eofLangEl = 0; - - /* Make the "any" language element */ - anyLangEl = declareLangEl( this, rootNamespace, "any", LangEl::NonTerm ); -} - - -void Compiler::addProdRedObjectVar( ObjectDef *localFrame, LangEl *nonTerm ) -{ - UniqueType *prodNameUT = findUniqueType( TYPE_TREE, nonTerm ); - TypeRef *typeRef = TypeRef::cons( internal, prodNameUT ); - ObjectField *el = ObjectField::cons( internal, - ObjectField::LhsElType, typeRef, "lhs" ); - - localFrame->rootScope->insertField( el->name, el ); -} - -void Compiler::addProdRHSVars( ObjectDef *localFrame, ProdElList *prodElList ) -{ - long position = 1; - for ( ProdElList::Iter rhsEl = *prodElList; rhsEl.lte(); rhsEl++, position++ ) { - if ( rhsEl->type == ProdEl::ReferenceType ) { - /* Use an offset of zero. For frame objects we compute the offset on - * demand. 
*/ - String name( 8, "r%d", position ); - ObjectField *el = ObjectField::cons( InputLoc(), - ObjectField::RedRhsType, rhsEl->typeRef, name ); - rhsEl->rhsElField = el; - - /* Right hand side elements are constant. */ - el->isConst = true; - localFrame->rootScope->insertField( el->name, el ); - } - } -} - -void GenericType::declare( Compiler *pd, Namespace *nspace ) -{ - elUt = elTr->resolveType( pd ); - - if ( typeId == GEN_MAP ) - keyUt = keyTr->resolveType( pd ); - - if ( typeId == GEN_MAP || typeId == GEN_LIST ) - valueUt = valueTr->resolveType( pd ); - - objDef = ObjectDef::cons( ObjectDef::BuiltinType, - "generic", pd->nextObjectId++ ); - - switch ( typeId ) { - case GEN_MAP: - pd->initMapFunctions( this ); - pd->initMapFields( this ); - break; - case GEN_LIST: - pd->initListFunctions( this ); - pd->initListFields( this ); - break; - case GEN_PARSER: - elUt->langEl->parserId = pd->nextParserId++; - pd->initParserFunctions( this ); - pd->initParserFields( this ); - break; - } -} - -void Namespace::declare( Compiler *pd ) -{ - for ( GenericList::Iter g = genericList; g.lte(); g++ ) - g->declare( pd, this ); - - for ( TokenDefListNs::Iter tokenDef = tokenDefList; tokenDef.lte(); tokenDef++ ) { - if ( tokenDef->isLiteral ) { - if ( tokenDef->isZero ) { - assert( tokenDef->regionSet->collectIgnore->zeroLel != 0 ); - tokenDef->tdLangEl = tokenDef->regionSet->collectIgnore->zeroLel; - } - else { - /* Original. Create a token for the literal. 
*/ - LangEl *litEl = declareLangEl( pd, this, tokenDef->name, LangEl::Term ); - - litEl->lit = tokenDef->literal; - litEl->isLiteral = true; - litEl->tokenDef = tokenDef; - litEl->objectDef = tokenDef->objectDef; - - tokenDef->tdLangEl = litEl; - - if ( tokenDef->noPreIgnore ) - litEl->noPreIgnore = true; - if ( tokenDef->noPostIgnore ) - litEl->noPostIgnore = true; - } - } - } - - for ( StructDefList::Iter s = structDefList; s.lte(); s++ ) - declareStruct( pd, this, s->name, s ); - - for ( TokenDefListNs::Iter tokenDef = tokenDefList; tokenDef.lte(); tokenDef++ ) { - /* Literals already taken care of. */ - if ( ! tokenDef->isLiteral ) { - /* Create the token. */ - LangEl *tokEl = declareLangEl( pd, this, tokenDef->name, LangEl::Term ); - tokEl->isIgnore = tokenDef->isIgnore; - tokEl->transBlock = tokenDef->codeBlock; - tokEl->objectDef = tokenDef->objectDef; - tokEl->contextIn = tokenDef->contextIn; - tokEl->tokenDef = tokenDef; - - if ( tokenDef->noPreIgnore ) - tokEl->noPreIgnore = true; - if ( tokenDef->noPostIgnore ) - tokEl->noPostIgnore = true; - - tokenDef->tdLangEl = tokEl; - - if ( tokenDef->isZero ) { - /* Setting zero lel to newly created tokEl. */ - tokenDef->regionSet->collectIgnore->zeroLel = tokEl; - tokEl->isZero = true; - } - } - } - - for ( NtDefList::Iter n = ntDefList; n.lte(); n++ ) { - /* Get the language element. */ - LangEl *langEl = declareLangEl( pd, this, n->name, LangEl::NonTerm ); - //$$->langEl = langEl; - - /* Get the language element. */ - langEl->objectDef = n->objectDef; - langEl->reduceFirst = n->reduceFirst; - langEl->contextIn = n->contextIn; - langEl->defList.transfer( *n->defList ); - - for ( LelDefList::Iter d = langEl->defList; d.lte(); d++ ) { - d->prodName = langEl; - - if ( d->redBlock != 0 ) { - pd->addProdRedObjectVar( d->redBlock->localFrame, langEl ); - pd->addProdRHSVars( d->redBlock->localFrame, d->prodElList ); - } - - /* References to the reduce item. 
*/ - } - } - - for ( TypeAliasList::Iter ta = typeAliasList; ta.lte(); ta++ ) - declareTypeAlias( pd, this, ta->name, ta->typeRef ); - - /* Go into child aliases. */ - for ( NamespaceVect::Iter c = childNamespaces; c.lte(); c++ ) - (*c)->declare( pd ); -} - -void Compiler::makeIgnoreCollectors() -{ - for ( RegionSetList::Iter regionSet = regionSetList; regionSet.lte(); regionSet++ ) { - if ( regionSet->collectIgnore->zeroLel == 0 ) { - String name( 128, "_ign_%p", regionSet->tokenIgnore ); - LangEl *zeroLel = new LangEl( rootNamespace, name, LangEl::Term ); - langEls.append( zeroLel ); - zeroLel->isZero = true; - zeroLel->regionSet = regionSet; - - regionSet->collectIgnore->zeroLel = zeroLel; - } - } -} - -void LangStmt::chooseDefaultIter( Compiler *pd, IterCall *iterCall ) const -{ - /* This is two-part, It gets rewritten before evaluation in synthesis. */ - - /* The iterator name. */ - LangVarRef *callVarRef = LangVarRef::cons( loc, 0, context, scope, "triter" ); - - /* The parameters. 
*/ - CallArgVect *callExprVect = new CallArgVect; - callExprVect->append( new CallArg( iterCall->langExpr ) ); - iterCall->langTerm = LangTerm::cons( InputLoc(), callVarRef, callExprVect ); - iterCall->langExpr = 0; - iterCall->form = IterCall::Call; - iterCall->wasExpr = true; -} - -void LangStmt::declareForIter( Compiler *pd ) const -{ - if ( iterCall->form != IterCall::Call ) - chooseDefaultIter( pd, iterCall ); - - objField->typeRef = TypeRef::cons( loc, typeRef, iterCall ); -} - -void LangStmt::declare( Compiler *pd ) const -{ - switch ( type ) { - case ExprType: - break; - case IfType: - for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ ) - stmt->declare( pd ); - - if ( elsePart != 0 ) - elsePart->declare( pd ); - break; - - case ElseType: - for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ ) - stmt->declare( pd ); - break; - case RejectType: - break; - case WhileType: - for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ ) - stmt->declare( pd ); - break; - case AssignType: - break; - case ForIterType: - declareForIter( pd ); - - for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ ) - stmt->declare( pd ); - break; - case ReturnType: - break; - case BreakType: - break; - case YieldType: - break; - } -} - -void CodeBlock::declare( Compiler *pd ) const -{ - for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ ) - stmt->declare( pd ); -} - -void Compiler::declareFunction( Function *func ) -{ - CodeBlock *block = func->codeBlock; - block->declare( this ); -} - -void Compiler::declareReductionCode( Production *prod ) -{ - CodeBlock *block = prod->redBlock; - block->declare( this ); -} - -void Compiler::declareTranslateBlock( LangEl *langEl ) -{ - CodeBlock *block = langEl->transBlock; - - /* References to the reduce item. 
*/ - addMatchLength( block->localFrame, langEl ); - addMatchText( block->localFrame, langEl ); - addInput( block->localFrame ); - addThis( block->localFrame ); - - block->declare( this ); -} - -void Compiler::declarePreEof( TokenRegion *region ) -{ - CodeBlock *block = region->preEofBlock; - - addInput( block->localFrame ); - addThis( block->localFrame ); - - block->declare( this ); -} - -void Compiler::declareRootBlock() -{ - CodeBlock *block = rootCodeBlock; - block->declare( this ); -} - -void Compiler::declareByteCode() -{ - for ( FunctionList::Iter f = functionList; f.lte(); f++ ) - declareFunction( f ); - - for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) { - if ( prod->redBlock != 0 ) - declareReductionCode( prod ); - } - - for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { - if ( lel->transBlock != 0 ) - declareTranslateBlock( lel ); - } - - for ( RegionList::Iter r = regionList; r.lte(); r++ ) { - if ( r->preEofBlock != 0 ) - declarePreEof( r ); - } - - declareRootBlock( ); -} - -void Compiler::makeDefaultIterators() -{ - /* Tree iterator. */ - { - UniqueType *anyRefUT = findUniqueType( TYPE_REF, anyLangEl ); - ObjectMethod *objMethod = initFunction( uniqueTypeAny, rootNamespace, globalObjectDef, - ObjectMethod::Call, "triter", IN_HALT, IN_HALT, anyRefUT, true ); - - IterDef *triter = findIterDef( IterDef::Tree ); - objMethod->iterDef = triter; - } - - /* Child iterator. */ - { - UniqueType *anyRefUT = findUniqueType( TYPE_REF, anyLangEl ); - ObjectMethod *objMethod = initFunction( uniqueTypeAny, rootNamespace, globalObjectDef, - ObjectMethod::Call, "child", IN_HALT, IN_HALT, anyRefUT, true ); - - IterDef *triter = findIterDef( IterDef::Child ); - objMethod->iterDef = triter; - } - - /* Reverse iterator. 
*/ - { - UniqueType *anyRefUT = findUniqueType( TYPE_REF, anyLangEl ); - ObjectMethod *objMethod = initFunction( uniqueTypeAny, rootNamespace, globalObjectDef, - ObjectMethod::Call, "rev_child", IN_HALT, IN_HALT, anyRefUT, true ); - - IterDef *triter = findIterDef( IterDef::RevChild ); - objMethod->iterDef = triter; - } - - /* Repeat iterator. */ - { - UniqueType *anyRefUT = findUniqueType( TYPE_REF, anyLangEl ); - ObjectMethod *objMethod = initFunction( uniqueTypeAny, rootNamespace, globalObjectDef, - ObjectMethod::Call, "repeat", IN_HALT, IN_HALT, anyRefUT, true ); - - IterDef *triter = findIterDef( IterDef::Repeat ); - objMethod->iterDef = triter; - } - - /* Reverse repeat iterator. */ - { - UniqueType *anyRefUT = findUniqueType( TYPE_REF, anyLangEl ); - ObjectMethod *objMethod = initFunction( uniqueTypeAny, rootNamespace, globalObjectDef, - ObjectMethod::Call, "rev_repeat", IN_HALT, IN_HALT, anyRefUT, true ); - - IterDef *triter = findIterDef( IterDef::RevRepeat ); - objMethod->iterDef = triter; - } - - /* List iterator. */ - { - UniqueType *anyRefUT = findUniqueType( TYPE_REF, anyLangEl ); - ObjectMethod *objMethod = initFunction( uniqueTypeAny, rootNamespace, globalObjectDef, - ObjectMethod::Call, "list_iter", IN_HALT, IN_HALT, anyRefUT, true ); - - IterDef *triter = findIterDef( IterDef::ListEl ); - objMethod->iterDef = triter; - } - - /* Reverse Value List iterator. */ - { - UniqueType *anyRefUT = findUniqueType( TYPE_REF, anyLangEl ); - ObjectMethod *objMethod = initFunction( uniqueTypeAny, rootNamespace, globalObjectDef, - ObjectMethod::Call, "rev_list_iter", IN_HALT, IN_HALT, anyRefUT, true ); - - IterDef *triter = findIterDef( IterDef::RevListVal ); - objMethod->iterDef = triter; - } - - /* Map iterator. 
*/ - { - UniqueType *anyRefUT = findUniqueType( TYPE_REF, anyLangEl ); - ObjectMethod *objMethod = initFunction( uniqueTypeAny, rootNamespace, globalObjectDef, - ObjectMethod::Call, "map_iter", IN_HALT, IN_HALT, anyRefUT, true ); - - IterDef *triter = findIterDef( IterDef::MapEl ); - objMethod->iterDef = triter; - } -} - -void Compiler::addMatchLength( ObjectDef *frame, LangEl *lel ) -{ - /* Make the type ref. */ - TypeRef *typeRef = TypeRef::cons( internal, uniqueTypeInt ); - - /* Create the field and insert it into the map. */ - ObjectField *el = ObjectField::cons( InputLoc(), - ObjectField::InbuiltFieldType, typeRef, "match_length" ); - el->isConst = true; - el->inGetR = IN_GET_MATCH_LENGTH_R; - el->inGetValR = IN_GET_MATCH_LENGTH_R; - frame->rootScope->insertField( el->name, el ); -} - -void Compiler::addMatchText( ObjectDef *frame, LangEl *lel ) -{ - /* Make the type ref. */ - TypeRef *typeRef = TypeRef::cons( internal, uniqueTypeStr ); - - /* Create the field and insert it into the map. */ - ObjectField *el = ObjectField::cons( internal, - ObjectField::InbuiltFieldType, typeRef, "match_text" ); - el->isConst = true; - el->inGetR = IN_GET_MATCH_TEXT_R; - el->inGetValR = IN_GET_MATCH_TEXT_R; - frame->rootScope->insertField( el->name, el ); -} - -void Compiler::addInput( ObjectDef *frame ) -{ - /* Make the type ref. */ - TypeRef *typeRef = TypeRef::cons( internal, uniqueTypeInput ); - - /* Create the field and insert it into the map. */ - ObjectField *el = ObjectField::cons( internal, - ObjectField::InbuiltObjectType, typeRef, "input" ); - el->inGetR = IN_LOAD_INPUT_R; - el->inGetWV = IN_LOAD_INPUT_WV; - el->inGetWC = IN_LOAD_INPUT_WC; - el->inGetValR = IN_LOAD_INPUT_R; - el->inGetValWC = IN_LOAD_INPUT_WC; - el->inGetValWV = IN_LOAD_INPUT_WV; - frame->rootScope->insertField( el->name, el ); -} - -void Compiler::addThis( ObjectDef *frame ) -{ - /* Make the type ref. 
*/ - TypeRef *typeRef = TypeRef::cons( internal, uniqueTypeStream ); - - /* Create the field and insert it into the map. */ - ObjectField *el = ObjectField::cons( internal, - ObjectField::InbuiltObjectType, typeRef, "this" ); - el->inGetR = IN_LOAD_CONTEXT_R; - el->inGetWV = IN_LOAD_CONTEXT_WV; - el->inGetWC = IN_LOAD_CONTEXT_WC; - el->inGetValR = IN_LOAD_CONTEXT_R; - el->inGetValWC = IN_LOAD_CONTEXT_WC; - el->inGetValWV = IN_LOAD_CONTEXT_WV; - frame->rootScope->insertField( el->name, el ); -} - -void Compiler::declareIntFields( ) -{ - intObj = ObjectDef::cons( ObjectDef::BuiltinType, "int", nextObjectId++ ); -// intLangEl->objectDef = intObj; - - initFunction( uniqueTypeStr, intObj, ObjectMethod::Call, "to_string", IN_INT_TO_STR, IN_INT_TO_STR, true ); -} - -void Compiler::declareStrFields( ) -{ - strObj = ObjectDef::cons( ObjectDef::BuiltinType, "str", nextObjectId++ ); - strLangEl->objectDef = strObj; - - initFunction( uniqueTypeInt, strObj, ObjectMethod::Call, "atoi", - FN_STR_ATOI, FN_STR_ATOI, true, true ); - - initFunction( uniqueTypeInt, strObj, ObjectMethod::Call, "atoo", - FN_STR_ATOO, FN_STR_ATOO, true, true ); - - initFunction( uniqueTypeInt, strObj, ObjectMethod::Call, "uord8", - FN_STR_UORD8, FN_STR_UORD8, true, true ); - - initFunction( uniqueTypeInt, strObj, ObjectMethod::Call, "sord8", - FN_STR_SORD8, FN_STR_SORD8, true, true ); - - initFunction( uniqueTypeInt, strObj, ObjectMethod::Call, "uord16", - FN_STR_UORD16, FN_STR_UORD16, true, true ); - - initFunction( uniqueTypeInt, strObj, ObjectMethod::Call, "sord16", - FN_STR_SORD16, FN_STR_SORD16, true, true ); - - initFunction( uniqueTypeInt, strObj, ObjectMethod::Call, "uord32", - FN_STR_UORD32, FN_STR_UORD32, true, true ); - - initFunction( uniqueTypeInt, strObj, ObjectMethod::Call, "sord32", - FN_STR_SORD32, FN_STR_SORD32, true, true ); - - initFunction( uniqueTypeStr, strObj, ObjectMethod::Call, "prefix", - FN_STR_PREFIX, FN_STR_PREFIX, uniqueTypeInt, true, true ); - - initFunction( 
uniqueTypeStr, strObj, ObjectMethod::Call, "suffix", - FN_STR_SUFFIX, FN_STR_SUFFIX, uniqueTypeInt, true, true ); - - initFunction( uniqueTypeStr, rootNamespace, globalObjectDef, - ObjectMethod::Call, "sprintf", FN_SPRINTF, FN_SPRINTF, - uniqueTypeStr, uniqueTypeInt, true, true ); - - addLengthField( strObj, IN_STR_LENGTH ); -} - -void Compiler::declareInputField( ObjectDef *objDef, code_t getLength ) -{ - /* Create the "length" field. */ - TypeRef *typeRef = TypeRef::cons( internal, uniqueTypeStr ); - ObjectField *el = ObjectField::cons( internal, - ObjectField::InbuiltFieldType, typeRef, "tree" ); - el->isConst = true; - el->inGetR = IN_GET_COLLECT_STRING; - el->inGetValR = IN_GET_COLLECT_STRING; - - objDef->rootScope->insertField( el->name, el ); -} - -void Compiler::declareStreamField( ObjectDef *objDef, code_t getLength ) -{ - /* Create the "length" field. */ - TypeRef *typeRef = TypeRef::cons( internal, uniqueTypeStr ); - ObjectField *el = ObjectField::cons( internal, - ObjectField::InbuiltFieldType, typeRef, "tree" ); - el->isConst = true; - el->inGetR = IN_GET_COLLECT_STRING; - el->inGetValR = IN_GET_COLLECT_STRING; - - objDef->rootScope->insertField( el->name, el ); -} - -void Compiler::declareInputFields( ) -{ - inputObj = inputSel->structDef->objectDef; - - initFunction( uniqueTypeStr, inputObj, ObjectMethod::Call, "pull", - IN_INPUT_PULL_WV, IN_INPUT_PULL_WC, uniqueTypeInt, false ); - - initFunction( uniqueTypeStr, inputObj, ObjectMethod::Call, "push", - IN_INPUT_PUSH_WV, IN_INPUT_PUSH_WV, uniqueTypeAny, false ); - - initFunction( uniqueTypeStr, inputObj, ObjectMethod::Call, "push_ignore", - IN_INPUT_PUSH_IGNORE_WV, IN_INPUT_PUSH_IGNORE_WV, uniqueTypeAny, false ); - - initFunction( uniqueTypeStr, inputObj, ObjectMethod::Call, "push_stream", - IN_INPUT_PUSH_STREAM_WV, IN_INPUT_PUSH_STREAM_WV, uniqueTypeStream, false ); - - initFunction( uniqueTypeVoid, inputObj, ObjectMethod::Call, "close", - IN_INPUT_CLOSE_WC, IN_INPUT_CLOSE_WC, false ); - - 
initFunction( uniqueTypeVoid, inputObj, ObjectMethod::Call, "auto_trim", - IN_IINPUT_AUTO_TRIM_WC, IN_IINPUT_AUTO_TRIM_WC, uniqueTypeBool, false ); - - declareInputField( inputObj, 0 ); -} - -void Compiler::declareStreamFields( ) -{ - streamObj = streamSel->structDef->objectDef; - - initFunction( uniqueTypeStr, streamObj, ObjectMethod::Call, "pull", - IN_INPUT_PULL_WV, IN_INPUT_PULL_WC, uniqueTypeInt, false ); - - initFunction( uniqueTypeStr, streamObj, ObjectMethod::Call, "push", - IN_INPUT_PUSH_WV, IN_INPUT_PUSH_WV, uniqueTypeAny, false ); - - initFunction( uniqueTypeStr, streamObj, ObjectMethod::Call, "push_ignore", - IN_INPUT_PUSH_IGNORE_WV, IN_INPUT_PUSH_IGNORE_WV, uniqueTypeAny, false ); - - initFunction( uniqueTypeStr, streamObj, ObjectMethod::Call, "push_stream", - IN_INPUT_PUSH_STREAM_WV, IN_INPUT_PUSH_STREAM_WV, uniqueTypeStream, false ); - - initFunction( uniqueTypeVoid, streamObj, ObjectMethod::Call, "close", - IN_INPUT_CLOSE_WC, IN_INPUT_CLOSE_WC, false ); - - initFunction( uniqueTypeVoid, streamObj, ObjectMethod::Call, "auto_trim", - IN_INPUT_AUTO_TRIM_WC, IN_INPUT_AUTO_TRIM_WC, uniqueTypeBool, false ); - - declareStreamField( streamObj, 0 ); -} - -ObjectField *Compiler::makeDataEl() -{ - /* Create the "data" field. */ - TypeRef *typeRef = TypeRef::cons( internal, uniqueTypeStr ); - ObjectField *el = ObjectField::cons( internal, - ObjectField::InbuiltFieldType, typeRef, "data" ); - - el->inGetR = IN_GET_TOKEN_DATA_R; - el->inSetWC = IN_SET_TOKEN_DATA_WC; - el->inSetWV = IN_SET_TOKEN_DATA_WV; - return el; -} - -ObjectField *Compiler::makeFileEl() -{ - /* Create the "file" field. */ - TypeRef *typeRef = TypeRef::cons( internal, uniqueTypeStr ); - ObjectField *el = ObjectField::cons( internal, - ObjectField::InbuiltFieldType, typeRef, "file" ); - - el->isConst = true; - el->inGetR = IN_GET_TOKEN_FILE_R; - el->inGetValR = IN_GET_TOKEN_FILE_R; - return el; -} - -ObjectField *Compiler::makeLineEl() -{ - /* Create the "line" field. 
*/ - TypeRef *typeRef = TypeRef::cons( internal, uniqueTypeInt ); - ObjectField *el = ObjectField::cons( internal, - ObjectField::InbuiltFieldType, typeRef, "line" ); - - el->isConst = true; - el->inGetR = IN_GET_TOKEN_LINE_R; - el->inGetValR = IN_GET_TOKEN_LINE_R; - return el; -} - -ObjectField *Compiler::makeColEl() -{ - /* Create the "col" field. */ - TypeRef *typeRef = TypeRef::cons( internal, uniqueTypeInt ); - ObjectField *el = ObjectField::cons( internal, - ObjectField::InbuiltFieldType, typeRef, "col" ); - - el->isConst = true; - el->inGetR = IN_GET_TOKEN_COL_R; - el->inGetValR = IN_GET_TOKEN_COL_R; - return el; -} - -ObjectField *Compiler::makePosEl() -{ - /* Create the "data" field. */ - TypeRef *typeRef = TypeRef::cons( internal, uniqueTypeInt ); - ObjectField *el = ObjectField::cons( internal, - ObjectField::InbuiltFieldType, typeRef, "pos" ); - - el->isConst = true; - el->inGetR = IN_GET_TOKEN_POS_R; - el->inGetValR = IN_GET_TOKEN_POS_R; - return el; -} - -/* Add a constant length field to the object. - * Opcode supplied by the caller. */ -void Compiler::addLengthField( ObjectDef *objDef, code_t getLength ) -{ - /* Create the "length" field. */ - TypeRef *typeRef = TypeRef::cons( internal, uniqueTypeInt ); - ObjectField *el = ObjectField::cons( internal, - ObjectField::InbuiltFieldType, typeRef, "length" ); - el->isConst = true; - el->inGetR = getLength; - el->inGetValR = getLength; - - objDef->rootScope->insertField( el->name, el ); -} - -void Compiler::declareTokenFields( ) -{ - /* Give all user terminals the token object type. */ - for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { - if ( lel->type == LangEl::Term ) { - if ( lel->objectDef != 0 ) { - /* Create the "data" field. */ - ObjectField *dataEl = makeDataEl(); - lel->objectDef->rootScope->insertField( dataEl->name, dataEl ); - - /* Create the "file" field. 
*/ - ObjectField *fileEl = makeFileEl(); - lel->objectDef->rootScope->insertField( fileEl->name, fileEl ); - - /* Create the "line" field. */ - ObjectField *lineEl = makeLineEl(); - lel->objectDef->rootScope->insertField( lineEl->name, lineEl ); - - /* Create the "col" field. */ - ObjectField *colEl = makeColEl(); - lel->objectDef->rootScope->insertField( colEl->name, colEl ); - - /* Create the "pos" field. */ - ObjectField *posEl = makePosEl(); - lel->objectDef->rootScope->insertField( posEl->name, posEl ); - } - } - } -} - -void Compiler::declareGlobalFields() -{ - ObjectMethod *method; - - method = initFunction( uniqueTypeStream, rootNamespace, globalObjectDef, ObjectMethod::Call, "open", - IN_OPEN_FILE, IN_OPEN_FILE, uniqueTypeStr, uniqueTypeStr, true ); - method->useCallObj = false; - - method = initFunction( uniqueTypeStr, rootNamespace, globalObjectDef, ObjectMethod::Call, "tolower", - IN_TO_LOWER, IN_TO_LOWER, uniqueTypeStr, true ); - method->useCallObj = false; - - method = initFunction( uniqueTypeStr, rootNamespace, globalObjectDef, ObjectMethod::Call, "toupper", - IN_TO_UPPER, IN_TO_UPPER, uniqueTypeStr, true ); - method->useCallObj = false; - - method = initFunction( uniqueTypeInt, rootNamespace, globalObjectDef, ObjectMethod::Call, "atoi", - FN_STR_ATOI, FN_STR_ATOI, uniqueTypeStr, true, true ); - method->useCallObj = false; - - method = initFunction( uniqueTypeInt, rootNamespace, globalObjectDef, ObjectMethod::Call, "atoo", - FN_STR_ATOO, FN_STR_ATOO, uniqueTypeStr, true, true ); - method->useCallObj = false; - - method = initFunction( uniqueTypeStr, rootNamespace, globalObjectDef, ObjectMethod::Call, "prefix", - FN_PREFIX, FN_PREFIX, uniqueTypeStr, uniqueTypeInt, true, true ); - method->useCallObj = false; - - method = initFunction( uniqueTypeStr, rootNamespace, globalObjectDef, ObjectMethod::Call, "suffix", - FN_SUFFIX, FN_SUFFIX, uniqueTypeStr, uniqueTypeInt, true, true ); - method->useCallObj = false; - - - method = initFunction( uniqueTypeInt, 
rootNamespace, globalObjectDef, ObjectMethod::Call, "uord8", - FN_STR_UORD8, FN_STR_UORD8, uniqueTypeStr, true, true ); - method->useCallObj = false; - - method = initFunction( uniqueTypeInt, rootNamespace, globalObjectDef, ObjectMethod::Call, "sord8", - FN_STR_SORD8, FN_STR_SORD8, uniqueTypeStr, true, true ); - method->useCallObj = false; - - method = initFunction( uniqueTypeInt, rootNamespace, globalObjectDef, ObjectMethod::Call, "uord16", - FN_STR_UORD16, FN_STR_UORD16, uniqueTypeStr, true, true ); - method->useCallObj = false; - - method = initFunction( uniqueTypeInt, rootNamespace, globalObjectDef, ObjectMethod::Call, "sord16", - FN_STR_SORD16, FN_STR_SORD16, uniqueTypeStr, true, true ); - method->useCallObj = false; - - method = initFunction( uniqueTypeInt, rootNamespace, globalObjectDef, ObjectMethod::Call, "uord32", - FN_STR_UORD32, FN_STR_UORD32, uniqueTypeStr, true, true ); - method->useCallObj = false; - - method = initFunction( uniqueTypeInt, rootNamespace, globalObjectDef, ObjectMethod::Call, "sord32", - FN_STR_SORD32, FN_STR_SORD32, uniqueTypeStr, true, true ); - method->useCallObj = false; - - method = initFunction( uniqueTypeInt, rootNamespace, globalObjectDef, ObjectMethod::Call, "exit", - FN_EXIT, FN_EXIT, uniqueTypeInt, true, true ); - - method = initFunction( uniqueTypeInt, rootNamespace, globalObjectDef, ObjectMethod::Call, "exit_hard", - FN_EXIT_HARD, FN_EXIT_HARD, uniqueTypeInt, true, true ); - - method = initFunction( uniqueTypeInt, rootNamespace, globalObjectDef, ObjectMethod::Call, "system", - IN_SYSTEM, IN_SYSTEM, uniqueTypeStr, true ); - - method = initFunction( uniqueTypeStr, rootNamespace, globalObjectDef, ObjectMethod::Call, "xml", - IN_TREE_TO_STR_XML, IN_TREE_TO_STR_XML, uniqueTypeAny, true ); - method->useCallObj = false; - - method = initFunction( uniqueTypeStr, rootNamespace, globalObjectDef, ObjectMethod::Call, "xmlac", - IN_TREE_TO_STR_XML_AC, IN_TREE_TO_STR_XML_AC, uniqueTypeAny, true ); - method->useCallObj = false; - - 
method = initFunction( uniqueTypeStr, rootNamespace, globalObjectDef, ObjectMethod::Call, "postfix", - IN_TREE_TO_STR_POSTFIX, IN_TREE_TO_STR_POSTFIX, uniqueTypeAny, true ); - method->useCallObj = false; - - addStdin(); - addStdout(); - addStderr(); - addStds(); - addArgv(); - addError(); - addDefineArgs(); -} - -void Compiler::addStdin() -{ - /* Make the type ref. */ - TypeRef *typeRef = TypeRef::cons( internal, uniqueTypeStream ); - - /* Create the field and insert it into the map. */ - ObjectField *el = ObjectField::cons( internal, - ObjectField::InbuiltFieldType, typeRef, "stdin" ); - - el->isConst = true; - - el->inGetR = IN_GET_CONST; - el->inGetWC = IN_GET_CONST; - el->inGetWV = IN_GET_CONST; - el->inGetValR = IN_GET_CONST; - el->inGetValWC = IN_GET_CONST; - el->inGetValWV = IN_GET_CONST; - - el->isConstVal = true; - el->constValId = CONST_STDIN; - - rootNamespace->rootScope->insertField( el->name, el ); -} - -void Compiler::addStdout() -{ - /* Make the type ref. */ - TypeRef *typeRef = TypeRef::cons( internal, uniqueTypeStream ); - - /* Create the field and insert it into the map. */ - ObjectField *el = ObjectField::cons( internal, - ObjectField::InbuiltFieldType, typeRef, "stdout" ); - el->isConst = true; - - el->inGetR = IN_GET_CONST; - el->inGetWC = IN_GET_CONST; - el->inGetWV = IN_GET_CONST; - el->inGetValR = IN_GET_CONST; - el->inGetValWC = IN_GET_CONST; - el->inGetValWV = IN_GET_CONST; - - el->isConstVal = true; - el->constValId = CONST_STDOUT; - - rootNamespace->rootScope->insertField( el->name, el ); -} - -void Compiler::addStderr() -{ - /* Make the type ref. */ - TypeRef *typeRef = TypeRef::cons( internal, uniqueTypeStream ); - - /* Create the field and insert it into the map. 
*/ - ObjectField *el = ObjectField::cons( internal, - ObjectField::InbuiltFieldType, typeRef, "stderr" ); - el->isConst = true; - - el->inGetR = IN_GET_CONST; - el->inGetWC = IN_GET_CONST; - el->inGetWV = IN_GET_CONST; - el->inGetValR = IN_GET_CONST; - el->inGetValWC = IN_GET_CONST; - el->inGetValWV = IN_GET_CONST; - - el->isConstVal = true; - el->constValId = CONST_STDERR; - - rootNamespace->rootScope->insertField( el->name, el ); -} - -void Compiler::addArgv() -{ - /* Create the field and insert it into the map. */ - ObjectField *el = ObjectField::cons( internal, - ObjectField::StructFieldType, argvTypeRef, "argv" ); - el->isConst = true; - rootNamespace->rootScope->insertField( el->name, el ); - argv = el; - - TypeRef *typeRef = TypeRef::cons( internal, uniqueTypeStr ); - - el = ObjectField::cons( internal, - ObjectField::StructFieldType, typeRef, "arg0" ); - el->isConst = true; - rootNamespace->rootScope->insertField( el->name, el ); - arg0 = el; -} - -void Compiler::addStds() -{ - ObjectField *el = ObjectField::cons( internal, - ObjectField::StructFieldType, stdsTypeRef, "stds" ); - rootNamespace->rootScope->insertField( el->name, el ); - stds = el; -} - -void Compiler::addError() -{ - /* Make the type ref. */ - TypeRef *typeRef = TypeRef::cons( internal, uniqueTypeStr ); - - /* Create the field and insert it into the map. */ - ObjectField *el = ObjectField::cons( internal, - ObjectField::InbuiltFieldType, typeRef, "error" ); - el->isConst = true; - el->inGetR = IN_GET_ERROR; - el->inGetWC = IN_GET_ERROR; - el->inGetWV = IN_GET_ERROR; - rootNamespace->rootScope->insertField( el->name, el ); -} - -void Compiler::addDefineArgs() -{ - for ( DefineVector::Iter d = defineArgs; d.lte(); d++ ) { - TypeRef *typeRef = TypeRef::cons( internal, uniqueTypeStr ); - - /* Create the field and insert it into the map. 
*/ - ObjectField *el = ObjectField::cons( internal, - ObjectField::InbuiltFieldType, typeRef, d->name ); - - el->isConst = true; - - el->inGetR = IN_GET_CONST; - el->inGetWC = IN_GET_CONST; - el->inGetWV = IN_GET_CONST; - el->inGetValR = IN_GET_CONST; - el->inGetValWC = IN_GET_CONST; - el->inGetValWV = IN_GET_CONST; - - el->isConstVal = true; - el->constValId = CONST_ARG; - el->constValArg = d->value; - - rootNamespace->rootScope->insertField( el->name, el ); - } -} - -void Compiler::initMapFunctions( GenericType *gen ) -{ - /* Value functions. */ - initFunction( gen->valueUt, gen->objDef, ObjectMethod::Call, "find", - FN_VMAP_FIND, FN_VMAP_FIND, gen->keyUt, true, true, gen ); - - initFunction( uniqueTypeInt, gen->objDef, ObjectMethod::Call, "insert", - FN_VMAP_INSERT_WV, FN_VMAP_INSERT_WC, gen->keyUt, gen->valueUt, - false, true, gen ); - - initFunction( gen->elUt, gen->objDef, ObjectMethod::Call, "remove", - FN_VMAP_REMOVE_WV, FN_VMAP_REMOVE_WC, gen->keyUt, false, true, gen ); - - /* - * Element Functions - */ - initFunction( gen->elUt, gen->objDef, ObjectMethod::Call, "find_el", - FN_MAP_FIND, FN_MAP_FIND, gen->keyUt, true, true, gen ); - - initFunction( uniqueTypeInt, gen->objDef, ObjectMethod::Call, "insert_el", - FN_MAP_INSERT_WV, FN_MAP_INSERT_WC, gen->elUt, false, true, gen ); - - initFunction( gen->elUt, gen->objDef, ObjectMethod::Call, "detach_el", - FN_MAP_DETACH_WV, FN_MAP_DETACH_WC, gen->elUt, false, true, gen ); -} - -void Compiler::initMapField( GenericType *gen, const char *name, int offset ) -{ - /* Make the type ref and create the field. 
*/ - ObjectField *el = ObjectField::cons( internal, - ObjectField::InbuiltOffType, gen->elTr, name ); - - el->inGetR = IN_GET_MAP_MEM_R; - el->inGetWC = IN_GET_MAP_MEM_WC; - el->inGetWV = IN_GET_MAP_MEM_WV; -// el->inSetWC = IN_SET_MAP_MEM_WC; -// el->inSetWV = IN_SET_MAP_MEM_WV; - - el->inGetValR = IN_GET_MAP_MEM_R; - el->inGetValWC = IN_GET_MAP_MEM_WC; - el->inGetValWV = IN_GET_MAP_MEM_WV; - - gen->objDef->rootScope->insertField( el->name, el ); - - el->useGenericId = true; - el->generic = gen; - - /* Zero for head, One for tail. */ - el->offset = offset; -} - -void Compiler::initMapFields( GenericType *gen ) -{ - addLengthField( gen->objDef, IN_MAP_LENGTH ); - - initMapField( gen, "head_el", 0 ); - initMapField( gen, "tail_el", 1 ); - - initMapElKey( gen, "key", 0 ); - - initMapElField( gen, "prev", 0 ); - initMapElField( gen, "next", 1 ); -} - -void Compiler::initMapElKey( GenericType *gen, const char *name, int offset ) -{ - /* Make the type ref and create the field. */ - ObjectField *el = ObjectField::cons( internal, - ObjectField::GenericDependentType, gen->keyTr, name ); - - gen->el->mapKeyField = el; - - /* Offset will be computed when the offset of the owning map element field - * is computed. */ - - gen->elUt->structEl->structDef->objectDef->rootScope->insertField( el->name, el ); -} - -void Compiler::initMapElField( GenericType *gen, const char *name, int offset ) -{ - /* Make the type ref and create the field. */ - ObjectField *el = ObjectField::cons( internal, - ObjectField::InbuiltOffType, gen->elTr, name ); - - el->inGetR = IN_GET_MAP_EL_MEM_R; - el->inGetValR = IN_GET_MAP_EL_MEM_R; -// el->inGetWC = IN_GET_LIST2EL_MEM_WC; -// el->inGetWV = IN_GET_LIST2EL_MEM_WV; -// el->inSetWC = IN_SET_LIST2EL_MEM_WC; -// el->inSetWV = IN_SET_LIST2EL_MEM_WV; - - el->useGenericId = true; - el->generic = gen; - - /* Zero for head, One for tail. 
*/ - el->offset = offset; - - gen->elUt->structEl->structDef->objectDef->rootScope->insertField( el->name, el ); -} - -void Compiler::initListFunctions( GenericType *gen ) -{ - initFunction( uniqueTypeInt, gen->objDef, ObjectMethod::Call, "push_head", - FN_VLIST_PUSH_HEAD_WV, FN_VLIST_PUSH_HEAD_WC, gen->valueUt, false, true, gen ); - - initFunction( uniqueTypeInt, gen->objDef, ObjectMethod::Call, "push_tail", - FN_VLIST_PUSH_TAIL_WV, FN_VLIST_PUSH_TAIL_WC, gen->valueUt, false, true, gen ); - - initFunction( uniqueTypeInt, gen->objDef, ObjectMethod::Call, "push", - FN_VLIST_PUSH_HEAD_WV, FN_VLIST_PUSH_HEAD_WC, gen->valueUt, false, true, gen ); - - initFunction( gen->valueUt, gen->objDef, ObjectMethod::Call, "pop_head", - FN_VLIST_POP_HEAD_WV, FN_VLIST_POP_HEAD_WC, false, true, gen ); - - initFunction( gen->valueUt, gen->objDef, ObjectMethod::Call, "pop_tail", - FN_VLIST_POP_TAIL_WV, FN_VLIST_POP_TAIL_WC, false, true, gen ); - - initFunction( gen->valueUt, gen->objDef, ObjectMethod::Call, "pop", - FN_VLIST_POP_HEAD_WV, FN_VLIST_POP_HEAD_WC, false, true, gen ); - - initFunction( uniqueTypeInt, gen->objDef, ObjectMethod::Call, "push_head_el", - FN_LIST_PUSH_HEAD_WV, FN_LIST_PUSH_HEAD_WC, gen->elUt, false, true, gen ); - - initFunction( uniqueTypeInt, gen->objDef, ObjectMethod::Call, "push_tail_el", - FN_LIST_PUSH_TAIL_WV, FN_LIST_PUSH_TAIL_WC, gen->elUt, false, true, gen ); - - initFunction( uniqueTypeInt, gen->objDef, ObjectMethod::Call, "push_el", - FN_LIST_PUSH_HEAD_WV, FN_LIST_PUSH_HEAD_WC, gen->elUt, false, true, gen ); - - initFunction( gen->elUt, gen->objDef, ObjectMethod::Call, "pop_head_el", - FN_LIST_POP_HEAD_WV, FN_LIST_POP_HEAD_WC, false, true, gen ); - - initFunction( gen->elUt, gen->objDef, ObjectMethod::Call, "pop_tail_el", - FN_LIST_POP_TAIL_WV, FN_LIST_POP_TAIL_WC, false, true, gen ); - - initFunction( gen->elUt, gen->objDef, ObjectMethod::Call, "pop_el", - FN_LIST_POP_HEAD_WV, FN_LIST_POP_HEAD_WC, false, true, gen ); -} - -void 
Compiler::initListElField( GenericType *gen, const char *name, int offset ) -{ - /* Make the type ref and create the field. */ - ObjectField *el = ObjectField::cons( internal, - ObjectField::InbuiltOffType, gen->elTr, name ); - - el->inGetR = IN_GET_LIST_EL_MEM_R; - el->inGetValR = IN_GET_LIST_EL_MEM_R; -// el->inGetWC = IN_GET_LIST2EL_MEM_WC; -// el->inGetWV = IN_GET_LIST2EL_MEM_WV; -// el->inSetWC = IN_SET_LIST2EL_MEM_WC; -// el->inSetWV = IN_SET_LIST2EL_MEM_WV; - - el->useGenericId = true; - el->generic = gen; - - /* Zero for head, One for tail. */ - el->offset = offset; - - gen->elUt->structEl->structDef->objectDef->rootScope->insertField( el->name, el ); -} - -void Compiler::initListFieldEl( GenericType *gen, const char *name, int offset ) -{ - /* Make the type ref and create the field. */ - ObjectField *el = ObjectField::cons( internal, - ObjectField::InbuiltOffType, gen->elTr, name ); - - el->inGetR = IN_GET_LIST_MEM_R; - el->inGetWC = IN_GET_LIST_MEM_WC; - el->inGetWV = IN_GET_LIST_MEM_WV; -// el->inSetWC = IN_SET_LIST_MEM_WC; -// el->inSetWV = IN_SET_LIST_MEM_WV; - - el->inGetValR = IN_GET_LIST_MEM_R; - el->inGetValWC = IN_GET_LIST_MEM_WC; - el->inGetValWV = IN_GET_LIST_MEM_WV; - - gen->objDef->rootScope->insertField( el->name, el ); - - el->useGenericId = true; - el->generic = gen; - - /* Zero for head, One for tail. */ - el->offset = offset; -} - -void Compiler::initListFieldVal( GenericType *gen, const char *name, int offset ) -{ - /* Make the type ref and create the field. 
*/ - ObjectField *el = ObjectField::cons( internal, - ObjectField::InbuiltOffType, gen->valueTr, name ); - - el->inGetR = IN_GET_VLIST_MEM_R; - el->inGetWC = IN_GET_VLIST_MEM_WC; - el->inGetWV = IN_GET_VLIST_MEM_WV; -// el->inSetWC = IN_SET_VLIST_MEM_WC; -// el->inSetWV = IN_SET_VLIST_MEM_WV; - - el->inGetValR = IN_GET_VLIST_MEM_R; - el->inGetValWC = IN_GET_VLIST_MEM_WC; - el->inGetValWV = IN_GET_VLIST_MEM_WV; - - gen->objDef->rootScope->insertField( el->name, el ); - - el->useGenericId = true; - el->generic = gen; - - /* Zero for head, One for tail. */ - el->offset = offset; -} - -void Compiler::initListFields( GenericType *gen ) -{ - /* The value fields. */ - initListFieldVal( gen, "head", 0 ); - initListFieldVal( gen, "tail", 1 ); - initListFieldVal( gen, "top", 0 ); - - /* The element fields. */ - initListFieldEl( gen, "head_el", 0 ); - initListFieldEl( gen, "tail_el", 1 ); - initListFieldEl( gen, "top_el", 0 ); - - addLengthField( gen->objDef, IN_LIST_LENGTH ); - - /* The fields of the list element. */ - initListElField( gen, "prev", 0 ); - initListElField( gen, "next", 1 ); -} - -void Compiler::initParserFunctions( GenericType *gen ) -{ - initFunction( gen->elUt, gen->objDef, ObjectMethod::ParseFinish, "finish", - IN_PARSE_FRAG_W, IN_PARSE_FRAG_W, true ); - - initFunction( gen->elUt, gen->objDef, ObjectMethod::ParseFinish, "eof", - IN_PARSE_FRAG_W, IN_PARSE_FRAG_W, true ); - - initFunction( uniqueTypeInput, gen->objDef, ObjectMethod::Call, "gets", - IN_GET_PARSER_STREAM, IN_GET_PARSER_STREAM, true ); -} - -void Compiler::initParserField( GenericType *gen, const char *name, - int offset, TypeRef *typeRef ) -{ - /* Make the type ref and create the field. 
*/ - ObjectField *el = ObjectField::cons( internal, - ObjectField::InbuiltOffType, typeRef, name ); - - el->inGetR = IN_GET_PARSER_MEM_R; - // el->inGetWC = IN_GET_PARSER_MEM_WC; - // el->inGetWV = IN_GET_PARSER_MEM_WV; - // el->inSetWC = IN_SET_PARSER_MEM_WC; - // el->inSetWV = IN_SET_PARSER_MEM_WV; - - gen->objDef->rootScope->insertField( el->name, el ); - - /* Zero for head, One for tail. */ - el->offset = offset; -} - -void Compiler::initParserFields( GenericType *gen ) -{ - TypeRef *typeRef; - - typeRef = TypeRef::cons( internal, gen->elUt ); - initParserField( gen, "tree", 0, typeRef ); - - typeRef = TypeRef::cons( internal, uniqueTypeStr ); - initParserField( gen, "error", 1, typeRef ); -} - -void Compiler::makeFuncVisible( Function *func, bool isUserIter ) -{ - func->localFrame = func->codeBlock->localFrame; - - /* Set up the parameters. */ - for ( ParameterList::Iter param = *func->paramList; param.lte(); param++ ) { - if ( func->localFrame->rootScope->findField( param->name ) != 0 ) - error(param->loc) << "parameter " << param->name << " redeclared" << endp; - - func->localFrame->rootScope->insertField( param->name, param ); - } - - /* Insert the function into the global function map. */ - ObjectMethod *objMethod = new ObjectMethod( func->typeRef, func->name, - IN_CALL_WV, IN_CALL_WC, - func->paramList->length(), 0, func->paramList, false ); - objMethod->funcId = func->funcId; - objMethod->useFuncId = true; - objMethod->useCallObj = false; - objMethod->func = func; - - if ( isUserIter ) { - IterDef *uiter = findIterDef( IterDef::User, func ); - objMethod->iterDef = uiter; - } - - NameScope *scope = func->nspace->rootScope; // : globalObjectDef->rootScope; - - if ( !scope->methodMap.insert( func->name, objMethod ) ) - error(func->typeRef->loc) << "function " << func->name << " redeclared" << endp; - - func->objMethod = objMethod; -} - -void Compiler::makeInHostVisible( Function *func ) -{ - /* Set up the parameters. 
*/ - for ( ParameterList::Iter param = *func->paramList; param.lte(); param++ ) { - if ( func->localFrame->rootScope->findField( param->name ) != 0 ) - error(param->loc) << "parameter " << param->name << " redeclared" << endp; - - func->localFrame->rootScope->insertField( param->name, param ); - } - - /* Insert the function into the global function map. */ - ObjectMethod *objMethod = new ObjectMethod( func->typeRef, func->name, - IN_HOST, IN_HOST, - func->paramList->length(), 0, func->paramList, false ); - objMethod->funcId = func->funcId; - objMethod->useFuncId = true; - objMethod->useCallObj = false; - objMethod->func = func; - - NameScope *scope = func->nspace->rootScope; - - if ( !scope->methodMap.insert( func->name, objMethod ) ) { - error(func->typeRef->loc) << "in-host function " << func->name << - " redeclared" << endp; - } - - func->objMethod = objMethod; -} - -/* - * Type Declaration Root. - */ -void Compiler::declarePass() -{ - declareReVars(); - - makeDefaultIterators(); - - for ( FunctionList::Iter f = functionList; f.lte(); f++ ) - makeFuncVisible( f, f->isUserIter ); - - for ( FunctionList::Iter f = inHostList; f.lte(); f++ ) - makeInHostVisible( f ); - - rootNamespace->declare( this ); - - /* Will fill in zero lels that were not declared. */ - makeIgnoreCollectors(); - - declareByteCode(); - - declareIntFields(); - declareStrFields(); - declareInputFields(); - declareStreamFields(); - declareTokenFields(); - declareGlobalFields(); - - /* Fill any empty scanners with a default token. 
*/ - initEmptyScanners(); -} diff --git a/src/defs.h.cmake.in b/src/defs.h.cmake.in deleted file mode 100644 index c4cf8844..00000000 --- a/src/defs.h.cmake.in +++ /dev/null @@ -1,11 +0,0 @@ -/* defs.h Generated from defs.h.cmake.in by cmake */ - -#ifndef _COLM_DEFS_H -#define _COLM_DEFS_H - -#cmakedefine SIZEOF_LONG @SIZEOF_LONG@ -#cmakedefine SIZEOF_UNSIGNED_LONG @SIZEOF_UNSIGNED_LONG@ -#cmakedefine SIZEOF_UNSIGNED_LONG_LONG @SIZEOF_UNSIGNED_LONG_LONG@ -#cmakedefine SIZEOF_VOID_P @SIZEOF_VOID_P@ - -#endif /* _COLM_DEFS_H */ diff --git a/src/defs.h.in b/src/defs.h.in deleted file mode 100644 index a91e2ff2..00000000 --- a/src/defs.h.in +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef _COLM_DEFS_H -#define _COLM_DEFS_H - -/* Configuration */ - -/* The size of `long', as computed by sizeof. 
*/ -#undef SIZEOF_LONG - -/* The size of `unsigned long', as computed by sizeof. */ -#undef SIZEOF_UNSIGNED_LONG - -/* The size of `unsigned long long', as computed by sizeof. */ -#undef SIZEOF_UNSIGNED_LONG_LONG - -/* The size of `void *', as computed by sizeof. */ -#undef SIZEOF_VOID_P - -#endif /* _COLM_DEFS_H */ diff --git a/src/dotgen.cc b/src/dotgen.cc deleted file mode 100644 index 42f54159..00000000 --- a/src/dotgen.cc +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright 2006-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <stdbool.h> - -#include <iostream> - -#include "compiler.h" - -using namespace std; - -void Compiler::writeTransList( PdaState *state ) -{ - ostream &out = *outStream; - for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { - /* Write out the from and to states. 
*/ - out << "\t" << state->stateNum << " -> " << trans->value->toState->stateNum; - - /* Begin the label. */ - out << " [ label = \""; - long key = trans->key; - LangEl *lel = langElIndex[key]; - if ( lel != 0 ) - out << lel->name; - else - out << (char)key; - - if ( trans->value->actions.length() > 0 ) { - out << " / "; - for ( ActDataList::Iter act = trans->value->actions; act.lte(); act++ ) { - switch ( *act & 0x3 ) { - case 1: - out << "S(" << trans->value->actOrds[act.pos()] << ")"; - break; - case 2: { - out << "R(" << prodIdIndex[(*act >> 2)]->data << - ", " << trans->value->actOrds[act.pos()] << ")"; - break; - } - case 3: { - out << "SR(" << prodIdIndex[(*act >> 2)]->data << - ", " << trans->value->actOrds[act.pos()] << ")"; - break; - }} - if ( ! act.last() ) - out << ", "; - } - } - - out << "\" ];\n"; - } -} - -void Compiler::writeDotFile( PdaGraph *graph ) -{ - ostream &out = *outStream; - out << - "digraph " << parserName << " {\n" - " rankdir=LR;\n" - " ranksep=\"0\"\n" - " nodesep=\"0.25\"\n" - "\n"; - - /* Define the psuedo states. Transitions will be done after the states - * have been defined as either final or not final. */ - out << - " node [ shape = point ];\n"; - - for ( int i = 0; i < graph->entryStateSet.length(); i++ ) - out << "\tENTRY" << i << " [ label = \"\" ];\n"; - - out << - "\n" - " node [ shape = circle, fixedsize = true, height = 0.6 ];\n"; - - /* Walk the states. */ - for ( PdaStateList::Iter st = graph->stateList; st.lte(); st++ ) - out << " " << st->stateNum << " [ label = \"" << st->stateNum << "\" ];\n"; - - out << "\n"; - - /* Walk the states. */ - for ( PdaStateList::Iter st = graph->stateList; st.lte(); st++ ) - writeTransList( st ); - - /* Start state and other entry points. 
*/ - for ( PdaStateSet::Iter st = graph->entryStateSet; st.lte(); st++ ) - out << "\tENTRY" << st.pos() << " -> " << (*st)->stateNum << " [ label = \"\" ];\n"; - - out << - "}\n"; -} - -void Compiler::writeDotFile() -{ - writeDotFile( pdaGraph ); -} - diff --git a/src/dotgen.h b/src/dotgen.h deleted file mode 100644 index 8e8f694b..00000000 --- a/src/dotgen.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef _COLM_GVDOTGEN_H -#define _COLM_GVDOTGEN_H - -#include <iostream> - -#if 0 - -class GraphvizDotGen : public CodeGenData -{ -public: - GraphvizDotGen( ostream &out ) : CodeGenData(out) { } - - /* Print an fsm to out stream. */ - void writeTransList( RedState *state ); - void writeDotFile( ); - - virtual void finishRagelDef(); - -private: - /* Writing labels and actions. 
*/ - std::ostream &ONCHAR( Key lowKey, Key highKey ); - std::ostream &TRANS_ACTION( RedState *fromState, RedTrans *trans ); - std::ostream &ACTION( RedAction *action ); - std::ostream &KEY( Key key ); -}; - -#endif - -#endif /* _COLM_GVDOTGEN_H */ - diff --git a/src/exports.cc b/src/exports.cc deleted file mode 100644 index 49228eff..00000000 --- a/src/exports.cc +++ /dev/null @@ -1,307 +0,0 @@ -/* - * Copyright 2006-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include <stdbool.h> - -#include <iostream> - -#include "fsmcodegen.h" - -using std::ostream; -using std::ostringstream; -using std::string; -using std::cerr; -using std::endl; - -void Compiler::openNameSpace( ostream &out, Namespace *nspace ) -{ - if ( nspace == rootNamespace ) - return; - - openNameSpace( out, nspace->parentNamespace ); - out << "namespace " << nspace->name << " { "; -} - -void Compiler::closeNameSpace( ostream &out, Namespace *nspace ) -{ - if ( nspace == rootNamespace ) - return; - - openNameSpace( out, nspace->parentNamespace ); - out << " }"; -} - -void Compiler::generateExports() -{ - ostream &out = *outStream; - - out << - "#ifndef _EXPORTS_H\n" - "#define _EXPORTS_H\n" - "\n" - "#include <colm/colm.h>\n" - "#include <string>\n" - "\n"; - - out << - "inline void appendString( colm_print_args *args, const char *data, int length )\n" - "{\n" - " std::string *str = (std::string*)args->arg;\n" - " *str += std::string( data, length );\n" - "}\n" - "\n"; - - out << - "inline std::string printTreeStr( colm_program *prg, colm_tree *tree, bool trim )\n" - "{\n" - " std::string str;\n" - " struct indent_impl indent = { -1, 0 };\n" - " colm_print_args printArgs = { &str, 1, 0, trim, &indent, &appendString, \n" - " &colm_print_null, &colm_print_term_tree, &colm_print_null };\n" - " colm_print_tree_args( prg, colm_vm_root(prg), &printArgs, tree );\n" - " return str;\n" - "}\n" - "\n"; - - /* Declare. */ - for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { - if ( lel->isEOF ) - continue; - - openNameSpace( out, lel->nspace ); - out << "struct " << lel->fullName << ";"; - closeNameSpace( out, lel->nspace ); - out << "\n"; - } - - /* Class definitions. 
*/ - for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { - if ( lel->isEOF ) - continue; - - openNameSpace( out, lel->nspace ); - out << "struct " << lel->fullName << "\n"; - out << "{\n"; - out << " std::string text() { return printTreeStr( __prg, __tree, true ); }\n"; - out << " colm_location *loc() { return colm_find_location( __prg, __tree ); }\n"; - out << " std::string text_notrim() { return printTreeStr( __prg, __tree, false ); }\n"; - out << " std::string text_ws() { return printTreeStr( __prg, __tree, false ); }\n"; - out << " colm_data *data() { return __tree->tokdata; }\n"; - out << " operator colm_tree *() { return __tree; }\n"; - out << " colm_program *__prg;\n"; - out << " colm_tree *__tree;\n"; - - if ( mainReturnUT != 0 && mainReturnUT->langEl == lel ) { - out << " " << lel->fullName << - "( colm_program *prg ) : __prg(prg), __tree(returnVal(prg)) {}\n"; - } - - out << " " << lel->fullName << - "( colm_program *prg, colm_tree *tree ) : __prg(prg), __tree(tree) {}\n"; - - if ( lel->objectDef != 0 ) { - FieldList &fieldList = lel->objectDef->fieldList; - for ( FieldList::Iter ofi = fieldList; ofi.lte(); ofi++ ) { - ObjectField *field = ofi->value; - if ( ( field->useOffset() && field->typeRef != 0 ) || field->isRhsGet() ) { - UniqueType *ut = field->typeRef->resolveType( this ); - - if ( ut != 0 && ut->typeId == TYPE_TREE ) - out << " " << ut->langEl->refName << " " << field->name << "();\n"; - } - } - } - - bool prodNames = false; - for ( LelDefList::Iter prod = lel->defList; prod.lte(); prod++ ) { - if ( prod->_name.length() > 0 ) - prodNames = true; - } - - if ( prodNames ) { - out << " enum prod_name {\n"; - for ( LelDefList::Iter prod = lel->defList; prod.lte(); prod++ ) { - if ( prod->_name.length() > 0 ) - out << "\t\t" << prod->_name << " = " << prod->prodNum << ",\n"; - } - out << " };\n"; - out << " enum prod_name prodName() " << - "{ return (enum prod_name)__tree->prod_num; }\n"; - } - - - if ( lel->isRepeat ) { - out << " " << "int 
end() { return colm_repeat_end( __tree ); }\n"; - out << " " << lel->refName << " next();\n"; - out << " " << lel->repeatOf->refName << " value();\n"; - } - - if ( lel->isList ) { - out << " " << "int last() { return colm_list_last( __tree ); }\n"; - out << " " << lel->refName << " next();\n"; - out << " " << lel->repeatOf->refName << " value();\n"; - } - - - out << "};"; - closeNameSpace( out, lel->nspace ); - out << "\n"; - } - - for ( FieldList::Iter of = globalObjectDef->fieldList; of.lte(); of++ ) { - ObjectField *field = of->value; - if ( field->isExport ) { - UniqueType *ut = field->typeRef->resolveType(this); - if ( ut != 0 && ut->typeId == TYPE_TREE ) { - out << ut->langEl->refName << " " << field->name << "( colm_program *prg );\n"; - } - } - } - - out << "\n"; - - for ( FunctionList::Iter func = functionList; func.lte(); func++ ) { - if ( func->exprt ) { - char *refName = func->typeRef->uniqueType->langEl->refName; - int paramCount = func->paramList->length(); - out << - refName << " " << func->name << "( colm_program *prg"; - - for ( int p = 0; p < paramCount; p++ ) - out << ", const char *p" << p; - - out << " );\n"; - } - } - - out << "#endif\n"; -} - -void Compiler::generateExportsImpl() -{ - ostream &out = *outStream; - - if ( exportHeaderFn != 0 ) { - out << "#include \"" << exportHeaderFn << "\"\n"; - } - - /* Function implementations. 
*/ - for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { - if ( lel->objectDef != 0 ) { - FieldList &fieldList = lel->objectDef->fieldList; - for ( FieldList::Iter ofi = fieldList; ofi.lte(); ofi++ ) { - ObjectField *field = ofi->value; - if ( field->useOffset() && field->typeRef != 0 ) { - UniqueType *ut = field->typeRef->resolveType( this ); - - if ( ut != 0 && ut->typeId == TYPE_TREE ) { - out << ut->langEl->refName << " " << lel->declName << - "::" << field->name << "() { return " << - ut->langEl->refName << "( __prg, colm_get_attr( __tree, " << - field->offset << ") ); }\n"; - } - } - - if ( field->isRhsGet() ) { - UniqueType *ut = field->typeRef->resolveType( this ); - - if ( ut != 0 && ut->typeId == TYPE_TREE ) { - out << ut->langEl->refName << " " << lel->declName << - "::" << field->name << "() { static int a[] = {"; - - /* Need to place the array computing the val. */ - out << field->rhsVal.length(); - for ( Vector<RhsVal>::Iter rg = field->rhsVal; rg.lte(); rg++ ) { - out << ", " << rg->prodEl->production->prodNum; - out << ", " << rg->prodEl->pos; - } - - out << "}; return " << ut->langEl->refName << - "( __prg, colm_get_rhs_val( __prg, __tree, a ) ); }\n"; - } - } - } - } - - if ( lel->isRepeat ) { - out << lel->refName << " " << lel->declName << "::" << " next" - "() { return " << lel->refName << - "( __prg, colm_get_repeat_next( __tree ) ); }\n"; - - out << lel->repeatOf->refName << " " << lel->declName << "::" << " value" - "() { return " << lel->repeatOf->refName << - "( __prg, colm_get_repeat_val( __tree ) ); }\n"; - } - - if ( lel->isList ) { - out << lel->refName << " " << lel->declName << "::" << " next" - "() { return " << lel->refName << - "( __prg, colm_get_repeat_next( __tree ) ); }\n"; - - out << lel->repeatOf->refName << " " << lel->declName << "::" << " value" - "() { return " << lel->repeatOf->refName << - "( __prg, colm_get_repeat_val( __tree ) ); }\n"; - } - } - - out << "\n"; - - for ( FieldList::Iter of = 
globalObjectDef->fieldList; of.lte(); of++ ) { - ObjectField *field = of->value; - if ( field->isExport ) { - UniqueType *ut = field->typeRef->resolveType(this); - if ( ut != 0 && ut->typeId == TYPE_TREE ) { - out << - ut->langEl->refName << " " << field->name << "( colm_program *prg )\n" - "{ return " << ut->langEl->refName << "( prg, colm_get_global( prg, " << - field->offset << ") ); }\n"; - } - } - } - - out << "\n"; - - for ( FunctionList::Iter func = functionList; func.lte(); func++ ) { - if ( func->exprt ) { - char *refName = func->typeRef->uniqueType->langEl->refName; - int paramCount = func->paramList->length(); - out << - refName << " " << func->name << "( colm_program *prg"; - - for ( int p = 0; p < paramCount; p++ ) - out << ", const char *p" << p; - - out << " )\n" - "{\n" - " int funcId = " << func->funcId << ";\n" - " const char *params[" << paramCount << "];\n"; - - for ( int p = 0; p < paramCount; p++ ) - out << " params[" << p << "] = p" << p << ";\n"; - - out << - " return " << refName << - "( prg, colm_run_func( prg, funcId, params, " << paramCount << " ));\n" - "}\n"; - } - } -} diff --git a/src/fsmap.cc b/src/fsmap.cc deleted file mode 100644 index 3e1ae913..00000000 --- a/src/fsmap.cc +++ /dev/null @@ -1,806 +0,0 @@ -/* - * Copyright 2006-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <iostream> - -#include <assert.h> - -#include "fsmgraph.h" - -using std::cerr; -using std::endl; - -KeyOps *keyOps = 0; - -/* Insert an action into an action table. */ -void ActionTable::setAction( int ordering, Action *action ) -{ - /* Multi-insert in case specific instances of an action appear in a - * transition more than once. */ - insertMulti( ordering, action ); -} - -/* Set all the action from another action table in this table. */ -void ActionTable::setActions( const ActionTable &other ) -{ - for ( ActionTable::Iter action = other; action.lte(); action++ ) - insertMulti( action->key, action->value ); -} - -void ActionTable::setActions( int *orderings, Action **actions, int nActs ) -{ - for ( int a = 0; a < nActs; a++ ) - insertMulti( orderings[a], actions[a] ); -} - -bool ActionTable::hasAction( Action *action ) -{ - for ( int a = 0; a < length(); a++ ) { - if ( data[a].value == action ) - return true; - } - return false; -} - -/* Insert an action into an action table. */ -void LmActionTable::setAction( int ordering, TokenInstance *action ) -{ - /* Multi-insert in case specific instances of an action appear in a - * transition more than once. */ - insertMulti( ordering, action ); -} - -/* Set all the action from another action table in this table. 
*/ -void LmActionTable::setActions( const LmActionTable &other ) -{ - for ( LmActionTable::Iter action = other; action.lte(); action++ ) - insertMulti( action->key, action->value ); -} - -void ErrActionTable::setAction( int ordering, Action *action, int transferPoint ) -{ - insertMulti( ErrActionTableEl( action, ordering, transferPoint ) ); -} - -void ErrActionTable::setActions( const ErrActionTable &other ) -{ - for ( ErrActionTable::Iter act = other; act.lte(); act++ ) - insertMulti( ErrActionTableEl( act->action, act->ordering, act->transferPoint ) ); -} - -/* Insert a priority into this priority table. Looks out for priorities on - * duplicate keys. */ -void PriorTable::setPrior( int ordering, PriorDesc *desc ) -{ - PriorEl *lastHit = 0; - PriorEl *insed = insert( PriorEl(ordering, desc), &lastHit ); - if ( insed == 0 ) { - /* This already has a priority on the same key as desc. Overwrite the - * priority if the ordering is larger (later in time). */ - if ( ordering >= lastHit->ordering ) - *lastHit = PriorEl( ordering, desc ); - } -} - -/* Set all the priorities from a priorTable in this table. */ -void PriorTable::setPriors( const PriorTable &other ) -{ - /* Loop src priorities once to overwrite duplicates. */ - PriorTable::Iter priorIt = other; - for ( ; priorIt.lte(); priorIt++ ) - setPrior( priorIt->ordering, priorIt->desc ); -} - -/* Set the priority of starting transitions. Isolates the start state so it has - * no other entry points, then sets the priorities of all the transitions out - * of the start state. If the start state is final, then the outPrior of the - * start state is also set. The idea is that a machine that accepts the null - * string can still specify the starting trans prior for when it accepts the - * null word. */ -void FsmGraph::startFsmPrior( int ordering, PriorDesc *prior ) -{ - /* Make sure the start state has no other entry points. */ - isolateStartState(); - - /* Walk all transitions out of the start state. 
*/ - for ( TransList::Iter trans = startState->outList; trans.lte(); trans++ ) { - if ( trans->toState != 0 ) - trans->priorTable.setPrior( ordering, prior ); - } -} - -/* Set the priority of all transitions in a graph. Walks all transition lists - * and all def transitions. */ -void FsmGraph::allTransPrior( int ordering, PriorDesc *prior ) -{ - /* Walk the list of all states. */ - for ( StateList::Iter state = stateList; state.lte(); state++ ) { - /* Walk the out list of the state. */ - for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { - if ( trans->toState != 0 ) - trans->priorTable.setPrior( ordering, prior ); - } - } -} - -/* Set the priority of all transitions that go into a final state. Note that if - * any entry states are final, we will not be setting the priority of any - * transitions that may go into those states in the future. The graph does not - * support pending in transitions in the same way pending out transitions are - * supported. */ -void FsmGraph::finishFsmPrior( int ordering, PriorDesc *prior ) -{ - /* Walk all final states. */ - for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) { - /* Walk all in transitions of the final state. */ - for ( TransInList::Iter trans = (*state)->inList; trans.lte(); trans++ ) - trans->priorTable.setPrior( ordering, prior ); - } -} - -/* Set the priority of any future out transitions that may be made going out of - * this state machine. */ -void FsmGraph::leaveFsmPrior( int ordering, PriorDesc *prior ) -{ - /* Set priority in all final states. */ - for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) - (*state)->outPriorTable.setPrior( ordering, prior ); -} - - -/* Set actions to execute on starting transitions. Isolates the start state - * so it has no other entry points, then adds to the transition functions - * of all the transitions out of the start state. If the start state is final, - * then the func is also added to the start state's out func list. 
The idea is - * that a machine that accepts the null string can execute a start func when it - * matches the null word, which can only be done when leaving the start/final - * state. */ -void FsmGraph::startFsmAction( int ordering, Action *action ) -{ - /* Make sure the start state has no other entry points. */ - isolateStartState(); - - /* Walk the start state's transitions, setting functions. */ - for ( TransList::Iter trans = startState->outList; trans.lte(); trans++ ) { - if ( trans->toState != 0 ) - trans->actionTable.setAction( ordering, action ); - } -} - -/* Set functions to execute on all transitions. Walks the out lists of all - * states. */ -void FsmGraph::allTransAction( int ordering, Action *action ) -{ - /* Walk all states. */ - for ( StateList::Iter state = stateList; state.lte(); state++ ) { - /* Walk the out list of the state. */ - for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { - if ( trans->toState != 0 ) - trans->actionTable.setAction( ordering, action ); - } - } -} - -/* Specify functions to execute upon entering final states. If the start state - * is final we can't really specify a function to execute upon entering that - * final state the first time. So function really means whenever entering a - * final state from within the same fsm. */ -void FsmGraph::finishFsmAction( int ordering, Action *action ) -{ - /* Walk all final states. */ - for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) { - /* Walk the final state's in list. */ - for ( TransInList::Iter trans = (*state)->inList; trans.lte(); trans++ ) - trans->actionTable.setAction( ordering, action ); - } -} - -/* Add functions to any future out transitions that may be made going out of - * this state machine. */ -void FsmGraph::leaveFsmAction( int ordering, Action *action ) -{ - /* Insert the action in the outActionTable of all final states. 
*/ - for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) - (*state)->outActionTable.setAction( ordering, action ); -} - -/* Add functions to the longest match action table for constructing scanners. */ -void FsmGraph::longMatchAction( int ordering, TokenInstance *lmPart ) -{ - /* Walk all final states. */ - for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) { - /* Walk the final state's in list. */ - for ( TransInList::Iter trans = (*state)->inList; trans.lte(); trans++ ) - trans->lmActionTable.setAction( ordering, lmPart ); - } -} - -void FsmGraph::fillGaps( FsmState *state ) -{ - if ( state->outList.length() == 0 ) { - /* Add the range on the lower and upper bound. */ - attachNewTrans( state, 0, keyOps->minKey, keyOps->maxKey ); - } - else { - TransList srcList; - srcList.transfer( state->outList ); - - /* Check for a gap at the beginning. */ - TransList::Iter trans = srcList, next; - if ( keyOps->minKey < trans->lowKey ) { - /* Make the high key and append. */ - Key highKey = trans->lowKey; - highKey.decrement(); - - attachNewTrans( state, 0, keyOps->minKey, highKey ); - } - - /* Write the transition. */ - next = trans.next(); - state->outList.append( trans ); - - /* Keep the last high end. */ - Key lastHigh = trans->highKey; - - /* Loop each source range. */ - for ( trans = next; trans.lte(); trans = next ) { - /* Make the next key following the last range. */ - Key nextKey = lastHigh; - nextKey.increment(); - - /* Check for a gap from last up to here. */ - if ( nextKey < trans->lowKey ) { - /* Make the high end of the range that fills the gap. */ - Key highKey = trans->lowKey; - highKey.decrement(); - - attachNewTrans( state, 0, nextKey, highKey ); - } - - /* Reduce the transition. If it reduced to anything then add it. */ - next = trans.next(); - state->outList.append( trans ); - - /* Keep the last high end. */ - lastHigh = trans->highKey; - } - - /* Now check for a gap on the end to fill. 
*/ - if ( lastHigh < keyOps->maxKey ) { - /* Get a copy of the default. */ - lastHigh.increment(); - - attachNewTrans( state, 0, lastHigh, keyOps->maxKey ); - } - } -} - -void FsmGraph::setErrorAction( FsmState *state, int ordering, Action *action ) -{ - /* Fill any gaps in the out list with an error transition. */ - fillGaps( state ); - - /* Set error transitions in the transitions that go to error. */ - for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { - if ( trans->toState == 0 ) - trans->actionTable.setAction( ordering, action ); - } -} - -void FsmGraph::setErrorActions( FsmState *state, const ActionTable &other ) -{ - /* Fill any gaps in the out list with an error transition. */ - fillGaps( state ); - - /* Set error transitions in the transitions that go to error. */ - for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { - if ( trans->toState == 0 ) - trans->actionTable.setActions( other ); - } -} - - -/* Give a target state for error transitions. */ -void FsmGraph::setErrorTarget( FsmState *state, FsmState *target, int *orderings, - Action **actions, int nActs ) -{ - /* Fill any gaps in the out list with an error transition. */ - fillGaps( state ); - - /* Set error target in the transitions that go to error. */ - for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { - if ( trans->toState == 0 ) { - /* The trans goes to error, redirect it. */ - redirectErrorTrans( trans->fromState, target, trans ); - trans->actionTable.setActions( orderings, actions, nActs ); - } - } -} - -void FsmGraph::transferErrorActions( FsmState *state, int transferPoint ) -{ - for ( int i = 0; i < state->errActionTable.length(); ) { - ErrActionTableEl *act = state->errActionTable.data + i; - if ( act->transferPoint == transferPoint ) { - /* Transfer the error action and remove it. 
*/ - setErrorAction( state, act->ordering, act->action ); - state->errActionTable.vremove( i ); - } - else { - /* Not transfering and deleting, skip over the item. */ - i += 1; - } - } -} - -/* Set error actions in the start state. */ -void FsmGraph::startErrorAction( int ordering, Action *action, int transferPoint ) -{ - /* Make sure the start state has no other entry points. */ - isolateStartState(); - - /* Add the actions. */ - startState->errActionTable.setAction( ordering, action, transferPoint ); -} - -/* Set error actions in all states where there is a transition out. */ -void FsmGraph::allErrorAction( int ordering, Action *action, int transferPoint ) -{ - /* Insert actions in the error action table of all states. */ - for ( StateList::Iter state = stateList; state.lte(); state++ ) - state->errActionTable.setAction( ordering, action, transferPoint ); -} - -/* Set error actions in final states. */ -void FsmGraph::finalErrorAction( int ordering, Action *action, int transferPoint ) -{ - /* Add the action to the error table of final states. */ - for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) - (*state)->errActionTable.setAction( ordering, action, transferPoint ); -} - -void FsmGraph::notStartErrorAction( int ordering, Action *action, int transferPoint ) -{ - for ( StateList::Iter state = stateList; state.lte(); state++ ) { - if ( state != startState ) - state->errActionTable.setAction( ordering, action, transferPoint ); - } -} - -void FsmGraph::notFinalErrorAction( int ordering, Action *action, int transferPoint ) -{ - for ( StateList::Iter state = stateList; state.lte(); state++ ) { - if ( ! state->isFinState() ) - state->errActionTable.setAction( ordering, action, transferPoint ); - } -} - -/* Set error actions in the states that have transitions into a final state. 
*/ -void FsmGraph::middleErrorAction( int ordering, Action *action, int transferPoint ) -{ - /* Isolate the start state in case it is reachable from in inside the - * machine, in which case we don't want it set. */ - for ( StateList::Iter state = stateList; state.lte(); state++ ) { - if ( state != startState && ! state->isFinState() ) - state->errActionTable.setAction( ordering, action, transferPoint ); - } -} - -/* Set EOF actions in the start state. */ -void FsmGraph::startEOFAction( int ordering, Action *action ) -{ - /* Make sure the start state has no other entry points. */ - isolateStartState(); - - /* Add the actions. */ - startState->eofActionTable.setAction( ordering, action ); -} - -/* Set EOF actions in all states where there is a transition out. */ -void FsmGraph::allEOFAction( int ordering, Action *action ) -{ - /* Insert actions in the EOF action table of all states. */ - for ( StateList::Iter state = stateList; state.lte(); state++ ) - state->eofActionTable.setAction( ordering, action ); -} - -/* Set EOF actions in final states. */ -void FsmGraph::finalEOFAction( int ordering, Action *action ) -{ - /* Add the action to the error table of final states. */ - for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) - (*state)->eofActionTable.setAction( ordering, action ); -} - -void FsmGraph::notStartEOFAction( int ordering, Action *action ) -{ - for ( StateList::Iter state = stateList; state.lte(); state++ ) { - if ( state != startState ) - state->eofActionTable.setAction( ordering, action ); - } -} - -void FsmGraph::notFinalEOFAction( int ordering, Action *action ) -{ - for ( StateList::Iter state = stateList; state.lte(); state++ ) { - if ( ! state->isFinState() ) - state->eofActionTable.setAction( ordering, action ); - } -} - -/* Set EOF actions in the states that have transitions into a final state. 
*/ -void FsmGraph::middleEOFAction( int ordering, Action *action ) -{ - /* Set the actions in all states that are not the start state and not final. */ - for ( StateList::Iter state = stateList; state.lte(); state++ ) { - if ( state != startState && ! state->isFinState() ) - state->eofActionTable.setAction( ordering, action ); - } -} - -/* - * Set To State Actions. - */ - -/* Set to state actions in the start state. */ -void FsmGraph::startToStateAction( int ordering, Action *action ) -{ - /* Make sure the start state has no other entry points. */ - isolateStartState(); - startState->toStateActionTable.setAction( ordering, action ); -} - -/* Set to state actions in all states. */ -void FsmGraph::allToStateAction( int ordering, Action *action ) -{ - /* Insert the action on all states. */ - for ( StateList::Iter state = stateList; state.lte(); state++ ) - state->toStateActionTable.setAction( ordering, action ); -} - -/* Set to state actions in final states. */ -void FsmGraph::finalToStateAction( int ordering, Action *action ) -{ - /* Add the action to the error table of final states. */ - for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) - (*state)->toStateActionTable.setAction( ordering, action ); -} - -void FsmGraph::notStartToStateAction( int ordering, Action *action ) -{ - for ( StateList::Iter state = stateList; state.lte(); state++ ) { - if ( state != startState ) - state->toStateActionTable.setAction( ordering, action ); - } -} - -void FsmGraph::notFinalToStateAction( int ordering, Action *action ) -{ - for ( StateList::Iter state = stateList; state.lte(); state++ ) { - if ( ! state->isFinState() ) - state->toStateActionTable.setAction( ordering, action ); - } -} - -/* Set to state actions in states that are not final and not the start state. */ -void FsmGraph::middleToStateAction( int ordering, Action *action ) -{ - /* Set the action in all states that are not the start state and not final. 
*/ - for ( StateList::Iter state = stateList; state.lte(); state++ ) { - if ( state != startState && ! state->isFinState() ) - state->toStateActionTable.setAction( ordering, action ); - } -} - -/* - * Set From State Actions. - */ - -void FsmGraph::startFromStateAction( int ordering, Action *action ) -{ - /* Make sure the start state has no other entry points. */ - isolateStartState(); - startState->fromStateActionTable.setAction( ordering, action ); -} - -void FsmGraph::allFromStateAction( int ordering, Action *action ) -{ - /* Insert the action on all states. */ - for ( StateList::Iter state = stateList; state.lte(); state++ ) - state->fromStateActionTable.setAction( ordering, action ); -} - -void FsmGraph::finalFromStateAction( int ordering, Action *action ) -{ - /* Add the action to the error table of final states. */ - for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) - (*state)->fromStateActionTable.setAction( ordering, action ); -} - -void FsmGraph::notStartFromStateAction( int ordering, Action *action ) -{ - for ( StateList::Iter state = stateList; state.lte(); state++ ) { - if ( state != startState ) - state->fromStateActionTable.setAction( ordering, action ); - } -} - -void FsmGraph::notFinalFromStateAction( int ordering, Action *action ) -{ - for ( StateList::Iter state = stateList; state.lte(); state++ ) { - if ( ! state->isFinState() ) - state->fromStateActionTable.setAction( ordering, action ); - } -} - -void FsmGraph::middleFromStateAction( int ordering, Action *action ) -{ - /* Set the action in all states that are not the start state and not final. */ - for ( StateList::Iter state = stateList; state.lte(); state++ ) { - if ( state != startState && ! state->isFinState() ) - state->fromStateActionTable.setAction( ordering, action ); - } -} - -/* Shift the function ordering of the start transitions to start - * at fromOrder and increase in units of 1. Useful before staring. - * Returns the maximum number of order numbers used. 
*/ -int FsmGraph::shiftStartActionOrder( int fromOrder ) -{ - int maxUsed = 0; - - /* Walk the start state's transitions, shifting function ordering. */ - for ( TransList::Iter trans = startState->outList; trans.lte(); trans++ ) { - /* Walk the function data for the transition and set the keys to - * increasing values starting at fromOrder. */ - int curFromOrder = fromOrder; - ActionTable::Iter action = trans->actionTable; - for ( ; action.lte(); action++ ) - action->key = curFromOrder++; - - /* Keep track of the max number of orders used. */ - if ( curFromOrder - fromOrder > maxUsed ) - maxUsed = curFromOrder - fromOrder; - } - - return maxUsed; -} - -/* Remove all priorities. */ -void FsmGraph::clearAllPriorities() -{ - for ( StateList::Iter state = stateList; state.lte(); state++ ) { - /* Clear out priority data. */ - state->outPriorTable.empty(); - - /* Clear transition data from the out transitions. */ - for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) - trans->priorTable.empty(); - } -} - -/* Zeros out the function ordering keys. This may be called before minimization - * when it is known that no more fsm operations are going to be done. This - * will achieve greater reduction as states will not be separated on the basis - * of function ordering. */ -void FsmGraph::nullActionKeys( ) -{ - /* For each state... */ - for ( StateList::Iter state = stateList; state.lte(); state++ ) { - /* Walk the transitions for the state. */ - for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { - /* Walk the action table for the transition. */ - for ( ActionTable::Iter action = trans->actionTable; - action.lte(); action++ ) - action->key = 0; - - /* Walk the action table for the transition. */ - for ( LmActionTable::Iter action = trans->lmActionTable; - action.lte(); action++ ) - action->key = 0; - } - - /* Null the action keys of the to state action table. 
*/ - for ( ActionTable::Iter action = state->toStateActionTable; - action.lte(); action++ ) - action->key = 0; - - /* Null the action keys of the from state action table. */ - for ( ActionTable::Iter action = state->fromStateActionTable; - action.lte(); action++ ) - action->key = 0; - - /* Null the action keys of the out transtions. */ - for ( ActionTable::Iter action = state->outActionTable; - action.lte(); action++ ) - action->key = 0; - - /* Null the action keys of the error action table. */ - for ( ErrActionTable::Iter action = state->errActionTable; - action.lte(); action++ ) - action->ordering = 0; - - /* Null the action keys eof action table. */ - for ( ActionTable::Iter action = state->eofActionTable; - action.lte(); action++ ) - action->key = 0; - } -} - -/* Walk the list of states and verify that non final states do not have out - * data, that all stateBits are cleared, and that there are no states with - * zero foreign in transitions. */ -void FsmGraph::verifyStates() -{ - for ( StateList::Iter state = stateList; state.lte(); state++ ) { - /* Non final states should not have leaving data. */ - if ( ! (state->stateBits & SB_ISFINAL) ) { - assert( state->outActionTable.length() == 0 ); - assert( state->outCondSet.length() == 0 ); - assert( state->outPriorTable.length() == 0 ); - } - - /* Data used in algorithms should be cleared. */ - assert( (state->stateBits & SB_BOTH) == 0 ); - assert( state->foreignInTrans > 0 ); - } -} - -/* Compare two transitions according to their relative priority. Since the - * base transition has no priority associated with it, the default is to - * return equal. */ -int FsmGraph::comparePrior( const PriorTable &priorTable1, const PriorTable &priorTable2 ) -{ - /* Looking for differing priorities on same keys. Need to concurrently - * scan the priority lists. */ - PriorTable::Iter pd1 = priorTable1; - PriorTable::Iter pd2 = priorTable2; - while ( pd1.lte() && pd2.lte() ) { - /* Check keys. 
*/ - if ( pd1->desc->key < pd2->desc->key ) - pd1.increment(); - else if ( pd1->desc->key > pd2->desc->key ) - pd2.increment(); - /* Keys are the same, check priorities. */ - else if ( pd1->desc->priority < pd2->desc->priority ) - return -1; - else if ( pd1->desc->priority > pd2->desc->priority ) - return 1; - else { - /* Keys and priorities are equal, advance both. */ - pd1.increment(); - pd2.increment(); - } - } - - /* No differing priorities on the same key. */ - return 0; -} - -/* Compares two transitions according to priority and functions. Pointers - * should not be null. Does not consider to state or from state. Compare two - * transitions according to the data contained in the transitions. Data means - * any properties added to user transitions that may differentiate them. Since - * the base transition has no data, the default is to return equal. */ -int FsmGraph::compareTransData( FsmTrans *trans1, FsmTrans *trans2 ) -{ - /* Compare the prior table. */ - int cmpRes = CmpPriorTable::compare( trans1->priorTable, - trans2->priorTable ); - if ( cmpRes != 0 ) - return cmpRes; - - /* Compare longest match action tables. */ - cmpRes = CmpLmActionTable::compare(trans1->lmActionTable, - trans2->lmActionTable); - if ( cmpRes != 0 ) - return cmpRes; - - /* Compare action tables. */ - return CmpActionTable::compare(trans1->actionTable, - trans2->actionTable); -} - -/* Callback invoked when another trans (or possibly this) is added into this - * transition during the merging process. Draw in any properties of srcTrans - * into this transition. AddInTrans is called when a new transitions is made - * that will be a duplicate of another transition or a combination of several - * other transitions. AddInTrans will be called for each transition that the - * new transition is to represent. */ -void FsmGraph::addInTrans( FsmTrans *destTrans, FsmTrans *srcTrans ) -{ - /* Protect against adding in from ourselves. 
*/ - if ( srcTrans == destTrans ) { - /* Adding in ourselves, need to make a copy of the source transitions. - * The priorities are not copied in as that would have no effect. */ - destTrans->lmActionTable.setActions( LmActionTable(srcTrans->lmActionTable) ); - destTrans->actionTable.setActions( ActionTable(srcTrans->actionTable) ); - } - else { - /* Not a copy of ourself, get the functions and priorities. */ - destTrans->lmActionTable.setActions( srcTrans->lmActionTable ); - destTrans->actionTable.setActions( srcTrans->actionTable ); - destTrans->priorTable.setPriors( srcTrans->priorTable ); - } -} - -/* Compare the properties of states that are embedded by users. Compares out - * priorities, out transitions, to, from, out, error and eof action tables. */ -int FsmGraph::compareStateData( const FsmState *state1, const FsmState *state2 ) -{ - /* Compare the out priority table. */ - int cmpRes = CmpPriorTable:: - compare( state1->outPriorTable, state2->outPriorTable ); - if ( cmpRes != 0 ) - return cmpRes; - - /* Test to state action tables. */ - cmpRes = CmpActionTable::compare( state1->toStateActionTable, - state2->toStateActionTable ); - if ( cmpRes != 0 ) - return cmpRes; - - /* Test from state action tables. */ - cmpRes = CmpActionTable::compare( state1->fromStateActionTable, - state2->fromStateActionTable ); - if ( cmpRes != 0 ) - return cmpRes; - - /* Test out action tables. */ - cmpRes = CmpActionTable::compare( state1->outActionTable, - state2->outActionTable ); - if ( cmpRes != 0 ) - return cmpRes; - - /* Test out condition sets. */ - cmpRes = CmpActionSet::compare( state1->outCondSet, - state2->outCondSet ); - if ( cmpRes != 0 ) - return cmpRes; - - /* Test out error action tables. */ - cmpRes = CmpErrActionTable::compare( state1->errActionTable, - state2->errActionTable ); - if ( cmpRes != 0 ) - return cmpRes; - - /* Test eof action tables. 
*/ - return CmpActionTable::compare( state1->eofActionTable, - state2->eofActionTable ); -} - -/* Invoked when a state looses its final state status and the leaving - * transition embedding data should be deleted. */ -void FsmGraph::clearOutData( FsmState *state ) -{ - /* Kill the out actions and priorities. */ - state->outActionTable.empty(); - state->outCondSet.empty(); - state->outPriorTable.empty(); -} - -bool FsmGraph::hasOutData( FsmState *state ) -{ - return ( state->outActionTable.length() > 0 || - state->outCondSet.length() > 0 || - state->outPriorTable.length() > 0 ); -} diff --git a/src/fsmattach.cc b/src/fsmattach.cc deleted file mode 100644 index bc8571b2..00000000 --- a/src/fsmattach.cc +++ /dev/null @@ -1,427 +0,0 @@ -/* - * Copyright 2006-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include <assert.h> - -#include <iostream> - -#include "fsmgraph.h" - -using namespace std; - -/* Insert a transition into an inlist. The head must be supplied. */ -void FsmGraph::attachToInList( FsmState *from, FsmState *to, - FsmTrans *&head, FsmTrans *trans ) -{ - trans->ilnext = head; - trans->ilprev = 0; - - /* If in trans list is not empty, set the head->prev to trans. */ - if ( head != 0 ) - head->ilprev = trans; - - /* Now insert ourselves at the front of the list. */ - head = trans; - - /* Keep track of foreign transitions for from and to. */ - if ( from != to ) { - if ( misfitAccounting ) { - /* If the number of foreign in transitions is about to go up to 1 then - * move it from the misfit list to the main list. */ - if ( to->foreignInTrans == 0 ) - stateList.append( misfitList.detach( to ) ); - } - - to->foreignInTrans += 1; - } -}; - -/* Detach a transition from an inlist. The head of the inlist must be supplied. */ -void FsmGraph::detachFromInList( FsmState *from, FsmState *to, - FsmTrans *&head, FsmTrans *trans ) -{ - /* Detach in the inTransList. */ - if ( trans->ilprev == 0 ) - head = trans->ilnext; - else - trans->ilprev->ilnext = trans->ilnext; - - if ( trans->ilnext != 0 ) - trans->ilnext->ilprev = trans->ilprev; - - /* Keep track of foreign transitions for from and to. */ - if ( from != to ) { - to->foreignInTrans -= 1; - - if ( misfitAccounting ) { - /* If the number of foreign in transitions goes down to 0 then move it - * from the main list to the misfit list. */ - if ( to->foreignInTrans == 0 ) - misfitList.append( stateList.detach( to ) ); - } - } -} - -/* Attach states on the default transition, range list or on out/in list key. - * First makes a new transition. If there is already a transition out from - * fromState on the default, then will assertion fail. */ -FsmTrans *FsmGraph::attachNewTrans( FsmState *from, FsmState *to, Key lowKey, Key highKey ) -{ - /* Make the new transition. 
*/ - FsmTrans *retVal = new FsmTrans(); - - /* The transition is now attached. Remember the parties involved. */ - retVal->fromState = from; - retVal->toState = to; - - /* Make the entry in the out list for the transitions. */ - from->outList.append( retVal ); - - /* Set the the keys of the new trans. */ - retVal->lowKey = lowKey; - retVal->highKey = highKey; - - /* Attach using inList as the head pointer. */ - if ( to != 0 ) - attachToInList( from, to, to->inList.head, retVal ); - - return retVal; -} - -/* Attach for range lists or for the default transition. This attach should - * be used when a transition already is allocated and must be attached to a - * target state. Does not handle adding the transition into the out list. */ -void FsmGraph::attachTrans( FsmState *from, FsmState *to, FsmTrans *trans ) -{ - assert( trans->fromState == 0 && trans->toState == 0 ); - trans->fromState = from; - trans->toState = to; - - if ( to != 0 ) { - /* Attach using the inList pointer as the head pointer. */ - attachToInList( from, to, to->inList.head, trans ); - } -} - -/* Redirect a transition away from error and towards some state. This is just - * like attachTrans except it requires fromState to be set and does not touch - * it. */ -void FsmGraph::redirectErrorTrans( FsmState *from, FsmState *to, FsmTrans *trans ) -{ - assert( trans->fromState != 0 && trans->toState == 0 ); - trans->toState = to; - - if ( to != 0 ) { - /* Attach using the inList pointer as the head pointer. */ - attachToInList( from, to, to->inList.head, trans ); - } -} - -/* Detach for out/in lists or for default transition. */ -void FsmGraph::detachTrans( FsmState *from, FsmState *to, FsmTrans *trans ) -{ - assert( trans->fromState == from && trans->toState == to ); - trans->fromState = 0; - trans->toState = 0; - - if ( to != 0 ) { - /* Detach using to's inList pointer as the head. */ - detachFromInList( from, to, to->inList.head, trans ); - } -} - - -/* Detach a state from the graph. 
Detaches and deletes transitions in and out - * of the state. Empties inList and outList. Removes the state from the final - * state set. A detached state becomes useless and should be deleted. */ -void FsmGraph::detachState( FsmState *state ) -{ - /* Detach the in transitions from the inList list of transitions. */ - while ( state->inList.head != 0 ) { - /* Get pointers to the trans and the state. */ - FsmTrans *trans = state->inList.head; - FsmState *fromState = trans->fromState; - - /* Detach the transitions from the source state. */ - detachTrans( fromState, state, trans ); - - /* Ok to delete the transition. */ - fromState->outList.detach( trans ); - delete trans; - } - - /* Remove the entry points in on the machine. */ - while ( state->entryIds.length() > 0 ) - unsetEntry( state->entryIds[0], state ); - - /* Detach out range transitions. */ - for ( TransList::Iter trans = state->outList; trans.lte(); ) { - TransList::Iter next = trans.next(); - detachTrans( state, trans->toState, trans ); - delete trans; - trans = next; - } - - /* Delete all of the out range pointers. */ - state->outList.abandon(); - - /* Unset final stateness before detaching from graph. */ - if ( state->stateBits & SB_ISFINAL ) - finStateSet.remove( state ); -} - - -/* Duplicate a transition. Makes a new transition that is attached to the same - * dest as srcTrans. The new transition has functions and priority taken from - * srcTrans. Used for merging a transition in to a free spot. The trans can - * just be dropped in. It does not conflict with an existing trans and need - * not be crossed. Returns the new transition. */ -FsmTrans *FsmGraph::dupTrans( FsmState *from, FsmTrans *srcTrans ) -{ - /* Make a new transition. */ - FsmTrans *newTrans = new FsmTrans(); - - /* We can attach the transition, one does not exist. */ - attachTrans( from, srcTrans->toState, newTrans ); - - /* Call the user callback to add in the original source transition. 
*/ - addInTrans( newTrans, srcTrans ); - - return newTrans; -} - -/* In crossing, src trans and dest trans both go to existing states. Make one - * state from the sets of states that src and dest trans go to. */ -FsmTrans *FsmGraph::fsmAttachStates( MergeData &md, FsmState *from, - FsmTrans *destTrans, FsmTrans *srcTrans ) -{ - /* The priorities are equal. We must merge the transitions. Does the - * existing trans go to the state we are to attach to? ie, are we to - * simply double up the transition? */ - FsmState *toState = srcTrans->toState; - FsmState *existingState = destTrans->toState; - - if ( existingState == toState ) { - /* The transition is a double up to the same state. Copy the src - * trans into itself. We don't need to merge in the from out trans - * data, that was done already. */ - addInTrans( destTrans, srcTrans ); - } - else { - /* The trans is not a double up. Dest trans cannot be the same as src - * trans. Set up the state set. */ - StateSet stateSet; - - /* We go to all the states the existing trans goes to, plus... */ - if ( existingState->stateDictEl == 0 ) - stateSet.insert( existingState ); - else - stateSet.insert( existingState->stateDictEl->stateSet ); - - /* ... all the states that we have been told to go to. */ - if ( toState->stateDictEl == 0 ) - stateSet.insert( toState ); - else - stateSet.insert( toState->stateDictEl->stateSet ); - - /* Look for the state. If it is not there already, make it. */ - StateDictEl *lastFound; - if ( md.stateDict.insert( stateSet, &lastFound ) ) { - /* Make a new state representing the combination of states in - * stateSet. It gets added to the fill list. This means that we - * need to fill in it's transitions sometime in the future. We - * don't do that now (ie, do not recurse). */ - FsmState *combinState = addState(); - - /* Link up the dict element and the state. */ - lastFound->targState = combinState; - combinState->stateDictEl = lastFound; - - /* Add to the fill list. 
*/ - md.fillListAppend( combinState ); - } - - /* Get the state insertted/deleted. */ - FsmState *targ = lastFound->targState; - - /* Detach the state from existing state. */ - detachTrans( from, existingState, destTrans ); - - /* Re-attach to the new target. */ - attachTrans( from, targ, destTrans ); - - /* Add in src trans to the existing transition that we redirected to - * the new state. We don't need to merge in the from out trans data, - * that was done already. */ - addInTrans( destTrans, srcTrans ); - } - - return destTrans; -} - -/* Two transitions are to be crossed, handle the possibility of either going - * to the error state. */ -FsmTrans *FsmGraph::mergeTrans( MergeData &md, FsmState *from, - FsmTrans *destTrans, FsmTrans *srcTrans ) -{ - FsmTrans *retTrans = 0; - if ( destTrans->toState == 0 && srcTrans->toState == 0 ) { - /* Error added into error. */ - addInTrans( destTrans, srcTrans ); - retTrans = destTrans; - } - else if ( destTrans->toState == 0 && srcTrans->toState != 0 ) { - /* Non error added into error we need to detach and reattach, */ - detachTrans( from, destTrans->toState, destTrans ); - attachTrans( from, srcTrans->toState, destTrans ); - addInTrans( destTrans, srcTrans ); - retTrans = destTrans; - } - else if ( srcTrans->toState == 0 ) { - /* Dest goes somewhere but src doesn't, just add it it in. */ - addInTrans( destTrans, srcTrans ); - retTrans = destTrans; - } - else { - /* Both go somewhere, run the actual cross. */ - retTrans = fsmAttachStates( md, from, destTrans, srcTrans ); - } - - return retTrans; -} - -/* Find the trans with the higher priority. If src is lower priority then dest then - * src is ignored. If src is higher priority than dest, then src overwrites dest. If - * the priorities are equal, then they are merged. */ -FsmTrans *FsmGraph::crossTransitions( MergeData &md, FsmState *from, - FsmTrans *destTrans, FsmTrans *srcTrans ) -{ - FsmTrans *retTrans; - - /* Compare the priority of the dest and src transitions. 
*/ - int compareRes = comparePrior( destTrans->priorTable, srcTrans->priorTable ); - if ( compareRes < 0 ) { - /* Src trans has a higher priority than dest, src overwrites dest. - * Detach dest and return a copy of src. */ - detachTrans( from, destTrans->toState, destTrans ); - retTrans = dupTrans( from, srcTrans ); - } - else if ( compareRes > 0 ) { - /* The dest trans has a higher priority, use dest. */ - retTrans = destTrans; - } - else { - /* Src trans and dest trans have the same priority, they must be merged. */ - retTrans = mergeTrans( md, from, destTrans, srcTrans ); - } - - /* Return the transition that resulted from the cross. */ - return retTrans; -} - -/* Copy the transitions in srcList to the outlist of dest. The srcList should - * not be the outList of dest, otherwise you would be copying the contents of - * srcList into itself as it's iterated: bad news. */ -void FsmGraph::outTransCopy( MergeData &md, FsmState *dest, FsmTrans *srcList ) -{ - /* The destination list. */ - TransList destList; - - /* Set up an iterator to stop at breaks. */ - PairIter<FsmTrans> outPair( dest->outList.head, srcList ); - for ( ; !outPair.end(); outPair++ ) { - switch ( outPair.userState ) { - case RangeInS1: { - /* The pair iter is the authority on the keys. It may have needed - * to break the dest range. */ - FsmTrans *destTrans = outPair.s1Tel.trans; - destTrans->lowKey = outPair.s1Tel.lowKey; - destTrans->highKey = outPair.s1Tel.highKey; - destList.append( destTrans ); - break; - } - case RangeInS2: { - /* Src range may get crossed with dest's default transition. */ - FsmTrans *newTrans = dupTrans( dest, outPair.s2Tel.trans ); - - /* Set up the transition's keys and append to the dest list. */ - newTrans->lowKey = outPair.s2Tel.lowKey; - newTrans->highKey = outPair.s2Tel.highKey; - destList.append( newTrans ); - break; - } - case RangeOverlap: { - /* Exact overlap, cross them. 
*/ - FsmTrans *newTrans = crossTransitions( md, dest, - outPair.s1Tel.trans, outPair.s2Tel.trans ); - - /* Set up the transition's keys and append to the dest list. */ - newTrans->lowKey = outPair.s1Tel.lowKey; - newTrans->highKey = outPair.s1Tel.highKey; - destList.append( newTrans ); - break; - } - case BreakS1: { - /* Since we are always writing to the dest trans, the dest needs - * to be copied when it is broken. The copy goes into the first - * half of the break to "break it off". */ - outPair.s1Tel.trans = dupTrans( dest, outPair.s1Tel.trans ); - break; - } - case BreakS2: - break; - } - } - - /* Abandon the old outList and transfer destList into it. */ - dest->outList.transfer( destList ); -} - - -/* Move all the transitions that go into src so that they go into dest. */ -void FsmGraph::inTransMove( FsmState *dest, FsmState *src ) -{ - /* Do not try to move in trans to and from the same state. */ - assert( dest != src ); - - /* If src is the start state, dest becomes the start state. */ - if ( src == startState ) { - unsetStartState(); - setStartState( dest ); - } - - /* For each entry point into, create an entry point into dest, when the - * state is detached, the entry points to src will be removed. */ - for ( EntryIdSet::Iter enId = src->entryIds; enId.lte(); enId++ ) - changeEntry( *enId, dest, src ); - - /* Move the transitions in inList. */ - while ( src->inList.head != 0 ) { - /* Get trans and from state. */ - FsmTrans *trans = src->inList.head; - FsmState *fromState = trans->fromState; - - /* Detach from src, reattach to dest. 
*/ - detachTrans( fromState, src, trans ); - attachTrans( fromState, dest, trans ); - } -} diff --git a/src/fsmbase.cc b/src/fsmbase.cc deleted file mode 100644 index 52698a1a..00000000 --- a/src/fsmbase.cc +++ /dev/null @@ -1,603 +0,0 @@ -/* - * Copyright 2006-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <assert.h> - -#include "fsmgraph.h" - -/* Simple singly linked list append routine for the fill list. The new state - * goes to the end of the list. */ -void MergeData::fillListAppend( FsmState *state ) -{ - state->alg.next = 0; - - if ( stfillHead == 0 ) { - /* List is empty, state becomes head and tail. */ - stfillHead = state; - stfillTail = state; - } - else { - /* List is not empty, state goes after last element. */ - stfillTail->alg.next = state; - stfillTail = state; - } -} - -/* Graph constructor. */ -FsmGraph::FsmGraph() -: - /* No start state. 
*/ - startState(0), - errState(0), - - /* Misfit accounting is a switch, turned on only at specific times. It - * controls what happens when states have no way in from the outside - * world.. */ - misfitAccounting(false), - - lmRequiresErrorState(false) -{ -} - -/* Copy all graph data including transitions. */ -FsmGraph::FsmGraph( const FsmGraph &graph ) -: - /* Lists start empty. Will be filled by copy. */ - stateList(), - misfitList(), - - /* Copy in the entry points, - * pointers will be resolved later. */ - entryPoints(graph.entryPoints), - startState(graph.startState), - errState(0), - - /* Will be filled by copy. */ - finStateSet(), - - /* Misfit accounting is only on during merging. */ - misfitAccounting(false), - - lmRequiresErrorState(graph.lmRequiresErrorState) -{ - /* Create the states and record their map in the original state. */ - StateList::Iter origState = graph.stateList; - for ( ; origState.lte(); origState++ ) { - /* Make the new state. */ - FsmState *newState = new FsmState( *origState ); - - /* Add the state to the list. */ - stateList.append( newState ); - - /* Set the mapsTo item of the old state. */ - origState->alg.stateMap = newState; - } - - /* Derefernce all the state maps. */ - for ( StateList::Iter state = stateList; state.lte(); state++ ) { - for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { - /* The points to the original in the src machine. The taget's duplicate - * is in the statemap. */ - FsmState *toState = trans->toState != 0 ? trans->toState->alg.stateMap : 0; - - /* Attach The transition to the duplicate. */ - trans->toState = 0; - attachTrans( state, toState, trans ); - } - } - - /* Fix the state pointers in the entry points array. */ - EntryMapEl *eel = entryPoints.data; - for ( int e = 0; e < entryPoints.length(); e++, eel++ ) { - /* Get the duplicate of the state. */ - eel->value = eel->value->alg.stateMap; - - /* Foreign in transitions must be built up when duping machines so - * increment it here. 
*/ - eel->value->foreignInTrans += 1; - } - - /* Fix the start state pointer and the new start state's count of in - * transiions. */ - startState = startState->alg.stateMap; - startState->foreignInTrans += 1; - - /* Build the final state set. */ - StateSet::Iter st = graph.finStateSet; - for ( ; st.lte(); st++ ) - finStateSet.insert((*st)->alg.stateMap); -} - -/* Deletes all transition data then deletes each state. */ -FsmGraph::~FsmGraph() -{ - /* Delete all the transitions. */ - for ( StateList::Iter state = stateList; state.lte(); state++ ) { - /* Iterate the out transitions, deleting them. */ - state->outList.empty(); - } - - /* Delete all the states. */ - stateList.empty(); -} - -/* Set a state final. The state has its isFinState set to true and the state - * is added to the finStateSet. */ -void FsmGraph::setFinState( FsmState *state ) -{ - /* Is it already a fin state. */ - if ( state->stateBits & SB_ISFINAL ) - return; - - state->stateBits |= SB_ISFINAL; - finStateSet.insert( state ); -} - -/* Set a state non-final. The has its isFinState flag set false and the state - * is removed from the final state set. */ -void FsmGraph::unsetFinState( FsmState *state ) -{ - /* Is it already a non-final state? */ - if ( ! (state->stateBits & SB_ISFINAL) ) - return; - - /* When a state looses its final state status it must relinquish all the - * properties that are allowed only for final states. */ - clearOutData( state ); - - state->stateBits &= ~ SB_ISFINAL; - finStateSet.remove( state ); -} - -/* Set and unset a state as the start state. */ -void FsmGraph::setStartState( FsmState *state ) -{ - /* Sould change from unset to set. */ - assert( startState == 0 ); - startState = state; - - if ( misfitAccounting ) { - /* If the number of foreign in transitions is about to go up to 1 then - * take it off the misfit list and put it on the head list. 
*/ - if ( state->foreignInTrans == 0 ) - stateList.append( misfitList.detach( state ) ); - } - - /* Up the foreign in transitions to the state. */ - state->foreignInTrans += 1; -} - -void FsmGraph::unsetStartState() -{ - /* Should change from set to unset. */ - assert( startState != 0 ); - - /* Decrement the entry's count of foreign entries. */ - startState->foreignInTrans -= 1; - - if ( misfitAccounting ) { - /* If the number of foreign in transitions just went down to 0 then take - * it off the main list and put it on the misfit list. */ - if ( startState->foreignInTrans == 0 ) - misfitList.append( stateList.detach( startState ) ); - } - - startState = 0; -} - -/* Associate an id with a state. Makes the state a named entry point. Has no - * effect if the entry point is already mapped to the state. */ -void FsmGraph::setEntry( int id, FsmState *state ) -{ - /* Insert the id into the state. If the state is already labelled with id, - * nothing to do. */ - if ( state->entryIds.insert( id ) ) { - /* Insert the entry and assert that it succeeds. */ - entryPoints.insertMulti( id, state ); - - if ( misfitAccounting ) { - /* If the number of foreign in transitions is about to go up to 1 then - * take it off the misfit list and put it on the head list. */ - if ( state->foreignInTrans == 0 ) - stateList.append( misfitList.detach( state ) ); - } - - /* Up the foreign in transitions to the state. */ - state->foreignInTrans += 1; - } -} - -/* Remove the association of an id with a state. The state looses it's entry - * point status. Assumes that the id is indeed mapped to state. */ -void FsmGraph::unsetEntry( int id, FsmState *state ) -{ - /* Find the entry point in on id. */ - EntryMapEl *enLow = 0, *enHigh = 0; - entryPoints.findMulti( id, enLow, enHigh ); - while ( enLow->value != state ) - enLow += 1; - - /* Remove the record from the map. */ - entryPoints.remove( enLow ); - - /* Remove the state's sense of the link. 
*/ - state->entryIds.remove( id ); - state->foreignInTrans -= 1; - if ( misfitAccounting ) { - /* If the number of foreign in transitions just went down to 0 then take - * it off the main list and put it on the misfit list. */ - if ( state->foreignInTrans == 0 ) - misfitList.append( stateList.detach( state ) ); - } -} - -/* Remove all association of an id with states. Assumes that the id is indeed - * mapped to a state. */ -void FsmGraph::unsetEntry( int id ) -{ - /* Find the entry point in on id. */ - EntryMapEl *enLow = 0, *enHigh = 0; - entryPoints.findMulti( id, enLow, enHigh ); - for ( EntryMapEl *mel = enLow; mel <= enHigh; mel++ ) { - /* Remove the state's sense of the link. */ - mel->value->entryIds.remove( id ); - mel->value->foreignInTrans -= 1; - if ( misfitAccounting ) { - /* If the number of foreign in transitions just went down to 0 - * then take it off the main list and put it on the misfit list. */ - if ( mel->value->foreignInTrans == 0 ) - misfitList.append( stateList.detach( mel->value ) ); - } - } - - /* Remove the records from the entry points map. */ - entryPoints.removeMulti( enLow, enHigh ); -} - - -void FsmGraph::changeEntry( int id, FsmState *to, FsmState *from ) -{ - /* Find the entry in the entry map. */ - EntryMapEl *enLow = 0, *enHigh = 0; - entryPoints.findMulti( id, enLow, enHigh ); - while ( enLow->value != from ) - enLow += 1; - - /* Change it to the new target. */ - enLow->value = to; - - /* Remove from's sense of the link. */ - from->entryIds.remove( id ); - from->foreignInTrans -= 1; - if ( misfitAccounting ) { - /* If the number of foreign in transitions just went down to 0 then take - * it off the main list and put it on the misfit list. */ - if ( from->foreignInTrans == 0 ) - misfitList.append( stateList.detach( from ) ); - } - - /* Add to's sense of the link. 
*/ - if ( to->entryIds.insert( id ) != 0 ) { - if ( misfitAccounting ) { - /* If the number of foreign in transitions is about to go up to 1 then - * take it off the misfit list and put it on the head list. */ - if ( to->foreignInTrans == 0 ) - stateList.append( misfitList.detach( to ) ); - } - - /* Up the foreign in transitions to the state. */ - to->foreignInTrans += 1; - } -} - - -/* Clear all entry points from a machine. */ -void FsmGraph::unsetAllEntryPoints() -{ - for ( EntryMap::Iter en = entryPoints; en.lte(); en++ ) { - /* Kill all the state's entry points at once. */ - if ( en->value->entryIds.length() > 0 ) { - en->value->foreignInTrans -= en->value->entryIds.length(); - - if ( misfitAccounting ) { - /* If the number of foreign in transitions just went down to 0 - * then take it off the main list and put it on the misfit - * list. */ - if ( en->value->foreignInTrans == 0 ) - misfitList.append( stateList.detach( en->value ) ); - } - - /* Clear the set of ids out all at once. */ - en->value->entryIds.empty(); - } - } - - /* Now clear out the entry map all at once. */ - entryPoints.empty(); -} - -/* Assigning an epsilon transition into final states. */ -void FsmGraph::epsilonTrans( int id ) -{ - for ( StateSet::Iter fs = finStateSet; fs.lte(); fs++ ) - (*fs)->epsilonTrans.append( id ); -} - -/* Mark all states reachable from state. Traverses transitions forward. Used - * for removing states that have no path into them. */ -void FsmGraph::markReachableFromHere( FsmState *state ) -{ - /* Base case: return; */ - if ( state->stateBits & SB_ISMARKED ) - return; - - /* Set this state as processed. We are going to visit all states that this - * state has a transition to. */ - state->stateBits |= SB_ISMARKED; - - /* Recurse on all out transitions. 
*/ - for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { - if ( trans->toState != 0 ) - markReachableFromHere( trans->toState ); - } -} - -void FsmGraph::markReachableFromHereStopFinal( FsmState *state ) -{ - /* Base case: return; */ - if ( state->stateBits & SB_ISMARKED ) - return; - - /* Set this state as processed. We are going to visit all states that this - * state has a transition to. */ - state->stateBits |= SB_ISMARKED; - - /* Recurse on all out transitions. */ - for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { - FsmState *toState = trans->toState; - if ( toState != 0 && !toState->isFinState() ) - markReachableFromHereStopFinal( toState ); - } -} - -/* Mark all states reachable from state. Traverse transitions backwards. Used - * for removing dead end paths in graphs. */ -void FsmGraph::markReachableFromHereReverse( FsmState *state ) -{ - /* Base case: return; */ - if ( state->stateBits & SB_ISMARKED ) - return; - - /* Set this state as processed. We are going to visit all states with - * transitions into this state. */ - state->stateBits |= SB_ISMARKED; - - /* Recurse on all items in transitions. */ - for ( TransInList::Iter trans = state->inList; trans.lte(); trans++ ) - markReachableFromHereReverse( trans->fromState ); -} - -/* Determine if there are any entry points into a start state other than the - * start state. Setting starting transitions requires that the start state be - * isolated. In most cases a start state will already be isolated. */ -bool FsmGraph::isStartStateIsolated() -{ - /* If there are any in transitions then the state is not isolated. */ - if ( startState->inList.head != 0 ) - return false; - - /* If there are any entry points then isolated. */ - if ( startState->entryIds.length() > 0 ) - return false; - - return true; -} - -/* Bring in other's entry points. Assumes others states are going to be - * copied into this machine. 
*/ -void FsmGraph::copyInEntryPoints( FsmGraph *other ) -{ - /* Use insert multi because names are not unique. */ - for ( EntryMap::Iter en = other->entryPoints; en.lte(); en++ ) - entryPoints.insertMulti( en->key, en->value ); -} - - -void FsmGraph::unsetAllFinStates() -{ - for ( StateSet::Iter st = finStateSet; st.lte(); st++ ) - (*st)->stateBits &= ~ SB_ISFINAL; - finStateSet.empty(); -} - -void FsmGraph::setFinBits( int finStateBits ) -{ - for ( int s = 0; s < finStateSet.length(); s++ ) - finStateSet.data[s]->stateBits |= finStateBits; -} - - -/* Tests the integrity of the transition lists and the fromStates. */ -void FsmGraph::verifyIntegrity() -{ - for ( StateList::Iter state = stateList; state.lte(); state++ ) { - /* Walk the out transitions and assert fromState is correct. */ - for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) - assert( trans->fromState == state ); - - /* Walk the inlist and assert toState is correct. */ - for ( TransInList::Iter trans = state->inList; trans.lte(); trans++ ) - assert( trans->toState == state ); - } -} - -void FsmGraph::verifyReachability() -{ - /* Mark all the states that can be reached - * through the set of entry points. */ - markReachableFromHere( startState ); - for ( EntryMap::Iter en = entryPoints; en.lte(); en++ ) - markReachableFromHere( en->value ); - - /* Check that everything got marked. */ - for ( StateList::Iter st = stateList; st.lte(); st++ ) { - /* Assert it got marked and then clear the mark. */ - assert( st->stateBits & SB_ISMARKED ); - st->stateBits &= ~ SB_ISMARKED; - } -} - -void FsmGraph::verifyNoDeadEndStates() -{ - /* Mark all states that have paths to the final states. */ - for ( StateSet::Iter pst = finStateSet; pst.lte(); pst++ ) - markReachableFromHereReverse( *pst ); - - /* Start state gets honorary marking. Must be done AFTER recursive call. */ - startState->stateBits |= SB_ISMARKED; - - /* Make sure everything got marked. 
*/ - for ( StateList::Iter st = stateList; st.lte(); st++ ) { - /* Assert the state got marked and unmark it. */ - assert( st->stateBits & SB_ISMARKED ); - st->stateBits &= ~ SB_ISMARKED; - } -} - -void FsmGraph::depthFirstOrdering( FsmState *state ) -{ - /* Nothing to do if the state is already on the list. */ - if ( state->stateBits & SB_ONLIST ) - return; - - /* Doing depth first, put state on the list. */ - state->stateBits |= SB_ONLIST; - stateList.append( state ); - - /* Recurse on everything ranges. */ - for ( TransList::Iter tel = state->outList; tel.lte(); tel++ ) { - if ( tel->toState != 0 ) - depthFirstOrdering( tel->toState ); - } -} - -/* Ordering states by transition connections. */ -void FsmGraph::depthFirstOrdering() -{ - /* Init on state list flags. */ - for ( StateList::Iter st = stateList; st.lte(); st++ ) - st->stateBits &= ~SB_ONLIST; - - /* Clear out the state list, we will rebuild it. */ - int stateListLen = stateList.length(); - stateList.abandon(); - - /* Add back to the state list from the start state and all other entry - * points. */ - if ( errState != 0 ) - depthFirstOrdering( errState ); - depthFirstOrdering( startState ); - for ( EntryMap::Iter en = entryPoints; en.lte(); en++ ) - depthFirstOrdering( en->value ); - - /* Make sure we put everything back on. */ - assert( stateListLen == stateList.length() ); -} - -/* Stable sort the states by final state status. */ -void FsmGraph::sortStatesByFinal() -{ - /* Move forward through the list and throw final states onto the end. */ - FsmState *state = 0; - FsmState *next = stateList.head; - FsmState *last = stateList.tail; - while ( state != last ) { - /* Move forward and load up the next. */ - state = next; - next = state->next; - - /* Throw to the end? 
*/ - if ( state->isFinState() ) { - stateList.detach( state ); - stateList.append( state ); - } - } -} - -void FsmGraph::setStateNumbers( int base ) -{ - for ( StateList::Iter state = stateList; state.lte(); state++ ) - state->alg.stateNum = base++; -} - - -bool FsmGraph::checkErrTrans( FsmState *state, FsmTrans *trans ) -{ - /* Might go directly to error state. */ - if ( trans->toState == 0 ) - return true; - - if ( trans->prev == 0 ) { - /* If this is the first transition. */ - if ( keyOps->minKey < trans->lowKey ) - return true; - } - else { - /* Not the first transition. Compare against the prev. */ - FsmTrans *prev = trans->prev; - Key nextKey = prev->highKey; - nextKey.increment(); - if ( nextKey < trans->lowKey ) - return true; - } - return false; -} - -bool FsmGraph::checkErrTransFinish( FsmState *state ) -{ - /* Check if there are any ranges already. */ - if ( state->outList.length() == 0 ) - return true; - else { - /* Get the last and check for a gap on the end. */ - FsmTrans *last = state->outList.tail; - if ( last->highKey < keyOps->maxKey ) - return true; - } - return 0; -} - -bool FsmGraph::hasErrorTrans() -{ - bool result; - for ( StateList::Iter st = stateList; st.lte(); st++ ) { - for ( TransList::Iter tr = st->outList; tr.lte(); tr++ ) { - result = checkErrTrans( st, tr ); - if ( result ) - return true; - } - result = checkErrTransFinish( st ); - if ( result ) - return true; - } - return false; -} diff --git a/src/fsmcodegen.cc b/src/fsmcodegen.cc deleted file mode 100644 index 89a52f9a..00000000 --- a/src/fsmcodegen.cc +++ /dev/null @@ -1,911 +0,0 @@ -/* - * Copyright 2006-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of 
the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <assert.h> -#include <string.h> -#include <stdbool.h> - -#include <sstream> -#include <iostream> - -#include "fsmcodegen.h" - -using std::ostream; -using std::ostringstream; -using std::string; -using std::cerr; -using std::endl; - -/* Init code gen with in parameters. */ -FsmCodeGen::FsmCodeGen( ostream &out, - RedFsm *redFsm, fsm_tables *fsmTables ) -: - out(out), - redFsm(redFsm), - fsmTables(fsmTables), - codeGenErrCount(0), - dataPrefix(true), - writeFirstFinal(true), - writeErr(true), - skipTokenLabelNeeded(false) -{ -} - -/* Write out the fsm name. */ -string FsmCodeGen::FSM_NAME() -{ - return "parser"; -} - -/* Emit the offset of the start state as a decimal integer. */ -string FsmCodeGen::START_STATE_ID() -{ - ostringstream ret; - ret << redFsm->startState->id; - return ret.str(); -}; - -/* Write out the array of actions. */ -std::ostream &FsmCodeGen::ACTIONS_ARRAY() -{ - out << "\t0, "; - int totalActions = 1; - for ( GenActionTableMap::Iter act = redFsm->actionMap; act.lte(); act++ ) { - /* Write out the length, which will never be the last character. 
*/ - out << act->key.length() << ", "; - /* Put in a line break every 8 */ - if ( totalActions++ % 8 == 7 ) - out << "\n\t"; - - for ( GenActionTable::Iter item = act->key; item.lte(); item++ ) { - out << item->value->actionId; - if ( ! (act.last() && item.last()) ) - out << ", "; - - /* Put in a line break every 8 */ - if ( totalActions++ % 8 == 7 ) - out << "\n\t"; - } - } - out << "\n"; - return out; -} - - -string FsmCodeGen::CS() -{ - ostringstream ret; - /* Expression for retrieving the key, use simple dereference. */ - ret << ACCESS() << "fsm_cs"; - return ret.str(); -} - -string FsmCodeGen::GET_WIDE_KEY() -{ - return GET_KEY(); -} - -string FsmCodeGen::GET_WIDE_KEY( RedState *state ) -{ - return GET_KEY(); -} - -string FsmCodeGen::GET_KEY() -{ - ostringstream ret; - /* Expression for retrieving the key, use simple dereference. */ - ret << "(*" << P() << ")"; - return ret.str(); -} - -/* Write out level number of tabs. Makes the nested binary search nice - * looking. */ -string FsmCodeGen::TABS( int level ) -{ - string result; - while ( level-- > 0 ) - result += "\t"; - return result; -} - -/* Write out a key from the fsm code gen. Depends on wether or not the key is - * signed. */ -string FsmCodeGen::KEY( Key key ) -{ - ostringstream ret; - ret << key.getVal(); - return ret.str(); -} - -void FsmCodeGen::SET_ACT( ostream &ret, InlineItem *item ) -{ - ret << ACT() << " = " << item->longestMatchPart->longestMatchId << ";"; -} - -void FsmCodeGen::SET_TOKEND( ostream &ret, InlineItem *item ) -{ - /* The tokend action sets tokend. 
*/ - ret << "{ " << TOKEND() << " = " << TOKLEN() << " + ( " << P() << " - " << BLOCK_START() << " ) + 1; }"; -} -void FsmCodeGen::INIT_TOKSTART( ostream &ret, InlineItem *item ) -{ - ret << TOKSTART() << " = 0;"; -} - -void FsmCodeGen::INIT_ACT( ostream &ret, InlineItem *item ) -{ - ret << ACT() << " = 0;"; -} - -void FsmCodeGen::SET_TOKSTART( ostream &ret, InlineItem *item ) -{ - ret << TOKSTART() << " = " << P() << ";"; -} - -void FsmCodeGen::EMIT_TOKEN( ostream &ret, LangEl *token ) -{ - ret << " " << MATCHED_TOKEN() << " = " << token->id << ";\n"; -} - -void FsmCodeGen::LM_SWITCH( ostream &ret, InlineItem *item, - int targState, int inFinish ) -{ - ret << - " " << TOKLEN() << " = " << TOKEND() << ";\n" - " switch( " << ACT() << " ) {\n"; - - /* If the switch handles error then we also forced the error state. It - * will exist. */ - if ( item->tokenRegion->lmSwitchHandlesError ) { - ret << " case 0: " //<< P() << " = " << TOKSTART() << ";" << - "goto st" << redFsm->errState->id << ";\n"; - } - - for ( TokenInstanceListReg::Iter lmi = item->tokenRegion->tokenInstanceList; lmi.lte(); lmi++ ) { - if ( lmi->inLmSelect ) { - assert( lmi->tokenDef->tdLangEl != 0 ); - ret << " case " << lmi->longestMatchId << ":\n"; - EMIT_TOKEN( ret, lmi->tokenDef->tdLangEl ); - ret << " break;\n"; - } - } - - ret << - " }\n" - "\t" - " goto skip_toklen;\n"; - - skipTokenLabelNeeded = true; -} - -void FsmCodeGen::LM_ON_LAST( ostream &ret, InlineItem *item ) -{ - assert( item->longestMatchPart->tokenDef->tdLangEl != 0 ); - - ret << " " << P() << " += 1;\n"; - EMIT_TOKEN( ret, item->longestMatchPart->tokenDef->tdLangEl ); - ret << " goto out;\n"; -} - -void FsmCodeGen::LM_ON_NEXT( ostream &ret, InlineItem *item ) -{ - assert( item->longestMatchPart->tokenDef->tdLangEl != 0 ); - - EMIT_TOKEN( ret, item->longestMatchPart->tokenDef->tdLangEl ); - ret << " goto out;\n"; -} - -void FsmCodeGen::LM_ON_LAG_BEHIND( ostream &ret, InlineItem *item ) -{ - assert( 
item->longestMatchPart->tokenDef->tdLangEl != 0 ); - - ret << " " << TOKLEN() << " = " << TOKEND() << ";\n"; - EMIT_TOKEN( ret, item->longestMatchPart->tokenDef->tdLangEl ); - ret << " goto skip_toklen;\n"; - - skipTokenLabelNeeded = true; -} - - -/* Write out an inline tree structure. Walks the list and possibly calls out - * to virtual functions than handle language specific items in the tree. */ -void FsmCodeGen::INLINE_LIST( ostream &ret, InlineList *inlineList, - int targState, bool inFinish ) -{ - for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) { - switch ( item->type ) { - case InlineItem::Text: - assert( false ); - break; - case InlineItem::LmSetActId: - SET_ACT( ret, item ); - break; - case InlineItem::LmSetTokEnd: - SET_TOKEND( ret, item ); - break; - case InlineItem::LmInitTokStart: - assert( false ); - break; - case InlineItem::LmInitAct: - INIT_ACT( ret, item ); - break; - case InlineItem::LmSetTokStart: - SET_TOKSTART( ret, item ); - break; - case InlineItem::LmSwitch: - LM_SWITCH( ret, item, targState, inFinish ); - break; - case InlineItem::LmOnLast: - LM_ON_LAST( ret, item ); - break; - case InlineItem::LmOnNext: - LM_ON_NEXT( ret, item ); - break; - case InlineItem::LmOnLagBehind: - LM_ON_LAG_BEHIND( ret, item ); - break; - } - } -} - -/* Write out paths in line directives. Escapes any special characters. */ -string FsmCodeGen::LDIR_PATH( char *path ) -{ - ostringstream ret; - for ( char *pc = path; *pc != 0; pc++ ) { - if ( *pc == '\\' ) - ret << "\\\\"; - else - ret << *pc; - } - return ret.str(); -} - -void FsmCodeGen::ACTION( ostream &ret, GenAction *action, int targState, bool inFinish ) -{ - /* Write the block and close it off. 
*/ - ret << "\t{"; - INLINE_LIST( ret, action->inlineList, targState, inFinish ); - - if ( action->markId > 0 ) - ret << "mark[" << action->markId-1 << "] = " << P() << ";\n"; - - ret << "}\n"; - -} - -void FsmCodeGen::CONDITION( ostream &ret, GenAction *condition ) -{ - ret << "\n"; - INLINE_LIST( ret, condition->inlineList, 0, false ); -} - -string FsmCodeGen::ERROR_STATE() -{ - ostringstream ret; - if ( redFsm->errState != 0 ) - ret << redFsm->errState->id; - else - ret << "-1"; - return ret.str(); -} - -string FsmCodeGen::FIRST_FINAL_STATE() -{ - ostringstream ret; - if ( redFsm->firstFinState != 0 ) - ret << redFsm->firstFinState->id; - else - ret << redFsm->nextStateId; - return ret.str(); -} - -string FsmCodeGen::DATA_PREFIX() -{ - if ( dataPrefix ) - return FSM_NAME() + "_"; - return ""; -} - -/* Emit the alphabet data type. */ -string FsmCodeGen::ALPH_TYPE() -{ - string ret = keyOps->alphType->data1; - if ( keyOps->alphType->data2 != 0 ) { - ret += " "; - ret += + keyOps->alphType->data2; - } - return ret; -} - -/* Emit the alphabet data type. */ -string FsmCodeGen::WIDE_ALPH_TYPE() -{ - string ret; - ret = ALPH_TYPE(); - return ret; -} - - -string FsmCodeGen::PTR_CONST() -{ - return "const "; -} - -std::ostream &FsmCodeGen::OPEN_ARRAY( string type, string name ) -{ - out << "static const " << type << " " << name << "[] = {\n"; - return out; -} - -std::ostream &FsmCodeGen::CLOSE_ARRAY() -{ - return out << "};\n"; -} - -std::ostream &FsmCodeGen::STATIC_VAR( string type, string name ) -{ - out << "static const " << type << " " << name; - return out; -} - -string FsmCodeGen::UINT( ) -{ - return "unsigned int"; -} - -string FsmCodeGen::ARR_OFF( string ptr, string offset ) -{ - return ptr + " + " + offset; -} - -string FsmCodeGen::CAST( string type ) -{ - return "(" + type + ")"; -} - -std::ostream &FsmCodeGen::TO_STATE_ACTION_SWITCH() -{ - /* Walk the list of functions, printing the cases. 
*/ - for ( GenActionList::Iter act = redFsm->genActionList; act.lte(); act++ ) { - /* Write out referenced actions. */ - if ( act->numToStateRefs > 0 ) { - /* Write the case label, the action and the case break. */ - out << "\tcase " << act->actionId << ":\n"; - ACTION( out, act, 0, false ); - out << "\tbreak;\n"; - } - } - - return out; -} - -std::ostream &FsmCodeGen::FROM_STATE_ACTION_SWITCH() -{ - /* Walk the list of functions, printing the cases. */ - for ( GenActionList::Iter act = redFsm->genActionList; act.lte(); act++ ) { - /* Write out referenced actions. */ - if ( act->numFromStateRefs > 0 ) { - /* Write the case label, the action and the case break. */ - out << "\tcase " << act->actionId << ":\n"; - ACTION( out, act, 0, false ); - out << "\tbreak;\n"; - } - } - - return out; -} - -std::ostream &FsmCodeGen::ACTION_SWITCH() -{ - /* Walk the list of functions, printing the cases. */ - for ( GenActionList::Iter act = redFsm->genActionList; act.lte(); act++ ) { - /* Write out referenced actions. */ - if ( act->numTransRefs > 0 ) { - /* Write the case label, the action and the case break. */ - out << "\tcase " << act->actionId << ":\n"; - ACTION( out, act, 0, false ); - out << "\tbreak;\n"; - } - } - - return out; -} - -void FsmCodeGen::emitSingleSwitch( RedState *state ) -{ - /* Load up the singles. */ - int numSingles = state->outSingle.length(); - RedTransEl *data = state->outSingle.data; - - if ( numSingles == 1 ) { - /* If there is a single single key then write it out as an if. */ - out << "\tif ( " << GET_WIDE_KEY(state) << " == " << - KEY(data[0].lowKey) << " )\n\t\t"; - - /* Virtual function for writing the target of the transition. */ - TRANS_GOTO(data[0].value, 0) << "\n"; - } - else if ( numSingles > 1 ) { - /* Write out single keys in a switch if there is more than one. */ - out << "\tswitch( " << GET_WIDE_KEY(state) << " ) {\n"; - - /* Write out the single indicies. 
*/ - for ( int j = 0; j < numSingles; j++ ) { - out << "\t\tcase " << KEY(data[j].lowKey) << ": "; - TRANS_GOTO(data[j].value, 0) << "\n"; - } - - /* Close off the transition switch. */ - out << "\t}\n"; - } -} - -void FsmCodeGen::emitRangeBSearch( RedState *state, int level, int low, int high ) -{ - /* Get the mid position, staying on the lower end of the range. */ - int mid = (low + high) >> 1; - RedTransEl *data = state->outRange.data; - - /* Determine if we need to look higher or lower. */ - bool anyLower = mid > low; - bool anyHigher = mid < high; - - /* Determine if the keys at mid are the limits of the alphabet. */ - bool limitLow = data[mid].lowKey == keyOps->minKey; - bool limitHigh = data[mid].highKey == keyOps->maxKey; - - if ( anyLower && anyHigher ) { - /* Can go lower and higher than mid. */ - out << TABS(level) << "if ( " << GET_WIDE_KEY(state) << " < " << - KEY(data[mid].lowKey) << " ) {\n"; - emitRangeBSearch( state, level+1, low, mid-1 ); - out << TABS(level) << "} else if ( " << GET_WIDE_KEY(state) << " > " << - KEY(data[mid].highKey) << " ) {\n"; - emitRangeBSearch( state, level+1, mid+1, high ); - out << TABS(level) << "} else\n"; - TRANS_GOTO(data[mid].value, level+1) << "\n"; - } - else if ( anyLower && !anyHigher ) { - /* Can go lower than mid but not higher. */ - out << TABS(level) << "if ( " << GET_WIDE_KEY(state) << " < " << - KEY(data[mid].lowKey) << " ) {\n"; - emitRangeBSearch( state, level+1, low, mid-1 ); - - /* if the higher is the highest in the alphabet then there is no - * sense testing it. */ - if ( limitHigh ) { - out << TABS(level) << "} else\n"; - TRANS_GOTO(data[mid].value, level+1) << "\n"; - } - else { - out << TABS(level) << "} else if ( " << GET_WIDE_KEY(state) << " <= " << - KEY(data[mid].highKey) << " )\n"; - TRANS_GOTO(data[mid].value, level+1) << "\n"; - } - } - else if ( !anyLower && anyHigher ) { - /* Can go higher than mid but not lower. 
*/ - out << TABS(level) << "if ( " << GET_WIDE_KEY(state) << " > " << - KEY(data[mid].highKey) << " ) {\n"; - emitRangeBSearch( state, level+1, mid+1, high ); - - /* If the lower end is the lowest in the alphabet then there is no - * sense testing it. */ - if ( limitLow ) { - out << TABS(level) << "} else\n"; - TRANS_GOTO(data[mid].value, level+1) << "\n"; - } - else { - out << TABS(level) << "} else if ( " << GET_WIDE_KEY(state) << " >= " << - KEY(data[mid].lowKey) << " )\n"; - TRANS_GOTO(data[mid].value, level+1) << "\n"; - } - } - else { - /* Cannot go higher or lower than mid. It's mid or bust. What - * tests to do depends on limits of alphabet. */ - if ( !limitLow && !limitHigh ) { - out << TABS(level) << "if ( " << KEY(data[mid].lowKey) << " <= " << - GET_WIDE_KEY(state) << " && " << GET_WIDE_KEY(state) << " <= " << - KEY(data[mid].highKey) << " )\n"; - TRANS_GOTO(data[mid].value, level+1) << "\n"; - } - else if ( limitLow && !limitHigh ) { - out << TABS(level) << "if ( " << GET_WIDE_KEY(state) << " <= " << - KEY(data[mid].highKey) << " )\n"; - TRANS_GOTO(data[mid].value, level+1) << "\n"; - } - else if ( !limitLow && limitHigh ) { - out << TABS(level) << "if ( " << KEY(data[mid].lowKey) << " <= " << - GET_WIDE_KEY(state) << " )\n"; - TRANS_GOTO(data[mid].value, level+1) << "\n"; - } - else { - /* Both high and low are at the limit. No tests to do. */ - TRANS_GOTO(data[mid].value, level+1) << "\n"; - } - } -} - -std::ostream &FsmCodeGen::STATE_GOTOS() -{ - for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { - if ( st == redFsm->errState ) - STATE_GOTO_ERROR(); - else { - /* Writing code above state gotos. */ - GOTO_HEADER( st ); - - /* Try singles. */ - if ( st->outSingle.length() > 0 ) - emitSingleSwitch( st ); - - /* Default case is to binary search for the ranges, if that fails then */ - if ( st->outRange.length() > 0 ) - emitRangeBSearch( st, 1, 0, st->outRange.length() - 1 ); - - /* Write the default transition. 
*/ - TRANS_GOTO( st->defTrans, 1 ) << "\n"; - } - } - return out; -} - -unsigned int FsmCodeGen::TO_STATE_ACTION( RedState *state ) -{ - int act = 0; - if ( state->toStateAction != 0 ) - act = state->toStateAction->location+1; - return act; -} - -unsigned int FsmCodeGen::FROM_STATE_ACTION( RedState *state ) -{ - int act = 0; - if ( state->fromStateAction != 0 ) - act = state->fromStateAction->location+1; - return act; -} - -std::ostream &FsmCodeGen::TO_STATE_ACTIONS() -{ - /* Take one off for the psuedo start state. */ - int numStates = redFsm->stateList.length(); - unsigned int *vals = new unsigned int[numStates]; - memset( vals, 0, sizeof(unsigned int)*numStates ); - - for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) - vals[st->id] = TO_STATE_ACTION(st); - - out << "\t"; - for ( int st = 0; st < redFsm->nextStateId; st++ ) { - /* Write any eof action. */ - out << vals[st]; - if ( st < numStates-1 ) { - out << ", "; - if ( (st+1) % IALL == 0 ) - out << "\n\t"; - } - } - out << "\n"; - delete[] vals; - return out; -} - -std::ostream &FsmCodeGen::FROM_STATE_ACTIONS() -{ - /* Take one off for the psuedo start state. */ - int numStates = redFsm->stateList.length(); - unsigned int *vals = new unsigned int[numStates]; - memset( vals, 0, sizeof(unsigned int)*numStates ); - - for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) - vals[st->id] = FROM_STATE_ACTION(st); - - out << "\t"; - for ( int st = 0; st < redFsm->nextStateId; st++ ) { - /* Write any eof action. */ - out << vals[st]; - if ( st < numStates-1 ) { - out << ", "; - if ( (st+1) % IALL == 0 ) - out << "\n\t"; - } - } - out << "\n"; - delete[] vals; - return out; -} - -bool FsmCodeGen::IN_TRANS_ACTIONS( RedState *state ) -{ - /* Emit any transitions that have actions and that go to this state. 
*/ - for ( int it = 0; it < state->numInTrans; it++ ) { - RedTrans *trans = state->inTrans[it]; - if ( trans->action != 0 && trans->labelNeeded ) { - /* Write the label for the transition so it can be jumped to. */ - out << "tr" << trans->id << ":\n"; - - /* If the action contains a next, then we must preload the current - * state since the action may or may not set it. */ - if ( trans->action->anyNextStmt() ) - out << " " << CS() << " = " << trans->targ->id << ";\n"; - - /* Write each action in the list. */ - for ( GenActionTable::Iter item = trans->action->key; item.lte(); item++ ) - ACTION( out, item->value, trans->targ->id, false ); - - out << "\tgoto st" << trans->targ->id << ";\n"; - } - } - - return 0; -} - -/* Called from FsmCodeGen::STATE_GOTOS just before writing the gotos for each - * state. */ -void FsmCodeGen::GOTO_HEADER( RedState *state ) -{ - IN_TRANS_ACTIONS( state ); - - if ( state->labelNeeded ) - out << "st" << state->id << ":\n"; - - if ( state->toStateAction != 0 ) { - /* Remember that we wrote an action. Write every action in the list. */ - for ( GenActionTable::Iter item = state->toStateAction->key; item.lte(); item++ ) - ACTION( out, item->value, state->id, false ); - } - - /* Give the state a switch case. */ - out << "case " << state->id << ":\n"; - - /* Advance and test buffer pos. */ - out << - " if ( ++" << P() << " == " << PE() << " )\n" - " goto out" << state->id << ";\n"; - - if ( state->fromStateAction != 0 ) { - /* Remember that we wrote an action. Write every action in the list. */ - for ( GenActionTable::Iter item = state->fromStateAction->key; item.lte(); item++ ) - ACTION( out, item->value, state->id, false ); - } - - /* Record the prev state if necessary. */ - if ( state->anyRegCurStateRef() ) - out << " _ps = " << state->id << ";\n"; -} - -void FsmCodeGen::STATE_GOTO_ERROR() -{ - /* In the error state we need to emit some stuff that usually goes into - * the header. 
*/ - RedState *state = redFsm->errState; - IN_TRANS_ACTIONS( state ); - - if ( state->labelNeeded ) - out << "st" << state->id << ":\n"; - - /* We do not need a case label here because the the error state is checked - * at the head of the loop. */ - - /* Break out here. */ - out << " goto out" << state->id << ";\n"; -} - - -/* Emit the goto to take for a given transition. */ -std::ostream &FsmCodeGen::TRANS_GOTO( RedTrans *trans, int level ) -{ - if ( trans->action != 0 ) { - /* Go to the transition which will go to the state. */ - out << TABS(level) << "goto tr" << trans->id << ";"; - } - else { - /* Go directly to the target state. */ - out << TABS(level) << "goto st" << trans->targ->id << ";"; - } - return out; -} - -std::ostream &FsmCodeGen::EXIT_STATES() -{ - for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { - out << " case " << st->id << ": out" << st->id << ": "; - if ( st->eofTrans != 0 ) { - out << "if ( " << DATA_EOF() << " ) {"; - TRANS_GOTO( st->eofTrans, 0 ); - out << "\n"; - out << "}"; - } - - /* Exit. */ - out << CS() << " = " << st->id << "; goto out; \n"; - } - return out; -} - -/* Set up labelNeeded flag for each state. */ -void FsmCodeGen::setLabelsNeeded() -{ - /* Do not use all labels by default, init all labelNeeded vars to false. */ - for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) - st->labelNeeded = false; - - if ( redFsm->errState != 0 && redFsm->anyLmSwitchError() ) - redFsm->errState->labelNeeded = true; - - /* Walk all transitions and set only those that have targs. */ - for ( RedTransSet::Iter trans = redFsm->transSet; trans.lte(); trans++ ) { - /* If there is no action with a next statement, then the label will be - * needed. 
*/ - if ( trans->action == 0 || !trans->action->anyNextStmt() ) - trans->targ->labelNeeded = true; - } - - for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) - st->outNeeded = st->labelNeeded; -} - -void FsmCodeGen::writeData() -{ - out << "#define " << START() << " " << START_STATE_ID() << "\n"; - out << "#define " << FIRST_FINAL() << " " << FIRST_FINAL_STATE() << "\n"; - out << "#define " << ERROR() << " " << ERROR_STATE() << "\n"; - out << "#define false 0\n"; - out << "#define true 1\n"; - out << "\n"; - - out << "static long " << ENTRY_BY_REGION() << "[] = {\n\t"; - for ( int i = 0; i < fsmTables->num_regions; i++ ) { - out << fsmTables->entry_by_region[i]; - - if ( i < fsmTables->num_regions-1 ) { - out << ", "; - if ( (i+1) % 8 == 0 ) - out << "\n\t"; - } - } - out << "\n};\n\n"; - - out << - "static struct fsm_tables fsmTables_start =\n" - "{\n" - " 0, " /* actions */ - " 0, " /* keyOffsets */ - " 0, " /* transKeys */ - " 0, " /* singleLengths */ - " 0, " /* rangeLengths */ - " 0, " /* indexOffsets */ - " 0, " /* transTargsWI */ - " 0, " /* transActionsWI */ - " 0, " /* toStateActions */ - " 0, " /* fromStateActions */ - " 0, " /* eofActions */ - " 0,\n" /* eofTargs */ - " " << ENTRY_BY_REGION() << ",\n" - - "\n" - " 0, " /* numStates */ - " 0, " /* numActions */ - " 0, " /* numTransKeys */ - " 0, " /* numSingleLengths */ - " 0, " /* numRangeLengths */ - " 0, " /* numIndexOffsets */ - " 0, " /* numTransTargsWI */ - " 0,\n" /* numTransActionsWI */ - " " << redFsm->regionToEntry.length() << ",\n" - "\n" - " " << START() << ",\n" - " " << FIRST_FINAL() << ",\n" - " " << ERROR() << ",\n" - "\n" - " 0,\n" /* actionSwitch */ - " 0\n" /* numActionSwitch */ - "};\n" - "\n"; -} - -void FsmCodeGen::writeInit() -{ - out << - " " << CS() << " = " << START() << ";\n"; - - /* If there are any calls, then the stack top needs initialization. 
*/ - if ( redFsm->anyActionCalls() || redFsm->anyActionRets() ) - out << "\t" << TOP() << " = 0;\n"; - - out << - " " << TOKSTART() << " = 0;\n" - " " << TOKEND() << " = 0;\n" - " " << ACT() << " = 0;\n"; - - out << "\n"; -} - -void FsmCodeGen::writeExec() -{ - setLabelsNeeded(); - - out << - "static void fsm_execute( struct pda_run *pdaRun, struct input_impl *inputStream )\n" - "{\n" - " " << BLOCK_START() << " = pdaRun->p;\n" - "/*_resume:*/\n"; - - if ( redFsm->errState != 0 ) { - out << - " if ( " << CS() << " == " << redFsm->errState->id << " )\n" - " goto out;\n"; - } - - out << - " if ( " << P() << " == " << PE() << " )\n" - " goto out_switch;\n" - " --" << P() << ";\n" - "\n" - " switch ( " << CS() << " )\n {\n"; - STATE_GOTOS() << - " }\n"; - - out << - "out_switch:\n" - " switch ( " << CS() << " )\n {\n"; - EXIT_STATES() << - " }\n"; - - out << - "out:\n" - " if ( " << P() << " != 0 )\n" - " " << TOKLEN() << " += " << P() << " - " << BLOCK_START() << ";\n"; - - if ( skipTokenLabelNeeded ) { - out << - "skip_toklen:\n" - " {}\n"; - } - - out << - "}\n" - "\n"; -} - -void FsmCodeGen::writeCode() -{ - redFsm->depthFirstOrdering(); - - writeData(); - writeExec(); - - /* Referenced in the runtime lib, but used only in the compiler. Probably - * should use the preprocessor to make these go away. 
*/ - out << - "static void sendNamedLangEl( struct colm_program *prg, tree_t **tree,\n" - " struct pda_run *pda_run, struct input_impl *input ) { }\n" - "static void initBindings( struct pda_run *pdaRun ) {}\n" - "static void popBinding( struct pda_run *pdaRun, parse_tree_t *tree ) {}\n" - "\n" - "\n"; -} - - diff --git a/src/fsmcodegen.h b/src/fsmcodegen.h deleted file mode 100644 index 8f79186c..00000000 --- a/src/fsmcodegen.h +++ /dev/null @@ -1,210 +0,0 @@ -/* - * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef _COLM_FSMCODEGEN_H -#define _COLM_FSMCODEGEN_H - -#include <stdio.h> - -#include <string> -#include <iostream> - -#include "keyops.h" -#include "compiler.h" -#include "redfsm.h" - -using std::string; -using std::ostream; - -/* Integer array line length. */ -#define IALL 8 - -/* Forwards. 
*/ -struct RedFsm; -struct RedState; -struct GenAction; -struct NameInst; -struct RedAction; -struct LongestMatch; -struct TokenInstance; -struct InlineList; -struct InlineItem; -struct NameInst; -struct FsmCodeGen; - -typedef unsigned long ulong; -typedef unsigned char uchar; - - -/* - * The interface to the parser - */ - -std::ostream *openOutput( char *inputFile ); - -inline string itoa( int i ) -{ - char buf[16]; - sprintf( buf, "%i", i ); - return buf; -} - -/* - * class FsmCodeGen - */ -struct FsmCodeGen -{ -public: - FsmCodeGen( ostream &out, RedFsm *redFsm, fsm_tables *fsmTables ); - -protected: - - string FSM_NAME(); - string START_STATE_ID(); - ostream &ACTIONS_ARRAY(); - string GET_WIDE_KEY(); - string GET_WIDE_KEY( RedState *state ); - string TABS( int level ); - string KEY( Key key ); - string LDIR_PATH( char *path ); - void ACTION( ostream &ret, GenAction *action, int targState, bool inFinish ); - void CONDITION( ostream &ret, GenAction *condition ); - string ALPH_TYPE(); - string WIDE_ALPH_TYPE(); - string ARRAY_TYPE( unsigned long maxVal ); - - string ARR_OFF( string ptr, string offset ); - string CAST( string type ); - string UINT(); - string GET_KEY(); - - string ACCESS() { return "pdaRun->"; } - - string P() { return ACCESS() + "p"; } - string PE() { return ACCESS() + "pe"; } - string DATA_EOF() { return ACCESS() + "scan_eof"; } - - string CS(); - string TOP() { return ACCESS() + "top"; } - string TOKSTART() { return ACCESS() + "tokstart"; } - string TOKEND() { return ACCESS() + "tokend"; } - string BLOCK_START() { return ACCESS() + "start"; } - string TOKLEN() { return ACCESS() + "toklen"; } - string ACT() { return ACCESS() + "act"; } - string MATCHED_TOKEN() { return ACCESS() + "matched_token"; } - - string DATA_PREFIX(); - - string START() { return DATA_PREFIX() + "start"; } - string ERROR() { return DATA_PREFIX() + "error"; } - string FIRST_FINAL() { return DATA_PREFIX() + "first_final"; } - - string ENTRY_BY_REGION() { return DATA_PREFIX() + 
"entry_by_region"; } - - - void INLINE_LIST( ostream &ret, InlineList *inlineList, - int targState, bool inFinish ); - void EXEC_TOKEND( ostream &ret, InlineItem *item, int targState, int inFinish ); - void EXECTE( ostream &ret, InlineItem *item, int targState, int inFinish ); - void LM_SWITCH( ostream &ret, InlineItem *item, int targState, int inFinish ); - void SET_ACT( ostream &ret, InlineItem *item ); - void INIT_TOKSTART( ostream &ret, InlineItem *item ); - void INIT_ACT( ostream &ret, InlineItem *item ); - void SET_TOKSTART( ostream &ret, InlineItem *item ); - void SET_TOKEND( ostream &ret, InlineItem *item ); - void GET_TOKEND( ostream &ret, InlineItem *item ); - void SUB_ACTION( ostream &ret, InlineItem *item, int targState, bool inFinish ); - void LM_ON_LAST( ostream &ret, InlineItem *item ); - void LM_ON_NEXT( ostream &ret, InlineItem *item ); - void LM_ON_LAG_BEHIND( ostream &ret, InlineItem *item ); - void EXEC_TOKEND( ostream &ret ); - void EMIT_TOKEN( ostream &ret, LangEl *token ); - - string ERROR_STATE(); - string FIRST_FINAL_STATE(); - - string PTR_CONST(); - ostream &OPEN_ARRAY( string type, string name ); - ostream &CLOSE_ARRAY(); - ostream &STATIC_VAR( string type, string name ); - - string CTRL_FLOW(); - - unsigned int arrayTypeSize( unsigned long maxVal ); - -public: - ostream &out; - RedFsm *redFsm; - fsm_tables *fsmTables; - int codeGenErrCount; - - /* Write options. 
*/ - bool dataPrefix; - bool writeFirstFinal; - bool writeErr; - bool skipTokenLabelNeeded; - - std::ostream &TO_STATE_ACTION_SWITCH(); - std::ostream &FROM_STATE_ACTION_SWITCH(); - std::ostream &ACTION_SWITCH(); - std::ostream &STATE_GOTOS(); - std::ostream &TRANSITIONS(); - std::ostream &EXEC_FUNCS(); - - unsigned int TO_STATE_ACTION( RedState *state ); - unsigned int FROM_STATE_ACTION( RedState *state ); - - std::ostream &TO_STATE_ACTIONS(); - std::ostream &FROM_STATE_ACTIONS(); - - void emitCondBSearch( RedState *state, int level, int low, int high ); - void STATE_CONDS( RedState *state, bool genDefault ); - - void emitSingleSwitch( RedState *state ); - void emitRangeBSearch( RedState *state, int level, int low, int high ); - - std::ostream &EXIT_STATES(); - std::ostream &TRANS_GOTO( RedTrans *trans, int level ); - std::ostream &FINISH_CASES(); - - void writeIncludes(); - void writeData(); - void writeInit(); - void writeExec(); - void writeCode(); - void writeMain( long activeRealm ); - -protected: - bool useAgainLabel(); - - /* Called from GotoCodeGen::STATE_GOTOS just before writing the gotos for - * each state. */ - bool IN_TRANS_ACTIONS( RedState *state ); - void GOTO_HEADER( RedState *state ); - void STATE_GOTO_ERROR(); - - /* Set up labelNeeded flag for each state. 
*/ - void setLabelsNeeded(); -}; - -#endif /* _COLM_FSMCODEGEN_H */ - diff --git a/src/fsmexec.cc b/src/fsmexec.cc deleted file mode 100644 index 41fe0e64..00000000 --- a/src/fsmexec.cc +++ /dev/null @@ -1,220 +0,0 @@ -/* - * Copyright 2007-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include <stdbool.h> - -#include <assert.h> - -#include "redfsm.h" -#include "compiler.h" - -void execAction( struct pda_run *pdaRun, GenAction *genAction ) -{ - for ( InlineList::Iter item = *genAction->inlineList; item.lte(); item++ ) { - switch ( item->type ) { - case InlineItem::Text: - assert(false); - break; - case InlineItem::LmSetActId: - pdaRun->act = item->longestMatchPart->longestMatchId; - break; - case InlineItem::LmSetTokEnd: - pdaRun->tokend = pdaRun->toklen + ( pdaRun->p - pdaRun->start ) + 1; - break; - case InlineItem::LmInitTokStart: - assert(false); - break; - case InlineItem::LmInitAct: - pdaRun->act = 0; - break; - case InlineItem::LmSetTokStart: - pdaRun->tokstart = pdaRun->p; - break; - case InlineItem::LmSwitch: - /* If the switch handles error then we also forced the error state. It - * will exist. */ - pdaRun->toklen = pdaRun->tokend; - if ( item->tokenRegion->lmSwitchHandlesError && pdaRun->act == 0 ) { - pdaRun->fsm_cs = pdaRun->fsm_tables->error_state; - } - else { - for ( TokenInstanceListReg::Iter lmi = item->tokenRegion->tokenInstanceList; - lmi.lte(); lmi++ ) - { - if ( lmi->inLmSelect && pdaRun->act == lmi->longestMatchId ) - pdaRun->matched_token = lmi->tokenDef->tdLangEl->id; - } - } - pdaRun->return_result = true; - pdaRun->skip_toklen = true; - break; - case InlineItem::LmOnLast: - pdaRun->p += 1; - pdaRun->matched_token = item->longestMatchPart->tokenDef->tdLangEl->id; - pdaRun->return_result = true; - break; - case InlineItem::LmOnNext: - pdaRun->matched_token = item->longestMatchPart->tokenDef->tdLangEl->id; - pdaRun->return_result = true; - break; - case InlineItem::LmOnLagBehind: - pdaRun->toklen = pdaRun->tokend; - pdaRun->matched_token = item->longestMatchPart->tokenDef->tdLangEl->id; - pdaRun->return_result = true; - pdaRun->skip_toklen = true; - break; - } - } - - if ( genAction->markType == MarkMark ) - pdaRun->mark[genAction->markId-1] = pdaRun->p; -} - -extern "C" void internalFsmExecute( struct pda_run 
*pdaRun, struct input_impl *inputStream ) -{ - int _klen; - unsigned int _trans; - const long *_acts; - unsigned int _nacts; - const char *_keys; - - pdaRun->start = pdaRun->p; - - /* Init the token match to nothing (the sentinal). */ - pdaRun->matched_token = 0; - -/*_resume:*/ - if ( pdaRun->fsm_cs == pdaRun->fsm_tables->error_state ) - goto out; - - if ( pdaRun->p == pdaRun->pe ) - goto out; - -_loop_head: - _acts = pdaRun->fsm_tables->actions + pdaRun->fsm_tables->from_state_actions[pdaRun->fsm_cs]; - _nacts = (unsigned int) *_acts++; - while ( _nacts-- > 0 ) - execAction( pdaRun, pdaRun->fsm_tables->action_switch[*_acts++] ); - - _keys = pdaRun->fsm_tables->trans_keys + pdaRun->fsm_tables->key_offsets[pdaRun->fsm_cs]; - _trans = pdaRun->fsm_tables->index_offsets[pdaRun->fsm_cs]; - - _klen = pdaRun->fsm_tables->single_lengths[pdaRun->fsm_cs]; - if ( _klen > 0 ) { - const char *_lower = _keys; - const char *_mid; - const char *_upper = _keys + _klen - 1; - while (1) { - if ( _upper < _lower ) - break; - - _mid = _lower + ((_upper-_lower) >> 1); - if ( (*pdaRun->p) < *_mid ) - _upper = _mid - 1; - else if ( (*pdaRun->p) > *_mid ) - _lower = _mid + 1; - else { - _trans += (_mid - _keys); - goto _match; - } - } - _keys += _klen; - _trans += _klen; - } - - _klen = pdaRun->fsm_tables->range_lengths[pdaRun->fsm_cs]; - if ( _klen > 0 ) { - const char *_lower = _keys; - const char *_mid; - const char *_upper = _keys + (_klen<<1) - 2; - while (1) { - if ( _upper < _lower ) - break; - - _mid = _lower + (((_upper-_lower) >> 1) & ~1); - if ( (*pdaRun->p) < _mid[0] ) - _upper = _mid - 2; - else if ( (*pdaRun->p) > _mid[1] ) - _lower = _mid + 2; - else { - _trans += ((_mid - _keys)>>1); - goto _match; - } - } - _trans += _klen; - } - -_match: - pdaRun->fsm_cs = pdaRun->fsm_tables->transTargsWI[_trans]; - - if ( pdaRun->fsm_tables->transActionsWI[_trans] == 0 ) - goto _again; - - pdaRun->return_result = false; - pdaRun->skip_toklen = false; - _acts = 
pdaRun->fsm_tables->actions + pdaRun->fsm_tables->transActionsWI[_trans]; - _nacts = (unsigned int) *_acts++; - while ( _nacts-- > 0 ) - execAction( pdaRun, pdaRun->fsm_tables->action_switch[*_acts++] ); - if ( pdaRun->return_result ) { - if ( pdaRun->skip_toklen ) - goto skip_toklen; - goto final; - } - -_again: - _acts = pdaRun->fsm_tables->actions + pdaRun->fsm_tables->to_state_actions[pdaRun->fsm_cs]; - _nacts = (unsigned int) *_acts++; - while ( _nacts-- > 0 ) - execAction( pdaRun, pdaRun->fsm_tables->action_switch[*_acts++] ); - - if ( pdaRun->fsm_cs == pdaRun->fsm_tables->error_state ) - goto out; - - if ( ++pdaRun->p != pdaRun->pe ) - goto _loop_head; -out: - if ( pdaRun->scan_eof ) { - pdaRun->return_result = false; - pdaRun->skip_toklen = false; - _acts = pdaRun->fsm_tables->actions + pdaRun->fsm_tables->eof_actions[pdaRun->fsm_cs]; - _nacts = (unsigned int) *_acts++; - - if ( pdaRun->fsm_tables->eof_targs[pdaRun->fsm_cs] >= 0 ) - pdaRun->fsm_cs = pdaRun->fsm_tables->eof_targs[pdaRun->fsm_cs]; - - while ( _nacts-- > 0 ) - execAction( pdaRun, pdaRun->fsm_tables->action_switch[*_acts++] ); - if ( pdaRun->return_result ) { - if ( pdaRun->skip_toklen ) - goto skip_toklen; - goto final; - } - } - -final: - - if ( pdaRun->p != 0 ) - pdaRun->toklen += pdaRun->p - pdaRun->start; -skip_toklen: - {} -} diff --git a/src/fsmgraph.cc b/src/fsmgraph.cc deleted file mode 100644 index 8cbfe29c..00000000 --- a/src/fsmgraph.cc +++ /dev/null @@ -1,981 +0,0 @@ -/* - * Copyright 2006-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: 
- * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "fsmgraph.h" - -#include <assert.h> - -#include <iostream> - -using std::cerr; -using std::endl; - -/* Make a new state. The new state will be put on the graph's - * list of state. The new state can be created final or non final. */ -FsmState *FsmGraph::addState() -{ - /* Make the new state to return. */ - FsmState *state = new FsmState(); - - if ( misfitAccounting ) { - /* Create the new state on the misfit list. All states are created - * with no foreign in transitions. */ - misfitList.append( state ); - } - else { - /* Create the new state. */ - stateList.append( state ); - } - - return state; -} - -/* Construct an FSM that is the concatenation of an array of characters. A new - * machine will be made that has len+1 states with one transition between each - * state for each integer in str. IsSigned determines if the integers are to - * be considered as signed or unsigned ints. */ -void FsmGraph::concatFsm( Key *str, int len ) -{ - /* Make the first state and set it as the start state. */ - FsmState *last = addState(); - setStartState( last ); - - /* Attach subsequent states. */ - for ( int i = 0; i < len; i++ ) { - FsmState *newState = addState(); - attachNewTrans( last, newState, str[i], str[i] ); - last = newState; - } - - /* Make the last state the final state. 
*/ - setFinState( last ); -} - -/* Case insensitive version of concatFsm. */ -void FsmGraph::concatFsmCI( Key *str, int len ) -{ - /* Make the first state and set it as the start state. */ - FsmState *last = addState(); - setStartState( last ); - - /* Attach subsequent states. */ - for ( int i = 0; i < len; i++ ) { - FsmState *newState = addState(); - - KeySet keySet; - if ( str[i].isLower() ) - keySet.insert( str[i].toUpper() ); - if ( str[i].isUpper() ) - keySet.insert( str[i].toLower() ); - keySet.insert( str[i] ); - - for ( int i = 0; i < keySet.length(); i++ ) - attachNewTrans( last, newState, keySet[i], keySet[i] ); - - last = newState; - } - - /* Make the last state the final state. */ - setFinState( last ); -} - -/* Construct a machine that matches one character. A new machine will be made - * that has two states with a single transition between the states. IsSigned - * determines if the integers are to be considered as signed or unsigned ints. */ -void FsmGraph::concatFsm( Key chr ) -{ - /* Two states first start, second final. */ - setStartState( addState() ); - - FsmState *end = addState(); - setFinState( end ); - - /* Attach on the character. */ - attachNewTrans( startState, end, chr, chr ); -} - -/* Construct a machine that matches any character in set. A new machine will - * be made that has two states and len transitions between the them. The set - * should be ordered correctly accroding to KeyOps and should not contain - * any duplicates. */ -void FsmGraph::orFsm( Key *set, int len ) -{ - /* Two states first start, second final. */ - setStartState( addState() ); - - FsmState *end = addState(); - setFinState( end ); - - for ( int i = 1; i < len; i++ ) - assert( set[i-1] < set[i] ); - - /* Attach on all the integers in the given string of ints. */ - for ( int i = 0; i < len; i++ ) - attachNewTrans( startState, end, set[i], set[i] ); -} - -/* Construct a machine that matches a range of characters. 
A new machine will - * be made with two states and a range transition between them. The range will - * match any characters from low to high inclusive. Low should be less than or - * equal to high otherwise undefined behaviour results. IsSigned determines - * if the integers are to be considered as signed or unsigned ints. */ -void FsmGraph::rangeFsm( Key low, Key high ) -{ - /* Two states first start, second final. */ - setStartState( addState() ); - - FsmState *end = addState(); - setFinState( end ); - - /* Attach using the range of characters. */ - attachNewTrans( startState, end, low, high ); -} - -/* Construct a machine that a repeated range of characters. */ -void FsmGraph::rangeStarFsm( Key low, Key high) -{ - /* One state which is final and is the start state. */ - setStartState( addState() ); - setFinState( startState ); - - /* Attach start to start using range of characters. */ - attachNewTrans( startState, startState, low, high ); -} - -/* Construct a machine that matches the empty string. A new machine will be - * made with only one state. The new state will be both a start and final - * state. IsSigned determines if the machine has a signed or unsigned - * alphabet. Fsm operations must be done on machines with the same alphabet - * signedness. */ -void FsmGraph::lambdaFsm( ) -{ - /* Give it one state with no transitions making it - * the start state and final state. */ - setStartState( addState() ); - setFinState( startState ); -} - -/* Construct a machine that matches nothing at all. A new machine will be - * made with only one state. It will not be final. */ -void FsmGraph::emptyFsm( ) -{ - /* Give it one state with no transitions making it - * the start state and final state. */ - setStartState( addState() ); -} - -void FsmGraph::transferOutData( FsmState *destState, FsmState *srcState ) -{ - for ( TransList::Iter trans = destState->outList; trans.lte(); trans++ ) { - if ( trans->toState != 0 ) { - /* Get the actions data from the outActionTable. 
*/ - trans->actionTable.setActions( srcState->outActionTable ); - - /* Get the priorities from the outPriorTable. */ - trans->priorTable.setPriors( srcState->outPriorTable ); - } - } -} - -/* Kleene star operator. Makes this machine the kleene star of itself. Any - * transitions made going out of the machine and back into itself will be - * notified that they are leaving transitions by having the leavingFromState - * callback invoked. */ -void FsmGraph::starOp( ) -{ - /* For the merging process. */ - MergeData md; - - /* Turn on misfit accounting to possibly catch the old start state. */ - setMisfitAccounting( true ); - - /* Create the new new start state. It will be set final after the merging - * of the final states with the start state is complete. */ - FsmState *prevStartState = startState; - unsetStartState(); - setStartState( addState() ); - - /* Merge the new start state with the old one to isolate it. */ - mergeStates( md, startState, prevStartState ); - - /* Merge the start state into all final states. Except the start state on - * the first pass. If the start state is set final we will be doubling up - * its transitions, which will get transfered to any final states that - * follow it in the final state set. This will be determined by the order - * of items in the final state set. To prevent this we just merge with the - * start on a second pass. */ - for ( StateSet::Iter st = finStateSet; st.lte(); st++ ) { - if ( *st != startState ) - mergeStatesLeaving( md, *st, startState ); - } - - /* Now it is safe to merge the start state with itself (provided it - * is set final). */ - if ( startState->isFinState() ) - mergeStatesLeaving( md, startState, startState ); - - /* Now ensure the new start state is a final state. */ - setFinState( startState ); - - /* Fill in any states that were newed up as combinations of others. */ - fillInStates( md ); - - /* Remove the misfits and turn off misfit accounting. 
*/ - removeMisfits(); - setMisfitAccounting( false ); -} - -void FsmGraph::repeatOp( int times ) -{ - /* Must be 1 and up. 0 produces null machine and requires deleting this. */ - assert( times > 0 ); - - /* A repeat of one does absolutely nothing. */ - if ( times == 1 ) - return; - - /* Make a machine to make copies from. */ - FsmGraph *copyFrom = new FsmGraph( *this ); - - /* Concatentate duplicates onto the end up until before the last. */ - for ( int i = 1; i < times-1; i++ ) { - FsmGraph *dup = new FsmGraph( *copyFrom ); - doConcat( dup, 0, false ); - } - - /* Now use the copyFrom on the end. */ - doConcat( copyFrom, 0, false ); -} - -void FsmGraph::optionalRepeatOp( int times ) -{ - /* Must be 1 and up. 0 produces null machine and requires deleting this. */ - assert( times > 0 ); - - /* A repeat of one optional merely allows zero string. */ - if ( times == 1 ) { - setFinState( startState ); - return; - } - - /* Make a machine to make copies from. */ - FsmGraph *copyFrom = new FsmGraph( *this ); - - /* The state set used in the from end of the concatentation. Starts with - * the initial final state set, then after each concatenation, gets set to - * the the final states that come from the the duplicate. */ - StateSet lastFinSet( finStateSet ); - - /* Set the initial state to zero to allow zero copies. */ - setFinState( startState ); - - /* Concatentate duplicates onto the end up until before the last. */ - for ( int i = 1; i < times-1; i++ ) { - /* Make a duplicate for concating and set the fin bits to graph 2 so we - * can pick out it's final states after the optional style concat. 
*/ - FsmGraph *dup = new FsmGraph( *copyFrom ); - dup->setFinBits( SB_GRAPH2 ); - doConcat( dup, &lastFinSet, true ); - - /* Clear the last final state set and make the new one by taking only - * the final states that come from graph 2.*/ - lastFinSet.empty(); - for ( int i = 0; i < finStateSet.length(); i++ ) { - /* If the state came from graph 2, add it to the last set and clear - * the bits. */ - FsmState *fs = finStateSet[i]; - if ( fs->stateBits & SB_GRAPH2 ) { - lastFinSet.insert( fs ); - fs->stateBits &= ~SB_GRAPH2; - } - } - } - - /* Now use the copyFrom on the end, no bits set, no bits to clear. */ - doConcat( copyFrom, &lastFinSet, true ); -} - - -/* Fsm concatentation worker. Supports treating the concatentation as optional, - * which essentially leaves the final states of machine one as final. */ -void FsmGraph::doConcat( FsmGraph *other, StateSet *fromStates, bool optional ) -{ - /* For the merging process. */ - StateSet finStateSetCopy, startStateSet; - MergeData md; - - /* Turn on misfit accounting for both graphs. */ - setMisfitAccounting( true ); - other->setMisfitAccounting( true ); - - /* Get the other's start state. */ - FsmState *otherStartState = other->startState; - - /* Unset other's start state before bringing in the entry points. */ - other->unsetStartState(); - - /* Bring in the rest of other's entry points. */ - copyInEntryPoints( other ); - other->entryPoints.empty(); - - /* Bring in other's states into our state lists. */ - stateList.append( other->stateList ); - misfitList.append( other->misfitList ); - - /* If from states is not set, then get a copy of our final state set before - * we clobber it and use it instead. */ - if ( fromStates == 0 ) { - finStateSetCopy = finStateSet; - fromStates = &finStateSetCopy; - } - - /* Unset all of our final states and get the final states from other. 
*/ - if ( !optional ) - unsetAllFinStates(); - finStateSet.insert( other->finStateSet ); - - /* Since other's lists are empty, we can delete the fsm without - * affecting any states. */ - delete other; - - /* Merge our former final states with the start state of other. */ - for ( int i = 0; i < fromStates->length(); i++ ) { - FsmState *state = fromStates->data[i]; - - /* Merge the former final state with other's start state. */ - mergeStatesLeaving( md, state, otherStartState ); - - /* If the former final state was not reset final then we must clear - * the state's out trans data. If it got reset final then it gets to - * keep its out trans data. This must be done before fillInStates gets - * called to prevent the data from being sourced. */ - if ( ! state->isFinState() ) - clearOutData( state ); - } - - /* Fill in any new states made from merging. */ - fillInStates( md ); - - /* Remove the misfits and turn off misfit accounting. */ - removeMisfits(); - setMisfitAccounting( false ); -} - -/* Concatenates other to the end of this machine. Other is deleted. Any - * transitions made leaving this machine and entering into other are notified - * that they are leaving transitions by having the leavingFromState callback - * invoked. */ -void FsmGraph::concatOp( FsmGraph *other ) -{ - /* Assert same signedness and return graph concatenation op. */ - doConcat( other, 0, false ); -} - - -void FsmGraph::doOr( FsmGraph *other ) -{ - /* For the merging process. */ - MergeData md; - - /* Build a state set consisting of both start states */ - StateSet startStateSet; - startStateSet.insert( startState ); - startStateSet.insert( other->startState ); - - /* Both of the original start states loose their start state status. */ - unsetStartState(); - other->unsetStartState(); - - /* Bring in the rest of other's entry points. */ - copyInEntryPoints( other ); - other->entryPoints.empty(); - - /* Merge the lists. This will move all the states from other - * into this. 
No states will be deleted. */ - stateList.append( other->stateList ); - misfitList.append( other->misfitList ); - - /* Move the final set data from other into this. */ - finStateSet.insert(other->finStateSet); - other->finStateSet.empty(); - - /* Since other's list is empty, we can delete the fsm without - * affecting any states. */ - delete other; - - /* Create a new start state. */ - setStartState( addState() ); - - /* Merge the start states. */ - mergeStates( md, startState, startStateSet.data, startStateSet.length() ); - - /* Fill in any new states made from merging. */ - fillInStates( md ); -} - -/* Unions other with this machine. Other is deleted. */ -void FsmGraph::unionOp( FsmGraph *other ) -{ - /* Turn on misfit accounting for both graphs. */ - setMisfitAccounting( true ); - other->setMisfitAccounting( true ); - - /* Call Worker routine. */ - doOr( other ); - - /* Remove the misfits and turn off misfit accounting. */ - removeMisfits(); - setMisfitAccounting( false ); -} - -/* Intersects other with this machine. Other is deleted. */ -void FsmGraph::intersectOp( FsmGraph *other ) -{ - /* Turn on misfit accounting for both graphs. */ - setMisfitAccounting( true ); - other->setMisfitAccounting( true ); - - /* Set the fin bits on this and other to want each other. */ - setFinBits( SB_GRAPH1 ); - other->setFinBits( SB_GRAPH2 ); - - /* Call worker Or routine. */ - doOr( other ); - - /* Unset any final states that are no longer to - * be final due to final bits. */ - unsetIncompleteFinals(); - - /* Remove the misfits and turn off misfit accounting. */ - removeMisfits(); - setMisfitAccounting( false ); - - /* Remove states that have no path to a final state. */ - removeDeadEndStates(); -} - -/* Set subtracts other machine from this machine. Other is deleted. */ -void FsmGraph::subtractOp( FsmGraph *other ) -{ - /* Turn on misfit accounting for both graphs. 
*/ - setMisfitAccounting( true ); - other->setMisfitAccounting( true ); - - /* Set the fin bits of other to be killers. */ - other->setFinBits( SB_GRAPH1 ); - - /* Call worker Or routine. */ - doOr( other ); - - /* Unset any final states that are no longer to - * be final due to final bits. */ - unsetKilledFinals(); - - /* Remove the misfits and turn off misfit accounting. */ - removeMisfits(); - setMisfitAccounting( false ); - - /* Remove states that have no path to a final state. */ - removeDeadEndStates(); -} - -bool FsmGraph::inEptVect( EptVect *eptVect, FsmState *state ) -{ - if ( eptVect != 0 ) { - /* Vect is there, walk it looking for state. */ - for ( int i = 0; i < eptVect->length(); i++ ) { - if ( eptVect->data[i].targ == state ) - return true; - } - } - return false; -} - -/* Fill epsilon vectors in a root state from a given starting point. Epmploys - * a depth first search through the graph of epsilon transitions. */ -void FsmGraph::epsilonFillEptVectFrom( FsmState *root, FsmState *from, bool parentLeaving ) -{ - /* Walk the epsilon transitions out of the state. */ - for ( EpsilonTrans::Iter ep = from->epsilonTrans; ep.lte(); ep++ ) { - /* Find the entry point, if the it does not resove, ignore it. */ - EntryMapEl *enLow, *enHigh; - if ( entryPoints.findMulti( *ep, enLow, enHigh ) ) { - /* Loop the targets. */ - for ( EntryMapEl *en = enLow; en <= enHigh; en++ ) { - /* Do not add the root or states already in eptVect. */ - FsmState *targ = en->value; - if ( targ != from && !inEptVect(root->eptVect, targ) ) { - /* Maybe need to create the eptVect. */ - if ( root->eptVect == 0 ) - root->eptVect = new EptVect(); - - /* If moving to a different graph or if any parent is - * leaving then we are leaving. */ - bool leaving = parentLeaving || - root->owningGraph != targ->owningGraph; - - /* All ok, add the target epsilon and recurse. 
*/ - root->eptVect->append( EptVectEl(targ, leaving) ); - epsilonFillEptVectFrom( root, targ, leaving ); - } - } - } - } -} - -void FsmGraph::shadowReadWriteStates( MergeData &md ) -{ - /* Init isolatedShadow algorithm data. */ - for ( StateList::Iter st = stateList; st.lte(); st++ ) - st->isolatedShadow = 0; - - /* Any states that may be both read from and written to must - * be shadowed. */ - for ( StateList::Iter st = stateList; st.lte(); st++ ) { - /* Find such states by looping through stateVect lists, which give us - * the states that will be read from. May cause us to visit the states - * that we are interested in more than once. */ - if ( st->eptVect != 0 ) { - /* For all states that will be read from. */ - for ( EptVect::Iter ept = *st->eptVect; ept.lte(); ept++ ) { - /* Check for read and write to the same state. */ - FsmState *targ = ept->targ; - if ( targ->eptVect != 0 ) { - /* State is to be written to, if the shadow is not already - * there, create it. */ - if ( targ->isolatedShadow == 0 ) { - FsmState *shadow = addState(); - mergeStates( md, shadow, targ ); - targ->isolatedShadow = shadow; - } - - /* Write shadow into the state vector so that it is the - * state that the epsilon transition will read from. */ - ept->targ = targ->isolatedShadow; - } - } - } - } -} - -void FsmGraph::resolveEpsilonTrans( MergeData &md ) -{ - /* Walk the state list and invoke recursive worker on each state. */ - for ( StateList::Iter st = stateList; st.lte(); st++ ) - epsilonFillEptVectFrom( st, st, false ); - - /* Prevent reading from and writing to of the same state. */ - shadowReadWriteStates( md ); - - /* For all states that have epsilon transitions out, draw the transitions, - * clear the epsilon transitions. */ - for ( StateList::Iter st = stateList; st.lte(); st++ ) { - /* If there is a state vector, then create the pre-merge state. */ - if ( st->eptVect != 0 ) { - /* Merge all the epsilon targets into the state. 
*/ - for ( EptVect::Iter ept = *st->eptVect; ept.lte(); ept++ ) { - if ( ept->leaving ) - mergeStatesLeaving( md, st, ept->targ ); - else - mergeStates( md, st, ept->targ ); - } - - /* Clean up the target list. */ - delete st->eptVect; - st->eptVect = 0; - } - - /* Clear the epsilon transitions vector. */ - st->epsilonTrans.empty(); - } -} - -void FsmGraph::epsilonOp() -{ - /* For merging process. */ - MergeData md; - - setMisfitAccounting( true ); - - for ( StateList::Iter st = stateList; st.lte(); st++ ) - st->owningGraph = 0; - - /* Perform merges. */ - resolveEpsilonTrans( md ); - - /* Epsilons can caused merges which leave behind unreachable states. */ - fillInStates( md ); - - /* Remove the misfits and turn off misfit accounting. */ - removeMisfits(); - setMisfitAccounting( false ); -} - -/* Make a new maching by joining together a bunch of machines without making - * any transitions between them. A negative finalId results in there being no - * final id. */ -void FsmGraph::joinOp( int startId, int finalId, FsmGraph **others, int numOthers ) -{ - /* For the merging process. */ - MergeData md; - - /* Set the owning machines. Start at one. Zero is reserved for the start - * and final states. */ - for ( StateList::Iter st = stateList; st.lte(); st++ ) - st->owningGraph = 1; - for ( int m = 0; m < numOthers; m++ ) { - for ( StateList::Iter st = others[m]->stateList; st.lte(); st++ ) - st->owningGraph = 2+m; - } - - /* All machines loose start state status. */ - unsetStartState(); - for ( int m = 0; m < numOthers; m++ ) - others[m]->unsetStartState(); - - /* Bring the other machines into this. */ - for ( int m = 0; m < numOthers; m++ ) { - /* Bring in the rest of other's entry points. */ - copyInEntryPoints( others[m] ); - others[m]->entryPoints.empty(); - - /* Merge the lists. This will move all the states from other into - * this. No states will be deleted. 
*/ - stateList.append( others[m]->stateList ); - assert( others[m]->misfitList.length() == 0 ); - - /* Move the final set data from other into this. */ - finStateSet.insert( others[m]->finStateSet ); - others[m]->finStateSet.empty(); - - /* Since other's list is empty, we can delete the fsm without - * affecting any states. */ - delete others[m]; - } - - /* Look up the start entry point. */ - EntryMapEl *enLow = 0, *enHigh = 0; - bool findRes = entryPoints.findMulti( startId, enLow, enHigh ); - if ( ! findRes ) { - /* No start state. Set a default one and proceed with the join. Note - * that the result of the join will be a very uninteresting machine. */ - setStartState( addState() ); - } - else { - /* There is at least one start state, create a state that will become - * the new start state. */ - FsmState *newStart = addState(); - setStartState( newStart ); - - /* The start state is in an owning machine class all it's own. */ - newStart->owningGraph = 0; - - /* Create the set of states to merge from. */ - StateSet stateSet; - for ( EntryMapEl *en = enLow; en <= enHigh; en++ ) - stateSet.insert( en->value ); - - /* Merge in the set of start states into the new start state. */ - mergeStates( md, newStart, stateSet.data, stateSet.length() ); - } - - /* Take a copy of the final state set, before unsetting them all. This - * will allow us to call clearOutData on the states that don't get - * final state status back back. */ - StateSet finStateSetCopy = finStateSet; - - /* Now all final states are unset. */ - unsetAllFinStates(); - - if ( finalId >= 0 ) { - /* Create the implicit final state. */ - FsmState *finState = addState(); - setFinState( finState ); - - /* Assign an entry into the final state on the final state entry id. Note - * that there may already be an entry on this id. That's ok. Also set the - * final state owning machine id. It's in a class all it's own. 
*/ - setEntry( finalId, finState ); - finState->owningGraph = 0; - } - - /* Hand over to workers for resolving epsilon trans. This will merge states - * with the targets of their epsilon transitions. */ - resolveEpsilonTrans( md ); - - /* Invoke the relinquish final callback on any states that did not get - * final state status back. */ - for ( StateSet::Iter st = finStateSetCopy; st.lte(); st++ ) { - if ( !((*st)->stateBits & SB_ISFINAL) ) - clearOutData( *st ); - } - - /* Fill in any new states made from merging. */ - fillInStates( md ); - - /* Joining can be messy. Instead of having misfit accounting on (which is - * tricky here) do a full cleaning. */ - removeUnreachableStates(); -} - -void FsmGraph::globOp( FsmGraph **others, int numOthers ) -{ - /* All other machines loose start states status. */ - for ( int m = 0; m < numOthers; m++ ) - others[m]->unsetStartState(); - - /* Bring the other machines into this. */ - for ( int m = 0; m < numOthers; m++ ) { - /* Bring in the rest of other's entry points. */ - copyInEntryPoints( others[m] ); - others[m]->entryPoints.empty(); - - /* Merge the lists. This will move all the states from other into - * this. No states will be deleted. */ - stateList.append( others[m]->stateList ); - assert( others[m]->misfitList.length() == 0 ); - - /* Move the final set data from other into this. */ - finStateSet.insert( others[m]->finStateSet ); - others[m]->finStateSet.empty(); - - /* Since other's list is empty, we can delete the fsm without - * affecting any states. */ - delete others[m]; - } -} - -void FsmGraph::deterministicEntry() -{ - /* For the merging process. */ - MergeData md; - - /* States may loose their entry points, turn on misfit accounting. */ - setMisfitAccounting( true ); - - /* Get a copy of the entry map then clear all the entry points. As we - * iterate the old entry map finding duplicates we will add the entry - * points for the new states that we create. 
*/ - EntryMap prevEntry = entryPoints; - unsetAllEntryPoints(); - - for ( int enId = 0; enId < prevEntry.length(); ) { - /* Count the number of states on this entry key. */ - int highId = enId; - while ( highId < prevEntry.length() && prevEntry[enId].key == prevEntry[highId].key ) - highId += 1; - - int numIds = highId - enId; - if ( numIds == 1 ) { - /* Only a single entry point, just set the entry. */ - setEntry( prevEntry[enId].key, prevEntry[enId].value ); - } - else { - /* Multiple entry points, need to create a new state and merge in - * all the targets of entry points. */ - FsmState *newEntry = addState(); - for ( int en = enId; en < highId; en++ ) - mergeStates( md, newEntry, prevEntry[en].value ); - - /* Add the new state as the single entry point. */ - setEntry( prevEntry[enId].key, newEntry ); - } - - enId += numIds; - } - - /* The old start state may be unreachable. Remove the misfits and turn off - * misfit accounting. */ - removeMisfits(); - setMisfitAccounting( false ); -} - -/* Unset any final states that are no longer to be final due to final bits. */ -void FsmGraph::unsetKilledFinals() -{ - /* Duplicate the final state set before we begin modifying it. */ - StateSet fin( finStateSet ); - - for ( int s = 0; s < fin.length(); s++ ) { - /* Check for killing bit. */ - FsmState *state = fin.data[s]; - if ( state->stateBits & SB_GRAPH1 ) { - /* One final state is a killer, set to non-final. */ - unsetFinState( state ); - } - - /* Clear all killing bits. Non final states should never have had those - * state bits set in the first place. */ - state->stateBits &= ~SB_GRAPH1; - } -} - -/* Unset any final states that are no longer to be final due to final bits. */ -void FsmGraph::unsetIncompleteFinals() -{ - /* Duplicate the final state set before we begin modifying it. */ - StateSet fin( finStateSet ); - - for ( int s = 0; s < fin.length(); s++ ) { - /* Check for one set but not the other. 
*/ - FsmState *state = fin.data[s]; - if ( state->stateBits & SB_BOTH && - (state->stateBits & SB_BOTH) != SB_BOTH ) - { - /* One state wants the other but it is not there. */ - unsetFinState( state ); - } - - /* Clear wanting bits. Non final states should never have had those - * state bits set in the first place. */ - state->stateBits &= ~SB_BOTH; - } -} - -/* Ensure that the start state is free of entry points (aside from the fact - * that it is the start state). If the start state has entry points then Make a - * new start state by merging with the old one. Useful before modifying start - * transitions. If the existing start state has any entry points other than the - * start state entry then modifying its transitions changes more than the start - * transitions. So isolate the start state by separating it out such that it - * only has start stateness as it's entry point. */ -void FsmGraph::isolateStartState( ) -{ - /* For the merging process. */ - MergeData md; - - /* Bail out if the start state is already isolated. */ - if ( isStartStateIsolated() ) - return; - - /* Turn on misfit accounting to possibly catch the old start state. */ - setMisfitAccounting( true ); - - /* This will be the new start state. The existing start - * state is merged with it. */ - FsmState *prevStartState = startState; - unsetStartState(); - setStartState( addState() ); - - /* Merge the new start state with the old one to isolate it. */ - mergeStates( md, startState, prevStartState ); - - /* Stfil and stateDict will be empty because the merging of the old start - * state into the new one will not have any conflicting transitions. */ - assert( md.stateDict.treeSize == 0 ); - assert( md.stfillHead == 0 ); - - /* The old start state may be unreachable. Remove the misfits and turn off - * misfit accounting. */ - removeMisfits(); - setMisfitAccounting( false ); -} - -/* A state merge which represents the drawing in of leaving transitions. 
If - * there is any out data then we duplicate the souce state, transfer the out - * data, then merge in the state. The new state will be reaped because it will - * not be given any in transitions. */ -void FsmGraph::mergeStatesLeaving( MergeData &md, FsmState *destState, FsmState *srcState ) -{ - if ( !hasOutData( destState ) ) - mergeStates( md, destState, srcState ); - else { - FsmState *ssMutable = addState(); - mergeStates( md, ssMutable, srcState ); - transferOutData( ssMutable, destState ); - mergeStates( md, destState, ssMutable ); - } -} - -void FsmGraph::mergeStates( MergeData &md, FsmState *destState, - FsmState **srcStates, int numSrc ) -{ - for ( int s = 0; s < numSrc; s++ ) - mergeStates( md, destState, srcStates[s] ); -} - -void FsmGraph::mergeStates( MergeData &md, FsmState *destState, FsmState *srcState ) -{ - outTransCopy( md, destState, srcState->outList.head ); - - /* Get its bits and final state status. */ - destState->stateBits |= ( srcState->stateBits & ~SB_ISFINAL ); - if ( srcState->isFinState() ) - setFinState( destState ); - - /* Draw in any properties of srcState into destState. */ - if ( srcState == destState ) { - /* Duplicate the list to protect against write to source. The - * priorities sets are not copied in because that would have no - * effect. */ - destState->epsilonTrans.append( EpsilonTrans( srcState->epsilonTrans ) ); - - /* Get all actions, duplicating to protect against write to source. 
*/ - destState->toStateActionTable.setActions( - ActionTable( srcState->toStateActionTable ) ); - destState->fromStateActionTable.setActions( - ActionTable( srcState->fromStateActionTable ) ); - destState->outActionTable.setActions( ActionTable( srcState->outActionTable ) ); - destState->outCondSet.insert( ActionSet( srcState->outCondSet ) ); - destState->errActionTable.setActions( ErrActionTable( srcState->errActionTable ) ); - destState->eofActionTable.setActions( ActionTable( srcState->eofActionTable ) ); - } - else { - /* Get the epsilons, out priorities. */ - destState->epsilonTrans.append( srcState->epsilonTrans ); - destState->outPriorTable.setPriors( srcState->outPriorTable ); - - /* Get all actions. */ - destState->toStateActionTable.setActions( srcState->toStateActionTable ); - destState->fromStateActionTable.setActions( srcState->fromStateActionTable ); - destState->outActionTable.setActions( srcState->outActionTable ); - destState->outCondSet.insert( srcState->outCondSet ); - destState->errActionTable.setActions( srcState->errActionTable ); - destState->eofActionTable.setActions( srcState->eofActionTable ); - } -} - -void FsmGraph::fillInStates( MergeData &md ) -{ - /* Merge any states that are awaiting merging. This will likey cause - * other states to be added to the stfil list. */ - FsmState *state = md.stfillHead; - while ( state != 0 ) { - StateSet *stateSet = &state->stateDictEl->stateSet; - mergeStates( md, state, stateSet->data, stateSet->length() ); - state = state->alg.next; - } - - /* Delete the state sets of all states that are on the fill list. */ - state = md.stfillHead; - while ( state != 0 ) { - /* Delete and reset the state set. */ - delete state->stateDictEl; - state->stateDictEl = 0; - - /* Next state in the stfill list. */ - state = state->alg.next; - } - - /* StateDict will still have its ptrs/size set but all of it's element - * will be deleted so we don't need to clean it up. 
*/ -} diff --git a/src/fsmgraph.h b/src/fsmgraph.h deleted file mode 100644 index 5b357499..00000000 --- a/src/fsmgraph.h +++ /dev/null @@ -1,1321 +0,0 @@ -/* - * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef _COLM_FSMGRAPH_H -#define _COLM_FSMGRAPH_H - -#include <assert.h> - -#include <avltree.h> -#include <avlmap.h> -#include <avlset.h> -#include <bstmap.h> -#include <vector.h> -#include <sbstmap.h> -#include <sbstset.h> -#include <sbsttable.h> -#include <bstset.h> -#include <compare.h> -#include <dlist.h> - -#include "keyops.h" - -/* Flags that control merging. 
*/ -#define SB_GRAPH1 0x01 -#define SB_GRAPH2 0x02 -#define SB_BOTH 0x03 -#define SB_ISFINAL 0x04 -#define SB_ISMARKED 0x08 -#define SB_ONLIST 0x10 - -struct FsmTrans; -struct FsmState; -struct FsmGraph; -struct Action; -struct TokenInstance; -struct NameInst; - -/* State list element for unambiguous access to list element. */ -struct FsmListEl -{ - FsmState *prev, *next; -}; - -/* This is the marked index for a state pair. Used in minimization. It keeps - * track of whether or not the state pair is marked. */ -struct MarkIndex -{ - MarkIndex(int states); - ~MarkIndex(); - - void markPair(int state1, int state2); - bool isPairMarked(int state1, int state2); - -private: - int numStates; - bool *array; -}; - -extern KeyOps *keyOps; - -/* Transistion Action Element. */ -typedef SBstMapEl< int, Action* > ActionTableEl; - -/* Transition Action Table. */ -struct ActionTable - : public SBstMap< int, Action*, CmpOrd<int> > -{ - void setAction( int ordering, Action *action ); - void setActions( int *orderings, Action **actions, int nActs ); - void setActions( const ActionTable &other ); - - bool hasAction( Action *action ); -}; - -typedef SBstSet< Action*, CmpOrd<Action*> > ActionSet; -typedef CmpSTable< Action*, CmpOrd<Action*> > CmpActionSet; - -/* Transistion Action Element. */ -typedef SBstMapEl< int, TokenInstance* > LmActionTableEl; - -/* Transition Action Table. */ -struct LmActionTable - : public SBstMap< int, TokenInstance*, CmpOrd<int> > -{ - void setAction( int ordering, TokenInstance *action ); - void setActions( const LmActionTable &other ); -}; - -/* Compare of a whole action table element (key & value). 
*/ -struct CmpActionTableEl -{ - static int compare( const ActionTableEl &action1, - const ActionTableEl &action2 ) - { - if ( action1.key < action2.key ) - return -1; - else if ( action1.key > action2.key ) - return 1; - else if ( action1.value < action2.value ) - return -1; - else if ( action1.value > action2.value ) - return 1; - return 0; - } -}; - -/* Compare for ActionTable. */ -typedef CmpSTable< ActionTableEl, CmpActionTableEl > CmpActionTable; - -/* Compare of a whole lm action table element (key & value). */ -struct CmpLmActionTableEl -{ - static int compare( const LmActionTableEl &lmAction1, - const LmActionTableEl &lmAction2 ) - { - if ( lmAction1.key < lmAction2.key ) - return -1; - else if ( lmAction1.key > lmAction2.key ) - return 1; - else if ( lmAction1.value < lmAction2.value ) - return -1; - else if ( lmAction1.value > lmAction2.value ) - return 1; - return 0; - } -}; - -/* Compare for ActionTable. */ -typedef CmpSTable< LmActionTableEl, CmpLmActionTableEl > CmpLmActionTable; - -/* Action table element for error action tables. Adds the encoding of transfer - * point. */ -struct ErrActionTableEl -{ - ErrActionTableEl( Action *action, int ordering, int transferPoint ) - : ordering(ordering), action(action), transferPoint(transferPoint) { } - - /* Ordering and id of the action embedding. */ - int ordering; - Action *action; - - /* Id of point of transfere from Error action table to transtions and - * eofActionTable. */ - int transferPoint; - - int getKey() const { return ordering; } -}; - -struct ErrActionTable - : public SBstTable< ErrActionTableEl, int, CmpOrd<int> > -{ - void setAction( int ordering, Action *action, int transferPoint ); - void setActions( const ErrActionTable &other ); -}; - -/* Compare of an error action table element (key & value). 
*/ -struct CmpErrActionTableEl -{ - static int compare( const ErrActionTableEl &action1, - const ErrActionTableEl &action2 ) - { - if ( action1.ordering < action2.ordering ) - return -1; - else if ( action1.ordering > action2.ordering ) - return 1; - else if ( action1.action < action2.action ) - return -1; - else if ( action1.action > action2.action ) - return 1; - else if ( action1.transferPoint < action2.transferPoint ) - return -1; - else if ( action1.transferPoint > action2.transferPoint ) - return 1; - return 0; - } -}; - -/* Compare for ErrActionTable. */ -typedef CmpSTable< ErrActionTableEl, CmpErrActionTableEl > CmpErrActionTable; - - -/* Descibe a priority, shared among PriorEls. - * Has key and whether or not used. */ -struct PriorDesc -{ - int key; - int priority; -}; - -/* Element in the arrays of priorities for transitions and arrays. Ordering is - * unique among instantiations of machines, desc is shared. */ -struct PriorEl -{ - PriorEl( int ordering, PriorDesc *desc ) - : ordering(ordering), desc(desc) { } - - int ordering; - PriorDesc *desc; -}; - -/* Compare priority elements, which are ordered by the priority descriptor - * key. */ -struct PriorElCmp -{ - static inline int compare( const PriorEl &pel1, const PriorEl &pel2 ) - { - if ( pel1.desc->key < pel2.desc->key ) - return -1; - else if ( pel1.desc->key > pel2.desc->key ) - return 1; - else - return 0; - } -}; - - -/* Priority Table. */ -struct PriorTable - : public SBstSet< PriorEl, PriorElCmp > -{ - void setPrior( int ordering, PriorDesc *desc ); - void setPriors( const PriorTable &other ); -}; - -/* Compare of prior table elements for distinguising state data. 
*/ -struct CmpPriorEl -{ - static inline int compare( const PriorEl &pel1, const PriorEl &pel2 ) - { - if ( pel1.desc < pel2.desc ) - return -1; - else if ( pel1.desc > pel2.desc ) - return 1; - else if ( pel1.ordering < pel2.ordering ) - return -1; - else if ( pel1.ordering > pel2.ordering ) - return 1; - return 0; - } -}; - -/* Compare of PriorTable distinguising state data. Using a compare of the - * pointers is a little more strict than it needs be. It requires that - * prioritiy tables have the exact same set of priority assignment operators - * (from the input lang) to be considered equal. - * - * Really only key-value pairs need be tested and ordering be merged. However - * this would require that in the fuseing of states, priority descriptors be - * chosen for the new fused state based on priority. Since the out transition - * lists and ranges aren't necessarily going to line up, this is more work for - * little gain. Final compression resets all priorities first, so this would - * only be useful for compression at every operator, which is only an - * undocumented test feature. - */ -typedef CmpSTable<PriorEl, CmpPriorEl> CmpPriorTable; - -/* Plain action list that imposes no ordering. */ -typedef Vector<int> TransFuncList; - -/* Comparison for TransFuncList. */ -typedef CmpTable< int, CmpOrd<int> > TransFuncListCompare; - -/* Transition class that implements actions and priorities. */ -struct FsmTrans -{ - FsmTrans() : fromState(0), toState(0) {} - FsmTrans( const FsmTrans &other ) : - lowKey(other.lowKey), - highKey(other.highKey), - fromState(0), toState(0), - actionTable(other.actionTable), - priorTable(other.priorTable) - { - assert( lmActionTable.length() == 0 && other.lmActionTable.length() == 0 ); - } - - Key lowKey, highKey; - FsmState *fromState; - FsmState *toState; - - /* Pointers for outlist. */ - FsmTrans *prev, *next; - - /* Pointers for in-list. */ - FsmTrans *ilprev, *ilnext; - - /* The function table and priority for the transition. 
*/ - ActionTable actionTable; - PriorTable priorTable; - - LmActionTable lmActionTable; -}; - -/* In transition list. Like DList except only has head pointers, which is all - * that is required. Insertion and deletion is handled by the graph. This - * class provides the iterator of a single list. */ -struct TransInList -{ - TransInList() : head(0) { } - - FsmTrans *head; - - struct Iter - { - /* Default construct. */ - Iter() : ptr(0) { } - - /* Construct, assign from a list. */ - Iter( const TransInList &il ) : ptr(il.head) { } - Iter &operator=( const TransInList &dl ) { ptr = dl.head; return *this; } - - /* At the end */ - bool lte() const { return ptr != 0; } - bool end() const { return ptr == 0; } - - /* At the first, last element. */ - bool first() const { return ptr && ptr->ilprev == 0; } - bool last() const { return ptr && ptr->ilnext == 0; } - - /* Cast, dereference, arrow ops. */ - operator FsmTrans*() const { return ptr; } - FsmTrans &operator *() const { return *ptr; } - FsmTrans *operator->() const { return ptr; } - - /* Increment, decrement. */ - inline void operator++(int) { ptr = ptr->ilnext; } - inline void operator--(int) { ptr = ptr->ilprev; } - - /* The iterator is simply a pointer. */ - FsmTrans *ptr; - }; -}; - -typedef DList<FsmTrans> TransList; - -/* Set of states, list of states. */ -typedef BstSet<FsmState*> StateSet; -typedef DList<FsmState> StateList; - -/* A element in a state dict. */ -struct StateDictEl -: - public AvlTreeEl<StateDictEl> -{ - StateDictEl(const StateSet &stateSet) - : stateSet(stateSet) { } - - const StateSet &getKey() { return stateSet; } - StateSet stateSet; - FsmState *targState; -}; - -/* Dictionary mapping a set of states to a target state. */ -typedef AvlTree< StateDictEl, StateSet, CmpTable<FsmState*> > StateDict; - -/* Data needed for a merge operation. 
*/ -struct MergeData -{ - MergeData() - : stfillHead(0), stfillTail(0) { } - - StateDict stateDict; - - FsmState *stfillHead; - FsmState *stfillTail; - - void fillListAppend( FsmState *state ); -}; - -struct TransEl -{ - /* Constructors. */ - TransEl() { } - TransEl( Key lowKey, Key highKey ) - : lowKey(lowKey), highKey(highKey) { } - TransEl( Key lowKey, Key highKey, FsmTrans *value ) - : lowKey(lowKey), highKey(highKey), value(value) { } - - Key lowKey, highKey; - FsmTrans *value; -}; - -struct CmpKey -{ - static int compare( const Key key1, const Key key2 ) - { - if ( key1 < key2 ) - return -1; - else if ( key1 > key2 ) - return 1; - else - return 0; - } -}; - -/* Vector based set of key items. */ -typedef BstSet<Key, CmpKey> KeySet; - -struct MinPartition -{ - MinPartition() : active(false) { } - - StateList list; - bool active; - - MinPartition *prev, *next; -}; - -/* Epsilon transition stored in a state. Specifies the target */ -typedef Vector<int> EpsilonTrans; - -/* List of states that are to be drawn into this. */ -struct EptVectEl -{ - EptVectEl( FsmState *targ, bool leaving ) - : targ(targ), leaving(leaving) { } - - FsmState *targ; - bool leaving; -}; -typedef Vector<EptVectEl> EptVect; - -/* Set of entry ids that go into this state. */ -typedef BstSet<int> EntryIdSet; - -/* Set of longest match items that may be active in a given state. */ -typedef BstSet<TokenInstance*> LmItemSet; - -/* Conditions. 
*/ -typedef BstSet< Action*, CmpOrd<Action*> > CondSet; -typedef CmpTable< Action*, CmpOrd<Action*> > CmpCondSet; - -struct CondSpace - : public AvlTreeEl<CondSpace> -{ - CondSpace( const CondSet &condSet ) - : condSet(condSet) {} - - const CondSet &getKey() { return condSet; } - - CondSet condSet; - Key baseKey; - long condSpaceId; -}; - -typedef Vector<CondSpace*> CondSpaceVect; - -typedef AvlTree<CondSpace, CondSet, CmpCondSet> CondSpaceMap; - -struct StateCond -{ - StateCond( Key lowKey, Key highKey ) : - lowKey(lowKey), highKey(highKey) {} - - Key lowKey; - Key highKey; - CondSpace *condSpace; - - StateCond *prev, *next; -}; - -typedef DList<StateCond> StateCondList; -typedef Vector<long> LongVect; - -/* State class that implements actions and priorities. */ -struct FsmState -{ - FsmState(); - FsmState(const FsmState &other); - ~FsmState(); - - /* Is the state final? */ - bool isFinState() { return stateBits & SB_ISFINAL; } - - /* Out transition list and the pointer for the default out trans. */ - TransList outList; - - /* In transition Lists. */ - TransInList inList; - - /* Entry points into the state. */ - EntryIdSet entryIds; - - /* Epsilon transitions. */ - EpsilonTrans epsilonTrans; - - /* Condition info. */ - StateCondList stateCondList; - - /* Number of in transitions from states other than ourselves. */ - int foreignInTrans; - - /* Temporary data for various algorithms. */ - union { - /* When duplicating the fsm we need to map each - * state to the new state representing it. */ - FsmState *stateMap; - - /* When minimizing machines by partitioning, this maps to the group - * the state is in. */ - MinPartition *partition; - - /* When merging states (state machine operations) this next pointer is - * used for the list of states that need to be filled in. */ - FsmState *next; - - /* Identification for printing and stable minimization. */ - int stateNum; - - } alg; - - /* Data used in epsilon operation, maybe fit into alg? 
*/ - FsmState *isolatedShadow; - int owningGraph; - - /* A pointer to a dict element that contains the set of states this state - * represents. This cannot go into alg, because alg.next is used during - * the merging process. */ - StateDictEl *stateDictEl; - - /* When drawing epsilon transitions, holds the list of states to merge - * with. */ - EptVect *eptVect; - - /* Bits controlling the behaviour of the state during collapsing to dfa. */ - int stateBits; - - /* State list elements. */ - FsmState *next, *prev; - - /* - * Priority and Action data. - */ - - /* Out priorities transfered to out transitions. */ - PriorTable outPriorTable; - - /* The following two action tables are distinguished by the fact that when - * toState actions are executed immediatly after transition actions of - * incoming transitions and the current character will be the same as the - * one available then. The fromState actions are executed immediately - * before the transition actions of outgoing transitions and the current - * character is same as the one available then. */ - - /* Actions to execute upon entering into a state. */ - ActionTable toStateActionTable; - - /* Actions to execute when going from the state to the transition. */ - ActionTable fromStateActionTable; - - /* Actions to add to any future transitions that leave via this state. */ - ActionTable outActionTable; - - /* Conditions to add to any future transiions that leave via this sttate. */ - ActionSet outCondSet; - - /* Error action tables. */ - ErrActionTable errActionTable; - - /* Actions to execute on eof. */ - ActionTable eofActionTable; - - /* Set of longest match items that may be active in this state. 
*/ - LmItemSet lmItemSet; - - FsmState *eofTarget; -}; - -template <class ListItem> struct NextTrans -{ - Key lowKey, highKey; - ListItem *trans; - ListItem *next; - - void load() { - if ( trans == 0 ) - next = 0; - else { - next = trans->next; - lowKey = trans->lowKey; - highKey = trans->highKey; - } - } - - void set( ListItem *t ) { - trans = t; - load(); - } - - void increment() { - trans = next; - load(); - } -}; - - -/* Encodes the different states that are meaningful to the of the iterator. */ -enum PairIterUserState -{ - RangeInS1, RangeInS2, - RangeOverlap, - BreakS1, BreakS2 -}; - -template <class ListItem1, class ListItem2 = ListItem1> struct PairIter -{ - /* Encodes the different states that an fsm iterator can be in. */ - enum IterState { - Begin, - ConsumeS1Range, ConsumeS2Range, - OnlyInS1Range, OnlyInS2Range, - S1SticksOut, S1SticksOutBreak, - S2SticksOut, S2SticksOutBreak, - S1DragsBehind, S1DragsBehindBreak, - S2DragsBehind, S2DragsBehindBreak, - ExactOverlap, End - }; - - PairIter( ListItem1 *list1, ListItem2 *list2 ); - - /* Query iterator. */ - bool lte() { return itState != End; } - bool end() { return itState == End; } - void operator++(int) { findNext(); } - void operator++() { findNext(); } - - /* Iterator state. */ - ListItem1 *list1; - ListItem2 *list2; - IterState itState; - PairIterUserState userState; - - NextTrans<ListItem1> s1Tel; - NextTrans<ListItem2> s2Tel; - Key bottomLow, bottomHigh; - ListItem1 *bottomTrans1; - ListItem2 *bottomTrans2; - -private: - void findNext(); -}; - -/* Init the iterator by advancing to the first item. */ -template <class ListItem1, class ListItem2> PairIter<ListItem1, ListItem2>::PairIter( - ListItem1 *list1, ListItem2 *list2 ) -: - list1(list1), - list2(list2), - itState(Begin) -{ - findNext(); -} - -/* Return and re-entry for the co-routine iterators. This should ALWAYS be - * used inside of a block. 
*/ -#define CO_RETURN(label) \ - itState = label; \ - return; \ - entry##label: {} - -/* Return and re-entry for the co-routine iterators. This should ALWAYS be - * used inside of a block. */ -#define CO_RETURN2(label, uState) \ - itState = label; \ - userState = uState; \ - return; \ - entry##label: {} - -/* Advance to the next transition. When returns, trans points to the next - * transition, unless there are no more, in which case end() returns true. */ -template <class ListItem1, class ListItem2> void PairIter<ListItem1, ListItem2>::findNext() -{ - /* Jump into the iterator routine base on the iterator state. */ - switch ( itState ) { - case Begin: goto entryBegin; - case ConsumeS1Range: goto entryConsumeS1Range; - case ConsumeS2Range: goto entryConsumeS2Range; - case OnlyInS1Range: goto entryOnlyInS1Range; - case OnlyInS2Range: goto entryOnlyInS2Range; - case S1SticksOut: goto entryS1SticksOut; - case S1SticksOutBreak: goto entryS1SticksOutBreak; - case S2SticksOut: goto entryS2SticksOut; - case S2SticksOutBreak: goto entryS2SticksOutBreak; - case S1DragsBehind: goto entryS1DragsBehind; - case S1DragsBehindBreak: goto entryS1DragsBehindBreak; - case S2DragsBehind: goto entryS2DragsBehind; - case S2DragsBehindBreak: goto entryS2DragsBehindBreak; - case ExactOverlap: goto entryExactOverlap; - case End: goto entryEnd; - } - -entryBegin: - /* Set up the next structs at the head of the transition lists. */ - s1Tel.set( list1 ); - s2Tel.set( list2 ); - - /* Concurrently scan both out ranges. */ - while ( true ) { - if ( s1Tel.trans == 0 ) { - /* We are at the end of state1's ranges. Process the rest of - * state2's ranges. */ - while ( s2Tel.trans != 0 ) { - /* Range is only in s2. */ - CO_RETURN2( ConsumeS2Range, RangeInS2 ); - s2Tel.increment(); - } - break; - } - else if ( s2Tel.trans == 0 ) { - /* We are at the end of state2's ranges. Process the rest of - * state1's ranges. */ - while ( s1Tel.trans != 0 ) { - /* Range is only in s1. 
*/ - CO_RETURN2( ConsumeS1Range, RangeInS1 ); - s1Tel.increment(); - } - break; - } - /* Both state1's and state2's transition elements are good. - * The signiture of no overlap is a back key being in front of a - * front key. */ - else if ( s1Tel.highKey < s2Tel.lowKey ) { - /* A range exists in state1 that does not overlap with state2. */ - CO_RETURN2( OnlyInS1Range, RangeInS1 ); - s1Tel.increment(); - } - else if ( s2Tel.highKey < s1Tel.lowKey ) { - /* A range exists in state2 that does not overlap with state1. */ - CO_RETURN2( OnlyInS2Range, RangeInS2 ); - s2Tel.increment(); - } - /* There is overlap, must mix the ranges in some way. */ - else if ( s1Tel.lowKey < s2Tel.lowKey ) { - /* Range from state1 sticks out front. Must break it into - * non-overlaping and overlaping segments. */ - bottomLow = s2Tel.lowKey; - bottomHigh = s1Tel.highKey; - s1Tel.highKey = s2Tel.lowKey; - s1Tel.highKey.decrement(); - bottomTrans1 = s1Tel.trans; - - /* Notify the caller that we are breaking s1. This gives them a - * chance to duplicate s1Tel[0,1].value. */ - CO_RETURN2( S1SticksOutBreak, BreakS1 ); - - /* Broken off range is only in s1. */ - CO_RETURN2( S1SticksOut, RangeInS1 ); - - /* Advance over the part sticking out front. */ - s1Tel.lowKey = bottomLow; - s1Tel.highKey = bottomHigh; - s1Tel.trans = bottomTrans1; - } - else if ( s2Tel.lowKey < s1Tel.lowKey ) { - /* Range from state2 sticks out front. Must break it into - * non-overlaping and overlaping segments. */ - bottomLow = s1Tel.lowKey; - bottomHigh = s2Tel.highKey; - s2Tel.highKey = s1Tel.lowKey; - s2Tel.highKey.decrement(); - bottomTrans2 = s2Tel.trans; - - /* Notify the caller that we are breaking s2. This gives them a - * chance to duplicate s2Tel[0,1].value. */ - CO_RETURN2( S2SticksOutBreak, BreakS2 ); - - /* Broken off range is only in s2. */ - CO_RETURN2( S2SticksOut, RangeInS2 ); - - /* Advance over the part sticking out front. 
*/ - s2Tel.lowKey = bottomLow; - s2Tel.highKey = bottomHigh; - s2Tel.trans = bottomTrans2; - } - /* Low ends are even. Are the high ends even? */ - else if ( s1Tel.highKey < s2Tel.highKey ) { - /* Range from state2 goes longer than the range from state1. We - * must break the range from state2 into an evenly overlaping - * segment. */ - bottomLow = s1Tel.highKey; - bottomLow.increment(); - bottomHigh = s2Tel.highKey; - s2Tel.highKey = s1Tel.highKey; - bottomTrans2 = s2Tel.trans; - - /* Notify the caller that we are breaking s2. This gives them a - * chance to duplicate s2Tel[0,1].value. */ - CO_RETURN2( S2DragsBehindBreak, BreakS2 ); - - /* Breaking s2 produces exact overlap. */ - CO_RETURN2( S2DragsBehind, RangeOverlap ); - - /* Advance over the front we just broke off of range 2. */ - s2Tel.lowKey = bottomLow; - s2Tel.highKey = bottomHigh; - s2Tel.trans = bottomTrans2; - - /* Advance over the entire s1Tel. We have consumed it. */ - s1Tel.increment(); - } - else if ( s2Tel.highKey < s1Tel.highKey ) { - /* Range from state1 goes longer than the range from state2. We - * must break the range from state1 into an evenly overlaping - * segment. */ - bottomLow = s2Tel.highKey; - bottomLow.increment(); - bottomHigh = s1Tel.highKey; - s1Tel.highKey = s2Tel.highKey; - bottomTrans1 = s1Tel.trans; - - /* Notify the caller that we are breaking s1. This gives them a - * chance to duplicate s2Tel[0,1].value. */ - CO_RETURN2( S1DragsBehindBreak, BreakS1 ); - - /* Breaking s1 produces exact overlap. */ - CO_RETURN2( S1DragsBehind, RangeOverlap ); - - /* Advance over the front we just broke off of range 1. */ - s1Tel.lowKey = bottomLow; - s1Tel.highKey = bottomHigh; - s1Tel.trans = bottomTrans1; - - /* Advance over the entire s2Tel. We have consumed it. */ - s2Tel.increment(); - } - else { - /* There is an exact overlap. */ - CO_RETURN2( ExactOverlap, RangeOverlap ); - - s1Tel.increment(); - s2Tel.increment(); - } - } - - /* Done, go into end state. 
*/ - CO_RETURN( End ); -} - - -/* Compare lists of epsilon transitions. Entries are name ids of targets. */ -typedef CmpTable< int, CmpOrd<int> > CmpEpsilonTrans; - -/* Compare class for the Approximate minimization. */ -class ApproxCompare -{ -public: - ApproxCompare() { } - int compare( const FsmState *pState1, const FsmState *pState2 ); -}; - -/* Compare class for the initial partitioning of a partition minimization. */ -class InitPartitionCompare -{ -public: - InitPartitionCompare() { } - int compare( const FsmState *pState1, const FsmState *pState2 ); -}; - -/* Compare class for the regular partitioning of a partition minimization. */ -class PartitionCompare -{ -public: - PartitionCompare() { } - int compare( const FsmState *pState1, const FsmState *pState2 ); -}; - -/* Compare class for a minimization that marks pairs. Provides the shouldMark - * routine. */ -class MarkCompare -{ -public: - MarkCompare() { } - bool shouldMark( MarkIndex &markIndex, const FsmState *pState1, - const FsmState *pState2 ); -}; - -/* List of partitions. */ -typedef DList< MinPartition > PartitionList; - -/* List of transtions out of a state. */ -typedef Vector<TransEl> TransListVect; - -/* Entry point map used for keeping track of entry points in a machine. */ -typedef BstSet< int > EntryIdSet; -typedef BstMapEl< int, FsmState* > EntryMapEl; -typedef BstMap< int, FsmState* > EntryMap; -typedef Vector<EntryMapEl> EntryMapBase; - -/* Graph class that implements actions and priorities. */ -struct FsmGraph -{ - /* Constructors/Destructors. */ - FsmGraph( ); - FsmGraph( const FsmGraph &graph ); - ~FsmGraph(); - - /* The list of states. */ - StateList stateList; - StateList misfitList; - - /* The map of entry points. */ - EntryMap entryPoints; - - /* The start state. */ - FsmState *startState; - - /* Error state, possibly created only when the final machine has been - * created and the XML machine is about to be written. No transitions - * point to this state. 
*/ - FsmState *errState; - - /* The set of final states. */ - StateSet finStateSet; - - /* Misfit Accounting. Are misfits put on a separate list. */ - bool misfitAccounting; - - bool lmRequiresErrorState; - NameInst **nameIndex; - - /* - * Transition actions and priorities. - */ - - /* Set priorities on transtions. */ - void startFsmPrior( int ordering, PriorDesc *prior ); - void allTransPrior( int ordering, PriorDesc *prior ); - void finishFsmPrior( int ordering, PriorDesc *prior ); - void leaveFsmPrior( int ordering, PriorDesc *prior ); - - /* Action setting support. */ - void transferErrorActions( FsmState *state, int transferPoint ); - void setErrorAction( FsmState *state, int ordering, Action *action ); - void setErrorActions( FsmState *state, const ActionTable &other ); - - /* Fill all spaces in a transition list with an error transition. */ - void fillGaps( FsmState *state ); - - /* Similar to setErrorAction, instead gives a state to go to on error. */ - void setErrorTarget( FsmState *state, FsmState *target, int *orderings, - Action **actions, int nActs ); - - /* Set actions to execute. */ - void startFsmAction( int ordering, Action *action ); - void allTransAction( int ordering, Action *action ); - void finishFsmAction( int ordering, Action *action ); - void leaveFsmAction( int ordering, Action *action ); - void longMatchAction( int ordering, TokenInstance *lmPart ); - - /* Set error actions to execute. */ - void startErrorAction( int ordering, Action *action, int transferPoint ); - void allErrorAction( int ordering, Action *action, int transferPoint ); - void finalErrorAction( int ordering, Action *action, int transferPoint ); - void notStartErrorAction( int ordering, Action *action, int transferPoint ); - void notFinalErrorAction( int ordering, Action *action, int transferPoint ); - void middleErrorAction( int ordering, Action *action, int transferPoint ); - - /* Set EOF actions. 
*/ - void startEOFAction( int ordering, Action *action ); - void allEOFAction( int ordering, Action *action ); - void finalEOFAction( int ordering, Action *action ); - void notStartEOFAction( int ordering, Action *action ); - void notFinalEOFAction( int ordering, Action *action ); - void middleEOFAction( int ordering, Action *action ); - - /* Set To State actions. */ - void startToStateAction( int ordering, Action *action ); - void allToStateAction( int ordering, Action *action ); - void finalToStateAction( int ordering, Action *action ); - void notStartToStateAction( int ordering, Action *action ); - void notFinalToStateAction( int ordering, Action *action ); - void middleToStateAction( int ordering, Action *action ); - - /* Set From State actions. */ - void startFromStateAction( int ordering, Action *action ); - void allFromStateAction( int ordering, Action *action ); - void finalFromStateAction( int ordering, Action *action ); - void notStartFromStateAction( int ordering, Action *action ); - void notFinalFromStateAction( int ordering, Action *action ); - void middleFromStateAction( int ordering, Action *action ); - - /* Shift the action ordering of the start transitions to start at - * fromOrder and increase in units of 1. Useful before kleene star - * operation. */ - int shiftStartActionOrder( int fromOrder ); - - /* Clear all priorities from the fsm to so they won't affcet minimization - * of the final fsm. */ - void clearAllPriorities(); - - /* Zero out all the function keys. */ - void nullActionKeys(); - - /* Walk the list of states and verify state properties. */ - void verifyStates(); - - /* Misfit Accounting. Are misfits put on a separate list. */ - void setMisfitAccounting( bool val ) - { misfitAccounting = val; } - - /* Set and Unset a state as final. */ - void setFinState( FsmState *state ); - void unsetFinState( FsmState *state ); - - void setStartState( FsmState *state ); - void unsetStartState( ); - - /* Set and unset a state as an entry point. 
*/ - void setEntry( int id, FsmState *state ); - void changeEntry( int id, FsmState *to, FsmState *from ); - void unsetEntry( int id, FsmState *state ); - void unsetEntry( int id ); - void unsetAllEntryPoints(); - - /* Epsilon transitions. */ - void epsilonTrans( int id ); - void shadowReadWriteStates( MergeData &md ); - - /* - * Basic attaching and detaching. - */ - - /* Common to attaching/detaching list and default. */ - void attachToInList( FsmState *from, FsmState *to, FsmTrans *&head, FsmTrans *trans ); - void detachFromInList( FsmState *from, FsmState *to, FsmTrans *&head, FsmTrans *trans ); - - /* Attach with a new transition. */ - FsmTrans *attachNewTrans( FsmState *from, FsmState *to, - Key onChar1, Key onChar2 ); - - /* Attach with an existing transition that already in an out list. */ - void attachTrans( FsmState *from, FsmState *to, FsmTrans *trans ); - - /* Redirect a transition away from error and towards some state. */ - void redirectErrorTrans( FsmState *from, FsmState *to, FsmTrans *trans ); - - /* Detach a transition from a target state. */ - void detachTrans( FsmState *from, FsmState *to, FsmTrans *trans ); - - /* Detach a state from the graph. */ - void detachState( FsmState *state ); - - /* - * NFA to DFA conversion routines. - */ - - /* Duplicate a transition that will dropin to a free spot. */ - FsmTrans *dupTrans( FsmState *from, FsmTrans *srcTrans ); - - /* In crossing, two transitions both go to real states. */ - FsmTrans *fsmAttachStates( MergeData &md, FsmState *from, - FsmTrans *destTrans, FsmTrans *srcTrans ); - - /* Two transitions are to be crossed, handle the possibility of either - * going to the error state. */ - FsmTrans *mergeTrans( MergeData &md, FsmState *from, - FsmTrans *destTrans, FsmTrans *srcTrans ); - - /* Compare deterimne relative priorities of two transition tables. 
*/ - int comparePrior( const PriorTable &priorTable1, const PriorTable &priorTable2 ); - - /* Cross a src transition with one that is already occupying a spot. */ - FsmTrans *crossTransitions( MergeData &md, FsmState *from, - FsmTrans *destTrans, FsmTrans *srcTrans ); - - void outTransCopy( MergeData &md, FsmState *dest, FsmTrans *srcList ); - void mergeStateConds( FsmState *destState, FsmState *srcState ); - - /* Merge a set of states into newState. */ - void mergeStates( MergeData &md, FsmState *destState, - FsmState **srcStates, int numSrc ); - void mergeStatesLeaving( MergeData &md, FsmState *destState, FsmState *srcState ); - void mergeStates( MergeData &md, FsmState *destState, FsmState *srcState ); - - /* Make all states that are combinations of other states and that - * have not yet had their out transitions filled in. This will - * empty out stateDict and stFil. */ - void fillInStates( MergeData &md ); - - /* - * Transition Comparison. - */ - - /* Compare transition data. Either of the pointers may be null. */ - static inline int compareDataPtr( FsmTrans *trans1, FsmTrans *trans2 ); - - /* Compare target state and transition data. Either pointer may be null. */ - static inline int compareFullPtr( FsmTrans *trans1, FsmTrans *trans2 ); - - /* Compare target partitions. Either pointer may be null. */ - static inline int comparePartPtr( FsmTrans *trans1, FsmTrans *trans2 ); - - /* Check marked status of target states. Either pointer may be null. */ - static inline bool shouldMarkPtr( MarkIndex &markIndex, - FsmTrans *trans1, FsmTrans *trans2 ); - - /* - * Callbacks. - */ - - /* Compare priority and function table of transitions. */ - static int compareTransData( FsmTrans *trans1, FsmTrans *trans2 ); - - /* Add in the properties of srcTrans into this. */ - void addInTrans( FsmTrans *destTrans, FsmTrans *srcTrans ); - - /* Compare states on data stored in the states. 
*/ - static int compareStateData( const FsmState *state1, const FsmState *state2 ); - - /* Out transition data. */ - void clearOutData( FsmState *state ); - bool hasOutData( FsmState *state ); - void transferOutData( FsmState *destState, FsmState *srcState ); - - /* - * Allocation. - */ - - /* New up a state and add it to the graph. */ - FsmState *addState(); - - /* - * Building basic machines - */ - - void concatFsm( Key c ); - void concatFsm( Key *str, int len ); - void concatFsmCI( Key *str, int len ); - void orFsm( Key *set, int len ); - void rangeFsm( Key low, Key high ); - void rangeStarFsm( Key low, Key high ); - void emptyFsm( ); - void lambdaFsm( ); - - /* - * Fsm operators. - */ - - void starOp( ); - void repeatOp( int times ); - void optionalRepeatOp( int times ); - void concatOp( FsmGraph *other ); - void unionOp( FsmGraph *other ); - void intersectOp( FsmGraph *other ); - void subtractOp( FsmGraph *other ); - void epsilonOp(); - void joinOp( int startId, int finalId, FsmGraph **others, int numOthers ); - void globOp( FsmGraph **others, int numOthers ); - void deterministicEntry(); - - /* - * Operator workers - */ - - /* Determine if there are any entry points into a start state other than - * the start state. */ - bool isStartStateIsolated(); - - /* Make a new start state that has no entry points. Will not change the - * identity of the fsm. */ - void isolateStartState(); - - /* Workers for resolving epsilon transitions. */ - bool inEptVect( EptVect *eptVect, FsmState *targ ); - void epsilonFillEptVectFrom( FsmState *root, FsmState *from, bool parentLeaving ); - void resolveEpsilonTrans( MergeData &md ); - - /* Workers for concatenation and union. */ - void doConcat( FsmGraph *other, StateSet *fromStates, bool optional ); - void doOr( FsmGraph *other ); - - /* - * Final states - */ - - /* Unset any final states that are no longer to be final - * due to final bits. 
*/ - void unsetIncompleteFinals(); - void unsetKilledFinals(); - - /* Bring in other's entry points. Assumes others states are going to be - * copied into this machine. */ - void copyInEntryPoints( FsmGraph *other ); - - /* Ordering states. */ - void depthFirstOrdering( FsmState *state ); - void depthFirstOrdering(); - void sortStatesByFinal(); - - /* Set sqequential state numbers starting at 0. */ - void setStateNumbers( int base ); - - /* Unset all final states. */ - void unsetAllFinStates(); - - /* Set the bits of final states and clear the bits of non final states. */ - void setFinBits( int finStateBits ); - - /* - * Self-consistency checks. - */ - - /* Run a sanity check on the machine. */ - void verifyIntegrity(); - - /* Verify that there are no unreachable states, or dead end states. */ - void verifyReachability(); - void verifyNoDeadEndStates(); - - /* - * Path pruning - */ - - /* Mark all states reachable from state. */ - void markReachableFromHereReverse( FsmState *state ); - - /* Mark all states reachable from state. */ - void markReachableFromHere( FsmState *state ); - void markReachableFromHereStopFinal( FsmState *state ); - - /* Removes states that cannot be reached by any path in the fsm and are - * thus wasted silicon. */ - void removeDeadEndStates(); - - /* Removes states that cannot be reached by any path in the fsm and are - * thus wasted silicon. */ - void removeUnreachableStates(); - - /* Remove error actions from states on which the error transition will - * never be taken. */ - bool outListCovers( FsmState *state ); - bool anyErrorRange( FsmState *state ); - - /* Remove states that are on the misfit list. */ - void removeMisfits(); - - /* - * FSM Minimization - */ - - /* Minimization by partitioning. */ - void minimizePartition1(); - void minimizePartition2(); - - /* Minimize the final state Machine. The result is the minimal fsm. Slow - * but stable, correct minimization. Uses n^2 space (lookout) and average - * n^2 time. 
Worst case n^3 time, but a that is a very rare case. */ - void minimizeStable(); - - /* Minimize the final state machine. Does not find the minimal fsm, but a - * pretty good approximation. Does not use any extra space. Average n^2 - * time. Worst case n^3 time, but a that is a very rare case. */ - void minimizeApproximate(); - - /* This is the worker for the minimize approximate solution. It merges - * states that have identical out transitions. */ - bool minimizeRound( ); - - /* Given an intial partioning of states, split partitions that have out trans - * to differing partitions. */ - int partitionRound( FsmState **statePtrs, MinPartition *parts, int numParts ); - - /* Split partitions that have a transition to a previously split partition, until - * there are no more partitions to split. */ - int splitCandidates( FsmState **statePtrs, MinPartition *parts, int numParts ); - - /* Fuse together states in the same partition. */ - void fusePartitions( MinPartition *parts, int numParts ); - - /* Mark pairs where out final stateness differs, out trans data differs, - * trans pairs go to a marked pair or trans data differs. Should get - * alot of pairs. */ - void initialMarkRound( MarkIndex &markIndex ); - - /* One marking round on all state pairs. Considers if trans pairs go - * to a marked state only. Returns whether or not a pair was marked. */ - bool markRound( MarkIndex &markIndex ); - - /* Move the in trans into src into dest. */ - void inTransMove(FsmState *dest, FsmState *src); - - /* Make state src and dest the same state. */ - void fuseEquivStates(FsmState *dest, FsmState *src); - - /* Find any states that didn't get marked by the marking algorithm and - * merge them into the primary states of their equivalence class. */ - void fuseUnmarkedPairs( MarkIndex &markIndex ); - - /* Merge neighboring transitions go to the same state and have the same - * transitions data. 
*/ - void compressTransitions(); - - /* Returns true if there is a transtion (either explicit or by a gap) to - * the error state. */ - bool checkErrTrans( FsmState *state, FsmTrans *trans ); - bool checkErrTransFinish( FsmState *state ); - bool hasErrorTrans(); -}; - - -#endif /* _COLM_FSMGRAPH_H */ - diff --git a/src/fsmmin.cc b/src/fsmmin.cc deleted file mode 100644 index f47500bd..00000000 --- a/src/fsmmin.cc +++ /dev/null @@ -1,737 +0,0 @@ -/* - * Copyright 2006-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <stdbool.h> -#include <assert.h> - -#include <mergesort.h> - -#include "fsmgraph.h" - -int FsmGraph::partitionRound( FsmState **statePtrs, MinPartition *parts, int numParts ) -{ - /* Need a mergesort object and a single partition compare. */ - MergeSort<FsmState*, PartitionCompare> mergeSort; - PartitionCompare partCompare; - - /* For each partition. 
*/ - for ( int p = 0; p < numParts; p++ ) { - /* Fill the pointer array with the states in the partition. */ - StateList::Iter state = parts[p].list; - for ( int s = 0; state.lte(); state++, s++ ) - statePtrs[s] = state; - - /* Sort the states using the partitioning compare. */ - int numStates = parts[p].list.length(); - mergeSort.sort( statePtrs, numStates ); - - /* Assign the states into partitions based on the results of the sort. */ - int destPart = p, firstNewPart = numParts; - for ( int s = 1; s < numStates; s++ ) { - /* If this state differs from the last then move to the next partition. */ - if ( partCompare.compare( statePtrs[s-1], statePtrs[s] ) < 0 ) { - /* The new partition is the next avail spot. */ - destPart = numParts; - numParts += 1; - } - - /* If the state is not staying in the first partition, then - * transfer it to its destination partition. */ - if ( destPart != p ) { - FsmState *state = parts[p].list.detach( statePtrs[s] ); - parts[destPart].list.append( state ); - } - } - - /* Fix the partition pointer for all the states that got moved to a new - * partition. This must be done after the states are transfered so the - * result of the sort is not altered. */ - for ( int newPart = firstNewPart; newPart < numParts; newPart++ ) { - StateList::Iter state = parts[newPart].list; - for ( ; state.lte(); state++ ) - state->alg.partition = &parts[newPart]; - } - } - - return numParts; -} - -/** - * \brief Minimize by partitioning version 1. - * - * Repeatedly tries to split partitions until all partitions are unsplittable. - * Produces the most minimal FSM possible. - */ -void FsmGraph::minimizePartition1() -{ - /* Need one mergesort object and partition compares. */ - MergeSort<FsmState*, InitPartitionCompare> mergeSort; - InitPartitionCompare initPartCompare; - - /* Nothing to do if there are no states. */ - if ( stateList.length() == 0 ) - return; - - /* - * First thing is to partition the states by final state status and - * transition functions. 
This gives us an initial partitioning to work - * with. - */ - - /* Make a array of pointers to states. */ - int numStates = stateList.length(); - FsmState** statePtrs = new FsmState*[numStates]; - - /* Fill up an array of pointers to the states for easy sorting. */ - StateList::Iter state = stateList; - for ( int s = 0; state.lte(); state++, s++ ) - statePtrs[s] = state; - - /* Sort the states using the array of states. */ - mergeSort.sort( statePtrs, numStates ); - - /* An array of lists of states is used to partition the states. */ - MinPartition *parts = new MinPartition[numStates]; - - /* Assign the states into partitions. */ - int destPart = 0; - for ( int s = 0; s < numStates; s++ ) { - /* If this state differs from the last then move to the next partition. */ - if ( s > 0 && initPartCompare.compare( statePtrs[s-1], statePtrs[s] ) < 0 ) { - /* Move to the next partition. */ - destPart += 1; - } - - /* Put the state into its partition. */ - statePtrs[s]->alg.partition = &parts[destPart]; - parts[destPart].list.append( statePtrs[s] ); - } - - /* We just moved all the states from the main list into partitions without - * taking them off the main list. So clean up the main list now. */ - stateList.abandon(); - - /* Split partitions. */ - int numParts = destPart + 1; - while ( true ) { - /* Test all partitions for splitting. */ - int newNum = partitionRound( statePtrs, parts, numParts ); - - /* When no partitions can be split, stop. */ - if ( newNum == numParts ) - break; - - numParts = newNum; - } - - /* Fuse states in the same partition. The states will end up back on the - * main list. */ - fusePartitions( parts, numParts ); - - /* Cleanup. */ - delete[] statePtrs; - delete[] parts; -} - -/* Split partitions that need splittting, decide which partitions might need - * to be split as a result, continue until there are no more that might need - * to be split. 
*/ -int FsmGraph::splitCandidates( FsmState **statePtrs, MinPartition *parts, int numParts ) -{ - /* Need a mergesort and a partition compare. */ - MergeSort<FsmState*, PartitionCompare> mergeSort; - PartitionCompare partCompare; - - /* The lists of unsplitable (partList) and splitable partitions. - * Only partitions in the splitable list are check for needing splitting. */ - PartitionList partList, splittable; - - /* Initially, all partitions are born from a split (the initial - * partitioning) and can cause other partitions to be split. So any - * partition with a state with a transition out to another partition is a - * candidate for splitting. This will make every partition except possibly - * partitions of final states split candidates. */ - for ( int p = 0; p < numParts; p++ ) { - /* Assume not active. */ - parts[p].active = false; - - /* Look for a trans out of any state in the partition. */ - for ( StateList::Iter state = parts[p].list; state.lte(); state++ ) { - /* If there is at least one transition out to another state then - * the partition becomes splittable. */ - if ( state->outList.length() > 0 ) { - parts[p].active = true; - break; - } - } - - /* If it was found active then it goes on the splittable list. */ - if ( parts[p].active ) - splittable.append( &parts[p] ); - else - partList.append( &parts[p] ); - } - - /* While there are partitions that are splittable, pull one off and try - * to split it. If it splits, determine which partitions may now be split - * as a result of the newly split partition. */ - while ( splittable.length() > 0 ) { - MinPartition *partition = splittable.detachFirst(); - - /* Fill the pointer array with the states in the partition. */ - StateList::Iter state = partition->list; - for ( int s = 0; state.lte(); state++, s++ ) - statePtrs[s] = state; - - /* Sort the states using the partitioning compare. 
*/ - int numStates = partition->list.length(); - mergeSort.sort( statePtrs, numStates ); - - /* Assign the states into partitions based on the results of the sort. */ - MinPartition *destPart = partition; - int firstNewPart = numParts; - for ( int s = 1; s < numStates; s++ ) { - /* If this state differs from the last then move to the next partition. */ - if ( partCompare.compare( statePtrs[s-1], statePtrs[s] ) < 0 ) { - /* The new partition is the next avail spot. */ - destPart = &parts[numParts]; - numParts += 1; - } - - /* If the state is not staying in the first partition, then - * transfer it to its destination partition. */ - if ( destPart != partition ) { - FsmState *state = partition->list.detach( statePtrs[s] ); - destPart->list.append( state ); - } - } - - /* Fix the partition pointer for all the states that got moved to a new - * partition. This must be done after the states are transfered so the - * result of the sort is not altered. */ - int newPart; - for ( newPart = firstNewPart; newPart < numParts; newPart++ ) { - StateList::Iter state = parts[newPart].list; - for ( ; state.lte(); state++ ) - state->alg.partition = &parts[newPart]; - } - - /* Put the partition we just split and any new partitions that came out - * of the split onto the inactive list. */ - partition->active = false; - partList.append( partition ); - for ( newPart = firstNewPart; newPart < numParts; newPart++ ) { - parts[newPart].active = false; - partList.append( &parts[newPart] ); - } - - if ( destPart == partition ) - continue; - - /* Now determine which partitions are splittable as a result of - * splitting partition by walking the in lists of the states in - * partitions that got split. Partition is the faked first item in the - * loop. */ - MinPartition *causalPart = partition; - newPart = firstNewPart - 1; - while ( newPart < numParts ) { - /* Loop all states in the causal partition. 
*/ - StateList::Iter state = causalPart->list; - for ( ; state.lte(); state++ ) { - /* Walk all transition into the state and put the partition - * that the from state is in onto the splittable list. */ - for ( TransInList::Iter trans = state->inList; trans.lte(); trans++ ) { - MinPartition *fromPart = trans->fromState->alg.partition; - if ( ! fromPart->active ) { - fromPart->active = true; - partList.detach( fromPart ); - splittable.append( fromPart ); - } - } - } - - newPart += 1; - causalPart = &parts[newPart]; - } - } - return numParts; -} - - -/** - * \brief Minimize by partitioning version 2 (best alg). - * - * Repeatedly tries to split partitions that may splittable until there are no - * more partitions that might possibly need splitting. Runs faster than - * version 1. Produces the most minimal fsm possible. - */ -void FsmGraph::minimizePartition2() -{ - /* Need a mergesort and an initial partition compare. */ - MergeSort<FsmState*, InitPartitionCompare> mergeSort; - InitPartitionCompare initPartCompare; - - /* Nothing to do if there are no states. */ - if ( stateList.length() == 0 ) - return; - - /* - * First thing is to partition the states by final state status and - * transition functions. This gives us an initial partitioning to work - * with. - */ - - /* Make a array of pointers to states. */ - int numStates = stateList.length(); - FsmState** statePtrs = new FsmState*[numStates]; - - /* Fill up an array of pointers to the states for easy sorting. */ - StateList::Iter state = stateList; - for ( int s = 0; state.lte(); state++, s++ ) - statePtrs[s] = state; - - /* Sort the states using the array of states. */ - mergeSort.sort( statePtrs, numStates ); - - /* An array of lists of states is used to partition the states. */ - MinPartition *parts = new MinPartition[numStates]; - - /* Assign the states into partitions. */ - int destPart = 0; - for ( int s = 0; s < numStates; s++ ) { - /* If this state differs from the last then move to the next partition. 
*/ - if ( s > 0 && initPartCompare.compare( statePtrs[s-1], statePtrs[s] ) < 0 ) { - /* Move to the next partition. */ - destPart += 1; - } - - /* Put the state into its partition. */ - statePtrs[s]->alg.partition = &parts[destPart]; - parts[destPart].list.append( statePtrs[s] ); - } - - /* We just moved all the states from the main list into partitions without - * taking them off the main list. So clean up the main list now. */ - stateList.abandon(); - - /* Split partitions. */ - int numParts = splitCandidates( statePtrs, parts, destPart+1 ); - - /* Fuse states in the same partition. The states will end up back on the - * main list. */ - fusePartitions( parts, numParts ); - - /* Cleanup. */ - delete[] statePtrs; - delete[] parts; -} - -void FsmGraph::initialMarkRound( MarkIndex &markIndex ) -{ - /* P and q for walking pairs. */ - FsmState *p = stateList.head, *q; - - /* Need an initial partition compare. */ - InitPartitionCompare initPartCompare; - - /* Walk all unordered pairs of (p, q) where p != q. - * The second depth of the walk stops before reaching p. This - * gives us all unordered pairs of states (p, q) where p != q. */ - while ( p != 0 ) { - q = stateList.head; - while ( q != p ) { - /* If the states differ on final state status, out transitions or - * any transition data then they should be separated on the initial - * round. */ - if ( initPartCompare.compare( p, q ) != 0 ) - markIndex.markPair( p->alg.stateNum, q->alg.stateNum ); - - q = q->next; - } - p = p->next; - } -} - -bool FsmGraph::markRound( MarkIndex &markIndex ) -{ - /* P an q for walking pairs. Take note if any pair gets marked. */ - FsmState *p = stateList.head, *q; - bool pairWasMarked = false; - - /* Need a mark comparison. */ - MarkCompare markCompare; - - /* Walk all unordered pairs of (p, q) where p != q. - * The second depth of the walk stops before reaching p. This - * gives us all unordered pairs of states (p, q) where p != q. 
*/ - while ( p != 0 ) { - q = stateList.head; - while ( q != p ) { - /* Should we mark the pair? */ - if ( !markIndex.isPairMarked( p->alg.stateNum, q->alg.stateNum ) ) { - if ( markCompare.shouldMark( markIndex, p, q ) ) { - markIndex.markPair( p->alg.stateNum, q->alg.stateNum ); - pairWasMarked = true; - } - } - q = q->next; - } - p = p->next; - } - - return pairWasMarked; -} - - -/** - * \brief Minimize by pair marking. - * - * Decides if each pair of states is distinct or not. Uses O(n^2) memory and - * should only be used on small graphs. Produces the most minmimal FSM - * possible. - */ -void FsmGraph::minimizeStable() -{ - /* Set the state numbers. */ - setStateNumbers( 0 ); - - /* This keeps track of which pairs have been marked. */ - MarkIndex markIndex( stateList.length() ); - - /* Mark pairs where final stateness, out trans, or trans data differ. */ - initialMarkRound( markIndex ); - - /* While the last round of marking succeeded in marking a state - * continue to do another round. */ - int modified = markRound( markIndex ); - while (modified) - modified = markRound( markIndex ); - - /* Merge pairs that are unmarked. */ - fuseUnmarkedPairs( markIndex ); -} - -bool FsmGraph::minimizeRound() -{ - /* Nothing to do if there are no states. */ - if ( stateList.length() == 0 ) - return false; - - /* Need a mergesort on approx compare and an approx compare. */ - MergeSort<FsmState*, ApproxCompare> mergeSort; - ApproxCompare approxCompare; - - /* Fill up an array of pointers to the states. */ - FsmState **statePtrs = new FsmState*[stateList.length()]; - StateList::Iter state = stateList; - for ( int s = 0; state.lte(); state++, s++ ) - statePtrs[s] = state; - - bool modified = false; - - /* Sort The list. */ - mergeSort.sort( statePtrs, stateList.length() ); - - /* Walk the list looking for duplicates next to each other, - * merge in any duplicates. 
*/ - FsmState **pLast = statePtrs; - FsmState **pState = statePtrs + 1; - for ( int i = 1; i < stateList.length(); i++, pState++ ) { - if ( approxCompare.compare( *pLast, *pState ) == 0 ) { - /* Last and pState are the same, so fuse together. Move forward - * with pState but not with pLast. If any more are identical, we - * must */ - fuseEquivStates( *pLast, *pState ); - modified = true; - } - else { - /* Last and this are different, do not set to merge them. Move - * pLast to the current (it may be way behind from merging many - * states) and pState forward one to consider the next pair. */ - pLast = pState; - } - } - delete[] statePtrs; - return modified; -} - -/** - * \brief Minmimize by an approximation. - * - * Repeatedly tries to find states with transitions out to the same set of - * states on the same set of keys until no more identical states can be found. - * Does not produce the most minimial FSM possible. - */ -void FsmGraph::minimizeApproximate() -{ - /* While the last minimization round succeeded in compacting states, - * continue to try to compact states. */ - while ( true ) { - bool modified = minimizeRound(); - if ( ! modified ) - break; - } -} - - -/* Remove states that have no path to them from the start state. Recursively - * traverses the graph marking states that have paths into them. Then removes - * all states that did not get marked. */ -void FsmGraph::removeUnreachableStates() -{ - /* Misfit accounting should be off and there should be no states on the - * misfit list. */ - assert( !misfitAccounting && misfitList.length() == 0 ); - - /* Mark all the states that can be reached - * through the existing set of entry points. */ - markReachableFromHere( startState ); - for ( EntryMap::Iter en = entryPoints; en.lte(); en++ ) - markReachableFromHere( en->value ); - - /* Delete all states that are not marked - * and unmark the ones that are marked. 
*/ - FsmState *state = stateList.head; - while ( state ) { - FsmState *next = state->next; - - if ( state->stateBits & SB_ISMARKED ) - state->stateBits &= ~ SB_ISMARKED; - else { - detachState( state ); - stateList.detach( state ); - delete state; - } - - state = next; - } -} - -bool FsmGraph::outListCovers( FsmState *state ) -{ - /* Must be at least one range to cover. */ - if ( state->outList.length() == 0 ) - return false; - - /* The first must start at the lower bound. */ - TransList::Iter trans = state->outList.first(); - if ( keyOps->minKey < trans->lowKey ) - return false; - - /* Loop starts at second el. */ - trans.increment(); - - /* Loop checks lower against prev upper. */ - for ( ; trans.lte(); trans++ ) { - /* Lower end of the trans must be one greater than the - * previous' high end. */ - Key lowKey = trans->lowKey; - lowKey.decrement(); - if ( trans->prev->highKey < lowKey ) - return false; - } - - /* Require that the last range extends to the upper bound. */ - trans = state->outList.last(); - if ( trans->highKey < keyOps->maxKey ) - return false; - - return true; -} - -/* Remove states that that do not lead to a final states. Works recursivly traversing - * the graph in reverse (starting from all final states) and marking seen states. Then - * removes states that did not get marked. */ -void FsmGraph::removeDeadEndStates() -{ - /* Misfit accounting should be off and there should be no states on the - * misfit list. */ - assert( !misfitAccounting && misfitList.length() == 0 ); - - /* Mark all states that have paths to the final states. */ - FsmState **st = finStateSet.data; - int nst = finStateSet.length(); - for ( int i = 0; i < nst; i++, st++ ) - markReachableFromHereReverse( *st ); - - /* Start state gets honorary marking. If the machine accepts nothing we - * still want the start state to hang around. 
This must be done after the - * recursive call on all the final states so that it does not cause the - * start state in transitions to be skipped when the start state is - * visited by the traversal. */ - startState->stateBits |= SB_ISMARKED; - - /* Delete all states that are not marked - * and unmark the ones that are marked. */ - FsmState *state = stateList.head; - while ( state != 0 ) { - FsmState *next = state->next; - - if ( state->stateBits & SB_ISMARKED ) - state->stateBits &= ~ SB_ISMARKED; - else { - detachState( state ); - stateList.detach( state ); - delete state; - } - - state = next; - } -} - -/* Remove states on the misfit list. To work properly misfit accounting should - * be on when this is called. The detaching of a state will likely cause - * another misfit to be collected and it can then be removed. */ -void FsmGraph::removeMisfits() -{ - while ( misfitList.length() > 0 ) { - /* Get the first state. */ - FsmState *state = misfitList.head; - - /* Detach and delete. */ - detachState( state ); - - /* The state was previously on the misfit list and detaching can only - * remove in transitions so the state must still be on the misfit - * list. */ - misfitList.detach( state ); - delete state; - } -} - -/* Fuse src into dest because they have been deemed equivalent states. - * Involves moving transitions into src to go into dest and invoking - * callbacks. Src is deleted detached from the graph and deleted. */ -void FsmGraph::fuseEquivStates( FsmState *dest, FsmState *src ) -{ - /* This would get ugly. */ - assert( dest != src ); - - /* Cur is a duplicate. We can merge it with trail. */ - inTransMove( dest, src ); - - detachState( src ); - stateList.detach( src ); - delete src; -} - -void FsmGraph::fuseUnmarkedPairs( MarkIndex &markIndex ) -{ - FsmState *p = stateList.head, *nextP, *q; - - /* Definition: The primary state of an equivalence class is the first state - * encounterd that belongs to the equivalence class. 
All equivalence - * classes have primary state including equivalence classes with one state - * in it. */ - - /* For each unmarked pair merge p into q and delete p. q is always the - * primary state of it's equivalence class. We wouldn't have landed on it - * here if it were not, because it would have been deleted. - * - * Proof that q is the primaray state of it's equivalence class: Assume q - * is not the primary state of it's equivalence class, then it would be - * merged into some state that came before it and thus p would be - * equivalent to that state. But q is the first state that p is equivalent - * to so we have a contradiction. */ - - /* Walk all unordered pairs of (p, q) where p != q. - * The second depth of the walk stops before reaching p. This - * gives us all unordered pairs of states (p, q) where p != q. */ - while ( p != 0 ) { - nextP = p->next; - - q = stateList.head; - while ( q != p ) { - /* If one of p or q is a final state then mark. */ - if ( ! markIndex.isPairMarked( p->alg.stateNum, q->alg.stateNum ) ) { - fuseEquivStates( q, p ); - break; - } - q = q->next; - } - p = nextP; - } -} - -void FsmGraph::fusePartitions( MinPartition *parts, int numParts ) -{ - /* For each partition, fuse state 2, 3, ... into state 1. */ - for ( int p = 0; p < numParts; p++ ) { - /* Assume that there will always be at least one state. */ - FsmState *first = parts[p].list.head, *toFuse = first->next; - - /* Put the first state back onto the main state list. Don't bother - * removing it from the partition list first. */ - stateList.append( first ); - - /* Fuse the rest of the state into the first. */ - while ( toFuse != 0 ) { - /* Save the next. We will trash it before it is needed. */ - FsmState *next = toFuse->next; - - /* Put the state to be fused in to the first back onto the main - * list before it is fuse. the graph. The state needs to be on - * the main list for the detach from the graph to work. 
Don't - * bother removing the state from the partition list first. We - * need not maintain it. */ - stateList.append( toFuse ); - - /* Now fuse to the first. */ - fuseEquivStates( first, toFuse ); - - /* Go to the next that we saved before trashing the next pointer. */ - toFuse = next; - } - - /* We transfered the states from the partition list into the main list without - * removing the states from the partition list first. Clean it up. */ - parts[p].list.abandon(); - } -} - - -/* Merge neighboring transitions go to the same state and have the same - * transitions data. */ -void FsmGraph::compressTransitions() -{ - for ( StateList::Iter st = stateList; st.lte(); st++ ) { - if ( st->outList.length() > 1 ) { - for ( TransList::Iter trans = st->outList, next = trans.next(); next.lte(); ) { - Key nextLow = next->lowKey; - nextLow.decrement(); - if ( trans->highKey == nextLow && trans->toState == next->toState && - CmpActionTable::compare( trans->actionTable, next->actionTable ) == 0 ) - { - trans->highKey = next->highKey; - st->outList.detach( next ); - detachTrans( next->fromState, next->toState, next ); - delete next; - next = trans.next(); - } - else { - trans.increment(); - next.increment(); - } - } - } - } -} diff --git a/src/fsmstate.cc b/src/fsmstate.cc deleted file mode 100644 index b3d1c313..00000000 --- a/src/fsmstate.cc +++ /dev/null @@ -1,441 +0,0 @@ -/* - * Copyright 2006-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * 
copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <string.h> -#include <assert.h> -#include <stdbool.h> - -#include <iostream> - -#include "fsmgraph.h" - -using namespace std; - -/* Construct a mark index for a specified number of states. Must new up - * an array that is states^2 in size. */ -MarkIndex::MarkIndex( int states ) : numStates(states) -{ - /* Total pairs is states^2. Actually only use half of these, but we allocate - * them all to make indexing into the array easier. */ - int total = states * states; - - /* New up chars so that individual DListEl constructors are - * not called. Zero out the mem manually. */ - array = new bool[total]; - memset( array, 0, sizeof(bool) * total ); -} - -/* Free the array used to store state pairs. */ -MarkIndex::~MarkIndex() -{ - delete[] array; -} - -/* Mark a pair of states. States are specified by their number. The - * marked states are moved from the unmarked list to the marked list. */ -void MarkIndex::markPair(int state1, int state2) -{ - int pos = ( state1 >= state2 ) ? - ( state1 * numStates ) + state2 : - ( state2 * numStates ) + state1; - - array[pos] = true; -} - -/* Returns true if the pair of states are marked. Returns false otherwise. - * Ordering of states given does not matter. */ -bool MarkIndex::isPairMarked(int state1, int state2) -{ - int pos = ( state1 >= state2 ) ? - ( state1 * numStates ) + state2 : - ( state2 * numStates ) + state1; - - return array[pos]; -} - -/* Create a new fsm state. 
State has not out transitions or in transitions, not - * out out transition data and not number. */ -FsmState::FsmState() -: - /* No out or in transitions. */ - outList(), - inList(), - - /* No entry points, or epsilon trans. */ - entryIds(), - epsilonTrans(), - - /* No transitions in from other states. */ - foreignInTrans(0), - - /* Only used during merging. Normally null. */ - stateDictEl(0), - eptVect(0), - - /* No state identification bits. */ - stateBits(0), - - /* No Priority data. */ - outPriorTable(), - - /* No Action data. */ - toStateActionTable(), - fromStateActionTable(), - outActionTable(), - outCondSet(), - errActionTable(), - eofActionTable(), - - eofTarget(0) -{ -} - -/* Copy everything except actual the transitions. That is left up to the - * FsmGraph copy constructor. */ -FsmState::FsmState(const FsmState &other) -: - /* All lists are cleared. They will be filled in when the - * individual transitions are duplicated and attached. */ - outList(), - inList(), - - /* Duplicate the entry id set and epsilon transitions. These - * are sets of integers and as such need no fixing. */ - entryIds(other.entryIds), - epsilonTrans(other.epsilonTrans), - - /* No transitions in from other states. */ - foreignInTrans(0), - - /* This is only used during merging. Normally null. */ - stateDictEl(0), - eptVect(0), - - /* Fsm state data. */ - stateBits(other.stateBits), - - /* Copy in priority data. */ - outPriorTable(other.outPriorTable), - - /* Copy in action data. */ - toStateActionTable(other.toStateActionTable), - fromStateActionTable(other.fromStateActionTable), - outActionTable(other.outActionTable), - outCondSet(other.outCondSet), - errActionTable(other.errActionTable), - eofActionTable(other.eofActionTable), - - eofTarget(0) -{ - /* Duplicate all the transitions. */ - for ( TransList::Iter trans = other.outList; trans.lte(); trans++ ) { - /* Dupicate and store the orginal target in the transition. 
This will - * be corrected once all the states have been created. */ - FsmTrans *newTrans = new FsmTrans(*trans); - newTrans->toState = trans->toState; - outList.append( newTrans ); - } -} - -/* If there is a state dict element, then delete it. Everything else is left - * up to the FsmGraph destructor. */ -FsmState::~FsmState() -{ - if ( stateDictEl != 0 ) - delete stateDictEl; -} - -/* Compare two states using pointers to the states. With the approximate - * compare the idea is that if the compare finds them the same, they can - * immediately be merged. */ -int ApproxCompare::compare( const FsmState *state1 , const FsmState *state2 ) -{ - int compareRes; - - /* Test final state status. */ - if ( (state1->stateBits & SB_ISFINAL) && !(state2->stateBits & SB_ISFINAL) ) - return -1; - else if ( !(state1->stateBits & SB_ISFINAL) && (state2->stateBits & SB_ISFINAL) ) - return 1; - - /* Test epsilon transition sets. */ - compareRes = CmpEpsilonTrans::compare( state1->epsilonTrans, - state2->epsilonTrans ); - if ( compareRes != 0 ) - return compareRes; - - /* Compare the out transitions. */ - compareRes = FsmGraph::compareStateData( state1, state2 ); - if ( compareRes != 0 ) - return compareRes; - - /* Use a pair iterator to get the transition pairs. */ - PairIter<FsmTrans> outPair( state1->outList.head, state2->outList.head ); - for ( ; !outPair.end(); outPair++ ) { - switch ( outPair.userState ) { - - case RangeInS1: - compareRes = FsmGraph::compareFullPtr( outPair.s1Tel.trans, 0 ); - if ( compareRes != 0 ) - return compareRes; - break; - - case RangeInS2: - compareRes = FsmGraph::compareFullPtr( 0, outPair.s2Tel.trans ); - if ( compareRes != 0 ) - return compareRes; - break; - - case RangeOverlap: - compareRes = FsmGraph::compareFullPtr( - outPair.s1Tel.trans, outPair.s2Tel.trans ); - if ( compareRes != 0 ) - return compareRes; - break; - - case BreakS1: - case BreakS2: - break; - } - } - - /* Got through the entire state comparison, deem them equal. 
*/ - return 0; -} - -/* Compare class for the sort that does the intial partition of compaction. */ -int InitPartitionCompare::compare( const FsmState *state1 , const FsmState *state2 ) -{ - int compareRes; - - /* Test final state status. */ - if ( (state1->stateBits & SB_ISFINAL) && !(state2->stateBits & SB_ISFINAL) ) - return -1; - else if ( !(state1->stateBits & SB_ISFINAL) && (state2->stateBits & SB_ISFINAL) ) - return 1; - - /* Test epsilon transition sets. */ - compareRes = CmpEpsilonTrans::compare( state1->epsilonTrans, - state2->epsilonTrans ); - if ( compareRes != 0 ) - return compareRes; - - /* Compare the out transitions. */ - compareRes = FsmGraph::compareStateData( state1, state2 ); - if ( compareRes != 0 ) - return compareRes; - - /* Use a pair iterator to test the transition pairs. */ - PairIter<FsmTrans> outPair( state1->outList.head, state2->outList.head ); - for ( ; !outPair.end(); outPair++ ) { - switch ( outPair.userState ) { - - case RangeInS1: - compareRes = FsmGraph::compareDataPtr( outPair.s1Tel.trans, 0 ); - if ( compareRes != 0 ) - return compareRes; - break; - - case RangeInS2: - compareRes = FsmGraph::compareDataPtr( 0, outPair.s2Tel.trans ); - if ( compareRes != 0 ) - return compareRes; - break; - - case RangeOverlap: - compareRes = FsmGraph::compareDataPtr( - outPair.s1Tel.trans, outPair.s2Tel.trans ); - if ( compareRes != 0 ) - return compareRes; - break; - - case BreakS1: - case BreakS2: - break; - } - } - - return 0; -} - -/* Compare class for the sort that does the partitioning. */ -int PartitionCompare::compare( const FsmState *state1, const FsmState *state2 ) -{ - int compareRes; - - /* Use a pair iterator to get the transition pairs. 
*/ - PairIter<FsmTrans> outPair( state1->outList.head, state2->outList.head ); - for ( ; !outPair.end(); outPair++ ) { - switch ( outPair.userState ) { - - case RangeInS1: - compareRes = FsmGraph::comparePartPtr( outPair.s1Tel.trans, 0 ); - if ( compareRes != 0 ) - return compareRes; - break; - - case RangeInS2: - compareRes = FsmGraph::comparePartPtr( 0, outPair.s2Tel.trans ); - if ( compareRes != 0 ) - return compareRes; - break; - - case RangeOverlap: - compareRes = FsmGraph::comparePartPtr( - outPair.s1Tel.trans, outPair.s2Tel.trans ); - if ( compareRes != 0 ) - return compareRes; - break; - - case BreakS1: - case BreakS2: - break; - } - } - - return 0; -} - -/* Compare class for the sort that does the partitioning. */ -bool MarkCompare::shouldMark( MarkIndex &markIndex, const FsmState *state1, - const FsmState *state2 ) -{ - /* Use a pair iterator to get the transition pairs. */ - PairIter<FsmTrans> outPair( state1->outList.head, state2->outList.head ); - for ( ; !outPair.end(); outPair++ ) { - switch ( outPair.userState ) { - - case RangeInS1: - if ( FsmGraph::shouldMarkPtr( markIndex, outPair.s1Tel.trans, 0 ) ) - return true; - break; - - case RangeInS2: - if ( FsmGraph::shouldMarkPtr( markIndex, 0, outPair.s2Tel.trans ) ) - return true; - break; - - case RangeOverlap: - if ( FsmGraph::shouldMarkPtr( markIndex, - outPair.s1Tel.trans, outPair.s2Tel.trans ) ) - return true; - break; - - case BreakS1: - case BreakS2: - break; - } - } - - return false; -} - -/* - * Transition Comparison. - */ - -/* Compare target partitions. Either pointer may be null. */ -int FsmGraph::comparePartPtr( FsmTrans *trans1, FsmTrans *trans2 ) -{ - if ( trans1 != 0 ) { - /* If trans1 is set then so should trans2. The initial partitioning - * guarantees this for us. */ - if ( trans1->toState == 0 && trans2->toState != 0 ) - return -1; - else if ( trans1->toState != 0 && trans2->toState == 0 ) - return 1; - else if ( trans1->toState != 0 ) { - /* Both of targets are set. 
*/ - return CmpOrd< MinPartition* >::compare( - trans1->toState->alg.partition, trans2->toState->alg.partition ); - } - } - return 0; -} - - -/* Compares two transition pointers according to priority and functions. - * Either pointer may be null. Does not consider to state or from state. */ -int FsmGraph::compareDataPtr( FsmTrans *trans1, FsmTrans *trans2 ) -{ - if ( trans1 == 0 && trans2 != 0 ) - return -1; - else if ( trans1 != 0 && trans2 == 0 ) - return 1; - else if ( trans1 != 0 ) { - /* Both of the transition pointers are set. */ - int compareRes = compareTransData( trans1, trans2 ); - if ( compareRes != 0 ) - return compareRes; - } - return 0; -} - -/* Compares two transitions according to target state, priority and functions. - * Does not consider from state. Either of the pointers may be null. */ -int FsmGraph::compareFullPtr( FsmTrans *trans1, FsmTrans *trans2 ) -{ - if ( (trans1 != 0) ^ (trans2 != 0) ) { - /* Exactly one of the transitions is set. */ - if ( trans1 != 0 ) - return -1; - else - return 1; - } - else if ( trans1 != 0 ) { - /* Both of the transition pointers are set. Test target state, - * priority and funcs. */ - if ( trans1->toState < trans2->toState ) - return -1; - else if ( trans1->toState > trans2->toState ) - return 1; - else if ( trans1->toState != 0 ) { - /* Test transition data. */ - int compareRes = compareTransData( trans1, trans2 ); - if ( compareRes != 0 ) - return compareRes; - } - } - return 0; -} - - -bool FsmGraph::shouldMarkPtr( MarkIndex &markIndex, FsmTrans *trans1, - FsmTrans *trans2 ) -{ - if ( (trans1 != 0) ^ (trans2 != 0) ) { - /* Exactly one of the transitions is set. The initial mark round - * should rule out this case. */ - assert( false ); - } - else if ( trans1 != 0 ) { - /* Both of the transitions are set. If the target pair is marked, then - * the pair we are considering gets marked. 
*/ - return markIndex.isPairMarked( trans1->toState->alg.stateNum, - trans2->toState->alg.stateNum ); - } - - /* Neither of the transitiosn are set. */ - return false; -} - - diff --git a/src/global.h b/src/global.h deleted file mode 100644 index c049d182..00000000 --- a/src/global.h +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Copyright 2006-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef _COLM_GLOBAL_H -#define _COLM_GLOBAL_H - -#include <stdio.h> - -#include <iostream> -#include <fstream> -#include <fstream> -#include <string> - -#include <avltree.h> - -#include "config.h" -#include "defs.h" -#include "keyops.h" - -#define PROGNAME "colm" - -/* IO filenames and stream. */ -extern bool genGraphviz; -extern int gblErrorCount; - -std::ostream &error(); - -/* IO filenames and stream. 
*/ -extern std::ostream *outStream; -extern bool generateGraphviz; -extern bool branchPointInfo; -extern bool verbose, logging; -extern bool addUniqueEmptyProductions; - -extern int gblErrorCount; -extern char startDefName[]; - -/* Error reporting. */ -std::ostream &error(); -std::ostream &error( int first_line, int first_column ); -std::ostream &warning( ); -std::ostream &warning( int first_line, int first_column ); - -extern std::ostream *outStream; -extern bool printStatistics; - -extern int gblErrorCount; -extern bool gblLibrary; -extern long gblActiveRealm; -extern char machineMain[]; -extern const char *exportHeaderFn; - -struct colm_location; - -/* Location in an input file. */ -struct InputLoc -{ - InputLoc( colm_location *pcloc ); - - InputLoc() : fileName(0), line(-1), col(-1) {} - - InputLoc( const InputLoc &loc ) - { - fileName = loc.fileName; - line = loc.line; - col = loc.col; - } - - const char *fileName; - int line; - int col; -}; - -extern InputLoc internal; - -/* Error reporting. 
*/ -std::ostream &error(); -std::ostream &error( const InputLoc &loc ); -std::ostream &warning( const InputLoc &loc ); - -void scan( char *fileName, std::istream &input, std::ostream &output ); -void terminateAllParsers( ); -void checkMachines( ); - -void xmlEscapeHost( std::ostream &out, char *data, int len ); -void openOutput(); -void escapeLiteralString( std::ostream &out, const char *data ); -bool readCheck( const char *fn ); - -#endif /* _COLM_GLOBAL_H */ - diff --git a/src/input.c b/src/input.c deleted file mode 100644 index ba4643ee..00000000 --- a/src/input.c +++ /dev/null @@ -1,740 +0,0 @@ -/* - * Copyright 2007-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include <colm/input.h> - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <assert.h> -#include <unistd.h> -#include <stdbool.h> - -#include <colm/pdarun.h> -#include <colm/debug.h> -#include <colm/program.h> -#include <colm/tree.h> -#include <colm/bytecode.h> -#include <colm/pool.h> -#include <colm/struct.h> - -DEF_INPUT_FUNCS( input_funcs_seq, input_impl_seq ); -extern struct input_funcs_seq input_funcs; - -static bool is_tree( struct seq_buf *b ) -{ - return b->type == SB_TOKEN || b->type == SB_IGNORE; -} - -static bool is_stream( struct seq_buf *b ) -{ - return b->type == SB_SOURCE || b->type == SB_ACCUM; -} - -char *colm_filename_add( program_t *prg, const char *fn ) -{ - /* Search for it. */ - const char **ptr = prg->stream_fns; - while ( *ptr != 0 ) { - if ( strcmp( *ptr, fn ) == 0 ) - return (char*)*ptr; - ptr += 1; - } - - /* Not present, find. */ - int items = ptr - prg->stream_fns; - - prg->stream_fns = realloc( prg->stream_fns, sizeof(char*) * ( items + 2 ) ); - prg->stream_fns[items] = strdup( fn ); - prg->stream_fns[items+1] = 0; - - return (char*)prg->stream_fns[items]; -} - -static struct seq_buf *new_seq_buf() -{ - struct seq_buf *rb = (struct seq_buf*) malloc( sizeof(struct seq_buf) ); - memset( rb, 0, sizeof(struct seq_buf) ); - return rb; -} - -static void input_transfer_loc( struct colm_program *prg, location_t *loc, struct input_impl_seq *ss ) -{ -} - -static bool call_destructor( struct seq_buf *buf ) -{ - return is_stream( buf ) && buf->own_si; -} - -static void colm_input_destroy( program_t *prg, tree_t **sp, struct_t *s ) -{ - input_t *input = (input_t*) s; - struct input_impl *si = input->impl; - si->funcs->destructor( prg, sp, si ); -} - -static void input_stream_stash_head( struct colm_program *prg, struct input_impl_seq *si, struct seq_buf *seq_buf ) -{ - debug( prg, REALM_INPUT, "stash_head: stream %p buf %p\n", si, seq_buf ); - seq_buf->next = si->stash; - si->stash = seq_buf; -} - -static struct 
seq_buf *input_stream_pop_stash( struct colm_program *prg, struct input_impl_seq *si ) -{ - struct seq_buf *seq_buf = si->stash; - si->stash = si->stash->next; - - debug( prg, REALM_INPUT, "pop_stash: stream %p buf %p\n", si, seq_buf ); - - return seq_buf; -} - -static void maybe_split( struct colm_program *prg, struct input_impl_seq *iis ) -{ - struct seq_buf *head = iis->queue.head; - if ( head != 0 && is_stream( head ) ) { - /* Maybe the stream will split itself off. */ - struct stream_impl *split_off = head->si->funcs->split_consumed( prg, head->si ); - - if ( split_off != 0 ) { - debug( prg, REALM_INPUT, "maybe split: consumed is > 0, splitting\n" ); - - struct seq_buf *new_buf = new_seq_buf(); - new_buf->type = SB_ACCUM; - new_buf->si = split_off; - new_buf->own_si = 1; - - input_stream_stash_head( prg, iis, new_buf ); - } - } -} - - -/* - * StreamImpl struct, this wraps the list of input streams. - */ - -void init_input_impl_seq( struct input_impl_seq *is, char *name ) -{ - memset( is, 0, sizeof(struct input_impl_seq) ); - - is->type = 'S'; - //is->name = name; - //is->line = 1; - //is->column = 1; - //is->byte = 0; -} - -static struct seq_buf *input_stream_seq_pop_head( struct input_impl_seq *is ) -{ - struct seq_buf *ret = is->queue.head; - is->queue.head = is->queue.head->next; - if ( is->queue.head == 0 ) - is->queue.tail = 0; - else - is->queue.head->prev = 0; - return ret; -} - -static void input_stream_seq_append( struct input_impl_seq *is, struct seq_buf *seq_buf ) -{ - if ( is->queue.head == 0 ) { - seq_buf->prev = seq_buf->next = 0; - is->queue.head = is->queue.tail = seq_buf; - } - else { - is->queue.tail->next = seq_buf; - seq_buf->prev = is->queue.tail; - seq_buf->next = 0; - is->queue.tail = seq_buf; - } -} - -static struct seq_buf *input_stream_seq_pop_tail( struct input_impl_seq *is ) -{ - struct seq_buf *ret = is->queue.tail; - is->queue.tail = is->queue.tail->prev; - if ( is->queue.tail == 0 ) - is->queue.head = 0; - else - 
is->queue.tail->next = 0; - return ret; -} - -static void input_stream_seq_prepend( struct input_impl_seq *is, struct seq_buf *seq_buf ) -{ - if ( is->queue.head == 0 ) { - seq_buf->prev = seq_buf->next = 0; - is->queue.head = is->queue.tail = seq_buf; - } - else { - is->queue.head->prev = seq_buf; - seq_buf->prev = 0; - seq_buf->next = is->queue.head; - is->queue.head = seq_buf; - } -} - -void input_set_eof_mark( struct colm_program *prg, struct input_impl_seq *si, char eof_mark ) -{ - si->eof_mark = eof_mark; -} - -static void input_destructor( program_t *prg, tree_t **sp, struct input_impl_seq *si ) -{ - struct seq_buf *buf = si->queue.head; - while ( buf != 0 ) { - if ( is_tree( buf ) ) - colm_tree_downref( prg, sp, buf->tree ); - - if ( call_destructor( buf ) ) - buf->si->funcs->destructor( prg, sp, buf->si ); - - struct seq_buf *next = buf->next; - free( buf ); - buf = next; - } - - buf = si->stash; - while ( buf != 0 ) { - struct seq_buf *next = buf->next; - if ( call_destructor( buf ) ) - buf->si->funcs->destructor( prg, sp, buf->si ); - - free( buf ); - buf = next; - } - - si->queue.head = 0; - - /* FIXME: Need to leak this for now. Until we can return strings to a - * program loader and free them at a later date (after the colm program is - * deleted). */ - // if ( stream->impl->name != 0 ) - // free( stream->impl->name ); - - free( si ); -} - -static int input_get_option( struct colm_program *prg, struct input_impl_seq *ii, int option ) -{ - return ii->auto_trim; -} - -static void input_set_option( struct colm_program *prg, struct input_impl_seq *ii, int option, int value ) -{ - ii->auto_trim = value ? 1 : 0; -} - - -static int input_get_parse_block( struct colm_program *prg, struct input_impl_seq *is, int *pskip, char **pdp, int *copied ) -{ - int ret = 0; - *copied = 0; - - /* Move over skip bytes. */ - struct seq_buf *buf = is->queue.head; - while ( true ) { - if ( buf == 0 ) { - /* Got through the in-mem buffers without copying anything. 
*/ - ret = is->eof_mark ? INPUT_EOF : INPUT_EOD; - break; - } - - if ( is_stream( buf ) ) { - struct stream_impl *si = buf->si; - int type = si->funcs->get_parse_block( prg, si, pskip, pdp, copied ); - - if ( type == INPUT_EOD || type == INPUT_EOF ) { - buf = buf->next; - continue; - } - - ret = type; - break; - } - - if ( buf->type == SB_TOKEN ) { - ret = INPUT_TREE; - break; - } - - if ( buf->type == SB_IGNORE ) { - ret = INPUT_IGNORE; - break; - } - - buf = buf->next; - } - -#if DEBUG - switch ( ret ) { - case INPUT_DATA: - if ( *pdp != 0 ) { - debug( prg, REALM_INPUT, "get parse block: DATA: %d %.*s\n", *copied, (int)(*copied), *pdp ); - } - else { - debug( prg, REALM_INPUT, "get parse block: DATA: %d\n", *copied ); - } - break; - case INPUT_EOD: - debug( prg, REALM_INPUT, "get parse block: EOD\n" ); - break; - case INPUT_EOF: - debug( prg, REALM_INPUT, "get parse block: EOF\n" ); - break; - case INPUT_TREE: - debug( prg, REALM_INPUT, "get parse block: TREE\n" ); - break; - case INPUT_IGNORE: - debug( prg, REALM_INPUT, "get parse block: IGNORE\n" ); - break; - case INPUT_LANG_EL: - debug( prg, REALM_INPUT, "get parse block: LANG_EL\n" ); - break; - } -#endif - - return ret; -} - -static int input_get_data( struct colm_program *prg, struct input_impl_seq *is, char *dest, int length ) -{ - int copied = 0; - - /* Move over skip bytes. */ - struct seq_buf *buf = is->queue.head; - while ( true ) { - if ( buf == 0 ) { - /* Got through the in-mem buffers without copying anything. 
*/ - break; - } - - if ( is_stream( buf ) ) { - struct stream_impl *si = buf->si; - int glen = si->funcs->get_data( prg, si, dest+copied, length ); - - if ( glen == 0 ) { - //debug( REALM_INPUT, "skipping over input\n" ); - buf = buf->next; - continue; - } - - copied += glen; - length -= glen; - } - else if ( buf->type == SB_TOKEN ) - break; - else if ( buf->type == SB_IGNORE ) - break; - - if ( length == 0 ) { - //debug( REALM_INPUT, "exiting get data\n", length ); - break; - } - - buf = buf->next; - } - - return copied; -} - -/* - * Consume - */ - -static int input_consume_data( struct colm_program *prg, struct input_impl_seq *si, int length, location_t *loc ) -{ - debug( prg, REALM_INPUT, "input_consume_data: stream %p consuming %d bytes\n", si, length ); - - int consumed = 0; - - /* Move over skip bytes. */ - while ( true ) { - struct seq_buf *buf = si->queue.head; - - if ( buf == 0 ) - break; - - if ( is_stream( buf ) ) { - struct stream_impl *sub = buf->si; - int slen = sub->funcs->consume_data( prg, sub, length, loc ); - //debug( REALM_INPUT, " got %d bytes from source\n", slen ); - - consumed += slen; - length -= slen; - } - else if ( buf->type == SB_TOKEN ) - break; - else if ( buf->type == SB_IGNORE ) - break; - else { - assert(false); - } - - if ( length == 0 ) { - //debug( REALM_INPUT, "exiting consume\n", length ); - break; - } - - struct seq_buf *seq_buf = input_stream_seq_pop_head( si ); - input_stream_stash_head( prg, si, seq_buf ); - } - - return consumed; -} - -static int input_undo_consume_data( struct colm_program *prg, struct input_impl_seq *si, const char *data, int length ) -{ - /* When we push back data we need to move backwards through the block of - * text. 
The source stream type will */ - debug( prg, REALM_INPUT, "input_undo_consume_data: stream %p undoing consume of %d bytes\n", si, length ); - - assert( length > 0 ); - long tot = length; - int offset = 0; - int remaining = length; - - while ( true ) { - if ( is_stream( si->queue.head ) ) { - struct stream_impl *sub = si->queue.head->si; - int pushed_back = sub->funcs->undo_consume_data( prg, sub, data, remaining ); - remaining -= pushed_back; - offset += pushed_back; - - if ( remaining == 0 ) - break; - } - - struct seq_buf *b = input_stream_pop_stash( prg, si ); - input_stream_seq_prepend( si, b ); - } - - return tot; -} - -static tree_t *input_consume_tree( struct colm_program *prg, struct input_impl_seq *si ) -{ - debug( prg, REALM_INPUT, "input_consume_tree: stream %p\n", si ); - - while ( si->queue.head != 0 && is_stream( si->queue.head ) ) - { - debug( prg, REALM_INPUT, " stream %p consume: clearing source type\n", si ); - struct seq_buf *seq_buf = input_stream_seq_pop_head( si ); - input_stream_stash_head( prg, si, seq_buf ); - } - - assert( si->queue.head != 0 && ( si->queue.head->type == SB_TOKEN || si->queue.head->type == SB_IGNORE ) ); - - { - struct seq_buf *seq_buf = input_stream_seq_pop_head( si ); - input_stream_stash_head( prg, si, seq_buf ); - tree_t *tree = seq_buf->tree; - debug( prg, REALM_INPUT, " stream %p consume: tree: %p\n", si, tree ); - return tree; - } - - return 0; -} - - -static void input_undo_consume_tree( struct colm_program *prg, struct input_impl_seq *si, tree_t *tree, int ignore ) -{ - debug( prg, REALM_INPUT, "input_undo_consume_tree: stream %p undo consume tree %p\n", si, tree ); - - while ( true ) { - debug( prg, REALM_INPUT, " stream %p consume: clearing source type\n", si ); - - struct seq_buf *b = input_stream_pop_stash( prg, si ); - input_stream_seq_prepend( si, b ); - - if ( is_tree( b ) ) { - assert( b->tree->id == tree->id ); - break; - } - } -} - -/* - * Prepend - */ -static void input_prepend_data( struct colm_program 
*prg, struct input_impl_seq *si, const char *data, long length ) -{ - debug( prg, REALM_INPUT, "input_prepend_data: stream %p prepend data length %d\n", si, length ); - - maybe_split( prg, si ); - - struct stream_impl *sub_si = colm_impl_new_text( "<text1>", data, length ); - - struct seq_buf *new_buf = new_seq_buf(); - new_buf->type = SB_ACCUM; - new_buf->si = sub_si; - new_buf->own_si = 1; - - input_stream_seq_prepend( si, new_buf ); -} - -static int input_undo_prepend_data( struct colm_program *prg, struct input_impl_seq *si, int length ) -{ - debug( prg, REALM_INPUT, "input_undo_prepend_data: stream %p undo append data length %d\n", si, length ); - - struct seq_buf *seq_buf = input_stream_seq_pop_head( si ); - free( seq_buf ); - - return 0; -} - -static void input_prepend_tree( struct colm_program *prg, struct input_impl_seq *si, tree_t *tree, int ignore ) -{ - debug( prg, REALM_INPUT, "input_prepend_tree: stream %p prepend tree %p\n", si, tree ); - - maybe_split( prg, si ); - - /* Create a new buffer for the data. This is the easy implementation. - * Something better is needed here. It puts a max on the amount of - * data that can be pushed back to the inputStream. */ - struct seq_buf *new_buf = new_seq_buf(); - new_buf->type = ignore ? 
SB_IGNORE : SB_TOKEN; - new_buf->tree = tree; - input_stream_seq_prepend( si, new_buf ); -} - -static tree_t *input_undo_prepend_tree( struct colm_program *prg, struct input_impl_seq *si ) -{ - debug( prg, REALM_INPUT, "input_undo_prepend_tree: stream %p undo prepend tree\n", si ); - - assert( si->queue.head != 0 && ( si->queue.head->type == SB_TOKEN || - si->queue.head->type == SB_IGNORE ) ); - - struct seq_buf *seq_buf = input_stream_seq_pop_head( si ); - - tree_t *tree = seq_buf->tree; - free(seq_buf); - - debug( prg, REALM_INPUT, " stream %p tree %p\n", si, tree ); - - return tree; -} - - -static void input_prepend_stream( struct colm_program *prg, struct input_impl_seq *si, struct colm_stream *stream ) -{ - maybe_split( prg, si ); - - /* Create a new buffer for the data. This is the easy implementation. - * Something better is needed here. It puts a max on the amount of - * data that can be pushed back to the inputStream. */ - struct seq_buf *new_buf = new_seq_buf(); - new_buf->type = SB_SOURCE; - new_buf->si = stream_to_impl( stream ); - input_stream_seq_prepend( si, new_buf ); - - assert( ((struct stream_impl_data*)new_buf->si)->type == 'D' ); -} - -static tree_t *input_undo_prepend_stream( struct colm_program *prg, struct input_impl_seq *is ) -{ - struct seq_buf *seq_buf = input_stream_seq_pop_head( is ); - free( seq_buf ); - return 0; -} - -static void input_append_data( struct colm_program *prg, struct input_impl_seq *si, const char *data, long length ) -{ - debug( prg, REALM_INPUT, "input_append_data: stream %p append data length %d\n", si, length ); - - if ( si->queue.tail == 0 || si->queue.tail->type != SB_ACCUM ) { - debug( prg, REALM_INPUT, "input_append_data: creating accum\n" ); - - struct stream_impl *sub_si = colm_impl_new_accum( "<text2>" ); - - struct seq_buf *new_buf = new_seq_buf(); - new_buf->type = SB_ACCUM; - new_buf->si = sub_si; - new_buf->own_si = 1; - - input_stream_seq_append( si, new_buf ); - } - - 
si->queue.tail->si->funcs->append_data( prg, si->queue.tail->si, data, length ); -} - -static tree_t *input_undo_append_data( struct colm_program *prg, struct input_impl_seq *si, int length ) -{ - debug( prg, REALM_INPUT, "input_undo_append_data: stream %p undo append data length %d\n", si, length ); - - while ( true ) { - struct seq_buf *buf = si->queue.tail; - - if ( buf == 0 ) - break; - - if ( is_stream( buf ) ) { - struct stream_impl *sub = buf->si; - int slen = sub->funcs->undo_append_data( prg, sub, length ); - //debug( REALM_INPUT, " got %d bytes from source\n", slen ); - //consumed += slen; - length -= slen; - } - else if ( buf->type == SB_TOKEN ) - break; - else if ( buf->type == SB_IGNORE ) - break; - else { - assert(false); - } - - if ( length == 0 ) { - //debug( REALM_INPUT, "exiting consume\n", length ); - break; - } - - struct seq_buf *seq_buf = input_stream_seq_pop_tail( si ); - free( seq_buf ); - } - return 0; -} - -static void input_append_tree( struct colm_program *prg, struct input_impl_seq *si, tree_t *tree ) -{ - debug( prg, REALM_INPUT, "input_append_tree: stream %p append tree %p\n", si, tree ); - - struct seq_buf *ad = new_seq_buf(); - - input_stream_seq_append( si, ad ); - - ad->type = SB_TOKEN; - ad->tree = tree; -} - -static tree_t *input_undo_append_tree( struct colm_program *prg, struct input_impl_seq *si ) -{ - debug( prg, REALM_INPUT, "input_undo_append_tree: stream %p undo append tree\n", si ); - - struct seq_buf *seq_buf = input_stream_seq_pop_tail( si ); - tree_t *tree = seq_buf->tree; - free( seq_buf ); - return tree; -} - -static void input_append_stream( struct colm_program *prg, struct input_impl_seq *si, struct colm_stream *stream ) -{ - debug( prg, REALM_INPUT, "input_append_stream: stream %p append stream %p\n", si, stream ); - - struct seq_buf *ad = new_seq_buf(); - - input_stream_seq_append( si, ad ); - - ad->type = SB_SOURCE; - ad->si = stream_to_impl( stream ); - - assert( ((struct stream_impl_data*)ad->si)->type == 'D' 
); -} - -static tree_t *input_undo_append_stream( struct colm_program *prg, struct input_impl_seq *si ) -{ - debug( prg, REALM_INPUT, "input_undo_append_stream: stream %p undo append stream\n", si ); - - struct seq_buf *seq_buf = input_stream_seq_pop_tail( si ); - free( seq_buf ); - return 0; -} - -struct input_funcs_seq input_funcs = -{ - &input_get_parse_block, - &input_get_data, - - /* Consume. */ - &input_consume_data, - &input_undo_consume_data, - - &input_consume_tree, - &input_undo_consume_tree, - - 0, /* consume_lang_el */ - 0, /* undo_consume_lang_el */ - - /* Prepend */ - &input_prepend_data, - &input_undo_prepend_data, - - &input_prepend_tree, - &input_undo_prepend_tree, - - &input_prepend_stream, - &input_undo_prepend_stream, - - /* Append */ - &input_append_data, - &input_undo_append_data, - - &input_append_tree, - &input_undo_append_tree, - - &input_append_stream, - &input_undo_append_stream, - - /* EOF */ - &input_set_eof_mark, - - &input_transfer_loc, - &input_destructor, - - /* Trimming */ - &input_get_option, - &input_set_option, -}; - -struct input_impl *colm_impl_new_generic( char *name ) -{ - struct input_impl_seq *ss = (struct input_impl_seq*)malloc(sizeof(struct input_impl_seq)); - init_input_impl_seq( ss, name ); - ss->funcs = (struct input_funcs*)&input_funcs; - return (struct input_impl*)ss; -} - -input_t *colm_input_new_struct( program_t *prg ) -{ - size_t memsize = sizeof(struct colm_input); - struct colm_input *input = (struct colm_input*) malloc( memsize ); - memset( input, 0, memsize ); - colm_struct_add( prg, (struct colm_struct *)input ); - input->id = prg->rtd->struct_input_id; - input->destructor = &colm_input_destroy; - return input; -} - -input_t *colm_input_new( program_t *prg ) -{ - struct input_impl *impl = colm_impl_new_generic( colm_filename_add( prg, "<internal>" ) ); - struct colm_input *input = colm_input_new_struct( prg ); - input->impl = impl; - return input; -} - -struct input_impl *input_to_impl( input_t *ptr ) -{ - 
return ptr->impl; -} diff --git a/src/input.h b/src/input.h deleted file mode 100644 index f116561f..00000000 --- a/src/input.h +++ /dev/null @@ -1,230 +0,0 @@ -/* - * Copyright 2007-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef _COLM_INPUT_H -#define _COLM_INPUT_H - -#include <stdio.h> -#include "colm.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define FSM_BUFSIZE 8192 -//#define FSM_BUFSIZE 8 - -#define INPUT_DATA 1 -/* This is for data sources to return, not for the wrapper. 
*/ -#define INPUT_EOD 2 -#define INPUT_EOF 3 -#define INPUT_EOS 4 -#define INPUT_LANG_EL 5 -#define INPUT_TREE 6 -#define INPUT_IGNORE 7 - -struct LangEl; -struct colm_tree; -struct colm_stream; -struct colm_location; -struct colm_program; -struct colm_struct; -struct colm_str; -struct colm_stream; - -struct input_impl; -struct stream_impl; - -#define DEF_INPUT_FUNCS( input_funcs, _input_impl ) \ -struct input_funcs \ -{ \ - int (*get_parse_block)( struct colm_program *prg, struct _input_impl *si, int *pskip, char **pdp, int *copied ); \ - int (*get_data)( struct colm_program *prg, struct _input_impl *si, char *dest, int length ); \ - int (*consume_data)( struct colm_program *prg, struct _input_impl *si, int length, struct colm_location *loc ); \ - int (*undo_consume_data)( struct colm_program *prg, struct _input_impl *si, const char *data, int length ); \ - struct colm_tree *(*consume_tree)( struct colm_program *prg, struct _input_impl *si ); \ - void (*undo_consume_tree)( struct colm_program *prg, struct _input_impl *si, struct colm_tree *tree, int ignore ); \ - struct LangEl *(*consume_lang_el)( struct colm_program *prg, struct _input_impl *si, long *bind_id, char **data, long *length ); \ - void (*undo_consume_lang_el)( struct colm_program *prg, struct _input_impl *si ); \ - void (*prepend_data)( struct colm_program *prg, struct _input_impl *si, const char *data, long len ); \ - int (*undo_prepend_data)( struct colm_program *prg, struct _input_impl *si, int length ); \ - void (*prepend_tree)( struct colm_program *prg, struct _input_impl *si, struct colm_tree *tree, int ignore ); \ - struct colm_tree *(*undo_prepend_tree)( struct colm_program *prg, struct _input_impl *si ); \ - void (*prepend_stream)( struct colm_program *prg, struct _input_impl *si, struct colm_stream *stream ); \ - struct colm_tree *(*undo_prepend_stream)( struct colm_program *prg, struct _input_impl *si ); \ - void (*append_data)( struct colm_program *prg, struct _input_impl *si, const char 
*data, long length ); \ - struct colm_tree *(*undo_append_data)( struct colm_program *prg, struct _input_impl *si, int length ); \ - void (*append_tree)( struct colm_program *prg, struct _input_impl *si, struct colm_tree *tree ); \ - struct colm_tree *(*undo_append_tree)( struct colm_program *prg, struct _input_impl *si ); \ - void (*append_stream)( struct colm_program *prg, struct _input_impl *si, struct colm_stream *stream ); \ - struct colm_tree *(*undo_append_stream)( struct colm_program *prg, struct _input_impl *si ); \ - void (*set_eof_mark)( struct colm_program *prg, struct _input_impl *si, char eof_mark ); \ - void (*transfer_loc)( struct colm_program *prg, struct colm_location *loc, struct _input_impl *si ); \ - void (*destructor)( struct colm_program *prg, struct colm_tree **sp, struct _input_impl *si ); \ - int (*get_option)( struct colm_program *prg, struct _input_impl *si, int option ); \ - void (*set_option)( struct colm_program *prg, struct _input_impl *si, int option, int value ); \ -} - -#define DEF_STREAM_FUNCS( stream_funcs, _stream_impl ) \ -struct stream_funcs \ -{ \ - int (*get_parse_block)( struct colm_program *prg, struct _stream_impl *si, int *pskip, char **pdp, int *copied ); \ - int (*get_data)( struct colm_program *prg, struct _stream_impl *si, char *dest, int length ); \ - int (*get_data_source)( struct colm_program *prg, struct _stream_impl *si, char *dest, int length ); \ - int (*consume_data)( struct colm_program *prg, struct _stream_impl *si, int length, struct colm_location *loc ); \ - int (*undo_consume_data)( struct colm_program *prg, struct _stream_impl *si, const char *data, int length ); \ - void (*transfer_loc)( struct colm_program *prg, struct colm_location *loc, struct _stream_impl *si ); \ - struct colm_str_collect *(*get_collect)( struct colm_program *prg, struct _stream_impl *si ); \ - void (*flush_stream)( struct colm_program *prg, struct _stream_impl *si ); \ - void (*close_stream)( struct colm_program *prg, struct 
_stream_impl *si ); \ - void (*print_tree)( struct colm_program *prg, struct colm_tree **sp, \ - struct _stream_impl *impl, struct colm_tree *tree, int trim ); \ - struct stream_impl *(*split_consumed)( struct colm_program *prg, struct _stream_impl *si ); \ - int (*append_data)( struct colm_program *prg, struct _stream_impl *si, const char *data, int len ); \ - int (*undo_append_data)( struct colm_program *prg, struct _stream_impl *si, int length ); \ - void (*destructor)( struct colm_program *prg, struct colm_tree **sp, struct _stream_impl *si ); \ - int (*get_option)( struct colm_program *prg, struct _stream_impl *si, int option ); \ - void (*set_option)( struct colm_program *prg, struct _stream_impl *si, int option, int value ); \ -} - -DEF_INPUT_FUNCS( input_funcs, input_impl ); -DEF_STREAM_FUNCS( stream_funcs, stream_impl ); - -/* List of source streams. Enables streams to be pushed/popped. */ -struct input_impl -{ - struct input_funcs *funcs; -}; - -/* List of source streams. Enables streams to be pushed/popped. */ -struct stream_impl -{ - struct stream_funcs *funcs; -}; - -enum seq_buf_type { - SB_TOKEN = 1, - SB_IGNORE, - SB_SOURCE, - SB_ACCUM -}; - -struct seq_buf -{ - enum seq_buf_type type; - char own_si; - struct colm_tree *tree; - struct stream_impl *si; - struct seq_buf *next, *prev; -}; - -/* List of source streams. Enables streams to be pushed/popped. */ -struct input_impl_seq -{ - struct input_funcs *funcs; - char type; - - char eof_mark; - char eof_sent; - - struct { - struct seq_buf *head; - struct seq_buf *tail; - } queue; - - struct seq_buf *stash; - - int consumed; - int auto_trim; -}; - -struct run_buf -{ - long length; - long offset; - struct run_buf *next, *prev; - - /* Must be at the end. We will grow this struct to add data if the input - * demands it. 
*/ - char data[FSM_BUFSIZE]; -}; - -struct run_buf *new_run_buf( int sz ); - -struct stream_impl_data -{ - struct stream_funcs *funcs; - char type; - - struct { - struct run_buf *head; - struct run_buf *tail; - } queue; - - const char *data; - long dlen; - int offset; - - long line; - long column; - long byte; - - char *name; - FILE *file; - - struct colm_str_collect *collect; - - int consumed; - - struct indent_impl indent; - - int *line_len; - int lines_alloc; - int lines_cur; - - int auto_trim; -}; - -void stream_impl_push_line( struct stream_impl_data *ss, int ll ); -int stream_impl_pop_line( struct stream_impl_data *ss ); - -struct input_impl *colm_impl_new_generic( char *name ); - -void update_position( struct stream_impl *input_stream, const char *data, long length ); -void undo_position( struct stream_impl *input_stream, const char *data, long length ); - -struct stream_impl *colm_stream_impl( struct colm_struct *s ); - -struct colm_str *collect_string( struct colm_program *prg, struct colm_stream *s ); -struct colm_stream *colm_stream_open_collect( struct colm_program *prg ); - -char *colm_filename_add( struct colm_program *prg, const char *fn ); -struct stream_impl *colm_impl_new_accum( char *name ); -struct stream_impl *colm_impl_consumed( char *name, int len ); -struct stream_impl *colm_impl_new_text( char *name, const char *data, int len ); - -#ifdef __cplusplus -} -#endif - -#endif /* _COLM_INPUT_H */ - diff --git a/src/internal.h b/src/internal.h deleted file mode 100644 index e6e1fa7e..00000000 --- a/src/internal.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright 2007-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the 
Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef _COLM_INTERNAL_H -#define _COLM_INTERNAL_H - -#include "colm.h" - -typedef struct colm_struct struct_t; -typedef struct colm_program program_t; -typedef unsigned long value_t; - -#endif /* _COLM_INTERNAL_H */ - diff --git a/src/iter.c b/src/iter.c deleted file mode 100644 index 66974f4a..00000000 --- a/src/iter.c +++ /dev/null @@ -1,648 +0,0 @@ -/* - * Copyright 2007-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <assert.h> -#include <stdbool.h> - -#include <colm/tree.h> -#include <colm/bytecode.h> -#include <colm/program.h> - -#include "internal.h" - -void colm_init_list_iter( generic_iter_t *list_iter, tree_t **stack_root, - long arg_size, long root_size, const ref_t *root_ref, int generic_id ) -{ - list_iter->type = IT_Tree; - list_iter->root_ref = *root_ref; - list_iter->stack_root = stack_root; - list_iter->yield_size = 0; - list_iter->root_size = root_size; - list_iter->ref.kid = 0; - list_iter->ref.next = 0; - list_iter->arg_size = arg_size; - list_iter->generic_id = generic_id; -} - -void colm_list_iter_destroy( program_t *prg, tree_t ***psp, generic_iter_t *iter ) -{ - if ( (int)iter->type != 0 ) { - int i; - tree_t **sp = *psp; - long cur_stack_size = vm_ssize() - iter->root_size; - assert( iter->yield_size == cur_stack_size ); - vm_popn( iter->yield_size ); - for ( i = 0; i < iter->arg_size; i++ ) { - //colm_tree_downref( prg, sp, vm_pop_tree() ); - vm_pop_value(); - } - iter->type = 0; - *psp = sp; - } -} - -tree_t *colm_list_iter_advance( program_t *prg, tree_t ***psp, generic_iter_t *iter ) -{ - tree_t **sp = *psp; - assert( iter->yield_size == (vm_ssize() - iter->root_size) ); - - if ( iter->ref.kid == 0 ) { - /* kid_t is zero, start from the root. */ - list_t *list = *((list_t**)iter->root_ref.kid); - iter->ref.kid = (kid_t*)list->head; - iter->ref.next = 0; - - //= iter->rootRef; - //iter - //iterFind( prg, psp, iter, true ); - } - else { - /* Have a previous item, continue searching from there. 
*/ - //iterFind( prg, psp, iter, false ); - - list_el_t *list_el = (list_el_t*)iter->ref.kid; - list_el = list_el->list_next; - iter->ref.kid = (kid_t*)list_el; - iter->ref.next = 0; - } - - sp = *psp; - iter->yield_size = vm_ssize() - iter->root_size; - - return (iter->ref.kid ? prg->true_val : prg->false_val ); -} - -tree_t *colm_rev_list_iter_advance( program_t *prg, tree_t ***psp, generic_iter_t *iter ) -{ - tree_t **sp = *psp; - assert( iter->yield_size == (vm_ssize() - iter->root_size) ); - - if ( iter->ref.kid == 0 ) { - /* kid_t is zero, start from the root. */ - list_t *list = *((list_t**)iter->root_ref.kid); - iter->ref.kid = (kid_t*)list->tail; - iter->ref.next = 0; - - //= iter->rootRef; - //iter - //iterFind( prg, psp, iter, true ); - } - else { - /* Have a previous item, continue searching from there. */ - //iterFind( prg, psp, iter, false ); - - list_el_t *list_el = (list_el_t*)iter->ref.kid; - list_el = list_el->list_prev; - iter->ref.kid = (kid_t*)list_el; - iter->ref.next = 0; - } - - sp = *psp; - iter->yield_size = vm_ssize() - iter->root_size; - - return (iter->ref.kid ? prg->true_val : prg->false_val ); -} - -tree_t *colm_map_iter_advance( program_t *prg, tree_t ***psp, generic_iter_t *iter ) -{ - tree_t **sp = *psp; - assert( iter->yield_size == (vm_ssize() - iter->root_size) ); - - if ( iter->ref.kid == 0 ) { - /* kid_t is zero, start from the root. */ - map_t *map = *((map_t**)iter->root_ref.kid); - iter->ref.kid = (kid_t*)map->head; - iter->ref.next = 0; - - //= iter->rootRef; - //iter - //iterFind( prg, psp, iter, true ); - } - else { - /* Have a previous item, continue searching from there. */ - //iterFind( prg, psp, iter, false ); - - map_el_t *map_el = (map_el_t*)iter->ref.kid; - map_el = map_el->next; - iter->ref.kid = (kid_t*)map_el; - iter->ref.next = 0; - } - - sp = *psp; - iter->yield_size = vm_ssize() - iter->root_size; - - return (iter->ref.kid ? 
prg->true_val : prg->false_val ); -} - -tree_t *colm_list_iter_deref_cur( program_t *prg, generic_iter_t *iter ) -{ - struct generic_info *gi = &prg->rtd->generic_info[iter->generic_id]; - list_el_t *el = (list_el_t*)iter->ref.kid; - struct colm_struct *s = el != 0 ? - colm_struct_container( el, gi->el_offset ) : 0; - return (tree_t*)s; -} - -value_t colm_viter_deref_cur( program_t *prg, generic_iter_t *iter ) -{ - struct generic_info *gi = &prg->rtd->generic_info[iter->generic_id]; - list_el_t *el = (list_el_t*)iter->ref.kid; - struct colm_struct *s = el != 0 ? - colm_struct_container( el, gi->el_offset ) : 0; - - value_t value = colm_struct_get_field( s, value_t, 0 ); - if ( gi->value_type == TYPE_TREE ) - colm_tree_upref( prg, (tree_t*)value ); - - return value; -} - -void colm_init_tree_iter( tree_iter_t *tree_iter, tree_t **stack_root, - long arg_size, long root_size, - const ref_t *root_ref, int search_id ) -{ - tree_iter->type = IT_Tree; - tree_iter->root_ref = *root_ref; - tree_iter->search_id = search_id; - tree_iter->stack_root = stack_root; - tree_iter->yield_size = 0; - tree_iter->root_size = root_size; - tree_iter->ref.kid = 0; - tree_iter->ref.next = 0; - tree_iter->arg_size = arg_size; -} - -void colm_init_rev_tree_iter( rev_tree_iter_t *rev_triter, tree_t **stack_root, - long arg_size, long root_size, - const ref_t *root_ref, int search_id, int children ) -{ - rev_triter->type = IT_RevTree; - rev_triter->root_ref = *root_ref; - rev_triter->search_id = search_id; - rev_triter->stack_root = stack_root; - rev_triter->yield_size = children; - rev_triter->root_size = root_size; - rev_triter->kid_at_yield = 0; - rev_triter->children = children; - rev_triter->ref.kid = 0; - rev_triter->ref.next = 0; - rev_triter->arg_size = arg_size; -} - -void init_user_iter( user_iter_t *user_iter, tree_t **stack_root, long root_size, - long arg_size, long search_id ) -{ - user_iter->type = IT_User; - user_iter->stack_root = stack_root; - user_iter->arg_size = arg_size; 
- user_iter->yield_size = 0; - user_iter->root_size = root_size; - user_iter->resume = 0; - user_iter->frame = 0; - user_iter->search_id = search_id; - - user_iter->ref.kid = 0; - user_iter->ref.next = 0; -} - - -user_iter_t *colm_uiter_create( program_t *prg, tree_t ***psp, struct function_info *fi, long search_id ) -{ - tree_t **sp = *psp; - - vm_pushn( sizeof(user_iter_t) / sizeof(word_t) ); - void *mem = vm_ptop(); - user_iter_t *uiter = mem; - - tree_t **stack_root = vm_ptop(); - long root_size = vm_ssize(); - - init_user_iter( uiter, stack_root, root_size, fi->arg_size, search_id ); - - *psp = sp; - return uiter; -} - -void uiter_init( program_t *prg, tree_t **sp, user_iter_t *uiter, - struct function_info *fi, int revert_on ) -{ - /* Set up the first yeild so when we resume it starts at the beginning. */ - uiter->ref.kid = 0; - uiter->yield_size = vm_ssize() - uiter->root_size; - // uiter->frame = &uiter->stackRoot[-IFR_AA]; - - if ( revert_on ) - uiter->resume = prg->rtd->frame_info[fi->frame_id].codeWV; - else - uiter->resume = prg->rtd->frame_info[fi->frame_id].codeWC; -} - - -void colm_tree_iter_destroy( program_t *prg, tree_t ***psp, tree_iter_t *iter ) -{ - if ( (int)iter->type != 0 ) { - int i; - tree_t **sp = *psp; - long cur_stack_size = vm_ssize() - iter->root_size; - assert( iter->yield_size == cur_stack_size ); - vm_popn( iter->yield_size ); - for ( i = 0; i < iter->arg_size; i++ ) - colm_tree_downref( prg, sp, vm_pop_tree() ); - iter->type = 0; - *psp = sp; - } -} - -void colm_rev_tree_iter_destroy( struct colm_program *prg, tree_t ***psp, rev_tree_iter_t *riter ) -{ - if ( (int)riter->type != 0 ) { - int i; - tree_t **sp = *psp; - long cur_stack_size = vm_ssize() - riter->root_size; - assert( riter->yield_size == cur_stack_size ); - vm_popn( riter->yield_size ); - for ( i = 0; i < riter->arg_size; i++ ) - colm_tree_downref( prg, sp, vm_pop_tree() ); - riter->type = 0; - *psp = sp; - } -} - -void colm_uiter_destroy( program_t *prg, tree_t 
***psp, user_iter_t *uiter ) -{ - if ( uiter != 0 && (int)uiter->type != 0 ) { - tree_t **sp = *psp; - - /* We should always be coming from a yield. The current stack size will be - * nonzero and the stack size in the iterator will be correct. */ - long cur_stack_size = vm_ssize() - uiter->root_size; - assert( uiter->yield_size == cur_stack_size ); - - vm_popn( uiter->yield_size ); - vm_popn( sizeof(user_iter_t) / sizeof(word_t) ); - - uiter->type = 0; - - *psp = sp; - } -} - -void colm_uiter_unwind( program_t *prg, tree_t ***psp, user_iter_t *uiter ) -{ - if ( uiter != 0 && (int)uiter->type != 0 ) { - tree_t **sp = *psp; - - /* We should always be coming from a yield. The current stack size will be - * nonzero and the stack size in the iterator will be correct. */ - long cur_stack_size = vm_ssize() - uiter->root_size; - assert( uiter->yield_size == cur_stack_size ); - - long arg_size = uiter->arg_size; - - vm_popn( uiter->yield_size ); - vm_popn( sizeof(user_iter_t) / sizeof(word_t) ); - - /* The IN_PREP_ARGS stack data. */ - vm_popn( arg_size ); - vm_pop_value(); - - uiter->type = 0; - - *psp = sp; - } -} - -tree_t *tree_iter_deref_cur( tree_iter_t *iter ) -{ - return iter->ref.kid == 0 ? 
0 : iter->ref.kid->tree; -} - -void set_triter_cur( program_t *prg, tree_iter_t *iter, tree_t *tree ) -{ - iter->ref.kid->tree = tree; -} - -void set_uiter_cur( program_t *prg, user_iter_t *uiter, tree_t *tree ) -{ - uiter->ref.kid->tree = tree; -} - -void split_iter_cur( program_t *prg, tree_t ***psp, tree_iter_t *iter ) -{ - if ( iter->ref.kid == 0 ) - return; - - split_ref( prg, psp, &iter->ref ); -} - -void iter_find( program_t *prg, tree_t ***psp, tree_iter_t *iter, int try_first ) -{ - int any_tree = iter->search_id == prg->rtd->any_id; - tree_t **top = iter->stack_root; - kid_t *child; - tree_t **sp = *psp; - -rec_call: - if ( try_first && ( iter->ref.kid->tree->id == iter->search_id || any_tree ) ) { - *psp = sp; - return; - } - else { - child = tree_child( prg, iter->ref.kid->tree ); - if ( child != 0 ) { - vm_contiguous( 2 ); - vm_push_ref( iter->ref.next ); - vm_push_kid( iter->ref.kid ); - iter->ref.kid = child; - iter->ref.next = (ref_t*)vm_ptop(); - while ( iter->ref.kid != 0 ) { - try_first = true; - goto rec_call; - rec_return: - iter->ref.kid = iter->ref.kid->next; - } - iter->ref.kid = vm_pop_kid(); - iter->ref.next = vm_pop_ref(); - } - } - - if ( top != vm_ptop() ) - goto rec_return; - - iter->ref.kid = 0; - *psp = sp; -} - -tree_t *tree_iter_advance( program_t *prg, tree_t ***psp, tree_iter_t *iter ) -{ - tree_t **sp = *psp; - assert( iter->yield_size == (vm_ssize() - iter->root_size) ); - - if ( iter->ref.kid == 0 ) { - /* kid_t is zero, start from the root. */ - iter->ref = iter->root_ref; - iter_find( prg, psp, iter, true ); - } - else { - /* Have a previous item, continue searching from there. */ - iter_find( prg, psp, iter, false ); - } - - sp = *psp; - iter->yield_size = vm_ssize() - iter->root_size; - - return (iter->ref.kid ? 
prg->true_val : prg->false_val ); -} - -tree_t *tree_iter_next_child( program_t *prg, tree_t ***psp, tree_iter_t *iter ) -{ - tree_t **sp = *psp; - assert( iter->yield_size == (vm_ssize() - iter->root_size) ); - kid_t *kid = 0; - - if ( iter->ref.kid == 0 ) { - /* kid_t is zero, start from the first child. */ - kid_t *child = tree_child( prg, iter->root_ref.kid->tree ); - - if ( child == 0 ) - iter->ref.next = 0; - else { - /* Make a reference to the root. */ - vm_contiguous( 2 ); - vm_push_ref( iter->root_ref.next ); - vm_push_kid( iter->root_ref.kid ); - iter->ref.next = (ref_t*)vm_ptop(); - - kid = child; - } - } - else { - /* Start at next. */ - kid = iter->ref.kid->next; - } - - if ( iter->search_id != prg->rtd->any_id ) { - /* Have a previous item, go to the next sibling. */ - while ( kid != 0 && kid->tree->id != iter->search_id ) - kid = kid->next; - } - - iter->ref.kid = kid; - iter->yield_size = vm_ssize() - iter->root_size; - *psp = sp; - return ( iter->ref.kid ? prg->true_val : prg->false_val ); -} - -tree_t *tree_rev_iter_prev_child( program_t *prg, tree_t ***psp, rev_tree_iter_t *iter ) -{ - tree_t **sp = *psp; - assert( iter->yield_size == ( vm_ssize() - iter->root_size ) ); - - if ( iter->kid_at_yield != iter->ref.kid ) { - /* Need to reload the kids. */ - vm_popn( iter->children ); - - int c; - kid_t *kid = tree_child( prg, iter->root_ref.kid->tree ); - for ( c = 0; c < iter->children; c++ ) { - vm_push_kid( kid ); - kid = kid->next; - } - } - - if ( iter->ref.kid != 0 ) { - vm_pop_ignore(); - iter->children -= 1; - } - - if ( iter->search_id != prg->rtd->any_id ) { - /* Have a previous item, go to the next sibling. 
*/ - while ( iter->children > 0 && ((kid_t*)(vm_top()))->tree->id != iter->search_id ) { - iter->children -= 1; - vm_pop_ignore(); - } - } - - if ( iter->children == 0 ) { - iter->ref.next = 0; - iter->ref.kid = 0; - } - else { - iter->ref.next = &iter->root_ref; - iter->ref.kid = (kid_t*)vm_top(); - } - - /* We will use this to detect a split above the iterated tree. */ - iter->kid_at_yield = iter->ref.kid; - - iter->yield_size = vm_ssize() - iter->root_size; - - *psp = sp; - - return (iter->ref.kid ? prg->true_val : prg->false_val ); -} - -void iter_find_repeat( program_t *prg, tree_t ***psp, tree_iter_t *iter, int try_first ) -{ - tree_t **sp = *psp; - int any_tree = iter->search_id == prg->rtd->any_id; - tree_t **top = iter->stack_root; - kid_t *child; - -rec_call: - if ( try_first && ( iter->ref.kid->tree->id == iter->search_id || any_tree ) ) { - *psp = sp; - return; - } - else { - /* The repeat iterator is just like the normal top-down-left-right, - * execept it only goes into the children of a node if the node is the - * root of the iteration, or if does not have any neighbours to the - * right. */ - if ( top == vm_ptop() || iter->ref.kid->next == 0 ) { - child = tree_child( prg, iter->ref.kid->tree ); - if ( child != 0 ) { - vm_contiguous( 2 ); - vm_push_ref( iter->ref.next ); - vm_push_kid( iter->ref.kid ); - iter->ref.kid = child; - iter->ref.next = (ref_t*)vm_ptop(); - while ( iter->ref.kid != 0 ) { - try_first = true; - goto rec_call; - rec_return: - iter->ref.kid = iter->ref.kid->next; - } - iter->ref.kid = vm_pop_kid(); - iter->ref.next = vm_pop_ref(); - } - } - } - - if ( top != vm_ptop() ) - goto rec_return; - - iter->ref.kid = 0; - *psp = sp; -} - -tree_t *tree_iter_next_repeat( program_t *prg, tree_t ***psp, tree_iter_t *iter ) -{ - tree_t **sp = *psp; - assert( iter->yield_size == ( vm_ssize() - iter->root_size ) ); - - if ( iter->ref.kid == 0 ) { - /* kid_t is zero, start from the root. 
*/ - iter->ref = iter->root_ref; - iter_find_repeat( prg, psp, iter, true ); - } - else { - /* Have a previous item, continue searching from there. */ - iter_find_repeat( prg, psp, iter, false ); - } - - sp = *psp; - iter->yield_size = vm_ssize() - iter->root_size; - - return (iter->ref.kid ? prg->true_val : prg->false_val ); -} - -void iter_find_rev_repeat( program_t *prg, tree_t ***psp, tree_iter_t *iter, int try_first ) -{ - tree_t **sp = *psp; - int any_tree = iter->search_id == prg->rtd->any_id; - tree_t **top = iter->stack_root; - kid_t *child; - - if ( try_first ) { - while ( true ) { - if ( top == vm_ptop() || iter->ref.kid->next == 0 ) { - child = tree_child( prg, iter->ref.kid->tree ); - - if ( child == 0 ) - break; - vm_contiguous( 2 ); - vm_push_ref( iter->ref.next ); - vm_push_kid( iter->ref.kid ); - iter->ref.kid = child; - iter->ref.next = (ref_t*)vm_ptop(); - } - else { - /* Not the top and not there is a next, go over to it. */ - iter->ref.kid = iter->ref.kid->next; - } - } - - goto first; - } - - while ( true ) { - if ( top == vm_ptop() ) { - iter->ref.kid = 0; - return; - } - - if ( iter->ref.kid->next == 0 ) { - /* Go up one and then down. Remember we can't use iter->ref.next - * because the chain may have been split, setting it null (to - * prevent repeated walks up). */ - ref_t *ref = (ref_t*)vm_ptop(); - iter->ref.kid = tree_child( prg, ref->kid->tree ); - } - else { - iter->ref.kid = vm_pop_kid(); - iter->ref.next = vm_pop_ref(); - } -first: - if ( iter->ref.kid->tree->id == iter->search_id || any_tree ) { - *psp = sp; - return; - } - } - *psp = sp; - return; -} - - -tree_t *tree_iter_prev_repeat( program_t *prg, tree_t ***psp, tree_iter_t *iter ) -{ - tree_t **sp = *psp; - assert( iter->yield_size == (vm_ssize() - iter->root_size) ); - - if ( iter->ref.kid == 0 ) { - /* kid_t is zero, start from the root. 
*/ - iter->ref = iter->root_ref; - iter_find_rev_repeat( prg, psp, iter, true ); - } - else { - /* Have a previous item, continue searching from there. */ - iter_find_rev_repeat( prg, psp, iter, false ); - } - - sp = *psp; - iter->yield_size = vm_ssize() - iter->root_size; - - return (iter->ref.kid ? prg->true_val : prg->false_val ); -} - - - diff --git a/src/keyops.h b/src/keyops.h deleted file mode 100644 index 924fa7ab..00000000 --- a/src/keyops.h +++ /dev/null @@ -1,199 +0,0 @@ -/* - * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - - -#ifndef _COLM_KEYOPS_H -#define _COLM_KEYOPS_H - -#include <fstream> -#include <climits> - -enum MarkType -{ - MarkNone = 0, - MarkMark -}; - -typedef unsigned long long Size; - -struct Key -{ -private: - long key; - -public: - friend inline Key operator+(const Key key1, const Key key2); - friend inline Key operator-(const Key key1, const Key key2); - - friend inline bool operator<( const Key key1, const Key key2 ); - friend inline bool operator<=( const Key key1, const Key key2 ); - friend inline bool operator>( const Key key1, const Key key2 ); - friend inline bool operator>=( const Key key1, const Key key2 ); - friend inline bool operator==( const Key key1, const Key key2 ); - friend inline bool operator!=( const Key key1, const Key key2 ); - - friend struct KeyOps; - - Key( ) {} - Key( const Key &key ) : key(key.key) {} - Key( long key ) : key(key) {} - - /* Returns the value used to represent the key. This value must be - * interpreted based on signedness. */ - long getVal() const { return key; }; - - /* Returns the key casted to a long long. This form of the key does not - * require and signedness interpretation. */ - long long getLongLong() const; - - bool isUpper() const { return ( 'A' <= key && key <= 'Z' ); } - bool isLower() const { return ( 'a' <= key && key <= 'z' ); } - bool isPrintable() const { return ( 32 <= key && key < 127 ); } - - Key toUpper() const - { return Key( 'A' + ( key - 'a' ) ); } - Key toLower() const - { return Key( 'a' + ( key - 'A' ) ); } - - void operator+=( const Key other ) - { key += other.key; } - - void operator-=( const Key other ) - { key -= other.key; } - - void operator|=( const Key other ) - { key |= other.key; } - - /* Decrement. Needed only for ranges. 
*/ - inline void decrement(); - inline void increment(); -}; - -struct HostType -{ - const char *data1; - const char *data2; - long long minVal; - long long maxVal; - unsigned int size; -}; - -struct HostLang -{ - HostType *hostTypes; - int numHostTypes; - HostType *defaultAlphType; - bool explicitUnsigned; -}; - -extern HostLang *hostLang; -extern HostLang hostLangC; - -/* An abstraction of the key operators that manages key operations such as - * comparison and increment according the signedness of the key. */ -struct KeyOps -{ - /* Default to signed alphabet. */ - KeyOps() : alphType(0) {} - - Key minKey, maxKey; - HostType *alphType; - - void setAlphType( HostType *alphType ) - { - this->alphType = alphType; - minKey = (long) alphType->minVal; - maxKey = (long) alphType->maxVal; - } - - /* Compute the distance between two keys. */ - Size span( Key key1, Key key2 ) - { - return (unsigned long long)( (long long)key2.key - (long long)key1.key + 1) ; - } - - Size alphSize() - { return span( minKey, maxKey ); } -}; - -inline bool operator<( const Key key1, const Key key2 ) -{ - return key1.key < key2.key; -} - -inline bool operator<=( const Key key1, const Key key2 ) -{ - return key1.key <= key2.key; -} - -inline bool operator>( const Key key1, const Key key2 ) -{ - return key1.key > key2.key; -} - -inline bool operator>=( const Key key1, const Key key2 ) -{ - return key1.key >= key2.key; -} - -inline bool operator==( const Key key1, const Key key2 ) -{ - return key1.key == key2.key; -} - -inline bool operator!=( const Key key1, const Key key2 ) -{ - return key1.key != key2.key; -} - -/* Decrement. Needed only for ranges. */ -inline void Key::decrement() -{ - key = key - 1; -} - -/* Increment. Needed only for ranges. 
*/ -inline void Key::increment() -{ - key = key + 1; -} - -inline long long Key::getLongLong() const -{ - return (long long) key; -} - -inline Key operator+(const Key key1, const Key key2) -{ - return Key( key1.key + key2.key ); -} - -inline Key operator-(const Key key1, const Key key2) -{ - return Key( key1.key - key2.key ); -} - -const char *findFileExtension( const char *stemFile ); -char *fileNameFromStem( const char *stemFile, const char *suffix ); - -#endif /* _COLM_KEYOPS_H */ - diff --git a/src/list.c b/src/list.c deleted file mode 100644 index 2003674a..00000000 --- a/src/list.c +++ /dev/null @@ -1,255 +0,0 @@ -/* - * Copyright 2007-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include <string.h> -#include <stdlib.h> -#include <assert.h> - -#include <colm/pdarun.h> -#include <colm/program.h> -#include <colm/struct.h> -#include <colm/bytecode.h> - -static void colm_list_add_after( list_t *list, list_el_t *prev_el, list_el_t *new_el ); -static void colm_list_add_before( list_t *list, list_el_t *next_el, list_el_t *new_el); -list_el_t *colm_list_detach( list_t *list, list_el_t *el ); - -void colm_list_prepend( list_t *list, list_el_t *new_el ) -{ - colm_list_add_before( list, list->head, new_el ); -} - -void colm_list_append( list_t *list, list_el_t *new_el ) -{ - colm_list_add_after( list, list->tail, new_el ); -} - -list_el_t *colm_list_detach_head( list_t *list ) -{ - return colm_list_detach( list, list->head ); -} - -list_el_t *colm_list_detach_tail( list_t *list ) -{ - return colm_list_detach( list, list->tail ); -} - -long colm_list_length( list_t *list ) -{ - return list->list_len; -} - -void colm_vlist_append( struct colm_program *prg, list_t *list, value_t value ) -{ - struct colm_struct *s = colm_struct_new( prg, list->generic_info->el_struct_id ); - - colm_struct_set_field( s, value_t, 0, value ); - - list_el_t *list_el = colm_struct_get_addr( s, list_el_t*, list->generic_info->el_offset ); - - colm_list_append( list, list_el ); -} - -void colm_vlist_prepend( struct colm_program *prg, list_t *list, value_t value ) -{ - struct colm_struct *s = colm_struct_new( prg, list->generic_info->el_struct_id ); - - colm_struct_set_field( s, value_t, 0, value ); - - list_el_t *list_el = colm_struct_get_addr( s, list_el_t*, list->generic_info->el_offset ); - - colm_list_prepend( list, list_el ); -} - -value_t colm_vlist_detach_tail( struct colm_program *prg, list_t *list ) -{ - list_el_t *list_el = list->tail; - colm_list_detach( list, list_el ); - - struct colm_struct *s = colm_generic_el_container( prg, list_el, - (list->generic_info - prg->rtd->generic_info) ); - - value_t val = colm_struct_get_field( s, value_t, 0 ); - - if ( 
list->generic_info->value_type == TYPE_TREE ) - colm_tree_upref( prg, (tree_t*)val ); - - return val; -} - -value_t colm_vlist_detach_head( struct colm_program *prg, list_t *list ) -{ - list_el_t *list_el = list->head; - colm_list_detach( list, list_el ); - - struct colm_struct *s = colm_generic_el_container( prg, list_el, - (list->generic_info - prg->rtd->generic_info) ); - - value_t val = colm_struct_get_field( s, value_t, 0 ); - - if ( list->generic_info->value_type == TYPE_TREE ) - colm_tree_upref( prg, (tree_t*) val ); - - return val; -} - - -static void colm_list_add_after( list_t *list, list_el_t *prev_el, list_el_t *new_el ) -{ - /* Set the previous pointer of new_el to prev_el. We do - * this regardless of the state of the list. */ - new_el->list_prev = prev_el; - - /* Set forward pointers. */ - if (prev_el == 0) { - /* There was no prev_el, we are inserting at the head. */ - new_el->list_next = list->head; - list->head = new_el; - } - else { - /* There was a prev_el, we can access previous next. */ - new_el->list_next = prev_el->list_next; - prev_el->list_next = new_el; - } - - /* Set reverse pointers. */ - if (new_el->list_next == 0) { - /* There is no next element. Set the tail pointer. */ - list->tail = new_el; - } - else { - /* There is a next element. Set it's prev pointer. */ - new_el->list_next->list_prev = new_el; - } - - /* Update list length. */ - list->list_len++; -} - -static void colm_list_add_before( list_t *list, list_el_t *next_el, list_el_t *new_el) -{ - /* Set the next pointer of the new element to next_el. We do - * this regardless of the state of the list. */ - new_el->list_next = next_el; - - /* Set reverse pointers. */ - if (next_el == 0) { - /* There is no next elememnt. We are inserting at the tail. */ - new_el->list_prev = list->tail; - list->tail = new_el; - } - else { - /* There is a next element and we can access next's previous. 
*/ - new_el->list_prev = next_el->list_prev; - next_el->list_prev = new_el; - } - - /* Set forward pointers. */ - if (new_el->list_prev == 0) { - /* There is no previous element. Set the head pointer.*/ - list->head = new_el; - } - else { - /* There is a previous element, set it's next pointer to new_el. */ - new_el->list_prev->list_next = new_el; - } - - list->list_len++; -} - -list_el_t *colm_list_detach( list_t *list, list_el_t *el ) -{ - /* Set forward pointers to skip over el. */ - if (el->list_prev == 0) - list->head = el->list_next; - else - el->list_prev->list_next = el->list_next; - - /* Set reverse pointers to skip over el. */ - if (el->list_next == 0) - list->tail = el->list_prev; - else - el->list_next->list_prev = el->list_prev; - - /* Update List length and return element we detached. */ - list->list_len--; - return el; -} - -void colm_list_destroy( struct colm_program *prg, tree_t **sp, struct colm_struct *s ) -{ -} - -list_t *colm_list_new( struct colm_program *prg ) -{ - size_t memsize = sizeof(struct colm_list); - struct colm_list *list = (struct colm_list*) malloc( memsize ); - memset( list, 0, memsize ); - colm_struct_add( prg, (struct colm_struct *)list ); - list->id = prg->rtd->struct_inbuilt_id; - list->destructor = &colm_list_destroy; - return list; -} - -struct colm_struct *colm_list_get( struct colm_program *prg, - list_t *list, word_t gen_id, word_t field ) -{ - struct generic_info *gi = &prg->rtd->generic_info[gen_id]; - list_el_t *result = 0; - switch ( field ) { - case 0: - result = list->head; - break; - case 1: - result = list->tail; - break; - default: - assert( 0 ); - break; - } - - struct colm_struct *s = result != 0 ? 
- colm_struct_container( result, gi->el_offset ) : 0; - return s; -} - -struct colm_struct *colm_list_el_get( struct colm_program *prg, - list_el_t *list_el, word_t gen_id, word_t field ) -{ - struct generic_info *gi = &prg->rtd->generic_info[gen_id]; - list_el_t *result = 0; - switch ( field ) { - case 0: - result = list_el->list_prev; - break; - case 1: - result = list_el->list_next; - break; - default: - assert( 0 ); - break; - } - - struct colm_struct *s = result != 0 ? - colm_struct_container( result, gi->el_offset ) : 0; - return s; -} diff --git a/src/lmparse.kh b/src/lmparse.kh deleted file mode 100644 index 13977a9e..00000000 --- a/src/lmparse.kh +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright 2001-2007, 2013 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef LMPARSE_H -#define LMPARSE_H - -#include <iostream> -#include "avltree.h" -#include "parsedata.h" -#include "parser.h" - -struct ColmParser -: - public BaseParser -{ - ColmParser( Compiler *pd ) - : BaseParser( pd ) - {} - - %%{ - parser ColmParser; - - # Use a class for tokens. - token uses class Token; - - # Atoms. - token TK_Word, TK_Literal, TK_SingleLit, TK_DoubleLit, TK_Number, TK_UInt, - TK_Hex, KW_Nil, KW_True, KW_False; - - # General tokens. - token TK_DotDot, TK_ColonGt, TK_ColonGtGt, TK_LtColon, - TK_DoubleArrow, TK_StarStar, TK_NameSep, TK_DashDash, TK_DoubleEql, - TK_NotEql, TK_DoubleColon, TK_LessEql, TK_GrtrEql, TK_RightArrow, - TK_LitPat, TK_AmpAmp, TK_BarBar, TK_SqOpen, TK_SqOpenNeg, TK_SqClose, - TK_Dash, TK_ReChar, TK_LtLt; - - # Defining things. - token KW_Rl, KW_Def, KW_Lex, KW_Context, KW_Ignore, KW_Token, KW_Commit, KW_Namespace, KW_End, - KW_Literal, KW_ReduceFirst, KW_Map, KW_List, KW_Vector, KW_Parser, KW_Global, KW_Export, - KW_Iter, KW_Reject, KW_Ptr, KW_Ref, KW_Deref; - - # Language. - token KW_If, KW_While, KW_Else, KW_Elsif, KW_For, KW_Return, KW_Yield, KW_In, - KW_Break, KW_PrintXMLAC, KW_PrintXML, KW_Print, KW_PrintStream, KW_Require; - - # Patterns. - token KW_Match, KW_Construct, KW_Parse, KW_ParseStop, KW_New, KW_MakeToken, - KW_MakeTree, KW_TypeId, KW_Alias, KW_Send, KW_Ni; - - token KW_Include, KW_Preeof; - - token KW_Left, KW_Right, KW_Nonassoc, KW_Prec; - - }%% - - %% write instance_data; - - /* Report an error encountered by the parser. 
*/ - ostream &parse_error( int tokId, Token &token ); - void init(); - int parseLangEl( int type, const Token *token ); - int token( InputLoc &loc, int tokId, char *tokstart, int toklen ); -}; - -%% write token_defs; - -#endif diff --git a/src/lmparse.kl b/src/lmparse.kl deleted file mode 100644 index b64bd344..00000000 --- a/src/lmparse.kl +++ /dev/null @@ -1,2139 +0,0 @@ -/* - * Copyright 2006-2012 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include <iostream> -#include <errno.h> - -#include "config.h" -#include "lmparse.h" -#include "global.h" -#include "input.h" - -using std::cout; -using std::cerr; -using std::endl; - -%%{ - -parser ColmParser; - -include "lmparse.kh"; - -start: root_item_list - final { - pd->rootCodeBlock = CodeBlock::cons( $1->stmtList, 0 ); - }; - -nonterm root_item_list uses lang_stmt_list; - -root_item_list: root_item_list root_item - final { - $$->stmtList = appendStatement( $1->stmtList, $2->stmt ); - }; - -root_item_list: - final { - $$->stmtList = new StmtList; - }; - -nonterm root_item uses statement; - -root_item: literal_def commit final { $$->stmt = 0; }; -root_item: rl_def commit final { $$->stmt = 0; }; -root_item: token_def commit final { $$->stmt = 0; }; -root_item: cfl_def commit final { $$->stmt = 0; }; -root_item: region_def commit final { $$->stmt = 0; }; -root_item: context_def commit final { $$->stmt = 0; }; -root_item: namespace_def commit final { $$->stmt = 0; }; -root_item: function_def commit final { $$->stmt = 0; }; -root_item: iter_def commit final { $$->stmt = 0; }; -root_item: global_def commit final { $$->stmt = $1->stmt; }; -root_item: export_def commit final { $$->stmt = 0; }; -root_item: statement commit final { $$->stmt = $1->stmt; }; -root_item: pre_eof commit final { $$->stmt = 0; }; -root_item: precedence commit final { $$->stmt = 0; }; -root_item: typedef commit final { $$->stmt = 0; }; - -nonterm block_open -{ - ObjectDef *localFrame; -}; - -block_open: '{' - final { - $$->localFrame = blockOpen(); - }; - -block_close: '}' - final { - blockClose(); - }; - - -iter_def: - KW_Iter TK_Word '(' opt_param_list ')' block_open lang_stmt_list block_close - final { - iterDef( $7->stmtList, $6->localFrame, $4->paramList, $2->data ); - }; - -function_def: - type_ref TK_Word '(' opt_param_list ')' block_open lang_stmt_list block_close - final { - functionDef( $7->stmtList, $6->localFrame, $4->paramList, $1->typeRef, $2->data ); - }; - -nonterm 
opt_param_list uses param_list; - -opt_param_list: param_list - final { - $$->paramList = $1->paramList; - }; - -opt_param_list: - final { - $$->paramList = new ParameterList; - }; - -nonterm param_list -{ - ParameterList *paramList; -}; - -param_list: param_list param_var_def - final { - $$->paramList = appendParam( $1->paramList, $2->objField ); - }; - -param_list: param_var_def - final { - $$->paramList = appendParam( new ParameterList, $1->objField ); - }; - -nonterm param_var_def uses var_def; - -param_var_def: TK_Word ':' type_ref - final { - $$->objField = addParam( $1->loc, $3->typeRef, $1->data ); - }; -param_var_def: TK_Word ':' reference_type_ref - final { - $$->objField = addParam( $1->loc, $3->typeRef, $1->data ); - }; - -nonterm reference_type_ref uses type_ref; - -reference_type_ref: KW_Ref type_ref - final { - $$->typeRef = TypeRef::cons( $1->loc, TypeRef::Ref, $2->typeRef ); - }; - -nonterm global_def uses statement; - -global_def: KW_Global var_def opt_def_init - final { - $$->stmt = globalDef( $2->objField, $3->expr, $3->assignType ); - }; - -nonterm export_def uses statement; - -export_def: KW_Export var_def opt_def_init - final { - $$->stmt = exportStmt( $2->objField, $3->assignType, $3->expr ); - }; - -precedence: - pred_type pred_token_list - final { - precedenceStmt( $1->predType, $2->predDeclList ); - }; - -nonterm pred_type -{ - PredType predType; -}; - -pred_type: KW_Left final { $$->predType = PredLeft; }; -pred_type: KW_Right final { $$->predType = PredRight; }; -pred_type: KW_Nonassoc final { $$->predType = PredNonassoc; }; - -nonterm pred_token_list -{ - PredDeclList *predDeclList; -}; - -pred_token_list: - pred_token_list ',' pred_token - final { - $$->predDeclList = $1->predDeclList; - $$->predDeclList->append( $3->predDecl ); - }; - -pred_token_list: - pred_token - final { - $$->predDeclList = new PredDeclList; - $$->predDeclList->append( $1->predDecl ); - }; - -nonterm pred_token -{ - PredDecl *predDecl; -}; - -pred_token: - 
region_qual TK_Word - final { - $$->predDecl = predTokenName( $2->loc, $1->nspaceQual, $2->data ); - }; - -pred_token: - region_qual TK_Literal - final { - $$->predDecl = predTokenLit( $2->loc, $2->data, $1->nspaceQual ); - }; - -typedef: - KW_Alias TK_Word type_ref - final { - alias( $1->loc, $2->data, $3->typeRef ); - }; - -cfl_def: - cfl_def_head obj_var_list opt_reduce_first cfl_prod_list - final { - $2->objectDef->name = $1->name; - NtDef *ntDef = NtDef::cons( $1->name, namespaceStack.top(), - contextStack.top(), $3->reduceFirst ); - - cflDef( ntDef, $2->objectDef, $4->defList ); - }; - -nonterm class cfl_def_head -{ - String name; -}; - -cfl_def_head: KW_Def TK_Word - final { - $$->name = $2->data; - }; - -nonterm cfl_prod_list -{ - LelDefList *defList; -}; - -cfl_prod_list: cfl_prod_list '|' define_prod - final { - $$->defList = prodAppend( $1->defList, $3->definition ); - }; -cfl_prod_list: define_prod - final { - $$->defList = prodAppend( new LelDefList, $1->definition ); - }; - -nonterm opt_reduce_first -{ - bool reduceFirst; -}; - -opt_reduce_first: - KW_ReduceFirst - final { - $$->reduceFirst = true; - }; -opt_reduce_first: - final { - $$->reduceFirst = false; - }; - -nonterm opt_prec -{ - LangEl *predOf; -}; - -opt_prec: - final { - $$->predOf = 0; - }; - -opt_prec: - KW_Prec pred_token - final { - //$$->predOf = $2->factor->langEl; - assert(false); - }; - -nonterm define_prod -{ - Production *definition; -}; - -define_prod: '[' prod_el_list ']' opt_commit opt_reduce_code opt_prec - final { - $$->definition = production( $1->loc, $2->list, $4->commit, - $5->codeBlock, $6->predOf ); - }; - -nonterm obj_var_list -{ - ObjectDef *objectDef; -}; - -obj_var_list: obj_var_list var_def - final { - objVarDef( $1->objectDef, $2->objField ); - $$->objectDef = $1->objectDef; - }; - -obj_var_list: - final { - $$->objectDef = ObjectDef::cons( ObjectDef::UserType, - String(), pd->nextObjectId++ ); - }; - - -nonterm type_ref -{ - TypeRef *typeRef; -}; - -type_ref: 
basic_type_ref - final { - $$->typeRef = $1->typeRef; - }; - -type_ref: KW_Map '<' type_ref type_ref '>' - final { - $$->typeRef = TypeRef::cons( $1->loc, TypeRef::Map, - 0, $3->typeRef, $4->typeRef ); - }; - -type_ref: KW_List '<' type_ref '>' - final { - $$->typeRef = TypeRef::cons( $1->loc, TypeRef::List, - 0, $3->typeRef, 0 ); - }; -type_ref: KW_Vector '<' type_ref '>' - final { - $$->typeRef = TypeRef::cons( $1->loc, TypeRef::Vector, - 0, $3->typeRef, 0 ); - }; -type_ref: KW_Parser '<' type_ref '>' - final { - $$->typeRef = TypeRef::cons( $1->loc, TypeRef::Parser, - 0, $3->typeRef, 0 ); - }; - -nonterm basic_type_ref uses type_ref; - -basic_type_ref: region_qual TK_Word opt_repeat - final { - $$->typeRef = TypeRef::cons( $2->loc, $1->nspaceQual, $2->data, $3->repeatType ); - }; - -basic_type_ref: KW_Ptr region_qual TK_Word opt_repeat - final { - TypeRef *inner = TypeRef::cons( $1->loc, $2->nspaceQual, $3->data, $4->repeatType ); - $$->typeRef = TypeRef::cons( $1->loc, TypeRef::Ptr, inner ); - }; - - -nonterm var_def -{ - InputLoc loc; - ObjectField *objField; -}; - -var_def: TK_Word ':' type_ref - final { - /* Return an object field object. The user of this nonterminal must - * load it into the approrpriate map and do error checking. */ - $$->objField = ObjectField::cons( $1->loc, $3->typeRef, $1->data ); - }; - -region_def: - region_head root_item_list KW_End - final { - popRegionSet(); - }; - -region_head: - KW_Lex - final { - pushRegionSet( $1->loc ); - }; - -namespace_def: - namespace_head root_item_list KW_End - final { - namespaceStack.pop(); - }; - - -namespace_head: - KW_Namespace TK_Word - final { - /* Make the new namespace. 
*/ - createNamespace( $1->loc, $2->data ); - }; - -context_var_def: - var_def - final { - contextVarDef( $1->loc, $1->objField ); - }; - - -context_item: context_var_def commit; -context_item: literal_def commit; -context_item: rl_def commit; -context_item: token_def commit; -context_item: cfl_def commit; -context_item: region_def commit; -context_item: context_def commit; -context_item: function_def commit; -context_item: iter_def commit; -context_item: export_def commit; -context_item: pre_eof commit; -context_item: precedence commit; - -context_item_list: - context_item_list context_item; -context_item_list: - ; - -context_def: - context_head context_item_list KW_End - final { - contextStack.pop(); - namespaceStack.pop(); - }; - -context_head: - KW_Context TK_Word - final { - contextHead( $1->loc, $2->data ); - }; - -# -# Pattern -# - -nonterm pattern -{ - PatternItemList *list; - InputLoc loc; -}; - -pattern: - pattern_list - final { - $$->list = $1->list; - }; - -nonterm pattern_list uses pattern; - -pattern_list: pattern_list pattern_top_el - final { - $$->list = patListConcat( $1->list, $2->list ); - }; -pattern_list: pattern_top_el - final { - $$->list = $1->list; - }; - -nonterm pattern_top_el uses pattern; - -pattern_top_el: '"' litpat_el_list '"' - final { - $$->list = $2->list; - }; -pattern_top_el: '[' pattern_el_list ']' - final { - $$->list = $2->list; - }; - -nonterm litpat_el_list uses pattern; - -litpat_el_list: litpat_el_list litpat_el - final { - $$->list = patListConcat( $1->list, $2->list ); - }; -litpat_el_list: - final { - $$->list = new PatternItemList; - }; - -nonterm litpat_el uses pattern; - -litpat_el: TK_LitPat - final { - PatternItem *patternItem = PatternItem::cons( $1->loc, $1->data, - PatternItem::InputText ); - $$->list = PatternItemList::cons( patternItem ); - }; - -litpat_el: '[' pattern_el_list ']' - final { - $$->list = $2->list; - }; - -nonterm pattern_el_list uses pattern; - -pattern_el_list: - pattern_el_list pattern_el - 
final { - $$->list = patListConcat( $1->list, $2->list ); - }; -pattern_el_list: - final { - $$->list = new PatternItemList; - }; - -nonterm pattern_el uses pattern; - -pattern_el: - opt_label pattern_el_type_or_lit - final { - $$->list = patternEl( $1->varRef, $2->list ); - }; - -nonterm pattern_el uses pattern; - -pattern_el: '"' litpat_el_list '"' - final { - $$->list = $2->list; - }; -pattern_el: '?' TK_Word - final { - /* FIXME: Implement */ - assert(false); - }; - -nonterm pattern_el_type_or_lit uses pattern; - -pattern_el_type_or_lit: - region_qual TK_Word opt_repeat - final { - $$->list = patternElNamed( $2->loc, $1->nspaceQual, $2->data, $3->repeatType ); - }; - -pattern_el_type_or_lit: - region_qual TK_Literal opt_repeat - final { - $$->list = patternElType( $2->loc, $1->nspaceQual, $2->data, $3->repeatType ); - }; - -nonterm opt_label -{ - /* Variable reference. */ - LangVarRef *varRef; -}; - -opt_label: TK_Word ':' - final { - $$->varRef = LangVarRef::cons( $1->loc, $1->data ); - }; -opt_label: - final { - $$->varRef = 0; - }; - -# -# Constructor List (constructor) -# - -nonterm constructor -{ - ConsItemList *list; -}; - -constructor: cons_list - final { - $$->list = $1->list; - }; - -nonterm cons_list uses constructor; - -cons_list: cons_top_el cons_list - final { - $$->list = consListConcat( $1->list, $2->list ); - }; -cons_list: cons_top_el - final { - $$->list = $1->list; - }; - -nonterm cons_top_el uses constructor; - -cons_top_el: '"' lit_cons_el_list '"' - final { - $$->list = $2->list; - }; -cons_top_el: '[' cons_el_list ']' - final { - $$->list = $2->list; - }; - -nonterm lit_cons_el_list uses constructor; - -lit_cons_el_list: lit_cons_el_list lit_cons_el - final { - $$->list = consListConcat( $1->list, $2->list ); - }; -lit_cons_el_list: - final { - $$->list = new ConsItemList; - }; - -nonterm lit_cons_el uses constructor; - -lit_cons_el: TK_LitPat - final { - ConsItem *consItem = ConsItem::cons( $1->loc, ConsItem::InputText, $1->data ); - 
$$->list = ConsItemList::cons( consItem ); - }; - -lit_cons_el: '[' cons_el_list ']' - final { - $$->list = $2->list; - }; - -nonterm cons_el_list uses constructor; - -cons_el_list: cons_el_list cons_el - final { - $$->list = consListConcat( $1->list, $2->list ); - }; -cons_el_list: - final { - $$->list = new ConsItemList; - }; - -nonterm cons_el uses constructor; - -cons_el: region_qual TK_Literal - final { - $$->list = consElLiteral( $2->loc, $2->data, $1->nspaceQual ); - }; - -cons_el: '"' lit_cons_el_list '"' - final { - $$->list = $2->list; - }; - -cons_el: code_expr - final { - ConsItem *consItem = ConsItem::cons( $1->expr->loc, ConsItem::ExprType, $1->expr ); - $$->list = ConsItemList::cons( consItem ); - }; - -# -# Accumulate List -# - -nonterm accumulate -{ - ConsItemList *list; -}; - -accumulate: - accum_list - final { - $$->list = $1->list; - }; - -nonterm accum_list uses accumulate; - -accum_list: accum_top_el accum_list - final { - $$->list = consListConcat( $1->list, $2->list ); - }; - -accum_list: accum_top_el - final { - $$->list = $1->list; - }; - -nonterm accum_top_el uses accumulate; - -accum_top_el: '"' lit_accum_el_list '"' - final { - $$->list = $2->list; - }; - -accum_top_el: '[' accum_el_list ']' - final { - $$->list = $2->list; - }; - -nonterm lit_accum_el_list uses accumulate; - -lit_accum_el_list: - lit_accum_el_list lit_accum_el - final { - $$->list = consListConcat( $1->list, $2->list ); - }; - -lit_accum_el_list: - final { - $$->list = new ConsItemList; - }; - -nonterm lit_accum_el uses accumulate; - -lit_accum_el: TK_LitPat - final { - ConsItem *consItem = ConsItem::cons( $1->loc, ConsItem::InputText, $1->data ); - $$->list = ConsItemList::cons( consItem ); - }; - -lit_accum_el: '[' accum_el_list ']' - final { - $$->list = $2->list; - }; - -nonterm accum_el_list uses accumulate; - -accum_el_list: accum_el_list accum_el - final { - $$->list = consListConcat( $1->list, $2->list ); - }; - -accum_el_list: - final { - $$->list = new 
ConsItemList; - }; - -nonterm accum_el uses accumulate; - -accum_el: code_expr - final { - ConsItem *consItem = ConsItem::cons( $1->expr->loc, ConsItem::ExprType, $1->expr ); - $$->list = ConsItemList::cons( consItem ); - }; - -accum_el: '"' lit_accum_el_list '"' - final { - $$->list = $2->list; - }; - - -# -# String List -# - -nonterm string -{ - ConsItemList *list; -}; - -string: string_list - final { - $$->list = $1->list; - }; - -nonterm string_list uses string; - -string_list: string_top_el string_list - final { - $$->list = consListConcat( $1->list, $2->list ); - }; -string_list: string_top_el - final { - $$->list = $1->list; - }; - -nonterm string_top_el uses string; - -string_top_el: '"' lit_string_el_list '"' - final { - $$->list = $2->list; - }; -string_top_el: '[' string_el_list ']' - final { - $$->list = $2->list; - }; - -nonterm lit_string_el_list uses string; - -lit_string_el_list: lit_string_el_list lit_string_el - final { - $$->list = consListConcat( $1->list, $2->list ); - }; -lit_string_el_list: - final { - $$->list = new ConsItemList; - }; - -nonterm lit_string_el uses string; - -lit_string_el: TK_LitPat - final { - ConsItem *consItem = ConsItem::cons( $1->loc, ConsItem::InputText, $1->data ); - $$->list = ConsItemList::cons( consItem ); - }; - -lit_string_el: '[' string_el_list ']' - final { - $$->list = $2->list; - }; - -nonterm string_el_list uses string; - -string_el_list: string_el_list string_el - final { - $$->list = consListConcat( $1->list, $2->list ); - }; -string_el_list: - final { - $$->list = new ConsItemList; - }; - -nonterm string_el uses string; - -string_el: code_expr - final { - ConsItem *consItem = ConsItem::cons( $1->expr->loc, ConsItem::ExprType, $1->expr ); - $$->list = ConsItemList::cons( consItem ); - }; - -string_el: '"' lit_string_el_list '"' - final { - $$->list = $2->list; - }; - -# -# Production Lists. 
-# - -nonterm prod_el_list -{ - ProdElList *list; -}; - -prod_el_list: - prod_el_list prod_el - final { - $$->list = appendProdEl( $1->list, $2->prodEl ); - }; - -prod_el_list: - final { - $$->list = new ProdElList; - }; - -nonterm opt_no_ignore { bool noIgnore; }; - -opt_no_ignore: KW_Ni final { $$->noIgnore = true; }; -opt_no_ignore: final { $$->noIgnore = false; }; - -nonterm prod_el -{ - ProdEl *prodEl; -}; - -prod_el: - opt_capture opt_commit region_qual TK_Word opt_repeat - final { - $$->prodEl = prodElName( $4->loc, $4->data, $3->nspaceQual, - $1->objField, $5->repeatType, $2->commit ); - }; - -prod_el: - opt_capture opt_commit region_qual TK_Literal opt_repeat - final { - $$->prodEl = prodElLiteral( $4->loc, $4->data, $3->nspaceQual, - $1->objField, $5->repeatType, $2->commit ); - }; - -nonterm opt_repeat -{ - bool opt; - bool repeat; - RepeatType repeatType; -}; - -opt_repeat: '*' final { $$->opt = false; $$->repeat = true; $$->repeatType = RepeatRepeat; }; -opt_repeat: '+' final { $$->opt = false; $$->repeat = false; $$->repeatType = RepeatList; }; -opt_repeat: '?' 
final { $$->opt = true; $$->repeat = false; $$->repeatType = RepeatOpt; }; -opt_repeat: final { $$->opt = false; $$->repeat = false; $$->repeatType = RepeatNone; }; - -nonterm region_qual -{ - NamespaceQual *nspaceQual; -}; - -region_qual: region_qual TK_Word TK_DoubleColon - final { - $$->nspaceQual = $1->nspaceQual; - $$->nspaceQual->qualNames.append( $2->data ); - }; - -region_qual: - final { - $$->nspaceQual = NamespaceQual::cons( namespaceStack.top() ); - }; - -literal_def: KW_Literal literal_list; - -literal_list: literal_list ',' literal_item; -literal_list: literal_item; - -literal_item: opt_no_ignore TK_Literal opt_no_ignore - final { - if ( strcmp( $2->data, "''" ) == 0 ) - zeroDef( $2->loc, $2->data, $1->noIgnore, $3->noIgnore ); - else - literalDef( $2->loc, $2->data, $1->noIgnore, $3->noIgnore ); - }; - - -# These two productions are responsible for setting and unsetting the Regular -# language scanning context. -enter_rl: - try { - enterRl = true; - } - undo { - enterRl = false; - }; -leave_rl: - try { - enterRl = false; - } - undo { - enterRl = true; - }; - -token_def: - token_or_ignore token_def_name obj_var_list - enter_rl opt_no_ignore '/' opt_lex_join leave_rl '/' opt_no_ignore - opt_translate - final { - $3->objectDef->name = $2->name; - defineToken( $1->loc, $2->name, $7->join, $3->objectDef, - $11->transBlock, $1->ignore, $5->noIgnore, $10->noIgnore ); - }; - -nonterm token_or_ignore -{ - InputLoc loc; - bool ignore; -}; - -token_or_ignore: KW_Token - final { $$->loc = $1->loc; $$->ignore = false; }; - -token_or_ignore: KW_Ignore - final { $$->loc = $1->loc; $$->ignore = true; }; - -nonterm class token_def_name -{ - String name; -}; - -token_def_name: - opt_name - final { - $$->name = $1->name; - }; - -nonterm class opt_name -{ - String name; -}; - -opt_name: TK_Word final { $$->name = $1->data; }; -opt_name: ; - -nonterm opt_translate -{ - CodeBlock *transBlock; -}; - -opt_translate: - block_open lang_stmt_list block_close - final { - 
$$->transBlock = CodeBlock::cons( $2->stmtList, $1->localFrame ); - $$->transBlock->context = contextStack.top(); - }; - -opt_translate: - final { - $$->transBlock = 0; - }; - -pre_eof: - KW_Preeof block_open lang_stmt_list block_close - final { - preEof( $1->loc, $3->stmtList, $2->localFrame ); - }; - -rl_def: - KW_Rl machine_name enter_rl '/' lex_join leave_rl '/' - final { - /* Generic creation of machine for instantiation and assignment. */ - addRegularDef( $2->loc, namespaceStack.top(), $2->data, $5->join ); - }; - -type class token_data -{ - InputLoc loc; - String data; -}; - -nonterm machine_name uses token_data; - -machine_name: - TK_Word - final { - $$->loc = $1->loc; - $$->data = $1->data; - }; - -# -# Reduce statements -# - -nonterm opt_reduce_code -{ - CodeBlock *codeBlock; -}; - -opt_reduce_code: - final { $$->codeBlock = 0; }; - -opt_reduce_code: - start_reduce lang_stmt_list block_close - final { - $$->codeBlock = CodeBlock::cons( $2->stmtList, $1->localFrame ); - $$->codeBlock->context = contextStack.top(); - }; - -nonterm start_reduce uses block_open; - -start_reduce: - block_open - final { - $$->localFrame = $1->localFrame; - }; - -nonterm lang_stmt_list -{ - StmtList *stmtList; -}; - -lang_stmt_list: rec_stmt_list opt_require_stmt - final { - $$->stmtList = $1->stmtList; - if ( $2->stmt != 0 ) - $$->stmtList->append( $2->stmt ); - }; - -nonterm rec_stmt_list uses lang_stmt_list; - -rec_stmt_list: rec_stmt_list statement - final { - $$->stmtList = $1->stmtList; - - /* Maybe a statement was generated. 
*/ - if ( $2->stmt != 0 ) - $$->stmtList->append( $2->stmt ); - }; - -rec_stmt_list: - final { - $$->stmtList = new StmtList; - }; - -nonterm opt_def_init -{ - LangExpr *expr; - LangStmt::Type assignType; -}; - -opt_def_init: '=' code_expr - final { - $$->expr = $2->expr; - $$->assignType = LangStmt::AssignType; - }; -opt_def_init: - final { - $$->expr = 0; - }; - -scope_push: - final { - pd->curLocalFrame->pushScope(); - }; - -scope_pop: - final { - pd->curLocalFrame->popScope(); - }; - -nonterm statement -{ - LangStmt *stmt; -}; -nonterm for_scope uses statement; - -statement: var_def opt_def_init - final { - $$->stmt = varDef( $1->objField, $2->expr, $2->assignType ); - }; -statement: var_ref '=' code_expr - final { - $$->stmt = LangStmt::cons( $2->loc, LangStmt::AssignType, $1->varRef, $3->expr ); - }; -statement: KW_Print '(' code_expr_list ')' - final { - $$->stmt = LangStmt::cons( $1->loc, LangStmt::PrintType, $3->exprVect ); - }; -statement: KW_PrintXMLAC '(' code_expr_list ')' - final { - $$->stmt = LangStmt::cons( $1->loc, LangStmt::PrintXMLACType, $3->exprVect ); - }; -statement: KW_PrintXML '(' code_expr_list ')' - final { - $$->stmt = LangStmt::cons( $1->loc, LangStmt::PrintXMLType, $3->exprVect ); - }; -statement: KW_PrintStream '(' code_expr_list ')' - final { - $$->stmt = LangStmt::cons( $1->loc, LangStmt::PrintStreamType, $3->exprVect ); - }; -statement: code_expr - final { - $$->stmt = LangStmt::cons( InputLoc(), LangStmt::ExprType, $1->expr ); - }; -statement: if_stmt - final { - $$->stmt = $1->stmt; - }; -statement: KW_Reject - final { - $$->stmt = LangStmt::cons( $1->loc, LangStmt::RejectType ); - }; -statement: KW_While scope_push code_expr block_or_single scope_pop - final { - $$->stmt = LangStmt::cons( LangStmt::WhileType, $3->expr, $4->stmtList ); - }; - -for_scope: TK_Word ':' type_ref KW_In iter_call block_or_single - final { - $$->stmt = forScope( $1->loc, $1->data, $3->typeRef, $5->langTerm, $6->stmtList ); - }; - -statement: KW_For 
scope_push for_scope scope_pop - final { - $$->stmt = $3->stmt; - }; - -statement: KW_Return code_expr - final { - $$->stmt = LangStmt::cons( $1->loc, LangStmt::ReturnType, $2->expr ); - }; -statement: KW_Break - final { - $$->stmt = LangStmt::cons( LangStmt::BreakType ); - }; -statement: KW_Yield var_ref - final { - $$->stmt = LangStmt::cons( LangStmt::YieldType, $2->varRef ); - }; - -nonterm opt_require_stmt uses statement; - -opt_require_stmt: - scope_push require_pattern lang_stmt_list scope_pop - final { - $$->stmt = LangStmt::cons( LangStmt::IfType, $2->expr, $3->stmtList, 0 ); - }; -opt_require_stmt: - final { - $$->stmt = 0; - }; - -nonterm require_pattern uses code_expr; - -require_pattern: - KW_Require var_ref pattern - final { - $$->expr = require( $1->loc, $2->varRef, $3->list ); - }; - -nonterm block_or_single uses lang_stmt_list; - -block_or_single: '{' lang_stmt_list '}' - final { - $$->stmtList = $2->stmtList; - }; -block_or_single: statement - final { - $$->stmtList = new StmtList; - $$->stmtList->append( $1->stmt ); - }; - -nonterm iter_call -{ - LangTerm *langTerm; -}; - -iter_call: var_ref '(' opt_code_expr_list ')' - final { - $$->langTerm = LangTerm::cons( InputLoc(), $1->varRef, $3->exprVect ); - }; -iter_call: TK_Word - final { - $$->langTerm = LangTerm::cons( InputLoc(), LangTerm::VarRefType, - LangVarRef::cons( $1->loc, $1->data ) ); - }; - -# -# If Statements -# - -nonterm if_stmt uses statement; - -if_stmt: KW_If scope_push code_expr block_or_single scope_pop elsif_list - final { - $$->stmt = LangStmt::cons( LangStmt::IfType, $3->expr, $4->stmtList, $6->stmt ); - }; - -nonterm elsif_list -{ - LangStmt *stmt; -}; - -elsif_list: - elsif_clause elsif_list - final { - /* Put any of the followng elseif part, an else, or null into the elsePart. 
*/ - $$->stmt = $1->stmt; - $$->stmt->elsePart = $2->stmt; - }; -elsif_list: - optional_else - final { - $$->stmt = $1->stmt; - }; - -nonterm elsif_clause -{ - LangStmt *stmt; -}; - -elsif_clause: - KW_Elsif scope_push code_expr block_or_single scope_pop - final { - $$->stmt = LangStmt::cons( LangStmt::IfType, $3->expr, $4->stmtList, 0 ); - }; - -nonterm optional_else -{ - LangStmt *stmt; -}; - -optional_else: - KW_Else scope_push block_or_single scope_pop - final { - $$->stmt = LangStmt::cons( LangStmt::ElseType, $3->stmtList ); - }; - -optional_else: - final { - $$->stmt = 0; - }; - -# -# Code LexExpression Lists. -# -nonterm code_expr_list -{ - ExprVect *exprVect; -}; - -code_expr_list: - code_expr_list code_expr - final { - $$->exprVect = $1->exprVect; - $$->exprVect->append( $2->expr ); - }; -code_expr_list: - code_expr - final { - $$->exprVect = new ExprVect; - $$->exprVect->append( $1->expr ); - }; - -nonterm opt_code_expr_list uses code_expr_list; - -opt_code_expr_list: - code_expr_list - final { - $$->exprVect = $1->exprVect; - }; - -opt_code_expr_list: - final { - $$->exprVect = 0; - }; - -# -# Type list -# - -nonterm type_list -{ - TypeRefVect *typeRefVect; -}; - -type_list: type_list ',' type_ref - final { - $$->typeRefVect = $1->typeRefVect; - $$->typeRefVect->append( $3->typeRef ); - }; -type_list: type_ref - final { - $$->typeRefVect = new TypeRefVect; - $$->typeRefVect->append( $1->typeRef ); - }; - -nonterm opt_type_list uses type_list; - -opt_type_list: type_list - final { - $$->typeRefVect = $1->typeRefVect; - }; - -opt_type_list: - final { - $$->typeRefVect = 0; - }; - - -# -# Variable reference -# - -nonterm var_ref -{ - LangVarRef *varRef; -}; - -var_ref: qual TK_Word - final { - $$->varRef = LangVarRef::cons( $2->loc, $1->qual, $2->data ); - }; - -nonterm qual -{ - QualItemVect *qual; -}; - -qual: qual TK_Word '.' 
- final { - $$->qual = $1->qual; - $$->qual->append( QualItem( $2->loc, $2->data, QualItem::Dot ) ); - }; -qual: qual TK_Word TK_RightArrow - final { - $$->qual = $1->qual; - $$->qual->append( QualItem( $2->loc, $2->data, QualItem::Arrow ) ); - }; -qual: - final { - $$->qual = new QualItemVect; - }; - -# -# Code expression -# - -nonterm code_expr -{ - LangExpr *expr; -}; - -code_expr: code_expr TK_AmpAmp code_relational - final { - $$->expr = LangExpr::cons( $2->loc, $1->expr, OP_LogicalAnd, $3->expr ); - }; - -code_expr: code_expr TK_BarBar code_relational - final { - $$->expr = LangExpr::cons( $2->loc, $1->expr, OP_LogicalOr, $3->expr ); - }; - -code_expr: code_relational - final { - $$->expr = $1->expr; - }; - -nonterm code_relational uses code_expr; - -code_relational: code_relational TK_DoubleEql code_additive - final { - $$->expr = LangExpr::cons( $2->loc, $1->expr, OP_DoubleEql, $3->expr ); - }; - -code_relational: code_relational TK_NotEql code_additive - final { - $$->expr = LangExpr::cons( $2->loc, $1->expr, OP_NotEql, $3->expr ); - }; - -code_relational: code_relational '<' code_additive - final { - $$->expr = LangExpr::cons( $2->loc, $1->expr, '<', $3->expr ); - }; - -code_relational: code_relational '>' code_additive - final { - $$->expr = LangExpr::cons( $2->loc, $1->expr, '>', $3->expr ); - }; - -code_relational: code_relational TK_LessEql code_additive - final { - $$->expr = LangExpr::cons( $2->loc, $1->expr, OP_LessEql, $3->expr ); - }; - -code_relational: code_relational TK_GrtrEql code_additive - final { - $$->expr = LangExpr::cons( $2->loc, $1->expr, OP_GrtrEql, $3->expr ); - }; - - -code_relational: code_additive - final { - $$->expr = $1->expr; - }; - -nonterm code_additive uses code_expr; - -code_additive: code_additive '+' code_multiplicitive - final { - $$->expr = LangExpr::cons( $2->loc, $1->expr, '+', $3->expr ); - }; - -code_additive: code_additive '-' code_multiplicitive - final { - $$->expr = LangExpr::cons( $2->loc, $1->expr, '-', 
$3->expr ); - }; - -code_additive: code_multiplicitive - final { - $$->expr = $1->expr; - }; - -nonterm code_multiplicitive uses code_expr; - -code_multiplicitive: code_multiplicitive '*' code_unary - final { - $$->expr = LangExpr::cons( $2->loc, $1->expr, '*', $3->expr ); - }; - -code_multiplicitive: code_multiplicitive '/' code_unary - final { - $$->expr = LangExpr::cons( $2->loc, $1->expr, '/', $3->expr ); - }; - -code_multiplicitive: code_unary - final { - $$->expr = $1->expr; - }; - -nonterm code_unary uses code_expr; -code_unary: '!' code_factor - final { - $$->expr = LangExpr::cons( $1->loc, '!', $2->expr ); - }; -code_unary: '$' code_factor - final { - $$->expr = LangExpr::cons( $1->loc, '$', $2->expr ); - }; -code_unary: '^' code_factor - final { - $$->expr = LangExpr::cons( $1->loc, '^', $2->expr ); - }; -code_unary: '%' code_factor - final { - $$->expr = LangExpr::cons( $1->loc, '%', $2->expr ); - }; -code_unary: code_factor - final { - $$->expr = $1->expr; - }; - -nonterm opt_capture uses var_def; - -opt_capture: TK_Word ':' - final { - $$->objField = ObjectField::cons( $1->loc, 0, $1->data ); - }; -opt_capture: - final { - $$->objField = 0; - }; - -nonterm parse_cmd -{ - bool stop; - InputLoc loc; -}; - -parse_cmd: - KW_Parse - final { - $$->stop = false; - $$->loc = $1->loc; - }; - -parse_cmd: - KW_ParseStop - final { - $$->stop = true; - $$->loc = $1->loc; - }; - -nonterm code_factor uses code_expr; - -code_factor: TK_Number - final { - $$->expr = LangExpr::cons( LangTerm::cons( InputLoc(), LangTerm::NumberType, $1->data ) ); - }; -code_factor: TK_Literal - final { - $$->expr = LangExpr::cons( LangTerm::cons( InputLoc(), LangTerm::StringType, $1->data ) ); - }; -code_factor: var_ref '(' opt_code_expr_list ')' - final { - $$->expr = LangExpr::cons( LangTerm::cons( InputLoc(), $1->varRef, $3->exprVect ) ); - }; -code_factor: var_ref - final { - $$->expr = LangExpr::cons( LangTerm::cons( InputLoc(), LangTerm::VarRefType, $1->varRef ) ); - }; 
-code_factor: KW_Match var_ref pattern - final { - $$->expr = match( $1->loc, $2->varRef, $3->list ); - }; -code_factor: KW_New code_factor - final { - $$->expr = LangExpr::cons( LangTerm::cons( InputLoc(), LangTerm::NewType, $2->expr ) ); - }; -code_factor: - KW_Construct opt_capture type_ref opt_field_init constructor - final { - $$->expr = construct( $1->loc, $2->objField, $5->list, - $3->typeRef, $4->fieldInitVect ); - }; - -code_factor: - parse_cmd opt_capture type_ref opt_field_init accumulate - final { - $$->expr = parseCmd( $1->loc, $1->stop, $2->objField, - $3->typeRef, $4->fieldInitVect, $5->list ); - }; -code_factor: - var_ref TK_LtLt accumulate - final { - $$->expr = send( $2->loc, $1->varRef, $3->list ); - }; -code_factor: - KW_Send var_ref accumulate - final { - $$->expr = send( $1->loc, $2->varRef, $3->list ); - }; -code_factor: KW_TypeId '<' type_ref '>' - final { - $$->expr = LangExpr::cons( LangTerm::cons( $1->loc, - LangTerm::TypeIdType, $3->typeRef ) ); - }; -code_factor: type_ref KW_In var_ref - final { - $$->expr = LangExpr::cons( LangTerm::cons( $2->loc, - LangTerm::SearchType, $1->typeRef, $3->varRef ) ); - }; -code_factor: KW_Nil - final { - $$->expr = LangExpr::cons( LangTerm::cons( $1->loc, - LangTerm::NilType ) ); - }; -code_factor: KW_True - final { - $$->expr = LangExpr::cons( LangTerm::cons( $1->loc, - LangTerm::TrueType ) ); - }; -code_factor: KW_False - final { - $$->expr = LangExpr::cons( LangTerm::cons( $1->loc, - LangTerm::FalseType ) ); - }; -code_factor: '(' code_expr ')' - final { - $$->expr = $2->expr; - }; -code_factor: KW_MakeTree '(' opt_code_expr_list ')' - final { - $$->expr = LangExpr::cons( LangTerm::cons( $1->loc, - LangTerm::MakeTreeType, $3->exprVect ) ); - }; -code_factor: KW_MakeToken '(' opt_code_expr_list ')' - final { - $$->expr = LangExpr::cons( LangTerm::cons( $1->loc, - LangTerm::MakeTokenType, $3->exprVect ) ); - }; -code_factor: KW_Deref code_expr - final { - $$->expr = LangExpr::cons( $1->loc, OP_Deref, 
$2->expr ); - }; -code_factor: string - final { - $$->expr = LangExpr::cons( LangTerm::cons( InputLoc(), $1->list ) ); - }; - -nonterm opt_field_init uses field_init_list; - -opt_field_init: '(' opt_field_init_list ')' - final { - $$->fieldInitVect = $2->fieldInitVect; - }; -opt_field_init: - final { - $$->fieldInitVect = 0; - }; - -nonterm opt_field_init_list uses field_init_list; - -opt_field_init_list: field_init_list - final { - $$->fieldInitVect = $1->fieldInitVect; - }; -opt_field_init_list: - final { - $$->fieldInitVect = 0; - }; - -nonterm field_init_list -{ - FieldInitVect *fieldInitVect; -}; - -field_init_list: field_init_list field_init - final { - $$->fieldInitVect = $1->fieldInitVect; - $$->fieldInitVect->append( $2->fieldInit ); - }; -field_init_list: field_init - final { - $$->fieldInitVect = new FieldInitVect; - $$->fieldInitVect->append( $1->fieldInit ); - }; - -nonterm field_init -{ - FieldInit *fieldInit; -}; - -field_init: code_expr - final { - $$->fieldInit = FieldInit::cons( InputLoc(), "_name", $1->expr ); - }; - -# -# Regular Expressions -# - -nonterm opt_lex_join -{ - LexJoin *join; -}; - -opt_lex_join: - lex_join opt_context - final { - $$->join = lexOptJoin( $1->join, $2->context ); - }; - -opt_lex_join: - final { - $$->join = 0; - }; - -nonterm lex_join -{ - LexJoin *join; -}; - -lex_join: - lex_expr - final { - $$->join = LexJoin::cons( $1->expression ); - }; - -nonterm opt_context -{ - LexJoin *context; -}; - -opt_context: - '@' lex_join - final - { - $$->context = $2->join; - }; - -opt_context: - final { - $$->context = 0; - }; - -nonterm lex_expr -{ - LexExpression *expression; -}; - -lex_expr: - lex_expr '|' lex_term_short - final { - $$->expression = LexExpression::cons( $1->expression, - $3->term, LexExpression::OrType ); - }; -lex_expr: - lex_expr '&' lex_term_short - final { - $$->expression = LexExpression::cons( $1->expression, - $3->term, LexExpression::IntersectType ); - }; -# This priority specification overrides the 
innermost parsing strategy which -# results ordered choice interpretation of the grammar. -lex_expr: - lex_expr '-' lex_term_short - final { - $$->expression = LexExpression::cons( $1->expression, - $3->term, LexExpression::SubtractType ); - }; -lex_expr: - lex_expr TK_DashDash lex_term_short - final { - $$->expression = LexExpression::cons( $1->expression, - $3->term, LexExpression::StrongSubtractType ); - }; -lex_expr: - lex_term_short - final { - $$->expression = LexExpression::cons( $1->term ); - }; - -nonterm lex_term_short -{ - LexTerm *term; -}; - -shortest lex_term_short; - -lex_term_short: lex_term - final { $$->term = $1->term; }; - -nonterm lex_term -{ - LexTerm *term; -}; - -lex_term: - lex_term lex_factor_label - final { - $$->term = LexTerm::cons( $1->term, $2->factorAug ); - }; -lex_term: - lex_term '.' lex_factor_label - final { - $$->term = LexTerm::cons( $1->term, $3->factorAug ); - }; -lex_term: - lex_term TK_ColonGt lex_factor_label - final { - $$->term = LexTerm::cons( $1->term, $3->factorAug, LexTerm::RightStartType ); - }; -lex_term: - lex_term TK_ColonGtGt lex_factor_label - final { - $$->term = LexTerm::cons( $1->term, $3->factorAug, LexTerm::RightFinishType ); - }; -lex_term: - lex_term TK_LtColon lex_factor_label - final { - $$->term = LexTerm::cons( $1->term, - $3->factorAug, LexTerm::LeftType ); - }; -lex_term: - lex_factor_label - final { - $$->term = LexTerm::cons( $1->factorAug ); - }; - -nonterm lex_factor_label -{ - LexFactorAug *factorAug; -}; - -lex_factor_label: - factor_ep - final { - $$->factorAug = $1->factorAug; - }; - -lex_factor_label: - TK_Word ':' lex_factor_label - final { - $$->factorAug = lexFactorLabel( $1->loc, $1->data, $3->factorAug ); - }; - -nonterm factor_ep -{ - LexFactorAug *factorAug; -}; - -factor_ep: - factor_aug - final { - $$->factorAug = $1->factorAug; - }; - -nonterm factor_aug -{ - LexFactorAug *factorAug; -}; - -factor_aug: - lex_factor_rep - final { - $$->factorAug = LexFactorAug::cons( 
$1->factorRep ); - }; - - -# The fourth level of precedence. These are the trailing unary operators that -# allow for repetition. - -nonterm lex_factor_rep -{ - LexFactorRep *factorRep; -}; - -lex_factor_rep: - lex_factor_rep '*' - final { - $$->factorRep = LexFactorRep::cons( $2->loc, $1->factorRep, - 0, 0, LexFactorRep::StarType ); - }; -lex_factor_rep: - lex_factor_rep TK_StarStar - final { - $$->factorRep = LexFactorRep::cons( $2->loc, $1->factorRep, - 0, 0, LexFactorRep::StarStarType ); - }; -lex_factor_rep: - lex_factor_rep '?' - final { - $$->factorRep = LexFactorRep::cons( $2->loc, $1->factorRep, - 0, 0, LexFactorRep::OptionalType ); - }; -lex_factor_rep: - lex_factor_rep '+' - final { - $$->factorRep = LexFactorRep::cons( $2->loc, $1->factorRep, - 0, 0, LexFactorRep::PlusType ); - }; -lex_factor_rep: - lex_factor_rep '{' lex_factor_rep_num '}' - final { - $$->factorRep = LexFactorRep::cons( $2->loc, $1->factorRep, - $3->rep, 0, LexFactorRep::ExactType ); - }; -lex_factor_rep: - lex_factor_rep '{' ',' lex_factor_rep_num '}' - final { - $$->factorRep = LexFactorRep::cons( $2->loc, $1->factorRep, - 0, $4->rep, LexFactorRep::MaxType ); - }; -lex_factor_rep: - lex_factor_rep '{' lex_factor_rep_num ',' '}' - final { - $$->factorRep = LexFactorRep::cons( $2->loc, $1->factorRep, - $3->rep, 0, LexFactorRep::MinType ); - }; -lex_factor_rep: - lex_factor_rep '{' lex_factor_rep_num ',' lex_factor_rep_num '}' - final { - $$->factorRep = LexFactorRep::cons( $2->loc, $1->factorRep, - $3->rep, $5->rep, LexFactorRep::RangeType ); - }; -lex_factor_rep: - lex_factor_neg - final { - $$->factorRep = LexFactorRep::cons( - $1->factorNeg->loc, $1->factorNeg ); - }; - -nonterm lex_factor_rep_num -{ - int rep; -}; - -lex_factor_rep_num: - TK_UInt - final { - $$->rep = lexFactorRepNum( $1->loc, $1->data ); - }; - - -# -# The fifth level up in precedence. Negation. -# - -nonterm lex_factor_neg -{ - LexFactorNeg *factorNeg; -}; - -lex_factor_neg: - '!' 
lex_factor_neg - final { - $$->factorNeg = LexFactorNeg::cons( $1->loc, - $2->factorNeg, LexFactorNeg::NegateType ); - }; -lex_factor_neg: - '^' lex_factor_neg - final { - $$->factorNeg = LexFactorNeg::cons( $1->loc, - $2->factorNeg, LexFactorNeg::CharNegateType ); - }; -lex_factor_neg: - lex_rl_factor - final { - $$->factorNeg = LexFactorNeg::cons( $1->factor->loc, $1->factor ); - }; - -nonterm lex_rl_factor -{ - LexFactor *factor; -}; - -lex_rl_factor: - TK_Literal - final { - /* Create a new factor node going to a concat literal. */ - $$->factor = LexFactor::cons( Literal::cons( $1->loc, - $1->data, Literal::LitString ) ); - }; -lex_rl_factor: - lex_alphabet_num - final { - /* Create a new factor node going to a literal number. */ - $$->factor = LexFactor::cons( Literal::cons( $1->loc, - $1->data, Literal::Number ) ); - }; -lex_rl_factor: - TK_Word - final { - $$->factor = lexRlFactorName( $1->data, $1->loc ); - }; -lex_rl_factor: - TK_SqOpen lex_regular_expr_or_data TK_SqClose - final { - /* Create a new factor node going to an OR expression. */ - $$->factor = LexFactor::cons( ReItem::cons( $1->loc, $2->reOrBlock, ReItem::OrBlock ) ); - }; -lex_rl_factor: - TK_SqOpenNeg lex_regular_expr_or_data TK_SqClose - final { - /* Create a new factor node going to a negated OR expression. */ - $$->factor = LexFactor::cons( ReItem::cons( $1->loc, $2->reOrBlock, ReItem::NegOrBlock ) ); - }; -lex_rl_factor: - lex_range_lit TK_DotDot lex_range_lit - final { - /* Create a new factor node going to a range. */ - $$->factor = LexFactor::cons( Range::cons( $1->literal, $3->literal ) ); - }; -lex_rl_factor: - '(' lex_join ')' - final { - /* Create a new factor going to a parenthesized join. */ - $$->factor = LexFactor::cons( $2->join ); - }; - -nonterm lex_range_lit -{ - Literal *literal; -}; - -# Literals which can be the end points of ranges. -lex_range_lit: - TK_Literal - final { - /* Range literas must have only one char. We restrict this in the parse tree. 
*/ - $$->literal = Literal::cons( $1->loc, $1->data, Literal::LitString ); - }; -lex_range_lit: - lex_alphabet_num - final { - /* Create a new literal number. */ - $$->literal = Literal::cons( $1->loc, $1->data, Literal::Number ); - }; - -nonterm lex_alphabet_num uses token_data; - -# Any form of a number that can be used as a basic machine. */ -lex_alphabet_num: - TK_UInt - final { - $$->loc = $1->loc; - $$->data = $1->data; - }; -lex_alphabet_num: - '-' TK_UInt - final { - $$->loc = $1->loc; - $$->data = '+'; - $$->data += $2->data; - }; -lex_alphabet_num: - TK_Hex - final { - $$->loc = $1->loc; - $$->data = $1->data; - }; - -# -# Regular Expressions. -# - - -# The data inside of a [] expression in a regular expression. Accepts any -# number of characters or ranges. */ -nonterm lex_regular_expr_or_data -{ - ReOrBlock *reOrBlock; -}; - -lex_regular_expr_or_data: - lex_regular_expr_or_data lex_regular_expr_or_char - final { - $$->reOrBlock = lexRegularExprData( $1->reOrBlock, $2->reOrItem ); - }; -lex_regular_expr_or_data: - final { - $$->reOrBlock = ReOrBlock::cons(); - }; - -# A single character inside of an or expression. Can either be a character or a -# set of characters. -nonterm lex_regular_expr_or_char -{ - ReOrItem *reOrItem; -}; - -lex_regular_expr_or_char: - TK_ReChar - final { - $$->reOrItem = ReOrItem::cons( $1->loc, $1->data ); - }; - -lex_regular_expr_or_char: - TK_ReChar TK_Dash TK_ReChar - final { - $$->reOrItem = ReOrItem::cons( $2->loc, $1->data[0], $3->data[0] ); - }; - -nonterm opt_commit -{ - bool commit; -}; - -opt_commit: - final { - $$->commit = false; - }; - -opt_commit: - KW_Commit - final { - $$->commit = true; - }; - -# -# Grammar Finished -# - - write types; - write data; -}%% - -void ColmParser::init() -{ - BaseParser::init(); - %% write init; -} - -int ColmParser::parseLangEl( int type, const Token *token ) -{ - %% write exec; - return errCount == 0 ? 
0 : -1; -} - -int ColmParser::token( InputLoc &loc, int tokId, char *tokstart, int toklen ) -{ - Token token; - - if ( toklen > 0 ) - token.data.setAs( tokstart, toklen ); - - token.loc = loc; - int res = parseLangEl( tokId, &token ); - if ( res < 0 ) { - parse_error(tokId, token) << "parse error" << endl; - exit(1); - } - return res; -} - -ostream &ColmParser::parse_error( int tokId, Token &token ) -{ - /* Maintain the error count. */ - gblErrorCount += 1; - - cerr << token.loc.fileName << ":" << token.loc.line << ":" << token.loc.col << ": "; - cerr << "at token "; - if ( tokId < 128 ) - cerr << "\"" << ColmParser_lelNames[tokId] << "\""; - else - cerr << ColmParser_lelNames[tokId]; - if ( token.data != 0 ) - cerr << " with data \"" << token.data << "\""; - cerr << ": "; - - return cerr; -} - diff --git a/src/lmscan.h b/src/lmscan.h deleted file mode 100644 index ff3de0ad..00000000 --- a/src/lmscan.h +++ /dev/null @@ -1,104 +0,0 @@ -/* - * Copyright 2007-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef _RLSCAN_H -#define _RLSCAN_H - -#include <iostream> -#include <fstream> -#include <string.h> - -#include "global.h" -#include "lmparse.h" -#include "compiler.h" -#include "avltree.h" -#include "vector.h" -#include "buffer.h" - -using std::ifstream; -using std::istream; -using std::ostream; -using std::cout; -using std::cerr; -using std::endl; - -extern char *Parser_lelNames[]; - - -struct ColmScanner -{ - ColmScanner( const char *fileName, istream &input, - ColmParser *parser, int includeDepth ) - : - fileName(fileName), input(input), - includeDepth(includeDepth), - line(1), column(1), lastnl(0), - parser(parser), - parserExistsError(false), - whitespaceOn(true) - { - } - - ifstream *tryOpenInclude( char **pathChecks, long &found ); - char **makeIncludePathChecks( const char *thisFileName, const char *fileName ); - bool recursiveInclude( const char *inclFileName ); - - void sectionParseInit(); - void token( int type, char *start, char *end ); - void token( int type, char c ); - void token( int type ); - void updateCol(); - void endSection(); - void scan(); - void eof(); - ostream &scan_error(); - - const char *fileName; - istream &input; - int includeDepth; - - int cs; - int line; - char *word, *lit; - int word_len, lit_len; - InputLoc sectionLoc; - char *ts, *te; - int column; - char *lastnl; - - /* Set by machine statements, these persist from section to section - * allowing for unnamed sections. */ - ColmParser *parser; - IncludeStack includeStack; - - /* This is set if ragel has already emitted an error stating that - * no section name has been seen and thus no parser exists. */ - bool parserExistsError; - - /* This is for inline code. By default it is on. 
It goes off for - * statements and values in inline blocks which are parsed. */ - bool whitespaceOn; - - Buffer litBuf; -}; - -#endif /* _RLSCAN_H */ diff --git a/src/lmscan.rl b/src/lmscan.rl deleted file mode 100644 index 231e2689..00000000 --- a/src/lmscan.rl +++ /dev/null @@ -1,637 +0,0 @@ -/* - * Copyright 2006-2012 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include <iostream> -#include <fstream> -#include <string.h> - -#include "global.h" -#include "lmscan.h" -#include "lmparse.h" -#include "parsedata.h" -#include "avltree.h" -#include "vector.h" - -//#define PRINT_TOKENS - -using std::ifstream; -using std::istream; -using std::ostream; -using std::cout; -using std::cerr; -using std::endl; - -%%{ - machine section_parse; - alphtype int; - write data; -}%% - -void ColmScanner::sectionParseInit() -{ - %% write init; -} - -ostream &ColmScanner::scan_error() -{ - /* Maintain the error count. */ - gblErrorCount += 1; - cerr << fileName << ":" << line << ":" << column << ": "; - return cerr; -} - -bool ColmScanner::recursiveInclude( const char *inclFileName ) -{ - for ( IncludeStack::Iter si = includeStack; si.lte(); si++ ) { - if ( strcmp( si->fileName, inclFileName ) == 0 ) - return true; - } - return false; -} - -void ColmScanner::updateCol() -{ - char *from = lastnl; - if ( from == 0 ) - from = ts; - //cerr << "adding " << te - from << " to column" << endl; - column += te - from; - lastnl = 0; -} - -void ColmScanner::token( int type, char c ) -{ - token( type, &c, &c + 1 ); -} - -void ColmScanner::token( int type ) -{ - token( type, 0, 0 ); -} - -bool isAbsolutePath( const char *path ) -{ - return path[0] == '/'; -} - -ifstream *ColmScanner::tryOpenInclude( char **pathChecks, long &found ) -{ - char **check = pathChecks; - ifstream *inFile = new ifstream; - - while ( *check != 0 ) { - inFile->open( *check ); - if ( inFile->is_open() ) { - found = check - pathChecks; - return inFile; - } - check += 1; - } - - found = -1; - delete inFile; - return 0; -} - -char **ColmScanner::makeIncludePathChecks( const char *thisFileName, const char *fileName ) -{ - char **checks = 0; - long nextCheck = 0; - char *data = strdup(fileName); - long length = strlen(fileName); - - /* Absolute path? 
*/ - if ( isAbsolutePath( data ) ) { - checks = new char*[2]; - checks[nextCheck++] = data; - } - else { - /* Search from the the location of the current file. */ - checks = new char *[2 + includePaths.length()]; - const char *lastSlash = strrchr( thisFileName, '/' ); - if ( lastSlash == 0 ) - checks[nextCheck++] = data; - else { - long givenPathLen = (lastSlash - thisFileName) + 1; - long checklen = givenPathLen + length; - char *check = new char[checklen+1]; - memcpy( check, thisFileName, givenPathLen ); - memcpy( check+givenPathLen, data, length ); - check[checklen] = 0; - checks[nextCheck++] = check; - } - - /* Search from the include paths given on the command line. */ - for ( ArgsVector::Iter incp = includePaths; incp.lte(); incp++ ) { - long pathLen = strlen( *incp ); - long checkLen = pathLen + 1 + length; - char *check = new char[checkLen+1]; - memcpy( check, *incp, pathLen ); - check[pathLen] = '/'; - memcpy( check+pathLen+1, data, length ); - check[checkLen] = 0; - checks[nextCheck++] = check; - } - } - - checks[nextCheck] = 0; - return checks; -} - - -%%{ - machine section_parse; - import "lmparse.h"; - - action clear_words { word = lit = 0; word_len = lit_len = 0; } - action store_lit { lit = tokdata; lit_len = toklen; } - - action mach_err { scan_error() << "bad machine statement" << endl; } - action incl_err { scan_error() << "bad include statement" << endl; } - action write_err { scan_error() << "bad write statement" << endl; } - - action handle_include - { - String src( lit, lit_len ); - String fileName; - bool unused; - - /* Need a location. */ - InputLoc here; - here.fileName = fileName; - here.line = line; - here.col = column; - - prepareLitString( fileName, unused, src, here ); - char **checks = makeIncludePathChecks( this->fileName, fileName ); - - /* Open the input file for reading. 
*/ - long found = 0; - ifstream *inFile = tryOpenInclude( checks, found ); - if ( inFile == 0 ) { - scan_error() << "include: could not open " << - fileName << " for reading" << endl; - } - else { - /* Only proceed with the include if it was found. */ - if ( recursiveInclude( checks[found] ) ) - scan_error() << "include: this is a recursive include operation" << endl; - - /* Check for a recursive include structure. Add the current file/section - * name then check if what we are including is already in the stack. */ - includeStack.append( IncludeStackItem( checks[found] ) ); - - ColmScanner *scanner = new ColmScanner( fileName, *inFile, parser, includeDepth+1 ); - scanner->scan(); - delete inFile; - - /* Remove the last element (len-1) */ - includeStack.remove( -1 ); - - delete scanner; - } - } - - include_target = - TK_Literal >clear_words @store_lit; - - include_stmt = - ( KW_Include include_target ) @handle_include - <>err incl_err <>eof incl_err; - - action handle_token - { -// cout << Parser_lelNames[type] << " "; -// if ( start != 0 ) { -// cout.write( start, end-start ); -// } -// cout << endl; - - InputLoc loc; - - #ifdef PRINT_TOKENS - cerr << "scanner:" << line << ":" << column << - ": sending token to the parser " << Parser_lelNames[*p]; - cerr << " " << toklen; - if ( tokdata != 0 ) - cerr << " " << tokdata; - cerr << endl; - #endif - - loc.fileName = fileName; - loc.line = line; - loc.col = column; - - if ( tokdata != 0 && tokdata[toklen-1] == '\n' ) - loc.line -= 1; - - parser->token( loc, type, tokdata, toklen ); - } - - # Catch everything else. 
- everything_else = ^( KW_Include ) @handle_token; - - main := ( - include_stmt | - everything_else - )*; -}%% - -void ColmScanner::token( int type, char *start, char *end ) -{ - char *tokdata = 0; - int toklen = 0; - int *p = &type; - int *pe = &type + 1; - int *eof = 0; - - if ( start != 0 ) { - toklen = end-start; - tokdata = new char[toklen+1]; - memcpy( tokdata, start, toklen ); - tokdata[toklen] = 0; - } - - %%{ - machine section_parse; - write exec; - }%% - - updateCol(); -} - -void ColmScanner::endSection( ) -{ - /* Execute the eof actions for the section parser. */ - /* Probably use: token( -1 ); */ -} - -%%{ - machine lmscan; - - # This is sent by the driver code. - EOF = 0; - - action inc_nl { - lastnl = p; - column = 0; - line++; - } - NL = '\n' @inc_nl; - - # Identifiers, numbers, commetns, and other common things. - ident = ( alpha | '_' ) ( alpha |digit |'_' )*; - number = digit+; - hex_number = '0x' [0-9a-fA-F]+; - - # These literal forms are common to C-like host code and ragel. - s_literal = "'" ([^'\\] | NL | '\\' (any | NL))* "'"; - d_literal = '"' ([^"\\] | NL | '\\' (any | NL))* '"'; - - whitespace = [ \t] | NL; - pound_comment = '#' [^\n]* NL; - - or_literal := |* - # Escape sequences in OR expressions. - '\\0' => { token( TK_ReChar, '\0' ); }; - '\\a' => { token( TK_ReChar, '\a' ); }; - '\\b' => { token( TK_ReChar, '\b' ); }; - '\\t' => { token( TK_ReChar, '\t' ); }; - '\\n' => { token( TK_ReChar, '\n' ); }; - '\\v' => { token( TK_ReChar, '\v' ); }; - '\\f' => { token( TK_ReChar, '\f' ); }; - '\\r' => { token( TK_ReChar, '\r' ); }; - '\\\n' => { updateCol(); }; - '\\' any => { token( TK_ReChar, ts+1, te ); }; - - # Range dash in an OR expression. - '-' => { token( TK_Dash, 0, 0 ); }; - - # Terminate an OR expression. - ']' => { token( TK_SqClose ); fret; }; - - EOF => { - scan_error() << "unterminated OR literal" << endl; - }; - - # Characters in an OR expression. 
- [^\]] => { token( TK_ReChar, ts, te ); }; - - *|; - - regular_type := |* - # Identifiers. - ident => { token( TK_Word, ts, te ); } ; - - # Numbers - number => { token( TK_UInt, ts, te ); }; - hex_number => { token( TK_Hex, ts, te ); }; - - # Literals, with optionals. - ( s_literal | d_literal ) [i]? - => { token( TK_Literal, ts, te ); }; - - '[' => { token( TK_SqOpen ); fcall or_literal; }; - '[^' => { token( TK_SqOpenNeg ); fcall or_literal; }; - - '/' => { token( '/'); fret; }; - - # Ignore. - pound_comment => { updateCol(); }; - - '..' => { token( TK_DotDot ); }; - '**' => { token( TK_StarStar ); }; - '--' => { token( TK_DashDash ); }; - - ':>' => { token( TK_ColonGt ); }; - ':>>' => { token( TK_ColonGtGt ); }; - '<:' => { token( TK_LtColon ); }; - - # Whitespace other than newline. - [ \t\r]+ => { updateCol(); }; - - # If we are in a single line machine then newline may end the spec. - NL => { updateCol(); }; - - # Consume eof. - EOF; - - any => { token( *ts ); } ; - *|; - - literal_pattern := |* - '\\' '0' { litBuf.append( '\0' ); }; - '\\' 'a' { litBuf.append( '\a' ); }; - '\\' 'b' { litBuf.append( '\b' ); }; - '\\' 't' { litBuf.append( '\t' ); }; - '\\' 'n' { litBuf.append( '\n' ); }; - '\\' 'v' { litBuf.append( '\v' ); }; - '\\' 'f' { litBuf.append( '\f' ); }; - '\\' 'r' { litBuf.append( '\r' ); }; - - '\\' any { - litBuf.append( ts[1] ); - }; - '"' => { - if ( litBuf.length > 0 ) { - token( TK_LitPat, litBuf.data, litBuf.data+litBuf.length ); - litBuf.clear(); - } - token( '"' ); - fret; - }; - NL => { - litBuf.append( '\n' ); - token( TK_LitPat, litBuf.data, litBuf.data+litBuf.length ); - litBuf.clear(); - token( '"' ); - fret; - }; - '[' => { - if ( litBuf.length > 0 ) { - token( TK_LitPat, litBuf.data, litBuf.data+litBuf.length ); - litBuf.clear(); - } - token( '[' ); - fcall main; - }; - any => { - litBuf.append( *ts ); - }; - *|; - - # Parser definitions. 
- main := |* - 'lex' => { token( KW_Lex ); }; - 'commit' => { token( KW_Commit ); }; - 'token' => { token( KW_Token ); }; - 'literal' => { token( KW_Literal ); }; - 'rl' => { token( KW_Rl ); }; - 'def' => { token( KW_Def ); }; - 'ignore' => { token( KW_Ignore ); }; - 'construct' => { token( KW_Construct ); }; - 'cons' => { token( KW_Construct ); }; - 'new' => { token( KW_New ); }; - 'if' => { token( KW_If ); }; - 'reject' => { token( KW_Reject ); }; - 'while' => { token( KW_While ); }; - 'else' => { token( KW_Else ); }; - 'elsif' => { token( KW_Elsif ); }; - 'match' => { token( KW_Match ); }; - 'for' => { token( KW_For ); }; - 'iter' => { token( KW_Iter ); }; - 'prints' => { token( KW_PrintStream ); }; - 'print' => { token( KW_Print ); }; - 'print_xml_ac' => { token( KW_PrintXMLAC ); }; - 'print_xml' => { token( KW_PrintXML ); }; - 'namespace' => { token( KW_Namespace ); }; - 'lex' => { token( KW_Lex ); }; - 'end' => { token( KW_End ); }; - 'map' => { token( KW_Map ); }; - 'list' => { token( KW_List ); }; - 'vector' => { token( KW_Vector ); }; - 'accum' => { token( KW_Parser ); }; - 'parser' => { token( KW_Parser ); }; - 'return' => { token( KW_Return ); }; - 'break' => { token( KW_Break ); }; - 'yield' => { token( KW_Yield ); }; - 'typeid' => { token( KW_TypeId ); }; - 'make_token' => { token( KW_MakeToken ); }; - 'make_tree' => { token( KW_MakeTree ); }; - 'reducefirst' => { token( KW_ReduceFirst ); }; - 'for' => { token( KW_For ); }; - 'in' => { token( KW_In ); }; - 'nil' => { token( KW_Nil ); }; - 'true' => { token( KW_True ); }; - 'false' => { token( KW_False ); }; - 'parse' => { token( KW_Parse ); }; - 'parse_stop' => { token( KW_ParseStop ); }; - 'global' => { token( KW_Global ); }; - 'export' => { token( KW_Export ); }; - 'ptr' => { token( KW_Ptr ); }; - 'ref' => { token( KW_Ref ); }; - 'deref' => { token( KW_Deref ); }; - 'require' => { token( KW_Require ); }; - 'preeof' => { token( KW_Preeof ); }; - 'left' => { token( KW_Left ); }; - 'right' => { token( 
KW_Right ); }; - 'nonassoc' => { token( KW_Nonassoc ); }; - 'prec' => { token( KW_Prec ); }; - 'include' => { token( KW_Include ); }; - 'context' => { token( KW_Context ); }; - 'alias' => { token( KW_Alias ); }; - 'send' => { token( KW_Send ); }; - 'ni' => { token( KW_Ni ); }; - - # Identifiers. - ident => { token( TK_Word, ts, te ); } ; - - number => { token( TK_Number, ts, te ); }; - - '/' => { - token( '/' ); - if ( parser->enterRl ) - fcall regular_type; - }; - - "~" [^\n]* NL => { - token( '"' ); - token( TK_LitPat, ts+1, te ); - token( '"' ); - }; - - "'" ([^'\\\n] | '\\' (any | NL))* ( "'" | NL ) => { - token( TK_Literal, ts, te ); - }; - - '"' => { - token( '"' ); - litBuf.clear(); - fcall literal_pattern; - }; - '[' => { - token( '[' ); - fcall main; - }; - - ']' => { - token( ']' ); - if ( top > 0 ) - fret; - }; - - # Ignore. - pound_comment => { updateCol(); }; - - '=>' => { token( TK_DoubleArrow ); }; - '==' => { token( TK_DoubleEql ); }; - '!=' => { token( TK_NotEql ); }; - '::' => { token( TK_DoubleColon ); }; - '<=' => { token( TK_LessEql ); }; - '>=' => { token( TK_GrtrEql ); }; - '->' => { token( TK_RightArrow ); }; - '&&' => { token( TK_AmpAmp ); }; - '||' => { token( TK_BarBar ); }; - '<<' => { token( TK_LtLt ); }; - - ( '+' | '-' | '*' | '/' | '(' | ')' | '@' | '$' | '^' ) => { token( *ts ); }; - - - # Whitespace other than newline. - [ \t\r]+ => { updateCol(); }; - NL => { updateCol(); }; - - # Consume eof. - EOF; - - any => { token( *ts ); } ; - *|; -}%% - -%% write data; - -void ColmScanner::scan() -{ - int bufsize = 8; - char *buf = new char[bufsize]; - const char last_char = 0; - int cs, act, have = 0; - int top, stack[32]; - bool execute = true; - - sectionParseInit(); - %% write init; - - while ( execute ) { - char *p = buf + have; - int space = bufsize - have; - - if ( space == 0 ) { - /* We filled up the buffer trying to scan a token. Grow it. */ - bufsize = bufsize * 2; - char *newbuf = new char[bufsize]; - - /* Recompute p and space. 
*/ - p = newbuf + have; - space = bufsize - have; - - /* Patch up pointers possibly in use. */ - if ( ts != 0 ) - ts = newbuf + ( ts - buf ); - te = newbuf + ( te - buf ); - - /* Copy the new buffer in. */ - memcpy( newbuf, buf, have ); - delete[] buf; - buf = newbuf; - } - - input.read( p, space ); - int len = input.gcount(); - - /* If we see eof then append the EOF char. */ - if ( len == 0 ) { - p[0] = last_char, len = 1; - execute = false; - } - - char *pe = p + len; - char *eof = 0; - %% write exec; - - /* Check if we failed. */ - if ( cs == lmscan_error ) { - /* Machine failed before finding a token. I'm not yet sure if this - * is reachable. */ - scan_error() << "colm scanner error (metalanguage)" << endl; - exit(1); - } - - /* Decide if we need to preserve anything. */ - char *preserve = ts; - - /* Now set up the prefix. */ - if ( preserve == 0 ) - have = 0; - else { - /* There is data that needs to be shifted over. */ - have = pe - preserve; - memmove( buf, preserve, have ); - unsigned int shiftback = preserve - buf; - if ( ts != 0 ) - ts -= shiftback; - te -= shiftback; - - preserve = buf; - } - } - delete[] buf; -} - -void ColmScanner::eof() -{ - InputLoc loc; - loc.fileName = "<EOF>"; - loc.line = line; - loc.col = 1; - parser->token( loc, ColmParser_tk_eof, 0, 0 ); -} diff --git a/src/loadcolm.cc b/src/loadcolm.cc deleted file mode 100644 index 289b65e1..00000000 --- a/src/loadcolm.cc +++ /dev/null @@ -1,2851 +0,0 @@ -/* - * Copyright 2006-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above 
copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <stdbool.h> -#include <string.h> -#include <iostream> - -#include "loadcolm.h" -#include "gen/if2.h" - -extern colm_sections colm_object; - -InputLoc::InputLoc( colm_location *pcloc ) -{ - if ( pcloc != 0 ) { - fileName = pcloc->name; - line = pcloc->line; - col = pcloc->column; - } - else { - fileName = 0; - line = -1; - col = -1; - } -} - -String unescape( const String &s ) -{ - String out( String::Fresh(), s.length() ); - char *d = out.data; - - for ( int i = 0; i < s.length(); ) { - if ( s[i] == '\\' ) { - switch ( s[i+1] ) { - case '0': *d++ = '\0'; break; - case 'a': *d++ = '\a'; break; - case 'b': *d++ = '\b'; break; - case 't': *d++ = '\t'; break; - case 'n': *d++ = '\n'; break; - case 'v': *d++ = '\v'; break; - case 'f': *d++ = '\f'; break; - case 'r': *d++ = '\r'; break; - default: *d++ = s[i+1]; break; - } - i += 2; - } - else { - *d++ = s[i]; - i += 1; - } - } - out.chop( d - out.data ); - return out; -} - - -struct LoadColm -: - public BaseParser -{ - LoadColm( Compiler *pd, const char *inputFileName ) - : - BaseParser( pd ), - inputFileName( inputFileName ) - {} - - const char *inputFileName; - - Literal *walkLexRangeLit( lex_range_lit lexRangeLit ) - { - Literal *literal = 0; - switch ( lexRangeLit.prodName() ) { - case lex_range_lit::Lit: { - String lit = lexRangeLit.lex_lit().data(); - literal = Literal::cons( 
lexRangeLit.lex_lit().loc(), lit, Literal::LitString ); - break; - } - case lex_range_lit::Number: { - String num = lexRangeLit.lex_num().text().c_str(); - literal = Literal::cons( lexRangeLit.lex_num().loc(), num, Literal::Number ); - break; - }} - return literal; - } - - LexFactor *walkLexFactor( lex_factor lexFactor ) - { - LexFactor *factor = 0; - switch ( lexFactor.prodName() ) { - case lex_factor::Literal: { - String litString = lexFactor.lex_lit().data(); - Literal *literal = Literal::cons( lexFactor.lex_lit().loc(), - litString, Literal::LitString ); - factor = LexFactor::cons( literal ); - break; - } - case lex_factor::Id: { - String id = lexFactor.lex_id().data(); - factor = lexRlFactorName( id, lexFactor.lex_id().loc() ); - break; - } - case lex_factor::Range: { - Literal *low = walkLexRangeLit( lexFactor.Low() ); - Literal *high = walkLexRangeLit( lexFactor.High() ); - - Range *range = Range::cons( low, high ); - factor = LexFactor::cons( range ); - break; - } - case lex_factor::PosOrBlock: { - ReOrBlock *block = walkRegOrData( lexFactor.reg_or_data() ); - factor = LexFactor::cons( ReItem::cons( block, ReItem::OrBlock ) ); - break; - } - case lex_factor::NegOrBlock: { - ReOrBlock *block = walkRegOrData( lexFactor.reg_or_data() ); - factor = LexFactor::cons( ReItem::cons( block, ReItem::NegOrBlock ) ); - break; - } - case lex_factor::Number: { - String number = lexFactor.lex_uint().text().c_str(); - factor = LexFactor::cons( Literal::cons( lexFactor.lex_uint().loc(), - number, Literal::Number ) ); - break; - } - case lex_factor::Hex: { - String number = lexFactor.lex_hex().text().c_str(); - factor = LexFactor::cons( Literal::cons( lexFactor.lex_hex().loc(), - number, Literal::Number ) ); - break; - } - case lex_factor::Paren: { - lex_expr LexExpr = lexFactor.lex_expr(); - LexExpression *expr = walkLexExpr( LexExpr ); - LexJoin *join = LexJoin::cons( expr ); - factor = LexFactor::cons( join ); - break; - }} - return factor; - } - - LexFactorAug 
*walkLexFactorAug( lex_factor_rep LexFactorRepTree ) - { - LexFactorRep *factorRep = walkLexFactorRep( LexFactorRepTree ); - return LexFactorAug::cons( factorRep ); - } - - LangExpr *walkCodeExpr( code_expr codeExpr, bool used = true ) - { - LangExpr *expr = 0; - - switch ( codeExpr.prodName() ) { - case code_expr::AmpAmp: { - LangExpr *relational = walkCodeRelational( codeExpr.code_relational() ); - LangExpr *left = walkCodeExpr( codeExpr._code_expr() ); - - InputLoc loc = codeExpr.AMPAMP().loc(); - expr = LangExpr::cons( loc, left, OP_LogicalAnd, relational ); - break; - } - case code_expr::BarBar: { - LangExpr *relational = walkCodeRelational( codeExpr.code_relational() ); - LangExpr *left = walkCodeExpr( codeExpr._code_expr() ); - - InputLoc loc = codeExpr.BARBAR().loc(); - expr = LangExpr::cons( loc, left, OP_LogicalOr, relational ); - break; - } - case code_expr::Base: { - LangExpr *relational = walkCodeRelational( codeExpr.code_relational(), used ); - expr = relational; - break; - }} - return expr; - } - - LangStmt *walkStatement( statement Statement ) - { - LangStmt *stmt = 0; - switch ( Statement.prodName() ) { - case statement::Print: { - print_stmt printStmt = Statement.print_stmt(); - stmt = walkPrintStmt( printStmt ); - break; - } - case statement::VarDef: { - ObjectField *objField = walkVarDef( Statement.var_def(), - ObjectField::UserLocalType ); - LangExpr *expr = walkOptDefInit( Statement.opt_def_init() ); - stmt = varDef( objField, expr, LangStmt::AssignType ); - break; - } - case statement::For: { - pushScope(); - - String forDecl = Statement.id().text().c_str(); - TypeRef *typeRef = walkTypeRef( Statement.type_ref() ); - StmtList *stmtList = walkBlockOrSingle( Statement.block_or_single() ); - - IterCall *iterCall = walkIterCall( Statement.iter_call() ); - - stmt = forScope( Statement.id().loc(), forDecl, - curScope(), typeRef, iterCall, stmtList ); - - popScope(); - break; - } - case statement::If: { - pushScope(); - - LangExpr *expr = 
walkCodeExpr( Statement.code_expr() ); - StmtList *stmtList = walkBlockOrSingle( Statement.block_or_single() ); - - popScope(); - - LangStmt *elsifList = walkElsifList( Statement.elsif_list() ); - stmt = LangStmt::cons( LangStmt::IfType, expr, stmtList, elsifList ); - break; - } - case statement::SwitchUnder: - case statement::SwitchBlock: { - pushScope(); - stmt = walkCaseClauseList( Statement.case_clause_list(), Statement.var_ref() ); - popScope(); - break; - } - case statement::While: { - pushScope(); - LangExpr *expr = walkCodeExpr( Statement.code_expr() ); - StmtList *stmtList = walkBlockOrSingle( Statement.block_or_single() ); - stmt = LangStmt::cons( LangStmt::WhileType, expr, stmtList ); - popScope(); - break; - } - case statement::LhsVarRef: { - LangVarRef *varRef = walkVarRef( Statement.var_ref() ); - LangExpr *expr = walkCodeExpr( Statement.code_expr() ); - stmt = LangStmt::cons( varRef->loc, LangStmt::AssignType, varRef, expr ); - break; - } - case statement::Yield: { - LangVarRef *varRef = walkVarRef( Statement.var_ref() ); - stmt = LangStmt::cons( LangStmt::YieldType, varRef ); - break; - } - case statement::Return: { - LangExpr *expr = walkCodeExpr( Statement.code_expr() ); - stmt = LangStmt::cons( Statement.loc(), LangStmt::ReturnType, expr ); - break; - } - case statement::Break: { - stmt = LangStmt::cons( LangStmt::BreakType ); - break; - } - case statement::Reject: { - stmt = LangStmt::cons( Statement.REJECT().loc(), LangStmt::RejectType ); - break; - } - case statement::Call: { - LangVarRef *langVarRef = walkVarRef( Statement.var_ref() ); - CallArgVect *exprVect = walkCallArgList( Statement.call_arg_list() ); - LangTerm *term = LangTerm::cons( langVarRef->loc, langVarRef, exprVect ); - LangExpr *expr = LangExpr::cons( term ); - stmt = LangStmt::cons( expr->loc, LangStmt::ExprType, expr ); - break; - } - case statement::StmtOrFactor: { - LangExpr *expr = walkStmtOrFactor( Statement.stmt_or_factor() ); - stmt = LangStmt::cons( expr->loc, 
LangStmt::ExprType, expr ); - break; - } - case statement::BareSend: { - NamespaceQual *nspaceQual = NamespaceQual::cons( curNspace() ); - QualItemVect *qualItemVect = new QualItemVect; - - LangVarRef *varRef = LangVarRef::cons( InputLoc(), - curNspace(), curStruct(), curScope(), nspaceQual, - qualItemVect, String("_") ); - - ConsItemList *list = walkAccumulate( Statement.accumulate() ); - bool eof = walkOptEos( Statement.opt_eos() ); - LangExpr *expr = send( InputLoc(), varRef, list, eof ); - stmt = LangStmt::cons( expr->loc, LangStmt::ExprType, expr ); - break; - } - } - return stmt; - } - - StmtList *walkLangStmtList( lang_stmt_list langStmtList ) - { - StmtList *retList = new StmtList; - _repeat_statement stmtList = langStmtList.StmtList(); - - /* Walk the list of items. */ - while ( !stmtList.end() ) { - statement Statement = stmtList.value(); - LangStmt *stmt = walkStatement( Statement ); - if ( stmt != 0 ) - retList->append( stmt ); - stmtList = stmtList.next(); - } - - require_pattern require = langStmtList.opt_require_stmt().require_pattern(); - if ( require != 0 ) { - pushScope(); - - LangVarRef *varRef = walkVarRef( require.var_ref() ); - PatternItemList *list = walkPattern( require.pattern(), varRef ); - LangExpr *expr = match( require.REQUIRE().loc(), varRef, list ); - - StmtList *reqList = walkLangStmtList( langStmtList.opt_require_stmt().lang_stmt_list() ); - - LangStmt *stmt = LangStmt::cons( LangStmt::IfType, expr, reqList, 0 ); - - popScope(); - - retList->append( stmt ); - } - - return retList; - } - - void walkTokenDef( token_def TokenDef ) - { - String name = TokenDef.id().data(); - - bool niLeft = walkNoIgnoreLeft( TokenDef.no_ignore_left() ); - bool niRight = walkNoIgnoreRight( TokenDef.no_ignore_right() ); - - ObjectDef *objectDef = walkVarDefList( TokenDef.VarDefList() ); - objectDef->name = name; - - LexJoin *join = 0; - if ( TokenDef.opt_lex_expr().lex_expr() != 0 ) { - LexExpression *expr = walkLexExpr( TokenDef.opt_lex_expr().lex_expr() 
); - join = LexJoin::cons( expr ); - } - - CodeBlock *translate = walkOptTranslate( TokenDef.opt_translate() ); - - defineToken( TokenDef.id().loc(), name, join, objectDef, - translate, false, niLeft, niRight ); - } - - void walkIgnoreCollector( ic_def IgnoreCollector ) - { - String id = IgnoreCollector.id().data(); - zeroDef( IgnoreCollector.id().loc(), id ); - } - - String walkOptId( opt_id optId ) - { - String name; - if ( optId.prodName() == opt_id::Id ) - name = optId.id().data(); - return name; - } - - ObjectDef *walkVarDefList( _repeat_var_def varDefList ) - { - ObjectDef *objectDef = ObjectDef::cons( ObjectDef::UserType, - String(), pd->nextObjectId++ ); - - while ( !varDefList.end() ) { - ObjectField *varDef = walkVarDef( varDefList.value(), - ObjectField::UserFieldType ); - objVarDef( objectDef, varDef ); - varDefList = varDefList.next(); - } - - return objectDef; - } - - void walkPreEof( pre_eof_def PreEofDef ) - { - ObjectDef *localFrame = blockOpen(); - StmtList *stmtList = walkLangStmtList( PreEofDef.lang_stmt_list() ); - preEof( PreEofDef.PREEOF().loc(), stmtList, localFrame ); - blockClose(); - } - - void walkIgnoreDef( ignore_def IgnoreDef ) - { - String name = walkOptId( IgnoreDef.opt_id() ); - ObjectDef *objectDef = ObjectDef::cons( ObjectDef::UserType, - name, pd->nextObjectId++ ); - - LexJoin *join = 0; - if ( IgnoreDef.opt_lex_expr().lex_expr() != 0 ) { - LexExpression *expr = walkLexExpr( IgnoreDef.opt_lex_expr().lex_expr() ); - join = LexJoin::cons( expr ); - } - - defineToken( IgnoreDef.IGNORE().loc(), name, join, objectDef, - 0, true, false, false ); - } - - LangExpr *walkCodeMultiplicitive( code_multiplicitive mult, bool used = true ) - { - LangExpr *expr = 0; - switch ( mult.prodName() ) { - case code_multiplicitive::Star: { - LangExpr *right = walkCodeUnary( mult.code_unary() ); - LangExpr *left = walkCodeMultiplicitive( mult._code_multiplicitive() ); - expr = LangExpr::cons( mult.STAR().loc(), left, '*', right ); - break; - } - case 
code_multiplicitive::Fslash: { - LangExpr *right = walkCodeUnary( mult.code_unary() ); - LangExpr *left = walkCodeMultiplicitive( mult._code_multiplicitive() ); - expr = LangExpr::cons( mult.FSLASH().loc(), left, '/', right ); - break; - } - case code_multiplicitive::Base: { - LangExpr *right = walkCodeUnary( mult.code_unary(), used ); - expr = right; - break; - }} - return expr; - } - - PatternItemList *walkPatternElTypeOrLit( pattern_el_lel typeOrLit, - LangVarRef *patternVarRef ) - { - NamespaceQual *nspaceQual = walkRegionQual( typeOrLit.region_qual() ); - RepeatType repeatType = walkOptRepeat( typeOrLit.opt_repeat() ); - - PatternItemList *list = 0; - switch ( typeOrLit.prodName() ) { - case pattern_el_lel::Id: { - String id = typeOrLit.id().data(); - list = patternElNamed( typeOrLit.id().loc(), patternVarRef, - nspaceQual, id, repeatType ); - break; - } - case pattern_el_lel::Lit: { - String lit = typeOrLit.backtick_lit().data(); - list = patternElType( typeOrLit.backtick_lit().loc(), patternVarRef, - nspaceQual, lit, repeatType ); - break; - }} - - return list; - } - - LangVarRef *walkOptLabel( opt_label optLabel ) - { - LangVarRef *varRef = 0; - if ( optLabel.prodName() == opt_label::Id ) { - String id = optLabel.id().data(); - varRef = LangVarRef::cons( optLabel.id().loc(), - curNspace(), curStruct(), curScope(), id ); - } - return varRef; - } - - PatternItemList *walkPatternEl( pattern_el patternEl, LangVarRef *patternVarRef ) - { - PatternItemList *list = 0; - switch ( patternEl.prodName() ) { - case pattern_el::Dq: { - list = walkLitpatElList( patternEl.LitpatElList(), - patternEl.dq_lit_term().LIT_DQ_NL(), patternVarRef ); - break; - } - case pattern_el::Sq: { - list = walkPatSqConsDataList( patternEl.SqConsDataList(), - patternEl.sq_lit_term().CONS_SQ_NL() ); - break; - } - case pattern_el::Tilde: { - String patternData = patternEl.opt_tilde_data().text().c_str(); - patternData += '\n'; - PatternItem *patternItem = PatternItem::cons( 
PatternItem::InputTextForm, - patternEl.opt_tilde_data().loc(), patternData ); - list = PatternItemList::cons( patternItem ); - break; - } - case pattern_el::PatternEl: { - PatternItemList *typeOrLitList = walkPatternElTypeOrLit( - patternEl.pattern_el_lel(), patternVarRef ); - LangVarRef *varRef = walkOptLabel( patternEl.opt_label() ); - list = consPatternEl( varRef, typeOrLitList ); - break; - }} - return list; - } - - PatternItemList *walkLitpatEl( litpat_el litpatEl, LangVarRef *patternVarRef ) - { - PatternItemList *list = 0; - switch ( litpatEl.prodName() ) { - case litpat_el::ConsData: { - String consData = unescape( litpatEl.lit_dq_data().text().c_str() ); - PatternItem *patternItem = PatternItem::cons( PatternItem::InputTextForm, - litpatEl.lit_dq_data().loc(), consData ); - list = PatternItemList::cons( patternItem ); - break; - } - case litpat_el::SubList: { - list = walkPatternElList( litpatEl.PatternElList(), patternVarRef ); - break; - }} - return list; - } - - PatternItemList *walkPatSqConsDataList( _repeat_sq_cons_data sqConsDataList, CONS_SQ_NL Nl ) - { - PatternItemList *list = new PatternItemList; - while ( !sqConsDataList.end() ) { - String consData = unescape( sqConsDataList.value().text().c_str() ); - PatternItem *patternItem = PatternItem::cons( PatternItem::InputTextForm, - sqConsDataList.value().loc(), consData ); - PatternItemList *tail = PatternItemList::cons( patternItem ); - list = patListConcat( list, tail ); - - sqConsDataList = sqConsDataList.next(); - } - - if ( Nl != 0 ) { - String nl = unescape( Nl.data() ); - PatternItem *patternItem = PatternItem::cons( PatternItem::InputTextForm, - Nl.loc(), nl ); - PatternItemList *term = PatternItemList::cons( patternItem ); - list = patListConcat( list, term ); - } - - return list; - } - - ConsItemList *walkConsSqConsDataList( _repeat_sq_cons_data sqConsDataList, CONS_SQ_NL Nl ) - { - ConsItemList *list = new ConsItemList; - while ( !sqConsDataList.end() ) { - String consData = unescape( 
sqConsDataList.value().text().c_str() ); - ConsItem *consItem = ConsItem::cons( - sqConsDataList.value().loc(), ConsItem::InputText, consData ); - ConsItemList *tail = ConsItemList::cons( consItem ); - list = consListConcat( list, tail ); - - sqConsDataList = sqConsDataList.next(); - } - - if ( Nl != 0 ) { - String nl = unescape( Nl.data() ); - ConsItem *consItem = ConsItem::cons( - Nl.loc(), ConsItem::InputText, nl ); - ConsItemList *term = ConsItemList::cons( consItem ); - list = consListConcat( list, term ); - } - - return list; - } - - PatternItemList *walkLitpatElList( _repeat_litpat_el litpatElList, LIT_DQ_NL Nl, - LangVarRef *patternVarRef ) - { - PatternItemList *list = new PatternItemList; - while ( !litpatElList.end() ) { - PatternItemList *tail = walkLitpatEl( litpatElList.value(), patternVarRef ); - list = patListConcat( list, tail ); - litpatElList = litpatElList.next(); - } - - if ( Nl != 0 ) { - String nl = unescape( Nl.data() ); - PatternItem *patternItem = PatternItem::cons( PatternItem::InputTextForm, - Nl.loc(), nl ); - PatternItemList *term = PatternItemList::cons( patternItem ); - list = patListConcat( list, term ); - } - - return list; - } - - PatternItemList *walkPatternElList( _repeat_pattern_el patternElList, - LangVarRef *patternVarRef ) - { - PatternItemList *list = new PatternItemList; - while ( !patternElList.end() ) { - PatternItemList *tail = walkPatternEl( patternElList.value(), patternVarRef ); - list = patListConcat( list, tail ); - patternElList = patternElList.next(); - } - return list; - } - - PatternItemList *walkPattternTopEl( pattern_top_el patternTopEl, - LangVarRef *patternVarRef ) - { - PatternItemList *list = 0; - switch ( patternTopEl.prodName() ) { - case pattern_top_el::Dq: { - list = walkLitpatElList( patternTopEl.LitpatElList(), - patternTopEl.dq_lit_term().LIT_DQ_NL(), patternVarRef ); - break; - } - case pattern_top_el::Sq: { - list = walkPatSqConsDataList( patternTopEl.SqConsDataList(), - 
patternTopEl.sq_lit_term().CONS_SQ_NL() ); - break; - } - case pattern_top_el::Tilde: { - String patternData = patternTopEl.opt_tilde_data().text().c_str(); - patternData += '\n'; - PatternItem *patternItem = PatternItem::cons( PatternItem::InputTextForm, - patternTopEl.opt_tilde_data().loc(), patternData ); - list = PatternItemList::cons( patternItem ); - break; - } - case pattern_top_el::SubList: { - list = walkPatternElList( patternTopEl.PatternElList(), patternVarRef ); - break; - }} - return list; - } - - PatternItemList *walkPatternList( pattern_list patternList, LangVarRef *patternVarRef ) - { - PatternItemList *list = 0; - switch ( patternList.prodName() ) { - case pattern_list::Base: { - list = walkPattternTopEl( patternList.pattern_top_el(), patternVarRef ); - break; - }} - return list; - } - - PatternItemList *walkPattern( pattern Pattern, LangVarRef *patternVarRef ) - { - return walkPatternList( Pattern.pattern_list(), patternVarRef ); - } - - LangExpr *walkOptDefInit( opt_def_init optDefInit ) - { - LangExpr *expr = 0; - if ( optDefInit.prodName() == opt_def_init::Init ) - expr = walkCodeExpr( optDefInit.code_expr() ); - return expr; - } - - LangStmt *walkExportDef( export_def exportDef ) - { - ObjectField *objField = walkVarDef( exportDef.var_def(), - ObjectField::StructFieldType ); - LangExpr *expr = walkOptDefInit( exportDef.opt_def_init() ); - - return exportStmt( objField, LangStmt::AssignType, expr ); - } - - LangStmt *walkGlobalDef( global_def GlobalDef ) - { - ObjectField *objField = walkVarDef( GlobalDef.var_def(), - ObjectField::StructFieldType ); - LangExpr *expr = walkOptDefInit( GlobalDef.opt_def_init() ); - - return globalDef( objField, expr, LangStmt::AssignType ); - } - - void walkAliasDef( alias_def aliasDef ) - { - String id = aliasDef.id().data(); - TypeRef *typeRef = walkTypeRef( aliasDef.type_ref() ); - alias( aliasDef.id().loc(), id, typeRef ); - } - - CodeBlock *walkOptTranslate( opt_translate optTranslate ) - { - CodeBlock 
*block = 0; - if ( optTranslate.prodName() == opt_translate::Translate ) { - ObjectDef *localFrame = blockOpen(); - StmtList *stmtList = walkLangStmtList( optTranslate.lang_stmt_list() ); - block = CodeBlock::cons( stmtList, localFrame ); - block->context = curStruct(); - blockClose(); - } - return block; - } - - PredDecl *walkPredToken( pred_token predToken ) - { - NamespaceQual *nspaceQual = walkRegionQual( predToken.region_qual() ); - PredDecl *predDecl = 0; - switch ( predToken.prodName() ) { - case pred_token::Id: { - String id = predToken.id().data(); - predDecl = predTokenName( predToken.id().loc(), nspaceQual, id ); - break; - } - case pred_token::Lit: { - String lit = predToken.backtick_lit().data(); - predDecl = predTokenLit( predToken.backtick_lit().loc(), lit, nspaceQual ); - break; - }} - return predDecl; - } - - PredDeclList *walkPredTokenList( pred_token_list predTokenList ) - { - PredDeclList *list = 0; - switch ( predTokenList.prodName() ) { - case pred_token_list::List: { - list = walkPredTokenList( predTokenList._pred_token_list() ); - PredDecl *predDecl = walkPredToken( predTokenList.pred_token() ); - list->append( predDecl ); - break; - } - case pred_token_list::Base: { - PredDecl *predDecl = walkPredToken( predTokenList.pred_token() ); - list = new PredDeclList; - list->append( predDecl ); - break; - }} - return list; - } - - PredType walkPredType( pred_type predType ) - { - PredType pt = PredLeft; - switch ( predType.prodName() ) { - case pred_type::Left: - pt = PredLeft; - break; - case pred_type::Right: - pt = PredRight; - break; - case pred_type::NonAssoc: - pt = PredNonassoc; - break; - } - - return pt; - } - - void walkPrecedenceDef( precedence_def precedenceDef ) - { - PredType predType = walkPredType( precedenceDef.pred_type() ); - PredDeclList *predDeclList = walkPredTokenList( - precedenceDef.pred_token_list() ); - precedenceStmt( predType, predDeclList ); - } - - StmtList *walkInclude( include Include ) - { - String lit = ""; - 
_repeat_sq_cons_data sqConsDataList = Include.SqConsDataList(); - while ( !sqConsDataList.end() ) { - colm_data *data = sqConsDataList.value().data(); - lit.append( data->data, data->length ); - sqConsDataList = sqConsDataList.next(); - } - - String file = unescape( lit ); - - /* Check if we can open the input file for reading. */ - if ( ! readCheck( file.data ) ) { - - bool found = false; - for ( ArgsVector::Iter av = includePaths; av.lte(); av++ ) { - String path = String( *av ) + "/" + file; - if ( readCheck( path.data ) ) { - found = true; - file = path; - break; - } - } - - if ( !found ) - error() << "could not open " << file.data << " for reading" << endp; - } - - const char *argv[3]; - argv[0] = "load-include"; - argv[1] = file.data; - argv[2] = 0; - - colm_program *program = colm_new_program( &colm_object ); - colm_run_program( program, 2, argv ); - - /* Extract the parse tree. */ - start Start = ColmTree( program ); - str Error = ColmError( program ); - - if ( Start == 0 ) { - gblErrorCount += 1; - InputLoc loc = Error.loc(); - error(loc) << file.data << ": parse error: " << Error.text() << std::endl; - return 0; - } - - StmtList *stmtList = walkRootItemList( Start.RootItemList() ); - pd->streamFileNames.append( colm_extract_fns( program ) ); - colm_delete_program( program ); - return stmtList; - } - - - NamespaceQual *walkRegionQual( region_qual regionQual ) - { - NamespaceQual *qual = 0; - switch ( regionQual.prodName() ) { - case region_qual::Qual: { - qual = walkRegionQual( regionQual._region_qual() ); - qual->qualNames.append( String( regionQual.id().data() ) ); - break; - } - case region_qual::Base: { - qual = NamespaceQual::cons( curNspace() ); - break; - }} - return qual; - } - - RepeatType walkOptRepeat( opt_repeat OptRepeat ) - { - RepeatType repeatType = RepeatNone; - switch ( OptRepeat.prodName() ) { - case opt_repeat::Star: - repeatType = RepeatRepeat; - break; - case opt_repeat::Plus: - repeatType = RepeatList; - break; - case 
opt_repeat::Question: - repeatType = RepeatOpt; - break; - } - return repeatType; - } - - TypeRef *walkValueList( type_ref typeRef ) - { - TypeRef *valType = walkTypeRef( typeRef._type_ref() ); - TypeRef *elType = TypeRef::cons( typeRef.loc(), TypeRef::ListEl, valType ); - return TypeRef::cons( typeRef.loc(), TypeRef::List, 0, elType, valType ); - } - - TypeRef *walkListEl( type_ref typeRef ) - { - TypeRef *valType = walkTypeRef( typeRef._type_ref() ); - return TypeRef::cons( typeRef.loc(), TypeRef::ListEl, valType ); - } - - TypeRef *walkValueMap( type_ref typeRef ) - { - TypeRef *keyType = walkTypeRef( typeRef.KeyType() ); - TypeRef *valType = walkTypeRef( typeRef.ValType() ); - TypeRef *elType = TypeRef::cons( typeRef.loc(), - TypeRef::MapEl, 0, keyType, valType ); - - return TypeRef::cons( typeRef.loc(), TypeRef::Map, 0, - keyType, elType, valType ); - } - - TypeRef *walkMapEl( type_ref typeRef ) - { - TypeRef *keyType = walkTypeRef( typeRef.KeyType() ); - TypeRef *valType = walkTypeRef( typeRef.ValType() ); - - return TypeRef::cons( typeRef.loc(), TypeRef::MapEl, 0, keyType, valType ); - } - - TypeRef *walkTypeRef( type_ref typeRef ) - { - TypeRef *tr = 0; - switch ( typeRef.prodName() ) { - case type_ref::Id: { - NamespaceQual *nspaceQual = walkRegionQual( typeRef.region_qual() ); - String id = typeRef.id().data(); - RepeatType repeatType = walkOptRepeat( typeRef.opt_repeat() ); - tr = TypeRef::cons( typeRef.id().loc(), nspaceQual, id, repeatType ); - break; - } - case type_ref::Int: { - tr = TypeRef::cons( internal, pd->uniqueTypeInt ); - break; - } - case type_ref::Bool: { - tr = TypeRef::cons( internal, pd->uniqueTypeBool ); - break; - } - case type_ref::Void: { - tr = TypeRef::cons( internal, pd->uniqueTypeVoid ); - break; - } - case type_ref::Parser: { - TypeRef *type = walkTypeRef( typeRef._type_ref() ); - tr = TypeRef::cons( typeRef.loc(), TypeRef::Parser, 0, type, 0 ); - break; - } - case type_ref::List: { - tr = walkValueList( typeRef ); - break; - } 
- case type_ref::Map: { - tr = walkValueMap( typeRef ); - break; - } - case type_ref::ListEl: { - tr = walkListEl( typeRef ); - break; - } - case type_ref::MapEl: { - tr = walkMapEl( typeRef ); - break; - }} - return tr; - } - - StmtList *walkBlockOrSingle( block_or_single blockOrSingle ) - { - StmtList *stmtList = 0; - switch ( blockOrSingle.prodName() ) { - case block_or_single::Single: { - stmtList = new StmtList; - LangStmt *stmt = walkStatement( blockOrSingle.statement() ); - stmtList->append( stmt ); - break; - } - case block_or_single::Block: { - stmtList = walkLangStmtList( blockOrSingle.lang_stmt_list() ); - break; - }} - - return stmtList; - } - - void walkProdEl( const String &defName, ProdElList *list, prod_el El ) - { - ObjectField *captureField = 0; - if ( El.opt_prod_el_name().prodName() == opt_prod_el_name::Name ) { - String fieldName = El.opt_prod_el_name().id().data(); - captureField = ObjectField::cons( El.opt_prod_el_name().id().loc(), - ObjectField::RhsNameType, 0, fieldName ); - } - else { - /* default the prod name. 
*/ - if ( El.prodName() == prod_el::Id ) { - String fieldName = El.id().data(); - opt_repeat::prod_name orpn = El.opt_repeat().prodName(); - if ( orpn == opt_repeat::Star ) - fieldName = "_repeat_" + fieldName; - else if ( orpn == opt_repeat::Plus ) - fieldName = "_list_" + fieldName; - else if ( orpn == opt_repeat::Question ) - fieldName = "_opt_" + fieldName; - else if ( strcmp( fieldName, defName ) == 0 ) - fieldName = "_" + fieldName; - captureField = ObjectField::cons( El.id().loc(), - ObjectField::RhsNameType, 0, fieldName ); - } - } - - RepeatType repeatType = walkOptRepeat( El.opt_repeat() ); - NamespaceQual *nspaceQual = walkRegionQual( El.region_qual() ); - - switch ( El.prodName() ) { - case prod_el::Id: { - String typeName = El.id().data(); - ProdEl *prodEl = prodElName( El.id().loc(), typeName, - nspaceQual, captureField, repeatType, false ); - appendProdEl( list, prodEl ); - break; - } - case prod_el::Lit: { - String lit = El.backtick_lit().data(); - ProdEl *prodEl = prodElLiteral( El.backtick_lit().loc(), lit, - nspaceQual, captureField, repeatType, false ); - appendProdEl( list, prodEl ); - break; - }} - } - - void walkProdElList( const String &defName, ProdElList *list, prod_el_list ProdElList ) - { - if ( ProdElList.prodName() == prod_el_list::List ) { - prod_el_list RightProdElList = ProdElList._prod_el_list(); - walkProdElList( defName, list, RightProdElList ); - walkProdEl( defName, list, ProdElList.prod_el() ); - } - } - - CodeBlock *walkOptReduce( opt_reduce OptReduce ) - { - CodeBlock *block = 0; - if ( OptReduce.prodName() == opt_reduce::Reduce ) { - ObjectDef *localFrame = blockOpen(); - StmtList *stmtList = walkLangStmtList( OptReduce.lang_stmt_list() ); - - block = CodeBlock::cons( stmtList, localFrame ); - block->context = curStruct(); - - blockClose(); - } - return block; - } - - void walkProdudction( const String &defName, LelDefList *lelDefList, prod Prod ) - { - ProdElList *list = new ProdElList; - - walkProdElList( defName, list, 
Prod.prod_el_list() ); - - String name; - if ( Prod.opt_prod_name().prodName() == opt_prod_name::Name ) - name = Prod.opt_prod_name().id().data(); - - CodeBlock *codeBlock = walkOptReduce( Prod.opt_reduce() ); - bool commit = Prod.opt_commit().prodName() == opt_commit::Commit; - - Production *prod = BaseParser::production( Prod.SQOPEN().loc(), - list, name, commit, codeBlock, 0 ); - prodAppend( lelDefList, prod ); - } - - void walkProdList( const String &name, LelDefList *lelDefList, prod_list ProdList ) - { - if ( ProdList.prodName() == prod_list::List ) - walkProdList( name, lelDefList, ProdList._prod_list() ); - - walkProdudction( name, lelDefList, ProdList.prod() ); - } - - ReOrItem *walkRegOrChar( reg_or_char regOrChar ) - { - ReOrItem *orItem = 0; - switch ( regOrChar.prodName() ) { - case reg_or_char::Char: { - String c = unescape( regOrChar.RE_CHAR().data() ); - orItem = ReOrItem::cons( regOrChar.RE_CHAR().loc(), c ); - break; - } - case reg_or_char::Range: { - String low = unescape( regOrChar.Low().data() ); - String high = unescape( regOrChar.High().data() ); - orItem = ReOrItem::cons( regOrChar.Low().loc(), low[0], high[0] ); - break; - }} - return orItem; - } - - ReOrBlock *walkRegOrData( reg_or_data regOrData ) - { - ReOrBlock *block = 0; - switch ( regOrData.prodName() ) { - case reg_or_data::Data: { - ReOrBlock *left = walkRegOrData( regOrData._reg_or_data() ); - ReOrItem *right = walkRegOrChar( regOrData.reg_or_char() ); - block = lexRegularExprData( left, right ); - break; - } - case reg_or_data::Base: { - block = ReOrBlock::cons(); - break; - }} - return block; - } - - LexFactorNeg *walkLexFactorNeg( lex_factor_neg lexFactorNeg ) - { - LexFactorNeg *factorNeg = 0; - switch ( lexFactorNeg.prodName() ) { - case lex_factor_neg::Caret: { - LexFactorNeg *recNeg = walkLexFactorNeg( lexFactorNeg._lex_factor_neg() ); - factorNeg = LexFactorNeg::cons( recNeg, LexFactorNeg::CharNegateType ); - break; - } - case lex_factor_neg::Base: { - LexFactor *factor = 
walkLexFactor( lexFactorNeg.lex_factor() ); - factorNeg = LexFactorNeg::cons( factor ); - break; - }} - return factorNeg; - } - - LexFactorRep *walkLexFactorRep( lex_factor_rep lexFactorRep ) - { - LexFactorRep *factorRep = 0; - LexFactorRep *recRep = 0; - lex_factor_rep::prod_name pn = lexFactorRep.prodName(); - - if ( pn != lex_factor_rep::Base ) - recRep = walkLexFactorRep( lexFactorRep._lex_factor_rep() ); - - switch ( pn ) { - case lex_factor_rep::Star: { - factorRep = LexFactorRep::cons( lexFactorRep.LEX_STAR().loc(), - recRep, 0, 0, LexFactorRep::StarType ); - break; - } - case lex_factor_rep::StarStar: { - factorRep = LexFactorRep::cons( lexFactorRep.LEX_STARSTAR().loc(), - recRep, 0, 0, LexFactorRep::StarStarType ); - break; - } - case lex_factor_rep::Plus: { - factorRep = LexFactorRep::cons( lexFactorRep.LEX_PLUS().loc(), - recRep, 0, 0, LexFactorRep::PlusType ); - break; - } - case lex_factor_rep::Question: { - factorRep = LexFactorRep::cons( lexFactorRep.LEX_QUESTION().loc(), - recRep, 0, 0, LexFactorRep::OptionalType ); - break; - } - case lex_factor_rep::Exact: { - int low = atoi( lexFactorRep.lex_uint().data()->data ); - factorRep = LexFactorRep::cons( lexFactorRep.lex_uint().loc(), - recRep, low, 0, LexFactorRep::ExactType ); - break; - } - case lex_factor_rep::Max: { - int high = atoi( lexFactorRep.lex_uint().data()->data ); - factorRep = LexFactorRep::cons( lexFactorRep.lex_uint().loc(), - recRep, 0, high, LexFactorRep::MaxType ); - break; - } - case lex_factor_rep::Min: { - int low = atoi( lexFactorRep.lex_uint().data()->data ); - factorRep = LexFactorRep::cons( lexFactorRep.lex_uint().loc(), - recRep, low, 0, LexFactorRep::MinType ); - break; - } - case lex_factor_rep::Range: { - int low = atoi( lexFactorRep.Low().data()->data ); - int high = atoi( lexFactorRep.High().data()->data ); - factorRep = LexFactorRep::cons( lexFactorRep.Low().loc(), - recRep, low, high, LexFactorRep::RangeType ); - break; - } - case lex_factor_rep::Base: { - 
LexFactorNeg *factorNeg = walkLexFactorNeg( lexFactorRep.lex_factor_neg() ); - factorRep = LexFactorRep::cons( factorNeg ); - }} - - return factorRep; - } - - LexTerm *walkLexTerm( lex_term lexTerm ) - { - LexTerm *term = 0; - lex_term::prod_name pn = lexTerm.prodName(); - - LexTerm *leftTerm = 0; - if ( pn != lex_term::Base ) - leftTerm = walkLexTerm( lexTerm._lex_term() ); - - LexFactorAug *factorAug = walkLexFactorAug( lexTerm.lex_factor_rep() ); - - switch ( pn ) { - case lex_term::Dot: - term = LexTerm::cons( leftTerm, factorAug, LexTerm::ConcatType ); - break; - case lex_term::ColonGt: - term = LexTerm::cons( leftTerm, factorAug, LexTerm::RightStartType ); - break; - case lex_term::ColonGtGt: - term = LexTerm::cons( leftTerm, factorAug, LexTerm::RightFinishType ); - break; - case lex_term::LtColon: - term = LexTerm::cons( leftTerm, factorAug, LexTerm::LeftType ); - break; - default: - term = LexTerm::cons( factorAug ); - break; - } - - return term; - } - - LexExpression *walkLexExpr( lex_expr lexExpr ) - { - LexExpression *expr = 0; - lex_expr::prod_name pn = lexExpr.prodName(); - - LexExpression *leftExpr = 0; - if ( pn != lex_expr::Base ) - leftExpr = walkLexExpr( lexExpr._lex_expr() ); - - LexTerm *term = walkLexTerm( lexExpr.lex_term() ); - - switch ( pn ) { - case lex_expr::Bar: - expr = LexExpression::cons( leftExpr, term, LexExpression::OrType ); - break; - case lex_expr::Amp: - expr = LexExpression::cons( leftExpr, term, LexExpression::IntersectType ); - break; - case lex_expr::Dash: - expr = LexExpression::cons( leftExpr, term, LexExpression::SubtractType ); - break; - case lex_expr::DashDash: - expr = LexExpression::cons( leftExpr, term, LexExpression::StrongSubtractType ); - break; - case lex_expr::Base: - expr = LexExpression::cons( term ); - } - return expr; - } - - - void walkRlDef( rl_def rlDef ) - { - String id = rlDef.id().data(); - - lex_expr LexExpr = rlDef.lex_expr(); - LexExpression *expr = walkLexExpr( LexExpr ); - LexJoin *join = 
LexJoin::cons( expr ); - - addRegularDef( rlDef.id().loc(), curNspace(), id, join ); - } - - void walkLexRegion( region_def regionDef ) - { - pushRegionSet( regionDef.loc() ); - walkRootItemList( regionDef.RootItemList() ); - popRegionSet(); - } - - void walkCflDef( cfl_def cflDef ) - { - String name = cflDef.id().data(); - ObjectDef *objectDef = walkVarDefList( cflDef.VarDefList() ); - objectDef->name = name; - - LelDefList *defList = new LelDefList; - walkProdList( name, defList, cflDef.prod_list() ); - - bool reduceFirst = cflDef.opt_reduce_first().REDUCEFIRST() != 0; - - NtDef *ntDef = NtDef::cons( name, curNspace(), - curStruct(), reduceFirst ); - - BaseParser::cflDef( ntDef, objectDef, defList ); - } - - CallArgVect *walkCallArgSeq( call_arg_seq callArgSeq ) - { - CallArgVect *callArgVect = new CallArgVect; - while ( callArgSeq != 0 ) { - code_expr codeExpr = callArgSeq.code_expr(); - LangExpr *expr = walkCodeExpr( codeExpr ); - callArgVect->append( new CallArg(expr) ); - callArgSeq = callArgSeq._call_arg_seq(); - } - return callArgVect; - } - - CallArgVect *walkCallArgList( call_arg_list callArgList ) - { - CallArgVect *callArgVect = walkCallArgSeq( callArgList.call_arg_seq() ); - return callArgVect; - } - - LangExpr *liftTrim( LangExpr *expr, ConsItem::Trim &trim ) - { - if ( expr->type == LangExpr::UnaryType ) { - if ( expr->op == '^' ) { - trim = ConsItem::TrimYes; - expr = expr->right; - } - else if ( expr->op == '@' ) { - trim = ConsItem::TrimNo; - expr = expr->right; - } - } - return expr; - } - - ConsItemList *walkCallArgSeqAccum( call_arg_seq callArgSeq ) - { - ConsItemList *consItemList = new ConsItemList; - while ( callArgSeq != 0 ) { - code_expr codeExpr = callArgSeq.code_expr(); - -// LangExpr *expr = walkCodeExpr( codeExpr ); -// callArgVect->append( new CallArg(expr) ); - - ConsItem::Trim trim = ConsItem::TrimDefault; - LangExpr *consExpr = walkCodeExpr( codeExpr ); - - ConsItem *consItem = ConsItem::cons( consExpr->loc, - ConsItem::ExprType, 
consExpr, trim ); - consItemList->append( consItem ); - - callArgSeq = callArgSeq._call_arg_seq(); - } - return consItemList; - } - - ConsItemList *walkCallArgListAccum( call_arg_list callArgList ) - { - return walkCallArgSeqAccum( callArgList.call_arg_seq() ); - } - - LangStmt *walkPrintStmt( print_stmt &printStmt ) - { - LangStmt *stmt = 0; - switch ( printStmt.prodName() ) { - case print_stmt::Accum: { - InputLoc loc = printStmt.PRINT().loc(); - - NamespaceQual *nspaceQual = NamespaceQual::cons( curNspace() ); - QualItemVect *qualItemVect = new QualItemVect; - LangVarRef *varRef = LangVarRef::cons( loc, curNspace(), curStruct(), - curScope(), nspaceQual, qualItemVect, String("stdout") ); - - ConsItemList *list = walkAccumulate( printStmt.accumulate() ); - - bool eof = false; //walkOptEos( StmtOrFactor.opt_eos() ); - LangExpr *expr = send( loc, varRef, list, eof ); - stmt = LangStmt::cons( loc, LangStmt::ExprType, expr ); - break; - } - case print_stmt::Tree: { - InputLoc loc = printStmt.PRINT().loc(); - - NamespaceQual *nspaceQual = NamespaceQual::cons( curNspace() ); - QualItemVect *qualItemVect = new QualItemVect; - LangVarRef *varRef = LangVarRef::cons( loc, curNspace(), curStruct(), - curScope(), nspaceQual, qualItemVect, String("stdout") ); - - ConsItemList *list = walkCallArgListAccum( printStmt.call_arg_list() ); - - bool eof = false; //walkOptEos( StmtOrFactor.opt_eos() ); - LangExpr *expr = send( loc, varRef, list, eof ); - stmt = LangStmt::cons( loc, LangStmt::ExprType, expr ); - break; - } - case print_stmt::PrintStream: { - LangVarRef *varRef = walkVarRef( printStmt.var_ref() ); - - ConsItemList *list = walkCallArgListAccum( printStmt.call_arg_list() ); - - InputLoc loc = printStmt.PRINTS().loc(); - - bool eof = false; //walkOptEos( StmtOrFactor.opt_eos() ); - LangExpr *expr = send( loc, varRef, list, eof ); - stmt = LangStmt::cons( loc, LangStmt::ExprType, expr ); - break; - }} - return stmt; - } - - QualItemVect *walkQual( qual &Qual ) - { - 
QualItemVect *qualItemVect = 0; - qual RecQual = Qual._qual(); - switch ( Qual.prodName() ) { - case qual::Dot: - case qual::Arrow: { - qualItemVect = walkQual( RecQual ); - String id = Qual.id().data(); - QualItem::Form form = Qual.DOT() != 0 ? QualItem::Dot : QualItem::Arrow; - qualItemVect->append( QualItem( form, Qual.id().loc(), id ) ); - break; - } - case qual::Base: { - qualItemVect = new QualItemVect; - break; - }} - return qualItemVect; - } - - LangVarRef *walkVarRef( var_ref varRef ) - { - NamespaceQual *nspaceQual = walkRegionQual( varRef.region_qual() ); - qual Qual = varRef.qual(); - QualItemVect *qualItemVect = walkQual( Qual ); - String id = varRef.id().data(); - LangVarRef *langVarRef = LangVarRef::cons( varRef.id().loc(), - curNspace(), curStruct(), curScope(), nspaceQual, qualItemVect, id ); - return langVarRef; - } - - ObjectField *walkOptCapture( opt_capture optCapture ) - { - ObjectField *objField = 0; - if ( optCapture.prodName() == opt_capture::Id ) { - String id = optCapture.id().data(); - objField = ObjectField::cons( optCapture.id().loc(), - ObjectField::UserLocalType, 0, id ); - } - return objField; - } - - /* - * Constructor - */ - - ConsItemList *walkLitConsEl( lit_cons_el litConsEl, TypeRef *consTypeRef ) - { - ConsItemList *list = 0; - switch ( litConsEl.prodName() ) { - case lit_cons_el::ConsData: { - String consData = unescape( litConsEl.lit_dq_data().text().c_str() ); - ConsItem *consItem = ConsItem::cons( litConsEl.lit_dq_data().loc(), - ConsItem::InputText, consData ); - list = ConsItemList::cons( consItem ); - break; - } - case lit_cons_el::SubList: { - list = walkConsElList( litConsEl.ConsElList(), consTypeRef ); - break; - }} - return list; - } - - ConsItemList *walkLitConsElList( _repeat_lit_cons_el litConsElList, - LIT_DQ_NL Nl, TypeRef *consTypeRef ) - { - ConsItemList *list = new ConsItemList; - while ( !litConsElList.end() ) { - ConsItemList *extension = walkLitConsEl( litConsElList.value(), consTypeRef ); - list = 
consListConcat( list, extension ); - litConsElList = litConsElList.next(); - } - - if ( Nl != 0 ) { - String consData = unescape( Nl.data() ); - ConsItem *consItem = ConsItem::cons( Nl.loc(), ConsItem::InputText, consData ); - ConsItemList *term = ConsItemList::cons( consItem ); - list = consListConcat( list, term ); - } - - return list; - } - - ConsItemList *walkConsEl( cons_el consEl, TypeRef *consTypeRef ) - { - ConsItemList *list = 0; - switch ( consEl.prodName() ) { - case cons_el::Lit: { - NamespaceQual *nspaceQual = walkRegionQual( consEl.region_qual() ); - String lit = consEl.backtick_lit().data(); - list = consElLiteral( consEl.backtick_lit().loc(), consTypeRef, lit, nspaceQual ); - break; - } - case cons_el::Tilde: { - String consData = consEl.opt_tilde_data().text().c_str(); - consData += '\n'; - ConsItem *consItem = ConsItem::cons( consEl.opt_tilde_data().loc(), - ConsItem::InputText, consData ); - list = ConsItemList::cons( consItem ); - break; - } - case cons_el::Sq: { - list = walkConsSqConsDataList( consEl.SqConsDataList(), - consEl.sq_lit_term().CONS_SQ_NL() ); - break; - } - case cons_el::CodeExpr: { - ConsItem::Trim trim = ConsItem::TrimDefault; - LangExpr *consExpr = walkCodeExpr( consEl.code_expr() ); - ConsItem *consItem = ConsItem::cons( consExpr->loc, - ConsItem::ExprType, consExpr, trim ); - list = ConsItemList::cons( consItem ); - break; - } - case cons_el::Dq: { - list = walkLitConsElList( consEl.LitConsElList(), - consEl.dq_lit_term().LIT_DQ_NL(), consTypeRef ); - break; - }} - return list; - } - - ConsItemList *walkConsElList( _repeat_cons_el consElList, TypeRef *consTypeRef ) - { - ConsItemList *list = new ConsItemList; - while ( !consElList.end() ) { - ConsItemList *extension = walkConsEl( consElList.value(), consTypeRef ); - list = consListConcat( list, extension ); - consElList = consElList.next(); - } - return list; - } - - ConsItemList *walkConsTopEl( cons_top_el consTopEl, TypeRef *consTypeRef ) - { - ConsItemList *list = 0; - 
switch ( consTopEl.prodName() ) { - case cons_top_el::Dq: { - list = walkLitConsElList( consTopEl.LitConsElList(), - consTopEl.dq_lit_term().LIT_DQ_NL(), consTypeRef ); - break; - } - case cons_top_el::Sq: { - list = walkConsSqConsDataList( consTopEl.SqConsDataList(), - consTopEl.sq_lit_term().CONS_SQ_NL() ); - break; - } - case cons_top_el::Tilde: { - String consData = consTopEl.opt_tilde_data().text().c_str(); - consData += '\n'; - ConsItem *consItem = ConsItem::cons( consTopEl.opt_tilde_data().loc(), - ConsItem::InputText, consData ); - list = ConsItemList::cons( consItem ); - break; - } - case cons_top_el::SubList: { - list = walkConsElList( consTopEl.ConsElList(), consTypeRef ); - break; - }} - return list; - } - - ConsItemList *walkConsList( cons_list consList, TypeRef *consTypeRef ) - { - return walkConsTopEl( consList.cons_top_el(), consTypeRef ); - } - - ConsItemList *walkConstructor( constructor Constructor, TypeRef *consTypeRef ) - { - return walkConsList( Constructor.cons_list(), consTypeRef ); - } - - /* - * String - */ - - ConsItemList *walkLitStringEl( lit_string_el litStringEl ) - { - ConsItemList *list = 0; - switch ( litStringEl.prodName() ) { - case lit_string_el::ConsData: { - String consData = unescape( litStringEl.lit_dq_data().text().c_str() ); - ConsItem *stringItem = ConsItem::cons( litStringEl.lit_dq_data().loc(), - ConsItem::InputText, consData ); - list = ConsItemList::cons( stringItem ); - break; - } - case lit_string_el::SubList: { - list = walkStringElList( litStringEl.StringElList() ); - break; - }} - return list; - } - - ConsItemList *walkLitStringElList( _repeat_lit_string_el litStringElList, LIT_DQ_NL Nl ) - { - ConsItemList *list = new ConsItemList; - while ( !litStringElList.end() ) { - ConsItemList *extension = walkLitStringEl( litStringElList.value() ); - list = consListConcat( list, extension ); - litStringElList = litStringElList.next(); - } - - if ( Nl != 0 ) { - String consData = unescape( Nl.data() ); - ConsItem *consItem 
= ConsItem::cons( Nl.loc(), - ConsItem::InputText, consData ); - ConsItemList *term = ConsItemList::cons( consItem ); - list = consListConcat( list, term ); - } - return list; - } - - ConsItemList *walkStringEl( string_el stringEl ) - { - ConsItemList *list = 0; - switch ( stringEl.prodName() ) { - case string_el::Dq: { - list = walkLitStringElList( stringEl.LitStringElList(), - stringEl.dq_lit_term().LIT_DQ_NL() ); - break; - } - case string_el::Sq: { - list = walkConsSqConsDataList( stringEl.SqConsDataList(), - stringEl.sq_lit_term().CONS_SQ_NL() ); - break; - } - case string_el::Tilde: { - String consData = stringEl.opt_tilde_data().text().c_str(); - consData += '\n'; - ConsItem *consItem = ConsItem::cons( stringEl.opt_tilde_data().loc(), - ConsItem::InputText, consData ); - list = ConsItemList::cons( consItem ); - break; - } - case string_el::CodeExpr: { - ConsItem::Trim trim = ConsItem::TrimDefault; - LangExpr *consExpr = walkCodeExpr( stringEl.code_expr() ); - consExpr = liftTrim( consExpr, trim ); - ConsItem *consItem = ConsItem::cons( consExpr->loc, - ConsItem::ExprType, consExpr, trim ); - list = ConsItemList::cons( consItem ); - break; - }} - return list; - } - - ConsItemList *walkStringElList( _repeat_string_el stringElList ) - { - ConsItemList *list = new ConsItemList; - while ( !stringElList.end() ) { - ConsItemList *extension = walkStringEl( stringElList.value() ); - list = consListConcat( list, extension ); - stringElList = stringElList.next(); - } - return list; - } - - ConsItemList *walkStringTopEl( string_top_el stringTopEl ) - { - ConsItemList *list = 0; - switch ( stringTopEl.prodName() ) { - case string_top_el::Dq: { - list = walkLitStringElList( stringTopEl.LitStringElList(), - stringTopEl.dq_lit_term().LIT_DQ_NL() ); - break; - } - case string_el::Sq: { - list = walkConsSqConsDataList( stringTopEl.SqConsDataList(), - stringTopEl.sq_lit_term().CONS_SQ_NL() ); - break; - } - case string_top_el::Tilde: { - String consData = 
stringTopEl.opt_tilde_data().text().c_str(); - consData += '\n'; - ConsItem *consItem = ConsItem::cons( stringTopEl.opt_tilde_data().loc(), - ConsItem::InputText, consData ); - list = ConsItemList::cons( consItem ); - break; - } - case string_top_el::SubList: { - list = walkStringElList( stringTopEl.StringElList() ); - break; - }} - return list; - } - - ConsItemList *walkStringList( string_list stringList ) - { - return walkStringTopEl( stringList.string_top_el() ); - } - - ConsItemList *walkString( string String ) - { - ConsItemList *list = walkStringList( String.string_list() ); - return list; - } - - /* - * Accum - */ - - ConsItemList *walkLitAccumEl( lit_accum_el litAccumEl ) - { - ConsItemList *list = 0; - switch ( litAccumEl.prodName() ) { - case lit_accum_el::ConsData: { - String consData = unescape( litAccumEl.lit_dq_data().text().c_str() ); - ConsItem *consItem = ConsItem::cons( litAccumEl.lit_dq_data().loc(), - ConsItem::InputText, consData ); - list = ConsItemList::cons( consItem ); - break; - } - case lit_accum_el::SubList: { - list = walkAccumElList( litAccumEl.AccumElList() ); - break; - }} - return list; - } - - ConsItemList *walkLitAccumElList( _repeat_lit_accum_el litAccumElList, LIT_DQ_NL Nl ) - { - ConsItemList *list = new ConsItemList; - while ( !litAccumElList.end() ) { - ConsItemList *extension = walkLitAccumEl( litAccumElList.value() ); - list = consListConcat( list, extension ); - litAccumElList = litAccumElList.next(); - } - - if ( Nl != 0 ) { - String consData = unescape( Nl.data() ); - ConsItem *consItem = ConsItem::cons( Nl.loc(), ConsItem::InputText, consData ); - ConsItemList *term = ConsItemList::cons( consItem ); - list = consListConcat( list, term ); - } - - return list; - } - - ConsItemList *walkAccumEl( accum_el accumEl ) - { - ConsItemList *list = 0; - switch ( accumEl.prodName() ) { - case accum_el::Dq: { - list = walkLitAccumElList( accumEl.LitAccumElList(), - accumEl.dq_lit_term().LIT_DQ_NL() ); - break; - } - case 
accum_el::Sq: { - list = walkConsSqConsDataList( accumEl.SqConsDataList(), - accumEl.sq_lit_term().CONS_SQ_NL() ); - break; - } - case accum_el::Tilde: { - String consData = accumEl.opt_tilde_data().text().c_str(); - consData += '\n'; - ConsItem *consItem = ConsItem::cons( accumEl.opt_tilde_data().loc(), - ConsItem::InputText, consData ); - list = ConsItemList::cons( consItem ); - break; - } - case accum_el::CodeExpr: { - ConsItem::Trim trim = ConsItem::TrimDefault; - LangExpr *accumExpr = walkCodeExpr( accumEl.code_expr() ); - accumExpr = liftTrim( accumExpr, trim ); - ConsItem *consItem = ConsItem::cons( accumExpr->loc, - ConsItem::ExprType, accumExpr, trim ); - list = ConsItemList::cons( consItem ); - break; - }} - return list; - } - - ConsItemList *walkAccumElList( _repeat_accum_el accumElList ) - { - ConsItemList *list = new ConsItemList; - while ( !accumElList.end() ) { - ConsItemList *extension = walkAccumEl( accumElList.value() ); - list = consListConcat( list, extension ); - accumElList = accumElList.next(); - } - return list; - } - - ConsItemList *walkAccumTopEl( accum_top_el accumTopEl ) - { - ConsItemList *list = 0; - switch ( accumTopEl.prodName() ) { - case accum_top_el::Dq: { - list = walkLitAccumElList( accumTopEl.LitAccumElList(), - accumTopEl.dq_lit_term().LIT_DQ_NL() ); - break; - } - case accum_top_el::Sq: { - list = walkConsSqConsDataList( accumTopEl.SqConsDataList(), - accumTopEl.sq_lit_term().CONS_SQ_NL() ); - break; - } - case accum_top_el::Tilde: { - String consData = accumTopEl.opt_tilde_data().text().c_str(); - consData += '\n'; - ConsItem *consItem = ConsItem::cons( accumTopEl.opt_tilde_data().loc(), - ConsItem::InputText, consData ); - list = ConsItemList::cons( consItem ); - break; - } - case accum_top_el::SubList: { - list = walkAccumElList( accumTopEl.AccumElList() ); - break; - }} - return list; - } - - ConsItemList *walkAccumList( accum_list accumList ) - { - ConsItemList *list = walkAccumTopEl( accumList.accum_top_el() ); - - if ( 
accumList.prodName() == accum_list::List ) { - ConsItemList *extension = walkAccumList( accumList._accum_list() ); - consListConcat( list, extension ); - } - - return list; - } - - ConsItemList *walkAccumulate( accumulate Accumulate ) - { - ConsItemList *list = walkAccumList( Accumulate.accum_list() ); - return list; - } - - void walkFieldInit( FieldInitVect *list, field_init fieldInit ) - { - LangExpr *expr = walkCodeExpr( fieldInit.code_expr() ); - FieldInit *init = FieldInit::cons( expr->loc, "_name", expr ); - list->append( init ); - } - - FieldInitVect *walkFieldInit( _repeat_field_init fieldInitList ) - { - FieldInitVect *list = new FieldInitVect; - while ( !fieldInitList.end() ) { - walkFieldInit( list, fieldInitList.value() ); - fieldInitList = fieldInitList.next(); - } - return list; - } - FieldInitVect *walkOptFieldInit( opt_field_init optFieldInit ) - { - FieldInitVect *list = 0; - if ( optFieldInit.prodName() == opt_field_init::Init ) - list = walkFieldInit( optFieldInit.FieldInitList() ); - return list; - } - - LangExpr *walkStmtOrFactor( stmt_or_factor StmtOrFactor ) - { - LangExpr *expr = 0; - switch ( StmtOrFactor.prodName() ) { - case stmt_or_factor::Parse: { - /* The type we are parsing. */ - type_ref typeRefTree = StmtOrFactor.type_ref(); - TypeRef *typeRef = walkTypeRef( typeRefTree ); - ObjectField *objField = walkOptCapture( StmtOrFactor.opt_capture() ); - FieldInitVect *init = walkOptFieldInit( StmtOrFactor.opt_field_init() ); - ConsItemList *list = walkAccumulate( StmtOrFactor.accumulate() ); - - expr = parseCmd( StmtOrFactor.PARSE().loc(), false, false, objField, - typeRef, init, list, true, false, false, "" ); - break; - } - case stmt_or_factor::ParseTree: { - /* The type we are parsing. 
*/ - type_ref typeRefTree = StmtOrFactor.type_ref(); - TypeRef *typeRef = walkTypeRef( typeRefTree ); - ObjectField *objField = walkOptCapture( StmtOrFactor.opt_capture() ); - FieldInitVect *init = walkOptFieldInit( StmtOrFactor.opt_field_init() ); - ConsItemList *list = walkAccumulate( StmtOrFactor.accumulate() ); - - expr = parseCmd( StmtOrFactor.PARSE_TREE().loc(), true, false, objField, - typeRef, init, list, true, false, false, "" ); - break; - } - case stmt_or_factor::ParseStop: { - /* The type we are parsing. */ - type_ref typeRefTree = StmtOrFactor.type_ref(); - TypeRef *typeRef = walkTypeRef( typeRefTree ); - ObjectField *objField = walkOptCapture( StmtOrFactor.opt_capture() ); - FieldInitVect *init = walkOptFieldInit( StmtOrFactor.opt_field_init() ); - ConsItemList *list = walkAccumulate( StmtOrFactor.accumulate() ); - - expr = parseCmd( StmtOrFactor.PARSE_STOP().loc(), false, true, objField, - typeRef, init, list, true, false, false, "" ); - break; - } - case stmt_or_factor::Reduce: { - /* The reducer name. */ - String reducer = StmtOrFactor.id().data(); - - /* The type we are parsing. */ - type_ref typeRefTree = StmtOrFactor.type_ref(); - TypeRef *typeRef = walkTypeRef( typeRefTree ); - FieldInitVect *init = walkOptFieldInit( StmtOrFactor.opt_field_init() ); - ConsItemList *list = walkAccumulate( StmtOrFactor.accumulate() ); - - expr = parseCmd( StmtOrFactor.REDUCE().loc(), false, false, 0, - typeRef, init, list, true, true, false, reducer ); - break; - } - case stmt_or_factor::ReadReduce: { - /* The reducer name. */ - String reducer = StmtOrFactor.id().data(); - - /* The type we are parsing. 
*/ - type_ref typeRefTree = StmtOrFactor.type_ref(); - TypeRef *typeRef = walkTypeRef( typeRefTree ); - FieldInitVect *init = walkOptFieldInit( StmtOrFactor.opt_field_init() ); - ConsItemList *list = walkAccumulate( StmtOrFactor.accumulate() ); - - expr = parseCmd( StmtOrFactor.READ_REDUCE().loc(), false, false, 0, - typeRef, init, list, true, true, true, reducer ); - break; - } - case stmt_or_factor::Send: { - LangVarRef *varRef = walkVarRef( StmtOrFactor.var_ref() ); - ConsItemList *list = walkAccumulate( StmtOrFactor.accumulate() ); - bool eof = walkOptEos( StmtOrFactor.opt_eos() ); - expr = send( StmtOrFactor.SEND().loc(), varRef, list, eof ); - break; - } - case stmt_or_factor::SendTree: { - LangVarRef *varRef = walkVarRef( StmtOrFactor.var_ref() ); - ConsItemList *list = walkAccumulate( StmtOrFactor.accumulate() ); - bool eof = walkOptEos( StmtOrFactor.opt_eos() ); - expr = sendTree( StmtOrFactor.SEND_TREE().loc(), varRef, list, eof ); - break; - } - case stmt_or_factor::MakeTree: { - CallArgVect *exprList = walkCallArgList( StmtOrFactor.call_arg_list() ); - expr = LangExpr::cons( LangTerm::cons( StmtOrFactor.loc(), - LangTerm::MakeTreeType, exprList ) ); - break; - } - case stmt_or_factor::MakeToken: { - CallArgVect *exprList = walkCallArgList( StmtOrFactor.call_arg_list() ); - expr = LangExpr::cons( LangTerm::cons( StmtOrFactor.loc(), - LangTerm::MakeTokenType, exprList ) ); - break; - } - case stmt_or_factor::Cons: { - /* The type we are parsing. 
*/ - type_ref typeRefTree = StmtOrFactor.type_ref(); - TypeRef *typeRef = walkTypeRef( typeRefTree ); - ObjectField *objField = walkOptCapture( StmtOrFactor.opt_capture() ); - ConsItemList *list = walkConstructor( StmtOrFactor.constructor(), typeRef ); - FieldInitVect *init = walkOptFieldInit( StmtOrFactor.opt_field_init() ); - - expr = construct( StmtOrFactor.CONS().loc(), objField, list, typeRef, init ); - break; - } - case stmt_or_factor::Match: { - LangVarRef *varRef = walkVarRef( StmtOrFactor.var_ref() ); - PatternItemList *list = walkPattern( StmtOrFactor.pattern(), varRef ); - expr = match( StmtOrFactor.loc(), varRef, list ); - break; - } - case stmt_or_factor::New: { - TypeRef *typeRef = walkTypeRef( StmtOrFactor.type_ref() ); - - ObjectField *captureField = walkOptCapture( StmtOrFactor.opt_capture() ); - FieldInitVect *init = walkFieldInit( StmtOrFactor.FieldInitList() ); - - LangVarRef *captureVarRef = 0; - if ( captureField != 0 ) { - captureVarRef = LangVarRef::cons( captureField->loc, - curNspace(), curStruct(), curScope(), captureField->name ); - } - - expr = LangExpr::cons( LangTerm::consNew( - StmtOrFactor.loc(), typeRef, captureVarRef, init ) ); - - /* Check for redeclaration. */ - if ( captureField != 0 ) { - if ( curScope()->checkRedecl( captureField->name ) != 0 ) { - error( captureField->loc ) << "variable " << - captureField->name << " redeclared" << endp; - } - - /* Insert it into the field map. 
*/ - captureField->typeRef = typeRef; - curScope()->insertField( captureField->name, captureField ); - } - break; - }} - return expr; - } - - LangExpr *walkCodeFactor( code_factor codeFactor, bool used = true ) - { - LangExpr *expr = 0; - switch ( codeFactor.prodName() ) { - case code_factor::VarRef: { - LangVarRef *langVarRef = walkVarRef( codeFactor.var_ref() ); - LangTerm *term = LangTerm::cons( langVarRef->loc, - LangTerm::VarRefType, langVarRef ); - expr = LangExpr::cons( term ); - break; - } - case code_factor::Call: { - LangVarRef *langVarRef = walkVarRef( codeFactor.var_ref() ); - CallArgVect *exprVect = walkCallArgList( codeFactor.call_arg_list() ); - LangTerm *term = LangTerm::cons( langVarRef->loc, langVarRef, exprVect ); - expr = LangExpr::cons( term ); - break; - } - case code_factor::Number: { - String number = codeFactor.number().text().c_str(); - LangTerm *term = LangTerm::cons( codeFactor.number().loc(), - LangTerm::NumberType, number ); - expr = LangExpr::cons( term ); - break; - } - case code_factor::StmtOrFactor: { - expr = walkStmtOrFactor( codeFactor.stmt_or_factor() ); - break; - } - case code_factor::Nil: { - expr = LangExpr::cons( LangTerm::cons( codeFactor.NIL().loc(), - LangTerm::NilType ) ); - break; - } - case code_factor::True: { - expr = LangExpr::cons( LangTerm::cons( codeFactor.TRUE().loc(), - LangTerm::TrueType ) ); - break; - } - case code_factor::False: { - expr = LangExpr::cons( LangTerm::cons( codeFactor.FALSE().loc(), - LangTerm::FalseType ) ); - break; - } - case code_factor::Paren: { - expr = walkCodeExpr( codeFactor.code_expr() ); - break; - } - case code_factor::String: { - ConsItemList *list = walkString( codeFactor.string() ); - expr = LangExpr::cons( LangTerm::cons( codeFactor.string().loc(), list ) ); - break; - } - case code_factor::In: { - TypeRef *typeRef = walkTypeRef( codeFactor.type_ref() ); - LangVarRef *varRef = walkVarRef( codeFactor.var_ref() ); - expr = LangExpr::cons( LangTerm::cons( typeRef->loc, - 
LangTerm::SearchType, typeRef, varRef ) ); - break; - } - case code_factor::TypeId: { - TypeRef *typeRef = walkTypeRef( codeFactor.type_ref() ); - expr = LangExpr::cons( LangTerm::cons( codeFactor.loc(), - LangTerm::TypeIdType, typeRef ) ); - break; - } - case code_factor::Cast: { - TypeRef *typeRef = walkTypeRef( codeFactor.type_ref() ); - LangExpr *castExpr = walkCodeFactor( codeFactor._code_factor() ); - expr = LangExpr::cons( LangTerm::cons( codeFactor.loc(), - LangTerm::CastType, typeRef, castExpr ) ); - break; - }} - return expr; - } - - LangExpr *walkCodeAdditive( code_additive additive, bool used = true ) - { - LangExpr *expr = 0; - switch ( additive.prodName() ) { - case code_additive::Plus: { - LangExpr *left = walkCodeAdditive( additive._code_additive() ); - LangExpr *right = walkCodeMultiplicitive( additive.code_multiplicitive() ); - expr = LangExpr::cons( additive.PLUS().loc(), left, '+', right ); - break; - } - case code_additive::Minus: { - LangExpr *left = walkCodeAdditive( additive._code_additive() ); - LangExpr *right = walkCodeMultiplicitive( additive.code_multiplicitive() ); - expr = LangExpr::cons( additive.MINUS().loc(), left, '-', right ); - break; - } - case code_additive::Base: { - expr = walkCodeMultiplicitive( additive.code_multiplicitive(), used ); - break; - }} - return expr; - } - - LangExpr *walkCodeUnary( code_unary unary, bool used = true ) - { - LangExpr *expr = 0; - - switch ( unary.prodName() ) { - case code_unary::Bang: { - LangExpr *factor = walkCodeFactor( unary.code_factor() ); - expr = LangExpr::cons( unary.BANG().loc(), '!', factor ); - break; - } - case code_unary::Dollar: { - LangExpr *factor = walkCodeFactor( unary.code_factor() ); - expr = LangExpr::cons( unary.DOLLAR().loc(), '$', factor ); - break; - } - case code_unary::DollarDollar: { - LangExpr *factor = walkCodeFactor( unary.code_factor() ); - expr = LangExpr::cons( unary.DOLLAR().loc(), 'S', factor ); - break; - } - case code_unary::Caret: { - LangExpr *factor = 
walkCodeFactor( unary.code_factor() ); - expr = LangExpr::cons( unary.CARET().loc(), '^', factor ); - break; - } - case code_unary::At: { - LangExpr *factor = walkCodeFactor( unary.code_factor() ); - expr = LangExpr::cons( unary.AT().loc(), '@', factor ); - break; - } - case code_unary::Percent: { - LangExpr *factor = walkCodeFactor( unary.code_factor() ); - expr = LangExpr::cons( unary.PERCENT().loc(), '%', factor ); - break; - } - case code_unary::Base: { - LangExpr *factor = walkCodeFactor( unary.code_factor(), used ); - expr = factor; - }} - - return expr; - } - - LangExpr *walkCodeRelational( code_relational codeRelational, bool used = true ) - { - LangExpr *expr = 0, *left = 0; - - bool base = codeRelational.prodName() == code_relational::Base; - - if ( ! base ) { - used = true; - left = walkCodeRelational( codeRelational._code_relational() ); - } - - LangExpr *additive = walkCodeAdditive( codeRelational.code_additive(), used ); - - switch ( codeRelational.prodName() ) { - case code_relational::EqEq: { - expr = LangExpr::cons( codeRelational.loc(), left, OP_DoubleEql, additive ); - break; - } - case code_relational::Neq: { - expr = LangExpr::cons( codeRelational.loc(), left, OP_NotEql, additive ); - break; - } - case code_relational::Lt: { - expr = LangExpr::cons( codeRelational.loc(), left, '<', additive ); - break; - } - case code_relational::Gt: { - expr = LangExpr::cons( codeRelational.loc(), left, '>', additive ); - break; - } - case code_relational::LtEq: { - expr = LangExpr::cons( codeRelational.loc(), left, OP_LessEql, additive ); - break; - } - case code_relational::GtEq: { - expr = LangExpr::cons( codeRelational.loc(), left, OP_GrtrEql, additive ); - break; - } - case code_relational::Base: { - expr = additive; - break; - }} - return expr; - } - - LangStmt *walkExprStmt( expr_stmt exprStmt ) - { - LangExpr *expr = walkCodeExpr( exprStmt.code_expr(), false ); - LangStmt *stmt = LangStmt::cons( expr->loc, LangStmt::ExprType, expr ); - return stmt; - } 
- - ObjectField *walkVarDef( var_def varDef, ObjectField::Type type ) - { - String id = varDef.id().data(); - TypeRef *typeRef = walkTypeRef( varDef.type_ref() ); - return ObjectField::cons( varDef.id().loc(), type, typeRef, id ); - } - - IterCall *walkIterCall( iter_call Tree ) - { - IterCall *iterCall = 0; - switch ( Tree.prodName() ) { - case iter_call::Call: { - LangVarRef *varRef = walkVarRef( Tree.var_ref() ); - CallArgVect *exprVect = walkCallArgList( Tree.call_arg_list() ); - LangTerm *langTerm = LangTerm::cons( varRef->loc, varRef, exprVect ); - iterCall = IterCall::cons( IterCall::Call, langTerm ); - break; - } - case iter_call::Id: { - String tree = Tree.id().data(); - LangVarRef *varRef = LangVarRef::cons( Tree.id().loc(), - curNspace(), curStruct(), curScope(), tree ); - LangTerm *langTerm = LangTerm::cons( Tree.id().loc(), - LangTerm::VarRefType, varRef ); - LangExpr *langExpr = LangExpr::cons( langTerm ); - iterCall = IterCall::cons( IterCall::Expr, langExpr ); - break; - } - case iter_call::Expr: { - LangExpr *langExpr = walkCodeExpr( Tree.code_expr() ); - iterCall = IterCall::cons( IterCall::Expr, langExpr ); - break; - }} - - return iterCall; - } - - LangStmt *walkElsifClause( elsif_clause elsifClause ) - { - pushScope(); - LangExpr *expr = walkCodeExpr( elsifClause.code_expr() ); - StmtList *stmtList = walkBlockOrSingle( elsifClause.block_or_single() ); - LangStmt *stmt = LangStmt::cons( LangStmt::IfType, expr, stmtList, 0 ); - popScope(); - return stmt; - } - - LangStmt *walkOptionalElse( optional_else optionalElse ) - { - LangStmt *stmt = 0; - if ( optionalElse.prodName() == optional_else::Else ) { - pushScope(); - StmtList *stmtList = walkBlockOrSingle( optionalElse.block_or_single() ); - stmt = LangStmt::cons( LangStmt::ElseType, stmtList ); - popScope(); - } - return stmt; - } - - LangStmt *walkElsifList( elsif_list elsifList ) - { - LangStmt *stmt = 0; - switch ( elsifList.prodName() ) { - case elsif_list::Clause: - stmt = walkElsifClause( 
elsifList.elsif_clause() ); - stmt->elsePart = walkElsifList( elsifList._elsif_list() ); - break; - case elsif_list::OptElse: - stmt = walkOptionalElse( elsifList.optional_else() ); - break; - } - return stmt; - } - - LangStmt *walkCaseClause( case_clause CaseClause, var_ref VarRef ) - { - pushScope(); - - LangVarRef *varRef = walkVarRef( VarRef ); - - scopeTop->caseClauseVarRef = varRef; - - LangExpr *expr = 0; - - switch ( CaseClause.prodName() ) { - case case_clause::Pattern: { - /* A match pattern. */ - PatternItemList *list = walkPattern( CaseClause.pattern(), varRef ); - expr = match( CaseClause.loc(), varRef, list ); - break; - } - case case_clause::Id: { - /* An identifier to be interpreted as a production name. */ - String prod = CaseClause.id().text().c_str(); - expr = prodCompare( CaseClause.loc(), varRef, prod, 0 ); - break; - } - case case_clause::IdPat: { - String prod = CaseClause.id().text().c_str(); - PatternItemList *list = walkPattern( CaseClause.pattern(), varRef ); - LangExpr *matchExpr = match( CaseClause.loc(), varRef, list ); - expr = prodCompare( CaseClause.loc(), varRef, prod, matchExpr ); - break; - } - } - - StmtList *stmtList = walkBlockOrSingle( CaseClause.block_or_single() ); - - popScope(); - - LangStmt *stmt = LangStmt::cons( LangStmt::IfType, expr, stmtList ); - - return stmt; - } - - LangStmt *walkCaseClauseList( case_clause_list CaseClauseList, var_ref VarRef ) - { - LangStmt *stmt = 0; - switch ( CaseClauseList.prodName() ) { - case case_clause_list::Recursive: { - stmt = walkCaseClause( CaseClauseList.case_clause(), VarRef ); - - LangStmt *recList = walkCaseClauseList( - CaseClauseList._case_clause_list(), VarRef ); - - stmt->setElsePart( recList ); - break; - } - case case_clause_list::BaseCase: { - stmt = walkCaseClause( CaseClauseList.case_clause(), VarRef ); - break; - } - case case_clause_list::BaseDefault: { - pushScope(); - StmtList *stmtList = walkBlockOrSingle( - CaseClauseList.default_clause().block_or_single() ); - 
popScope(); - stmt = LangStmt::cons( LangStmt::ElseType, stmtList ); - break; - } - } - return stmt; - } - - void walkStructVarDef( struct_var_def StructVarDef ) - { - ObjectField *objField = walkVarDef( StructVarDef.var_def(), - ObjectField::StructFieldType ); - structVarDef( objField->loc, objField ); - } - - TypeRef *walkReferenceTypeRef( reference_type_ref ReferenceTypeRef ) - { - TypeRef *typeRef = walkTypeRef( ReferenceTypeRef.type_ref() ); - return TypeRef::cons( ReferenceTypeRef.REF().loc(), TypeRef::Ref, typeRef ); - } - - ObjectField *walkParamVarDef( param_var_def paramVarDef ) - { - String id = paramVarDef.id().data(); - TypeRef *typeRef = 0; - ObjectField::Type type; - - switch ( paramVarDef.prodName() ) { - case param_var_def::Type: - typeRef = walkTypeRef( paramVarDef.type_ref() ); - type = ObjectField::ParamValType; - break; - case param_var_def::Ref: - typeRef = walkReferenceTypeRef( paramVarDef.reference_type_ref() ); - type = ObjectField::ParamRefType; - break; - } - - return addParam( paramVarDef.id().loc(), type, typeRef, id ); - } - - ParameterList *walkParamVarDefSeq( param_var_def_seq paramVarDefSeq ) - { - ParameterList *paramList = new ParameterList; - while ( paramVarDefSeq != 0 ) { - ObjectField *param = walkParamVarDef( paramVarDefSeq.param_var_def() ); - appendParam( paramList, param ); - paramVarDefSeq = paramVarDefSeq._param_var_def_seq(); - } - return paramList; - } - - ParameterList *walkParamVarDefList( param_var_def_list paramVarDefList ) - { - ParameterList *paramList = walkParamVarDefSeq( - paramVarDefList.param_var_def_seq() ); - return paramList; - } - - bool walkOptExport( opt_export OptExport ) - { - return OptExport.prodName() == opt_export::Export; - } - - void walkFunctionDef( function_def FunctionDef ) - { - ObjectDef *localFrame = blockOpen(); - - bool exprt = walkOptExport( FunctionDef.opt_export() ); - TypeRef *typeRef = walkTypeRef( FunctionDef.type_ref() ); - String id = FunctionDef.id().data(); - ParameterList 
*paramList = walkParamVarDefList( FunctionDef.ParamVarDefList() ); - StmtList *stmtList = walkLangStmtList( FunctionDef.lang_stmt_list() ); - functionDef( stmtList, localFrame, paramList, typeRef, id, exprt ); - - blockClose(); - } - - void walkInHostDef( in_host_def InHostDef ) - { - ObjectDef *localFrame = blockOpen(); - - TypeRef *typeRef = walkTypeRef( InHostDef.type_ref() ); - String id = InHostDef.id().data(); - ParameterList *paramList = walkParamVarDefList( InHostDef.ParamVarDefList() ); - inHostDef( InHostDef.HostFunc().data(), localFrame, paramList, typeRef, id, false ); - - blockClose(); - } - - void walkIterDef( iter_def IterDef ) - { - ObjectDef *localFrame = blockOpen(); - - String id = IterDef.id().data(); - ParameterList *paramList = walkParamVarDefList( IterDef.ParamVarDefList() ); - StmtList *stmtList = walkLangStmtList( IterDef.lang_stmt_list() ); - iterDef( stmtList, localFrame, paramList, id ); - - blockClose(); - } - - void walkStructItem( struct_item structItem ) - { - switch ( structItem.prodName() ) { - case struct_item::Rl: - walkRlDef( structItem.rl_def() ); - break; - case struct_item::StructVar: - walkStructVarDef( structItem.struct_var_def() ); - break; - case struct_item::Token: - walkTokenDef( structItem.token_def() ); - break; - case struct_item::IgnoreCollector: - walkIgnoreCollector( structItem.ic_def() ); - break; - case struct_item::Ignore: - walkIgnoreDef( structItem.ignore_def() ); - break; - case struct_item::Literal: - walkLiteralDef( structItem.literal_def() ); - break; - case struct_item::Cfl: - walkCflDef( structItem.cfl_def() ); - break; - case struct_item::Region: - walkLexRegion( structItem.region_def() ); - break; - case struct_item::Struct: - walkStructDef( structItem.struct_def() ); - break; - case struct_item::Function: - walkFunctionDef( structItem.function_def() ); - break; - case struct_item::InHost: - walkInHostDef( structItem.in_host_def() ); - break; - case struct_item::Iter: - walkIterDef( 
structItem.iter_def() ); - break; - case struct_item::PreEof: - walkPreEof( structItem.pre_eof_def() ); - break; - case struct_item::Export: - walkExportDef( structItem.export_def() ); - break; - case struct_item::Precedence: - walkPrecedenceDef( structItem.precedence_def() ); - break; -// case struct_item::ListEl: -// listElDef( structItem.list_el_def().id().data() ); -// break; -// case struct_item::MapEl: { -// map_el_def Def = structItem.map_el_def(); -// TypeRef *keyTr = walkTypeRef( Def.type_ref() ); -// mapElDef( Def.id().data(), keyTr ); -// break; -// } - case struct_item::Alias: - walkAliasDef( structItem.alias_def() ); - break; - } - } - - void walkStructDef( struct_def structDef ) - { - String name = structDef.id().data(); - structHead( structDef.id().loc(), curNspace(), name, ObjectDef::StructType ); - - _repeat_struct_item structItemList = structDef.ItemList(); - while ( !structItemList.end() ) { - walkStructItem( structItemList.value() ); - structItemList = structItemList.next(); - } - - structStack.pop(); - namespaceStack.pop(); - } - - void walkNamespaceDef( namespace_def NamespaceDef, StmtList *stmtList ) - { - String name = NamespaceDef.id().data(); - createNamespace( NamespaceDef.id().loc(), name ); - walkNamespaceItemList( NamespaceDef.ItemList(), stmtList ); - namespaceStack.pop(); - } - - void walkRedItem( host_item item, ReduceTextItemList &list ) - { - if ( item.RED_LHS() != 0 ) { - ReduceTextItem *rti = new ReduceTextItem; - rti->type = ReduceTextItem::LhsRef; - list.append( rti ); - } - else if ( item.RED_RHS_REF() != 0 ) { - ReduceTextItem *rti = new ReduceTextItem; - rti->type = ReduceTextItem::RhsRef; - rti->txt = item.RED_RHS_REF().text().c_str(); - list.append( rti ); - } - else if ( item.RED_TREE_REF() != 0 ) { - ReduceTextItem *rti = new ReduceTextItem; - rti->type = ReduceTextItem::TreeRef; - rti->txt = item.RED_TREE_REF().text().c_str(); - list.append( rti ); - } - else if ( item.RED_RHS_LOC() != 0 ) { - ReduceTextItem *rti = new 
ReduceTextItem; - rti->type = ReduceTextItem::RhsLoc; - rti->txt = item.RED_RHS_LOC().text().c_str(); - list.append( rti ); - } - else if ( item.RED_RHS_NREF() != 0 ) { - ReduceTextItem *rti = new ReduceTextItem; - rti->type = ReduceTextItem::RhsRef; - rti->n = atoi( item.RED_RHS_NREF().text().c_str() + 1 ); - list.append( rti ); - } - else if ( item.RED_TREE_NREF() != 0 ) { - ReduceTextItem *rti = new ReduceTextItem; - rti->type = ReduceTextItem::TreeRef; - rti->n = atoi( item.RED_TREE_NREF().text().c_str() + 2 ); - list.append( rti ); - } - else if ( item.RED_RHS_NLOC() != 0 ) { - ReduceTextItem *rti = new ReduceTextItem; - rti->type = ReduceTextItem::RhsLoc; - rti->n = atoi( item.RED_RHS_NLOC().text().c_str() + 1 ); - list.append( rti ); - } - else if ( item.RED_OPEN() != 0 ) { - ReduceTextItem *open = new ReduceTextItem; - open->type = ReduceTextItem::Txt; - open->txt = "{"; - list.append( open ); - - walkRedItemList( item.HostItems(), list ); - - ReduceTextItem *close = new ReduceTextItem; - close->type = ReduceTextItem::Txt; - close->txt = "}"; - list.append( close ); - } - else { - if ( list.length() > 0 && list.tail->type == ReduceTextItem::Txt ) { - std::string txt = item.text(); - list.tail->txt.append( txt.c_str(), txt.size() ); - } - else { - ReduceTextItem *rti = new ReduceTextItem; - rti->type = ReduceTextItem::Txt; - rti->txt = item.text().c_str(); - list.append( rti ); - } - } - } - - void walkRedItemList( _repeat_host_item itemList, ReduceTextItemList &list ) - { - while ( !itemList.end() ) { - walkRedItem( itemList.value(), list ); - itemList = itemList.next(); - } - } - - void walkRedNonTerm( red_nonterm RN ) - { - InputLoc loc = RN.RED_OPEN().loc(); - - TypeRef *typeRef = walkTypeRef( RN.type_ref() ); - - ReduceNonTerm *rnt = new ReduceNonTerm( loc, typeRef ); - - walkRedItemList( RN.HostItems(), rnt->itemList ); - - curReduction()->reduceNonTerms.append( rnt ); - } - - void walkRedAction( red_action RA ) - { - InputLoc loc = 
RA.RED_OPEN().loc(); - String text = RA.HostItems().text().c_str(); - - TypeRef *typeRef = walkTypeRef( RA.type_ref() ); - - ReduceAction *ra = new ReduceAction( loc, typeRef, RA.id().data() ); - - walkRedItemList( RA.HostItems(), ra->itemList ); - - curReduction()->reduceActions.append( ra ); - } - - void walkReductionItem( reduction_item reductionItem ) - { - switch ( reductionItem.prodName() ) { - case reduction_item::NonTerm: { - walkRedNonTerm( reductionItem.red_nonterm() ); - break; - } - case reduction_item::Action: { - walkRedAction( reductionItem.red_action() ); - break; - } - } - } - - void walkReductionList( _repeat_reduction_item itemList ) - { - while ( !itemList.end() ) { - walkReductionItem( itemList.value() ); - itemList = itemList.next(); - } - } - - void walkRootItem( root_item rootItem, StmtList *stmtList ) - { - switch ( rootItem.prodName() ) { - case root_item::Rl: - walkRlDef( rootItem.rl_def() ); - break; - case root_item::Token: - walkTokenDef( rootItem.token_def() ); - break; - case root_item::IgnoreCollector: - walkIgnoreCollector( rootItem.ic_def() ); - break; - case root_item::Ignore: - walkIgnoreDef( rootItem.ignore_def() ); - break; - case root_item::Literal: - walkLiteralDef( rootItem.literal_def() ); - break; - case root_item::Cfl: - walkCflDef( rootItem.cfl_def() ); - break; - case root_item::Region: - walkLexRegion( rootItem.region_def() ); - break; - case root_item::Statement: { - LangStmt *stmt = walkStatement( rootItem.statement() ); - if ( stmt != 0 ) - stmtList->append( stmt ); - break; - } - case root_item::Struct: - walkStructDef( rootItem.struct_def() ); - break; - case root_item::Namespace: - walkNamespaceDef( rootItem.namespace_def(), stmtList ); - break; - case root_item::Function: - walkFunctionDef( rootItem.function_def() ); - break; - case struct_item::InHost: - walkInHostDef( rootItem.in_host_def() ); - break; - case root_item::Iter: - walkIterDef( rootItem.iter_def() ); - break; - case root_item::PreEof: - 
walkPreEof( rootItem.pre_eof_def() ); - break; - case root_item::Export: { - LangStmt *stmt = walkExportDef( rootItem.export_def() ); - if ( stmt != 0 ) - stmtList->append( stmt ); - break; - } - case root_item::Alias: - walkAliasDef( rootItem.alias_def() ); - break; - case root_item::Precedence: - walkPrecedenceDef( rootItem.precedence_def() ); - break; - case root_item::Include: { - StmtList *includeList = walkInclude( rootItem.include() ); - if ( includeList ) - stmtList->append( *includeList ); - break; - } - case root_item::Global: { - LangStmt *stmt = walkGlobalDef( rootItem.global_def() ); - if ( stmt != 0 ) - stmtList->append( stmt ); - break; - } - case root_item::Reduction: { - reduction_def RD = rootItem.reduction_def(); - - InputLoc loc = RD.REDUCTION().loc(); - String id = RD.id().data(); - - createReduction( loc, id ); - - walkReductionList( RD.ItemList() ); - - reductionStack.pop(); - break; - }} - } - - void walkNamespaceItem( namespace_item item, StmtList *stmtList ) - { - switch ( item.prodName() ) { - case namespace_item::Rl: - walkRlDef( item.rl_def() ); - break; - case namespace_item::Token: - walkTokenDef( item.token_def() ); - break; - case root_item::IgnoreCollector: - walkIgnoreCollector( item.ic_def() ); - break; - case namespace_item::Ignore: - walkIgnoreDef( item.ignore_def() ); - break; - case namespace_item::Literal: - walkLiteralDef( item.literal_def() ); - break; - case namespace_item::Cfl: - walkCflDef( item.cfl_def() ); - break; - case namespace_item::Region: - walkLexRegion( item.region_def() ); - break; - case namespace_item::Struct: - walkStructDef( item.struct_def() ); - break; - case namespace_item::Namespace: - walkNamespaceDef( item.namespace_def(), stmtList ); - break; - case namespace_item::Function: - walkFunctionDef( item.function_def() ); - break; - case struct_item::InHost: - walkInHostDef( item.in_host_def() ); - break; - case namespace_item::Iter: - walkIterDef( item.iter_def() ); - break; - case 
namespace_item::PreEof: - walkPreEof( item.pre_eof_def() ); - break; - case namespace_item::Alias: - walkAliasDef( item.alias_def() ); - break; - case namespace_item::Precedence: - walkPrecedenceDef( item.precedence_def() ); - break; - case namespace_item::Include: { - StmtList *includeList = walkInclude( item.include() ); - stmtList->append( *includeList ); - break; - } - case namespace_item::Global: { - LangStmt *stmt = walkGlobalDef( item.global_def() ); - if ( stmt != 0 ) - stmtList->append( stmt ); - break; - }} - } - - bool walkNoIgnoreLeft( no_ignore_left OptNoIngore ) - { - return OptNoIngore.prodName() == no_ignore_left::Ni; - } - - bool walkNoIgnoreRight( no_ignore_right OptNoIngore ) - { - return OptNoIngore.prodName() == no_ignore_right::Ni; - } - - bool walkOptEos( opt_eos OptEos ) - { - opt_eos::prod_name pn = OptEos.prodName(); - return pn == opt_eos::Dot || pn == opt_eos::Eos; - } - - void walkLiteralItem( literal_item literalItem ) - { - bool niLeft = walkNoIgnoreLeft( literalItem.no_ignore_left() ); - bool niRight = walkNoIgnoreRight( literalItem.no_ignore_right() ); - - String lit = literalItem.backtick_lit().data(); - literalDef( literalItem.backtick_lit().loc(), lit, niLeft, niRight ); - } - - void walkLiteralList( literal_list literalList ) - { - if ( literalList.prodName() == literal_list::Item ) - walkLiteralList( literalList._literal_list() ); - walkLiteralItem( literalList.literal_item() ); - } - - void walkLiteralDef( literal_def literalDef ) - { - walkLiteralList( literalDef.literal_list() ); - } - - void walkNamespaceItemList( _repeat_namespace_item itemList, StmtList *stmtList ) - { - /* Walk the list of items. */ - while ( !itemList.end() ) { - walkNamespaceItem( itemList.value(), stmtList ); - itemList = itemList.next(); - } - } - - StmtList *walkRootItemList( _repeat_root_item rootItemList ) - { - StmtList *stmtList = new StmtList; - - /* Walk the list of items. 
*/ - while ( !rootItemList.end() ) { - walkRootItem( rootItemList.value(), stmtList ); - rootItemList = rootItemList.next(); - } - return stmtList; - } - - virtual void go( long activeRealm ); -}; - -void LoadColm::go( long activeRealm ) -{ - LoadColm::init(); - - const char *argv[3]; - argv[0] = "load-colm"; - argv[1] = inputFileName; - argv[2] = 0; - - colm_program *program = colm_new_program( &colm_object ); - colm_set_debug( program, activeRealm ); - colm_run_program( program, 2, argv ); - - /* Extract the parse tree. */ - start Start = ColmTree( program ); - str Error = ColmError( program ); - - if ( Start == 0 ) { - gblErrorCount += 1; - InputLoc loc = Error.loc(); - error(loc) << inputFileName << ": parse error: " << Error.text() << std::endl; - return; - } - - StmtList *stmtList = walkRootItemList( Start.RootItemList() ); - pd->streamFileNames.append( colm_extract_fns( program ) ); - colm_delete_program( program ); - - pd->rootCodeBlock = CodeBlock::cons( stmtList, 0 ); -} - -BaseParser *consLoadColm( Compiler *pd, const char *inputFileName ) -{ - return new LoadColm( pd, inputFileName ); -} diff --git a/src/loadcolm.h b/src/loadcolm.h deleted file mode 100644 index 0c888f9a..00000000 --- a/src/loadcolm.h +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright 2013-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef _COLM_LOADCOLM_H -#define _COLM_LOADCOLM_H - -#include "parser.h" - -BaseParser *consLoadColm( Compiler *pd, const char *inputFileName ); - -#endif /* _COLM_LOADCOLM_H */ - diff --git a/src/loadinit.cc b/src/loadinit.cc deleted file mode 100644 index fc75906c..00000000 --- a/src/loadinit.cc +++ /dev/null @@ -1,413 +0,0 @@ -/* - * Copyright 2006-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "loadinit.h" - -#include <string.h> - -#include <iostream> - -#include "gen/if1.h" - -using std::string; - -extern colm_sections colm_object; - -void LoadInit::walkProdElList( String defName, ProdElList *list, prod_el_list &prodElList ) -{ - if ( prodElList.ProdElList() != 0 ) { - prod_el_list RightProdElList = prodElList.ProdElList(); - walkProdElList( defName, list, RightProdElList ); - } - - if ( prodElList.ProdEl() != 0 ) { - prod_el El = prodElList.ProdEl(); - String typeName = El.Id().text().c_str(); - - ObjectField *captureField = 0; - if ( El.OptName().Name() != 0 ) { - /* Has a capture. */ - String fieldName = El.OptName().Name().text().c_str(); - captureField = ObjectField::cons( internal, - ObjectField::RhsNameType, 0, fieldName ); - } - else { - /* Default the capture to the name of the type. */ - String fieldName = typeName; - if ( strcmp( fieldName, defName ) == 0 ) - fieldName = "_" + defName; - captureField = ObjectField::cons( internal, - ObjectField::RhsNameType, 0, fieldName ); - } - - RepeatType repeatType = RepeatNone; - if ( El.OptRepeat().Star() != 0 ) - repeatType = RepeatRepeat; - - ProdEl *prodEl = prodElName( internal, typeName, - NamespaceQual::cons( curNspace() ), - captureField, repeatType, false ); - - appendProdEl( list, prodEl ); - } -} - -void LoadInit::walkProdList( String defName, LelDefList *outProdList, prod_list &prodList ) -{ - if ( prodList.ProdList() != 0 ) { - prod_list RightProdList = prodList.ProdList(); - walkProdList( defName, outProdList, RightProdList ); - } - - ProdElList *outElList = new ProdElList; - prod_el_list prodElList = prodList.Prod().ProdElList(); - walkProdElList( defName, outElList, prodElList ); - - String name; - if ( prodList.Prod().OptName().Name() != 0 ) - name = prodList.Prod().OptName().Name().text().c_str(); - - bool commit = prodList.Prod().OptCommit().Commit() != 0; - - Production *prod = BaseParser::production( internal, outElList, name, commit, 0, 0 ); - prodAppend( 
outProdList, prod ); -} - -LexFactor *LoadInit::walkLexFactor( lex_factor &lexFactor ) -{ - LexFactor *factor = 0; - if ( lexFactor.Literal() != 0 ) { - String litString = lexFactor.Literal().text().c_str(); - Literal *literal = Literal::cons( internal, litString, Literal::LitString ); - factor = LexFactor::cons( literal ); - } - if ( lexFactor.Id() != 0 ) { - String id = lexFactor.Id().text().c_str(); - factor = lexRlFactorName( id, internal ); - } - else if ( lexFactor.Expr() != 0 ) { - lex_expr LexExpr = lexFactor.Expr(); - LexExpression *expr = walkLexExpr( LexExpr ); - LexJoin *join = LexJoin::cons( expr ); - factor = LexFactor::cons( join ); - } - else if ( lexFactor.Low() != 0 ) { - String low = lexFactor.Low().text().c_str(); - Literal *lowLit = Literal::cons( internal, low, Literal::LitString ); - - String high = lexFactor.High().text().c_str(); - Literal *highLit = Literal::cons( internal, high, Literal::LitString ); - - Range *range = Range::cons( lowLit, highLit ); - factor = LexFactor::cons( range ); - } - return factor; -} - -LexFactorNeg *LoadInit::walkLexFactorNeg( lex_factor_neg &lexFactorNeg ) -{ - if ( lexFactorNeg.FactorNeg() != 0 ) { - lex_factor_neg Rec = lexFactorNeg.FactorNeg(); - LexFactorNeg *recNeg = walkLexFactorNeg( Rec ); - LexFactorNeg *factorNeg = LexFactorNeg::cons( recNeg, LexFactorNeg::CharNegateType ); - return factorNeg; - } - else { - lex_factor LexFactorTree = lexFactorNeg.Factor(); - LexFactor *factor = walkLexFactor( LexFactorTree ); - LexFactorNeg *factorNeg = LexFactorNeg::cons( factor ); - return factorNeg; - } -} - -LexFactorRep *LoadInit::walkLexFactorRep( lex_factor_rep &lexFactorRep ) -{ - LexFactorRep *factorRep = 0; - if ( lexFactorRep.Star() != 0 ) { - lex_factor_rep Rec = lexFactorRep.FactorRep(); - LexFactorRep *recRep = walkLexFactorRep( Rec ); - factorRep = LexFactorRep::cons( internal, recRep, 0, 0, LexFactorRep::StarType ); - } - else if ( lexFactorRep.Plus() != 0 ) { - lex_factor_rep Rec = 
lexFactorRep.FactorRep(); - LexFactorRep *recRep = walkLexFactorRep( Rec ); - factorRep = LexFactorRep::cons( internal, recRep, 0, 0, LexFactorRep::PlusType ); - } - else { - lex_factor_neg LexFactorNegTree = lexFactorRep.FactorNeg(); - LexFactorNeg *factorNeg = walkLexFactorNeg( LexFactorNegTree ); - factorRep = LexFactorRep::cons( factorNeg ); - } - return factorRep; -} - -LexFactorAug *LoadInit::walkLexFactorAug( lex_factor_rep &lexFactorRep ) -{ - LexFactorRep *factorRep = walkLexFactorRep( lexFactorRep ); - return LexFactorAug::cons( factorRep ); -} - -LexTerm *LoadInit::walkLexTerm( lex_term &lexTerm ) -{ - if ( lexTerm.Term() != 0 ) { - lex_term Rec = lexTerm.Term(); - LexTerm *leftTerm = walkLexTerm( Rec ); - - lex_factor_rep LexFactorRepTree = lexTerm.FactorRep(); - LexFactorAug *factorAug = walkLexFactorAug( LexFactorRepTree ); - - LexTerm::Type type = lexTerm.Dot() != 0 ? - LexTerm::ConcatType : LexTerm::RightFinishType; - - LexTerm *term = LexTerm::cons( leftTerm, factorAug, type ); - - return term; - } - else { - lex_factor_rep LexFactorRepTree = lexTerm.FactorRep(); - LexFactorAug *factorAug = walkLexFactorAug( LexFactorRepTree ); - LexTerm *term = LexTerm::cons( factorAug ); - return term; - } -} - -LexExpression *LoadInit::walkLexExpr( lex_expr &LexExprTree ) -{ - if ( LexExprTree.Expr() != 0 ) { - lex_expr Rec = LexExprTree.Expr(); - LexExpression *leftExpr = walkLexExpr( Rec ); - - lex_term lexTerm = LexExprTree.Term(); - LexTerm *term = walkLexTerm( lexTerm ); - LexExpression *expr = LexExpression::cons( leftExpr, term, LexExpression::OrType ); - - return expr; - } - else { - lex_term lexTerm = LexExprTree.Term(); - LexTerm *term = walkLexTerm( lexTerm ); - LexExpression *expr = LexExpression::cons( term ); - return expr; - } -} - -bool walkNoIgnore( opt_ni OptNi ) -{ - return OptNi.Ni() != 0; -} - -void LoadInit::walkTokenList( token_list &tokenList ) -{ - if ( tokenList.TokenList() != 0 ) { - token_list RightTokenList = tokenList.TokenList(); - 
walkTokenList( RightTokenList ); - } - - if ( tokenList.TokenDef() != 0 ) { - token_def tokenDef = tokenList.TokenDef(); - String name = tokenDef.Id().text().c_str(); - - ObjectDef *objectDef = ObjectDef::cons( ObjectDef::UserType, name, pd->nextObjectId++ ); - - lex_expr LexExpr = tokenDef.Expr(); - LexExpression *expr = walkLexExpr( LexExpr ); - LexJoin *join = LexJoin::cons( expr ); - - bool leftNi = walkNoIgnore( tokenDef.LeftNi() ); - bool rightNi = walkNoIgnore( tokenDef.RightNi() ); - - defineToken( internal, name, join, objectDef, 0, false, leftNi, rightNi ); - } - - if ( tokenList.IgnoreDef() != 0 ) { - ignore_def IgnoreDef = tokenList.IgnoreDef(); - - ObjectDef *objectDef = ObjectDef::cons( ObjectDef::UserType, String(), pd->nextObjectId++ ); - - lex_expr LexExpr = IgnoreDef.Expr(); - LexExpression *expr = walkLexExpr( LexExpr ); - LexJoin *join = LexJoin::cons( expr ); - - defineToken( internal, String(), join, objectDef, 0, true, false, false ); - } -} - -void LoadInit::walkLexRegion( item &LexRegion ) -{ - pushRegionSet( internal ); - - token_list tokenList = LexRegion.TokenList(); - walkTokenList( tokenList ); - - popRegionSet(); -} - -void LoadInit::walkDefinition( item &define ) -{ - prod_list ProdList = define.ProdList(); - - String name = define.DefId().text().c_str(); - - LelDefList *defList = new LelDefList; - walkProdList( name, defList, ProdList ); - - NtDef *ntDef = NtDef::cons( name, curNspace(), curStruct(), false ); - ObjectDef *objectDef = ObjectDef::cons( ObjectDef::UserType, name, - pd->nextObjectId++ ); - cflDef( ntDef, objectDef, defList ); -} - -void LoadInit::consParseStmt( StmtList *stmtList ) -{ - /* Pop argv, this yields the file name . 
*/ - CallArgVect *popArgs = new CallArgVect; - QualItemVect *popQual = new QualItemVect; - popQual->append( QualItem( QualItem::Arrow, internal, String( "argv" ) ) ); - - LangVarRef *popRef = LangVarRef::cons( internal, curNspace(), 0, - curLocalFrame()->rootScope, NamespaceQual::cons( curNspace() ), - popQual, String("pop") ); - LangExpr *pop = LangExpr::cons( LangTerm::cons( InputLoc(), popRef, popArgs ) ); - - TypeRef *typeRef = TypeRef::cons( internal, pd->uniqueTypeStr ); - ObjectField *objField = ObjectField::cons( internal, - ObjectField::UserLocalType, typeRef, "A" ); - - LangStmt *stmt = varDef( objField, pop, LangStmt::AssignType ); - stmtList->append( stmt ); - - /* Construct a literal string 'r', for second arg to open. */ - ConsItem *modeConsItem = ConsItem::cons( internal, - ConsItem::InputText, String("r") ); - ConsItemList *modeCons = new ConsItemList; - modeCons->append( modeConsItem ); - LangExpr *modeExpr = LangExpr::cons( LangTerm::cons( internal, modeCons ) ); - - /* Reference A->value */ - LangVarRef *varRef = LangVarRef::cons( internal, curNspace(), 0, - curLocalFrame()->rootScope, String("A") ); - LangExpr *Avalue = LangExpr::cons( LangTerm::cons( internal, - LangTerm::VarRefType, varRef ) ); - - /* Call open. */ - LangVarRef *openRef = LangVarRef::cons( internal, - curNspace(), 0, curLocalFrame()->rootScope, String("open") ); - CallArgVect *openArgs = new CallArgVect; - openArgs->append( new CallArg(Avalue) ); - openArgs->append( new CallArg(modeExpr) ); - LangExpr *open = LangExpr::cons( LangTerm::cons( InputLoc(), openRef, openArgs ) ); - - /* Construct a list containing the open stream. */ - ConsItem *consItem = ConsItem::cons( internal, ConsItem::ExprType, open, ConsItem::TrimDefault ); - ConsItemList *list = ConsItemList::cons( consItem ); - - /* Will capture the parser to "P" */ - objField = ObjectField::cons( internal, - ObjectField::UserLocalType, 0, String("P") ); - - /* Ref the start def. 
*/ - NamespaceQual *nspaceQual = NamespaceQual::cons( curNspace() ); - typeRef = TypeRef::cons( internal, nspaceQual, - String("start"), RepeatNone ); - - /* Parse the above list. */ - LangExpr *parseExpr = parseCmd( internal, false, false, objField, - typeRef, 0, list, true, false, false, "" ); - LangStmt *parseStmt = LangStmt::cons( internal, LangStmt::ExprType, parseExpr ); - stmtList->append( parseStmt ); -} - -void LoadInit::consExportTree( StmtList *stmtList ) -{ - LangVarRef *varRef = LangVarRef::cons( internal, curNspace(), 0, - curLocalFrame()->rootScope, String("P") ); - LangExpr *expr = LangExpr::cons( LangTerm::cons( internal, - LangTerm::VarRefType, varRef ) ); - - NamespaceQual *nspaceQual = NamespaceQual::cons( curNspace() ); - TypeRef *typeRef = TypeRef::cons( internal, nspaceQual, String("start"), RepeatNone ); - ObjectField *program = ObjectField::cons( internal, - ObjectField::StructFieldType, typeRef, String("ColmTree") ); - LangStmt *programExport = exportStmt( program, LangStmt::AssignType, expr ); - stmtList->append( programExport ); -} - -void LoadInit::consExportError( StmtList *stmtList ) -{ - LangVarRef *varRef = LangVarRef::cons( internal, curNspace(), 0, - curLocalFrame()->rootScope, String("error") ); - LangExpr *expr = LangExpr::cons( LangTerm::cons( internal, - LangTerm::VarRefType, varRef ) ); - - NamespaceQual *nspaceQual = NamespaceQual::cons( curNspace() ); - TypeRef *typeRef = TypeRef::cons( internal, nspaceQual, String("str"), RepeatNone ); - ObjectField *program = ObjectField::cons( internal, - ObjectField::StructFieldType, typeRef, String("ColmError") ); - LangStmt *programExport = exportStmt( program, LangStmt::AssignType, expr ); - stmtList->append( programExport ); -} - -void LoadInit::go( long activeRealm ) -{ - LoadInit::init(); - - StmtList *stmtList = new StmtList; - - const char *argv[3]; - argv[0] = "load-init"; - argv[1] = inputFileName; - argv[2] = 0; - - colm_program *program = colm_new_program( &colm_object ); - 
colm_set_debug( program, 0 ); - colm_run_program( program, 2, argv ); - - /* Extract the parse tree. */ - start Start = ColmTree( program ); - - if ( Start == 0 ) { - gblErrorCount += 1; - std::cerr << inputFileName << ": parse error" << std::endl; - return; - } - - /* Walk the list of items. */ - _repeat_item ItemList = Start.ItemList(); - while ( !ItemList.end() ) { - - item Item = ItemList.value(); - if ( Item.DefId() != 0 ) - walkDefinition( Item ); - else if ( Item.TokenList() != 0 ) - walkLexRegion( Item ); - ItemList = ItemList.next(); - } - - pd->streamFileNames.append( colm_extract_fns( program ) ); - colm_delete_program( program ); - - consParseStmt( stmtList ); - consExportTree( stmtList ); - consExportError( stmtList ); - - pd->rootCodeBlock = CodeBlock::cons( stmtList, 0 ); -} diff --git a/src/loadinit.h b/src/loadinit.h deleted file mode 100644 index 93a18444..00000000 --- a/src/loadinit.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright 2013-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef _COLM_LOADINIT_H -#define _COLM_LOADINIT_H - -#include <iostream> - -#include <avltree.h> - -#include "compiler.h" -#include "parser.h" - -struct lex_factor; -struct lex_factor_neg; -struct lex_factor_rep; -struct lex_term; -struct lex_expr; -struct token_list; -struct prod_el_list; -struct prod_list; -struct item; - -struct LoadInit -: - public BaseParser -{ - LoadInit( Compiler *pd, const char *inputFileName ) - : - BaseParser(pd), - inputFileName(inputFileName) - {} - - const char *inputFileName; - - /* Constructing the colm language data structures from the the parse tree. */ - LexFactor *walkLexFactor( lex_factor &LexFactorTree ); - LexFactorNeg *walkLexFactorNeg( lex_factor_neg &LexFactorNegTree ); - LexFactorRep *walkLexFactorRep( lex_factor_rep &LexFactorRepTree ); - LexFactorAug *walkLexFactorAug( lex_factor_rep &LexFactorRepTree ); - LexTerm *walkLexTerm( lex_term &LexTerm ); - LexExpression *walkLexExpr( lex_expr &LexExpr ); - void walkTokenList( token_list &TokenList ); - void walkLexRegion( item &LexRegion ); - void walkProdElList( String defName, ProdElList *list, prod_el_list &prodElList ); - void walkProdList( String defName, LelDefList *list, prod_list &prodList ); - void walkDefinition( item &define ); - - /* Constructing statements needed to parse and export the input. 
*/ - void consParseStmt( StmtList *stmtList ); - void consExportTree( StmtList *stmtList ); - void consExportError( StmtList *stmtList ); - - virtual void go( long activeRealm ); -}; - -#endif /* _COLM_LOAD_INIT_H */ - diff --git a/src/lookup.cc b/src/lookup.cc deleted file mode 100644 index cb243dc6..00000000 --- a/src/lookup.cc +++ /dev/null @@ -1,323 +0,0 @@ -/* - * Copyright 2007-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - - -#include <assert.h> -#include <iostream> -#include "compiler.h" - -/* - * Variable Lookup - */ - -using std::cout; -using std::cerr; -using std::endl; - -ObjectDef *UniqueType::objectDef() -{ - if ( typeId == TYPE_TREE || typeId == TYPE_REF ) { - return langEl->objectDef; - } - else if ( typeId == TYPE_STRUCT ) { - return structEl->structDef->objectDef; - } - else if ( typeId == TYPE_GENERIC ) { - return generic->objDef; - } - - /* This should have generated a compiler error. */ - assert( false ); -} - -/* Recurisve find through a single object def's scope. */ -ObjectField *ObjectDef::findFieldInScope( const NameScope *inScope, - const String &name ) const -{ - FieldMapEl *objDefMapEl = inScope->fieldMap.find( name ); - if ( objDefMapEl != 0 ) - return objDefMapEl->value; - if ( inScope->parentScope != 0 ) - return findFieldInScope( inScope->parentScope, name ); - return 0; -} - -ObjectField *NameScope::findField( const String &name ) const -{ - return owningObj->findFieldInScope( this, name ); -} - -ObjectMethod *NameScope::findMethod( const String &name ) const -{ - MethodMapEl *methodMapEl = methodMap.find( name ); - if ( methodMapEl != 0 ) - return methodMapEl->value; - if ( parentScope != 0 ) - return parentScope->findMethod( name ); - return 0; -} - -VarRefLookup LangVarRef::lookupQualification( Compiler *pd, NameScope *rootScope ) const -{ - int lastPtrInQual = -1; - NameScope *searchScope = rootScope; - int firstConstPart = -1; - - for ( QualItemVect::Iter qi = *qual; qi.lte(); qi++ ) { - /* Lookup the field int the current qualification. */ - ObjectField *el = searchScope->findField( qi->data ); - if ( el == 0 ) - error(qi->loc) << "cannot resolve qualification " << qi->data << endp; - - /* Lookup the type of the field. */ - el->typeRef->resolveType( pd ); - UniqueType *qualUT = el->typeRef->uniqueType; - - /* If we are dealing with an iterator then dereference it. 
*/ - if ( qualUT->typeId == TYPE_ITER ) - qualUT = el->typeRef->searchUniqueType; - - /* Is it const? */ - if ( firstConstPart < 0 && el->isConst ) - firstConstPart = qi.pos(); - - /* Check for references. When loop is done we will have the last one - * present, if any. */ - if ( qualUT->ptr() ) - lastPtrInQual = qi.pos(); - - if ( qi->form == QualItem::Dot ) { - /* Cannot dot a reference. Iterator yes (access of the iterator - * not the current) */ - if ( qualUT->ptr() ) - error(loc) << "dot cannot be used to access a pointer" << endp; - } - else if ( qi->form == QualItem::Arrow ) { - if ( qualUT->typeId == TYPE_ITER ) - qualUT = el->typeRef->searchUniqueType; - } - - ObjectDef *searchObjDef = qualUT->objectDef(); - if ( searchObjDef == 0 ) - error(qi->loc) << "left hand side of qual has no object defintion" << endp; - searchScope = searchObjDef->rootScope; - } - - return VarRefLookup( lastPtrInQual, firstConstPart, searchScope->owningObj, searchScope ); -} - -bool LangVarRef::isLocalRef() const -{ - if ( qual->length() > 0 ) { - if ( scope->findField( qual->data[0].data ) != 0 ) - return true; - } - else if ( scope->findField( name ) != 0 ) - return true; - else if ( scope->findMethod( name ) != 0 ) - return true; - - return false; -} - -/* For accesing production RHS values inside a switch case that limits our - * search to a particular productions. 
*/ -bool LangVarRef::isProdRef( Compiler *pd ) const -{ - if ( scope->caseClauseVarRef != 0 ) { - UniqueType *varUt = scope->caseClauseVarRef->lookup( pd ); - ObjectDef *searchObjDef = varUt->objectDef(); - - if ( qual->length() > 0 ) { - if ( searchObjDef->rootScope->findField( qual->data[0].data ) != 0 ) - return true; - } - else if ( searchObjDef->rootScope->findField( name ) != 0 ) - return true; - else if ( searchObjDef->rootScope->findMethod( name ) != 0 ) - return true; - } - return false; -} - -bool LangVarRef::isStructRef() const -{ - if ( structDef != 0 ) { - if ( qual->length() > 0 ) { - if ( structDef->objectDef->rootScope->findField( qual->data[0].data ) != 0 ) - return true; - } - else if ( structDef->objectDef->rootScope->findField( name ) != 0 ) - return true; - else if ( structDef->objectDef->rootScope->findMethod( name ) != 0 ) - return true; - } - - return false; -} - -bool LangVarRef::isInbuiltObject() const -{ - if ( qual->length() > 0 ) { - ObjectField *field = scope->findField( qual->data[0].data ); - if ( field != 0 && field->isInbuiltObject() ) - return true; - } - else { - ObjectField *field = scope->findField( name ); - if ( field != 0 ) { - if ( field->isInbuiltObject() ) - return true; - } - } - return false; -} - -VarRefLookup LangVarRef::lookupObj( Compiler *pd ) const -{ - NameScope *rootScope; - - if ( nspaceQual != 0 && nspaceQual->qualNames.length() > 0 ) { - Namespace *nspace = pd->rootNamespace->findNamespace( nspaceQual->qualNames[0] ); - rootScope = nspace->rootScope; - } - else if ( isLocalRef() ) - rootScope = scope; - else if ( isProdRef( pd ) ) { - UniqueType *varUt = scope->caseClauseVarRef->lookup( pd ); - ObjectDef *searchObjDef = varUt->objectDef(); - rootScope = searchObjDef->rootScope; - } - else if ( isStructRef() ) - rootScope = structDef->objectDef->rootScope; - else - rootScope = nspace != 0 ? 
nspace->rootScope : pd->rootNamespace->rootScope; - - return lookupQualification( pd, rootScope ); -} - -VarRefLookup LangVarRef::lookupMethodObj( Compiler *pd ) const -{ - NameScope *rootScope; - - if ( nspaceQual != 0 && nspaceQual->qualNames.length() > 0 ) { - Namespace *nspace = pd->rootNamespace->findNamespace( nspaceQual->qualNames[0] ); - rootScope = nspace->rootScope; - } - else if ( isLocalRef() ) - rootScope = scope; - else if ( isStructRef() ) - rootScope = structDef->objectDef->rootScope; - else - rootScope = nspace != 0 ? nspace->rootScope : pd->rootNamespace->rootScope; - - return lookupQualification( pd, rootScope ); -} - - -VarRefLookup LangVarRef::lookupField( Compiler *pd ) const -{ - /* Lookup the object that the field is in. */ - VarRefLookup lookup = lookupObj( pd ); - - /* Lookup the field. */ - ObjectField *field = lookup.inScope->findField( name ); - if ( field == 0 ) - error(loc) << "cannot find name " << name << " in object" << endp; - - lookup.objField = field; - lookup.uniqueType = field->typeRef->uniqueType; - - if ( field->typeRef->searchUniqueType != 0 ) - lookup.iterSearchUT = field->typeRef->searchUniqueType; - - return lookup; -} - -UniqueType *LangVarRef::lookup( Compiler *pd ) const -{ - /* Lookup the loadObj. */ - VarRefLookup lookup = lookupField( pd ); - - ObjectField *el = lookup.objField; - UniqueType *elUT = el->typeRef->resolveType( pd ); - - /* Deref iterators. */ - if ( elUT->typeId == TYPE_ITER ) - elUT = el->typeRef->searchUniqueType; - - return elUT; -} - -VarRefLookup LangVarRef::lookupMethod( Compiler *pd ) const -{ - /* Lookup the object that the field is in. */ - VarRefLookup lookup = lookupMethodObj( pd ); - - /* Find the method. */ - ObjectMethod *method = lookup.inScope->findMethod( name ); - if ( method == 0 ) { - /* Not found as a method, try it as an object on which we will call a - * default function. */ - qual->append( QualItem( QualItem::Dot, loc, name ) ); - - /* Lookup the object that the field is in. 
*/ - VarRefLookup lookup = lookupObj( pd ); - - /* Find the method. */ - method = lookup.inScope->findMethod( "finish" ); - if ( method == 0 ) - error(loc) << "cannot find " << name << "(...) in object" << endp; - } - - lookup.objMethod = method; - lookup.uniqueType = method->returnUT; - - return lookup; -} - -VarRefLookup LangVarRef::lookupIterCall( Compiler *pd ) const -{ - /* Lookup the object that the field is in. */ - VarRefLookup lookup = lookupObj( pd ); - - /* Find the method. */ - ObjectMethod *method = lookup.inScope->findMethod( name ); - if ( method == 0 ) { - /* Not found as a method, try it as an object on which we will call a - * default function. */ - qual->append( QualItem( QualItem::Dot, loc, name ) ); - - /* Lookup the object that the field is in. */ - VarRefLookup lookup = lookupObj( pd ); - - /* Find the method. */ - method = lookup.inScope->findMethod( "finish" ); - if ( method == 0 ) - error(loc) << "cannot find " << name << "(...) in object" << endp; - } - - lookup.objMethod = method; - lookup.uniqueType = method->returnUT; - - return lookup; -} diff --git a/src/main.cc b/src/main.cc deleted file mode 100644 index 54c1dde1..00000000 --- a/src/main.cc +++ /dev/null @@ -1,798 +0,0 @@ -/* - * Copyright 2006-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <assert.h> -#include <stdlib.h> -#include <string.h> -#include <strings.h> -#include <stdio.h> -#include <stdbool.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <iostream> - -#include "debug.h" -#include "pcheck.h" -#include "version.h" -#include "compiler.h" - -#if defined(CONS_INIT) -#include "consinit.h" -#elif defined(LOAD_INIT) -#include "loadinit.h" -#else -#include "loadcolm.h" -#endif - -using std::istream; -using std::ifstream; -using std::ostream; -using std::ios; -using std::cin; -using std::cout; -using std::cerr; -using std::endl; - -/* Graphviz dot file generation. */ -bool genGraphviz = false; - -using std::ostream; -using std::istream; -using std::ifstream; -using std::ofstream; -using std::ios; -using std::cout; -using std::cerr; -using std::cin; -using std::endl; - -InputLoc internal; - -/* Io globals. 
*/ -istream *inStream = 0; -ostream *outStream = 0; -const char *inputFn = 0; -const char *outputFn = 0; -const char *intermedFn = 0; -const char *binaryFn = 0; -const char *exportHeaderFn = 0; -const char *exportCodeFn = 0; -const char *commitCodeFn = 0; -const char *objectName = "colm_object"; -bool exportCode = false; -bool hostAdapters = true; - -bool generateGraphviz = false; -bool verbose = false; -bool logging = false; -bool branchPointInfo = false; -bool addUniqueEmptyProductions = false; -bool gblLibrary = false; -long gblActiveRealm = 0; - -ArgsVector includePaths; -ArgsVector libraryPaths; -DefineVector defineArgs; -ArgsVector additionalCodeFiles; - -/* Print version information. */ -void version(); - -/* Total error count. */ -int gblErrorCount = 0; - -HostType hostTypesC[] = -{ - { "char", 0, CHAR_MIN, CHAR_MAX, sizeof(char) }, -}; - -HostLang hostLangC = { hostTypesC, 8, hostTypesC+0, true }; -HostLang *hostLang = &hostLangC; - -/* Print the opening to an error in the input, then return the error ostream. */ -ostream &error( const InputLoc &loc ) -{ - /* Keep the error count. */ - gblErrorCount += 1; - - if ( loc.fileName != 0 ) - cerr << loc.fileName << ":"; - else - cerr << "<input>:"; - - if ( loc.line == -1 ) { - cerr << "INT: "; - } - else { - cerr << loc.line << ":" << loc.col << ": "; - } - return cerr; -} - -/* Print the opening to a program error, then return the error stream. */ -ostream &error() -{ - gblErrorCount += 1; - cerr << "error: " PROGNAME ": "; - return cerr; -} - - -/* Print the opening to a warning, then return the error ostream. */ -ostream &warning( ) -{ - cerr << "warning: " << inputFn << ": "; - return cerr; -} - -/* Print the opening to a warning in the input, then return the error ostream. 
*/ -ostream &warning( const InputLoc &loc ) -{ - assert( inputFn != 0 ); - cerr << "warning: " << inputFn << ":" << - loc.line << ":" << loc.col << ": "; - return cerr; -} - -void escapeLineDirectivePath( std::ostream &out, char *path ) -{ - for ( char *pc = path; *pc != 0; pc++ ) { - if ( *pc == '\\' ) - out << "\\\\"; - else - out << *pc; - } -} - -void escapeLineDirectivePath( std::ostream &out, char *path ); -void scan( char *fileName, istream &input ); - -bool printStatistics = false; - -/* Print a summary of the options. */ -void usage() -{ - cout << -"usage: colm [options] file\n" -"general:\n" -" -h, -H, -?, --help print this usage and exit\n" -" -v --version print version information and exit\n" -" -b <file> write binary to <file>\n" -" -o <file> write object to <file>\n" -" -e <file> write C++ export header to <file>\n" -" -x <file> write C++ export code to <file>\n" -" -m <file> write C++ commit code to <file>\n" -" -a <file> additional code file to include in output program\n" -" -E N=V set a string value availabe in the program\n" -" -I <path> additional include path for the compiler\n" -" -i activate branchpoint information\n" -" -L <path> additional library path for the linker\n" -" -l activate logging\n" -" -c compile only (don't produce binary)\n" -" -V print dot format (graphiz)\n" -" -d print verbose debug information\n" -#if DEBUG -" -D <tag> print more information about <tag>\n" -" (BYTECODE|PARSE|MATCH|COMPILE|POOL|PRINT|INPUT|SCAN\n" -#endif - ; -} - -/* Print version information. */ -void version() -{ - cout << "Colm version " VERSION << " " PUBDATE << endl << - "Copyright (c) 2007-2019 by Adrian D. Thurston" << endl; -} - -/* Scans a string looking for the file extension. If there is a file - * extension then pointer returned points to inside the string - * passed in. Otherwise returns null. 
*/ -const char *findFileExtension( const char *stemFile ) -{ - const char *ppos = stemFile + strlen(stemFile) - 1; - - /* Scan backwards from the end looking for the first dot. - * If we encounter a '/' before the first dot, then stop the scan. */ - while ( 1 ) { - /* If we found a dot or got to the beginning of the string then - * we are done. */ - if ( ppos == stemFile || *ppos == '.' ) - break; - - /* If we hit a / then there is no extension. Done. */ - if ( *ppos == '/' ) { - ppos = stemFile; - break; - } - ppos--; - } - - /* If we got to the front of the string then bail we - * did not find an extension */ - if ( ppos == stemFile ) - ppos = 0; - - return ppos; -} - -/* Make a file name from a stem. Removes the old filename suffix and - * replaces it with a new one. Returns a newed up string. */ -char *fileNameFromStem( const char *stemFile, const char *suffix ) -{ - int len = strlen( stemFile ); - assert( len > 0 ); - - /* Get the extension. */ - const char *ppos = findFileExtension( stemFile ); - - /* If an extension was found, then shorten what we think the len is. */ - if ( ppos != 0 ) - len = ppos - stemFile; - - int slen = suffix != 0 ? strlen( suffix ) : 0; - char *retVal = new char[ len + slen + 1 ]; - strncpy( retVal, stemFile, len ); - if ( suffix != 0 ) - strcpy( retVal + len, suffix ); - retVal[len+slen] = 0; - - return retVal; -} - -void openOutputCompiled() -{ - /* Start with the fn given by -o option. 
*/ - binaryFn = outputFn; - - if ( binaryFn == 0 ) - binaryFn = fileNameFromStem( inputFn, 0 ); - - if ( intermedFn == 0 ) - intermedFn = fileNameFromStem( binaryFn, ".c" ); - - if ( binaryFn != 0 && inputFn != 0 && - strcmp( inputFn, binaryFn ) == 0 ) - { - error() << "output file \"" << binaryFn << - "\" is the same as the input file" << endl; - } - - if ( intermedFn != 0 && inputFn != 0 && - strcmp( inputFn, intermedFn ) == 0 ) - { - error() << "intermediate file \"" << intermedFn << - "\" is the same as the input file" << endl; - } - - if ( intermedFn != 0 ) { - /* Open the output stream, attaching it to the filter. */ - ofstream *outFStream = new ofstream( intermedFn ); - - if ( !outFStream->is_open() ) { - error() << "error opening " << intermedFn << " for writing" << endl; - exit(1); - } - - outStream = outFStream; - } - else { - /* Writing out ot std out. */ - outStream = &cout; - } -} - -void openOutputLibrary() -{ - if ( outputFn == 0 ) - outputFn = fileNameFromStem( inputFn, ".c" ); - - /* Make sure we are not writing to the same file as the input file. */ - if ( outputFn != 0 && inputFn != 0 && - strcmp( inputFn, outputFn ) == 0 ) - { - error() << "output file \"" << outputFn << - "\" is the same as the input file" << endl; - } - - if ( outputFn != 0 ) { - /* Open the output stream, attaching it to the filter. */ - ofstream *outFStream = new ofstream( outputFn ); - - if ( !outFStream->is_open() ) { - error() << "error opening " << outputFn << " for writing" << endl; - exit(1); - } - - outStream = outFStream; - } - else { - /* Writing out ot std out. */ - outStream = &cout; - } -} - -void openExports( ) -{ - /* Make sure we are not writing to the same file as the input file. */ - if ( inputFn != 0 && exportHeaderFn != 0 && strcmp( inputFn, exportHeaderFn ) == 0 ) { - error() << "output file \"" << exportHeaderFn << - "\" is the same as the input file" << endl; - } - - if ( exportHeaderFn != 0 ) { - /* Open the output stream, attaching it to the filter. 
*/ - ofstream *outFStream = new ofstream( exportHeaderFn ); - - if ( !outFStream->is_open() ) { - error() << "error opening " << exportHeaderFn << " for writing" << endl; - exit(1); - } - - outStream = outFStream; - } - else { - /* Writing out ot std out. */ - outStream = &cout; - } -} - -void openExportsImpl( ) -{ - /* Make sure we are not writing to the same file as the input file. */ - if ( inputFn != 0 && exportCodeFn != 0 && strcmp( inputFn, exportCodeFn ) == 0 ) { - error() << "output file \"" << exportCodeFn << - "\" is the same as the input file" << endl; - } - - if ( exportCodeFn != 0 ) { - /* Open the output stream, attaching it to the filter. */ - ofstream *outFStream = new ofstream( exportCodeFn ); - - if ( !outFStream->is_open() ) { - error() << "error opening " << exportCodeFn << " for writing" << endl; - exit(1); - } - - outStream = outFStream; - } - else { - /* Writing out ot std out. */ - outStream = &cout; - } -} - -void openCommit( ) -{ - /* Make sure we are not writing to the same file as the input file. */ - if ( inputFn != 0 && commitCodeFn != 0 && strcmp( inputFn, commitCodeFn ) == 0 ) { - error() << "output file \"" << commitCodeFn << - "\" is the same as the input file" << endl; - } - - if ( commitCodeFn != 0 ) { - /* Open the output stream, attaching it to the filter. */ - ofstream *outFStream = new ofstream( commitCodeFn ); - - if ( !outFStream->is_open() ) { - error() << "error opening " << commitCodeFn << " for writing" << endl; - exit(1); - } - - outStream = outFStream; - } - else { - /* Writing out ot std out. */ - outStream = &cout; - } -} - -void compileOutputCommand( const char *command ) -{ - if ( verbose ) - cout << "compiling with: '" << command << "'" << endl; - int res = system( command ); - if ( res != 0 ) - error() << "there was a problem compiling the output" << endl; -} - -void compileOutput( const char *argv0, const bool inSource, char *srcLocation ) -{ - /* Find the location of the colm program that is executing. 
*/ - char *location = strdup( argv0 ); - char *last; - int length = 1024 + strlen( intermedFn ) + strlen( binaryFn ); - if ( inSource ) { - last = strrchr( location, '/' ); - assert( last != 0 ); - last[0] = 0; - length += 3 * strlen( location ); - } - else { - last = location + strlen( location ) - 1; - while ( true ) { - if ( last == location ) { - last[0] = '.'; - last[1] = 0; - break; - } - if ( *last == '/' ) { - last[0] = 0; - break; - } - last -= 1; - } - } - for ( ArgsVector::Iter af = additionalCodeFiles; af.lte(); af++ ) - length += strlen( *af ) + 2; - for ( ArgsVector::Iter ip = includePaths; ip.lte(); ip++ ) - length += strlen( *ip ) + 3; - for ( ArgsVector::Iter lp = libraryPaths; lp.lte(); lp++ ) - length += strlen( *lp ) + 3; -#define COMPILE_COMMAND_STRING "gcc -Wall -Wwrite-strings" \ - " -g" \ - " -o %s" \ - " %s" - char *command = new char[length]; - if ( inSource ) { - sprintf( command, - COMPILE_COMMAND_STRING - " -I%s/../aapl" - " -I%s/include" - " -L%s" - " -Wl,-rpath=%s", - binaryFn, intermedFn, srcLocation, - srcLocation, location, location ); - } - else { - sprintf( command, - COMPILE_COMMAND_STRING - " -I" PREFIX "/include" - " -L" PREFIX "/lib" - " -Wl,-rpath," PREFIX "/lib", - binaryFn, intermedFn ); - } -#undef COMPILE_COMMAND_STRING - for ( ArgsVector::Iter af = additionalCodeFiles; af.lte(); af++ ) { - strcat( command, " " ); - strcat( command, *af ); - } - for ( ArgsVector::Iter ip = includePaths; ip.lte(); ip++ ) { - strcat( command, " -I" ); - strcat( command, *ip ); - } - for ( ArgsVector::Iter lp = libraryPaths; lp.lte(); lp++ ) { - strcat( command, " -L" ); - strcat( command, *lp ); - } - strcat( command, " -lcolm" ); - - compileOutputCommand( command ); - - delete[] command; -} - -bool inSourceTree( const char *argv0, char *&location ) -{ - const char *lastSlash = strrchr( argv0, '/' ); - if ( lastSlash != 0 ) { - /* Take off the file name. */ - int rootLen = lastSlash - argv0; - - /* Create string for dir. 
*/ - char *mainPath = new char[rootLen + 16]; - memcpy( mainPath, argv0, rootLen ); - mainPath[rootLen] = 0; - - /* If built using ldconfig then there will be a .libs dir. */ - lastSlash = strrchr( mainPath, '/' ); - if ( lastSlash != 0 ) { - if ( strlen( lastSlash ) >= 6 && memcmp( lastSlash, "/.libs", 7 ) == 0 ) { - rootLen = lastSlash - mainPath; - mainPath[rootLen] = 0; - } - } - - strcpy( mainPath + rootLen, "/main.cc" ); - - struct stat sb; - int res = stat( mainPath, &sb ); - if ( res == 0 && S_ISREG( sb.st_mode ) ) { - mainPath[rootLen] = 0; - location = mainPath; - return true; - } - - delete[] mainPath; - } - - return false; -} - -void processArgs( int argc, const char **argv ) -{ - ParamCheck pc( "cD:e:x:I:L:vdlio:S:M:vHh?-:sVa:m:b:E:", argc, argv ); - - while ( pc.check() ) { - switch ( pc.state ) { - case ParamCheck::match: - switch ( pc.parameter ) { - case 'I': - includePaths.append( pc.parameterArg ); - break; - case 'v': - version(); - exit(0); - break; - case 'd': - verbose = true; - break; - case 'l': - logging = true; - break; - case 'L': - libraryPaths.append( pc.parameterArg ); - break; - case 'i': - branchPointInfo = true; - break; - case 'o': - /* Output. */ - if ( *pc.parameterArg == 0 ) - error() << "a zero length output file name was given" << endl; - else if ( outputFn != 0 ) - error() << "more than one output file name was given" << endl; - else { - /* Ok, remember the output file name. */ - outputFn = pc.parameterArg; - } - break; - - case 'b': - /* object name. */ - if ( *pc.parameterArg == 0 ) - error() << "a zero length object name was given" << endl; - else { - /* Ok, remember the output file name. 
*/ - objectName = pc.parameterArg; - hostAdapters = false; - } - break; - - case 'H': case 'h': case '?': - usage(); - exit(0); - case 's': - printStatistics = true; - break; - case 'V': - generateGraphviz = true; - break; - case '-': - if ( strcasecmp(pc.parameterArg, "help") == 0 ) { - usage(); - exit(0); - } - else if ( strcasecmp(pc.parameterArg, "version") == 0 ) { - version(); - exit(0); - } - else { - error() << "--" << pc.parameterArg << - " is an invalid argument" << endl; - } - break; - case 'c': - gblLibrary = true; - break; - case 'e': - exportHeaderFn = pc.parameterArg; - break; - case 'x': - exportCodeFn = pc.parameterArg; - break; - case 'a': - additionalCodeFiles.append( pc.parameterArg ); - break; - case 'm': - commitCodeFn = pc.parameterArg; - break; - - case 'E': { - const char *eq = strchr( pc.parameterArg, '=' ); - if ( eq == 0 ) - fatal( "-E option argument must contain =" ); - if ( eq == pc.parameterArg ) - fatal( "-E variable name is of zero length" ); - - defineArgs.append( DefineArg( - String( pc.parameterArg, eq-pc.parameterArg ), - String( eq + 1 ) ) ); - - break; - } - - case 'D': -#if DEBUG - // @NOTE: keep this in sync with 'debug.c': 'colm_realm_names' - if ( strcmp( pc.parameterArg, colm_realm_names[0] ) == 0 ) - gblActiveRealm |= REALM_BYTECODE; - else if ( strcmp( pc.parameterArg, colm_realm_names[1] ) == 0 ) - gblActiveRealm |= REALM_PARSE; - else if ( strcmp( pc.parameterArg, colm_realm_names[2] ) == 0 ) - gblActiveRealm |= REALM_MATCH; - else if ( strcmp( pc.parameterArg, colm_realm_names[3] ) == 0 ) - gblActiveRealm |= REALM_COMPILE; - else if ( strcmp( pc.parameterArg, colm_realm_names[4] ) == 0 ) - gblActiveRealm |= REALM_POOL; - else if ( strcmp( pc.parameterArg, colm_realm_names[5] ) == 0 ) - gblActiveRealm |= REALM_PRINT; - else if ( strcmp( pc.parameterArg, colm_realm_names[6] ) == 0 ) - gblActiveRealm |= REALM_INPUT; - else if ( strcmp( pc.parameterArg, colm_realm_names[7] ) == 0 ) - gblActiveRealm |= REALM_SCAN; - else 
- fatal( "unknown argument to -D %s\n", pc.parameterArg ); -#else - fatal( "-D option specified but debugging messsages not compiled in\n" ); -#endif - break; - - } - break; - - case ParamCheck::invalid: - error() << "-" << pc.parameter << " is an invalid argument" << endl; - break; - - case ParamCheck::noparam: - /* It is interpreted as an input file. */ - if ( *pc.curArg == 0 ) - error() << "a zero length input file name was given" << endl; - else if ( inputFn != 0 ) - error() << "more than one input file name was given" << endl; - else { - /* OK, Remember the filename. */ - inputFn = pc.curArg; - } - break; - } - } -} - -bool readCheck( const char *fn ) -{ - int result = true; - - /* Check if we can open the input file for reading. */ - ifstream *inFile = new ifstream( fn ); - if ( ! inFile->is_open() ) - result = false; - - delete inFile; - return result; -} - -/* Main, process args and call yyparse to start scanning input. */ -int main(int argc, const char **argv) -{ - processArgs( argc, argv ); - - if ( verbose ) - gblActiveRealm = 0xffffffff; - - /* Bail on above errors. */ - if ( gblErrorCount > 0 ) - exit(1); - - /* Make sure we are not writing to the same file as the input file. */ - if ( inputFn != 0 && outputFn != 0 && - strcmp( inputFn, outputFn ) == 0 ) - { - error() << "output file \"" << outputFn << - "\" is the same as the input file" << endl; - } - -#if defined(LOAD_INIT) || defined(LOAD_COLM) - /* Open the input file for reading. */ - if ( inputFn == 0 ) { - error() << "colm: no input file given" << endl; - } - else { - /* Check if we can open the input file for reading. */ - if ( ! readCheck( inputFn ) ) - error() << "could not open " << inputFn << " for reading" << endl; - } -#endif - - /* Bail on above errors. 
*/ - if ( gblErrorCount > 0 ) - exit(1); - - Compiler *pd = new Compiler; - -#if defined(CONS_INIT) - BaseParser *parser = new ConsInit( pd ); -#elif defined(LOAD_INIT) - BaseParser *parser = new LoadInit( pd, inputFn ); -#else - BaseParser *parser = consLoadColm( pd, inputFn ); -#endif - - parser->go( gblActiveRealm ); - - /* Parsing complete, check for errors.. */ - if ( gblErrorCount > 0 ) - return 1; - - /* Initiate a compile following a parse. */ - pd->compile(); - - /* - * Write output. - */ - if ( generateGraphviz ) { - outStream = &cout; - pd->writeDotFile(); - } - else { - if ( gblLibrary ) - openOutputLibrary(); - else - openOutputCompiled(); - - pd->generateOutput( gblActiveRealm, ( commitCodeFn == 0 ) ); - if ( outStream != 0 ) - delete outStream; - - if ( !gblLibrary ) { - char *location = 0; - bool inSource = inSourceTree( argv[0], location ); - compileOutput( argv[0], inSource, location ); - } - - if ( exportHeaderFn != 0 ) { - openExports(); - pd->generateExports(); - delete outStream; - } - if ( exportCodeFn != 0 ) { - openExportsImpl(); - pd->generateExportsImpl(); - delete outStream; - } - if ( commitCodeFn != 0 ) { - openCommit(); - pd->writeCommit(); - delete outStream; - } - } - - delete parser; - delete pd; - - /* Bail on above errors. 
*/ - if ( gblErrorCount > 0 ) - exit(1); - - return 0; -} diff --git a/src/map.c b/src/map.c deleted file mode 100644 index 052e5445..00000000 --- a/src/map.c +++ /dev/null @@ -1,876 +0,0 @@ -/* - * Copyright 2010-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <colm/map.h> - -#include <assert.h> -#include <stdbool.h> - -#include <colm/pdarun.h> -#include <colm/pool.h> -#include <colm/bytecode.h> - -struct colm_struct *colm_map_el_get( struct colm_program *prg, - map_el_t *map_el, word_t gen_id, word_t field ) -{ - struct generic_info *gi = &prg->rtd->generic_info[gen_id]; - map_el_t *result = 0; - switch ( field ) { - case 0: - result = map_el->prev; - break; - case 1: - result = map_el->next; - break; - default: - assert( 0 ); - break; - } - - struct colm_struct *s = result != 0 ? 
- colm_struct_container( result, gi->el_offset ) : 0; - return s; -} - -struct colm_struct *colm_map_get( struct colm_program *prg, - map_t *map, word_t gen_id, word_t field ) -{ - struct generic_info *gi = &prg->rtd->generic_info[gen_id]; - map_el_t *result = 0; - switch ( field ) { - case 0: - result = map->head; - break; - case 1: - result = map->tail; - break; - default: - assert( 0 ); - break; - } - - struct colm_struct *s = result != 0 ? - colm_struct_container( result, gi->el_offset ) : 0; - return s; -} - -void map_list_abandon( map_t *map ) -{ - map->head = map->tail = 0; -} - -void map_list_add_before( map_t *map, map_el_t *next_el, map_el_t *new_el ) -{ - /* Set the next pointer of the new element to next_el. We do - * this regardless of the state of the list. */ - new_el->next = next_el; - - /* Set reverse pointers. */ - if ( next_el == 0 ) { - /* There is no next elememnt. We are inserting at the tail. */ - new_el->prev = map->tail; - map->tail = new_el; - } - else { - /* There is a next element and we can access next's previous. */ - new_el->prev = next_el->prev; - next_el->prev = new_el; - } - - /* Set forward pointers. */ - if ( new_el->prev == 0 ) { - /* There is no previous element. Set the head pointer.*/ - map->head = new_el; - } - else { - /* There is a previous element, set it's next pointer to new_el. */ - new_el->prev->next = new_el; - } -} - -void map_list_add_after( map_t *map, map_el_t *prev_el, map_el_t *new_el ) -{ - /* Set the previous pointer of new_el to prev_el. We do - * this regardless of the state of the list. */ - new_el->prev = prev_el; - - /* Set forward pointers. */ - if (prev_el == 0) { - /* There was no prev_el, we are inserting at the head. */ - new_el->next = map->head; - map->head = new_el; - } - else { - /* There was a prev_el, we can access previous next. */ - new_el->next = prev_el->next; - prev_el->next = new_el; - } - - /* Set reverse pointers. */ - if (new_el->next == 0) { - /* There is no next element. 
Set the tail pointer. */ - map->tail = new_el; - } - else { - /* There is a next element. Set it's prev pointer. */ - new_el->next->prev = new_el; - } -} - - -map_el_t *map_list_detach( map_t *map, map_el_t *el ) -{ - /* Set forward pointers to skip over el. */ - if ( el->prev == 0 ) - map->head = el->next; - else - el->prev->next = el->next; - - /* Set reverse pointers to skip over el. */ - if ( el->next == 0 ) - map->tail = el->prev; - else - el->next->prev = el->prev; - - /* Update List length and return element we detached. */ - return el; -} - - -/* Once an insertion position is found, attach a element to the tree. */ -void map_attach_rebal( map_t *map, map_el_t *element, map_el_t *parent_el, map_el_t *last_less ) -{ - /* Increment the number of element in the tree. */ - map->tree_size += 1; - - /* Set element's parent. */ - element->parent = parent_el; - - /* New element always starts as a leaf with height 1. */ - element->left = 0; - element->right = 0; - element->height = 1; - - /* Are we inserting in the tree somewhere? */ - if ( parent_el != 0 ) { - /* We have a parent so we are somewhere in the tree. If the parent - * equals lastLess, then the last traversal in the insertion went - * left, otherwise it went right. */ - if ( last_less == parent_el ) { - parent_el->left = element; - - map_list_add_before( map, parent_el, element ); - } - else { - parent_el->right = element; - - map_list_add_after( map, parent_el, element ); - } - } - else { - /* No parent element so we are inserting the root. */ - map->root = element; - - map_list_add_after( map, map->tail, element ); - } - - /* Recalculate the heights. */ - map_recalc_heights( map, parent_el ); - - /* Find the first unbalance. */ - map_el_t *ub = mapFindFirstUnbalGP( map, element ); - - /* rebalance. */ - if ( ub != 0 ) - { - /* We assert that after this single rotation the - * tree is now properly balanced. 
*/ - map_rebalance( map, ub ); - } -} - -#if 0 -/* Recursively delete all the children of a element. */ -void map_delete_children_of( map_t *map, map_el_t *element ) -{ - /* Recurse left. */ - if ( element->left ) { - map_delete_children_of( map, element->left ); - - /* Delete left element. */ - delete element->left; - element->left = 0; - } - - /* Recurse right. */ - if ( element->right ) { - map_delete_children_of( map, element->right ); - - /* Delete right element. */ - delete element->right; - element->left = 0; - } -} - -void map_empty( map_t *map ) -{ - if ( map->root ) { - /* Recursively delete from the tree structure. */ - map_delete_children_of( map, map->root ); - delete map->root; - map->root = 0; - map->tree_size = 0; - - map_list_abandon( map ); - } -} -#endif - -/* rebalance from a element whose gradparent is unbalanced. Only - * call on a element that has a grandparent. */ -map_el_t *map_rebalance( map_t *map, map_el_t *n ) -{ - long lheight, rheight; - map_el_t *a, *b, *c; - map_el_t *t1, *t2, *t3, *t4; - - map_el_t *p = n->parent; /* parent (Non-NUL). L*/ - map_el_t *gp = p->parent; /* Grand-parent (Non-NULL). */ - map_el_t *ggp = gp->parent; /* Great grand-parent (may be NULL). */ - - if (gp->right == p) - { - /* gp - * * p - p - */ - if (p->right == n) - { - /* gp - * * p - p - * * n - n - */ - a = gp; - b = p; - c = n; - t1 = gp->left; - t2 = p->left; - t3 = n->left; - t4 = n->right; - } - else - { - /* gp - * * p - p - * / - * n - */ - a = gp; - b = n; - c = p; - t1 = gp->left; - t2 = n->left; - t3 = n->right; - t4 = p->right; - } - } - else - { - /* gp - * / - * p - */ - if (p->right == n) - { - /* gp - * / - * p - * * n - n - */ - a = p; - b = n; - c = gp; - t1 = p->left; - t2 = n->left; - t3 = n->right; - t4 = gp->right; - } - else - { - /* gp - * / - * p - * / - * n - */ - a = n; - b = p; - c = gp; - t1 = n->left; - t2 = n->right; - t3 = p->right; - t4 = gp->right; - } - } - - /* Perform rotation. - */ - - /* Tie b to the great grandparent. 
*/ - if ( ggp == 0 ) - map->root = b; - else if ( ggp->left == gp ) - ggp->left = b; - else - ggp->right = b; - b->parent = ggp; - - /* Tie a as a leftchild of b. */ - b->left = a; - a->parent = b; - - /* Tie c as a rightchild of b. */ - b->right = c; - c->parent = b; - - /* Tie t1 as a leftchild of a. */ - a->left = t1; - if ( t1 != 0 ) t1->parent = a; - - /* Tie t2 as a rightchild of a. */ - a->right = t2; - if ( t2 != 0 ) t2->parent = a; - - /* Tie t3 as a leftchild of c. */ - c->left = t3; - if ( t3 != 0 ) t3->parent = c; - - /* Tie t4 as a rightchild of c. */ - c->right = t4; - if ( t4 != 0 ) t4->parent = c; - - /* The heights are all recalculated manualy and the great - * grand-parent is passed to recalcHeights() to ensure - * the heights are correct up the tree. - * - * Note that recalcHeights() cuts out when it comes across - * a height that hasn't changed. - */ - - /* Fix height of a. */ - lheight = a->left ? a->left->height : 0; - rheight = a->right ? a->right->height : 0; - a->height = (lheight > rheight ? lheight : rheight) + 1; - - /* Fix height of c. */ - lheight = c->left ? c->left->height : 0; - rheight = c->right ? c->right->height : 0; - c->height = (lheight > rheight ? lheight : rheight) + 1; - - /* Fix height of b. */ - lheight = a->height; - rheight = c->height; - b->height = (lheight > rheight ? lheight : rheight) + 1; - - /* Fix height of b's parents. */ - map_recalc_heights( map, ggp ); - return ggp; -} - -/* Recalculates the heights of all the ancestors of element. */ -void map_recalc_heights( map_t *map, map_el_t *element ) -{ - while ( element != 0 ) - { - long lheight = element->left ? element->left->height : 0; - long rheight = element->right ? element->right->height : 0; - - long new_height = (lheight > rheight ? lheight : rheight) + 1; - - /* If there is no chage in the height, then there will be no - * change in any of the ancestor's height. We can stop going up. - * If there was a change, continue upward. 
*/ - if (new_height == element->height) - return; - else - element->height = new_height; - - element = element->parent; - } -} - -/* Finds the first element whose grandparent is unbalanced. */ -map_el_t *mapFindFirstUnbalGP( map_t *map, map_el_t *element ) -{ - long lheight, rheight, balance_prop; - map_el_t *gp; - - if ( element == 0 || element->parent == 0 || - element->parent->parent == 0 ) - return 0; - - /* Don't do anything if we we have no grandparent. */ - gp = element->parent->parent; - while ( gp != 0 ) - { - lheight = gp->left ? gp->left->height : 0; - rheight = gp->right ? gp->right->height : 0; - balance_prop = lheight - rheight; - - if ( balance_prop < -1 || balance_prop > 1 ) - return element; - - element = element->parent; - gp = gp->parent; - } - return 0; -} - - - -/* Finds the first element that is unbalanced. */ -map_el_t *map_find_first_unbal_el( map_t *map, map_el_t *element ) -{ - if ( element == 0 ) - return 0; - - while ( element != 0 ) - { - long lheight = element->left ? - element->left->height : 0; - long rheight = element->right ? - element->right->height : 0; - long balance_prop = lheight - rheight; - - if ( balance_prop < -1 || balance_prop > 1 ) - return element; - - element = element->parent; - } - return 0; -} - -/* Replace a element in the tree with another element not in the tree. */ -void map_replace_el( map_t *map, map_el_t *element, map_el_t *replacement ) -{ - map_el_t *parent = element->parent, - *left = element->left, - *right = element->right; - - replacement->left = left; - if (left) - left->parent = replacement; - replacement->right = right; - if (right) - right->parent = replacement; - - replacement->parent = parent; - if (parent) - { - if (parent->left == element) - parent->left = replacement; - else - parent->right = replacement; - } - else { - map->root = replacement; - } - - replacement->height = element->height; -} - - -/* Removes a element from a tree and puts filler in it's place. 
- * Filler should be null or a child of element. */ -void map_remove_el( map_t *map, map_el_t *element, map_el_t *filler ) -{ - map_el_t *parent = element->parent; - - if ( parent ) - { - if ( parent->left == element ) - parent->left = filler; - else - parent->right = filler; - } - else { - map->root = filler; - } - - if ( filler ) - filler->parent = parent; - - return; -} - -#if 0 -/* Recursive worker for tree copying. */ -map_el_t *map_copy_branch( program_t *prg, map_t *map, map_el_t *el, kid_t *old_next_down, kid_t **new_next_down ) -{ - /* Duplicate element. Either the base element's copy constructor or defaul - * constructor will get called. Both will suffice for initting the - * pointers to null when they need to be. */ - map_el_t *new_el = map_el_allocate( prg ); - - if ( (kid_t*)el == old_next_down ) - *new_next_down = (kid_t*)new_el; - - /* If the left tree is there, copy it. */ - if ( new_el->left ) { - new_el->left = map_copy_branch( prg, map, new_el->left, old_next_down, new_next_down ); - new_el->left->parent = new_el; - } - - map_list_add_after( map, map->tail, new_el ); - - /* If the right tree is there, copy it. */ - if ( new_el->right ) { - new_el->right = map_copy_branch( prg, map, new_el->right, old_next_down, new_next_down ); - new_el->right->parent = new_el; - } - - return new_el; -} -#endif - -static long map_cmp( program_t *prg, map_t *map, const tree_t *tree1, const tree_t *tree2 ) -{ - if ( map->generic_info->key_type == TYPE_TREE ) { - return colm_cmp_tree( prg, tree1, tree2 ); - } - else { - if ( (long)tree1 < (long)tree2 ) - return -1; - else if ( (long)tree1 > (long)tree2) - return 1; - return 0; - } -} - -map_el_t *map_insert_el( program_t *prg, map_t *map, map_el_t *element, map_el_t **last_found ) -{ - long key_relation; - map_el_t *cur_el = map->root, *parent_el = 0; - map_el_t *last_less = 0; - - while ( true ) { - if ( cur_el == 0 ) { - /* We are at an external element and did not find the key we were - * looking for. 
Attach underneath the leaf and rebalance. */ - map_attach_rebal( map, element, parent_el, last_less ); - - if ( last_found != 0 ) - *last_found = element; - return element; - } - - key_relation = map_cmp( prg, map, - element->key, cur_el->key ); - - /* Do we go left? */ - if ( key_relation < 0 ) { - parent_el = last_less = cur_el; - cur_el = cur_el->left; - } - /* Do we go right? */ - else if ( key_relation > 0 ) { - parent_el = cur_el; - cur_el = cur_el->right; - } - /* We have hit the target. */ - else { - if ( last_found != 0 ) - *last_found = cur_el; - return 0; - } - } -} - -#if 0 -map_el_t *map_insert_key( program_t *prg, map_t *map, tree_t *key, map_el_t **last_found ) -{ - long key_relation; - map_el_t *cur_el = map->root, *parent_el = 0; - map_el_t *last_less = 0; - - while ( true ) { - if ( cur_el == 0 ) { - /* We are at an external element and did not find the key we were - * looking for. Create the new element, attach it underneath the leaf - * and rebalance. */ - map_el_t *element = map_el_allocate( prg ); - element->key = key; - map_attach_rebal( map, element, parent_el, last_less ); - - if ( last_found != 0 ) - *last_found = element; - return element; - } - - key_relation = map_cmp( prg, map, key, cur_el->key ); - - /* Do we go left? */ - if ( key_relation < 0 ) { - parent_el = last_less = cur_el; - cur_el = cur_el->left; - } - /* Do we go right? */ - else if ( key_relation > 0 ) { - parent_el = cur_el; - cur_el = cur_el->right; - } - /* We have hit the target. 
*/ - else { - if ( last_found != 0 ) - *last_found = cur_el; - return 0; - } - } -} -#endif - -map_el_t *colm_map_insert( program_t *prg, map_t *map, map_el_t *map_el ) -{ - return map_insert_el( prg, map, map_el, 0 ); -} - -map_el_t *colm_vmap_insert( program_t *prg, map_t *map, struct_t *key, struct_t *value ) -{ - struct colm_struct *s = colm_struct_new( prg, map->generic_info->el_struct_id ); - - colm_struct_set_field( s, struct_t*, map->generic_info->el_offset, key ); - colm_struct_set_field( s, struct_t*, 0, value ); - - map_el_t *map_el = colm_struct_get_addr( s, map_el_t*, map->generic_info->el_offset ); - - return colm_map_insert( prg, map, map_el ); -} - -map_el_t *colm_vmap_remove( program_t *prg, map_t *map, tree_t *key ) -{ - map_el_t *map_el = colm_map_find( prg, map, key ); - if ( map_el != 0 ) - colm_map_detach( prg, map, map_el ); - return 0; -} - -tree_t *colm_vmap_find( program_t *prg, map_t *map, tree_t *key ) -{ - map_el_t *map_el = colm_map_find( prg, map, key ); - if ( map_el != 0 ) { - struct_t *s = colm_generic_el_container( prg, map_el, - map->generic_info - prg->rtd->generic_info ); - tree_t *val = colm_struct_get_field( s, tree_t*, 0 ); - - if ( map->generic_info->value_type == TYPE_TREE ) - colm_tree_upref( prg, val ); - - return val; - } - return 0; -} - -void colm_map_detach( program_t *prg, map_t *map, map_el_t *map_el ) -{ - map_detach( prg, map, map_el ); -} - -map_el_t *colm_map_find( program_t *prg, map_t *map, tree_t *key ) -{ - return map_impl_find( prg, map, key ); -} - -/** - * \brief Find a element in the tree with the given key. - * - * \returns The element if key exists, null if the key does not exist. - */ -map_el_t *map_impl_find( program_t *prg, map_t *map, tree_t *key ) -{ - map_el_t *cur_el = map->root; - long key_relation; - - while ( cur_el != 0 ) { - key_relation = map_cmp( prg, map, key, cur_el->key ); - - /* Do we go left? */ - if ( key_relation < 0 ) - cur_el = cur_el->left; - /* Do we go right? 
*/ - else if ( key_relation > 0 ) - cur_el = cur_el->right; - /* We have hit the target. */ - else { - return cur_el; - } - } - return 0; -} - - -/** - * \brief Find a element, then detach it from the tree. - * - * The element is not deleted. - * - * \returns The element detached if the key is found, othewise returns null. - */ -map_el_t *map_detach_by_key( program_t *prg, map_t *map, tree_t *key ) -{ - map_el_t *element = map_impl_find( prg, map, key ); - if ( element ) - map_detach( prg, map, element ); - - return element; -} - -/** - * \brief Detach a element from the tree. - * - * If the element is not in the tree then undefined behaviour results. - * - * \returns The element given. - */ -map_el_t *map_detach( program_t *prg, map_t *map, map_el_t *element ) -{ - map_el_t *replacement, *fixfrom; - long lheight, rheight; - - /* Remove the element from the ordered list. */ - map_list_detach( map, element ); - - /* Update treeSize. */ - map->tree_size--; - - /* Find a replacement element. */ - if (element->right) - { - /* Find the leftmost element of the right subtree. */ - replacement = element->right; - while (replacement->left) - replacement = replacement->left; - - /* If replacing the element the with its child then we need to start - * fixing at the replacement, otherwise we start fixing at the - * parent of the replacement. */ - if (replacement->parent == element) - fixfrom = replacement; - else - fixfrom = replacement->parent; - - map_remove_el( map, replacement, replacement->right ); - map_replace_el( map, element, replacement ); - } - else if (element->left) - { - /* Find the rightmost element of the left subtree. */ - replacement = element->left; - while (replacement->right) - replacement = replacement->right; - - /* If replacing the element the with its child then we need to start - * fixing at the replacement, otherwise we start fixing at the - * parent of the replacement. 
*/ - if (replacement->parent == element) - fixfrom = replacement; - else - fixfrom = replacement->parent; - - map_remove_el( map, replacement, replacement->left ); - map_replace_el( map, element, replacement ); - } - else - { - /* We need to start fixing at the parent of the element. */ - fixfrom = element->parent; - - /* The element we are deleting is a leaf element. */ - map_remove_el( map, element, 0 ); - } - - /* If fixfrom is null it means we just deleted - * the root of the tree. */ - if ( fixfrom == 0 ) - return element; - - /* Fix the heights after the deletion. */ - map_recalc_heights( map, fixfrom ); - - /* Fix every unbalanced element going up in the tree. */ - map_el_t *ub = map_find_first_unbal_el( map, fixfrom ); - while ( ub ) - { - /* Find the element to rebalance by moving down from the first unbalanced - * element 2 levels in the direction of the greatest heights. On the - * second move down, the heights may be equal ( but not on the first ). - * In which case go in the direction of the first move. */ - lheight = ub->left ? ub->left->height : 0; - rheight = ub->right ? ub->right->height : 0; - assert( lheight != rheight ); - if (rheight > lheight) - { - ub = ub->right; - lheight = ub->left ? - ub->left->height : 0; - rheight = ub->right ? - ub->right->height : 0; - if (rheight > lheight) - ub = ub->right; - else if (rheight < lheight) - ub = ub->left; - else - ub = ub->right; - } - else - { - ub = ub->left; - lheight = ub->left ? - ub->left->height : 0; - rheight = ub->right ? - ub->right->height : 0; - if (rheight > lheight) - ub = ub->right; - else if (rheight < lheight) - ub = ub->left; - else - ub = ub->left; - } - - - /* rebalance returns the grandparant of the subtree formed - * by the element that were rebalanced. - * We must continue upward from there rebalancing. */ - fixfrom = map_rebalance( map, ub ); - - /* Find the next unbalaced element. 
*/ - ub = map_find_first_unbal_el( map, fixfrom ); - } - - return element; -} - - - diff --git a/src/map.cc b/src/map.cc deleted file mode 100644 index 4d3bd090..00000000 --- a/src/map.cc +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright 2008-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "pdarun.h" -#include <assert.h> - - - diff --git a/src/map.h b/src/map.h deleted file mode 100644 index 1d6db2d7..00000000 --- a/src/map.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright 2010-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef _COLM_MAP_H -#define _COLM_MAP_H - -#if defined(__cplusplus) -extern "C" { -#endif - -#include <colm/program.h> -#include <colm/struct.h> - -#include "internal.h" - -void map_list_abandon( map_t *map ); - -void map_list_add_before( map_t *map, map_el_t *next_el, map_el_t *new_el ); -void map_list_add_after( map_t *map, map_el_t *prev_el, map_el_t *new_el ); -map_el_t *map_list_detach( map_t *map, map_el_t *el ); -void map_attach_rebal( map_t *map, map_el_t *element, map_el_t *parent_el, map_el_t *last_less ); -void map_delete_children_of( map_t *map, map_el_t *element ); -void map_empty( map_t *map ); -map_el_t *map_rebalance( map_t *map, map_el_t *n ); -void map_recalc_heights( map_t *map, map_el_t *element ); -map_el_t *mapFindFirstUnbalGP( map_t *map, map_el_t *element ); -map_el_t *map_find_first_unbal_el( map_t *map, map_el_t *element ); -void map_remove_el( map_t *map, map_el_t *element, map_el_t *filler ); -void map_replace_el( map_t *map, map_el_t *element, map_el_t *replacement ); -map_el_t *map_insert_el( program_t *prg, map_t *map, map_el_t *element, map_el_t **last_found ); -map_el_t *map_insert_key( program_t *prg, map_t *map, tree_t *key, map_el_t **last_found ); -map_el_t *map_impl_find( program_t *prg, map_t *map, tree_t *key ); -map_el_t *map_detach_by_key( program_t *prg, map_t *map, tree_t *key ); -map_el_t *map_detach( program_t *prg, map_t *map, map_el_t *element ); -map_el_t *map_copy_branch( program_t *prg, map_t *map, map_el_t *el, - kid_t *old_next_down, kid_t **new_next_down ); - -struct tree_pair map_remove( program_t *prg, map_t *map, tree_t *key ); - -long cmp_tree( program_t *prg, const tree_t *tree1, const tree_t *tree2 ); - -void map_impl_remove_el( program_t *prg, map_t *map, map_el_t *element ); -int map_impl_remove_key( program_t *prg, map_t *map, tree_t *key ); - -tree_t *map_find( program_t *prg, map_t *map, tree_t *key ); -long map_length( map_t *map ); -tree_t *map_unstore( program_t *prg, map_t *map, tree_t 
*key, tree_t *existing ); -int map_insert( program_t *prg, map_t *map, tree_t *key, tree_t *element ); -void map_unremove( program_t *prg, map_t *map, tree_t *key, tree_t *element ); -tree_t *map_uninsert( program_t *prg, map_t *map, tree_t *key ); -tree_t *map_store( program_t *prg, map_t *map, tree_t *key, tree_t *element ); - -map_el_t *colm_map_insert( program_t *prg, map_t *map, map_el_t *map_el ); -void colm_map_detach( program_t *prg, map_t *map, map_el_t *map_el ); -map_el_t *colm_map_find( program_t *prg, map_t *map, tree_t *key ); - -map_el_t *colm_vmap_insert( program_t *prg, map_t *map, struct_t *key, struct_t *value ); -map_el_t *colm_vmap_remove( program_t *prg, map_t *map, tree_t *key ); -tree_t *colm_map_iter_advance( program_t *prg, tree_t ***psp, generic_iter_t *iter ); -tree_t *colm_vmap_find( program_t *prg, map_t *map, tree_t *key ); - -#if defined(__cplusplus) -} -#endif - -#endif /* _COLM_MAP_H */ - diff --git a/src/parser.cc b/src/parser.cc deleted file mode 100644 index 23e60ec2..00000000 --- a/src/parser.cc +++ /dev/null @@ -1,1122 +0,0 @@ -/* - * Copyright 2006-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "parser.h" - -#include <stdbool.h> -#include <stdlib.h> -#include <errno.h> - -#include <iostream> - -using std::endl; - -void BaseParser::listElDef( String name ) -{ - /* - * The unique type. This is a def with a single empty form. - */ - ObjectDef *objectDef = ObjectDef::cons( ObjectDef::UserType, - name, pd->nextObjectId++ ); - - LelDefList *defList = new LelDefList; - - Production *prod = BaseParser::production( InputLoc(), - new ProdElList, String(), false, 0, 0 ); - - prodAppend( defList, prod ); - - NtDef *ntDef = NtDef::cons( name, curNspace(), curStruct(), false ); - BaseParser::cflDef( ntDef, objectDef, defList ); - - /* - * List element with the same name as containing context. - */ - NamespaceQual *nspaceQual = NamespaceQual::cons( curNspace() ); - String id = curStruct()->objectDef->name; - RepeatType repeatType = RepeatNone; - TypeRef *objTr = TypeRef::cons( InputLoc(), nspaceQual, id, repeatType ); - TypeRef *elTr = TypeRef::cons( InputLoc(), TypeRef::ListPtrs, 0, objTr, 0 ); - - ObjectField *of = ObjectField::cons( InputLoc(), - ObjectField::GenericElementType, elTr, name ); - - structVarDef( InputLoc(), of ); -} - -void BaseParser::mapElDef( String name, TypeRef *keyType ) -{ - /* - * The unique type. This is a def with a single empty form. 
- */ - ObjectDef *objectDef = ObjectDef::cons( ObjectDef::UserType, - name, pd->nextObjectId++ ); - - LelDefList *defList = new LelDefList; - - Production *prod = BaseParser::production( InputLoc(), - new ProdElList, String(), false, 0, 0 ); - prodAppend( defList, prod ); - - NtDef *ntDef = NtDef::cons( name, curNspace(), curStruct(), false ); - BaseParser::cflDef( ntDef, objectDef, defList ); - - /* - * Same name as containing context. - */ - NamespaceQual *nspaceQual = NamespaceQual::cons( curNspace() ); - String id = curStruct()->objectDef->name; - TypeRef *objTr = TypeRef::cons( InputLoc(), nspaceQual, id, RepeatNone ); - TypeRef *elTr = TypeRef::cons( InputLoc(), TypeRef::MapPtrs, 0, objTr, keyType ); - - ObjectField *of = ObjectField::cons( InputLoc(), - ObjectField::GenericElementType, elTr, name ); - structVarDef( InputLoc(), of ); -} - -#if 0 -void BaseParser::argvDecl() -{ - String structName = "argv_el"; - structHead( internal, pd->rootNamespace, structName, ObjectDef::StructType ); - - /* First the argv value. */ - String name = "value"; - String type = "str"; - NamespaceQual *nspaceQual = NamespaceQual::cons( curNspace() ); - TypeRef *typeRef = TypeRef::cons( internal, nspaceQual, type, RepeatNone ); - ObjectField *objField = ObjectField::cons( internal, - ObjectField::StructFieldType, typeRef, name ); - structVarDef( objField->loc, objField ); - - pd->argvEl = objField->context; - - /* Now the list element. */ - listElDef( "el" ); - - structStack.pop(); - namespaceStack.pop(); -} -#endif - -void BaseParser::init() -{ - /* Set up the root namespace. */ - pd->rootNamespace = createRootNamespace(); - - /* Setup the global object. 
*/ - String global = "global"; - pd->globalObjectDef = ObjectDef::cons( ObjectDef::UserType, - global, pd->nextObjectId++ ); - - pd->rootNamespace->rootScope->owningObj = pd->globalObjectDef; - - pd->global = new StructDef( internal, global, pd->globalObjectDef ); - pd->globalSel = declareStruct( pd, 0, global, pd->global ); - - /* Setup the input object. */ - global = "_input"; - ObjectDef *objectDef = ObjectDef::cons( ObjectDef::BuiltinType, - global, pd->nextObjectId++ ); - - pd->input = new StructDef( internal, global, objectDef ); - pd->inputSel = declareStruct( pd, pd->rootNamespace, - pd->input->name, pd->input ); - - /* Setup the stream object. */ - global = "stream"; - objectDef = ObjectDef::cons( ObjectDef::BuiltinType, - global, pd->nextObjectId++ ); - - pd->stream = new StructDef( internal, global, objectDef ); - pd->streamSel = declareStruct( pd, pd->rootNamespace, - pd->stream->name, pd->stream ); - - /* Initialize the dictionary of graphs. This is our symbol table. The - * initialization needs to be done on construction which happens at the - * beginning of a machine spec so any assignment operators can reference - * the builtins. */ - pd->initGraphDict(); - - pd->rootLocalFrame = ObjectDef::cons( ObjectDef::FrameType, - "local", pd->nextObjectId++ ); - localFrameTop = pd->rootLocalFrame; - scopeTop = pd->rootLocalFrame->rootScope; - - - /* Declarations of internal types. They must be declared now because we use - * them directly, rather than via type lookup. */ - pd->declareBaseLangEls(); - pd->initUniqueTypes(); - - //argvDecl(); - - /* Internal variables. */ - addArgvList(); - addStdsList(); -} - -void BaseParser::addRegularDef( const InputLoc &loc, Namespace *nspace, - const String &name, LexJoin *join ) -{ - GraphDictEl *newEl = nspace->rlMap.insert( name ); - if ( newEl != 0 ) { - /* New element in the dict, all good. 
*/ - newEl->value = new LexDefinition( name, join ); - newEl->isInstance = false; - newEl->loc = loc; - } - else { - // Recover by ignoring the duplicate. - error(loc) << "regular definition \"" << name << "\" already exists" << endl; - } -} - -TokenRegion *BaseParser::createRegion( const InputLoc &loc, RegionImpl *impl ) -{ - TokenRegion *tokenRegion = new TokenRegion( loc, - pd->regionList.length(), impl ); - - pd->regionList.append( tokenRegion ); - - return tokenRegion; -} - -void BaseParser::pushRegionSet( const InputLoc &loc ) -{ - RegionImpl *implTokenIgnore = new RegionImpl; - RegionImpl *implTokenOnly = new RegionImpl; - RegionImpl *implIgnoreOnly = new RegionImpl; - - pd->regionImplList.append( implTokenIgnore ); - pd->regionImplList.append( implTokenOnly ); - pd->regionImplList.append( implIgnoreOnly ); - - TokenRegion *tokenIgnore = createRegion( loc, implTokenIgnore ); - TokenRegion *tokenOnly = createRegion( loc, implTokenOnly ); - TokenRegion *ignoreOnly = createRegion( loc, implIgnoreOnly ); - TokenRegion *collectIgnore = createRegion( loc, implIgnoreOnly ); - - RegionSet *regionSet = new RegionSet( - implTokenIgnore, implTokenIgnore, implIgnoreOnly, - tokenIgnore, tokenOnly, ignoreOnly, collectIgnore ); - - collectIgnore->ignoreOnly = ignoreOnly; - - pd->regionSetList.append( regionSet ); - regionStack.push( regionSet ); -} - -void BaseParser::popRegionSet() -{ - regionStack.pop(); -} - -Namespace *BaseParser::createRootNamespace() -{ - /* Gets id of zero and default name. No parent. */ - Namespace *nspace = new Namespace( internal, - String("___ROOT_NAMESPACE"), 0, 0 ); - - nspace->rootScope->owningObj = pd->globalObjectDef; - - pd->namespaceList.append( nspace ); - namespaceStack.push( nspace ); - - return nspace; -} - -Namespace *BaseParser::createNamespace( const InputLoc &loc, const String &name ) -{ - Namespace *parent = namespaceStack.top(); - - /* Make the new namespace. 
*/ - Namespace *nspace = parent->findNamespace( name ); - - if ( nspace == 0 ) { - nspace = new Namespace( loc, name, - pd->namespaceList.length(), parent ); - - /* Link the new namespace's scope to the parent namespace's scope. */ - nspace->rootScope->parentScope = parent->rootScope; - nspace->rootScope->owningObj = pd->globalObjectDef; - - parent->childNamespaces.append( nspace ); - pd->namespaceList.append( nspace ); - } - - namespaceStack.push( nspace ); - - return nspace; -} - -Reduction *BaseParser::createReduction( const InputLoc loc, const String &name ) -{ - Namespace *parent = namespaceStack.top(); - Reduction *reduction = parent->findReduction( name ); - - if ( reduction == 0 ) { - reduction = new Reduction( loc, name ); - parent->reductions.append( reduction ); - } - - reductionStack.push( reduction ); - - return reduction; -} - -LexJoin *BaseParser::literalJoin( const InputLoc &loc, const String &data ) -{ - Literal *literal = Literal::cons( loc, data, Literal::LitString ); - LexFactor *factor = LexFactor::cons( literal ); - LexFactorNeg *factorNeg = LexFactorNeg::cons( factor ); - LexFactorRep *factorRep = LexFactorRep::cons( factorNeg ); - LexFactorAug *factorAug = LexFactorAug::cons( factorRep ); - LexTerm *term = LexTerm::cons( factorAug ); - LexExpression *expr = LexExpression::cons( term ); - LexJoin *join = LexJoin::cons( expr ); - return join; -} - -void BaseParser::defineToken( const InputLoc &loc, String name, LexJoin *join, - ObjectDef *objectDef, CodeBlock *transBlock, bool ignore, - bool noPreIgnore, bool noPostIgnore ) -{ - bool pushedRegion = false; - if ( !insideRegion() ) { - if ( ignore ) - error(loc) << "ignore tokens can only appear inside scanners" << endp; - - pushedRegion = true; - pushRegionSet( internal ); - } - - /* Check the name if this is a token. */ - if ( !ignore && name == 0 ) - error(loc) << "tokens must have a name" << endp; - - /* Give a default name to ignores. 
*/ - if ( name == 0 ) - name.setAs( 32, "_ignore_%.4x", pd->nextTokenId ); - - Namespace *nspace = curNspace(); - RegionSet *regionSet = regionStack.top(); - - TokenDef *tokenDef = TokenDef::cons( name, String(), false, ignore, join, - transBlock, loc, 0, nspace, regionSet, objectDef, curStruct() ); - - regionSet->tokenDefList.append( tokenDef ); - nspace->tokenDefList.append( tokenDef ); - - tokenDef->noPreIgnore = noPreIgnore; - tokenDef->noPostIgnore = noPostIgnore; - - TokenInstance *tokenInstance = TokenInstance::cons( tokenDef, - join, loc, pd->nextTokenId++, nspace, - regionSet->tokenIgnore ); - - regionSet->tokenIgnore->impl->tokenInstanceList.append( tokenInstance ); - - tokenDef->noPreIgnore = noPreIgnore; - tokenDef->noPostIgnore = noPostIgnore; - - if ( ignore ) { - /* The instance for the ignore-only. */ - TokenInstance *tokenInstanceIgn = TokenInstance::cons( tokenDef, - join, loc, pd->nextTokenId++, nspace, regionSet->ignoreOnly ); - - tokenInstanceIgn->dupOf = tokenInstance; - - regionSet->ignoreOnly->impl->tokenInstanceList.append( tokenInstanceIgn ); - } - else { - /* The instance for the token-only. */ - TokenInstance *tokenInstanceTok = TokenInstance::cons( tokenDef, - join, loc, pd->nextTokenId++, nspace, regionSet->tokenOnly ); - - tokenInstanceTok->dupOf = tokenInstance; - - regionSet->tokenOnly->impl->tokenInstanceList.append( tokenInstanceTok ); - } - - /* This is created and pushed in the name. 
*/ - if ( pushedRegion ) - popRegionSet(); - - if ( join != 0 ) { - /* Create a regular language definition so the token can be used to - * make other tokens */ - addRegularDef( loc, curNspace(), name, join ); - } -} - -void BaseParser::zeroDef( const InputLoc &loc, const String &name ) -{ - if ( !insideRegion() ) - error(loc) << "zero token should be inside token" << endp; - - RegionSet *regionSet = regionStack.top(); - Namespace *nspace = curNspace(); - - LexJoin *join = literalJoin( loc, String("`") ); - - TokenDef *tokenDef = TokenDef::cons( name, String(), false, false, join, - 0, loc, 0, nspace, regionSet, 0, curStruct() ); - - tokenDef->isZero = true; - - regionSet->tokenDefList.append( tokenDef ); - nspace->tokenDefList.append( tokenDef ); - - /* No token instance created. */ -} - -void BaseParser::literalDef( const InputLoc &loc, const String &data, - bool noPreIgnore, bool noPostIgnore ) -{ - /* Create a name for the literal. */ - String name( 32, "_literal_%.4x", pd->nextTokenId ); - - bool pushedRegion = false; - if ( !insideRegion() ) { - pushRegionSet( loc ); - pushedRegion = true; - } - - bool unusedCI; - String interp; - prepareLitString( interp, unusedCI, data, loc ); - - /* Look for the production's associated region. */ - Namespace *nspace = curNspace(); - RegionSet *regionSet = regionStack.top(); - - LiteralDictEl *ldel = nspace->literalDict.find( interp ); - if ( ldel != 0 ) - error( loc ) << "literal already defined in this namespace" << endp; - - LexJoin *join = literalJoin( loc, data ); - - ObjectDef *objectDef = ObjectDef::cons( ObjectDef::UserType, - name, pd->nextObjectId++ ); - - /* The token definition. */ - TokenDef *tokenDef = TokenDef::cons( name, data, true, false, join, - 0, loc, 0, nspace, regionSet, objectDef, 0 ); - - regionSet->tokenDefList.append( tokenDef ); - nspace->tokenDefList.append( tokenDef ); - - /* The instance for the token/ignore region. 
*/ - TokenInstance *tokenInstance = TokenInstance::cons( tokenDef, join, - loc, pd->nextTokenId++, nspace, regionSet->tokenIgnore ); - - regionSet->tokenIgnore->impl->tokenInstanceList.append( tokenInstance ); - - ldel = nspace->literalDict.insert( interp, tokenInstance ); - - /* Make the duplicate for the token-only region. */ - tokenDef->noPreIgnore = noPreIgnore; - tokenDef->noPostIgnore = noPostIgnore; - - /* The instance for the token-only region. */ - TokenInstance *tokenInstanceTok = TokenInstance::cons( tokenDef, - join, loc, pd->nextTokenId++, nspace, - regionSet->tokenOnly ); - - tokenInstanceTok->dupOf = tokenInstance; - - regionSet->tokenOnly->impl->tokenInstanceList.append( tokenInstanceTok ); - - if ( pushedRegion ) - popRegionSet(); -} - -void BaseParser::addArgvList() -{ - TypeRef *valType = TypeRef::cons( internal, pd->uniqueTypeStr ); - TypeRef *elType = TypeRef::cons( internal, TypeRef::ListEl, valType ); - pd->argvTypeRef = TypeRef::cons( internal, TypeRef::List, 0, elType, valType ); -} - -void BaseParser::addStdsList() -{ - TypeRef *valType = TypeRef::cons( internal, pd->uniqueTypeStream ); - TypeRef *elType = TypeRef::cons( internal, TypeRef::ListEl, valType ); - pd->stdsTypeRef = TypeRef::cons( internal, TypeRef::List, 0, elType, valType ); -} - -ObjectDef *BaseParser::blockOpen() -{ - /* Init the object representing the local frame. 
*/ - ObjectDef *frame = ObjectDef::cons( ObjectDef::FrameType, - "local", pd->nextObjectId++ ); - - localFrameTop = frame; - scopeTop = frame->rootScope; - return frame; -} - -void BaseParser::blockClose() -{ - localFrameTop = pd->rootLocalFrame; - scopeTop = pd->rootLocalFrame->rootScope; -} - -void BaseParser::functionDef( StmtList *stmtList, ObjectDef *localFrame, - ParameterList *paramList, TypeRef *typeRef, const String &name, bool exprt ) -{ - CodeBlock *codeBlock = CodeBlock::cons( stmtList, localFrame ); - Function *newFunction = Function::cons( curNspace(), typeRef, name, - paramList, codeBlock, pd->nextFuncId++, false, exprt ); - pd->functionList.append( newFunction ); - newFunction->inContext = curStruct(); -} - -void BaseParser::inHostDef( const String &hostCall, ObjectDef *localFrame, - ParameterList *paramList, TypeRef *typeRef, const String &name, bool exprt ) -{ - Function *newFunction = Function::cons( curNspace(), typeRef, name, - paramList, 0, pd->nextHostId++, false, exprt ); - newFunction->hostCall = hostCall; - newFunction->localFrame = localFrame; - newFunction->inHost = true; - pd->inHostList.append( newFunction ); - newFunction->inContext = curStruct(); -} - -void BaseParser::iterDef( StmtList *stmtList, ObjectDef *localFrame, - ParameterList *paramList, const String &name ) -{ - CodeBlock *codeBlock = CodeBlock::cons( stmtList, localFrame ); - Function *newFunction = Function::cons( curNspace(), 0, name, - paramList, codeBlock, pd->nextFuncId++, true, false ); - pd->functionList.append( newFunction ); -} - -LangStmt *BaseParser::globalDef( ObjectField *objField, LangExpr *expr, - LangStmt::Type assignType ) -{ - LangStmt *stmt = 0; - ObjectDef *object = pd->globalObjectDef; - Namespace *nspace = curNspace(); //pd->rootNamespace; - - if ( nspace->rootScope->checkRedecl( objField->name ) != 0 ) - error(objField->loc) << "object field renamed" << endp; - - object->insertField( nspace->rootScope, objField->name, objField ); - - if ( expr != 0 
) { - LangVarRef *varRef = LangVarRef::cons( objField->loc, - curNspace(), curStruct(), curScope(), objField->name ); - - stmt = LangStmt::cons( objField->loc, assignType, varRef, expr ); - } - - return stmt; -} - -LangStmt *BaseParser::exportStmt( ObjectField *objField, - LangStmt::Type assignType, LangExpr *expr ) -{ - LangStmt *stmt = 0; - - ObjectDef *object = pd->globalObjectDef; - Namespace *nspace = curNspace(); //pd->rootNamespace; - - if ( curStruct() != 0 ) - error(objField->loc) << "cannot export parser context variables" << endp; - - if ( nspace->rootScope->checkRedecl( objField->name ) != 0 ) - error(objField->loc) << "object field renamed" << endp; - - object->insertField( nspace->rootScope, objField->name, objField ); - objField->isExport = true; - - if ( expr != 0 ) { - LangVarRef *varRef = LangVarRef::cons( objField->loc, - curNspace(), 0, curScope(), objField->name ); - - stmt = LangStmt::cons( objField->loc, assignType, varRef, expr ); - } - - return stmt; -} - - -void BaseParser::cflDef( NtDef *ntDef, ObjectDef *objectDef, LelDefList *defList ) -{ - Namespace *nspace = curNspace(); - - ntDef->objectDef = objectDef; - ntDef->defList = defList; - - nspace->ntDefList.append( ntDef ); - - /* Declare the captures in the object. */ - for ( LelDefList::Iter prod = *defList; prod.lte(); prod++ ) { - for ( ProdElList::Iter pel = *prod->prodElList; pel.lte(); pel++ ) { - /* If there is a capture, create the field. */ - if ( pel->captureField != 0 ) { - /* Might already exist. */ - ObjectField *newOf = objectDef->rootScope->checkRedecl( - pel->captureField->name ); - if ( newOf != 0 ) { - /* FIXME: check the types are the same. 
*/ - } - else { - newOf = pel->captureField; - newOf->typeRef = pel->typeRef; - objectDef->rootScope->insertField( newOf->name, newOf ); - } - - newOf->rhsVal.append( RhsVal( pel ) ); - } - } - } -} - -ReOrBlock *BaseParser::lexRegularExprData( ReOrBlock *reOrBlock, ReOrItem *reOrItem ) -{ - ReOrBlock *ret; - - /* An optimization to lessen the tree size. If an or char is directly under - * the left side on the right and the right side is another or char then - * paste them together and return the left side. Otherwise just put the two - * under a new or data node. */ - if ( reOrItem->type == ReOrItem::Data && - reOrBlock->type == ReOrBlock::RecurseItem && - reOrBlock->item->type == ReOrItem::Data ) - { - /* Append the right side to right side of the left and toss the - * right side. */ - reOrBlock->item->data += reOrItem->data; - delete reOrItem; - ret = reOrBlock; - } - else { - /* Can't optimize, put the left and right under a new node. */ - ret = ReOrBlock::cons( reOrBlock, reOrItem ); - } - return ret; -} - -LexFactor *BaseParser::lexRlFactorName( const String &data, const InputLoc &loc ) -{ - LexFactor *factor = 0; - /* Find the named graph. */ - Namespace *nspace = curNspace(); - - while ( nspace != 0 ) { - GraphDictEl *gdNode = nspace->rlMap.find( data ); - if ( gdNode != 0 ) { - if ( gdNode->isInstance ) { - /* Recover by retuning null as the factor node. */ - error(loc) << "references to graph instantiations not allowed " - "in expressions" << endl; - factor = 0; - } - else { - /* Create a factor node that is a lookup of an expression. */ - factor = LexFactor::cons( loc, gdNode->value ); - } - break; - } - - nspace = nspace->parentNamespace; - } - - if ( nspace == 0 ) { - /* Recover by returning null as the factor node. */ - error(loc) << "graph lookup of \"" << data << "\" failed" << endl; - factor = 0; - } - - return factor; -} - -int BaseParser::lexFactorRepNum( const InputLoc &loc, const String &data ) -{ - /* Convert the priority number to a long. 
Check for overflow. */ - errno = 0; - long rep = strtol( data, 0, 10 ); - if ( errno == ERANGE && rep == LONG_MAX ) { - /* Repetition too large. Recover by returing repetition 1. */ - error(loc) << "repetition number " << data << " overflows" << endl; - rep = 1; - } - return rep; -} - -LexFactorAug *BaseParser::lexFactorLabel( const InputLoc &loc, - const String &data, LexFactorAug *factorAug ) -{ - /* Create the object field. */ - TypeRef *typeRef = TypeRef::cons( loc, pd->uniqueTypeStr ); - ObjectField *objField = ObjectField::cons( loc, - ObjectField::LexSubstrType, typeRef, data ); - - /* Create the enter and leaving actions that will mark the substring. */ - Action *enter = Action::cons( MarkMark, pd->nextMatchEndNum++ ); - Action *leave = Action::cons( MarkMark, pd->nextMatchEndNum++ ); - pd->actionList.append( enter ); - pd->actionList.append( leave ); - - /* Add entering and leaving actions. */ - factorAug->actions.append( ParserAction( loc, at_start, 0, enter ) ); - factorAug->actions.append( ParserAction( loc, at_leave, 0, leave ) ); - - factorAug->reCaptureVect.append( ReCapture( enter, leave, objField ) ); - - return factorAug; -} - -LexJoin *BaseParser::lexOptJoin( LexJoin *join, LexJoin *context ) -{ - if ( context != 0 ) { - /* Create the enter and leaving actions that will mark the substring. 
*/ - Action *mark = Action::cons( MarkMark, pd->nextMatchEndNum++ ); - pd->actionList.append( mark ); - - join->context = context; - join->mark = mark; - } - - return join; -} - -LangExpr *BaseParser::send( const InputLoc &loc, LangVarRef *varRef, - ConsItemList *list, bool eof ) -{ - ParserText *parserText = ParserText::cons( loc, - curNspace(), list, true, false, false, "" ); - pd->parserTextList.append( parserText ); - - return LangExpr::cons( LangTerm::consSend( loc, varRef, - parserText, eof ) ); -} - -LangExpr *BaseParser::sendTree( const InputLoc &loc, LangVarRef *varRef, - ConsItemList *list, bool eof ) -{ - ParserText *parserText = ParserText::cons( loc, - curNspace(), list, true, false, false, "" ); - pd->parserTextList.append( parserText ); - - return LangExpr::cons( LangTerm::consSendTree( loc, varRef, - parserText, eof ) ); -} - -LangExpr *BaseParser::parseCmd( const InputLoc &loc, bool tree, bool stop, - ObjectField *objField, TypeRef *typeRef, FieldInitVect *fieldInitVect, - ConsItemList *list, bool used, bool reduce, bool read, const String &reducer ) -{ - LangExpr *expr = 0; - - /* Item list for what we are sending to the parser. */ - ConsItemList *consItemList = new ConsItemList; - - /* The parser may be referenced. */ - LangVarRef *varRef = 0; - if ( objField != 0 ) { - varRef = LangVarRef::cons( objField->loc, - curNspace(), curStruct(), curScope(), objField->name ); - } - - /* The typeref for the parser. */ - TypeRef *parserTypeRef = TypeRef::cons( loc, - TypeRef::Parser, 0, typeRef, 0 ); - - if ( objField != 0 ) - used = true; - - ParserText *parserText = ParserText::cons( loc, curNspace(), - list, used, reduce, read, reducer ); - pd->parserTextList.append( parserText ); - - LangTerm::Type langTermType = stop ? LangTerm::ParseStopType : ( tree ? 
- LangTerm::ParseTreeType : LangTerm::ParseType ); - - expr = LangExpr::cons( LangTerm::cons( loc, langTermType, - varRef, objField, parserTypeRef, fieldInitVect, consItemList, - parserText ) ); - - /* Check for redeclaration. */ - if ( objField != 0 ) { - if ( curScope()->checkRedecl( objField->name ) != 0 ) { - error( objField->loc ) << "variable " << objField->name << - " redeclared" << endp; - } - - /* Insert it into the field map. */ - objField->typeRef = typeRef; - curScope()->insertField( objField->name, objField ); - } - - return expr; -} - -PatternItemList *BaseParser::consPatternEl( LangVarRef *varRef, PatternItemList *list ) -{ - /* Store the variable reference in the pattern itemm. */ - list->head->varRef = varRef; - - if ( varRef != 0 ) { - if ( curScope()->checkRedecl( varRef->name ) != 0 ) { - error( varRef->loc ) << "variable " << varRef->name << - " redeclared" << endp; - } - - TypeRef *typeRef = list->head->prodEl->typeRef; - ObjectField *objField = ObjectField::cons( InputLoc(), - ObjectField::UserLocalType, typeRef, varRef->name ); - - /* Insert it into the field map. 
*/ - curScope()->insertField( varRef->name, objField ); - } - - return list; -} - -PatternItemList *BaseParser::patternElNamed( const InputLoc &loc, - LangVarRef *parsedVarRef, NamespaceQual *nspaceQual, const String &data, - RepeatType repeatType ) -{ - TypeRef *typeRef = TypeRef::cons( loc, parsedVarRef, nspaceQual, data, repeatType ); - ProdEl *prodEl = new ProdEl( ProdEl::ReferenceType, loc, 0, false, typeRef, 0 ); - PatternItem *patternItem = PatternItem::cons( PatternItem::TypeRefForm, loc, prodEl ); - return PatternItemList::cons( patternItem ); -} - -PatternItemList *BaseParser::patternElType( const InputLoc &loc, - LangVarRef *parsedVarRef, NamespaceQual *nspaceQual, const String &data, - RepeatType repeatType ) -{ - PdaLiteral *literal = new PdaLiteral( loc, data ); - TypeRef *typeRef = TypeRef::cons( loc, parsedVarRef, nspaceQual, literal, repeatType ); - - ProdEl *prodEl = new ProdEl( ProdEl::ReferenceType, loc, 0, false, typeRef, 0 ); - PatternItem *patternItem = PatternItem::cons( PatternItem::TypeRefForm, loc, prodEl ); - return PatternItemList::cons( patternItem ); -} - -ProdElList *BaseParser::appendProdEl( ProdElList *prodElList, ProdEl *prodEl ) -{ - prodEl->pos = prodElList->length(); - prodElList->append( prodEl ); - return prodElList; -} - -PatternItemList *BaseParser::patListConcat( PatternItemList *list1, - PatternItemList *list2 ) -{ - list1->append( *list2 ); - delete list2; - return list1; -} - -ConsItemList *BaseParser::consListConcat( ConsItemList *list1, - ConsItemList *list2 ) -{ - list1->append( *list2 ); - delete list2; - return list1; -} - -LangStmt *BaseParser::forScope( const InputLoc &loc, const String &data, - NameScope *scope, TypeRef *typeRef, IterCall *iterCall, StmtList *stmtList ) -{ - /* Check for redeclaration. */ - if ( curScope()->checkRedecl( data ) != 0 ) - error( loc ) << "variable " << data << " redeclared" << endp; - - /* Note that we pass in a null type reference. 
This type is dependent on - * the result of the iter_call lookup since it must contain a reference to - * the iterator that is called. This lookup is done at compile time. */ - ObjectField *iterField = ObjectField::cons( loc, - ObjectField::UserLocalType, (TypeRef*)0, data ); - curScope()->insertField( data, iterField ); - - LangStmt *stmt = LangStmt::cons( loc, LangStmt::ForIterType, - iterField, typeRef, iterCall, stmtList, curStruct(), scope ); - - return stmt; -} - -void BaseParser::preEof( const InputLoc &loc, StmtList *stmtList, ObjectDef *localFrame ) -{ - if ( !insideRegion() ) - error(loc) << "preeof must be used inside an existing region" << endl; - - CodeBlock *codeBlock = CodeBlock::cons( stmtList, localFrame ); - codeBlock->context = curStruct(); - - RegionSet *regionSet = regionStack.top(); - regionSet->tokenIgnore->preEofBlock = codeBlock; -} - -ProdEl *BaseParser::prodElName( const InputLoc &loc, const String &data, - NamespaceQual *nspaceQual, ObjectField *objField, - RepeatType repeatType, bool commit ) -{ - TypeRef *typeRef = TypeRef::cons( loc, nspaceQual, data, repeatType ); - ProdEl *prodEl = new ProdEl( ProdEl::ReferenceType, loc, objField, commit, typeRef, 0 ); - return prodEl; -} - -ProdEl *BaseParser::prodElLiteral( const InputLoc &loc, const String &data, - NamespaceQual *nspaceQual, ObjectField *objField, RepeatType repeatType, - bool commit ) -{ - /* Create a new prodEl node going to a concat literal. 
*/ - PdaLiteral *literal = new PdaLiteral( loc, data ); - TypeRef *typeRef = TypeRef::cons( loc, nspaceQual, literal, repeatType ); - ProdEl *prodEl = new ProdEl( ProdEl::LiteralType, loc, objField, commit, typeRef, 0 ); - return prodEl; -} - -ConsItemList *BaseParser::consElLiteral( const InputLoc &loc, - TypeRef *consTypeRef, const String &data, NamespaceQual *nspaceQual ) -{ - PdaLiteral *literal = new PdaLiteral( loc, data ); - TypeRef *typeRef = TypeRef::cons( loc, consTypeRef, nspaceQual, literal ); - ProdEl *prodEl = new ProdEl( ProdEl::LiteralType, loc, 0, false, typeRef, 0 ); - ConsItem *consItem = ConsItem::cons( loc, ConsItem::LiteralType, prodEl ); - ConsItemList *list = ConsItemList::cons( consItem ); - return list; -} - -Production *BaseParser::production( const InputLoc &loc, ProdElList *prodElList, - String name, bool commit, CodeBlock *codeBlock, LangEl *predOf ) -{ - Production *prod = Production::cons( loc, 0, prodElList, - name, commit, codeBlock, pd->prodList.length(), 0 ); - prod->predOf = predOf; - - /* Link the production elements back to the production. 
*/ - for ( ProdEl *prodEl = prodElList->head; prodEl != 0; prodEl = prodEl->next ) - prodEl->production = prod; - - pd->prodList.append( prod ); - - return prod; -} - -void BaseParser::objVarDef( ObjectDef *objectDef, ObjectField *objField ) -{ - if ( objectDef->rootScope->checkRedecl( objField->name ) != 0 ) - error() << "object field renamed" << endp; - - objectDef->rootScope->insertField( objField->name, objField ); -} - -LelDefList *BaseParser::prodAppend( LelDefList *defList, Production *definition ) -{ - definition->prodNum = defList->length(); - defList->append( definition ); - return defList; -} - -LangExpr *BaseParser::construct( const InputLoc &loc, ObjectField *objField, - ConsItemList *list, TypeRef *typeRef, FieldInitVect *fieldInitVect ) -{ - Constructor *constructor = Constructor::cons( loc, curNspace(), - list, pd->nextPatConsId++ ); - pd->replList.append( constructor ); - - LangVarRef *varRef = 0; - if ( objField != 0 ) { - varRef = LangVarRef::cons( objField->loc, - curNspace(), curStruct(), curScope(), objField->name ); - } - - LangExpr *expr = LangExpr::cons( LangTerm::cons( loc, LangTerm::ConstructType, - varRef, objField, typeRef, fieldInitVect, constructor ) ); - - /* Check for redeclaration. */ - if ( objField != 0 ) { - if ( curScope()->checkRedecl( objField->name ) != 0 ) { - error( objField->loc ) << "variable " << objField->name << - " redeclared" << endp; - } - - /* Insert it into the field map. 
*/ - objField->typeRef = typeRef; - curScope()->insertField( objField->name, objField ); - } - - return expr; -} - -LangExpr *BaseParser::match( const InputLoc &loc, LangVarRef *varRef, - PatternItemList *list ) -{ - Pattern *pattern = Pattern::cons( loc, curNspace(), - list, pd->nextPatConsId++ ); - pd->patternList.append( pattern ); - - LangExpr *expr = LangExpr::cons( LangTerm::consMatch( - InputLoc(), varRef, pattern ) ); - - return expr; -} - -LangExpr *BaseParser::prodCompare( const InputLoc &loc, LangVarRef *varRef, - const String &prod, LangExpr *matchExpr ) -{ - LangExpr *expr = LangExpr::cons( LangTerm::consProdCompare( - InputLoc(), varRef, prod, matchExpr ) ); - - return expr; -} - -LangStmt *BaseParser::varDef( ObjectField *objField, - LangExpr *expr, LangStmt::Type assignType ) -{ - LangStmt *stmt = 0; - - /* Check for redeclaration. */ - if ( curScope()->checkRedecl( objField->name ) != 0 ) { - error( objField->loc ) << "variable " << objField->name << - " redeclared" << endp; - } - - /* Insert it into the field map. 
*/ - curScope()->insertField( objField->name, objField ); - - //cout << "var def " << $1->objField->name << endl; - - if ( expr != 0 ) { - LangVarRef *varRef = LangVarRef::cons( objField->loc, - curNspace(), curStruct(), curScope(), objField->name ); - - stmt = LangStmt::cons( objField->loc, assignType, varRef, expr ); - } - - return stmt; -} - -LangExpr *BaseParser::require( const InputLoc &loc, - LangVarRef *varRef, PatternItemList *list ) -{ - Pattern *pattern = Pattern::cons( loc, curNspace(), - list, pd->nextPatConsId++ ); - pd->patternList.append( pattern ); - - LangExpr *expr = LangExpr::cons( LangTerm::consMatch( - InputLoc(), varRef, pattern ) ); - return expr; -} - -void BaseParser::structVarDef( const InputLoc &loc, ObjectField *objField ) -{ - ObjectDef *object; - if ( curStruct() == 0 ) - error(loc) << "internal error: no context stack items found" << endp; - - StructDef *structDef = curStruct(); - object = structDef->objectDef; - - if ( object->rootScope->checkRedecl( objField->name ) != 0 ) - error(objField->loc) << "object field renamed" << endp; - - object->rootScope->insertField( objField->name, objField ); -} - -void BaseParser::structHead( const InputLoc &loc, Namespace *inNspace, - const String &data, ObjectDef::Type objectType ) -{ - ObjectDef *objectDef = ObjectDef::cons( objectType, - data, pd->nextObjectId++ ); - - StructDef *context = new StructDef( loc, data, objectDef ); - structStack.push( context ); - - inNspace->structDefList.append( context ); - - /* Make the namespace for the struct. 
*/ - createNamespace( loc, data ); -} - -StmtList *BaseParser::appendStatement( StmtList *stmtList, LangStmt *stmt ) -{ - if ( stmt != 0 ) - stmtList->append( stmt ); - return stmtList; -} - -ParameterList *BaseParser::appendParam( ParameterList *paramList, ObjectField *objField ) -{ - paramList->append( objField ); - return paramList; -} - -ObjectField *BaseParser::addParam( const InputLoc &loc, - ObjectField::Type type, TypeRef *typeRef, const String &name ) -{ - ObjectField *objField = ObjectField::cons( loc, type, typeRef, name ); - return objField; -} - -PredDecl *BaseParser::predTokenName( const InputLoc &loc, NamespaceQual *qual, - const String &data ) -{ - TypeRef *typeRef = TypeRef::cons( loc, qual, data ); - PredDecl *predDecl = new PredDecl( typeRef, pd->predValue ); - return predDecl; -} - -PredDecl *BaseParser::predTokenLit( const InputLoc &loc, const String &data, - NamespaceQual *nspaceQual ) -{ - PdaLiteral *literal = new PdaLiteral( loc, data ); - TypeRef *typeRef = TypeRef::cons( loc, nspaceQual, literal ); - PredDecl *predDecl = new PredDecl( typeRef, pd->predValue ); - return predDecl; -} - -void BaseParser::alias( const InputLoc &loc, const String &data, TypeRef *typeRef ) -{ - Namespace *nspace = curNspace(); - TypeAlias *typeAlias = new TypeAlias( loc, nspace, data, typeRef ); - nspace->typeAliasList.append( typeAlias ); -} - -void BaseParser::precedenceStmt( PredType predType, PredDeclList *predDeclList ) -{ - while ( predDeclList->length() > 0 ) { - PredDecl *predDecl = predDeclList->detachFirst(); - predDecl->predType = predType; - pd->predDeclList.append( predDecl ); - } - pd->predValue++; -} - -void BaseParser::pushScope() -{ - scopeTop = curLocalFrame()->pushScope( curScope() ); -} - -void BaseParser::popScope() -{ - scopeTop = curScope()->parentScope; -} diff --git a/src/parser.h b/src/parser.h deleted file mode 100644 index aafa3f2b..00000000 --- a/src/parser.h +++ /dev/null @@ -1,197 +0,0 @@ -/* - * Copyright 2013-2018 Adrian 
Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef _COLM_PARSER_H -#define _COLM_PARSER_H - -#include <iostream> - -#include <avltree.h> - -#include "compiler.h" -#include "parser.h" - -#define PROPERTY_REDUCE_FIRST 0x1 - -struct BaseParser -{ - BaseParser( Compiler *pd ) - : pd(pd), enterRl(false) - {} - - virtual ~BaseParser() {} - - Compiler *pd; - - RegionSetVect regionStack; - NamespaceVect namespaceStack; - ReductionVect reductionStack; - StructStack structStack; - ObjectDef *localFrameTop; - NameScope *scopeTop; - - bool enterRl; - - bool insideRegion() - { return regionStack.length() > 0; } - - StructDef *curStruct() - { return structStack.length() == 0 ? 
0 : structStack.top(); } - - Namespace *curNspace() - { return namespaceStack.top(); } - - NameScope *curScope() - { return scopeTop; } - - ObjectDef *curLocalFrame() - { return localFrameTop; } - - Reduction *curReduction() - { return reductionStack.top(); } - - /* Lexical feedback. */ - - void listElDef( String name ); - void mapElDef( String name, TypeRef *keyType ); - - void argvDecl(); - void init(); - void addRegularDef( const InputLoc &loc, Namespace *nspace, - const String &name, LexJoin *join ); - TokenRegion *createRegion( const InputLoc &loc, RegionImpl *impl ); - Namespace *createRootNamespace(); - Namespace *createNamespace( const InputLoc &loc, const String &name ); - void pushRegionSet( const InputLoc &loc ); - void popRegionSet(); - void addProduction( const InputLoc &loc, const String &name, - ProdElList *prodElList, bool commit, - CodeBlock *redBlock, LangEl *predOf ); - void addArgvList(); - void addStdsList(); - LexJoin *literalJoin( const InputLoc &loc, const String &data ); - - Reduction *createReduction( const InputLoc loc, const String &name ); - - void defineToken( const InputLoc &loc, String name, LexJoin *join, - ObjectDef *objectDef, CodeBlock *transBlock, - bool ignore, bool noPreIgnore, bool noPostIgnore ); - - void zeroDef( const InputLoc &loc, const String &name ); - void literalDef( const InputLoc &loc, const String &data, - bool noPreIgnore, bool noPostIgnore ); - - ObjectDef *blockOpen(); - void blockClose(); - - void inHostDef( const String &hostCall, ObjectDef *localFrame, - ParameterList *paramList, TypeRef *typeRef, - const String &name, bool exprt ); - void functionDef( StmtList *stmtList, ObjectDef *localFrame, - ParameterList *paramList, TypeRef *typeRef, - const String &name, bool exprt ); - - void iterDef( StmtList *stmtList, ObjectDef *localFrame, - ParameterList *paramList, const String &name ); - LangStmt *globalDef( ObjectField *objField, LangExpr *expr, - LangStmt::Type assignType ); - void cflDef( NtDef *ntDef, 
ObjectDef *objectDef, LelDefList *defList ); - ReOrBlock *lexRegularExprData( ReOrBlock *reOrBlock, ReOrItem *reOrItem ); - - int lexFactorRepNum( const InputLoc &loc, const String &data ); - LexFactor *lexRlFactorName( const String &data, const InputLoc &loc ); - LexFactorAug *lexFactorLabel( const InputLoc &loc, const String &data, - LexFactorAug *factorAug ); - LexJoin *lexOptJoin( LexJoin *join, LexJoin *context ); - LangExpr *send( const InputLoc &loc, LangVarRef *varRef, - ConsItemList *list, bool eof ); - LangExpr *sendTree( const InputLoc &loc, LangVarRef *varRef, - ConsItemList *list, bool eof ); - LangExpr *parseCmd( const InputLoc &loc, bool tree, bool stop, ObjectField *objField, - TypeRef *typeRef, FieldInitVect *fieldInitVect, ConsItemList *list, - bool used, bool reduce, bool read, const String &reducer ); - PatternItemList *consPatternEl( LangVarRef *varRef, PatternItemList *list ); - PatternItemList *patternElNamed( const InputLoc &loc, LangVarRef *varRef, - NamespaceQual *nspaceQual, const String &data, RepeatType repeatType ); - PatternItemList *patternElType( const InputLoc &loc, LangVarRef *varRef, - NamespaceQual *nspaceQual, const String &data, RepeatType repeatType ); - PatternItemList *patListConcat( PatternItemList *list1, PatternItemList *list2 ); - ConsItemList *consListConcat( ConsItemList *list1, ConsItemList *list2 ); - LangStmt *forScope( const InputLoc &loc, const String &data, - NameScope *scope, TypeRef *typeRef, IterCall *iterCall, StmtList *stmtList ); - void preEof( const InputLoc &loc, StmtList *stmtList, ObjectDef *localFrame ); - - ProdEl *prodElName( const InputLoc &loc, const String &data, - NamespaceQual *nspaceQual, ObjectField *objField, RepeatType repeatType, - bool commit ); - ProdEl *prodElLiteral( const InputLoc &loc, const String &data, - NamespaceQual *nspaceQual, ObjectField *objField, RepeatType repeatType, - bool commit ); - ConsItemList *consElLiteral( const InputLoc &loc, TypeRef *consTypeRef, - const String 
&data, NamespaceQual *nspaceQual ); - Production *production( const InputLoc &loc, ProdElList *prodElList, - String name, bool commit, CodeBlock *codeBlock, LangEl *predOf ); - void objVarDef( ObjectDef *objectDef, ObjectField *objField ); - LelDefList *prodAppend( LelDefList *defList, Production *definition ); - - LangExpr *construct( const InputLoc &loc, ObjectField *objField, - ConsItemList *list, TypeRef *typeRef, FieldInitVect *fieldInitVect ); - LangExpr *match( const InputLoc &loc, LangVarRef *varRef, - PatternItemList *list ); - LangExpr *prodCompare( const InputLoc &loc, LangVarRef *varRef, - const String &prod, LangExpr *matchExpr ); - LangStmt *varDef( ObjectField *objField, - LangExpr *expr, LangStmt::Type assignType ); - LangStmt *exportStmt( ObjectField *objField, LangStmt::Type assignType, LangExpr *expr ); - - - LangExpr *require( const InputLoc &loc, LangVarRef *varRef, PatternItemList *list ); - void structVarDef( const InputLoc &loc, ObjectField *objField ); - void structHead( const InputLoc &loc, Namespace *inNspace, - const String &data, ObjectDef::Type objectType ); - StmtList *appendStatement( StmtList *stmtList, LangStmt *stmt ); - ParameterList *appendParam( ParameterList *paramList, ObjectField *objField ); - ObjectField *addParam( const InputLoc &loc, - ObjectField::Type type, TypeRef *typeRef, const String &name ); - PredDecl *predTokenName( const InputLoc &loc, NamespaceQual *qual, const String &data ); - PredDecl *predTokenLit( const InputLoc &loc, const String &data, - NamespaceQual *nspaceQual ); - void alias( const InputLoc &loc, const String &data, TypeRef *typeRef ); - void precedenceStmt( PredType predType, PredDeclList *predDeclList ); - ProdElList *appendProdEl( ProdElList *prodElList, ProdEl *prodEl ); - - void pushScope(); - void popScope(); - - virtual void go( long activeRealm ) = 0; - - BstSet<String, ColmCmpStr> genericElDefined; - - NamespaceQual *emptyNspaceQual() - { - return NamespaceQual::cons( curNspace() ); - } - 
-}; - -#endif /* _COLM_PARSER_H */ - diff --git a/src/parsetree.cc b/src/parsetree.cc deleted file mode 100644 index 0b1cdbeb..00000000 --- a/src/parsetree.cc +++ /dev/null @@ -1,1493 +0,0 @@ -/* - * Copyright 2006-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <assert.h> -#include <stdbool.h> - -#include <iostream> - -#include "fsmgraph.h" -#include "compiler.h" -#include "parsetree.h" - -using namespace std; -ostream &operator<<( ostream &out, const NameRef &nameRef ); -ostream &operator<<( ostream &out, const NameInst &nameInst ); -ostream &operator<<( ostream &out, const Token &token ); - -/* Convert the literal string which comes in from the scanner into an array of - * characters with escapes and options interpreted. Also null terminates the - * string. 
Though this null termination should not be relied on for - * interpreting literals in the parser because the string may contain a - * literal string with \0 */ -void prepareLitString( String &result, bool &caseInsensitive, - const String &srcString, const InputLoc &loc ) -{ - result.setAs( String::Fresh(), srcString.length() ); - caseInsensitive = false; - - char *src = srcString.data + 1; - char *end = 0; - bool backtick = srcString[0] == '`'; - - if ( !backtick ) { - end = srcString.data + srcString.length() - 1; - - while ( *end != '\'' && *end != '\"' && *end != '\n' ) { - if ( *end == 'i' ) - caseInsensitive = true; - else { - error( loc ) << "literal string '" << *end << - "' option not supported" << endl; - } - end -= 1; - } - - if ( *end == '\n' ) - end++; - } - else { - end = srcString.data + srcString.length(); - } - - char *dest = result.data; - int len = 0; - while ( src != end ) { - if ( !backtick && *src == '\\' ) { - switch ( src[1] ) { - case '0': dest[len++] = '\0'; break; - case 'a': dest[len++] = '\a'; break; - case 'b': dest[len++] = '\b'; break; - case 't': dest[len++] = '\t'; break; - case 'n': dest[len++] = '\n'; break; - case 'v': dest[len++] = '\v'; break; - case 'f': dest[len++] = '\f'; break; - case 'r': dest[len++] = '\r'; break; - case '\n': break; - default: dest[len++] = src[1]; break; - } - src += 2; - } - else { - dest[len++] = *src++; - } - } - - result.chop( len ); -} - -int CmpUniqueType::compare( const UniqueType &ut1, const UniqueType &ut2 ) -{ - if ( ut1.typeId < ut2.typeId ) - return -1; - else if ( ut1.typeId > ut2.typeId ) - return 1; - switch ( ut1.typeId ) { - case TYPE_TREE: - case TYPE_REF: - if ( ut1.langEl < ut2.langEl ) - return -1; - else if ( ut1.langEl > ut2.langEl ) - return 1; - break; - case TYPE_ITER: - if ( ut1.iterDef < ut2.iterDef ) - return -1; - else if ( ut1.iterDef > ut2.iterDef ) - return 1; - break; - - case TYPE_NOTYPE: - case TYPE_NIL: - case TYPE_INT: - case TYPE_BOOL: - case TYPE_LIST_PTRS: - case 
TYPE_MAP_PTRS: - case TYPE_VOID: - break; - - case TYPE_STRUCT: - if ( ut1.structEl < ut2.structEl ) - return -1; - else if ( ut1.structEl > ut2.structEl ) - return 1; - break; - case TYPE_GENERIC: - if ( ut1.generic < ut2.generic ) - return -1; - else if ( ut1.generic > ut2.generic ) - return 1; - break; - } - - return 0; -} - -int CmpUniqueRepeat::compare( const UniqueRepeat &ut1, const UniqueRepeat &ut2 ) -{ - if ( ut1.repeatType < ut2.repeatType ) - return -1; - else if ( ut1.repeatType > ut2.repeatType ) - return 1; - else { - if ( ut1.langEl < ut2.langEl ) - return -1; - else if ( ut1.langEl > ut2.langEl ) - return 1; - } - - return 0; -} - -int CmpUniqueGeneric::compare( const UniqueGeneric &ut1, const UniqueGeneric &ut2 ) -{ - if ( ut1.type < ut2.type ) - return -1; - else if ( ut1.type > ut2.type ) - return 1; - else if ( ut1.value < ut2.value ) - return -1; - else if ( ut1.value > ut2.value ) - return 1; - else { - switch ( ut1.type ) { - case UniqueGeneric::List: - case UniqueGeneric::ListEl: - case UniqueGeneric::Parser: - break; - - case UniqueGeneric::Map: - case UniqueGeneric::MapEl: - if ( ut1.key < ut2.key ) - return -1; - else if ( ut1.key > ut2.key ) - return 1; - break; - } - } - return 0; -} - -FsmGraph *LexDefinition::walk( Compiler *pd ) -{ - /* Recurse on the expression. */ - FsmGraph *rtnVal = join->walk( pd ); - - /* If the expression below is a join operation with multiple expressions - * then it just had epsilon transisions resolved. If it is a join - * with only a single expression then run the epsilon op now. */ - if ( join->expr != 0 ) - rtnVal->epsilonOp(); - - return rtnVal; -} - -void RegionImpl::makeNameTree( const InputLoc &loc, Compiler *pd ) -{ - NameInst *nameInst = new NameInst( pd->nextNameId++ ); - pd->nameInstList.append( nameInst ); - - /* Guess we do this now. */ - makeActions( pd ); - - /* Save off the name inst into the token region. 
This is only legal for - * token regions because they are only ever referenced once (near the root - * of the name tree). They cannot have more than one corresponding name - * inst. */ - assert( regionNameInst == 0 ); - regionNameInst = nameInst; -} - -InputLoc TokenInstance::getLoc() -{ - return action != 0 ? action->loc : semiLoc; -} - -/* - * If there are any LMs then all of the following entry points must reset - * tokstart: - * - * 1. fentry(StateRef) - * 2. ftoto(StateRef), fcall(StateRef), fnext(StateRef) - * 3. targt of any transition that has an fcall (the return loc). - * 4. start state of all longest match routines. - */ - -Action *RegionImpl::newAction( Compiler *pd, const InputLoc &loc, - const String &name, InlineList *inlineList ) -{ - Action *action = Action::cons( loc, name, inlineList ); - pd->actionList.append( action ); - action->isLmAction = true; - return action; -} - -void RegionImpl::makeActions( Compiler *pd ) -{ - /* Make actions that set the action id. */ - for ( TokenInstanceListReg::Iter lmi = tokenInstanceList; lmi.lte(); lmi++ ) { - /* For each part create actions for setting the match type. We need - * to do this so that the actions will go into the actionIndex. */ - InlineList *inlineList = InlineList::cons(); - inlineList->append( InlineItem::cons( lmi->getLoc(), this, lmi, - InlineItem::LmSetActId ) ); - char *actName = new char[50]; - sprintf( actName, "store%i", lmi->longestMatchId ); - lmi->setActId = newAction( pd, lmi->getLoc(), actName, inlineList ); - } - - /* Make actions that execute the user action and restart on the last character. */ - for ( TokenInstanceListReg::Iter lmi = tokenInstanceList; lmi.lte(); lmi++ ) { - /* For each part create actions for setting the match type. We need - * to do this so that the actions will go into the actionIndex. 
*/ - InlineList *inlineList = InlineList::cons(); - inlineList->append( InlineItem::cons( lmi->getLoc(), this, lmi, - InlineItem::LmOnLast ) ); - char *actName = new char[50]; - sprintf( actName, "imm%i", lmi->longestMatchId ); - lmi->actOnLast = newAction( pd, lmi->getLoc(), actName, inlineList ); - } - - /* Make actions that execute the user action and restart on the next - * character. These actions will set tokend themselves (it is the current - * char). */ - for ( TokenInstanceListReg::Iter lmi = tokenInstanceList; lmi.lte(); lmi++ ) { - /* For each part create actions for setting the match type. We need - * to do this so that the actions will go into the actionIndex. */ - InlineList *inlineList = InlineList::cons(); - inlineList->append( InlineItem::cons( lmi->getLoc(), this, lmi, - InlineItem::LmOnNext ) ); - char *actName = new char[50]; - sprintf( actName, "lagh%i", lmi->longestMatchId ); - lmi->actOnNext = newAction( pd, lmi->getLoc(), actName, inlineList ); - } - - /* Make actions that execute the user action and restart at tokend. These - * actions execute some time after matching the last char. */ - for ( TokenInstanceListReg::Iter lmi = tokenInstanceList; lmi.lte(); lmi++ ) { - /* For each part create actions for setting the match type. We need - * to do this so that the actions will go into the actionIndex. */ - InlineList *inlineList = InlineList::cons(); - inlineList->append( InlineItem::cons( lmi->getLoc(), this, lmi, - InlineItem::LmOnLagBehind ) ); - char *actName = new char[50]; - sprintf( actName, "lag%i", lmi->longestMatchId ); - lmi->actLagBehind = newAction( pd, lmi->getLoc(), actName, inlineList ); - } - - InputLoc loc; - loc.line = 1; - loc.col = 1; - - /* Create the error action. 
*/ - InlineList *il6 = InlineList::cons(); - il6->append( InlineItem::cons( loc, this, 0, InlineItem::LmSwitch ) ); - lmActSelect = newAction( pd, loc, "lagsel", il6 ); -} - -void RegionImpl::restart( FsmGraph *graph, FsmTrans *trans ) -{ - FsmState *fromState = trans->fromState; - graph->detachTrans( fromState, trans->toState, trans ); - graph->attachTrans( fromState, graph->startState, trans ); -} - -void RegionImpl::runLongestMatch( Compiler *pd, FsmGraph *graph ) -{ - graph->markReachableFromHereStopFinal( graph->startState ); - for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) { - if ( ms->stateBits & SB_ISMARKED ) { - ms->lmItemSet.insert( 0 ); - ms->stateBits &= ~ SB_ISMARKED; - } - } - - /* Transfer the first item of non-empty lmAction tables to the item sets - * of the states that follow. Exclude states that have no transitions out. - * This must happen on a separate pass so that on each iteration of the - * next pass we have the item set entries from all lmAction tables. */ - for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) { - for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) { - if ( trans->lmActionTable.length() > 0 ) { - LmActionTableEl *lmAct = trans->lmActionTable.data; - FsmState *toState = trans->toState; - assert( toState ); - - /* Check if there are transitions out, this may be a very - * close approximation? Out transitions going nowhere? - * FIXME: Check. */ - if ( toState->outList.length() > 0 ) { - /* Fill the item sets. */ - graph->markReachableFromHereStopFinal( toState ); - for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) { - if ( ms->stateBits & SB_ISMARKED ) { - ms->lmItemSet.insert( lmAct->value ); - ms->stateBits &= ~ SB_ISMARKED; - } - } - } - } - } - } - - /* The lmItem sets are now filled, telling us which longest match rules - * can succeed in which states. First determine if we need to make sure - * act is defaulted to zero. 
*/ - int maxItemSetLength = 0; - graph->markReachableFromHereStopFinal( graph->startState ); - for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) { - if ( ms->stateBits & SB_ISMARKED ) { - if ( ms->lmItemSet.length() > maxItemSetLength ) - maxItemSetLength = ms->lmItemSet.length(); - ms->stateBits &= ~ SB_ISMARKED; - } - } - - /* The actions executed on starting to match a token. */ - graph->isolateStartState(); - graph->startState->fromStateActionTable.setAction( pd->setTokStartOrd, pd->setTokStart ); - if ( maxItemSetLength > 1 ) { - /* The longest match action switch may be called when tokens are - * matched, in which case act must be initialized, there must be a - * case to handle the error, and the generated machine will require an - * error state. */ - lmSwitchHandlesError = true; - graph->startState->toStateActionTable.setAction( pd->initActIdOrd, pd->initActId ); - } - - /* The place to store transitions to restart. It maybe possible for the - * restarting to affect the searching through the graph that follows. For - * now take the safe route and save the list of transitions to restart - * until after all searching is done. */ - Vector<FsmTrans*> restartTrans; - - /* Set actions that do immediate token recognition, set the longest match part - * id and set the token ending. */ - for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) { - for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) { - if ( trans->lmActionTable.length() > 0 ) { - LmActionTableEl *lmAct = trans->lmActionTable.data; - FsmState *toState = trans->toState; - assert( toState ); - - /* Check if there are transitions out, this may be a very - * close approximation? Out transitions going nowhere? - * FIXME: Check. */ - if ( toState->outList.length() == 0 ) { - /* Can execute the immediate action for the longest match - * part. Redirect the action to the start state. 
*/ - trans->actionTable.setAction( lmAct->key, - lmAct->value->actOnLast ); - restartTrans.append( trans ); - } - else { - /* Look for non final states that have a non-empty item - * set. If these are present then we need to record the - * end of the token. Also Find the highest item set - * length reachable from here (excluding at transtions to - * final states). */ - bool nonFinalNonEmptyItemSet = false; - maxItemSetLength = 0; - graph->markReachableFromHereStopFinal( toState ); - for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) { - if ( ms->stateBits & SB_ISMARKED ) { - if ( ms->lmItemSet.length() > 0 && !ms->isFinState() ) - nonFinalNonEmptyItemSet = true; - if ( ms->lmItemSet.length() > maxItemSetLength ) - maxItemSetLength = ms->lmItemSet.length(); - ms->stateBits &= ~ SB_ISMARKED; - } - } - - /* If there are reachable states that are not final and - * have non empty item sets or that have an item set - * length greater than one then we need to set tokend - * because the error action that matches the token will - * require it. */ - if ( nonFinalNonEmptyItemSet || maxItemSetLength > 1 ) - trans->actionTable.setAction( pd->setTokEndOrd, pd->setTokEnd ); - - /* Some states may not know which longest match item to - * execute, must set it. */ - if ( maxItemSetLength > 1 ) { - /* There are transitions out, another match may come. */ - trans->actionTable.setAction( lmAct->key, - lmAct->value->setActId ); - } - } - } - } - } - - /* Now that all graph searching is done it certainly safe set the - * restarting. It may be safe above, however this must be verified. */ - for ( Vector<FsmTrans*>::Iter rs = restartTrans; rs.lte(); rs++ ) - restart( graph, *rs ); - - int lmErrActionOrd = pd->curActionOrd++; - - /* Embed the error for recognizing a char. 
*/ - for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) { - if ( st->lmItemSet.length() == 1 && st->lmItemSet[0] != 0 ) { - if ( st->isFinState() ) { - /* On error execute the onActNext action, which knows that - * the last character of the token was one back and restart. */ - graph->setErrorTarget( st, graph->startState, &lmErrActionOrd, - &st->lmItemSet[0]->actOnNext, 1 ); - st->eofActionTable.setAction( lmErrActionOrd, - st->lmItemSet[0]->actOnNext ); - st->eofTarget = graph->startState; - } - else { - graph->setErrorTarget( st, graph->startState, &lmErrActionOrd, - &st->lmItemSet[0]->actLagBehind, 1 ); - st->eofActionTable.setAction( lmErrActionOrd, - st->lmItemSet[0]->actLagBehind ); - st->eofTarget = graph->startState; - } - } - else if ( st->lmItemSet.length() > 1 ) { - /* Need to use the select. Take note of the which items the select - * is needed for so only the necessary actions are included. */ - for ( LmItemSet::Iter plmi = st->lmItemSet; plmi.lte(); plmi++ ) { - if ( *plmi != 0 ) - (*plmi)->inLmSelect = true; - } - /* On error, execute the action select and go to the start state. */ - graph->setErrorTarget( st, graph->startState, &lmErrActionOrd, - &lmActSelect, 1 ); - st->eofActionTable.setAction( lmErrActionOrd, lmActSelect ); - st->eofTarget = graph->startState; - } - } - - /* Finally, the start state should be made final. */ - graph->setFinState( graph->startState ); -} - -void RegionImpl::transferScannerLeavingActions( FsmGraph *graph ) -{ - for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) { - if ( st->outActionTable.length() > 0 ) - graph->setErrorActions( st, st->outActionTable ); - } -} - -FsmGraph *RegionImpl::walk( Compiler *pd ) -{ - /* Make each part of the longest match. */ - int numParts = 0; - FsmGraph **parts = new FsmGraph*[tokenInstanceList.length()]; - for ( TokenInstanceListReg::Iter lmi = tokenInstanceList; lmi.lte(); lmi++ ) { - /* Watch out for patternless tokens. 
*/ - if ( lmi->join != 0 ) { - /* Create the machine and embed the setting of the longest match id. */ - parts[numParts] = lmi->join->walk( pd ); - parts[numParts]->longMatchAction( pd->curActionOrd++, lmi ); - - /* Look for tokens that accept the zero length-word. The first one found - * will be used as the default token. */ - if ( defaultTokenInstance == 0 && parts[numParts]->startState->isFinState() ) - defaultTokenInstance = lmi; - - numParts += 1; - } - } - FsmGraph *retFsm = parts[0]; - - if ( defaultTokenInstance != 0 && defaultTokenInstance->tokenDef->tdLangEl->isIgnore ) - error() << "ignore token cannot be a scanner's zero-length token" << endp; - - /* The region is empty. Return the empty set. */ - if ( numParts == 0 ) { - retFsm = new FsmGraph(); - retFsm->lambdaFsm(); - } - else { - /* Before we union the patterns we need to deal with leaving actions. They - * are transfered to error transitions out of the final states (like local - * error actions) and to eof actions. In the scanner we need to forbid - * on_last for any final state that has an leaving action. */ - for ( int i = 0; i < numParts; i++ ) - transferScannerLeavingActions( parts[i] ); - - /* Union machines one and up with machine zero. */ - FsmGraph *retFsm = parts[0]; - for ( int i = 1; i < numParts; i++ ) { - retFsm->unionOp( parts[i] ); - afterOpMinimize( retFsm ); - } - - runLongestMatch( pd, retFsm ); - delete[] parts; - } - - /* Need the entry point for the region. */ - retFsm->setEntry( regionNameInst->id, retFsm->startState ); - - return retFsm; -} - -/* Walk an expression node. */ -FsmGraph *LexJoin::walk( Compiler *pd ) -{ - FsmGraph *retFsm = expr->walk( pd ); - - /* Maybe the the context. */ - if ( context != 0 ) { - retFsm->leaveFsmAction( pd->curActionOrd++, mark ); - FsmGraph *contextGraph = context->walk( pd ); - retFsm->concatOp( contextGraph ); - } - - return retFsm; -} - -/* Clean up after an expression node. 
*/ -LexExpression::~LexExpression() -{ - switch ( type ) { - case OrType: case IntersectType: case SubtractType: - case StrongSubtractType: - delete expression; - delete term; - break; - case TermType: - delete term; - break; - case BuiltinType: - break; - } -} - -/* Evaluate a single expression node. */ -FsmGraph *LexExpression::walk( Compiler *pd, bool lastInSeq ) -{ - FsmGraph *rtnVal = 0; - switch ( type ) { - case OrType: { - /* Evaluate the expression. */ - rtnVal = expression->walk( pd, false ); - /* Evaluate the term. */ - FsmGraph *rhs = term->walk( pd ); - /* Perform union. */ - rtnVal->unionOp( rhs ); - afterOpMinimize( rtnVal, lastInSeq ); - break; - } - case IntersectType: { - /* Evaluate the expression. */ - rtnVal = expression->walk( pd ); - /* Evaluate the term. */ - FsmGraph *rhs = term->walk( pd ); - /* Perform intersection. */ - rtnVal->intersectOp( rhs ); - afterOpMinimize( rtnVal, lastInSeq ); - break; - } - case SubtractType: { - /* Evaluate the expression. */ - rtnVal = expression->walk( pd ); - /* Evaluate the term. */ - FsmGraph *rhs = term->walk( pd ); - /* Perform subtraction. */ - rtnVal->subtractOp( rhs ); - afterOpMinimize( rtnVal, lastInSeq ); - break; - } - case StrongSubtractType: { - /* Evaluate the expression. */ - rtnVal = expression->walk( pd ); - - /* Evaluate the term and pad it with any* machines. */ - FsmGraph *rhs = dotStarFsm( pd ); - FsmGraph *termFsm = term->walk( pd ); - FsmGraph *trailAnyStar = dotStarFsm( pd ); - rhs->concatOp( termFsm ); - rhs->concatOp( trailAnyStar ); - - /* Perform subtraction. */ - rtnVal->subtractOp( rhs ); - afterOpMinimize( rtnVal, lastInSeq ); - break; - } - case TermType: { - /* Return result of the term. */ - rtnVal = term->walk( pd ); - break; - } - case BuiltinType: { - /* Duplicate the builtin. */ - rtnVal = makeBuiltin( builtin, pd ); - break; - } - } - - return rtnVal; -} - -/* Clean up after a term node. 
*/ -LexTerm::~LexTerm() -{ - switch ( type ) { - case ConcatType: - case RightStartType: - case RightFinishType: - case LeftType: - delete term; - delete factorAug; - break; - case FactorAugType: - delete factorAug; - break; - } -} - -/* Evaluate a term node. */ -FsmGraph *LexTerm::walk( Compiler *pd, bool lastInSeq ) -{ - FsmGraph *rtnVal = 0; - switch ( type ) { - case ConcatType: { - /* Evaluate the Term. */ - rtnVal = term->walk( pd, false ); - /* Evaluate the LexFactorRep. */ - FsmGraph *rhs = factorAug->walk( pd ); - /* Perform concatenation. */ - rtnVal->concatOp( rhs ); - afterOpMinimize( rtnVal, lastInSeq ); - break; - } - case RightStartType: { - /* Evaluate the Term. */ - rtnVal = term->walk( pd ); - - /* Evaluate the LexFactorRep. */ - FsmGraph *rhs = factorAug->walk( pd ); - - /* Set up the priority descriptors. The left machine gets the - * lower priority where as the right get the higher start priority. */ - priorDescs[0].key = pd->nextPriorKey++; - priorDescs[0].priority = 0; - rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] ); - - /* The start transitions right machine get the higher priority. - * Use the same unique key. */ - priorDescs[1].key = priorDescs[0].key; - priorDescs[1].priority = 1; - rhs->startFsmPrior( pd->curPriorOrd++, &priorDescs[1] ); - - /* Perform concatenation. */ - rtnVal->concatOp( rhs ); - afterOpMinimize( rtnVal, lastInSeq ); - break; - } - case RightFinishType: { - /* Evaluate the Term. */ - rtnVal = term->walk( pd ); - - /* Evaluate the LexFactorRep. */ - FsmGraph *rhs = factorAug->walk( pd ); - - /* Set up the priority descriptors. The left machine gets the - * lower priority where as the finishing transitions to the right - * get the higher priority. */ - priorDescs[0].key = pd->nextPriorKey++; - priorDescs[0].priority = 0; - rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] ); - - /* The finishing transitions of the right machine get the higher - * priority. Use the same unique key. 
*/ - priorDescs[1].key = priorDescs[0].key; - priorDescs[1].priority = 1; - rhs->finishFsmPrior( pd->curPriorOrd++, &priorDescs[1] ); - - /* Perform concatenation. */ - rtnVal->concatOp( rhs ); - afterOpMinimize( rtnVal, lastInSeq ); - break; - } - case LeftType: { - /* Evaluate the Term. */ - rtnVal = term->walk( pd ); - - /* Evaluate the LexFactorRep. */ - FsmGraph *rhs = factorAug->walk( pd ); - - /* Set up the priority descriptors. The left machine gets the - * higher priority. */ - priorDescs[0].key = pd->nextPriorKey++; - priorDescs[0].priority = 1; - rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] ); - - /* The right machine gets the lower priority. Since - * startTransPrior might unnecessarily increase the number of - * states during the state machine construction process (due to - * isolation), we use allTransPrior instead, which has the same - * effect. */ - priorDescs[1].key = priorDescs[0].key; - priorDescs[1].priority = 0; - rhs->allTransPrior( pd->curPriorOrd++, &priorDescs[1] ); - - /* Perform concatenation. */ - rtnVal->concatOp( rhs ); - afterOpMinimize( rtnVal, lastInSeq ); - break; - } - case FactorAugType: { - rtnVal = factorAug->walk( pd ); - break; - } - } - return rtnVal; -} - -LexFactorAug::~LexFactorAug() -{ - delete factorRep; -} - -void LexFactorAug::assignActions( Compiler *pd, FsmGraph *graph, int *actionOrd ) -{ - /* Assign actions. */ - for ( int i = 0; i < actions.length(); i++ ) { - switch ( actions[i].type ) { - case at_start: - graph->startFsmAction( actionOrd[i], actions[i].action ); - afterOpMinimize( graph ); - break; - case at_leave: - graph->leaveFsmAction( actionOrd[i], actions[i].action ); - break; - } - } -} - -/* Evaluate a factor with augmentation node. */ -FsmGraph *LexFactorAug::walk( Compiler *pd ) -{ - /* Make the array of function orderings. 
*/ - int *actionOrd = 0; - if ( actions.length() > 0 ) - actionOrd = new int[actions.length()]; - - /* First walk the list of actions, assigning order to all starting - * actions. */ - for ( int i = 0; i < actions.length(); i++ ) { - if ( actions[i].type == at_start ) - actionOrd[i] = pd->curActionOrd++; - } - - /* Evaluate the factor with repetition. */ - FsmGraph *rtnVal = factorRep->walk( pd ); - - /* Compute the remaining action orderings. */ - for ( int i = 0; i < actions.length(); i++ ) { - if ( actions[i].type != at_start ) - actionOrd[i] = pd->curActionOrd++; - } - - assignActions( pd, rtnVal , actionOrd ); - - if ( actionOrd != 0 ) - delete[] actionOrd; - return rtnVal; -} - - -/* Clean up after a factor with repetition node. */ -LexFactorRep::~LexFactorRep() -{ - switch ( type ) { - case StarType: case StarStarType: case OptionalType: case PlusType: - case ExactType: case MaxType: case MinType: case RangeType: - delete factorRep; - break; - case FactorNegType: - delete factorNeg; - break; - } -} - -/* Evaluate a factor with repetition node. */ -FsmGraph *LexFactorRep::walk( Compiler *pd ) -{ - FsmGraph *retFsm = 0; - - switch ( type ) { - case StarType: { - /* Evaluate the LexFactorRep. */ - retFsm = factorRep->walk( pd ); - if ( retFsm->startState->isFinState() ) { - warning(loc) << "applying kleene star to a machine that " - "accepts zero length word" << endl; - } - - /* Shift over the start action orders then do the kleene star. */ - pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd ); - retFsm->starOp( ); - afterOpMinimize( retFsm ); - break; - } - case StarStarType: { - /* Evaluate the LexFactorRep. */ - retFsm = factorRep->walk( pd ); - if ( retFsm->startState->isFinState() ) { - warning(loc) << "applying kleene star to a machine that " - "accepts zero length word" << endl; - } - - /* Set up the prior descs. All gets priority one, whereas leaving gets - * priority zero. 
Make a unique key so that these priorities don't - * interfere with any priorities set by the user. */ - priorDescs[0].key = pd->nextPriorKey++; - priorDescs[0].priority = 1; - retFsm->allTransPrior( pd->curPriorOrd++, &priorDescs[0] ); - - /* Leaveing gets priority 0. Use same unique key. */ - priorDescs[1].key = priorDescs[0].key; - priorDescs[1].priority = 0; - retFsm->leaveFsmPrior( pd->curPriorOrd++, &priorDescs[1] ); - - /* Shift over the start action orders then do the kleene star. */ - pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd ); - retFsm->starOp( ); - afterOpMinimize( retFsm ); - break; - } - case OptionalType: { - /* Make the null fsm. */ - FsmGraph *nu = new FsmGraph(); - nu->lambdaFsm( ); - - /* Evaluate the LexFactorRep. */ - retFsm = factorRep->walk( pd ); - - /* Perform the question operator. */ - retFsm->unionOp( nu ); - afterOpMinimize( retFsm ); - break; - } - case PlusType: { - /* Evaluate the LexFactorRep. */ - retFsm = factorRep->walk( pd ); - if ( retFsm->startState->isFinState() ) { - warning(loc) << "applying plus operator to a machine that " - "accpets zero length word" << endl; - } - - /* Need a duplicated for the star end. */ - FsmGraph *dup = new FsmGraph( *retFsm ); - - /* The start func orders need to be shifted before doing the star. */ - pd->curActionOrd += dup->shiftStartActionOrder( pd->curActionOrd ); - - /* Star the duplicate. */ - dup->starOp( ); - afterOpMinimize( dup ); - - retFsm->concatOp( dup ); - afterOpMinimize( retFsm ); - break; - } - case ExactType: { - /* Get an int from the repetition amount. */ - if ( lowerRep == 0 ) { - /* No copies. Don't need to evaluate the factorRep. - * This Defeats the purpose so give a warning. */ - warning(loc) << "exactly zero repetitions results " - "in the null machine" << endl; - - retFsm = new FsmGraph(); - retFsm->lambdaFsm(); - } - else { - /* Evaluate the first LexFactorRep. 
*/ - retFsm = factorRep->walk( pd ); - if ( retFsm->startState->isFinState() ) { - warning(loc) << "applying repetition to a machine that " - "accepts zero length word" << endl; - } - - /* The start func orders need to be shifted before doing the - * repetition. */ - pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd ); - - /* Do the repetition on the machine. Already guarded against n == 0 */ - retFsm->repeatOp( lowerRep ); - afterOpMinimize( retFsm ); - } - break; - } - case MaxType: { - /* Get an int from the repetition amount. */ - if ( upperRep == 0 ) { - /* No copies. Don't need to evaluate the factorRep. - * This Defeats the purpose so give a warning. */ - warning(loc) << "max zero repetitions results " - "in the null machine" << endl; - - retFsm = new FsmGraph(); - retFsm->lambdaFsm(); - } - else { - /* Evaluate the first LexFactorRep. */ - retFsm = factorRep->walk( pd ); - if ( retFsm->startState->isFinState() ) { - warning(loc) << "applying max repetition to a machine that " - "accepts zero length word" << endl; - } - - /* The start func orders need to be shifted before doing the - * repetition. */ - pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd ); - - /* Do the repetition on the machine. Already guarded against n == 0 */ - retFsm->optionalRepeatOp( upperRep ); - afterOpMinimize( retFsm ); - } - break; - } - case MinType: { - /* Evaluate the repeated machine. */ - retFsm = factorRep->walk( pd ); - if ( retFsm->startState->isFinState() ) { - warning(loc) << "applying min repetition to a machine that " - "accepts zero length word" << endl; - } - - /* The start func orders need to be shifted before doing the repetition - * and the kleene star. */ - pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd ); - - if ( lowerRep == 0 ) { - /* Acts just like a star op on the machine to return. */ - retFsm->starOp( ); - afterOpMinimize( retFsm ); - } - else { - /* Take a duplicate for the plus. 
*/ - FsmGraph *dup = new FsmGraph( *retFsm ); - - /* Do repetition on the first half. */ - retFsm->repeatOp( lowerRep ); - afterOpMinimize( retFsm ); - - /* Star the duplicate. */ - dup->starOp( ); - afterOpMinimize( dup ); - - /* Tak on the kleene star. */ - retFsm->concatOp( dup ); - afterOpMinimize( retFsm ); - } - break; - } - case RangeType: { - /* Check for bogus range. */ - if ( upperRep - lowerRep < 0 ) { - error(loc) << "invalid range repetition" << endl; - - /* Return null machine as recovery. */ - retFsm = new FsmGraph(); - retFsm->lambdaFsm(); - } - else if ( lowerRep == 0 && upperRep == 0 ) { - /* No copies. Don't need to evaluate the factorRep. This - * defeats the purpose so give a warning. */ - warning(loc) << "zero to zero repetitions results " - "in the null machine" << endl; - - retFsm = new FsmGraph(); - retFsm->lambdaFsm(); - } - else { - /* Now need to evaluate the repeated machine. */ - retFsm = factorRep->walk( pd ); - if ( retFsm->startState->isFinState() ) { - warning(loc) << "applying range repetition to a machine that " - "accepts zero length word" << endl; - } - - /* The start func orders need to be shifted before doing both kinds - * of repetition. */ - pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd ); - - if ( lowerRep == 0 ) { - /* Just doing max repetition. Already guarded against n == 0. */ - retFsm->optionalRepeatOp( upperRep ); - afterOpMinimize( retFsm ); - } - else if ( lowerRep == upperRep ) { - /* Just doing exact repetition. Already guarded against n == 0. */ - retFsm->repeatOp( lowerRep ); - afterOpMinimize( retFsm ); - } - else { - /* This is the case that 0 < lowerRep < upperRep. Take a - * duplicate for the optional repeat. */ - FsmGraph *dup = new FsmGraph( *retFsm ); - - /* Do repetition on the first half. */ - retFsm->repeatOp( lowerRep ); - afterOpMinimize( retFsm ); - - /* Do optional repetition on the second half. 
*/ - dup->optionalRepeatOp( upperRep - lowerRep ); - afterOpMinimize( dup ); - - /* Tak on the duplicate machine. */ - retFsm->concatOp( dup ); - afterOpMinimize( retFsm ); - } - } - break; - } - case FactorNegType: { - /* Evaluate the Factor. Pass it up. */ - retFsm = factorNeg->walk( pd ); - break; - }} - return retFsm; -} - - -/* Clean up after a factor with negation node. */ -LexFactorNeg::~LexFactorNeg() -{ - switch ( type ) { - case NegateType: - case CharNegateType: - delete factorNeg; - break; - case FactorType: - delete factor; - break; - } -} - -/* Evaluate a factor with negation node. */ -FsmGraph *LexFactorNeg::walk( Compiler *pd ) -{ - FsmGraph *retFsm = 0; - - switch ( type ) { - case NegateType: { - /* Evaluate the factorNeg. */ - FsmGraph *toNegate = factorNeg->walk( pd ); - - /* Negation is subtract from dot-star. */ - retFsm = dotStarFsm( pd ); - retFsm->subtractOp( toNegate ); - afterOpMinimize( retFsm ); - break; - } - case CharNegateType: { - /* Evaluate the factorNeg. */ - FsmGraph *toNegate = factorNeg->walk( pd ); - - /* CharNegation is subtract from dot. */ - retFsm = dotFsm( pd ); - retFsm->subtractOp( toNegate ); - afterOpMinimize( retFsm ); - break; - } - case FactorType: { - /* Evaluate the Factor. Pass it up. */ - retFsm = factor->walk( pd ); - break; - }} - return retFsm; -} - -/* Clean up after a factor node. */ -LexFactor::~LexFactor() -{ - switch ( type ) { - case LiteralType: - delete literal; - break; - case RangeType: - delete range; - break; - case OrExprType: - delete reItem; - break; - case RegExprType: - delete regExp; - break; - case ReferenceType: - break; - case ParenType: - delete join; - break; - } -} - -/* Evaluate a factor node. 
*/ -FsmGraph *LexFactor::walk( Compiler *pd ) -{ - FsmGraph *rtnVal = 0; - switch ( type ) { - case LiteralType: - rtnVal = literal->walk( pd ); - break; - case RangeType: - rtnVal = range->walk( pd ); - break; - case OrExprType: - rtnVal = reItem->walk( pd, 0 ); - break; - case RegExprType: - rtnVal = regExp->walk( pd, 0 ); - break; - case ReferenceType: - rtnVal = varDef->walk( pd ); - break; - case ParenType: - rtnVal = join->walk( pd ); - break; - } - - return rtnVal; -} - - -/* Clean up a range object. Must delete the two literals. */ -Range::~Range() -{ - delete lowerLit; - delete upperLit; -} - -bool Range::verifyRangeFsm( FsmGraph *rangeEnd ) -{ - /* Must have two states. */ - if ( rangeEnd->stateList.length() != 2 ) - return false; - /* The start state cannot be final. */ - if ( rangeEnd->startState->isFinState() ) - return false; - /* There should be only one final state. */ - if ( rangeEnd->finStateSet.length() != 1 ) - return false; - /* The final state cannot have any transitions out. */ - if ( rangeEnd->finStateSet[0]->outList.length() != 0 ) - return false; - /* The start state should have only one transition out. */ - if ( rangeEnd->startState->outList.length() != 1 ) - return false; - /* The singe transition out of the start state should not be a range. */ - FsmTrans *startTrans = rangeEnd->startState->outList.head; - if ( startTrans->lowKey != startTrans->highKey ) - return false; - return true; -} - -/* Evaluate a range. Gets the lower an upper key and makes an fsm range. */ -FsmGraph *Range::walk( Compiler *pd ) -{ - /* Construct and verify the suitability of the lower end of the range. */ - FsmGraph *lowerFsm = lowerLit->walk( pd ); - if ( !verifyRangeFsm( lowerFsm ) ) { - error(lowerLit->loc) << - "bad range lower end, must be a single character" << endl; - } - - /* Construct and verify the upper end. 
*/ - FsmGraph *upperFsm = upperLit->walk( pd ); - if ( !verifyRangeFsm( upperFsm ) ) { - error(upperLit->loc) << - "bad range upper end, must be a single character" << endl; - } - - /* Grab the keys from the machines, then delete them. */ - Key lowKey = lowerFsm->startState->outList.head->lowKey; - Key highKey = upperFsm->startState->outList.head->lowKey; - delete lowerFsm; - delete upperFsm; - - /* Validate the range. */ - if ( lowKey > highKey ) { - /* Recover by setting upper to lower; */ - error(lowerLit->loc) << "lower end of range is greater then upper end" << endl; - highKey = lowKey; - } - - /* Return the range now that it is validated. */ - FsmGraph *retFsm = new FsmGraph(); - retFsm->rangeFsm( lowKey, highKey ); - return retFsm; -} - -/* Evaluate a literal object. */ -FsmGraph *Literal::walk( Compiler *pd ) -{ - /* FsmGraph to return, is the alphabet signed. */ - FsmGraph *rtnVal = 0; - - switch ( type ) { - case Number: { - /* Make the fsm key in int format. */ - Key fsmKey = makeFsmKeyNum( literal.data, loc, pd ); - /* Make the new machine. */ - rtnVal = new FsmGraph(); - rtnVal->concatFsm( fsmKey ); - break; - } - case LitString: { - /* Make the array of keys in int format. */ - String interp; - bool caseInsensitive; - prepareLitString( interp, caseInsensitive, literal, loc ); - Key *arr = new Key[interp.length()]; - makeFsmKeyArray( arr, interp.data, interp.length(), pd ); - - /* Make the new machine. */ - rtnVal = new FsmGraph(); - if ( caseInsensitive ) - rtnVal->concatFsmCI( arr, interp.length() ); - else - rtnVal->concatFsm( arr, interp.length() ); - delete[] arr; - break; - }} - return rtnVal; -} - -/* Clean up after a regular expression object. */ -RegExpr::~RegExpr() -{ - switch ( type ) { - case RecurseItem: - delete regExp; - delete item; - break; - case Empty: - break; - } -} - -/* Evaluate a regular expression object. */ -FsmGraph *RegExpr::walk( Compiler *pd, RegExpr *rootRegex ) -{ - /* This is the root regex, pass down a pointer to this. 
*/ - if ( rootRegex == 0 ) - rootRegex = this; - - FsmGraph *rtnVal = 0; - switch ( type ) { - case RecurseItem: { - /* Walk both items. */ - FsmGraph *fsm1 = regExp->walk( pd, rootRegex ); - FsmGraph *fsm2 = item->walk( pd, rootRegex ); - if ( fsm1 == 0 ) - rtnVal = fsm2; - else { - fsm1->concatOp( fsm2 ); - rtnVal = fsm1; - } - break; - } - case Empty: { - /* FIXME: Return something here. */ - rtnVal = 0; - break; - } - } - return rtnVal; -} - -/* Clean up after an item in a regular expression. */ -ReItem::~ReItem() -{ - switch ( type ) { - case Data: - case Dot: - break; - case OrBlock: - case NegOrBlock: - delete orBlock; - break; - } -} - -/* Evaluate a regular expression object. */ -FsmGraph *ReItem::walk( Compiler *pd, RegExpr *rootRegex ) -{ - /* The fsm to return, is the alphabet signed? */ - FsmGraph *rtnVal = 0; - - switch ( type ) { - case Data: { - /* Move the data into an integer array and make a concat fsm. */ - Key *arr = new Key[data.length()]; - makeFsmKeyArray( arr, data.data, data.length(), pd ); - - /* Make the concat fsm. */ - rtnVal = new FsmGraph(); - if ( rootRegex != 0 && rootRegex->caseInsensitive ) - rtnVal->concatFsmCI( arr, data.length() ); - else - rtnVal->concatFsm( arr, data.length() ); - delete[] arr; - break; - } - case Dot: { - /* Make the dot fsm. */ - rtnVal = dotFsm( pd ); - break; - } - case OrBlock: { - /* Get the or block and minmize it. */ - rtnVal = orBlock->walk( pd, rootRegex ); - if ( rtnVal == 0 ) { - rtnVal = new FsmGraph(); - rtnVal->lambdaFsm(); - } - rtnVal->minimizePartition2(); - break; - } - case NegOrBlock: { - /* Get the or block and minimize it. */ - FsmGraph *fsm = orBlock->walk( pd, rootRegex ); - fsm->minimizePartition2(); - - /* Make a dot fsm and subtract from it. */ - rtnVal = dotFsm( pd ); - rtnVal->subtractOp( fsm ); - rtnVal->minimizePartition2(); - break; - } - } - - return rtnVal; -} - -/* Clean up after an or block of a regular expression. 
*/ -ReOrBlock::~ReOrBlock() -{ - switch ( type ) { - case RecurseItem: - delete orBlock; - delete item; - break; - case Empty: - break; - } -} - - -/* Evaluate an or block of a regular expression. */ -FsmGraph *ReOrBlock::walk( Compiler *pd, RegExpr *rootRegex ) -{ - FsmGraph *rtnVal = 0; - switch ( type ) { - case RecurseItem: { - /* Evaluate the two fsm. */ - FsmGraph *fsm1 = orBlock->walk( pd, rootRegex ); - FsmGraph *fsm2 = item->walk( pd, rootRegex ); - if ( fsm1 == 0 ) - rtnVal = fsm2; - else { - fsm1->unionOp( fsm2 ); - rtnVal = fsm1; - } - break; - } - case Empty: { - rtnVal = 0; - break; - } - } - return rtnVal;; -} - -/* Evaluate an or block item of a regular expression. */ -FsmGraph *ReOrItem::walk( Compiler *pd, RegExpr *rootRegex ) -{ - /* The return value, is the alphabet signed? */ - FsmGraph *rtnVal = 0; - switch ( type ) { - case Data: { - /* Make the or machine. */ - rtnVal = new FsmGraph(); - - /* Put the or data into an array of ints. Note that we find unique - * keys. Duplicates are silently ignored. The alternative would be to - * issue warning or an error but since we can't with [a0-9a] or 'a' | - * 'a' don't bother here. */ - KeySet keySet; - makeFsmUniqueKeyArray( keySet, data.data, data.length(), - rootRegex != 0 ? rootRegex->caseInsensitive : false, pd ); - - /* Run the or operator. */ - rtnVal->orFsm( keySet.data, keySet.length() ); - break; - } - case Range: { - /* Make the upper and lower keys. */ - Key lowKey = makeFsmKeyChar( lower, pd ); - Key highKey = makeFsmKeyChar( upper, pd ); - - /* Validate the range. */ - if ( lowKey > highKey ) { - /* Recover by setting upper to lower; */ - error(loc) << "lower end of range is greater then upper end" << endl; - highKey = lowKey; - } - - /* Make the range machine. */ - rtnVal = new FsmGraph(); - rtnVal->rangeFsm( lowKey, highKey ); - - if ( rootRegex != 0 && rootRegex->caseInsensitive ) { - if ( lowKey <= 'Z' && 'A' <= highKey ) { - Key otherLow = lowKey < 'A' ? 
Key('A') : lowKey; - Key otherHigh = 'Z' < highKey ? Key('Z') : highKey; - - otherLow = 'a' + ( otherLow - 'A' ); - otherHigh = 'a' + ( otherHigh - 'A' ); - - FsmGraph *otherRange = new FsmGraph(); - otherRange->rangeFsm( otherLow, otherHigh ); - rtnVal->unionOp( otherRange ); - rtnVal->minimizePartition2(); - } - else if ( lowKey <= 'z' && 'a' <= highKey ) { - Key otherLow = lowKey < 'a' ? Key('a') : lowKey; - Key otherHigh = 'z' < highKey ? Key('z') : highKey; - - otherLow = 'A' + ( otherLow - 'a' ); - otherHigh = 'A' + ( otherHigh - 'a' ); - - FsmGraph *otherRange = new FsmGraph(); - otherRange->rangeFsm( otherLow, otherHigh ); - rtnVal->unionOp( otherRange ); - rtnVal->minimizePartition2(); - } - } - - break; - }} - return rtnVal; -} diff --git a/src/parsetree.h b/src/parsetree.h deleted file mode 100644 index eea94454..00000000 --- a/src/parsetree.h +++ /dev/null @@ -1,3605 +0,0 @@ -/* - * Copyright 2006-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef _COLM_PARSETREE_H -#define _COLM_PARSETREE_H - -#include <iostream> -#include <string.h> -#include <string> - -#include <avlbasic.h> -#include <avlmap.h> -#include <bstmap.h> -#include <bstset.h> -#include <vector.h> -#include <dlist.h> -#include <dlistval.h> -#include <dlistmel.h> - -#include "global.h" -#include "cstring.h" -#include "bytecode.h" - - -/* Operators that are represented with single symbol characters. */ -#define OP_DoubleEql 'e' -#define OP_NotEql 'q' -#define OP_LessEql 'l' -#define OP_GrtrEql 'g' -#define OP_LogicalAnd 'a' -#define OP_LogicalOr 'o' -#define OP_Deref 'd' - -#if SIZEOF_LONG != 4 && SIZEOF_LONG != 8 - #error "SIZEOF_LONG contained an unexpected value" -#endif - -struct NameInst; -struct FsmGraph; -struct RedFsm; -struct ObjectDef; -struct ElementOf; -struct UniqueType; -struct ObjectField; -struct TransBlock; -struct CodeBlock; -struct PdaLiteral; -struct TypeAlias; -struct RegionSet; -struct NameScope; -struct IterCall; -struct TemplateType; -struct ObjectMethod; -struct Reduction; -struct Production; -struct LangVarRef; - -/* - * Code Vector - */ -struct CodeVect : public Vector<code_t> -{ - void appendHalf( half_t half ) - { - /* not optimal. */ - append( half & 0xff ); - append( (half>>8) & 0xff ); - } - - void appendWord( word_t word ) - { - /* not optimal. */ - append( word & 0xff ); - append( (word>>8) & 0xff ); - append( (word>>16) & 0xff ); - append( (word>>24) & 0xff ); - #if SIZEOF_LONG == 8 - append( (word>>32) & 0xff ); - append( (word>>40) & 0xff ); - append( (word>>48) & 0xff ); - append( (word>>56) & 0xff ); - #endif - } - - void setHalf( long pos, half_t half ) - { - /* not optimal. 
*/ - data[pos] = half & 0xff; - data[pos+1] = (half>>8) & 0xff; - } - - void insertHalf( long pos, half_t half ) - { - /* not optimal. */ - insert( pos, half & 0xff ); - insert( pos+1, (half>>8) & 0xff ); - } - - void insertWord( long pos, word_t word ) - { - /* not at all optimal. */ - insert( pos, word & 0xff ); - insert( pos+1, (word>>8) & 0xff ); - insert( pos+2, (word>>16) & 0xff ); - insert( pos+3, (word>>24) & 0xff ); - #if SIZEOF_LONG == 8 - insert( pos+4, (word>>32) & 0xff ); - insert( pos+5, (word>>40) & 0xff ); - insert( pos+6, (word>>48) & 0xff ); - insert( pos+7, (word>>56) & 0xff ); - #endif - } - - void insertTree( long pos, tree_t *tree ) - { insertWord( pos, (word_t) tree ); } -}; - - - -/* Types of builtin machines. */ -enum BuiltinMachine -{ - BT_Any, - BT_Ascii, - BT_Extend, - BT_Alpha, - BT_Digit, - BT_Alnum, - BT_Lower, - BT_Upper, - BT_Cntrl, - BT_Graph, - BT_Print, - BT_Punct, - BT_Space, - BT_Xdigit, - BT_Lambda, - BT_Empty -}; - -/* Must match the LI defines in pdarun.h. */ -enum LocalType -{ - LT_Tree = 1, - LT_Iter, - LT_RevIter, - LT_UserIter -}; - -struct LocalLoc -{ - LocalLoc( LocalType type, int scope, int offset ) - : scope(scope), type(type), offset(offset) {} - - int scope; - LocalType type; - int offset; -}; - -struct Locals -{ - Vector<LocalLoc> locals; - - void append( const LocalLoc &ll ) - { - int pos = 0; - while ( pos < locals.length() && ll.scope >= locals[pos].scope ) - pos += 1; - locals.insert( pos, ll ); - } -}; - -typedef BstSet<char> CharSet; -typedef Vector<unsigned char> UnsignedCharVect; - -struct Compiler; -struct TypeRef; - -/* Leaf type. */ -struct Literal; - -/* tree_t nodes. 
*/ - -struct LexTerm; -struct LexFactorAug; -struct LexFactorRep; -struct LexFactorNeg; -struct LexFactor; -struct LexExpression; -struct LexJoin; -struct JoinOrLm; -struct RegionJoinOrLm; -struct TokenRegion; -struct Namespace; -struct StructDef; -struct TokenDef; -struct TokenDefListReg; -struct TokenDefListNs; -struct TokenInstance; -struct TokenInstanceListReg; -struct Range; -struct LangEl; - -enum AugType -{ - at_start, - at_leave -}; - -struct Action; -struct PriorDesc; -struct RegExpr; -struct ReItem; -struct ReOrBlock; -struct ReOrItem; -struct ExplicitMachine; -struct InlineItem; -struct InlineList; - -/* Reference to a named state. */ -typedef Vector<String> NameRef; -typedef Vector<NameRef*> NameRefList; -typedef Vector<NameInst*> NameTargList; - -/* Structure for storing location of epsilon transitons. */ -struct EpsilonLink -{ - EpsilonLink( const InputLoc &loc, NameRef &target ) - : loc(loc), target(target) { } - - InputLoc loc; - NameRef target; -}; - -struct Label -{ - Label( const InputLoc &loc, const String &data, ObjectField *objField ) - : loc(loc), data(data), objField(objField) { } - - InputLoc loc; - String data; - ObjectField *objField; -}; - -/* Structure represents an action assigned to some LexFactorAug node. The - * factor with aug will keep an array of these. */ -struct ParserAction -{ - ParserAction( const InputLoc &loc, AugType type, int localErrKey, Action *action ) - : loc(loc), type(type), localErrKey(localErrKey), action(action) { } - - InputLoc loc; - AugType type; - int localErrKey; - Action *action; -}; - -struct Token -{ - String data; - InputLoc loc; -}; - -void prepareLitString( String &result, bool &caseInsensitive, - const String &srcString, const InputLoc &loc ); - -std::ostream &operator<<(std::ostream &out, const Token &token ); - -typedef AvlMap< String, TokenInstance*, ColmCmpStr > LiteralDict; -typedef AvlMapEl< String, TokenInstance* > LiteralDictEl; - -/* Store the value and type of a priority augmentation. 
*/ -struct PriorityAug -{ - PriorityAug( AugType type, int priorKey, int priorValue ) : - type(type), priorKey(priorKey), priorValue(priorValue) { } - - AugType type; - int priorKey; - int priorValue; -}; - -/* - * A Variable Definition - */ -struct LexDefinition -{ - LexDefinition( const String &name, LexJoin *join ) - : name(name), join(join) { } - - /* Parse tree traversal. */ - FsmGraph *walk( Compiler *pd ); - void makeNameTree( const InputLoc &loc, Compiler *pd ); - - String name; - LexJoin *join; -}; - -typedef Vector<String> StringVect; -typedef CmpTable<String, ColmCmpStr> CmpStrVect; - -struct NamespaceQual -{ - NamespaceQual() - : - cachedNspaceQual(0), - declInNspace(0) - {} - - static NamespaceQual *cons( Namespace *declInNspace ) - { - NamespaceQual *nsq = new NamespaceQual; - nsq->declInNspace = declInNspace; - return nsq; - } - - Namespace *cachedNspaceQual; - Namespace *declInNspace; - - StringVect qualNames; - - Namespace *searchFrom( Namespace *from, StringVect::Iter &qualPart ); - Namespace *getQual( Compiler *pd ); - bool thisOnly() - { return qualNames.length() != 0; } -}; - -struct ReCapture -{ - ReCapture( Action *markEnter, Action *markLeave, ObjectField *objField ) - : markEnter(markEnter), markLeave(markLeave), objField(objField) {} - - Action *markEnter; - Action *markLeave; - ObjectField *objField; -}; - - -typedef Vector<ReCapture> ReCaptureVect; - -struct TokenDefPtr1 -{ - TokenDef *prev, *next; -}; - -struct TokenDefPtr2 -{ - TokenDef *prev, *next; -}; - -struct TokenDef -: - public TokenDefPtr1, - public TokenDefPtr2 -{ - TokenDef() - : - action(0), tdLangEl(0), inLmSelect(false), dupOf(0), - noPostIgnore(false), noPreIgnore(false), isZero(false) - {} - - static TokenDef *cons( const String &name, const String &literal, - bool isLiteral, bool isIgnore, LexJoin *join, CodeBlock *codeBlock, - const InputLoc &semiLoc, int longestMatchId, Namespace *nspace, - RegionSet *regionSet, ObjectDef *objectDef, StructDef *contextIn ) - { - 
TokenDef *t = new TokenDef; - - t->name = name; - t->literal = literal; - t->isLiteral = isLiteral; - t->isIgnore = isIgnore; - t->join = join; - t->action = 0; - t->codeBlock = codeBlock; - t->tdLangEl = 0; - t->semiLoc = semiLoc; - t->longestMatchId = longestMatchId; - t->inLmSelect = false; - t->nspace = nspace; - t->regionSet = regionSet; - t->objectDef = objectDef; - t->contextIn = contextIn; - t->dupOf = 0; - t->noPostIgnore = false; - t->noPreIgnore = false; - t->isZero = false; - - return t; - } - - InputLoc getLoc(); - - String name; - String literal; - bool isLiteral; - bool isIgnore; - LexJoin *join; - Action *action; - CodeBlock *codeBlock; - LangEl *tdLangEl; - InputLoc semiLoc; - - Action *setActId; - Action *actOnLast; - Action *actOnNext; - Action *actLagBehind; - int longestMatchId; - bool inLmSelect; - Namespace *nspace; - RegionSet *regionSet; - ReCaptureVect reCaptureVect; - ObjectDef *objectDef; - StructDef *contextIn; - - TokenDef *dupOf; - bool noPostIgnore; - bool noPreIgnore; - bool isZero; -}; - -struct TokenInstancePtr -{ - TokenInstance *prev, *next; -}; - -struct TokenInstance -: - public TokenInstancePtr -{ - TokenInstance() - : - action(0), - inLmSelect(false), - dupOf(0) - {} - - static TokenInstance *cons( TokenDef *tokenDef, - LexJoin *join, const InputLoc &semiLoc, - int longestMatchId, Namespace *nspace, TokenRegion *tokenRegion ) - { - TokenInstance *t = new TokenInstance; - - t->tokenDef = tokenDef; - t->join = join; - t->action = 0; - t->semiLoc = semiLoc; - t->longestMatchId = longestMatchId; - t->inLmSelect = false; - t->nspace = nspace; - t->tokenRegion = tokenRegion; - t->dupOf = 0; - - return t; - } - - InputLoc getLoc(); - - TokenDef *tokenDef; - LexJoin *join; - Action *action; - InputLoc semiLoc; - - Action *setActId; - Action *actOnLast; - Action *actOnNext; - Action *actLagBehind; - int longestMatchId; - bool inLmSelect; - Namespace *nspace; - TokenRegion *tokenRegion; - - TokenInstance *dupOf; -}; - -struct 
LelDefList; - -struct NtDef -{ - static NtDef *cons( const String &name, Namespace *nspace, - LelDefList *defList, ObjectDef *objectDef, - StructDef *contextIn, bool reduceFirst ) - { - NtDef *nt = new NtDef; - - nt->name = name; - nt->nspace = nspace; - nt->defList = defList; - nt->objectDef = objectDef; - nt->contextIn = contextIn; - nt->reduceFirst = reduceFirst; - - return nt; - } - - static NtDef *cons( const String &name, Namespace *nspace, - StructDef *contextIn, bool reduceFirst ) - { - NtDef *nt = new NtDef; - - nt->name = name; - nt->nspace = nspace; - nt->defList = 0; - nt->objectDef = 0; - nt->contextIn = contextIn; - nt->reduceFirst = reduceFirst; - - return nt; - } - - String name; - Namespace *nspace; - LelDefList *defList; - ObjectDef *objectDef; - StructDef *contextIn; - bool reduceFirst; - - NtDef *prev, *next; -}; - -struct NtDefList : DList<NtDef> {}; - -/* Declare a new type so that ptreetypes.h need not include dlist.h. */ -struct TokenInstanceListReg : DListMel<TokenInstance, TokenInstancePtr> {}; - -/* Declare a new type so that ptreetypes.h need not include dlist.h. */ -struct TokenDefListReg : DListMel<TokenDef, TokenDefPtr1> {}; -struct TokenDefListNs : DListMel<TokenDef, TokenDefPtr2> {}; - -struct StructStack - : public Vector<StructDef*> -{ - StructDef *top() - { return length() > 0 ? 
Vector<StructDef*>::top() : 0; } -}; - -struct StructEl; - -struct StructDef -{ - StructDef( const InputLoc &loc, const String &name, ObjectDef *objectDef ) - : - loc(loc), - name(name), - objectDef(objectDef), - structEl(0) - {} - - InputLoc loc; - String name; - ObjectDef *objectDef; - StructEl *structEl; - - StructDef *prev, *next; -}; - -struct StructEl -{ - StructEl( const String &name, StructDef *structDef ) - : - name(name), - structDef(structDef), - id(-1) - {} - - String name; - StructDef *structDef; - int id; - - StructEl *prev, *next; -}; - -typedef DList<StructEl> StructElList; -struct StructDefList : DList<StructDef> {}; - -struct TypeMapEl - : public AvlTreeEl<TypeMapEl> -{ - enum Type - { - AliasType = 1, - LangElType, - StructType - }; - - const String &getKey() { return key; } - - TypeMapEl( Type type, const String &key, TypeRef *typeRef ) - : type(type), key(key), value(0), typeRef(typeRef), structEl(0) {} - - TypeMapEl( Type type, const String &key, LangEl *value ) - : type(type), key(key), value(value), typeRef(0), structEl(0) {} - - TypeMapEl( Type type, const String &key, StructEl *structEl ) - : type(type), key(key), value(0), typeRef(0), structEl(structEl) {} - - Type type; - String key; - LangEl *value; - TypeRef *typeRef; - StructEl *structEl; - - TypeMapEl *prev, *next; -}; - -/* Symbol Map. */ -typedef AvlTree< TypeMapEl, String, ColmCmpStr > TypeMap; - -typedef Vector<TokenRegion*> RegionVect; - -struct RegionImpl -{ - RegionImpl() - : - regionNameInst(0), - lmActSelect(0), - lmSwitchHandlesError(false), - defaultTokenInstance(0), - wasEmpty(false) - {} - - InputLoc loc; - - /* This gets saved off during the name walk. Can save it off because token - * regions are referenced once only. */ - NameInst *regionNameInst; - - TokenInstanceListReg tokenInstanceList; - Action *lmActSelect; - bool lmSwitchHandlesError; - TokenInstance *defaultTokenInstance; - - /* We alway init empty scanners with a single token. 
If we had to do this - * then wasEmpty is true. */ - bool wasEmpty; - - RegionImpl *prev, *next; - - void runLongestMatch( Compiler *pd, FsmGraph *graph ); - void transferScannerLeavingActions( FsmGraph *graph ); - FsmGraph *walk( Compiler *pd ); - - void restart( FsmGraph *graph, FsmTrans *trans ); - void makeNameTree( const InputLoc &loc, Compiler *pd ); - void makeActions( Compiler *pd ); - Action *newAction( Compiler *pd, const InputLoc &loc, - const String &name, InlineList *inlineList ); -}; - -struct TokenRegion -{ - /* Construct with a list of joins */ - TokenRegion( const InputLoc &loc, int id, RegionImpl *impl ) - : - loc(loc), - id(id), - preEofBlock(0), - zeroLel(0), - ignoreOnly(0), - impl(impl) - { } - - InputLoc loc; - int id; - - CodeBlock *preEofBlock; - - LangEl *zeroLel; - TokenRegion *ignoreOnly; - - RegionImpl *impl; - - TokenRegion *next, *prev; - - /* tree_t traversal. */ - void findName( Compiler *pd ); -}; - -struct RegionSet -{ - RegionSet( RegionImpl *implTokenIgnore, RegionImpl *implTokenOnly, - RegionImpl *implIgnoreOnly, TokenRegion *tokenIgnore, - TokenRegion *tokenOnly, TokenRegion *ignoreOnly, - TokenRegion *collectIgnore ) - : - implTokenIgnore(implTokenIgnore), - implTokenOnly(implTokenOnly), - implIgnoreOnly(implIgnoreOnly), - - tokenIgnore(tokenIgnore), - tokenOnly(tokenOnly), - ignoreOnly(ignoreOnly), - collectIgnore(collectIgnore) - {} - - /* Provides the scanner state machines. We reuse ignore-only. 
*/ - RegionImpl *implTokenIgnore; - RegionImpl *implTokenOnly; - RegionImpl *implIgnoreOnly; - - TokenRegion *tokenIgnore; - TokenRegion *tokenOnly; - TokenRegion *ignoreOnly; - TokenRegion *collectIgnore; - - TokenDefListReg tokenDefList; - - RegionSet *next, *prev; -}; - -typedef Vector<RegionSet*> RegionSetVect; - -typedef DList<RegionSet> RegionSetList; -typedef DList<TokenRegion> RegionList; -typedef DList<RegionImpl> RegionImplList; - -typedef Vector<Namespace*> NamespaceVect; -typedef Vector<Reduction*> ReductionVect; - -/* Generics have runtime-representations, so we must track them as unique - * types. This gives the runtimes some idea of what is contained in the - * structures. */ -struct GenericType - : public DListEl<GenericType> -{ - GenericType( long typeId, long id, TypeRef *elTr, - TypeRef *keyTr, TypeRef *valueTr, ObjectField *el ) - : - typeId(typeId), id(id), - elTr(elTr), keyTr(keyTr), valueTr(valueTr), - elUt(0), keyUt(0), valueUt(0), - objDef(0), el(el), elOffset(0) - {} - - void declare( Compiler *pd, Namespace *nspace ); - - long typeId; - long id; - - TypeRef *elTr; - TypeRef *keyTr; - TypeRef *valueTr; - - UniqueType *elUt; - UniqueType *keyUt; - UniqueType *valueUt; - - ObjectDef *objDef; - ObjectField *el; - long elOffset; -}; - -typedef DList<GenericType> GenericList; - -/* Graph dictionary. */ -struct GraphDictEl -: - public AvlTreeEl<GraphDictEl>, - public DListEl<GraphDictEl> -{ - GraphDictEl( const String &key ) - : key(key), value(0), isInstance(false) { } - - GraphDictEl( const String &key, LexDefinition *value ) - : key(key), value(value), isInstance(false) { } - - const String &getKey() { return key; } - - String key; - LexDefinition *value; - bool isInstance; - - /* Location info of graph definition. Points to variable name of assignment. 
*/ - InputLoc loc; -}; - -typedef AvlTree<GraphDictEl, String, ColmCmpStr> GraphDict; -typedef DList<GraphDictEl> GraphList; - -struct TypeAlias -{ - TypeAlias( const InputLoc &loc, Namespace *nspace, - const String &name, TypeRef *typeRef ) - : - loc(loc), - nspace(nspace), - name(name), - typeRef(typeRef) - {} - - InputLoc loc; - Namespace *nspace; - String name; - TypeRef *typeRef; - - TypeAlias *prev, *next; -}; - -typedef DList<TypeAlias> TypeAliasList; - -typedef AvlMap<String, ObjectField*, ColmCmpStr> FieldMap; -typedef AvlMapEl<String, ObjectField*> FieldMapEl; - -typedef AvlMap<String, ObjectMethod*, ColmCmpStr> MethodMap; -typedef AvlMapEl<String, ObjectMethod*> MethodMapEl; - -/* tree_t of name scopes for an object def. All of the object fields inside this - * tree live in one object def. This is used for scoping names in functions. */ -struct NameScope -{ - NameScope() - : - owningObj(0), - parentScope(0), - childIter(0), - caseClauseVarRef(0) - {} - - ObjectDef *owningObj; - FieldMap fieldMap; - MethodMap methodMap; - - NameScope *parentScope; - DList<NameScope> children; - - /* For iteration after declaration. */ - NameScope *childIter; - LangVarRef *caseClauseVarRef; - - NameScope *prev, *next; - - int depth() - { - int depth = 0; - NameScope *scope = this; - while ( scope != 0 ) { - depth += 1; - scope = scope->parentScope; - } - return depth; - } - - ObjectField *findField( const String &name ) const; - ObjectMethod *findMethod( const String &name ) const; - - ObjectField *checkRedecl( const String &name ); - void insertField( const String &name, ObjectField *value ); - -}; - - -struct Namespace -{ - /* Construct with a list of joins */ - Namespace( const InputLoc &loc, const String &name, int id, - Namespace *parentNamespace ) : - loc(loc), name(name), id(id), - parentNamespace(parentNamespace) - { - rootScope = new NameScope; - } - - /* tree_t traversal. 
*/ - Namespace *findNamespace( const String &name ); - Reduction *findReduction( const String &name ); - - InputLoc loc; - String name; - int id; - - /* Literal patterns and the dictionary mapping literals to the underlying - * tokens. */ - LiteralDict literalDict; - - /* List of tokens defs in the namespace. */ - TokenDefListNs tokenDefList; - - /* List of nonterminal defs in the namespace. */ - NtDefList ntDefList; - - StructDefList structDefList; - - /* Dictionary of symbols within the region. */ - TypeMap typeMap; - GenericList genericList; - - /* regular language definitions. */ - GraphDict rlMap; - - TypeAliasList typeAliasList; - - Namespace *parentNamespace; - NamespaceVect childNamespaces; - - ReductionVect reductions; - - NameScope *rootScope; - - Namespace *next, *prev; - - void declare( Compiler *pd ); -}; - -typedef DList<Namespace> NamespaceList; -typedef BstSet< Namespace*, CmpOrd<Namespace*> > NamespaceSet; - -struct ReduceTextItem -{ - enum Type { - LhsRef, - RhsRef, - TreeRef, - RhsLoc, - Txt - }; - - ReduceTextItem() : n(0) {} - - Type type; - String txt; - int n; - - ReduceTextItem *prev, *next; -}; - -typedef DList<ReduceTextItem> ReduceTextItemList; - -struct ReduceNonTerm -{ - ReduceNonTerm( const InputLoc &loc, TypeRef *nonTerm ) - : - loc(loc), - nonTerm(nonTerm) - {} - - InputLoc loc; - TypeRef *nonTerm; - ReduceTextItemList itemList; - - ReduceNonTerm *prev, *next; -}; - -struct ReduceAction -{ - ReduceAction( const InputLoc &loc, TypeRef *nonTerm, - const String &prod ) - : - loc(loc), nonTerm(nonTerm), - prod(prod), - production(0) - {} - - InputLoc loc; - TypeRef *nonTerm; - String prod; - ReduceTextItemList itemList; - - Production *production; - - ReduceAction *prev, *next; -}; - -typedef DList<ReduceAction> ReduceActionList; -typedef DList<ReduceNonTerm> ReduceNonTermList; - -typedef Vector<ReduceAction*> ReduceActionVect; - -struct Reduction -{ - Reduction( const InputLoc &loc, String name ) - : - loc(loc), name(name), - 
needData(0), needLoc(0), - postfixBased(false), - parserBased(false) - { - static int nextId = 1; - id = nextId++; - var = name.data; - var.data[0] = tolower( var.data[0] ); - } - - InputLoc loc; - String name; - String var; - int id; - - bool *needData; - bool *needLoc; - - bool postfixBased; - bool parserBased; - - ReduceActionList reduceActions; - ReduceNonTermList reduceNonTerms; -}; - -/* - * LexJoin - */ -struct LexJoin -{ - LexJoin() - : - expr(0), - context(0), - mark(0) - {} - - static LexJoin *cons( LexExpression *expr ) - { - LexJoin *j = new LexJoin; - j->expr = expr; - return j; - } - - /* tree_t traversal. */ - FsmGraph *walk( Compiler *pd ); - void makeNameTree( Compiler *pd ); - void varDecl( Compiler *pd, TokenDef *tokenDef ); - - /* Data. */ - LexExpression *expr; - LexJoin *context; - Action *mark; -}; - -/* - * LexExpression - */ -struct LexExpression -{ - enum Type { - OrType, - IntersectType, - SubtractType, - StrongSubtractType, - TermType, - BuiltinType - }; - - LexExpression( ) : - expression(0), term(0), builtin((BuiltinMachine)-1), - type((Type)-1), prev(this), next(this) { } - - /* Construct with an expression on the left and a term on the right. */ - static LexExpression *cons( LexExpression *expression, LexTerm *term, Type type ) - { - LexExpression *ret = new LexExpression; - ret->type = type; - ret->expression = expression; - ret->term = term; - return ret; - } - - /* Construct with only a term. */ - static LexExpression *cons( LexTerm *term ) - { - LexExpression *ret = new LexExpression; - ret->type = TermType; - ret->term = term; - return ret; - } - - /* Construct with a builtin type. */ - static LexExpression *cons( BuiltinMachine builtin ) - { - LexExpression *ret = new LexExpression; - ret->type = BuiltinType; - ret->builtin = builtin; - return ret; - } - - ~LexExpression(); - - /* tree_t traversal. 
*/ - FsmGraph *walk( Compiler *pd, bool lastInSeq = true ); - void makeNameTree( Compiler *pd ); - void varDecl( Compiler *pd, TokenDef *tokenDef ); - - /* Node data. */ - LexExpression *expression; - LexTerm *term; - BuiltinMachine builtin; - Type type; - - LexExpression *prev, *next; -}; - -/* - * LexTerm - */ -struct LexTerm -{ - enum Type { - ConcatType, - RightStartType, - RightFinishType, - LeftType, - FactorAugType - }; - - LexTerm() : - term(0), factorAug(0), type((Type)-1) { } - - static LexTerm *cons( LexTerm *term, LexFactorAug *factorAug ) - { - LexTerm *ret = new LexTerm; - ret->type = ConcatType; - ret->term = term; - ret->factorAug = factorAug; - return ret; - } - - static LexTerm *cons( LexTerm *term, LexFactorAug *factorAug, Type type ) - { - LexTerm *ret = new LexTerm; - ret->type = type; - ret->term = term; - ret->factorAug = factorAug; - return ret; - } - - static LexTerm *cons( LexFactorAug *factorAug ) - { - LexTerm *ret = new LexTerm; - ret->type = FactorAugType; - ret->factorAug = factorAug; - return ret; - } - - ~LexTerm(); - - FsmGraph *walk( Compiler *pd, bool lastInSeq = true ); - void makeNameTree( Compiler *pd ); - void varDecl( Compiler *pd, TokenDef *tokenDef ); - - LexTerm *term; - LexFactorAug *factorAug; - Type type; - - /* Priority descriptor for RightFinish type. */ - PriorDesc priorDescs[2]; -}; - - -/* Third level of precedence. Augmenting nodes with actions and priorities. */ -struct LexFactorAug -{ - LexFactorAug() : - factorRep(0) { } - - static LexFactorAug *cons( LexFactorRep *factorRep ) - { - LexFactorAug *f = new LexFactorAug; - f->factorRep = factorRep; - return f; - } - - ~LexFactorAug(); - - /* tree_t traversal. */ - FsmGraph *walk( Compiler *pd ); - void makeNameTree( Compiler *pd ); - void varDecl( Compiler *pd, TokenDef *tokenDef ); - - void assignActions( Compiler *pd, FsmGraph *graph, int *actionOrd ); - - /* Actions and priorities assigned to the factor node. 
*/ - Vector<ParserAction> actions; - ReCaptureVect reCaptureVect; - - LexFactorRep *factorRep; -}; - -/* Fourth level of precedence. Trailing unary operators. Provide kleen star, - * optional and plus. */ -struct LexFactorRep -{ - enum Type { - StarType, - StarStarType, - OptionalType, - PlusType, - ExactType, - MaxType, - MinType, - RangeType, - FactorNegType - }; - - LexFactorRep() - : - factorRep(0), - factorNeg(0), - lowerRep(0), - upperRep(0), - type((Type)-1) - { } - - static LexFactorRep *cons( const InputLoc &loc, LexFactorRep *factorRep, - int lowerRep, int upperRep, Type type ) - { - LexFactorRep *f = new LexFactorRep; - f->type = type; - f->loc = loc; - f->factorRep = factorRep; - f->factorNeg = 0; - f->lowerRep = lowerRep; - f->upperRep = upperRep; - return f; - } - - static LexFactorRep *cons( LexFactorNeg *factorNeg ) - { - LexFactorRep *f = new LexFactorRep; - f->type = FactorNegType; - f->factorNeg = factorNeg; - return f; - } - - ~LexFactorRep(); - - /* tree_t traversal. */ - FsmGraph *walk( Compiler *pd ); - void makeNameTree( Compiler *pd ); - - InputLoc loc; - LexFactorRep *factorRep; - LexFactorNeg *factorNeg; - int lowerRep, upperRep; - Type type; - - /* Priority descriptor for StarStar type. */ - PriorDesc priorDescs[2]; -}; - -/* Fifth level of precedence. Provides Negation. */ -struct LexFactorNeg -{ - enum Type { - NegateType, - CharNegateType, - FactorType - }; - - LexFactorNeg() - : - factorNeg(0), - factor(0), - type((Type)-1) - {} - - static LexFactorNeg *cons( LexFactorNeg *factorNeg, Type type ) - { - LexFactorNeg *f = new LexFactorNeg; - f->type = type; - f->factorNeg = factorNeg; - f->factor = 0; - return f; - } - - static LexFactorNeg *cons( LexFactor *factor ) - { - LexFactorNeg *f = new LexFactorNeg; - f->type = FactorType; - f->factorNeg = 0; - f->factor = factor; - return f; - } - - ~LexFactorNeg(); - - /* tree_t traversal. 
*/ - FsmGraph *walk( Compiler *pd ); - void makeNameTree( Compiler *pd ); - - LexFactorNeg *factorNeg; - LexFactor *factor; - Type type; -}; - -/* - * LexFactor - */ -struct LexFactor -{ - /* Language elements a factor node can be. */ - enum Type { - LiteralType, - RangeType, - OrExprType, - RegExprType, - ReferenceType, - ParenType - }; - - LexFactor() - : - literal(0), - range(0), - reItem(0), - regExp(0), - varDef(0), - join(0), - lower(0), - upper(0), - type((Type)-1) - {} - - /* Construct with a literal fsm. */ - static LexFactor *cons( Literal *literal ) - { - LexFactor *f = new LexFactor; - f->type = LiteralType; - f->literal = literal; - return f; - } - - /* Construct with a range. */ - static LexFactor *cons( Range *range ) - { - LexFactor *f = new LexFactor; - f->type = RangeType; - f->range = range; - return f; - } - - /* Construct with the or part of a regular expression. */ - static LexFactor *cons( ReItem *reItem ) - { - LexFactor *f = new LexFactor; - f->type = OrExprType; - f->reItem = reItem; - return f; - } - - /* Construct with a regular expression. */ - static LexFactor *cons( RegExpr *regExp ) - { - LexFactor *f = new LexFactor; - f->type = RegExprType; - f->regExp = regExp; - return f; - } - - /* Construct with a reference to a var def. */ - static LexFactor *cons( const InputLoc &loc, LexDefinition *varDef ) - { - LexFactor *f = new LexFactor; - f->type = ReferenceType; - f->loc = loc; - f->varDef = varDef; - return f; - } - - /* Construct with a parenthesized join. */ - static LexFactor *cons( LexJoin *join ) - { - LexFactor *f = new LexFactor; - f->type = ParenType; - f->join = join; - return f; - } - - /* Cleanup. */ - ~LexFactor(); - - /* tree_t traversal. */ - FsmGraph *walk( Compiler *pd ); - void makeNameTree( Compiler *pd ); - - InputLoc loc; - Literal *literal; - Range *range; - ReItem *reItem; - RegExpr *regExp; - LexDefinition *varDef; - LexJoin *join; - int lower, upper; - Type type; -}; - -/* A range machine. 
Only ever composed of two literals. */ -struct Range -{ - static Range *cons( Literal *lowerLit, Literal *upperLit ) - { - Range *r = new Range; - r->lowerLit = lowerLit; - r->upperLit = upperLit; - return r; - } - - ~Range(); - FsmGraph *walk( Compiler *pd ); - bool verifyRangeFsm( FsmGraph *rangeEnd ); - - Literal *lowerLit; - Literal *upperLit; -}; - -/* Some literal machine. Can be a number or literal string. */ -struct Literal -{ - enum LiteralType { Number, LitString }; - - static Literal *cons( const InputLoc &loc, const String &literal, LiteralType type ) - { - Literal *l = new Literal; - l->loc = loc; - l->literal = literal; - l->type = type; - return l; - } - - FsmGraph *walk( Compiler *pd ); - - InputLoc loc; - String literal; - LiteralType type; -}; - -/* Regular expression. */ -struct RegExpr -{ - enum RegExpType { RecurseItem, Empty }; - - /* Constructors. */ - static RegExpr *cons() - { - RegExpr *r = new RegExpr; - r->type = Empty; - r->caseInsensitive = false; - return r; - } - - static RegExpr *cons( RegExpr *regExp, ReItem *item ) - { - RegExpr *r = new RegExpr; - r->regExp = regExp; - r->item = item; - r->type = RecurseItem; - r->caseInsensitive = false; - return r; - } - - ~RegExpr(); - FsmGraph *walk( Compiler *pd, RegExpr *rootRegex ); - - RegExpr *regExp; - ReItem *item; - RegExpType type; - bool caseInsensitive; -}; - -/* An item in a regular expression. 
*/ -struct ReItem -{ - enum ReItemType { Data, Dot, OrBlock, NegOrBlock }; - - static ReItem *cons( const String &data ) - { - ReItem *r = new ReItem; - r->data = data; - r->type = Data; - return r; - } - - static ReItem *cons( ReItemType type ) - { - ReItem *r = new ReItem; - r->type = type; - return r; - } - - static ReItem *cons( ReOrBlock *orBlock, ReItemType type ) - { - ReItem *r = new ReItem; - r->orBlock = orBlock; - r->type = type; - return r; - } - - ~ReItem(); - FsmGraph *walk( Compiler *pd, RegExpr *rootRegex ); - - String data; - ReOrBlock *orBlock; - ReItemType type; -}; - -/* An or block item. */ -struct ReOrBlock -{ - enum ReOrBlockType { RecurseItem, Empty }; - - /* Constructors. */ - static ReOrBlock *cons() - { - ReOrBlock *r = new ReOrBlock; - r->type = Empty; - return r; - } - - static ReOrBlock *cons( ReOrBlock *orBlock, ReOrItem *item ) - { - ReOrBlock *r = new ReOrBlock; - r->orBlock = orBlock; - r->item = item; - r->type = RecurseItem; - return r; - } - - ~ReOrBlock(); - FsmGraph *walk( Compiler *pd, RegExpr *rootRegex ); - - ReOrBlock *orBlock; - ReOrItem *item; - ReOrBlockType type; -}; - -/* An item in an or block. 
*/ -struct ReOrItem -{ - enum ReOrItemType { Data, Range }; - - static ReOrItem *cons( const InputLoc &loc, const String &data ) - { - ReOrItem *r = new ReOrItem; - r->loc = loc; - r->data = data; - r->type = Data; - return r; - } - - static ReOrItem *cons( const InputLoc &loc, char lower, char upper ) - { - ReOrItem *r = new ReOrItem; - r->loc = loc; - r->lower = lower; - r->upper = upper; - r->type = Range; - return r; - } - - FsmGraph *walk( Compiler *pd, RegExpr *rootRegex ); - - InputLoc loc; - String data; - char lower; - char upper; - ReOrItemType type; -}; - - -/* - * Inline code tree - */ -struct InlineList; -struct InlineItem -{ - enum Type - { - Text, - LmSwitch, - LmSetActId, - LmSetTokEnd, - LmOnLast, - LmOnNext, - LmOnLagBehind, - LmInitAct, - LmInitTokStart, - LmSetTokStart - }; - - static InlineItem *cons( const InputLoc &loc, const String &data, Type type ) - { - InlineItem *i = new InlineItem; - i->loc = loc; - i->data = data; - i->nameRef = 0; - i->children = 0; - i->type = type; - return i; - } - - static InlineItem *cons( const InputLoc &loc, NameRef *nameRef, Type type ) - { - InlineItem *i = new InlineItem; - i->loc = loc; - i->nameRef = nameRef; - i->children = 0; - i->type = type; - return i; - } - - static InlineItem *cons( const InputLoc &loc, RegionImpl *tokenRegion, - TokenInstance *longestMatchPart, Type type ) - { - InlineItem *i = new InlineItem; - i->loc = loc; - i->nameRef = 0; - i->children = 0; - i->tokenRegion = tokenRegion; - i->longestMatchPart = longestMatchPart; - i->type = type; - return i; - } - - static InlineItem *cons( const InputLoc &loc, NameInst *nameTarg, Type type ) - { - InlineItem *i = new InlineItem; - i->loc = loc; - i->nameRef = 0; - i->nameTarg = nameTarg; - i->children = 0; - i->type = type; - return i; - } - - static InlineItem *cons( const InputLoc &loc, Type type ) - { - InlineItem *i = new InlineItem; - i->loc = loc; - i->nameRef = 0; - i->children = 0; - i->type = type; - return i; - } - - InputLoc loc; 
- String data; - NameRef *nameRef; - NameInst *nameTarg; - InlineList *children; - RegionImpl *tokenRegion; - TokenInstance *longestMatchPart; - Type type; - - InlineItem *prev, *next; -}; - -struct InlineList -: - public DList<InlineItem> -{ - InlineList( int i ) {} - - static InlineList *cons() - { - return new InlineList( 0 ); - } -}; - - -struct ProdEl; -struct LangVarRef; -struct ObjectField; - -struct PatternItem -{ - enum Form { - TypeRefForm, - InputTextForm - }; - - static PatternItem *cons( Form form, const InputLoc &loc, const String &data ) - { - PatternItem *p = new PatternItem; - p->form = form; - p->loc = loc; - p->prodEl = 0; - p->data = data; - p->region = 0; - p->varRef = 0; - p->bindId = 0; - return p; - } - - static PatternItem *cons( Form form, const InputLoc &loc, ProdEl *prodEl ) - { - PatternItem *p = new PatternItem; - p->form = form; - p->loc = loc; - p->prodEl = prodEl; - p->region = 0; - p->varRef = 0; - p->bindId = 0; - return p; - } - - Form form; - InputLoc loc; - ProdEl *prodEl; - String data; - TokenRegion *region; - LangVarRef *varRef; - long bindId; - PatternItem *prev, *next; -}; - -struct LangExpr; - -struct PatternItemList - : public DList<PatternItem> -{ - static PatternItemList *cons( PatternItem *patternItem ) - { - PatternItemList *list = new PatternItemList; - list->append( patternItem ); - return list; - } -}; - -struct ConsItem -{ - enum Trim { - TrimYes, - TrimNo, - TrimDefault - }; - - enum Type { - InputText, - ExprType, - LiteralType - }; - - ConsItem() - : - type((Type)-1), - expr(0), - langEl(0), - prodEl(0), - bindId(-1), - trim(TrimDefault) - { - } - - static ConsItem *cons( const InputLoc &loc, Type type, const String &data ) - { - ConsItem *r = new ConsItem; - r->loc = loc; - r->type = type; - r->data = data; - return r; - } - - static ConsItem *cons( const InputLoc &loc, Type type, LangExpr *expr, Trim trim ) - { - ConsItem *r = new ConsItem; - r->loc = loc; - r->type = type; - r->expr = expr; - r->trim = 
trim; - return r; - } - - static ConsItem *cons( const InputLoc &loc, Type type, ProdEl *prodEl ) - { - ConsItem *r = new ConsItem; - r->loc = loc; - r->type = type; - r->expr = 0; - r->prodEl = prodEl; - return r; - } - - InputLoc loc; - Type type; - String data; - LangExpr *expr; - LangEl *langEl; - ProdEl *prodEl; - long bindId; - Trim trim; - - ConsItem *prev, *next; -}; - -struct ConsItemList -: - public DList<ConsItem> -{ - static ConsItemList *cons( ConsItem *ci ) - { - ConsItemList *cil = new ConsItemList; - cil->append( ci ); - return cil; - } - - static ConsItemList *cons() - { - return new ConsItemList; - } - - void resolve( Compiler *pd ); - void evaluateSendStream( Compiler *pd, CodeVect &code ); -}; - -struct Pattern -{ - Pattern() - : - nspace(0), - list(0), - patRepId(0), - langEl(0), - pdaRun(0), - nextBindId(1) - {} - - static Pattern *cons( const InputLoc &loc, Namespace *nspace, - PatternItemList *list, int patRepId ) - { - Pattern *p = new Pattern; - p->loc = loc; - p->nspace = nspace; - p->list = list; - p->patRepId = patRepId; - return p; - } - - InputLoc loc; - Namespace *nspace; - PatternItemList *list; - long patRepId; - LangEl *langEl; - struct pda_run *pdaRun; - long nextBindId; - Pattern *prev, *next; -}; - -typedef DList<Pattern> PatList; - -struct Constructor -{ - static Constructor *cons( const InputLoc &loc, Namespace *nspace, - ConsItemList *list, int patRepId ) - { - Constructor *r = new Constructor; - r->loc = loc; - r->nspace = nspace; - r->list = list; - r->patRepId = patRepId; - r->langEl = 0; - r->pdaRun = 0; - r->nextBindId = 1; - r->parse = true; - return r; - } - - InputLoc loc; - Namespace *nspace; - ConsItemList *list; - int patRepId; - LangEl *langEl; - struct pda_run *pdaRun; - long nextBindId; - bool parse; - - Constructor *prev, *next; -}; - -typedef DList<Constructor> ConsList; - -struct ParserText -{ - static ParserText *cons( const InputLoc &loc, - Namespace *nspace, ConsItemList *list, - bool used, bool reduce, 
bool read, - const String &reducer ) - { - ParserText *p = new ParserText; - p->loc = loc; - p->nspace = nspace; - p->list = list; - p->langEl = 0; - p->pdaRun = 0; - p->nextBindId = 1; - p->parse = true; - p->used = used; - p->reduce = reduce; - p->read = read; - p->reducer = reducer; - p->reducerId = -1; - return p; - } - - InputLoc loc; - Namespace *nspace; - ConsItemList *list; - LangEl *langEl; - struct pda_run *pdaRun; - long nextBindId; - bool parse; - bool used; - bool reduce; - bool read; - String reducer; - int reducerId; - - ParserText *prev, *next; -}; - -typedef DList<ParserText> ParserTextList; - -struct Function; - -struct IterDef -{ - enum Type { Tree, Child, RevChild, Repeat, - RevRepeat, User, ListEl, - RevListVal, MapEl }; - - IterDef( Type type, Function *func ); - IterDef( Type type ); - - Type type; - - Function *func; -}; - -struct IterImpl -{ - enum Type { Tree, Child, RevChild, Repeat, - RevRepeat, User, ListEl, ListVal, - RevListVal, MapEl, MapVal }; - - IterImpl( Type type, Function *func ); - IterImpl( Type type ); - - Type type; - - Function *func; - bool useFuncId; - bool useSearchUT; - bool useGenericId; - - code_t inCreateWV; - code_t inCreateWC; - code_t inUnwind; - code_t inDestroy; - code_t inAdvance; - - code_t inGetCurR; - code_t inGetCurWC; - code_t inSetCurWC; - - code_t inRefFromCur; -}; - -struct CmpIterDef -{ - static int compare( const IterDef &id1, const IterDef &id2 ) - { - if ( id1.type < id2.type ) - return -1; - else if ( id1.type > id2.type ) - return 1; - else if ( id1.type == IterDef::User ) { - if ( id1.func < id2.func ) - return -1; - else if ( id1.func > id2.func ) - return 1; - } - - return 0; - } -}; - -typedef AvlSet<IterDef, CmpIterDef> IterDefSet; -typedef AvlSetEl<IterDef> IterDefSetEl; - - -/* - * Unique Types. 
- */ - -/* - * type_ref -> qualified_name - * type_ref -> '*' type_ref - * type_ref -> '&' type_ref - * type_ref -> list type_ref type_ref - * type_ref -> map type_ref type_ref - * type_ref -> vector type_ref - * type_ref -> parser type_ref - * type_ref -> iter_tree type_ref - * type_ref -> iter_child type_ref - * type_ref -> iter_revchild type_ref - * type_ref -> iter_repeat type_ref - * type_ref -> iter_revrepeat type_ref - * type_ref -> iter_user type_ref - * - * type -> nil - * type -> def term - * type -> def nonterm - * type -> '*' type - * type -> '&' type - * type -> list type - * type -> map type type - * type -> vector type - * type -> parser type - * type -> iter_tree type - * type -> iter_child type - * type -> iter_revchild type - * type -> iter_repeat type - * type -> iter_revrepeat type - * type -> iter_user type - */ - -struct UniqueType : public AvlTreeEl<UniqueType> -{ - UniqueType( enum TYPE typeId ) : - typeId(typeId), - langEl(0), - iterDef(0), - structEl(0), - generic(0) - {} - - UniqueType( enum TYPE typeId, LangEl *langEl ) : - typeId(typeId), - langEl(langEl), - iterDef(0), - structEl(0), - generic(0) - {} - - UniqueType( enum TYPE typeId, IterDef *iterDef ) : - typeId(typeId), - langEl(0), - iterDef(iterDef), - structEl(0), - generic(0) - {} - - UniqueType( enum TYPE typeId, StructEl *structEl ) : - typeId(typeId), - langEl(0), - iterDef(0), - structEl(structEl), - generic(0) - {} - - UniqueType( enum TYPE typeId, GenericType *generic ) : - typeId(typeId), - langEl(0), - iterDef(0), - structEl(0), - generic(generic) - {} - - enum TYPE typeId; - LangEl *langEl; - IterDef *iterDef; - StructEl *structEl; - GenericType *generic; - - ObjectDef *objectDef(); - - bool tree() - { return typeId == TYPE_TREE; } - - bool parser() - { return typeId == TYPE_GENERIC && generic->typeId == GEN_PARSER; } - - bool ptr() - { return typeId == TYPE_STRUCT || typeId == TYPE_GENERIC; } - - bool listOf( UniqueType *ut ) - { return typeId == TYPE_GENERIC && 
generic->typeId == GEN_LIST && generic->valueUt == ut; } - - bool val() { - return typeId == TYPE_STRUCT || - typeId == TYPE_GENERIC || - typeId == TYPE_INT || - typeId == TYPE_BOOL; - } -}; - -struct CmpUniqueType -{ - static int compare( const UniqueType &ut1, const UniqueType &ut2 ); -}; - -typedef AvlBasic< UniqueType, CmpUniqueType > UniqueTypeMap; - -enum RepeatType { - RepeatNone = 1, - RepeatRepeat, - RepeatList, - RepeatOpt -}; - -/* - * Repeat types. - */ - -struct UniqueRepeat - : public AvlTreeEl<UniqueRepeat> -{ - UniqueRepeat( RepeatType repeatType, LangEl *langEl ) : - repeatType(repeatType), - langEl(langEl), declLangEl(0) {} - - RepeatType repeatType; - LangEl *langEl; - LangEl *declLangEl; -}; - -struct CmpUniqueRepeat -{ - static int compare( const UniqueRepeat &ut1, const UniqueRepeat &ut2 ); -}; - -typedef AvlBasic< UniqueRepeat, CmpUniqueRepeat > UniqueRepeatMap; - -/* - * Unique generics. Allows us to do singleton declarations of generic types and - * supporting structures. For example, the list type, but also the list element - * struct created for the list type. 
- */ - -struct UniqueGeneric - : public AvlTreeEl<UniqueGeneric> -{ - enum Type - { - List, - ListEl, - Map, - MapEl, - Parser - }; - - UniqueGeneric( Type type, UniqueType *value ) - : - type(type), - key(0), - value(value), - generic(0), - structEl(0) - {} - - UniqueGeneric( Type type, UniqueType *key, UniqueType *value ) - : - type(type), - key(key), - value(value), - generic(0), - structEl(0) - {} - - Type type; - UniqueType *key; - UniqueType *value; - - GenericType *generic; - StructEl *structEl; -}; - -struct CmpUniqueGeneric -{ - static int compare( const UniqueGeneric &ut1, - const UniqueGeneric &ut2 ); -}; - -typedef AvlBasic< UniqueGeneric, CmpUniqueGeneric > UniqueGenericMap; - -/* - * - */ - -typedef AvlMap< StringVect, int, CmpStrVect > VectorTypeIdMap; -typedef AvlMapEl< StringVect, int > VectorTypeIdMapEl; - -typedef Vector<TypeRef*> TypeRefVect; - -struct TypeRef -{ - enum Type - { - Unspecified, - Name, - Literal, - Iterator, - List, - ListPtrs, - ListEl, - Map, - MapEl, - MapPtrs, - Parser, - Ref - }; - - TypeRef() - : - type((Type)-1), - nspaceQual(0), - pdaLiteral(0), - iterCall(0), - iterDef(0), - typeRef1(0), - typeRef2(0), - typeRef3(0), - repeatType(RepeatNone), - parsedVarRef(0), - parsedTypeRef(0), - nspace(0), - uniqueType(0), - searchUniqueType(0), - generic(0), - searchTypeRef(0) - {} - - /* Qualification and a type name. These require lookup. */ - static TypeRef *cons( const InputLoc &loc, NamespaceQual *nspaceQual, - const String &typeName ) - { - TypeRef *t = new TypeRef; - t->type = Name; - t->loc = loc; - t->nspaceQual = nspaceQual; - t->typeName = typeName; - t->repeatType = RepeatNone; - return t; - } - - /* Qualification and a type name. These require lookup. 
*/ - static TypeRef *cons( const InputLoc &loc, NamespaceQual *nspaceQual, - String typeName, RepeatType repeatType ) - { - TypeRef *t = cons( loc, nspaceQual, typeName ); - t->repeatType = repeatType; - return t; - } - - static TypeRef *cons( const InputLoc &loc, LangVarRef *parsedVarRef, - NamespaceQual *nspaceQual, String typeName, RepeatType repeatType ) - { - TypeRef *t = cons( loc, nspaceQual, typeName ); - t->parsedVarRef = parsedVarRef; - t->repeatType = repeatType; - return t; - } - - static TypeRef *cons( const InputLoc &loc, TypeRef *parsedTypeRef, - NamespaceQual *nspaceQual, String typeName, RepeatType repeatType ) - { - TypeRef *t = cons( loc, nspaceQual, typeName ); - t->parsedTypeRef = parsedTypeRef; - t->repeatType = repeatType; - return t; - } - - /* Qualification and a type name. These require lookup. */ - static TypeRef *cons( const InputLoc &loc, NamespaceQual *nspaceQual, - PdaLiteral *pdaLiteral ) - { - TypeRef *t = new TypeRef; - t->type = Literal; - t->loc = loc; - t->nspaceQual = nspaceQual; - t->pdaLiteral = pdaLiteral; - t->repeatType = RepeatNone; - return t; - } - - static TypeRef *cons( const InputLoc &loc, TypeRef *parsedTypeRef, - NamespaceQual *nspaceQual, PdaLiteral *pdaLiteral ) - { - TypeRef *t = cons( loc, nspaceQual, pdaLiteral ); - t->parsedTypeRef = parsedTypeRef; - return t; - } - - /* Qualification and a type name. These require lookup. 
*/ - static TypeRef *cons( const InputLoc &loc, NamespaceQual *nspaceQual, - PdaLiteral *pdaLiteral, RepeatType repeatType ) - { - TypeRef *t = cons( loc, nspaceQual, pdaLiteral ); - t->repeatType = repeatType; - return t; - } - - static TypeRef *cons( const InputLoc &loc, LangVarRef *parsedVarRef, - NamespaceQual *nspaceQual, PdaLiteral *pdaLiteral, RepeatType repeatType ) - { - TypeRef *t = cons( loc, nspaceQual, pdaLiteral ); - t->parsedVarRef = parsedVarRef; - t->repeatType = repeatType; - return t; - } - - static TypeRef *cons( const InputLoc &loc, TypeRef *parsedTypeRef, - NamespaceQual *nspaceQual, PdaLiteral *pdaLiteral, RepeatType repeatType ) - { - TypeRef *t = cons( loc, nspaceQual, pdaLiteral ); - t->parsedTypeRef = parsedTypeRef; - t->repeatType = repeatType; - return t; - } - - /* Generics. */ - static TypeRef *cons( const InputLoc &loc, Type type, - NamespaceQual *nspaceQual, TypeRef *typeRef1, TypeRef *typeRef2 ) - { - TypeRef *t = new TypeRef; - t->type = type; - t->loc = loc; - t->nspaceQual = nspaceQual; - t->typeRef1 = typeRef1; - t->typeRef2 = typeRef2; - t->repeatType = RepeatNone; - return t; - } - - static TypeRef *cons( const InputLoc &loc, Type type, - NamespaceQual *nspaceQual, TypeRef *typeRef1, - TypeRef *typeRef2, TypeRef *typeRef3 ) - { - TypeRef *t = new TypeRef; - t->type = type; - t->loc = loc; - t->nspaceQual = nspaceQual; - t->typeRef1 = typeRef1; - t->typeRef2 = typeRef2; - t->typeRef3 = typeRef3; - t->repeatType = RepeatNone; - return t; - } - - /* Pointers and Refs. */ - static TypeRef *cons( const InputLoc &loc, Type type, TypeRef *typeRef1 ) - { - TypeRef *t = new TypeRef; - t->type = type; - t->loc = loc; - t->typeRef1 = typeRef1; - t->repeatType = RepeatNone; - return t; - } - - /* Resolution not needed. */ - - /* Iterator definition. 
*/ - static TypeRef *cons( const InputLoc &loc, TypeRef *typeRef, IterCall *iterCall ) - { - TypeRef *t = new TypeRef; - t->type = Iterator; - t->loc = loc; - t->repeatType = RepeatNone; - t->iterCall = iterCall; - t->searchTypeRef = typeRef; - return t; - } - - /* Unique type is given directly. */ - static TypeRef *cons( const InputLoc &loc, UniqueType *uniqueType ) - { - TypeRef *t = new TypeRef; - t->type = Unspecified; - t->loc = loc; - t->repeatType = RepeatNone; - t->uniqueType = uniqueType; - return t; - } - - void resolveRepeat( Compiler *pd ); - - Namespace *resolveNspace( Compiler *pd ); - UniqueType *resolveIterator( Compiler *pd ); - UniqueType *resolveTypeName( Compiler *pd ); - UniqueType *resolveTypeLiteral( Compiler *pd ); - UniqueType *resolveTypeList( Compiler *pd ); - UniqueType *resolveTypeListEl( Compiler *pd ); - UniqueType *resolveTypeMap( Compiler *pd ); - UniqueType *resolveTypeMapEl( Compiler *pd ); - UniqueType *resolveTypeParser( Compiler *pd ); - UniqueType *resolveType( Compiler *pd ); - UniqueType *resolveTypeRef( Compiler *pd ); - - bool uniqueGeneric( UniqueGeneric *&inMap, - Compiler *pd, const UniqueGeneric &searchKey ); - - StructEl *declareMapElStruct( Compiler *pd, TypeRef *keyType, TypeRef *valType ); - StructEl *declareListEl( Compiler *pd, TypeRef *valType ); - - Type type; - InputLoc loc; - NamespaceQual *nspaceQual; - String typeName; - PdaLiteral *pdaLiteral; - IterCall *iterCall; - IterDef *iterDef; - TypeRef *typeRef1; - TypeRef *typeRef2; - TypeRef *typeRef3; - RepeatType repeatType; - - /* For pattern and constructor context. */ - LangVarRef *parsedVarRef; - TypeRef *parsedTypeRef; - - /* Resolved. 
*/ - Namespace *nspace; - UniqueType *uniqueType; - UniqueType *searchUniqueType; - GenericType *generic; - TypeRef *searchTypeRef; -}; - -typedef DList<ObjectField> ParameterList; - -struct ObjectMethod -{ - enum Type - { - Call, - ParseFinish - }; - - ObjectMethod( TypeRef *returnTypeRef, String name, - int opcodeWV, int opcodeWC, int numParams, - UniqueType **types, ParameterList *paramList, bool isConst ) - : - type(Call), - returnUT(0), - returnTypeRef(returnTypeRef), - returnTypeId(0), - name(name), - opcodeWV(opcodeWV), - opcodeWC(opcodeWC), - numParams(numParams), - paramList(paramList), - isConst(isConst), - funcId(0), - useFuncId(false), - useCallObj(true), - func(0), - iterDef(0), - useFnInstr(false), - useGenericId(false), - generic(0) - { - } - - ObjectMethod( UniqueType *returnUT, String name, - int opcodeWV, int opcodeWC, int numParams, - UniqueType **types, ParameterList *paramList, - bool isConst ) - : - type(Call), - returnUT(returnUT), - returnTypeRef(0), - returnTypeId(0), - name(name), - opcodeWV(opcodeWV), - opcodeWC(opcodeWC), - numParams(numParams), - paramList(paramList), - isConst(isConst), - funcId(0), - useFuncId(false), - useCallObj(true), - func(0), - iterDef(0), - useFnInstr(false), - useGenericId(false), - generic(0) - { - this->paramUTs = new UniqueType*[numParams]; - memcpy( this->paramUTs, types, sizeof(UniqueType*)*numParams ); - } - - Type type; - UniqueType *returnUT; - TypeRef *returnTypeRef; - long returnTypeId; - String name; - long opcodeWV; - long opcodeWC; - long numParams; - UniqueType **paramUTs; - ParameterList *paramList; - bool isConst; - long funcId; - bool useFuncId; - bool useCallObj; - Function *func; - IterDef *iterDef; - bool useFnInstr; - - bool useGenericId; - GenericType *generic; -}; - -struct RhsVal -{ - RhsVal( ProdEl *prodEl ) - : - prodEl(prodEl) - {} - - ProdEl *prodEl; -}; - -struct ObjectField -{ - enum Type - { - UserLocalType = 1, - UserFieldType, - StructFieldType, - LhsElType, - RedRhsType, - 
InbuiltFieldType, - InbuiltOffType, - InbuiltObjectType, - RhsNameType, - ParamValType, - ParamRefType, - LexSubstrType, - GenericElementType, - GenericDependentType - }; - - ObjectField() - : - typeRef(0), - scope(0), - offset(0), - beenReferenced(false), - isConst(false), - refActive(false), - isExport(false), - isConstVal(false), - useGenericId(false), - generic(0), - mapKeyField(0), - dirtyTree(false), - inGetR( IN_HALT ), - inGetWC( IN_HALT ), - inGetWV( IN_HALT ), - inSetWC( IN_HALT ), - inSetWV( IN_HALT ), - inGetValR( IN_HALT ), - inGetValWC( IN_HALT ), - inGetValWV( IN_HALT ), - inSetValWC( IN_HALT ), - inSetValWV( IN_HALT ), - iterImpl( 0 ) - {} - - static ObjectField *cons( const InputLoc &loc, - Type type, TypeRef *typeRef, const String &name ) - { - ObjectField *c = new ObjectField; - c->loc = loc; - c->type = type; - c->typeRef = typeRef; - c->name = name; - c->initField( ); - return c; - } - - void initField(); - - bool isParam() - { return type == ParamValType || type == ParamRefType; } - - bool isLhsEl() - { return type == LhsElType; } - - bool isRhsGet() - { return type == RhsNameType; } - - bool useOffset() - { - return type != RhsNameType && - type != InbuiltFieldType && - type != InbuiltObjectType; - } - - bool isInbuiltObject() - { return type == InbuiltObjectType; } - - bool exists() - { - switch ( type ) { - case ObjectField::LhsElType: - case ObjectField::UserLocalType: - case ObjectField::RedRhsType: - case ObjectField::UserFieldType: - case ObjectField::StructFieldType: - case ObjectField::GenericDependentType: - return true; - default: - return false; - } - } - - InputLoc loc; - Type type; - TypeRef *typeRef; - String name; - NameScope *scope; - long offset; - bool beenReferenced; - /* Declared const. */ - bool isConst; - bool refActive; - bool isExport; - - /* Value is a const thing when that retrieved by the runtime. Requires a - * const val id. 
*/ - bool isConstVal; - int constValId; - String constValArg; - - bool useGenericId; - GenericType *generic; - - ObjectField *mapKeyField; - - /* True if some aspect of the tree has possibly been written to. This does - * not include attributes. This is here so we can optimize the storage of - * old lhs vars. If only a lhs attribute changes we don't need to preserve - * the original for backtracking. */ - bool dirtyTree; - - Vector<RhsVal> rhsVal; - - code_t inGetR; - code_t inGetWC; - code_t inGetWV; - code_t inSetWC; - code_t inSetWV; - code_t inGetValR; - code_t inGetValWC; - code_t inGetValWV; - code_t inSetValWC; - code_t inSetValWV; - - IterImpl *iterImpl; - - ObjectField *prev, *next; -}; - -typedef DListVal<ObjectField*> FieldList; - -typedef DList<ObjectField> ParameterList; - - -struct ObjectDef -{ - enum Type { - UserType, - FrameType, - IterType, - BuiltinType, - StructType - }; - - ObjectDef() - : - nextOffset(0), - firstNonTree(0) - {} - - static ObjectDef *cons( Type type, String name, int id ) - { - ObjectDef *o = new ObjectDef; - - o->type = type; - o->name = name; - o->id = id; - - o->rootScope = new NameScope; - o->rootScope->owningObj = o; - - return o; - } - - Type type; - String name; - FieldList fieldList; - - NameScope *rootScope; - - NameScope *pushScope( NameScope *curScope ); - - long id; - long nextOffset; - long firstNonTree; - - void referenceField( Compiler *pd, ObjectField *field ); - void placeField( Compiler *pd, ObjectField *field ); - void createCode( Compiler *pd, CodeVect &code ); - ObjectField *findFieldInScope( const NameScope *scope, const String &name ) const; - ObjectField *checkRedecl( NameScope *inScope, const String &name ); - void insertField( NameScope *inScope, const String &name, ObjectField *value ); - void resolve( Compiler *pd ); - ObjectField *findFieldNum( long offset ); - ObjectField *findFieldType( Compiler *pd, UniqueType *ut ); - - long size() { return nextOffset; } - long sizeTrees() { return firstNonTree; 
} -}; - -struct CallArg -{ - CallArg( LangExpr *expr ) - : expr(expr), exprUT(0), offTmp(-1), offQualRef(-1) {} - - LangExpr *expr; - UniqueType *exprUT; - int offTmp; - int offQualRef; -}; - -typedef Vector<LangExpr*> ExprVect; -typedef Vector<CallArg*> CallArgVect; -typedef Vector<String> StringVect; - -struct FieldInit -{ - static FieldInit *cons( const InputLoc &loc, String name, LangExpr *expr ) - { - FieldInit *fi = new FieldInit; - fi->loc = loc; - fi->name = name; - fi->expr = expr; - return fi; - } - - InputLoc loc; - String name; - LangExpr *expr; - - UniqueType *exprUT; -}; - -typedef Vector<FieldInit*> FieldInitVect; - -struct VarRefLookup -{ - VarRefLookup( int lastPtrInQual, int firstConstPart, - ObjectDef *inObject, NameScope *inScope ) - : - lastPtrInQual(lastPtrInQual), - firstConstPart(firstConstPart), - inObject(inObject), - inScope(inScope), - objField(0), - objMethod(0), - uniqueType(0), - iterSearchUT(0) - {} - - int lastPtrInQual; - int firstConstPart; - ObjectDef *inObject; - NameScope *inScope; - ObjectField *objField; - ObjectMethod *objMethod; - UniqueType *uniqueType; - UniqueType *iterSearchUT; -}; - -struct QualItem -{ - enum Form { Dot, Arrow }; - - QualItem( Form form, const InputLoc &loc, const String &data ) - : form(form), loc(loc), data(data) {} - - Form form; - InputLoc loc; - String data; -}; - -typedef Vector<QualItem> QualItemVect; - -struct LangVarRef -{ - static LangVarRef *cons( const InputLoc &loc, Namespace *nspace, - StructDef *structDef, NameScope *scope, - NamespaceQual *nspaceQual, QualItemVect *qual, - const String &name ) - { - LangVarRef *l = new LangVarRef; - l->loc = loc; - l->nspace = nspace; - l->structDef = structDef; - l->scope = scope; - l->nspaceQual = nspaceQual; - l->qual = qual; - l->name = name; - return l; - } - - static LangVarRef *cons( const InputLoc &loc, Namespace *nspace, - StructDef *structDef, NameScope *scope, const String &name ) - { - return cons( loc, nspace, structDef, scope, - 
NamespaceQual::cons( nspace ), new QualItemVect, name ); - } - - void resolve( Compiler *pd ) const; - UniqueType *lookup( Compiler *pd ) const; - - UniqueType *loadField( Compiler *pd, CodeVect &code, ObjectDef *inObject, - ObjectField *el, bool forWriting, bool revert ) const; - - VarRefLookup lookupIterCall( Compiler *pd ) const; - VarRefLookup lookupMethod( Compiler *pd ) const; - VarRefLookup lookupField( Compiler *pd ) const; - - VarRefLookup lookupQualification( Compiler *pd, NameScope *rootScope ) const; - VarRefLookup lookupObj( Compiler *pd ) const; - VarRefLookup lookupMethodObj( Compiler *pd ) const; - - bool isInbuiltObject() const; - bool isLocalRef() const; - bool isProdRef( Compiler *pd ) const; - bool isStructRef() const; - void loadQualification( Compiler *pd, CodeVect &code, NameScope *rootScope, - int lastPtrInQual, bool forWriting, bool revert ) const; - void loadInbuiltObject( Compiler *pd, CodeVect &code, - int lastPtrInQual, bool forWriting ) const; - void loadLocalObj( Compiler *pd, CodeVect &code, - int lastPtrInQual, bool forWriting ) const; - void loadContextObj( Compiler *pd, CodeVect &code, - int lastPtrInQual, bool forWriting ) const; - void loadGlobalObj( Compiler *pd, CodeVect &code, - int lastPtrInQual, bool forWriting ) const; - void loadObj( Compiler *pd, CodeVect &code, int lastPtrInQual, bool forWriting ) const; - void loadScopedObj( Compiler *pd, CodeVect &code, - NameScope *scope, int lastPtrInQual, bool forWriting ) const; - - void verifyRefPossible( Compiler *pd, VarRefLookup &lookup ) const; - bool canTakeRef( Compiler *pd, VarRefLookup &lookup ) const; - - void setFieldIter( Compiler *pd, CodeVect &code, ObjectDef *inObject, - ObjectField *objField, UniqueType *objUT, UniqueType *exprType, - bool revert ) const; - void setFieldSearch( Compiler *pd, CodeVect &code, - ObjectDef *inObject, UniqueType *exprType ) const; - void setField( Compiler *pd, CodeVect &code, ObjectDef *inObject, - ObjectField *el, UniqueType *exprUT, 
bool revert ) const; - - void assignValue( Compiler *pd, CodeVect &code, UniqueType *exprUT ) const; - - IterImpl *chooseTriterCall( Compiler *pd, UniqueType *searchUT, CallArgVect *args ); - - /* The deref generics value is for iterator calls with lists and maps as args. */ - ObjectField **evaluateArgs( Compiler *pd, CodeVect &code, - VarRefLookup &lookup, CallArgVect *args ); - - void callOperation( Compiler *pd, CodeVect &code, VarRefLookup &lookup ) const; - UniqueType *evaluateCall( Compiler *pd, CodeVect &code, CallArgVect *args ); - UniqueType *evaluate( Compiler *pd, CodeVect &code, bool forWriting = false ) const; - ObjectField *evaluateRef( Compiler *pd, CodeVect &code, long pushCount ) const; - ObjectField *preEvaluateRef( Compiler *pd, CodeVect &code ) const; - void resetActiveRefs( Compiler *pd, VarRefLookup &lookup, ObjectField **paramRefs ) const; - long loadQualificationRefs( Compiler *pd, CodeVect &code, NameScope *rootScope ) const; - void popRefQuals( Compiler *pd, CodeVect &code, - VarRefLookup &lookup, CallArgVect *args, bool temps ) const; - - bool isFinishCall( VarRefLookup &lookup ) const; - - InputLoc loc; - Namespace *nspace; - StructDef *structDef; - NameScope *scope; - NamespaceQual *nspaceQual; - QualItemVect *qual; - String name; - long argSize; -}; - -struct LangTerm -{ - enum Type { - VarRefType, - MethodCallType, - NumberType, - StringType, - MatchType, - ProdCompareType, - NewType, - ConstructType, - TypeIdType, - SearchType, - NilType, - TrueType, - FalseType, - ParseType, - ParseTreeType, - ParseStopType, - SendType, - SendTreeType, - MakeTreeType, - MakeTokenType, - EmbedStringType, - CastType - }; - - LangTerm() - : - generic(0), - constructor(0), - consItemList(0), - parserText(0) - {} - - static LangTerm *cons( const InputLoc &loc, Type type, LangVarRef *varRef ) - { - LangTerm *t = new LangTerm; - t->loc = loc; - t->type = type; - t->varRef = varRef; - return t; - } - - static LangTerm *cons( const InputLoc &loc, LangVarRef 
*varRef, CallArgVect *args ) - { - LangTerm *t = new LangTerm; - t->loc = loc; - t->type = MethodCallType; - t->varRef = varRef; - t->args = args; - return t; - } - - static LangTerm *cons( const InputLoc &loc, Type type, CallArgVect *args ) - { - LangTerm *t = new LangTerm; - t->loc = loc; - t->type = type; - t->args = args; - return t; - } - - static LangTerm *cons( const InputLoc &loc, Type type, String data ) - { - LangTerm *t = new LangTerm; - t->loc = loc; - t->type = type; - t->varRef = 0; - t->data = data; - return t; - } - - static LangTerm *cons( const InputLoc &loc, Type type ) - { - LangTerm *t = new LangTerm; - t->loc = loc; - t->type = type; - t->varRef = 0; - t->typeRef = 0; - return t; - } - - static LangTerm *cons( const InputLoc &loc, Type type, TypeRef *typeRef ) - { - LangTerm *t = new LangTerm; - t->loc = loc; - t->type = type; - t->varRef = 0; - t->typeRef = typeRef; - return t; - } - - static LangTerm *cons( const InputLoc &loc, Type type, TypeRef *typeRef, - LangExpr *langExpr ) - { - LangTerm *t = new LangTerm; - t->loc = loc; - t->type = type; - t->varRef = 0; - t->typeRef = typeRef; - t->expr = langExpr; - return t; - } - - static LangTerm *consMatch( const InputLoc &loc, - LangVarRef *varRef, Pattern *pattern ) - { - LangTerm *t = new LangTerm; - t->type = MatchType; - t->loc = loc; - t->varRef = varRef; - t->pattern = pattern; - return t; - } - - static LangTerm *consProdCompare( const InputLoc &loc, - LangVarRef *varRef, const String &prod, LangExpr *matchExpr ) - { - LangTerm *t = new LangTerm; - t->type = ProdCompareType; - t->loc = loc; - t->varRef = varRef; - t->prod = prod; - t->expr = matchExpr; - return t; - } - - static LangTerm *cons( const InputLoc &loc, Type type, LangVarRef *varRef, - Pattern *pattern ) - { - LangTerm *t = new LangTerm; - t->loc = loc; - t->type = type; - t->varRef = varRef; - t->pattern = pattern; - return t; - } - - static LangTerm *cons( const InputLoc &loc, Type type, TypeRef *typeRef, - LangVarRef 
*varRef ) - { - LangTerm *t = new LangTerm; - t->loc = loc; - t->type = type; - t->varRef = varRef; - t->typeRef = typeRef; - return t; - } - - static LangTerm *cons( const InputLoc &loc, Type type, LangVarRef *varRef, - ObjectField *objField, TypeRef *typeRef, FieldInitVect *fieldInitArgs, - Constructor *constructor ) - { - LangTerm *t = new LangTerm; - t->loc = loc; - t->type = type; - t->varRef = varRef; - t->objField = objField; - t->typeRef = typeRef; - t->fieldInitArgs = fieldInitArgs; - t->constructor = constructor; - return t; - } - - static LangTerm *cons( const InputLoc &loc, Type type, LangVarRef *varRef, - ObjectField *objField, TypeRef *typeRef, FieldInitVect *fieldInitArgs, - ConsItemList *consItemList, ParserText *parserText ) - { - LangTerm *t = new LangTerm; - t->loc = loc; - t->type = type; - t->varRef = varRef; - t->objField = objField; - t->typeRef = typeRef; - t->fieldInitArgs = fieldInitArgs; - t->consItemList = consItemList; - t->parserText = parserText; - return t; - } - - static LangTerm *cons( const InputLoc &loc, Type type, LangExpr *expr ) - { - LangTerm *t = new LangTerm; - t->loc = loc; - t->type = type; - t->expr = expr; - return t; - } - - static LangTerm *cons( const InputLoc &loc, ConsItemList *consItemList ) - { - LangTerm *t = new LangTerm; - t->loc = loc; - t->type = EmbedStringType; - t->consItemList = consItemList; - return t; - } - - static LangTerm *cons( const InputLoc &loc, Type type, LangVarRef *varRef, - ParserText *parserText ) - { - LangTerm *s = new LangTerm; - s->loc = loc; - s->type = type; - s->varRef = varRef; - s->parserText = parserText; - return s; - } - - static LangTerm *consSend( const InputLoc &loc, LangVarRef *varRef, - ParserText *parserText, bool eof ) - { - LangTerm *s = new LangTerm; - s->loc = loc; - s->type = SendType; - s->varRef = varRef; - s->parserText = parserText; - s->eof = eof; - return s; - } - - static LangTerm *consSendTree( const InputLoc &loc, LangVarRef *varRef, - ParserText 
*parserText, bool eof ) - { - LangTerm *s = new LangTerm; - s->loc = loc; - s->type = SendTreeType; - s->varRef = varRef; - s->parserText = parserText; - s->eof = eof; - return s; - } - - static LangTerm *consNew( const InputLoc &loc, TypeRef *typeRef, - LangVarRef *captureVarRef, FieldInitVect *fieldInitArgs ) - { - LangTerm *s = new LangTerm; - s->type = NewType; - s->loc = loc; - s->typeRef = typeRef; - s->varRef = captureVarRef; - s->fieldInitArgs = fieldInitArgs; - return s; - } - - void resolveFieldArgs( Compiler *pd ); - void resolve( Compiler *pd ); - - void evaluateCapture( Compiler *pd, CodeVect &code, UniqueType *valUt ) const; - void evaluateCapture( Compiler *pd, CodeVect &code, bool isTree ) const; - UniqueType *evaluateNew( Compiler *pd, CodeVect &code ) const; - UniqueType *evaluateConstruct( Compiler *pd, CodeVect &code ) const; - - static void parseFrag( Compiler *pd, CodeVect &code, int stopId ); - - UniqueType *evaluateParse( Compiler *pd, CodeVect &code, bool tree, bool stop ) const; - UniqueType *evaluateReadReduce( Compiler *pd, CodeVect &code ) const; - void evaluateSendStream( Compiler *pd, CodeVect &code ) const; - void evaluateSendParser( Compiler *pd, CodeVect &code, bool strings ) const; - UniqueType *evaluateSend( Compiler *pd, CodeVect &code ) const; - UniqueType *evaluateSendTree( Compiler *pd, CodeVect &code ) const; - UniqueType *evaluateMatch( Compiler *pd, CodeVect &code ) const; - UniqueType *evaluateProdCompare( Compiler *pd, CodeVect &code ) const; - UniqueType *evaluate( Compiler *pd, CodeVect &code ) const; - void assignFieldArgs( Compiler *pd, CodeVect &code, UniqueType *replUT ) const; - UniqueType *evaluateMakeToken( Compiler *pd, CodeVect &code ) const; - UniqueType *evaluateMakeTree( Compiler *pd, CodeVect &code ) const; - UniqueType *evaluateEmbedString( Compiler *pd, CodeVect &code ) const; - UniqueType *evaluateSearch( Compiler *pd, CodeVect &code ) const; - UniqueType *evaluateCast( Compiler *pd, CodeVect &code ) 
const; - void resolveFieldArgs( Compiler *pd ) const; - - InputLoc loc; - Type type; - LangVarRef *varRef; - CallArgVect *args; - NamespaceQual *nspaceQual; - String data; - ObjectField *objField; - TypeRef *typeRef; - Pattern *pattern; - String prod; - FieldInitVect *fieldInitArgs; - GenericType *generic; - Constructor *constructor; - ConsItemList *consItemList; - ParserText *parserText; - LangExpr *expr; - bool eof; -}; - -struct LangExpr -{ - enum Type { - BinaryType, - UnaryType, - TermType - }; - - static LangExpr *cons( const InputLoc &loc, LangExpr *left, - char op, LangExpr *right ) - { - LangExpr *e = new LangExpr; - e->loc = loc; - e->type = BinaryType; - e->left = left; - e->op = op; - e->right = right; - return e; - } - - static LangExpr *cons( const InputLoc &loc, char op, LangExpr *right ) - { - LangExpr *e = new LangExpr; - e->loc = loc; - e->type = UnaryType; - e->left = 0; - e->op = op; - e->right =right; - return e; - } - - static LangExpr *cons( LangTerm *term ) - { - LangExpr *e = new LangExpr; - e->type = TermType; - e->term = term; - return e; - } - - void resolve( Compiler *pd ) const; - - UniqueType *evaluate( Compiler *pd, CodeVect &code ) const; - bool canTakeRef( Compiler *pd ) const; - - InputLoc loc; - Type type; - LangExpr *left; - char op; - LangExpr *right; - LangTerm *term; -}; - -struct LangStmt; -typedef DList<LangStmt> StmtList; - -struct IterCall -{ - enum Form { - Call, - Expr - }; - - IterCall() - : - langTerm(0), - langExpr(0), - wasExpr(false) - {} - - static IterCall *cons( Form form, LangTerm *langTerm ) - { - IterCall *iterCall = new IterCall; - iterCall->form = form; - iterCall->langTerm = langTerm; - return iterCall; - } - - static IterCall *cons( Form form, LangExpr *langExpr ) - { - IterCall *iterCall = new IterCall; - iterCall->form = form; - iterCall->langExpr = langExpr; - return iterCall; - } - - void resolve( Compiler *pd ) const; - - Form form; - LangTerm *langTerm; - LangExpr *langExpr; - bool wasExpr; -}; - 
-struct LangStmt -{ - enum Type { - AssignType, - ExprType, - IfType, - ElseType, - RejectType, - WhileType, - ReturnType, - YieldType, - ForIterType, - BreakType - }; - - LangStmt() - : - type((Type)-1), - varRef(0), - langTerm(0), - objField(0), - typeRef(0), - expr(0), - constructor(0), - parserText(0), - exprPtrVect(0), - fieldInitVect(0), - stmtList(0), - elsePart(0), - iterCall(0), - context(0), - scope(0), - consItemList(0), - - /* Normally you don't need to initialize double list pointers, however, - * we make use of the next pointer for returning a pair of statements - * using one pointer to a LangStmt, so we need to initialize the - * pointers. */ - prev(0), - next(0) - {} - - static LangStmt *cons( const InputLoc &loc, Type type, FieldInitVect *fieldInitVect ) - { - LangStmt *s = new LangStmt; - s->loc = loc; - s->type = type; - s->fieldInitVect = fieldInitVect; - return s; - } - - static LangStmt *cons( const InputLoc &loc, Type type, CallArgVect *exprPtrVect ) - { - LangStmt *s = new LangStmt; - s->loc = loc; - s->type = type; - s->exprPtrVect = exprPtrVect; - return s; - } - - static LangStmt *cons( const InputLoc &loc, Type type, LangExpr *expr ) - { - LangStmt *s = new LangStmt; - s->loc = loc; - s->type = type; - s->expr = expr; - return s; - } - - static LangStmt *cons( Type type, LangVarRef *varRef ) - { - LangStmt *s = new LangStmt; - s->type = type; - s->varRef = varRef; - return s; - } - - static LangStmt *cons( const InputLoc &loc, Type type, ObjectField *objField ) - { - LangStmt *s = new LangStmt; - s->loc = loc; - s->type = type; - s->objField = objField; - return s; - } - - static LangStmt *cons( const InputLoc &loc, Type type, LangVarRef *varRef, LangExpr *expr ) - { - LangStmt *s = new LangStmt; - s->loc = loc; - s->type = type; - s->varRef = varRef; - s->expr = expr; - return s; - } - - static LangStmt *cons( Type type, LangExpr *expr, StmtList *stmtList ) - { - LangStmt *s = new LangStmt; - s->type = type; - s->expr = expr; - 
s->stmtList = stmtList; - return s; - } - - static LangStmt *cons( Type type, LangExpr *expr, StmtList *stmtList, LangStmt *elsePart ) - { - LangStmt *s = new LangStmt; - s->type = type; - s->expr = expr; - s->stmtList = stmtList; - s->elsePart = elsePart; - return s; - } - - void setElsePart( LangStmt *elsePart ) - { - this->elsePart = elsePart; - } - - static LangStmt *cons( Type type, StmtList *stmtList ) - { - LangStmt *s = new LangStmt; - s->type = type; - s->stmtList = stmtList; - return s; - } - - - static LangStmt *cons( const InputLoc &loc, Type type ) - { - LangStmt *s = new LangStmt; - s->loc = loc; - s->type = type; - return s; - } - - static LangStmt *cons( Type type, LangVarRef *varRef, Constructor *constructor ) - { - LangStmt *s = new LangStmt; - s->type = type; - s->varRef = varRef; - s->constructor = constructor; - return s; - } - - static LangStmt *cons( const InputLoc &loc, Type type, ObjectField *objField, - TypeRef *typeRef, LangTerm *langTerm, StmtList *stmtList ) - { - LangStmt *s = new LangStmt; - s->loc = loc; - s->type = type; - s->langTerm = langTerm; - s->objField = objField; - s->typeRef = typeRef; - s->stmtList = stmtList; - return s; - } - - static LangStmt *cons( const InputLoc &loc, Type type, ObjectField *objField, - TypeRef *typeRef, IterCall *iterCall, StmtList *stmtList, - StructDef *context, NameScope *scope ) - { - LangStmt *s = new LangStmt; - s->loc = loc; - s->type = type; - s->objField = objField; - s->typeRef = typeRef; - s->iterCall = iterCall; - s->stmtList = stmtList; - s->context = context; - s->scope = scope; - return s; - } - - static LangStmt *cons( const InputLoc &loc, Type type, ConsItemList *consItemList ) - { - LangStmt *s = new LangStmt; - s->loc = loc; - s->type = type; - s->consItemList = consItemList; - return s; - } - - static LangStmt *cons( Type type ) - { - LangStmt *s = new LangStmt; - s->type = type; - return s; - } - - void declareForIter( Compiler *pd ) const; - - void declare( Compiler *pd ) 
const; - - void resolveForIter( Compiler *pd ) const; - void resolve( Compiler *pd ) const; - void resolveParserItems( Compiler *pd ) const; - - void chooseDefaultIter( Compiler *pd, IterCall *iterCall ) const; - void compileWhile( Compiler *pd, CodeVect &code ) const; - void compileForIterBody( Compiler *pd, CodeVect &code, UniqueType *iterUT ) const; - void compileForIter( Compiler *pd, CodeVect &code ) const; - void compile( Compiler *pd, CodeVect &code ) const; - - InputLoc loc; - Type type; - LangVarRef *varRef; - LangTerm *langTerm; - ObjectField *objField; - TypeRef *typeRef; - LangExpr *expr; - Constructor *constructor; - ParserText *parserText; - CallArgVect *exprPtrVect; - FieldInitVect *fieldInitVect; - StmtList *stmtList; - /* Either another if, or an else. */ - LangStmt *elsePart; - String name; - IterCall *iterCall; - StructDef *context; - NameScope *scope; - ConsItemList *consItemList; - - /* Normally you don't need to initialize double list pointers, however, we - * make use of the next pointer for returning a pair of statements using - * one pointer to a LangStmt, so we need to initialize it above. */ - LangStmt *prev, *next; -}; - -struct CodeBlock -{ - CodeBlock() - : - frameId(-1), - context(0) - {} - - static CodeBlock *cons( StmtList *stmtList, ObjectDef *localFrame ) - { - CodeBlock *c = new CodeBlock; - c->stmtList = stmtList; - c->localFrame = localFrame; - return c; - } - - void declare( Compiler *pd ) const; - void resolve( Compiler *pd ) const; - void compile( Compiler *pd, CodeVect &code ) const; - - long frameId; - StmtList *stmtList; - ObjectDef *localFrame; - Locals locals; - StructDef *context; - - /* Each frame has two versions of - * the code: revert and commit. 
*/ - CodeVect codeWV, codeWC; -}; - -struct Function -{ - Function() - : - nspace(0), - paramListSize(0), - paramUTs(0), - inContext(0), - objMethod(0), - inHost(false) - {} - - static Function *cons( Namespace *nspace, TypeRef *typeRef, const String &name, - ParameterList *paramList, CodeBlock *codeBlock, - int funcId, bool isUserIter, bool exprt ) - { - Function *f = new Function; - - f->nspace = nspace; - f->typeRef = typeRef; - f->name = name; - f->paramList = paramList; - f->codeBlock = codeBlock; - f->funcId = funcId; - f->isUserIter = isUserIter; - f->exprt = exprt; - - return f; - } - - Namespace *nspace; - TransBlock *transBlock; - TypeRef *typeRef; - String name; - String hostCall; - ParameterList *paramList; - CodeBlock *codeBlock; - ObjectDef *localFrame; - long funcId; - bool isUserIter; - long paramListSize; - UniqueType **paramUTs; - StructDef *inContext; - bool exprt; - ObjectMethod *objMethod; - bool inHost; - - Function *prev, *next; -}; - -typedef DList<Function> FunctionList; - -#endif /* _COLM_PARSETREE_H */ - diff --git a/src/pcheck.cc b/src/pcheck.cc deleted file mode 100644 index 6f41a7ce..00000000 --- a/src/pcheck.cc +++ /dev/null @@ -1,156 +0,0 @@ -/* - * Copyright 2006-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "pcheck.h" - -#include <stdbool.h> - -/* Construct a new parameter checker with for paramSpec. */ -ParamCheck::ParamCheck( const char *paramSpec, int argc, const char **argv ) -: - state(noparam), - argOffset(0), - curArg(0), - iCurArg(1), - paramSpec(paramSpec), - argc(argc), - argv(argv) -{ -} - -/* Check a single option. Returns the index of the next parameter. Sets p to - * the arg character if valid, 0 otherwise. Sets parg to the parameter arg if - * there is one, NULL otherwise. */ -bool ParamCheck::check() -{ - bool requiresParam; - - if ( iCurArg >= argc ) { /* Off the end of the arg list. */ - state = noparam; - return false; - } - - if ( argOffset != 0 && *argOffset == 0 ) { - /* We are at the end of an arg string. */ - iCurArg += 1; - if ( iCurArg >= argc ) { - state = noparam; - return false; - } - argOffset = 0; - } - - if ( argOffset == 0 ) { - /* Set the current arg. */ - curArg = argv[iCurArg]; - - /* We are at the beginning of an arg string. */ - if ( argv[iCurArg] == 0 || /* Argv[iCurArg] is null. */ - argv[iCurArg][0] != '-' || /* Not a param. */ - argv[iCurArg][1] == 0 ) { /* Only a dash. */ - parameter = 0; - parameterArg = 0; - - iCurArg += 1; - state = noparam; - return true; - } - argOffset = argv[iCurArg] + 1; - } - - /* Get the arg char. */ - char argChar = *argOffset; - - /* Loop over all the parms and look for a match. 
*/ - const char *pSpec = paramSpec; - while ( *pSpec != 0 ) { - char pSpecChar = *pSpec; - - /* If there is a ':' following the char then - * it requires a parm. If a parm is required - * then move ahead two in the parmspec. Otherwise - * move ahead one in the parm spec. */ - if ( pSpec[1] == ':' ) { - requiresParam = true; - pSpec += 2; - } - else { - requiresParam = false; - pSpec += 1; - } - - /* Do we have a match. */ - if ( argChar == pSpecChar ) { - if ( requiresParam ) { - if ( argOffset[1] == 0 ) { - /* The param must follow. */ - if ( iCurArg + 1 == argc ) { - /* We are the last arg so there - * cannot be a parameter to it. */ - parameter = argChar; - parameterArg = 0; - iCurArg += 1; - argOffset = 0; - state = invalid; - return true; - } - else { - /* the parameter to the arg is the next arg. */ - parameter = pSpecChar; - parameterArg = argv[iCurArg + 1]; - iCurArg += 2; - argOffset = 0; - state = match; - return true; - } - } - else { - /* The param for the arg is built in. */ - parameter = pSpecChar; - parameterArg = argOffset + 1; - iCurArg += 1; - argOffset = 0; - state = match; - return true; - } - } - else { - /* Good, we matched the parm and no - * arg is required. */ - parameter = pSpecChar; - parameterArg = 0; - argOffset += 1; - state = match; - return true; - } - } - } - - /* We did not find a match. Bad Argument. 
*/ - parameter = argChar; - parameterArg = 0; - argOffset += 1; - state = invalid; - return true; -} - - diff --git a/src/pcheck.h b/src/pcheck.h deleted file mode 100644 index 96746470..00000000 --- a/src/pcheck.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef _COLM_PCHECK_H -#define _COLM_PCHECK_H - -class ParamCheck -{ -public: - ParamCheck( const char *paramSpec, int argc, const char **argv ); - - bool check(); - - const char *parameterArg; /* The argument to the parameter. */ - char parameter; /* The parameter matched. */ - enum { match, invalid, noparam } state; - - const char *argOffset; /* If we are reading params inside an - * arg this points to the offset. */ - - const char *curArg; /* Pointer to the current arg. */ - int iCurArg; /* Index to the current arg. 
*/ - -private: - const char *paramSpec; /* Parameter spec supplied by the coder. */ - int argc; /* Argument data from the command line. */ - const char **argv; -}; - -#endif /* _COLM_PCHECK_H */ - diff --git a/src/pdabuild.cc b/src/pdabuild.cc deleted file mode 100644 index ba4850df..00000000 --- a/src/pdabuild.cc +++ /dev/null @@ -1,2204 +0,0 @@ -/* - * Copyright 2006-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#define EOF_REGION 0 - -#include <stdio.h> -#include <string.h> -#include <stdbool.h> -#include <assert.h> - -#include <iostream> - -/* Dumping the fsm. */ -#include <mergesort.h> - -/* Parsing. 
*/ -#include "compiler.h" -#include "pdacodegen.h" - -using std::endl; -using std::cerr; -using std::cout; - -char startDefName[] = "start"; - -extern "C" tree_t **internal_host_call( program_t *prg, long code, tree_t **sp ) -{ - return 0; -} - -extern "C" void internal_commit_reduce_forward( program_t *prg, tree_t **root, - struct pda_run *pda_run, parse_tree_t *pt ) -{ - commit_clear_parse_tree( prg, root, pda_run, pt->child ); -} - -extern "C" long internal_commit_union_sz( int reducer ) -{ - return 0; -} - -extern "C" void internal_init_need() -{ -} - -extern "C" int internal_reducer_need_tok( program_t *prg, struct pda_run *, int id ) -{ - return 3; -} - -extern "C" int internal_reducer_need_ign( program_t *prg, struct pda_run * ) -{ - return 3; -} - -/* Count the transitions in the fsm by walking the state list. */ -int countTransitions( PdaGraph *fsm ) -{ - int numTrans = 0; - PdaState *state = fsm->stateList.head; - while ( state != 0 ) { - numTrans += state->transMap.length(); - state = state->next; - } - return numTrans; -} - -LangEl::LangEl( Namespace *nspace, const String &name, Type type ) -: - nspace(nspace), - name(name), - lit(name), - type(type), - id(-1), - numAppearances(0), - commit(false), - isIgnore(false), - reduceFirst(false), - isLiteral(false), - isRepeat(false), - isList(false), - isOpt(false), - parseStop(false), - isEOF(false), - repeatOf(0), - tokenDef(0), - rootDef(0), - termDup(0), - eofLel(0), - pdaGraph(0), - pdaTables(0), - transBlock(0), - objectDef(0), - thisSize(0), - ofiOffset(0), - parserId(-1), - predType(PredNone), - predValue(0), - contextDef(0), - contextIn(0), - noPreIgnore(false), - noPostIgnore(false), - isZero(false) -{ -} - -PdaGraph *ProdElList::walk( Compiler *pd, Production *prod ) -{ - PdaGraph *prodFsm = new PdaGraph(); - PdaState *last = prodFsm->addState(); - prodFsm->setStartState( last ); - - int prodLength = 0; - for ( Iter prodEl = first(); prodEl.lte(); prodEl++, prodLength++ ) { - //PdaGraph *itemFsm = 
prodEl->walk( pd ); - long value = prodEl->langEl->id; - - PdaState *newState = prodFsm->addState(); - PdaTrans *newTrans = prodFsm->appendNewTrans( last, newState, value, value ); - - newTrans->isShift = true; - newTrans->shiftPrior = prodEl->priorVal; - //cerr << "PRIOR VAL: " << newTrans->shiftPrior << endl; - - if ( prodEl->commit ) { - //cout << "COMMIT: inserting commit of length: " << pd->prodLength << endl; - /* Insert the commit into transitions out of last */ - for ( TransMap::Iter trans = last->transMap; trans.lte(); trans++ ) - trans->value->commits.insert( prodLength ); - } - - last = newState; - } - - /* Make the last state the final state. */ - prodFsm->setFinState( last ); - return prodFsm; -} - - -ProdElList *Compiler::makeProdElList( LangEl *langEl ) -{ - ProdElList *prodElList = new ProdElList(); - UniqueType *uniqueType = findUniqueType( TYPE_TREE, langEl ); - TypeRef *typeRef = TypeRef::cons( internal, uniqueType ); - prodElList->append( new ProdEl( internal, typeRef ) ); - prodElList->tail->langEl = langEl; - return prodElList; -} - -void Compiler::makeDefinitionNames() -{ - for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { - int prodNum = 1; - for ( LelDefList::Iter def = lel->defList; def.lte(); def++ ) { - def->data.setAs( lel->name.length() + 32, "%s-%i", - lel->name.data, prodNum++ ); - } - } -} - -/* Make sure there are no language elements whose type is unknown. This - * can happen when an id is used on the rhs of a definition but is not defined - * as anything. */ -void Compiler::noUndefindLangEls() -{ - for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { - if ( lel->type == LangEl::Unknown ) - error() << "'" << lel->name << "' was not defined as anything" << endp; - } -} - -void Compiler::makeLangElIds() -{ - /* The first id 0 is reserved for the stack sentinel. A negative id means - * error to the parsing function, inducing backtracking. */ - nextLelId = 1; - - /* First pass assigns to the user terminals. 
*/ - for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { - /* Must be a term, and not any of the special reserved terminals. - * Remember if the non terminal is a user non terminal. */ - if ( lel->type == LangEl::Term && - !lel->isEOF && - lel != errorLangEl && - lel != noTokenLangEl ) - { - lel->id = nextLelId++; - } - } - - //eofLangEl->id = nextLelId++; - for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { - /* Must be a term, and not any of the special reserved terminals. - * Remember if the non terminal is a user non terminal. */ - if ( lel->isEOF ) - lel->id = nextLelId++; - } - - /* Next assign to the eof notoken, which we always create. */ - noTokenLangEl->id = nextLelId++; - - /* Possibly assign to the error language element. */ - if ( errorLangEl != 0 ) - errorLangEl->id = nextLelId++; - - /* Save this for the code generation. */ - firstNonTermId = nextLelId; - - /* A third and final pass assigns to everything else. */ - for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { - /* Anything else not yet assigned gets assigned now. */ - if ( lel->id < 0 ) - lel->id = nextLelId++; - } - - assert( ptrLangEl->id == LEL_ID_PTR ); - assert( strLangEl->id == LEL_ID_STR ); - assert( ignoreLangEl->id == LEL_ID_IGNORE ); -} - -void Compiler::makeStructElIds() -{ - firstStructElId = nextLelId; - - /* Start at the next lang el id and go up from there. Using disjoint sets - * allows us to verify that a tree is a tree and struct is a struct because - * the ID field is at the same offset. 
*/ - int nextId = nextLelId; - for ( StructElList::Iter sel = structEls; sel.lte(); sel++ ) - sel->id = nextId++; - - structInbuiltId = nextId++; - structInputId = nextId++; - structStreamId = nextId++; -} - -void Compiler::refNameSpace( LangEl *lel, Namespace *nspace ) -{ - if ( nspace == rootNamespace ) { - lel->refName = "::" + lel->refName; - return; - } - - lel->refName = nspace->name + "::" + lel->refName; - lel->declName = nspace->name + "::" + lel->declName; - lel->xmlTag = nspace->name + "::" + lel->xmlTag; - refNameSpace( lel, nspace->parentNamespace ); -} - -void Compiler::makeLangElNames() -{ - for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { - lel->fullName = lel->name; - lel->fullLit = lel->lit; - lel->refName = lel->lit; - lel->declName = lel->lit; - lel->xmlTag = lel->name; - - /* If there is also a namespace next to the type, we add a prefix to - * the type. It's not convenient to name C++ classes the same as a - * namespace in the same scope. We don't want to restrict colm, so we - * add a workaround for the least-common case. The type gets t_ prefix. - * */ - Namespace *nspace = lel->nspace->findNamespace( lel->name ); - if ( nspace != 0 ) { - lel->refName = "t_" + lel->refName; - lel->fullName = "t_" + lel->fullName; - lel->declName = "t_" + lel->declName; - lel->xmlTag = "t_" + lel->xmlTag; - } - - refNameSpace( lel, lel->nspace ); - } -} - -/* Set up dot sets, shift info, and prod sets. */ -void Compiler::makeProdFsms() -{ - /* There are two items in the index for each production (high and low). */ - int indexLen = prodList.length() * 2; - dotItemIndex.setAsNew( indexLen ); - int dsiLow = 0, indexPos = 0; - - /* Build FSMs for all production language elements. */ - for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) - prod->fsm = prod->prodElList->walk( this, prod ); - - makeNonTermFirstSets(); - makeFirstSets(); - - /* Build FSMs for all production language elements. 
*/ - for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) { - if ( addUniqueEmptyProductions ) { - /* This must be re-implemented. */ - assert( false ); - //if ( !prod->isLeftRec && prod->uniqueEmptyLeader != 0 ) { - // PdaGraph *emptyLeader = prod->uniqueEmptyLeader->walk( this ); - // emptyLeader->concatOp( prod->fsm ); - // prod->fsm = emptyLeader; - //} - } - - /* Compute the machine's length. */ - prod->fsmLength = prod->fsm->fsmLength( ); - - /* Productions have a unique production id for each final state. - * This lets us use a production length specific to each final state. - * Start states are always isolated therefore if the start state is - * final then reductions from it will always have a fixed production - * length. This is a simple method for determining the length - * of zero-length derivations when reducing. */ - - /* Number of dot items needed for the production is elements + 1 - * because the dot can be before the first and after the last element. */ - int numForProd = prod->fsm->stateList.length() + 1; - - /* Set up the low and high values in the index for this production. */ - dotItemIndex.data[indexPos].key = dsiLow; - dotItemIndex.data[indexPos].value = prod; - dotItemIndex.data[indexPos+1].key = dsiLow + numForProd - 1; - dotItemIndex.data[indexPos+1].value = prod; - - int dsi = dsiLow; - for ( PdaStateList::Iter state = prod->fsm->stateList; state.lte(); state++, dsi++ ) { - /* All transitions are shifts. */ - for ( TransMap::Iter out = state->transMap; out.lte(); out++ ) - assert( out->value->isShift ); - - state->dotSet.insert( dsi ); - } - - /* Move over the production. 
*/ - dsiLow += numForProd; - indexPos += 2; - - if ( prod->prodCommit ) { - for ( PdaStateSet::Iter fin = prod->fsm->finStateSet; fin.lte(); fin++ ) { - int length = prod->fsmLength; - //cerr << "PENDING COMMIT IN FINAL STATE of " << prod->prodId << - // " with len: " << length << endl; - (*fin)->pendingCommits.insert( ProdIdPair( prod->prodId, length ) ); - } - } - } - - /* Make the final state specific prod id to prod id mapping. */ - prodIdIndex = new Production*[prodList.length()]; - for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) - prodIdIndex[prod->prodId] = prod; -} - -/* Want the first set of over src. If the first set contains epsilon, go over - * it and over tab. If overSrc is the end of the production, find the follow - * from the table, taking only the characters on which the parent is reduced. - * */ -void Compiler::findFollow( AlphSet &result, PdaState *overTab, - PdaState *overSrc, Production *parentDef ) -{ - if ( overSrc->isFinState() ) { - assert( overSrc->transMap.length() == 0 ); - - /* At the end of the production. Turn to the table. */ - long redCode = makeReduceCode( parentDef->prodId, false ); - for ( TransMap::Iter tabTrans = overTab->transMap; tabTrans.lte(); tabTrans++ ) { - for ( ActDataList::Iter adl = tabTrans->value->actions; adl.lte(); adl++ ) { - if ( *adl == redCode ) - result.insert( tabTrans->key ); - } - } - } - else { - /* Get the first set of the item. If the first set contains epsilon - * then move over overSrc and overTab and recurse. */ - assert( overSrc->transMap.length() == 1 ); - TransMap::Iter pastTrans = overSrc->transMap; - - LangEl *langEl = langElIndex[pastTrans->key]; - if ( langEl != 0 && langEl->type == LangEl::NonTerm ) { - bool hasEpsilon = false; - for ( LelDefList::Iter def = langEl->defList; def.lte(); def++ ) { - result.insert( def->firstSet ); - - if ( def->firstSet.find( -1 ) ) - hasEpsilon = true; - } - - /* Find the equivalent state in the parser. 
*/ - if ( hasEpsilon ) { - PdaTrans *tabTrans = overTab->findTrans( pastTrans->key ); - findFollow( result, tabTrans->toState, - pastTrans->value->toState, parentDef ); - } - - /* Now possibly the dup. */ - if ( langEl->termDup != 0 ) - result.insert( langEl->termDup->id ); - } - else { - result.insert( pastTrans->key ); - } - } -} - -PdaState *Compiler::followProd( PdaState *tabState, PdaState *prodState ) -{ - while ( prodState->transMap.length() == 1 ) { - TransMap::Iter prodTrans = prodState->transMap; - PdaTrans *tabTrans = tabState->findTrans( prodTrans->key ); - prodState = prodTrans->value->toState; - tabState = tabTrans->toState; - } - return tabState; -} - -void Compiler::trySetTime( PdaTrans *trans, long code, long &time ) -{ - /* Find the item. */ - for ( ActDataList::Iter adl = trans->actions; adl.lte(); adl++ ) { - if ( *adl == code ) { - /* If the time of the shift is not already set, set it. */ - if ( trans->actOrds[adl.pos()] == 0 ) { - //cerr << "setting time: state = " << tabState->stateNum - // << ", trans = " << tabTrans->lowKey - // << ", time = " << time << endl; - trans->actOrds[adl.pos()] = time++; - } - break; - } - } -} - -/* Go down a definition and then handle the follow actions. */ -void Compiler::pdaOrderFollow( LangEl *rootEl, PdaState *tabState, - PdaTrans *tabTrans, PdaTrans *srcTrans, Production *parentDef, - Production *definition, long &time ) -{ - /* We need the follow from tabState/srcState over the definition we are - * currently processing. */ - PdaState *overTab = tabTrans->toState; - PdaState *overSrc = srcTrans->toState; - - AlphSet alphSet; - if ( parentDef == rootEl->rootDef ) - alphSet.insert( rootEl->eofLel->id ); - else - findFollow( alphSet, overTab, overSrc, parentDef ); - - /* Now follow the production to find out where it expands to. */ - PdaState *expandToState = followProd( tabState, definition->fsm->startState ); - - /* Find the reduce item. 
*/ - long redCode = makeReduceCode( definition->prodId, false ); - - for ( TransMap::Iter tt = expandToState->transMap; tt.lte(); tt++ ) { - if ( alphSet.find( tt->key ) ) { - trySetTime( tt->value, redCode, time ); - - /* If the items token region is not recorded in the state, do it now. */ - addRegion( expandToState, tt->value, tt->key, - tt->value->noPreIgnore, tt->value->noPostIgnore ); - } - } -} - -bool regionVectHas( RegionVect &regVect, TokenRegion *region ) -{ - for ( RegionVect::Iter trvi = regVect; trvi.lte(); trvi++ ) { - if ( *trvi == region ) - return true; - } - return false; -} - -void Compiler::addRegion( PdaState *tabState, PdaTrans *tabTrans, - long pdaKey, bool noPreIgnore, bool noPostIgnore ) -{ - LangEl *langEl = langElIndex[pdaKey]; - if ( langEl != 0 && langEl->type == LangEl::Term ) { - TokenRegion *region = 0; - RegionSet *regionSet = 0; - - /* If it is not the eof, then use the region associated - * with the token definition. */ - if ( langEl->isZero ) { - region = langEl->tokenDef->regionSet->collectIgnore; - regionSet = langEl->tokenDef->regionSet; - } - else if ( !langEl->isEOF && langEl->tokenDef != 0 ) { - region = langEl->tokenDef->regionSet->tokenIgnore; - regionSet = langEl->tokenDef->regionSet; - } - - if ( region != 0 ) { - /* region. 
*/ - TokenRegion *scanRegion = region; - - if ( langEl->noPreIgnore ) - scanRegion = regionSet->tokenOnly; - - if ( !regionVectHas( tabState->regions, scanRegion ) ) - tabState->regions.append( scanRegion ); - - /* Pre-region of to state */ - PdaState *toState = tabTrans->toState; - if ( !langEl->noPostIgnore && - regionSet->ignoreOnly != 0 && - !regionVectHas( toState->preRegions, regionSet->ignoreOnly ) ) - { - toState->preRegions.append( regionSet->ignoreOnly ); - } - } - } -} - -#if 0 - orderState( tabState, prodState, time ): - if not tabState.dotSet.find( prodState.dotID ) - tabState.dotSet.insert( prodState.dotID ) - tabTrans = tabState.findMatchingTransition( prodState.getTransition() ) - - if tabTrans is NonTerminal: - for production in tabTrans.nonTerm.prodList: - orderState( tabState, production.startState, time ) - - for all expandToState in tabTrans.expandToStates: - for all followTrans in expandToState.transList - reduceAction = findAction( production.reduction ) - if reduceAction.time is unset: - reduceAction.time = time++ - end - end - end - end - end - - shiftAction = tabTrans.findAction( shift ) - if shiftAction.time is unset: - shiftAction.time = time++ - end - - orderState( tabTrans.toState, prodTrans.toState, time ) - end - end - - orderState( parseTable.startState, startProduction.startState, 1 ) -#endif - -void Compiler::pdaOrderProd( LangEl *rootEl, PdaState *tabState, - PdaState *srcState, Production *parentDef, long &time ) -{ - assert( srcState->dotSet.length() == 1 ); - if ( tabState->dotSet2.find( srcState->dotSet[0] ) ) - return; - tabState->dotSet2.insert( srcState->dotSet[0] ); - - assert( srcState->transMap.length() == 0 || srcState->transMap.length() == 1 ); - - if ( srcState->transMap.length() == 1 ) { - TransMap::Iter srcTrans = srcState->transMap; - - /* Find the equivalent state in the parser. */ - PdaTrans *tabTrans = tabState->findTrans( srcTrans->key ); - - /* Recurse into the transition if it is a non-terminal. 
*/ - LangEl *langEl = langElIndex[srcTrans->key]; - if ( langEl != 0 ) { - if ( langEl->reduceFirst ) { - /* Use a shortest match ordering for the contents of this - * nonterminal. Does follows for all productions first, then - * goes down the productions. */ - for ( LelDefList::Iter expDef = langEl->defList; expDef.lte(); expDef++ ) { - pdaOrderFollow( rootEl, tabState, tabTrans, srcTrans->value, - parentDef, expDef, time ); - } - for ( LelDefList::Iter expDef = langEl->defList; expDef.lte(); expDef++ ) - pdaOrderProd( rootEl, tabState, expDef->fsm->startState, expDef, time ); - - } - else { - /* The default action ordering. For each prod, goes down the - * prod then sets the follow before going to the next prod. */ - for ( LelDefList::Iter expDef = langEl->defList; expDef.lte(); expDef++ ) { - pdaOrderProd( rootEl, tabState, expDef->fsm->startState, expDef, time ); - - pdaOrderFollow( rootEl, tabState, tabTrans, srcTrans->value, - parentDef, expDef, time ); - } - } - } - - trySetTime( tabTrans, SHIFT_CODE, time ); - - /* Now possibly for the dup. */ - if ( langEl != 0 && langEl->termDup != 0 ) { - PdaTrans *dupTrans = tabState->findTrans( langEl->termDup->id ); - trySetTime( dupTrans, SHIFT_CODE, time ); - } - - /* If the items token region is not recorded in the state, do it now. */ - addRegion( tabState, tabTrans, srcTrans->key, - srcTrans->value->noPreIgnore, srcTrans->value->noPostIgnore ); - - /* Go over one in the production. */ - pdaOrderProd( rootEl, tabTrans->toState, - srcTrans->value->toState, parentDef, time ); - } -} - -void Compiler::pdaActionOrder( PdaGraph *pdaGraph, LangElSet &parserEls ) -{ - for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { - assert( (state->stateBits & SB_ISMARKED) == 0 ); - - /* Traverse the src state's transitions. */ - long last = 0; - for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { - if ( ! 
trans.first() ) - assert( last < trans->key ); - last = trans->key; - } - } - - /* Compute the action orderings, record the max value. */ - long time = 1; - for ( LangElSet::Iter pe = parserEls; pe.lte(); pe++ ) { - PdaState *startState = (*pe)->rootDef->fsm->startState; - pdaOrderProd( *pe, (*pe)->startState, startState, (*pe)->rootDef, time ); - - /* Walk over the start lang el and set the time for shift of - * the eof action that completes the parse. */ - PdaTrans *overStart = (*pe)->startState->findTrans( (*pe)->id ); - PdaTrans *eofTrans = overStart->toState->findTrans( (*pe)->eofLel->id ); - eofTrans->actOrds[0] = time++; - } - - for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { - if ( state->regions.length() == 0 ) { - for ( TransMap::Iter tel = state->transMap; tel.lte(); tel++ ) { - /* There are no regions and EOF leaves the state. Add the eof - * token region. */ - PdaTrans *trans = tel->value; - LangEl *lel = langElIndex[trans->lowKey]; - if ( lel != 0 && lel->isEOF ) - state->regions.append( EOF_REGION ); - } - } - } - - ///* Warn about states with empty token region lists. */ - //for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { - // if ( state->regions.length() == 0 ) { - // warning() << "state has an empty token region, state: " << - // state->stateNum << endl; - // } - //} - - /* Some actions may not have an ordering. I believe these to be actions - * that result in a parse error and they arise because the state tables - * are LALR(1) but the action ordering is LR(1). LALR(1) causes some - * reductions that lead nowhere. */ - for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { - assert( CmpDotSet::compare( state->dotSet, state->dotSet2 ) == 0 ); - for ( TransMap::Iter tel = state->transMap; tel.lte(); tel++ ) { - PdaTrans *trans = tel->value; - /* Check every action has an ordering. 
*/ - for ( ActDataList::Iter adl = trans->actOrds; adl.lte(); adl++ ) { - if ( *adl == 0 ) - *adl = time++; - } - } - } -} - -void Compiler::advanceReductions( PdaGraph *pdaGraph ) -{ - /* Loop all states. */ - for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { - if ( !state->advanceReductions ) - continue; - - bool outHasShift = false; - ReductionMap outReds; - LongSet outCommits; - for ( TransMap::Iter out = state->transMap; out.lte(); out++ ) { - /* Get the transition from the trans el. */ - if ( out->value->isShift ) - outHasShift = true; - outReds.insert( out->value->reductions ); - outCommits.insert( out->value->commits ); - } - - bool inHasShift = false; - ReductionMap inReds; - for ( PdaTransInList::Iter in = state->inRange; in.lte(); in++ ) { - /* Get the transition from the trans el. */ - if ( in->isShift ) - inHasShift = true; - inReds.insert( in->reductions ); - } - - if ( !outHasShift && outReds.length() == 1 && - inHasShift && inReds.length() == 0 ) - { - //cerr << "moving reduction to shift" << endl; - - /* Move the reduction to all in transitions. */ - for ( PdaTransInList::Iter in = state->inRange; in.lte(); in++ ) { - assert( in->actions.length() == 1 ); - assert( in->actions[0] == SHIFT_CODE ); - in->actions[0] = makeReduceCode( outReds[0].key, true ); - in->afterShiftCommits.insert( outCommits ); - } - - /* - * Remove all transitions out of the state. - */ - - /* Detach out range transitions. */ - for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { - pdaGraph->detachTrans( state, trans->value->toState, trans->value ); - delete trans->value; - } - state->transMap.empty(); - - /* Redirect all the in transitions to the actionDestState. */ - pdaGraph->inTransMove( actionDestState, state ); - } - } - - pdaGraph->removeUnreachableStates(); -} - -void Compiler::sortActions( PdaGraph *pdaGraph ) -{ - /* Sort the actions. 
*/ - for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { - assert( CmpDotSet::compare( state->dotSet, state->dotSet2 ) == 0 ); - for ( TransMap::Iter tel = state->transMap; tel.lte(); tel++ ) { - PdaTrans *trans = tel->value; - - /* Sort by the action ords. */ - ActDataList actions( trans->actions ); - ActDataList actOrds( trans->actOrds ); - ActDataList actPriors( trans->actPriors ); - trans->actions.empty(); - trans->actOrds.empty(); - trans->actPriors.empty(); - while ( actOrds.length() > 0 ) { - int min = 0; - for ( int i = 1; i < actOrds.length(); i++ ) { - if ( actPriors[i] > actPriors[min] || - (actPriors[i] == actPriors[min] && - actOrds[i] < actOrds[min] ) ) - { - min = i; - } - } - trans->actions.append( actions[min] ); - trans->actOrds.append( actOrds[min] ); - trans->actPriors.append( actPriors[min] ); - actions.remove(min); - actOrds.remove(min); - actPriors.remove(min); - } - - if ( branchPointInfo && trans->actions.length() > 1 ) { - cerr << "info: branch point" - << " state: " << state->stateNum - << " trans: "; - LangEl *lel = langElIndex[trans->lowKey]; - if ( lel == 0 ) - cerr << (char)trans->lowKey << endl; - else - cerr << lel->lit << endl; - - for ( ActDataList::Iter act = trans->actions; act.lte(); act++ ) { - switch ( *act & 0x3 ) { - case 1: - cerr << " shift" << endl; - break; - case 2: - cerr << " reduce " << - prodIdIndex[(*act >> 2)]->data << endl; - break; - case 3: - cerr << " shift-reduce" << endl; - break; - } - } - } - - /* Verify that shifts of nonterminals don't have any branch - * points or commits. */ - if ( trans->lowKey >= firstNonTermId ) { - if ( trans->actions.length() != 1 || - (trans->actions[0] & 0x3) != 1 ) - { - error() << "TRANS ON NONTERMINAL is something " - "other than a shift" << endl; - } - if ( trans->commits.length() > 0 ) - error() << "TRANS ON NONTERMINAL has a commit" << endl; - } - - /* TODO: Shift-reduces are optimizations. 
Verify that - * shift-reduces exist only if they don't entail a conflict. */ - } - } -} - -void Compiler::reduceActions( PdaGraph *pdaGraph ) -{ - /* Reduce the actions. */ - for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { - for ( TransMap::Iter tel = state->transMap; tel.lte(); tel++ ) { - PdaTrans *trans = tel->value; - PdaActionSetEl *inSet; - - int commitLen = trans->commits.length() > 0 ? - trans->commits[trans->commits.length()-1] : 0; - - if ( trans->afterShiftCommits.length() > 0 ) { - int afterShiftCommit = trans->afterShiftCommits[ - trans->afterShiftCommits.length()-1]; - - if ( commitLen > 0 && commitLen+1 > afterShiftCommit ) - commitLen = ( commitLen + 1 ); - else - commitLen = afterShiftCommit; - } - else { - commitLen = commitLen * -1; - } - - //if ( commitLen != 0 ) { - // cerr << "FINAL ACTION COMMIT LEN: " << commitLen << endl; - //} - - pdaGraph->actionSet.insert( ActionData( trans->toState->stateNum, - trans->actions, commitLen ), &inSet ); - trans->actionSetEl = inSet; - } - } -} - -void Compiler::computeAdvanceReductions( LangEl *langEl, PdaGraph *pdaGraph ) -{ - /* Get the entry into the graph and traverse over the root. The resulting - * state can have eof, nothing else can. */ - PdaState *overStart = pdaGraph->followFsm( - langEl->startState, - langEl->rootDef->fsm ); - - /* The graph must reduce to root all on it's own. It cannot depend on - * require EOF. */ - for ( PdaStateList::Iter st = pdaGraph->stateList; st.lte(); st++ ) { - if ( st == overStart ) - continue; - - for ( TransMap::Iter tr = st->transMap; tr.lte(); tr++ ) { - if ( tr->value->lowKey == langEl->eofLel->id ) - st->advanceReductions = true; - } - } -} - -void Compiler::verifyParseStopGrammar( LangEl *langEl, PdaGraph *pdaGraph ) -{ - /* Get the entry into the graph and traverse over the root. The resulting - * state can have eof, nothing else can. 
*/ - PdaState *overStart = pdaGraph->followFsm( - langEl->startState, - langEl->rootDef->fsm ); - - /* The graph must reduce to root all on it's own. It cannot depend on - * require EOF. */ - for ( PdaStateList::Iter st = pdaGraph->stateList; st.lte(); st++ ) { - if ( st == overStart ) - continue; - - for ( TransMap::Iter tr = st->transMap; tr.lte(); tr++ ) { - if ( tr->value->lowKey == langEl->eofLel->id ) { - /* This needs a better error message. Appears to be voodoo. */ - error() << "grammar is not usable with parse_stop" << endp; - } - } - } -} - -LangEl *Compiler::predOf( PdaTrans *trans, long action ) -{ - LangEl *lel; - if ( action == SHIFT_CODE ) - lel = langElIndex[trans->lowKey]; - else - lel = prodIdIndex[action >> 2]->predOf; - return lel; -} - - -bool Compiler::precedenceSwap( long action1, long action2, LangEl *l1, LangEl *l2 ) -{ - bool swap = false; - if ( l2->predValue > l1->predValue ) - swap = true; - else if ( l1->predValue == l2->predValue ) { - if ( l1->predType == PredLeft && action1 == SHIFT_CODE ) - swap = true; - else if ( l1->predType == PredRight && action2 == SHIFT_CODE ) - swap = true; - } - return swap; -} - -bool Compiler::precedenceRemoveBoth( LangEl *l1, LangEl *l2 ) -{ - if ( l1->predValue == l2->predValue && l1->predType == PredNonassoc ) - return true; - return false; -} - -void Compiler::resolvePrecedence( PdaGraph *pdaGraph ) -{ - for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { - assert( CmpDotSet::compare( state->dotSet, state->dotSet2 ) == 0 ); - - for ( long t = 0; t < state->transMap.length(); /* increment at end */ ) { - PdaTrans *trans = state->transMap[t].value; - -again: - /* Find action with precedence. */ - for ( int i = 0; i < trans->actions.length(); i++ ) { - LangEl *li = predOf( trans, trans->actions[i] ); - - if ( li != 0 && li->predType != PredNone ) { - /* Find another action with precedence. 
*/ - for ( int j = i+1; j < trans->actions.length(); j++ ) { - LangEl *lj = predOf( trans, trans->actions[j] ); - - if ( lj != 0 && lj->predType != PredNone ) { - /* Conflict to check. */ - bool swap = precedenceSwap( trans->actions[i], - trans->actions[j], li, lj ); - - if ( swap ) { - long t = trans->actions[i]; - trans->actions[i] = trans->actions[j]; - trans->actions[j] = t; - } - - trans->actions.remove( j ); - if ( precedenceRemoveBoth( li, lj ) ) - trans->actions.remove( i ); - - goto again; - } - } - } - } - - /* If there are still actions then move to the next one. If not, - * (due to nonassoc) then remove the transition. */ - if ( trans->actions.length() > 0 ) - t += 1; - else - state->transMap.vremove( t ); - } - } -} - -void Compiler::analyzeMachine( PdaGraph *pdaGraph, LangElSet &parserEls ) -{ - pdaGraph->maxState = pdaGraph->stateList.length() - 1; - pdaGraph->maxLelId = nextLelId - 1; - pdaGraph->maxOffset = pdaGraph->stateList.length() * pdaGraph->maxLelId; - - for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { - for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { - if ( trans->value->isShift ) { - trans->value->actions.append( SHIFT_CODE ); - trans->value->actPriors.append( trans->value->shiftPrior ); - } - for ( ReductionMap::Iter red = trans->value->reductions; red.lte(); red++ ) { - trans->value->actions.append( makeReduceCode( red->key, false ) ); - trans->value->actPriors.append( red->value ); - } - trans->value->actOrds.appendDup( 0, trans->value->actions.length() ); - } - } - - pdaActionOrder( pdaGraph, parserEls ); - sortActions( pdaGraph ); - resolvePrecedence( pdaGraph ); - - /* Verify that any type we parse_stop can actually be parsed that way. 
*/ - for ( LangElSet::Iter pe = parserEls; pe.lte(); pe++ ) { - LangEl *lel = *pe; - if ( lel->parseStop ) - computeAdvanceReductions(lel , pdaGraph); - } - - advanceReductions( pdaGraph ); - pdaGraph->setStateNumbers(); - reduceActions( pdaGraph ); - - /* Set the action ids. */ - int actionSetId = 0; - for ( PdaActionSet::Iter asi = pdaGraph->actionSet; asi.lte(); asi++ ) - asi->key.id = actionSetId++; - - /* Get the max index. */ - pdaGraph->maxIndex = actionSetId - 1; - - /* Compute the max prod length. */ - pdaGraph->maxProdLen = 0; - for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) { - if ( (unsigned)prod->fsmLength > pdaGraph->maxProdLen ) - pdaGraph->maxProdLen = prod->fsmLength; - } - - /* Asserts that any transition with a nonterminal has a single action - * which is either a shift or a shift-reduce. */ - for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { - for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { - LangEl *langEl = langElIndex[trans->value->lowKey]; - if ( langEl != 0 && langEl->type == LangEl::NonTerm ) { - assert( trans->value->actions.length() == 1 ); - assert( trans->value->actions[0] == SHIFT_CODE || - (trans->value->actions[0] & 0x3) == SHIFT_REDUCE_CODE ); - } - } - } - - /* Assert that shift reduces always appear on their own. */ - for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { - for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { - for ( ActDataList::Iter act = trans->value->actions; act.lte(); act++ ) { - if ( (*act & 0x3) == SHIFT_REDUCE_CODE ) - assert( trans->value->actions.length() == 1 ); - } - } - } - - /* Verify that any type we parse_stop can actually be parsed that way. 
*/ - for ( LangElSet::Iter pe = parserEls; pe.lte(); pe++ ) { - LangEl *lel = *pe; - if ( lel->parseStop ) - verifyParseStopGrammar(lel , pdaGraph); - } -} - -void Compiler::wrapNonTerminals() -{ - /* Make a language element that will be used to make the root productions. - * These are used for making parsers rooted at any production (including - * the start symbol). */ - rootLangEl = declareLangEl( this, rootNamespace, "_root", LangEl::NonTerm ); - - for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { - /* Make a single production used when the lel is a root. */ - ProdElList *prodElList = makeProdElList( lel ); - lel->rootDef = Production::cons( InputLoc(), rootLangEl, - prodElList, String(), false, 0, - prodList.length(), rootLangEl->defList.length() ); - prodList.append( lel->rootDef ); - rootLangEl->defList.append( lel->rootDef ); - - /* First resolve. */ - for ( ProdElList::Iter prodEl = *prodElList; prodEl.lte(); prodEl++ ) - resolveProdEl( prodEl ); - } -} - -bool Compiler::makeNonTermFirstSetProd( Production *prod, PdaState *state ) -{ - bool modified = false; - for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { - if ( trans->key >= firstNonTermId ) { - long *inserted = prod->nonTermFirstSet.insert( trans->key ); - if ( inserted != 0 ) - modified = true; - - bool hasEpsilon = false; - LangEl *lel = langElIndex[trans->key]; - for ( LelDefList::Iter ldef = lel->defList; ldef.lte(); ldef++ ) { - for ( ProdIdSet::Iter pid = ldef->nonTermFirstSet; - pid.lte(); pid++ ) - { - if ( *pid == -1 ) - hasEpsilon = true; - else { - long *inserted = prod->nonTermFirstSet.insert( *pid ); - if ( inserted != 0 ) - modified = true; - } - } - } - - if ( hasEpsilon ) { - if ( trans->value->toState->isFinState() ) { - long *inserted = prod->nonTermFirstSet.insert( -1 ); - if ( inserted != 0 ) - modified = true; - } - - bool lmod = makeNonTermFirstSetProd( prod, trans->value->toState ); - if ( lmod ) - modified = true; - } - } - } - return modified; -} - - 
-void Compiler::makeNonTermFirstSets() -{ - bool modified = true; - while ( modified ) { - modified = false; - for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) { - if ( prod->fsm->startState->isFinState() ) { - long *inserted = prod->nonTermFirstSet.insert( -1 ); - if ( inserted != 0 ) - modified = true; - } - - bool lmod = makeNonTermFirstSetProd( prod, prod->fsm->startState ); - if ( lmod ) - modified = true; - } - } - - for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) { - if ( prod->nonTermFirstSet.find( prod->prodName->id ) ) - prod->isLeftRec = true; - } -} - -void Compiler::printNonTermFirstSets() -{ - for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) { - cerr << prod->data << ": "; - for ( ProdIdSet::Iter pid = prod->nonTermFirstSet; pid.lte(); pid++ ) - { - if ( *pid < 0 ) - cerr << " <EPSILON>"; - else { - LangEl *lel = langElIndex[*pid]; - cerr << " " << lel->name; - } - } - cerr << endl; - - if ( prod->isLeftRec ) - cerr << "PROD IS LEFT REC: " << prod->data << endl; - } -} - -bool Compiler::makeFirstSetProd( Production *prod, PdaState *state ) -{ - bool modified = false; - for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { - if ( trans->key < firstNonTermId ) { - long *inserted = prod->firstSet.insert( trans->key ); - if ( inserted != 0 ) - modified = true; - } - else { - long *inserted = prod->firstSet.insert( trans->key ); - if ( inserted != 0 ) - modified = true; - - LangEl *klangEl = langElIndex[trans->key]; - if ( klangEl != 0 && klangEl->termDup != 0 ) { - long *inserted2 = prod->firstSet.insert( klangEl->termDup->id ); - if ( inserted2 != 0 ) - modified = true; - } - - bool hasEpsilon = false; - LangEl *lel = langElIndex[trans->key]; - for ( LelDefList::Iter ldef = lel->defList; ldef.lte(); ldef++ ) { - for ( ProdIdSet::Iter pid = ldef->firstSet; - pid.lte(); pid++ ) - { - if ( *pid == -1 ) - hasEpsilon = true; - else { - long *inserted = prod->firstSet.insert( *pid ); - if ( inserted != 0 ) - 
modified = true; - } - } - } - - if ( hasEpsilon ) { - if ( trans->value->toState->isFinState() ) { - long *inserted = prod->firstSet.insert( -1 ); - if ( inserted != 0 ) - modified = true; - } - - bool lmod = makeFirstSetProd( prod, trans->value->toState ); - if ( lmod ) - modified = true; - } - } - } - return modified; -} - - -void Compiler::makeFirstSets() -{ - bool modified = true; - while ( modified ) { - modified = false; - for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) { - if ( prod->fsm->startState->isFinState() ) { - long *inserted = prod->firstSet.insert( -1 ); - if ( inserted != 0 ) - modified = true; - } - - bool lmod = makeFirstSetProd( prod, prod->fsm->startState ); - if ( lmod ) - modified = true; - } - } -} - -void Compiler::printFirstSets() -{ - for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) { - cerr << prod->data << ": "; - for ( ProdIdSet::Iter pid = prod->firstSet; pid.lte(); pid++ ) - { - if ( *pid < 0 ) - cerr << " <EPSILON>"; - else { - LangEl *lel = langElIndex[*pid]; - if ( lel != 0 ) - cerr << endl << " " << lel->name; - else - cerr << endl << " " << *pid; - } - } - cerr << endl; - } -} - -void Compiler::insertUniqueEmptyProductions() -{ - int limit = prodList.length(); - for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) { - if ( prod->prodId == limit ) - break; - - /* Get a language element. 
*/ - char name[20]; - /* Bounded write: the name can never run past the end of the buffer, - * whatever the width of the production count. */ - snprintf( name, sizeof(name), "U%li", prodList.length() ); - LangEl *prodName = addLangEl( this, rootNamespace, name, LangEl::NonTerm ); - Production *newDef = Production::cons( InputLoc(), prodName, - 0, String(), false, 0, prodList.length(), prodName->defList.length() ); - prodName->defList.append( newDef ); - prodList.append( newDef ); - - prod->uniqueEmptyLeader = prodName; - } -} - -struct local_info *Compiler::makeLocalInfo( Locals &locals ) -{ - struct local_info *localInfo = new local_info[locals.locals.length()]; - memset( localInfo, 0, sizeof(struct local_info) * locals.locals.length() ); - - for ( Vector<LocalLoc>::Iter l = locals.locals; l.lte(); l++ ) { - localInfo[l.pos()].type = (int) l->type; - localInfo[l.pos()].offset = l->offset; - } - return localInfo; -} - -short *Compiler::makeTrees( ObjectDef *objectDef, int &numTrees ) -{ - numTrees = 0; - for ( FieldList::Iter of = objectDef->fieldList; of.lte(); of++ ) { - if ( of->value->exists() ) { - UniqueType *ut = of->value->typeRef->resolveType( this ); - if ( ut->typeId == TYPE_TREE ) - numTrees += 1; - } - } - - short *trees = new short[numTrees]; - memset( trees, 0, sizeof(short) * numTrees ); - - short pos = 0; - for ( FieldList::Iter of = objectDef->fieldList; of.lte(); of++ ) { - if ( of->value->exists() ) { - UniqueType *ut = of->value->typeRef->resolveType( this ); - if ( ut->typeId == TYPE_TREE ) { - trees[pos] = of->value->offset; - pos += 1; - } - } - } - - return trees; -} - - -void Compiler::makeRuntimeData() -{ - long count = 0; - - /* - * ProdLengths - * ProdLhsIs - * ProdNames - * ProdCodeBlocks - * ProdCodeBlockLens - */ - - runtimeData->frame_info = new frame_info[nextFrameId]; - runtimeData->num_frames = nextFrameId; - memset( runtimeData->frame_info, 0, sizeof(struct frame_info) * nextFrameId ); - - /* - * Init code block. 
- */ - if ( rootCodeBlock == 0 ) { - /* No root code block: the frame_info array was zeroed above and - * there is no root frame to describe. */ - runtimeData->root_code = 0; - runtimeData->root_code_len = 0; - runtimeData->root_frame_id = 0; - } - else { - runtimeData->root_code = rootCodeBlock->codeWC.data; - runtimeData->root_code_len = rootCodeBlock->codeWC.length(); - runtimeData->root_frame_id = rootCodeBlock->frameId; - - /* Describe the root frame. This reads rootCodeBlock, so it must stay - * inside the non-null branch. */ - runtimeData->frame_info[rootCodeBlock->frameId].codeWV = 0; - runtimeData->frame_info[rootCodeBlock->frameId].codeLenWV = 0; - - runtimeData->frame_info[rootCodeBlock->frameId].locals = makeLocalInfo( rootCodeBlock->locals ); - runtimeData->frame_info[rootCodeBlock->frameId].locals_len = rootCodeBlock->locals.locals.length(); - - runtimeData->frame_info[rootCodeBlock->frameId].frame_size = rootLocalFrame->size(); - runtimeData->frame_info[rootCodeBlock->frameId].arg_size = 0; - runtimeData->frame_info[rootCodeBlock->frameId].ret_tree = false; - } - - /* - * prodInfo - */ - count = prodList.length(); - runtimeData->prod_info = new prod_info[count]; - runtimeData->num_prods = count; - - count = 0; - for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) { - runtimeData->prod_info[count].lhs_id = prod->prodName->id; - runtimeData->prod_info[count].prod_num = prod->prodNum; - runtimeData->prod_info[count].length = prod->fsmLength; - runtimeData->prod_info[count].name = prod->data; - runtimeData->prod_info[count].frame_id = -1; - - CodeBlock *block = prod->redBlock; - if ( block != 0 ) { - runtimeData->prod_info[count].frame_id = block->frameId; - runtimeData->frame_info[block->frameId].codeWV = block->codeWV.data; - runtimeData->frame_info[block->frameId].codeLenWV = block->codeWV.length(); - - runtimeData->frame_info[block->frameId].locals = makeLocalInfo( block->locals ); - runtimeData->frame_info[block->frameId].locals_len = block->locals.locals.length(); - - runtimeData->frame_info[block->frameId].frame_size = block->localFrame->size(); - runtimeData->frame_info[block->frameId].arg_size = 0; - runtimeData->frame_info[block->frameId].ret_tree = 
false; - } - - runtimeData->prod_info[count].lhs_upref = true; - runtimeData->prod_info[count].copy = prod->copy.data; - runtimeData->prod_info[count].copy_len = prod->copy.length() / 2; - count += 1; - } - - /* - * regionInfo - */ - runtimeData->num_regions = regionList.length()+1; - runtimeData->region_info = new region_info[runtimeData->num_regions]; - memset( runtimeData->region_info, 0, - sizeof(struct region_info) * runtimeData->num_regions ); - - runtimeData->region_info[0].default_token = -1; - runtimeData->region_info[0].eof_frame_id = -1; - runtimeData->region_info[0].ci_lel_id = 0; - - for ( RegionList::Iter reg = regionList; reg.lte(); reg++ ) { - long regId = reg->id+1; - runtimeData->region_info[regId].default_token = - reg->impl->defaultTokenInstance == 0 ? - -1 : - reg->impl->defaultTokenInstance->tokenDef->tdLangEl->id; - runtimeData->region_info[regId].eof_frame_id = -1; - runtimeData->region_info[regId].ci_lel_id = reg->zeroLel != 0 ? reg->zeroLel->id : 0; - - CodeBlock *block = reg->preEofBlock; - if ( block != 0 ) { - runtimeData->region_info[regId].eof_frame_id = block->frameId; - runtimeData->frame_info[block->frameId].codeWV = block->codeWV.data; - runtimeData->frame_info[block->frameId].codeLenWV = block->codeWV.length(); - - runtimeData->frame_info[block->frameId].locals = makeLocalInfo( block->locals ); - runtimeData->frame_info[block->frameId].locals_len = block->locals.locals.length(); - - runtimeData->frame_info[block->frameId].frame_size = block->localFrame->size(); - runtimeData->frame_info[block->frameId].arg_size = 0; - runtimeData->frame_info[block->frameId].ret_tree = false; - } - } - - /* - * lelInfo - */ - - count = nextLelId; - runtimeData->lel_info = new lang_el_info[count]; - runtimeData->num_lang_els = count; - memset( runtimeData->lel_info, 0, sizeof(struct lang_el_info)*count ); - - for ( int i = 0; i < nextLelId; i++ ) { - LangEl *lel = langElIndex[i]; - if ( lel != 0 ) { - runtimeData->lel_info[i].name = lel->fullLit; - 
runtimeData->lel_info[i].xml_tag = lel->xmlTag; - runtimeData->lel_info[i].repeat = lel->isRepeat; - runtimeData->lel_info[i].list = lel->isList; - runtimeData->lel_info[i].literal = lel->isLiteral; - runtimeData->lel_info[i].ignore = lel->isIgnore; - runtimeData->lel_info[i].frame_id = -1; - - CodeBlock *block = lel->transBlock; - if ( block != 0 ) { - runtimeData->lel_info[i].frame_id = block->frameId; - runtimeData->frame_info[block->frameId].codeWV = block->codeWV.data; - runtimeData->frame_info[block->frameId].codeLenWV = block->codeWV.length(); - - runtimeData->frame_info[block->frameId].locals = makeLocalInfo( block->locals ); - runtimeData->frame_info[block->frameId].locals_len = block->locals.locals.length(); - - runtimeData->frame_info[block->frameId].frame_size = block->localFrame->size(); - runtimeData->frame_info[block->frameId].arg_size = 0; - runtimeData->frame_info[block->frameId].ret_tree = false; - } - - runtimeData->lel_info[i].object_type_id = - lel->objectDef == 0 ? 0 : lel->objectDef->id; - runtimeData->lel_info[i].ofi_offset = lel->ofiOffset; - runtimeData->lel_info[i].object_length = - lel->objectDef != 0 ? lel->objectDef->size() : 0; - -// runtimeData->lelInfo[i].contextTypeId = 0; -// lel->context == 0 ? 0 : lel->context->contextObjDef->id; -// runtimeData->lelInfo[i].contextLength = 0; //lel->context == 0 ? 0 : -// lel->context->contextObjDef->size(); -// if ( lel->context != 0 ) { -// cout << "type: " << runtimeData->lelInfo[i].contextTypeId << " length: " << -// runtimeData->lelInfo[i].contextLength << endl; -// } - - runtimeData->lel_info[i].term_dup_id = lel->termDup == 0 ? 
0 : lel->termDup->id; - - if ( lel->tokenDef != 0 && lel->tokenDef->join != 0 && - lel->tokenDef->join->context != 0 ) - runtimeData->lel_info[i].mark_id = lel->tokenDef->join->mark->markId; - else - runtimeData->lel_info[i].mark_id = -1; - - runtimeData->lel_info[i].num_capture_attr = 0; - } - else { - memset(&runtimeData->lel_info[i], 0, sizeof(struct lang_el_info) ); - runtimeData->lel_info[i].name = "__UNUSED"; - runtimeData->lel_info[i].xml_tag = "__UNUSED"; - runtimeData->lel_info[i].frame_id = -1; - } - } - - /* - * struct_el_info - */ - - count = structEls.length(); - runtimeData->sel_info = new struct_el_info[count]; - runtimeData->num_struct_els = count; - memset( runtimeData->sel_info, 0, sizeof(struct struct_el_info)*count ); - StructElList::Iter sel = structEls; - for ( int i = 0; i < count; i++, sel++ ) { - int treesLen; - runtimeData->sel_info[i].size = sel->structDef->objectDef->size(); - runtimeData->sel_info[i].trees = makeTrees( sel->structDef->objectDef, treesLen ); - runtimeData->sel_info[i].trees_len = treesLen; - } - - /* - * function_info - */ - count = functionList.length(); - - runtimeData->function_info = new function_info[count]; - runtimeData->num_functions = count; - memset( runtimeData->function_info, 0, sizeof(struct function_info)*count ); - for ( FunctionList::Iter func = functionList; func.lte(); func++ ) { - - runtimeData->function_info[func->funcId].frame_id = -1; - - CodeBlock *block = func->codeBlock; - if ( block != 0 ) { - runtimeData->function_info[func->funcId].frame_id = block->frameId; - - /* Name. */ - runtimeData->frame_info[block->frameId].name = func->name; - - /* Code. */ - runtimeData->frame_info[block->frameId].codeWV = block->codeWV.data; - runtimeData->frame_info[block->frameId].codeLenWV = block->codeWV.length(); - runtimeData->frame_info[block->frameId].codeWC = block->codeWC.data; - runtimeData->frame_info[block->frameId].codeLenWC = block->codeWC.length(); - - /* Locals. 
*/ - runtimeData->frame_info[block->frameId].locals = makeLocalInfo( block->locals ); - runtimeData->frame_info[block->frameId].locals_len = block->locals.locals.length(); - - /* Meta. */ - runtimeData->frame_info[block->frameId].frame_size = func->localFrame->size(); - runtimeData->frame_info[block->frameId].arg_size = func->paramListSize; - - bool retTree = false; - if ( func->typeRef ) { - UniqueType *ut = func->typeRef->resolveType( this ); - retTree = ut->tree(); - } - runtimeData->frame_info[block->frameId].ret_tree = retTree; - } - - runtimeData->function_info[func->funcId].frame_size = func->localFrame->size(); - runtimeData->function_info[func->funcId].arg_size = func->paramListSize; - } - - /* - * pat_cons_info - */ - - /* Filled in later after patterns are parsed. */ - runtimeData->pat_repl_info = new pat_cons_info[nextPatConsId]; - memset( runtimeData->pat_repl_info, 0, sizeof(struct pat_cons_info) * nextPatConsId ); - runtimeData->num_patterns = nextPatConsId; - runtimeData->pat_repl_nodes = 0; - runtimeData->num_pattern_nodes = 0; - - - /* - * generic_info - */ - count = 1; - for ( NamespaceList::Iter nspace = namespaceList; nspace.lte(); nspace++ ) - count += nspace->genericList.length(); - assert( count == nextGenericId ); - - runtimeData->generic_info = new generic_info[count]; - runtimeData->num_generics = count; - memset( &runtimeData->generic_info[0], 0, sizeof(struct generic_info) ); - for ( NamespaceList::Iter nspace = namespaceList; nspace.lte(); nspace++ ) { - for ( GenericList::Iter gen = nspace->genericList; gen.lte(); gen++ ) { - runtimeData->generic_info[gen->id].type = gen->typeId; - - runtimeData->generic_info[gen->id].el_struct_id = - ( gen->typeId == GEN_MAP || gen->typeId == GEN_LIST ) ? - gen->elUt->structEl->id : -1; - runtimeData->generic_info[gen->id].el_offset = - gen->el != 0 ? gen->el->offset : -1; - - runtimeData->generic_info[gen->id].key_type = - gen->keyUt != 0 ? 
gen->keyUt->typeId : TYPE_NOTYPE; - runtimeData->generic_info[gen->id].key_offset = 0; - - runtimeData->generic_info[gen->id].value_type = - gen->valueUt != 0 ? gen->valueUt->typeId : TYPE_NOTYPE; - runtimeData->generic_info[gen->id].value_offset = 0; - - runtimeData->generic_info[gen->id].parser_id = - gen->typeId == GEN_PARSER ? gen->elUt->langEl->parserId : -1; - } - } - - runtimeData->argv_generic_id = argvTypeRef->generic->id; - runtimeData->stds_generic_id = stdsTypeRef->generic->id; - - /* - * Literals - */ - runtimeData->num_literals = literalStrings.length(); - runtimeData->litdata = new const char *[literalStrings.length()]; - runtimeData->litlen = new long [literalStrings.length()]; - runtimeData->literals = 0; - for ( StringMap::Iter el = literalStrings; el.lte(); el++ ) { - /* Data. */ - char *data = new char[el->key.length()+1]; - memcpy( data, el->key.data, el->key.length() ); - data[el->key.length()] = 0; - runtimeData->litdata[el->value] = data; - - /* Length. */ - runtimeData->litlen[el->value] = el->key.length(); - } - - /* Captured attributes. Loop over tokens and count first. 
*/ - long numCapturedAttr = 0; -// for ( RegionList::Iter reg = regionList; reg.lte(); reg++ ) { -// for ( TokenInstanceListReg::Iter td = reg->tokenInstanceList; td.lte(); td++ ) -// numCapturedAttr += td->reCaptureVect.length(); -// } - runtimeData->capture_attr = new CaptureAttr[numCapturedAttr]; - runtimeData->num_captured_attr = numCapturedAttr; - memset( runtimeData->capture_attr, 0, sizeof( CaptureAttr ) * numCapturedAttr ); - - count = 0; -// for ( RegionList::Iter reg = regionList; reg.lte(); reg++ ) { -// for ( TokenInstanceListReg::Iter td = reg->tokenInstanceList; td.lte(); td++ ) { -// runtimeData->lelInfo[td->token->id].captureAttr = count; -// runtimeData->lelInfo[td->token->id].numCaptureAttr = td->reCaptureVect.length(); -// for ( ReCaptureVect::Iter c = td->reCaptureVect; c.lte(); c++ ) { -// runtimeData->captureAttr[count].mark_enter = c->markEnter->markId; -// runtimeData->captureAttr[count].mark_leave = c->markLeave->markId; -// runtimeData->captureAttr[count].offset = c->objField->offset; -// -// count += 1; -// } -// } -// } - - runtimeData->fsm_tables = fsmTables; - runtimeData->pda_tables = pdaTables; - - /* FIXME: need a parser descriptor. */ - runtimeData->start_states = new int[nextParserId]; - runtimeData->eof_lel_ids = new int[nextParserId]; - runtimeData->parser_lel_ids = new int[nextParserId]; - runtimeData->num_parsers = nextParserId; - for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { - if ( lel->parserId >= 0 ) { - runtimeData->start_states[lel->parserId] = lel->startState->stateNum; - runtimeData->eof_lel_ids[lel->parserId] = lel->eofLel->id; - runtimeData->parser_lel_ids[lel->parserId] = lel->id; - } - } - - runtimeData->global_size = globalObjectDef->size(); - - /* - * Boundary between terms and non-terms. - */ - runtimeData->first_non_term_id = firstNonTermId; - - /* - * Boundary between trees and structs - */ - runtimeData->first_struct_el_id = firstStructElId; - - /* Special trees. 
*/ - runtimeData->integer_id = -1; //intLangEl->id; - runtimeData->string_id = strLangEl->id; - runtimeData->any_id = anyLangEl->id; - runtimeData->eof_id = 0; //eofLangEl->id; - runtimeData->no_token_id = noTokenLangEl->id; - runtimeData->global_id = globalSel->id; - runtimeData->argv_el_id = argvElSel->id; - runtimeData->stds_el_id = stdsElSel->id; - runtimeData->struct_inbuilt_id = structInbuiltId; - runtimeData->struct_stream_id = structStreamId; - runtimeData->struct_input_id = structInputId; - - runtimeData->fsm_execute = &internalFsmExecute; - runtimeData->send_named_lang_el = &internalSendNamedLangEl; - runtimeData->init_bindings = &internalInitBindings; - runtimeData->pop_binding = &internalPopBinding; - - runtimeData->host_call = &internal_host_call; - runtimeData->commit_reduce_forward = &internal_commit_reduce_forward; - runtimeData->commit_union_sz = &internal_commit_union_sz; - runtimeData->init_need = &internal_init_need; - runtimeData->reducer_need_tok = &internal_reducer_need_tok; - runtimeData->reducer_need_ign = &internal_reducer_need_ign; -} - -/* Borrow alg->state for mapsTo. */ -void countNodes( program_t *prg, int &count, parse_tree_t *parseTree, kid_t *kid ) -{ - if ( kid != 0 ) { - count += 1; - - /* Should't have to recurse here. 
*/ - tree_t *ignoreList = tree_left_ignore( prg, kid->tree ); - if ( ignoreList != 0 ) { - kid_t *ignore = ignoreList->child; - while ( ignore != 0 ) { - count += 1; - ignore = ignore->next; - } - } - - ignoreList = tree_right_ignore( prg, kid->tree ); - if ( ignoreList != 0 ) { - kid_t *ignore = ignoreList->child; - while ( ignore != 0 ) { - count += 1; - ignore = ignore->next; - } - } - - //count += prg->rtd->lelInfo[kid->tree->id].numCaptureAttr; - - if ( !( parseTree->flags & PF_NAMED ) && - !( parseTree->flags & PF_ARTIFICIAL ) && - tree_child( prg, kid->tree ) != 0 ) - { - countNodes( prg, count, parseTree->child, tree_child( prg, kid->tree ) ); - } - countNodes( prg, count, parseTree->next, kid->next ); - } -} - -void fillNodes( program_t *prg, int &nextAvail, struct bindings *bindings, long &bindId, - struct pat_cons_node *nodes, parse_tree_t *parseTree, kid_t *kid, int ind ) -{ - if ( kid != 0 ) { - struct pat_cons_node &node = nodes[ind]; - - kid_t *child = - !( parseTree->flags & PF_NAMED ) && - !( parseTree->flags & PF_ARTIFICIAL ) && - tree_child( prg, kid->tree ) != 0 - ? - tree_child( prg, kid->tree ) : 0; - - parse_tree_t *ptChild = - !( parseTree->flags & PF_NAMED ) && - !( parseTree->flags & PF_ARTIFICIAL ) && - tree_child( prg, kid->tree ) != 0 - ? - parseTree->child : 0; - - /* Set up the fields. */ - node.id = kid->tree->id; - node.prod_num = kid->tree->prod_num; - node.length = string_length( kid->tree->tokdata ); - node.data = string_data( kid->tree->tokdata ); - - /* Ignore items. */ - tree_t *ignoreList = tree_left_ignore( prg, kid->tree ); - kid_t *ignore = ignoreList == 0 ? 0 : ignoreList->child; - node.left_ignore = ignore == 0 ? -1 : nextAvail; - - while ( ignore != 0 ) { - struct pat_cons_node &node = nodes[nextAvail++]; - - memset( &node, 0, sizeof(struct pat_cons_node) ); - node.id = ignore->tree->id; - node.prod_num = ignore->tree->prod_num; - node.next = ignore->next == 0 ? 
-1 : nextAvail; - - node.length = string_length( ignore->tree->tokdata ); - node.data = string_data( ignore->tree->tokdata ); - - ignore = ignore->next; - } - - /* Ignore items. */ - ignoreList = tree_right_ignore( prg, kid->tree ); - ignore = ignoreList == 0 ? 0 : ignoreList->child; - node.right_ignore = ignore == 0 ? -1 : nextAvail; - - while ( ignore != 0 ) { - struct pat_cons_node &node = nodes[nextAvail++]; - - memset( &node, 0, sizeof(struct pat_cons_node) ); - node.id = ignore->tree->id; - node.prod_num = ignore->tree->prod_num; - node.next = ignore->next == 0 ? -1 : nextAvail; - - node.length = string_length( ignore->tree->tokdata ); - node.data = string_data( ignore->tree->tokdata ); - - ignore = ignore->next; - } - - ///* The captured attributes. */ - //for ( int i = 0; i < prg->rtd->lelInfo[kid->tree->id].numCaptureAttr; i++ ) { - // CaptureAttr *cap = prg->rtd->captureAttr + - // prg->rtd->lelInfo[kid->tree->id].captureAttr + i; - // - // tree_t *attr = colm_get_attr( kid->tree, cap->offset ); - // - // struct pat_cons_node &node = nodes[nextAvail++]; - // memset( &node, 0, sizeof(struct pat_cons_node) ); - // - // node.id = attr->id; - // node.prodNum = attr->prodNum; - // node.length = stringLength( attr->tokdata ); - // node.data = stringData( attr->tokdata ); - //} - - node.stop = parseTree->flags & PF_TERM_DUP; - - node.child = child == 0 ? -1 : nextAvail++; - - /* Recurse. */ - fillNodes( prg, nextAvail, bindings, bindId, nodes, ptChild, child, node.child ); - - /* Since the parser is bottom up the bindings are in a bottom up - * traversal order. Check after recursing. */ - node.bind_id = 0; - if ( bindId < bindings->length() && bindings->data[bindId] == parseTree ) { - /* Remember that binding ids are indexed from one. */ - node.bind_id = bindId++; - - //cout << "binding match in " << __PRETTY_FUNCTION__ << endl; - //cout << "bindId: " << node.bindId << endl; - } - - node.next = kid->next == 0 ? -1 : nextAvail++; - - /* Move to the next child. 
*/ - fillNodes( prg, nextAvail, bindings, bindId, nodes, parseTree->next, kid->next, node.next ); - } -} - -void Compiler::fillInPatterns( program_t *prg ) -{ - /* - * patReplNodes - */ - - /* Count is referenced and computed by mapNode. */ - int count = 0; - for ( PatList::Iter pat = patternList; pat.lte(); pat++ ) { - countNodes( prg, count, - pat->pdaRun->stack_top->next, - pat->pdaRun->stack_top->next->shadow ); - } - - for ( ConsList::Iter repl = replList; repl.lte(); repl++ ) { - countNodes( prg, count, - repl->pdaRun->stack_top->next, - repl->pdaRun->stack_top->next->shadow ); - } - - runtimeData->pat_repl_nodes = new pat_cons_node[count]; - runtimeData->num_pattern_nodes = count; - - int nextAvail = 0; - - for ( PatList::Iter pat = patternList; pat.lte(); pat++ ) { - int ind = nextAvail++; - runtimeData->pat_repl_info[pat->patRepId].offset = ind; - - /* BindIds are indexed base one. */ - runtimeData->pat_repl_info[pat->patRepId].num_bindings = - pat->pdaRun->bindings->length() - 1; - - /* Init the bind */ - long bindId = 1; - fillNodes( prg, nextAvail, pat->pdaRun->bindings, bindId, - runtimeData->pat_repl_nodes, - pat->pdaRun->stack_top->next, - pat->pdaRun->stack_top->next->shadow, - ind ); - } - - for ( ConsList::Iter repl = replList; repl.lte(); repl++ ) { - int ind = nextAvail++; - runtimeData->pat_repl_info[repl->patRepId].offset = ind; - - /* BindIds are indexed base one. 
*/ - runtimeData->pat_repl_info[repl->patRepId].num_bindings = - repl->pdaRun->bindings->length() - 1; - - long bindId = 1; - fillNodes( prg, nextAvail, repl->pdaRun->bindings, bindId, - runtimeData->pat_repl_nodes, - repl->pdaRun->stack_top->next, - repl->pdaRun->stack_top->next->shadow, - ind ); - } - - assert( nextAvail == count ); -} - - -int Compiler::findIndexOff( struct pda_tables *pdaTables, PdaGraph *pdaGraph, PdaState *state, int &curLen ) -{ - for ( int start = 0; start < curLen; ) { - int offset = start; - for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { - if ( pdaTables->owners[offset] != -1 ) - goto next_start; - - offset++; - if ( ! trans.last() ) { - TransMap::Iter next = trans.next(); - offset += next->key - trans->key - 1; - } - } - - /* Got though the whole list without a conflict. */ - return start; - -next_start: - start++; - } - - return curLen; -} - -struct CmpSpan -{ - static int compare( PdaState *state1, PdaState *state2 ) - { - int dist1 = 0, dist2 = 0; - - if ( state1->transMap.length() > 0 ) { - TransMap::Iter first1 = state1->transMap.first(); - TransMap::Iter last1 = state1->transMap.last(); - dist1 = last1->key - first1->key; - } - - if ( state2->transMap.length() > 0 ) { - TransMap::Iter first2 = state2->transMap.first(); - TransMap::Iter last2 = state2->transMap.last(); - dist2 = last2->key - first2->key; - } - - if ( dist1 < dist2 ) - return 1; - else if ( dist2 < dist1 ) - return -1; - return 0; - } -}; - -PdaGraph *Compiler::makePdaGraph( LangElSet &parserEls ) -{ - //for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) - // cerr << prod->prodId << " " << prod->data << endl; - - PdaGraph *pdaGraph = new PdaGraph(); - lalr1GenerateParser( pdaGraph, parserEls ); - pdaGraph->setStateNumbers(); - analyzeMachine( pdaGraph, parserEls ); - - //cerr << "NUMBER OF STATES: " << pdaGraph->stateList.length() << endl; - - return pdaGraph; -} - -struct pda_tables *Compiler::makePdaTables( PdaGraph *pdaGraph ) -{ - 
int count, pos; - struct pda_tables *pdaTables = new pda_tables; - - /* - * Counting max indices. - */ - count = 0; - for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { - for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { - count++; - if ( ! trans.last() ) { - TransMap::Iter next = trans.next(); - count += next->key - trans->key - 1; - } - } - } - - - /* Allocate indicies and owners. */ - pdaTables->num_indicies = count; - pdaTables->indicies = new int[count]; - pdaTables->owners = new int[count]; - for ( long i = 0; i < count; i++ ) { - pdaTables->indicies[i] = -1; - pdaTables->owners[i] = -1; - } - - /* Allocate offsets. */ - int numStates = pdaGraph->stateList.length(); - pdaTables->offsets = new unsigned int[numStates]; - pdaTables->num_states = numStates; - - /* Place transitions into indicies/owners */ - PdaState **states = new PdaState*[numStates]; - long ds = 0; - for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) - states[ds++] = state; - - /* Sorting baseded on span length. Gives an improvement, but incures a - * cost. Off for now. */ - //MergeSort< PdaState*, CmpSpan > mergeSort; - //mergeSort.sort( states, numStates ); - - int indLen = 0; - for ( int s = 0; s < numStates; s++ ) { - PdaState *state = states[s]; - - int indOff = findIndexOff( pdaTables, pdaGraph, state, indLen ); - pdaTables->offsets[state->stateNum] = indOff; - - for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { - pdaTables->indicies[indOff] = trans->value->actionSetEl->key.id; - pdaTables->owners[indOff] = state->stateNum; - indOff++; - - if ( ! trans.last() ) { - TransMap::Iter next = trans.next(); - indOff += next->key - trans->key - 1; - } - } - - if ( indOff > indLen ) - indLen = indOff; - } - - /* We allocated the max, but cmpression gives us less. 
*/ - pdaTables->num_indicies = indLen; - delete[] states; - - - /* - * Keys - */ - count = pdaGraph->stateList.length() * 2;; - pdaTables->keys = new int[count]; - pdaTables->num_keys = count; - - count = 0; - for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { - if ( state->transMap.length() == 0 ) { - pdaTables->keys[count+0] = 0; - pdaTables->keys[count+1] = 0; - } - else { - TransMap::Iter first = state->transMap.first(); - TransMap::Iter last = state->transMap.last(); - pdaTables->keys[count+0] = first->key; - pdaTables->keys[count+1] = last->key; - } - count += 2; - } - - /* - * Targs - */ - count = pdaGraph->actionSet.length(); - pdaTables->targs = new unsigned int[count]; - pdaTables->num_targs = count; - - count = 0; - for ( PdaActionSet::Iter asi = pdaGraph->actionSet; asi.lte(); asi++ ) - pdaTables->targs[count++] = asi->key.targ; - - /* - * ActInds - */ - count = pdaGraph->actionSet.length(); - pdaTables->act_inds = new unsigned int[count]; - pdaTables->num_act_inds = count; - - count = pos = 0; - for ( PdaActionSet::Iter asi = pdaGraph->actionSet; asi.lte(); asi++ ) { - pdaTables->act_inds[count++] = pos; - pos += asi->key.actions.length() + 1; - } - - /* - * Actions - */ - count = 0; - for ( PdaActionSet::Iter asi = pdaGraph->actionSet; asi.lte(); asi++ ) - count += asi->key.actions.length() + 1; - - pdaTables->actions = new unsigned int[count]; - pdaTables->num_actions = count; - - count = 0; - for ( PdaActionSet::Iter asi = pdaGraph->actionSet; asi.lte(); asi++ ) { - for ( ActDataList::Iter ali = asi->key.actions; ali.lte(); ali++ ) - pdaTables->actions[count++] = *ali; - - pdaTables->actions[count++] = 0; - } - - /* - * CommitLen - */ - count = pdaGraph->actionSet.length(); - pdaTables->commit_len = new int[count]; - pdaTables->num_commit_len = count; - - count = 0; - for ( PdaActionSet::Iter asi = pdaGraph->actionSet; asi.lte(); asi++ ) - pdaTables->commit_len[count++] = asi->key.commitLen; - - /* - * tokenRegionInds. 
Start at one so region index 0 is null (unset). - */ - count = 0; - pos = 1; - pdaTables->token_region_inds = new int[pdaTables->num_states]; - for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { - pdaTables->token_region_inds[count++] = pos; - pos += state->regions.length() + 1; - } - - - /* - * tokenRegions. Build in a null at the beginning. - */ - - count = 1; - for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) - count += state->regions.length() + 1; - - pdaTables->num_region_items = count; - pdaTables->token_regions = new int[pdaTables->num_region_items]; - - count = 0; - pdaTables->token_regions[count++] = 0; - for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { - for ( RegionVect::Iter reg = state->regions; reg.lte(); reg++ ) { - int id = ( *reg == EOF_REGION ) ? 0 : (*reg)->id + 1; - pdaTables->token_regions[count++] = id; - } - - pdaTables->token_regions[count++] = 0; - } - - /* - * tokenPreRegions. Build in a null at the beginning. 
- */ - - count = 1; - for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) - count += state->regions.length() + 1; - - pdaTables->num_pre_region_items = count; - pdaTables->token_pre_regions = new int[pdaTables->num_pre_region_items]; - - count = 0; - pdaTables->token_pre_regions[count++] = 0; - for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { - for ( RegionVect::Iter reg = state->regions; reg.lte(); reg++ ) { - assert( state->preRegions.length() <= 1 ); - if ( state->preRegions.length() == 0 || state->preRegions[0]->impl->wasEmpty ) - pdaTables->token_pre_regions[count++] = -1; - else - pdaTables->token_pre_regions[count++] = state->preRegions[0]->id + 1; - } - - pdaTables->token_pre_regions[count++] = 0; - } - - - return pdaTables; -} - -void Compiler::makeParser( LangElSet &parserEls ) -{ - pdaGraph = makePdaGraph( parserEls ); - pdaTables = makePdaTables( pdaGraph ); -} - diff --git a/src/pdacodegen.cc b/src/pdacodegen.cc deleted file mode 100644 index 15ae658c..00000000 --- a/src/pdacodegen.cc +++ /dev/null @@ -1,698 +0,0 @@ -/* - * Copyright 2006-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <string.h> - -#include <iostream> - -#include "compiler.h" -#include "pdacodegen.h" - -using std::cerr; -using std::endl; - -#define FRESH_BLOCK 8128 -#define act_sb "0x1" -#define act_rb "0x2" -#define lower "0x0000ffff" -#define upper "0xffff0000" - -void escapeLiteralString( std::ostream &out, const char *path, int length ) -{ - for ( const char *pc = path, *end = path+length; pc != end; pc++ ) { - switch ( *pc ) { - case '\\': out << "\\\\"; break; - case '"': out << "\\\""; break; - case '\a': out << "\\a"; break; - case '\b': out << "\\b"; break; - case '\t': out << "\\t"; break; - case '\n': out << "\\n"; break; - case '\v': out << "\\v"; break; - case '\f': out << "\\f"; break; - case '\r': out << "\\r"; break; - default: out << *pc; break; - } - } -} - -void escapeLiteralString( std::ostream &out, const char *path ) -{ - escapeLiteralString( out, path, strlen(path) ); -} - -void PdaCodeGen::defineRuntime() -{ - out << - "extern struct colm_sections " << objectName << ";\n" - "\n"; -} - -void PdaCodeGen::writeRuntimeData( colm_sections *runtimeData, struct pda_tables *pdaTables ) -{ - /* - * Blocks of code in frames. - */ - for ( int i = 0; i < runtimeData->num_frames; i++ ) { - /* FIXME: horrible code cloning going on here. 
*/ - if ( runtimeData->frame_info[i].codeLenWV > 0 ) { - out << "static code_t code_" << i << "_wv[] = {\n\t"; - - code_t *block = runtimeData->frame_info[i].codeWV; - for ( int j = 0; j < runtimeData->frame_info[i].codeLenWV; j++ ) { - out << (unsigned long) block[j]; - - if ( j < runtimeData->frame_info[i].codeLenWV-1 ) { - out << ", "; - if ( (j+1) % 8 == 0 ) - out << "\n\t"; - } - } - out << "\n};\n\n"; - } - - if ( runtimeData->frame_info[i].codeLenWC > 0 ) { - out << "static code_t code_" << i << "_wc[] = {\n\t"; - - code_t *block = runtimeData->frame_info[i].codeWC; - for ( int j = 0; j < runtimeData->frame_info[i].codeLenWC; j++ ) { - out << (unsigned long) block[j]; - - if ( j < runtimeData->frame_info[i].codeLenWC-1 ) { - out << ", "; - if ( (j+1) % 8 == 0 ) - out << "\n\t"; - } - } - out << "\n};\n\n"; - } - - if ( runtimeData->frame_info[i].locals_len > 0 ) { - out << "static struct local_info locals_" << i << "[] = {\n\t"; - - struct local_info *li = runtimeData->frame_info[i].locals; - for ( int j = 0; j < runtimeData->frame_info[i].locals_len; j++ ) { - out << "{ " << (int)li[j].type << ", " << li[j].offset << " }"; - - if ( j < runtimeData->frame_info[i].locals_len-1 ) { - out << ", "; - if ( (j+1) % 8 == 0 ) - out << "\n\t"; - } - } - out << "\n};\n\n"; - } - } - - /* - * Blocks in production info. - */ - for ( int i = 0; i < runtimeData->num_prods; i++ ) { - if ( runtimeData->prod_info[i].copy_len > 0 ) { - out << "static unsigned char copy_" << i << "[] = {\n\t"; - - unsigned char *block = runtimeData->prod_info[i].copy; - for ( int j = 0; j < runtimeData->prod_info[i].copy_len; j++ ) { - out << (long) block[j*2] << ", " << (long) block[j*2+1]; - - if ( j < runtimeData->prod_info[i].copy_len-1 ) { - out << ", "; - if ( (j+1) % 8 == 0 ) - out << "\n\t"; - } - } - out << "\n};\n\n"; - } - } - - /* - * Init code. 
- */ - out << "static code_t " << rootCode() << "[] = {\n\t"; - code_t *block = runtimeData->root_code ; - for ( int j = 0; j < runtimeData->root_code_len; j++ ) { - out << (unsigned int) block[j]; - - if ( j < runtimeData->root_code_len-1 ) { - out << ", "; - if ( (j+1) % 8 == 0 ) - out << "\n\t"; - } - } - out << "\n};\n\n"; - - /* - * lelInfo - */ - out << "static struct lang_el_info " << lelInfo() << "[] = {\n"; - for ( int i = 0; i < runtimeData->num_lang_els; i++ ) { - struct lang_el_info *el = &runtimeData->lel_info[i]; - out << "\t{"; - - /* Name. */ - out << " \""; - escapeLiteralString( out, el->name ); - out << "\", "; - - /* Name. */ - out << " \""; - escapeLiteralString( out, el->xml_tag ); - out << "\", "; - - /* Repeat, literal, ignore flags. */ - out << (int)el->repeat << ", "; - out << (int)el->list << ", "; - out << (int)el->literal << ", "; - out << (int)el->ignore << ", "; - out << el->frame_id << ", "; - out << el->object_type_id << ", "; - out << el->ofi_offset << ", "; - out << el->object_length << ", "; - out << el->term_dup_id << ", "; - out << el->mark_id << ", "; - out << el->capture_attr << ", "; - out << el->num_capture_attr; - - out << " }"; - - if ( i < runtimeData->num_lang_els-1 ) - out << ",\n"; - } - out << "\n};\n\n"; - - - for ( int i = 0; i < runtimeData->num_struct_els; i++ ) { - struct struct_el_info *el = &runtimeData->sel_info[i]; - if ( el->trees_len > 0 ) { - out << "static short struct_trees_" << i << "[] = {\n\t"; - - short *ti = el->trees; - for ( int j = 0; j < el->trees_len; j++ ) - out << ti[j] << ", "; - out << "\n};\n\n"; - } - } - - /* - * selInfo - */ - out << "static struct struct_el_info " << selInfo() << "[] = {\n"; - for ( int i = 0; i < runtimeData->num_struct_els; i++ ) { - struct struct_el_info *el = &runtimeData->sel_info[i]; - out << "\t{ "; - out << el->size << ", "; - - /* trees. 
*/ - if ( el->trees_len > 0 ) - out << "struct_trees_" << i << ", "; - else - out << "0, "; - out << el->trees_len << ", "; - - out << " },\n"; - } - out << "\n};\n\n"; - - /* - * frameInfo - */ - out << "static struct frame_info " << frameInfo() << "[] = {\n"; - for ( int i = 0; i < runtimeData->num_frames; i++ ) { - out << "\t{ "; - - /* The Name. */ - if ( runtimeData->frame_info[i].name ) - out << "\"" << runtimeData->frame_info[i].name << "\", "; - else - out << "\"\", "; - - if ( runtimeData->frame_info[i].codeLenWV > 0 ) - out << "code_" << i << "_wv, "; - else - out << "0, "; - out << runtimeData->frame_info[i].codeLenWV << ", "; - - if ( runtimeData->frame_info[i].codeLenWC > 0 ) - out << "code_" << i << "_wc, "; - else - out << "0, "; - out << runtimeData->frame_info[i].codeLenWC << ", "; - - /* locals. */ - if ( runtimeData->frame_info[i].locals_len > 0 ) - out << "locals_" << i << ", "; - else - out << "0, "; - - out << runtimeData->frame_info[i].locals_len << ", "; - - out << - runtimeData->frame_info[i].arg_size << ", " << - runtimeData->frame_info[i].frame_size; - - out << " }"; - - if ( i < runtimeData->num_frames-1 ) - out << ",\n"; - } - out << "\n};\n\n"; - - - /* - * prodInfo - */ - out << "static struct prod_info " << prodInfo() << "[] = {\n"; - for ( int i = 0; i < runtimeData->num_prods; i++ ) { - out << "\t{ "; - - out << runtimeData->prod_info[i].lhs_id << ", "; - out << runtimeData->prod_info[i].prod_num << ", "; - out << runtimeData->prod_info[i].length << ", "; - - out << - '"' << runtimeData->prod_info[i].name << "\", " << - runtimeData->prod_info[i].frame_id << ", " << - (int)runtimeData->prod_info[i].lhs_upref << ", "; - - if ( runtimeData->prod_info[i].copy_len > 0 ) - out << "copy_" << i << ", "; - else - out << "0, "; - - out << runtimeData->prod_info[i].copy_len << ", "; - - - out << " }"; - - if ( i < runtimeData->num_prods-1 ) - out << ",\n"; - } - out << "\n};\n\n"; - - /* - * patReplInfo - */ - out << "static struct 
pat_cons_info " << patReplInfo() << "[] = {\n"; - for ( int i = 0; i < runtimeData->num_patterns; i++ ) { - out << " { " << runtimeData->pat_repl_info[i].offset << ", " << - runtimeData->pat_repl_info[i].num_bindings << " },\n"; - } - out << "};\n\n"; - - /* - * patReplNodes - */ - out << "static struct pat_cons_node " << patReplNodes() << "[] = {\n"; - for ( int i = 0; i < runtimeData->num_pattern_nodes; i++ ) { - struct pat_cons_node &node = runtimeData->pat_repl_nodes[i]; - out << " { " << node.id << ", " << - node.prod_num << ", " << node.next << ", " << - node.child << ", " << node.bind_id << ", "; - if ( node.data == 0 ) - out << "0"; - else { - out << '\"'; - escapeLiteralString( out, node.data, node.length ); - out << '\"'; - } - out << ", " << node.length << ", "; - - out << node.left_ignore << ", "; - out << node.right_ignore << ", "; - - out << (int)node.stop << " },\n"; - } - out << "};\n\n"; - - /* - * functionInfo - */ - out << "static struct function_info " << functionInfo() << "[] = {\n"; - for ( int i = 0; i < runtimeData->num_functions; i++ ) { - out << "\t{ " << - runtimeData->function_info[i].frame_id << ", " << - runtimeData->function_info[i].arg_size << ", " << - runtimeData->function_info[i].frame_size; - out << " }"; - - if ( i < runtimeData->num_functions-1 ) - out << ",\n"; - } - out << "\n};\n\n"; - - /* - * regionInfo - */ - out << "static struct region_info " << regionInfo() << "[] = {\n"; - for ( int i = 0; i < runtimeData->num_regions; i++ ) { - out << "\t{ " << runtimeData->region_info[i].default_token << - ", " << runtimeData->region_info[i].eof_frame_id << - ", " << runtimeData->region_info[i].ci_lel_id << - " }"; - - if ( i < runtimeData->num_regions-1 ) - out << ",\n"; - } - out << "\n};\n\n"; - - /* - * genericInfo - */ - out << "static struct generic_info " << genericInfo() << "[] = {\n"; - for ( int i = 0; i < runtimeData->num_generics; i++ ) { - out << "\t{ " << - runtimeData->generic_info[i].type << ", " << - 
runtimeData->generic_info[i].el_struct_id << ", " << - runtimeData->generic_info[i].el_offset << ", " << - runtimeData->generic_info[i].key_type << ", " << - runtimeData->generic_info[i].key_offset << ", " << - runtimeData->generic_info[i].value_type << ", " << - runtimeData->generic_info[i].value_offset << ", " << - runtimeData->generic_info[i].parser_id; - out << " },\n"; - } - out << "};\n\n"; - - /* - * literals - */ - out << "static const char *" << litdata() << "[] = {\n"; - for ( int i = 0; i < runtimeData->num_literals; i++ ) { - out << "\t\""; - escapeLiteralString( out, runtimeData->litdata[i] ); - out << "\",\n"; - } - out << "};\n\n"; - - out << "static long " << litlen() << "[] = {\n\t"; - for ( int i = 0; i < runtimeData->num_literals; i++ ) - out << runtimeData->litlen[i] << ", "; - out << "};\n\n"; - - out << "static head_t *" << literals() << "[] = {\n\t"; - for ( int i = 0; i < runtimeData->num_literals; i++ ) - out << "0, "; - out << "};\n\n"; - - out << "static int startStates[] = {\n\t"; - for ( long i = 0; i < runtimeData->num_parsers; i++ ) { - out << runtimeData->start_states[i] << ", "; - } - out << "};\n\n"; - - out << "static int eofLelIds[] = {\n\t"; - for ( long i = 0; i < runtimeData->num_parsers; i++ ) { - out << runtimeData->eof_lel_ids[i] << ", "; - } - out << "};\n\n"; - - out << "static int parserLelIds[] = {\n\t"; - for ( long i = 0; i < runtimeData->num_parsers; i++ ) { - out << runtimeData->parser_lel_ids[i] << ", "; - } - out << "};\n\n"; - - out << "static CaptureAttr captureAttr[] = {\n"; - for ( long i = 0; i < runtimeData->num_captured_attr; i++ ) { - out << "\t{ " << - runtimeData->capture_attr[i].mark_enter << ", " << - runtimeData->capture_attr[i].mark_leave << ", " << - runtimeData->capture_attr[i].offset << " },\n"; - } - - out << "};\n\n"; - - out << - "tree_t **" << objectName << "_host_call( program_t *prg, long code, tree_t **sp );\n" - "void " << objectName << "_commit_reduce_forward( program_t *prg, tree_t 
**root,\n" - " struct pda_run *pda_run, parse_tree_t *pt );\n" - "long " << objectName << "_commit_union_sz( int reducer );\n" - "void " << objectName << "_init_need();\n" - "int " << objectName << "_reducer_need_tok( program_t *prg, " - "struct pda_run *pda_run, int id );\n" - "int " << objectName << "_reducer_need_ign( program_t *prg, " - "struct pda_run *pda_run );\n" - "void " << objectName << "_read_reduce( program_t *prg, int reducer, input_t *stream );\n" - "\n"; - - out << - "struct colm_sections " << objectName << " = \n" - "{\n" - " " << lelInfo() << ",\n" - " " << runtimeData->num_lang_els << ",\n" - "\n" - " " << selInfo() << ",\n" - " " << runtimeData->num_struct_els << ",\n" - "\n" - " " << prodInfo() << ",\n" - " " << runtimeData->num_prods << ",\n" - "\n" - " " << regionInfo() << ",\n" - " " << runtimeData->num_regions << ",\n" - "\n" - " " << rootCode() << ",\n" - " " << runtimeData->root_code_len << ",\n" - " " << runtimeData->root_frame_id << ",\n" - "\n" - " " << frameInfo() << ",\n" - " " << runtimeData->num_frames << ",\n" - "\n" - " " << functionInfo() << ",\n" - " " << runtimeData->num_functions << ",\n" - "\n" - " " << patReplInfo() << ",\n" - " " << runtimeData->num_patterns << ",\n" - "\n" - " " << patReplNodes() << ",\n" - " " << runtimeData->num_pattern_nodes << ",\n" - "\n" - " " << genericInfo() << ",\n" - " " << runtimeData->num_generics << ",\n" - " " << runtimeData->argv_generic_id << ",\n" - " " << runtimeData->stds_generic_id << ",\n" - "\n" - " " << litdata() << ",\n" - " " << litlen() << ",\n" - " " << literals() << ",\n" - " " << runtimeData->num_literals << ",\n" - "\n" - " captureAttr,\n" - " " << runtimeData->num_captured_attr << ",\n" - "\n" - " &fsmTables_start,\n" - " &pid_0_pdaTables,\n" - " startStates, eofLelIds, parserLelIds, " << runtimeData->num_parsers << ",\n" - "\n" - " " << runtimeData->global_size << ",\n" - "\n" - " " << runtimeData->first_non_term_id << ",\n" - " " << runtimeData->first_struct_el_id << ",\n" 
- " " << runtimeData->integer_id << ",\n" - " " << runtimeData->string_id << ",\n" - " " << runtimeData->any_id << ",\n" - " " << runtimeData->eof_id << ",\n" - " " << runtimeData->no_token_id << ",\n" - " " << runtimeData->global_id << ",\n" - " " << runtimeData->argv_el_id << ",\n" - " " << runtimeData->stds_el_id << ",\n" - " " << runtimeData->struct_inbuilt_id << ",\n" - " " << runtimeData->struct_inbuilt_id << ",\n" - " " << runtimeData->struct_stream_id << ",\n" - " &fsm_execute,\n" - " &sendNamedLangEl,\n" - " &initBindings,\n" - " &popBinding,\n" - " &" << objectName << "_host_call,\n" - " &" << objectName << "_commit_reduce_forward,\n" - " &" << objectName << "_commit_union_sz,\n" - " &" << objectName << "_init_need,\n" - " &" << objectName << "_reducer_need_tok,\n" - " &" << objectName << "_reducer_need_ign,\n" - " &" << objectName << "_read_reduce,\n" - "};\n" - "\n"; -} - -void PdaCodeGen::writeParserData( long id, struct pda_tables *tables ) -{ - String prefix = "pid_" + String(0, "%ld", id) + "_"; - - out << "static int " << prefix << indicies() << "[] = {\n\t"; - for ( int i = 0; i < tables->num_indicies; i++ ) { - out << tables->indicies[i]; - - if ( i < tables->num_indicies-1 ) { - out << ", "; - if ( (i+1) % 8 == 0 ) - out << "\n\t"; - } - } - out << "\n};\n\n"; - - out << "static int " << prefix << owners() << "[] = {\n\t"; - for ( int i = 0; i < tables->num_indicies; i++ ) { - out << tables->owners[i]; - - if ( i < tables->num_indicies-1 ) { - out << ", "; - if ( (i+1) % 8 == 0 ) - out << "\n\t"; - } - } - out << "\n};\n\n"; - - out << "static int " << prefix << keys() << "[] = {\n\t"; - for ( int i = 0; i < tables->num_keys; i++ ) { - out << tables->keys[i]; - - if ( i < tables->num_keys-1 ) { - out << ", "; - if ( (i+1) % 8 == 0 ) - out << "\n\t"; - } - } - out << "\n};\n\n"; - - out << "static unsigned int " << prefix << offsets() << "[] = {\n\t"; - for ( int i = 0; i < tables->num_states; i++ ) { - out << tables->offsets[i]; - - if ( i < 
tables->num_states-1 ) { - out << ", "; - if ( (i+1) % 8 == 0 ) - out << "\n\t"; - } - } - out << "\n};\n\n"; - - out << "static unsigned int " << prefix << targs() << "[] = {\n\t"; - for ( int i = 0; i < tables->num_targs; i++ ) { - out << tables->targs[i]; - - if ( i < tables->num_targs-1 ) { - out << ", "; - if ( (i+1) % 8 == 0 ) - out << "\n\t"; - } - } - out << "\n};\n\n"; - - out << "static unsigned int " << prefix << actInds() << "[] = {\n\t"; - for ( int i = 0; i < tables->num_act_inds; i++ ) { - out << tables->act_inds[i]; - - if ( i < tables->num_act_inds-1 ) { - out << ", "; - if ( (i+1) % 8 == 0 ) - out << "\n\t"; - } - } - out << "\n};\n\n"; - - out << "static unsigned int " << prefix << actions() << "[] = {\n\t"; - for ( int i = 0; i < tables->num_actions; i++ ) { - out << tables->actions[i]; - - if ( i < tables->num_actions-1 ) { - out << ", "; - if ( (i+1) % 8 == 0 ) - out << "\n\t"; - } - } - out << "\n};\n\n"; - - out << "static int " << prefix << commitLen() << "[] = {\n\t"; - for ( int i = 0; i < tables->num_commit_len; i++ ) { - out << tables->commit_len[i]; - - if ( i < tables->num_commit_len-1 ) { - out << ", "; - if ( (i+1) % 8 == 0 ) - out << "\n\t"; - } - } - out << "\n};\n\n"; - - out << "static int " << prefix << tokenRegionInds() << "[] = {\n\t"; - for ( int i = 0; i < tables->num_states; i++ ) { - out << tables->token_region_inds[i]; - - if ( i < tables->num_states-1 ) { - out << ", "; - if ( (i+1) % 8 == 0 ) - out << "\n\t"; - } - } - out << "\n};\n\n"; - - out << "static int " << prefix << tokenRegions() << "[] = {\n\t"; - for ( int i = 0; i < tables->num_region_items; i++ ) { - out << tables->token_regions[i]; - - if ( i < tables->num_region_items-1 ) { - out << ", "; - if ( (i+1) % 8 == 0 ) - out << "\n\t"; - } - } - out << "\n};\n\n"; - - out << "static int " << prefix << tokenPreRegions() << "[] = {\n\t"; - for ( int i = 0; i < tables->num_pre_region_items; i++ ) { - out << tables->token_pre_regions[i]; - - if ( i < 
tables->num_pre_region_items-1 ) { - out << ", "; - if ( (i+1) % 8 == 0 ) - out << "\n\t"; - } - } - out << "\n};\n\n"; - - out << - "static struct pda_tables " << prefix << "pdaTables =\n" - "{\n" - " " << prefix << indicies() << ",\n" - " " << prefix << owners() << ",\n" - " " << prefix << keys() << ",\n" - " " << prefix << offsets() << ",\n" - " " << prefix << targs() << ",\n" - " " << prefix << actInds() << ",\n" - " " << prefix << actions() << ",\n" - " " << prefix << commitLen() << ",\n" - - " " << prefix << tokenRegionInds() << ",\n" - " " << prefix << tokenRegions() << ",\n" - " " << prefix << tokenPreRegions() << ",\n" - "\n" - " " << tables->num_indicies << ",\n" - " " << tables->num_keys << ",\n" - " " << tables->num_states << ",\n" - " " << tables->num_targs << ",\n" - " " << tables->num_act_inds << ",\n" - " " << tables->num_actions << ",\n" - " " << tables->num_commit_len << ",\n" - " " << tables->num_region_items << ",\n" - " " << tables->num_pre_region_items << "\n" - "};\n" - "\n"; -} - diff --git a/src/pdacodegen.h b/src/pdacodegen.h deleted file mode 100644 index a2cb440e..00000000 --- a/src/pdacodegen.h +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Copyright 2007-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef _COLM_PDACODEGEN_H -#define _COLM_PDACODEGEN_H - -struct Compiler; - -struct PdaCodeGen -{ - PdaCodeGen( ostream &out ) - : - out(out) - {} - - /* - * Code Generation. - */ - void startCodeGen(); - void endCodeGen( int endLine ); - - void writeReference( Production *prod, char *data ); - void writeUndoReference( Production *prod, char *data ); - void writeFinalReference( Production *prod, char *data ); - void writeFirstLocate( Production *prod ); - void writeRhsLocate( Production *prod ); - - void defineRuntime(); - void writeRuntimeData( colm_sections *runtimeData, struct pda_tables *pdaTables ); - void writeParserData( long id, struct pda_tables *tables ); - - String PARSER() { return "parser_"; } - - String startState() { return PARSER() + "startState"; } - String indicies() { return PARSER() + "indicies"; } - String owners() { return PARSER() + "owners"; } - String keys() { return PARSER() + "keys"; } - String offsets() { return PARSER() + "offsets"; } - String targs() { return PARSER() + "targs"; } - String actInds() { return PARSER() + "actInds"; } - String actions() { return PARSER() + "actions"; } - String commitLen() { return PARSER() + "commitLen"; } - String fssProdIdIndex() { return PARSER() + "fssProdIdIndex"; } - String prodLengths() { return PARSER() + "prodLengths"; } - String prodLhsIds() { return PARSER() + "prodLhsIds"; } - String prodNames() { return PARSER() + "prodNames"; } - String lelInfo() { return PARSER() + "lelInfo"; } - String selInfo() 
{ return PARSER() + "selInfo"; } - String prodInfo() { return PARSER() + "prodInfo"; } - String tokenRegionInds() { return PARSER() + "tokenRegionInds"; } - String tokenRegions() { return PARSER() + "tokenRegions"; } - String tokenPreRegions() { return PARSER() + "tokenPreRegions"; } - String prodCodeBlocks() { return PARSER() + "prodCodeBlocks"; } - String prodCodeBlockLens() { return PARSER() + "prodCodeBlockLens"; } - String rootCode() { return PARSER() + "rootCode"; } - String frameInfo() { return PARSER() + "frameInfo"; } - String functionInfo() { return PARSER() + "functionInfo"; } - String objFieldInfo() { return PARSER() + "objFieldInfo"; } - String patReplInfo() { return PARSER() + "patReplInfo"; } - String patReplNodes() { return PARSER() + "patReplNodes"; } - String regionInfo() { return PARSER() + "regionInfo"; } - String genericInfo() { return PARSER() + "genericInfo"; } - String litdata() { return PARSER() + "litdata"; } - String litlen() { return PARSER() + "litlen"; } - String literals() { return PARSER() + "literals"; } - String fsmTables() { return PARSER() + "fsmTables"; } - - /* - * Graphviz Generation - */ - void writeTransList( PdaState *state ); - void writeDotFile( PdaGraph *graph ); - void writeDotFile( ); - - ostream &out; -}; - -extern "C" -{ - void internalFsmExecute( struct pda_run *pdaRun, struct input_impl *inputStream ); - void internalSendNamedLangEl( program_t *prg, tree_t **sp, - struct pda_run *pdaRun, struct input_impl *is ); - void internalInitBindings( struct pda_run *pdaRun ); - void internalPopBinding( struct pda_run *pdaRun, parse_tree_t *parseTree ); -} - -#endif /* _COLM_PDACODEGEN_H */ - diff --git a/src/pdagraph.cc b/src/pdagraph.cc deleted file mode 100644 index c18c61e1..00000000 --- a/src/pdagraph.cc +++ /dev/null @@ -1,533 +0,0 @@ -/* - * Copyright 2006-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated 
documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "pdagraph.h" - -#include <assert.h> -#include <stdbool.h> - -#include <iostream> - -using std::cerr; -using std::endl; - -/* Create a new fsm state. State has not out transitions or in transitions, not - * out out transition data and not number. */ -PdaState::PdaState() -: - /* No in transitions. */ - inRange(), - - /* No entry points, or epsilon trans. */ - pendingCommits(), - - stateSet(0), - - /* Only used during merging. Normally null. */ - stateDictEl(0), - - /* No state identification bits. */ - stateBits(0), - - onClosureQueue(false), - inClosedMap(false), - followMarked(false), - - advanceReductions(false) -{ -} - -/* Copy everything except the action transitions. That is left up to the - * PdaGraph copy constructor. */ -PdaState::PdaState(const PdaState &other) -: - inRange(), - - /* Duplicate the entry id set, epsilon transitions and context sets. These - * are sets of integers and as such need no fixing. 
*/ - pendingCommits(other.pendingCommits), - - stateSet(0), - - /* This is only used during merging. Normally null. */ - stateDictEl(0), - - /* Fsm state data. */ - stateBits(other.stateBits), - - dotSet(other.dotSet), - onClosureQueue(false), - inClosedMap(false), - followMarked(false), - - transMap() -{ - /* Duplicate all the transitions. */ - for ( TransMap::Iter trans = other.transMap; trans.lte(); trans++ ) { - /* Dupicate and store the orginal target in the transition. This will - * be corrected once all the states have been created. */ - PdaTrans *newTrans = new PdaTrans(*trans->value); - newTrans->toState = trans->value->toState; - transMap.append( TransMapEl( newTrans->lowKey, newTrans ) ); - } -} - -/* If there is a state dict element, then delete it. Everything else is left - * up to the FsmGraph destructor. */ -PdaState::~PdaState() -{ - if ( stateDictEl != 0 ) - delete stateDictEl; -} - -/* Graph constructor. */ -PdaGraph::PdaGraph() -: - /* No start state. */ - startState(0) -{ -} - -/* Copy all graph data including transitions. */ -PdaGraph::PdaGraph( const PdaGraph &graph ) -: - /* Lists start empty. Will be filled by copy. */ - stateList(), - misfitList(), - - /* Copy in the entry points, - * pointers will be resolved later. */ - startState(graph.startState), - - /* Will be filled by copy. */ - finStateSet() -{ - /* Create the states and record their map in the original state. */ - PdaStateList::Iter origState = graph.stateList; - for ( ; origState.lte(); origState++ ) { - /* Make the new state. */ - PdaState *newState = new PdaState( *origState ); - - /* Add the state to the list. */ - stateList.append( newState ); - - /* Set the mapsTo item of the old state. */ - origState->stateMap = newState; - } - - /* Derefernce all the state maps. */ - for ( PdaStateList::Iter state = stateList; state.lte(); state++ ) { - for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { - /* The points to the original in the src machine. 
The taget's duplicate - * is in the statemap. */ - PdaState *toState = trans->value->toState != 0 ? - trans->value->toState->stateMap : 0; - - /* Attach The transition to the duplicate. */ - trans->value->toState = 0; - attachTrans( state, toState, trans->value ); - } - } - - /* Fix the start state pointer and the new start state's count of in - * transiions. */ - startState = startState->stateMap; - - /* Build the final state set. */ - PdaStateSet::Iter st = graph.finStateSet; - for ( ; st.lte(); st++ ) - finStateSet.insert((*st)->stateMap); -} - -/* Deletes all transition data then deletes each state. */ -PdaGraph::~PdaGraph() -{ - /* Delete all the transitions. */ - PdaStateList::Iter state = stateList; - for ( ; state.lte(); state++ ) { - for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) - delete trans->value; - } - - /* Delete all the states. */ - stateList.empty(); -} - -/* Set a state final. The state has its isFinState set to true and the state - * is added to the finStateSet. */ -void PdaGraph::setFinState( PdaState *state ) -{ - /* Is it already a fin state. */ - if ( state->stateBits & SB_ISFINAL ) - return; - - state->stateBits |= SB_ISFINAL; - finStateSet.insert( state ); -} - -void PdaGraph::unsetAllFinStates( ) -{ - for ( PdaStateSet::Iter st = finStateSet; st.lte(); st++ ) { - PdaState *state = *st; - state->stateBits &= ~ SB_ISFINAL; - } - finStateSet.empty(); -} - -/* Set and unset a state as the start state. */ -void PdaGraph::setStartState( PdaState *state ) -{ - /* Sould change from unset to set. */ - assert( startState == 0 ); - startState = state; -} - -/* Mark all states reachable from state. Traverses transitions forward. Used - * for removing states that have no path into them. */ -void PdaGraph::markReachableFromHere( PdaState *state ) -{ - /* Base case: return; */ - if ( state->stateBits & SB_ISMARKED ) - return; - - /* Set this state as processed. 
We are going to visit all states that this - * state has a transition to. */ - state->stateBits |= SB_ISMARKED; - - /* Recurse on all out transitions. */ - for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { - if ( trans->value->toState != 0 ) - markReachableFromHere( trans->value->toState ); - } -} - -void PdaGraph::setStateNumbers() -{ - int curNum = 0; - PdaStateList::Iter state = stateList; - for ( ; state.lte(); state++ ) - state->stateNum = curNum++; -} - -/* Insert a transition into an inlist. The head must be supplied. */ -void PdaGraph::attachToInList( PdaState *from, PdaState *to, - PdaTrans *&head, PdaTrans *trans ) -{ - trans->ilnext = head; - trans->ilprev = 0; - - /* If in trans list is not empty, set the head->prev to trans. */ - if ( head != 0 ) - head->ilprev = trans; - - /* Now insert ourselves at the front of the list. */ - head = trans; -}; - -/* Detach a transition from an inlist. The head of the inlist must be supplied. */ -void PdaGraph::detachFromInList( PdaState *from, PdaState *to, - PdaTrans *&head, PdaTrans *trans ) -{ - /* Detach in the inTransList. */ - if ( trans->ilprev == 0 ) - head = trans->ilnext; - else - trans->ilprev->ilnext = trans->ilnext; - - if ( trans->ilnext != 0 ) - trans->ilnext->ilprev = trans->ilprev; -} - -/* Attach states on the default transition, range list or on out/in list key. - * Type of attaching and is controlled by keyType. First makes a new - * transition. If there is already a transition out from fromState on the - * default, then will assertion fail. */ -PdaTrans *PdaGraph::appendNewTrans( PdaState *from, PdaState *to, long lowKey, long ) -{ - /* Make the new transition. */ - PdaTrans *retVal = new PdaTrans(); - - /* The transition is now attached. Remember the parties involved. */ - retVal->fromState = from; - retVal->toState = to; - - /* Make the entry in the out list for the transitions. 
*/ - from->transMap.append( TransMapEl( lowKey, retVal ) ); - - /* Set the the keys of the new trans. */ - retVal->lowKey = lowKey; - - /* Attach using inRange as the head pointer. */ - attachToInList( from, to, to->inRange.head, retVal ); - - return retVal; -} - -PdaTrans *PdaGraph::insertNewTrans( PdaState *from, PdaState *to, long lowKey, long ) -{ - /* Make the new transition. */ - PdaTrans *retVal = new PdaTrans(); - - /* The transition is now attached. Remember the parties involved. */ - retVal->fromState = from; - retVal->toState = to; - - /* Make the entry in the out list for the transitions. */ - from->transMap.insert( lowKey, retVal ); - - /* Set the the keys of the new trans. */ - retVal->lowKey = lowKey; - - /* Attach using inRange as the head pointer. */ - attachToInList( from, to, to->inRange.head, retVal ); - - return retVal; -} - -/* Attach for range lists or for the default transition. Type of attaching is - * controlled by the keyType parameter. This attach should be used when a - * transition already is allocated and must be attached to a target state. - * Does not handle adding the transition into the out list. */ -void PdaGraph::attachTrans( PdaState *from, PdaState *to, PdaTrans *trans ) -{ - assert( trans->fromState == 0 && trans->toState == 0 ); - trans->fromState = from; - trans->toState = to; - - /* Attach using the inRange pointer as the head pointer. */ - attachToInList( from, to, to->inRange.head, trans ); -} - -/* Detach for out/in lists or for default transition. The type of detaching is - * controlled by the keyType parameter. */ -void PdaGraph::detachTrans( PdaState *from, PdaState *to, PdaTrans *trans ) -{ - assert( trans->fromState == from && trans->toState == to ); - trans->fromState = 0; - trans->toState = 0; - - /* Detach using to's inRange pointer as the head. */ - detachFromInList( from, to, to->inRange.head, trans ); -} - - -/* Detach a state from the graph. Detaches and deletes transitions in and out - * of the state. 
Empties inList and outList. Removes the state from the final - * state set. A detached state becomes useless and should be deleted. */ -void PdaGraph::detachState( PdaState *state ) -{ - /* Detach the in transitions from the inRange list of transitions. */ - while ( state->inRange.head != 0 ) { - /* Get pointers to the trans and the state. */ - PdaTrans *trans = state->inRange.head; - PdaState *fromState = trans->fromState; - - /* Detach the transitions from the source state. */ - detachTrans( fromState, state, trans ); - - /* Ok to delete the transition. */ - fromState->transMap.remove( trans->lowKey ); - delete trans; - } - - /* Detach out range transitions. */ - for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { - detachTrans( state, trans->value->toState, trans->value ); - delete trans->value; - } - - /* Delete all of the out range pointers. */ - state->transMap.empty(); - - /* Unset final stateness before detaching from graph. */ - if ( state->stateBits & SB_ISFINAL ) - finStateSet.remove( state ); -} - -/* Move all the transitions that go into src so that they go into dest. */ -void PdaGraph::inTransMove( PdaState *dest, PdaState *src ) -{ - /* Do not try to move in trans to and from the same state. */ - assert( dest != src ); - - /* If src is the start state, dest becomes the start state. */ - assert( src != startState ); - - /* Move the transitions in inRange. */ - while ( src->inRange.head != 0 ) { - /* Get trans and from state. */ - PdaTrans *trans = src->inRange.head; - PdaState *fromState = trans->fromState; - - /* Detach from src, reattach to dest. */ - detachTrans( fromState, src, trans ); - attachTrans( fromState, dest, trans ); - } -} - -void PdaGraph::addInReduction( PdaTrans *dest, long prodId, long prior ) -{ - /* Look for the reduction. If not there insert it, otherwise take - * the max of the priorities. 
*/ - ReductionMapEl *redMapEl = dest->reductions.find( prodId ); - if ( redMapEl == 0 ) - dest->reductions.insert( prodId, prior ); - else if ( prior > redMapEl->value ) - redMapEl->value = prior; -} - -/* Callback invoked when another trans (or possibly this) is added into this - * transition during the merging process. Draw in any properties of srcTrans - * into this transition. AddInTrans is called when a new transitions is made - * that will be a duplicate of another transition or a combination of several - * other transitions. AddInTrans will be called for each transition that the - * new transition is to represent. */ -void PdaGraph::addInTrans( PdaTrans *destTrans, PdaTrans *srcTrans ) -{ - /* Protect against adding in from ourselves. */ - if ( srcTrans != destTrans ) { - - /* Add in the shift priority. */ - if ( destTrans->isShift && srcTrans->isShift ) { - /* Both shifts are set. We want the max of the two. */ - if ( srcTrans->shiftPrior > destTrans->shiftPrior ) - destTrans->shiftPrior = srcTrans->shiftPrior; - } - else if ( srcTrans->isShift ) { - /* Just the source is set, copy the source prior over. */ - destTrans->shiftPrior = srcTrans->shiftPrior; - } - - /* If either is a shift, dest is a shift. */ - destTrans->isShift = destTrans->isShift || srcTrans->isShift; - - /* Add in the reductions. */ - for ( ReductionMap::Iter red = srcTrans->reductions; red.lte(); red++ ) - addInReduction( destTrans, red->key, red->value ); - - /* Add in the commit points. */ - destTrans->commits.insert( srcTrans->commits ); - - if ( srcTrans->toState->advanceReductions ) - destTrans->toState->advanceReductions = true; - - if ( srcTrans->noPreIgnore ) - destTrans->noPreIgnore = true; - if ( srcTrans->noPostIgnore ) - destTrans->noPostIgnore = true; - } -} - -/* NO LONGER USED. */ -void PdaGraph::addInState( PdaState *destState, PdaState *srcState ) -{ - /* Draw in any properties of srcState into destState. 
*/ - if ( srcState != destState ) { - /* Get the epsilons, context, out priorities. */ - destState->pendingCommits.insert( srcState->pendingCommits ); - if ( srcState->pendingCommits.length() > 0 ) - cerr << "THERE ARE PENDING COMMITS DRAWN IN" << endl; - - /* Parser generation data. */ - destState->dotSet.insert( srcState->dotSet ); - - if ( srcState->onClosureQueue && !destState->onClosureQueue ) { - stateClosureQueue.append( destState ); - destState->onClosureQueue = true; - } - } -} - -/* Make a new state. The new state will be put on the graph's - * list of state. The new state can be created final or non final. */ -PdaState *PdaGraph::addState() -{ - /* Make the new state to return. */ - PdaState *state = new PdaState(); - - /* Create the new state. */ - stateList.append( state ); - - return state; -} - - -/* Follow from to the final state of srcFsm. */ -PdaState *PdaGraph::followFsm( PdaState *from, PdaGraph *srcFsm ) -{ - PdaState *followSrc = srcFsm->startState; - - while ( ! followSrc->isFinState() ) { - assert( followSrc->transMap.length() == 1 ); - PdaTrans *followTrans = followSrc->transMap[0].value; - - PdaTrans *inTrans = from->findTrans( followTrans->lowKey ); - assert( inTrans != 0 ); - - from = inTrans->toState; - followSrc = followTrans->toState; - } - - return from; -} - -int PdaGraph::fsmLength( ) -{ - int length = 0; - PdaState *state = startState; - while ( ! state->isFinState() ) { - length += 1; - state = state->transMap[0].value->toState; - } - return length; -} - -/* Remove states that have no path to them from the start state. Recursively - * traverses the graph marking states that have paths into them. Then removes - * all states that did not get marked. */ -void PdaGraph::removeUnreachableStates() -{ - /* Mark all the states that can be reached - * through the existing set of entry points. 
*/ - if ( startState != 0 ) - markReachableFromHere( startState ); - - for ( PdaStateSet::Iter si = entryStateSet; si.lte(); si++ ) - markReachableFromHere( *si ); - - /* Delete all states that are not marked - * and unmark the ones that are marked. */ - PdaState *state = stateList.head; - while ( state ) { - PdaState *next = state->next; - - if ( state->stateBits & SB_ISMARKED ) - state->stateBits &= ~ SB_ISMARKED; - else { - detachState( state ); - stateList.detach( state ); - delete state; - } - - state = next; - } -} diff --git a/src/pdagraph.h b/src/pdagraph.h deleted file mode 100644 index 5cfc2a76..00000000 --- a/src/pdagraph.h +++ /dev/null @@ -1,517 +0,0 @@ -/* - * Copyright 2006-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
 */

#ifndef _COLM_PDAGRAPH_H
#define _COLM_PDAGRAPH_H

#include <assert.h>

#include <avltree.h>
#include <bstmap.h>
#include <vector.h>
#include <sbstmap.h>
#include <sbstset.h>
#include <sbsttable.h>
#include <bstset.h>
#include <compare.h>
#include <avltree.h>
#include <dlist.h>
#include <avlset.h>
#include <dlistmel.h>

/* Flags for states. These are bits in PdaState::stateBits. */
#define SB_ISFINAL 0x04
#define SB_ISMARKED 0x08
#define SB_ISSTART 0x10

/* Flags for transitions. */
#define TB_ISMARKED 0x01

struct PdaTrans;
struct PdaState;
struct PdaGraph;
struct TokenInstance;
struct Production;
struct LangEl;
struct TokenRegion;

typedef Vector<TokenRegion*> RegionVect;

typedef Vector<long> ActDataList;

/* One entry of the action set: a target, a commit length, an id and a list
 * of action values. The id always starts at zero; the action list is copied
 * from the constructor argument. */
struct ActionData
{
	ActionData( int targ, ActDataList &actions, int commitLen )
		: targ(targ), commitLen(commitLen), id(0), actions(actions) { }

	int targ;
	int commitLen;
	int id;

	ActDataList actions;
};


/* Ordering for ActionData. Compares targ, then commitLen, then id, and
 * finally the action lists. Returns -1, 1, or the result of the table
 * compare. */
struct CmpActionData
{
	static int compare( const ActionData &ap1, const ActionData &ap2 )
	{
		if ( ap1.targ < ap2.targ )
			return -1;
		else if ( ap1.targ > ap2.targ )
			return 1;
		else if ( ap1.commitLen < ap2.commitLen )
			return -1;
		else if ( ap1.commitLen > ap2.commitLen )
			return 1;
		else if ( ap1.id < ap2.id )
			return -1;
		else if ( ap1.id > ap2.id )
			return 1;

		/* Scalar fields are equal, fall back to comparing the lists. */
		return CmpTable< long, CmpOrd<long> >::
			compare( ap1.actions, ap2.actions );
	}
};

typedef AvlSet<ActionData, CmpActionData> PdaActionSet;
typedef AvlSetEl<ActionData> PdaActionSetEl;

/* List pointers for the closure queue. Goes into state. */
struct ClosureQueueListEl { PdaState *prev, *next; };

/* Queue of states, transitions to be closed.
*/
typedef DListMel< PdaState, ClosureQueueListEl > StateClosureQueue;
typedef DList<PdaTrans> TransClosureQueue;

/* Sets and vectors of productions. */
typedef BstSet< Production*, CmpOrd<Production*> > DefSet;
typedef CmpTable< Production*, CmpOrd<Production*> > CmpDefSet;
typedef BstSet< DefSet, CmpDefSet > DefSetSet;

typedef Vector< Production* > DefVect;
typedef BstSet< long, CmpOrd<long> > AlphSet;

/* A (state, production id) pair. Stored in PdaTrans::expandTo. */
struct ExpandToEl
{
	ExpandToEl( PdaState *state, int prodId )
		: state(state), prodId(prodId) { }

	PdaState *state;
	int prodId;
};

/* Ordering for ExpandToEl: by state pointer value first, then by prodId.
 * Returns -1, 0 or 1. */
struct CmpExpandToEl
{
	static inline int compare( const ExpandToEl &etel1, const ExpandToEl &etel2 )
	{
		if ( etel1.state < etel2.state )
			return -1;
		else if ( etel1.state > etel2.state )
			return 1;
		else if ( etel1.prodId < etel2.prodId )
			return -1;
		else if ( etel1.prodId > etel2.prodId )
			return 1;
		else
			return 0;
	}
};

typedef BstSet<ExpandToEl, CmpExpandToEl> ExpandToSet;
typedef BstSet< int, CmpOrd<int> > IntSet;
typedef CmpTable< int, CmpOrd<int> > CmpIntSet;

typedef BstSet< long, CmpOrd<long> > LongSet;
typedef CmpTable< long, CmpOrd<long> > CmpLongSet;

typedef BstMap< long, long, CmpOrd<long> > LongMap;
typedef BstMapEl< long, long > LongMapEl;

/* Production id sets are plain sets of long. */
typedef LongSet ProdIdSet;
typedef CmpLongSet CmpProdIdSet;

/* Set of states, list of states.
*/ -typedef BstSet<PdaState*> PdaStateSet; -typedef Vector<PdaState*> StateVect; -typedef DList<PdaState> PdaStateList; - -typedef LongMap FollowToAdd; -typedef LongMap ReductionMap; -typedef LongMapEl ReductionMapEl; - -struct ProdIdPair -{ - ProdIdPair( int onReduce, int length ) - : onReduce(onReduce), length(length) {} - - int onReduce; - int length; -}; - -struct CmpProdIdPair -{ - static inline int compare( const ProdIdPair &pair1, const ProdIdPair &pair2 ) - { - if ( pair1.onReduce < pair2.onReduce ) - return -1; - else if ( pair1.onReduce > pair2.onReduce ) - return 1; - else if ( pair1.length < pair2.length ) - return -1; - else if ( pair1.length > pair2.length ) - return 1; - else - return 0; - } -}; - -typedef BstSet< ProdIdPair, CmpProdIdPair > ProdIdPairSet; - -/* Transition class that implements actions and priorities. */ -struct PdaTrans -{ - PdaTrans() : - fromState(0), - toState(0), - isShift(false), - isShiftReduce(false), - shiftPrior(0), - noPreIgnore(false), - noPostIgnore(false) - { } - - PdaTrans( const PdaTrans &other ) : - lowKey(other.lowKey), - fromState(0), toState(0), - isShift(other.isShift), - isShiftReduce(other.isShiftReduce), - shiftPrior(other.shiftPrior), - reductions(other.reductions), - commits(other.commits), - noPreIgnore(false), - noPostIgnore(false) - { } - - long lowKey; - PdaState *fromState; - PdaState *toState; - - /* Pointers for outlist. */ - PdaTrans *prev, *next; - - /* Pointers for in-list. */ - PdaTrans *ilprev, *ilnext; - - long maxPrior(); - - /* Parse Table construction data. */ - bool isShift, isShiftReduce; - int shiftPrior; - ReductionMap reductions; - ActDataList actions; - ActDataList actOrds; - ActDataList actPriors; - - ExpandToSet expandTo; - - PdaActionSetEl *actionSetEl; - - LongSet commits; - LongSet afterShiftCommits; - - bool noPreIgnore; - bool noPostIgnore; -}; - -/* In transition list. Like DList except only has head pointers, which is all - * that is required. 
Insertion and deletion is handled by the graph. This
 * class provides the iterator of a single list. */
struct PdaTransInList
{
	PdaTransInList() : head(0) { }

	/* First transition on the list, null when the list is empty. */
	PdaTrans *head;

	/* Forward/backward iterator that follows the ilnext/ilprev links. */
	struct Iter
	{
		/* Default construct. */
		Iter() : ptr(0) { }

		/* Construct, assign from a list. */
		Iter( const PdaTransInList &il ) : ptr(il.head) { }
		Iter &operator=( const PdaTransInList &dl ) { ptr = dl.head; return *this; }

		/* lte() is true while the iterator still points at an element,
		 * end() is true once it has run off the list. */
		bool lte() const { return ptr != 0; }
		bool end() const { return ptr == 0; }

		/* At the first, last element. */
		bool first() const { return ptr && ptr->ilprev == 0; }
		bool last() const { return ptr && ptr->ilnext == 0; }

		/* Cast, dereference, arrow ops. */
		operator PdaTrans*() const { return ptr; }
		PdaTrans &operator *() const { return *ptr; }
		PdaTrans *operator->() const { return ptr; }

		/* Increment, decrement. Must not be used once ptr is null. */
		inline void operator++(int) { ptr = ptr->ilnext; }
		inline void operator--(int) { ptr = ptr->ilprev; }

		/* The iterator is simply a pointer. */
		PdaTrans *ptr;
	};
};

typedef DList<PdaTrans> PdaTransList;

/* An element in a state dict. The key is the set of states.
 * NOTE(review): targState is not set by the constructor; the creator of the
 * element presumably assigns it -- confirm. */
struct PdaStateDictEl
:
	public AvlTreeEl<PdaStateDictEl>
{
	PdaStateDictEl(const PdaStateSet &stateSet)
		: stateSet(stateSet) { }

	const PdaStateSet &getKey() { return stateSet; }
	PdaStateSet stateSet;
	PdaState *targState;
};

/* Dictionary mapping a set of states to a target state. */
typedef AvlTree< PdaStateDictEl, PdaStateSet, CmpTable<PdaState*> > PdaStateDict;

/* What items does a particular state encompass. */
typedef BstSet< long, CmpOrd<long> > DotSet;
typedef CmpTable< long, CmpOrd<long> > CmpDotSet;

/* Map of dot sets to states. The states themselves are the tree elements. */
typedef AvlTree< PdaState, DotSet, CmpDotSet > DotSetMap;
typedef PdaState DotSetMapEl;

/* Out transitions of a state, keyed by the transition's lowKey. */
typedef BstMap< long, PdaTrans* > TransMap;
typedef BstMapEl< long, PdaTrans* > TransMapEl;

/* State class that implements actions and priorities.
*/
struct PdaState
:
	public ClosureQueueListEl,
	public AvlTreeEl< PdaState >
{
	PdaState();
	PdaState(const PdaState &other);
	~PdaState();

	/* Is the state final? */
	bool isFinState() { return stateBits & SB_ISFINAL; }

	/* Look up the out transition on key. Returns null if there is none. */
	PdaTrans *findTrans( long key )
	{
		TransMapEl *transMapEl = transMap.find( key );
		if ( transMapEl == 0 )
			return 0;
		return transMapEl->value;
	}

	/* In transition list. */
	PdaTransInList inRange;

	ProdIdPairSet pendingCommits;

	/* When duplicating the fsm we need to map each
	 * state to the new state representing it. */
	PdaState *stateMap;

	/* When merging states (state machine operations) this next pointer is
	 * used for the list of states that need to be filled in. */
	PdaState *alg_next;

	PdaStateSet *stateSet;

	/* Identification for printing and stable minimization. */
	int stateNum;

	/* A pointer to a dict element that contains the set of states this state
	 * represents. This cannot go into alg, because alg.next is used during
	 * the merging process. Deleted by the destructor. */
	PdaStateDictEl *stateDictEl;

	/* Bits controlling the behaviour of the state during collapsing to dfa.
	 * Holds the SB_* flags. */
	int stateBits;

	/* State list elements. Note that these have the same names as the
	 * prev/next members of the ClosureQueueListEl base and hide them. */
	PdaState *next, *prev;

	/* For dotset map. */
	DotSet &getKey() { return dotSet; }

	/* Closure management. */
	DotSet dotSet;
	DotSet dotSet2;
	bool onClosureQueue;
	bool inClosedMap;
	bool followMarked;
	bool onStateList;

	/* Out transitions, keyed by lowKey. */
	TransMap transMap;

	RegionVect regions;
	RegionVect preRegions;

	bool advanceReductions;
};

/* Compare lists of epsilon transitions. Entries are name ids of targets. */
typedef CmpTable< int, CmpOrd<int> > CmpEpsilonTrans;

/* Compare sets of context values. */
typedef CmpTable< int, CmpOrd<int> > CmpContextSets;

/* Graph class that implements actions and priorities. */
struct PdaGraph
{
	/* Constructors/Destructors. */
	PdaGraph();
	PdaGraph( const PdaGraph &graph );
	~PdaGraph();

	/* The list of states. */
	PdaStateList stateList;
	PdaStateList misfitList;

	/* The start state. */
	PdaState *startState;
	PdaStateSet entryStateSet;

	/* The set of final states. */
	PdaStateSet finStateSet;

	/* Closure queues and maps. */
	DotSetMap closedMap;
	StateClosureQueue stateClosureQueue;
	StateClosureQueue stateClosedList;

	TransClosureQueue transClosureQueue;
	PdaState *stateClosureHead;

	LangEl **langElIndex;

	void setStartState( PdaState *state );
	void unsetStartState( );

	/*
	 * Basic attaching and detaching.
	 */

	/* Common to attaching/detaching list and default. */
	void attachToInList( PdaState *from, PdaState *to, PdaTrans *&head, PdaTrans *trans );
	void detachFromInList( PdaState *from, PdaState *to, PdaTrans *&head, PdaTrans *trans );

	/* Attach with a new transition. The last argument is unnamed. */
	PdaTrans *appendNewTrans( PdaState *from, PdaState *to, long onChar1, long );
	PdaTrans *insertNewTrans( PdaState *from, PdaState *to, long lowKey, long );

	/* Attach with an existing transition that is already in an out list. */
	void attachTrans( PdaState *from, PdaState *to, PdaTrans *trans );

	/* Detach a transition from a target state. */
	void detachTrans( PdaState *from, PdaState *to, PdaTrans *trans );

	/* Detach a state from the graph. */
	void detachState( PdaState *state );

	/*
	 * Callbacks.
	 */

	/* Add in the properties of srcTrans into this. */
	void addInReduction( PdaTrans *dest, long prodId, long prior );
	void addInTrans( PdaTrans *destTrans, PdaTrans *srcTrans );
	void addInState( PdaState *destState, PdaState *srcState );

	/*
	 * Allocation.
	 */

	/* New up a state and add it to the graph. */
	PdaState *addState();

	/*
	 * Fsm operators.
	 */

	/* Follow to the fin state of src fsm. */
	PdaState *followFsm( PdaState *from, PdaGraph *srcFsm );

	/*
	 * Final states
	 */

	/* Set and Unset a state as final. */
	void setFinState( PdaState *state );
	void unsetFinState( PdaState *state );
	void unsetAllFinStates( );

	/* Set State numbers starting at 0. */
	void setStateNumbers();

	/*
	 * Path pruning
	 */

	/* Mark all states reachable from state. */
	void markReachableFromHere( PdaState *state );

	/* Removes states that cannot be reached by any path in the fsm and are
	 * thus wasted silicon. */
	void removeUnreachableStates();

	/* Remove error actions from states on which the error transition will
	 * never be taken. */
	bool outListCovers( PdaState *state );

	/* Remove states that are on the misfit list. */
	void removeMisfits();


	/*
	 * Other
	 */

	/* Move the in trans into src into dest. */
	void inTransMove(PdaState *dest, PdaState *src);

	/* Number of transitions from the start state to the first final state. */
	int fsmLength( );

	/* Collected machine information. */
	unsigned long long maxState;
	unsigned long long maxAction;
	unsigned long long maxLelId;
	unsigned long long maxOffset;
	unsigned long long maxIndex;
	unsigned long long maxProdLen;

	PdaActionSet actionSet;
};

#endif /* _COLM_PDAGRAPH_H */

diff --git a/src/pdarun.c b/src/pdarun.c
deleted file mode 100644
index 327f801a..00000000
--- a/src/pdarun.c
+++ /dev/null
@@ -1,2290 +0,0 @@
/*
 * Copyright 2007-2018 Adrian Thurston <thurston@colm.net>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "pdarun.h"

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdbool.h>
#include <assert.h>

#include "config.h"
#include "debug.h"
#include "bytecode.h"
#include "tree.h"
#include "pool.h"
#include "internal.h"

#define act_sb 0x1
#define act_rb 0x2

/* Assemble a word into i from the four bytes p[0]..p[3], least significant
 * byte first. p is not advanced. */
#define read_word_p( i, p ) do { \
	i = ((word_t) p[0]); \
	i |= ((word_t) p[1]) << 8; \
	i |= ((word_t) p[2]) << 16; \
	i |= ((word_t) p[3]) << 24; \
} while(0)

/* Assemble a tree_t pointer into i from the four bytes p[0]..p[3], least
 * significant byte first. p is not advanced.
 * NOTE(review): only four bytes are read, so this can only reproduce a
 * pointer that fits in 32 bits -- confirm how this is meant to work on
 * targets with wider pointers. */
#define read_tree_p( i, p ) do { \
	word_t w; \
	w = ((word_t) p[0]); \
	w |= ((word_t) p[1]) << 8; \
	w |= ((word_t) p[2]) << 16; \
	w |= ((word_t) p[3]) << 24; \
	i = (tree_t*)w; \
} while(0)

/* bit 0: data needed.
bit 1: loc needed */ -#define RN_NONE 0x0 -#define RN_DATA 0x1 -#define RN_LOC 0x2 -#define RN_BOTH 0x3 - - -static void init_fsm_run( program_t *prg, struct pda_run *pda_run ) -{ - pda_run->fsm_tables = prg->rtd->fsm_tables; - - pda_run->consume_buf = 0; - - pda_run->p = pda_run->pe = 0; - pda_run->toklen = 0; - pda_run->scan_eof = 0; - - pda_run->pre_region = -1; -} - -static void clear_fsm_run( program_t *prg, struct pda_run *pda_run ) -{ - if ( pda_run->consume_buf != 0 ) { - /* Transfer the run buf list to the program */ - struct run_buf *head = pda_run->consume_buf; - struct run_buf *tail = head; - while ( tail->next != 0 ) - tail = tail->next; - - tail->next = prg->alloc_run_buf; - prg->alloc_run_buf = head; - } -} - -void colm_increment_steps( struct pda_run *pda_run ) -{ - pda_run->steps += 1; - //debug( prg, REALM_PARSE, "steps up to %ld\n", pdaRun->steps ); -} - -void colm_decrement_steps( struct pda_run *pda_run ) -{ - pda_run->steps -= 1; - //debug( prg, REALM_PARSE, "steps down to %ld\n", pdaRun->steps ); -} - -head_t *colm_stream_pull( program_t *prg, tree_t **sp, struct pda_run *pda_run, - struct input_impl *is, long length ) -{ - if ( pda_run != 0 ) { - struct run_buf *run_buf = pda_run->consume_buf; - if ( length > ( FSM_BUFSIZE - run_buf->length ) ) { - run_buf = new_run_buf( 0 ); - run_buf->next = pda_run->consume_buf; - pda_run->consume_buf = run_buf; - } - - char *dest = run_buf->data + run_buf->length; - - is->funcs->get_data( prg, is, dest, length ); - location_t *loc = location_allocate( prg ); - is->funcs->consume_data( prg, is, length, loc ); - - run_buf->length += length; - - pda_run->p = pda_run->pe = 0; - pda_run->toklen = 0; - - head_t *tokdata = colm_string_alloc_pointer( prg, dest, length ); - tokdata->location = loc; - - return tokdata; - } - else { - head_t *head = init_str_space( length ); - char *dest = (char*)head->data; - - is->funcs->get_data( prg, is, dest, length ); - location_t *loc = location_allocate( prg ); - 
is->funcs->consume_data( prg, is, length, loc ); - head->location = loc; - - return head; - } -} - -void colm_stream_push_text( struct colm_program *prg, struct input_impl *is, const char *data, long length ) -{ - is->funcs->prepend_data( prg, is, data, length ); -} - -void colm_stream_push_tree( struct colm_program *prg, struct input_impl *is, tree_t *tree, int ignore ) -{ - is->funcs->prepend_tree( prg, is, tree, ignore ); -} - -void colm_stream_push_stream( struct colm_program *prg, struct input_impl *is, stream_t *stream ) -{ - is->funcs->prepend_stream( prg, is, stream ); -} - -void colm_undo_stream_push( program_t *prg, tree_t **sp, struct input_impl *is, long length ) -{ - if ( length < 0 ) { - /* tree_t *tree = */ is->funcs->undo_prepend_tree( prg, is ); - // colm_tree_downref( prg, sp, tree ); - } - else { - is->funcs->undo_prepend_data( prg, is, length ); - } -} - -/* Should only be sending back whole tokens/ignores, therefore the send back - * should never cross a buffer boundary. Either we slide back data, or we move to - * a previous buffer and slide back data. */ -static void send_back_text( struct colm_program *prg, struct input_impl *is, const char *data, long length ) -{ - //debug( REALM_PARSE, "push back of %ld characters\n", length ); - - if ( length == 0 ) - return; - - //debug( REALM_PARSE, "sending back text: %.*s\n", - // (int)length, data ); - - is->funcs->undo_consume_data( prg, is, data, length ); -} - -static void send_back_tree( struct colm_program *prg, struct input_impl *is, tree_t *tree ) -{ - is->funcs->undo_consume_tree( prg, is, tree, false ); -} - -/* - * Stops on: - * PCR_REVERSE - */ -static void send_back_ignore( program_t *prg, tree_t **sp, - struct pda_run *pda_run, struct input_impl *is, parse_tree_t *parse_tree ) -{ - #ifdef DEBUG - struct lang_el_info *lel_info = prg->rtd->lel_info; - debug( prg, REALM_PARSE, "sending back: %s%s\n", - lel_info[parse_tree->shadow->tree->id].name, - parse_tree->flags & PF_ARTIFICIAL ? 
" (artificial)" : "" ); - #endif - - head_t *head = parse_tree->shadow->tree->tokdata; - int artificial = parse_tree->flags & PF_ARTIFICIAL; - - if ( head != 0 ) { - if ( artificial ) - send_back_tree( prg, is, parse_tree->shadow->tree ); - else - send_back_text( prg, is, string_data( head ), head->length ); - } - - colm_decrement_steps( pda_run ); - - /* Check for reverse code. */ - if ( parse_tree->flags & PF_HAS_RCODE ) { - pda_run->on_deck = true; - parse_tree->flags &= ~PF_HAS_RCODE; - } - - if ( pda_run->steps == pda_run->target_steps ) { - debug( prg, REALM_PARSE, "trigger parse stop, steps = " - "target = %d\n", pda_run->target_steps ); - pda_run->stop = true; - } -} - -static void reset_token( struct pda_run *pda_run ) -{ - /* If there is a token started, but never finished for a lack of data, we - * must first backup over it. */ - if ( pda_run->tokstart != 0 ) { - pda_run->p = pda_run->pe = 0; - pda_run->toklen = 0; - pda_run->scan_eof = 0; - } -} - -/* Stops on: - * PCR_REVERSE - */ - -static void send_back( program_t *prg, tree_t **sp, struct pda_run *pda_run, - struct input_impl *is, parse_tree_t *parse_tree ) -{ - debug( prg, REALM_PARSE, "sending back: %s\n", - prg->rtd->lel_info[parse_tree->id].name ); - - if ( parse_tree->flags & PF_NAMED ) { - /* Send the named lang el back first, then send back any leading - * whitespace. */ - is->funcs->undo_consume_lang_el( prg, is ); - } - - colm_decrement_steps( pda_run ); - - /* Artifical were not parsed, instead sent in as items. */ - if ( parse_tree->flags & PF_ARTIFICIAL ) { - /* Check for reverse code. */ - if ( parse_tree->flags & PF_HAS_RCODE ) { - debug( prg, REALM_PARSE, "tree has rcode, setting on deck\n" ); - pda_run->on_deck = true; - parse_tree->flags &= ~PF_HAS_RCODE; - } - - colm_tree_upref( prg, parse_tree->shadow->tree ); - - send_back_tree( prg, is, parse_tree->shadow->tree ); - } - else { - /* Check for reverse code. 
*/ - if ( parse_tree->flags & PF_HAS_RCODE ) { - debug( prg, REALM_PARSE, "tree has rcode, setting on deck\n" ); - pda_run->on_deck = true; - parse_tree->flags &= ~PF_HAS_RCODE; - } - - /* Push back the token data. */ - send_back_text( prg, is, string_data( parse_tree->shadow->tree->tokdata ), - string_length( parse_tree->shadow->tree->tokdata ) ); - - /* If eof was just sent back remember that it needs to be sent again. */ - if ( parse_tree->id == prg->rtd->eof_lel_ids[pda_run->parser_id] ) - pda_run->eof_term_recvd = false; - - /* If the item is bound then store remove it from the bindings array. */ - prg->rtd->pop_binding( pda_run, parse_tree ); - } - - if ( pda_run->steps == pda_run->target_steps ) { - debug( prg, REALM_PARSE, "trigger parse stop, " - "steps = target = %d\n", pda_run->target_steps ); - pda_run->stop = true; - } - - /* Downref the tree that was sent back and free the kid. */ - colm_tree_downref( prg, sp, parse_tree->shadow->tree ); - kid_free( prg, parse_tree->shadow ); - parse_tree_free( pda_run, parse_tree ); -} - -static void set_region( struct pda_run *pda_run, int empty_ignore, parse_tree_t *tree ) -{ - if ( empty_ignore ) { - /* Recording the next region. 
*/ - tree->retry_region = pda_run->next_region_ind; - if ( pda_run->pda_tables->token_regions[tree->retry_region+1] != 0 ) - pda_run->num_retry += 1; - } -} - -static void ignore_tree( program_t *prg, struct pda_run *pda_run, tree_t *tree ) -{ - int empty_ignore = pda_run->accum_ignore == 0; - - colm_increment_steps( pda_run ); - - parse_tree_t *parse_tree = parse_tree_allocate( pda_run ); - parse_tree->shadow = kid_allocate( prg ); - parse_tree->shadow->tree = tree; - - parse_tree->next = pda_run->accum_ignore; - pda_run->accum_ignore = parse_tree; - - colm_transfer_reverse_code( pda_run, parse_tree ); - - if ( pda_run->pre_region >= 0 ) - parse_tree->flags |= PF_RIGHT_IGNORE; - - set_region( pda_run, empty_ignore, pda_run->accum_ignore ); -} - -static void ignore_tree_art( program_t *prg, struct pda_run *pda_run, tree_t *tree ) -{ - int empty_ignore = pda_run->accum_ignore == 0; - - colm_increment_steps( pda_run ); - - parse_tree_t *parse_tree = parse_tree_allocate( pda_run ); - parse_tree->flags |= PF_ARTIFICIAL; - parse_tree->shadow = kid_allocate( prg ); - parse_tree->shadow->tree = tree; - - parse_tree->next = pda_run->accum_ignore; - pda_run->accum_ignore = parse_tree; - - colm_transfer_reverse_code( pda_run, parse_tree ); - - set_region( pda_run, empty_ignore, pda_run->accum_ignore ); -} - -kid_t *make_token_with_data( program_t *prg, struct pda_run *pda_run, - struct input_impl *is, int id, head_t *tokdata ) -{ - /* Make the token object. */ - long object_length = prg->rtd->lel_info[id].object_length; - kid_t *attrs = alloc_attrs( prg, object_length ); - - kid_t *input = 0; - input = kid_allocate( prg ); - input->tree = tree_allocate( prg ); - - debug( prg, REALM_PARSE, "made token %p\n", input->tree ); - - input->tree->refs = 1; - input->tree->id = id; - input->tree->tokdata = tokdata; - - /* No children and ignores get added later. 
*/ - input->tree->child = attrs; - - struct lang_el_info *lel_info = prg->rtd->lel_info; - if ( lel_info[id].num_capture_attr > 0 ) { - int i; - for ( i = 0; i < lel_info[id].num_capture_attr; i++ ) { - CaptureAttr *ca = &prg->rtd->capture_attr[lel_info[id].capture_attr + i]; - head_t *data = string_alloc_full( prg, - pda_run->mark[ca->mark_enter], - pda_run->mark[ca->mark_leave] - - pda_run->mark[ca->mark_enter] ); - tree_t *string = construct_string( prg, data ); - colm_tree_upref( prg, string ); - colm_tree_set_field( prg, input->tree, ca->offset, string ); - } - } - - return input; -} - -static void report_parse_error( program_t *prg, tree_t **sp, struct pda_run *pda_run ) -{ - kid_t *kid = pda_run->bt_point; - head_t *deepest = 0; - while ( kid != 0 ) { - head_t *head = kid->tree->tokdata; - if ( head != 0 && head->location != 0 ) { - if ( deepest == 0 || head->location->byte > deepest->location->byte ) - deepest = head; - } - kid = kid->next; - } - - head_t *error_head = 0; - - /* If there are no error points on record assume the error occurred at the - * beginning of the stream. 
*/ - if ( deepest == 0 ) { - error_head = string_alloc_full( prg, "<input>:1:1: parse error", 32 ); - error_head->location = location_allocate( prg ); - error_head->location->line = 1; - error_head->location->column = 1; - } - else { - debug( prg, REALM_PARSE, "deepest location byte: %d\n", - deepest->location->byte ); - - const char *name = deepest->location->name; - long line = deepest->location->line; - long i, column = deepest->location->column; - long byte = deepest->location->byte; - - for ( i = 0; i < deepest->length; i++ ) { - if ( deepest->data[i] != '\n' ) - column += 1; - else { - line += 1; - column = 1; - } - byte += 1; - } - - if ( name == 0 ) - name = "<input>"; - char *formatted = malloc( strlen( name ) + 128 ); - sprintf( formatted, "%s:%ld:%ld: parse error", name, line, column ); - error_head = string_alloc_full( prg, formatted, strlen(formatted) ); - free( formatted ); - - error_head->location = location_allocate( prg ); - - error_head->location->name = deepest->location->name; - error_head->location->line = line; - error_head->location->column = column; - error_head->location->byte = byte; - } - - tree_t *tree = construct_string( prg, error_head ); - colm_tree_downref( prg, sp, pda_run->parse_error_text ); - pda_run->parse_error_text = tree; - colm_tree_upref( prg, pda_run->parse_error_text ); -} - -static void attach_right_ignore( program_t *prg, tree_t **sp, - struct pda_run *pda_run, parse_tree_t *parse_tree ) -{ - if ( pda_run->accum_ignore == 0 ) - return; - - if ( pda_run->stack_top->id > 0 && - pda_run->stack_top->id < prg->rtd->first_non_term_id ) - { - /* OK, do it */ - debug( prg, REALM_PARSE, "attaching right ignore\n" ); - - /* Reset. */ - assert( ! ( parse_tree->flags & PF_RIGHT_IL_ATTACHED ) ); - - parse_tree_t *accum = pda_run->accum_ignore; - - parse_tree_t *stop_at = 0, *use = accum; - while ( use != 0 ) { - if ( ! 
(use->flags & PF_RIGHT_IGNORE) ) - stop_at = use; - use = use->next; - } - - if ( stop_at != 0 ) { - /* Stop at was set. Make it the last item in the igore list. Take - * the rest. */ - accum = stop_at->next; - stop_at->next = 0; - } - else { - /* Stop at was never set. All right ignore. Use it all. */ - pda_run->accum_ignore = 0; - } - - /* The data list needs to be extracted and reversed. The parse tree list - * can remain in stack order. */ - parse_tree_t *child = accum, *last = 0; - kid_t *data_child = 0, *data_last = 0; - - while ( child ) { - data_child = child->shadow; - parse_tree_t *next = child->next; - - /* Reverse the lists. */ - data_child->next = data_last; - child->next = last; - - /* Detach the parse tree from the data tree. */ - child->shadow = 0; - - /* Keep the last for reversal. */ - data_last = data_child; - last = child; - - child = next; - } - - /* Last is now the first. */ - parse_tree->right_ignore = last; - - if ( data_child != 0 ) { - debug( prg, REALM_PARSE, "attaching ignore right\n" ); - - kid_t *ignore_kid = data_last; - - /* Copy the ignore list first if we need to attach it as a right - * ignore. */ - tree_t *right_ignore = 0; - - right_ignore = tree_allocate( prg ); - right_ignore->id = LEL_ID_IGNORE; - right_ignore->child = ignore_kid; - - tree_t *push_to = parse_tree->shadow->tree; - - push_to = push_right_ignore( prg, push_to, right_ignore ); - - parse_tree->shadow->tree = push_to; - - parse_tree->flags |= PF_RIGHT_IL_ATTACHED; - } - } -} - -static void attach_left_ignore( program_t *prg, tree_t **sp, - struct pda_run *pda_run, parse_tree_t *parse_tree ) -{ - /* Reset. */ - assert( ! ( parse_tree->flags & PF_LEFT_IL_ATTACHED ) ); - - parse_tree_t *accum = pda_run->accum_ignore; - pda_run->accum_ignore = 0; - - /* The data list needs to be extracted and reversed. The parse tree list - * can remain in stack order. 
*/ - parse_tree_t *child = accum, *last = 0; - kid_t *data_child = 0, *data_last = 0; - - while ( child ) { - data_child = child->shadow; - parse_tree_t *next = child->next; - - /* Reverse the lists. */ - data_child->next = data_last; - child->next = last; - - /* Detach the parse tree from the data tree. */ - child->shadow = 0; - - /* Keep the last for reversal. */ - data_last = data_child; - last = child; - - child = next; - } - - /* Last is now the first. */ - parse_tree->left_ignore = last; - - if ( data_child != 0 ) { - debug( prg, REALM_PARSE, "attaching left ignore\n" ); - - kid_t *ignore_kid = data_child; - - /* Make the ignore list for the left-ignore. */ - tree_t *left_ignore = tree_allocate( prg ); - left_ignore->id = LEL_ID_IGNORE; - left_ignore->child = ignore_kid; - - tree_t *push_to = parse_tree->shadow->tree; - - push_to = push_left_ignore( prg, push_to, left_ignore ); - - parse_tree->shadow->tree = push_to; - - parse_tree->flags |= PF_LEFT_IL_ATTACHED; - } -} - -/* Not currently used. Need to revive this. WARNING: untested changes here */ -static void detach_right_ignore( program_t *prg, tree_t **sp, - struct pda_run *pda_run, parse_tree_t *parse_tree ) -{ - /* Right ignore are immediately discarded since they are copies of - * left-ignores. */ - tree_t *right_ignore = 0; - if ( parse_tree->flags & PF_RIGHT_IL_ATTACHED ) { - tree_t *pop_from = parse_tree->shadow->tree; - - pop_from = pop_right_ignore( prg, sp, pop_from, &right_ignore ); - - parse_tree->shadow->tree = pop_from; - - parse_tree->flags &= ~PF_RIGHT_IL_ATTACHED; - } - - if ( parse_tree->right_ignore != 0 ) { - assert( right_ignore != 0 ); - - /* Transfer the trees to accumIgnore. 
*/ - parse_tree_t *ignore = parse_tree->right_ignore; - parse_tree->right_ignore = 0; - - kid_t *data_ignore = right_ignore->child; - right_ignore->child = 0; - - parse_tree_t *last = 0; - kid_t *data_last = 0; - while ( ignore != 0 ) { - parse_tree_t *next = ignore->next; - kid_t *data_next = data_ignore->next; - - /* Put the data trees underneath the parse trees. */ - ignore->shadow = data_ignore; - - /* Reverse. */ - ignore->next = last; - data_ignore->next = data_last; - - /* Keep last for reversal. */ - last = ignore; - data_last = data_ignore; - - ignore = next; - data_ignore = data_next; - } - - pda_run->accum_ignore = last; - - colm_tree_downref( prg, sp, right_ignore ); - } -} - -static void detach_left_ignore( program_t *prg, tree_t **sp, - struct pda_run *pda_run, parse_tree_t *parse_tree ) -{ - /* Detach left. */ - tree_t *left_ignore = 0; - if ( parse_tree->flags & PF_LEFT_IL_ATTACHED ) { - tree_t *pop_from = parse_tree->shadow->tree; - - pop_from = pop_left_ignore( prg, sp, pop_from, &left_ignore ); - - parse_tree->shadow->tree = pop_from; - - parse_tree->flags &= ~PF_LEFT_IL_ATTACHED; - } - - if ( parse_tree->left_ignore != 0 ) { - assert( left_ignore != 0 ); - - /* Transfer the trees to accumIgnore. */ - parse_tree_t *ignore = parse_tree->left_ignore; - parse_tree->left_ignore = 0; - - kid_t *data_ignore = left_ignore->child; - left_ignore->child = 0; - - parse_tree_t *last = 0; - kid_t *data_last = 0; - while ( ignore != 0 ) { - parse_tree_t *next = ignore->next; - kid_t *data_next = data_ignore->next; - - /* Put the data trees underneath the parse trees. */ - ignore->shadow = data_ignore; - - /* Reverse. */ - ignore->next = last; - data_ignore->next = data_last; - - /* Keep last for reversal. 
*/ - last = ignore; - data_last = data_ignore; - - ignore = next; - data_ignore = data_next; - } - - pda_run->accum_ignore = last; - } - - colm_tree_downref( prg, sp, left_ignore ); -} - -static int is_parser_stop_finished( struct pda_run *pda_run ) -{ - int done = - pda_run->stack_top->next != 0 && - pda_run->stack_top->next->next == 0 && - pda_run->stack_top->id == pda_run->stop_target; - return done; -} - -static void handle_error( program_t *prg, tree_t **sp, struct pda_run *pda_run ) -{ - /* Check the result. */ - if ( pda_run->parse_error ) { - /* Error occured in the top-level parser. */ - report_parse_error( prg, sp, pda_run ); - } - else { - if ( is_parser_stop_finished( pda_run ) ) { - debug( prg, REALM_PARSE, "stopping the parse\n" ); - pda_run->stop_parsing = true; - } - } -} - -static head_t *extract_match( program_t *prg, tree_t **sp, - struct pda_run *pda_run, struct input_impl *is ) -{ - long length = pda_run->toklen; - - //debug( prg, REALM_PARSE, "extracting token of length: %ld\n", length ); - - struct run_buf *run_buf = pda_run->consume_buf; - if ( run_buf == 0 || length > ( FSM_BUFSIZE - run_buf->length ) ) { - run_buf = new_run_buf( length ); - run_buf->next = pda_run->consume_buf; - pda_run->consume_buf = run_buf; - } - - char *dest = run_buf->data + run_buf->length; - - is->funcs->get_data( prg, is, dest, length ); - location_t *location = location_allocate( prg ); - is->funcs->consume_data( prg, is, length, location ); - - run_buf->length += length; - - pda_run->p = pda_run->pe = 0; - pda_run->toklen = 0; - pda_run->tokstart = 0; - - head_t *head = colm_string_alloc_pointer( prg, dest, length ); - - head->location = location; - - debug( prg, REALM_PARSE, "location byte: %d\n", head->location->byte ); - - return head; -} - -static head_t *extract_no_d( program_t *prg, tree_t **sp, - struct pda_run *pda_run, struct input_impl *is ) -{ - long length = pda_run->toklen; - - /* Just a consume, no data allocate. 
*/ - location_t *location = location_allocate( prg ); - is->funcs->consume_data( prg, is, length, location ); - - pda_run->p = pda_run->pe = 0; - pda_run->toklen = 0; - pda_run->tokstart = 0; - - head_t *head = colm_string_alloc_pointer( prg, 0, 0 ); - - head->location = location; - - debug( prg, REALM_PARSE, "location byte: %d\n", head->location->byte ); - - return head; -} - -static head_t *extract_no_l( program_t *prg, tree_t **sp, - struct pda_run *pda_run, struct input_impl *is ) -{ - long length = pda_run->toklen; - - //debug( prg, REALM_PARSE, "extracting token of length: %ld\n", length ); - - struct run_buf *run_buf = pda_run->consume_buf; - if ( run_buf == 0 || length > ( FSM_BUFSIZE - run_buf->length ) ) { - run_buf = new_run_buf( length ); - run_buf->next = pda_run->consume_buf; - pda_run->consume_buf = run_buf; - } - - char *dest = run_buf->data + run_buf->length; - - is->funcs->get_data( prg, is, dest, length ); - - /* Using a dummpy location. */ - location_t location; - memset( &location, 0, sizeof( location ) ); - is->funcs->consume_data( prg, is, length, &location ); - - run_buf->length += length; - - pda_run->p = pda_run->pe = 0; - pda_run->toklen = 0; - pda_run->tokstart = 0; - - head_t *head = colm_string_alloc_pointer( prg, dest, length ); - - /* Don't pass the location. */ - head->location = 0; - - debug( prg, REALM_PARSE, "location byte: %d\n", location.byte ); - - return head; -} - -static head_t *consume_match( program_t *prg, tree_t **sp, - struct pda_run *pda_run, struct input_impl *is ) -{ - long length = pda_run->toklen; - - /* No data or location returned. We just consume the data. 
*/ - location_t dummy_loc; - memset( &dummy_loc, 0, sizeof(dummy_loc) ); - is->funcs->consume_data( prg, is, length, &dummy_loc ); - - pda_run->p = pda_run->pe = 0; - pda_run->toklen = 0; - pda_run->tokstart = 0; - - debug( prg, REALM_PARSE, "location byte: %d\n", dummy_loc.byte ); - - return 0; -} - - -static head_t *peek_match( program_t *prg, struct pda_run *pda_run, struct input_impl *is ) -{ - long length = pda_run->toklen; - - struct run_buf *run_buf = pda_run->consume_buf; - if ( run_buf == 0 || length > ( FSM_BUFSIZE - run_buf->length ) ) { - run_buf = new_run_buf( 0 ); - run_buf->next = pda_run->consume_buf; - pda_run->consume_buf = run_buf; - } - - char *dest = run_buf->data + run_buf->length; - - is->funcs->get_data( prg, is, dest, length ); - - pda_run->p = pda_run->pe = 0; - pda_run->toklen = 0; - - head_t *head = colm_string_alloc_pointer( prg, dest, length ); - - head->location = location_allocate( prg ); - is->funcs->transfer_loc( prg, head->location, is ); - - debug( prg, REALM_PARSE, "location byte: %d\n", head->location->byte ); - - return head; -} - - -static void send_ignore( program_t *prg, tree_t **sp, - struct pda_run *pda_run, struct input_impl *is, long id ) -{ - if ( prg->rtd->reducer_need_ign( prg, pda_run ) == RN_NONE ) { - consume_match( prg, sp, pda_run, is ); - } - else { - debug( prg, REALM_PARSE, "ignoring: %s\n", prg->rtd->lel_info[id].name ); - - /* Make the ignore string. */ - head_t *ignore_str = extract_match( prg, sp, pda_run, is ); - - debug( prg, REALM_PARSE, "ignoring: %.*s\n", ignore_str->length, ignore_str->data ); - - tree_t *tree = tree_allocate( prg ); - tree->refs = 1; - tree->id = id; - tree->tokdata = ignore_str; - - /* Send it to the pdaRun. */ - ignore_tree( prg, pda_run, tree ); - } -} - -static void send_token( program_t *prg, tree_t **sp, - struct pda_run *pda_run, struct input_impl *is, long id ) -{ - int empty_ignore = pda_run->accum_ignore == 0; - - /* Make the token data. 
*/ - head_t *tokdata = 0; - int rn = prg->rtd->reducer_need_tok( prg, pda_run, id ); - - switch ( rn ) { - case RN_NONE: - tokdata = consume_match( prg, sp, pda_run, is ); - break; - case RN_DATA: - tokdata = extract_no_l( prg, sp, pda_run, is ); - break; - case RN_LOC: - tokdata = extract_no_d( prg, sp, pda_run, is ); - break; - case RN_BOTH: - tokdata = extract_match( prg, sp, pda_run, is ); - break; - } - - debug( prg, REALM_PARSE, "token: %s text: %.*s\n", - prg->rtd->lel_info[id].name, - string_length(tokdata), string_data(tokdata) ); - - kid_t *input = make_token_with_data( prg, pda_run, is, id, tokdata ); - - colm_increment_steps( pda_run ); - - parse_tree_t *parse_tree = parse_tree_allocate( pda_run ); - parse_tree->id = input->tree->id; - parse_tree->shadow = input; - - pda_run->parse_input = parse_tree; - - /* Store any alternate scanning region. */ - if ( input != 0 && pda_run->pda_cs >= 0 ) - set_region( pda_run, empty_ignore, parse_tree ); -} - -static void send_tree( program_t *prg, tree_t **sp, struct pda_run *pda_run, - struct input_impl *is ) -{ - kid_t *input = kid_allocate( prg ); - input->tree = is->funcs->consume_tree( prg, is ); - - colm_increment_steps( pda_run ); - - parse_tree_t *parse_tree = parse_tree_allocate( pda_run ); - parse_tree->id = input->tree->id; - parse_tree->flags |= PF_ARTIFICIAL; - parse_tree->shadow = input; - - pda_run->parse_input = parse_tree; -} - -static void send_ignore_tree( program_t *prg, tree_t **sp, - struct pda_run *pda_run, struct input_impl *is ) -{ - tree_t *tree = is->funcs->consume_tree( prg, is ); - ignore_tree_art( prg, pda_run, tree ); -} - -static void send_collect_ignore( program_t *prg, tree_t **sp, - struct pda_run *pda_run, struct input_impl *is, int id ) -{ - debug( prg, REALM_PARSE, "token: CI\n" ); - - int empty_ignore = pda_run->accum_ignore == 0; - - /* Make the token data. 
*/ - head_t *tokdata = head_allocate( prg ); - tokdata->location = location_allocate( prg ); - is->funcs->transfer_loc( prg, tokdata->location, is ); - - debug( prg, REALM_PARSE, "token: %s text: %.*s\n", - prg->rtd->lel_info[id].name, - string_length(tokdata), string_data(tokdata) ); - - kid_t *input = make_token_with_data( prg, pda_run, is, id, tokdata ); - - colm_increment_steps( pda_run ); - - parse_tree_t *parse_tree = parse_tree_allocate( pda_run ); - parse_tree->id = input->tree->id; - parse_tree->shadow = input; - - pda_run->parse_input = parse_tree; - - /* Store any alternate scanning region. */ - if ( input != 0 && pda_run->pda_cs >= 0 ) - set_region( pda_run, empty_ignore, parse_tree ); -} - -/* Offset can be used to look at the next nextRegionInd. */ -static int get_next_region( struct pda_run *pda_run, int offset ) -{ - return pda_run->pda_tables->token_regions[pda_run->next_region_ind+offset]; -} - -static int get_next_pre_region( struct pda_run *pda_run ) -{ - return pda_run->pda_tables->token_pre_regions[pda_run->next_region_ind]; -} - -static void send_eof( program_t *prg, tree_t **sp, struct pda_run *pda_run, - struct input_impl *is ) -{ - debug( prg, REALM_PARSE, "token: _EOF\n" ); - - colm_increment_steps( pda_run ); - - head_t *head = head_allocate( prg ); - head->location = location_allocate( prg ); - is->funcs->transfer_loc( prg, head->location, is ); - - kid_t *input = kid_allocate( prg ); - input->tree = tree_allocate( prg ); - - input->tree->refs = 1; - input->tree->id = prg->rtd->eof_lel_ids[pda_run->parser_id]; - input->tree->tokdata = head; - - /* Set the state using the state of the parser. 
*/ - pda_run->region = get_next_region( pda_run, 0 ); - pda_run->pre_region = get_next_pre_region( pda_run ); - pda_run->fsm_cs = pda_run->fsm_tables->entry_by_region[pda_run->region]; - - parse_tree_t *parse_tree = parse_tree_allocate( pda_run ); - parse_tree->id = input->tree->id; - parse_tree->shadow = input; - - pda_run->parse_input = parse_tree; -} - -static void new_token( program_t *prg, struct pda_run *pda_run ) -{ - pda_run->p = pda_run->pe = 0; - pda_run->toklen = 0; - pda_run->scan_eof = 0; - - /* Init the scanner vars. */ - pda_run->act = 0; - pda_run->tokstart = 0; - pda_run->tokend = 0; - pda_run->matched_token = 0; - - /* Set the state using the state of the parser. */ - pda_run->region = get_next_region( pda_run, 0 ); - pda_run->pre_region = get_next_pre_region( pda_run ); - if ( pda_run->pre_region > 0 ) { - pda_run->fsm_cs = pda_run->fsm_tables->entry_by_region[pda_run->pre_region]; - pda_run->next_cs = pda_run->fsm_tables->entry_by_region[pda_run->region]; - } - else { - pda_run->fsm_cs = pda_run->fsm_tables->entry_by_region[pda_run->region]; - } - - - /* Clear the mark array. */ - memset( pda_run->mark, 0, sizeof(pda_run->mark) ); -} - -static void push_bt_point( program_t *prg, struct pda_run *pda_run ) -{ - tree_t *tree = 0; - if ( pda_run->accum_ignore != 0 ) - tree = pda_run->accum_ignore->shadow->tree; - else if ( pda_run->token_list != 0 ) - tree = pda_run->token_list->kid->tree; - - if ( tree != 0 ) { - debug( prg, REALM_PARSE, "pushing bt point with location byte %d\n", - ( tree != 0 && tree->tokdata != 0 && tree->tokdata->location != 0 ) ? 
- tree->tokdata->location->byte : 0 ); - - kid_t *kid = kid_allocate( prg ); - kid->tree = tree; - colm_tree_upref( prg, tree ); - kid->next = pda_run->bt_point; - pda_run->bt_point = kid; - } -} - - -#define SCAN_UNDO -7 -#define SCAN_IGNORE -6 -#define SCAN_TREE -5 -#define SCAN_TRY_AGAIN_LATER -4 -#define SCAN_ERROR -3 -#define SCAN_LANG_EL -2 -#define SCAN_EOF -1 - -static long scan_token( program_t *prg, struct pda_run *pda_run, struct input_impl *is ) -{ - if ( pda_run->trigger_undo ) - return SCAN_UNDO; - - while ( true ) { - char *pd = 0; - int len = 0; - int toklen = pda_run->toklen; - int type = is->funcs->get_parse_block( prg, is, &toklen, &pd, &len ); - - switch ( type ) { - case INPUT_DATA: - pda_run->p = pd; - pda_run->pe = pd + len; - break; - - case INPUT_EOS: - pda_run->p = pda_run->pe = 0; - if ( pda_run->tokstart != 0 ) - pda_run->scan_eof = 1; - debug( prg, REALM_SCAN, "EOS *******************\n" ); - break; - - case INPUT_EOF: - pda_run->p = pda_run->pe = 0; - if ( pda_run->tokstart != 0 ) - pda_run->scan_eof = 1; - else - return SCAN_EOF; - break; - - case INPUT_EOD: - pda_run->p = pda_run->pe = 0; - return SCAN_TRY_AGAIN_LATER; - - case INPUT_LANG_EL: - if ( pda_run->tokstart != 0 ) - pda_run->scan_eof = 1; - else - return SCAN_LANG_EL; - break; - - case INPUT_TREE: - if ( pda_run->tokstart != 0 ) - pda_run->scan_eof = 1; - else - return SCAN_TREE; - break; - case INPUT_IGNORE: - if ( pda_run->tokstart != 0 ) - pda_run->scan_eof = 1; - else - return SCAN_IGNORE; - break; - } - - prg->rtd->fsm_execute( pda_run, is ); - - /* First check if scanning stopped because we have a token. */ - if ( pda_run->matched_token > 0 ) { - /* If the token has a marker indicating the end (due to trailing - * context) then adjust data now. 
*/ - struct lang_el_info *lel_info = prg->rtd->lel_info; - if ( lel_info[pda_run->matched_token].mark_id >= 0 ) - pda_run->p = pda_run->mark[lel_info[pda_run->matched_token].mark_id]; - - return pda_run->matched_token; - } - - /* Check for error. */ - if ( pda_run->fsm_cs == pda_run->fsm_tables->error_state ) { - /* If a token was started, but not finished (tokstart != 0) then - * restore data to the beginning of that token. */ - if ( pda_run->tokstart != 0 ) - pda_run->p = pda_run->tokstart; - - /* Check for a default token in the region. If one is there - * then send it and continue with the processing loop. */ - if ( prg->rtd->region_info[pda_run->region].default_token >= 0 ) { - pda_run->toklen = 0; - return prg->rtd->region_info[pda_run->region].default_token; - } - - return SCAN_ERROR; - } - - /* Check for no match on eof (trailing data that partially matches a token). */ - if ( pda_run->scan_eof ) - return SCAN_ERROR; - - /* Got here because the state machine didn't match a token or encounter - * an error. Must be because we got to the end of the buffer data. */ - assert( pda_run->p == pda_run->pe ); - } - - /* Should not be reached. 
*/ - return SCAN_ERROR; -} - -tree_t *get_parsed_root( struct pda_run *pda_run, int stop ) -{ - if ( pda_run->parse_error ) - return 0; - else if ( stop ) { - if ( pda_run->stack_top->shadow != 0 ) - return pda_run->stack_top->shadow->tree; - } - else { - if ( pda_run->stack_top->next->shadow != 0 ) - return pda_run->stack_top->next->shadow->tree; - } - return 0; -} - -static void clear_parse_tree( program_t *prg, tree_t **sp, - struct pda_run *pda_run, parse_tree_t *pt ) -{ - tree_t **top = vm_ptop(); - - if ( pt == 0 ) - return; - -free_tree: - if ( pt->next != 0 ) { - vm_push_ptree( pt->next ); - } - - if ( pt->left_ignore != 0 ) { - vm_push_ptree( pt->left_ignore ); - } - - if ( pt->child != 0 ) { - vm_push_ptree( pt->child ); - } - - if ( pt->right_ignore != 0 ) { - vm_push_ptree( pt->right_ignore ); - } - - if ( pt->shadow != 0 ) { - colm_tree_downref( prg, sp, pt->shadow->tree ); - kid_free( prg, pt->shadow ); - } - - parse_tree_free( pda_run, pt ); - - /* Any trees to downref? */ - if ( sp != top ) { - pt = vm_pop_ptree(); - goto free_tree; - } -} - -void colm_pda_clear( program_t *prg, tree_t **sp, struct pda_run *pda_run ) -{ - clear_fsm_run( prg, pda_run ); - - /* Remaining stack and parse trees underneath. */ - clear_parse_tree( prg, sp, pda_run, pda_run->stack_top ); - pda_run->stack_top = 0; - - /* Traverse the token list downreffing. */ - ref_t *ref = pda_run->token_list; - while ( ref != 0 ) { - ref_t *next = ref->next; - kid_free( prg, (kid_t*)ref ); - ref = next; - } - pda_run->token_list = 0; - - /* Traverse the btPoint list downreffing */ - kid_t *btp = pda_run->bt_point; - while ( btp != 0 ) { - kid_t *next = btp->next; - colm_tree_downref( prg, sp, btp->tree ); - kid_free( prg, (kid_t*)btp ); - btp = next; - } - pda_run->bt_point = 0; - - /* Clear out any remaining ignores. */ - clear_parse_tree( prg, sp, pda_run, pda_run->accum_ignore ); - pda_run->accum_ignore = 0; - - /* Clear the input list (scanned tokes, sent trees). 
*/ - clear_parse_tree( prg, sp, pda_run, pda_run->parse_input ); - pda_run->parse_input = 0; - - colm_rcode_downref_all( prg, sp, &pda_run->reverse_code ); - colm_rt_code_vect_empty( &pda_run->reverse_code ); - colm_rt_code_vect_empty( &pda_run->rcode_collect ); - - colm_tree_downref( prg, sp, pda_run->parse_error_text ); - - if ( pda_run->reducer ) { - long local_lost = pool_alloc_num_lost( &pda_run->local_pool ); - - if ( local_lost ) - message( "warning: reducer local lost parse trees: %ld\n", local_lost ); - pool_alloc_clear( &pda_run->local_pool ); - } -} - -void colm_pda_init( program_t *prg, struct pda_run *pda_run, struct pda_tables *tables, - int parser_id, long stop_target, int revert_on, struct_t *context, int reducer ) -{ - memset( pda_run, 0, sizeof(struct pda_run) ); - - pda_run->pda_tables = tables; - pda_run->parser_id = parser_id; - pda_run->stop_target = stop_target; - pda_run->revert_on = revert_on; - pda_run->target_steps = -1; - pda_run->reducer = reducer; - - /* An initial commit shift count of -1 means we won't ever back up to zero - * shifts and think parsing cannot continue. */ - pda_run->shift_count = 0; - pda_run->commit_shift_count = -1; - - if ( reducer ) { - init_pool_alloc( &pda_run->local_pool, sizeof(parse_tree_t) + - prg->rtd->commit_union_sz(reducer) ); - pda_run->parse_tree_pool = &pda_run->local_pool; - } - else { - pda_run->parse_tree_pool = &prg->parse_tree_pool; - } - - debug( prg, REALM_PARSE, "initializing struct pda_run %s\n", - prg->rtd->lel_info[prg->rtd->parser_lel_ids[parser_id]].name ); - - /* FIXME: need the right one here. */ - pda_run->pda_cs = prg->rtd->start_states[pda_run->parser_id]; - - kid_t *sentinal = kid_allocate( prg ); - sentinal->tree = tree_allocate( prg ); - sentinal->tree->refs = 1; - - /* Init the element allocation variables. 
*/ - pda_run->stack_top = parse_tree_allocate( pda_run ); - pda_run->stack_top->state = -1; - pda_run->stack_top->shadow = sentinal; - - pda_run->num_retry = 0; - pda_run->next_region_ind = pda_run->pda_tables->token_region_inds[pda_run->pda_cs]; - pda_run->stop_parsing = false; - pda_run->accum_ignore = 0; - pda_run->bt_point = 0; - pda_run->check_next = false; - pda_run->check_stop = false; - - prg->rtd->init_bindings( pda_run ); - - init_rt_code_vect( &pda_run->reverse_code ); - init_rt_code_vect( &pda_run->rcode_collect ); - - pda_run->context = context; - pda_run->parse_error = 0; - pda_run->parse_input = 0; - pda_run->trigger_undo = 0; - - pda_run->token_id = 0; - - pda_run->on_deck = false; - pda_run->parsed = 0; - pda_run->reject = false; - - pda_run->rc_block_count = 0; - pda_run->eof_term_recvd = 0; - - init_fsm_run( prg, pda_run ); - new_token( prg, pda_run ); -} - -static long stack_top_target( program_t *prg, struct pda_run *pda_run ) -{ - long state; - if ( pda_run->stack_top->state < 0 ) - state = prg->rtd->start_states[pda_run->parser_id]; - else { - unsigned shift = pda_run->stack_top->id - - pda_run->pda_tables->keys[pda_run->stack_top->state<<1]; - unsigned offset = pda_run->pda_tables->offsets[pda_run->stack_top->state] + shift; - int index = pda_run->pda_tables->indicies[offset]; - state = pda_run->pda_tables->targs[index]; - } - return state; -} - -/* - * shift: retry goes into lower of shifted node. - * reduce: retry goes into upper of reduced node. - * shift-reduce: cannot be a retry - */ - -/* Stops on: - * PCR_REDUCTION - * PCR_REVERSE - */ -static long parse_token( program_t *prg, tree_t **sp, - struct pda_run *pda_run, struct input_impl *is, long entry ) -{ - int pos; - unsigned int *action; - int rhs_len; - int owner; - int induce_reject; - int ind_pos; - - /* COROUTINE */ - switch ( entry ) { - case PCR_START: - - /* The scanner will send a null token if it can't find a token. 
*/ - if ( pda_run->parse_input == 0 ) - goto parse_error; - - /* This will cause parseInput to be lost. This - * path should be traced. */ - if ( pda_run->pda_cs < 0 ) - return PCR_DONE; - - /* Record the state in the parse tree. */ - pda_run->parse_input->state = pda_run->pda_cs; - -again: - if ( pda_run->parse_input == 0 ) - goto _out; - - pda_run->lel = pda_run->parse_input; - pda_run->cur_state = pda_run->pda_cs; - - if ( pda_run->lel->id < pda_run->pda_tables->keys[pda_run->cur_state<<1] || - pda_run->lel->id > pda_run->pda_tables->keys[(pda_run->cur_state<<1)+1] ) - { - debug( prg, REALM_PARSE, "parse error, no transition 1\n" ); - push_bt_point( prg, pda_run ); - goto parse_error; - } - - ind_pos = pda_run->pda_tables->offsets[pda_run->cur_state] + - (pda_run->lel->id - pda_run->pda_tables->keys[pda_run->cur_state<<1]); - - owner = pda_run->pda_tables->owners[ind_pos]; - if ( owner != pda_run->cur_state ) { - debug( prg, REALM_PARSE, "parse error, no transition 2\n" ); - push_bt_point( prg, pda_run ); - goto parse_error; - } - - pos = pda_run->pda_tables->indicies[ind_pos]; - if ( pos < 0 ) { - debug( prg, REALM_PARSE, "parse error, no transition 3\n" ); - push_bt_point( prg, pda_run ); - goto parse_error; - } - - /* Checking complete. */ - - induce_reject = false; - pda_run->pda_cs = pda_run->pda_tables->targs[pos]; - action = pda_run->pda_tables->actions + pda_run->pda_tables->act_inds[pos]; - if ( pda_run->lel->retry_lower ) - action += pda_run->lel->retry_lower; - - /* - * Shift - */ - - if ( *action & act_sb ) { - debug( prg, REALM_PARSE, "shifted: %s\n", - prg->rtd->lel_info[pda_run->lel->id].name ); - /* Consume. */ - pda_run->parse_input = pda_run->parse_input->next; - - pda_run->lel->state = pda_run->cur_state; - - /* If its a token then attach ignores and record it in the token list - * of the next ignore attachment to use. 
*/ - if ( pda_run->lel->id < prg->rtd->first_non_term_id ) { - if ( pda_run->lel->cause_reduce == 0 ) - attach_right_ignore( prg, sp, pda_run, pda_run->stack_top ); - } - - pda_run->lel->next = pda_run->stack_top; - pda_run->stack_top = pda_run->lel; - - /* If its a token then attach ignores and record it in the token list - * of the next ignore attachment to use. */ - if ( pda_run->lel->id < prg->rtd->first_non_term_id ) { - attach_left_ignore( prg, sp, pda_run, pda_run->lel ); - - ref_t *ref = (ref_t*)kid_allocate( prg ); - ref->kid = pda_run->lel->shadow; - //colm_tree_upref( prg, pdaRun->tree ); - ref->next = pda_run->token_list; - pda_run->token_list = ref; - } - - if ( action[1] == 0 ) - pda_run->lel->retry_lower = 0; - else { - debug( prg, REALM_PARSE, "retry: %p\n", pda_run->stack_top ); - pda_run->lel->retry_lower += 1; - assert( pda_run->lel->retry_upper == 0 ); - /* FIXME: Has the retry already been counted? */ - pda_run->num_retry += 1; - } - - pda_run->shift_count += 1; - } - - /* - * Commit - */ - - if ( pda_run->pda_tables->commit_len[pos] != 0 ) { - debug( prg, REALM_PARSE, "commit point\n" ); - pda_run->commit_shift_count = pda_run->shift_count; - - /* Not in a reverting context and the parser result is not used. */ - if ( pda_run->reducer ) - commit_reduce( prg, sp, pda_run ); - - if ( pda_run->fail_parsing ) - goto fail; - - } - - /* - * Reduce - */ - - if ( *action & act_rb ) { - int r, object_length; - parse_tree_t *last, *child; - kid_t *attrs; - kid_t *data_last, *data_child; - - /* If there was shift don't attach again. 
*/ - if ( !( *action & act_sb ) && pda_run->lel->id < prg->rtd->first_non_term_id ) - attach_right_ignore( prg, sp, pda_run, pda_run->stack_top ); - - pda_run->reduction = *action >> 2; - - if ( pda_run->parse_input != 0 ) - pda_run->parse_input->cause_reduce += 1; - - kid_t *value = kid_allocate( prg ); - value->tree = tree_allocate( prg ); - value->tree->refs = 1; - value->tree->id = prg->rtd->prod_info[pda_run->reduction].lhs_id; - value->tree->prod_num = prg->rtd->prod_info[pda_run->reduction].prod_num; - - pda_run->red_lel = parse_tree_allocate( pda_run ); - pda_run->red_lel->id = prg->rtd->prod_info[pda_run->reduction].lhs_id; - pda_run->red_lel->next = 0; - pda_run->red_lel->cause_reduce = 0; - pda_run->red_lel->retry_lower = 0; - pda_run->red_lel->shadow = value; - - /* Transfer. */ - pda_run->red_lel->retry_upper = pda_run->lel->retry_lower; - pda_run->lel->retry_lower = 0; - - /* Allocate the attributes. */ - object_length = prg->rtd->lel_info[pda_run->red_lel->id].object_length; - attrs = alloc_attrs( prg, object_length ); - - /* Build the list of children. We will be giving up a reference when we - * detach parse tree and data tree, but gaining the reference when we - * put the children under the new data tree. No need to alter refcounts - * here. */ - rhs_len = prg->rtd->prod_info[pda_run->reduction].length; - child = last = 0; - data_child = data_last = 0; - for ( r = 0; r < rhs_len; r++ ) { - - /* The child. */ - child = pda_run->stack_top; - data_child = child->shadow; - - /* Pop. */ - pda_run->stack_top = pda_run->stack_top->next; - - /* Detach the parse tree from the data. */ - child->shadow = 0; - - /* Reverse list. */ - child->next = last; - data_child->next = data_last; - - /* Track last for reversal. 
*/ - last = child; - data_last = data_child; - } - - pda_run->red_lel->child = child; - pda_run->red_lel->shadow->tree->child = kid_list_concat( attrs, data_child ); - - debug( prg, REALM_PARSE, "reduced: %s rhsLen %d\n", - prg->rtd->prod_info[pda_run->reduction].name, rhs_len ); - if ( action[1] == 0 ) - pda_run->red_lel->retry_upper = 0; - else { - pda_run->red_lel->retry_upper += 1; - assert( pda_run->lel->retry_lower == 0 ); - pda_run->num_retry += 1; - debug( prg, REALM_PARSE, "retry: %p\n", pda_run->red_lel ); - } - - /* When the production is of zero length we stay in the same state. - * Otherwise we use the state stored in the first child. */ - pda_run->pda_cs = rhs_len == 0 ? pda_run->cur_state : child->state; - - if ( prg->ctx_dep_parsing && prg->rtd->prod_info[pda_run->reduction].frame_id >= 0 ) { - /* Frame info for reduction. */ - pda_run->fi = &prg->rtd->frame_info[prg->rtd->prod_info[pda_run->reduction].frame_id]; - pda_run->frame_id = prg->rtd->prod_info[pda_run->reduction].frame_id; - pda_run->reject = false; - pda_run->parsed = 0; - pda_run->code = pda_run->fi->codeWV; - - /* COROUTINE */ - return PCR_REDUCTION; - case PCR_REDUCTION: - - if ( prg->induce_exit ) - goto fail; - - /* If the lhs was stored and it changed then we need to restore the - * original upon backtracking, otherwise downref since we took a - * copy above. */ - if ( pda_run->parsed != 0 ) { - if ( pda_run->parsed != pda_run->red_lel->shadow->tree ) { - debug( prg, REALM_PARSE, "lhs tree was modified, " - "adding a restore instruction\n" ); -// -// /* Make it into a parse tree. */ -// tree_t *newPt = prepParseTree( prg, sp, pdaRun->redLel->tree ); -// colm_tree_downref( prg, sp, pdaRun->redLel->tree ); -// -// /* Copy it in. */ -// pdaRun->redLel->tree = newPt; -// colm_tree_upref( prg, pdaRun->redLel->tree ); - - /* Add the restore instruct. 
*/ - append_code_val( &pda_run->rcode_collect, IN_RESTORE_LHS ); - append_word( &pda_run->rcode_collect, (word_t)pda_run->parsed ); - append_code_val( &pda_run->rcode_collect, SIZEOF_CODE + SIZEOF_WORD ); - } - else { - /* Not changed. Done with parsed. */ - colm_tree_downref( prg, sp, pda_run->parsed ); - } - pda_run->parsed = 0; - } - - /* Pull out the reverse code, if any. */ - colm_make_reverse_code( pda_run ); - colm_transfer_reverse_code( pda_run, pda_run->red_lel ); - - /* Perhaps the execution environment is telling us we need to - * reject the reduction. */ - induce_reject = pda_run->reject; - } - - /* If the left hand side was replaced then the only parse algorithm - * data that is contained in it will the PF_HAS_RCODE flag. Everthing - * else will be in the original. This requires that we restore first - * when going backwards and when doing a commit. */ - - if ( induce_reject ) { - debug( prg, REALM_PARSE, "error induced during reduction of %s\n", - prg->rtd->lel_info[pda_run->red_lel->id].name ); - pda_run->red_lel->state = pda_run->cur_state; - pda_run->red_lel->next = pda_run->stack_top; - pda_run->stack_top = pda_run->red_lel; - /* FIXME: What is the right argument here? 
*/ - push_bt_point( prg, pda_run ); - goto parse_error; - } - - pda_run->red_lel->next = pda_run->parse_input; - pda_run->parse_input = pda_run->red_lel; - } - - goto again; - -parse_error: - debug( prg, REALM_PARSE, "hit error, backtracking\n" ); - -#if 0 - if ( pda_run->num_retry == 0 ) { - debug( prg, REALM_PARSE, "out of retries failing parse\n" ); - goto fail; - } -#endif - - while ( 1 ) { - if ( pda_run->on_deck ) { - debug( prg, REALM_BYTECODE, "dropping out for reverse code call\n" ); - - pda_run->frame_id = -1; - pda_run->code = colm_pop_reverse_code( &pda_run->reverse_code ); - - /* COROUTINE */ - return PCR_REVERSE; - case PCR_REVERSE: - - colm_decrement_steps( pda_run ); - } - else if ( pda_run->check_next ) { - pda_run->check_next = false; - - if ( pda_run->next > 0 && pda_run->pda_tables->token_regions[pda_run->next] != 0 ) { - debug( prg, REALM_PARSE, "found a new region\n" ); - pda_run->num_retry -= 1; - pda_run->pda_cs = stack_top_target( prg, pda_run ); - pda_run->next_region_ind = pda_run->next; - return PCR_DONE; - } - } - else if ( pda_run->check_stop ) { - pda_run->check_stop = false; - - if ( pda_run->stop ) { - debug( prg, REALM_PARSE, "stopping the backtracking, " - "steps is %d\n", pda_run->steps ); - - pda_run->pda_cs = stack_top_target( prg, pda_run ); - goto _out; - } - } - else if ( pda_run->parse_input != 0 ) { - /* Either we are dealing with a terminal that was shifted or a - * nonterminal that was reduced. */ - if ( pda_run->parse_input->id < prg->rtd->first_non_term_id ) { - /* This is a terminal. */ - assert( pda_run->parse_input->retry_upper == 0 ); - - if ( pda_run->parse_input->retry_lower != 0 ) { - debug( prg, REALM_PARSE, "found retry targ: %p\n", pda_run->parse_input ); - - pda_run->num_retry -= 1; - pda_run->pda_cs = pda_run->parse_input->state; - goto again; - } - - if ( pda_run->parse_input->cause_reduce != 0 ) { - /* The terminal caused a reduce. Unshift the reduced thing - * (will unreduce in the next step. 
*/ - if ( pda_run->shift_count == pda_run->commit_shift_count ) { - debug( prg, REALM_PARSE, "backed up to commit point, " - "failing parse\n" ); - goto fail; - } - pda_run->shift_count -= 1; - - pda_run->undo_lel = pda_run->stack_top; - - /* Check if we've arrived at the stack sentinal. This guard - * is here to allow us to initially set numRetry to one to - * cause the parser to backup all the way to the beginning - * when an error occurs. */ - if ( pda_run->undo_lel->next == 0 ) - break; - - /* Either we are dealing with a terminal that was - * shifted or a nonterminal that was reduced. */ - assert( !(pda_run->stack_top->id < prg->rtd->first_non_term_id) ); - - debug( prg, REALM_PARSE, "backing up over non-terminal: %s\n", - prg->rtd->lel_info[pda_run->stack_top->id].name ); - - /* Pop the item from the stack. */ - pda_run->stack_top = pda_run->stack_top->next; - - /* Queue it as next parseInput item. */ - pda_run->undo_lel->next = pda_run->parse_input; - pda_run->parse_input = pda_run->undo_lel; - } - else { - long region = pda_run->parse_input->retry_region; - pda_run->next = region > 0 ? region + 1 : 0; - pda_run->check_next = true; - pda_run->check_stop = true; - - send_back( prg, sp, pda_run, is, pda_run->parse_input ); - - pda_run->parse_input = 0; - } - } - else if ( pda_run->parse_input->flags & PF_HAS_RCODE ) { - debug( prg, REALM_PARSE, "tree has rcode, setting on deck\n" ); - pda_run->on_deck = true; - pda_run->parsed = 0; - - /* Only the RCODE flag was in the replaced lhs. All the rest is in - * the the original. We read it after restoring. */ - - pda_run->parse_input->flags &= ~PF_HAS_RCODE; - } - else { - /* Remove it from the input queue. */ - pda_run->undo_lel = pda_run->parse_input; - pda_run->parse_input = pda_run->parse_input->next; - - /* Extract children from the child list. */ - parse_tree_t *first = pda_run->undo_lel->child; - pda_run->undo_lel->child = 0; - - /* This will skip the ignores/attributes, etc. 
*/ - kid_t *data_first = tree_extract_child( prg, pda_run->undo_lel->shadow->tree ); - - /* Walk the child list and and push the items onto the parsing - * stack one at a time. */ - while ( first != 0 ) { - /* Get the next item ahead of time. */ - parse_tree_t *next = first->next; - kid_t *data_next = data_first->next; - - /* Push onto the stack. */ - first->next = pda_run->stack_top; - pda_run->stack_top = first; - - /* Reattach the data and the parse tree. */ - first->shadow = data_first; - - first = next; - data_first = data_next; - } - - /* If there is an parseInput queued, this is one less reduction it has - * caused. */ - if ( pda_run->parse_input != 0 ) - pda_run->parse_input->cause_reduce -= 1; - - if ( pda_run->undo_lel->retry_upper != 0 ) { - /* There is always an parseInput item here because reduce - * conflicts only happen on a lookahead character. */ - assert( pda_run->parse_input != pda_run->undo_lel ); - assert( pda_run->parse_input != 0 ); - assert( pda_run->undo_lel->retry_lower == 0 ); - assert( pda_run->parse_input->retry_upper == 0 ); - - /* Transfer the retry from undoLel to parseInput. */ - pda_run->parse_input->retry_lower = pda_run->undo_lel->retry_upper; - pda_run->parse_input->retry_upper = 0; - pda_run->parse_input->state = stack_top_target( prg, pda_run ); - } - - /* Free the reduced item. */ - colm_tree_downref( prg, sp, pda_run->undo_lel->shadow->tree ); - kid_free( prg, pda_run->undo_lel->shadow ); - parse_tree_free( pda_run, pda_run->undo_lel ); - - /* If the stacktop had right ignore attached, detach now. */ - if ( pda_run->stack_top->flags & PF_RIGHT_IL_ATTACHED ) - detach_right_ignore( prg, sp, pda_run, pda_run->stack_top ); - } - } - else if ( pda_run->accum_ignore != 0 ) { - debug( prg, REALM_PARSE, "have accumulated ignore to undo\n" ); - - /* Send back any accumulated ignore tokens, then trigger error - * in the the parser. 
*/ - parse_tree_t *ignore = pda_run->accum_ignore; - pda_run->accum_ignore = pda_run->accum_ignore->next; - ignore->next = 0; - - long region = ignore->retry_region; - pda_run->next = region > 0 ? region + 1 : 0; - pda_run->check_next = true; - pda_run->check_stop = true; - - send_back_ignore( prg, sp, pda_run, is, ignore ); - - colm_tree_downref( prg, sp, ignore->shadow->tree ); - kid_free( prg, ignore->shadow ); - parse_tree_free( pda_run, ignore ); - } - else { - if ( pda_run->shift_count == pda_run->commit_shift_count ) { - debug( prg, REALM_PARSE, "backed up to commit point, failing parse\n" ); - goto fail; - } - - pda_run->shift_count -= 1; - - /* Now it is time to undo something. Pick an element from the top of - * the stack. */ - pda_run->undo_lel = pda_run->stack_top; - - /* Check if we've arrived at the stack sentinal. This guard is - * here to allow us to initially set numRetry to one to cause the - * parser to backup all the way to the beginning when an error - * occurs. */ - if ( pda_run->undo_lel->next == 0 ) - break; - - /* Either we are dealing with a terminal that was - * shifted or a nonterminal that was reduced. */ - if ( pda_run->stack_top->id < prg->rtd->first_non_term_id ) { - debug( prg, REALM_PARSE, "backing up over effective terminal: %s\n", - prg->rtd->lel_info[pda_run->stack_top->id].name ); - - /* Pop the item from the stack. */ - pda_run->stack_top = pda_run->stack_top->next; - - /* Queue it as next parseInput item. */ - pda_run->undo_lel->next = pda_run->parse_input; - pda_run->parse_input = pda_run->undo_lel; - - /* Pop from the token list. */ - ref_t *ref = pda_run->token_list; - pda_run->token_list = ref->next; - kid_free( prg, (kid_t*)ref ); - - assert( pda_run->accum_ignore == 0 ); - detach_left_ignore( prg, sp, pda_run, pda_run->parse_input ); - } - else { - debug( prg, REALM_PARSE, "backing up over non-terminal: %s\n", - prg->rtd->lel_info[pda_run->stack_top->id].name ); - - /* Pop the item from the stack. 
*/ - pda_run->stack_top = pda_run->stack_top->next; - - /* Queue it as next parseInput item. */ - pda_run->undo_lel->next = pda_run->parse_input; - pda_run->parse_input = pda_run->undo_lel; - } - - /* Undo attach of right ignore. */ - if ( pda_run->stack_top->flags & PF_RIGHT_IL_ATTACHED ) - detach_right_ignore( prg, sp, pda_run, pda_run->stack_top ); - } - } - -fail: - pda_run->pda_cs = -1; - pda_run->parse_error = 1; - - /* FIXME: do we still need to fall through here? A fail is permanent now, - * no longer called into again. */ - - return PCR_DONE; - -_out: - pda_run->next_region_ind = pda_run->pda_tables->token_region_inds[pda_run->pda_cs]; - - /* COROUTINE */ - case PCR_DONE: - break; } - - return PCR_DONE; -} - -/* - * colm_parse_loop - * - * Stops on: - * PCR_PRE_EOF - * PCR_GENERATION - * PCR_REDUCTION - * PCR_REVERSE - */ - -long colm_parse_loop( program_t *prg, tree_t **sp, struct pda_run *pda_run, - struct input_impl *is, long entry ) -{ - struct lang_el_info *lel_info = prg->rtd->lel_info; - - /* COROUTINE */ - switch ( entry ) { - case PCR_START: - - pda_run->stop = false; - - while ( true ) { - debug( prg, REALM_PARSE, "parse loop start\n" ); - - /* Pull the current scanner from the parser. This can change during - * parsing due to inputStream pushes, usually for the purpose of includes. 
- * */ - pda_run->token_id = scan_token( prg, pda_run, is ); - - if ( pda_run->token_id == SCAN_ERROR ) { - if ( pda_run->pre_region >= 0 ) { - pda_run->pre_region = -1; - pda_run->fsm_cs = pda_run->next_cs; - continue; - } - } - - if ( pda_run->token_id == SCAN_ERROR && - ( prg->rtd->region_info[pda_run->region].ci_lel_id > 0 ) ) - { - debug( prg, REALM_PARSE, "sending a collect ignore\n" ); - send_collect_ignore( prg, sp, pda_run, is, - prg->rtd->region_info[pda_run->region].ci_lel_id ); - goto yes; - } - - if ( pda_run->token_id == SCAN_TRY_AGAIN_LATER ) { - debug( prg, REALM_PARSE, "scanner says try again later\n" ); - break; - } - - assert( pda_run->parse_input == 0 ); - pda_run->parse_input = 0; - - /* Check for EOF. */ - if ( pda_run->token_id == SCAN_EOF ) { - pda_run->eof_term_recvd = true; - send_eof( prg, sp, pda_run, is ); - - pda_run->frame_id = prg->rtd->region_info[pda_run->region].eof_frame_id; - - if ( prg->ctx_dep_parsing && pda_run->frame_id >= 0 ) { - debug( prg, REALM_PARSE, "HAVE PRE_EOF BLOCK\n" ); - - pda_run->fi = &prg->rtd->frame_info[pda_run->frame_id]; - pda_run->code = pda_run->fi->codeWV; - - /* COROUTINE */ - return PCR_PRE_EOF; - case PCR_PRE_EOF: - - colm_make_reverse_code( pda_run ); - } - } - else if ( pda_run->token_id == SCAN_UNDO ) { - /* Fall through with parseInput = 0. FIXME: Do we need to send back ignore? */ - debug( prg, REALM_PARSE, "invoking undo from the scanner\n" ); - } - else if ( pda_run->token_id == SCAN_ERROR ) { - /* Scanner error, maybe retry. */ - if ( pda_run->accum_ignore == 0 && get_next_region( pda_run, 1 ) != 0 ) { - debug( prg, REALM_PARSE, "scanner failed, trying next region\n" ); - - pda_run->next_region_ind += 1; - goto skip_send; - } - else { // if ( pdaRun->numRetry > 0 ) { - debug( prg, REALM_PARSE, "invoking parse error from the scanner\n" ); - - /* Fall through to send null (error). 
*/ - push_bt_point( prg, pda_run ); - } -#if 0 - else { - debug( prg, REALM_PARSE, "no alternate scanning regions\n" ); - - /* There are no alternative scanning regions to try, nor are - * there any alternatives stored in the current parse tree. No - * choice but to end the parse. */ - push_bt_point( prg, pda_run ); - - report_parse_error( prg, sp, pda_run ); - pda_run->parse_error = 1; - goto skip_send; - } -#endif - } - else if ( pda_run->token_id == SCAN_LANG_EL ) { - debug( prg, REALM_PARSE, "sending an named lang el\n" ); - - /* A named language element (parsing colm program). */ - prg->rtd->send_named_lang_el( prg, sp, pda_run, is ); - } - else if ( pda_run->token_id == SCAN_TREE ) { - debug( prg, REALM_PARSE, "sending a tree\n" ); - - /* A tree already built. */ - send_tree( prg, sp, pda_run, is ); - } - else if ( pda_run->token_id == SCAN_IGNORE ) { - debug( prg, REALM_PARSE, "sending an ignore token\n" ); - - /* A tree to ignore. */ - send_ignore_tree( prg, sp, pda_run, is ); - goto skip_send; - } - else if ( prg->ctx_dep_parsing && lel_info[pda_run->token_id].frame_id >= 0 ) { - /* Has a generation action. */ - debug( prg, REALM_PARSE, "token gen action: %s\n", - prg->rtd->lel_info[pda_run->token_id].name ); - - /* Make the token data. */ - pda_run->tokdata = peek_match( prg, pda_run, is ); - - /* Note that we don't update the position now. It is done when the token - * data is pulled from the inputStream. */ - - pda_run->p = pda_run->pe = 0; - pda_run->toklen = 0; - pda_run->scan_eof = 0; - - pda_run->fi = &prg->rtd->frame_info[prg->rtd->lel_info[pda_run->token_id].frame_id]; - pda_run->frame_id = prg->rtd->lel_info[pda_run->token_id].frame_id; - pda_run->code = pda_run->fi->codeWV; - - /* COROUTINE */ - return PCR_GENERATION; - case PCR_GENERATION: - - colm_make_reverse_code( pda_run ); - - /* Finished with the match text. 
*/ - string_free( prg, pda_run->tokdata ); - - goto skip_send; - } - else if ( lel_info[pda_run->token_id].ignore ) { - debug( prg, REALM_PARSE, "sending an ignore token: %s\n", - prg->rtd->lel_info[pda_run->token_id].name ); - - /* Is an ignore token. */ - send_ignore( prg, sp, pda_run, is, pda_run->token_id ); - goto skip_send; - } - else { - debug( prg, REALM_PARSE, "sending a plain old token: %s\n", - prg->rtd->lel_info[pda_run->token_id].name ); - - /* Is a plain token. */ - send_token( prg, sp, pda_run, is, pda_run->token_id ); - } -yes: - - if ( pda_run->parse_input != 0 ) - colm_transfer_reverse_code( pda_run, pda_run->parse_input ); - - if ( pda_run->parse_input != 0 ) { - /* If it's a nonterminal with a termdup then flip the parse tree to - * the terminal. */ - if ( pda_run->parse_input->id >= prg->rtd->first_non_term_id ) { - pda_run->parse_input->id = - prg->rtd->lel_info[pda_run->parse_input->id].term_dup_id; - pda_run->parse_input->flags |= PF_TERM_DUP; - } - } - - long pcr = parse_token( prg, sp, pda_run, is, PCR_START ); - - while ( pcr != PCR_DONE ) { - - /* COROUTINE */ - return pcr; - case PCR_REDUCTION: - case PCR_REVERSE: - - pcr = parse_token( prg, sp, pda_run, is, entry ); - } - - assert( pcr == PCR_DONE ); - - handle_error( prg, sp, pda_run ); - -skip_send: - new_token( prg, pda_run ); - - /* Various stop conditions. This should all be coverned by one test - * eventually. 
*/ - - if ( pda_run->trigger_undo ) { - debug( prg, REALM_PARSE, "parsing stopped by triggerUndo\n" ); - break; - } - - if ( pda_run->eof_term_recvd ) { - debug( prg, REALM_PARSE, "parsing stopped by EOF\n" ); - break; - } - - if ( pda_run->stop_parsing ) { - debug( prg, REALM_PARSE, "scanner has been stopped\n" ); - break; - } - - if ( pda_run->stop ) { - debug( prg, REALM_PARSE, "parsing has been stopped by consumedCount\n" ); - break; - } - - if ( prg->induce_exit ) { - debug( prg, REALM_PARSE, "parsing has been stopped by a call to exit\n" ); - break; - } - - if ( pda_run->parse_error ) { - debug( prg, REALM_PARSE, "parsing stopped by a parse error\n" ); - break; - } - - /* Disregard any alternate parse paths, just go right to failure. */ - if ( pda_run->fail_parsing ) { - debug( prg, REALM_PARSE, "parsing failed by explicit request\n" ); - break; - } - } - - /* COROUTINE */ - case PCR_DONE: - break; } - - return PCR_DONE; -} - - -long colm_parse_frag( program_t *prg, tree_t **sp, - struct pda_run *pda_run, input_t *input, long entry ) -{ - /* COROUTINE */ - switch ( entry ) { - case PCR_START: - - if ( ! 
pda_run->parse_error ) { - long pcr = colm_parse_loop( prg, sp, pda_run, - input_to_impl( input ), entry ); - - while ( pcr != PCR_DONE ) { - - /* COROUTINE */ - return pcr; - case PCR_REDUCTION: - case PCR_GENERATION: - case PCR_PRE_EOF: - case PCR_REVERSE: - - pcr = colm_parse_loop( prg, sp, pda_run, - input_to_impl( input ), entry ); - } - } - - /* COROUTINE */ - case PCR_DONE: - break; } - - return PCR_DONE; -} - -long colm_parse_undo_frag( program_t *prg, tree_t **sp, struct pda_run *pda_run, - input_t *input, long entry, long steps ) -{ - debug( prg, REALM_PARSE, - "undo parse frag, target steps: %ld, pdarun steps: %ld\n", - steps, pda_run->steps ); - - reset_token( pda_run ); - - /* COROUTINE */ - switch ( entry ) { - case PCR_START: - - if ( steps < pda_run->steps ) { - /* Setup environment for going backwards until we reduced steps to - * what we want. */ - pda_run->num_retry += 1; - pda_run->target_steps = steps; - pda_run->trigger_undo = 1; - - /* The parse loop will recognise the situation. */ - long pcr = colm_parse_loop( prg, sp, pda_run, input_to_impl(input), entry ); - while ( pcr != PCR_DONE ) { - - /* COROUTINE */ - return pcr; - case PCR_REDUCTION: - case PCR_GENERATION: - case PCR_PRE_EOF: - case PCR_REVERSE: - - pcr = colm_parse_loop( prg, sp, pda_run, input_to_impl(input), entry ); - } - - /* Reset environment. */ - pda_run->trigger_undo = 0; - pda_run->target_steps = -1; - pda_run->num_retry -= 1; - } - - /* COROUTINE */ - case PCR_DONE: - break; } - - return PCR_DONE; -} - -void colm_parse_reduce_commit( program_t *prg, tree_t **sp, - struct pda_run *pda_run ) -{ - /* Flush out anything not committed. 
*/ - if ( pda_run->reducer ) - commit_reduce( prg, sp, pda_run ); -} - diff --git a/src/pdarun.h b/src/pdarun.h deleted file mode 100644 index 0174644d..00000000 --- a/src/pdarun.h +++ /dev/null @@ -1,477 +0,0 @@ -/* - * Copyright 2007-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef _COLM_PDARUN_H -#define _COLM_PDARUN_H - -#include <colm/input.h> -#include <colm/defs.h> -#include <colm/tree.h> -#include <colm/struct.h> - -#ifdef __cplusplus -extern "C" { -#endif - -struct colm_program; - -#define MARK_SLOTS 32 - -struct fsm_tables -{ - long *actions; - long *key_offsets; - char *trans_keys; - long *single_lengths; - long *range_lengths; - long *index_offsets; - long *transTargsWI; - long *transActionsWI; - long *to_state_actions; - long *from_state_actions; - long *eof_actions; - long *eof_targs; - long *entry_by_region; - - long num_states; - long num_actions; - long num_trans_keys; - long num_single_lengths; - long num_range_lengths; - long num_index_offsets; - long numTransTargsWI; - long numTransActionsWI; - long num_regions; - - long start_state; - long first_final; - long error_state; - - struct GenAction **action_switch; - long num_action_switch; -}; - -#if SIZEOF_LONG != 4 && SIZEOF_LONG != 8 - #error "SIZEOF_LONG contained an unexpected value" -#endif - -struct colm_execution; - -struct rt_code_vect -{ - code_t *data; - long tab_len; - long alloc_len; - - /* FIXME: leak when freed. */ -}; - -void list_add_after( list_t *list, list_el_t *prev_el, list_el_t *new_el ); -void list_add_before( list_t *list, list_el_t *next_el, list_el_t *new_el ); - -void list_prepend( list_t *list, list_el_t *new_el ); -void list_append( list_t *list, list_el_t *new_el ); - -list_el_t *list_detach( list_t *list, list_el_t *el ); -list_el_t *list_detach_first(list_t *list ); -list_el_t *list_detach_last(list_t *list ); - -long list_length(list_t *list); - -struct function_info -{ - long frame_id; - long arg_size; - long frame_size; -}; - -/* - * Program Data. 
- */ - -struct pat_cons_info -{ - long offset; - long num_bindings; -}; - -struct pat_cons_node -{ - long id; - long prod_num; - long next; - long child; - long bind_id; - const char *data; - long length; - long left_ignore; - long right_ignore; - - /* Just match nonterminal, don't go inside. */ - unsigned char stop; -}; - -/* FIXME: should have a descriptor for object types to give the length. */ - -struct lang_el_info -{ - const char *name; - const char *xml_tag; - unsigned char repeat; - unsigned char list; - unsigned char literal; - unsigned char ignore; - - long frame_id; - - long object_type_id; - long ofi_offset; - long object_length; - - long term_dup_id; - long mark_id; - long capture_attr; - long num_capture_attr; -}; - -struct struct_el_info -{ - long size; - short *trees; - long trees_len; -}; - -struct prod_info -{ - unsigned long lhs_id; - short prod_num; - long length; - const char *name; - long frame_id; - unsigned char lhs_upref; - unsigned char *copy; - long copy_len; -}; - -/* Must match the LocalType enum. */ -#define LI_Tree 1 -#define LI_Iter 2 -#define LI_RevIter 3 -#define LI_UserIter 4 - -struct local_info -{ - char type; - short offset; -}; - -struct frame_info -{ - const char *name; - code_t *codeWV; - long codeLenWV; - code_t *codeWC; - long codeLenWC; - struct local_info *locals; - long locals_len; - long arg_size; - long frame_size; - char ret_tree; -}; - -struct region_info -{ - long default_token; - long eof_frame_id; - int ci_lel_id; -}; - -typedef struct _CaptureAttr -{ - long mark_enter; - long mark_leave; - long offset; -} CaptureAttr; - -struct pda_tables -{ - /* Parser table data. 
*/ - int *indicies; - int *owners; - int *keys; - unsigned int *offsets; - unsigned int *targs; - unsigned int *act_inds; - unsigned int *actions; - int *commit_len; - int *token_region_inds; - int *token_regions; - int *token_pre_regions; - - int num_indicies; - int num_keys; - int num_states; - int num_targs; - int num_act_inds; - int num_actions; - int num_commit_len; - int num_region_items; - int num_pre_region_items; -}; - -struct pool_block -{ - void *data; - struct pool_block *next; -}; - -struct pool_item -{ - struct pool_item *next; -}; - -struct pool_alloc -{ - struct pool_block *head; - long nextel; - struct pool_item *pool; - int sizeofT; -}; - -struct pda_run -{ - /* - * Scanning. - */ - struct fsm_tables *fsm_tables; - - struct run_buf *consume_buf; - - long region, pre_region; - long fsm_cs, next_cs, act; - char *start; - char *tokstart; - long tokend; - long toklen; - char *p, *pe; - char scan_eof; - - char return_result; - char skip_toklen; - char eof_term_recvd; - - char *mark[MARK_SLOTS]; - long matched_token; - - /* - * Parsing - */ - int num_retry; - parse_tree_t *stack_top; - ref_t *token_list; - int pda_cs; - int next_region_ind; - - struct pda_tables *pda_tables; - int parser_id; - - /* Reused. */ - struct rt_code_vect rcode_collect; - struct rt_code_vect reverse_code; - - int stop_parsing; - long stop_target; - - parse_tree_t *accum_ignore; - - kid_t *bt_point; - - struct bindings *bindings; - - int revert_on; - - struct colm_struct *context; - - int stop; - int parse_error; - - long steps; - long target_steps; - - /* The shift count simply tracks the number of shifts that have happend. - * The commit shift count is the shift count when the last commit occurred. - * If we back up to this number of shifts then we decide we cannot proceed. - * The commit shift count is initialized to -1. */ - long shift_count; - long commit_shift_count; - - int on_deck; - - /* - * Data we added when refactoring the parsing engine into a coroutine. 
- */ - - parse_tree_t *parse_input; - struct frame_info *fi; - int reduction; - parse_tree_t *red_lel; - int cur_state; - parse_tree_t *lel; - int trigger_undo; - - int token_id; - head_t *tokdata; - int frame_id; - int next; - parse_tree_t *undo_lel; - - int check_next; - int check_stop; - - /* The lhs is sometimes saved before reduction actions in case it is - * replaced and we need to restore it on backtracking */ - tree_t *parsed; - - int reject; - - /* Instruction pointer to use when we stop parsing and execute code. */ - code_t *code; - - int rc_block_count; - - tree_t *parse_error_text; - - /* Zero indicates parsing proper. Nonzero is the reducer id. */ - int reducer; - - parse_tree_t *last_final; - - struct pool_alloc *parse_tree_pool; - struct pool_alloc local_pool; - - /* Disregard any alternate parse paths, just go right to failure. */ - int fail_parsing; -}; - -void colm_pda_init( struct colm_program *prg, struct pda_run *pda_run, - struct pda_tables *tables, int parser_id, long stop_target, - int revert_on, struct colm_struct *context, int reducer ); - -void colm_pda_clear( struct colm_program *prg, struct colm_tree **sp, - struct pda_run *pda_run ); - -void colm_rt_code_vect_replace( struct rt_code_vect *vect, long pos, - const code_t *val, long len ); -void colm_rt_code_vect_empty( struct rt_code_vect *vect ); -void colm_rt_code_vect_remove( struct rt_code_vect *vect, long pos, long len ); - -void init_rt_code_vect( struct rt_code_vect *code_vect ); - -inline static void append_code_val( struct rt_code_vect *vect, const code_t val ); -inline static void append_code_vect( struct rt_code_vect *vect, const code_t *val, long len ); -inline static void append_half( struct rt_code_vect *vect, half_t half ); -inline static void append_word( struct rt_code_vect *vect, word_t word ); - -inline static void append_code_vect( struct rt_code_vect *vect, const code_t *val, long len ) -{ - colm_rt_code_vect_replace( vect, vect->tab_len, val, len ); -} - -inline 
static void append_code_val( struct rt_code_vect *vect, const code_t val ) -{ - colm_rt_code_vect_replace( vect, vect->tab_len, &val, 1 ); -} - -inline static void append_half( struct rt_code_vect *vect, half_t half ) -{ - /* not optimal. */ - append_code_val( vect, half & 0xff ); - append_code_val( vect, (half>>8) & 0xff ); -} - -inline static void append_word( struct rt_code_vect *vect, word_t word ) -{ - /* not optimal. */ - append_code_val( vect, word & 0xff ); - append_code_val( vect, (word>>8) & 0xff ); - append_code_val( vect, (word>>16) & 0xff ); - append_code_val( vect, (word>>24) & 0xff ); - #if SIZEOF_LONG == 8 - append_code_val( vect, (word>>32) & 0xff ); - append_code_val( vect, (word>>40) & 0xff ); - append_code_val( vect, (word>>48) & 0xff ); - append_code_val( vect, (word>>56) & 0xff ); - #endif -} - -void colm_increment_steps( struct pda_run *pda_run ); -void colm_decrement_steps( struct pda_run *pda_run ); - -void colm_clear_stream_impl( struct colm_program *prg, tree_t **sp, struct stream_impl *input_stream ); - -#define PCR_START 1 -#define PCR_DONE 2 -#define PCR_REDUCTION 3 -#define PCR_GENERATION 4 -#define PCR_PRE_EOF 5 -#define PCR_REVERSE 6 - -head_t *colm_stream_pull( struct colm_program *prg, struct colm_tree **sp, - struct pda_run *pda_run, struct input_impl *is, long length ); -head_t *colm_string_alloc_pointer( struct colm_program *prg, const char *data, long length ); - -void colm_stream_push_text( struct colm_program *prg, struct input_impl *input_stream, const char *data, long length ); -void colm_stream_push_tree( struct colm_program *prg, struct input_impl *input_stream, tree_t *tree, int ignore ); -void colm_stream_push_stream( struct colm_program *prg, struct input_impl *input_stream, stream_t *stream ); -void colm_undo_stream_push( struct colm_program *prg, tree_t **sp, - struct input_impl *input_stream, long length ); - -kid_t *make_token_with_data( struct colm_program *prg, struct pda_run *pda_run, - struct input_impl 
*input_stream, int id, head_t *tokdata ); - -long colm_parse_loop( struct colm_program *prg, tree_t **sp, struct pda_run *pda_run, - struct input_impl *input_stream, long entry ); - -long colm_parse_frag( struct colm_program *prg, tree_t **sp, - struct pda_run *pda_run, input_t *input, long entry ); -long colm_parse_finish( struct colm_program *prg, tree_t **sp, - struct pda_run *pda_run, stream_t *input, long entry ); -long colm_parse_undo_frag( struct colm_program *prg, tree_t **sp, struct pda_run *pda_run, - input_t *input, long entry, long steps ); - -void commit_clear_kid_list( program_t *prg, tree_t **sp, kid_t *kid ); -void commit_clear_parse_tree( program_t *prg, tree_t **sp, - struct pda_run *pda_run, parse_tree_t *pt ); -void commit_reduce( program_t *prg, tree_t **root, - struct pda_run *pda_run ); - -tree_t *get_parsed_root( struct pda_run *pda_run, int stop ); - -void colm_parse_reduce_commit( program_t *prg, tree_t **sp, - struct pda_run *pda_run ); - -#ifdef __cplusplus -} -#endif - -#endif /* _COLM_PDRUN_H */ - diff --git a/src/pool.c b/src/pool.c deleted file mode 100644 index ffb32636..00000000 --- a/src/pool.c +++ /dev/null @@ -1,248 +0,0 @@ -/* - * Copyright 2010-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <colm/pool.h> - -#include <assert.h> -#include <string.h> -#include <stdlib.h> - -#include <colm/pdarun.h> -#include <colm/debug.h> - -void init_pool_alloc( struct pool_alloc *pool_alloc, int sizeofT ) -{ - pool_alloc->head = 0; - pool_alloc->nextel = FRESH_BLOCK; - pool_alloc->pool = 0; - pool_alloc->sizeofT = sizeofT; -} - -static void *pool_alloc_allocate( struct pool_alloc *pool_alloc ) -{ - //debug( REALM_POOL, "pool allocation\n" ); - -#ifdef POOL_MALLOC - void *res = malloc( pool_alloc->sizeofT ); - memset( res, 0, pool_alloc->sizeofT ); - return res; -#else - - void *new_el = 0; - if ( pool_alloc->pool == 0 ) { - if ( pool_alloc->nextel == FRESH_BLOCK ) { - struct pool_block *new_block = (struct pool_block*)malloc( sizeof(struct pool_block) ); - new_block->data = malloc( pool_alloc->sizeofT * FRESH_BLOCK ); - new_block->next = pool_alloc->head; - pool_alloc->head = new_block; - pool_alloc->nextel = 0; - } - - new_el = (char*)pool_alloc->head->data + pool_alloc->sizeofT * pool_alloc->nextel++; - } - else { - new_el = pool_alloc->pool; - pool_alloc->pool = pool_alloc->pool->next; - } - memset( new_el, 0, pool_alloc->sizeofT ); - return new_el; -#endif -} - -void pool_alloc_free( struct pool_alloc *pool_alloc, void *el ) -{ - #if 0 - /* Some sanity checking. Best not to normally run with this on. 
*/ - char *p = (char*)el + sizeof(struct pool_item*); - char *pe = (char*)el + sizeof(T); - for ( ; p < pe; p++ ) - assert( *p != 0xcc ); - memset( el, 0xcc, sizeof(T) ); - #endif - -#ifdef POOL_MALLOC - free( el ); -#else - struct pool_item *pi = (struct pool_item*) el; - pi->next = pool_alloc->pool; - pool_alloc->pool = pi; -#endif -} - -void pool_alloc_clear( struct pool_alloc *pool_alloc ) -{ - struct pool_block *block = pool_alloc->head; - while ( block != 0 ) { - struct pool_block *next = block->next; - free( block->data ); - free( block ); - block = next; - } - - pool_alloc->head = 0; - pool_alloc->nextel = 0; - pool_alloc->pool = 0; -} - -long pool_alloc_num_lost( struct pool_alloc *pool_alloc ) -{ - /* Count the number of items allocated. */ - long lost = 0; - struct pool_block *block = pool_alloc->head; - if ( block != 0 ) { - lost = pool_alloc->nextel; - block = block->next; - while ( block != 0 ) { - lost += FRESH_BLOCK; - block = block->next; - } - } - - /* Subtract. Items that are on the free list. 
*/ - struct pool_item *pi = pool_alloc->pool; - while ( pi != 0 ) { - lost -= 1; - pi = pi->next; - } - - return lost; -} - -/* - * kid_t - */ - -kid_t *kid_allocate( program_t *prg ) -{ - return (kid_t*) pool_alloc_allocate( &prg->kid_pool ); -} - -void kid_free( program_t *prg, kid_t *el ) -{ - pool_alloc_free( &prg->kid_pool, el ); -} - -void kid_clear( program_t *prg ) -{ - pool_alloc_clear( &prg->kid_pool ); -} - -long kid_num_lost( program_t *prg ) -{ - return pool_alloc_num_lost( &prg->kid_pool ); -} - -/* - * tree_t - */ - -tree_t *tree_allocate( program_t *prg ) -{ - return (tree_t*) pool_alloc_allocate( &prg->tree_pool ); -} - -void tree_free( program_t *prg, tree_t *el ) -{ - pool_alloc_free( &prg->tree_pool, el ); -} - -void tree_clear( program_t *prg ) -{ - pool_alloc_clear( &prg->tree_pool ); -} - -long tree_num_lost( program_t *prg ) -{ - return pool_alloc_num_lost( &prg->tree_pool ); -} - -/* - * parse_tree_t - */ - -parse_tree_t *parse_tree_allocate( struct pda_run *pda_run ) -{ - return (parse_tree_t*) pool_alloc_allocate( pda_run->parse_tree_pool ); -} - -void parse_tree_free( struct pda_run *pda_run, parse_tree_t *el ) -{ - pool_alloc_free( pda_run->parse_tree_pool, el ); -} - -void parse_tree_clear( struct pool_alloc *pool_alloc ) -{ - pool_alloc_clear( pool_alloc ); -} - -long parse_tree_num_lost( struct pool_alloc *pool_alloc ) -{ - return pool_alloc_num_lost( pool_alloc ); -} - -/* - * head_t - */ - -head_t *head_allocate( program_t *prg ) -{ - return (head_t*) pool_alloc_allocate( &prg->head_pool ); -} - -void head_free( program_t *prg, head_t *el ) -{ - pool_alloc_free( &prg->head_pool, el ); -} - -void head_clear( program_t *prg ) -{ - pool_alloc_clear( &prg->head_pool ); -} - -long head_num_lost( program_t *prg ) -{ - return pool_alloc_num_lost( &prg->head_pool ); -} - -/* - * location_t - */ - -location_t *location_allocate( program_t *prg ) -{ - return (location_t*) pool_alloc_allocate( &prg->location_pool ); -} - -void location_free( 
program_t *prg, location_t *el ) -{ - pool_alloc_free( &prg->location_pool, el ); -} - -void location_clear( program_t *prg ) -{ - pool_alloc_clear( &prg->location_pool ); -} - -long location_num_lost( program_t *prg ) -{ - return pool_alloc_num_lost( &prg->location_pool ); -} diff --git a/src/pool.h b/src/pool.h deleted file mode 100644 index 5e8f1de0..00000000 --- a/src/pool.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright 2010-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef _COLM_POOL_H -#define _COLM_POOL_H - -/* Allocation, number of items. 
*/ -#define FRESH_BLOCK 8128 - -#include <colm/pdarun.h> -#include <colm/map.h> -#include <colm/tree.h> - -#ifdef __cplusplus -extern "C" { -#endif - -void init_pool_alloc( struct pool_alloc *pool_alloc, int sizeofT ); - -kid_t *kid_allocate( program_t *prg ); -void kid_free( program_t *prg, kid_t *el ); -void kid_clear( program_t *prg ); -long kid_num_lost( program_t *prg ); - -tree_t *tree_allocate( program_t *prg ); -void tree_free( program_t *prg, tree_t *el ); -void tree_clear( program_t *prg ); -long tree_num_lost( program_t *prg ); - -/* Parse tree allocators go into pda_run structs. */ -parse_tree_t *parse_tree_allocate( struct pda_run *pda_run ); -void parse_tree_free( struct pda_run *pda_run, parse_tree_t *el ); -void parse_tree_clear( struct pool_alloc *pool_alloc ); -long parse_tree_num_lost( struct pool_alloc *pool_alloc ); - -head_t *head_allocate( program_t *prg ); -void head_free( program_t *prg, head_t *el ); -void head_clear( program_t *prg ); -long head_num_lost( program_t *prg ); - -location_t *location_allocate( program_t *prg ); -void location_free( program_t *prg, location_t *el ); -void location_clear( program_t *prg ); -long location_num_lost( program_t *prg ); - -void pool_alloc_clear( struct pool_alloc *pool_alloc ); -long pool_alloc_num_lost( struct pool_alloc *pool_alloc ); - -#ifdef __cplusplus -} -#endif - -#endif /* _COLM_POOL_H */ - diff --git a/src/print.c b/src/print.c deleted file mode 100644 index 317b197d..00000000 --- a/src/print.c +++ /dev/null @@ -1,778 +0,0 @@ -/* - * Copyright 2007-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * 
furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <string.h> -#include <stdlib.h> -#include <stdbool.h> -#include <stdio.h> -#include <unistd.h> -#include <assert.h> - -#include <colm/tree.h> -#include <colm/pool.h> -#include <colm/bytecode.h> -#include <colm/debug.h> - -#define BUFFER_INITIAL_SIZE 4096 - -static void xml_escape_data( struct colm_print_args *print_args, const char *data, long len ) -{ - int i; - for ( i = 0; i < len; i++ ) { - if ( data[i] == '<' ) - print_args->out( print_args, "<", 4 ); - else if ( data[i] == '>' ) - print_args->out( print_args, ">", 4 ); - else if ( data[i] == '&' ) - print_args->out( print_args, "&", 5 ); - else if ( (32 <= data[i] && data[i] <= 126) || - data[i] == '\t' || data[i] == '\n' || data[i] == '\r' ) - { - print_args->out( print_args, &data[i], 1 ); - } - else { - char out[64]; - sprintf( out, "&#%u;", ((unsigned)data[i]) ); - print_args->out( print_args, out, strlen(out) ); - } - } -} - -void init_str_collect( str_collect_t *collect ) -{ - collect->data = (char*) malloc( BUFFER_INITIAL_SIZE ); - collect->allocated = BUFFER_INITIAL_SIZE; - collect->length = 0; - collect->indent.indent = 0; - collect->indent.level = COLM_INDENT_OFF; -} - -void str_collect_destroy( str_collect_t *collect ) -{ - free( collect->data ); -} - -void str_collect_append( str_collect_t *collect, const 
char *data, long len ) -{ - long new_len = collect->length + len; - if ( new_len > collect->allocated ) { - collect->allocated = new_len * 2; - collect->data = (char*) realloc( collect->data, collect->allocated ); - } - memcpy( collect->data + collect->length, data, len ); - collect->length += len; -} - -void str_collect_clear( str_collect_t *collect ) -{ - collect->length = 0; -} - -#define INT_SZ 32 - -void print_str( struct colm_print_args *print_args, head_t *str ) -{ - print_args->out( print_args, (char*)(str->data), str->length ); -} - -void append_collect( struct colm_print_args *args, const char *data, int length ) -{ - str_collect_append( (str_collect_t*) args->arg, data, length ); -} - -void append_file( struct colm_print_args *args, const char *data, int length ) -{ - struct stream_impl_data *impl = (struct stream_impl_data*) args->arg; - fwrite( data, 1, length, impl->file ); -} - -static void out_indent( struct colm_print_args *args, const char *data, int length ) -{ - int level; -restart: - if ( args->indent->indent ) { - /* Consume mode. */ - while ( length > 0 && ( *data == ' ' || *data == '\t' ) ) { - data += 1; - length -= 1; - } - - if ( length > 0 ) { - /* Found some data, print the indentation and turn off indentation - * mode. */ - for ( level = 0; level < args->indent->level; level++ ) - args->out( args, "\t", 1 ); - - args->indent->indent = 0; - - goto restart; - } - } - else { - char *nl; - if ( args->indent->level != COLM_INDENT_OFF && - (nl = memchr( data, '\n', length )) ) - { - /* Print up to and including the newline. */ - int wl = nl - data + 1; - args->out( args, data, wl ); - - /* Go into consume state. If we see more non-indentation chars we - * will generate the appropriate indentation level. */ - data += wl; - length -= wl; - args->indent->indent = 1; - goto restart; - } - else { - /* Indentation off, or no indent trigger (newline). 
*/ - args->out( args, data, length ); - } - } -} - - -tree_t *tree_trim( struct colm_program *prg, tree_t **sp, tree_t *tree ) -{ - if ( tree == 0 ) - return 0; - - debug( prg, REALM_PARSE, "attaching left ignore\n" ); - - /* Make the ignore list for the left-ignore. */ - tree_t *left_ignore = tree_allocate( prg ); - left_ignore->id = LEL_ID_IGNORE; - left_ignore->flags |= AF_SUPPRESS_RIGHT; - - tree = push_left_ignore( prg, tree, left_ignore ); - - debug( prg, REALM_PARSE, "attaching ignore right\n" ); - - /* Copy the ignore list first if we need to attach it as a right - * ignore. */ - tree_t *right_ignore = 0; - right_ignore = tree_allocate( prg ); - right_ignore->id = LEL_ID_IGNORE; - right_ignore->flags |= AF_SUPPRESS_LEFT; - - tree = push_right_ignore( prg, tree, right_ignore ); - - return tree; -} - -enum ReturnType -{ - Done = 1, - CollectIgnoreLeft, - CollectIgnoreRight, - RecIgnoreList, - ChildPrint -}; - -enum VisitType -{ - IgnoreWrapper, - IgnoreData, - Term, - NonTerm -}; - -#define TF_TERM_SEEN 0x1 - -void print_kid( program_t *prg, tree_t **sp, struct colm_print_args *print_args, kid_t *kid ) -{ - enum ReturnType rt; - kid_t *parent = 0; - kid_t *leading_ignore = 0; - enum VisitType visit_type; - int flags = 0; - - /* Iterate the kids passed in. We are expecting a next, which will allow us - * to print the trailing ignore list. */ - while ( kid != 0 ) { - vm_push_type( enum ReturnType, Done ); - goto rec_call; - rec_return_top: - kid = kid->next; - } - - return; - -rec_call: - if ( kid->tree == 0 ) - goto skip_null; - - /* If not currently skipping ignore data, then print it. Ignore data can - * be associated with terminals and nonterminals. 
*/ - if ( kid->tree->flags & AF_LEFT_IGNORE ) { - vm_push_kid( parent ); - vm_push_kid( kid ); - parent = kid; - kid = tree_left_ignore_kid( prg, kid->tree ); - vm_push_type( enum ReturnType, CollectIgnoreLeft ); - goto rec_call; - rec_return_ign_left: - kid = vm_pop_kid(); - parent = vm_pop_kid(); - } - - if ( kid->tree->id == LEL_ID_IGNORE ) - visit_type = IgnoreWrapper; - else if ( parent != 0 && parent->tree->id == LEL_ID_IGNORE ) - visit_type = IgnoreData; - else if ( kid->tree->id < prg->rtd->first_non_term_id ) - visit_type = Term; - else - visit_type = NonTerm; - - debug( prg, REALM_PRINT, "visit type: %d\n", visit_type ); - - if ( visit_type == IgnoreData ) { - debug( prg, REALM_PRINT, "putting %p on ignore list\n", kid->tree ); - kid_t *new_ignore = kid_allocate( prg ); - new_ignore->next = leading_ignore; - leading_ignore = new_ignore; - leading_ignore->tree = kid->tree; - goto skip_node; - } - - if ( visit_type == IgnoreWrapper ) { - kid_t *new_ignore = kid_allocate( prg ); - new_ignore->next = leading_ignore; - leading_ignore = new_ignore; - leading_ignore->tree = kid->tree; - /* Don't skip. */ - } - - /* print leading ignore? Triggered by terminals. */ - if ( visit_type == Term ) { - /* Reverse the leading ignore list. */ - if ( leading_ignore != 0 ) { - kid_t *ignore = 0, *last = 0; - - /* Reverse the list and take the opportunity to implement the - * suppress left. */ - while ( true ) { - kid_t *next = leading_ignore->next; - leading_ignore->next = last; - - if ( leading_ignore->tree->flags & AF_SUPPRESS_LEFT ) { - /* We are moving left. Chop off the tail. */ - debug( prg, REALM_PRINT, "suppressing left\n" ); - free_kid_list( prg, next ); - break; - } - - if ( next == 0 ) - break; - - last = leading_ignore; - leading_ignore = next; - } - - /* Print the leading ignore list. Also implement the suppress right - * in the process. 
*/ - if ( print_args->comm && (!print_args->trim || - (flags & TF_TERM_SEEN && kid->tree->id > 0)) ) - { - ignore = leading_ignore; - while ( ignore != 0 ) { - if ( ignore->tree->flags & AF_SUPPRESS_RIGHT ) - break; - - if ( ignore->tree->id != LEL_ID_IGNORE ) { - vm_push_type( enum VisitType, visit_type ); - vm_push_kid( leading_ignore ); - vm_push_kid( ignore ); - vm_push_kid( parent ); - vm_push_kid( kid ); - - leading_ignore = 0; - kid = ignore; - parent = 0; - - debug( prg, REALM_PRINT, "rec call on %p\n", kid->tree ); - vm_push_type( enum ReturnType, RecIgnoreList ); - goto rec_call; - rec_return_il: - - kid = vm_pop_kid(); - parent = vm_pop_kid(); - ignore = vm_pop_kid(); - leading_ignore = vm_pop_kid(); - visit_type = vm_pop_type(enum VisitType); - } - - ignore = ignore->next; - } - } - - /* Free the leading ignore list. */ - free_kid_list( prg, leading_ignore ); - leading_ignore = 0; - } - } - - if ( visit_type == Term || visit_type == NonTerm ) { - /* Open the tree. */ - print_args->open_tree( prg, sp, print_args, parent, kid ); - } - - if ( visit_type == Term ) - flags |= TF_TERM_SEEN; - - if ( visit_type == Term || visit_type == IgnoreData ) { - /* Print contents. */ - if ( kid->tree->id < prg->rtd->first_non_term_id ) { - debug( prg, REALM_PRINT, "printing terminal %p\n", kid->tree ); - if ( kid->tree->id != 0 ) - print_args->print_term( prg, sp, print_args, kid ); - } - } - - /* Print children. */ - kid_t *child = print_args->attr ? - tree_attr( prg, kid->tree ) : - tree_child( prg, kid->tree ); - - if ( child != 0 ) { - vm_push_type( enum VisitType, visit_type ); - vm_push_kid( parent ); - vm_push_kid( kid ); - parent = kid; - kid = child; - while ( kid != 0 ) { - vm_push_type( enum ReturnType, ChildPrint ); - goto rec_call; - rec_return: - kid = kid->next; - } - kid = vm_pop_kid(); - parent = vm_pop_kid(); - visit_type = vm_pop_type(enum VisitType); - } - - if ( visit_type == Term || visit_type == NonTerm ) { - /* close the tree. 
*/ - print_args->close_tree( prg, sp, print_args, parent, kid ); - } - -skip_node: - - /* If not currently skipping ignore data, then print it. Ignore data can - * be associated with terminals and nonterminals. */ - if ( kid->tree->flags & AF_RIGHT_IGNORE ) { - debug( prg, REALM_PRINT, "right ignore\n" ); - vm_push_kid( parent ); - vm_push_kid( kid ); - parent = kid; - kid = tree_right_ignore_kid( prg, kid->tree ); - vm_push_type( enum ReturnType, CollectIgnoreRight ); - goto rec_call; - rec_return_ign_right: - kid = vm_pop_kid(); - parent = vm_pop_kid(); - } - -/* For skiping over content on null. */ -skip_null: - - rt = vm_pop_type(enum ReturnType); - switch ( rt ) { - case Done: - debug( prg, REALM_PRINT, "return: done\n" ); - goto rec_return_top; - break; - case CollectIgnoreLeft: - debug( prg, REALM_PRINT, "return: ignore left\n" ); - goto rec_return_ign_left; - case CollectIgnoreRight: - debug( prg, REALM_PRINT, "return: ignore right\n" ); - goto rec_return_ign_right; - case RecIgnoreList: - debug( prg, REALM_PRINT, "return: ignore list\n" ); - goto rec_return_il; - case ChildPrint: - debug( prg, REALM_PRINT, "return: child print\n" ); - goto rec_return; - } -} - -void colm_print_tree_args( program_t *prg, tree_t **sp, - struct colm_print_args *print_args, tree_t *tree ) -{ - if ( tree == 0 ) - out_indent( print_args, "NIL", 3 ); - else { - /* This term tree allows us to print trailing ignores. 
*/ - tree_t term_tree; - memset( &term_tree, 0, sizeof(term_tree) ); - - kid_t kid, term; - term.tree = &term_tree; - term.next = 0; - term.flags = 0; - - kid.tree = tree; - kid.next = &term; - kid.flags = 0; - - print_kid( prg, sp, print_args, &kid ); - } -} - -void colm_print_null( program_t *prg, tree_t **sp, - struct colm_print_args *args, kid_t *parent, kid_t *kid ) -{ -} - -void colm_print_term_tree( program_t *prg, tree_t **sp, - struct colm_print_args *args, kid_t *kid ) -{ - debug( prg, REALM_PRINT, "printing term %p\n", kid->tree ); - - if ( kid->tree->id == LEL_ID_PTR ) { - char buf[INT_SZ]; - out_indent( args, "#<", 2 ); - sprintf( buf, "%lx", ((pointer_t*)kid->tree)->value ); - out_indent( args, buf, strlen(buf) ); - out_indent( args, ">", 1 ); - } - else if ( kid->tree->id == LEL_ID_STR ) { - print_str( args, ((str_t*)kid->tree)->value ); - } -// else if ( kid->tree->id == LEL_ID_STREAM ) { -// char buf[INT_SZ]; -// printArgs->out( printArgs, "#", 1 ); -// sprintf( buf, "%p", (void*) ((stream_t*)kid->tree)->in->file ); -// printArgs->out( printArgs, buf, strlen(buf) ); -// } - else if ( kid->tree->tokdata != 0 && - string_length( kid->tree->tokdata ) > 0 ) - { - out_indent( args, string_data( kid->tree->tokdata ), - string_length( kid->tree->tokdata ) ); - } - - struct lang_el_info *lel_info = prg->rtd->lel_info; - if ( strcmp( lel_info[kid->tree->id].name, "_IN_" ) == 0 ) { - if ( args->indent->level == COLM_INDENT_OFF ) { - args->indent->level = 1; - args->indent->indent = 1; - } - else { - args->indent->level += 1; - } - } - - if ( strcmp( lel_info[kid->tree->id].name, "_EX_" ) == 0 ) - args->indent->level -= 1; -} - -void colm_print_tree_collect( program_t *prg, tree_t **sp, - str_collect_t *collect, tree_t *tree, int trim ) -{ - struct colm_print_args print_args = { - collect, true, false, trim, &collect->indent, - &append_collect, &colm_print_null, - &colm_print_term_tree, &colm_print_null - }; - - colm_print_tree_args( prg, sp, &print_args, 
tree ); -} - -void colm_print_tree_collect_a( program_t *prg, tree_t **sp, - str_collect_t *collect, tree_t *tree, int trim ) -{ - struct colm_print_args print_args = { - collect, true, true, trim, &collect->indent, - &append_collect, &colm_print_null, - &colm_print_term_tree, &colm_print_null - }; - - colm_print_tree_args( prg, sp, &print_args, tree ); -} - -void colm_print_tree_file( program_t *prg, tree_t **sp, - struct stream_impl_data *impl, tree_t *tree, int trim ) -{ - struct colm_print_args print_args = { - impl, true, false, trim, &impl->indent, - &append_file, &colm_print_null, - &colm_print_term_tree, &colm_print_null - }; - - colm_print_tree_args( prg, sp, &print_args, tree ); -} - -static void xml_open( program_t *prg, tree_t **sp, struct colm_print_args *args, - kid_t *parent, kid_t *kid ) -{ - /* Skip the terminal that is for forcing trailing ignores out. */ - if ( kid->tree->id == 0 ) - return; - - struct lang_el_info *lel_info = prg->rtd->lel_info; - - /* List flattening: skip the repeats and lists that are a continuation of - * the list. 
*/ - if ( parent != 0 && parent->tree->id == kid->tree->id && kid->next == 0 && - ( lel_info[parent->tree->id].repeat || lel_info[parent->tree->id].list ) ) - { - return; - } - - const char *name = lel_info[kid->tree->id].xml_tag; - args->out( args, "<", 1 ); - args->out( args, name, strlen( name ) ); - args->out( args, ">", 1 ); -} - -static void xml_term( program_t *prg, tree_t **sp, - struct colm_print_args *print_args, kid_t *kid ) -{ - //kid_t *child; - - /*child = */ tree_child( prg, kid->tree ); - if ( kid->tree->id == LEL_ID_PTR ) { - char ptr[INT_SZ]; - sprintf( ptr, "%lx", ((pointer_t*)kid->tree)->value ); - print_args->out( print_args, ptr, strlen(ptr) ); - } - else if ( kid->tree->id == LEL_ID_STR ) { - head_t *head = (head_t*) ((str_t*)kid->tree)->value; - - xml_escape_data( print_args, (char*)(head->data), head->length ); - } - else if ( 0 < kid->tree->id && kid->tree->id < prg->rtd->first_non_term_id && - kid->tree->id != LEL_ID_IGNORE && - kid->tree->tokdata != 0 && - string_length( kid->tree->tokdata ) > 0 ) - { - xml_escape_data( print_args, string_data( kid->tree->tokdata ), - string_length( kid->tree->tokdata ) ); - } -} - -static void xml_close( program_t *prg, tree_t **sp, - struct colm_print_args *args, kid_t *parent, kid_t *kid ) -{ - /* Skip the terminal that is for forcing trailing ignores out. */ - if ( kid->tree->id == 0 ) - return; - - struct lang_el_info *lel_info = prg->rtd->lel_info; - - /* List flattening: skip the repeats and lists that are a continuation of - * the list. 
*/ - if ( parent != 0 && parent->tree->id == kid->tree->id && kid->next == 0 && - ( lel_info[parent->tree->id].repeat || lel_info[parent->tree->id].list ) ) - { - return; - } - - const char *name = lel_info[kid->tree->id].xml_tag; - args->out( args, "</", 2 ); - args->out( args, name, strlen( name ) ); - args->out( args, ">", 1 ); -} - -void colm_print_xml_stdout( program_t *prg, tree_t **sp, - struct stream_impl_data *impl, tree_t *tree, - int comm_attr, int trim ) -{ - struct colm_print_args print_args = { - impl, comm_attr, comm_attr, trim, &impl->indent, - &append_file, &xml_open, &xml_term, &xml_close }; - colm_print_tree_args( prg, sp, &print_args, tree ); -} - -static void postfix_open( program_t *prg, tree_t **sp, struct colm_print_args *args, - kid_t *parent, kid_t *kid ) -{ -} - -static void postfix_term_data( struct colm_print_args *args, const char *data, long len ) -{ - int i; - for ( i = 0; i < len; i++ ) { - if ( data[i] == '\\' ) - args->out( args, "\\5c", 3 ); - else if ( 33 <= data[i] && data[i] <= 126 ) - args->out( args, &data[i], 1 ); - else { - char out[64]; - sprintf( out, "\\%02x", ((unsigned char)data[i]) ); - args->out( args, out, strlen(out) ); - } - } -} - -static void postfix_term( program_t *prg, tree_t **sp, - struct colm_print_args *args, kid_t *kid ) -{ - //kid_t *child; - - /*child = */ tree_child( prg, kid->tree ); - if ( kid->tree->id == LEL_ID_PTR ) { - //char ptr[INT_SZ]; - //sprintf( ptr, "%lx", ((pointer_t*)kid->tree)->value ); - //args->out( args, ptr, strlen(ptr) ); - args->out( args, "p\n", 2 ); - } - else if ( kid->tree->id == LEL_ID_STR ) { - //head_t *head = (head_t*) ((str_t*)kid->tree)->value; - - //xml_escape_data( args, (char*)(head->data), head->length ); - args->out( args, "s\n", 2 ); - } - else if ( 0 < kid->tree->id && kid->tree->id < prg->rtd->first_non_term_id && - kid->tree->id != LEL_ID_IGNORE //&& - //kid->tree->tokdata != 0 && - //string_length( kid->tree->tokdata ) > 0 ) - ) - { - char buf[512]; - struct 
lang_el_info *lel_info = prg->rtd->lel_info; - const char *name = lel_info[kid->tree->id].xml_tag; - - args->out( args, "t ", 2 ); - args->out( args, name, strlen( name ) ); - - /* id. */ - sprintf( buf, " %d", kid->tree->id ); - args->out( args, buf, strlen( buf ) ); - - /* location. */ - if ( kid->tree->tokdata == 0 ) { - args->out( args, " 0 0 0 -", 8 ); - } - else { - struct colm_data *tokdata = kid->tree->tokdata; - struct colm_location *loc = tokdata->location; - if ( loc == 0 ) { - args->out( args, " 0 0 0 ", 7 ); - } - else { - sprintf( buf, " %ld %ld %ld ", loc->line, loc->column, loc->byte ); - args->out( args, buf, strlen( buf ) ); - } - - if ( string_length( tokdata ) == 0 ) { - args->out( args, "-", 1 ); - } - else { - postfix_term_data( args, string_data( tokdata ), string_length( tokdata ) ); - } - } - - args->out( args, "\n", 1 ); - } -} - -static void postfix_close( program_t *prg, tree_t **sp, - struct colm_print_args *args, kid_t *parent, kid_t *kid ) -{ - /* Skip the terminal that is for forcing trailing ignores out. */ - if ( kid->tree->id == 0 ) - return; - - if ( kid->tree->id >= prg->rtd->first_non_term_id ) { - char buf[512]; - struct lang_el_info *lel_info = prg->rtd->lel_info; - const char *name = lel_info[kid->tree->id].xml_tag; - - args->out( args, "r ", 2 ); - args->out( args, name, strlen( name ) ); - - /* id. */ - sprintf( buf, " %d", kid->tree->id ); - args->out( args, buf, strlen( buf ) ); - - /* Production number. */ - sprintf( buf, " %d", kid->tree->prod_num ); - args->out( args, buf, strlen( buf ) ); - - /* Child count. 
*/ - int children = 0; - kid_t *child = tree_child( prg, kid->tree ); - while ( child != 0 ) { - child = child->next; - children += 1; - } - - sprintf( buf, " %d", children ); - args->out( args, buf, strlen( buf ) ); - - args->out( args, "\n", 1 ); - } -} - -void colm_postfix_tree_collect( program_t *prg, tree_t **sp, - str_collect_t *collect, tree_t *tree, int trim ) -{ - struct colm_print_args print_args = { - collect, false, false, false, &collect->indent, - &append_collect, &postfix_open, &postfix_term, &postfix_close - }; - - colm_print_tree_args( prg, sp, &print_args, tree ); -} - -#if 0 -void colm_postfix_tree_file( program_t *prg, tree_t **sp, struct stream_impl *impl, - tree_t *tree, int trim ) -{ - struct colm_print_args print_args = { - impl, false, false, false, &append_file, - &postfix_open, &postfix_term, &postfix_close - }; - - colm_print_tree_args( prg, sp, &print_args, tree ); - - //struct stream_impl *impl = (struct stream_impl*) args->arg; - fflush( impl->file ); -} -#endif - -void colm_print_tree_collect_xml( program_t *prg, tree_t **sp, - str_collect_t *collect, tree_t *tree, int trim ) -{ - struct colm_print_args print_args = { - collect, false, false, trim, &collect->indent, - &append_collect, &xml_open, &xml_term, &xml_close - }; - - colm_print_tree_args( prg, sp, &print_args, tree ); -} - -void colm_print_tree_collect_xml_ac( program_t *prg, tree_t **sp, - str_collect_t *collect, tree_t *tree, int trim ) -{ - struct colm_print_args print_args = { - collect, true, true, trim, &collect->indent, - &append_collect, &xml_open, &xml_term, &xml_close - }; - - colm_print_tree_args( prg, sp, &print_args, tree ); -} - diff --git a/src/program.c b/src/program.c deleted file mode 100644 index 0675c239..00000000 --- a/src/program.c +++ /dev/null @@ -1,337 +0,0 @@ -/* - * Copyright 2006-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated 
documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#if defined(HAVE_SYS_MMAN_H) -#include <sys/mman.h> -#endif -#include <string.h> -#include <assert.h> -#include <stdlib.h> - -#include <colm/pdarun.h> -#include <colm/tree.h> -#include <colm/bytecode.h> -#include <colm/pool.h> -#include <colm/debug.h> -#include <colm/config.h> -#include <colm/struct.h> - -#define VM_STACK_SIZE (8192) - -static void colm_alloc_global( program_t *prg ) -{ - /* Alloc the global. 
*/ - prg->global = colm_struct_new( prg, prg->rtd->global_id ) ; -} - -void vm_init( program_t *prg ) -{ - struct stack_block *b = malloc( sizeof(struct stack_block) ); - b->data = malloc( sizeof(tree_t*) * VM_STACK_SIZE ); - b->len = VM_STACK_SIZE; - b->offset = 0; - b->next = 0; - - prg->stack_block = b; - - prg->sb_beg = prg->stack_block->data; - prg->sb_end = prg->stack_block->data + prg->stack_block->len; - - prg->stack_root = prg->sb_end; -} - -tree_t **colm_vm_root( program_t *prg ) -{ - return prg->stack_root; -} - -tree_t **vm_bs_add( program_t *prg, tree_t **sp, int n ) -{ - /* Close off the current block. */ - if ( prg->stack_block != 0 ) { - prg->stack_block->offset = sp - prg->stack_block->data; - prg->sb_total += prg->stack_block->len - prg->stack_block->offset; - } - - if ( prg->reserve != 0 && prg->reserve->len >= n) { - struct stack_block *b = prg->reserve; - b->next = prg->stack_block; - b->offset = 0; - - prg->stack_block = b; - prg->reserve = 0; - } - else { - struct stack_block *b = malloc( sizeof(struct stack_block) ); - int size = VM_STACK_SIZE; - if ( n > size ) - size = n; - b->next = prg->stack_block; - b->data = malloc( sizeof(tree_t*) * size ); - b->len = size; - b->offset = 0; - - prg->stack_block = b; - } - - prg->sb_beg = prg->stack_block->data; - prg->sb_end = prg->stack_block->data + prg->stack_block->len; - - return prg->sb_end; -} - -tree_t **vm_bs_pop( program_t *prg, tree_t **sp, int n ) -{ - while ( 1 ) { - tree_t **end = prg->stack_block->data + prg->stack_block->len; - int remaining = end - sp; - - /* Don't have to free this block. Remaining values to pop leave us - * inside it. */ - if ( n < remaining ) { - sp += n; - return sp; - } - - if ( prg->stack_block->next == 0 ) { - /* Don't delete the sentinal stack block. Returns the end as in the - * creation of the first stack block. */ - return prg->sb_end; - } - - /* Clear any previous reserve. We are going to save this block as the - * reserve. 
*/ - if ( prg->reserve != 0 ) { - free( prg->reserve->data ); - free( prg->reserve ); - } - - /* Pop the stack block. */ - struct stack_block *b = prg->stack_block; - prg->stack_block = prg->stack_block->next; - prg->reserve = b; - - /* Setup the bounds. Note that we restore the full block, which is - * necessary to honour any CONTIGUOUS statements that counted on it - * before a subsequent CONTIGUOUS triggered a new block. */ - prg->sb_beg = prg->stack_block->data; - prg->sb_end = prg->stack_block->data + prg->stack_block->len; - - /* Update the total stack usage. */ - prg->sb_total -= prg->stack_block->len - prg->stack_block->offset; - - n -= remaining; - sp = prg->stack_block->data + prg->stack_block->offset; - } -} - -void vm_clear( program_t *prg ) -{ - while ( prg->stack_block != 0 ) { - struct stack_block *b = prg->stack_block; - prg->stack_block = prg->stack_block->next; - - free( b->data ); - free( b ); - } - - if ( prg->reserve != 0 ) { - free( prg->reserve->data ); - free( prg->reserve ); - } -} - -tree_t *colm_return_val( struct colm_program *prg ) -{ - return prg->return_val; -} - -void colm_set_debug( program_t *prg, long active_realm ) -{ - prg->active_realm = active_realm; -} - -void colm_set_reduce_clean( struct colm_program *prg, unsigned char reduce_clean ) -{ - prg->reduce_clean = reduce_clean; -} - -program_t *colm_new_program( struct colm_sections *rtd ) -{ - program_t *prg = malloc(sizeof(program_t)); - memset( prg, 0, sizeof(program_t) ); - - assert( sizeof(str_t) <= sizeof(tree_t) ); - assert( sizeof(pointer_t) <= sizeof(tree_t) ); - - prg->rtd = rtd; - prg->ctx_dep_parsing = 1; - prg->reduce_clean = 1; - - init_pool_alloc( &prg->kid_pool, sizeof(kid_t) ); - init_pool_alloc( &prg->tree_pool, sizeof(tree_t) ); - init_pool_alloc( &prg->parse_tree_pool, sizeof(parse_tree_t) ); - init_pool_alloc( &prg->head_pool, sizeof(head_t) ); - init_pool_alloc( &prg->location_pool, sizeof(location_t) ); - - prg->true_val = (tree_t*) 1; - prg->false_val = 
(tree_t*) 0; - - /* Allocate the global variable. */ - colm_alloc_global( prg ); - - /* Allocate the VM stack. */ - vm_init( prg ); - - rtd->init_need(); - - prg->stream_fns = malloc( sizeof(char*) * 1 ); - prg->stream_fns[0] = 0; - return prg; -} - -void colm_run_program2( program_t *prg, int argc, const char **argv, const int *argl ) -{ - if ( prg->rtd->root_code_len == 0 ) - return; - - /* Make the arguments available to the program. */ - prg->argc = argc; - prg->argv = argv; - prg->argl = argl; - - execution_t execution; - memset( &execution, 0, sizeof(execution) ); - execution.frame_id = prg->rtd->root_frame_id; - - colm_execute( prg, &execution, prg->rtd->root_code ); - - /* Clear the arg and stack. */ - prg->argc = 0; - prg->argv = 0; -} - -void colm_run_program( program_t *prg, int argc, const char **argv ) -{ - colm_run_program2( prg, argc, argv, 0 ); -} - -static void colm_clear_heap( program_t *prg, tree_t **sp ) -{ - struct colm_struct *hi = prg->heap.head; - while ( hi != 0 ) { - struct colm_struct *next = hi->next; - colm_struct_delete( prg, sp, hi ); - hi = next; - } -} - -void *colm_get_reduce_ctx( struct colm_program *prg ) -{ - return prg->red_ctx; -} - -void colm_set_reduce_ctx( struct colm_program *prg, void *ctx ) -{ - prg->red_ctx = ctx; -} - -const char **colm_extract_fns( struct colm_program *prg ) -{ - const char **fns = prg->stream_fns; - prg->stream_fns = 0; - return fns; -} - -const char *colm_error( struct colm_program *prg, int *length ) -{ - const char *rtn = 0; - if ( prg->error != 0 ) { - rtn = prg->error->tokdata->data; - if ( length != 0 ) - *length = prg->error->tokdata->length; - } - return rtn; -} - -int colm_delete_program( program_t *prg ) -{ - tree_t **sp = prg->stack_root; - int exit_status = prg->exit_status; - - colm_tree_downref( prg, sp, prg->return_val ); - colm_clear_heap( prg, sp ); - - colm_tree_downref( prg, sp, prg->error ); - -#if DEBUG - long kid_lost = kid_num_lost( prg ); - long tree_lost = tree_num_lost( prg 
); - long parse_tree_lost = parse_tree_num_lost( &prg->parse_tree_pool ); - long head_lost = head_num_lost( prg ); - long location_lost = location_num_lost( prg ); - - if ( kid_lost ) - message( "warning: lost kids: %ld\n", kid_lost ); - - if ( tree_lost ) - message( "warning: lost trees: %ld\n", tree_lost ); - - if ( parse_tree_lost ) - message( "warning: lost parse trees: %ld\n", parse_tree_lost ); - - if ( head_lost ) - message( "warning: lost heads: %ld\n", head_lost ); - - if ( location_lost ) - message( "warning: lost locations: %ld\n", location_lost ); -#endif - - kid_clear( prg ); - tree_clear( prg ); - head_clear( prg ); - parse_tree_clear( &prg->parse_tree_pool ); - location_clear( prg ); - - struct run_buf *rb = prg->alloc_run_buf; - while ( rb != 0 ) { - struct run_buf *next = rb->next; - free( rb ); - rb = next; - } - - vm_clear( prg ); - - if ( prg->stream_fns ) { - char **ptr = (char**)prg->stream_fns; - while ( *ptr != 0 ) { - free( *ptr ); - ptr += 1; - } - - free( prg->stream_fns ); - } - - free( prg ); - - return exit_status; -} diff --git a/src/program.h b/src/program.h deleted file mode 100644 index 8ba716d4..00000000 --- a/src/program.h +++ /dev/null @@ -1,186 +0,0 @@ -/* - * Copyright 2007-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef _COLM_PROGRAM_H -#define _COLM_PROGRAM_H - -#ifdef __cplusplus -extern "C" { -#endif - -#include <colm/pdarun.h> - -struct stack_block -{ - tree_t **data; - int len; - int offset; - struct stack_block *next; -}; - -struct colm_sections -{ - struct lang_el_info *lel_info; - long num_lang_els; - - struct struct_el_info *sel_info; - long num_struct_els; - - struct prod_info *prod_info; - long num_prods; - - struct region_info *region_info; - long num_regions; - - code_t *root_code; - long root_code_len; - long root_frame_id; - - struct frame_info *frame_info; - long num_frames; - - struct function_info *function_info; - long num_functions; - - struct pat_cons_info *pat_repl_info; - long num_patterns; - - struct pat_cons_node *pat_repl_nodes; - long num_pattern_nodes; - - struct generic_info *generic_info; - long num_generics; - - long argv_generic_id; - long stds_generic_id; - - const char **litdata; - long *litlen; - head_t **literals; - long num_literals; - - CaptureAttr *capture_attr; - long num_captured_attr; - - struct fsm_tables *fsm_tables; - struct pda_tables *pda_tables; - int *start_states; - int *eof_lel_ids; - int *parser_lel_ids; - long num_parsers; - - long global_size; - - long first_non_term_id; - long first_struct_el_id; - - long integer_id; - long string_id; - long any_id; - long eof_id; - long no_token_id; - long global_id; - long argv_el_id; - long stds_el_id; - long struct_inbuilt_id; - long struct_input_id; - long struct_stream_id; - - void 
(*fsm_execute)( struct pda_run *pda_run, struct input_impl *input_stream ); - void (*send_named_lang_el)( struct colm_program *prg, tree_t **tree, - struct pda_run *pda_run, struct input_impl *input_stream ); - void (*init_bindings)( struct pda_run *pda_run ); - void (*pop_binding)( struct pda_run *pda_run, parse_tree_t *tree ); - - tree_t **(*host_call)( program_t *prg, long code, tree_t **sp ); - - void (*commit_reduce_forward)( program_t *prg, tree_t **root, struct pda_run *pda_run, parse_tree_t *pt ); - long (*commit_union_sz)( int reducer ); - void (*init_need)(); - int (*reducer_need_tok)( program_t *prg, struct pda_run *pda_run, int id ); - int (*reducer_need_ign)( program_t *prg, struct pda_run *pda_run ); - void (*read_reduce)( program_t *prg, int reducer, input_t *input ); -}; - -struct heap_list -{ - struct colm_struct *head; - struct colm_struct *tail; -}; - -struct colm_program -{ - long active_realm; - - int argc; - const char **argv; - const int *argl; - - unsigned char ctx_dep_parsing; - unsigned char reduce_clean; - struct colm_sections *rtd; - struct colm_struct *global; - int induce_exit; - int exit_status; - - struct pool_alloc kid_pool; - struct pool_alloc tree_pool; - struct pool_alloc parse_tree_pool; - struct pool_alloc head_pool; - struct pool_alloc location_pool; - - tree_t *true_val; - tree_t *false_val; - - struct heap_list heap; - - stream_t *stdin_val; - stream_t *stdout_val; - stream_t *stderr_val; - - tree_t *error; - - struct run_buf *alloc_run_buf; - - /* Current stack block limits. Changed when crossing block boundaries. */ - tree_t **sb_beg; - tree_t **sb_end; - long sb_total; - struct stack_block *reserve; - struct stack_block *stack_block; - tree_t **stack_root; - - /* Returned value for main program and any exported functions. */ - tree_t *return_val; - - void *red_ctx; - - /* This can be extracted for ownership transfer before a program is deleted. 
*/ - const char **stream_fns; -}; - -#ifdef __cplusplus -} -#endif - -#endif /* _COLM_PROGRAM_H */ - diff --git a/src/redbuild.cc b/src/redbuild.cc deleted file mode 100644 index 7e0396d7..00000000 --- a/src/redbuild.cc +++ /dev/null @@ -1,562 +0,0 @@ -/* - * Copyright 2006-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "redbuild.h" - -#include <assert.h> -#include <string.h> -#include <stdbool.h> - -#include <iostream> - -#include "fsmcodegen.h" - -using namespace std; - -RedFsmBuild::RedFsmBuild( Compiler *pd, FsmGraph *fsm ) -: - pd(pd), - fsm(fsm), - nextActionTableId(0), - startState(-1), - errState(-1) -{ -} - -void RedFsmBuild::initActionList( unsigned long length ) -{ - redFsm->allActions = new GenAction[length]; - memset( redFsm->allActions, 0, sizeof(GenAction) * length ); - for ( unsigned long a = 0; a < length; a++ ) - redFsm->genActionList.append( redFsm->allActions+a ); -} - - -void RedFsmBuild::makeActionList() -{ - /* Determine which actions to write. */ - int nextActionId = 0; - for ( ActionList::Iter act = pd->actionList; act.lte(); act++ ) { - if ( act->numRefs() > 0 || act->numCondRefs > 0 ) - act->actionId = nextActionId++; - } - - initActionList( nextActionId ); - curAction = 0; - - for ( ActionList::Iter act = pd->actionList; act.lte(); act++ ) { - if ( act->actionId >= 0 ) - makeAction( act ); - } -} - -void RedFsmBuild::initActionTableList( unsigned long length ) -{ - redFsm->allActionTables = new RedAction[length]; -} - -void RedFsmBuild::initStateList( unsigned long length ) -{ - redFsm->allStates = new RedState[length]; - for ( unsigned long s = 0; s < length; s++ ) - redFsm->stateList.append( redFsm->allStates+s ); - - /* We get the start state as an offset, set the pointer now. */ - assert( startState >= 0 ); - redFsm->startState = redFsm->allStates + startState; - if ( errState >= 0 ) - redFsm->errState = redFsm->allStates + errState; - for ( EntryIdVect::Iter en = redFsm->entryPointIds; en.lte(); en++ ) - redFsm->entryPoints.insert( redFsm->allStates + *en ); - - /* The nextStateId is no longer used to assign state ids (they come in set - * from the frontend now), however generation code still depends on it. - * Should eventually remove this variable. 
*/ - redFsm->nextStateId = redFsm->stateList.length(); -} - -void RedFsmBuild::addEntryPoint( int entryId, unsigned long entryState ) -{ - redFsm->entryPointIds.append( entryState ); - redFsm->redEntryMap.insert( entryId, entryState ); -} - -void RedFsmBuild::addRegionToEntry( int regionId, int entryId ) -{ - assert( regionId == redFsm->regionToEntry.length() ); - redFsm->regionToEntry.append( entryId ); -} - -void RedFsmBuild::initTransList( int snum, unsigned long length ) -{ - /* Could preallocate the out range to save time growing it. For now do - * nothing. */ -} - -void RedFsmBuild::newTrans( int snum, int tnum, Key lowKey, - Key highKey, long targ, long action ) -{ - /* Get the current state and range. */ - RedState *curState = redFsm->allStates + snum; - RedTransList &destRange = curState->outRange; - - if ( curState == redFsm->errState ) - return; - - /* Make the new transitions. */ - RedState *targState = targ >= 0 ? (redFsm->allStates + targ) : - redFsm->wantComplete ? redFsm->getErrorState() : 0; - RedAction *actionTable = action >= 0 ? (redFsm->allActionTables + action) : 0; - RedTrans *trans = redFsm->allocateTrans( targState, actionTable ); - RedTransEl transEl( lowKey, highKey, trans ); - - if ( redFsm->wantComplete ) { - /* If the machine is to be complete then we need to fill any gaps with - * the error transitions. */ - if ( destRange.length() == 0 ) { - /* Range is currently empty. */ - if ( keyOps->minKey < lowKey ) { - /* The first range doesn't start at the low end. */ - Key fillHighKey = lowKey; - fillHighKey.decrement(); - - /* Create the filler with the state's error transition. */ - RedTransEl newTel( keyOps->minKey, fillHighKey, redFsm->getErrorTrans() ); - destRange.append( newTel ); - } - } - else { - /* The range list is not empty, get the the last range. */ - RedTransEl *last = &destRange[destRange.length()-1]; - Key nextKey = last->highKey; - nextKey.increment(); - if ( nextKey < lowKey ) { - /* There is a gap to fill. 
Make the high key. */ - Key fillHighKey = lowKey; - fillHighKey.decrement(); - - /* Create the filler with the state's error transtion. */ - RedTransEl newTel( nextKey, fillHighKey, redFsm->getErrorTrans() ); - destRange.append( newTel ); - } - } - } - - /* Filler taken care of. Append the range. */ - destRange.append( RedTransEl( lowKey, highKey, trans ) ); -} - -void RedFsmBuild::finishTransList( int snum ) -{ - /* Get the current state and range. */ - RedState *curState = redFsm->allStates + snum; - RedTransList &destRange = curState->outRange; - - if ( curState == redFsm->errState ) - return; - - /* If building a complete machine we may need filler on the end. */ - if ( redFsm->wantComplete ) { - /* Check if there are any ranges already. */ - if ( destRange.length() == 0 ) { - /* Fill with the whole alphabet. */ - /* Add the range on the lower and upper bound. */ - RedTransEl newTel( keyOps->minKey, keyOps->maxKey, redFsm->getErrorTrans() ); - destRange.append( newTel ); - } - else { - /* Get the last and check for a gap on the end. */ - RedTransEl *last = &destRange[destRange.length()-1]; - if ( last->highKey < keyOps->maxKey ) { - /* Make the high key. */ - Key fillLowKey = last->highKey; - fillLowKey.increment(); - - /* Create the new range with the error trans and append it. 
*/ - RedTransEl newTel( fillLowKey, keyOps->maxKey, redFsm->getErrorTrans() ); - destRange.append( newTel ); - } - } - } -} - -void RedFsmBuild::setId( int snum, int id ) -{ - RedState *curState = redFsm->allStates + snum; - curState->id = id; -} - -void RedFsmBuild::setEofTrans( int snum, int eofTarget, int actId ) -{ - RedState *curState = redFsm->allStates + snum; - RedState *targState = redFsm->allStates + eofTarget; - RedAction *eofAct = redFsm->allActionTables + actId; - curState->eofTrans = redFsm->allocateTrans( targState, eofAct ); -} - -void RedFsmBuild::setFinal( int snum ) -{ - RedState *curState = redFsm->allStates + snum; - curState->isFinal = true; -} - - -void RedFsmBuild::setStateActions( int snum, long toStateAction, - long fromStateAction, long eofAction ) -{ - RedState *curState = redFsm->allStates + snum; - if ( toStateAction >= 0 ) - curState->toStateAction = redFsm->allActionTables + toStateAction; - if ( fromStateAction >= 0 ) - curState->fromStateAction = redFsm->allActionTables + fromStateAction; - if ( eofAction >= 0 ) - curState->eofAction = redFsm->allActionTables + eofAction; -} - -void RedFsmBuild::closeMachine() -{ -} - - -void RedFsmBuild::initStateCondList( int snum, ulong length ) -{ - /* Could preallocate these, as we could with transitions. */ -} - -void RedFsmBuild::setForcedErrorState() -{ - redFsm->forcedErrorState = true; -} - -Key RedFsmBuild::findMaxKey() -{ - Key maxKey = keyOps->maxKey; - for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { - assert( st->outSingle.length() == 0 ); - assert( st->defTrans == 0 ); - - long rangeLen = st->outRange.length(); - if ( rangeLen > 0 ) { - Key highKey = st->outRange[rangeLen-1].highKey; - if ( highKey > maxKey ) - maxKey = highKey; - } - } - return maxKey; -} - - -void RedFsmBuild::makeActionTableList() -{ - /* Must first order the action tables based on their id. 
*/ - int numTables = nextActionTableId; - RedActionTable **tables = new RedActionTable*[numTables]; - for ( ActionTableMap::Iter at = actionTableMap; at.lte(); at++ ) - tables[at->id] = at; - - initActionTableList( numTables ); - curActionTable = 0; - - for ( int t = 0; t < numTables; t++ ) { - long length = tables[t]->key.length(); - - /* Collect the action table. */ - RedAction *redAct = redFsm->allActionTables + curActionTable; - redAct->actListId = curActionTable; - redAct->key.setAsNew( length ); - - int pos = 0; - for ( ActionTable::Iter atel = tables[t]->key; atel.lte(); atel++ ) { - int actionId = atel->value->actionId; - redAct->key[pos].key = 0; - redAct->key[pos].value = redFsm->allActions+actionId; - pos += 1; - } - - /* Insert into the action table map. */ - redFsm->actionMap.insert( redAct ); - - curActionTable += 1; - - } - - delete[] tables; -} - -void RedFsmBuild::reduceActionTables() -{ - /* Reduce the actions tables to a set. */ - for ( StateList::Iter st = fsm->stateList; st.lte(); st++ ) { - RedActionTable *actionTable = 0; - - /* Reduce To State Actions. */ - if ( st->toStateActionTable.length() > 0 ) { - if ( actionTableMap.insert( st->toStateActionTable, &actionTable ) ) - actionTable->id = nextActionTableId++; - } - - /* Reduce From State Actions. */ - if ( st->fromStateActionTable.length() > 0 ) { - if ( actionTableMap.insert( st->fromStateActionTable, &actionTable ) ) - actionTable->id = nextActionTableId++; - } - - /* Reduce EOF actions. */ - if ( st->eofActionTable.length() > 0 ) { - if ( actionTableMap.insert( st->eofActionTable, &actionTable ) ) - actionTable->id = nextActionTableId++; - } - - /* Loop the transitions and reduce their actions. 
*/ - for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) { - if ( trans->actionTable.length() > 0 ) { - if ( actionTableMap.insert( trans->actionTable, &actionTable ) ) - actionTable->id = nextActionTableId++; - } - } - } -} - -void RedFsmBuild::appendTrans( TransListVect &outList, Key lowKey, - Key highKey, FsmTrans *trans ) -{ - if ( trans->toState != 0 || trans->actionTable.length() > 0 ) - outList.append( TransEl( lowKey, highKey, trans ) ); -} - -void RedFsmBuild::makeTrans( Key lowKey, Key highKey, FsmTrans *trans ) -{ - /* First reduce the action. */ - RedActionTable *actionTable = 0; - if ( trans->actionTable.length() > 0 ) - actionTable = actionTableMap.find( trans->actionTable ); - - long targ = trans->toState == 0 ? -1 : trans->toState->alg.stateNum; - long action = actionTable == 0 ? -1 : actionTable->id; - - newTrans( curState, curTrans++, lowKey, highKey, targ, action ); -} - -void RedFsmBuild::makeTransList( FsmState *state ) -{ - TransListVect outList; - - /* If there is only are no ranges the task is simple. */ - if ( state->outList.length() > 0 ) { - /* Loop each source range. */ - for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { - /* Reduce the transition. If it reduced to anything then add it. 
*/ - appendTrans( outList, trans->lowKey, trans->highKey, trans ); - } - } - - long length = outList.length(); - initTransList( curState, length ); - curTrans = 0; - - for ( TransListVect::Iter tvi = outList; tvi.lte(); tvi++ ) - makeTrans( tvi->lowKey, tvi->highKey, tvi->value ); - finishTransList( curState ); -} - -void RedFsmBuild::newAction( int anum, char *name, int line, int col, Action *action ) -{ - redFsm->allActions[anum].actionId = anum; - redFsm->allActions[anum].name = name; - redFsm->allActions[anum].loc.line = line; - redFsm->allActions[anum].loc.col = col; - redFsm->allActions[anum].inlineList = action->inlineList; - redFsm->allActions[anum].objField = action->objField; - redFsm->allActions[anum].markType = action->markType; - redFsm->allActions[anum].markId = action->markId + 1; -} - -void RedFsmBuild::makeAction( Action *action ) -{ - int line = action->loc.line; - int col = action->loc.col; - - char *name = 0; - if ( action->name != 0 ) - name = action->name; - - newAction( curAction++, name, line, col, action ); -} - -void xmlEscapeHost( std::ostream &out, char *data, int len ) -{ - char *end = data + len; - while ( data != end ) { - switch ( *data ) { - case '<': out << "<"; break; - case '>': out << ">"; break; - case '&': out << "&"; break; - default: out << *data; break; - } - data += 1; - } -} - -void RedFsmBuild::makeStateActions( FsmState *state ) -{ - RedActionTable *toStateActions = 0; - if ( state->toStateActionTable.length() > 0 ) - toStateActions = actionTableMap.find( state->toStateActionTable ); - - RedActionTable *fromStateActions = 0; - if ( state->fromStateActionTable.length() > 0 ) - fromStateActions = actionTableMap.find( state->fromStateActionTable ); - - RedActionTable *eofActions = 0; - if ( state->eofActionTable.length() > 0 ) - eofActions = actionTableMap.find( state->eofActionTable ); - - if ( toStateActions != 0 || fromStateActions != 0 || eofActions != 0 ) { - long toStateAction = -1; - long fromStateAction = -1; - 
long eofAction = -1; - - if ( toStateActions != 0 ) - toStateAction = toStateActions->id; - if ( fromStateActions != 0 ) - fromStateAction = fromStateActions->id; - if ( eofActions != 0 ) - eofAction = eofActions->id; - - setStateActions( curState, toStateAction, - fromStateAction, eofAction ); - } -} - -void RedFsmBuild::makeStateList() -{ - /* Write the list of states. */ - long length = fsm->stateList.length(); - initStateList( length ); - curState = 0; - - for ( StateList::Iter st = fsm->stateList; st.lte(); st++ ) { - /* Both or neither should be set. */ - assert( !( (st->eofTarget != 0) xor (st->eofActionTable.length() > 0) ) ); - - makeStateActions( st ); - makeTransList( st ); - - setId( curState, st->alg.stateNum ); - if ( st->isFinState() ) - setFinal( curState ); - - /* If there is an eof target, make an eof transition. */ - if ( st->eofTarget != 0 ) { - /* Find the eof actions. */ - RedActionTable *eofActions = 0; - eofActions = actionTableMap.find( st->eofActionTable ); - setEofTrans( curState, st->eofTarget->alg.stateNum, eofActions->id ); - } - - curState += 1; - } -} - -void RedFsmBuild::makeEntryPoints() -{ - if ( fsm->lmRequiresErrorState ) - setForcedErrorState(); - - for ( EntryMap::Iter en = fsm->entryPoints; en.lte(); en++ ) { - /* Get the name instantiation from nameIndex. */ - FsmState *state = en->value; - long entry = state->alg.stateNum; - addEntryPoint( en->key, entry ); - } - - for ( RegionList::Iter reg = pd->regionList; reg.lte(); reg++ ) { - assert( reg->impl->regionNameInst != 0 ); - - TokenRegion *use = reg; - - if ( use->zeroLel != 0 ) - use = use->ignoreOnly; - - NameInst *regionName = use->impl->regionNameInst; - addRegionToEntry( reg->id, regionName->id ); - } -} - -void RedFsmBuild::makeMachine() -{ - /* Action tables. */ - reduceActionTables(); - - makeActionList(); - makeActionTableList(); - makeConditions(); - - /* Start state. */ - startState = fsm->startState->alg.stateNum; - - /* Error state. 
*/ - if ( fsm->errState != 0 ) - errState = fsm->errState->alg.stateNum; - - makeEntryPoints(); - makeStateList(); -} - -void RedFsmBuild::makeConditions() -{ -} - -RedFsm *RedFsmBuild::reduceMachine() -{ - redFsm = new RedFsm(); - redFsm->wantComplete = true; - - /* Open the definition. */ - makeMachine(); - - /* Do this before distributing transitions out to singles and defaults - * makes life easier. */ - redFsm->maxKey = findMaxKey(); - - redFsm->assignActionLocs(); - - /* Find the first final state (The final state with the lowest id). */ - redFsm->findFirstFinState(); - - /* Choose default transitions and the single transition. */ - redFsm->chooseDefaultSpan(); - - /* Maybe do flat expand, otherwise choose single. */ - redFsm->chooseSingle(); - - /* Set up incoming transitions. */ - redFsm->setInTrans(); - - /* Anlayze Machine will find the final action reference counts, among - * other things. We will use these in reporting the usage - * of fsm directives in action code. */ - redFsm->analyzeMachine(); - - return redFsm; -} - diff --git a/src/redbuild.h b/src/redbuild.h deleted file mode 100644 index e9ad0465..00000000 --- a/src/redbuild.h +++ /dev/null @@ -1,161 +0,0 @@ -/* - * Copyright 2006-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef _COLM_FSMREDUCE_H -#define _COLM_FSMREDUCE_H - -#include <iostream> - -#include <avltree.h> - -#include "fsmgraph.h" -#include "compiler.h" - -/* Forwards. */ -struct FsmTrans; -struct FsmGraph; -struct Compiler; -struct FsmCodeGen; -struct RedFsm; -struct GenCondSpace; -struct Condition; - -struct RedActionTable -: - public AvlTreeEl<RedActionTable> -{ - RedActionTable( const ActionTable &key ) - : - key(key), - id(0) - { } - - const ActionTable &getKey() - { return key; } - - ActionTable key; - int id; -}; - -typedef AvlTree<RedActionTable, ActionTable, CmpActionTable> ActionTableMap; - -struct NextRedTrans -{ - Key lowKey, highKey; - FsmTrans *trans; - FsmTrans *next; - - void load() { - if ( trans != 0 ) { - next = trans->next; - lowKey = trans->lowKey; - highKey = trans->highKey; - } - } - - NextRedTrans( FsmTrans *t ) { - trans = t; - load(); - } - - void increment() { - trans = next; - load(); - } -}; - -class RedFsmBuild -{ -public: - RedFsmBuild( Compiler *pd, FsmGraph *fsm ); - RedFsm *reduceMachine( ); - -private: - void appendTrans( TransListVect &outList, Key lowKey, Key highKey, FsmTrans *trans ); - void makeStateActions( FsmState *state ); - void makeStateList(); - void makeStateConditions( FsmState *state ); - - void initActionList( unsigned long length ); - void newAction( int anum, char *name, int line, int col, Action *action ); - void initActionTableList( unsigned long length ); - void initCondSpaceList( ulong length ); - void condSpaceItem( int 
cnum, long condActionId ); - void newCondSpace( int cnum, int condSpaceId, Key baseKey ); - void initStateCondList( int snum, ulong length ); - void addStateCond( int snum, Key lowKey, Key highKey, long condNum ); - void initStateList( unsigned long length ); - void addRegionToEntry( int regionId, int entryId ); - void addEntryPoint( int entryId, unsigned long entryState ); - void setId( int snum, int id ); - void initTransList( int snum, unsigned long length ); - void newTrans( int snum, int tnum, Key lowKey, Key highKey, - long targ, long act ); - void finishTransList( int snum ); - void setFinal( int snum ); - void setEofTrans( int snum, int eofTarget, int actId ); - void setStateActions( int snum, long toStateAction, - long fromStateAction, long eofAction ); - void setForcedErrorState(); - void closeMachine(); - Key findMaxKey(); - - void makeEntryPoints(); - void makeGetKeyExpr(); - void makeAccessExpr(); - void makeCurStateExpr(); - void makeConditions(); - void makeInlineList( InlineList *inlineList, InlineItem *context ); - void makeActionList(); - void makeActionTableList(); - void reduceTrans( FsmTrans *trans ); - void reduceActionTables(); - void makeTransList( FsmState *state ); - void makeTrans( Key lowKey, Key highKey, FsmTrans *defTrans ); - void makeAction( Action *action ); - void makeLmSwitch( InlineItem *item ); - void makeMachine(); - void makeActionExec( InlineItem *item ); - void makeActionExecTE( InlineItem *item ); - - Compiler *pd; - FsmGraph *fsm; - ActionTableMap actionTableMap; - int nextActionTableId; - - int startState; - int errState; - -public: - RedFsm *redFsm; - -private: - int curAction; - int curActionTable; - int curTrans; - int curState; - int curCondSpace; - int curStateCond; -}; - -#endif /* _COLM_FSMREDUCE_H */ - diff --git a/src/redfsm.cc b/src/redfsm.cc deleted file mode 100644 index d79a1e32..00000000 --- a/src/redfsm.cc +++ /dev/null @@ -1,1049 +0,0 @@ -/* - * Copyright 2006-2018 Adrian Thurston <thurston@colm.net> - * - 
* Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "redfsm.h" - -#include <assert.h> -#include <stdbool.h> -#include <string.h> -#include <stdio.h> - -#include <sstream> -#include <iostream> - -#include "fsmgraph.h" -#include "parsetree.h" - -using std::ostringstream; - -string nameOrLoc( GenAction *genAction ) -{ - if ( genAction->name != 0 ) - return string(genAction->name); - else { - ostringstream ret; - ret << genAction->loc.line << ":" << genAction->loc.col; - return ret.str(); - } -} - -RedFsm::RedFsm() -: - wantComplete(false), - forcedErrorState(false), - nextActionId(0), - nextTransId(0), - errState(0), - errTrans(0), - firstFinState(0), - numFinStates(0), - allActions(0), - allActionTables(0), - allStates(0), - bAnyToStateActions(false), - bAnyFromStateActions(false), - bAnyRegActions(false), - bAnyEofActions(false), - bAnyActionGotos(false), - bAnyActionCalls(false), - bAnyActionRets(false), - bAnyRegActionRets(false), - bAnyRegActionByValControl(false), - bAnyRegNextStmt(false), - bAnyRegCurStateRef(false), - bAnyRegBreak(false), - bAnyLmSwitchError(false), - bAnyConditions(false) -{ -} - -/* Does the machine have any actions. */ -bool RedFsm::anyActions() -{ - return actionMap.length() > 0; -} - -void RedFsm::depthFirstOrdering( RedState *state ) -{ - /* Nothing to do if the state is already on the list. */ - if ( state->onStateList ) - return; - - /* Doing depth first, put state on the list. */ - state->onStateList = true; - stateList.append( state ); - -// /* At this point transitions should only be in ranges. */ -// assert( state->outSingle.length() == 0 ); -// assert( state->defTrans == 0 ); - - /* Recurse on singles. */ - for ( RedTransList::Iter stel = state->outSingle; stel.lte(); stel++ ) { - if ( stel->value->targ != 0 ) - depthFirstOrdering( stel->value->targ ); - } - - /* Recurse on everything ranges. 
*/ - for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) { - if ( rtel->value->targ != 0 ) - depthFirstOrdering( rtel->value->targ ); - } - - if ( state->defTrans != 0 && state->defTrans->targ != 0 ) - depthFirstOrdering( state->defTrans->targ ); -} - -/* Ordering states by transition connections. */ -void RedFsm::depthFirstOrdering() -{ - /* Init on state list flags. */ - for ( RedStateList::Iter st = stateList; st.lte(); st++ ) - st->onStateList = false; - - /* Clear out the state list, we will rebuild it. */ - int stateListLen = stateList.length(); - stateList.abandon(); - - /* Add back to the state list from the start state and all other entry - * points. */ - depthFirstOrdering( startState ); - for ( RedStateSet::Iter en = entryPoints; en.lte(); en++ ) - depthFirstOrdering( *en ); - if ( forcedErrorState ) - depthFirstOrdering( errState ); - - /* Make sure we put everything back on. */ - assert( stateListLen == stateList.length() ); -} - -/* Assign state ids by appearance in the state list. */ -void RedFsm::sequentialStateIds() -{ - /* Table based machines depend on the state numbers starting at zero. */ - nextStateId = 0; - for ( RedStateList::Iter st = stateList; st.lte(); st++ ) - st->id = nextStateId++; -} - -/* Stable sort the states by final state status. */ -void RedFsm::sortStatesByFinal() -{ - /* Move forward through the list and throw final states onto the end. */ - RedState *state = 0; - RedState *next = stateList.head; - RedState *last = stateList.tail; - while ( state != last ) { - /* Move forward and load up the next. */ - state = next; - next = state->next; - - /* Throw to the end? */ - if ( state->isFinal ) { - stateList.detach( state ); - stateList.append( state ); - } - } -} - -/* Assign state ids by final state state status. */ -void RedFsm::sortStateIdsByFinal() -{ - /* Table based machines depend on this starting at zero. */ - nextStateId = 0; - - /* First pass to assign non final ids. 
*/ - for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { - if ( ! st->isFinal ) - st->id = nextStateId++; - } - - /* Second pass to assign final ids. */ - for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { - if ( st->isFinal ) - st->id = nextStateId++; - } -} - -struct CmpStateById -{ - static int compare( RedState *st1, RedState *st2 ) - { - if ( st1->id < st2->id ) - return -1; - else if ( st1->id > st2->id ) - return 1; - else - return 0; - } -}; - -void RedFsm::sortByStateId() -{ - /* Make the array. */ - int pos = 0; - RedState **ptrList = new RedState*[stateList.length()]; - for ( RedStateList::Iter st = stateList; st.lte(); st++ ) - ptrList[pos++] = st; - - MergeSort<RedState*, CmpStateById> mergeSort; - mergeSort.sort( ptrList, stateList.length() ); - - stateList.abandon(); - for ( int st = 0; st < pos; st++ ) - stateList.append( ptrList[st] ); - - delete[] ptrList; -} - -/* Find the final state with the lowest id. */ -void RedFsm::findFirstFinState() -{ - for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { - if ( st->isFinal && (firstFinState == 0 || st->id < firstFinState->id) ) - firstFinState = st; - } -} - -void RedFsm::assignActionLocs() -{ - int nextLocation = 0; - for ( GenActionTableMap::Iter act = actionMap; act.lte(); act++ ) { - /* Store the loc, skip over the array and a null terminator. */ - act->location = nextLocation; - nextLocation += act->key.length() + 1; - } -} - -/* Check if we can extend the current range by displacing any ranges - * ahead to the singles. */ -bool RedFsm::canExtend( const RedTransList &list, int pos ) -{ - /* Get the transition that we want to extend. */ - RedTrans *extendTrans = list[pos].value; - - /* Look ahead in the transition list. */ - for ( int next = pos + 1; next < list.length(); pos++, next++ ) { - /* If they are not continuous then cannot extend. 
*/ - Key nextKey = list[next].lowKey; - nextKey.decrement(); - if ( list[pos].highKey != nextKey ) - break; - - /* Check for the extenstion property. */ - if ( extendTrans == list[next].value ) - return true; - - /* If the span of the next element is more than one, then don't keep - * checking, it won't be moved to single. */ - unsigned long long nextSpan = keyOps->span( list[next].lowKey, list[next].highKey ); - if ( nextSpan > 1 ) - break; - } - return false; -} - -/* Move ranges to the singles list. */ -void RedFsm::moveTransToSingle( RedState *state ) -{ - RedTransList &range = state->outRange; - RedTransList &single = state->outSingle; - for ( int rpos = 0; rpos < range.length(); ) { - /* Check if this is a range we can extend. */ - if ( canExtend( range, rpos ) ) { - /* Transfer singles over. */ - while ( range[rpos].value != range[rpos+1].value ) { - /* Transfer the range to single. */ - single.append( range[rpos+1] ); - range.remove( rpos+1 ); - } - - /* Extend. */ - range[rpos].highKey = range[rpos+1].highKey; - range.remove( rpos+1 ); - } - /* Maybe move it to the singles. */ - else if ( keyOps->span( range[rpos].lowKey, range[rpos].highKey ) == 1 ) { - single.append( range[rpos] ); - range.remove( rpos ); - } - else { - /* Keeping it in the ranges. */ - rpos += 1; - } - } -} - -/* Look through ranges and choose suitable single character transitions. */ -void RedFsm::chooseSingle() -{ - /* Loop the states. */ - for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { - /* Rewrite the transition list taking out the suitable single - * transtions. 
*/ - moveTransToSingle( st ); - } -} - -void RedFsm::makeFlat() -{ - for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { - st->condLowKey = 0; - st->condHighKey = 0; - - if ( st->outRange.length() == 0 ) { - st->lowKey = st->highKey = 0; - st->transList = 0; - } - else { - st->lowKey = st->outRange[0].lowKey; - st->highKey = st->outRange[st->outRange.length()-1].highKey; - unsigned long long span = keyOps->span( st->lowKey, st->highKey ); - st->transList = new RedTrans*[ span ]; - memset( st->transList, 0, sizeof(RedTrans*)*span ); - - for ( RedTransList::Iter trans = st->outRange; trans.lte(); trans++ ) { - unsigned long long base, trSpan; - base = keyOps->span( st->lowKey, trans->lowKey )-1; - trSpan = keyOps->span( trans->lowKey, trans->highKey ); - for ( unsigned long long pos = 0; pos < trSpan; pos++ ) - st->transList[base+pos] = trans->value; - } - - /* Fill in the gaps with the default transition. */ - for ( unsigned long long pos = 0; pos < span; pos++ ) { - if ( st->transList[pos] == 0 ) - st->transList[pos] = st->defTrans; - } - } - } -} - - -/* A default transition has been picked, move it from the outRange to the - * default pointer. */ -void RedFsm::moveToDefault( RedTrans *defTrans, RedState *state ) -{ - /* Rewrite the outRange, omitting any ranges that use - * the picked default. */ - RedTransList outRange; - for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) { - /* If it does not take the default, copy it over. */ - if ( rtel->value != defTrans ) - outRange.append( *rtel ); - } - - /* Save off the range we just created into the state's range. */ - state->outRange.transfer( outRange ); - - /* Store the default. */ - state->defTrans = defTrans; -} - -bool RedFsm::alphabetCovered( RedTransList &outRange ) -{ - /* Cannot cover without any out ranges. */ - if ( outRange.length() == 0 ) - return false; - - /* If the first range doesn't start at the the lower bound then the - * alphabet is not covered. 
*/ - RedTransList::Iter rtel = outRange; - if ( keyOps->minKey < rtel->lowKey ) - return false; - - /* Check that every range is next to the previous one. */ - rtel.increment(); - for ( ; rtel.lte(); rtel++ ) { - Key highKey = rtel[-1].highKey; - highKey.increment(); - if ( highKey != rtel->lowKey ) - return false; - } - - /* The last must extend to the upper bound. */ - RedTransEl *last = &outRange[outRange.length()-1]; - if ( last->highKey < keyOps->maxKey ) - return false; - - return true; -} - -RedTrans *RedFsm::chooseDefaultSpan( RedState *state ) -{ - /* Make a set of transitions from the outRange. */ - RedTransPtrSet stateTransSet; - for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) - stateTransSet.insert( rtel->value ); - - /* For each transition in the find how many alphabet characters the - * transition spans. */ - unsigned long long *span = new unsigned long long[stateTransSet.length()]; - memset( span, 0, sizeof(unsigned long long) * stateTransSet.length() ); - for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) { - /* Lookup the transition in the set. */ - RedTrans **inSet = stateTransSet.find( rtel->value ); - int pos = inSet - stateTransSet.data; - span[pos] += keyOps->span( rtel->lowKey, rtel->highKey ); - } - - /* Find the max span, choose it for making the default. */ - RedTrans *maxTrans = 0; - unsigned long long maxSpan = 0; - for ( RedTransPtrSet::Iter rtel = stateTransSet; rtel.lte(); rtel++ ) { - if ( span[rtel.pos()] > maxSpan ) { - maxSpan = span[rtel.pos()]; - maxTrans = *rtel; - } - } - - delete[] span; - return maxTrans; -} - -/* Pick default transitions from ranges for the states. */ -void RedFsm::chooseDefaultSpan() -{ - /* Loop the states. */ - for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { - /* Only pick a default transition if the alphabet is covered. This - * avoids any transitions in the out range that go to error and avoids - * the need for an ERR state. 
*/ - if ( alphabetCovered( st->outRange ) ) { - /* Pick a default transition by largest span. */ - RedTrans *defTrans = chooseDefaultSpan( st ); - - /* Rewrite the transition list taking out the transition we picked - * as the default and store the default. */ - moveToDefault( defTrans, st ); - } - } -} - -RedTrans *RedFsm::chooseDefaultGoto( RedState *state ) -{ - /* Make a set of transitions from the outRange. */ - RedTransPtrSet stateTransSet; - for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) { - if ( rtel->value->targ == state->next ) - return rtel->value; - } - return 0; -} - -void RedFsm::chooseDefaultGoto() -{ - /* Loop the states. */ - for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { - /* Pick a default transition. */ - RedTrans *defTrans = chooseDefaultGoto( st ); - if ( defTrans == 0 ) - defTrans = chooseDefaultSpan( st ); - - /* Rewrite the transition list taking out the transition we picked - * as the default and store the default. */ - moveToDefault( defTrans, st ); - } -} - -RedTrans *RedFsm::chooseDefaultNumRanges( RedState *state ) -{ - /* Make a set of transitions from the outRange. */ - RedTransPtrSet stateTransSet; - for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) - stateTransSet.insert( rtel->value ); - - /* For each transition in the find how many ranges use the transition. */ - int *numRanges = new int[stateTransSet.length()]; - memset( numRanges, 0, sizeof(int) * stateTransSet.length() ); - for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) { - /* Lookup the transition in the set. */ - RedTrans **inSet = stateTransSet.find( rtel->value ); - numRanges[inSet - stateTransSet.data] += 1; - } - - /* Find the max number of ranges. 
*/ - RedTrans *maxTrans = 0; - int maxNumRanges = 0; - for ( RedTransPtrSet::Iter rtel = stateTransSet; rtel.lte(); rtel++ ) { - if ( numRanges[rtel.pos()] > maxNumRanges ) { - maxNumRanges = numRanges[rtel.pos()]; - maxTrans = *rtel; - } - } - - delete[] numRanges; - return maxTrans; -} - -void RedFsm::chooseDefaultNumRanges() -{ - /* Loop the states. */ - for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { - /* Pick a default transition. */ - RedTrans *defTrans = chooseDefaultNumRanges( st ); - - /* Rewrite the transition list taking out the transition we picked - * as the default and store the default. */ - moveToDefault( defTrans, st ); - } -} - -RedTrans *RedFsm::getErrorTrans( ) -{ - /* If the error trans has not been made aready, make it. */ - if ( errTrans == 0 ) { - /* This insert should always succeed since no transition created by - * the user can point to the error state. */ - errTrans = new RedTrans( getErrorState(), 0, nextTransId++ ); - RedTrans *inRes = transSet.insert( errTrans ); - assert( inRes != 0 ); - } - return errTrans; -} - -RedState *RedFsm::getErrorState() -{ - /* Something went wrong. An error state is needed but one was not supplied - * by the frontend. */ - assert( errState != 0 ); - return errState; -} - - -RedTrans *RedFsm::allocateTrans( RedState *targ, RedAction *action ) -{ - /* Create a reduced trans and look for it in the transiton set. */ - RedTrans redTrans( targ, action, 0 ); - RedTrans *inDict = transSet.find( &redTrans ); - if ( inDict == 0 ) { - inDict = new RedTrans( targ, action, nextTransId++ ); - transSet.insert( inDict ); - } - return inDict; -} - -void RedFsm::partitionFsm( int nparts ) -{ - /* At this point the states are ordered by a depth-first traversal. We - * will allocate to partitions based on this ordering. 
*/ - this->nParts = nparts; - int partSize = stateList.length() / nparts; - int remainder = stateList.length() % nparts; - int numInPart = partSize; - int partition = 0; - if ( remainder-- > 0 ) - numInPart += 1; - for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { - st->partition = partition; - - numInPart -= 1; - if ( numInPart == 0 ) { - partition += 1; - numInPart = partSize; - if ( remainder-- > 0 ) - numInPart += 1; - } - } -} - -void RedFsm::setInTrans() -{ - /* First pass counts the number of transitions. */ - for ( RedTransSet::Iter trans = transSet; trans.lte(); trans++ ) - trans->targ->numInTrans += 1; - - /* Pass over states to allocate the needed memory. Reset the counts so we - * can use them as the current size. */ - for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { - st->inTrans = new RedTrans*[st->numInTrans]; - st->numInTrans = 0; - } - - /* Second pass over transitions copies pointers into the in trans list. */ - for ( RedTransSet::Iter trans = transSet; trans.lte(); trans++ ) - trans->targ->inTrans[trans->targ->numInTrans++] = trans; -} - -void RedFsm::setValueLimits() -{ - maxSingleLen = 0; - maxRangeLen = 0; - maxKeyOffset = 0; - maxIndexOffset = 0; - maxActListId = 0; - maxActionLoc = 0; - maxActArrItem = 0; - maxSpan = 0; - maxCondSpan = 0; - maxFlatIndexOffset = 0; - maxCondOffset = 0; - maxCondLen = 0; - maxCondSpaceId = 0; - maxCondIndexOffset = 0; - - /* In both of these cases the 0 index is reserved for no value, so the max - * is one more than it would be if they started at 0. */ - maxIndex = transSet.length(); - maxCond = 0; - - /* The nextStateId - 1 is the last state id assigned. */ - maxState = nextStateId - 1; - - for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { - /* Maximum single length. */ - if ( st->outSingle.length() > maxSingleLen ) - maxSingleLen = st->outSingle.length(); - - /* Maximum range length. 
*/ - if ( st->outRange.length() > maxRangeLen ) - maxRangeLen = st->outRange.length(); - - /* The key offset index offset for the state after last is not used, skip it.. */ - if ( ! st.last() ) { - maxKeyOffset += st->outSingle.length() + st->outRange.length()*2; - maxIndexOffset += st->outSingle.length() + st->outRange.length() + 1; - } - - /* Max key span. */ - if ( st->transList != 0 ) { - unsigned long long span = keyOps->span( st->lowKey, st->highKey ); - if ( span > maxSpan ) - maxSpan = span; - } - - /* Max flat index offset. */ - if ( ! st.last() ) { - if ( st->transList != 0 ) - maxFlatIndexOffset += keyOps->span( st->lowKey, st->highKey ); - maxFlatIndexOffset += 1; - } - } - - for ( GenActionTableMap::Iter at = actionMap; at.lte(); at++ ) { - /* Maximum id of action lists. */ - if ( at->actListId+1 > maxActListId ) - maxActListId = at->actListId+1; - - /* Maximum location of items in action array. */ - if ( at->location+1 > maxActionLoc ) - maxActionLoc = at->location+1; - - /* Maximum values going into the action array. */ - if ( at->key.length() > maxActArrItem ) - maxActArrItem = at->key.length(); - for ( GenActionTable::Iter item = at->key; item.lte(); item++ ) { - if ( item->value->actionId > maxActArrItem ) - maxActArrItem = item->value->actionId; - } - } -} - -void RedFsm::findFinalActionRefs() -{ - for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { - /* Rerence count out of single transitions. */ - for ( RedTransList::Iter rtel = st->outSingle; rtel.lte(); rtel++ ) { - if ( rtel->value->action != 0 ) { - rtel->value->action->numTransRefs += 1; - for ( GenActionTable::Iter item = rtel->value->action->key; item.lte(); item++ ) - item->value->numTransRefs += 1; - } - } - - /* Reference count out of range transitions. 
*/ - for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) { - if ( rtel->value->action != 0 ) { - rtel->value->action->numTransRefs += 1; - for ( GenActionTable::Iter item = rtel->value->action->key; item.lte(); item++ ) - item->value->numTransRefs += 1; - } - } - - /* Reference count default transition. */ - if ( st->defTrans != 0 && st->defTrans->action != 0 ) { - st->defTrans->action->numTransRefs += 1; - for ( GenActionTable::Iter item = st->defTrans->action->key; item.lte(); item++ ) - item->value->numTransRefs += 1; - } - - /* Reference count to state actions. */ - if ( st->toStateAction != 0 ) { - st->toStateAction->numToStateRefs += 1; - for ( GenActionTable::Iter item = st->toStateAction->key; item.lte(); item++ ) - item->value->numToStateRefs += 1; - } - - /* Reference count from state actions. */ - if ( st->fromStateAction != 0 ) { - st->fromStateAction->numFromStateRefs += 1; - for ( GenActionTable::Iter item = st->fromStateAction->key; item.lte(); item++ ) - item->value->numFromStateRefs += 1; - } - - /* Reference count EOF actions. */ - if ( st->eofAction != 0 ) { - st->eofAction->numEofRefs += 1; - for ( GenActionTable::Iter item = st->eofAction->key; item.lte(); item++ ) - item->value->numEofRefs += 1; - } - } -} - -void RedFsm::analyzeAction( GenAction *act, InlineList *inlineList ) -{ - for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) { - /* Check for various things in regular actions. 
*/ - if ( act->numTransRefs > 0 || act->numToStateRefs > 0 || - act->numFromStateRefs > 0 || act->numEofRefs > 0 ) - { - if ( item->type == InlineItem::LmSwitch && - item->tokenRegion->lmSwitchHandlesError ) - { - bAnyLmSwitchError = true; - } - } - - if ( item->children != 0 ) - analyzeAction( act, item->children ); - } -} - -void RedFsm::analyzeActionList( RedAction *redAct, InlineList *inlineList ) -{ - for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) { - if ( item->children != 0 ) - analyzeActionList( redAct, item->children ); - } -} - -/* Assign ids to referenced actions. */ -void RedFsm::assignActionIds() -{ - int nextActionId = 0; - for ( GenActionList::Iter act = genActionList; act.lte(); act++ ) { - /* Only ever interested in referenced actions. */ - if ( numRefs( act ) > 0 ) - act->actionId = nextActionId++; - } -} - -/* Gather various info on the machine. */ -void RedFsm::analyzeMachine() -{ - /* Find the true count of action references. */ - findFinalActionRefs(); - - /* Check if there are any calls in action code. */ - for ( GenActionList::Iter act = genActionList; act.lte(); act++ ) { - /* Record the occurrence of various kinds of actions. */ - if ( act->numToStateRefs > 0 ) - bAnyToStateActions = true; - if ( act->numFromStateRefs > 0 ) - bAnyFromStateActions = true; - if ( act->numEofRefs > 0 ) - bAnyEofActions = true; - if ( act->numTransRefs > 0 ) - bAnyRegActions = true; - - /* Recurse through the action's parse tree looking for various things. */ - analyzeAction( act, act->inlineList ); - } - - /* Analyze reduced action lists. */ - for ( GenActionTableMap::Iter redAct = actionMap; redAct.lte(); redAct++ ) { - for ( GenActionTable::Iter act = redAct->key; act.lte(); act++ ) - analyzeActionList( redAct, act->value->inlineList ); - } - - /* Find states that have transitions with actions that have next - * statements. */ - for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { - /* Check any actions out of outSinge. 
*/ - for ( RedTransList::Iter rtel = st->outSingle; rtel.lte(); rtel++ ) { - if ( rtel->value->action != 0 && rtel->value->action->anyCurStateRef() ) - st->bAnyRegCurStateRef = true; - } - - /* Check any actions out of outRange. */ - for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) { - if ( rtel->value->action != 0 && rtel->value->action->anyCurStateRef() ) - st->bAnyRegCurStateRef = true; - } - - /* Check any action out of default. */ - if ( st->defTrans != 0 && st->defTrans->action != 0 && - st->defTrans->action->anyCurStateRef() ) - st->bAnyRegCurStateRef = true; - } - - /* Assign ids to actions that are referenced. */ - assignActionIds(); - - /* Set the maximums of various values used for deciding types. */ - setValueLimits(); -} - -int transAction( RedTrans *trans ) -{ - int retAct = 0; - if ( trans->action != 0 ) - retAct = trans->action->location+1; - return retAct; -} - -int toStateAction( RedState *state ) -{ - int act = 0; - if ( state->toStateAction != 0 ) - act = state->toStateAction->location+1; - return act; -} - -int fromStateAction( RedState *state ) -{ - int act = 0; - if ( state->fromStateAction != 0 ) - act = state->fromStateAction->location+1; - return act; -} - -int eofAction( RedState *state ) -{ - int act = 0; - if ( state->eofAction != 0 ) - act = state->eofAction->location+1; - return act; -} - - -fsm_tables *RedFsm::makeFsmTables() -{ - /* The fsm runtime needs states sorted by id. 
*/ - sortByStateId(); - - int pos, curKeyOffset, curIndOffset; - fsm_tables *fsmTables = new fsm_tables; - fsmTables->num_states = stateList.length(); - - /* - * actions - */ - - fsmTables->num_actions = 1; - for ( GenActionTableMap::Iter act = actionMap; act.lte(); act++ ) - fsmTables->num_actions += 1 + act->key.length(); - - pos = 0; - fsmTables->actions = new long[fsmTables->num_actions]; - fsmTables->actions[pos++] = 0; - for ( GenActionTableMap::Iter act = actionMap; act.lte(); act++ ) { - fsmTables->actions[pos++] = act->key.length(); - for ( GenActionTable::Iter item = act->key; item.lte(); item++ ) - fsmTables->actions[pos++] = item->value->actionId; - } - - /* - * keyOffset - */ - pos = 0, curKeyOffset = 0; - fsmTables->key_offsets = new long[fsmTables->num_states]; - for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { - /* Store the current offset. */ - fsmTables->key_offsets[pos++] = curKeyOffset; - - /* Move the key offset ahead. */ - curKeyOffset += st->outSingle.length() + st->outRange.length()*2; - } - - /* - * transKeys - */ - fsmTables->num_trans_keys = 0; - for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { - fsmTables->num_trans_keys += st->outSingle.length(); - fsmTables->num_trans_keys += 2 * st->outRange.length(); - } - - pos = 0; - fsmTables->trans_keys = new char[fsmTables->num_trans_keys]; - for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { - for ( RedTransList::Iter stel = st->outSingle; stel.lte(); stel++ ) - fsmTables->trans_keys[pos++] = stel->lowKey.getVal(); - for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) { - fsmTables->trans_keys[pos++] = rtel->lowKey.getVal(); - fsmTables->trans_keys[pos++] = rtel->highKey.getVal(); - } - } - - /* - * singleLengths - */ - pos = 0; - fsmTables->single_lengths = new long[fsmTables->num_states]; - for ( RedStateList::Iter st = stateList; st.lte(); st++ ) - fsmTables->single_lengths[pos++] = st->outSingle.length(); - - /* - * rangeLengths - */ - pos = 0; 
- fsmTables->range_lengths = new long[fsmTables->num_states]; - for ( RedStateList::Iter st = stateList; st.lte(); st++ ) - fsmTables->range_lengths[pos++] = st->outRange.length(); - - /* - * indexOffsets - */ - pos = 0, curIndOffset = 0; - fsmTables->index_offsets = new long[fsmTables->num_states]; - for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { - fsmTables->index_offsets[pos++] = curIndOffset; - - curIndOffset += st->outSingle.length() + st->outRange.length(); - if ( st->defTrans != 0 ) - curIndOffset += 1; - } - - /* - * transTargsWI - */ - fsmTables->numTransTargsWI = 0; - for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { - fsmTables->numTransTargsWI += st->outSingle.length(); - fsmTables->numTransTargsWI += st->outRange.length(); - if ( st->defTrans != 0 ) - fsmTables->numTransTargsWI += 1; - } - - pos = 0; - fsmTables->transTargsWI = new long[fsmTables->numTransTargsWI]; - for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { - for ( RedTransList::Iter stel = st->outSingle; stel.lte(); stel++ ) - fsmTables->transTargsWI[pos++] = stel->value->targ->id; - - for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) - fsmTables->transTargsWI[pos++] = rtel->value->targ->id; - - if ( st->defTrans != 0 ) - fsmTables->transTargsWI[pos++] = st->defTrans->targ->id; - } - - /* - * transActionsWI - */ - fsmTables->numTransActionsWI = 0; - for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { - fsmTables->numTransActionsWI += st->outSingle.length(); - fsmTables->numTransActionsWI += st->outRange.length(); - if ( st->defTrans != 0 ) - fsmTables->numTransActionsWI += 1; - } - - pos = 0; - fsmTables->transActionsWI = new long[fsmTables->numTransActionsWI]; - for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { - for ( RedTransList::Iter stel = st->outSingle; stel.lte(); stel++ ) - fsmTables->transActionsWI[pos++] = transAction( stel->value ); - - for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) - 
fsmTables->transActionsWI[pos++] = transAction( rtel->value ); - - if ( st->defTrans != 0 ) - fsmTables->transActionsWI[pos++] = transAction( st->defTrans ); - } - - /* - * toStateActions - */ - pos = 0; - fsmTables->to_state_actions = new long[fsmTables->num_states]; - for ( RedStateList::Iter st = stateList; st.lte(); st++ ) - fsmTables->to_state_actions[pos++] = toStateAction( st ); - - /* - * fromStateActions - */ - pos = 0; - fsmTables->from_state_actions = new long[fsmTables->num_states]; - for ( RedStateList::Iter st = stateList; st.lte(); st++ ) - fsmTables->from_state_actions[pos++] = fromStateAction( st ); - - /* - * eofActions - */ - pos = 0; - fsmTables->eof_actions = new long[fsmTables->num_states]; - for ( RedStateList::Iter st = stateList; st.lte(); st++ ) - fsmTables->eof_actions[pos++] = eofAction( st ); - - /* - * eofTargs - */ - pos = 0; - fsmTables->eof_targs = new long[fsmTables->num_states]; - for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { - int targ = -1; - if ( st->eofTrans != 0 ) - targ = st->eofTrans->targ->id; - fsmTables->eof_targs[pos++] = targ; - } - - /* Start state. */ - fsmTables->start_state = startState->id; - - /* First final state. */ - fsmTables->first_final = ( firstFinState != 0 ) ? - firstFinState->id : nextStateId; - - /* The error state. */ - fsmTables->error_state = ( errState != 0 ) ? - errState->id : -1; - - /* The array pointing to actions. 
*/ - pos = 0; - fsmTables->num_action_switch = genActionList.length(); - fsmTables->action_switch = new GenAction*[fsmTables->num_action_switch]; - for ( GenActionList::Iter act = genActionList; act.lte(); act++ ) - fsmTables->action_switch[pos++] = act; - - /* - * entryByRegion - */ - - fsmTables->num_regions = regionToEntry.length()+1; - fsmTables->entry_by_region = new long[fsmTables->num_regions]; - fsmTables->entry_by_region[0] = fsmTables->error_state; - - pos = 1; - for ( RegionToEntry::Iter en = regionToEntry; en.lte(); en++ ) { - /* Find the entry state from the entry id. */ - RedEntryMapEl *entryMapEl = redEntryMap.find( *en ); - - /* Save it off. */ - fsmTables->entry_by_region[pos++] = entryMapEl != 0 ? entryMapEl->value - : fsmTables->error_state; - } - - return fsmTables; -} - - diff --git a/src/redfsm.h b/src/redfsm.h deleted file mode 100644 index 618fbd61..00000000 --- a/src/redfsm.h +++ /dev/null @@ -1,479 +0,0 @@ -/* - * Copyright 2006-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef _COLM_REDFSM_H -#define _COLM_REDFSM_H - -#include <assert.h> -#include <string.h> - -#include <string> - -#include <avlbasic.h> -#include <avltree.h> -#include <avlmap.h> -#include <bstmap.h> -#include <vector.h> -#include <dlist.h> -#include <bstset.h> -#include <mergesort.h> -#include <sbstmap.h> -#include <sbstset.h> -#include <sbsttable.h> - -#include "keyops.h" -#include "compare.h" -#include "global.h" -#include "pdarun.h" - -#define TRANS_ERR_TRANS 0 -#define STATE_ERR_STATE 0 -#define FUNC_NO_FUNC 0 - -using std::string; - -struct RedState; -struct InlineList; -struct Compiler; -struct ObjectField; - -/* Element in list of actions. Contains the string for the code to exectute. */ -struct GenAction -{ - /* Data collected during parse. */ - InputLoc loc; - char *name; - InlineList *inlineList; - int actionId; - MarkType markType; - ObjectField *objField; - long markId; - - int numTransRefs; - int numToStateRefs; - int numFromStateRefs; - int numEofRefs; - - GenAction *prev, *next; -}; - -typedef DList<GenAction> GenActionList; -string nameOrLoc( GenAction *genAction ); - -/* Number of references in the final machine. */ -inline int numRefs( GenAction *genAction ) -{ - return genAction->numTransRefs + - genAction->numToStateRefs + - genAction->numFromStateRefs + - genAction->numEofRefs; -} - - -/* Forwards. */ -struct RedState; -struct FsmState; - -/* Transistion GenAction Element. */ -typedef SBstMapEl< int, GenAction* > GenActionTableEl; - -/* Transition GenAction Table. 
*/ -struct GenActionTable - : public SBstMap< int, GenAction*, CmpOrd<int> > -{ - void setAction( int ordering, GenAction *action ); - void setActions( int *orderings, GenAction **actions, int nActs ); - void setActions( const GenActionTable &other ); -}; - -/* Compare of a whole action table element (key & value). */ -struct GenCmpActionTableEl -{ - static int compare( const GenActionTableEl &action1, - const GenActionTableEl &action2 ) - { - if ( action1.key < action2.key ) - return -1; - else if ( action1.key > action2.key ) - return 1; - else if ( action1.value < action2.value ) - return -1; - else if ( action1.value > action2.value ) - return 1; - return 0; - } -}; - -/* Compare for GenActionTable. */ -typedef CmpSTable< GenActionTableEl, GenCmpActionTableEl > GenCmpActionTable; - -/* Set of states. */ -typedef BstSet<RedState*> RedStateSet; -typedef BstSet<int> IntSet; - -/* Reduced action. */ -struct RedAction -: - public AvlTreeEl<RedAction> -{ - RedAction( ) - : - key(), - eofRefs(0), - numTransRefs(0), - numToStateRefs(0), - numFromStateRefs(0), - numEofRefs(0), - bAnyNextStmt(false), - bAnyCurStateRef(false), - bAnyBreakStmt(false) - { } - - const GenActionTable &getKey() - { return key; } - - GenActionTable key; - int actListId; - int location; - IntSet *eofRefs; - - /* Number of references in the final machine. */ - bool numRefs() - { return numTransRefs + numToStateRefs + numFromStateRefs + numEofRefs; } - int numTransRefs; - int numToStateRefs; - int numFromStateRefs; - int numEofRefs; - - bool anyNextStmt() { return bAnyNextStmt; } - bool anyCurStateRef() { return bAnyCurStateRef; } - bool anyBreakStmt() { return bAnyBreakStmt; } - - bool bAnyNextStmt; - bool bAnyCurStateRef; - bool bAnyBreakStmt; -}; -typedef AvlTree<RedAction, GenActionTable, GenCmpActionTable> GenActionTableMap; - -/* Reduced transition. 
*/ -struct RedTrans -: - public AvlTreeEl<RedTrans> -{ - RedTrans( RedState *targ, RedAction *action, int id ) - : targ(targ), action(action), id(id), labelNeeded(true) { } - - RedState *targ; - RedAction *action; - int id; - bool partitionBoundary; - bool labelNeeded; -}; - -/* Compare of transitions for the final reduction of transitions. Comparison - * is on target and the pointer to the shared action table. It is assumed that - * when this is used the action tables have been reduced. */ -struct CmpRedTrans -{ - static int compare( const RedTrans &t1, const RedTrans &t2 ) - { - if ( t1.targ < t2.targ ) - return -1; - else if ( t1.targ > t2.targ ) - return 1; - else if ( t1.action < t2.action ) - return -1; - else if ( t1.action > t2.action ) - return 1; - else - return 0; - } -}; - -typedef AvlBasic<RedTrans, CmpRedTrans> RedTransSet; - -/* Element in out range. */ -struct RedTransEl -{ - /* Constructors. */ - RedTransEl( Key lowKey, Key highKey, RedTrans *value ) - : lowKey(lowKey), highKey(highKey), value(value) { } - - Key lowKey, highKey; - RedTrans *value; -}; - -typedef Vector<RedTransEl> RedTransList; -typedef Vector<RedState*> RedStateVect; - -typedef BstMapEl<RedState*, unsigned long long> RedSpanMapEl; -typedef BstMap<RedState*, unsigned long long> RedSpanMap; - -/* Compare used by span map sort. Reverse sorts by the span. */ -struct CmpRedSpanMapEl -{ - static int compare( const RedSpanMapEl &smel1, const RedSpanMapEl &smel2 ) - { - if ( smel1.value > smel2.value ) - return -1; - else if ( smel1.value < smel2.value ) - return 1; - else - return 0; - } -}; - -/* Sorting state-span map entries by span. */ -typedef MergeSort<RedSpanMapEl, CmpRedSpanMapEl> RedSpanMapSort; - -/* Set of entry ids that go into this state. */ -typedef Vector<int> EntryIdVect; -typedef Vector<char*> EntryNameVect; - -/* Maps entry ids (defined by the frontend, to reduced state ids. 
*/ -typedef BstMap<int, int> RedEntryMap; -typedef BstMapEl<int, int> RedEntryMapEl; - -typedef Vector<int> RegionToEntry; - -/* Reduced state. */ -struct RedState -{ - RedState() - : - defTrans(0), - transList(0), - isFinal(false), - labelNeeded(false), - outNeeded(false), - onStateList(false), - toStateAction(0), - fromStateAction(0), - eofAction(0), - eofTrans(0), - id(0), - bAnyRegCurStateRef(false), - partitionBoundary(false), - inTrans(0), - numInTrans(0) - { } - - /* Transitions out. */ - RedTransList outSingle; - RedTransList outRange; - RedTrans *defTrans; - - /* For flat conditions. */ - Key condLowKey, condHighKey; - - /* For flat keys. */ - Key lowKey, highKey; - RedTrans **transList; - - /* The list of states that transitions from this state go to. */ - RedStateVect targStates; - - bool isFinal; - bool labelNeeded; - bool outNeeded; - bool onStateList; - RedAction *toStateAction; - RedAction *fromStateAction; - RedAction *eofAction; - RedTrans *eofTrans; - int id; - - /* Pointers for the list of states. */ - RedState *prev, *next; - - bool anyRegCurStateRef() { return bAnyRegCurStateRef; } - bool bAnyRegCurStateRef; - - int partition; - bool partitionBoundary; - - RedTrans **inTrans; - int numInTrans; -}; - -/* List of states. */ -typedef DList<RedState> RedStateList; - -/* Set of reduced transitons. Comparison is by pointer. */ -typedef BstSet< RedTrans*, CmpOrd<RedTrans*> > RedTransPtrSet; - -/* Next version of the fsm machine. */ -struct RedFsm -{ - RedFsm(); - - bool wantComplete; - bool forcedErrorState; - - int nextActionId; - int nextTransId; - - /* Next State Id doubles as the total number of state ids. 
*/ - int nextStateId; - - RedTransSet transSet; - GenActionTableMap actionMap; - RedStateList stateList; - RedStateSet entryPoints; - RedState *startState; - RedState *errState; - RedTrans *errTrans; - RedTrans *errActionTrans; - RedState *firstFinState; - int numFinStates; - int nParts; - - GenAction *allActions; - RedAction *allActionTables; - RedState *allStates; - GenActionList genActionList; - EntryIdVect entryPointIds; - RedEntryMap redEntryMap; - RegionToEntry regionToEntry; - - bool bAnyToStateActions; - bool bAnyFromStateActions; - bool bAnyRegActions; - bool bAnyEofActions; - bool bAnyActionGotos; - bool bAnyActionCalls; - bool bAnyActionRets; - bool bAnyRegActionRets; - bool bAnyRegActionByValControl; - bool bAnyRegNextStmt; - bool bAnyRegCurStateRef; - bool bAnyRegBreak; - bool bAnyLmSwitchError; - bool bAnyConditions; - - int maxState; - int maxSingleLen; - int maxRangeLen; - int maxKeyOffset; - int maxIndexOffset; - int maxIndex; - int maxActListId; - int maxActionLoc; - int maxActArrItem; - unsigned long long maxSpan; - unsigned long long maxCondSpan; - int maxFlatIndexOffset; - Key maxKey; - int maxCondOffset; - int maxCondLen; - int maxCondSpaceId; - int maxCondIndexOffset; - int maxCond; - - bool anyActions(); - bool anyToStateActions() { return bAnyToStateActions; } - bool anyFromStateActions() { return bAnyFromStateActions; } - bool anyRegActions() { return bAnyRegActions; } - bool anyEofActions() { return bAnyEofActions; } - bool anyActionGotos() { return bAnyActionGotos; } - bool anyActionCalls() { return bAnyActionCalls; } - bool anyActionRets() { return bAnyActionRets; } - bool anyRegActionRets() { return bAnyRegActionRets; } - bool anyRegActionByValControl() { return bAnyRegActionByValControl; } - bool anyRegNextStmt() { return bAnyRegNextStmt; } - bool anyRegCurStateRef() { return bAnyRegCurStateRef; } - bool anyRegBreak() { return bAnyRegBreak; } - bool anyLmSwitchError() { return bAnyLmSwitchError; } - bool anyConditions() { return 
bAnyConditions; } - - /* Is is it possible to extend a range by bumping ranges that span only - * one character to the singles array. */ - bool canExtend( const RedTransList &list, int pos ); - - /* Pick single transitions from the ranges. */ - void moveTransToSingle( RedState *state ); - void chooseSingle(); - - void makeFlat(); - - /* Move a selected transition from ranges to default. */ - void moveToDefault( RedTrans *defTrans, RedState *state ); - - /* Pick a default transition by largest span. */ - RedTrans *chooseDefaultSpan( RedState *state ); - void chooseDefaultSpan(); - - /* Pick a default transition by most number of ranges. */ - RedTrans *chooseDefaultNumRanges( RedState *state ); - void chooseDefaultNumRanges(); - - /* Pick a default transition tailored towards goto driven machine. */ - RedTrans *chooseDefaultGoto( RedState *state ); - void chooseDefaultGoto(); - - /* Ordering states by transition connections. */ - void optimizeStateOrdering( RedState *state ); - void optimizeStateOrdering(); - - /* Ordering states by transition connections. */ - void depthFirstOrdering( RedState *state ); - void depthFirstOrdering(); - - /* Set state ids. */ - void sequentialStateIds(); - void sortStateIdsByFinal(); - - /* Arrange states in by final id. This is a stable sort. */ - void sortStatesByFinal(); - - /* Sorting states by id. */ - void sortByStateId(); - - /* Locating the first final state. This is the final state with the lowest - * id. */ - void findFirstFinState(); - - void assignActionLocs(); - - RedTrans *getErrorTrans(); - RedState *getErrorState(); - - /* Is every char in the alphabet covered? 
*/ - bool alphabetCovered( RedTransList &outRange ); - - RedTrans *allocateTrans( RedState *targState, RedAction *actionTable ); - - void partitionFsm( int nParts ); - - void setInTrans(); - void setValueLimits(); - void assignActionIds(); - void analyzeActionList( RedAction *redAct, InlineList *inlineList ); - void analyzeAction( GenAction *act, InlineList *inlineList ); - void findFinalActionRefs(); - void analyzeMachine(); - - fsm_tables *makeFsmTables(); -}; - -#endif /* _COLM_REDFSM_H */ - diff --git a/src/reduce.cc b/src/reduce.cc deleted file mode 100644 index 89a95015..00000000 --- a/src/reduce.cc +++ /dev/null @@ -1,954 +0,0 @@ -/* - * Copyright 2015-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include <string.h> -#include <stdbool.h> - -#include <iostream> - -#include "fsmcodegen.h" - -void Compiler::writeCommitStub() -{ - *outStream << - "void " << objectName << "_commit_reduce_forward( program_t *prg, tree_t **root,\n" - " struct pda_run *pda_run, parse_tree_t *pt )\n" - "{\n" - " commit_clear_parse_tree( prg, root, pda_run, pt->child );\n" - "}\n" - "\n" - "long " << objectName << "_commit_union_sz( int reducer ) { return 0; }\n" - "void " << objectName << "_init_need() {}\n" - "int " << objectName << "_reducer_need_tok( program_t *prg, " - "struct pda_run *pda_run, int id ) { return COLM_RN_BOTH; }\n" - "int " << objectName << "_reducer_need_ign( program_t *prg, " - "struct pda_run *pda_run ) { return COLM_RN_BOTH; }\n" - "\n" - "void " << objectName << "_read_reduce( program_t *prg, int reducer, input_t *stream ) {}\n" - ; -} - -void Compiler::findRhsRefs( bool &lhsUsed, Vector<ProdEl*> &rhsUsed, Vector<ProdEl*> &treeUsed, - Vector<ProdEl*> &locUsed, Reduction *reduction, Production *production, - const ReduceTextItemList &list ) -{ - ObjectDef *objectDef = production->prodName->objectDef; - - rhsUsed.setAsNew( production->prodElList->length() ); - treeUsed.setAsNew( production->prodElList->length() ); - locUsed.setAsNew( production->prodElList->length() ); - - for ( ReduceTextItemList::Iter i = list; i.lte(); i++ ) { - if ( i->type == ReduceTextItem::LhsRef ) { - lhsUsed = true; - } - - if ( i->type == ReduceTextItem::RhsRef || - i->type == ReduceTextItem::RhsLoc || - i->type == ReduceTextItem::TreeRef ) - { - if ( i->n > 0 ) { - /* Numbered. */ - ProdEl *prodEl = production->prodElList->head; - int adv = i->n - 1; - while ( adv > 0 ) { - prodEl = prodEl->next; - adv -= 1; - } - - if ( i->type == ReduceTextItem::RhsLoc ) - locUsed[i->n-1] = prodEl; - else if ( i->type == ReduceTextItem::TreeRef ) - treeUsed[i->n-1] = prodEl; - else - rhsUsed[i->n-1] = prodEl; - } - else { - /* Named. 
*/ - String name( i->txt.data + 1, i->txt.length() - 1 ); - ObjectField *field = objectDef->rootScope->findField( name ); - if ( field != 0 ) { - for ( Vector<RhsVal>::Iter r = field->rhsVal; r.lte(); r++ ) { - if ( r->prodEl->production == production ) { - if ( i->type == ReduceTextItem::RhsLoc ) - locUsed[r->prodEl->pos] = r->prodEl; - else - rhsUsed[r->prodEl->pos] = r->prodEl; - } - } - } - } - } - } -} - -void Compiler::computeNeeded( Reduction *reduction, Production *production, - const ReduceTextItemList &list ) -{ - bool lhsUsed = false; - Vector<ProdEl*> rhsUsed; - Vector<ProdEl*> treeUsed; - Vector<ProdEl*> locUsed; - - findRhsRefs( lhsUsed, rhsUsed, treeUsed, locUsed, reduction, production, list ); - - /* Same length, can concurrently walk with one test. */ - Vector<ProdEl*>::Iter rhs = rhsUsed; - Vector<ProdEl*>::Iter loc = locUsed; - - for ( ; rhs.lte(); rhs++, loc++ ) { - ProdEl *prodEl = *rhs; - if ( prodEl != 0 ) { - if ( prodEl->production == production && prodEl->langEl->type == LangEl::Term ) - reduction->needData[prodEl->langEl->id] = true; - } - - ProdEl *locEl = *loc; - if ( locEl != 0 && locEl->production == production ) - reduction->needLoc[locEl->langEl->id] = true; - } -} - -void Compiler::loadRefs( Reduction *reduction, Production *production, - const ReduceTextItemList &list, bool read ) -{ - bool lhsUsed = false; - Vector<ProdEl*> rhsUsed; - Vector<ProdEl*> treeUsed; - Vector<ProdEl*> locUsed; - - findRhsRefs( lhsUsed, rhsUsed, treeUsed, locUsed, reduction, production, list ); - - if ( lhsUsed ) { - *outStream << " lel_" << production->prodName->fullName << " *_lhs = "; - - if ( read ) { - *outStream << - "&node->u." << production->prodName->fullName << ";\n"; - } - else { - *outStream << - "&((commit_reduce_union*)(lel+1))->" << production->prodName->fullName << ";\n"; - } - } - - /* - * In the first pass we load using a parse tree cursor. This is for - * nonterms. 
- */ - bool useCursor = false; - for ( Vector<ProdEl*>::Iter rhs = rhsUsed; rhs.lte(); rhs++ ) { - if ( *rhs != 0 && (*rhs)->production == production && - (*rhs)->langEl->type != LangEl::Term ) - { - useCursor = true; - break; - } - } - - if ( useCursor ) { - int cursorPos = 0; - - if ( read ) { - *outStream << - " struct read_reduce_node *_pt_cursor = node->child;\n"; - } - else { - *outStream << - " struct colm_parse_tree *_pt_cursor = lel->child;\n"; - } - - /* Same length, can concurrently walk with one test. */ - Vector<ProdEl*>::Iter rhs = rhsUsed; - Vector<ProdEl*>::Iter loc = locUsed; - - for ( ; rhs.lte(); rhs++, loc++ ) { - ProdEl *prodEl = *rhs; - if ( prodEl != 0 ) { - while ( cursorPos < rhs.pos() ) { - *outStream << - " _pt_cursor = _pt_cursor->next;\n"; - cursorPos += 1; - } - - if ( prodEl->production == production ) { - if ( prodEl->langEl->type != LangEl::Term ) { - *outStream << - "lel_" << prodEl->langEl->fullName << " *" "_rhs" << rhs.pos() << " = "; - - if ( read ) { - *outStream << "&_pt_cursor->u." << prodEl->langEl->fullName << ";\n"; - } - else { - *outStream << "&((commit_reduce_union*)(_pt_cursor+1))->" << prodEl->langEl->fullName << ";\n"; - } - } - } - - } - } - } - - /* In the second pass we load using a tree cursor. This is for token/tree - * data and locations. 
*/ - - useCursor = false; - for ( Vector<ProdEl*>::Iter rhs = rhsUsed; rhs.lte(); rhs++ ) { - if ( *rhs != 0 && (*rhs)->production == production && - (*rhs)->langEl->type == LangEl::Term ) - { - useCursor = true; - break; - } - } - for ( Vector<ProdEl*>::Iter rhs = treeUsed; rhs.lte(); rhs++ ) { - if ( *rhs != 0 ) { - useCursor = true; - break; - } - } - for ( Vector<ProdEl*>::Iter loc = locUsed; loc.lte(); loc++ ) { - if ( *loc != 0 ) { - useCursor = true; - break; - } - } - - if ( useCursor ) { - int cursorPos = 0; - - if ( read ) { - *outStream << - " read_reduce_node *_tree_cursor = node->child;\n"; - } - else { - *outStream << - " kid_t *_tree_cursor = kid->tree->child;\n"; - } - - /* Same length, can concurrently walk with one test. */ - Vector<ProdEl*>::Iter rhs = rhsUsed; - Vector<ProdEl*>::Iter tree = treeUsed; - Vector<ProdEl*>::Iter loc = locUsed; - - for ( ; rhs.lte(); rhs++, loc++ ) { - - ProdEl *prodEl = *rhs; - if ( prodEl != 0 ) { - if ( prodEl->production == production ) { - if ( prodEl->langEl->type == LangEl::Term ) { - - while ( cursorPos < rhs.pos() ) { - *outStream << - " _tree_cursor = _tree_cursor->next;\n"; - cursorPos += 1; - } - - *outStream << " colm_data *_rhs" << rhs.pos() << " = "; - - if ( read ) { - *outStream << - "&_tree_cursor->data;\n"; - } - else { - *outStream << - "_tree_cursor->tree->tokdata;\n"; - } - } - } - } - - ProdEl *treeEl = *tree; - if ( treeEl != 0 ) { - if ( treeEl->production == production ) { - while ( cursorPos < rhs.pos() ) { - *outStream << - " _tree_cursor = _tree_cursor->next;\n"; - cursorPos += 1; - } - - *outStream << " colm_tree *_tree" << rhs.pos() << " = "; - *outStream << "_tree_cursor->tree;\n"; - } - } - - ProdEl *locEl = *loc; - if ( locEl != 0 ) { - if ( locEl->production == production ) { - - while ( cursorPos < rhs.pos() ) { - *outStream << - " _tree_cursor = _tree_cursor->next;\n"; - cursorPos += 1; - } - - *outStream << - " colm_location *_loc" << loc.pos() << " = "; - - if ( read ) { - 
*outStream << "&_tree_cursor->loc;\n"; - } - else { - *outStream << - "colm_find_location( prg, _tree_cursor->tree );\n"; - } - } - } - } - } -} - -void Compiler::writeRhsRef( Production *production, ReduceTextItem *i ) -{ - if ( i->n > 0 ) { - *outStream << "_rhs" << ( i->n - 1 ); - } - else { - ObjectDef *objectDef = production->prodName->objectDef; - String name( i->txt.data + 1, i->txt.length() - 1 ); - - /* Find the field in the rhsVal using capture field. */ - ObjectField *field = objectDef->rootScope->findField( name ); - if ( field != 0 ) { - for ( Vector<RhsVal>::Iter r = field->rhsVal; - r.lte(); r++ ) - { - if ( r->prodEl->production == production ) - *outStream << "_rhs" << r->prodEl->pos; - } - } - } -} - -void Compiler::writeTreeRef( Production *production, ReduceTextItem *i ) -{ - if ( i->n > 0 ) { - *outStream << "_tree" << ( i->n - 1 ); - } - else { - ObjectDef *objectDef = production->prodName->objectDef; - String name( i->txt.data + 1, i->txt.length() - 1 ); - - /* Find the field in the rhsVal using capture field. */ - ObjectField *field = objectDef->rootScope->findField( name ); - if ( field != 0 ) { - for ( Vector<RhsVal>::Iter r = field->rhsVal; - r.lte(); r++ ) - { - if ( r->prodEl->production == production ) - *outStream << "_tree" << r->prodEl->pos; - } - } - } -} - -void Compiler::writeRhsLoc( Production *production, ReduceTextItem *i ) -{ - if ( i->n > 0 ) { - *outStream << "_loc" << ( i->n - 1 ); - } - else { - ObjectDef *objectDef = production->prodName->objectDef; - String name( i->txt.data + 1, i->txt.length() - 1 ); - - /* Find the field in the rhsVal using capture field. 
*/ - ObjectField *field = objectDef->rootScope->findField( name ); - if ( field != 0 ) { - for ( Vector<RhsVal>::Iter r = field->rhsVal; - r.lte(); r++ ) - { - if ( r->prodEl->production == production ) - *outStream << "_loc" << r->prodEl->pos; - } - } - } -} - -void Compiler::writeLhsRef( Production *production, ReduceTextItem *i ) -{ - *outStream << "_lhs"; -} - -void Compiler::writeHostItemList( Production *production, - const ReduceTextItemList &list ) -{ - for ( ReduceTextItemList::Iter i = list; i.lte(); i++ ) { - switch ( i->type ) { - case ReduceTextItem::LhsRef: - writeLhsRef( production, i ); - break; - case ReduceTextItem::RhsRef: - writeRhsRef( production, i ); - break; - case ReduceTextItem::TreeRef: - writeTreeRef( production, i ); - break; - case ReduceTextItem::RhsLoc: - writeRhsLoc( production, i ); - break; - case ReduceTextItem::Txt: - *outStream << i->txt; - break; - } - } -} - -/* For sorting according to prod name id, then by prod num. */ -struct CmpReduceAction -{ - static int compare( const ReduceAction *ra1 , const ReduceAction *ra2 ) - { - if ( ra1->production->prodName->id < ra2->production->prodName->id ) - return -1; - else if ( ra1->production->prodName->id > ra2->production->prodName->id ) - return 1; - else { - if ( ra1->production->prodNum < ra2->production->prodNum ) - return -1; - else if ( ra1->production->prodNum > ra2->production->prodNum ) - return 1; - } - return 0; - } -}; - -void Compiler::initReductionNeeds( Reduction *reduction ) -{ - reduction->needData = new bool[nextLelId]; - reduction->needLoc = new bool[nextLelId]; - memset( reduction->needData, 0, sizeof(bool)*nextLelId ); - memset( reduction->needLoc, 0, sizeof(bool)*nextLelId ); -} - -void Compiler::writeNeeds() -{ - - *outStream << - "struct reduction_info\n" - "{\n" - " unsigned char need_data[" << nextLelId << "];\n" - " unsigned char need_loc[" << nextLelId << "];\n" - "};\n" - "\n"; - - *outStream << - "static struct reduction_info ri[" << 
rootNamespace->reductions.length() + 1 << "];\n" - "\n"; - - *outStream << - "extern \"C\" void " << objectName << "_init_need()\n" - "{\n"; - - for ( ReductionVect::Iter r = rootNamespace->reductions; r.lte(); r++ ) { - Reduction *reduction = *r; - *outStream << - " memset( ri[" << reduction->id << "]" - ".need_data, 0, sizeof(unsigned char) * " << nextLelId << " );\n" - " memset( ri[" << reduction->id << "]" - ".need_loc, 0, sizeof(unsigned char) * " << nextLelId << " );\n"; - - for ( int i = 0; i < nextLelId; i++ ) { - if ( reduction->needData[i] ) { - *outStream << - " ri[" << reduction->id << "].need_data[" << i << "] = COLM_RN_DATA;\n"; - } - - if ( reduction->needLoc[i] ) { - *outStream << - " ri[" << reduction->id << "].need_loc[" << i << "] = COLM_RN_LOC;\n"; - } - } - } - - *outStream << - "}\n"; - - *outStream << - "extern \"C\" int " << objectName << "_reducer_need_tok( program_t *prg, " - "struct pda_run *pda_run, int id )\n" - "{\n" - " if ( prg->reduce_clean && pda_run->reducer > 0 ) {\n" - /* Note we are forcing the reducer need for data. Enabling requires finding - * a solution for backtracking push. */ - " return COLM_RN_DATA | ri[pda_run->reducer].need_data[id] | \n" - " ri[pda_run->reducer].need_loc[id];\n" - " }\n" - " return COLM_RN_BOTH;\n" - "}\n" - "\n" - "extern \"C\" int " << objectName << "_reducer_need_ign( program_t *prg, struct pda_run *pda_run )\n" - "{\n" - // Using this requires finding a solution for backtracking push back. 
- //" if ( pda_run->reducer > 0 )\n" - //" return COLM_RN_NEITHER;\n" - " return COLM_RN_BOTH;\n" - "}\n"; -} - -void Compiler::writeReduceStructs() -{ - for ( ReductionVect::Iter r = rootNamespace->reductions; r.lte(); r++ ) { - for ( ReduceNonTermList::Iter rdi = (*r)->reduceNonTerms; rdi.lte(); rdi++ ) { - *outStream << - "struct lel_" << rdi->nonTerm->uniqueType->langEl->fullName << "\n" - "{\n"; - - *outStream << - "#line " << rdi->loc.line << "\"" << rdi->loc.fileName << "\"\n"; - - writeHostItemList( 0, rdi->itemList ); - - *outStream << - "};\n"; - } - } - - *outStream << - "union commit_reduce_union\n" - "{\n"; - - for ( ReductionVect::Iter r = rootNamespace->reductions; r.lte(); r++ ) { - for ( ReduceNonTermList::Iter rdi = (*r)->reduceNonTerms; rdi.lte(); rdi++ ) { - LangEl *langEl = rdi->nonTerm->uniqueType->langEl; - *outStream << - " lel_" << langEl->fullName << " " << langEl->fullName << ";\n"; - } - } - - *outStream << - "};\n" - "\n"; - - *outStream << - "extern \"C\" long " << objectName << "_commit_union_sz( int reducer )\n" - "{\n" - " return sizeof( commit_reduce_union );\n" - "}\n"; - - *outStream << - "struct read_reduce_node\n" - "{\n" - " std::string name;\n" - " int id;\n" - " int prod_num;\n" - " colm_location loc;\n" - " colm_data data;\n" - " commit_reduce_union u;\n" - " read_reduce_node *next;\n" - " read_reduce_node *child;\n" - "};\n" - "\n"; -} - - -void Compiler::writeUnescape() -{ - *outStream << - "static void unescape( colm_data *tokdata )\n" - "{\n" - " unsigned char *src = (unsigned char*)tokdata->data, *dest = (unsigned char*)tokdata->data;\n" - " while ( *src != 0 ) {\n" - " if ( *src == '\\\\' ) {\n" - " unsigned int i;\n" - " char buf[3];\n" - "\n" - " src += 1;\n" - " buf[0] = *src++;\n" - " buf[1] = *src++;\n" - " buf[2] = 0;\n" - "\n" - " sscanf( buf, \"%x\", &i );\n" - " *dest++ = (unsigned char)i;\n" - "\n" - " tokdata->length -= 2;\n" - " }\n" - " else {\n" - " *dest++ = *src++;\n" - " }\n" - " }\n" - " *dest = 
0;\n" - "}\n" - "\n"; -} - -void Compiler::writeReduceDispatchers() -{ - *outStream << - "\n" - "extern \"C\" void " << objectName << "_commit_reduce_forward( program_t *prg, tree_t **root,\n" - " struct pda_run *pda_run, parse_tree_t *pt )\n" - "{\n" - " switch ( pda_run->reducer ) {\n"; - - for ( ReductionVect::Iter r = rootNamespace->reductions; r.lte(); r++ ) { - Reduction *reduction = *r; - if ( reduction->parserBased ) { - *outStream << - " case " << reduction->id << ":\n" - " ((" << reduction->name << "*)prg->red_ctx)->commit_reduce_forward( " - "prg, root, pda_run, pt );\n" - " break;\n"; - } - } - - *outStream << - " }\n" - "}\n" - "\n"; - - *outStream << - "extern \"C\" void " << objectName << "_read_reduce( program_t *prg, int reducer, stream_t *stream )\n" - "{\n" - " switch ( reducer ) {\n"; - - for ( ReductionVect::Iter r = rootNamespace->reductions; r.lte(); r++ ) { - Reduction *reduction = *r; - if ( reduction->postfixBased ) { - *outStream << - " case " << reduction->id << ":\n" - " ((" << reduction->name << "*)prg->red_ctx)->read_reduce_forward( prg, stream->impl->file );\n" - " break;\n"; - } - } - - *outStream << - " }\n" - "}\n" - "\n"; -} - -void Compiler::computeNeeded() -{ - for ( ReductionVect::Iter r = rootNamespace->reductions; r.lte(); r++ ) { - Reduction *reduction = *r; - initReductionNeeds( reduction ); - - for ( ReduceActionList::Iter rdi = reduction->reduceActions; rdi.lte(); rdi++ ) - computeNeeded( reduction, rdi->production, rdi->itemList ); - } -} - -void Compiler::writeParseReduce( Reduction *reduction ) -{ - *outStream << - "void " << reduction->name << "::commit_reduce_forward( program_t *prg, \n" - " tree_t **root, struct pda_run *pda_run, parse_tree_t *pt )\n" - "{\n" - " tree_t **sp = root;\n" - "\n" - " parse_tree_t *lel = pt;\n" - " kid_t *kid = pt->shadow;\n" - "\n" - "recurse:\n" - "\n" - " if ( lel->child != 0 ) {\n" - " /* There are children. Must process all children first. 
*/\n" - " vm_push_ptree( lel );\n" - " vm_push_kid( kid );\n" - "\n" - " lel = lel->child;\n" - " kid = tree_child( prg, kid->tree );\n" - " while ( lel != 0 ) {\n" - " goto recurse;\n" - " resume:\n" - " lel = lel->next;\n" - " kid = kid->next;\n" - " }\n" - "\n" - " kid = vm_pop_kid();\n" - " lel = vm_pop_ptree();\n" - " }\n" - "\n" - " if ( !( lel->flags & PF_COMMITTED ) ) {\n" - " /* Now can execute the reduction action. */\n" - " {\n"; - - - *outStream << - " { switch ( kid->tree->id ) {\n"; - - /* Populate a vector with the reduce actions. */ - Vector<ReduceAction*> actions; - actions.setAsNew( reduction->reduceActions.length() ); - long pos = 0; - for ( ReduceActionList::Iter rdi = reduction->reduceActions; rdi.lte(); rdi++ ) - actions[pos++] = rdi; - - /* Sort it by lhs id, then prod num. */ - MergeSort<ReduceAction*, CmpReduceAction> sortActions; - sortActions.sort( actions.data, actions.length() ); - - ReduceAction *last = 0; - - for ( Vector<ReduceAction*>::Iter rdi = actions; rdi.lte(); rdi++ ) { - ReduceAction *action = *rdi; - int lelId = action->production->prodName->id; - int prodNum = action->production->prodNum; - - /* Maybe close off the last prod. */ - if ( last != 0 && - last->production->prodName != action->production->prodName ) - { - *outStream << - " break;\n" - " }\n"; - - } - - /* Maybe open a new prod. 
*/ - if ( last == 0 || - last->production->prodName != action->production->prodName ) - { - *outStream << - " case " << lelId << ": {\n"; - } - - *outStream << - " if ( kid->tree->prod_num == " << prodNum << " ) {\n"; - - - loadRefs( reduction, action->production, action->itemList, false ); - - *outStream << - "#line " << action->loc.line << " \"" << action->loc.fileName << "\"\n"; - - writeHostItemList( action->production, action->itemList ); - - *outStream << - " }\n"; - - last = action; - } - - if ( last != 0 ) { - *outStream << - " break;\n" - " }\n"; - } - - *outStream << - " } }\n" - " }\n" - " }\n" - "\n" - " commit_clear_parse_tree( prg, sp, pda_run, lel->child );\n" - " if ( prg->reduce_clean ) {\n" - " commit_clear_kid_list( prg, sp, kid->tree->child );\n" - " kid->tree->child = 0;\n" - " kid->tree->flags &= ~( AF_LEFT_IGNORE | AF_RIGHT_IGNORE );\n" - " }\n" - " lel->child = 0;\n" - "\n" - " if ( sp != root )\n" - " goto resume;\n" - " pt->flags |= PF_COMMITTED;\n" - "}\n" - "\n"; - -} - -void Compiler::writeParseReduce() -{ - for ( ReductionVect::Iter r = rootNamespace->reductions; r.lte(); r++ ) { - Reduction *reduction = *r; - if ( reduction->parserBased ) - writeParseReduce( reduction ); - } -} - -void Compiler::writePostfixReduce( Reduction *reduction ) -{ - *outStream << - "void " << reduction->name << "::read_reduce_forward( program_t *prg, FILE *file )\n" - "{\n" - " __gnu_cxx::stdio_filebuf<char> fbuf( file, std::ios::in|std::ios::out|std::ios::app );\n" - " std::iostream in( &fbuf );\n" - " std::string type, tok, text;\n" - " long _id, line, column, byte, prod_num, children;\n" - " read_reduce_node sentinal;\n" - " sentinal.next = 0;\n" - " read_reduce_node *stack = &sentinal, *last = 0;\n" - " while ( in >> type ) {\n" - " /* read. 
*/\n" - " if ( type == \"t\" ) {\n" - " in >> tok >> _id >> line >> column >> byte >> text;\n" - " read_reduce_node *node = new read_reduce_node;\n" - " node->name = tok;\n" - " node->id = _id;\n" - " node->loc.name = \"<>\";\n" - " node->loc.line = line;\n" - " node->loc.column = column;\n" - " node->loc.byte = byte;\n" - " node->data.data = strdup( text.c_str() );\n" - " node->data.length = text.size();\n" - " unescape( &node->data );\n" - "\n" - " node->next = stack;\n" - " node->child = 0;\n" - " stack = node;\n" - " }\n" - " else if ( type == \"r\" ) {\n" - " in >> tok >> _id >> prod_num >> children;\n" - " read_reduce_node *node = new read_reduce_node;\n" - " memset( &node->loc, 0, sizeof(colm_location) );\n" - " memset( &node->data, 0, sizeof(colm_data) );\n" - " node->name = tok;\n" - " node->id = _id;\n" - " node->prod_num = prod_num;\n" - " node->child = 0;\n" - " while ( children-- > 0 ) {\n" - " last = stack;\n" - " stack = stack->next;\n" - " last->next = node->child;\n" - " node->child = last;\n" - " }\n" - "\n" - " node->next = stack;\n" - " stack = node;\n" - "\n" - " { switch ( node->id ) {\n"; - - /* Populate a vector with the reduce actions. */ - Vector<ReduceAction*> actions; - actions.setAsNew( reduction->reduceActions.length() ); - long pos = 0; - for ( ReduceActionList::Iter rdi = reduction->reduceActions; rdi.lte(); rdi++ ) - actions[pos++] = rdi; - - /* Sort it by lhs id, then prod num. */ - MergeSort<ReduceAction*, CmpReduceAction> sortActions; - sortActions.sort( actions.data, actions.length() ); - - ReduceAction *last = 0; - - for ( Vector<ReduceAction*>::Iter rdi = actions; rdi.lte(); rdi++ ) { - ReduceAction *action = *rdi; - int lelId = action->production->prodName->id; - int prodNum = action->production->prodNum; - - /* Maybe close off the last prod. */ - if ( last != 0 && - last->production->prodName != action->production->prodName ) - { - *outStream << - " break;\n" - " }\n"; - } - - /* Maybe open a new prod. 
*/ - if ( last == 0 || - last->production->prodName != action->production->prodName ) - { - *outStream << - " case " << lelId << ": {\n"; - } - - *outStream << - " if ( node->prod_num == " << prodNum << " ) {\n"; - - loadRefs( reduction, action->production, action->itemList, true ); - - *outStream << - "#line " << action->loc.line << "\"" << action->loc.fileName << "\"\n"; - - writeHostItemList( action->production, action->itemList ); - - *outStream << - " }\n"; - - last = action; - } - - if ( last != 0 ) { - *outStream << - " break;\n" - " }\n"; - } - - *outStream << - " } }\n" - " /* delete the children */\n" - " last = node->child;\n" - " while ( last != 0 ) {\n" - " read_reduce_node *next = last->next;\n" - " delete last;\n" - " last = next;\n" - " }\n" - " }\n" - " }\n" - "}\n" - "\n"; -} - -void Compiler::writePostfixReduce() -{ - bool unescape = false; - for ( ReductionVect::Iter r = rootNamespace->reductions; r.lte(); r++ ) { - Reduction *reduction = *r; - if ( reduction->postfixBased ) { - if ( !unescape ) - writeUnescape(); - - writePostfixReduce( reduction ); - } - } -} - -void Compiler::writeCommit() -{ - *outStream << - "#include <colm/pdarun.h>\n" - "#include <colm/bytecode.h>\n" - "#include <colm/defs.h>\n" - "#include <colm/input.h>\n" - "#include <colm/tree.h>\n" - "#include <colm/program.h>\n" - "#include <colm/colm.h>\n" - "\n" - "#include <stdio.h>\n" - "#include <stdlib.h>\n" - "#include <string.h>\n" - "#include <assert.h>\n" - "#include <errno.h>\n" - "\n" - "#include <iostream>\n" - /* Not available on MAC OS. 
*/ - // "#include <ext/stdio_filebuf.h>\n" - "#include <fstream>\n" - "\n" - "using std::endl;\n" - "\n" - "#include \"reducer.h\"\n" - "\n"; - - computeNeeded(); - - writeReduceStructs(); - - writeReduceDispatchers(); - - //writePostfixReduce(); - - writeParseReduce(); - - writeNeeds(); -} diff --git a/src/resolve.cc b/src/resolve.cc deleted file mode 100644 index 15ec00cb..00000000 --- a/src/resolve.cc +++ /dev/null @@ -1,976 +0,0 @@ -/* - * Copyright 2009-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <stdbool.h> -#include <iostream> -#include "compiler.h" - -/* - * Type Resolve. 
- */ - -using std::cout; -using std::cerr; -using std::endl; - -Namespace *TypeRef::resolveNspace( Compiler *pd ) -{ - if ( parsedVarRef != 0 && !nspaceQual->thisOnly() ) { - UniqueType *ut = parsedVarRef->lookup( pd ); - return ut->langEl->nspace; - } - else if ( parsedTypeRef != 0 && !nspaceQual->thisOnly() ) { - UniqueType *ut = parsedTypeRef->resolveType( pd ); - return ut->langEl->nspace; - } - else { - /* Lookup up the qualifiction and then the name. */ - return nspaceQual->getQual( pd ); - } -} - -UniqueType *TypeRef::resolveTypeName( Compiler *pd ) -{ - nspace = resolveNspace( pd ); - - if ( nspace == 0 ) - error(loc) << "do not have region for resolving reference" << endp; - - while ( nspace != 0 ) { - /* Search for the token in the region by typeName. */ - TypeMapEl *inDict = nspace->typeMap.find( typeName ); - - if ( inDict != 0 ) { - switch ( inDict->type ) { - /* Defer to the typeRef we are an alias of. We need to guard - * against loops here. */ - case TypeMapEl::AliasType: { - return inDict->typeRef->resolveType( pd ); - } - - case TypeMapEl::LangElType: { - UniqueType *ut = pd->findUniqueType( TYPE_TREE, inDict->value ); - return ut; - } - case TypeMapEl::StructType: { - UniqueType *ut = pd->findUniqueType( TYPE_STRUCT, inDict->structEl ); - return ut; - } - } - } - - if ( nspaceQual->thisOnly() ) - break; - - nspace = nspace->parentNamespace; - } - - error(loc) << "unknown type " << typeName << " in typeof expression" << endp; - return 0; -} - -UniqueType *TypeRef::resolveTypeLiteral( Compiler *pd ) -{ - /* Lookup up the qualifiction and then the name. */ - nspace = resolveNspace( pd ); - - if ( nspace == 0 ) - error(loc) << "do not have region for resolving reference" << endp; - - /* Interpret escape sequences and remove quotes. 
*/ - bool unusedCI; - String interp; - prepareLitString( interp, unusedCI, pdaLiteral->data, - pdaLiteral->loc ); - - while ( nspace != 0 ) { - LiteralDictEl *ldel = nspace->literalDict.find( interp ); - - if ( ldel != 0 ) - return pd->findUniqueType( TYPE_TREE, ldel->value->tokenDef->tdLangEl ); - - if ( nspaceQual->thisOnly() ) - break; - - nspace = nspace->parentNamespace; - } - - error(loc) << "unknown type " << typeName << " in typeof expression" << endp; - return 0; -} - -bool TypeRef::uniqueGeneric( UniqueGeneric *&inMap, Compiler *pd, - const UniqueGeneric &searchKey ) -{ - bool inserted = false; - inMap = pd->uniqueGenericMap.find( &searchKey ); - if ( inMap == 0 ) { - inserted = true; - inMap = new UniqueGeneric( searchKey ); - pd->uniqueGenericMap.insert( inMap ); - } - return inserted; -} - -StructEl *TypeRef::declareListEl( Compiler *pd, TypeRef *valType ) -{ - static long vlistElId = 1; - String name( 32, "list_el_%d", vlistElId++ ); - ObjectDef *objectDef = ObjectDef::cons( ObjectDef::StructType, - name, pd->nextObjectId++ ); - - StructDef *structDef = new StructDef( loc, name, objectDef ); - - pd->rootNamespace->structDefList.append( structDef ); - - /* Value Element. */ - String id = "value"; - ObjectField *elValObjField = ObjectField::cons( internal, - ObjectField::StructFieldType, valType, id ); - - objectDef->rootScope->insertField( elValObjField->name, elValObjField ); - - /* Typeref for the struct. Used for pointers. */ - NamespaceQual *nspaceQual = NamespaceQual::cons( pd->rootNamespace ); - TypeRef *selfTypeRef = TypeRef::cons( InputLoc(), nspaceQual, name, RepeatNone ); - - /* Type ref for the list pointers psuedo type. 
*/ - TypeRef *elTr = TypeRef::cons( InputLoc(), TypeRef::ListPtrs, 0, selfTypeRef, 0 ); - - ObjectField *of = ObjectField::cons( InputLoc(), - ObjectField::GenericElementType, elTr, name ); - - objectDef->rootScope->insertField( of->name, of ); - - return declareStruct( pd, pd->rootNamespace, name, structDef ); -} - -void ConsItemList::resolve( Compiler *pd ) -{ - /* Types in constructor. */ - for ( ConsItemList::Iter item = first(); item.lte(); item++ ) { - switch ( item->type ) { - case ConsItem::LiteralType: - /* Use pdaFactor reference resolving. */ - pd->resolveProdEl( item->prodEl ); - break; - case ConsItem::InputText: - break; - case ConsItem::ExprType: - item->expr->resolve( pd ); - break; - } - } -} - -UniqueType *TypeRef::resolveTypeListEl( Compiler *pd ) -{ - TypeRef *valTr = typeRef1; - UniqueType *utValue = valTr->resolveType( pd ); - - UniqueGeneric *inMap = 0, searchKey( UniqueGeneric::ListEl, utValue ); - if ( uniqueGeneric( inMap, pd, searchKey ) ) - inMap->structEl = declareListEl( pd, valTr ); - - return pd->findUniqueType( TYPE_STRUCT, inMap->structEl ); -} - -UniqueType *TypeRef::resolveTypeList( Compiler *pd ) -{ - nspace = pd->rootNamespace; - - UniqueType *utValue = typeRef1->resolveType( pd ); - - if ( utValue->typeId != TYPE_STRUCT ) - error( loc ) << "only structs can be list elements" << endp; - - /* Find the list element. 
*/ - ObjectDef *elObjDef = utValue->structEl->structDef->objectDef; - UniqueType *ptrsUt = pd->findUniqueType( TYPE_LIST_PTRS ); - ObjectField *listEl = elObjDef->findFieldType( pd, ptrsUt ); - - if ( !listEl ) - error( loc ) << "could not find list element in type ref" << endp; - - UniqueGeneric *inMap = 0, searchKey( UniqueGeneric::List, utValue ); - if ( uniqueGeneric( inMap, pd, searchKey ) ) { - - GenericType *generic = new GenericType( GEN_LIST, - pd->nextGenericId++, typeRef1, 0, typeRef2, listEl ); - - nspace->genericList.append( generic ); - - generic->declare( pd, nspace ); - - inMap->generic = generic; - } - - generic = inMap->generic; - return pd->findUniqueType( TYPE_GENERIC, inMap->generic ); -} - -StructEl *TypeRef::declareMapElStruct( Compiler *pd, TypeRef *keyType, TypeRef *valType ) -{ - static long vlistElId = 1; - String name( 32, "map_el_%d", vlistElId++ ); - ObjectDef *objectDef = ObjectDef::cons( ObjectDef::StructType, - name, pd->nextObjectId++ ); - - StructDef *structDef = new StructDef( loc, name, objectDef ); - - pd->rootNamespace->structDefList.append( structDef ); - - /* Value Element. */ - String id = "value"; - ObjectField *elValObjField = ObjectField::cons( internal, - ObjectField::StructFieldType, valType, id ); - - objectDef->rootScope->insertField( elValObjField->name, elValObjField ); - - /* Typeref for the pointers. 
*/ - NamespaceQual *nspaceQual = NamespaceQual::cons( pd->rootNamespace ); - TypeRef *selfTypeRef = TypeRef::cons( InputLoc(), nspaceQual, name, RepeatNone ); - - TypeRef *elTr = TypeRef::cons( InputLoc(), TypeRef::MapPtrs, 0, selfTypeRef, keyType ); - - ObjectField *of = ObjectField::cons( InputLoc(), - ObjectField::GenericElementType, elTr, name ); - - objectDef->rootScope->insertField( of->name, of ); - - StructEl *sel = declareStruct( pd, pd->rootNamespace, name, structDef ); - return sel; -} - -UniqueType *TypeRef::resolveTypeMapEl( Compiler *pd ) -{ - TypeRef *keyType = typeRef1; - TypeRef *valType = typeRef2; - - UniqueType *utKey = keyType->resolveType( pd ); - UniqueType *utValue = valType->resolveType( pd ); - - UniqueGeneric *inMap = 0, searchKey( UniqueGeneric::MapEl, utKey, utValue ); - if ( uniqueGeneric( inMap, pd, searchKey ) ) - inMap->structEl = declareMapElStruct( pd, keyType, valType ); - - return pd->findUniqueType( TYPE_STRUCT, inMap->structEl ); -} - - -UniqueType *TypeRef::resolveTypeMap( Compiler *pd ) -{ - nspace = pd->rootNamespace; - - UniqueType *utKey = typeRef1->resolveType( pd ); - UniqueType *utEl = typeRef2->resolveType( pd ); - - if ( utEl->typeId != TYPE_STRUCT ) - error( loc ) << "only structs can be map elements" << endp; - - /* Find the list element. 
*/ - ObjectDef *elObjDef = utEl->structEl->structDef->objectDef; - UniqueType *ptrsUt = pd->findUniqueType( TYPE_MAP_PTRS ); - ObjectField *mapEl = elObjDef->findFieldType( pd, ptrsUt ); - - if ( !mapEl ) - error( loc ) << "could not find map element in type ref" << endp; - - UniqueGeneric *inMap = 0, searchKey( UniqueGeneric::Map, utKey, utEl ); - - if ( uniqueGeneric( inMap, pd, searchKey ) ) { - - GenericType *generic = new GenericType( GEN_MAP, - pd->nextGenericId++, typeRef2, typeRef1, typeRef3, mapEl ); - - nspace->genericList.append( generic ); - - generic->declare( pd, nspace ); - - inMap->generic = generic; - } - - generic = inMap->generic; - return pd->findUniqueType( TYPE_GENERIC, inMap->generic ); -} - -UniqueType *TypeRef::resolveTypeParser( Compiler *pd ) -{ - nspace = pd->rootNamespace; - - UniqueType *utParse = typeRef1->resolveType( pd ); - - UniqueGeneric *inMap = 0, searchKey( UniqueGeneric::Parser, utParse ); - if ( uniqueGeneric( inMap, pd, searchKey ) ) { - GenericType *generic = new GenericType( GEN_PARSER, - pd->nextGenericId++, typeRef1, 0, 0, 0 ); - - nspace->genericList.append( generic ); - - generic->declare( pd, nspace ); - - inMap->generic = generic; - } - - generic = inMap->generic; - return pd->findUniqueType( TYPE_GENERIC, inMap->generic ); -} - - -/* - * End object based list/map - */ - - -UniqueType *TypeRef::resolveTypeRef( Compiler *pd ) -{ - typeRef1->resolveType( pd ); - return pd->findUniqueType( TYPE_REF, typeRef1->uniqueType->langEl ); -} - -void TypeRef::resolveRepeat( Compiler *pd ) -{ - if ( uniqueType->typeId != TYPE_TREE ) - error(loc) << "cannot repeat non-tree type" << endp; - - UniqueRepeat searchKey( repeatType, uniqueType->langEl ); - UniqueRepeat *uniqueRepeat = pd->uniqeRepeatMap.find( &searchKey ); - if ( uniqueRepeat == 0 ) { - uniqueRepeat = new UniqueRepeat( repeatType, uniqueType->langEl ); - pd->uniqeRepeatMap.insert( uniqueRepeat ); - - LangEl *declLangEl = 0; - - switch ( repeatType ) { - case 
RepeatRepeat: { - /* If the factor is a repeat, create the repeat element and link the - * factor to it. */ - String repeatName( 128, "_repeat_%s", typeName.data ); - declLangEl = pd->makeRepeatProd( loc, nspace, repeatName, uniqueType ); - break; - } - case RepeatList: { - /* If the factor is a repeat, create the repeat element and link the - * factor to it. */ - String listName( 128, "_list_%s", typeName.data ); - declLangEl = pd->makeListProd( loc, nspace, listName, uniqueType ); - break; - } - case RepeatOpt: { - /* If the factor is an opt, create the opt element and link the factor - * to it. */ - String optName( 128, "_opt_%s", typeName.data ); - declLangEl = pd->makeOptProd( loc, nspace, optName, uniqueType ); - break; - } - - case RepeatNone: - break; - } - - uniqueRepeat->declLangEl = declLangEl; - declLangEl->repeatOf = uniqueRepeat->langEl; - } - - uniqueType = pd->findUniqueType( TYPE_TREE, uniqueRepeat->declLangEl ); -} - -UniqueType *TypeRef::resolveIterator( Compiler *pd ) -{ - UniqueType *searchUT = searchTypeRef->resolveType( pd ); - - /* Lookup the iterator call. Make sure it is an iterator. */ - VarRefLookup lookup = iterCall->langTerm->varRef->lookupIterCall( pd ); - if ( lookup.objMethod->iterDef == 0 ) { - error(loc) << "attempt to iterate using something " - "that is not an iterator" << endp; - } - - /* Now that we have done the iterator call lookup we can make the type - * reference for the object field. */ - UniqueType *iterUniqueType = pd->findUniqueType( TYPE_ITER, lookup.objMethod->iterDef ); - - iterDef = lookup.objMethod->iterDef; - searchUniqueType = searchUT; - - return iterUniqueType; -} - - -UniqueType *TypeRef::resolveType( Compiler *pd ) -{ - if ( uniqueType != 0 ) - return uniqueType; - - /* Not an iterator. May be a reference. 
*/ - switch ( type ) { - case Name: - uniqueType = resolveTypeName( pd ); - break; - case Literal: - uniqueType = resolveTypeLiteral( pd ); - break; - case Parser: - uniqueType = resolveTypeParser( pd ); - break; - case Ref: - uniqueType = resolveTypeRef( pd ); - break; - case Iterator: - uniqueType = resolveIterator( pd ); - break; - - case List: - uniqueType = resolveTypeList( pd ); - break; - case ListPtrs: - uniqueType = pd->findUniqueType( TYPE_LIST_PTRS ); - break; - case ListEl: - uniqueType = resolveTypeListEl( pd ); - break; - - case Map: - uniqueType = resolveTypeMap( pd ); - break; - case MapPtrs: - uniqueType = pd->findUniqueType( TYPE_MAP_PTRS ); - break; - case MapEl: - uniqueType = resolveTypeMapEl( pd ); - break; - - case Unspecified: - /* No lookup needed, unique type(s) set when constructed. */ - break; - } - - if ( repeatType != RepeatNone ) - resolveRepeat( pd ); - - return uniqueType; -} - -void Compiler::resolveProdEl( ProdEl *prodEl ) -{ - prodEl->typeRef->resolveType( this ); - prodEl->langEl = prodEl->typeRef->uniqueType->langEl; -} - -void LangTerm::resolveFieldArgs( Compiler *pd ) -{ - /* Initialization expressions. */ - if ( fieldInitArgs != 0 ) { - for ( FieldInitVect::Iter pi = *fieldInitArgs; pi.lte(); pi++ ) - (*pi)->expr->resolve( pd ); - } -} - -void LangTerm::resolve( Compiler *pd ) -{ - switch ( type ) { - case ConstructType: - typeRef->resolveType( pd ); - - resolveFieldArgs( pd ); - - /* Types in constructor. */ - constructor->list->resolve( pd ); - break; - - case VarRefType: - break; - - case MakeTreeType: - case MakeTokenType: - case MethodCallType: - if ( args != 0 ) { - for ( CallArgVect::Iter pe = *args; pe.lte(); pe++ ) - (*pe)->expr->resolve( pd ); - } - break; - - case NumberType: - case StringType: - break; - - case ProdCompareType: - /* If it has a match expression go into that. 
*/ - if ( expr != 0 ) - expr->resolve( pd ); - break; - - case MatchType: - for ( PatternItemList::Iter item = *pattern->list; item.lte(); item++ ) { - switch ( item->form ) { - case PatternItem::TypeRefForm: - /* Use pdaFactor reference resolving. */ - pd->resolveProdEl( item->prodEl ); - break; - case PatternItem::InputTextForm: - /* Nothing to do here. */ - break; - } - } - - break; - case NewType: - /* Init args, then the new type. */ - resolveFieldArgs( pd ); - typeRef->resolveType( pd ); - break; - case TypeIdType: - typeRef->resolveType( pd ); - break; - case SearchType: - typeRef->resolveType( pd ); - break; - case NilType: - case TrueType: - case FalseType: - break; - - case ParseType: - case ParseTreeType: - case ParseStopType: - typeRef->resolveType( pd ); - - resolveFieldArgs( pd ); - - parserText->list->resolve( pd ); - break; - - case SendType: -// for ( CallArgVect::Iter pex = exprPtrVect->last(); pex.gtb(); pex-- ) -// (*pex)->expr->resolve( pd ); - parserText->list->resolve( pd ); - break; - case SendTreeType: - case EmbedStringType: - break; - - case CastType: - typeRef->resolveType( pd ); - expr->resolve( pd ); - break; - } -} - -void LangVarRef::resolve( Compiler *pd ) const -{ -} - -void LangExpr::resolve( Compiler *pd ) const -{ - switch ( type ) { - case BinaryType: { - left->resolve( pd ); - right->resolve( pd ); - break; - } - case UnaryType: { - right->resolve( pd ); - break; - } - case TermType: { - term->resolve( pd ); - break; - } - } -} - -void IterCall::resolve( Compiler *pd ) const -{ - switch ( form ) { - case Call: - langTerm->resolve( pd ); - break; - case Expr: - langExpr->resolve( pd ); - break; - } -} - -void LangStmt::resolveForIter( Compiler *pd ) const -{ - iterCall->resolve( pd ); - - /* Search type ref. */ - typeRef->resolveType( pd ); - - /* Iterator type ref. */ - objField->typeRef->resolveType( pd ); - - /* Resolve the statements. 
*/ - for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ ) - stmt->resolve( pd ); -} - -void LangStmt::resolve( Compiler *pd ) const -{ - switch ( type ) { - case ExprType: { - /* Evaluate the exrepssion, then pop it immediately. */ - expr->resolve( pd ); - break; - } - case IfType: { - /* Evaluate the test. */ - expr->resolve( pd ); - - /* Analyze the if true branch. */ - for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ ) - stmt->resolve( pd ); - - if ( elsePart != 0 ) - elsePart->resolve( pd ); - - break; - } - case ElseType: { - for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ ) - stmt->resolve( pd ); - break; - } - case RejectType: - break; - case WhileType: { - expr->resolve( pd ); - - /* Compute the while block. */ - for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ ) - stmt->resolve( pd ); - break; - } - case AssignType: { - /* Evaluate the exrepssion. */ - expr->resolve( pd ); - break; - } - case ForIterType: { - resolveForIter( pd ); - break; - } - case ReturnType: { - /* Evaluate the exrepssion. */ - expr->resolve( pd ); - break; - } - case BreakType: { - break; - } - case YieldType: { - /* take a reference and yield it. Immediately reset the referece. 
*/ - varRef->resolve( pd ); - break; - } - } -} - -void ObjectDef::resolve( Compiler *pd ) -{ - for ( FieldList::Iter fli = fieldList; fli.lte(); fli++ ) { - ObjectField *field = fli->value; - - if ( field->typeRef != 0 ) - field->typeRef->resolveType( pd ); - } -} - -void CodeBlock::resolve( Compiler *pd ) const -{ - if ( localFrame != 0 ) { - localFrame->resolve( pd ); - } - - for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ ) - stmt->resolve( pd ); -} - -void Compiler::resolveFunction( Function *func ) -{ - if ( func->typeRef != 0 ) - func->typeRef->resolveType( this ); - - for ( ParameterList::Iter param = *func->paramList; param.lte(); param++ ) - param->typeRef->resolveType( this ); - - CodeBlock *block = func->codeBlock; - block->resolve( this ); -} - -void Compiler::resolveInHost( Function *func ) -{ - if ( func->typeRef != 0 ) - func->typeRef->resolveType( this ); - - for ( ParameterList::Iter param = *func->paramList; param.lte(); param++ ) - param->typeRef->resolveType( this ); -} - - -void Compiler::resolvePreEof( TokenRegion *region ) -{ - CodeBlock *block = region->preEofBlock; - block->resolve( this ); -} - -void Compiler::resolveRootBlock() -{ - CodeBlock *block = rootCodeBlock; - block->resolve( this ); -} - -void Compiler::resolveTranslateBlock( LangEl *langEl ) -{ - CodeBlock *block = langEl->transBlock; - block->resolve( this ); -} - -void Compiler::resolveReductionCode( Production *prod ) -{ - CodeBlock *block = prod->redBlock; - block->resolve( this ); -} - -void Compiler::resolveParseTree() -{ - /* Compile functions. */ - for ( FunctionList::Iter f = functionList; f.lte(); f++ ) - resolveFunction( f ); - - for ( FunctionList::Iter f = inHostList; f.lte(); f++ ) - resolveInHost( f ); - - /* Compile the reduction code. */ - for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) { - if ( prod->redBlock != 0 ) - resolveReductionCode( prod ); - } - - /* Compile the token translation code. 
*/ - for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { - if ( lel->transBlock != 0 ) - resolveTranslateBlock( lel ); - } - - /* Compile preeof blocks. */ - for ( RegionList::Iter r = regionList; r.lte(); r++ ) { - if ( r->preEofBlock != 0 ) - resolvePreEof( r ); - } - - /* Compile the init code */ - resolveRootBlock( ); - - rootLocalFrame->resolve( this ); - - /* Init all user object fields (need consistent size). */ - for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { - ObjectDef *objDef = lel->objectDef; - if ( objDef != 0 ) { - /* Init all fields of the object. */ - for ( FieldList::Iter f = objDef->fieldList; f.lte(); f++ ) - f->value->typeRef->resolveType( this ); - } - } - - for ( StructElList::Iter sel = structEls; sel.lte(); sel++ ) { - ObjectDef *objDef = sel->structDef->objectDef; - for ( FieldList::Iter f = objDef->fieldList; f.lte(); f++ ) - f->value->typeRef->resolveType( this ); - } - - /* Init all fields of the global object. */ - for ( FieldList::Iter f = globalObjectDef->fieldList; f.lte(); f++ ) { - f->value->typeRef->resolveType( this ); - } -} - -/* Resolves production els and computes the precedence of each prod. */ -void Compiler::resolveProductionEls() -{ - /* NOTE: as we process this list it may be growing! */ - for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) { - /* First resolve. */ - for ( ProdElList::Iter prodEl = *prod->prodElList; prodEl.lte(); prodEl++ ) - resolveProdEl( prodEl ); - - /* If there is no explicit precdence ... */ - if ( prod->predOf == 0 ) { - /* Compute the precedence of the productions. */ - for ( ProdElList::Iter prodEl = prod->prodElList->last(); prodEl.gtb(); prodEl-- ) { - /* Production inherits the precedence of the last terminal with - * precedence. */ - if ( prodEl->langEl->predType != PredNone ) { - prod->predOf = prodEl->langEl; - break; - } - } - } - } -} - -void Compiler::makeTerminalWrappers() -{ - /* Make terminal language elements corresponding to each nonterminal in - * the grammar. 
*/ - for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { - if ( lel->type == LangEl::NonTerm ) { - String name( lel->name.length() + 5, "_T_%s", lel->name.data ); - LangEl *termDup = new LangEl( lel->nspace, name, LangEl::Term ); - - /* Give the dup the attributes of the nonterminal. This ensures - * that the attributes are allocated when patterns and - * constructors are parsed. */ - termDup->objectDef = lel->objectDef; - - langEls.append( termDup ); - lel->termDup = termDup; - termDup->termDup = lel; - } - } -} - -void Compiler::makeEofElements() -{ - /* Make eof language elements for each user terminal. This is a bit excessive and - * need to be reduced to the ones that we need parsers for, but we don't know that yet. - * Another pass before this one is needed. */ - for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { - if ( lel->eofLel == 0 && - lel != eofLangEl && - lel != errorLangEl && - lel != noTokenLangEl /* && - !( lel->tokenInstance == 0 || lel->tokenInstance->dupOf == 0 ) */ ) - { - String name( lel->name.length() + 5, "_eof_%s", lel->name.data ); - LangEl *eofLel = new LangEl( lel->nspace, name, LangEl::Term ); - - langEls.append( eofLel ); - lel->eofLel = eofLel; - eofLel->eofLel = lel; - eofLel->isEOF = true; - } - } -} - -void Compiler::resolvePrecedence() -{ - for ( PredDeclList::Iter predDecl = predDeclList; predDecl != 0; predDecl++ ) { - predDecl->typeRef->resolveType( this ); - - LangEl *langEl = predDecl->typeRef->uniqueType->langEl; - langEl->predType = predDecl->predType; - langEl->predValue = predDecl->predValue; - } -} - -void Compiler::resolveReductionActions() -{ - for ( ReductionVect::Iter r = rootNamespace->reductions; r.lte(); r++ ) { - for ( ReduceNonTermList::Iter rni = (*r)->reduceNonTerms; rni.lte(); rni++ ) - rni->nonTerm->resolveType( this ); - - for ( ReduceActionList::Iter rai = (*r)->reduceActions; rai.lte(); rai++ ) - rai->nonTerm->resolveType( this ); - } -} - -Production *Compiler::findProductionByLabel( LangEl 
*langEl, String label ) -{ - for ( LelDefList::Iter ldi = langEl->defList; ldi.lte(); ldi++ ) { - if ( ldi->_name != 0 && ( strcmp( ldi->_name, label ) == 0 ) ) - return ldi; - } - return 0; -} - -void Compiler::findReductionActionProds() -{ - for ( ReductionVect::Iter r = rootNamespace->reductions; r.lte(); r++ ) { - for ( ReduceActionList::Iter rai = (*r)->reduceActions; rai.lte(); rai++ ) { - rai->nonTerm->resolveType( this ); - LangEl *langEl = rai->nonTerm->uniqueType->langEl; - - Production *prod = findProductionByLabel( langEl, rai->prod ); - - if ( prod == 0 ) { - error(rai->loc) << "could not find production \"" << - rai->prod << "\"" << endp; - } - - rai->production = prod; - } - } -} - -void Compiler::resolveReducers() -{ - for ( ParserTextList::Iter pt = parserTextList; pt.lte(); pt++ ) { - if ( pt->reduce ) { - Reduction *reduction = rootNamespace->findReduction( pt->reducer ); - if ( reduction == 0 ) { - error ( pt->loc ) << "could not locate reduction \"" << - pt->reducer << "\"" << endp; - } - - pt->reducerId = reduction->id; - - /* Indicate which type of reducing we need. Parser based, or - * postfix. */ - if ( pt->read ) - reduction->postfixBased = true; - else - reduction->parserBased = true; - } - } -} - -void Compiler::resolvePass() -{ - /* - * Type Resolving. - */ - - resolvePrecedence(); - - resolveParseTree(); - - UniqueType *argvUT = argvTypeRef->resolveType( this ); - argvElSel = argvUT->generic->elUt->structEl; - - UniqueType *stdsUT = stdsTypeRef->resolveType( this ); - stdsElSel = stdsUT->generic->elUt->structEl; - - resolveReductionActions(); - - /* We must do this as the last step in the type resolution process because - * all type resolves can cause new language elments with associated - * productions. They get tacked onto the end of the list of productions. - * Doing it at the end results processing a growing list. 
*/ - resolveProductionEls(); - - findReductionActionProds(); - - resolveReducers(); -} diff --git a/src/rtvector.h b/src/rtvector.h deleted file mode 100644 index e15d3f2a..00000000 --- a/src/rtvector.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright 2002-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef _COLM_RTVECTOR_H -#define _COLM_RTVECTOR_H - -#ifdef __cplusplus -extern "C" { -#endif - -#ifdef __cplusplus -} -#endif - -#endif /* _COLM_RT_VECTOR_H */ - diff --git a/src/stream.c b/src/stream.c deleted file mode 100644 index 341197e5..00000000 --- a/src/stream.c +++ /dev/null @@ -1,805 +0,0 @@ -/* - * Copyright 2007-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include <colm/input.h> - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <assert.h> -#include <unistd.h> -#include <stdbool.h> - -#include <colm/pdarun.h> -#include <colm/debug.h> -#include <colm/program.h> -#include <colm/tree.h> -#include <colm/bytecode.h> -#include <colm/pool.h> -#include <colm/struct.h> - -DEF_STREAM_FUNCS( stream_funcs_data, stream_impl_data ); - -extern struct stream_funcs_data file_funcs; -extern struct stream_funcs_data accum_funcs; - -void stream_impl_push_line( struct stream_impl_data *ss, int ll ) -{ - if ( ss->line_len == 0 ) { - ss->lines_cur = 0; - ss->lines_alloc = 16; - ss->line_len = malloc( sizeof(int) * ss->lines_alloc ); - } - else if ( ss->lines_cur == ss->lines_alloc ) { - int lines_alloc_new = ss->lines_alloc * 2; - int *line_len_new = malloc( sizeof(int) * lines_alloc_new ); - memcpy( line_len_new, ss->line_len, sizeof(int) * ss->lines_alloc ); - ss->lines_alloc = lines_alloc_new; - ss->line_len = line_len_new; - } - - ss->line_len[ ss->lines_cur ] = ll; - ss->lines_cur += 1; -} - -int stream_impl_pop_line( struct stream_impl_data *ss ) -{ - int len = 0; - if ( ss->lines_cur > 0 ) { - ss->lines_cur -= 1; - len = ss->line_len[ss->lines_cur]; - } - return len; -} - -static void dump_contents( struct colm_program *prg, struct stream_impl_data *sid ) -{ - struct run_buf *rb = sid->queue.head; - while ( rb != 0 ) { - debug( prg, REALM_INPUT, " %p contents |%d|%d|%d|%.*s|\n", sid, - rb->offset, rb->length, - rb->length - rb->offset, - (int)rb->length - rb->offset, - rb->data + rb->offset ); - rb = rb->next; - } -} - -static bool loc_set( location_t *loc ) -{ - return loc->line != 0; -} - -static void close_stream_file( FILE *file ) -{ - if ( file != stdin && file != stdout && file != stderr && - fileno(file) != 0 && fileno( file) != 1 && fileno(file) != 2 ) - { - fclose( file ); - } -} - -static void si_data_push_tail( struct stream_impl_data *ss, struct run_buf *run_buf ) -{ - if ( ss->queue.head 
== 0 ) { - run_buf->prev = run_buf->next = 0; - ss->queue.head = ss->queue.tail = run_buf; - } - else { - ss->queue.tail->next = run_buf; - run_buf->prev = ss->queue.tail; - run_buf->next = 0; - ss->queue.tail = run_buf; - } -} - -static struct run_buf *si_data_pop_tail( struct stream_impl_data *ss ) -{ - struct run_buf *ret = ss->queue.tail; - ss->queue.tail = ss->queue.tail->prev; - if ( ss->queue.tail == 0 ) - ss->queue.head = 0; - else - ss->queue.tail->next = 0; - return ret; -} - - -static void si_data_push_head( struct stream_impl_data *ss, struct run_buf *run_buf ) -{ - if ( ss->queue.head == 0 ) { - run_buf->prev = run_buf->next = 0; - ss->queue.head = ss->queue.tail = run_buf; - } - else { - ss->queue.head->prev = run_buf; - run_buf->prev = 0; - run_buf->next = ss->queue.head; - ss->queue.head = run_buf; - } -} - -static struct run_buf *si_data_pop_head( struct stream_impl_data *ss ) -{ - struct run_buf *ret = ss->queue.head; - ss->queue.head = ss->queue.head->next; - if ( ss->queue.head == 0 ) - ss->queue.tail = 0; - else - ss->queue.head->prev = 0; - return ret; -} - - -struct run_buf *new_run_buf( int sz ) -{ - struct run_buf *rb; - if ( sz > FSM_BUFSIZE ) { - int ssz = sizeof(struct run_buf) + sz - FSM_BUFSIZE; - rb = (struct run_buf*) malloc( ssz ); - memset( rb, 0, ssz ); - } - else { - rb = (struct run_buf*) malloc( sizeof(struct run_buf) ); - memset( rb, 0, sizeof(struct run_buf) ); - } - return rb; -} - -/* Keep the position up to date after consuming text. */ -void update_position_data( struct stream_impl_data *is, const char *data, long length ) -{ - int i; - for ( i = 0; i < length; i++ ) { - if ( data[i] == '\n' ) { - stream_impl_push_line( is, is->column ); - is->line += 1; - is->column = 1; - } - else { - is->column += 1; - } - } - - is->byte += length; -} - -/* Keep the position up to date after sending back text. 
*/ -void undo_position_data( struct stream_impl_data *is, const char *data, long length ) -{ - /* FIXME: this needs to fetch the position information from the parsed - * token and restore based on that.. */ - int i; - for ( i = 0; i < length; i++ ) { - if ( data[i] == '\n' ) { - is->line -= 1; - is->column = stream_impl_pop_line( is ); - } - else { - is->column -= 1; - } - } - - is->byte -= length; -} - - -/* - * Interface - */ - -static void data_transfer_loc( struct colm_program *prg, location_t *loc, struct stream_impl_data *ss ) -{ - loc->name = ss->name; - loc->line = ss->line; - loc->column = ss->column; - loc->byte = ss->byte; -} - -/* - * Data inputs: files, strings, etc. - */ - -static int data_get_data( struct colm_program *prg, struct stream_impl_data *ss, char *dest, int length ) -{ - int copied = 0; - - /* Move over skip bytes. */ - struct run_buf *buf = ss->queue.head; - while ( true ) { - if ( buf == 0 ) { - /* Got through the in-mem buffers without copying anything. */ - struct run_buf *run_buf = new_run_buf( 0 ); - int received = ss->funcs->get_data_source( prg, (struct stream_impl*)ss, run_buf->data, FSM_BUFSIZE ); - if ( received == 0 ) { - free( run_buf ); - break; - } - - run_buf->length = received; - si_data_push_tail( ss, run_buf ); - - buf = run_buf; - } - - int avail = buf->length - buf->offset; - - /* Anything available in the current buffer. */ - if ( avail > 0 ) { - /* The source data from the current buffer. */ - char *src = &buf->data[buf->offset]; - - int slen = avail < length ? 
avail : length; - memcpy( dest+copied, src, slen ) ; - copied += slen; - length -= slen; - } - - if ( length == 0 ) { - //debug( REALM_INPUT, "exiting get data\n", length ); - break; - } - - buf = buf->next; - } - - return copied; -} - -static struct stream_impl *data_split_consumed( program_t *prg, struct stream_impl_data *sid ) -{ - struct stream_impl *split_off = 0; - if ( sid->consumed > 0 ) { - debug( prg, REALM_INPUT, "maybe split: consumed is > 0, splitting\n" ); - split_off = colm_impl_consumed( "<text3>", sid->consumed ); - sid->consumed = 0; - } - return split_off; -} - -int data_append_data( struct colm_program *prg, struct stream_impl_data *sid, const char *data, int length ) -{ - struct run_buf *tail = sid->queue.tail; - if ( tail == 0 || length > (FSM_BUFSIZE - tail->length) ) { - debug( prg, REALM_INPUT, "data_append_data: allocating run buf\n" ); - tail = new_run_buf( length ); - si_data_push_tail( sid, tail ); - } - - debug( prg, REALM_INPUT, "data_append_data: appending to " - "accum tail, offset: %d, length: %d, dlen: %d\n", - tail->offset, tail->length, length ); - - memcpy( tail->data + tail->length, data, length ); - tail->length += length; - -#ifdef DEBUG - dump_contents( prg, sid ); -#endif - - return length; -} - -int data_undo_append_data( struct colm_program *prg, struct stream_impl_data *sid, int length ) -{ - int consumed = 0; - int remaining = length; - - /* Move over skip bytes. */ - while ( true ) { - struct run_buf *buf = sid->queue.tail; - - if ( buf == 0 ) - break; - - /* Anything available in the current buffer. */ - int avail = buf->length - buf->offset; - if ( avail > 0 ) { - /* The source data from the current buffer. */ - int slen = avail <= remaining ? 
avail : remaining; - consumed += slen; - remaining -= slen; - buf->length -= slen; - //sid->consumed += slen; - } - - if ( remaining == 0 ) - break; - - struct run_buf *run_buf = si_data_pop_tail( sid ); - free( run_buf ); - } - - debug( prg, REALM_INPUT, "data_undo_append_data: stream %p " - "ask: %d, consumed: %d, now: %d\n", sid, length, consumed ); - -#ifdef DEBUG - dump_contents( prg, sid ); -#endif - - return consumed; - -} - -static void data_destructor( program_t *prg, tree_t **sp, struct stream_impl_data *si ) -{ - if ( si->file != 0 ) - close_stream_file( si->file ); - - if ( si->collect != 0 ) { - str_collect_destroy( si->collect ); - free( si->collect ); - } - - struct run_buf *buf = si->queue.head; - while ( buf != 0 ) { - struct run_buf *next = buf->next; - free( buf ); - buf = next; - } - - si->queue.head = 0; - - if ( si->data != 0 ) - free( (char*)si->data ); - - /* FIXME: Need to leak this for now. Until we can return strings to a - * program loader and free them at a later date (after the colm program is - * deleted). */ - // if ( si->name != 0 ) - // free( si->name ); - - free( si ); -} - -static str_collect_t *data_get_collect( struct colm_program *prg, struct stream_impl_data *si ) -{ - return si->collect; -} - -static void data_flush_stream( struct colm_program *prg, struct stream_impl_data *si ) -{ - if ( si->file != 0 ) - fflush( si->file ); -} - -static void data_close_stream( struct colm_program *prg, struct stream_impl_data *si ) -{ - if ( si->file != 0 ) { - close_stream_file( si->file ); - si->file = 0; - } -} - -static int data_get_option( struct colm_program *prg, struct stream_impl_data *si, int option ) -{ - return si->auto_trim; -} - -static void data_set_option( struct colm_program *prg, struct stream_impl_data *si, int option, int value ) -{ - si->auto_trim = value ? 
1 : 0; -} - -static void data_print_tree( struct colm_program *prg, tree_t **sp, - struct stream_impl_data *si, tree_t *tree, int trim ) -{ - if ( si->file != 0 ) - colm_print_tree_file( prg, sp, si, tree, trim ); - else if ( si->collect != 0 ) - colm_print_tree_collect( prg, sp, si->collect, tree, trim ); -} - -static int data_get_parse_block( struct colm_program *prg, struct stream_impl_data *ss, int *pskip, char **pdp, int *copied ) -{ - int ret = 0; - *copied = 0; - - /* Move over skip bytes. */ - struct run_buf *buf = ss->queue.head; - while ( true ) { - if ( buf == 0 ) { - /* Got through the in-mem buffers without copying anything. */ - struct run_buf *run_buf = new_run_buf( 0 ); - int received = ss->funcs->get_data_source( prg, (struct stream_impl*)ss, run_buf->data, FSM_BUFSIZE ); - if ( received == 0 ) { - free( run_buf ); - ret = INPUT_EOD; - break; - } - - run_buf->length = received; - si_data_push_tail( ss, run_buf ); - - int slen = received; - *pdp = run_buf->data; - *copied = slen; - ret = INPUT_DATA; - break; - } - - int avail = buf->length - buf->offset; - - /* Anything available in the current buffer. */ - if ( avail > 0 ) { - /* The source data from the current buffer. */ - char *src = &buf->data[buf->offset]; - - /* Need to skip? */ - if ( *pskip > 0 && *pskip >= avail ) { - /* Skipping the the whole source. */ - *pskip -= avail; - } - else { - /* Either skip is zero, or less than slen. Skip goes to zero. - * Some data left over, copy it. */ - src += *pskip; - avail -= *pskip; - *pskip = 0; - - int slen = avail; - *pdp = src; - *copied += slen; - ret = INPUT_DATA; - break; - } - } - - buf = buf->next; - } - - return ret; -} - -static int data_consume_data( struct colm_program *prg, struct stream_impl_data *sid, int length, location_t *loc ) -{ - int consumed = 0; - int remaining = length; - - /* Move over skip bytes. 
*/ - while ( true ) { - struct run_buf *buf = sid->queue.head; - - if ( buf == 0 ) - break; - - /* Anything available in the current buffer. */ - int avail = buf->length - buf->offset; - if ( avail > 0 ) { - - if ( !loc_set( loc ) ) - data_transfer_loc( prg, loc, sid ); - - /* The source data from the current buffer. */ - int slen = avail <= remaining ? avail : remaining; - consumed += slen; - remaining -= slen; - update_position_data( sid, buf->data + buf->offset, slen ); - buf->offset += slen; - sid->consumed += slen; - } - - if ( remaining == 0 ) - break; - - struct run_buf *run_buf = si_data_pop_head( sid ); - free( run_buf ); - } - - debug( prg, REALM_INPUT, "data_consume_data: stream %p " - "ask: %d, consumed: %d, now: %d\n", sid, length, consumed, sid->consumed ); - -#ifdef DEBUG - dump_contents( prg, sid ); -#endif - - return consumed; -} - -static int data_undo_consume_data( struct colm_program *prg, struct stream_impl_data *sid, const char *data, int length ) -{ - const char *end = data + length; - int amount = length; - if ( amount > sid->consumed ) - amount = sid->consumed; - - int remaining = amount; - struct run_buf *head = sid->queue.head; - if ( head != 0 && head->offset > 0 ) { - /* Fill into the offset space. */ - int fill = remaining > head->offset ? 
head->offset : remaining; - end -= fill; - remaining -= fill; - - undo_position_data( sid, end, fill ); - memcpy( head->data + (head->offset - fill), end, fill ); - - head->offset -= fill; - sid->consumed -= fill; - } - - if ( remaining > 0 ) { - end -= remaining; - struct run_buf *new_buf = new_run_buf( 0 ); - new_buf->length = remaining; - undo_position_data( sid, end, remaining ); - memcpy( new_buf->data, end, remaining ); - si_data_push_head( sid, new_buf ); - sid->consumed -= amount; - } - - debug( prg, REALM_INPUT, "data_undo_consume_data: stream %p " - "undid consume %d of %d bytes, consumed now %d, \n", sid, amount, length, sid->consumed ); - -#ifdef DEBUG - dump_contents( prg, sid ); -#endif - - return amount; -} - -/* - * File Inputs - */ - -static int file_get_data_source( struct colm_program *prg, struct stream_impl_data *si, char *dest, int length ) -{ - return fread( dest, 1, length, si->file ); -} - -/* - * Text inputs - */ - -static int accum_get_data_source( struct colm_program *prg, struct stream_impl_data *si, char *dest, int want ) -{ - long avail = si->dlen - si->offset; - long take = avail < want ? 
avail : want; - if ( take > 0 ) - memcpy( dest, si->data + si->offset, take ); - si->offset += take; - return take; -} - -char stream_get_eof_sent( struct colm_program *prg, struct input_impl_seq *si ) -{ - return si->eof_sent; -} - -void stream_set_eof_sent( struct colm_program *prg, struct input_impl_seq *si, char eof_sent ) -{ - si->eof_sent = eof_sent; -} - -struct stream_funcs_data file_funcs = -{ - &data_get_parse_block, - &data_get_data, - &file_get_data_source, - - &data_consume_data, - &data_undo_consume_data, - - &data_transfer_loc, - &data_get_collect, - &data_flush_stream, - &data_close_stream, - &data_print_tree, - - &data_split_consumed, - &data_append_data, - &data_undo_append_data, - &data_destructor, - - &data_get_option, - &data_set_option, -}; - -struct stream_funcs_data accum_funcs = -{ - &data_get_parse_block, - &data_get_data, - &accum_get_data_source, - - &data_consume_data, - &data_undo_consume_data, - - &data_transfer_loc, - &data_get_collect, - &data_flush_stream, - &data_close_stream, - &data_print_tree, - - &data_split_consumed, - &data_append_data, - &data_undo_append_data, - &data_destructor, - - &data_get_option, - &data_set_option, -}; - -static void si_data_init( struct stream_impl_data *is, char *name ) -{ - memset( is, 0, sizeof(struct stream_impl_data) ); - - is->type = 'D'; - is->name = name; - is->line = 1; - is->column = 1; - is->byte = 0; - - /* Indentation turned off. 
*/ - is->indent.level = COLM_INDENT_OFF; - is->indent.indent = 0; -} - -struct stream_impl *colm_impl_new_accum( char *name ) -{ - struct stream_impl_data *si = (struct stream_impl_data*)malloc(sizeof(struct stream_impl_data)); - si_data_init( si, name ); - si->funcs = (struct stream_funcs*)&accum_funcs; - - return (struct stream_impl*)si; -} - -static struct stream_impl *colm_impl_new_file( char *name, FILE *file ) -{ - struct stream_impl_data *ss = (struct stream_impl_data*)malloc(sizeof(struct stream_impl_data)); - si_data_init( ss, name ); - ss->funcs = (struct stream_funcs*)&file_funcs; - ss->file = file; - return (struct stream_impl*)ss; -} - -static struct stream_impl *colm_impl_new_fd( char *name, long fd ) -{ - struct stream_impl_data *si = (struct stream_impl_data*)malloc(sizeof(struct stream_impl_data)); - si_data_init( si, name ); - si->funcs = (struct stream_funcs*)&file_funcs; - si->file = fdopen( fd, ( fd == 0 ) ? "r" : "w" ); - return (struct stream_impl*)si; -} - -struct stream_impl *colm_impl_consumed( char *name, int len ) -{ - struct stream_impl_data *si = (struct stream_impl_data*)malloc(sizeof(struct stream_impl_data)); - si_data_init( si, name ); - si->funcs = (struct stream_funcs*)&accum_funcs; - - si->data = 0; - si->consumed = len; - si->offset = len; - - si->dlen = len; - - return (struct stream_impl*)si; -} - -struct stream_impl *colm_impl_new_text( char *name, const char *data, int len ) -{ - struct stream_impl_data *si = (struct stream_impl_data*)malloc(sizeof(struct stream_impl_data)); - si_data_init( si, name ); - si->funcs = (struct stream_funcs*)&accum_funcs; - - char *buf = (char*)malloc( len ); - memcpy( buf, data, len ); - - si->data = buf; - si->dlen = len; - - return (struct stream_impl*)si; -} - -struct stream_impl *colm_impl_new_collect( char *name ) -{ - struct stream_impl_data *ss = (struct stream_impl_data*)malloc(sizeof(struct stream_impl_data)); - si_data_init( ss, name ); - ss->funcs = (struct 
stream_funcs*)&accum_funcs; - ss->collect = (struct colm_str_collect*) malloc( sizeof( struct colm_str_collect ) ); - init_str_collect( ss->collect ); - return (struct stream_impl*)ss; -} - -struct stream_impl *stream_to_impl( stream_t *ptr ) -{ - return ptr->impl; -} - -str_t *collect_string( program_t *prg, stream_t *s ) -{ - str_collect_t *collect = s->impl->funcs->get_collect( prg, s->impl ); - head_t *head = string_alloc_full( prg, collect->data, collect->length ); - str_t *str = (str_t*)construct_string( prg, head ); - return str; -} - -stream_t *colm_stream_open_fd( program_t *prg, char *name, long fd ) -{ - struct stream_impl *impl = colm_impl_new_fd( colm_filename_add( prg, name ), fd ); - - struct colm_stream *s = colm_stream_new_struct( prg ); - s->impl = impl; - return s; -} - -stream_t *colm_stream_open_file( program_t *prg, tree_t *name, tree_t *mode ) -{ - head_t *head_name = ((str_t*)name)->value; - head_t *head_mode = ((str_t*)mode)->value; - stream_t *stream = 0; - - const char *given_mode = string_data(head_mode); - const char *fopen_mode = 0; - if ( memcmp( given_mode, "r", string_length(head_mode) ) == 0 ) - fopen_mode = "rb"; - else if ( memcmp( given_mode, "w", string_length(head_mode) ) == 0 ) - fopen_mode = "wb"; - else if ( memcmp( given_mode, "a", string_length(head_mode) ) == 0 ) - fopen_mode = "ab"; - else { - fatal( "unknown file open mode: %s\n", given_mode ); - } - - /* Need to make a C-string (null terminated). 
*/ - char *file_name = (char*)malloc(string_length(head_name)+1); - memcpy( file_name, string_data(head_name), string_length(head_name) ); - file_name[string_length(head_name)] = 0; - - FILE *file = fopen( file_name, fopen_mode ); - if ( file != 0 ) { - stream = colm_stream_new_struct( prg ); - stream->impl = colm_impl_new_file( colm_filename_add( prg, file_name ), file ); - } - - free( file_name ); - - return stream; -} - - -void colm_stream_destroy( program_t *prg, tree_t **sp, struct_t *s ) -{ - stream_t *stream = (stream_t*) s; - struct stream_impl *si = stream->impl; - si->funcs->destructor( prg, sp, si ); -} - -stream_t *colm_stream_new_struct( program_t *prg ) -{ - size_t memsize = sizeof(struct colm_stream); - struct colm_stream *stream = (struct colm_stream*) malloc( memsize ); - memset( stream, 0, memsize ); - colm_struct_add( prg, (struct colm_struct *)stream ); - stream->id = prg->rtd->struct_stream_id; - stream->destructor = &colm_stream_destroy; - return stream; -} - -stream_t *colm_stream_open_collect( program_t *prg ) -{ - struct stream_impl *impl = colm_impl_new_collect( colm_filename_add( prg, "<internal>" ) ); - struct colm_stream *stream = colm_stream_new_struct( prg ); - stream->impl = impl; - return stream; -} - diff --git a/src/string.c b/src/string.c deleted file mode 100644 index 8a852e8b..00000000 --- a/src/string.c +++ /dev/null @@ -1,281 +0,0 @@ -/* - * Copyright 2007-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be 
included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <assert.h> -#include <string.h> -#include <stdio.h> -#include <stdlib.h> -#include <ctype.h> - -#include <colm/pool.h> -#include <colm/pdarun.h> -#include <colm/bytecode.h> - -str_t *string_prefix( program_t *prg, str_t *str, long len ) -{ - head_t *head = string_alloc_full( prg, str->value->data, len ); - return (str_t*)construct_string( prg, head ); -} - -str_t *string_suffix( program_t *prg, str_t *str, long pos ) -{ - long len = str->value->length - pos; - head_t *head = string_alloc_full( prg, str->value->data + pos, len ); - return (str_t*)construct_string( prg, head ); -} - -tree_t *construct_string( program_t *prg, head_t *s ) -{ - str_t *str = (str_t*) tree_allocate( prg ); - str->id = LEL_ID_STR; - str->value = s; - - return (tree_t*)str; -} - - -/* - * In this system strings are not null terminated. Often strings come from a - * parse, in which case the string is just a pointer into the the data stream. - * A block in a parsed stream can hold many tokens and there is no room - * allocated for nulls. 
- */ - -head_t *string_copy( program_t *prg, head_t *head ) -{ - head_t *result = 0; - if ( head != 0 ) { - if ( (char*)(head+1) == head->data ) - result = string_alloc_full( prg, head->data, head->length ); - else - result = colm_string_alloc_pointer( prg, head->data, head->length ); - - if ( head->location != 0 ) { - result->location = location_allocate( prg ); - result->location->name = head->location->name; - result->location->line = head->location->line; - result->location->column = head->location->column; - result->location->byte = head->location->byte; - } - } - return result; -} - -void string_free( program_t *prg, head_t *head ) -{ - if ( head != 0 ) { - if ( head->location != 0 ) - location_free( prg, head->location ); - - if ( (char*)(head+1) == head->data ) { - /* Full string allocation. */ - free( head ); - } - else { - /* Just a string head. */ - head_free( prg, head ); - } - } -} - -const char *string_data( head_t *head ) -{ - if ( head == 0 ) - return 0; - return head->data; -} - -long string_length( head_t *head ) -{ - if ( head == 0 ) - return 0; - return head->length; -} - -void string_shorten( head_t *head, long newlen ) -{ - assert( newlen <= head->length ); - head->length = newlen; -} - -head_t *init_str_space( long length ) -{ - /* Find the length and allocate the space for the shared string. */ - head_t *head = (head_t*) malloc( sizeof(head_t) + length ); - - /* Init the header. */ - head->data = (char*)(head+1); - head->length = length; - head->location = 0; - - /* Save the pointer to the data. */ - return head; -} - -/* Create from a c-style string. */ -head_t *string_alloc_full( program_t *prg, const char *data, long length ) -{ - /* Init space for the data. */ - head_t *head = init_str_space( length ); - - /* Copy in the data. */ - memcpy( (head+1), data, length ); - - return head; -} - -/* Create from a c-style string. 
*/ -head_t *colm_string_alloc_pointer( program_t *prg, const char *data, long length ) -{ - /* Find the length and allocate the space for the shared string. */ - head_t *head = head_allocate( prg ); - - /* Init the header. */ - head->data = data; - head->length = length; - - return head; -} - -head_t *concat_str( head_t *s1, head_t *s2 ) -{ - long s1Len = s1->length; - long s2Len = s2->length; - - /* Init space for the data. */ - head_t *head = init_str_space( s1Len + s2Len ); - - /* Copy in the data. */ - memcpy( (head+1), s1->data, s1Len ); - memcpy( (char*)(head+1) + s1Len, s2->data, s2Len ); - - return head; -} - -head_t *string_to_upper( head_t *s ) -{ - /* Init space for the data. */ - long len = s->length; - head_t *head = init_str_space( len ); - - /* Copy in the data. */ - const char *src = s->data; - char *dst = (char*)(head+1); - int i; - for ( i = 0; i < len; i++ ) - *dst++ = toupper( *src++ ); - - return head; -} - -head_t *string_to_lower( head_t *s ) -{ - /* Init space for the data. */ - long len = s->length; - head_t *head = init_str_space( len ); - - /* Copy in the data. */ - const char *src = s->data; - char *dst = (char*)(head+1); - int i; - for ( i = 0; i < len; i++ ) - *dst++ = tolower( *src++ ); - - return head; -} - - -/* Compare two strings. If identical returns 1, otherwise 0. */ -word_t cmp_string( head_t *s1, head_t *s2 ) -{ - if ( s1->length < s2->length ) - return -1; - else if ( s1->length > s2->length ) - return 1; - else { - char *d1 = (char*)(s1->data); - char *d2 = (char*)(s2->data); - return memcmp( d1, d2, s1->length ); - } -} - -word_t str_atoi( head_t *str ) -{ - /* FIXME: need to implement this by hand. There is no null terminator. */ - char *nulled = (char*)malloc( str->length + 1 ); - memcpy( nulled, str->data, str->length ); - nulled[str->length] = 0; - int res = atoi( nulled ); - free( nulled ); - return res; -} - -word_t str_atoo( head_t *str ) -{ - /* FIXME: need to implement this by hand. There is no null terminator. 
*/ - char *nulled = (char*)malloc( str->length + 1 ); - memcpy( nulled, str->data, str->length ); - nulled[str->length] = 0; - int res = strtol( nulled, 0, 8 ); - free( nulled ); - return res; -} - -head_t *int_to_str( program_t *prg, word_t i ) -{ - char data[20]; - sprintf( data, "%ld", i ); - return string_alloc_full( prg, data, strlen(data) ); -} - -word_t str_uord16( head_t *head ) -{ - uchar *data = (uchar*)(head->data); - ulong res; - res = (ulong)data[1]; - res |= ((ulong)data[0]) << 8; - return res; -} - -word_t str_uord8( head_t *head ) -{ - uchar *data = (uchar*)(head->data); - ulong res = (ulong)data[0]; - return res; -} - -head_t *make_literal( program_t *prg, long offset ) -{ - return colm_string_alloc_pointer( prg, - prg->rtd->litdata[offset], - prg->rtd->litlen[offset] ); -} - -head_t *string_sprintf( program_t *prg, str_t *format, long integer ) -{ - head_t *format_head = format->value; - long written = snprintf( 0, 0, string_data(format_head), integer ); - head_t *head = init_str_space( written+1 ); - written = snprintf( (char*)head->data, written+1, string_data(format_head), integer ); - head->length -= 1; - return head; -} diff --git a/src/struct.c b/src/struct.c deleted file mode 100644 index 5ee58ed3..00000000 --- a/src/struct.c +++ /dev/null @@ -1,185 +0,0 @@ -/* - * Copyright 2016-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <stdlib.h> -#include <string.h> -#include <stdbool.h> - -#include <colm/program.h> -#include <colm/struct.h> - -#include "internal.h" -#include "bytecode.h" - -struct colm_tree *colm_get_global( program_t *prg, long pos ) -{ - return colm_struct_get_field( prg->global, tree_t*, pos ); -} - -void colm_struct_add( program_t *prg, struct colm_struct *item ) -{ - if ( prg->heap.head == 0 ) { - prg->heap.head = prg->heap.tail = item; - item->prev = item->next = 0; - } - else { - item->prev = prg->heap.tail; - item->next = 0; - prg->heap.tail->next = item; - prg->heap.tail = item; - } -} - -struct colm_struct *colm_struct_new_size( program_t *prg, int size ) -{ - size_t memsize = sizeof(struct colm_struct) + ( sizeof(tree_t*) * size ); - struct colm_struct *item = (struct colm_struct*) malloc( memsize ); - memset( item, 0, memsize ); - - colm_struct_add( prg, item ); - return item; -} - -struct colm_struct *colm_struct_new( program_t *prg, int id ) -{ - struct colm_struct *s = colm_struct_new_size( prg, prg->rtd->sel_info[id - prg->rtd->num_lang_els].size ); - s->id = id; - return s; -} - -struct struct_el_info *colm_sel_info( program_t *prg, int id ) -{ - return &prg->rtd->sel_info[id - prg->rtd->num_lang_els]; -} - -void colm_struct_delete( program_t *prg, tree_t **sp, struct colm_struct *el ) -{ - if ( el->id == prg->rtd->struct_inbuilt_id || el->id == prg->rtd->struct_stream_id ) { - colm_destructor_t destructor = ((struct colm_inbuilt*)el)->destructor; - if ( 
destructor != 0 ) - (*destructor)( prg, sp, el ); - } - else { - int tree_i; - struct struct_el_info *sel = colm_sel_info( prg, el->id ); - for ( tree_i = 0; tree_i < sel->trees_len; tree_i++ ) { - tree_t *tree = colm_struct_get_field( el, tree_t*, sel->trees[tree_i] ); - colm_tree_downref( prg, sp, tree ); - } - } - free( el ); -} - -void colm_parser_destroy( program_t *prg, tree_t **sp, struct colm_struct *s ) -{ - struct colm_parser *parser = (struct colm_parser*) s; - - /* Free the PDA run. */ - colm_pda_clear( prg, sp, parser->pda_run ); - free( parser->pda_run ); - - /* Free the result. */ - colm_tree_downref( prg, sp, parser->result ); -} - -parser_t *colm_parser_new( program_t *prg, struct generic_info *gi, int stop_id, int reducer ) -{ - struct pda_run *pda_run = malloc( sizeof(struct pda_run) ); - - /* Start off the parsing process. */ - colm_pda_init( prg, pda_run, prg->rtd->pda_tables, - gi->parser_id, stop_id, 0, 0, reducer ); - - size_t memsize = sizeof(struct colm_parser); - struct colm_parser *parser = (struct colm_parser*) malloc( memsize ); - memset( parser, 0, memsize ); - colm_struct_add( prg, (struct colm_struct*) parser ); - - parser->id = prg->rtd->struct_inbuilt_id; - parser->destructor = &colm_parser_destroy; - parser->pda_run = pda_run; - - return parser; -} - -void colm_map_destroy( program_t *prg, tree_t **sp, struct colm_struct *s ) -{ - struct colm_map *map = (struct colm_map*) s; - - map_el_t *el = map->head; - while ( el != 0 ) { - map_el_t *next = el->next; - colm_tree_downref( prg, sp, el->key ); - //mapElFree( prg, el ); - el = next; - } -} - -map_t *colm_map_new( struct colm_program *prg ) -{ - size_t memsize = sizeof(struct colm_map); - struct colm_map *map = (struct colm_map*) malloc( memsize ); - memset( map, 0, memsize ); - colm_struct_add( prg, (struct colm_struct *)map ); - map->id = prg->rtd->struct_inbuilt_id; - return map; -} - -struct_t *colm_construct_generic( program_t *prg, long generic_id, int stop_id ) -{ - struct 
generic_info *generic_info = &prg->rtd->generic_info[generic_id]; - struct_t *new_generic = 0; - switch ( generic_info->type ) { - case GEN_MAP: { - map_t *map = colm_map_new( prg ); - map->generic_info = generic_info; - new_generic = (struct_t*) map; - break; - } - case GEN_LIST: { - list_t *list = colm_list_new( prg ); - list->generic_info = generic_info; - new_generic = (struct_t*) list; - break; - } - case GEN_PARSER: { - parser_t *parser = colm_parser_new( prg, generic_info, stop_id, 0 ); - parser->input = colm_input_new( prg ); - new_generic = (struct_t*) parser; - break; - } - } - - return new_generic; -} - -struct_t *colm_construct_reducer( program_t *prg, long generic_id, int reducer_id ) -{ - struct generic_info *generic_info = &prg->rtd->generic_info[generic_id]; - struct_t *new_generic = 0; - - parser_t *parser = colm_parser_new( prg, generic_info, 0, reducer_id ); - parser->input = colm_input_new( prg ); - new_generic = (struct_t*) parser; - - return new_generic; -} diff --git a/src/struct.h b/src/struct.h deleted file mode 100644 index 13f78c40..00000000 --- a/src/struct.h +++ /dev/null @@ -1,180 +0,0 @@ -/* - * Copyright 2016-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef _COLM_STRUCT_H -#define _COLM_STRUCT_H - -#if defined(__cplusplus) -extern "C" { -#endif - -typedef void (*colm_destructor_t)( struct colm_program *prg, - tree_t **sp, struct colm_struct *s ); - -struct colm_struct -{ - short id; - struct colm_struct *prev, *next; -}; - -/* Must overlay colm_struct. */ -struct colm_inbuilt -{ - short id; - struct colm_struct *prev, *next; - colm_destructor_t destructor; -}; - -/* Must overlay colm_inbuilt. */ -typedef struct colm_parser -{ - short id; - struct colm_struct *prev, *next; - colm_destructor_t destructor; - - struct pda_run *pda_run; - struct colm_input *input; - tree_t *result; -} parser_t; - -/* Must overlay colm_inbuilt. */ -typedef struct colm_input -{ - short id; - struct colm_struct *prev, *next; - colm_destructor_t destructor; - - struct input_impl *impl; -} input_t; - -/* Must overlay colm_inbuilt. */ -typedef struct colm_stream -{ - short id; - struct colm_struct *prev, *next; - colm_destructor_t destructor; - - struct stream_impl *impl; -} stream_t; - -#define COLM_LIST_EL_SIZE 2 -typedef struct colm_list_el -{ - struct colm_list_el *list_next; - struct colm_list_el *list_prev; -} list_el_t; - -/* Must overlay colm_inbuilt. 
*/ -typedef struct colm_list -{ - short id; - struct colm_struct *prev, *next; - colm_destructor_t destructor; - - list_el_t *head, *tail; - long list_len; - struct generic_info *generic_info; -} list_t; - -typedef struct colm_map_el -{ - tree_t *key; - - struct colm_map_el *left, *right, *parent; - long height; - - struct colm_map_el *next, *prev; -} map_el_t; - -#define COLM_MAP_EL_SIZE ( sizeof(colm_map_el) / sizeof(void*) ) - -typedef struct colm_map -{ - short id; - struct colm_struct *prev, *next; - colm_destructor_t destructor; - - struct colm_map_el *head, *tail, *root; - long tree_size; - struct generic_info *generic_info; -} map_t; - -struct colm_struct *colm_struct_new_size( struct colm_program *prg, int size ); -struct colm_struct *colm_struct_new( struct colm_program *prg, int id ); -void colm_struct_add( struct colm_program *prg, struct colm_struct *item ); -void colm_struct_delete( struct colm_program *prg, struct colm_tree **sp, - struct colm_struct *el ); - -struct colm_struct *colm_struct_inbuilt( struct colm_program *prg, int size, - colm_destructor_t destructor ); - -#define colm_struct_get_field( obj, type, field ) \ - (type)(((void**)(((struct colm_struct*)obj)+1))[field]) - -#define colm_struct_set_field( obj, type, field, val ) \ - ((type*)(((struct colm_struct*)obj)+1))[field] = val - -#define colm_struct_get_addr( obj, type, field ) \ - (type)(&(((void **)(((struct colm_struct*)obj)+1))[field])) - -#define colm_struct_container( el, field ) \ - ((void*)el) - (field * sizeof(void*)) - sizeof(struct colm_struct) - -#define colm_generic_el_container( prg, el, genId ) \ - colm_struct_container( el, prg->rtd->generic_info[genId].el_offset ) - -#define colm_struct_to_list_el( prg, obj, genId ) \ - colm_struct_get_addr( obj, list_el_t*, prg->rtd->generic_info[genId].el_offset ) - -#define colm_struct_to_map_el( prg, obj, genId ) \ - colm_struct_get_addr( obj, map_el_t*, prg->rtd->generic_info[genId].el_offset ) - -parser_t *colm_parser_new( 
program_t *prg, struct generic_info *gi, int stop_id, int reducer ); -input_t *colm_input_new( struct colm_program *prg ); -stream_t *colm_stream_new_struct( struct colm_program *prg ); - -list_t *colm_list_new( struct colm_program *prg ); -struct colm_struct *colm_list_get( struct colm_program *prg, list_t *list, - word_t gen_id, word_t field ); -struct colm_struct *colm_list_el_get( struct colm_program *prg, - list_el_t *list_el, word_t gen_id, word_t field ); -list_el_t *colm_list_detach_head( list_t *list ); -list_el_t *colm_list_detach_tail( list_t *list ); -long colm_list_length( list_t *list ); - -map_t *colm_map_new( struct colm_program *prg ); -struct colm_struct *colm_map_el_get( struct colm_program *prg, - map_el_t *map_el, word_t gen_id, word_t field ); -struct colm_struct *colm_map_get( struct colm_program *prg, map_t *map, - word_t gen_id, word_t field ); - -struct colm_struct *colm_construct_generic( struct colm_program *prg, long generic_id, int stop_id ); -struct colm_struct *colm_construct_reducer( struct colm_program *prg, long generic_id, int reducer_id ); -struct input_impl *input_to_impl( input_t *ptr ); -struct stream_impl *stream_to_impl( stream_t *ptr ); - -#if defined(__cplusplus) -} -#endif - -#endif /* _COLM_STRUCT_H */ - diff --git a/src/synthesis.cc b/src/synthesis.cc deleted file mode 100644 index e1d192b6..00000000 --- a/src/synthesis.cc +++ /dev/null @@ -1,3362 +0,0 @@ -/* - * Copyright 2007-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this 
permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <assert.h> -#include <stdbool.h> -#include <iostream> -#include "compiler.h" - -using std::cout; -using std::cerr; -using std::endl; - -bool isStr( UniqueType *ut ) -{ - return ut->typeId == TYPE_TREE && ut->langEl != 0 && ut->langEl->id == LEL_ID_STR; -} - -bool isTree( UniqueType *ut ) -{ - return ut->typeId == TYPE_TREE; -} - -IterDef::IterDef( Type type ) -: - type(type), - func(0) -{ -} - -IterDef::IterDef( Type type, Function *func ) -: - type(type), - func(func) -{} - -IterImpl::IterImpl( Type type ) : - type(type), - func(0), - useFuncId(false), - useSearchUT(false), - useGenericId(false) -{ - switch ( type ) { - case Tree: - inCreateWV = IN_TRITER_FROM_REF; - inCreateWC = IN_TRITER_FROM_REF; - inUnwind = IN_TRITER_UNWIND; - inDestroy = IN_TRITER_DESTROY; - inAdvance = IN_TRITER_ADVANCE; - - inGetCurR = IN_TRITER_GET_CUR_R; - inGetCurWC = IN_TRITER_GET_CUR_WC; - inSetCurWC = IN_TRITER_SET_CUR_WC; - inRefFromCur = IN_TRITER_REF_FROM_CUR; - useSearchUT = true; - break; - - case Child: - inCreateWV = IN_TRITER_FROM_REF; - inCreateWC = IN_TRITER_FROM_REF; - inUnwind = IN_TRITER_UNWIND; - inDestroy = IN_TRITER_DESTROY; - inAdvance = IN_TRITER_NEXT_CHILD; - - inGetCurR = IN_TRITER_GET_CUR_R; - inGetCurWC = IN_TRITER_GET_CUR_WC; - inSetCurWC = IN_TRITER_SET_CUR_WC; - inRefFromCur = IN_TRITER_REF_FROM_CUR; - useSearchUT = true; - break; - - case RevChild: - inCreateWV = 
IN_REV_TRITER_FROM_REF; - inCreateWC = IN_REV_TRITER_FROM_REF; - inUnwind = IN_REV_TRITER_UNWIND; - inDestroy = IN_REV_TRITER_DESTROY; - inAdvance = IN_REV_TRITER_PREV_CHILD; - - inGetCurR = IN_TRITER_GET_CUR_R; - inGetCurWC = IN_TRITER_GET_CUR_WC; - inSetCurWC = IN_TRITER_SET_CUR_WC; - inRefFromCur = IN_TRITER_REF_FROM_CUR; - useSearchUT = true; - break; - - case Repeat: - inCreateWV = IN_TRITER_FROM_REF; - inCreateWC = IN_TRITER_FROM_REF; - inUnwind = IN_TRITER_UNWIND; - inDestroy = IN_TRITER_DESTROY; - inAdvance = IN_TRITER_NEXT_REPEAT; - - inGetCurR = IN_TRITER_GET_CUR_R; - inGetCurWC = IN_TRITER_GET_CUR_WC; - inSetCurWC = IN_TRITER_SET_CUR_WC; - inRefFromCur = IN_TRITER_REF_FROM_CUR; - useSearchUT = true; - break; - - case RevRepeat: - inCreateWV = IN_TRITER_FROM_REF; - inCreateWC = IN_TRITER_FROM_REF; - inUnwind = IN_TRITER_UNWIND; - inDestroy = IN_TRITER_DESTROY; - inAdvance = IN_TRITER_PREV_REPEAT; - - inGetCurR = IN_TRITER_GET_CUR_R; - inGetCurWC = IN_TRITER_GET_CUR_WC; - inSetCurWC = IN_TRITER_SET_CUR_WC; - inRefFromCur = IN_TRITER_REF_FROM_CUR; - useSearchUT = true; - break; - - case ListEl: - inCreateWV = IN_GEN_ITER_FROM_REF; - inCreateWC = IN_GEN_ITER_FROM_REF; - inUnwind = IN_GEN_ITER_UNWIND; - inDestroy = IN_GEN_ITER_DESTROY; - inAdvance = IN_LIST_ITER_ADVANCE; - - inGetCurR = IN_GEN_ITER_GET_CUR_R; -// inGetCurWC = //IN_LIST_ITER_GET_CUR_WC; -// inSetCurWC = //IN_HALT; -// inRefFromCur = //IN_LIST_ITER_REF_FROM_CUR; - useGenericId = true; - break; - - case ListVal: - inCreateWV = IN_GEN_ITER_FROM_REF; - inCreateWC = IN_GEN_ITER_FROM_REF; - inUnwind = IN_GEN_ITER_UNWIND; - inDestroy = IN_GEN_ITER_DESTROY; - inAdvance = IN_LIST_ITER_ADVANCE; - - inGetCurR = IN_GEN_VITER_GET_CUR_R; -// inGetCurWC = //IN_LIST_ITER_GET_CUR_WC; -// inSetCurWC = //IN_HALT; -// inRefFromCur = //IN_LIST_ITER_REF_FROM_CUR; - useGenericId = true; - break; - - case RevListVal: - inCreateWV = IN_GEN_ITER_FROM_REF; - inCreateWC = IN_GEN_ITER_FROM_REF; - inUnwind = 
IN_GEN_ITER_UNWIND; - inDestroy = IN_GEN_ITER_DESTROY; - inAdvance = IN_REV_LIST_ITER_ADVANCE; - - inGetCurR = IN_GEN_VITER_GET_CUR_R; -// inGetCurWC = //IN_LIST_ITER_GET_CUR_WC; -// inSetCurWC = //IN_HALT; -// inRefFromCur = //IN_LIST_ITER_REF_FROM_CUR; - useGenericId = true; - break; - - - case MapVal: - inCreateWV = IN_GEN_ITER_FROM_REF; - inCreateWC = IN_GEN_ITER_FROM_REF; - inUnwind = IN_GEN_ITER_UNWIND; - inDestroy = IN_GEN_ITER_DESTROY; - inAdvance = IN_MAP_ITER_ADVANCE; - - inGetCurR = IN_GEN_VITER_GET_CUR_R; - inGetCurWC = IN_GEN_VITER_GET_CUR_R; //IN_HALT; //IN_LIST_ITER_GET_CUR_WC; -// inSetCurWC = IN_HALT;//IN_HALT; -// inRefFromCur = IN_HALT;//IN_LIST_ITER_REF_FROM_CUR; - useGenericId = true; - break; - - case MapEl: - inCreateWV = IN_GEN_ITER_FROM_REF; - inCreateWC = IN_GEN_ITER_FROM_REF; - inUnwind = IN_GEN_ITER_UNWIND; - inDestroy = IN_GEN_ITER_DESTROY; - inAdvance = IN_MAP_ITER_ADVANCE; - - inGetCurR = IN_GEN_ITER_GET_CUR_R; -// inGetCurWC = //IN_LIST_ITER_GET_CUR_WC; -// inSetCurWC = //IN_HALT; -// inRefFromCur = //IN_LIST_ITER_REF_FROM_CUR; - useGenericId = true; - break; - - case User: - assert(false); - } -} - -IterImpl::IterImpl( Type type, Function *func ) : - type(type), - func(func), - useFuncId(true), - useSearchUT(true), - useGenericId(false), - inCreateWV(IN_UITER_CREATE_WV), - inCreateWC(IN_UITER_CREATE_WC), - inUnwind(IN_UITER_UNWIND), - inDestroy(IN_UITER_DESTROY), - inAdvance(IN_UITER_ADVANCE), - inGetCurR(IN_UITER_GET_CUR_R), - inGetCurWC(IN_UITER_GET_CUR_WC), - inSetCurWC(IN_UITER_SET_CUR_WC), - inRefFromCur(IN_UITER_REF_FROM_CUR) -{} - -IterDef *Compiler::findIterDef( IterDef::Type type, Function *func ) -{ - IterDefSetEl *el = iterDefSet.find( IterDef( type, func ) ); - if ( el == 0 ) - el = iterDefSet.insert( IterDef( type, func ) ); - return &el->key; -} - -IterDef *Compiler::findIterDef( IterDef::Type type ) -{ - IterDefSetEl *el = iterDefSet.find( IterDef( type ) ); - if ( el == 0 ) - el = iterDefSet.insert( IterDef( type ) 
); - return &el->key; -} - -UniqueType *Compiler::findUniqueType( enum TYPE typeId ) -{ - UniqueType searchKey( typeId ); - UniqueType *uniqueType = uniqeTypeMap.find( &searchKey ); - if ( uniqueType == 0 ) { - uniqueType = new UniqueType( typeId ); - uniqeTypeMap.insert( uniqueType ); - } - return uniqueType; -} - -UniqueType *Compiler::findUniqueType( enum TYPE typeId, LangEl *langEl ) -{ - UniqueType searchKey( typeId, langEl ); - UniqueType *uniqueType = uniqeTypeMap.find( &searchKey ); - if ( uniqueType == 0 ) { - uniqueType = new UniqueType( typeId, langEl ); - uniqeTypeMap.insert( uniqueType ); - } - return uniqueType; -} - -UniqueType *Compiler::findUniqueType( enum TYPE typeId, IterDef *iterDef ) -{ - UniqueType searchKey( typeId, iterDef ); - UniqueType *uniqueType = uniqeTypeMap.find( &searchKey ); - if ( uniqueType == 0 ) { - uniqueType = new UniqueType( typeId, iterDef ); - uniqeTypeMap.insert( uniqueType ); - } - return uniqueType; -} - -UniqueType *Compiler::findUniqueType( enum TYPE typeId, StructEl *structEl ) -{ - UniqueType searchKey( typeId, structEl ); - UniqueType *uniqueType = uniqeTypeMap.find( &searchKey ); - if ( uniqueType == 0 ) { - uniqueType = new UniqueType( typeId, structEl ); - uniqeTypeMap.insert( uniqueType ); - } - return uniqueType; -} - -UniqueType *Compiler::findUniqueType( enum TYPE typeId, GenericType *generic ) -{ - UniqueType searchKey( typeId, generic ); - UniqueType *uniqueType = uniqeTypeMap.find( &searchKey ); - if ( uniqueType == 0 ) { - uniqueType = new UniqueType( typeId, generic ); - uniqeTypeMap.insert( uniqueType ); - } - return uniqueType; -} - -/* 0-based. */ -ObjectField *ObjectDef::findFieldNum( long offset ) -{ - /* Bounds check. */ - if ( offset >= fieldList.length() ) - return 0; - - int fn = 0; - FieldList::Iter field = fieldList; - while ( fn < offset ) { - fn++; - field++; - } - - return field->value; -} - -/* Finds the first field by type. 
*/ -ObjectField *ObjectDef::findFieldType( Compiler *pd, UniqueType *ut ) -{ - for ( FieldList::Iter f = fieldList; f.lte(); f++ ) { - UniqueType *fUT = f->value->typeRef->resolveType( pd ); - if ( fUT == ut ) - return f->value; - } - return 0; -} - - -long sizeOfField( UniqueType *fieldUT ) -{ - long size = 0; - switch ( fieldUT->typeId ) { - case TYPE_ITER: - /* Select on the iterator type. */ - switch ( fieldUT->iterDef->type ) { - case IterDef::Tree: - case IterDef::Child: - case IterDef::Repeat: - case IterDef::RevRepeat: - size = sizeof(tree_iter_t) / sizeof(word_t); - break; - - case IterDef::RevChild: - size = sizeof(rev_tree_iter_t) / sizeof(word_t); - break; - - case IterDef::MapEl: - case IterDef::ListEl: - case IterDef::RevListVal: - size = sizeof(generic_iter_t) / sizeof(word_t); - break; - - case IterDef::User: - /* User iterators are just a pointer to the user_iter_t struct. The - * struct needs to go right beneath the call to the user iterator - * so it can be found by a yield. It is therefore allocated on the - * stack right before the call. */ - size = 1; - break; - } - break; - case TYPE_REF: - size = 2; - break; - case TYPE_GENERIC: - size = 1; - break; - case TYPE_LIST_PTRS: - size = COLM_LIST_EL_SIZE; - break; - case TYPE_MAP_PTRS: - size = COLM_MAP_EL_SIZE; - break; - default: - size = 1; - break; - } - - return size; -} - -void ObjectDef::referenceField( Compiler *pd, ObjectField *field ) -{ - field->beenReferenced = true; -} - -UniqueType *LangVarRef::loadField( Compiler *pd, CodeVect &code, - ObjectDef *inObject, ObjectField *el, bool forWriting, bool revert ) const -{ - /* Ensure that the field is referenced. */ - inObject->referenceField( pd, el ); - - UniqueType *elUT = el->typeRef->uniqueType; - - if ( elUT->val() ) { - if ( forWriting ) { - /* The instruction, depends on whether or not we are reverting. 
*/ - if ( pd->revertOn && revert ) - code.append( el->inGetValWV ); - else - code.append( el->inGetValWC ); - } - else { - /* Loading for writing */ - code.append( el->inGetValR ); - } - } - else { - /* If it's a reference then we load it read always. */ - if ( forWriting ) { - /* The instruction, depends on whether or not we are reverting. */ - if ( elUT->typeId == TYPE_ITER ) - code.append( el->iterImpl->inGetCurWC ); - else if ( pd->revertOn && revert ) - code.append( el->inGetWV ); - else - code.append( el->inGetWC ); - } - else { - /* Loading something for reading */ - if ( elUT->typeId == TYPE_ITER ) - code.append( el->iterImpl->inGetCurR ); - else - code.append( el->inGetR ); - } - } - - if ( el->useGenericId ) - code.appendHalf( el->generic->id ); - - if ( el->useOffset() ) { - /* Gets of locals and fields require offsets. Fake vars like token - * data and lhs don't require it. */ - code.appendHalf( el->offset ); - } - else if ( el->isRhsGet() ) { - /* Need to place the array computing the val. */ - code.append( el->rhsVal.length() ); - for ( Vector<RhsVal>::Iter rg = el->rhsVal; rg.lte(); rg++ ) { - code.append( rg->prodEl->production->prodNum ); - code.append( rg->prodEl->pos ); - } - } - - if ( el->isConstVal ) { - code.appendHalf( el->constValId ); - - if ( el->constValId == CONST_ARG ) { - /* Make sure we have this string. */ - StringMapEl *mapEl = 0; - if ( pd->literalStrings.insert( el->constValArg, &mapEl ) ) - mapEl->value = pd->literalStrings.length()-1; - - code.appendWord( mapEl->value ); - } - } - - /* If we are dealing with an iterator then dereference it. */ - if ( elUT->typeId == TYPE_ITER ) - elUT = el->typeRef->searchUniqueType; - - return elUT; -} - -/* The qualification must start at a local frame. There cannot be any pointer. */ -long LangVarRef::loadQualificationRefs( Compiler *pd, CodeVect &code, - NameScope *rootScope ) const -{ - long count = 0; - - /* Start the search from the root object. 
*/ - NameScope *searchScope = rootScope; - - for ( QualItemVect::Iter qi = *qual; qi.lte(); qi++ ) { - /* Lookup the field in the current qualification. */ - ObjectField *el = searchScope->findField( qi->data ); - if ( el == 0 ) - error(qi->loc) << "cannot resolve qualification " << qi->data << endp; - - if ( qi.pos() > 0 ) { - if ( el->isRhsGet() ) { - code.append( IN_RHS_REF_FROM_QUAL_REF ); - code.appendHalf( 0 ); - - code.append( el->rhsVal.length() ); - for ( Vector<RhsVal>::Iter rg = el->rhsVal; rg.lte(); rg++ ) { - code.append( rg->prodEl->production->prodNum ); - code.append( rg->prodEl->pos ); - } - } - else { - code.append( IN_REF_FROM_QUAL_REF ); - code.appendHalf( 0 ); - code.appendHalf( el->offset ); - } - } - else if ( el->iterImpl != 0 ) { - code.append( el->iterImpl->inRefFromCur ); - code.appendHalf( el->offset ); - } - else if ( el->typeRef->type == TypeRef::Ref ) { - code.append( IN_REF_FROM_REF ); - code.appendHalf( el->offset ); - } - else { - code.append( IN_REF_FROM_LOCAL ); - code.appendHalf( el->offset ); - } - - UniqueType *elUT = el->typeRef->uniqueType; - if ( elUT->typeId == TYPE_ITER ) - elUT = el->typeRef->searchUniqueType; - - assert( qi->form == QualItem::Dot ); - - ObjectDef *searchObjDef = elUT->objectDef(); - searchScope = searchObjDef->rootScope; - - count += 1; - } - return count; -} - -void LangVarRef::loadQualification( Compiler *pd, CodeVect &code, - NameScope *rootScope, int lastPtrInQual, bool forWriting, bool revert ) const -{ - /* Start the search from the root object. */ - NameScope *searchScope = rootScope; - - for ( QualItemVect::Iter qi = *qual; qi.lte(); qi++ ) { - /* Lookup the field int the current qualification. 
*/ - ObjectField *el = searchScope->findField( qi->data ); - if ( el == 0 ) - error(qi->loc) << "cannot resolve qualification " << qi->data << endp; - - if ( forWriting && el->refActive ) - error(qi->loc) << "reference active, cannot write to object" << endp; - - bool lfForWriting = forWriting; - bool lfRevert = revert; - - /* If there is a pointer in the qualification, we need to compute - * forWriting and revert. */ - if ( lastPtrInQual >= 0 ) { - if ( qi.pos() <= lastPtrInQual ) { - /* If we are before or at the pointer we are strictly read - * only, regardless of the origin. */ - lfForWriting = false; - lfRevert = false; - } - else { - /* If we are past the pointer then we are always reverting - * because the object is global. Forwriting is as passed in. - * */ - lfRevert = true; - } - } - - UniqueType *qualUT = loadField( pd, code, searchScope->owningObj, - el, lfForWriting, lfRevert ); - - if ( qi->form == QualItem::Dot ) { - /* Cannot a reference. Iterator yes (access of the iterator not - * hte current) */ - if ( qualUT->ptr() ) - error(loc) << "dot cannot be used to access a pointer" << endp; - } - else if ( qi->form == QualItem::Arrow ) { - if ( qualUT->ptr() ) { - /* This deref instruction exists to capture the pointer reverse - * execution purposes. */ - if ( pd->revertOn && qi.pos() == lastPtrInQual && forWriting ) { - /* This is like a global load. */ - code.append( IN_PTR_ACCESS_WV ); - } - } - else { - error(loc) << "arrow operator cannot be used to " - "access this type" << endp; - } - } - - ObjectDef *searchObjDef = qualUT->objectDef(); - searchScope = searchObjDef->rootScope; - } -} - -void LangVarRef::loadContextObj( Compiler *pd, CodeVect &code, - int lastPtrInQual, bool forWriting ) const -{ - /* Start the search in the global object. */ - ObjectDef *rootObj = structDef->objectDef; - - if ( forWriting && lastPtrInQual < 0 ) { - /* If we are writing an no reference was found in the qualification - * then load the gloabl with a revert. 
*/ - if ( pd->revertOn ) - code.append( IN_LOAD_CONTEXT_WV ); - else - code.append( IN_LOAD_CONTEXT_WC ); - } - else { - /* Either we are reading or we are loading a pointer that will be - * dereferenced. */ - code.append( IN_LOAD_CONTEXT_R ); - } - - loadQualification( pd, code, rootObj->rootScope, lastPtrInQual, forWriting, true ); -} - -void LangVarRef::loadGlobalObj( Compiler *pd, CodeVect &code, - int lastPtrInQual, bool forWriting ) const -{ - NameScope *scope = nspace != 0 ? nspace->rootScope : pd->rootNamespace->rootScope; - - if ( forWriting && lastPtrInQual < 0 ) { - /* If we are writing an no reference was found in the qualification - * then load the gloabl with a revert. */ - if ( pd->revertOn ) - code.append( IN_LOAD_GLOBAL_WV ); - else - code.append( IN_LOAD_GLOBAL_WC ); - } - else { - /* Either we are reading or we are loading a pointer that will be - * dereferenced. */ - code.append( IN_LOAD_GLOBAL_R ); - } - - loadQualification( pd, code, scope, lastPtrInQual, forWriting, true ); -} - -void LangVarRef::loadScopedObj( Compiler *pd, CodeVect &code, - NameScope *scope, int lastPtrInQual, bool forWriting ) const -{ -// NameScope *scope = nspace != 0 ? nspace->rootScope : pd->rootNamespace->rootScope; - - if ( forWriting && lastPtrInQual < 0 ) { - /* If we are writing an no reference was found in the qualification - * then load the gloabl with a revert. */ - if ( pd->revertOn ) - code.append( IN_LOAD_GLOBAL_WV ); - else - code.append( IN_LOAD_GLOBAL_WC ); - } - else { - /* Either we are reading or we are loading a pointer that will be - * dereferenced. */ - code.append( IN_LOAD_GLOBAL_R ); - } - - loadQualification( pd, code, scope, lastPtrInQual, forWriting, true ); -} - -void LangVarRef::loadInbuiltObject( Compiler *pd, CodeVect &code, - int lastPtrInQual, bool forWriting ) const -{ - /* Start the search in the local frame. 
*/ - loadQualification( pd, code, scope, lastPtrInQual, forWriting, pd->revertOn ); -} - -void LangVarRef::loadLocalObj( Compiler *pd, CodeVect &code, - int lastPtrInQual, bool forWriting ) const -{ - /* Start the search in the local frame. */ - loadQualification( pd, code, scope, lastPtrInQual, forWriting, false ); -} - -void LangVarRef::loadObj( Compiler *pd, CodeVect &code, - int lastPtrInQual, bool forWriting ) const -{ - if ( nspaceQual != 0 && nspaceQual->qualNames.length() > 0 ) { - Namespace *nspace = pd->rootNamespace->findNamespace( nspaceQual->qualNames[0] ); - loadScopedObj( pd, code, nspace->rootScope, lastPtrInQual, forWriting ); - } - else if ( isInbuiltObject() ) - loadInbuiltObject( pd, code, lastPtrInQual, forWriting ); - else if ( isLocalRef() ) - loadLocalObj( pd, code, lastPtrInQual, forWriting ); - else if ( isProdRef( pd ) ) { - LangVarRef *dup = new LangVarRef( *this ); - dup->qual->prepend( QualItem( QualItem::Dot, InputLoc(), scope->caseClauseVarRef->name ) ); - dup->loadObj( pd, code, lastPtrInQual, forWriting ); - } - else if ( isStructRef() ) - loadContextObj( pd, code, lastPtrInQual, forWriting ); - else - loadGlobalObj( pd, code, lastPtrInQual, forWriting ); -} - - -bool castAssignment( Compiler *pd, CodeVect &code, UniqueType *destUT, - UniqueType *destSearchUT, UniqueType *srcUT ) -{ - if ( destUT == srcUT ) - return true; - - /* Casting trees to any. */ - if ( destUT->typeId == TYPE_TREE && destUT->langEl == pd->anyLangEl && - srcUT->typeId == TYPE_TREE ) - return true; - - /* Setting a reference from a tree. */ - if ( destUT->typeId == TYPE_REF && srcUT->typeId == TYPE_TREE && - destUT->langEl == srcUT->langEl ) - return true; - - /* Setting a tree from a reference. */ - if ( destUT->typeId == TYPE_TREE && srcUT->typeId == TYPE_REF && - destUT->langEl == srcUT->langEl ) - return true; - - /* Setting an iterator from a tree. 
*/ - if ( destUT->typeId == TYPE_ITER && srcUT->typeId == TYPE_TREE && - destSearchUT->langEl == srcUT->langEl ) - return true; - - /* Assigning nil to a tree. */ - if ( destUT->typeId == TYPE_TREE && srcUT->typeId == TYPE_NIL ) - return true; - - if ( destUT->typeId == TYPE_STRUCT && srcUT->typeId == TYPE_NIL ) - return true; - - if ( destUT->typeId == TYPE_GENERIC && srcUT->typeId == TYPE_NIL ) - return true; - - if ( destUT->typeId == TYPE_TREE && srcUT->typeId == TYPE_TREE && - srcUT->langEl == pd->anyLangEl ) - return true; - - return false; -} - -void LangVarRef::setFieldIter( Compiler *pd, CodeVect &code, - ObjectDef *inObject, ObjectField *el, UniqueType *objUT, - UniqueType *exprType, bool revert ) const -{ - code.append( el->iterImpl->inSetCurWC ); - code.appendHalf( el->offset ); -} - -void LangVarRef::setField( Compiler *pd, CodeVect &code, - ObjectDef *inObject, ObjectField *el, - UniqueType *exprUT, bool revert ) const -{ - /* Ensure that the field is referenced. */ - inObject->referenceField( pd, el ); - - if ( exprUT->val() ) { - if ( pd->revertOn && revert ) - code.append( el->inSetValWV ); - else - code.append( el->inSetValWC ); - } - else { - if ( pd->revertOn && revert ) - code.append( el->inSetWV ); - else - code.append( el->inSetWC ); - } - - /* Maybe write out an offset. */ - if ( el->useOffset() ) - code.appendHalf( el->offset ); -} - - -UniqueType *LangVarRef::evaluate( Compiler *pd, CodeVect &code, bool forWriting ) const -{ - /* Lookup the loadObj. */ - VarRefLookup lookup = lookupField( pd ); - - /* Load the object, if any. */ - loadObj( pd, code, lookup.lastPtrInQual, forWriting ); - - /* Load the field. */ - UniqueType *ut = loadField( pd, code, lookup.inObject, - lookup.objField, forWriting, false ); - - return ut; -} - -bool LangVarRef::canTakeRef( Compiler *pd, VarRefLookup &lookup ) const -{ - bool canTake = false; - - /* If the var is not a local, it must be an attribute accessed - * via a local and attributes. 
*/ - if ( lookup.inObject->type == ObjectDef::FrameType ) - canTake = true; - else if ( isLocalRef() ) { - if ( lookup.lastPtrInQual < 0 && ! lookup.uniqueType->ptr() ) - canTake = true; - } - - return canTake; -} - -void LangVarRef::verifyRefPossible( Compiler *pd, VarRefLookup &lookup ) const -{ - bool canTake = canTakeRef( pd, lookup ); - - if ( !canTake ) { - error(loc) << "can only take references of locals or " - "attributes accessed via a local" << endp; - } - - if ( lookup.objField->refActive ) - error(loc) << "reference currently active, cannot take another" << endp; -} - -bool LangExpr::canTakeRef( Compiler *pd ) const -{ - bool canTake = false; - - if ( type == LangExpr::TermType && term->type == LangTerm::VarRefType ) { - VarRefLookup lookup = term->varRef->lookupField( pd ); - if ( term->varRef->canTakeRef( pd, lookup ) ) - canTake = true; - } - - return canTake; -} - - -/* Return the field referenced. */ -ObjectField *LangVarRef::preEvaluateRef( Compiler *pd, CodeVect &code ) const -{ - VarRefLookup lookup = lookupField( pd ); - - verifyRefPossible( pd, lookup ); - - loadQualificationRefs( pd, code, scope ); - - return lookup.objField; -} - -/* Return the field referenced. */ -ObjectField *LangVarRef::evaluateRef( Compiler *pd, CodeVect &code, long pushCount ) const -{ - VarRefLookup lookup = lookupField( pd ); - - verifyRefPossible( pd, lookup ); - - /* Ensure that the field is referenced. */ - lookup.inObject->referenceField( pd, lookup.objField ); - - /* Note that we could have modified children. */ - if ( qual->length() == 0 ) - lookup.objField->refActive = true; - - /* Whenever we take a reference we have to assume writing and that the - * tree is dirty. 
*/ - lookup.objField->dirtyTree = true; - - if ( qual->length() > 0 ) { - if ( lookup.objField->isRhsGet() ) { - code.append( IN_RHS_REF_FROM_QUAL_REF ); - code.appendHalf( pushCount ); - - ObjectField *el = lookup.objField; - code.append( el->rhsVal.length() ); - for ( Vector<RhsVal>::Iter rg = el->rhsVal; rg.lte(); rg++ ) { - code.append( rg->prodEl->production->prodNum ); - code.append( rg->prodEl->pos ); - } - } - else { - code.append( IN_REF_FROM_QUAL_REF ); - code.appendHalf( pushCount ); - code.appendHalf( lookup.objField->offset ); - } - } - else if ( lookup.objField->iterImpl != 0 ) { - code.append( lookup.objField->iterImpl->inRefFromCur ); - code.appendHalf( lookup.objField->offset ); - } - else if ( lookup.objField->typeRef->type == TypeRef::Ref ) { - code.append( IN_REF_FROM_REF ); - code.appendHalf( lookup.objField->offset ); - } - else { - code.append( IN_REF_FROM_LOCAL ); - code.appendHalf( lookup.objField->offset ); - } - - return lookup.objField; -} - -IterImpl *LangVarRef::chooseTriterCall( Compiler *pd, - UniqueType *searchUT, CallArgVect *args ) -{ - IterImpl *iterImpl = 0; - - /* Evaluate the triter args and choose the triter call based on it. */ - if ( args->length() == 1 ) { - /* Evaluate the expression. 
*/ - CodeVect unused; - CallArgVect::Iter pe = *args; - UniqueType *exprUT = (*pe)->expr->evaluate( pd, unused ); - - if ( exprUT->typeId == TYPE_GENERIC && exprUT->generic->typeId == GEN_LIST ) { - if ( searchUT == exprUT->generic->elUt ) - iterImpl = new IterImpl( IterImpl::ListEl ); - else - iterImpl = new IterImpl( IterImpl::ListVal ); - } - - if ( exprUT->typeId == TYPE_GENERIC && exprUT->generic->typeId == GEN_MAP ) { - if ( searchUT == exprUT->generic->elUt ) - iterImpl = new IterImpl( IterImpl::MapEl ); - else - iterImpl = new IterImpl( IterImpl::MapVal ); - } - } - - if ( iterImpl == 0 ) - iterImpl = new IterImpl( IterImpl::Tree ); - - return iterImpl; -} - -ObjectField **LangVarRef::evaluateArgs( Compiler *pd, CodeVect &code, - VarRefLookup &lookup, CallArgVect *args ) -{ - /* Parameter list is given only for user defined methods. Otherwise it - * will be null. */ - ParameterList *paramList = lookup.objMethod->paramList; - - /* Match the number of arguments. */ - int numArgs = args != 0 ? args->length() : 0; - if ( numArgs != lookup.objMethod->numParams ) - error(loc) << "wrong number of arguments" << endp; - - /* This is for storing the object fields used by references. */ - ObjectField **paramRefs = new ObjectField*[numArgs]; - memset( paramRefs, 0, sizeof(ObjectField*) * numArgs ); - - /* Done now if there are no args. */ - if ( args == 0 ) - return paramRefs; - - /* We use this only if there is a paramter list. */ - ParameterList::Iter p; - long size = 0; - long tempPops = 0; - long pos = 0; - - paramList != 0 && ( p = *paramList ); - for ( CallArgVect::Iter pe = *args; pe.lte(); pe++ ) { - /* Get the expression and the UT for the arg. */ - LangExpr *expression = (*pe)->expr; - UniqueType *paramUT = lookup.objMethod->paramUTs[pe.pos()]; - - if ( paramUT->typeId == TYPE_REF ) { - if ( expression->canTakeRef( pd ) ) { - /* Push object loads for reference parameters. 
*/ - LangVarRef *varRef = expression->term->varRef; - ObjectField *refOf = varRef->preEvaluateRef( pd, code ); - paramRefs[pe.pos()] = refOf; - - size += varRef->qual->length() * 2; - (*pe)->offQualRef = size; - /**/ - - refOf = varRef->evaluateRef( pd, code, 0 ); //(size - (*pe)->offQualRef) ); - paramRefs[pe.pos()] = refOf; - - //size += 2; - } - else { - /* First pass we need to allocate and evaluate temporaries. */ - UniqueType *exprUT = expression->evaluate( pd, code ); - - (*pe)->exprUT = exprUT; - - size += 1; - (*pe)->offTmp = size; - tempPops += 1; - /**/ - code.append( IN_REF_FROM_BACK ); - code.appendHalf( 0 ); //size - (*pe)->offTmp ); - - //size += 2; - } - - if ( lookup.objMethod->func ) { - code.append( IN_STASH_ARG ); - code.appendHalf( pos ); - code.appendHalf( 2 ); - } - - pos += 2; - } - else { - UniqueType *exprUT = expression->evaluate( pd, code ); - // pd->unwindCode.remove( 0, 1 ); - - if ( !castAssignment( pd, code, paramUT, 0, exprUT ) ) - error(loc) << "arg " << pe.pos()+1 << " is of the wrong type" << endp; - - size += 1; - - if ( lookup.objMethod->func && !lookup.objMethod->func->inHost ) { - code.append( IN_STASH_ARG ); - code.appendHalf( pos ); - code.appendHalf( 1 ); - } - - pos += 1; - } - - /* Advance the parameter list iterator if we have it. */ - paramList != 0 && p.increment(); - } - - argSize = tempPops; - - return paramRefs; -} - -void LangVarRef::resetActiveRefs( Compiler *pd, VarRefLookup &lookup, - ObjectField **paramRefs ) const -{ - /* Parameter list is given only for user defined methods. Otherwise it - * will be null. 
*/ - for ( long p = 0; p < lookup.objMethod->numParams; p++ ) { - if ( paramRefs[p] != 0 ) - paramRefs[p]->refActive = false; - } -} - -bool LangVarRef::isFinishCall( VarRefLookup &lookup ) const -{ - return lookup.objMethod->type == ObjectMethod::ParseFinish; -} - -void LangVarRef::callOperation( Compiler *pd, CodeVect &code, VarRefLookup &lookup ) const -{ - /* This is for writing if it is a non-const builtin. */ - bool forWriting = lookup.objMethod->func == 0 && - !lookup.objMethod->isConst; - - if ( lookup.objMethod->useCallObj ) { - /* Load the object, if any. */ - loadObj( pd, code, lookup.lastPtrInQual, forWriting ); - } - - /* Check if we need to revert the function. If it operates on a reference - * or if it is not local then we need to revert it. */ - bool revert = lookup.lastPtrInQual >= 0 || !isLocalRef() || isInbuiltObject(); - bool unwind = false; - - if ( isFinishCall( lookup ) ) { - code.append( IN_SEND_EOF_W ); - - LangTerm::parseFrag( pd, code, 0 ); - - code.append( IN_GET_PARSER_MEM_R ); - code.appendHalf( 0 ); - } - else { - if ( pd->revertOn && revert ) { - if ( lookup.objMethod->opcodeWV == IN_CALL_WV || - lookup.objMethod->opcodeWC == FN_EXIT ) - unwind = true; - - if ( lookup.objMethod->useFnInstr ) - code.append( IN_FN ); - code.append( lookup.objMethod->opcodeWV ); - } - else { - if ( lookup.objMethod->opcodeWC == IN_CALL_WC || - lookup.objMethod->opcodeWC == FN_EXIT ) - unwind = true; - - if ( lookup.objMethod->useFnInstr ) - code.append( IN_FN ); - code.append( lookup.objMethod->opcodeWC ); - } - } - - if ( lookup.objMethod->useFuncId ) - code.appendHalf( lookup.objMethod->funcId ); - - if ( lookup.objMethod->useGenericId ) - code.appendHalf( lookup.objMethod->generic->id ); - - if ( unwind ) { - if ( pd->unwindCode.length() == 0 ) - code.appendHalf( 0 ); - else { - code.appendHalf( pd->unwindCode.length() + 1 ); - code.append( pd->unwindCode ); - code.append( IN_DONE ); - } - } -} - -void LangVarRef::popRefQuals( Compiler *pd, CodeVect 
&code, - VarRefLookup &lookup, CallArgVect *args, bool temps ) const -{ - long popCount = 0; - - /* Evaluate and push the args. */ - if ( args != 0 ) { - for ( CallArgVect::Iter pe = *args; pe.lte(); pe++ ) { - /* Get the expression and the UT for the arg. */ - LangExpr *expression = (*pe)->expr; - UniqueType *paramUT = lookup.objMethod->paramUTs[pe.pos()]; - - if ( paramUT->typeId == TYPE_REF ) { - if ( expression->canTakeRef( pd ) ) { - LangVarRef *varRef = expression->term->varRef; - popCount += varRef->qual->length() * 2; - } - } - } - - if ( popCount > 0 ) { - code.append( IN_POP_N_WORDS ); - code.appendHalf( (short)popCount ); - } - - if ( temps ) { - for ( CallArgVect::Iter pe = *args; pe.lte(); pe++ ) { - /* Get the expression and the UT for the arg. */ - LangExpr *expression = (*pe)->expr; - UniqueType *paramUT = lookup.objMethod->paramUTs[pe.pos()]; - - if ( paramUT->typeId == TYPE_REF ) { - if ( ! expression->canTakeRef( pd ) ) - code.append( IN_POP_TREE ); - } - } - } - } -} - - -UniqueType *LangVarRef::evaluateCall( Compiler *pd, CodeVect &code, CallArgVect *args ) -{ - /* Evaluate the object. */ - VarRefLookup lookup = lookupMethod( pd ); - - Function *func = lookup.objMethod->func; - - /* Prepare the contiguous call args space. */ - int asLoc; - if ( func != 0 && !func->inHost ) { - code.append( IN_PREP_ARGS ); - asLoc = code.length(); - code.appendHalf( 0 ); - } - - /* Evaluate and push the arguments. */ - ObjectField **paramRefs = evaluateArgs( pd, code, lookup, args ); - - /* Write the call opcode. */ - callOperation( pd, code, lookup ); - - popRefQuals( pd, code, lookup, args, true ); - - resetActiveRefs( pd, lookup, paramRefs); - delete[] paramRefs; - - if ( func != 0 && !func->inHost ) { - code.append( IN_CLEAR_ARGS ); - code.appendHalf( func->paramListSize ); - code.setHalf( asLoc, func->paramListSize ); - } - - if ( func != 0 && !func->inHost ) - code.append( IN_LOAD_RETVAL ); - - /* Return the type to the expression. 
*/ - return lookup.uniqueType; -} - -/* Can match on a tree or a ref. A tree always comes back. */ -UniqueType *LangTerm::evaluateMatch( Compiler *pd, CodeVect &code ) const -{ - /* Add the vars bound by the pattern into the local scope. */ - for ( PatternItemList::Iter item = *pattern->list; item.lte(); item++ ) { - if ( item->varRef != 0 ) - item->bindId = pattern->nextBindId++; - } - - UniqueType *ut = varRef->evaluate( pd, code ); - if ( ut->typeId != TYPE_TREE && ut->typeId != TYPE_REF ) { - error(varRef->loc) << "expected match against a tree/ref type" << endp; - } - - /* Store the language element type in the pattern. This is needed by - * the pattern parser. */ - pattern->langEl = ut->langEl; - - code.append( IN_MATCH ); - code.appendHalf( pattern->patRepId ); - - for ( PatternItemList::Iter item = pattern->list->last(); item.gtb(); item-- ) { - if ( item->varRef != 0 ) { - /* Compute the unique type. */ - UniqueType *exprType = pd->findUniqueType( TYPE_TREE, item->prodEl->langEl ); - - /* Get the type of the variable being assigned to. */ - VarRefLookup lookup = item->varRef->lookupField( pd ); - - item->varRef->loadObj( pd, code, lookup.lastPtrInQual, false ); - item->varRef->setField( pd, code, lookup.inObject, - lookup.objField, exprType, false ); - } - } - - /* The process of matching turns refs into trees. */ - if ( ut->typeId == TYPE_REF ) - ut = pd->findUniqueType( TYPE_TREE, ut->langEl ); - - return ut; -} - -UniqueType *LangTerm::evaluateProdCompare( Compiler *pd, CodeVect &code ) const -{ - UniqueType *ut = varRef->evaluate( pd, code ); - if ( ut->typeId != TYPE_TREE && ut->typeId != TYPE_REF ) { - error(varRef->loc) << "expected match against a tree/ref type" << endp; - } - code.append( IN_PROD_NUM ); - - /* look up the production name. 
*/ - Production *prod = pd->findProductionByLabel( ut->langEl, this->prod ); - - if ( prod == 0 ) { - error( this->loc) << "could not find " - "production label: " << this->prod << endp; - } - - unsigned int n = prod->prodNum; - code.append( IN_LOAD_INT ); - code.appendWord( n ); - - code.append( IN_TST_EQL_VAL ); - - if ( expr != 0 ) { - code.append( IN_DUP_VAL ); - - /* Test: jump past the match if the production test failed. We don't have - * the distance yet. */ - long jumpFalse = code.length(); - code.append( IN_JMP_FALSE_VAL ); - code.appendHalf( 0 ); - - code.append( IN_POP_VAL ); - - expr->evaluate( pd, code ); - - /* Set the jump false distance. */ - long falseDist = code.length() - jumpFalse - 3; - code.setHalf( jumpFalse+1, falseDist ); - - return ut; - } - - return pd->uniqueTypeInt; -} - -void LangTerm::evaluateCapture( Compiler *pd, CodeVect &code, UniqueType *valUt ) const -{ - if ( varRef != 0 ) { - /* Get the type of the variable being assigned to. */ - VarRefLookup lookup = varRef->lookupField( pd ); - - /* Need a copy of the tree. */ - code.append( lookup.uniqueType->tree() ? IN_DUP_TREE : IN_DUP_VAL ); - - varRef->loadObj( pd, code, lookup.lastPtrInQual, false ); - varRef->setField( pd, code, lookup.inObject, lookup.objField, valUt, false ); - } -} - -UniqueType *LangTerm::evaluateNew( Compiler *pd, CodeVect &code ) const -{ - /* What is being newstructed. 
*/ - UniqueType *newUT = typeRef->uniqueType; - - if ( newUT->typeId != TYPE_STRUCT && newUT->typeId != TYPE_GENERIC ) - error(loc) << "can only new a struct or generic" << endp; - - bool context = false; - if ( newUT->typeId == TYPE_GENERIC && - newUT->generic->typeId == GEN_PARSER && - newUT->generic->elUt->langEl->contextIn != 0 ) - { - if ( fieldInitArgs == 0 || fieldInitArgs->length() != 1 ) - error(loc) << "parse command requires just context " << endp; - context = true; - } - - if ( newUT->typeId == TYPE_GENERIC ) { - code.append( IN_CONS_GENERIC ); - code.appendHalf( newUT->generic->id ); - code.appendHalf( 0 ); // stopId - - if ( newUT->generic->typeId == GEN_PARSER ) { - - } - } - else if ( newUT->typeId == TYPE_STRUCT && newUT->structEl == pd->streamSel ) { - code.append( IN_NEW_STREAM ); - } - else { - code.append( IN_NEW_STRUCT ); - code.appendHalf( newUT->structEl->id ); - } - - /* - * First load the context into the parser. - */ - if ( context ) { - for ( int i = 0; i < fieldInitArgs->length(); i++ ) { - /* Eval what we are initializing with. */ - UniqueType *argUT = fieldInitArgs->data[i]->expr->evaluate( pd, code ); - - if ( argUT == pd->uniqueTypeInput ) { - code.append( IN_SET_PARSER_INPUT ); - } - else if ( argUT->typeId == TYPE_STRUCT ) { - code.append( IN_SET_PARSER_CONTEXT ); - } - else { - error(loc) << "cannot initialize parser with this type, context or input only" << endp; - } - } - } - - evaluateCapture( pd, code, newUT ); - - return newUT; -} - -UniqueType *LangTerm::evaluateCast( Compiler *pd, CodeVect &code ) const -{ - expr->evaluate( pd, code ); - code.append( IN_TREE_CAST ); - code.appendHalf( typeRef->uniqueType->langEl->id ); - return typeRef->uniqueType; -} - -void LangTerm::assignFieldArgs( Compiler *pd, CodeVect &code, UniqueType *replUT ) const -{ - /* Now assign the field initializations. Note that we need to do this in - * reverse because the last expression evaluated is at the top of the - * stack. 
*/ - if ( fieldInitArgs != 0 && fieldInitArgs->length() > 0 ) { - ObjectDef *objDef = replUT->objectDef(); - /* Note the reverse traversal. */ - for ( FieldInitVect::Iter pi = fieldInitArgs->last(); pi.gtb(); pi-- ) { - FieldInit *fieldInit = *pi; - ObjectField *field = objDef->findFieldNum( pi.pos() ); - if ( field == 0 ) { - error(fieldInit->loc) << "failed to find init pos " << - pi.pos() << " in object" << endp; - } - - /* Lookup the type of the field and compare it to the type of the - * expression. */ - UniqueType *fieldUT = field->typeRef->uniqueType; - if ( !castAssignment( pd, code, fieldUT, 0, fieldInit->exprUT ) ) - error(fieldInit->loc) << "type mismatch in initialization" << endp; - - /* The set field instruction must leave the object on the top of - * the stack. */ - code.append( IN_SET_FIELD_TREE_LEAVE_WC ); - code.appendHalf( field->offset ); - } - } -} - -UniqueType *LangTerm::evaluateConstruct( Compiler *pd, CodeVect &code ) const -{ - /* Evaluate the initialization expressions. */ - if ( fieldInitArgs != 0 && fieldInitArgs->length() > 0 ) { - for ( FieldInitVect::Iter pi = *fieldInitArgs; pi.lte(); pi++ ) { - FieldInit *fieldInit = *pi; - fieldInit->exprUT = fieldInit->expr->evaluate( pd, code ); - } - } - - /* Assign bind ids to the variables in the replacement. */ - for ( ConsItemList::Iter item = *constructor->list; item.lte(); item++ ) { - if ( item->expr != 0 ) - item->bindId = constructor->nextBindId++; - } - - /* Evaluate variable references. 
*/ - for ( ConsItemList::Iter item = constructor->list->last(); item.gtb(); item-- ) { - if ( item->type == ConsItem::ExprType ) { - UniqueType *ut = item->expr->evaluate( pd, code ); - - if ( ut->typeId != TYPE_TREE ) { - error(constructor->loc) << "variables used in " - "replacements must be trees" << endp; - } - - if ( !isStr( ut ) ) { - if ( item->trim == ConsItem::TrimYes ) - code.append( IN_TREE_TRIM ); - } - - item->langEl = ut->langEl; - } - } - - /* Construct the tree using the tree information stored in the compiled - * code. */ - code.append( IN_CONSTRUCT ); - code.appendHalf( constructor->patRepId ); - - /* Lookup the type of the replacement and store it in the replacement - * object so that replacement parsing has a target. */ - UniqueType *replUT = typeRef->uniqueType; - if ( replUT->typeId != TYPE_TREE ) - error(loc) << "don't know how to construct this type" << endp; - - constructor->langEl = replUT->langEl; - assignFieldArgs( pd, code, replUT ); - - evaluateCapture( pd, code, replUT ); - - return replUT; -} - -void LangTerm::parseFrag( Compiler *pd, CodeVect &code, int stopId ) -{ - code.append( IN_PARSE_FRAG_W ); -} - -UniqueType *LangTerm::evaluateReadReduce( Compiler *pd, CodeVect &code ) const -{ - UniqueType *parserUT = typeRef->uniqueType; - UniqueType *targetUT = parserUT->generic->elUt; - - /* Should be one arg and it should be a stream. */ - - /* Assign bind ids to the variables in the replacement. 
*/ - for ( ConsItemList::Iter item = *parserText->list; item.lte(); item++ ) { - switch ( item->type ) { - case ConsItem::LiteralType: { - break; - } - case ConsItem::InputText: { - break; - } - case ConsItem::ExprType: { - item->expr->evaluate( pd, code ); - break; - }} - } - - code.append( IN_READ_REDUCE ); - code.appendHalf( parserUT->generic->id ); - code.appendHalf( parserText->reducerId ); - - return targetUT; -} - -UniqueType *LangTerm::evaluateParse( Compiler *pd, CodeVect &code, - bool tree, bool stop ) const -{ - if ( parserText->reduce && parserText->read ) { - return evaluateReadReduce( pd, code ); - } - - UniqueType *parserUT = typeRef->uniqueType; - UniqueType *targetUT = parserUT->generic->elUt; - - /* If this is a parse stop then we need to verify that the type is - * compatible with parse stop. */ - if ( stop ) - targetUT->langEl->parseStop = true; - int stopId = stop ? targetUT->langEl->id : 0; - - bool context = false; - if ( fieldInitArgs != 0 ) { - if ( fieldInitArgs == 0 || ( fieldInitArgs->length() != 1 && fieldInitArgs->length() != 2 ) ) - error(loc) << "parse command requires just context and input" << endp; - context = true; - } - - /* Evaluate variable references. */ - for ( ConsItemList::Iter item = consItemList->last(); item.gtb(); item-- ) { - if ( item->type == ConsItem::ExprType ) { - UniqueType *ut = item->expr->evaluate( pd, code ); - - if ( ut->typeId != TYPE_TREE ) - error() << "variables used in replacements must be trees" << endp; - - if ( item->trim == ConsItem::TrimYes ) - code.append( IN_TREE_TRIM ); - - item->langEl = ut->langEl; - } - } - - /* Construct the parser. */ - - if ( parserText->reduce ) { - code.append( IN_CONS_REDUCER ); - code.appendHalf( parserUT->generic->id ); - code.appendHalf( parserText->reducerId ); - } - else { - code.append( IN_CONS_GENERIC ); - code.appendHalf( parserUT->generic->id ); - code.appendHalf( stopId ); - } - - /* - * First load the context into the parser. 
- */ - if ( context ) { - for ( int i = 0; i < fieldInitArgs->length(); i++ ) { - /* Eval what we are initializing with. */ - UniqueType *argUT = fieldInitArgs->data[i]->expr->evaluate( pd, code ); - - if ( argUT == pd->uniqueTypeInput ) { - code.append( IN_SET_PARSER_INPUT ); - } - else if ( argUT->typeId == TYPE_STRUCT && targetUT->langEl->contextIn != 0 ) { - code.append( IN_SET_PARSER_CONTEXT ); - } - else { - error(loc) << "cannot initialize parser with this type, context or input only" << endp; - } - } - } - - /*****************************/ - - if ( parserText->list->length() == 0 ) { - code.append( IN_SEND_NOTHING ); - - /* Parse instruction, dependent on whether or not we are producing - * revert or commit code. */ - parseFrag( pd, code, stopId ); - } - else { - for ( ConsItemList::Iter item = *parserText->list; item.lte(); item++ ) { - bool isStream = false; - uchar trim = TRIM_DEFAULT; - - switch ( item->type ) { - case ConsItem::LiteralType: { - String result; - bool unusedCI; - prepareLitString( result, unusedCI, - item->prodEl->typeRef->pdaLiteral->data, - item->prodEl->typeRef->pdaLiteral->loc ); - - /* Make sure we have this string. */ - StringMapEl *mapEl = 0; - if ( pd->literalStrings.insert( result, &mapEl ) ) - mapEl->value = pd->literalStrings.length()-1; - - code.append( IN_LOAD_STR ); - code.appendWord( mapEl->value ); - break; - } - case ConsItem::InputText: { - /* Make sure we have this string. */ - StringMapEl *mapEl = 0; - if ( pd->literalStrings.insert( item->data, &mapEl ) ) - mapEl->value = pd->literalStrings.length()-1; - - code.append( IN_LOAD_STR ); - code.appendWord( mapEl->value ); - break; - } - case ConsItem::ExprType: { - UniqueType *ut = item->expr->evaluate( pd, code ); - - if ( ut->typeId == TYPE_VOID ) { - /* Clear it away if return type is void. 
*/ - code.append( IN_POP_VAL ); - continue; - } - - if ( ut->typeId == TYPE_INT || ut->typeId == TYPE_BOOL ) - code.append( IN_INT_TO_STR ); - - if ( ut == pd->uniqueTypeStream ) - isStream = true; - - if ( item->trim == ConsItem::TrimYes ) - trim = TRIM_YES; - else if ( item->trim == ConsItem::TrimNo ) - trim = TRIM_NO; - - break; - }} - - if ( isStream ) - code.append( IN_SEND_STREAM_W ); - else if ( tree ) { - code.append( IN_SEND_TREE_W ); - code.append( trim ); - } - else { - code.append( IN_SEND_TEXT_W ); - code.append( trim ); - } - - /* Parse instruction, dependent on whether or not we are producing - * revert or commit code. */ - parseFrag( pd, code, stopId ); - } - } - - /* - * Finish operation - */ - - if ( !stop ) { - code.append( IN_SEND_EOF_W ); - parseFrag( pd, code, stopId ); - } - - if ( parserText->reduce ) { - code.append( IN_REDUCE_COMMIT ); - } - - /* Pull out the error and save it off. */ - code.append( IN_DUP_VAL ); - code.append( IN_GET_PARSER_MEM_R ); - code.appendHalf( 1 ); - code.append( IN_SET_ERROR ); - - /* Replace the parser with the parsed tree. */ - code.append( IN_GET_PARSER_MEM_R ); - code.appendHalf( 0 ); - - /* Capture to the local var. */ - evaluateCapture( pd, code, targetUT ); - - return targetUT; -} - -void LangTerm::evaluateSendStream( Compiler *pd, CodeVect &code ) const -{ - UniqueType *varUt = varRef->evaluate( pd, code ); - - if ( varUt->listOf( pd->uniqueTypeStream ) ) { - code.append( IN_GET_VLIST_MEM_R ); - code.appendHalf( varUt->generic->id ); - code.appendHalf( 0 ); - } - - /* Assign bind ids to the variables in the replacement. */ - for ( ConsItemList::Iter item = *parserText->list; item.lte(); item++ ) { - uchar trim = TRIM_DEFAULT; - - switch ( item->type ) { - case ConsItem::LiteralType: { - String result; - bool unusedCI; - prepareLitString( result, unusedCI, - item->prodEl->typeRef->pdaLiteral->data, - item->prodEl->typeRef->pdaLiteral->loc ); - - /* Make sure we have this string. 
*/ - StringMapEl *mapEl = 0; - if ( pd->literalStrings.insert( result, &mapEl ) ) - mapEl->value = pd->literalStrings.length()-1; - - code.append( IN_LOAD_STR ); - code.appendWord( mapEl->value ); - break; - } - case ConsItem::InputText: { - /* Make sure we have this string. */ - StringMapEl *mapEl = 0; - if ( pd->literalStrings.insert( item->data, &mapEl ) ) - mapEl->value = pd->literalStrings.length()-1; - - code.append( IN_LOAD_STR ); - code.appendWord( mapEl->value ); - break; - } - case ConsItem::ExprType: { - UniqueType *ut = item->expr->evaluate( pd, code ); - if ( ut->typeId == TYPE_VOID ) { - /* Clear it away if return type is void. */ - code.append( IN_POP_VAL ); - continue; - } - - if ( ut->typeId == TYPE_INT || ut->typeId == TYPE_BOOL ) - code.append( IN_INT_TO_STR ); - - if ( item->trim == ConsItem::TrimYes ) - trim = TRIM_YES; - else if ( item->trim == ConsItem::TrimNo ) - trim = TRIM_NO; - - break; - }} - - code.append( IN_PRINT_TREE ); - code.append( trim ); - } -} - -void LangTerm::evaluateSendParser( Compiler *pd, CodeVect &code, bool strings ) const -{ - UniqueType *varUt = varRef->evaluate( pd, code ); - - if ( varUt->parser() ) { - } - else if ( varUt->listOf( pd->uniqueTypeStream ) ) { - code.append( IN_GET_VLIST_MEM_R ); - code.appendHalf( varUt->generic->id ); - code.appendHalf( 0 ); - } - - if ( parserText->list->length() == 0 ) { - code.append( IN_SEND_NOTHING ); - - /* Parse instruction, dependent on whether or not we are producing - * revert or commit code. */ - parseFrag( pd, code, 0 ); - } - else { - - /* Assign bind ids to the variables in the replacement. 
*/ - for ( ConsItemList::Iter item = *parserText->list; item.lte(); item++ ) { - bool isStream = false; - uchar trim = TRIM_DEFAULT; - - switch ( item->type ) { - case ConsItem::LiteralType: { - String result; - bool unusedCI; - prepareLitString( result, unusedCI, - item->prodEl->typeRef->pdaLiteral->data, - item->prodEl->typeRef->pdaLiteral->loc ); - - /* Make sure we have this string. */ - StringMapEl *mapEl = 0; - if ( pd->literalStrings.insert( result, &mapEl ) ) - mapEl->value = pd->literalStrings.length()-1; - - code.append( IN_LOAD_STR ); - code.appendWord( mapEl->value ); - break; - } - case ConsItem::InputText: { - /* Make sure we have this string. */ - StringMapEl *mapEl = 0; - if ( pd->literalStrings.insert( item->data, &mapEl ) ) - mapEl->value = pd->literalStrings.length()-1; - - code.append( IN_LOAD_STR ); - code.appendWord( mapEl->value ); - break; - } - case ConsItem::ExprType: - UniqueType *ut = item->expr->evaluate( pd, code ); - if ( ut->typeId == TYPE_VOID ) { - /* Clear it away if return type is void. 
*/ - code.append( IN_POP_VAL ); - continue; - } - - if ( ut == pd->uniqueTypeStream ) - isStream = true; - - if ( item->trim == ConsItem::TrimYes ) - trim = TRIM_YES; - else if ( item->trim == ConsItem::TrimNo ) - trim = TRIM_NO; - - if ( ut->typeId == TYPE_INT || ut->typeId == TYPE_BOOL ) - code.append( IN_INT_TO_STR ); - - break; - } - - if ( isStream ) - code.append( IN_SEND_STREAM_W ); - else if ( !strings ) { - code.append( IN_SEND_TREE_W ); - code.append( trim ); - } - else { - code.append( IN_SEND_TEXT_W ); - code.append( trim ); - } - - parseFrag( pd, code, 0 ); - } - } - - if ( eof ) { - code.append( IN_SEND_EOF_W ); - parseFrag( pd, code, 0 ); - } -} - -UniqueType *LangTerm::evaluateSend( Compiler *pd, CodeVect &code ) const -{ - UniqueType *varUt = varRef->lookup( pd ); - - if ( varUt == pd->uniqueTypeStream ) - evaluateSendStream( pd, code ); - else if ( varUt->listOf( pd->uniqueTypeStream ) ) - evaluateSendStream( pd, code ); - else if ( varUt->parser() ) - evaluateSendParser( pd, code, true ); - else - error(loc) << "can only send to parsers and streams" << endl; - - return varUt; -} - - -UniqueType *LangTerm::evaluateSendTree( Compiler *pd, CodeVect &code ) const -{ - UniqueType *varUt = varRef->lookup( pd ); - - if ( varUt->parser() ) - evaluateSendParser( pd, code, false ); - else - error(loc) << "can only send_tree to parsers" << endl; - - return varUt; -} - -UniqueType *LangTerm::evaluateEmbedString( Compiler *pd, CodeVect &code ) const -{ - /* Assign bind ids to the variables in the replacement. */ - for ( ConsItemList::Iter item = *consItemList; item.lte(); item++ ) { - switch ( item->type ) { - case ConsItem::LiteralType: { - String result; - bool unusedCI; - prepareLitString( result, unusedCI, - item->prodEl->typeRef->pdaLiteral->data, - item->prodEl->typeRef->pdaLiteral->loc ); - - /* Make sure we have this string. 
*/ - StringMapEl *mapEl = 0; - if ( pd->literalStrings.insert( result, &mapEl ) ) - mapEl->value = pd->literalStrings.length()-1; - - code.append( IN_LOAD_STR ); - code.appendWord( mapEl->value ); - break; - } - case ConsItem::InputText: { - /* Make sure we have this string. */ - StringMapEl *mapEl = 0; - if ( pd->literalStrings.insert( item->data, &mapEl ) ) - mapEl->value = pd->literalStrings.length()-1; - - code.append( IN_LOAD_STR ); - code.appendWord( mapEl->value ); - break; - } - case ConsItem::ExprType: { - UniqueType *ut = item->expr->evaluate( pd, code ); - - if ( ut->typeId == TYPE_INT || ut->typeId == TYPE_BOOL ) - code.append( IN_INT_TO_STR ); - - if ( ut->typeId == TYPE_TREE && - ut->langEl != pd->strLangEl && ut != pd->uniqueTypeStream ) - { - /* Convert it to a string. */ - code.append( IN_TREE_TO_STR ); - } - break; - }} - } - - /* If there was nothing loaded, load the empty string. We must produce - * something. */ - if ( consItemList->length() == 0 ) { - String result = ""; - - /* Make sure we have this string. */ - StringMapEl *mapEl = 0; - if ( pd->literalStrings.insert( result, &mapEl ) ) - mapEl->value = pd->literalStrings.length()-1; - - code.append( IN_LOAD_STR ); - code.appendWord( mapEl->value ); - } - - long items = consItemList->length(); - for ( long i = 0; i < items-1; i++ ) - code.append( IN_CONCAT_STR ); - - return pd->uniqueTypeStr; -} - -UniqueType *LangTerm::evaluateSearch( Compiler *pd, CodeVect &code ) const -{ - UniqueType *ut = typeRef->uniqueType; - if ( ut->typeId != TYPE_TREE ) - error(loc) << "can only search for tree types" << endp; - - /* Evaluate the expression. */ - UniqueType *treeUT = varRef->evaluate( pd, code ); - if ( treeUT->typeId != TYPE_TREE && treeUT->typeId != TYPE_REF ) - error(loc) << "search can be applied only to tree/ref types" << endp; - - /* Run the search. 
*/ - code.append( IN_TREE_SEARCH ); - code.appendWord( ut->langEl->id ); - return ut; -} - -UniqueType *LangTerm::evaluate( Compiler *pd, CodeVect &code ) const -{ - UniqueType *retUt = 0; - switch ( type ) { - case VarRefType: - retUt = varRef->evaluate( pd, code ); - break; - case MethodCallType: - retUt = varRef->evaluateCall( pd, code, args ); - break; - case NilType: - code.append( IN_LOAD_NIL ); - retUt = pd->uniqueTypeNil; - break; - case TrueType: - code.append( IN_LOAD_TRUE ); - retUt = pd->uniqueTypeBool; - break; - case FalseType: - code.append( IN_LOAD_FALSE ); - retUt = pd->uniqueTypeBool; - break; - case MakeTokenType: - retUt = evaluateMakeToken( pd, code ); - break; - case MakeTreeType: - retUt = evaluateMakeTree( pd, code ); - break; - case NumberType: { - unsigned int n = atoi( data ); - code.append( IN_LOAD_INT ); - code.appendWord( n ); - retUt = pd->uniqueTypeInt; - break; - } - case StringType: { - String interp; - bool unused; - prepareLitString( interp, unused, data, InputLoc() ); - - /* Make sure we have this string. */ - StringMapEl *mapEl = 0; - if ( pd->literalStrings.insert( interp, &mapEl ) ) - mapEl->value = pd->literalStrings.length()-1; - - code.append( IN_LOAD_STR ); - code.appendWord( mapEl->value ); - retUt = pd->uniqueTypeStr; - break; - } - case MatchType: - retUt = evaluateMatch( pd, code ); - break; - case ProdCompareType: - retUt = evaluateProdCompare( pd, code ); - break; - case ParseType: - retUt = evaluateParse( pd, code, false, false ); - break; - case ParseTreeType: - retUt = evaluateParse( pd, code, true, false ); - break; - case ParseStopType: - retUt = evaluateParse( pd, code, false, true ); - break; - case ConstructType: - retUt = evaluateConstruct( pd, code ); - break; - case SendType: - retUt = evaluateSend( pd, code ); - break; - case SendTreeType: - retUt = evaluateSendTree( pd, code ); - break; - case NewType: - retUt = evaluateNew( pd, code ); - break; - case TypeIdType: { - /* Evaluate the expression. 
*/ - UniqueType *ut = typeRef->uniqueType; - if ( ut->typeId != TYPE_TREE ) - error() << "typeid can only be applied to tree types" << endp; - - code.append( IN_LOAD_INT ); - code.appendWord( ut->langEl->id ); - retUt = pd->uniqueTypeInt; - break; - } - case SearchType: - retUt = evaluateSearch( pd, code ); - break; - case EmbedStringType: - retUt = evaluateEmbedString( pd, code ); - break; - case CastType: - retUt = evaluateCast( pd, code ); - break; - } - - // if ( retUt->val() ) - // pd->unwindCode.insert( 0, IN_POP_VAL ); - // else - // pd->unwindCode.insert( 0, IN_POP_TREE ); - - return retUt; -} - -UniqueType *LangExpr::evaluate( Compiler *pd, CodeVect &code ) const -{ - switch ( type ) { - case BinaryType: { - switch ( op ) { - case '+': { - UniqueType *lt = left->evaluate( pd, code ); - UniqueType *rt = right->evaluate( pd, code ); - - // pd->unwindCode.remove( 0, 2 ); - // pd->unwindCode.insert( 0, IN_POP_TREE ); - - if ( lt == pd->uniqueTypeInt && rt == pd->uniqueTypeInt ) { - code.append( IN_ADD_INT ); - return pd->uniqueTypeInt; - } - - if ( lt == pd->uniqueTypeStr && rt == pd->uniqueTypeStr ) { - code.append( IN_CONCAT_STR ); - return pd->uniqueTypeStr; - } - - - error(loc) << "do not have an addition operator for these types" << endp; - break; - } - case '-': { - UniqueType *lt = left->evaluate( pd, code ); - UniqueType *rt = right->evaluate( pd, code ); - - if ( lt == pd->uniqueTypeInt && rt == pd->uniqueTypeInt ) { - code.append( IN_SUB_INT ); - return pd->uniqueTypeInt; - } - - error(loc) << "do not have an addition operator for these types" << endp; - break; - } - case '*': { - UniqueType *lt = left->evaluate( pd, code ); - UniqueType *rt = right->evaluate( pd, code ); - - if ( lt == pd->uniqueTypeInt && rt == pd->uniqueTypeInt ) { - code.append( IN_MULT_INT ); - return pd->uniqueTypeInt; - } - - error(loc) << "do not have an multiplication " - "operator for these types" << endp; - break; - } - case '/': { - UniqueType *lt = left->evaluate( pd, 
code ); - UniqueType *rt = right->evaluate( pd, code ); - - if ( lt == pd->uniqueTypeInt && rt == pd->uniqueTypeInt ) { - code.append( IN_DIV_INT ); - return pd->uniqueTypeInt; - } - - error(loc) << "do not have an division" - "operator for these types" << endp; - break; - } - case OP_DoubleEql: { - UniqueType *lt = left->evaluate( pd, code ); - UniqueType *rt = right->evaluate( pd, code ); - - if ( lt != rt ) - error(loc) << "comparison of different types" << endp; - - if ( lt->val() ) - code.append( IN_TST_EQL_VAL ); - else - code.append( IN_TST_EQL_TREE ); - return pd->uniqueTypeBool; - } - case OP_NotEql: { - UniqueType *lt = left->evaluate( pd, code ); - UniqueType *rt = right->evaluate( pd, code ); - - if ( lt != rt ) - error(loc) << "comparison of different types" << endp; - - if ( lt->val() ) - code.append( IN_TST_NOT_EQL_VAL ); - else - code.append( IN_TST_NOT_EQL_TREE ); - - return pd->uniqueTypeBool; - } - case '<': { - UniqueType *lt = left->evaluate( pd, code ); - UniqueType *rt = right->evaluate( pd, code ); - - if ( lt != rt ) - error(loc) << "comparison of different types" << endp; - - if ( lt->val() ) - code.append( IN_TST_LESS_VAL ); - else - code.append( IN_TST_LESS_TREE ); - return pd->uniqueTypeBool; - } - case '>': { - UniqueType *lt = left->evaluate( pd, code ); - UniqueType *rt = right->evaluate( pd, code ); - - if ( lt != rt ) - error(loc) << "comparison of different types" << endp; - - if ( lt->val() ) - code.append( IN_TST_GRTR_VAL ); - else - code.append( IN_TST_GRTR_TREE ); - - return pd->uniqueTypeBool; - } - case OP_LessEql: { - UniqueType *lt = left->evaluate( pd, code ); - UniqueType *rt = right->evaluate( pd, code ); - - if ( lt != rt ) - error(loc) << "comparison of different types" << endp; - - if ( lt->val() ) - code.append( IN_TST_LESS_EQL_VAL ); - else - code.append( IN_TST_LESS_EQL_TREE ); - - return pd->uniqueTypeBool; - } - case OP_GrtrEql: { - UniqueType *lt = left->evaluate( pd, code ); - UniqueType *rt = right->evaluate( 
pd, code ); - - if ( lt != rt ) - error(loc) << "comparison of different types" << endp; - - if ( lt->val() ) - code.append( IN_TST_GRTR_EQL_VAL ); - else - code.append( IN_TST_GRTR_EQL_TREE ); - - return pd->uniqueTypeBool; - } - case OP_LogicalAnd: { - /* Evaluate the left and duplicate it. */ - UniqueType *lut = left->evaluate( pd, code ); - if ( !lut->val() ) - code.append( IN_TST_NZ_TREE ); - code.append( IN_DUP_VAL ); - - /* Jump over the right if false, leaving the original left - * result on the top of the stack. We don't know the - * distance yet so record the position of the jump. */ - long jump = code.length(); - code.append( IN_JMP_FALSE_VAL ); - code.appendHalf( 0 ); - - /* Evauluate the right, add the test. Store it separately. */ - UniqueType *rut = right->evaluate( pd, code ); - if ( !rut->val() ) - code.append( IN_TST_NZ_TREE ); - - code.append( IN_TST_LOGICAL_AND ); - - /* Set the distance of the jump. */ - long distance = code.length() - jump - 3; - code.setHalf( jump+1, distance ); - - return pd->uniqueTypeInt; - } - case OP_LogicalOr: { - /* Evaluate the left and duplicate it. */ - UniqueType *lut = left->evaluate( pd, code ); - if ( !lut->val() ) - code.append( IN_TST_NZ_TREE ); - code.append( IN_DUP_VAL ); - - /* Jump over the right if true, leaving the original left - * result on the top of the stack. We don't know the - * distance yet so record the position of the jump. */ - long jump = code.length(); - code.append( IN_JMP_TRUE_VAL ); - code.appendHalf( 0 ); - - /* Evauluate the right, add the test. */ - UniqueType *rut = right->evaluate( pd, code ); - if ( !rut->val() ) - code.append( IN_TST_NZ_TREE ); - - code.append( IN_TST_LOGICAL_OR ); - - /* Set the distance of the jump. */ - long distance = code.length() - jump - 3; - code.setHalf( jump+1, distance ); - - return pd->uniqueTypeInt; - } - } - - assert(false); - return 0; - } - case UnaryType: { - switch ( op ) { - case '!': { - /* Evaluate the left and duplicate it. 
*/ - UniqueType *ut = right->evaluate( pd, code ); - if ( ut->val() ) - code.append( IN_NOT_VAL ); - else - code.append( IN_NOT_TREE ); - return pd->uniqueTypeBool; - } - case '$': { - UniqueType *ut = right->evaluate( pd, code ); - - if ( ut->typeId == TYPE_INT || ut->typeId == TYPE_BOOL ) - code.append( IN_INT_TO_STR ); - - code.append( IN_TREE_TO_STR_TRIM ); - return pd->uniqueTypeStr; - - } - case 'S': { - UniqueType *ut = right->evaluate( pd, code ); - - if ( ut->typeId == TYPE_INT || ut->typeId == TYPE_BOOL ) - code.append( IN_INT_TO_STR ); - - code.append( IN_TREE_TO_STR_TRIM_A ); - return pd->uniqueTypeStr; - } - case '%': { - UniqueType *ut = right->evaluate( pd, code ); - if ( ut->typeId == TYPE_INT || ut->typeId == TYPE_BOOL ) - code.append( IN_INT_TO_STR ); - else - code.append( IN_TREE_TO_STR ); - return pd->uniqueTypeStr; - } - case '^': { - UniqueType *rt = right->evaluate( pd, code ); - code.append( IN_TREE_TRIM ); - return rt; - } - case '@': { - UniqueType *rt = right->evaluate( pd, code ); - //code.append( IN_TREE_TRIM ); - return rt; - } - default: - assert(false); - } - return 0; - } - case TermType: { - return term->evaluate( pd, code ); - } - } - return 0; -} - -void LangVarRef::assignValue( Compiler *pd, CodeVect &code, - UniqueType *exprUT ) const -{ - /* Lookup the left hand side of the assignment. */ - VarRefLookup lookup = lookupField( pd ); - - if ( lookup.objField->refActive ) - error(loc) << "reference active, cannot write to object" << endp; - - if ( lookup.firstConstPart >= 0 ) { - error(loc) << "left hand side qualification \"" << - qual->data[lookup.firstConstPart].data << "\" is const" << endp; - } - - if ( lookup.objField->isConst ) - error(loc) << "field \"" << name << "\" is const" << endp; - - /* Writing guarantees the field is dirty. tree is dirty. */ - lookup.objField->dirtyTree = true; - - /* Check the types of the assignment and possibly cast. 
*/ - UniqueType *objUT = lookup.objField->typeRef->uniqueType; - assert( lookup.uniqueType == lookup.objField->typeRef->uniqueType ); - if ( !castAssignment( pd, code, objUT, lookup.iterSearchUT, exprUT ) ) - error(loc) << "type mismatch in assignment" << endp; - - /* Decide if we need to revert the assignment. */ - bool revert = lookup.lastPtrInQual >= 0 || !isLocalRef(); - - /* Load the object and generate the field setting code. */ - loadObj( pd, code, lookup.lastPtrInQual, true ); - - if ( lookup.uniqueType->typeId == TYPE_ITER ) - setFieldIter( pd, code, lookup.inObject, lookup.objField, lookup.uniqueType, exprUT, false ); - else - setField( pd, code, lookup.inObject, lookup.objField, exprUT, revert ); -} - -UniqueType *LangTerm::evaluateMakeToken( Compiler *pd, CodeVect &code ) const -{ -// if ( pd->compileContext != Compiler::CompileTranslation ) -// error(loc) << "make_token can be used only in a translation block" << endp; - - /* Match the number of arguments. */ - int numArgs = args != 0 ? args->length() : 0; - if ( numArgs < 2 ) - error(loc) << "need at least two arguments" << endp; - - for ( CallArgVect::Iter pe = *args; pe.lte(); pe++ ) { - /* Evaluate. */ - UniqueType *exprUT = (*pe)->expr->evaluate( pd, code ); - - if ( pe.pos() == 0 && exprUT != pd->uniqueTypeInt ) - error(loc) << "first arg, id, must be an int" << endp; - - if ( pe.pos() == 1 && exprUT != pd->uniqueTypeStr ) - error(loc) << "second arg, length, must be a string" << endp; - } - - /* The token is now created, send it. */ - code.append( IN_MAKE_TOKEN ); - code.append( args->length() ); - - return pd->uniqueTypeAny; -} - -UniqueType *LangTerm::evaluateMakeTree( Compiler *pd, CodeVect &code ) const -{ -// if ( pd->compileContext != Compiler::CompileTranslation ) -// error(loc) << "make_tree can be used only in a translation block" << endp; - - /* Match the number of arguments. */ - int numArgs = args != 0 ? 
args->length() : 0; - if ( numArgs < 1 ) - error(loc) << "need at least one argument" << endp; - - for ( CallArgVect::Iter pe = *args; pe.lte(); pe++ ) { - /* Evaluate. */ - UniqueType *exprUT = (*pe)->expr->evaluate( pd, code ); - - if ( pe.pos() == 0 && exprUT != pd->uniqueTypeInt ) - error(loc) << "first arg, nonterm id, must be an int" << endp; - } - - /* The token is now created, send it. */ - code.append( IN_MAKE_TREE ); - code.append( args->length() ); - - return pd->uniqueTypeAny; -} - -void LangStmt::compileForIterBody( Compiler *pd, - CodeVect &code, UniqueType *iterUT ) const -{ - /* Remember the top of the loop. */ - long top = code.length(); - - /* Advance */ - code.append( objField->iterImpl->inAdvance ); - code.appendHalf( objField->offset ); - - /* Test: jump past the while block if false. Note that we don't have the - * distance yet. */ - long jumpFalse = code.length(); - code.append( IN_JMP_FALSE_VAL ); - code.appendHalf( 0 ); - - /* - * Set up the loop cleanup code. - */ - - /* Add the cleanup for the current loop. */ - int lcLen = pd->unwindCode.length(); - pd->unwindCode.insertHalf( 0, objField->offset ); - pd->unwindCode.insert( 0, objField->iterImpl->inUnwind ); - - /* Compile the contents. */ - for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ ) - stmt->compile( pd, code ); - - pd->unwindCode.remove( 0, pd->unwindCode.length() - lcLen ); - - /* Jump back to the top to retest. */ - long retestDist = code.length() - top + 3; - code.append( IN_JMP ); - code.appendHalf( -retestDist ); - - /* Set the jump false distance. */ - long falseDist = code.length() - jumpFalse - 3; - code.setHalf( jumpFalse+1, falseDist ); - - /* Compute the jump distance for the break jumps. */ - for ( LongVect::Iter brk = pd->breakJumps; brk.lte(); brk++ ) { - long distance = code.length() - *brk - 3; - code.setHalf( *brk+1, distance ); - } - pd->breakJumps.empty(); - - /* Destroy the iterator. 
*/ - code.append( objField->iterImpl->inDestroy ); - code.appendHalf( objField->offset ); - - /* Clean up any prepush args. */ -} - -void LangStmt::compileForIter( Compiler *pd, CodeVect &code ) const -{ - /* The type we are searching for. */ - UniqueType *searchUT = typeRef->uniqueType; - - /* Lookup the iterator call. Make sure it is an iterator. */ - VarRefLookup lookup = iterCall->langTerm->varRef->lookupIterCall( pd ); - if ( lookup.objMethod->iterDef == 0 ) { - error(loc) << "attempt to iterate using something " - "that is not an iterator" << endp; - } - - /* Prepare the contiguous call args space. */ - Function *func = lookup.objMethod->func; - int asLoc; - if ( func != 0 ) { - code.append( IN_PREP_ARGS ); - asLoc = code.length(); - code.appendHalf( 0 ); - } - - /* - * Create the iterator from the local var. - */ - - UniqueType *iterUT = objField->typeRef->uniqueType; - IterImpl *iterImpl = 0; - - switch ( iterUT->iterDef->type ) { - case IterDef::Tree: - iterImpl = iterCall->langTerm->varRef->chooseTriterCall( pd, - searchUT, iterCall->langTerm->args ); - break; - case IterDef::Child: - iterImpl = new IterImpl( IterImpl::Child ); - break; - case IterDef::RevChild: - iterImpl = new IterImpl( IterImpl::RevChild ); - break; - case IterDef::Repeat: - iterImpl = new IterImpl( IterImpl::Repeat ); - break; - case IterDef::RevRepeat: - iterImpl = new IterImpl( IterImpl::RevRepeat ); - break; - case IterDef::User: - iterImpl = new IterImpl( IterImpl::User, iterUT->iterDef->func ); - break; - case IterDef::ListEl: - iterImpl = new IterImpl( IterImpl::ListEl ); - break; - case IterDef::RevListVal: - iterImpl = new IterImpl( IterImpl::RevListVal ); - break; - case IterDef::MapEl: - iterImpl = new IterImpl( IterImpl::MapEl ); - break; - } - - objField->iterImpl = iterImpl; - - /* Evaluate and push the arguments. 
*/ - ObjectField **paramRefs = iterCall->langTerm->varRef->evaluateArgs( - pd, code, lookup, iterCall->langTerm->args ); - - if ( pd->revertOn ) - code.append( iterImpl->inCreateWV ); - else - code.append( iterImpl->inCreateWC ); - - code.appendHalf( objField->offset ); - - /* Arg size (or func id for user iters). */ - if ( lookup.objMethod->func != 0 ) - code.appendHalf( lookup.objMethod->func->funcId ); - else - code.appendHalf( iterCall->langTerm->varRef->argSize ); - - /* Search type. */ - if ( iterImpl->useSearchUT ) - code.appendHalf( searchUT->langEl->id ); - - if ( iterImpl->useGenericId ) { - CodeVect unused; - UniqueType *ut = - iterCall->langTerm->args->data[0]->expr->evaluate( pd, unused ); - - code.appendHalf( ut->generic->id ); - } - - compileForIterBody( pd, code, iterUT ); - - iterCall->langTerm->varRef->popRefQuals( pd, code, lookup, - iterCall->langTerm->args, false ); - - iterCall->langTerm->varRef->resetActiveRefs( pd, lookup, paramRefs ); - delete[] paramRefs; - - if ( func != 0 ) { - code.append( IN_CLEAR_ARGS ); - code.appendHalf( func->paramListSize ); - code.setHalf( asLoc, func->paramListSize ); - } -} - -void LangStmt::compileWhile( Compiler *pd, CodeVect &code ) const -{ - /* Generate code for the while test. Remember the top. */ - long top = code.length(); - UniqueType *eut = expr->evaluate( pd, code ); - - /* Jump past the while block if false. Note that we don't have the - * distance yet. */ - long jumpFalse = code.length(); - half_t jinstr = eut->tree() ? IN_JMP_FALSE_TREE : IN_JMP_FALSE_VAL; - code.append( jinstr ); - code.appendHalf( 0 ); - - /* Compute the while block. */ - for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ ) - stmt->compile( pd, code ); - - /* Jump back to the top to retest. */ - long retestDist = code.length() - top + 3; - code.append( IN_JMP ); - code.appendHalf( -retestDist ); - - /* Set the jump false distance. 
*/ - long falseDist = code.length() - jumpFalse - 3; - code.setHalf( jumpFalse+1, falseDist ); - - /* Compute the jump distance for the break jumps. */ - for ( LongVect::Iter brk = pd->breakJumps; brk.lte(); brk++ ) { - long distance = code.length() - *brk - 3; - code.setHalf( *brk+1, distance ); - } - pd->breakJumps.empty(); -} - -void LangStmt::compile( Compiler *pd, CodeVect &code ) const -{ - CodeVect block; - - StringMapEl *mapEl = 0; - if ( pd->literalStrings.insert( "unwind code\n", &mapEl ) ) - mapEl->value = pd->literalStrings.length()-1; - - block.append( IN_LOAD_STR ); - block.appendWord( mapEl->value ); - - block.append( IN_POP_TREE ); - - pd->unwindCode.insert( 0, block ); - - switch ( type ) { - case ExprType: { - /* Evaluate the exrepssion, then pop it immediately. */ - UniqueType *exprUt = expr->evaluate( pd, code ); - if ( exprUt->tree() ) - code.append( IN_POP_TREE ); - else - code.append( IN_POP_VAL ); - - // pd->unwindCode.remove( 0, 1 ); - break; - } - case IfType: { - long jumpFalse = 0, jumpPastElse = 0, distance = 0; - - /* Evaluate the test. */ - UniqueType *eut = expr->evaluate( pd, code ); - - /* Jump past the if block if false. We don't know the distance - * yet so store the location of the jump. */ - jumpFalse = code.length(); - half_t jinstr = eut->tree() ? IN_JMP_FALSE_TREE : IN_JMP_FALSE_VAL; - - code.append( jinstr ); - code.appendHalf( 0 ); - - /* Compile the if true branch. */ - for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ ) - stmt->compile( pd, code ); - - if ( elsePart != 0 ) { - /* Jump past the else code for the if true branch. */ - jumpPastElse = code.length(); - code.append( IN_JMP ); - code.appendHalf( 0 ); - } - - /* Set the distance for the jump false case. */ - distance = code.length() - jumpFalse - 3; - code.setHalf( jumpFalse+1, distance ); - - if ( elsePart != 0 ) { - /* Compile the else branch. */ - elsePart->compile( pd, code ); - - /* Set the distance for jump over the else part. 
*/ - distance = code.length() - jumpPastElse - 3; - code.setHalf( jumpPastElse+1, distance ); - } - - break; - } - case ElseType: { - /* Compile the else branch. */ - for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ ) - stmt->compile( pd, code ); - break; - } - case RejectType: { - code.append( IN_REJECT ); - break; - } - case WhileType: { - compileWhile( pd, code ); - break; - } - case AssignType: { - /* Evaluate the exrepssion. */ - UniqueType *exprUT = expr->evaluate( pd, code ); - - /* Do the assignment. */ - varRef->assignValue( pd, code, exprUT ); - break; - } - case ForIterType: { - compileForIter( pd, code ); - break; - } - case ReturnType: { - /* Evaluate the exrepssion. */ - UniqueType *exprUT = expr->evaluate( pd, code ); - - if ( pd->curFunction == 0 ) { - /* In the main function */ - pd->mainReturnUT = exprUT; - } - else { - UniqueType *resUT = pd->curFunction->typeRef->uniqueType; - if ( resUT != pd->uniqueTypeVoid && - !castAssignment( pd, code, resUT, 0, exprUT ) ) - error(loc) << "return value wrong type" << endp; - } - - code.append( IN_SAVE_RET ); - - /* The loop cleanup code. */ - if ( pd->unwindCode.length() > 0 ) - code.append( pd->unwindCode ); - - /* Jump to the return label. The distance will be filled in - * later. */ - pd->returnJumps.append( code.length() ); - code.append( IN_JMP ); - code.appendHalf( 0 ); - break; - } - case BreakType: { - pd->breakJumps.append( code.length() ); - code.append( IN_JMP ); - code.appendHalf( 0 ); - break; - } - case YieldType: { - /* take a reference and yield it. Immediately reset the referece. 
*/ - varRef->preEvaluateRef( pd, code ); - ObjectField *objField = varRef->evaluateRef( pd, code, 0 ); - code.append( IN_YIELD ); - - if ( varRef->qual->length() > 0 ) { - code.append( IN_POP_N_WORDS ); - code.appendHalf( (short)(varRef->qual->length()*2) ); - } - - objField->refActive = false; - break; - } - } - - pd->unwindCode.remove( 0, block.length() ); -} - -void CodeBlock::compile( Compiler *pd, CodeVect &code ) const -{ - for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ ) - stmt->compile( pd, code ); -} - -void Compiler::findLocals( ObjectDef *localFrame, CodeBlock *block ) -{ - Locals &locals = block->locals; - - for ( FieldList::Iter ol = localFrame->fieldList; ol.lte(); ol++ ) { - ObjectField *el = ol->value; - - /* FIXME: This test needs to be improved. Match_text was getting - * through before useOffset was tested. What will? */ - if ( el->useOffset() && !el->isLhsEl() && - ( el->beenReferenced || el->isParam() ) ) - { - UniqueType *ut = el->typeRef->uniqueType; - if ( ut->tree() ) { - int depth = el->scope->depth(); - locals.append( LocalLoc( LT_Tree, depth, el->offset ) ); - } - } - - if ( el->useOffset() ) { - UniqueType *ut = el->typeRef->uniqueType; - if ( ut->typeId == TYPE_ITER ) { - int depth = el->scope->depth(); - LocalType type = LT_Tree; - switch ( ut->iterDef->type ) { - case IterDef::Tree: - case IterDef::Child: - case IterDef::Repeat: - case IterDef::RevRepeat: - type = LT_Iter; - break; - - case IterDef::MapEl: - case IterDef::ListEl: - case IterDef::RevListVal: - /* ? 
*/ - type = LT_Iter; - break; - - case IterDef::RevChild: - type = LT_RevIter; - break; - case IterDef::User: - type = LT_UserIter; - break; - } - - locals.append( LocalLoc( type, depth, (int)el->offset ) ); - } - } - } -} - -void Compiler::addProdLHSLoad( Production *prod, CodeVect &code, long &insertPos ) -{ - NameScope *scope = prod->redBlock->localFrame->rootScope; - ObjectField *lhsField = scope->findField("lhs"); - assert( lhsField != 0 ); - - CodeVect loads; - if ( lhsField->beenReferenced ) { - loads.append( IN_INIT_LHS_EL ); - loads.appendHalf( lhsField->offset ); - } - - code.insert( insertPos, loads ); - insertPos += loads.length(); -} - -void Compiler::addPushBackLHS( Production *prod, CodeVect &code, long &insertPos ) -{ - CodeBlock *block = prod->redBlock; - - /* If the lhs tree is dirty then we will need to save off the old lhs - * before it gets modified. We want to avoid this for attribute - * modifications. The computation of dirtyTree should deal with this for - * us. */ - NameScope *scope = block->localFrame->rootScope; - ObjectField *lhsField = scope->findField("lhs"); - assert( lhsField != 0 ); - - if ( lhsField->beenReferenced ) { - code.append( IN_STORE_LHS_EL ); - code.appendHalf( lhsField->offset ); - } -} - -void Compiler::addProdRHSLoads( Production *prod, CodeVect &code, long &insertPos ) -{ - CodeVect loads; - long elPos = 0; - for ( ProdElList::Iter rhsEl = *prod->prodElList; rhsEl.lte(); rhsEl++, elPos++ ) { - if ( rhsEl->type == ProdEl::ReferenceType ) { - if ( rhsEl->rhsElField->beenReferenced ) { - loads.append ( IN_INIT_RHS_EL ); - loads.appendHalf( elPos ); - loads.appendHalf( rhsEl->rhsElField->offset ); - } - } - } - - /* Insert and update the insert position. 
*/ - code.insert( insertPos, loads ); - insertPos += loads.length(); -} - - - -void Compiler::makeProdCopies( Production *prod ) -{ - int pos = 0; - for ( ProdElList::Iter pel = *prod->prodElList; pel.lte(); pel++, pos++) { - if ( pel->captureField != 0 ) { - prod->copy.append( pel->captureField->offset ); - prod->copy.append( pos ); - } - } -} - -void Compiler::compileReductionCode( Production *prod ) -{ - CodeBlock *block = prod->redBlock; - - /* Init the compilation context. */ - compileContext = CompileReduction; - revertOn = true; - block->frameId = nextFrameId++; - - CodeVect &code = block->codeWV; - - long afterInit = code.length(); - - /* Compile the reduce block. */ - block->compile( this, code ); - - /* Might need to load right hand side values. */ - addProdRHSLoads( prod, code, afterInit ); - - addProdLHSLoad( prod, code, afterInit ); - addPushBackLHS( prod, code, afterInit ); - - code.append( IN_PCR_RET ); - - /* Now that compilation is done variables are referenced. Make the local - * trees descriptor. */ - findLocals( block->localFrame, block ); -} - -void Compiler::compileTranslateBlock( LangEl *langEl ) -{ - CodeBlock *block = langEl->transBlock; - - /* Set up compilation context. */ - compileContext = CompileTranslation; - revertOn = true; - block->frameId = nextFrameId++; - - CodeVect &code = block->codeWV; - - if ( langEl->tokenDef->reCaptureVect.length() > 0 ) { - code.append( IN_INIT_CAPTURES ); - code.append( langEl->tokenDef->reCaptureVect.length() ); - } - - /* Set the local frame and compile the reduce block. */ - block->compile( this, code ); - - code.append( IN_PCR_RET ); - - /* Now that compilation is done variables are referenced. Make the local - * trees descriptor. */ - findLocals( block->localFrame, block ); -} - -void Compiler::compilePreEof( TokenRegion *region ) -{ - CodeBlock *block = region->preEofBlock; - - /* Set up compilation context. 
*/ - compileContext = CompileTranslation; - revertOn = true; - block->frameId = nextFrameId++; - - addInput( block->localFrame ); - addThis( block->localFrame ); - - CodeVect &code = block->codeWV; - - /* Set the local frame and compile the reduce block. */ - block->compile( this, code ); - - code.append( IN_PCR_RET ); - - /* Now that compilation is done variables are referenced. Make the local - * trees descriptor. */ - findLocals( block->localFrame, block ); -} - -int Compiler::arg0Offset() -{ - globalObjectDef->referenceField( this, arg0 ); - return arg0->offset; -} - -int Compiler::argvOffset() -{ - globalObjectDef->referenceField( this, argv ); - return argv->offset; -} - -int Compiler::stdsOffset() -{ - globalObjectDef->referenceField( this, stds ); - return stds->offset; -} - -void Compiler::compileRootBlock( ) -{ - CodeBlock *block = rootCodeBlock; - - /* The root block never needs to be reverted. */ - - /* Set up the compile context. No locals are needed for the root code - * block, but we need an empty local frame for the compile. */ - compileContext = CompileRoot; - revertOn = false; - - /* The block needs a frame id. */ - block->frameId = nextFrameId++; - - /* The root block is not reverted. */ - CodeVect &code = block->codeWC; - - code.append( IN_FN ); - code.append( FN_LOAD_ARG0 ); - code.appendHalf( arg0Offset() ); - - code.append( IN_FN ); - code.append( FN_LOAD_ARGV ); - code.appendHalf( argvOffset() ); - - code.append( IN_FN ); - code.append( FN_INIT_STDS ); - code.appendHalf( stdsOffset() ); - - block->compile( this, code ); - - code.append( IN_FN ); - code.append( FN_STOP ); - - /* Make the local trees descriptor. 
*/ - findLocals( rootLocalFrame, block ); -} - -void ObjectField::initField() -{ - switch ( type ) { - case UserLocalType: - case LhsElType: - case ParamValType: - case RedRhsType: - inGetR = IN_GET_LOCAL_R; - inGetWC = IN_GET_LOCAL_WC; - inSetWC = IN_SET_LOCAL_WC; - inGetValR = IN_GET_LOCAL_VAL_R; - inGetValWC = IN_GET_LOCAL_VAL_R; - inGetValWV = IN_GET_LOCAL_VAL_R; - inSetValWC = IN_SET_LOCAL_VAL_WC; - break; - - case ParamRefType: - inGetR = IN_GET_LOCAL_REF_R; - inGetWC = IN_GET_LOCAL_REF_WC; - inSetWC = IN_SET_LOCAL_REF_WC; - break; - - case UserFieldType: - inGetR = IN_GET_FIELD_TREE_R; - inGetWC = IN_GET_FIELD_TREE_WC; - inGetWV = IN_GET_FIELD_TREE_WV; - inSetWC = IN_SET_FIELD_TREE_WC; - inSetWV = IN_SET_FIELD_TREE_WV; - - //inGetValR; - inGetValR = IN_GET_FIELD_VAL_R; - //inGetValWC = IN_GET_FIELD_VAL_WC; - //inGetValWV; - inSetValWC = IN_SET_FIELD_VAL_WC; - //inSetValWV; - break; - - case GenericElementType: - case GenericDependentType: - case StructFieldType: - inGetR = IN_GET_STRUCT_R; - inGetWC = IN_GET_STRUCT_WC; - inGetWV = IN_GET_STRUCT_WV; - inSetWC = IN_SET_STRUCT_WC; - inSetWV = IN_SET_STRUCT_WV; - inGetValR = IN_GET_STRUCT_VAL_R; - inGetValWC = IN_GET_STRUCT_VAL_R; - inGetValWV = IN_GET_STRUCT_VAL_R; - inSetValWC = IN_SET_STRUCT_VAL_WC; - inSetValWV = IN_SET_STRUCT_VAL_WV; - break; - - case RhsNameType: - inGetR = IN_GET_RHS_VAL_R; - inGetWC = IN_GET_RHS_VAL_WC; - inGetWV = IN_GET_RHS_VAL_WV; - inSetWC = IN_SET_RHS_VAL_WC; - inSetWV = IN_SET_RHS_VAL_WC; - break; - - /* Inbuilts have instructions intialized outside the cons, at place of - * call. */ - case InbuiltFieldType: - case InbuiltObjectType: - case InbuiltOffType: - break; - - /* Out of date impl. 
*/ - case LexSubstrType: - break; - } -} - -void ObjectDef::placeField( Compiler *pd, ObjectField *field ) -{ - UniqueType *fieldUT = field->typeRef->uniqueType; - - switch ( field->type ) { - case ObjectField::LhsElType: - case ObjectField::UserLocalType: - case ObjectField::RedRhsType: - - /* Local frame fields. Move the running offset first since this is - * a negative off from the end. */ - nextOffset += sizeOfField( fieldUT ); - field->offset = -nextOffset; - break; - - - case ObjectField::GenericElementType: { - - /* Tree object frame fields. Record the position, then move the - * running offset. */ - field->offset = nextOffset; - nextOffset += sizeOfField( fieldUT ); - - if ( fieldUT->typeId == TYPE_MAP_PTRS ) { - if ( field->mapKeyField != 0 ) - field->mapKeyField->offset = field->offset; - } - - break; - } - - case ObjectField::UserFieldType: - - /* Tree object frame fields. Record the position, then move the - * running offset. */ - field->offset = nextOffset; - nextOffset += sizeOfField( fieldUT ); - break; - - case ObjectField::StructFieldType: - field->offset = nextOffset; - nextOffset += sizeOfField( fieldUT ); - break; - - case ObjectField::GenericDependentType: - /* There is an object field that this type depends on. When it is - * placed, this one will be placed as well. Nothing to do now. */ - - case ObjectField::InbuiltFieldType: - case ObjectField::InbuiltOffType: - case ObjectField::InbuiltObjectType: - case ObjectField::RhsNameType: - case ObjectField::LexSubstrType: - - case ObjectField::ParamValType: - case ObjectField::ParamRefType: - break; - } -} - -void Compiler::placeAllLanguageObjects() -{ - /* Init all user object fields (need consistent size). */ - for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { - ObjectDef *objDef = lel->objectDef; - if ( objDef != 0 ) { - /* Init all fields of the object. 
*/ - for ( FieldList::Iter f = objDef->fieldList; f.lte(); f++ ) - objDef->placeField( this, f->value ); - } - } -} - -void Compiler::placeAllStructObjects() -{ - for ( StructElList::Iter s = structEls; s.lte(); s++ ) { - ObjectDef *objectDef = s->structDef->objectDef; - for ( FieldList::Iter f = objectDef->fieldList; f.lte(); f++ ) - objectDef->placeField( this, f->value ); - } -} - -void Compiler::placeFrameFields( ObjectDef *localFrame ) -{ - for ( FieldList::Iter f = localFrame->fieldList; f.lte(); f++ ) - localFrame->placeField( this, f->value ); -} - -void Compiler::placeAllFrameObjects() -{ - /* Functions. */ - for ( FunctionList::Iter f = functionList; f.lte(); f++ ) - placeFrameFields( f->localFrame ); - - for ( FunctionList::Iter f = inHostList; f.lte(); f++ ) - placeFrameFields( f->localFrame ); - - /* Reduction code. */ - for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) { - if ( prod->redBlock != 0 ) - placeFrameFields( prod->redBlock->localFrame ); - } - - /* Token translation code. */ - for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { - if ( lel->transBlock != 0 ) { - ObjectDef *localFrame = lel->transBlock->localFrame; - if ( lel->tokenDef->reCaptureVect.length() > 0 ) { - FieldList::Iter f = localFrame->fieldList; - for ( int i = 0; i < lel->tokenDef->reCaptureVect.length(); i++, f++ ) - localFrame->placeField( this, f->value ); - } - - placeFrameFields( localFrame ); - } - } - - /* Preeof blocks. */ - for ( RegionList::Iter r = regionList; r.lte(); r++ ) { - if ( r->preEofBlock != 0 ) - placeFrameFields( r->preEofBlock->localFrame ); - } - - /* Root code. */ - placeFrameFields( rootLocalFrame ); -} - -void Compiler::placeUserFunction( Function *func, bool isUserIter ) -{ - /* Set up the parameters. 
*/ - long paramPos = 0, paramListSize = 0, paramOffset = 0; - UniqueType **paramUTs = new UniqueType*[func->paramList->length()]; - for ( ParameterList::Iter param = *func->paramList; param.lte(); param++, paramPos++ ) { - paramUTs[paramPos] = param->typeRef->uniqueType; - paramListSize += sizeOfField( paramUTs[paramPos] ); - } - - /* Param offset is relative to one past the last item in the array of - * words containing the args. */ - paramOffset = 0; - paramPos = 0; - for ( ParameterList::Iter param = *func->paramList; param.lte(); param++, paramPos++ ) { - /* How much space do we need to make for call overhead. */ - long frameAfterArgs = isUserIter ? IFR_AA : FR_AA; - - param->offset = frameAfterArgs + paramOffset; - - paramOffset += sizeOfField( paramUTs[paramPos] ); - } - - func->paramListSize = paramListSize; - func->paramUTs = paramUTs; - - func->objMethod->paramUTs = paramUTs; - - /* Insert the function into the global function map. */ - UniqueType *returnUT = func->typeRef != 0 ? - func->typeRef->uniqueType : uniqueTypeInt; - func->objMethod->returnUT = returnUT; - - func->objMethod->paramUTs = new UniqueType*[func->paramList->length()]; - memcpy( func->objMethod->paramUTs, paramUTs, - sizeof(UniqueType*) * func->paramList->length() ); -} - -void Compiler::placeAllFunctions() -{ - for ( FunctionList::Iter f = functionList; f.lte(); f++ ) - placeUserFunction( f, f->isUserIter ); - - for ( FunctionList::Iter f = inHostList; f.lte(); f++ ) - placeUserFunction( f, false ); -} - - -void Compiler::compileUserIter( Function *func, CodeVect &code ) -{ - CodeBlock *block = func->codeBlock; - - /* Compile the block. */ - block->compile( this, code ); - - /* Always yeild a nil at the end. This causes iteration to stop. */ - code.append( IN_LOAD_NIL ); - code.append( IN_YIELD ); -} - -void Compiler::compileUserIter( Function *func ) -{ - CodeBlock *block = func->codeBlock; - - /* Set up the context. 
*/ - compileContext = CompileFunction; - curFunction = func; - block->frameId = nextFrameId++; - - /* Compile for revert and commit. */ - revertOn = true; - compileUserIter( func, block->codeWV ); - - revertOn = false; - compileUserIter( func, block->codeWC ); - - /* Now that compilation is done variables are referenced. Make the local - * trees descriptor. */ - findLocals( block->localFrame, block ); - - /* FIXME: Need to deal with the freeing of local trees. */ -} - -/* Called for each type of function compile: revert and commit. */ -void Compiler::compileFunction( Function *func, CodeVect &code ) -{ - CodeBlock *block = func->codeBlock; - - /* Compile the block. */ - block->compile( this, code ); - - /* Check for a return statement. */ - if ( block->stmtList->length() == 0 || - block->stmtList->tail->type != LangStmt::ReturnType ) - { - /* Push the return value. */ - code.append( IN_LOAD_NIL ); - code.append( IN_SAVE_RET ); - } - - /* Compute the jump distance for the return jumps. */ - for ( LongVect::Iter rj = returnJumps; rj.lte(); rj++ ) { - long distance = code.length() - *rj - 3; - code.setHalf( *rj+1, distance ); - } - - /* Reset the vector of return jumps. */ - returnJumps.empty(); - - /* Return cleans up the stack (including the args) and leaves the return - * value on the top. */ - code.append( IN_RET ); -} - -void Compiler::compileFunction( Function *func ) -{ - CodeBlock *block = func->codeBlock; - - /* Set up the compilation context. */ - compileContext = CompileFunction; - curFunction = func; - - /* Assign a frame Id. */ - block->frameId = nextFrameId++; - - /* Compile once for revert. */ - revertOn = true; - compileFunction( func, block->codeWV ); - - /* Compile once for commit. */ - revertOn = false; - compileFunction( func, block->codeWC ); - - /* Now that compilation is done variables are referenced. Make the local - * trees descriptor. 
*/ - findLocals( block->localFrame, block ); -} - -void Compiler::removeNonUnparsableRepls() -{ - for ( ConsList::Iter repl = replList; repl.lte(); ) { - Constructor *maybeDel = repl++; - if ( !maybeDel->parse ) - replList.detach( maybeDel ); - } -} - -void Compiler::compileByteCode() -{ - /* Compile functions. */ - for ( FunctionList::Iter f = functionList; f.lte(); f++ ) { - if ( f->isUserIter ) - compileUserIter( f ); - else - compileFunction( f ); - } - - /* Compile the reduction code. */ - for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) { - makeProdCopies( prod ); - if ( prod->redBlock != 0 ) - compileReductionCode( prod ); - } - - /* Compile the token translation code. */ - for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { - if ( lel->transBlock != 0 ) - compileTranslateBlock( lel ); - } - - /* Compile preeof blocks. */ - for ( RegionList::Iter r = regionList; r.lte(); r++ ) { - if ( r->preEofBlock != 0 ) - compilePreEof( r ); - } - - /* Compile the init code */ - compileRootBlock( ); - removeNonUnparsableRepls(); -} diff --git a/src/tree.c b/src/tree.c deleted file mode 100644 index 4bd8db40..00000000 --- a/src/tree.c +++ /dev/null @@ -1,1620 +0,0 @@ -/* - * Copyright 2007-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <string.h> -#include <stdlib.h> -#include <stdbool.h> -#include <stdio.h> -#include <unistd.h> -#include <assert.h> - -#include <colm/tree.h> -#include <colm/pool.h> -#include <colm/bytecode.h> -#include <colm/debug.h> - -kid_t *alloc_attrs( program_t *prg, long length ) -{ - kid_t *cur = 0; - long i; - for ( i = 0; i < length; i++ ) { - kid_t *next = cur; - cur = kid_allocate( prg ); - cur->next = next; - } - return cur; -} - -void free_attrs( program_t *prg, kid_t *attrs ) -{ - kid_t *cur = attrs; - while ( cur != 0 ) { - kid_t *next = cur->next; - kid_free( prg, cur ); - cur = next; - } -} - -void free_kid_list( program_t *prg, kid_t *kid ) -{ - while ( kid != 0 ) { - kid_t *next = kid->next; - kid_free( prg, kid ); - kid = next; - } -} - -static void colm_tree_set_attr( tree_t *tree, long pos, tree_t *val ) -{ - long i; - kid_t *kid = tree->child; - - if ( tree->flags & AF_LEFT_IGNORE ) - kid = kid->next; - if ( tree->flags & AF_RIGHT_IGNORE ) - kid = kid->next; - - for ( i = 0; i < pos; i++ ) - kid = kid->next; - kid->tree = val; -} - -tree_t *colm_get_attr( tree_t *tree, long pos ) -{ - long i; - kid_t *kid = tree->child; - - if ( tree->flags & AF_LEFT_IGNORE ) - kid = kid->next; - if ( tree->flags & AF_RIGHT_IGNORE ) - kid = kid->next; - - for ( i = 0; i < pos; i++ ) - kid = kid->next; - return kid->tree; -} - - -tree_t *colm_get_repeat_next( tree_t *tree ) -{ - kid_t *kid = tree->child; - - if ( tree->flags & AF_LEFT_IGNORE ) - kid = kid->next; - if ( 
tree->flags & AF_RIGHT_IGNORE ) - kid = kid->next; - - return kid->next->tree; -} - -tree_t *colm_get_repeat_val( tree_t *tree ) -{ - kid_t *kid = tree->child; - - if ( tree->flags & AF_LEFT_IGNORE ) - kid = kid->next; - if ( tree->flags & AF_RIGHT_IGNORE ) - kid = kid->next; - - return kid->tree; -} - -int colm_repeat_end( tree_t *tree ) -{ - kid_t *kid = tree->child; - - if ( tree->flags & AF_LEFT_IGNORE ) - kid = kid->next; - if ( tree->flags & AF_RIGHT_IGNORE ) - kid = kid->next; - - return kid == 0; -} - -int colm_list_last( tree_t *tree ) -{ - kid_t *kid = tree->child; - - if ( tree->flags & AF_LEFT_IGNORE ) - kid = kid->next; - if ( tree->flags & AF_RIGHT_IGNORE ) - kid = kid->next; - - return kid->next == 0; -} - -kid_t *get_attr_kid( tree_t *tree, long pos ) -{ - long i; - kid_t *kid = tree->child; - - if ( tree->flags & AF_LEFT_IGNORE ) - kid = kid->next; - if ( tree->flags & AF_RIGHT_IGNORE ) - kid = kid->next; - - for ( i = 0; i < pos; i++ ) - kid = kid->next; - return kid; -} - -kid_t *kid_list_concat( kid_t *list1, kid_t *list2 ) -{ - if ( list1 == 0 ) - return list2; - else if ( list2 == 0 ) - return list1; - - kid_t *dest = list1; - while ( dest->next != 0 ) - dest = dest->next; - dest->next = list2; - return list1; -} - -tree_t *colm_construct_pointer( program_t *prg, value_t value ) -{ - pointer_t *pointer = (pointer_t*) tree_allocate( prg ); - pointer->id = LEL_ID_PTR; - pointer->value = value; - - return (tree_t*)pointer; -} - -value_t colm_get_pointer_val( tree_t *ptr ) -{ - return ((pointer_t*)ptr)->value; -} - - -tree_t *colm_construct_term( program_t *prg, word_t id, head_t *tokdata ) -{ - struct lang_el_info *lel_info = prg->rtd->lel_info; - - tree_t *tree = tree_allocate( prg ); - tree->id = id; - tree->refs = 0; - tree->tokdata = tokdata; - - int object_length = lel_info[tree->id].object_length; - tree->child = alloc_attrs( prg, object_length ); - - return tree; -} - - -kid_t *construct_kid( program_t *prg, tree_t **bindings, kid_t *prev, 
long pat ); - -static kid_t *construct_ignore_list( program_t *prg, long ignore_ind ) -{ - struct pat_cons_node *nodes = prg->rtd->pat_repl_nodes; - - kid_t *first = 0, *last = 0; - while ( ignore_ind >= 0 ) { - head_t *ignore_data = colm_string_alloc_pointer( prg, nodes[ignore_ind].data, - nodes[ignore_ind].length ); - - tree_t *ign_tree = tree_allocate( prg ); - ign_tree->refs = 1; - ign_tree->id = nodes[ignore_ind].id; - ign_tree->tokdata = ignore_data; - - kid_t *ign_kid = kid_allocate( prg ); - ign_kid->tree = ign_tree; - ign_kid->next = 0; - - if ( last == 0 ) - first = ign_kid; - else - last->next = ign_kid; - - ignore_ind = nodes[ignore_ind].next; - last = ign_kid; - } - - return first; -} - -static kid_t *construct_left_ignore_list( program_t *prg, long pat ) -{ - struct pat_cons_node *nodes = prg->rtd->pat_repl_nodes; - return construct_ignore_list( prg, nodes[pat].left_ignore ); -} - -static kid_t *construct_right_ignore_list( program_t *prg, long pat ) -{ - struct pat_cons_node *nodes = prg->rtd->pat_repl_nodes; - return construct_ignore_list( prg, nodes[pat].right_ignore ); -} - -static void ins_left_ignore( program_t *prg, tree_t *tree, tree_t *ignore_list ) -{ - assert( ! (tree->flags & AF_LEFT_IGNORE) ); - - /* Allocate. */ - kid_t *kid = kid_allocate( prg ); - kid->tree = ignore_list; - colm_tree_upref( prg, ignore_list ); - - /* Attach it. */ - kid->next = tree->child; - tree->child = kid; - - tree->flags |= AF_LEFT_IGNORE; -} - -static void ins_right_ignore( program_t *prg, tree_t *tree, tree_t *ignore_list ) -{ - assert( ! (tree->flags & AF_RIGHT_IGNORE) ); - - /* Insert an ignore head in the child list. */ - kid_t *kid = kid_allocate( prg ); - kid->tree = ignore_list; - colm_tree_upref( prg, ignore_list ); - - /* Attach it. 
*/ - if ( tree->flags & AF_LEFT_IGNORE ) { - kid->next = tree->child->next; - tree->child->next = kid; - } - else { - kid->next = tree->child; - tree->child = kid; - } - - tree->flags |= AF_RIGHT_IGNORE; -} - -tree_t *push_right_ignore( program_t *prg, tree_t *push_to, tree_t *right_ignore ) -{ - /* About to alter the data tree. Split first. */ - push_to = split_tree( prg, push_to ); - - if ( push_to->flags & AF_RIGHT_IGNORE ) { - /* The previous token already has a right ignore. Merge by - * attaching it as a left ignore of the new list. */ - kid_t *cur_ignore = tree_right_ignore_kid( prg, push_to ); - ins_left_ignore( prg, right_ignore, cur_ignore->tree ); - - /* Replace the current ignore. Safe to access refs here because we just - * upreffed it in insLeftIgnore. */ - cur_ignore->tree->refs -= 1; - cur_ignore->tree = right_ignore; - colm_tree_upref( prg, right_ignore ); - } - else { - /* Attach The ignore list. */ - ins_right_ignore( prg, push_to, right_ignore ); - } - - return push_to; -} - -tree_t *push_left_ignore( program_t *prg, tree_t *push_to, tree_t *left_ignore ) -{ - push_to = split_tree( prg, push_to ); - - /* Attach as left ignore to the token we are sending. */ - if ( push_to->flags & AF_LEFT_IGNORE ) { - /* The token already has a left-ignore. Merge by attaching it as a - * right ignore of the new list. */ - kid_t *cur_ignore = tree_left_ignore_kid( prg, push_to ); - ins_right_ignore( prg, left_ignore, cur_ignore->tree ); - - /* Replace the current ignore. Safe to upref here because we just - * upreffed it in insRightIgnore. */ - cur_ignore->tree->refs -= 1; - cur_ignore->tree = left_ignore; - colm_tree_upref( prg, left_ignore ); - } - else { - /* Attach the ignore list. 
*/ - ins_left_ignore( prg, push_to, left_ignore ); - } - - return push_to; -} - -static void rem_left_ignore( program_t *prg, tree_t **sp, tree_t *tree ) -{ - assert( tree->flags & AF_LEFT_IGNORE ); - - kid_t *next = tree->child->next; - colm_tree_downref( prg, sp, tree->child->tree ); - kid_free( prg, tree->child ); - tree->child = next; - - tree->flags &= ~AF_LEFT_IGNORE; -} - -static void rem_right_ignore( program_t *prg, tree_t **sp, tree_t *tree ) -{ - assert( tree->flags & AF_RIGHT_IGNORE ); - - if ( tree->flags & AF_LEFT_IGNORE ) { - kid_t *next = tree->child->next->next; - colm_tree_downref( prg, sp, tree->child->next->tree ); - kid_free( prg, tree->child->next ); - tree->child->next = next; - } - else { - kid_t *next = tree->child->next; - colm_tree_downref( prg, sp, tree->child->tree ); - kid_free( prg, tree->child ); - tree->child = next; - } - - tree->flags &= ~AF_RIGHT_IGNORE; -} - -tree_t *pop_right_ignore( program_t *prg, tree_t **sp, tree_t *pop_from, tree_t **right_ignore ) -{ - /* Modifying the tree we are detaching from. */ - pop_from = split_tree( prg, pop_from ); - - kid_t *ri_kid = tree_right_ignore_kid( prg, pop_from ); - - /* If the right ignore has a left ignore, then that was the original - * right ignore. */ - kid_t *li = tree_left_ignore_kid( prg, ri_kid->tree ); - if ( li != 0 ) { - colm_tree_upref( prg, li->tree ); - rem_left_ignore( prg, sp, ri_kid->tree ); - *right_ignore = ri_kid->tree; - colm_tree_upref( prg, *right_ignore ); - ri_kid->tree = li->tree; - } - else { - *right_ignore = ri_kid->tree; - colm_tree_upref( prg, *right_ignore ); - rem_right_ignore( prg, sp, pop_from ); - } - - return pop_from; -} - -tree_t *pop_left_ignore( program_t *prg, tree_t **sp, tree_t *pop_from, tree_t **left_ignore ) -{ - /* Modifying, make the write safe. */ - pop_from = split_tree( prg, pop_from ); - - kid_t *li_kid = tree_left_ignore_kid( prg, pop_from ); - - /* If the left ignore has a right ignore, then that was the original - * left ignore. 
*/ - kid_t *ri = tree_right_ignore_kid( prg, li_kid->tree ); - if ( ri != 0 ) { - colm_tree_upref( prg, ri->tree ); - rem_right_ignore( prg, sp, li_kid->tree ); - *left_ignore = li_kid->tree; - colm_tree_upref( prg, *left_ignore ); - li_kid->tree = ri->tree; - } - else { - *left_ignore = li_kid->tree; - colm_tree_upref( prg, *left_ignore ); - rem_left_ignore( prg, sp, pop_from ); - } - - return pop_from; -} - -tree_t *colm_construct_object( program_t *prg, kid_t *kid, tree_t **bindings, long lang_el_id ) -{ - struct lang_el_info *lel_info = prg->rtd->lel_info; - tree_t *tree = 0; - - tree = tree_allocate( prg ); - tree->id = lang_el_id; - tree->refs = 1; - tree->tokdata = 0; - tree->prod_num = 0; - - int object_length = lel_info[tree->id].object_length; - - kid_t *attrs = alloc_attrs( prg, object_length ); - kid_t *child = 0; - - tree->child = kid_list_concat( attrs, child ); - - return tree; -} - -/* Returns an uprefed tree. Saves us having to downref and bindings to zero to - * return a zero-ref tree. */ -tree_t *colm_construct_tree( program_t *prg, kid_t *kid, tree_t **bindings, long pat ) -{ - struct pat_cons_node *nodes = prg->rtd->pat_repl_nodes; - struct lang_el_info *lel_info = prg->rtd->lel_info; - tree_t *tree = 0; - - if ( nodes[pat].bind_id > 0 ) { - /* All bindings have been uprefed. 
*/ - tree = bindings[nodes[pat].bind_id]; - - long ignore = nodes[pat].left_ignore; - tree_t *left_ignore = 0; - if ( ignore >= 0 ) { - kid_t *ignore = construct_left_ignore_list( prg, pat ); - - left_ignore = tree_allocate( prg ); - left_ignore->id = LEL_ID_IGNORE; - left_ignore->child = ignore; - - tree = push_left_ignore( prg, tree, left_ignore ); - } - - ignore = nodes[pat].right_ignore; - tree_t *right_ignore = 0; - if ( ignore >= 0 ) { - kid_t *ignore = construct_right_ignore_list( prg, pat ); - - right_ignore = tree_allocate( prg ); - right_ignore->id = LEL_ID_IGNORE; - right_ignore->child = ignore; - - tree = push_right_ignore( prg, tree, right_ignore ); - } - } - else { - tree = tree_allocate( prg ); - tree->id = nodes[pat].id; - tree->refs = 1; - tree->tokdata = nodes[pat].length == 0 ? 0 : - colm_string_alloc_pointer( prg, - nodes[pat].data, nodes[pat].length ); - tree->prod_num = nodes[pat].prod_num; - - int object_length = lel_info[tree->id].object_length; - - kid_t *attrs = alloc_attrs( prg, object_length ); - kid_t *child = construct_kid( prg, bindings, - 0, nodes[pat].child ); - - tree->child = kid_list_concat( attrs, child ); - - /* Right first, then left. 
*/ - kid_t *ignore = construct_right_ignore_list( prg, pat ); - if ( ignore != 0 ) { - tree_t *ignore_list = tree_allocate( prg ); - ignore_list->id = LEL_ID_IGNORE; - ignore_list->refs = 1; - ignore_list->child = ignore; - - kid_t *ignore_head = kid_allocate( prg ); - ignore_head->tree = ignore_list; - ignore_head->next = tree->child; - tree->child = ignore_head; - - tree->flags |= AF_RIGHT_IGNORE; - } - - ignore = construct_left_ignore_list( prg, pat ); - if ( ignore != 0 ) { - tree_t *ignore_list = tree_allocate( prg ); - ignore_list->id = LEL_ID_IGNORE; - ignore_list->refs = 1; - ignore_list->child = ignore; - - kid_t *ignore_head = kid_allocate( prg ); - ignore_head->tree = ignore_list; - ignore_head->next = tree->child; - tree->child = ignore_head; - - tree->flags |= AF_LEFT_IGNORE; - } - - int i; - for ( i = 0; i < lel_info[tree->id].num_capture_attr; i++ ) { - long ci = pat+1+i; - CaptureAttr *ca = prg->rtd->capture_attr + lel_info[tree->id].capture_attr + i; - tree_t *attr = tree_allocate( prg ); - attr->id = nodes[ci].id; - attr->refs = 1; - attr->tokdata = nodes[ci].length == 0 ? 0 : - colm_string_alloc_pointer( prg, - nodes[ci].data, nodes[ci].length ); - - colm_tree_set_attr( tree, ca->offset, attr ); - } - } - - return tree; -} - -kid_t *construct_kid( program_t *prg, tree_t **bindings, kid_t *prev, long pat ) -{ - struct pat_cons_node *nodes = prg->rtd->pat_repl_nodes; - kid_t *kid = 0; - - if ( pat != -1 ) { - kid = kid_allocate( prg ); - kid->tree = colm_construct_tree( prg, kid, bindings, pat ); - - /* Recurse down next. 
*/ - kid_t *next = construct_kid( prg, bindings, - kid, nodes[pat].next ); - - kid->next = next; - } - - return kid; -} - -tree_t *colm_construct_token( program_t *prg, tree_t **args, long nargs ) -{ - value_t id_int = (value_t)args[0]; - str_t *text_str = (str_t*)args[1]; - - long id = (long)id_int; - head_t *tokdata = string_copy( prg, text_str->value ); - - struct lang_el_info *lel_info = prg->rtd->lel_info; - tree_t *tree; - - if ( lel_info[id].ignore ) { - tree = tree_allocate( prg ); - tree->refs = 1; - tree->id = id; - tree->tokdata = tokdata; - } - else { - long object_length = lel_info[id].object_length; - assert( nargs-2 <= object_length ); - - kid_t *attrs = alloc_attrs( prg, object_length ); - - tree = tree_allocate( prg ); - tree->id = id; - tree->refs = 1; - tree->tokdata = tokdata; - - tree->child = attrs; - - long i; - for ( i = 2; i < nargs; i++ ) { - colm_tree_set_attr( tree, i-2, args[i] ); - colm_tree_upref( prg, colm_get_attr( tree, i-2 ) ); - } - } - return tree; -} - -tree_t *cast_tree( program_t *prg, int lang_el_id, tree_t *tree ) -{ - struct lang_el_info *lel_info = prg->rtd->lel_info; - - /* Need to keep a lookout for next down. If - * copying it, return the copy. */ - tree_t *new_tree = tree_allocate( prg ); - - new_tree->id = lang_el_id; - new_tree->tokdata = string_copy( prg, tree->tokdata ); - - /* Invalidate the production number. */ - new_tree->prod_num = -1; - - /* Copy the child list. Start with ignores, then the list. */ - kid_t *child = tree->child, *last = 0; - - /* Flags we are interested in. */ - new_tree->flags |= tree->flags & ( AF_LEFT_IGNORE | AF_RIGHT_IGNORE ); - - int ignores = 0; - if ( tree->flags & AF_LEFT_IGNORE ) - ignores += 1; - if ( tree->flags & AF_RIGHT_IGNORE ) - ignores += 1; - - /* Igores. */ - while ( ignores-- > 0 ) { - kid_t *new_kid = kid_allocate( prg ); - - new_kid->tree = child->tree; - new_kid->next = 0; - new_kid->tree->refs += 1; - - /* Store the first child. 
*/ - if ( last == 0 ) - new_tree->child = new_kid; - else - last->next = new_kid; - - child = child->next; - last = new_kid; - } - - /* Skip over the source's attributes. */ - int object_length = lel_info[tree->id].object_length; - while ( object_length-- > 0 ) - child = child->next; - - /* Allocate the target type's kids. */ - object_length = lel_info[lang_el_id].object_length; - while ( object_length-- > 0 ) { - kid_t *new_kid = kid_allocate( prg ); - - new_kid->tree = 0; - new_kid->next = 0; - - /* Store the first child. */ - if ( last == 0 ) - new_tree->child = new_kid; - else - last->next = new_kid; - - last = new_kid; - } - - /* Copy the source's children. */ - while ( child != 0 ) { - kid_t *new_kid = kid_allocate( prg ); - - new_kid->tree = child->tree; - new_kid->next = 0; - new_kid->tree->refs += 1; - - /* Store the first child. */ - if ( last == 0 ) - new_tree->child = new_kid; - else - last->next = new_kid; - - child = child->next; - last = new_kid; - } - - return new_tree; -} - -tree_t *make_tree( program_t *prg, tree_t **args, long nargs ) -{ - value_t id_int = (value_t)args[0]; - - long id = (long)id_int; - struct lang_el_info *lel_info = prg->rtd->lel_info; - - tree_t *tree = tree_allocate( prg ); - tree->id = id; - tree->refs = 1; - - long object_length = lel_info[id].object_length; - kid_t *attrs = alloc_attrs( prg, object_length ); - - kid_t *last = 0, *child = 0; - for ( id = 1; id < nargs; id++ ) { - kid_t *kid = kid_allocate( prg ); - kid->tree = args[id]; - colm_tree_upref( prg, kid->tree ); - - if ( last == 0 ) - child = kid; - else - last->next = kid; - - last = kid; - } - - tree->child = kid_list_concat( attrs, child ); - - return tree; -} - -int test_false( program_t *prg, tree_t *tree ) -{ - int flse = ( - tree == 0 || - tree == prg->false_val - ); - return flse; -} - -kid_t *copy_ignore_list( program_t *prg, kid_t *ignore_header ) -{ - kid_t *new_header = kid_allocate( prg ); - kid_t *last = 0, *ic = (kid_t*)ignore_header->tree; - while 
( ic != 0 ) { - kid_t *new_ic = kid_allocate( prg ); - - new_ic->tree = ic->tree; - new_ic->tree->refs += 1; - - /* List pointers. */ - if ( last == 0 ) - new_header->tree = (tree_t*)new_ic; - else - last->next = new_ic; - - ic = ic->next; - last = new_ic; - } - return new_header; -} - -kid_t *copy_kid_list( program_t *prg, kid_t *kid_list ) -{ - kid_t *new_list = 0, *last = 0, *ic = kid_list; - - while ( ic != 0 ) { - kid_t *new_ic = kid_allocate( prg ); - - new_ic->tree = ic->tree; - colm_tree_upref( prg, new_ic->tree ); - - /* List pointers. */ - if ( last == 0 ) - new_list = new_ic; - else - last->next = new_ic; - - ic = ic->next; - last = new_ic; - } - return new_list; -} - -/* New tree has zero ref. */ -tree_t *copy_real_tree( program_t *prg, tree_t *tree, kid_t *old_next_down, kid_t **new_next_down ) -{ - /* Need to keep a lookout for next down. If - * copying it, return the copy. */ - tree_t *new_tree = tree_allocate( prg ); - - new_tree->id = tree->id; - new_tree->tokdata = string_copy( prg, tree->tokdata ); - new_tree->prod_num = tree->prod_num; - - /* Copy the child list. Start with ignores, then the list. */ - kid_t *child = tree->child, *last = 0; - - /* Left ignores. */ - if ( tree->flags & AF_LEFT_IGNORE ) { - new_tree->flags |= AF_LEFT_IGNORE; -// kid_t *newHeader = copyIgnoreList( prg, child ); -// -// /* Always the head. */ -// newTree->child = newHeader; -// -// child = child->next; -// last = newHeader; - } - - /* Right ignores. */ - if ( tree->flags & AF_RIGHT_IGNORE ) { - new_tree->flags |= AF_RIGHT_IGNORE; -// kid_t *newHeader = copyIgnoreList( prg, child ); -// if ( last == 0 ) -// newTree->child = newHeader; -// else -// last->next = newHeader; -// child = child->next; -// last = newHeader; - } - - /* Attributes and children. */ - while ( child != 0 ) { - kid_t *new_kid = kid_allocate( prg ); - - /* Watch out for next down. 
*/ - if ( child == old_next_down ) - *new_next_down = new_kid; - - new_kid->tree = child->tree; - new_kid->next = 0; - - /* May be an attribute. */ - if ( new_kid->tree != 0 ) - new_kid->tree->refs += 1; - - /* Store the first child. */ - if ( last == 0 ) - new_tree->child = new_kid; - else - last->next = new_kid; - - child = child->next; - last = new_kid; - } - - return new_tree; -} - - -tree_t *colm_copy_tree( program_t *prg, tree_t *tree, kid_t *old_next_down, kid_t **new_next_down ) -{ - assert( tree->id != LEL_ID_PTR && tree->id != LEL_ID_STR ); - - tree = copy_real_tree( prg, tree, old_next_down, new_next_down ); - - assert( tree->refs == 0 ); - - return tree; -} - -tree_t *split_tree( program_t *prg, tree_t *tree ) -{ - if ( tree != 0 ) { - assert( tree->refs >= 1 ); - - if ( tree->refs > 1 ) { - kid_t *old_next_down = 0, *new_next_down = 0; - tree_t *new_tree = colm_copy_tree( prg, tree, old_next_down, &new_next_down ); - colm_tree_upref( prg, new_tree ); - - /* Downref the original. Don't need to consider freeing because - * refs were > 1. */ - tree->refs -= 1; - - tree = new_tree; - } - - assert( tree->refs == 1 ); - } - return tree; -} - -/* We can't make recursive calls here since the tree we are freeing may be - * very large. Need the VM stack. */ -void tree_free_rec( program_t *prg, tree_t **sp, tree_t *tree ) -{ - tree_t **top = vm_ptop(); - -free_tree: - switch ( tree->id ) { - case LEL_ID_PTR: - tree_free( prg, tree ); - break; - case LEL_ID_STR: { - str_t *str = (str_t*) tree; - string_free( prg, str->value ); - tree_free( prg, tree ); - break; - } - default: { - if ( tree->id != LEL_ID_IGNORE ) - string_free( prg, tree->tokdata ); - - /* Attributes and grammar-based children. */ - kid_t *child = tree->child; - while ( child != 0 ) { - kid_t *next = child->next; - vm_push_tree( child->tree ); - kid_free( prg, child ); - child = next; - } - - tree_free( prg, tree ); - break; - }} - - /* Any trees to downref? 
*/ - while ( sp != top ) { - tree = vm_pop_tree(); - if ( tree != 0 ) { - assert( tree->refs > 0 ); - tree->refs -= 1; - if ( tree->refs == 0 ) - goto free_tree; - } - } -} - -void colm_tree_upref( program_t *prg, tree_t *tree ) -{ - if ( tree != 0 ) { - assert( tree->id < prg->rtd->first_struct_el_id ); - tree->refs += 1; - } -} - -void colm_tree_downref( program_t *prg, tree_t **sp, tree_t *tree ) -{ - if ( tree != 0 ) { - assert( tree->id < prg->rtd->first_struct_el_id ); - assert( tree->refs > 0 ); - tree->refs -= 1; - if ( tree->refs == 0 ) - tree_free_rec( prg, sp, tree ); - } -} - -/* We can't make recursive calls here since the tree we are freeing may be - * very large. Need the VM stack. */ -void object_free_rec( program_t *prg, tree_t **sp, tree_t *tree ) -{ - tree_t **top = vm_ptop(); - -free_tree: - - switch ( tree->id ) { - case LEL_ID_STR: { - str_t *str = (str_t*) tree; - string_free( prg, str->value ); - tree_free( prg, tree ); - break; - } - case LEL_ID_PTR: { - tree_free( prg, tree ); - break; - } - default: { - if ( tree->id != LEL_ID_IGNORE ) - string_free( prg, tree->tokdata ); - - /* Attributes and grammar-based children. */ - kid_t *child = tree->child; - while ( child != 0 ) { - kid_t *next = child->next; - vm_push_tree( child->tree ); - kid_free( prg, child ); - child = next; - } - - tree_free( prg, tree ); - break; - }} - - /* Any trees to downref? */ - while ( sp != top ) { - tree = vm_pop_tree(); - if ( tree != 0 ) { - assert( tree->refs > 0 ); - tree->refs -= 1; - if ( tree->refs == 0 ) - goto free_tree; - } - } -} - -void object_downref( program_t *prg, tree_t **sp, tree_t *tree ) -{ - if ( tree != 0 ) { - assert( tree->refs > 0 ); - tree->refs -= 1; - if ( tree->refs == 0 ) - object_free_rec( prg, sp, tree ); - } -} - -/* Find the first child of a tree. 
*/ -kid_t *tree_child( program_t *prg, const tree_t *tree ) -{ - struct lang_el_info *lel_info = prg->rtd->lel_info; - kid_t *kid = tree->child; - - if ( tree->flags & AF_LEFT_IGNORE ) - kid = kid->next; - if ( tree->flags & AF_RIGHT_IGNORE ) - kid = kid->next; - - /* Skip over attributes. */ - long object_length = lel_info[tree->id].object_length; - long a; - for ( a = 0; a < object_length; a++ ) - kid = kid->next; - - return kid; -} - -/* Detach at the first real child of a tree. */ -kid_t *tree_extract_child( program_t *prg, tree_t *tree ) -{ - struct lang_el_info *lel_info = prg->rtd->lel_info; - kid_t *kid = tree->child, *last = 0; - - if ( tree->flags & AF_LEFT_IGNORE ) - kid = kid->next; - if ( tree->flags & AF_RIGHT_IGNORE ) - kid = kid->next; - - /* Skip over attributes. */ - long a, object_length = lel_info[tree->id].object_length; - for ( a = 0; a < object_length; a++ ) { - last = kid; - kid = kid->next; - } - - if ( last == 0 ) - tree->child = 0; - else - last->next = 0; - - return kid; -} - - -/* Find the first child of a tree. 
*/ -kid_t *tree_attr( program_t *prg, const tree_t *tree ) -{ - kid_t *kid = tree->child; - - if ( tree->flags & AF_LEFT_IGNORE ) - kid = kid->next; - if ( tree->flags & AF_RIGHT_IGNORE ) - kid = kid->next; - - return kid; -} - -tree_t *tree_left_ignore( program_t *prg, tree_t *tree ) -{ - if ( tree->flags & AF_LEFT_IGNORE ) - return tree->child->tree; - return 0; -} - -tree_t *tree_right_ignore( program_t *prg, tree_t *tree ) -{ - if ( tree->flags & AF_RIGHT_IGNORE ) { - if ( tree->flags & AF_LEFT_IGNORE ) - return tree->child->next->tree; - else - return tree->child->tree; - } - return 0; -} - -kid_t *tree_left_ignore_kid( program_t *prg, tree_t *tree ) -{ - if ( tree->flags & AF_LEFT_IGNORE ) - return tree->child; - return 0; -} - -kid_t *tree_right_ignore_kid( program_t *prg, tree_t *tree ) -{ - if ( tree->flags & AF_RIGHT_IGNORE ) { - if ( tree->flags & AF_LEFT_IGNORE ) - return tree->child->next; - else - return tree->child; - } - return 0; -} - -void ref_set_value( program_t *prg, tree_t **sp, ref_t *ref, tree_t *v ) -{ - colm_tree_downref( prg, sp, ref->kid->tree ); - ref->kid->tree = v; -} - -tree_t *get_rhs_el( program_t *prg, tree_t *lhs, long position ) -{ - kid_t *pos = tree_child( prg, lhs ); - while ( position > 0 ) { - pos = pos->next; - position -= 1; - } - return pos->tree; -} - -kid_t *get_rhs_el_kid( program_t *prg, tree_t *lhs, long position ) -{ - kid_t *pos = tree_child( prg, lhs ); - while ( position > 0 ) { - pos = pos->next; - position -= 1; - } - return pos; -} - -parse_tree_t *get_rhs_parse_tree( program_t *prg, parse_tree_t *lhs, long position ) -{ - parse_tree_t *pos = lhs->child; - while ( position > 0 ) { - pos = pos->next; - position -= 1; - } - return pos; -} - -tree_t *colm_get_rhs_val( program_t *prg, tree_t *tree, int *a ) -{ - int i, len = a[0]; - for ( i = 0; i < len; i++ ) { - int prod_num = a[1 + i * 2]; - int child_num = a[1 + i * 2 + 1]; - if ( tree->prod_num == prod_num ) - return get_rhs_el( prg, tree, child_num ); - } - 
return 0; -} - -void colm_tree_set_field( program_t *prg, tree_t *tree, long field, tree_t *value ) -{ - assert( tree->refs == 1 ); - if ( value != 0 ) - assert( value->refs >= 1 ); - colm_tree_set_attr( tree, field, value ); -} - -tree_t *colm_tree_get_field( tree_t *tree, word_t field ) -{ - return colm_get_attr( tree, field ); -} - -kid_t *get_field_kid( tree_t *tree, word_t field ) -{ - return get_attr_kid( tree, field ); -} - -tree_t *get_field_split( program_t *prg, tree_t *tree, word_t field ) -{ - tree_t *val = colm_get_attr( tree, field ); - tree_t *split = split_tree( prg, val ); - colm_tree_set_attr( tree, field, split ); - return split; -} - -/* This must traverse in the same order that the bindId assignments are done - * in. */ -int match_pattern( tree_t **bindings, program_t *prg, long pat, kid_t *kid, int check_next ) -{ - struct pat_cons_node *nodes = prg->rtd->pat_repl_nodes; - - /* match node, recurse on children. */ - if ( pat != -1 && kid != 0 ) { - if ( nodes[pat].id == kid->tree->id ) { - /* If the pattern node has data, then this means we need to match - * the data against the token data. */ - if ( nodes[pat].data != 0 ) { - /* Check the length of token text. */ - if ( nodes[pat].length != string_length( kid->tree->tokdata ) ) - return false; - - /* Check the token text data. */ - if ( nodes[pat].length > 0 && memcmp( nodes[pat].data, - string_data( kid->tree->tokdata ), nodes[pat].length ) != 0 ) - return false; - } - - /* No failure, all okay. */ - if ( nodes[pat].bind_id > 0 ) { - bindings[nodes[pat].bind_id] = kid->tree; - } - - /* If we didn't match a terminal duplicate of a nonterm then check - * down the children. */ - if ( !nodes[pat].stop ) { - /* Check for failure down child branch. */ - int child_check = match_pattern( bindings, prg, - nodes[pat].child, tree_child( prg, kid->tree ), true ); - if ( ! child_check ) - return false; - } - - /* If checking next, then look for failure there. 
*/ - if ( check_next ) { - int next_check = match_pattern( bindings, prg, - nodes[pat].next, kid->next, true ); - if ( ! next_check ) - return false; - } - - return true; - } - } - else if ( pat == -1 && kid == 0 ) { - /* Both null is a match. */ - return 1; - } - - return false; -} - - -long colm_cmp_tree( program_t *prg, const tree_t *tree1, const tree_t *tree2 ) -{ - long cmpres = 0; - if ( tree1 == 0 ) { - if ( tree2 == 0 ) - return 0; - else - return -1; - } - else if ( tree2 == 0 ) - return 1; - else if ( tree1->id < tree2->id ) - return -1; - else if ( tree1->id > tree2->id ) - return 1; - else if ( tree1->id == LEL_ID_PTR ) { - if ( ((pointer_t*)tree1)->value < ((pointer_t*)tree2)->value ) - return -1; - else if ( ((pointer_t*)tree1)->value > ((pointer_t*)tree2)->value ) - return 1; - } - else if ( tree1->id == LEL_ID_STR ) { - cmpres = cmp_string( ((str_t*)tree1)->value, ((str_t*)tree2)->value ); - if ( cmpres != 0 ) - return cmpres; - } - else { - if ( tree1->tokdata == 0 && tree2->tokdata != 0 ) - return -1; - else if ( tree1->tokdata != 0 && tree2->tokdata == 0 ) - return 1; - else if ( tree1->tokdata != 0 && tree2->tokdata != 0 ) { - cmpres = cmp_string( tree1->tokdata, tree2->tokdata ); - if ( cmpres != 0 ) - return cmpres; - } - } - - kid_t *kid1 = tree_child( prg, tree1 ); - kid_t *kid2 = tree_child( prg, tree2 ); - - while ( true ) { - if ( kid1 == 0 && kid2 == 0 ) - return 0; - else if ( kid1 == 0 && kid2 != 0 ) - return -1; - else if ( kid1 != 0 && kid2 == 0 ) - return 1; - else { - cmpres = colm_cmp_tree( prg, kid1->tree, kid2->tree ); - if ( cmpres != 0 ) - return cmpres; - } - kid1 = kid1->next; - kid2 = kid2->next; - } -} - - -void split_ref( program_t *prg, tree_t ***psp, ref_t *from_ref ) -{ - /* Go up the chain of kids, turing the pointers down. 
*/ - ref_t *last = 0, *ref = from_ref, *next = 0; - while ( ref->next != 0 ) { - next = ref->next; - ref->next = last; - last = ref; - ref = next; - } - ref->next = last; - - /* Now traverse the list, which goes down. */ - while ( ref != 0 ) { - if ( ref->kid->tree->refs > 1 ) { - ref_t *next_down = ref->next; - while ( next_down != 0 && next_down->kid == ref->kid ) - next_down = next_down->next; - - kid_t *old_next_kid_down = next_down != 0 ? next_down->kid : 0; - kid_t *new_next_kid_down = 0; - - tree_t *new_tree = colm_copy_tree( prg, ref->kid->tree, - old_next_kid_down, &new_next_kid_down ); - colm_tree_upref( prg, new_tree ); - - /* Downref the original. Don't need to consider freeing because - * refs were > 1. */ - ref->kid->tree->refs -= 1; - - while ( ref != 0 && ref != next_down ) { - next = ref->next; - ref->next = 0; - - ref->kid->tree = new_tree; - ref = next; - } - - /* Correct kid pointers down from ref. */ - while ( next_down != 0 && next_down->kid == old_next_kid_down ) { - next_down->kid = new_next_kid_down; - next_down = next_down->next; - } - } - else { - /* Reset the list as we go down. 
*/ - next = ref->next; - ref->next = 0; - ref = next; - } - } -} - -tree_t *set_list_mem( list_t *list, half_t field, tree_t *value ) -{ - if ( value != 0 ) - assert( value->refs >= 1 ); - - tree_t *existing = 0; - switch ( field ) { - case 0: -// existing = list->head->value; -// list->head->value = value; - break; - case 1: -// existing = list->tail->value; -// list->tail->value = value; - break; - default: - assert( false ); - break; - } - return existing; -} - -struct tree_pair map_remove( program_t *prg, map_t *map, tree_t *key ) -{ - map_el_t *map_el = map_impl_find( prg, map, key ); - struct tree_pair result = { 0, 0 }; - if ( map_el != 0 ) { - map_detach( prg, map, map_el ); - result.key = map_el->key; - //mapElFree( prg, mapEl ); - } - - return result; -} - -#if 0 -tree_t *map_unstore( program_t *prg, map_t *map, tree_t *key, tree_t *existing ) -{ - tree_t *stored = 0; - if ( existing == 0 ) { - map_el_t *map_el = map_detach_by_key( prg, map, key ); - // stored = mapEl->tree; - map_el_free( prg, map_el ); - } - else { - map_el_t *map_el = map_impl_find( prg, map, key ); - // stored = mapEl->tree; - //mapEl->tree = existing; - } - return stored; -} -#endif - -tree_t *map_find( program_t *prg, map_t *map, tree_t *key ) -{ -// map_el_t *mapEl = mapImplFind( prg, map, key ); -// return mapEl == 0 ? 
0 : mapEl->tree; - return 0; -} - -long map_length( map_t *map ) -{ - return map->tree_size; -} - -void list_push_tail( program_t *prg, list_t *list, tree_t *val ) -{ -// if ( val != 0 ) -// assert( val->refs >= 1 ); -// list_el_t *listEl = colm_list_el_new( prg ); -// listEl->value = val; -// listAppend( list, listEl ); -} - -void list_push_head( program_t *prg, list_t *list, tree_t *val ) -{ -// if ( val != 0 ) -// assert( val->refs >= 1 ); -// list_el_t *listEl = listElAllocate( prg ); -// listEl->value = val; -// listPrepend( list, listEl ); -} - -tree_t *list_remove_end( program_t *prg, list_t *list ) -{ -// tree_t *tree = list->tail->value; -// listElFree( prg, listDetachLast( list ) ); -// return tree; - return 0; -} - -tree_t *list_remove_head( program_t *prg, list_t *list ) -{ -// tree_t *tree = list->head; -// listDetachFirst( list ); -// return tree; - return 0; -} - -tree_t *get_parser_mem( parser_t *parser, word_t field ) -{ - tree_t *result = 0; - switch ( field ) { - case 0: { - tree_t *tree = get_parsed_root( parser->pda_run, parser->pda_run->stop_target > 0 ); - result = tree; - break; - } - case 1: { - struct pda_run *pda_run = parser->pda_run; - result = pda_run->parse_error_text; - break; - } - default: { - assert( false ); - break; - } - } - return result; -} - -tree_t *get_list_mem_split( program_t *prg, list_t *list, word_t field ) -{ - tree_t *sv = 0; - switch ( field ) { - case 0: -// sv = splitTree( prg, list->head->value ); -// list->head->value = sv; - break; - case 1: -// sv = splitTree( prg, list->tail->value ); -// list->tail->value = sv; - break; - default: - assert( false ); - break; - } - return sv; -} - - -#if 0 -int map_insert( program_t *prg, map_t *map, tree_t *key, tree_t *element ) -{ - map_el_t *map_el = map_insert_key( prg, map, key, 0 ); - - if ( map_el != 0 ) { - //mapEl->tree = element; - return true; - } - - return false; -} -#endif - -#if 0 -void map_unremove( program_t *prg, map_t *map, tree_t *key, tree_t *element ) 
-{ - map_el_t *map_el = map_insert_key( prg, map, key, 0 ); - assert( map_el != 0 ); - //mapEl->tree = element; -} -#endif - -#if 0 -tree_t *map_uninsert( program_t *prg, map_t *map, tree_t *key ) -{ - map_el_t *el = map_detach_by_key( prg, map, key ); -// tree_t *val = el->tree; - map_el_free( prg, el ); -// return val; - return 0; -} -#endif - -#if 0 -tree_t *map_store( program_t *prg, map_t *map, tree_t *key, tree_t *element ) -{ - tree_t *old_tree = 0; - map_el_t *el_in_tree = 0; - map_el_t *map_el = map_insert_key( prg, map, key, &el_in_tree ); - -// if ( mapEl != 0 ) -// mapEl->tree = element; -// else { -// /* Element with key exists. Overwriting the value. */ -// oldTree = elInTree->tree; -// elInTree->tree = element; -// } - - return old_tree; -} -#endif - -static tree_t *tree_search_kid( program_t *prg, kid_t *kid, long id ) -{ - /* This node the one? */ - if ( kid->tree->id == id ) - return kid->tree; - - tree_t *res = 0; - - /* Search children. */ - kid_t *child = tree_child( prg, kid->tree ); - if ( child != 0 ) - res = tree_search_kid( prg, child, id ); - - /* Search siblings. */ - if ( res == 0 && kid->next != 0 ) - res = tree_search_kid( prg, kid->next, id ); - - return res; -} - -tree_t *tree_search( program_t *prg, tree_t *tree, long id ) -{ - tree_t *res = 0; - if ( tree->id == id ) - res = tree; - else { - kid_t *child = tree_child( prg, tree ); - if ( child != 0 ) - res = tree_search_kid( prg, child, id ); - } - return res; -} - -static location_t *loc_search_kid( program_t *prg, kid_t *kid ) -{ - /* This node the one? */ - if ( kid->tree->tokdata != 0 && kid->tree->tokdata->location != 0 ) - return kid->tree->tokdata->location; - - location_t *res = 0; - - /* Search children. */ - kid_t *child = tree_child( prg, kid->tree ); - if ( child != 0 ) - res = loc_search_kid( prg, child ); - - /* Search siblings. 
*/ - if ( res == 0 && kid->next != 0 ) - res = loc_search_kid( prg, kid->next ); - - return res; -} - -static location_t *loc_search( program_t *prg, tree_t *tree ) -{ - location_t *res = 0; - if ( tree->tokdata != 0 && tree->tokdata->location != 0 ) - return tree->tokdata->location; - - kid_t *child = tree_child( prg, tree ); - if ( child != 0 ) - res = loc_search_kid( prg, child ); - - return res; -} - -struct colm_location *colm_find_location( program_t *prg, tree_t *tree ) -{ - return loc_search( prg, tree ); -} - -head_t *tree_to_str( program_t *prg, tree_t **sp, tree_t *tree, int trim, int attrs ) -{ - /* Collect the tree data. */ - str_collect_t collect; - init_str_collect( &collect ); - - if ( attrs ) - colm_print_tree_collect_a( prg, sp, &collect, tree, trim ); - else - colm_print_tree_collect( prg, sp, &collect, tree, trim ); - - /* Set up the input stream. */ - head_t *ret = string_alloc_full( prg, collect.data, collect.length ); - - str_collect_destroy( &collect ); - - return ret; -} - diff --git a/src/tree.h b/src/tree.h deleted file mode 100644 index fefb6b20..00000000 --- a/src/tree.h +++ /dev/null @@ -1,396 +0,0 @@ -/* - * Copyright 2010-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef _COLM_TREE_H -#define _COLM_TREE_H - -#if defined(__cplusplus) -extern "C" { -#endif - -#include <colm/colm.h> -#include <colm/type.h> -#include <colm/input.h> -#include <colm/internal.h> -#include <colm/defs.h> - -#define COLM_INDENT_OFF -1 - -typedef unsigned char code_t; -#if SIZEOF_UNSIGNED_LONG == SIZEOF_VOID_P - typedef unsigned long word_t; -#elif SIZEOF_UNSIGNED_LONG_LONG == SIZEOF_VOID_P - typedef unsigned long long word_t; -#else - #error "The type word_t was not declared" -#endif -typedef unsigned long half_t; - -struct bindings; -struct function_info; - -typedef struct colm_tree tree_t; -#include <colm/struct.h> - -typedef struct colm_location -{ - const char *name; - long line; - long column; - long byte; -} location_t; - -/* Header located just before string data. */ -typedef struct colm_data -{ - const char *data; - long length; - struct colm_location *location; -} head_t; - -typedef struct colm_kid -{ - /* The tree needs to be first since pointers to kids are used to reference - * trees on the stack. A pointer to the word that is a tree_t* is cast to - * a kid_t*. 
*/ - struct colm_tree *tree; - struct colm_kid *next; - unsigned char flags; -} kid_t; - -typedef struct colm_ref -{ - kid_t *kid; - struct colm_ref *next; -} ref_t; - -struct tree_pair -{ - tree_t *key; - tree_t *val; -}; - -typedef struct colm_parse_tree -{ - short id; - unsigned short flags; - - struct colm_parse_tree *child; - struct colm_parse_tree *next; - struct colm_parse_tree *left_ignore; - struct colm_parse_tree *right_ignore; - kid_t *shadow; - - /* Parsing algorithm. */ - long state; - short cause_reduce; - - /* Retry vars. Might be able to unify lower and upper. */ - long retry_region; - char retry_lower; - char retry_upper; -} parse_tree_t; - -typedef struct colm_pointer -{ - /* Must overlay tree_t. */ - short id; - unsigned short flags; - long refs; - kid_t *child; - - colm_value_t value; -} pointer_t; - -typedef struct colm_str -{ - /* Must overlay tree_t. */ - short id; - unsigned short flags; - long refs; - kid_t *child; - - head_t *value; -} str_t; - -/* - * Maps - */ -struct generic_info -{ - long type; - - long el_struct_id; - long el_offset; - - enum TYPE key_type; - long key_offset; - - enum TYPE value_type; - long value_offset; - - long parser_id; -}; - -enum IterType -{ - IT_Tree = 1, - IT_RevTree, - IT_User -}; - -typedef struct colm_tree_iter -{ - enum IterType type; - ref_t root_ref; - ref_t ref; - long search_id; - tree_t **stack_root; - long arg_size; - long yield_size; - long root_size; -} tree_iter_t; - -typedef struct colm_generic_iter -{ - enum IterType type; - ref_t root_ref; - ref_t ref; - tree_t **stack_root; - long arg_size; - long yield_size; - long root_size; - long generic_id; -} generic_iter_t; - -/* This must overlay tree iter because some of the same bytecodes are used. */ -typedef struct colm_rev_tree_iter -{ - enum IterType type; - ref_t root_ref; - ref_t ref; - long search_id; - tree_t **stack_root; - long arg_size; - long yield_size; - long root_size; - - /* For detecting a split at the leaf. 
*/ - kid_t *kid_at_yield; - long children; -} rev_tree_iter_t; - -typedef struct colm_user_iter -{ - enum IterType type; - /* The current item. */ - ref_t ref; - tree_t **stack_root; - long arg_size; - long yield_size; - long root_size; - - code_t *resume; - tree_t **frame; - long search_id; -} user_iter_t; - -void colm_tree_upref_( tree_t *tree ); -void colm_tree_upref( struct colm_program *prg, tree_t *tree ); -void colm_tree_downref( struct colm_program *prg, tree_t **sp, tree_t *tree ); -long colm_cmp_tree( struct colm_program *prg, const tree_t *tree1, const tree_t *tree2 ); - -tree_t *push_right_ignore( struct colm_program *prg, tree_t *push_to, tree_t *right_ignore ); -tree_t *push_left_ignore( struct colm_program *prg, tree_t *push_to, tree_t *left_ignore ); -tree_t *pop_right_ignore( struct colm_program *prg, tree_t **sp, - tree_t *pop_from, tree_t **right_ignore ); -tree_t *pop_left_ignore( struct colm_program *prg, tree_t **sp, - tree_t *pop_from, tree_t **left_ignore ); -tree_t *tree_left_ignore( struct colm_program *prg, tree_t *tree ); -tree_t *tree_right_ignore( struct colm_program *prg, tree_t *tree ); -kid_t *tree_left_ignore_kid( struct colm_program *prg, tree_t *tree ); -kid_t *tree_right_ignore_kid( struct colm_program *prg, tree_t *tree ); -kid_t *tree_child( struct colm_program *prg, const tree_t *tree ); -kid_t *tree_attr( struct colm_program *prg, const tree_t *tree ); -kid_t *kid_list_concat( kid_t *list1, kid_t *list2 ); -kid_t *tree_extract_child( struct colm_program *prg, tree_t *tree ); -kid_t *reverse_kid_list( kid_t *kid ); - -tree_t *colm_construct_pointer( struct colm_program *prg, colm_value_t value ); -tree_t *colm_construct_term( struct colm_program *prg, word_t id, head_t *tokdata ); -tree_t *colm_construct_tree( struct colm_program *prg, kid_t *kid, - tree_t **bindings, long pat ); -tree_t *colm_construct_object( struct colm_program *prg, kid_t *kid, - tree_t **bindings, long lang_el_id ); -tree_t *colm_construct_token( struct 
colm_program *prg, tree_t **args, long nargs ); - -int test_false( struct colm_program *prg, tree_t *tree ); -tree_t *make_tree( struct colm_program *prg, tree_t **args, long nargs ); -stream_t *open_file( struct colm_program *prg, tree_t *name, tree_t *mode ); -stream_t *colm_stream_open_file( struct colm_program *prg, tree_t *name, tree_t *mode ); -stream_t *colm_stream_open_fd( struct colm_program *prg, char *name, long fd ); -kid_t *copy_ignore_list( struct colm_program *prg, kid_t *ignore_header ); -kid_t *copy_kid_list( struct colm_program *prg, kid_t *kid_list ); -void colm_stream_free( struct colm_program *prg, stream_t *s ); -tree_t *colm_copy_tree( struct colm_program *prg, tree_t *tree, - kid_t *old_next_down, kid_t **new_next_down ); - -colm_value_t colm_get_pointer_val( tree_t *pointer ); -tree_t *colm_tree_get_field( tree_t *tree, word_t field ); -tree_t *get_field_split( struct colm_program *prg, tree_t *tree, word_t field ); -tree_t *get_rhs_el( struct colm_program *prg, tree_t *lhs, long position ); -kid_t *get_rhs_el_kid( struct colm_program *prg, tree_t *lhs, long position ); -parse_tree_t *get_rhs_parse_tree( struct colm_program *prg, - parse_tree_t *lhs, long position ); -void colm_tree_set_field( struct colm_program *prg, tree_t *tree, long field, tree_t *value ); - -void set_triter_cur( struct colm_program *prg, tree_iter_t *iter, tree_t *tree ); -void set_uiter_cur( struct colm_program *prg, user_iter_t *uiter, tree_t *tree ); -void ref_set_value( struct colm_program *prg, tree_t **sp, ref_t *ref, tree_t *v ); -tree_t *tree_search( struct colm_program *prg, tree_t *tree, long id ); - -int match_pattern( tree_t **bindings, struct colm_program *prg, - long pat, kid_t *kid, int check_next ); -tree_t *tree_iter_deref_cur( tree_iter_t *iter ); - -/* For making references of attributes. 
*/ -kid_t *get_field_kid( tree_t *tree, word_t field ); - -tree_t *copy_real_tree( struct colm_program *prg, tree_t *tree, - kid_t *old_next_down, kid_t **new_next_down ); -void split_iter_cur( struct colm_program *prg, tree_t ***psp, tree_iter_t *iter ); -tree_t *set_list_mem( list_t *list, half_t field, tree_t *value ); - -void list_push_tail( struct colm_program *prg, list_t *list, tree_t *val ); -void list_push_head( struct colm_program *prg, list_t *list, tree_t *val ); -tree_t *list_remove_end( struct colm_program *prg, list_t *list ); -tree_t *list_remove_head( struct colm_program *prg, list_t *list ); -tree_t *get_list_mem_split( struct colm_program *prg, list_t *list, word_t field ); -tree_t *get_parser_mem( parser_t *parser, word_t field ); - -tree_t *tree_iter_advance( struct colm_program *prg, tree_t ***psp, tree_iter_t *iter ); -tree_t *tree_iter_next_child( struct colm_program *prg, tree_t ***psp, tree_iter_t *iter ); -tree_t *tree_rev_iter_prev_child( struct colm_program *prg, tree_t ***psp, rev_tree_iter_t *iter ); -tree_t *tree_iter_next_repeat( struct colm_program *prg, tree_t ***psp, tree_iter_t *iter ); -tree_t *tree_iter_prev_repeat( struct colm_program *prg, tree_t ***psp, tree_iter_t *iter ); - -/* An automatically grown buffer for collecting tokens. Always reuses space; - * never down resizes. 
*/ -typedef struct colm_str_collect -{ - char *data; - int allocated; - int length; - struct indent_impl indent; -} str_collect_t; - -void init_str_collect( str_collect_t *collect ); -void str_collect_destroy( str_collect_t *collect ); -void str_collect_append( str_collect_t *collect, const char *data, long len ); -void str_collect_clear( str_collect_t *collect ); -tree_t *tree_trim( struct colm_program *prg, tree_t **sp, tree_t *tree ); - -void colm_print_tree_collect( struct colm_program *prg, tree_t **sp, - str_collect_t *collect, tree_t *tree, int trim ); - -void colm_print_tree_collect_a( struct colm_program *prg, tree_t **sp, - str_collect_t *collect, tree_t *tree, int trim ); - -void colm_print_tree_file( struct colm_program *prg, tree_t **sp, - struct stream_impl_data *impl, tree_t *tree, int trim ); -void colm_print_xml_stdout( struct colm_program *prg, tree_t **sp, - struct stream_impl_data *impl, tree_t *tree, int comm_attr, int trim ); - -void colm_postfix_tree_collect( struct colm_program *prg, tree_t **sp, - str_collect_t *collect, tree_t *tree, int trim ); -void colm_postfix_tree_file( struct colm_program *prg, tree_t **sp, - struct stream_impl *impl, tree_t *tree, int trim ); - -/* - * Iterators. 
- */ - -user_iter_t *colm_uiter_create( struct colm_program *prg, tree_t ***psp, - struct function_info *fi, long search_id ); -void uiter_init( struct colm_program *prg, tree_t **sp, user_iter_t *uiter, - struct function_info *fi, int revert_on ); - -void colm_init_tree_iter( tree_iter_t *tree_iter, tree_t **stack_root, - long arg_size, long root_size, const ref_t *root_ref, int search_id ); -void colm_init_rev_tree_iter( rev_tree_iter_t *rev_triter, tree_t **stack_root, - long arg_size, long root_size, const ref_t *root_ref, int search_id, int children ); -void colm_init_user_iter( user_iter_t *user_iter, tree_t **stack_root, long root_size, - long arg_size, long search_id ); - -void colm_tree_iter_destroy( struct colm_program *prg, - tree_t ***psp, tree_iter_t *iter ); - -void colm_rev_tree_iter_destroy( struct colm_program *prg, - tree_t ***psp, rev_tree_iter_t *iter ); - -void colm_uiter_destroy( struct colm_program *prg, tree_t ***psp, user_iter_t *uiter ); -void colm_uiter_unwind( struct colm_program *prg, tree_t ***psp, user_iter_t *uiter ); - -tree_t *cast_tree( struct colm_program *prg, int lang_el_id, tree_t *tree ); - -void colm_init_list_iter( generic_iter_t *list_iter, tree_t **stack_root, - long arg_size, long root_size, const ref_t *root_ref, int generic_id ); -void colm_list_iter_destroy( struct colm_program *prg, - tree_t ***psp, generic_iter_t *iter ); - -tree_t *colm_list_iter_advance( struct colm_program *prg, - tree_t ***psp, generic_iter_t *iter ); -tree_t *colm_rev_list_iter_advance( struct colm_program *prg, - tree_t ***psp, generic_iter_t *iter ); - -tree_t *colm_list_iter_deref_cur( struct colm_program *prg, generic_iter_t *iter ); -void colm_list_append( struct colm_list *list, struct colm_list_el *new_el ); -void colm_list_prepend( struct colm_list *list, struct colm_list_el *new_el ); - -void colm_vlist_append( struct colm_program *prg, list_t *list, value_t value ); -void colm_vlist_prepend( struct colm_program *prg, list_t *list, 
value_t value ); -value_t colm_vlist_detach_head( struct colm_program *prg, list_t *list ); -value_t colm_vlist_detach_tail( struct colm_program *prg, list_t *list ); - -value_t colm_viter_deref_cur( struct colm_program *prg, generic_iter_t *iter ); - -str_t *string_prefix( program_t *prg, str_t *str, long len ); -str_t *string_suffix( program_t *prg, str_t *str, long pos ); -head_t *string_alloc_full( struct colm_program *prg, const char *data, long length ); -tree_t *construct_string( struct colm_program *prg, head_t *s ); - -void free_kid_list( program_t *prg, kid_t *kid ); - -void colm_print_tree_collect_xml( program_t *prg, tree_t **sp, - str_collect_t *collect, tree_t *tree, int trim ); - -void colm_print_tree_collect_xml_ac( program_t *prg, tree_t **sp, - str_collect_t *collect, tree_t *tree, int trim ); - -head_t *tree_to_str( program_t *prg, tree_t **sp, tree_t *tree, int trim, int attrs ); - -#if defined(__cplusplus) -} -#endif - -#endif /* COLM_TREE_H */ - diff --git a/src/type.h b/src/type.h deleted file mode 100644 index dca8f2ad..00000000 --- a/src/type.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright 2007-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef _COLM_TYPE_H -#define _COLM_TYPE_H - -enum TYPE -{ - TYPE_NOTYPE = 0x00, - TYPE_NIL = 0x01, - TYPE_TREE = 0x02, - TYPE_REF = 0x03, - TYPE_ITER = 0x04, - TYPE_STRUCT = 0x05, - TYPE_GENERIC = 0x06, - TYPE_INT = 0x07, - TYPE_BOOL = 0x08, - TYPE_LIST_PTRS = 0x09, - TYPE_MAP_PTRS = 0x0a, - TYPE_VOID = 0x0b -}; - -#endif /* _COLM_TYPE_H */ - diff --git a/src/version.h.cmake.in b/src/version.h.cmake.in deleted file mode 100644 index 3e4c310f..00000000 --- a/src/version.h.cmake.in +++ /dev/null @@ -1,9 +0,0 @@ -/* version.h Generated from version.h.cmake.in by cmake */ - -#ifndef _COLM_VERSION_H -#define _COLM_VERSION_H - -#cmakedefine VERSION "@VERSION@" -#cmakedefine PUBDATE "@PUBDATE@" - -#endif /* _COLM_VERSION_H */ |